// stash/pkg/scraper/scrapers.go

package scraper

import (
	"encoding/json"
	"errors"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"

	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/manager/config"
	"github.com/stashapp/stash/pkg/models"
)

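// ScraperMethod identifies how a scraper is implemented: as an external
// script, or built in to the application.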
type ScraperMethod string

const (
	ScraperMethodScript  ScraperMethod = "SCRIPT"
	ScraperMethodBuiltin ScraperMethod = "BUILTIN"
)

// AllScraperMethod lists the methods usable in scraper config files. Built-in
// scrapers are added in code (see loadScrapers), so only SCRIPT appears here.
var AllScraperMethod = []ScraperMethod{
	ScraperMethodScript,
}

func (e ScraperMethod) IsValid() bool {
	switch e {
	case ScraperMethodScript:
		return true
	}
	return false
}

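// scraperConfig represents a single scraper, as decoded from a JSON config
// file in the scrapers directory. The scrape*Func fields are wired up after
// decoding, based on the configured method.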
type scraperConfig struct {
	ID     string             `json:"id"`
	Name   string             `json:"name"`
	Type   models.ScraperType `json:"type"`
	Method ScraperMethod      `json:"method"`
	URLs   []string           `json:"urls"`

	GetPerformerNames []string `json:"get_performer_names"`
	GetPerformer      []string `json:"get_performer"`
	GetPerformerURL   []string `json:"get_performer_url"`

	scrapePerformerNamesFunc func(c scraperConfig, name string) ([]*models.ScrapedPerformer, error)
	scrapePerformerFunc      func(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
	scrapePerformerURLFunc   func(c scraperConfig, url string) (*models.ScrapedPerformer, error)
}

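// A minimal sketch of a script scraper config file, assuming a hypothetical
// example.py that reads the JSON input from stdin and writes JSON to stdout.
// The field names match the json tags above; the command, type, and URL
// values are illustrative only. The id field is not included, since it is
// derived from the filename (see loadScraper):
//
//	{
//	  "name": "Example",
//	  "type": "PERFORMER",
//	  "method": "SCRIPT",
//	  "urls": ["example.com"],
//	  "get_performer_names": ["python", "example.py", "names"],
//	  "get_performer": ["python", "example.py", "performer"],
//	  "get_performer_url": ["python", "example.py", "url"]
//	}
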
func (c scraperConfig) toScraper() *models.Scraper {
	ret := models.Scraper{
		ID:   c.ID,
		Name: c.Name,
		Type: c.Type,
		Urls: c.URLs,
	}

	// determine supported actions
	if len(c.URLs) > 0 {
		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeURL)
	}
	if c.scrapePerformerNamesFunc != nil && c.scrapePerformerFunc != nil {
		ret.SupportedScrapes = append(ret.SupportedScrapes, models.ScrapeTypeQuery)
	}

	return &ret
}

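// postDecode wires up the scraping functions for the configured method.
// Functions are only set where the corresponding config fields are present;
// toScraper reports the supported scrape types accordingly.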
func (c *scraperConfig) postDecode() {
	if c.Method == ScraperMethodScript {
		// query support needs both scripts: one to search names, one to
		// scrape the selected performer (see toScraper)
		if len(c.GetPerformer) > 0 && len(c.GetPerformerNames) > 0 {
			c.scrapePerformerNamesFunc = scrapePerformerNamesScript
			c.scrapePerformerFunc = scrapePerformerScript
		}
		if len(c.GetPerformerURL) > 0 {
			c.scrapePerformerURLFunc = scrapePerformerURLScript
		}
	}
}

func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
	return c.scrapePerformerNamesFunc(c, name)
}

func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	return c.scrapePerformerFunc(c, scrapedPerformer)
}

func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
	return c.scrapePerformerURLFunc(c, url)
}

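// runScraperScript executes the given command with inString written to its
// stdin, and decodes the JSON that the script writes to stdout into out.
// For example, a get_performer_url script receives {"url": "..."} and is
// expected to print a JSON-encoded performer. Anything the script writes to
// stderr is logged if the command fails.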
func runScraperScript(command []string, inString string, out interface{}) error {
	cmd := exec.Command(command[0], command[1:]...)
	cmd.Dir = config.GetScrapersPath()

	stdin, err := cmd.StdinPipe()
	if err != nil {
		return err
	}

	go func() {
		defer stdin.Close()
		io.WriteString(stdin, inString)
	}()

	stderr, err := cmd.StderrPipe()
	if err != nil {
		logger.Error("Scraper stderr not available: " + err.Error())
	}

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		logger.Error("Scraper stdout not available: " + err.Error())
	}

	if err = cmd.Start(); err != nil {
		return errors.New("error running scraper script")
	}

	// TODO - add a timeout here
	decodeErr := json.NewDecoder(stdout).Decode(out)
	stderrData, _ := io.ReadAll(stderr)
	stderrString := string(stderrData)

	if err = cmd.Wait(); err != nil {
		// the error message should be in the stderr stream
		logger.Errorf("scraper error when running command <%s>: %s", strings.Join(cmd.Args, " "), stderrString)
		return errors.New("error running scraper script")
	}

	if decodeErr != nil {
		logger.Errorf("error decoding performer from scraper data: %s", decodeErr.Error())
		return errors.New("error decoding performer from scraper script")
	}

	return nil
}

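// scrapePerformerNamesScript runs the get_performer_names script with the
// query name as JSON input, expecting a JSON list of performers on stdout.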
func scrapePerformerNamesScript(c scraperConfig, name string) ([]*models.ScrapedPerformer, error) {
	// marshal rather than concatenate, so that quotes in the name are escaped
	inString, err := json.Marshal(map[string]string{"name": name})
	if err != nil {
		return nil, err
	}

	var performers []models.ScrapedPerformer
	err = runScraperScript(c.GetPerformerNames, string(inString), &performers)

	// convert to pointers
	var ret []*models.ScrapedPerformer
	if err == nil {
		for i := 0; i < len(performers); i++ {
			ret = append(ret, &performers[i])
		}
	}

	return ret, err
}

func scrapePerformerScript(c scraperConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	inString, err := json.Marshal(scrapedPerformer)
	if err != nil {
		return nil, err
	}

	var ret models.ScrapedPerformer
	err = runScraperScript(c.GetPerformer, string(inString), &ret)
	return &ret, err
}

func scrapePerformerURLScript(c scraperConfig, url string) (*models.ScrapedPerformer, error) {
	// marshal rather than concatenate, so that quotes in the URL are escaped
	inString, err := json.Marshal(map[string]string{"url": url})
	if err != nil {
		return nil, err
	}

	var ret models.ScrapedPerformer
	err = runScraperScript(c.GetPerformerURL, string(inString), &ret)
	return &ret, err
}

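// scrapers caches the loaded scraper configs; loadScrapers populates it on
// first use.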
var scrapers []scraperConfig
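
// loadScraper reads a single scraper config file, using the file's base name
// (without extension) as the scraper ID.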
func loadScraper(path string) (*scraperConfig, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var scraper scraperConfig
	if err := json.NewDecoder(file).Decode(&scraper); err != nil {
		return nil, err
	}

	// set id to the filename, without its extension
	id := filepath.Base(path)
	scraper.ID = strings.TrimSuffix(id, filepath.Ext(id))

	scraper.postDecode()

	return &scraper, nil
}

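// loadScrapers reads every *.json config from the scrapers path, prepends the
// built-in freeones scraper, and caches the result for subsequent calls.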
func loadScrapers() ([]scraperConfig, error) {
	if scrapers != nil {
		return scrapers, nil
	}

	path := config.GetScrapersPath()
	scrapers = make([]scraperConfig, 0)

	logger.Debugf("Reading scraper configs from %s", path)
	scraperFiles, err := filepath.Glob(filepath.Join(path, "*.json"))
	if err != nil {
		logger.Errorf("Error reading scraper configs: %s", err.Error())
		return nil, err
	}

	// add built-in freeones scraper
	scrapers = append(scrapers, GetFreeonesScraper())

	for _, file := range scraperFiles {
		scraper, err := loadScraper(file)
		if err != nil {
			logger.Errorf("Error loading scraper %s: %s", file, err.Error())
		} else {
			scrapers = append(scrapers, *scraper)
		}
	}

	return scrapers, nil
}

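// ListScrapers returns the loaded scrapers of the given type, described in
// terms of the scrape actions they support.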
func ListScrapers(scraperType models.ScraperType) ([]*models.Scraper, error) {
	// read scraper config files from the directory and cache
	scrapers, err := loadScrapers()
	if err != nil {
		return nil, err
	}

	var ret []*models.Scraper
	for _, s := range scrapers {
		// filter on type
		if s.Type == scraperType {
			ret = append(ret, s.toScraper())
		}
	}

	return ret, nil
}

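// findPerformerScraper returns the scraper config with the given ID, or nil
// if none matches.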
func findPerformerScraper(scraperID string) *scraperConfig {
	// read scraper config files from the directory and cache
	loadScrapers()

	for _, s := range scrapers {
		if s.ID == scraperID {
			return &s
		}
	}

	return nil
}

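// findPerformerScraperURL returns the first scraper config with a configured
// URL fragment that the given URL contains, or nil if none matches.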
func findPerformerScraperURL(url string) *scraperConfig {
	// read scraper config files from the directory and cache
	loadScrapers()

	for _, s := range scrapers {
		for _, thisURL := range s.URLs {
			if strings.Contains(url, thisURL) {
				return &s
			}
		}
	}

	return nil
}

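// ScrapePerformerList queries the scraper with the provided ID for performers
// matching the given name.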
func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
	// find scraper with the provided id
	s := findPerformerScraper(scraperID)
	if s != nil {
		return s.ScrapePerformerNames(query)
	}

	return nil, errors.New("scraper with ID " + scraperID + " not found")
}

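// ScrapePerformer scrapes full performer details using the scraper with the
// provided ID.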
func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
	// find scraper with the provided id
	s := findPerformerScraper(scraperID)
	if s != nil {
		return s.ScrapePerformer(scrapedPerformer)
	}

	return nil, errors.New("scraper with ID " + scraperID + " not found")
}

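// ScrapePerformerURL scrapes performer details from the given URL, using the
// first scraper that matches it. A nil performer is returned if no scraper
// matches.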
func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
	// find scraper that matches the url given
	s := findPerformerScraperURL(url)
	if s != nil {
		return s.ScrapePerformerURL(url)
	}

	return nil, nil
}