stash/pkg/scraper/config.go

349 lines
8.8 KiB
Go

package scraper
import (
"io"
"os"
"path/filepath"
"strings"
"gopkg.in/yaml.v2"
"github.com/stashapp/stash/pkg/models"
)
type stashServer struct {
URL string `yaml:"url"`
}
type scraperAction string
const (
scraperActionScript scraperAction = "script"
scraperActionStash scraperAction = "stash"
scraperActionXPath scraperAction = "scrapeXPath"
)
var allScraperAction = []scraperAction{
scraperActionScript,
scraperActionStash,
scraperActionXPath,
}
func (e scraperAction) IsValid() bool {
switch e {
case scraperActionScript, scraperActionStash, scraperActionXPath:
return true
}
return false
}
type scraperTypeConfig struct {
Action scraperAction `yaml:"action"`
Script []string `yaml:"script,flow"`
Scraper string `yaml:"scraper"`
// for xpath name scraper only
QueryURL string `yaml:"queryURL"`
scraperConfig *scraperConfig
}
type scrapePerformerNamesFunc func(c scraperTypeConfig, name string) ([]*models.ScrapedPerformer, error)
type performerByNameConfig struct {
scraperTypeConfig `yaml:",inline"`
performScrape scrapePerformerNamesFunc
}
func (c *performerByNameConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapePerformerNamesScript
} else if c.Action == scraperActionStash {
c.performScrape = scrapePerformerNamesStash
} else if c.Action == scraperActionXPath {
c.performScrape = scrapePerformerNamesXPath
}
}
type scrapePerformerFragmentFunc func(c scraperTypeConfig, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
type performerByFragmentConfig struct {
scraperTypeConfig `yaml:",inline"`
performScrape scrapePerformerFragmentFunc
}
func (c *performerByFragmentConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapePerformerFragmentScript
} else if c.Action == scraperActionStash {
c.performScrape = scrapePerformerFragmentStash
}
}
type scrapeByURLConfig struct {
scraperTypeConfig `yaml:",inline"`
URL []string `yaml:"url,flow"`
}
func (c scrapeByURLConfig) matchesURL(url string) bool {
for _, thisURL := range c.URL {
if strings.Contains(url, thisURL) {
return true
}
}
return false
}
type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)
type scrapePerformerByURLConfig struct {
scrapeByURLConfig `yaml:",inline"`
performScrape scrapePerformerByURLFunc
}
func (c *scrapePerformerByURLConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapePerformerURLScript
} else if c.Action == scraperActionXPath {
c.performScrape = scrapePerformerURLXpath
}
}
type scrapeSceneFragmentFunc func(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error)
type sceneByFragmentConfig struct {
scraperTypeConfig `yaml:",inline"`
performScrape scrapeSceneFragmentFunc
}
func (c *sceneByFragmentConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapeSceneFragmentScript
} else if c.Action == scraperActionStash {
c.performScrape = scrapeSceneFragmentStash
}
}
type scrapeSceneByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedScene, error)
type scrapeSceneByURLConfig struct {
scrapeByURLConfig `yaml:",inline"`
performScrape scrapeSceneByURLFunc
}
func (c *scrapeSceneByURLConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapeSceneURLScript
} else if c.Action == scraperActionXPath {
c.performScrape = scrapeSceneURLXPath
}
}
type scraperDebugOptions struct {
PrintHTML bool `yaml:"printHTML"`
}
type scraperConfig struct {
ID string
Name string `yaml:"name"`
PerformerByName *performerByNameConfig `yaml:"performerByName"`
PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"`
SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"`
SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
DebugOptions *scraperDebugOptions `yaml:"debug"`
StashServer *stashServer `yaml:"stashServer"`
XPathScrapers xpathScrapers `yaml:"xPathScrapers"`
}
func loadScraperFromYAML(id string, reader io.Reader) (*scraperConfig, error) {
ret := &scraperConfig{}
parser := yaml.NewDecoder(reader)
parser.SetStrict(true)
err := parser.Decode(&ret)
if err != nil {
return nil, err
}
ret.ID = id
// set the scraper interface
ret.initialiseConfigs()
return ret, nil
}
func loadScraperFromYAMLFile(path string) (*scraperConfig, error) {
file, err := os.Open(path)
defer file.Close()
if err != nil {
return nil, err
}
// set id to the filename
id := filepath.Base(path)
id = id[:strings.LastIndex(id, ".")]
return loadScraperFromYAML(id, file)
}
func (c *scraperConfig) initialiseConfigs() {
if c.PerformerByName != nil {
c.PerformerByName.resolveFn()
c.PerformerByName.scraperConfig = c
}
if c.PerformerByFragment != nil {
c.PerformerByFragment.resolveFn()
c.PerformerByFragment.scraperConfig = c
}
for _, s := range c.PerformerByURL {
s.resolveFn()
s.scraperConfig = c
}
if c.SceneByFragment != nil {
c.SceneByFragment.resolveFn()
c.SceneByFragment.scraperConfig = c
}
for _, s := range c.SceneByURL {
s.resolveFn()
s.scraperConfig = c
}
}
func (c scraperConfig) toScraper() *models.Scraper {
ret := models.Scraper{
ID: c.ID,
Name: c.Name,
}
performer := models.ScraperSpec{}
if c.PerformerByName != nil {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
}
if c.PerformerByFragment != nil {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.PerformerByURL) > 0 {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.PerformerByURL {
performer.Urls = append(performer.Urls, v.URL...)
}
}
if len(performer.SupportedScrapes) > 0 {
ret.Performer = &performer
}
scene := models.ScraperSpec{}
if c.SceneByFragment != nil {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.SceneByURL) > 0 {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.SceneByURL {
scene.Urls = append(scene.Urls, v.URL...)
}
}
if len(scene.SupportedScrapes) > 0 {
ret.Scene = &scene
}
return &ret
}
func (c scraperConfig) supportsPerformers() bool {
return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
}
func (c scraperConfig) matchesPerformerURL(url string) bool {
for _, scraper := range c.PerformerByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c scraperConfig) ScrapePerformerNames(name string) ([]*models.ScrapedPerformer, error) {
if c.PerformerByName != nil && c.PerformerByName.performScrape != nil {
return c.PerformerByName.performScrape(c.PerformerByName.scraperTypeConfig, name)
}
return nil, nil
}
func (c scraperConfig) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
if c.PerformerByFragment != nil && c.PerformerByFragment.performScrape != nil {
return c.PerformerByFragment.performScrape(c.PerformerByFragment.scraperTypeConfig, scrapedPerformer)
}
// try to match against URL if present
if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" {
return c.ScrapePerformerURL(*scrapedPerformer.URL)
}
return nil, nil
}
func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
for _, scraper := range c.PerformerByURL {
if scraper.matchesURL(url) && scraper.performScrape != nil {
ret, err := scraper.performScrape(scraper.scraperTypeConfig, url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}
func (c scraperConfig) supportsScenes() bool {
return c.SceneByFragment != nil || len(c.SceneByURL) > 0
}
func (c scraperConfig) matchesSceneURL(url string) bool {
for _, scraper := range c.SceneByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c scraperConfig) ScrapeScene(scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
if c.SceneByFragment != nil && c.SceneByFragment.performScrape != nil {
return c.SceneByFragment.performScrape(c.SceneByFragment.scraperTypeConfig, scene)
}
return nil, nil
}
func (c scraperConfig) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
for _, scraper := range c.SceneByURL {
if scraper.matchesURL(url) && scraper.performScrape != nil {
ret, err := scraper.performScrape(scraper.scraperTypeConfig, url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}