stash/pkg/scraper/config.go

package scraper
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"gopkg.in/yaml.v2"
"github.com/stashapp/stash/pkg/models"
)
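// config describes a single scraper as defined by its YAML file.
//
// Illustrative only: a minimal document that would decode into this struct,
// using the yaml tags declared below. The name, script and URL values are
// hypothetical and not taken from any real scraper.
//
//   name: Example
//   performerByName:
//     action: script
//     script:
//       - python3
//       - example_scraper.py
//   sceneByURL:
//     - action: script
//       script:
//         - python3
//         - example_scraper.py
//       url:
//         - example.com/scenes/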
type config struct {
ID string
path string
// The name of the scraper. This is displayed in the UI.
Name string `yaml:"name"`
// Configuration for querying performers by name
PerformerByName *scraperTypeConfig `yaml:"performerByName"`
// Configuration for querying performers by a Performer fragment
PerformerByFragment *scraperTypeConfig `yaml:"performerByFragment"`
// Configuration for querying a performer by a URL
PerformerByURL []*scrapeByURLConfig `yaml:"performerByURL"`
// Configuration for querying scenes by a Scene fragment
SceneByFragment *scraperTypeConfig `yaml:"sceneByFragment"`
// Configuration for querying galleries by a Gallery fragment
GalleryByFragment *scraperTypeConfig `yaml:"galleryByFragment"`
// Configuration for querying a scene by a URL
SceneByURL []*scrapeByURLConfig `yaml:"sceneByURL"`
// Configuration for querying a gallery by a URL
GalleryByURL []*scrapeByURLConfig `yaml:"galleryByURL"`
// Configuration for querying a movie by a URL
MovieByURL []*scrapeByURLConfig `yaml:"movieByURL"`
// Scraper debugging options
DebugOptions *scraperDebugOptions `yaml:"debug"`
// Stash server configuration
StashServer *stashServer `yaml:"stashServer"`
// XPath scraping configurations
XPathScrapers mappedScrapers `yaml:"xPathScrapers"`
// JSON scraping configurations
JsonScrapers mappedScrapers `yaml:"jsonScrapers"`
// Scraping driver options
DriverOptions *scraperDriverOptions `yaml:"driver"`
}
func (c config) validate() error {
if strings.TrimSpace(c.Name) == "" {
return errors.New("name must not be empty")
}
if c.PerformerByName != nil {
if err := c.PerformerByName.validate(); err != nil {
return err
}
}
if c.PerformerByFragment != nil {
if err := c.PerformerByFragment.validate(); err != nil {
return err
}
}
if c.SceneByFragment != nil {
if err := c.SceneByFragment.validate(); err != nil {
return err
}
}
for _, s := range c.PerformerByURL {
if err := s.validate(); err != nil {
return err
}
}
for _, s := range c.SceneByURL {
if err := s.validate(); err != nil {
return err
}
}
for _, s := range c.MovieByURL {
if err := s.validate(); err != nil {
return err
}
}
return nil
}
type stashServer struct {
URL string `yaml:"url"`
}
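// Illustrative only: for a mapped (xpath) name scraper, queryURL typically
// holds a search URL containing a placeholder that is substituted with the
// query term. The action value, placeholder syntax and URL below are
// assumptions added for illustration, not taken from this file:
//
//   performerByName:
//     action: scrapeXPath
//     scraper: performerSearch
//     queryURL: https://example.com/search?q={}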
type scraperTypeConfig struct {
Action scraperAction `yaml:"action"`
Script []string `yaml:"script,flow"`
Scraper string `yaml:"scraper"`
// for xpath name scraper only
QueryURL string `yaml:"queryURL"`
QueryURLReplacements queryURLReplacements `yaml:"queryURLReplace"`
}
func (c scraperTypeConfig) validate() error {
if !c.Action.IsValid() {
return fmt.Errorf("%s is not a valid scraper action", c.Action)
}
if c.Action == scraperActionScript && len(c.Script) == 0 {
return errors.New("script is mandatory for script scraper action")
}
return nil
}
type scrapeByURLConfig struct {
scraperTypeConfig `yaml:",inline"`
URL []string `yaml:"url,flow"`
}
func (c scrapeByURLConfig) validate() error {
if len(c.URL) == 0 {
return errors.New("url is mandatory for scrape by url scrapers")
}
return c.scraperTypeConfig.validate()
}
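// matchesURL reports whether url matches one of the configured URL patterns.
// Matching is a plain substring test (strings.Contains), not a regex or glob:
// for example, a config listing "example.com/scenes" would match
// "https://example.com/scenes/123" (the domain here is purely illustrative).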
func (c scrapeByURLConfig) matchesURL(url string) bool {
for _, thisURL := range c.URL {
if strings.Contains(url, thisURL) {
return true
}
}
return false
}
type scraperDebugOptions struct {
PrintHTML bool `yaml:"printHTML"`
}
type scraperCookies struct {
Name string `yaml:"Name"`
Value string `yaml:"Value"`
Domain string `yaml:"Domain"`
Path string `yaml:"Path"`
}
type cookieOptions struct {
CookieURL string `yaml:"CookieURL"`
Cookies []*scraperCookies `yaml:"Cookies"`
}
type clickOptions struct {
XPath string `yaml:"xpath"`
Sleep int `yaml:"sleep"`
}
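// scraperDriverOptions controls how pages are fetched, including the optional
// CDP (Chrome DevTools Protocol) driver. Illustrative only: a driver block
// that would decode into these options, with hypothetical values.
//
//   driver:
//     useCDP: true
//     sleep: 2
//     clicks:
//       - xpath: //button[@id="agree"]
//         sleep: 1
//     cookies:
//       - CookieURL: https://example.com
//         Cookies:
//           - Name: session
//             Value: "1234"
//             Domain: example.com
//             Path: /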
type scraperDriverOptions struct {
UseCDP bool `yaml:"useCDP"`
Sleep int `yaml:"sleep"`
Clicks []*clickOptions `yaml:"clicks"`
Cookies []*cookieOptions `yaml:"cookies"`
}
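// loadScraperFromYAML decodes a scraper definition from reader. Decoding is
// strict, so unknown YAML fields are reported as errors, and the resulting
// config is validated before being returned.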
func loadScraperFromYAML(id string, reader io.Reader) (*config, error) {
ret := &config{}
parser := yaml.NewDecoder(reader)
parser.SetStrict(true)
err := parser.Decode(&ret)
if err != nil {
return nil, err
}
ret.ID = id
if err := ret.validate(); err != nil {
return nil, err
}
return ret, nil
}
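// loadScraperFromYAMLFile reads a single scraper definition from the YAML
// file at path, deriving the scraper ID from the file name without its
// extension: a hypothetical "scrapers/example.yml" yields the ID "example".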
func loadScraperFromYAMLFile(path string) (*config, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
// set the id to the filename, without its extension
id := filepath.Base(path)
id = strings.TrimSuffix(id, filepath.Ext(id))
ret, err := loadScraperFromYAML(id, file)
if err != nil {
return nil, err
}
ret.path = path
return ret, nil
}
func (c config) toScraper() *models.Scraper {
ret := models.Scraper{
ID: c.ID,
Name: c.Name,
}
performer := models.ScraperSpec{}
if c.PerformerByName != nil {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
}
if c.PerformerByFragment != nil {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.PerformerByURL) > 0 {
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.PerformerByURL {
performer.Urls = append(performer.Urls, v.URL...)
}
}
if len(performer.SupportedScrapes) > 0 {
ret.Performer = &performer
}
scene := models.ScraperSpec{}
if c.SceneByFragment != nil {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.SceneByURL) > 0 {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.SceneByURL {
scene.Urls = append(scene.Urls, v.URL...)
}
}
if len(scene.SupportedScrapes) > 0 {
ret.Scene = &scene
}
gallery := models.ScraperSpec{}
if c.GalleryByFragment != nil {
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.GalleryByURL) > 0 {
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.GalleryByURL {
gallery.Urls = append(gallery.Urls, v.URL...)
}
}
if len(gallery.SupportedScrapes) > 0 {
ret.Gallery = &gallery
}
movie := models.ScraperSpec{}
if len(c.MovieByURL) > 0 {
movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.MovieByURL {
movie.Urls = append(movie.Urls, v.URL...)
}
}
if len(movie.SupportedScrapes) > 0 {
ret.Movie = &movie
}
return &ret
}
func (c config) supportsPerformers() bool {
return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
}
func (c config) matchesPerformerURL(url string) bool {
for _, scraper := range c.PerformerByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c config) ScrapePerformerNames(name string, globalConfig GlobalConfig) ([]*models.ScrapedPerformer, error) {
if c.PerformerByName != nil {
s := getScraper(*c.PerformerByName, c, globalConfig)
return s.scrapePerformersByName(name)
}
return nil, nil
}
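// ScrapePerformer scrapes a single performer. A configured performerByFragment
// scraper takes precedence; otherwise, if the input carries a URL, the
// performerByURL scrapers are tried as a fallback.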
func (c config) ScrapePerformer(scrapedPerformer models.ScrapedPerformerInput, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
if c.PerformerByFragment != nil {
s := getScraper(*c.PerformerByFragment, c, globalConfig)
return s.scrapePerformerByFragment(scrapedPerformer)
}
// try to match against URL if present
if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" {
return c.ScrapePerformerURL(*scrapedPerformer.URL, globalConfig)
}
return nil, nil
}
func (c config) ScrapePerformerURL(url string, globalConfig GlobalConfig) (*models.ScrapedPerformer, error) {
for _, scraper := range c.PerformerByURL {
if scraper.matchesURL(url) {
s := getScraper(scraper.scraperTypeConfig, c, globalConfig)
ret, err := s.scrapePerformerByURL(url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}
func (c config) supportsScenes() bool {
return c.SceneByFragment != nil || len(c.SceneByURL) > 0
}
func (c config) supportsGalleries() bool {
return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0
}
func (c config) matchesSceneURL(url string) bool {
for _, scraper := range c.SceneByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c config) matchesGalleryURL(url string) bool {
for _, scraper := range c.GalleryByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c config) supportsMovies() bool {
return len(c.MovieByURL) > 0
}
func (c config) matchesMovieURL(url string) bool {
for _, scraper := range c.MovieByURL {
if scraper.matchesURL(url) {
return true
}
}
return false
}
func (c config) ScrapeScene(scene models.SceneUpdateInput, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
if c.SceneByFragment != nil {
s := getScraper(*c.SceneByFragment, c, globalConfig)
return s.scrapeSceneByFragment(scene)
}
return nil, nil
}
func (c config) ScrapeSceneURL(url string, globalConfig GlobalConfig) (*models.ScrapedScene, error) {
for _, scraper := range c.SceneByURL {
if scraper.matchesURL(url) {
s := getScraper(scraper.scraperTypeConfig, c, globalConfig)
ret, err := s.scrapeSceneByURL(url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}
func (c config) ScrapeGallery(gallery models.GalleryUpdateInput, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
if c.GalleryByFragment != nil {
s := getScraper(*c.GalleryByFragment, c, globalConfig)
return s.scrapeGalleryByFragment(gallery)
}
return nil, nil
}
func (c config) ScrapeGalleryURL(url string, globalConfig GlobalConfig) (*models.ScrapedGallery, error) {
for _, scraper := range c.GalleryByURL {
if scraper.matchesURL(url) {
s := getScraper(scraper.scraperTypeConfig, c, globalConfig)
ret, err := s.scrapeGalleryByURL(url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}
func (c config) ScrapeMovieURL(url string, globalConfig GlobalConfig) (*models.ScrapedMovie, error) {
for _, scraper := range c.MovieByURL {
if scraper.matchesURL(url) {
s := getScraper(scraper.scraperTypeConfig, c, globalConfig)
ret, err := s.scrapeMovieByURL(url)
if err != nil {
return nil, err
}
if ret != nil {
return ret, nil
}
}
}
return nil, nil
}