diff --git a/graphql/documents/queries/scrapers/freeones.graphql b/graphql/documents/queries/scrapers/freeones.graphql index 9f366786d..6dfa700a1 100644 --- a/graphql/documents/queries/scrapers/freeones.graphql +++ b/graphql/documents/queries/scrapers/freeones.graphql @@ -1,27 +1,3 @@ -query ScrapeFreeones($performer_name: String!) { - scrapeFreeones(performer_name: $performer_name) { - name - url - twitter - instagram - birthdate - ethnicity - country - eye_color - height - measurements - fake_tits - career_length - tattoos - piercings - aliases - details - death_date - hair_color - weight - } -} - query ScrapeFreeonesPerformers($q: String!) { scrapeFreeonesPerformerList(query: $q) } \ No newline at end of file diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql index e6f435e98..3f6419fed 100644 --- a/graphql/schema/schema.graphql +++ b/graphql/schema/schema.graphql @@ -67,10 +67,12 @@ type Query { # Scrapers """List available scrapers""" - listPerformerScrapers: [Scraper!]! - listSceneScrapers: [Scraper!]! - listGalleryScrapers: [Scraper!]! - listMovieScrapers: [Scraper!]! + listScrapers(types: [ScrapeContentType!]!): [Scraper!]! + listPerformerScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [PERFORMER])") + listSceneScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [SCENE])") + listGalleryScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [GALLERY])") + listMovieScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [MOVIE])") + """Scrape for a single scene""" scrapeSingleScene(source: ScraperSourceInput!, input: ScrapeSingleSceneInput!): [ScrapedScene!]! @@ -88,6 +90,9 @@ type Query { """Scrape for a single movie""" scrapeSingleMovie(source: ScraperSourceInput!, input: ScrapeSingleMovieInput!): [ScrapedMovie!]! + "Scrapes content based on a URL" + scrapeURL(url: String!, ty: ScrapeContentType!): ScrapedContent + """Scrapes a complete performer record based on a URL""" scrapePerformerURL(url: String!): ScrapedPerformer """Scrapes a complete performer record based on a URL""" @@ -106,8 +111,6 @@ type Query { """Scrapes a complete gallery record based on an existing gallery""" scrapeGallery(scraper_id: ID!, gallery: GalleryUpdateInput!): ScrapedGallery @deprecated(reason: "use scrapeSingleGallery") - """Scrape a performer using Freeones""" - scrapeFreeones(performer_name: String!): ScrapedPerformer @deprecated(reason: "use scrapeSinglePerformer with scraper_id = builtin_freeones") """Scrape a list of performers from a query""" scrapeFreeonesPerformerList(query: String!): [String!]! @deprecated(reason: "use scrapeSinglePerformer with scraper_id = builtin_freeones") diff --git a/graphql/schema/types/scraper.graphql b/graphql/schema/types/scraper.graphql index ebe338e1c..fb0f9ce89 100644 --- a/graphql/schema/types/scraper.graphql +++ b/graphql/schema/types/scraper.graphql @@ -1,5 +1,5 @@ enum ScrapeType { - """From text query""" + """From text query""" NAME """From existing object""" FRAGMENT @@ -7,6 +7,22 @@ enum ScrapeType { URL } +"Type of the content a scraper generates" +enum ScrapeContentType { + GALLERY + MOVIE + PERFORMER + SCENE +} + +"ScrapedContent is the union of the different content types scrapers can produce" +union ScrapedContent = ScrapedStudio + | ScrapedTag + | ScrapedScene + | ScrapedGallery + | ScrapedMovie + | ScrapedPerformer + type ScraperSpec { """URLs matching these can be scraped with""" urls: [String!]
@@ -26,6 +42,7 @@ type Scraper { movie: ScraperSpec } + type ScrapedStudio { """Set if studio matched""" stored_id: ID diff --git a/pkg/api/resolver.go b/pkg/api/resolver.go index 2317f64e5..bf588e93d 100644 --- a/pkg/api/resolver.go +++ b/pkg/api/resolver.go @@ -7,13 +7,22 @@ import ( "strconv" "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/manager" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/plugin" + "github.com/stashapp/stash/pkg/scraper" ) var ( + // ErrNotImplemented signals that the given functionality isn't implemented by the API. ErrNotImplemented = errors.New("not implemented") - ErrNotSupported = errors.New("not supported") + + // ErrNotSupported is returned when the given parameters aren't supported by the system; + // callers can test for this error and guard against the unsupported case. + ErrNotSupported = errors.New("not supported") + + // ErrInput signifies errors where the input isn't valid for some reason and no more specific error exists. + ErrInput = errors.New("input error") ) type hookExecutor interface { @@ -25,6 +34,10 @@ type Resolver struct { hookExecutor hookExecutor } +func (r *Resolver) scraperCache() *scraper.Cache { + return manager.GetInstance().ScraperCache +} + func (r *Resolver) Gallery() models.GalleryResolver { return &galleryResolver{r} } diff --git a/pkg/api/resolver_query_scraper.go b/pkg/api/resolver_query_scraper.go index 6bf542730..ed65ac44d 100644 --- a/pkg/api/resolver_query_scraper.go +++ b/pkg/api/resolver_query_scraper.go @@ -6,53 +6,57 @@ import ( "fmt" "strconv" - "github.com/stashapp/stash/pkg/manager" "github.com/stashapp/stash/pkg/manager/config" "github.com/stashapp/stash/pkg/models" "github.com/stashapp/stash/pkg/scraper" "github.com/stashapp/stash/pkg/scraper/stashbox" ) -// deprecated -func (r *queryResolver) ScrapeFreeones(ctx context.Context, performer_name string) (*models.ScrapedPerformer, error) { - scrapedPerformer := models.ScrapedPerformerInput{ - Name: &performer_name, - } - return manager.GetInstance().ScraperCache.ScrapePerformer(scraper.FreeonesScraperID, scrapedPerformer) +func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + return r.scraperCache().ScrapeURL(ctx, url, ty) } // deprecated func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query string) ([]string, error) { - scrapedPerformers, err := manager.GetInstance().ScraperCache.ScrapePerformerList(scraper.FreeonesScraperID, query) + content, err := r.scraperCache().ScrapeName(ctx, scraper.FreeonesScraperID, query, models.ScrapeContentTypePerformer) if err != nil { return nil, err } + performers, err := marshalScrapedPerformers(content) + if err != nil { + return nil, err + } + var ret []string - for _, v := range scrapedPerformers { - if v.Name != nil { - ret = append(ret, *v.Name) + for _, p := range performers { + if p.Name != nil { + ret = append(ret, *p.Name) } } return ret, nil } +func (r *queryResolver) ListScrapers(ctx context.Context, types []models.ScrapeContentType) ([]*models.Scraper, error) { + return r.scraperCache().ListScrapers(types), nil +} + func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Scraper, error) { - return manager.GetInstance().ScraperCache.ListPerformerScrapers(), nil + return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypePerformer}), nil } func (r *queryResolver) ListSceneScrapers(ctx context.Context)
([]*models.Scraper, error) { - return manager.GetInstance().ScraperCache.ListSceneScrapers(), nil + return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeScene}), nil } func (r *queryResolver) ListGalleryScrapers(ctx context.Context) ([]*models.Scraper, error) { - return manager.GetInstance().ScraperCache.ListGalleryScrapers(), nil + return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeGallery}), nil } func (r *queryResolver) ListMovieScrapers(ctx context.Context) ([]*models.Scraper, error) { - return manager.GetInstance().ScraperCache.ListMovieScrapers(), nil + return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeMovie}), nil } func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) { @@ -60,15 +64,29 @@ func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID strin return nil, nil } - return manager.GetInstance().ScraperCache.ScrapePerformerList(scraperID, query) + content, err := r.scraperCache().ScrapeName(ctx, scraperID, query, models.ScrapeContentTypePerformer) + if err != nil { + return nil, err + } + + return marshalScrapedPerformers(content) } func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - return manager.GetInstance().ScraperCache.ScrapePerformer(scraperID, scrapedPerformer) + content, err := r.scraperCache().ScrapeFragment(ctx, scraperID, scraper.Input{Performer: &scrapedPerformer}) + if err != nil { + return nil, err + } + return marshalScrapedPerformer(content) } func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { - return manager.GetInstance().ScraperCache.ScrapePerformerURL(url) + content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypePerformer) + if err != nil { + return nil, err + } + + return marshalScrapedPerformer(content) } func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string, query string) ([]*models.ScrapedScene, error) { @@ -76,44 +94,74 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string, return nil, nil } - return manager.GetInstance().ScraperCache.ScrapeSceneQuery(scraperID, query) + content, err := r.scraperCache().ScrapeName(ctx, scraperID, query, models.ScrapeContentTypeScene) + if err != nil { + return nil, err + } + + return marshalScrapedScenes(content) } func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) { id, err := strconv.Atoi(scene.ID) + if err != nil { + return nil, fmt.Errorf("%w: scene.ID is not an integer: '%s'", ErrInput, scene.ID) + } + + content, err := r.scraperCache().ScrapeID(ctx, scraperID, id, models.ScrapeContentTypeScene) if err != nil { return nil, err } - return manager.GetInstance().ScraperCache.ScrapeScene(scraperID, id) + return marshalScrapedScene(content) } func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) { - return manager.GetInstance().ScraperCache.ScrapeSceneURL(url) + content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeScene) + if err != nil { + return nil, err + } + + return marshalScrapedScene(content) } func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) 
(*models.ScrapedGallery, error) { id, err := strconv.Atoi(gallery.ID) + if err != nil { + return nil, fmt.Errorf("%w: gallery id is not an integer: '%s'", ErrInput, gallery.ID) + } + + content, err := r.scraperCache().ScrapeID(ctx, scraperID, id, models.ScrapeContentTypeGallery) if err != nil { return nil, err } - return manager.GetInstance().ScraperCache.ScrapeGallery(scraperID, id) + return marshalScrapedGallery(content) } func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { - return manager.GetInstance().ScraperCache.ScrapeGalleryURL(url) + content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeGallery) + if err != nil { + return nil, err + } + + return marshalScrapedGallery(content) } func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { - return manager.GetInstance().ScraperCache.ScrapeMovieURL(url) + content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeMovie) + if err != nil { + return nil, err + } + + return marshalScrapedMovie(content) } func (r *queryResolver) QueryStashBoxScene(ctx context.Context, input models.StashBoxSceneQueryInput) ([]*models.ScrapedScene, error) { boxes := config.GetInstance().GetStashBoxes() if input.StashBoxIndex < 0 || input.StashBoxIndex >= len(boxes) { - return nil, fmt.Errorf("invalid stash_box_index %d", input.StashBoxIndex) + return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, input.StashBoxIndex) } client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager) @@ -133,7 +181,7 @@ func (r *queryResolver) QueryStashBoxPerformer(ctx context.Context, input models boxes := config.GetInstance().GetStashBoxes() if input.StashBoxIndex < 0 || input.StashBoxIndex >= len(boxes) { - return nil, fmt.Errorf("invalid stash_box_index %d", input.StashBoxIndex) + return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, input.StashBoxIndex) } client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager) @@ -153,7 +201,7 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) { boxes := config.GetInstance().GetStashBoxes() if index < 0 || index >= len(boxes) { - return nil, fmt.Errorf("invalid stash_box_index %d", index) + return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, index) } return stashbox.NewClient(*boxes[index], r.txnManager), nil @@ -161,7 +209,8 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) { func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) { if source.ScraperID != nil { - var singleScene *models.ScrapedScene + var c models.ScrapedContent + var content []models.ScrapedContent var err error switch { @@ -169,26 +218,24 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr var sceneID int sceneID, err = strconv.Atoi(*input.SceneID) if err != nil { - return nil, err + return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID) } - singleScene, err = manager.GetInstance().ScraperCache.ScrapeScene(*source.ScraperID, sceneID) + c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene) + content = []models.ScrapedContent{c} case input.SceneInput != nil: - singleScene, err = manager.GetInstance().ScraperCache.ScrapeSceneFragment(*source.ScraperID, *input.SceneInput) + c, err = 
r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput}) + content = []models.ScrapedContent{c} case input.Query != nil: - return manager.GetInstance().ScraperCache.ScrapeSceneQuery(*source.ScraperID, *input.Query) + content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene) default: - err = errors.New("scene_id, scene_input or query must be set") + err = fmt.Errorf("%w: scene_id, scene_input, or query must be set", ErrInput) } if err != nil { return nil, err } - if singleScene != nil { - return []*models.ScrapedScene{singleScene}, nil - } - - return nil, nil + return marshalScrapedScenes(content) } else if source.StashBoxIndex != nil { client, err := r.getStashBoxClient(*source.StashBoxIndex) if err != nil { @@ -201,10 +248,10 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr return client.QueryStashBoxScene(ctx, *input.Query) } - return nil, errors.New("scene_id or query must be set") + return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput) } - return nil, errors.New("scraper_id or stash_box_index must be set") + return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput) } func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) { @@ -225,20 +272,21 @@ func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.Scr func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSinglePerformerInput) ([]*models.ScrapedPerformer, error) { if source.ScraperID != nil { if input.PerformerInput != nil { - singlePerformer, err := manager.GetInstance().ScraperCache.ScrapePerformer(*source.ScraperID, *input.PerformerInput) + performer, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Performer: input.PerformerInput}) if err != nil { return nil, err } - if singlePerformer != nil { - return []*models.ScrapedPerformer{singlePerformer}, nil - } - - return nil, nil + return marshalScrapedPerformers([]models.ScrapedContent{performer}) } if input.Query != nil { - return manager.GetInstance().ScraperCache.ScrapePerformerList(*source.ScraperID, *input.Query) + content, err := r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypePerformer) + if err != nil { + return nil, err + } + + return marshalScrapedPerformers(content) } return nil, ErrNotImplemented @@ -288,38 +336,36 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source models } func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleGalleryInput) ([]*models.ScrapedGallery, error) { - if source.ScraperID != nil { - var singleGallery *models.ScrapedGallery - var err error - - switch { - case input.GalleryID != nil: - var galleryID int - galleryID, err = strconv.Atoi(*input.GalleryID) - if err != nil { - return nil, err - } - singleGallery, err = manager.GetInstance().ScraperCache.ScrapeGallery(*source.ScraperID, galleryID) - case input.GalleryInput != nil: - singleGallery, err = manager.GetInstance().ScraperCache.ScrapeGalleryFragment(*source.ScraperID, *input.GalleryInput) - default: - return nil, ErrNotImplemented - } - - if err != nil { - return nil, err - } - - if singleGallery != nil { - return []*models.ScrapedGallery{singleGallery}, nil - } - - 
return nil, nil - } else if source.StashBoxIndex != nil { + if source.StashBoxIndex != nil { return nil, ErrNotSupported } - return nil, errors.New("scraper_id must be set") + if source.ScraperID == nil { + return nil, fmt.Errorf("%w: scraper_id must be set", ErrInput) + } + + var c models.ScrapedContent + + switch { + case input.GalleryID != nil: + galleryID, err := strconv.Atoi(*input.GalleryID) + if err != nil { + return nil, fmt.Errorf("%w: gallery id is not an integer: '%s'", ErrInput, *input.GalleryID) + } + c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, galleryID, models.ScrapeContentTypeGallery) + if err != nil { + return nil, err + } + return marshalScrapedGalleries([]models.ScrapedContent{c}) + case input.GalleryInput != nil: + c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Gallery: input.GalleryInput}) + if err != nil { + return nil, err + } + return marshalScrapedGalleries([]models.ScrapedContent{c}) + default: + return nil, ErrNotImplemented + } } func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) { diff --git a/pkg/api/scraped_content.go b/pkg/api/scraped_content.go new file mode 100644 index 000000000..4a63cfb60 --- /dev/null +++ b/pkg/api/scraped_content.go @@ -0,0 +1,127 @@ +package api + +import ( + "fmt" + + "github.com/stashapp/stash/pkg/models" +) + +// marshalScrapedScenes converts ScrapedContent into ScrapedScene. If conversion fails, an +// error is returned to the caller. +func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedScene, error) { + var ret []*models.ScrapedScene + for _, c := range content { + if c == nil { + ret = append(ret, nil) + continue + } + + if s, ok := c.(*models.ScrapedScene); ok { + ret = append(ret, s) + } else { + return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedScene", models.ErrConversion) + } + } + + return ret, nil +} + +// marshalScrapedPerformers converts ScrapedContent into ScrapedPerformer. If conversion +// fails, an error is returned to the caller. +func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.ScrapedPerformer, error) { + var ret []*models.ScrapedPerformer + for _, c := range content { + if c == nil { + ret = append(ret, nil) + continue + } + + if p, ok := c.(*models.ScrapedPerformer); ok { + ret = append(ret, p) + } else { + return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedPerformer", models.ErrConversion) + } + } + + return ret, nil +} + +// marshalScrapedGalleries converts ScrapedContent into ScrapedGallery. If +// conversion fails, an error is returned. +func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.ScrapedGallery, error) { + var ret []*models.ScrapedGallery + for _, c := range content { + if c == nil { + ret = append(ret, nil) + continue + } + + if g, ok := c.(*models.ScrapedGallery); ok { + ret = append(ret, g) + } else { + return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedGallery", models.ErrConversion) + } + } + + return ret, nil +} + +// marshalScrapedMovies converts ScrapedContent into ScrapedMovie. If conversion +// fails, an error is returned. 
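+// A nil element of the input slice is passed through as a nil element of the +// output; any other underlying type aborts the conversion with a +// models.ErrConversion-wrapped error.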
+func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMovie, error) { + var ret []*models.ScrapedMovie + for _, c := range content { + if c == nil { + ret = append(ret, nil) + continue + } + + if m, ok := c.(*models.ScrapedMovie); ok { + ret = append(ret, m) + } else { + return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedMovie", models.ErrConversion) + } + } + + return ret, nil +} + +// marshalScrapedPerformer will marshal a single performer +func marshalScrapedPerformer(content models.ScrapedContent) (*models.ScrapedPerformer, error) { + p, err := marshalScrapedPerformers([]models.ScrapedContent{content}) + if err != nil { + return nil, err + } + + return p[0], nil +} + +// marshalScrapedScene will marshal a single scraped scene +func marshalScrapedScene(content models.ScrapedContent) (*models.ScrapedScene, error) { + s, err := marshalScrapedScenes([]models.ScrapedContent{content}) + if err != nil { + return nil, err + } + + return s[0], nil +} + +// marshalScrapedGallery will marshal a single scraped gallery +func marshalScrapedGallery(content models.ScrapedContent) (*models.ScrapedGallery, error) { + g, err := marshalScrapedGalleries([]models.ScrapedContent{content}) + if err != nil { + return nil, err + } + + return g[0], nil +} + +// marshalScrapedMovie will marshal a single scraped movie +func marshalScrapedMovie(content models.ScrapedContent) (*models.ScrapedMovie, error) { + m, err := marshalScrapedMovies([]models.ScrapedContent{content}) + if err != nil { + return nil, err + } + + return m[0], nil +} diff --git a/pkg/identify/identify.go b/pkg/identify/identify.go index d64a36fe6..2520618b3 100644 --- a/pkg/identify/identify.go +++ b/pkg/identify/identify.go @@ -12,7 +12,7 @@ import ( ) type SceneScraper interface { - ScrapeScene(sceneID int) (*models.ScrapedScene, error) + ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) } type SceneUpdatePostHookExecutor interface { @@ -34,7 +34,7 @@ type SceneIdentifier struct { } func (t *SceneIdentifier) Identify(ctx context.Context, txnManager models.TransactionManager, scene *models.Scene) error { - result, err := t.scrapeScene(scene) + result, err := t.scrapeScene(ctx, scene) if err != nil { return err } @@ -57,11 +57,11 @@ type scrapeResult struct { source ScraperSource } -func (t *SceneIdentifier) scrapeScene(scene *models.Scene) (*scrapeResult, error) { +func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene) (*scrapeResult, error) { // iterate through the input sources for _, source := range t.Sources { // scrape using the source - scraped, err := source.Scraper.ScrapeScene(scene.ID) + scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID) if err != nil { return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err) } diff --git a/pkg/identify/identify_test.go b/pkg/identify/identify_test.go index a598c04bb..e1366f997 100644 --- a/pkg/identify/identify_test.go +++ b/pkg/identify/identify_test.go @@ -17,7 +17,7 @@ type mockSceneScraper struct { results map[int]*models.ScrapedScene } -func (s mockSceneScraper) ScrapeScene(sceneID int) (*models.ScrapedScene, error) { +func (s mockSceneScraper) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) { if utils.IntInclude(s.errIDs, sceneID) { return nil, errors.New("scrape scene error") } diff --git a/pkg/manager/task_identify.go b/pkg/manager/task_identify.go index 0e8b789bf..e7e16df76 100644 --- a/pkg/manager/task_identify.go +++ b/pkg/manager/task_identify.go @@
-211,7 +211,7 @@ type stashboxSource struct { endpoint string } -func (s stashboxSource) ScrapeScene(sceneID int) (*models.ScrapedScene, error) { +func (s stashboxSource) ScrapeScene(_ context.Context, sceneID int) (*models.ScrapedScene, error) { results, err := s.FindStashBoxScenesByFingerprintsFlat([]string{strconv.Itoa(sceneID)}) if err != nil { return nil, fmt.Errorf("error querying stash-box using scene ID %d: %w", sceneID, err) @@ -233,8 +233,17 @@ type scraperSource struct { scraperID string } -func (s scraperSource) ScrapeScene(sceneID int) (*models.ScrapedScene, error) { - return s.cache.ScrapeScene(s.scraperID, sceneID) +func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) { + content, err := s.cache.ScrapeID(ctx, s.scraperID, sceneID, models.ScrapeContentTypeScene) + if err != nil { + return nil, err + } + + if scene, ok := content.(*models.ScrapedScene); ok { + return scene, nil + } + + return nil, errors.New("could not convert content to scene") } func (s scraperSource) String() string { diff --git a/pkg/models/errors.go b/pkg/models/errors.go index 54f5e1d00..3af2ff84c 100644 --- a/pkg/models/errors.go +++ b/pkg/models/errors.go @@ -2,4 +2,10 @@ package models import "errors" -var ErrNotFound = errors.New("not found") +var ( + // ErrNotFound signifies entities which are not found + ErrNotFound = errors.New("not found") + + // ErrConversion signifies conversion errors + ErrConversion = errors.New("conversion error") +) diff --git a/pkg/scraper/action.go b/pkg/scraper/action.go index d8c08da97..c49be9e2c 100644 --- a/pkg/scraper/action.go +++ b/pkg/scraper/action.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "net/http" "github.com/stashapp/stash/pkg/models" @@ -24,20 +25,20 @@ func (e scraperAction) IsValid() bool { } type scraperActionImpl interface { - scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) + scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) - scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) + scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) - scrapeScenesByName(name string) ([]*models.ScrapedScene, error) - scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) - scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) - scrapeSceneByURL(url string) (*models.ScrapedScene, error) + scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) + scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) + scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) + scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) - scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) + scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) - scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) + scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) - scrapeMovieByURL(url string) (*models.ScrapedMovie, error) + scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) } func (c config) getScraper(scraper 
scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl { diff --git a/pkg/scraper/autotag.go b/pkg/scraper/autotag.go index 73a836224..92bc0a239 100644 --- a/pkg/scraper/autotag.go +++ b/pkg/scraper/autotag.go @@ -2,8 +2,8 @@ package scraper import ( "context" - "errors" "fmt" + "net/http" "strconv" "github.com/stashapp/stash/pkg/match" @@ -16,14 +16,12 @@ const ( autoTagScraperName = "Auto Tag" ) -var errNotSupported = errors.New("not supported") - type autotagScraper struct { txnManager models.TransactionManager globalConfig GlobalConfig } -func (s *autotagScraper) matchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) { +func autotagMatchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) { p, err := match.PathToPerformers(path, performerReader) if err != nil { return nil, fmt.Errorf("error matching performers: %w", err) @@ -47,7 +45,7 @@ func (s *autotagScraper) matchPerformers(path string, performerReader models.Per return ret, nil } -func (s *autotagScraper) matchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) { +func autotagMatchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) { st, err := match.PathToStudios(path, studioReader) if err != nil { return nil, fmt.Errorf("error matching studios: %w", err) @@ -64,7 +62,7 @@ func (s *autotagScraper) matchStudio(path string, studioReader models.StudioRead return nil, nil } -func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) { +func autotagMatchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) { t, err := match.PathToTags(path, tagReader) if err != nil { return nil, fmt.Errorf("error matching tags: %w", err) @@ -85,32 +83,24 @@ func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]* return ret, nil } -type autotagSceneScraper struct { - *autotagScraper -} - -func (c *autotagSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) { - return nil, errNotSupported -} - -func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) { +func (s autotagScraper) viaScene(ctx context.Context, _client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) { var ret *models.ScrapedScene // populate performers, studio and tags based on scene path - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := s.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { path := scene.Path - performers, err := c.matchPerformers(path, r.Performer()) + performers, err := autotagMatchPerformers(path, r.Performer()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaScene: %w", err) } - studio, err := c.matchStudio(path, r.Studio()) + studio, err := autotagMatchStudio(path, r.Studio()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaScene: %w", err) } - tags, err := c.matchTags(path, r.Tag()) + tags, err := autotagMatchTags(path, r.Tag()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaScene: %w", err) } if len(performers) > 0 || studio != nil || len(tags) > 0 { @@ -129,19 +119,7 @@ func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.Scrape return ret, nil } -func (c *autotagSceneScraper) scrapeByFragment(scene 
models.ScrapedSceneInput) (*models.ScrapedScene, error) { - return nil, errNotSupported -} - -func (c *autotagSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) { - return nil, errNotSupported -} - -type autotagGalleryScraper struct { - *autotagScraper -} - -func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { +func (s autotagScraper) viaGallery(ctx context.Context, _client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) { if !gallery.Path.Valid { // not valid for non-path-based galleries return nil, nil @@ -150,20 +128,20 @@ func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*model var ret *models.ScrapedGallery // populate performers, studio and tags based on scene path - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { + if err := s.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { path := gallery.Path.String - performers, err := c.matchPerformers(path, r.Performer()) + performers, err := autotagMatchPerformers(path, r.Performer()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaGallery: %w", err) } - studio, err := c.matchStudio(path, r.Studio()) + studio, err := autotagMatchStudio(path, r.Studio()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaGallery: %w", err) } - tags, err := c.matchTags(path, r.Tag()) + tags, err := autotagMatchTags(path, r.Tag()) if err != nil { - return err + return fmt.Errorf("autotag scraper viaGallery: %w", err) } if len(performers) > 0 || studio != nil || len(tags) > 0 { @@ -182,12 +160,36 @@ func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*model return ret, nil } -func (c *autotagGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - return nil, errNotSupported +func (s autotagScraper) supports(ty models.ScrapeContentType) bool { + switch ty { + case models.ScrapeContentTypeScene: + return true + case models.ScrapeContentTypeGallery: + return true + } + + return false } -func (c *autotagGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) { - return nil, errNotSupported +func (s autotagScraper) supportsURL(url string, ty models.ScrapeContentType) bool { + return false +} + +func (s autotagScraper) spec() models.Scraper { + supportedScrapes := []models.ScrapeType{ + models.ScrapeTypeFragment, + } + + return models.Scraper{ + ID: autoTagScraperID, + Name: autoTagScraperName, + Scene: &models.ScraperSpec{ + SupportedScrapes: supportedScrapes, + }, + Gallery: &models.ScraperSpec{ + SupportedScrapes: supportedScrapes, + }, + } } func getAutoTagScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { @@ -196,23 +198,5 @@ func getAutoTagScraper(txnManager models.TransactionManager, globalConfig Global globalConfig: globalConfig, } - supportedScrapes := []models.ScrapeType{ - models.ScrapeTypeFragment, - } - - return scraper{ - ID: autoTagScraperID, - Spec: &models.Scraper{ - ID: autoTagScraperID, - Name: autoTagScraperName, - Scene: &models.ScraperSpec{ - SupportedScrapes: supportedScrapes, - }, - Gallery: &models.ScraperSpec{ - SupportedScrapes: supportedScrapes, - }, - }, - Scene: &autotagSceneScraper{&base}, - Gallery: &autotagGalleryScraper{&base}, - } + return base } diff --git a/pkg/scraper/cache.go b/pkg/scraper/cache.go new file mode 100644 index 000000000..7578317b9 --- /dev/null +++ b/pkg/scraper/cache.go @@ -0,0 
+1,298 @@ +package scraper + +import ( + "context" + "crypto/tls" + "fmt" + "net/http" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/models" + "github.com/stashapp/stash/pkg/utils" +) + +const ( + // scrapeGetTimeout is the timeout for scraper HTTP requests. Includes transfer time. + // We may want to bump this at some point and use local context-timeouts if more granularity + // is needed. + scrapeGetTimeout = time.Second * 60 + + // maxIdleConnsPerHost is the maximum number of idle connections the HTTP client will + // keep on a per-host basis. + maxIdleConnsPerHost = 8 + + // maxRedirects defines the maximum number of redirects the HTTP client will follow + maxRedirects = 20 +) + +// GlobalConfig contains the global scraper options. +type GlobalConfig interface { + GetScraperUserAgent() string + GetScrapersPath() string + GetScraperCDPPath() string + GetScraperCertCheck() bool +} + +func isCDPPathHTTP(c GlobalConfig) bool { + return strings.HasPrefix(c.GetScraperCDPPath(), "http://") || strings.HasPrefix(c.GetScraperCDPPath(), "https://") +} + +func isCDPPathWS(c GlobalConfig) bool { + return strings.HasPrefix(c.GetScraperCDPPath(), "ws://") +} + +// Cache stores the database of scrapers +type Cache struct { + client *http.Client + scrapers map[string]scraper // Scraper ID -> Scraper + globalConfig GlobalConfig + txnManager models.TransactionManager +} + +// newClient creates a scraper-local http client we use throughout the scraper subsystem. +func newClient(gc GlobalConfig) *http.Client { + client := &http.Client{ + Transport: &http.Transport{ // ignore insecure certificates + TLSClientConfig: &tls.Config{InsecureSkipVerify: !gc.GetScraperCertCheck()}, + MaxIdleConnsPerHost: maxIdleConnsPerHost, + }, + Timeout: scrapeGetTimeout, + // defaultCheckRedirect code with max changed from 10 to maxRedirects + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= maxRedirects { + return fmt.Errorf("%w: gave up after %d redirects", ErrMaxRedirects, maxRedirects) + } + return nil + }, + } + + return client +} + +// NewCache returns a new Cache loading scraper configurations from the +// scraper path provided in the global config object. It returns a new +// instance and an error if the scraper directory could not be loaded. +// +// Scraper configurations are loaded from yml files in the provided scrapers +// directory and any subdirectories. 
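+// +// A minimal construction sketch (illustrative only; the enclosing function and +// error handling here are assumptions, while the real wiring lives in the +// manager package): +// +//	cache, err := scraper.NewCache(globalConfig, txnManager) +//	if err != nil { +//		return fmt.Errorf("loading scraper cache: %w", err) +//	}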
+func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) { + // HTTP Client setup + client := newClient(globalConfig) + + scrapers, err := loadScrapers(globalConfig, txnManager) + if err != nil { + return nil, err + } + + return &Cache{ + client: client, + globalConfig: globalConfig, + scrapers: scrapers, + txnManager: txnManager, + }, nil +} + +func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) (map[string]scraper, error) { + path := globalConfig.GetScrapersPath() + scrapers := make(map[string]scraper) + + // Add built-in scrapers + freeOnes := getFreeonesScraper(txnManager, globalConfig) + autoTag := getAutoTagScraper(txnManager, globalConfig) + scrapers[freeOnes.spec().ID] = freeOnes + scrapers[autoTag.spec().ID] = autoTag + + logger.Debugf("Reading scraper configs from %s", path) + + scraperFiles := []string{} + err := utils.SymWalk(path, func(fp string, f os.FileInfo, err error) error { + if filepath.Ext(fp) == ".yml" { + c, err := loadConfigFromYAMLFile(fp) + if err != nil { + logger.Errorf("Error loading scraper %s: %v", fp, err) + } else { + scraper := newGroupScraper(*c, txnManager, globalConfig) + scrapers[scraper.spec().ID] = scraper + } + scraperFiles = append(scraperFiles, fp) + } + return nil + }) + + if err != nil { + logger.Errorf("Error reading scraper configs: %v", err) + return nil, err + } + + return scrapers, nil +} + +// ReloadScrapers clears the scraper cache and reloads from the scraper path. +// In the event of an error during loading, the cache will be left empty. +func (c *Cache) ReloadScrapers() error { + c.scrapers = nil + scrapers, err := loadScrapers(c.globalConfig, c.txnManager) + if err != nil { + return err + } + + c.scrapers = scrapers + return nil +} + +// ListScrapers lists scrapers matching one of the given types. +// Returns a list of scrapers, sorted by their ID. +func (c Cache) ListScrapers(tys []models.ScrapeContentType) []*models.Scraper { + var ret []*models.Scraper + for _, s := range c.scrapers { + for _, t := range tys { + if s.supports(t) { + spec := s.spec() + ret = append(ret, &spec) + break + } + } + } + + sort.Slice(ret, func(i, j int) bool { + return ret[i].ID < ret[j].ID + }) + + return ret +} + +// GetScraper returns the scraper matching the provided id. 
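+// It returns nil when no scraper with the provided id is loaded in the cache.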
+func (c Cache) GetScraper(scraperID string) *models.Scraper { + s := c.findScraper(scraperID) + if s != nil { + spec := s.spec() + return &spec + } + + return nil +} + +func (c Cache) findScraper(scraperID string) scraper { + s, ok := c.scrapers[scraperID] + if ok { + return s + } + + return nil +} + +func (c Cache) ScrapeName(ctx context.Context, id, query string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { + // find scraper with the provided id + s := c.findScraper(id) + if s == nil { + return nil, fmt.Errorf("%w: id %s", ErrNotFound, id) + } + if !s.supports(ty) { + return nil, fmt.Errorf("%w: cannot use scraper %s as a %v scraper", ErrNotSupported, id, ty) + } + + ns, ok := s.(nameScraper) + if !ok { + return nil, fmt.Errorf("%w: cannot use scraper %s to scrape by name", ErrNotSupported, id) + } + + return ns.viaName(ctx, c.client, query, ty) +} + +// ScrapeFragment uses the given fragment input to scrape content with the scraper matching id. +func (c Cache) ScrapeFragment(ctx context.Context, id string, input Input) (models.ScrapedContent, error) { + s := c.findScraper(id) + if s == nil { + return nil, fmt.Errorf("%w: id %s", ErrNotFound, id) + } + + fs, ok := s.(fragmentScraper) + if !ok { + return nil, fmt.Errorf("%w: cannot use scraper %s as a fragment scraper", ErrNotSupported, id) + } + + content, err := fs.viaFragment(ctx, c.client, input) + if err != nil { + return nil, fmt.Errorf("error while fragment scraping with scraper %s: %w", id, err) + } + + return c.postScrape(ctx, content) +} + +// ScrapeURL scrapes a given URL for the given content. Searches the scraper cache +// and picks the first scraper capable of scraping the given URL into the desired +// content. Returns the scraped content or an error if the scrape fails. +func (c Cache) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + for _, s := range c.scrapers { + if s.supportsURL(url, ty) { + ul, ok := s.(urlScraper) + if !ok { + return nil, fmt.Errorf("%w: cannot use scraper %s as a URL scraper", ErrNotSupported, s.spec().ID) + } + ret, err := ul.viaURL(ctx, c.client, url, ty) + if err != nil { + return nil, err + } + + if ret == nil { + return ret, nil + } + + return c.postScrape(ctx, ret) + } + } + + return nil, nil +} + +func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models.ScrapeContentType) (models.ScrapedContent, error) { + s := c.findScraper(scraperID) + if s == nil { + return nil, fmt.Errorf("%w: id %s", ErrNotFound, scraperID) + } + + if !s.supports(ty) { + return nil, fmt.Errorf("%w: cannot use scraper %s to scrape %v content", ErrNotSupported, scraperID, ty) + } + + var ret models.ScrapedContent + switch ty { + case models.ScrapeContentTypeScene: + ss, ok := s.(sceneScraper) + if !ok { + return nil, fmt.Errorf("%w: cannot use scraper %s as a scene scraper", ErrNotSupported, scraperID) + } + + scene, err := getScene(id, c.txnManager) + if err != nil { + return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err) + } + + ret, err = ss.viaScene(ctx, c.client, scene) + if err != nil { + return nil, fmt.Errorf("scraper %s: %w", scraperID, err) + } + case models.ScrapeContentTypeGallery: + gs, ok := s.(galleryScraper) + if !ok { + return nil, fmt.Errorf("%w: cannot use scraper %s as a gallery scraper", ErrNotSupported, scraperID) + } + + gallery, err := getGallery(id, c.txnManager) + if err != nil { + return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err) + } + + ret, err =
gs.viaGallery(ctx, c.client, gallery) + if err != nil { + return nil, fmt.Errorf("scraper %s: %w", scraperID, err) + } + } + + return c.postScrape(ctx, ret) +} diff --git a/pkg/scraper/config.go b/pkg/scraper/config.go index ee12c7e90..4782fb47b 100644 --- a/pkg/scraper/config.go +++ b/pkg/scraper/config.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" + "github.com/stashapp/stash/pkg/models" "gopkg.in/yaml.v2" ) @@ -232,55 +233,118 @@ func loadConfigFromYAMLFile(path string) (*config, error) { return ret, nil } -func (c config) supportsPerformers() bool { - return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0 +func (c config) spec() models.Scraper { + ret := models.Scraper{ + ID: c.ID, + Name: c.Name, + } + + performer := models.ScraperSpec{} + if c.PerformerByName != nil { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName) + } + if c.PerformerByFragment != nil { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment) + } + if len(c.PerformerByURL) > 0 { + performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.PerformerByURL { + performer.Urls = append(performer.Urls, v.URL...) + } + } + + if len(performer.SupportedScrapes) > 0 { + ret.Performer = &performer + } + + scene := models.ScraperSpec{} + if c.SceneByFragment != nil { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment) + } + if c.SceneByName != nil && c.SceneByQueryFragment != nil { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName) + } + if len(c.SceneByURL) > 0 { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.SceneByURL { + scene.Urls = append(scene.Urls, v.URL...) + } + } + + if len(scene.SupportedScrapes) > 0 { + ret.Scene = &scene + } + + gallery := models.ScraperSpec{} + if c.GalleryByFragment != nil { + gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment) + } + if len(c.GalleryByURL) > 0 { + gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.GalleryByURL { + gallery.Urls = append(gallery.Urls, v.URL...) + } + } + + if len(gallery.SupportedScrapes) > 0 { + ret.Gallery = &gallery + } + + movie := models.ScraperSpec{} + if len(c.MovieByURL) > 0 { + movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.MovieByURL { + movie.Urls = append(movie.Urls, v.URL...) 
+ } + } + + if len(movie.SupportedScrapes) > 0 { + ret.Movie = &movie + } + + return ret } -func (c config) matchesPerformerURL(url string) bool { - for _, scraper := range c.PerformerByURL { - if scraper.matchesURL(url) { - return true - } - } - - return false -} - -func (c config) supportsScenes() bool { - return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0 -} - -func (c config) supportsGalleries() bool { - return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0 -} - -func (c config) matchesSceneURL(url string) bool { - for _, scraper := range c.SceneByURL { - if scraper.matchesURL(url) { - return true - } - } - - return false -} - -func (c config) matchesGalleryURL(url string) bool { - for _, scraper := range c.GalleryByURL { - if scraper.matchesURL(url) { - return true - } - } - return false -} - -func (c config) supportsMovies() bool { - return len(c.MovieByURL) > 0 -} - -func (c config) matchesMovieURL(url string) bool { - for _, scraper := range c.MovieByURL { - if scraper.matchesURL(url) { - return true +func (c config) supports(ty models.ScrapeContentType) bool { + switch ty { + case models.ScrapeContentTypePerformer: + return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0 + case models.ScrapeContentTypeScene: + return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0 + case models.ScrapeContentTypeGallery: + return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0 + case models.ScrapeContentTypeMovie: + return len(c.MovieByURL) > 0 + } + + panic("Unhandled ScrapeContentType") +} + +func (c config) matchesURL(url string, ty models.ScrapeContentType) bool { + switch ty { + case models.ScrapeContentTypePerformer: + for _, scraper := range c.PerformerByURL { + if scraper.matchesURL(url) { + return true + } + } + case models.ScrapeContentTypeScene: + for _, scraper := range c.SceneByURL { + if scraper.matchesURL(url) { + return true + } + } + case models.ScrapeContentTypeGallery: + for _, scraper := range c.GalleryByURL { + if scraper.matchesURL(url) { + return true + } + } + case models.ScrapeContentTypeMovie: + for _, scraper := range c.MovieByURL { + if scraper.matchesURL(url) { + return true + } } } diff --git a/pkg/scraper/config_scraper.go b/pkg/scraper/config_scraper.go deleted file mode 100644 index 01ccd4f2b..000000000 --- a/pkg/scraper/config_scraper.go +++ /dev/null @@ -1,289 +0,0 @@ -package scraper - -import ( - "net/http" - - "github.com/stashapp/stash/pkg/models" -) - -type configSceneScraper struct { - *configScraper -} - -func (c *configSceneScraper) matchesURL(url string) bool { - return c.config.matchesSceneURL(url) -} - -func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) { - if c.config.SceneByName != nil { - s := c.config.getScraper(*c.config.SceneByName, c.client, c.txnManager, c.globalConfig) - return s.scrapeScenesByName(name) - } - - return nil, nil -} - -func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) { - if c.config.SceneByFragment != nil { - s := c.config.getScraper(*c.config.SceneByFragment, c.client, c.txnManager, c.globalConfig) - return s.scrapeSceneByScene(scene) - } - - return nil, nil -} - -func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { - if c.config.SceneByQueryFragment != nil { - s := c.config.getScraper(*c.config.SceneByQueryFragment, 
c.client, c.txnManager, c.globalConfig) - return s.scrapeSceneByFragment(scene) - } - - return nil, nil -} - -func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) { - for _, scraper := range c.config.SceneByURL { - if scraper.matchesURL(url) { - s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig) - ret, err := s.scrapeSceneByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -type configPerformerScraper struct { - *configScraper -} - -func (c *configPerformerScraper) matchesURL(url string) bool { - return c.config.matchesPerformerURL(url) -} - -func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) { - if c.config.PerformerByName != nil { - s := c.config.getScraper(*c.config.PerformerByName, c.client, c.txnManager, c.globalConfig) - return s.scrapePerformersByName(name) - } - - return nil, nil -} - -func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - if c.config.PerformerByFragment != nil { - s := c.config.getScraper(*c.config.PerformerByFragment, c.client, c.txnManager, c.globalConfig) - return s.scrapePerformerByFragment(scrapedPerformer) - } - - // try to match against URL if present - if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" { - return c.scrapeByURL(*scrapedPerformer.URL) - } - - return nil, nil -} - -func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) { - for _, scraper := range c.config.PerformerByURL { - if scraper.matchesURL(url) { - s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig) - ret, err := s.scrapePerformerByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -type configGalleryScraper struct { - *configScraper -} - -func (c *configGalleryScraper) matchesURL(url string) bool { - return c.config.matchesGalleryURL(url) -} - -func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { - if c.config.GalleryByFragment != nil { - s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig) - return s.scrapeGalleryByGallery(gallery) - } - - return nil, nil -} - -func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - if c.config.GalleryByFragment != nil { - // TODO - this should be galleryByQueryFragment - s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig) - return s.scrapeGalleryByFragment(gallery) - } - - return nil, nil -} - -func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) { - for _, scraper := range c.config.GalleryByURL { - if scraper.matchesURL(url) { - s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig) - ret, err := s.scrapeGalleryByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -type configMovieScraper struct { - *configScraper -} - -func (c *configMovieScraper) matchesURL(url string) bool { - return c.config.matchesMovieURL(url) -} - -func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) { - for _, scraper := range c.config.MovieByURL { - if scraper.matchesURL(url) { - s := 
c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig) - ret, err := s.scrapeMovieByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - return ret, nil - } - } - } - - return nil, nil -} - -type configScraper struct { - config config - client *http.Client - txnManager models.TransactionManager - globalConfig GlobalConfig -} - -func createScraperFromConfig(c config, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { - base := configScraper{ - client: client, - config: c, - txnManager: txnManager, - globalConfig: globalConfig, - } - - ret := scraper{ - ID: c.ID, - Spec: configScraperSpec(c), - } - - // only set fields if supported - if c.supportsPerformers() { - ret.Performer = &configPerformerScraper{&base} - } - if c.supportsGalleries() { - ret.Gallery = &configGalleryScraper{&base} - } - if c.supportsMovies() { - ret.Movie = &configMovieScraper{&base} - } - if c.supportsScenes() { - ret.Scene = &configSceneScraper{&base} - } - - return ret -} - -func configScraperSpec(c config) *models.Scraper { - ret := models.Scraper{ - ID: c.ID, - Name: c.Name, - } - - performer := models.ScraperSpec{} - if c.PerformerByName != nil { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName) - } - if c.PerformerByFragment != nil { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment) - } - if len(c.PerformerByURL) > 0 { - performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.PerformerByURL { - performer.Urls = append(performer.Urls, v.URL...) - } - } - - if len(performer.SupportedScrapes) > 0 { - ret.Performer = &performer - } - - scene := models.ScraperSpec{} - if c.SceneByFragment != nil { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment) - } - if c.SceneByName != nil && c.SceneByQueryFragment != nil { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName) - } - if len(c.SceneByURL) > 0 { - scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.SceneByURL { - scene.Urls = append(scene.Urls, v.URL...) - } - } - - if len(scene.SupportedScrapes) > 0 { - ret.Scene = &scene - } - - gallery := models.ScraperSpec{} - if c.GalleryByFragment != nil { - gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment) - } - if len(c.GalleryByURL) > 0 { - gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.GalleryByURL { - gallery.Urls = append(gallery.Urls, v.URL...) - } - } - - if len(gallery.SupportedScrapes) > 0 { - ret.Gallery = &gallery - } - - movie := models.ScraperSpec{} - if len(c.MovieByURL) > 0 { - movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL) - for _, v := range c.MovieByURL { - movie.Urls = append(movie.Urls, v.URL...) 
- } - } - - if len(movie.SupportedScrapes) > 0 { - ret.Movie = &movie - } - - return &ret -} diff --git a/pkg/scraper/freeones.go b/pkg/scraper/freeones.go index c50235cc4..7b6c81649 100644 --- a/pkg/scraper/freeones.go +++ b/pkg/scraper/freeones.go @@ -1,7 +1,6 @@ package scraper import ( - "net/http" "strings" "github.com/stashapp/stash/pkg/logger" @@ -47,7 +46,7 @@ xPathScrapers: - regex: \sBio\s*$ with: "" URL: //link[@rel="alternate" and @hreflang="x-default"]/@href - Twitter: //a[not(starts-with(@href,'https://twitter.com/FreeOnes'))][contains(@href,'twitter.com/')]/@href + Twitter: //a[not(starts-with(@href,'https://twitter.com/FreeOnes'))][contains(@href,'twitter.com/')]/@href Instagram: //a[contains(@href,'instagram.com/')]/@href Birthdate: selector: //span[contains(text(),'Born On')] @@ -124,7 +123,7 @@ xPathScrapers: # Last updated April 13, 2021 ` -func getFreeonesScraper(client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { +func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { yml := freeonesScraperConfig c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml)) @@ -132,5 +131,5 @@ func getFreeonesScraper(client *http.Client, txnManager models.TransactionManage logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error()) } - return createScraperFromConfig(*c, client, txnManager, globalConfig) + return newGroupScraper(*c, txnManager, globalConfig) } diff --git a/pkg/scraper/group.go b/pkg/scraper/group.go new file mode 100644 index 000000000..583f4dd87 --- /dev/null +++ b/pkg/scraper/group.go @@ -0,0 +1,186 @@ +package scraper + +import ( + "context" + "fmt" + "net/http" + + "github.com/stashapp/stash/pkg/models" +) + +type group struct { + config config + + txnManager models.TransactionManager + globalConf GlobalConfig +} + +func newGroupScraper(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper { + return group{ + config: c, + txnManager: txnManager, + globalConf: globalConfig, + } +} + +func (g group) spec() models.Scraper { + return g.config.spec() +} + +// fragmentScraper finds an appropriate fragment scraper based on input. +func (g group) fragmentScraper(input Input) *scraperTypeConfig { + switch { + case input.Performer != nil: + return g.config.PerformerByFragment + case input.Gallery != nil: + // TODO - this should be galleryByQueryFragment + return g.config.GalleryByFragment + case input.Scene != nil: + return g.config.SceneByQueryFragment + } + + return nil +} + +// scrapeFragmentInput analyzes the input and calls an appropriate scraperActionImpl +func scrapeFragmentInput(ctx context.Context, input Input, s scraperActionImpl) (models.ScrapedContent, error) { + switch { + case input.Performer != nil: + return s.scrapePerformerByFragment(*input.Performer) + case input.Gallery != nil: + return s.scrapeGalleryByFragment(*input.Gallery) + case input.Scene != nil: + return s.scrapeSceneByFragment(ctx, *input.Scene) + } + + return nil, ErrNotSupported +} + +func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error) { + stc := g.fragmentScraper(input) + if stc == nil { + // If there's no performer fragment scraper in the group, we try the URL + // scraper instead: check if there's a URL in the input, and shift to a + // URL scrape if one is present.
+ if input.Performer != nil && input.Performer.URL != nil && *input.Performer.URL != "" { + return g.viaURL(ctx, client, *input.Performer.URL, models.ScrapeContentTypePerformer) + } + + return nil, ErrNotSupported + } + + s := g.config.getScraper(*stc, client, g.txnManager, g.globalConf) + return scrapeFragmentInput(ctx, input, s) +} + +func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) { + if g.config.SceneByFragment == nil { + return nil, ErrNotSupported + } + + s := g.config.getScraper(*g.config.SceneByFragment, client, g.txnManager, g.globalConf) + return s.scrapeSceneByScene(ctx, scene) +} + +func (g group) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) { + if g.config.GalleryByFragment == nil { + return nil, ErrNotSupported + } + + s := g.config.getScraper(*g.config.GalleryByFragment, client, g.txnManager, g.globalConf) + return s.scrapeGalleryByGallery(ctx, gallery) +} + +func loadUrlCandidates(c config, ty models.ScrapeContentType) []*scrapeByURLConfig { + switch ty { + case models.ScrapeContentTypePerformer: + return c.PerformerByURL + case models.ScrapeContentTypeScene: + return c.SceneByURL + case models.ScrapeContentTypeMovie: + return c.MovieByURL + case models.ScrapeContentTypeGallery: + return c.GalleryByURL + } + + panic("loadUrlCandidates: unreachable") +} + +func scrapeByUrl(ctx context.Context, url string, s scraperActionImpl, ty models.ScrapeContentType) (models.ScrapedContent, error) { + switch ty { + case models.ScrapeContentTypePerformer: + return s.scrapePerformerByURL(ctx, url) + case models.ScrapeContentTypeScene: + return s.scrapeSceneByURL(ctx, url) + case models.ScrapeContentTypeMovie: + return s.scrapeMovieByURL(ctx, url) + case models.ScrapeContentTypeGallery: + return s.scrapeGalleryByURL(ctx, url) + } + + panic("scrapeByUrl: unreachable") +} + +func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) { + candidates := loadUrlCandidates(g.config, ty) + for _, scraper := range candidates { + if scraper.matchesURL(url) { + s := g.config.getScraper(scraper.scraperTypeConfig, client, g.txnManager, g.globalConf) + ret, err := scrapeByUrl(ctx, url, s, ty) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} + +func (g group) viaName(ctx context.Context, client *http.Client, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) { + switch ty { + case models.ScrapeContentTypePerformer: + if g.config.PerformerByName == nil { + break + } + + s := g.config.getScraper(*g.config.PerformerByName, client, g.txnManager, g.globalConf) + performers, err := s.scrapePerformersByName(ctx, name) + if err != nil { + return nil, err + } + content := make([]models.ScrapedContent, len(performers)) + for i := range performers { + content[i] = performers[i] + } + return content, nil + case models.ScrapeContentTypeScene: + if g.config.SceneByName == nil { + break + } + + s := g.config.getScraper(*g.config.SceneByName, client, g.txnManager, g.globalConf) + scenes, err := s.scrapeScenesByName(ctx, name) + if err != nil { + return nil, err + } + content := make([]models.ScrapedContent, len(scenes)) + for i := range scenes { + content[i] = scenes[i] + } + return content, nil + } + + return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty) +} + +func (g group) supports(ty 
models.ScrapeContentType) bool { + return g.config.supports(ty) +} + +func (g group) supportsURL(url string, ty models.ScrapeContentType) bool { + return g.config.matchesURL(url, ty) +} diff --git a/pkg/scraper/json.go b/pkg/scraper/json.go index 82bf1aa0b..d64227d84 100644 --- a/pkg/scraper/json.go +++ b/pkg/scraper/json.go @@ -3,6 +3,7 @@ package scraper import ( "context" "errors" + "fmt" "io" "net/http" "net/url" @@ -74,9 +75,9 @@ func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) { return docStr, err } -func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) { +func (s *jsonScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries - doc, scraper, err := s.scrapeURL(context.TODO(), u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -85,9 +86,9 @@ func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer return scraper.scrapePerformer(q) } -func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) { +func (s *jsonScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries - doc, scraper, err := s.scrapeURL(context.TODO(), u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -96,9 +97,9 @@ func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) return scraper.scrapeScene(q) } -func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) { +func (s *jsonScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries - doc, scraper, err := s.scrapeURL(context.TODO(), u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -107,9 +108,9 @@ func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, er return scraper.scrapeGallery(q) } -func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { +func (s *jsonScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries - doc, scraper, err := s.scrapeURL(context.TODO(), u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -118,7 +119,7 @@ func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) return scraper.scrapeMovie(q) } -func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { +func (s *jsonScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { scraper := s.getJsonScraper() if scraper == nil { @@ -147,7 +148,7 @@ func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedP return nil, errors.New("scrapePerformerByFragment not supported for json scraper") } -func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) { +func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { scraper := s.getJsonScraper() if scraper == nil { @@ -162,7 +163,7 @@ func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, e url := s.scraper.QueryURL url = 
strings.ReplaceAll(url, placeholder, escapedName) - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -172,7 +173,7 @@ func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, e return scraper.scrapeScenes(q) } -func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) { +func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { // construct the URL queryURL := queryURLParametersFromScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -186,7 +187,7 @@ func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedSc return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -196,7 +197,7 @@ func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedSc return scraper.scrapeScene(q) } -func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { // construct the URL queryURL := queryURLParametersFromScrapedScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -210,7 +211,7 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*mo return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -220,7 +221,7 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*mo return scraper.scrapeScene(q) } -func (s *jsonScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { +func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { // construct the URL queryURL := queryURLParametersFromGallery(gallery) if s.scraper.QueryURLReplacements != nil { @@ -234,7 +235,7 @@ func (s *jsonScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.S return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -260,12 +261,11 @@ type jsonQuery struct { scraper *jsonScraper } -func (q *jsonQuery) runQuery(selector string) []string { +func (q *jsonQuery) runQuery(selector string) ([]string, error) { value := gjson.Get(q.doc, selector) if !value.Exists() { - logger.Warnf("Could not find json path '%s' in json object", selector) - return nil + return nil, fmt.Errorf("could not find json path '%s' in json object", selector) } var ret []string @@ -278,7 +278,7 @@ func (q *jsonQuery) runQuery(selector string) []string { ret = append(ret, value.String()) } - return ret + return ret, nil } func (q *jsonQuery) subScrape(value string) mappedQuery { diff --git a/pkg/scraper/mapped.go b/pkg/scraper/mapped.go index 764cfa730..6a366ccdb 100644 --- a/pkg/scraper/mapped.go +++ b/pkg/scraper/mapped.go @@ -17,7 +17,7 @@ import ( ) type mappedQuery interface { - runQuery(selector string) []string + runQuery(selector string) ([]string, error) subScrape(value string) mappedQuery } @@ -51,7 +51,10 @@ func (s mappedConfig) process(q mappedQuery, 
common commonMappedConfig) mappedRe selector := attrConfig.Selector selector = s.applyCommon(common, selector) - found := q.runQuery(selector) + found, err := q.runQuery(selector) + if err != nil { + logger.Warnf("key '%v': %v", k, err) + } if len(found) > 0 { result := s.postProcess(q, attrConfig, found) @@ -423,7 +426,10 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string { ss := q.subScrape(value) if ss != nil { - found := ss.runQuery(subScrapeConfig.Selector) + found, err := ss.runQuery(subScrapeConfig.Selector) + if err != nil { + logger.Warnf("subscrape for '%v': %v", value, err) + } if len(found) > 0 { // check if we're concatenating the results into a single result diff --git a/pkg/scraper/postprocessing.go b/pkg/scraper/postprocessing.go new file mode 100644 index 000000000..d1542e5e8 --- /dev/null +++ b/pkg/scraper/postprocessing.go @@ -0,0 +1,224 @@ +package scraper + +import ( + "context" + "regexp" + "strings" + + "github.com/stashapp/stash/pkg/logger" + stash_config "github.com/stashapp/stash/pkg/manager/config" + "github.com/stashapp/stash/pkg/match" + "github.com/stashapp/stash/pkg/models" +) + +// postScrape handles post-processing of scraped content. If the content +// requires post-processing, this function fans out to the given content +// type and post-processes it. +func (c Cache) postScrape(ctx context.Context, content models.ScrapedContent) (models.ScrapedContent, error) { + // Analyze the concrete type, call the right post-processing function + switch v := content.(type) { + case *models.ScrapedPerformer: + return c.postScrapePerformer(ctx, v) + case models.ScrapedPerformer: + return c.postScrapePerformer(ctx, &v) + case *models.ScrapedScene: + return c.postScrapeScene(ctx, v) + case models.ScrapedScene: + return c.postScrapeScene(ctx, &v) + case *models.ScrapedGallery: + return c.postScrapeGallery(ctx, v) + case models.ScrapedGallery: + return c.postScrapeGallery(ctx, &v) + case *models.ScrapedMovie: + return c.postScrapeMovie(ctx, v) + case models.ScrapedMovie: + return c.postScrapeMovie(ctx, &v) + } + + // If nothing matches, pass the content through + return content, nil +} + +func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) (models.ScrapedContent, error) { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + tqb := r.Tag() + + tags, err := postProcessTags(tqb, ret.Tags) + if err != nil { + return err + } + ret.Tags = tags + + return nil + }); err != nil { + return nil, err + } + + // post-process - set the image if applicable + if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error()) + } + + return ret, nil +} + +func (c Cache) postScrapeMovie(ctx context.Context, ret *models.ScrapedMovie) (models.ScrapedContent, error) { + if ret.Studio != nil { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + return match.ScrapedStudio(r.Studio(), ret.Studio, nil) + }); err != nil { + return nil, err + } + } + + // post-process - set the image if applicable + if err := setMovieFrontImage(ctx, c.client, ret, c.globalConfig); err != nil { + logger.Warnf("could not set front image using URL %s: %v", *ret.FrontImage, err) + } + if err := setMovieBackImage(ctx, c.client, ret, c.globalConfig); err != nil { + logger.Warnf("could not set back image using URL %s: %v", *ret.BackImage, err) + } + + return ret, nil +} + +func (c Cache) 
postScrapeScenePerformer(ctx context.Context, ret *models.ScrapedPerformer) error { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + tqb := r.Tag() + + tags, err := postProcessTags(tqb, ret.Tags) + if err != nil { + return err + } + ret.Tags = tags + + return nil + }); err != nil { + return err + } + + return nil +} + +func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (models.ScrapedContent, error) { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + pqb := r.Performer() + mqb := r.Movie() + tqb := r.Tag() + sqb := r.Studio() + + for _, p := range ret.Performers { + if err := c.postScrapeScenePerformer(ctx, p); err != nil { + return err + } + + if err := match.ScrapedPerformer(pqb, p, nil); err != nil { + return err + } + } + + for _, p := range ret.Movies { + err := match.ScrapedMovie(mqb, p) + if err != nil { + return err + } + } + + tags, err := postProcessTags(tqb, ret.Tags) + if err != nil { + return err + } + ret.Tags = tags + + if ret.Studio != nil { + err := match.ScrapedStudio(sqb, ret.Studio, nil) + if err != nil { + return err + } + } + + return nil + }); err != nil { + return nil, err + } + + // post-process - set the image if applicable + if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil { + logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err) + } + + return ret, nil +} + +func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery) (models.ScrapedContent, error) { + if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { + pqb := r.Performer() + tqb := r.Tag() + sqb := r.Studio() + + for _, p := range ret.Performers { + err := match.ScrapedPerformer(pqb, p, nil) + if err != nil { + return err + } + } + + tags, err := postProcessTags(tqb, ret.Tags) + if err != nil { + return err + } + ret.Tags = tags + + if ret.Studio != nil { + err := match.ScrapedStudio(sqb, ret.Studio, nil) + if err != nil { + return err + } + } + + return nil + }); err != nil { + return nil, err + } + + return ret, nil +} + +func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) { + var ret []*models.ScrapedTag + + excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns() + var excludeRegexps []*regexp.Regexp + + for _, excludePattern := range excludePatterns { + reg, err := regexp.Compile(strings.ToLower(excludePattern)) + if err != nil { + logger.Errorf("Invalid tag exclusion pattern :%v", err) + } else { + excludeRegexps = append(excludeRegexps, reg) + } + } + + var ignoredTags []string +ScrapeTag: + for _, t := range scrapedTags { + for _, reg := range excludeRegexps { + if reg.MatchString(strings.ToLower(t.Name)) { + ignoredTags = append(ignoredTags, t.Name) + continue ScrapeTag + } + } + + err := match.ScrapedTag(tqb, t) + if err != nil { + return nil, err + } + ret = append(ret, t) + } + + if len(ignoredTags) > 0 { + logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", ")) + } + + return ret, nil +} diff --git a/pkg/scraper/scraper.go b/pkg/scraper/scraper.go index b842f3df4..c2d86bc5e 100644 --- a/pkg/scraper/scraper.go +++ b/pkg/scraper/scraper.go @@ -1,51 +1,77 @@ package scraper -import "github.com/stashapp/stash/pkg/models" +import ( + "context" + "errors" + "net/http" -type urlMatcher interface { - matchesURL(url string) bool + "github.com/stashapp/stash/pkg/models" +) + +var ( + // ErrMaxRedirects is returned if the max number 
of HTTP redirects are reached. + ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached") + + // ErrNotFound is returned when an entity isn't found + ErrNotFound = errors.New("scraper not found") + + // ErrNotSupported is returned when a given invocation isn't supported, and there + // is a guard function which should be able to guard against it. + ErrNotSupported = errors.New("scraper operation not supported") +) + +// Input coalesces inputs of different types into a single structure. +// The system expects one of these to be set, and the remaining to be +// set to nil. +type Input struct { + Performer *models.ScrapedPerformerInput + Scene *models.ScrapedSceneInput + Gallery *models.ScrapedGalleryInput } -type performerScraper interface { - scrapeByName(name string) ([]*models.ScrapedPerformer, error) - scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) - scrapeByURL(url string) (*models.ScrapedPerformer, error) +// scraper is the generic interface to the scraper subsystems +type scraper interface { + // spec returns the scraper specification, suitable for graphql + spec() models.Scraper + // supports tests if the scraper supports a given content type + supports(models.ScrapeContentType) bool + // supportsURL tests if the scraper supports scrapes of a given url, producing a given content type + supportsURL(url string, ty models.ScrapeContentType) bool } +// urlScraper is the interface of scrapers supporting url loads +type urlScraper interface { + scraper + + viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) +} + +// nameScraper is the interface of scrapers supporting name loads +type nameScraper interface { + scraper + + viaName(ctx context.Context, client *http.Client, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) +} + +// fragmentScraper is the interface of scrapers supporting fragment loads +type fragmentScraper interface { + scraper + + viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error) +} + +// sceneScraper is a scraper which supports scene scrapes with +// scene data as the input. type sceneScraper interface { - scrapeByName(name string) ([]*models.ScrapedScene, error) - scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) - scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) - scrapeByURL(url string) (*models.ScrapedScene, error) + scraper + + viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) } +// galleryScraper is a scraper which supports gallery scrapes with +// gallery data as the input. 
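+// As with the other via* methods, the *http.Client is injected by the
+// caller rather than stored on the scraper, keeping transport policy
+// (timeouts, redirect limits, certificate checks) in one place.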
type galleryScraper interface { - scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) - scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) - scrapeByURL(url string) (*models.ScrapedGallery, error) -} - -type movieScraper interface { - scrapeByURL(url string) (*models.ScrapedMovie, error) -} - -type scraper struct { - ID string - Spec *models.Scraper - - Performer performerScraper - Scene sceneScraper - Gallery galleryScraper - Movie movieScraper -} - -func matchesURL(maybeURLMatcher interface{}, url string) bool { - if maybeURLMatcher != nil { - matcher, ok := maybeURLMatcher.(urlMatcher) - if ok { - return matcher.matchesURL(url) - } - } - - return false + scraper + + viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) } diff --git a/pkg/scraper/scrapers.go b/pkg/scraper/scrapers.go deleted file mode 100644 index 590991000..000000000 --- a/pkg/scraper/scrapers.go +++ /dev/null @@ -1,660 +0,0 @@ -package scraper - -import ( - "context" - "crypto/tls" - "errors" - "fmt" - "net/http" - "os" - "path/filepath" - "regexp" - "strings" - "time" - - "github.com/stashapp/stash/pkg/logger" - stash_config "github.com/stashapp/stash/pkg/manager/config" - "github.com/stashapp/stash/pkg/match" - "github.com/stashapp/stash/pkg/models" - "github.com/stashapp/stash/pkg/utils" -) - -var ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached") - -const ( - // scrapeGetTimeout is the timeout for scraper HTTP requests. Includes transfer time. - // We may want to bump this at some point and use local context-timeouts if more granularity - // is needed. - scrapeGetTimeout = time.Second * 60 - - // maxIdleConnsPerHost is the maximum number of idle connections the HTTP client will - // keep on a per-host basis. - maxIdleConnsPerHost = 8 - - // maxRedirects defines the maximum number of redirects the HTTP client will follow - maxRedirects = 20 -) - -// GlobalConfig contains the global scraper options. -type GlobalConfig interface { - GetScraperUserAgent() string - GetScrapersPath() string - GetScraperCDPPath() string - GetScraperCertCheck() bool -} - -func isCDPPathHTTP(c GlobalConfig) bool { - return strings.HasPrefix(c.GetScraperCDPPath(), "http://") || strings.HasPrefix(c.GetScraperCDPPath(), "https://") -} - -func isCDPPathWS(c GlobalConfig) bool { - return strings.HasPrefix(c.GetScraperCDPPath(), "ws://") -} - -// Cache stores scraper details. -type Cache struct { - client *http.Client - scrapers []scraper - globalConfig GlobalConfig - txnManager models.TransactionManager -} - -// newClient creates a scraper-local http client we use throughout the scraper subsystem. -func newClient(gc GlobalConfig) *http.Client { - client := &http.Client{ - Transport: &http.Transport{ // ignore insecure certificates - TLSClientConfig: &tls.Config{InsecureSkipVerify: !gc.GetScraperCertCheck()}, - MaxIdleConnsPerHost: maxIdleConnsPerHost, - }, - Timeout: scrapeGetTimeout, - // defaultCheckRedirect code with max changed from 10 to maxRedirects - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) >= maxRedirects { - return fmt.Errorf("after %d redirects: %w", maxRedirects, ErrMaxRedirects) - } - return nil - }, - } - - return client -} - -// NewCache returns a new Cache loading scraper configurations from the -// scraper path provided in the global config object. It returns a new -// instance and an error if the scraper directory could not be loaded. 
-// -// Scraper configurations are loaded from yml files in the provided scrapers -// directory and any subdirectories. -func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) { - // HTTP Client setup - client := newClient(globalConfig) - - scrapers, err := loadScrapers(globalConfig, client, txnManager) - if err != nil { - return nil, err - } - - return &Cache{ - client: client, - globalConfig: globalConfig, - scrapers: scrapers, - txnManager: txnManager, - }, nil -} - -func loadScrapers(globalConfig GlobalConfig, client *http.Client, txnManager models.TransactionManager) ([]scraper, error) { - path := globalConfig.GetScrapersPath() - scrapers := make([]scraper, 0) - - logger.Debugf("Reading scraper configs from %s", path) - scraperFiles := []string{} - err := utils.SymWalk(path, func(fp string, f os.FileInfo, err error) error { - if filepath.Ext(fp) == ".yml" { - scraperFiles = append(scraperFiles, fp) - } - return nil - }) - - if err != nil { - logger.Errorf("Error reading scraper configs: %s", err.Error()) - return nil, err - } - - // add built-in freeones scraper - scrapers = append(scrapers, getFreeonesScraper(client, txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig)) - - for _, file := range scraperFiles { - c, err := loadConfigFromYAMLFile(file) - if err != nil { - logger.Errorf("Error loading scraper %s: %s", file, err.Error()) - } else { - scraper := createScraperFromConfig(*c, client, txnManager, globalConfig) - scrapers = append(scrapers, scraper) - } - } - - return scrapers, nil -} - -// ReloadScrapers clears the scraper cache and reloads from the scraper path. -// In the event of an error during loading, the cache will be left empty. -func (c *Cache) ReloadScrapers() error { - c.scrapers = nil - scrapers, err := loadScrapers(c.globalConfig, c.client, c.txnManager) - if err != nil { - return err - } - - c.scrapers = scrapers - return nil -} - -// TODO - don't think this is needed -// UpdateConfig updates the global config for the cache. If the scraper path -// has changed, ReloadScrapers will need to be called separately. -func (c *Cache) UpdateConfig(globalConfig GlobalConfig) { - c.globalConfig = globalConfig -} - -// ListPerformerScrapers returns a list of scrapers that are capable of -// scraping performers. -func (c Cache) ListPerformerScrapers() []*models.Scraper { - var ret []*models.Scraper - for _, s := range c.scrapers { - // filter on type - if s.Performer != nil { - ret = append(ret, s.Spec) - } - } - - return ret -} - -// ListSceneScrapers returns a list of scrapers that are capable of -// scraping scenes. -func (c Cache) ListSceneScrapers() []*models.Scraper { - var ret []*models.Scraper - for _, s := range c.scrapers { - // filter on type - if s.Scene != nil { - ret = append(ret, s.Spec) - } - } - - return ret -} - -// ListGalleryScrapers returns a list of scrapers that are capable of -// scraping galleries. -func (c Cache) ListGalleryScrapers() []*models.Scraper { - var ret []*models.Scraper - for _, s := range c.scrapers { - // filter on type - if s.Gallery != nil { - ret = append(ret, s.Spec) - } - } - - return ret -} - -// ListMovieScrapers returns a list of scrapers that are capable of -// scraping scenes. -func (c Cache) ListMovieScrapers() []*models.Scraper { - var ret []*models.Scraper - for _, s := range c.scrapers { - // filter on type - if s.Movie != nil { - ret = append(ret, s.Spec) - } - } - - return ret -} - -// GetScraper returns the scraper matching the provided id. 
-func (c Cache) GetScraper(scraperID string) *models.Scraper { - ret := c.findScraper(scraperID) - if ret != nil { - return ret.Spec - } - - return nil -} - -func (c Cache) findScraper(scraperID string) *scraper { - for _, s := range c.scrapers { - if s.ID == scraperID { - return &s - } - } - - return nil -} - -// ScrapePerformerList uses the scraper with the provided ID to query for -// performers using the provided query string. It returns a list of -// scraped performer data. -func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) { - // find scraper with the provided id - s := c.findScraper(scraperID) - if s != nil && s.Performer != nil { - return s.Performer.scrapeByName(query) - } - - return nil, errors.New("Scraper with ID " + scraperID + " not found") -} - -// ScrapePerformer uses the scraper with the provided ID to scrape a -// performer using the provided performer fragment. -func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) { - // find scraper with the provided id - s := c.findScraper(scraperID) - if s != nil && s.Performer != nil { - ret, err := s.Performer.scrapeByFragment(scrapedPerformer) - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapePerformer(context.TODO(), ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - - return nil, errors.New("Scraper with ID " + scraperID + " not found") -} - -// ScrapePerformerURL uses the first scraper it finds that matches the URL -// provided to scrape a performer. If no scrapers are found that matches -// the URL, then nil is returned. -func (c Cache) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) { - for _, s := range c.scrapers { - if matchesURL(s.Performer, url) { - ret, err := s.Performer.scrapeByURL(url) - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapePerformer(context.TODO(), ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - } - - return nil, nil -} - -func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) error { - if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { - tqb := r.Tag() - - tags, err := postProcessTags(tqb, ret.Tags) - if err != nil { - return err - } - ret.Tags = tags - - return nil - }); err != nil { - return err - } - - // post-process - set the image if applicable - if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error()) - } - - return nil -} - -func (c Cache) postScrapeScenePerformer(ret *models.ScrapedPerformer) error { - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - tqb := r.Tag() - - tags, err := postProcessTags(tqb, ret.Tags) - if err != nil { - return err - } - ret.Tags = tags - - return nil - }); err != nil { - return err - } - - return nil -} - -func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) error { - if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error { - pqb := r.Performer() - mqb := r.Movie() - tqb := r.Tag() - sqb := r.Studio() - - for _, p := range ret.Performers { - if err := c.postScrapeScenePerformer(p); err != nil { - return err - } - - if err := match.ScrapedPerformer(pqb, p, nil); err != nil { - return err - } - } - - for _, p := range ret.Movies { - err := match.ScrapedMovie(mqb, p) - 
if err != nil { - return err - } - } - - tags, err := postProcessTags(tqb, ret.Tags) - if err != nil { - return err - } - ret.Tags = tags - - if ret.Studio != nil { - err := match.ScrapedStudio(sqb, ret.Studio, nil) - if err != nil { - return err - } - } - - return nil - }); err != nil { - return err - } - - // post-process - set the image if applicable - if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err) - } - - return nil -} - -func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error { - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - pqb := r.Performer() - tqb := r.Tag() - sqb := r.Studio() - - for _, p := range ret.Performers { - err := match.ScrapedPerformer(pqb, p, nil) - if err != nil { - return err - } - } - - tags, err := postProcessTags(tqb, ret.Tags) - if err != nil { - return err - } - ret.Tags = tags - - if ret.Studio != nil { - err := match.ScrapedStudio(sqb, ret.Studio, nil) - if err != nil { - return err - } - } - - return nil - }); err != nil { - return err - } - - return nil -} - -// ScrapeScene uses the scraper with the provided ID to scrape a scene using existing data. -func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, error) { - // find scraper with the provided id - s := c.findScraper(scraperID) - if s != nil && s.Scene != nil { - // get scene from id - scene, err := getScene(sceneID, c.txnManager) - if err != nil { - return nil, err - } - - ret, err := s.Scene.scrapeByScene(scene) - - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapeScene(context.TODO(), ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - - return nil, errors.New("Scraper with ID " + scraperID + " not found") -} - -// ScrapeSceneQuery uses the scraper with the provided ID to query for -// scenes using the provided query string. It returns a list of -// scraped scene data. -func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.ScrapedScene, error) { - // find scraper with the provided id - s := c.findScraper(scraperID) - if s != nil && s.Scene != nil { - return s.Scene.scrapeByName(query) - } - - return nil, errors.New("Scraper with ID " + scraperID + " not found") -} - -// ScrapeSceneFragment uses the scraper with the provided ID to scrape a scene. -func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { - // find scraper with the provided id - s := c.findScraper(scraperID) - if s != nil && s.Scene != nil { - ret, err := s.Scene.scrapeByFragment(scene) - - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapeScene(context.TODO(), ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - - return nil, errors.New("Scraper with ID " + scraperID + " not found") -} - -// ScrapeSceneURL uses the first scraper it finds that matches the URL -// provided to scrape a scene. If no scrapers are found that matches -// the URL, then nil is returned. 
-func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) { - for _, s := range c.scrapers { - if matchesURL(s.Scene, url) { - ret, err := s.Scene.scrapeByURL(url) - - if err != nil { - return nil, err - } - - err = c.postScrapeScene(context.TODO(), ret) - if err != nil { - return nil, err - } - - return ret, nil - } - } - - return nil, nil -} - -// ScrapeGallery uses the scraper with the provided ID to scrape a gallery using existing data. -func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGallery, error) { - s := c.findScraper(scraperID) - if s != nil && s.Gallery != nil { - // get gallery from id - gallery, err := getGallery(galleryID, c.txnManager) - if err != nil { - return nil, err - } - - ret, err := s.Gallery.scrapeByGallery(gallery) - - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapeGallery(ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - - return nil, errors.New("Scraped with ID " + scraperID + " not found") -} - -// ScrapeGalleryFragment uses the scraper with the provided ID to scrape a gallery. -func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) { - s := c.findScraper(scraperID) - if s != nil && s.Gallery != nil { - ret, err := s.Gallery.scrapeByFragment(gallery) - - if err != nil { - return nil, err - } - - if ret != nil { - err = c.postScrapeGallery(ret) - if err != nil { - return nil, err - } - } - - return ret, nil - } - - return nil, errors.New("Scraped with ID " + scraperID + " not found") -} - -// ScrapeGalleryURL uses the first scraper it finds that matches the URL -// provided to scrape a scene. If no scrapers are found that matches -// the URL, then nil is returned. -func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) { - for _, s := range c.scrapers { - if matchesURL(s.Gallery, url) { - ret, err := s.Gallery.scrapeByURL(url) - - if err != nil { - return nil, err - } - - err = c.postScrapeGallery(ret) - if err != nil { - return nil, err - } - - return ret, nil - } - } - - return nil, nil -} - -// ScrapeMovieURL uses the first scraper it finds that matches the URL -// provided to scrape a movie. If no scrapers are found that matches -// the URL, then nil is returned. 
-func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) { - for _, s := range c.scrapers { - if s.Movie != nil && matchesURL(s.Movie, url) { - ret, err := s.Movie.scrapeByURL(url) - if err != nil { - return nil, err - } - - if ret.Studio != nil { - if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error { - return match.ScrapedStudio(r.Studio(), ret.Studio, nil) - }); err != nil { - return nil, err - } - } - - // post-process - set the image if applicable - if err := setMovieFrontImage(context.TODO(), c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error()) - } - if err := setMovieBackImage(context.TODO(), c.client, ret, c.globalConfig); err != nil { - logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error()) - } - - return ret, nil - } - } - - return nil, nil -} - -func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) { - var ret []*models.ScrapedTag - - excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns() - var excludeRegexps []*regexp.Regexp - - for _, excludePattern := range excludePatterns { - reg, err := regexp.Compile(strings.ToLower(excludePattern)) - if err != nil { - logger.Errorf("Invalid tag exclusion pattern :%v", err) - } else { - excludeRegexps = append(excludeRegexps, reg) - } - } - - var ignoredTags []string -ScrapeTag: - for _, t := range scrapedTags { - for _, reg := range excludeRegexps { - if reg.MatchString(strings.ToLower(t.Name)) { - ignoredTags = append(ignoredTags, t.Name) - continue ScrapeTag - } - } - - err := match.ScrapedTag(tqb, t) - if err != nil { - return nil, err - } - ret = append(ret, t) - } - - if len(ignoredTags) > 0 { - logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", ")) - } - - return ret, nil -} diff --git a/pkg/scraper/script.go b/pkg/scraper/script.go index a53266e0e..59ab29f9f 100644 --- a/pkg/scraper/script.go +++ b/pkg/scraper/script.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "encoding/json" "errors" "fmt" @@ -89,7 +90,7 @@ func (s *scriptScraper) runScraperScript(inString string, out interface{}) error return nil } -func (s *scriptScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { +func (s *scriptScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { inString := `{"name": "` + name + `"}` var performers []models.ScrapedPerformer @@ -121,7 +122,7 @@ func (s *scriptScraper) scrapePerformerByFragment(scrapedPerformer models.Scrape return &ret, err } -func (s *scriptScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) { +func (s *scriptScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { inString := `{"url": "` + url + `"}` var ret models.ScrapedPerformer @@ -131,7 +132,7 @@ func (s *scriptScraper) scrapePerformerByURL(url string) (*models.ScrapedPerform return &ret, err } -func (s *scriptScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) { +func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { inString, err := json.Marshal(sceneToUpdateInput(scene)) if err != nil { @@ -145,7 +146,7 @@ func (s *scriptScraper) scrapeSceneByScene(scene *models.Scene) (*models.Scraped return &ret, err } -func (s *scriptScraper) scrapeScenesByName(name string) 
([]*models.ScrapedScene, error) { +func (s *scriptScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { inString := `{"name": "` + name + `"}` var scenes []models.ScrapedScene @@ -163,7 +164,7 @@ func (s *scriptScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, return ret, err } -func (s *scriptScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *scriptScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { inString, err := json.Marshal(scene) if err != nil { @@ -177,7 +178,7 @@ func (s *scriptScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (* return &ret, err } -func (s *scriptScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { +func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { inString, err := json.Marshal(galleryToUpdateInput(gallery)) if err != nil { @@ -205,7 +206,7 @@ func (s *scriptScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInp return &ret, err } -func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) { +func (s *scriptScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { inString := `{"url": "` + url + `"}` var ret models.ScrapedScene @@ -215,7 +216,7 @@ func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, erro return &ret, err } -func (s *scriptScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) { +func (s *scriptScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { inString := `{"url": "` + url + `"}` var ret models.ScrapedGallery @@ -225,7 +226,7 @@ func (s *scriptScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, return &ret, err } -func (s *scriptScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { +func (s *scriptScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { inString := `{"url": "` + url + `"}` var ret models.ScrapedMovie diff --git a/pkg/scraper/stash.go b/pkg/scraper/stash.go index 50f5cc12a..be7502c3b 100644 --- a/pkg/scraper/stash.go +++ b/pkg/scraper/stash.go @@ -54,7 +54,7 @@ type stashFindPerformerNamesResultType struct { Performers []*stashFindPerformerNamePerformer `graphql:"performers"` } -func (s *stashScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { +func (s *stashScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { client := s.getStashClient() var q struct { @@ -72,7 +72,7 @@ func (s *stashScraper) scrapePerformersByName(name string) ([]*models.ScrapedPer }, } - err := client.Query(context.TODO(), &q, vars) + err := client.Query(ctx, &q, vars) if err != nil { return nil, err } @@ -175,7 +175,7 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) return &ret, nil } -func (s *stashScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) { +func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { client := s.getStashClient() var q struct { @@ -193,7 +193,7 @@ func (s *stashScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, }, } - err := client.Query(context.TODO(), &q, vars) + err := client.Query(ctx, &q, vars) if err != nil { return 
nil, err } @@ -222,7 +222,7 @@ type scrapedSceneStash struct { Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"` } -func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) { +func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { // query by MD5 var q struct { FindScene *scrapedSceneStash `graphql:"findSceneByHash(input: $c)"` @@ -243,7 +243,7 @@ func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS } client := s.getStashClient() - if err := client.Query(context.TODO(), &q, vars); err != nil { + if err := client.Query(ctx, &q, vars); err != nil { return nil, err } @@ -262,7 +262,7 @@ func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS return ret, nil } -func (s *stashScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *stashScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { return nil, errors.New("scrapeSceneByFragment not supported for stash scraper") } @@ -278,7 +278,7 @@ type scrapedGalleryStash struct { Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"` } -func (s *stashScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { +func (s *stashScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { var q struct { FindGallery *scrapedGalleryStash `graphql:"findGalleryByHash(input: $c)"` } @@ -296,7 +296,7 @@ func (s *stashScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models. } client := s.getStashClient() - if err := client.Query(context.TODO(), &q, vars); err != nil { + if err := client.Query(ctx, &q, vars); err != nil { return nil, err } @@ -313,19 +313,19 @@ func (s *stashScraper) scrapeGalleryByFragment(scene models.ScrapedGalleryInput) return nil, errors.New("scrapeGalleryByFragment not supported for stash scraper") } -func (s *stashScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) { +func (s *stashScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { return nil, errors.New("scrapePerformerByURL not supported for stash scraper") } -func (s *stashScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) { +func (s *stashScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { return nil, errors.New("scrapeSceneByURL not supported for stash scraper") } -func (s *stashScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) { +func (s *stashScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { return nil, errors.New("scrapeGalleryByURL not supported for stash scraper") } -func (s *stashScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { +func (s *stashScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { return nil, errors.New("scrapeMovieByURL not supported for stash scraper") } diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go index 0f820a4cd..ce4ba60ce 100644 --- a/pkg/scraper/xpath.go +++ b/pkg/scraper/xpath.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "errors" + "fmt" "net/http" "net/url" "regexp" @@ -39,14 +40,14 @@ func (s *xpathScraper) getXpathScraper() *mappedScraper { return s.config.XPathScrapers[s.scraper.Scraper] } -func (s 
*xpathScraper) scrapeURL(url string) (*html.Node, *mappedScraper, error) { +func (s *xpathScraper) scrapeURL(ctx context.Context, url string) (*html.Node, *mappedScraper, error) { scraper := s.getXpathScraper() if scraper == nil { return nil, nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, nil, err @@ -55,9 +56,9 @@ func (s *xpathScraper) scrapeURL(url string) (*html.Node, *mappedScraper, error) return doc, scraper, nil } -func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) { +func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries - doc, scraper, err := s.scrapeURL(u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -66,9 +67,9 @@ func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerforme return scraper.scrapePerformer(q) } -func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) { +func (s *xpathScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries - doc, scraper, err := s.scrapeURL(u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -77,9 +78,9 @@ func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error return scraper.scrapeScene(q) } -func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) { +func (s *xpathScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries - doc, scraper, err := s.scrapeURL(u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -88,9 +89,9 @@ func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, e return scraper.scrapeGallery(q) } -func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) { +func (s *xpathScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) { u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries - doc, scraper, err := s.scrapeURL(u) + doc, scraper, err := s.scrapeURL(ctx, u) if err != nil { return nil, err } @@ -99,7 +100,7 @@ func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error return scraper.scrapeMovie(q) } -func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) { +func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) { scraper := s.getXpathScraper() if scraper == nil { @@ -114,7 +115,7 @@ func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPer url := s.scraper.QueryURL url = strings.ReplaceAll(url, placeholder, escapedName) - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -128,7 +129,7 @@ func (s *xpathScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped return nil, errors.New("scrapePerformerByFragment not supported for xpath scraper") } -func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) { +func (s *xpathScraper) 
scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) { scraper := s.getXpathScraper() if scraper == nil { @@ -143,7 +144,7 @@ func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, url := s.scraper.QueryURL url = strings.ReplaceAll(url, placeholder, escapedName) - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -153,7 +154,7 @@ func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, return scraper.scrapeScenes(q) } -func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) { +func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) { // construct the URL queryURL := queryURLParametersFromScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -167,7 +168,7 @@ func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -177,7 +178,7 @@ func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS return scraper.scrapeScene(q) } -func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { +func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) { // construct the URL queryURL := queryURLParametersFromScrapedScene(scene) if s.scraper.QueryURLReplacements != nil { @@ -191,7 +192,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*m return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -201,7 +202,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*m return scraper.scrapeScene(q) } -func (s *xpathScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) { +func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) { // construct the URL queryURL := queryURLParametersFromGallery(gallery) if s.scraper.QueryURLReplacements != nil { @@ -215,7 +216,7 @@ func (s *xpathScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models. 
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config") } - doc, err := s.loadURL(context.TODO(), url) + doc, err := s.loadURL(ctx, url) if err != nil { return nil, err @@ -260,11 +261,10 @@ type xpathQuery struct { scraper *xpathScraper } -func (q *xpathQuery) runQuery(selector string) []string { +func (q *xpathQuery) runQuery(selector string) ([]string, error) { found, err := htmlquery.QueryAll(q.doc, selector) if err != nil { - logger.Warnf("Error parsing xpath expression '%s': %s", selector, err.Error()) - return nil + return nil, fmt.Errorf("selector '%s': parse error: %v", selector, err) } var ret []string @@ -276,7 +276,7 @@ func (q *xpathQuery) runQuery(selector string) []string { } } - return ret + return ret, nil } func (q *xpathQuery) nodeText(n *html.Node) string { diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go index ff01741b7..9393ea3c7 100644 --- a/pkg/scraper/xpath_test.go +++ b/pkg/scraper/xpath_test.go @@ -1,6 +1,7 @@ package scraper import ( + "context" "fmt" "net/http" "net/http/httptest" @@ -875,13 +876,23 @@ xPathScrapers: globalConfig := mockGlobalConfig{} client := &http.Client{} - s := createScraperFromConfig(*c, client, nil, globalConfig) - performer, err := s.Performer.scrapeByURL(ts.URL) + ctx := context.Background() + s := newGroupScraper(*c, nil, globalConfig) + us, ok := s.(urlScraper) + if !ok { + t.Error("couldn't convert scraper into url scraper") + } + content, err := us.viaURL(ctx, client, ts.URL, models.ScrapeContentTypePerformer) if err != nil { t.Errorf("Error scraping performer: %s", err.Error()) return } + performer, ok := content.(*models.ScrapedPerformer) + if !ok { + t.Error("couldn't convert scraped content into a performer") + } + verifyField(t, "The name", performer.Name, "Name") } diff --git a/ui/v2.5/src/core/StashService.ts b/ui/v2.5/src/core/StashService.ts index 21714b936..f1f7c870a 100644 --- a/ui/v2.5/src/core/StashService.ts +++ b/ui/v2.5/src/core/StashService.ts @@ -822,15 +822,6 @@ export const useDLNAStatus = () => fetchPolicy: "no-cache", }); -export const queryScrapeFreeones = (performerName: string) => - client.query({ - query: GQL.ScrapeFreeonesDocument, - variables: { - performer_name: performerName, - }, - fetchPolicy: "network-only", - }); - export const queryScrapePerformer = ( scraperId: string, scrapedPerformer: GQL.ScrapedPerformerInput
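
Where the deleted scrapers.go exposed one URL entry point per content type (ScrapePerformerURL, ScrapeSceneURL, ScrapeGalleryURL, ScrapeMovieURL), the urlScraper interface lets a single loop serve all four. A minimal sketch, assuming the new Cache keeps the scrapers slice and shared client fields the old one had; scrapeViaURL is an illustrative name, not the committed method:

```go
package scraper

import (
	"context"

	"github.com/stashapp/stash/pkg/models"
)

// scrapeViaURL iterates the loaded scrapers, picks the first urlScraper
// that claims the URL for the requested content type, and runs the
// shared post-processing on whatever it returns.
func (c Cache) scrapeViaURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
	for _, s := range c.scrapers {
		us, ok := s.(urlScraper)
		if !ok || !us.supportsURL(url, ty) {
			continue
		}

		content, err := us.viaURL(ctx, c.client, url, ty)
		if err != nil {
			return nil, err
		}
		if content != nil {
			return c.postScrape(ctx, content)
		}
	}

	// No scraper matched the URL: keep the old nil, nil contract.
	return nil, nil
}
```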
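On the consumer side, ScrapedContent arrives as a plain interface value, so callers narrow it with a type switch: the same pattern postScrape uses for its fan-out and xpath_test.go uses for its assertion. A short sketch; describeContent is hypothetical, and the optional Title fields are assumed to exist on the scene and gallery models just as Name does on the performer model:

```go
package scraper

import "github.com/stashapp/stash/pkg/models"

// describeContent narrows the ScrapedContent union into a printable
// summary, with a pass-through default for unknown types.
func describeContent(content models.ScrapedContent) string {
	switch v := content.(type) {
	case *models.ScrapedPerformer:
		if v.Name != nil {
			return "performer: " + *v.Name
		}
		return "performer (unnamed)"
	case *models.ScrapedScene:
		if v.Title != nil {
			return "scene: " + *v.Title
		}
		return "scene (untitled)"
	case *models.ScrapedGallery:
		if v.Title != nil {
			return "gallery: " + *v.Title
		}
		return "gallery (untitled)"
	default:
		// Unknown content passes through untouched, as in postScrape.
		return "unrecognized scraped content"
	}
}
```

Centralising the narrowing in one switch keeps resolvers free of scattered type assertions, which is the trade-off the union buys over the old per-type return values.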