From 92837fe1f70c57e9b2fef8d966125288589e7735 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Mon, 16 Dec 2019 12:35:34 +1100 Subject: [PATCH] Add scene metadata scraping functionality (#236) * Add scene scraping functionality * Adapt to changed scraper config --- graphql/documents/data/scrapers.graphql | 75 +++++++++ .../queries/scrapers/scrapers.graphql | 78 +++------ graphql/schema/schema.graphql | 6 +- graphql/schema/types/scraper.graphql | 63 +++++++- pkg/api/resolver_query_scraper.go | 12 ++ pkg/scraper/config.go | 125 +++++++++++++-- pkg/scraper/freeones.go | 8 +- pkg/scraper/scrapers.go | 150 +++++++++++++++++- pkg/scraper/script.go | 24 +++ .../scenes/SceneDetails/SceneEditPanel.tsx | 133 ++++++++++++++++ ui/v2/src/core/StashService.ts | 23 +++ 11 files changed, 614 insertions(+), 83 deletions(-) create mode 100644 graphql/documents/data/scrapers.graphql diff --git a/graphql/documents/data/scrapers.graphql b/graphql/documents/data/scrapers.graphql new file mode 100644 index 000000000..292789454 --- /dev/null +++ b/graphql/documents/data/scrapers.graphql @@ -0,0 +1,75 @@ +fragment ScrapedPerformerData on ScrapedPerformer { + name + url + birthdate + ethnicity + country + eye_color + height + measurements + fake_tits + career_length + tattoos + piercings + aliases +} + +fragment ScrapedScenePerformerData on ScrapedScenePerformer { + id + name + url + twitter + instagram + birthdate + ethnicity + country + eye_color + height + measurements + fake_tits + career_length + tattoos + piercings + aliases +} + +fragment ScrapedSceneStudioData on ScrapedSceneStudio { + id + name + url +} + +fragment ScrapedSceneTagData on ScrapedSceneTag { + id + name +} + +fragment ScrapedSceneData on ScrapedScene { + title + details + url + date + + file { + size + duration + video_codec + audio_codec + width + height + framerate + bitrate + } + + studio { + ...ScrapedSceneStudioData + } + + tags { + ...ScrapedSceneTagData + } + + performers { + ...ScrapedScenePerformerData + } +} \ No newline at end of file diff --git a/graphql/documents/queries/scrapers/scrapers.graphql b/graphql/documents/queries/scrapers/scrapers.graphql index 539755cff..cc4fa15e9 100644 --- a/graphql/documents/queries/scrapers/scrapers.graphql +++ b/graphql/documents/queries/scrapers/scrapers.graphql @@ -9,71 +9,43 @@ query ListPerformerScrapers { } } -# query ListSceneScrapers { -# listSceneScrapers { -# id -# name -# scene { -# urls -# supported_scrapes -# } -# } -# } +query ListSceneScrapers { + listSceneScrapers { + id + name + scene { + urls + supported_scrapes + } + } +} query ScrapePerformerList($scraper_id: ID!, $query: String!) { scrapePerformerList(scraper_id: $scraper_id, query: $query) { - name - url - birthdate - ethnicity - country - eye_color - height - measurements - fake_tits - career_length - tattoos - piercings - aliases + ...ScrapedPerformerData } } query ScrapePerformer($scraper_id: ID!, $scraped_performer: ScrapedPerformerInput!) { scrapePerformer(scraper_id: $scraper_id, scraped_performer: $scraped_performer) { - name - url - twitter - instagram - birthdate - ethnicity - country - eye_color - height - measurements - fake_tits - career_length - tattoos - piercings - aliases + ...ScrapedPerformerData } } query ScrapePerformerURL($url: String!) 
{ scrapePerformerURL(url: $url) {
-    name
-    url
-    twitter
-    instagram
-    birthdate
-    ethnicity
-    country
-    eye_color
-    height
-    measurements
-    fake_tits
-    career_length
-    tattoos
-    piercings
-    aliases
+    ...ScrapedPerformerData
+  }
+}
+
+query ScrapeScene($scraper_id: ID!, $scene: SceneUpdateInput!) {
+  scrapeScene(scraper_id: $scraper_id, scene: $scene) {
+    ...ScrapedSceneData
+  }
+}
+
+query ScrapeSceneURL($url: String!) {
+  scrapeSceneURL(url: $url) {
+    ...ScrapedSceneData
   }
 }
\ No newline at end of file
diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql
index 01f89bfcd..03d7b88f1 100644
--- a/graphql/schema/schema.graphql
+++ b/graphql/schema/schema.graphql
@@ -47,13 +47,17 @@ type Query {
 
   """List available scrapers"""
   listPerformerScrapers: [Scraper!]!
-  #listSceneScrapers: [Scraper!]!
+  listSceneScrapers: [Scraper!]!
   """Scrape a list of performers based on name"""
   scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]!
   """Scrapes a complete performer record based on a scrapePerformerList result"""
   scrapePerformer(scraper_id: ID!, scraped_performer: ScrapedPerformerInput!): ScrapedPerformer
   """Scrapes a complete performer record based on a URL"""
   scrapePerformerURL(url: String!): ScrapedPerformer
+  """Scrapes a complete scene record based on an existing scene"""
+  scrapeScene(scraper_id: ID!, scene: SceneUpdateInput!): ScrapedScene
+  """Scrapes a complete scene record based on a URL"""
+  scrapeSceneURL(url: String!): ScrapedScene
 
   """Scrape a performer using Freeones"""
   scrapeFreeones(performer_name: String!): ScrapedPerformer
diff --git a/graphql/schema/types/scraper.graphql b/graphql/schema/types/scraper.graphql
index 5ce6b0de1..1dc153eb1 100644
--- a/graphql/schema/types/scraper.graphql
+++ b/graphql/schema/types/scraper.graphql
@@ -1,7 +1,10 @@
 enum ScrapeType {
-  NAME
-  FRAGMENT
-  URL
+  """From text query"""
+  NAME
+  """From existing object"""
+  FRAGMENT
+  """From URL"""
+  URL
 }
 
 type ScraperSpec {
@@ -15,7 +18,53 @@ type Scraper {
   name: String!
   """Details for performer scraper"""
   performer: ScraperSpec
-  # TODO
-  # """Details for scene scraper"""
-  # scene: ScraperSpec
-}
\ No newline at end of file
+  """Details for scene scraper"""
+  scene: ScraperSpec
+}
+
+
+type ScrapedScenePerformer {
+  """Set if performer matched"""
+  id: ID
+  name: String!
+  url: String
+  twitter: String
+  instagram: String
+  birthdate: String
+  ethnicity: String
+  country: String
+  eye_color: String
+  height: String
+  measurements: String
+  fake_tits: String
+  career_length: String
+  tattoos: String
+  piercings: String
+  aliases: String
+}
+
+type ScrapedSceneStudio {
+  """Set if studio matched"""
+  id: ID
+  name: String!
+  url: String
+}
+
+type ScrapedSceneTag {
+  """Set if tag matched"""
+  id: ID
+  name: String!
+}
+
+type ScrapedScene {
+  title: String
+  details: String
+  url: String
+  date: String
+
+  file: SceneFileType # Resolver
+
+  studio: ScrapedSceneStudio
+  tags: [ScrapedSceneTag!]
+  performers: [ScrapedScenePerformer!]
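+
+  # studio, tags and performers carry an id only when the scraped name matches
+  # an existing record (see postScrapeScene in pkg/scraper/scrapers.go)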
+} diff --git a/pkg/api/resolver_query_scraper.go b/pkg/api/resolver_query_scraper.go index ff702e115..ea5d36ad3 100644 --- a/pkg/api/resolver_query_scraper.go +++ b/pkg/api/resolver_query_scraper.go @@ -36,6 +36,10 @@ func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Sc return scraper.ListPerformerScrapers() } +func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scraper, error) { + return scraper.ListSceneScrapers() +} + func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) { if query == "" { return nil, nil @@ -51,3 +55,11 @@ func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, s func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) { return scraper.ScrapePerformerURL(url) } + +func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) { + return scraper.ScrapeScene(scraperID, scene) +} + +func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) { + return scraper.ScrapeSceneURL(url) +} diff --git a/pkg/scraper/config.go b/pkg/scraper/config.go index 0e1913916..928e5c79f 100644 --- a/pkg/scraper/config.go +++ b/pkg/scraper/config.go @@ -59,22 +59,13 @@ func (c *performerByFragmentConfig) resolveFn() { } } -type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) - -type scraperByURLConfig struct { +type scrapeByURLConfig struct { scraperTypeConfig `yaml:",inline"` URL []string `yaml:"url,flow"` - performScrape scrapePerformerByURLFunc } -func (c *scraperByURLConfig) resolveFn() { - if c.Action == scraperActionScript { - c.performScrape = scrapePerformerURLScript - } -} - -func (s scraperByURLConfig) matchesURL(url string) bool { - for _, thisURL := range s.URL { +func (c scrapeByURLConfig) matchesURL(url string) bool { + for _, thisURL := range c.URL { if strings.Contains(url, thisURL) { return true } @@ -83,12 +74,53 @@ func (s scraperByURLConfig) matchesURL(url string) bool { return false } +type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) + +type scrapePerformerByURLConfig struct { + scrapeByURLConfig `yaml:",inline"` + performScrape scrapePerformerByURLFunc +} + +func (c *scrapePerformerByURLConfig) resolveFn() { + if c.Action == scraperActionScript { + c.performScrape = scrapePerformerURLScript + } +} + +type scrapeSceneFragmentFunc func(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) + +type sceneByFragmentConfig struct { + scraperTypeConfig `yaml:",inline"` + performScrape scrapeSceneFragmentFunc +} + +func (c *sceneByFragmentConfig) resolveFn() { + if c.Action == scraperActionScript { + c.performScrape = scrapeSceneFragmentScript + } +} + +type scrapeSceneByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedScene, error) + +type scrapeSceneByURLConfig struct { + scrapeByURLConfig `yaml:",inline"` + performScrape scrapeSceneByURLFunc +} + +func (c *scrapeSceneByURLConfig) resolveFn() { + if c.Action == scraperActionScript { + c.performScrape = scrapeSceneURLScript + } +} + type scraperConfig struct { ID string - Name string `yaml:"name"` - PerformerByName *performerByNameConfig `yaml:"performerByName"` - PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"` - PerformerByURL 
[]*scraperByURLConfig `yaml:"performerByURL"` + Name string `yaml:"name"` + PerformerByName *performerByNameConfig `yaml:"performerByName"` + PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"` + PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"` + SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"` + SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"` } func loadScraperFromYAML(path string) (*scraperConfig, error) { @@ -127,6 +159,13 @@ func (c *scraperConfig) initialiseConfigs() { for _, s := range c.PerformerByURL { s.resolveFn() } + + if c.SceneByFragment != nil { + c.SceneByFragment.resolveFn() + } + for _, s := range c.SceneByURL { + s.resolveFn() + } } func (c scraperConfig) toScraper() *models.Scraper { @@ -153,6 +192,21 @@ func (c scraperConfig) toScraper() *models.Scraper { ret.Performer = &performer } + scene := models.ScraperSpec{} + if c.SceneByFragment != nil { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment) + } + if len(c.SceneByURL) > 0 { + scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL) + for _, v := range c.SceneByURL { + scene.Urls = append(scene.Urls, v.URL...) + } + } + + if len(scene.SupportedScrapes) > 0 { + ret.Scene = &scene + } + return &ret } @@ -202,3 +256,42 @@ func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer, return nil, nil } + +func (c scraperConfig) supportsScenes() bool { + return c.SceneByFragment != nil || len(c.SceneByURL) > 0 +} + +func (c scraperConfig) matchesSceneURL(url string) bool { + for _, scraper := range c.SceneByURL { + if scraper.matchesURL(url) { + return true + } + } + + return false +} + +func (c scraperConfig) ScrapeScene(scene models.SceneUpdateInput) (*models.ScrapedScene, error) { + if c.SceneByFragment != nil && c.SceneByFragment.performScrape != nil { + return c.SceneByFragment.performScrape(c.SceneByFragment.scraperTypeConfig, scene) + } + + return nil, nil +} + +func (c scraperConfig) ScrapeSceneURL(url string) (*models.ScrapedScene, error) { + for _, scraper := range c.SceneByURL { + if scraper.matchesURL(url) && scraper.performScrape != nil { + ret, err := scraper.performScrape(scraper.scraperTypeConfig, url) + if err != nil { + return nil, err + } + + if ret != nil { + return ret, nil + } + } + } + + return nil, nil +} diff --git a/pkg/scraper/freeones.go b/pkg/scraper/freeones.go index 9f5b70b29..94bde0e19 100644 --- a/pkg/scraper/freeones.go +++ b/pkg/scraper/freeones.go @@ -30,10 +30,12 @@ func GetFreeonesScraper() scraperConfig { PerformerByFragment: &performerByFragmentConfig{ performScrape: GetPerformer, }, - PerformerByURL: []*scraperByURLConfig{ - &scraperByURLConfig{ + PerformerByURL: []*scrapePerformerByURLConfig{ + &scrapePerformerByURLConfig{ + scrapeByURLConfig: scrapeByURLConfig{ + URL: freeonesURLs, + }, performScrape: GetPerformerURL, - URL: freeonesURLs, }, }, } diff --git a/pkg/scraper/scrapers.go b/pkg/scraper/scrapers.go index 96cd9cd4b..925cd1eab 100644 --- a/pkg/scraper/scrapers.go +++ b/pkg/scraper/scrapers.go @@ -3,6 +3,7 @@ package scraper import ( "errors" "path/filepath" + "strconv" "github.com/stashapp/stash/pkg/logger" "github.com/stashapp/stash/pkg/manager/config" @@ -61,7 +62,26 @@ func ListPerformerScrapers() ([]*models.Scraper, error) { return ret, nil } -func findPerformerScraper(scraperID string) *scraperConfig { +func ListSceneScrapers() ([]*models.Scraper, error) { + // read scraper config files from the directory and cache + scrapers, 
err := loadScrapers()
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret []*models.Scraper
+	for _, s := range scrapers {
+		// filter on type
+		if s.supportsScenes() {
+			ret = append(ret, s.toScraper())
+		}
+	}
+
+	return ret, nil
+}
+
+func findScraper(scraperID string) *scraperConfig {
 	// read scraper config files from the directory and cache
 	loadScrapers()
 
@@ -76,7 +96,7 @@ func findPerformerScraper(scraperID string) *scraperConfig {
 
 func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
 	// find scraper with the provided id
-	s := findPerformerScraper(scraperID)
+	s := findScraper(scraperID)
 	if s != nil {
 		return s.ScrapePerformerNames(query)
 	}
@@ -86,7 +106,7 @@ func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerfo
 
 func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
 	// find scraper with the provided id
-	s := findPerformerScraper(scraperID)
+	s := findScraper(scraperID)
 	if s != nil {
 		return s.ScrapePerformer(scrapedPerformer)
 	}
@@ -103,3 +123,127 @@ func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerI
 
 	return nil, nil
 }
+
+func matchPerformer(p *models.ScrapedScenePerformer) error {
+	qb := models.NewPerformerQueryBuilder()
+
+	performers, err := qb.FindByNames([]string{p.Name}, nil)
+
+	if err != nil {
+		return err
+	}
+
+	if len(performers) != 1 {
+		// ignore - cannot match
+		return nil
+	}
+
+	id := strconv.Itoa(performers[0].ID)
+	p.ID = &id
+	return nil
+}
+
+func matchStudio(s *models.ScrapedSceneStudio) error {
+	qb := models.NewStudioQueryBuilder()
+
+	studio, err := qb.FindByName(s.Name, nil)
+
+	if err != nil {
+		return err
+	}
+
+	if studio == nil {
+		// ignore - cannot match
+		return nil
+	}
+
+	id := strconv.Itoa(studio.ID)
+	s.ID = &id
+	return nil
+}
+
+func matchTag(s *models.ScrapedSceneTag) error {
+	qb := models.NewTagQueryBuilder()
+
+	tag, err := qb.FindByName(s.Name, nil)
+
+	if err != nil {
+		return err
+	}
+
+	if tag == nil {
+		// ignore - cannot match
+		return nil
+	}
+
+	id := strconv.Itoa(tag.ID)
+	s.ID = &id
+	return nil
+}
+
+func postScrapeScene(ret *models.ScrapedScene) error {
+	// guard against scrapers that return no result
+	if ret == nil {
+		return nil
+	}
+
+	for _, p := range ret.Performers {
+		err := matchPerformer(p)
+		if err != nil {
+			return err
+		}
+	}
+
+	for _, t := range ret.Tags {
+		err := matchTag(t)
+		if err != nil {
+			return err
+		}
+	}
+
+	if ret.Studio != nil {
+		err := matchStudio(ret.Studio)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func ScrapeScene(scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
+	// find scraper with the provided id
+	s := findScraper(scraperID)
+	if s != nil {
+		ret, err := s.ScrapeScene(scene)
+
+		if err != nil {
+			return nil, err
+		}
+
+		err = postScrapeScene(ret)
+		if err != nil {
+			return nil, err
+		}
+
+		return ret, nil
+	}
+
+	return nil, errors.New("Scraper with ID " + scraperID + " not found")
+}
+
+func ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
+	for _, s := range scrapers {
+		if s.matchesSceneURL(url) {
+			ret, err := s.ScrapeSceneURL(url)
+
+			if err != nil {
+				return nil, err
+			}
+
+			err = postScrapeScene(ret)
+			if err != nil {
+				return nil, err
+			}
+
+			return ret, nil
+		}
+	}
+
+	return nil, nil
+}
diff --git a/pkg/scraper/script.go b/pkg/scraper/script.go
index f8cc065bb..7dd6c93e7 100644
--- a/pkg/scraper/script.go
+++ b/pkg/scraper/script.go
@@ -106,3 +106,27 @@ func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedP
 
 	return &ret, err
 }
+
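+// The two scene script scrapers below are wired up from scraper YAML config.
+// A minimal sketch of such a config, assuming the same action/script keys
+// already used by the performer scrapers (the script path is illustrative):
+//
+//   name: Example
+//   sceneByFragment:
+//     action: script
+//     script:
+//       - python3
+//       - scrape.py
+//   sceneByURL:
+//     - action: script
+//       url:
+//         - example.com/scenes
+//       script:
+//         - python3
+//         - scrape.py
+//
+// Each scraper passes its input to the script as JSON on stdin and decodes
+// the script's stdout into a models.ScrapedScene. The values below are
+// illustrative only:
+//
+//   stdin:  {"id": "42", "title": "example", "url": "https://example.com/scenes/42"}
+//   stdout: {"title": "Example Scene", "date": "2019-12-16", "performers": [{"name": "Jane Doe"}]}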
+func scrapeSceneFragmentScript(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
+	inString, err := json.Marshal(scene)
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret models.ScrapedScene
+
+	err = runScraperScript(c.Script, string(inString), &ret)
+
+	return &ret, err
+}
+
+func scrapeSceneURLScript(c scraperTypeConfig, url string) (*models.ScrapedScene, error) {
+	// marshal rather than concatenating, so that URLs containing quotes
+	// still produce valid JSON
+	inString, err := json.Marshal(map[string]string{"url": url})
+
+	if err != nil {
+		return nil, err
+	}
+
+	var ret models.ScrapedScene
+
+	err = runScraperScript(c.Script, string(inString), &ret)
+
+	return &ret, err
+}
diff --git a/ui/v2/src/components/scenes/SceneDetails/SceneEditPanel.tsx b/ui/v2/src/components/scenes/SceneDetails/SceneEditPanel.tsx
index 83882444a..da1a28033 100644
--- a/ui/v2/src/components/scenes/SceneDetails/SceneEditPanel.tsx
+++ b/ui/v2/src/components/scenes/SceneDetails/SceneEditPanel.tsx
@@ -11,6 +11,9 @@ import {
   Collapse,
   Icon,
   FileInput,
+  Menu,
+  Popover,
+  MenuItem,
 } from "@blueprintjs/core";
 import _ from "lodash";
 import React, { FunctionComponent, useEffect, useState } from "react";
@@ -42,6 +45,9 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
   const [tagIds, setTagIds] = useState<string[] | undefined>(undefined);
   const [coverImage, setCoverImage] = useState<string | undefined>(undefined);
 
+  const Scrapers = StashService.useListSceneScrapers();
+  const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListSceneScrapersListSceneScrapers[]>([]);
+
   const [isDeleteAlertOpen, setIsDeleteAlertOpen] = useState(false);
   const [deleteFile, setDeleteFile] = useState(false);
   const [deleteGenerated, setDeleteGenerated] = useState(true);
@@ -55,6 +61,19 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
   const updateScene = StashService.useSceneUpdate(getSceneInput());
   const deleteScene = StashService.useSceneDestroy(getSceneDeleteInput());
 
+  useEffect(() => {
+    // only scrapers that support fragment scraping can be queried from an
+    // existing scene
+    var newQueryableScrapers : GQL.ListSceneScrapersListSceneScrapers[] = [];
+
+    if (!!Scrapers.data && Scrapers.data.listSceneScrapers) {
+      newQueryableScrapers = Scrapers.data.listSceneScrapers.filter((s) => {
+        return s.scene && s.scene.supported_scrapes.includes(GQL.ScrapeType.Fragment);
+      });
+    }
+
+    setQueryableScrapers(newQueryableScrapers);
+
+  }, [Scrapers.data])
+
   function updateSceneEditState(state: Partial<GQL.SceneDataFragment>) {
     const perfIds = !!state.performers ? state.performers.map((performer) => performer.id) : undefined;
     const tIds = !!state.tags ? state.tags.map((tag) => tag.id) : undefined;
@@ -186,6 +205,118 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
   function onCoverImageChange(event: React.FormEvent) {
     ImageUtils.onImageChange(event, onImageLoad);
   }
+
+  async function onScrapeClicked(scraper : GQL.ListSceneScrapersListSceneScrapers) {
+    setIsLoading(true);
+    try {
+      const result = await StashService.queryScrapeScene(scraper.id, getSceneInput());
+      if (!result.data || !result.data.scrapeScene) { return; }
+      updateSceneFromScrapedScene(result.data.scrapeScene);
+    } catch (e) {
+      ErrorUtils.handle(e);
+    } finally {
+      setIsLoading(false);
+    }
+  }
+
+  function renderScraperMenuItem(scraper : GQL.ListSceneScrapersListSceneScrapers) {
+    return (
+      <MenuItem
+        text={scraper.name}
+        onClick={() => { onScrapeClicked(scraper); }}
+      />
+    );
+  }
+
+  function renderScraperMenu() {
+    if (!queryableScrapers || queryableScrapers.length == 0) {
+      return;
+    }
+
+    const scraperMenu = (
+      <Menu>
+        {queryableScrapers ? queryableScrapers.map((s) => renderScraperMenuItem(s)) : undefined}
+      </Menu>
+    );
+    return (
+      <Popover content={scraperMenu} position="bottom">
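+        {/* one entry per fragment-capable scraper; clicking runs scrapeScene
+            against the current (unsaved) form state via getSceneInput() */}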