Add scene metadata scraping functionality (#236)

* Add scene scraping functionality

* Adapt to changed scraper config
This commit is contained in:
WithoutPants 2019-12-16 12:35:34 +11:00 committed by Leopere
parent f8a760d729
commit 92837fe1f7
11 changed files with 614 additions and 83 deletions

View File

@ -0,0 +1,75 @@
# Shared field selections for scraper query results. These mirror the
# Scraped* types in the server schema; per that schema, the id fields on
# the scene-scoped fragments are only set when the scraped entity was
# matched against a stored record.

# Full performer field set returned by a performer scraper
# (no id: standalone performer scrapes are not matched to stored records).
fragment ScrapedPerformerData on ScrapedPerformer {
  name
  url
  birthdate
  ethnicity
  country
  eye_color
  height
  measurements
  fake_tits
  career_length
  tattoos
  piercings
  aliases
}

# Performer as returned inside a scraped scene; id is set when the
# performer matched a stored record.
fragment ScrapedScenePerformerData on ScrapedScenePerformer {
  id
  name
  url
  twitter
  instagram
  birthdate
  ethnicity
  country
  eye_color
  height
  measurements
  fake_tits
  career_length
  tattoos
  piercings
  aliases
}

# Studio as returned inside a scraped scene; id set when matched.
fragment ScrapedSceneStudioData on ScrapedSceneStudio {
  id
  name
  url
}

# Tag as returned inside a scraped scene; id set when matched.
fragment ScrapedSceneTagData on ScrapedSceneTag {
  id
  name
}

# Complete scraped scene, including nested studio/tag/performer data.
fragment ScrapedSceneData on ScrapedScene {
  title
  details
  url
  date
  file {
    size
    duration
    video_codec
    audio_codec
    width
    height
    framerate
    bitrate
  }
  studio {
    ...ScrapedSceneStudioData
  }
  tags {
    ...ScrapedSceneTagData
  }
  performers {
    ...ScrapedScenePerformerData
  }
}

View File

@ -9,71 +9,43 @@ query ListPerformerScrapers {
}
}
# query ListSceneScrapers {
# listSceneScrapers {
# id
# name
# scene {
# urls
# supported_scrapes
# }
# }
# }
# Lists the available scene scrapers, with the URL patterns and scrape
# types (fragment/URL) each one supports.
query ListSceneScrapers {
  listSceneScrapers {
    id
    name
    scene {
      urls
      supported_scrapes
    }
  }
}
query ScrapePerformerList($scraper_id: ID!, $query: String!) {
scrapePerformerList(scraper_id: $scraper_id, query: $query) {
name
url
birthdate
ethnicity
country
eye_color
height
measurements
fake_tits
career_length
tattoos
piercings
aliases
...ScrapedPerformerData
}
}
query ScrapePerformer($scraper_id: ID!, $scraped_performer: ScrapedPerformerInput!) {
scrapePerformer(scraper_id: $scraper_id, scraped_performer: $scraped_performer) {
name
url
twitter
instagram
birthdate
ethnicity
country
eye_color
height
measurements
fake_tits
career_length
tattoos
piercings
aliases
...ScrapedPerformerData
}
}
query ScrapePerformerURL($url: String!) {
scrapePerformerURL(url: $url) {
name
url
twitter
instagram
birthdate
ethnicity
country
eye_color
height
measurements
fake_tits
career_length
tattoos
piercings
aliases
...ScrapedPerformerData
}
}
# Scrapes scene metadata with a specific scraper, using the current
# scene data (as a SceneUpdateInput) as the fragment input.
query ScrapeScene($scraper_id: ID!, $scene: SceneUpdateInput!) {
  scrapeScene(scraper_id: $scraper_id, scene: $scene) {
    ...ScrapedSceneData
  }
}

# Scrapes scene metadata from a URL, using whichever scraper claims it.
query ScrapeSceneURL($url: String!) {
  scrapeSceneURL(url: $url) {
    ...ScrapedSceneData
  }
}

View File

@ -47,13 +47,17 @@ type Query {
"""List available scrapers"""
listPerformerScrapers: [Scraper!]!
#listSceneScrapers: [Scraper!]!
listSceneScrapers: [Scraper!]!
"""Scrape a list of performers based on name"""
scrapePerformerList(scraper_id: ID!, query: String!): [ScrapedPerformer!]!
"""Scrapes a complete performer record based on a scrapePerformerList result"""
scrapePerformer(scraper_id: ID!, scraped_performer: ScrapedPerformerInput!): ScrapedPerformer
"""Scrapes a complete performer record based on a URL"""
scrapePerformerURL(url: String!): ScrapedPerformer
"""Scrapes a complete scene record based on an existing scene"""
scrapeScene(scraper_id: ID!, scene: SceneUpdateInput!): ScrapedScene
"""Scrapes a complete performer record based on a URL"""
scrapeSceneURL(url: String!): ScrapedScene
"""Scrape a performer using Freeones"""
scrapeFreeones(performer_name: String!): ScrapedPerformer

View File

@ -1,7 +1,10 @@
enum ScrapeType {
NAME
FRAGMENT
URL
"""From text query"""
NAME
"""From existing object"""
FRAGMENT
"""From URL"""
URL
}
type ScraperSpec {
@ -15,7 +18,53 @@ type Scraper {
name: String!
"""Details for performer scraper"""
performer: ScraperSpec
# TODO
# """Details for scene scraper"""
# scene: ScraperSpec
}
"""Details for scene scraper"""
scene: ScraperSpec
}
"""Performer returned as part of a scraped scene."""
type ScrapedScenePerformer {
  """Set if performer matched"""
  id: ID
  name: String!
  url: String
  twitter: String
  instagram: String
  birthdate: String
  ethnicity: String
  country: String
  eye_color: String
  height: String
  measurements: String
  fake_tits: String
  career_length: String
  tattoos: String
  piercings: String
  aliases: String
}

"""Studio returned as part of a scraped scene."""
type ScrapedSceneStudio {
  """Set if studio matched"""
  id: ID
  name: String!
  url: String
}

"""Tag returned as part of a scraped scene."""
type ScrapedSceneTag {
  """Set if tag matched"""
  id: ID
  name: String!
}

"""Scene metadata returned by a scene scraper."""
type ScrapedScene {
  title: String
  details: String
  url: String
  date: String
  file: SceneFileType # Resolver
  studio: ScrapedSceneStudio
  tags: [ScrapedSceneTag!]
  performers: [ScrapedScenePerformer!]
}

View File

@ -36,6 +36,10 @@ func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Sc
return scraper.ListPerformerScrapers()
}
// ListSceneScrapers resolves the listSceneScrapers query by delegating
// to the scraper package's scene-capable scraper listing.
func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scraper, error) {
	return scraper.ListSceneScrapers()
}
func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
if query == "" {
return nil, nil
@ -51,3 +55,11 @@ func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, s
// ScrapePerformerURL resolves a performer scrape from a URL, delegating
// to whichever configured scraper matches the URL.
func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
	return scraper.ScrapePerformerURL(url)
}

// ScrapeScene resolves a scene scrape using the scraper with the given
// id and the current scene data as the fragment input.
func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	return scraper.ScrapeScene(scraperID, scene)
}

// ScrapeSceneURL resolves a scene scrape from a URL, delegating to
// whichever configured scraper matches the URL.
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
	return scraper.ScrapeSceneURL(url)
}

View File

@ -59,22 +59,13 @@ func (c *performerByFragmentConfig) resolveFn() {
}
}
type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)
type scraperByURLConfig struct {
type scrapeByURLConfig struct {
scraperTypeConfig `yaml:",inline"`
URL []string `yaml:"url,flow"`
performScrape scrapePerformerByURLFunc
}
func (c *scraperByURLConfig) resolveFn() {
if c.Action == scraperActionScript {
c.performScrape = scrapePerformerURLScript
}
}
func (s scraperByURLConfig) matchesURL(url string) bool {
for _, thisURL := range s.URL {
func (c scrapeByURLConfig) matchesURL(url string) bool {
for _, thisURL := range c.URL {
if strings.Contains(url, thisURL) {
return true
}
@ -83,12 +74,53 @@ func (s scraperByURLConfig) matchesURL(url string) bool {
return false
}
// scrapePerformerByURLFunc performs a performer scrape for a URL using
// the given scraper type configuration.
type scrapePerformerByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error)

// scrapePerformerByURLConfig combines the shared URL-matching config
// with the bound performer-scrape implementation.
type scrapePerformerByURLConfig struct {
	scrapeByURLConfig `yaml:",inline"`
	performScrape     scrapePerformerByURLFunc
}

// resolveFn binds performScrape to the implementation for the configured
// action; only the script action is handled here.
func (c *scrapePerformerByURLConfig) resolveFn() {
	if c.Action == scraperActionScript {
		c.performScrape = scrapePerformerURLScript
	}
}
// scrapeSceneFragmentFunc performs a scene scrape from existing scene
// data using the given scraper type configuration.
type scrapeSceneFragmentFunc func(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error)

// sceneByFragmentConfig configures fragment-based scene scraping: the
// scraper type config plus the bound scrape implementation.
type sceneByFragmentConfig struct {
	scraperTypeConfig `yaml:",inline"`
	performScrape     scrapeSceneFragmentFunc
}

// resolveFn binds performScrape to the implementation for the configured
// action; only the script action is handled here.
func (c *sceneByFragmentConfig) resolveFn() {
	if c.Action == scraperActionScript {
		c.performScrape = scrapeSceneFragmentScript
	}
}
// scrapeSceneByURLFunc performs a scene scrape for a URL using the
// given scraper type configuration.
type scrapeSceneByURLFunc func(c scraperTypeConfig, url string) (*models.ScrapedScene, error)

// scrapeSceneByURLConfig combines the shared URL-matching config with
// the bound scene-scrape implementation.
type scrapeSceneByURLConfig struct {
	scrapeByURLConfig `yaml:",inline"`
	performScrape     scrapeSceneByURLFunc
}

// resolveFn binds performScrape to the implementation for the configured
// action; only the script action is handled here.
func (c *scrapeSceneByURLConfig) resolveFn() {
	if c.Action == scraperActionScript {
		c.performScrape = scrapeSceneURLScript
	}
}
type scraperConfig struct {
ID string
Name string `yaml:"name"`
PerformerByName *performerByNameConfig `yaml:"performerByName"`
PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
PerformerByURL []*scraperByURLConfig `yaml:"performerByURL"`
Name string `yaml:"name"`
PerformerByName *performerByNameConfig `yaml:"performerByName"`
PerformerByFragment *performerByFragmentConfig `yaml:"performerByFragment"`
PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"`
SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"`
SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
}
func loadScraperFromYAML(path string) (*scraperConfig, error) {
@ -127,6 +159,13 @@ func (c *scraperConfig) initialiseConfigs() {
for _, s := range c.PerformerByURL {
s.resolveFn()
}
if c.SceneByFragment != nil {
c.SceneByFragment.resolveFn()
}
for _, s := range c.SceneByURL {
s.resolveFn()
}
}
func (c scraperConfig) toScraper() *models.Scraper {
@ -153,6 +192,21 @@ func (c scraperConfig) toScraper() *models.Scraper {
ret.Performer = &performer
}
scene := models.ScraperSpec{}
if c.SceneByFragment != nil {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
}
if len(c.SceneByURL) > 0 {
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
for _, v := range c.SceneByURL {
scene.Urls = append(scene.Urls, v.URL...)
}
}
if len(scene.SupportedScrapes) > 0 {
ret.Scene = &scene
}
return &ret
}
@ -202,3 +256,42 @@ func (c scraperConfig) ScrapePerformerURL(url string) (*models.ScrapedPerformer,
return nil, nil
}
// supportsScenes reports whether this scraper config declares any
// scene-scraping capability (URL-based or fragment-based).
func (c scraperConfig) supportsScenes() bool {
	return len(c.SceneByURL) > 0 || c.SceneByFragment != nil
}
// matchesSceneURL reports whether any of this config's scene-by-URL
// scrapers claims the given URL.
func (c scraperConfig) matchesSceneURL(url string) bool {
	for _, sceneScraper := range c.SceneByURL {
		if sceneScraper.matchesURL(url) {
			return true
		}
	}
	return false
}
// ScrapeScene runs the fragment-based scene scraper, if one is
// configured and has a bound implementation; otherwise it returns
// nil with no error.
func (c scraperConfig) ScrapeScene(scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	cfg := c.SceneByFragment
	if cfg == nil || cfg.performScrape == nil {
		return nil, nil
	}
	return cfg.performScrape(cfg.scraperTypeConfig, scene)
}
// ScrapeSceneURL tries each scene-by-URL scraper that claims the URL
// and has a bound implementation, returning the first non-nil result.
// Errors abort immediately; a nil result falls through to the next
// matching scraper. Returns nil, nil when nothing matched.
func (c scraperConfig) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
	for _, sceneScraper := range c.SceneByURL {
		if !sceneScraper.matchesURL(url) || sceneScraper.performScrape == nil {
			continue
		}

		scraped, err := sceneScraper.performScrape(sceneScraper.scraperTypeConfig, url)
		if err != nil {
			return nil, err
		}
		if scraped != nil {
			return scraped, nil
		}
	}
	return nil, nil
}

View File

@ -30,10 +30,12 @@ func GetFreeonesScraper() scraperConfig {
PerformerByFragment: &performerByFragmentConfig{
performScrape: GetPerformer,
},
PerformerByURL: []*scraperByURLConfig{
&scraperByURLConfig{
PerformerByURL: []*scrapePerformerByURLConfig{
&scrapePerformerByURLConfig{
scrapeByURLConfig: scrapeByURLConfig{
URL: freeonesURLs,
},
performScrape: GetPerformerURL,
URL: freeonesURLs,
},
},
}

View File

@ -3,6 +3,7 @@ package scraper
import (
"errors"
"path/filepath"
"strconv"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config"
@ -61,7 +62,26 @@ func ListPerformerScrapers() ([]*models.Scraper, error) {
return ret, nil
}
func findPerformerScraper(scraperID string) *scraperConfig {
// ListSceneScrapers loads (or reuses the cached) scraper configs and
// returns those that support scene scraping, converted to API models.
func ListSceneScrapers() ([]*models.Scraper, error) {
	configs, err := loadScrapers()
	if err != nil {
		return nil, err
	}

	var ret []*models.Scraper
	for _, cfg := range configs {
		// keep only scene-capable scrapers
		if !cfg.supportsScenes() {
			continue
		}
		ret = append(ret, cfg.toScraper())
	}

	return ret, nil
}
func findScraper(scraperID string) *scraperConfig {
// read scraper config files from the directory and cache
loadScrapers()
@ -76,7 +96,7 @@ func findPerformerScraper(scraperID string) *scraperConfig {
func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := findPerformerScraper(scraperID)
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformerNames(query)
}
@ -86,7 +106,7 @@ func ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerfo
func ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
// find scraper with the provided id
s := findPerformerScraper(scraperID)
s := findScraper(scraperID)
if s != nil {
return s.ScrapePerformer(scrapedPerformer)
}
@ -103,3 +123,127 @@ func ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
return nil, nil
}
// matchPerformer looks up the scraped performer by name and, on exactly
// one match, records the stored record's id on the scraped performer.
// Zero or multiple matches are silently left unmatched.
func matchPerformer(p *models.ScrapedScenePerformer) error {
	qb := models.NewPerformerQueryBuilder()
	matches, err := qb.FindByNames([]string{p.Name}, nil)
	if err != nil {
		return err
	}

	if len(matches) == 1 {
		idStr := strconv.Itoa(matches[0].ID)
		p.ID = &idStr
	}

	return nil
}
// matchStudio looks up the scraped studio by name and records the stored
// record's id on the scraped studio when found; no match is not an error.
func matchStudio(s *models.ScrapedSceneStudio) error {
	studio, err := models.NewStudioQueryBuilder().FindByName(s.Name, nil)
	if err != nil {
		return err
	}

	if studio != nil {
		idStr := strconv.Itoa(studio.ID)
		s.ID = &idStr
	}

	return nil
}
// matchTag looks up the scraped tag by name and records the stored
// record's id on the scraped tag when found; no match is not an error.
func matchTag(t *models.ScrapedSceneTag) error {
	tag, err := models.NewTagQueryBuilder().FindByName(t.Name, nil)
	if err != nil {
		return err
	}

	if tag != nil {
		idStr := strconv.Itoa(tag.ID)
		t.ID = &idStr
	}

	return nil
}
// postScrapeScene matches the scraped scene's performers, tags and
// studio against stored records, filling in their ids where a match is
// found. A nil scene (a scraper legitimately returned no result) is a
// no-op; without this guard the field accesses below would panic.
func postScrapeScene(ret *models.ScrapedScene) error {
	if ret == nil {
		return nil
	}

	for _, p := range ret.Performers {
		if err := matchPerformer(p); err != nil {
			return err
		}
	}

	for _, t := range ret.Tags {
		if err := matchTag(t); err != nil {
			return err
		}
	}

	if ret.Studio != nil {
		if err := matchStudio(ret.Studio); err != nil {
			return err
		}
	}

	return nil
}
// ScrapeScene scrapes scene metadata using the scraper with the given
// id, then matches the result against stored performers/tags/studios.
// Returns an error if no scraper has the given id.
func ScrapeScene(scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	// find scraper with the provided id
	s := findScraper(scraperID)
	if s == nil {
		return nil, errors.New("Scraper with ID " + scraperID + " not found")
	}

	ret, err := s.ScrapeScene(scene)
	if err != nil {
		return nil, err
	}

	// the scraper may return nil, nil when it has no bound scene
	// implementation; guard before matching to avoid a nil dereference
	if ret != nil {
		if err := postScrapeScene(ret); err != nil {
			return nil, err
		}
	}

	return ret, nil
}
// ScrapeSceneURL scrapes scene metadata from the first loaded scraper
// whose scene-by-URL config claims the URL, then matches the result
// against stored records. Returns nil, nil when no scraper claims the
// URL.
func ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
	for _, s := range scrapers {
		if !s.matchesSceneURL(url) {
			continue
		}

		ret, err := s.ScrapeSceneURL(url)
		if err != nil {
			return nil, err
		}

		// a matching scraper may still produce no result; guard before
		// matching to avoid a nil dereference
		if ret != nil {
			if err := postScrapeScene(ret); err != nil {
				return nil, err
			}
		}

		return ret, nil
	}

	return nil, nil
}

View File

@ -106,3 +106,27 @@ func scrapePerformerURLScript(c scraperTypeConfig, url string) (*models.ScrapedP
return &ret, err
}
// scrapeSceneFragmentScript runs the configured script with the scene
// encoded as JSON on its input, decoding the script's output into a
// ScrapedScene. The (possibly partial) result is returned alongside any
// script error.
func scrapeSceneFragmentScript(c scraperTypeConfig, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
	payload, err := json.Marshal(scene)
	if err != nil {
		return nil, err
	}

	ret := models.ScrapedScene{}
	err = runScraperScript(c.Script, string(payload), &ret)
	return &ret, err
}
// scrapeSceneURLScript runs the configured script with a {"url": ...}
// JSON payload on its input, decoding the script's output into a
// ScrapedScene.
func scrapeSceneURLScript(c scraperTypeConfig, url string) (*models.ScrapedScene, error) {
	// marshal instead of concatenating strings so URLs containing
	// quotes or backslashes cannot produce invalid JSON
	inString, err := json.Marshal(map[string]string{"url": url})
	if err != nil {
		return nil, err
	}

	var ret models.ScrapedScene
	err = runScraperScript(c.Script, string(inString), &ret)
	return &ret, err
}

View File

@ -11,6 +11,9 @@ import {
Collapse,
Icon,
FileInput,
Menu,
Popover,
MenuItem,
} from "@blueprintjs/core";
import _ from "lodash";
import React, { FunctionComponent, useEffect, useState } from "react";
@ -42,6 +45,9 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
const [tagIds, setTagIds] = useState<string[] | undefined>(undefined);
const [coverImage, setCoverImage] = useState<string | undefined>(undefined);
const Scrapers = StashService.useListSceneScrapers();
const [queryableScrapers, setQueryableScrapers] = useState<GQL.ListSceneScrapersListSceneScrapers[]>([]);
const [isDeleteAlertOpen, setIsDeleteAlertOpen] = useState<boolean>(false);
const [deleteFile, setDeleteFile] = useState<boolean>(false);
const [deleteGenerated, setDeleteGenerated] = useState<boolean>(true);
@ -55,6 +61,19 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
const updateScene = StashService.useSceneUpdate(getSceneInput());
const deleteScene = StashService.useSceneDestroy(getSceneDeleteInput());
// Recompute which scrapers can be queried with the current scene data:
// only scrapers whose scene config supports fragment scrapes.
useEffect(() => {
  var newQueryableScrapers : GQL.ListSceneScrapersListSceneScrapers[] = [];

  if (!!Scrapers.data && Scrapers.data.listSceneScrapers) {
    newQueryableScrapers = Scrapers.data.listSceneScrapers.filter((s) => {
      return s.scene && s.scene.supported_scrapes.includes(GQL.ScrapeType.Fragment);
    });
  }

  setQueryableScrapers(newQueryableScrapers);
}, [Scrapers.data])
function updateSceneEditState(state: Partial<GQL.SceneDataFragment>) {
const perfIds = !!state.performers ? state.performers.map((performer) => performer.id) : undefined;
const tIds = !!state.tags ? state.tags.map((tag) => tag.id) : undefined;
@ -186,6 +205,118 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
function onCoverImageChange(event: React.FormEvent<HTMLInputElement>) {
ImageUtils.onImageChange(event, onImageLoad);
}
// Runs the given scraper against the current (unsaved) scene input and
// merges the scraped metadata into the edit form fields.
async function onScrapeClicked(scraper : GQL.ListSceneScrapersListSceneScrapers) {
  setIsLoading(true);
  try {
    const result = await StashService.queryScrapeScene(scraper.id, getSceneInput());
    // no data means the scrape produced nothing; leave the form untouched
    if (!result.data || !result.data.scrapeScene) { return; }
    updateSceneFromScrapedScene(result.data.scrapeScene);
  } catch (e) {
    ErrorUtils.handle(e);
  } finally {
    setIsLoading(false);
  }
}
// Renders one entry of the "Scrape with..." menu. The key prop is
// required because these items are rendered from a list.
function renderScraperMenuItem(scraper : GQL.ListSceneScrapersListSceneScrapers) {
  return (
    <MenuItem
      key={scraper.id}
      text={scraper.name}
      onClick={() => { onScrapeClicked(scraper); }}
    />
  );
}
// Renders the "Scrape with..." popover button, or nothing when no
// fragment-capable scrapers are available.
function renderScraperMenu() {
  if (!queryableScrapers || queryableScrapers.length === 0) {
    return;
  }

  // queryableScrapers is known non-empty here, so no re-check is needed
  const scraperMenu = (
    <Menu>
      {queryableScrapers.map((s) => renderScraperMenuItem(s))}
    </Menu>
  );

  return (
    <Popover content={scraperMenu} position="bottom">
      <Button text="Scrape with..."/>
    </Popover>
  );
}
// True when some scene scraper declares a URL pattern contained in the
// given URL, i.e. the scrape-from-URL button should be shown.
function urlScrapable(url: string) : boolean {
  // coerce every operand so the declared boolean return holds even when
  // listSceneScrapers is missing (otherwise undefined leaks through &&)
  return !!url && !!Scrapers.data && !!Scrapers.data.listSceneScrapers &&
    Scrapers.data.listSceneScrapers.some((s) => {
      return !!s.scene && !!s.scene.urls && s.scene.urls.some((u) => { return url.includes(u); });
    });
}
// Merges scraped metadata into the edit form. Only fields that are
// currently empty are filled in, so existing user input is preserved.
function updateSceneFromScrapedScene(scene : GQL.ScrapedSceneDataFragment) {
  if (!title && scene.title) {
    setTitle(scene.title);
  }

  if (!details && scene.details) {
    setDetails(scene.details);
  }

  if (!date && scene.date) {
    setDate(scene.date);
  }

  if (!studioId && scene.studio && scene.studio.id) {
    setStudioId(scene.studio.id);
  }

  // only performers that matched a stored record (id set) can be applied
  if ((!performerIds || performerIds.length == 0) && scene.performers && scene.performers.length > 0) {
    let idPerfs = scene.performers.filter((p) => {
      return p.id !== undefined && p.id !== null;
    });

    if (idPerfs.length > 0) {
      let newIds = idPerfs.map((p) => p.id);
      setPerformerIds(newIds as string[]);
    }
  }

  // likewise, only tags that matched a stored record can be applied
  if ((!tagIds || tagIds.length == 0) && scene.tags && scene.tags.length > 0) {
    let idTags = scene.tags.filter((p) => {
      return p.id !== undefined && p.id !== null;
    });

    if (idTags.length > 0) {
      let newIds = idTags.map((p) => p.id);
      setTagIds(newIds as string[]);
    }
  }
}
// Scrapes scene metadata from the URL field's current value and merges
// the result into the edit form.
async function onScrapeSceneURL() {
  if (!url) { return; }
  setIsLoading(true);
  try {
    const result = await StashService.queryScrapeSceneURL(url);
    // no data means no scraper produced a result; leave the form as-is
    if (!result.data || !result.data.scrapeSceneURL) { return; }
    updateSceneFromScrapedScene(result.data.scrapeSceneURL);
  } catch (e) {
    ErrorUtils.handle(e);
  } finally {
    setIsLoading(false);
  }
}
// Renders the scrape-from-URL button next to the URL input, but only
// when some scraper claims the current URL value.
function maybeRenderScrapeButton() {
  if (url && urlScrapable(url)) {
    return (
      <Button
        minimal={true}
        icon="import"
        id="scrape-url-button"
        onClick={() => onScrapeSceneURL()}/>
    );
  }
  return undefined;
}
return (
<>
@ -212,6 +343,7 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
onChange={(newValue: any) => setUrl(newValue.target.value)}
value={url}
/>
{maybeRenderScrapeButton()}
</FormGroup>
<FormGroup label="Date" helperText="YYYY-MM-DD">
@ -267,6 +399,7 @@ export const SceneEditPanel: FunctionComponent<IProps> = (props: IProps) => {
</div>
<Button className="edit-button" text="Save" intent="primary" onClick={() => onSave()}/>
<Button className="edit-button" text="Delete" intent="danger" onClick={() => setIsDeleteAlertOpen(true)}/>
{renderScraperMenu()}
</>
);
};

View File

@ -221,6 +221,10 @@ export class StashService {
return GQL.useScrapePerformer({ variables: { scraper_id: scraperId, scraped_performer: scrapedPerformer }});
}
// Hook wrapper for the ListSceneScrapers query.
public static useListSceneScrapers() {
  return GQL.useListSceneScrapers();
}
public static useScrapeFreeonesPerformers(q: string) { return GQL.useScrapeFreeonesPerformers({ variables: { q } }); }
public static useMarkerStrings() { return GQL.useMarkerStrings(); }
public static useAllTags() { return GQL.useAllTags(); }
@ -425,6 +429,25 @@ export class StashService {
});
}
// Runs the ScrapeSceneURL query imperatively (outside the hook
// lifecycle), returning the raw query promise.
public static queryScrapeSceneURL(url: string) {
  const variables = { url };
  return StashService.client.query<GQL.ScrapeSceneUrlQuery>({
    query: GQL.ScrapeSceneUrlDocument,
    variables,
  });
}
// Runs the ScrapeScene query imperatively (outside the hook lifecycle)
// with the given scraper id and scene input, returning the raw promise.
public static queryScrapeScene(scraperId: string, scene: GQL.SceneUpdateInput) {
  const variables = { scraper_id: scraperId, scene };
  return StashService.client.query<GQL.ScrapeSceneQuery>({
    query: GQL.ScrapeSceneDocument,
    variables,
  });
}
public static queryMetadataScan(input: GQL.ScanMetadataInput) {
return StashService.client.query<GQL.MetadataScanQuery>({
query: GQL.MetadataScanDocument,