// Package scraper provides interfaces to interact with the scraper subsystem. // The [Cache] type is the main entry point to the scraper subsystem. package scraper import ( "context" "errors" "fmt" "io" "net/http" "strconv" "github.com/stashapp/stash/pkg/models" ) type Source struct { // Index of the configured stash-box instance to use. Should be unset if scraper_id is set StashBoxIndex *int `json:"stash_box_index"` // Stash-box endpoint StashBoxEndpoint *string `json:"stash_box_endpoint"` // Scraper ID to scrape with. Should be unset if stash_box_index is set ScraperID *string `json:"scraper_id"` } // Scraped Content is the forming union over the different scrapers type ScrapedContent interface { IsScrapedContent() } // Type of the content a scraper generates type ScrapeContentType string const ( ScrapeContentTypeGallery ScrapeContentType = "GALLERY" ScrapeContentTypeMovie ScrapeContentType = "MOVIE" ScrapeContentTypeGroup ScrapeContentType = "GROUP" ScrapeContentTypePerformer ScrapeContentType = "PERFORMER" ScrapeContentTypeScene ScrapeContentType = "SCENE" ) var AllScrapeContentType = []ScrapeContentType{ ScrapeContentTypeGallery, ScrapeContentTypeMovie, ScrapeContentTypeGroup, ScrapeContentTypePerformer, ScrapeContentTypeScene, } func (e ScrapeContentType) IsValid() bool { switch e { case ScrapeContentTypeGallery, ScrapeContentTypeMovie, ScrapeContentTypeGroup, ScrapeContentTypePerformer, ScrapeContentTypeScene: return true } return false } func (e ScrapeContentType) String() string { return string(e) } func (e *ScrapeContentType) UnmarshalGQL(v interface{}) error { str, ok := v.(string) if !ok { return fmt.Errorf("enums must be strings") } *e = ScrapeContentType(str) if !e.IsValid() { return fmt.Errorf("%s is not a valid ScrapeContentType", str) } return nil } func (e ScrapeContentType) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } type Scraper struct { ID string `json:"id"` Name string `json:"name"` // Details for performer scraper Performer *ScraperSpec `json:"performer"` // Details for scene scraper Scene *ScraperSpec `json:"scene"` // Details for gallery scraper Gallery *ScraperSpec `json:"gallery"` // Details for movie scraper Group *ScraperSpec `json:"group"` // Details for movie scraper Movie *ScraperSpec `json:"movie"` } type ScraperSpec struct { // URLs matching these can be scraped with Urls []string `json:"urls"` SupportedScrapes []ScrapeType `json:"supported_scrapes"` } type ScrapeType string const ( // From text query ScrapeTypeName ScrapeType = "NAME" // From existing object ScrapeTypeFragment ScrapeType = "FRAGMENT" // From URL ScrapeTypeURL ScrapeType = "URL" ) var AllScrapeType = []ScrapeType{ ScrapeTypeName, ScrapeTypeFragment, ScrapeTypeURL, } func (e ScrapeType) IsValid() bool { switch e { case ScrapeTypeName, ScrapeTypeFragment, ScrapeTypeURL: return true } return false } func (e ScrapeType) String() string { return string(e) } func (e *ScrapeType) UnmarshalGQL(v interface{}) error { str, ok := v.(string) if !ok { return fmt.Errorf("enums must be strings") } *e = ScrapeType(str) if !e.IsValid() { return fmt.Errorf("%s is not a valid ScrapeType", str) } return nil } func (e ScrapeType) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } var ( // ErrMaxRedirects is returned if the max number of HTTP redirects are reached. ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached") // ErrNotFound is returned when an entity isn't found ErrNotFound = errors.New("scraper not found") // ErrNotSupported is returned when a given invocation isn't supported, and there // is a guard function which should be able to guard against it. ErrNotSupported = errors.New("scraper operation not supported") ) // Input coalesces inputs of different types into a single structure. // The system expects one of these to be set, and the remaining to be // set to nil. type Input struct { Performer *ScrapedPerformerInput Scene *ScrapedSceneInput Gallery *ScrapedGalleryInput } // populateURL populates the URL field of the input based on the // URLs field of the input. Does nothing if the URL field is already set. func (i *Input) populateURL() { if i.Scene != nil && i.Scene.URL == nil && len(i.Scene.URLs) > 0 { i.Scene.URL = &i.Scene.URLs[0] } if i.Gallery != nil && i.Gallery.URL == nil && len(i.Gallery.URLs) > 0 { i.Gallery.URL = &i.Gallery.URLs[0] } if i.Performer != nil && i.Performer.URL == nil && len(i.Performer.URLs) > 0 { i.Performer.URL = &i.Performer.URLs[0] } } // simple type definitions that can help customize // actions per query type QueryType int const ( // for now only SearchQuery is needed SearchQuery QueryType = iota + 1 ) // scraper is the generic interface to the scraper subsystems type scraper interface { // spec returns the scraper specification, suitable for graphql spec() Scraper // supports tests if the scraper supports a given content type supports(ScrapeContentType) bool // supportsURL tests if the scraper supports scrapes of a given url, producing a given content type supportsURL(url string, ty ScrapeContentType) bool } // urlScraper is the interface of scrapers supporting url loads type urlScraper interface { scraper viaURL(ctx context.Context, client *http.Client, url string, ty ScrapeContentType) (ScrapedContent, error) } // nameScraper is the interface of scrapers supporting name loads type nameScraper interface { scraper viaName(ctx context.Context, client *http.Client, name string, ty ScrapeContentType) ([]ScrapedContent, error) } // fragmentScraper is the interface of scrapers supporting fragment loads type fragmentScraper interface { scraper viaFragment(ctx context.Context, client *http.Client, input Input) (ScrapedContent, error) } // sceneScraper is a scraper which supports scene scrapes with // scene data as the input. type sceneScraper interface { scraper viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*ScrapedScene, error) } // galleryScraper is a scraper which supports gallery scrapes with // gallery data as the input. type galleryScraper interface { scraper viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*ScrapedGallery, error) }