mirror of https://github.com/stashapp/stash.git
Refactor scraper top half (#1893)
* Simplify scraper listing Introduce an enum, scraper.Kind, which explains what we are looking for. Make it possible to match this from a scraper struct. Use the enum to rewrite all the listing code to use the same code path. * Use a map, nitpick ScrapePerformerList Let the cache store a map from ID of a scraper to the scraper. This improves lookups when there are many scrapers, making it practically O(1) rather than O(n). If many scrapers are stored, this is faster. Since range expressions work unchanged, we don't have to change much, and things will still work. Make Kind a Stringer Rename ScraperPerformerList -> ScraperPerformerQuery since that name is used in the other scrapers, and we value consistency. Tune ScraperPerformerQuery: * Return static errors * Use the new functionality * When loading scrapers, do so directly Rather than first walking the directory structure to obtain file paths, fold the load directly into the filepath walk. This makes the code more direct. * Use static ErrNotFound If a scraper isn't found, return one static error. This paves the way for eventually doing our own error-presenter in gqlgen. * Store the cache in the Resolver state Putting the scraperCache directly in the resolver avoids the need to call manager.GetInstance() all over the place to get access to the scraper cache. The cache is stored by pointer, so it should be safe, since the cache will just update its internal state rather than being overwritten. We can now utilize the resolver state to grab the cache where needed. While here, pass context.Context from the resolver down into a function, which removes a context.TODO() * Introduce ScrapedContent Create a union in the GraphQL schema for all scraped content. This simplifies the internal implementation because we get variance on the output content type. Introduce a new type ScrapedContentType which signifies the scraped content you want as a caller. 
Use these to generalize the List interface and the URL scraping interface. * Simplify the scraper API Introduce a new interface for scraping. This interface is then used in the upper half of the scraper code, to make the code use one code flow rather than multiple code flows. Variance is currently at the old scraper structure. Add extending interfaces for the different ways of invoking scrapes. Use interface conversions to convert a scraper from the cache to a scraper supporting the extra methods. The return path returns models.ScrapedContent. Write a general postProcess function in the scraper, handling all ScrapedContent via type switching. This consolidates all postprocessing code flows. Introduce marshallers in the resolver code for converting ScrapedContent into the underlying concrete types. Use this to plug the existing fields in the Query resolver, so everything still works. * ScrapedContent: add more marshalling functions Handle all marshalling of ScrapedContent through marshalling functions. Removes some hand-rolled early variants of it, and replaces it with a canonical code flow. * Support loadByName via scraper_s In order to temporarily plug a hole in the current implementation, we use the older implementation as a hook to get the newer implementation to run. Later on, this can serve as a guide for how to implement the lower level bits inside the scrapers themselves. For now, it just enables support. * Plug the remaining scraper functions for now Since we would like to have a scraper which works in between refactors, plug the lower level parts of the scraper for now. It avoids us having to tackle this part just yet. * Move postprocessing to its own file There's enough postprocessing to clutter the main scrapers.go file. Move all of this into a new file, postprocessing, to make the API simpler. It now lives in scrapers.go. * Scraper: Invoke API consistency scraper.Cache.ScrapeByName -> ScrapeName * Fix scraping scenes by URL Simple typo. 
While here, also make a single marshaller nil-aware. * Introduce scraper groups, consolidate loadByURL Rename `scraper_s` into `group`. A group is a group of scrapers with the same identity. This corresponds to a single YAML file for a scraper configuration. It defines a group which supports different types of scraping contexts. Move config into the group, and lift txnManager and globalConfig to the group. Because we now return models.ScrapedContent we can use interfaces to get variance from the different underlying scrapers. Use a type switch for the URL matcher candidates. And then again for the scrapers. This consolidates all URL scraping paths into one. While here, remove the urlMatcher interface which isn't needed. Also clean up the remaining interfaces for url scraping and delete code which has no purpose anymore. * Consolidate fragment scraping in one code path While here, abide by the linter's checks. * Refactor loadByFragment Give it the same treatment as loadByURL: Step 1: find a scraperActionImpl which works for the data. Step 2: use that to scrape Most of this is simple analysis on the data at hand. It can be pushed down further in a later commit, but for now we leave it here. * Remove configScraper, autotag is a scraper Remove the remains of the configScraper struct. It now lives on in the group struct. Kill the remaining interfaces from the old implementation while here. Remove group.specification since it can now be handled by a simple func call to spec(). Work through the autotag scraper. It now implements the scraper interface, so it can be used as a scraper. This also simplifies the autotag scraper quite a bit since it doesn't have to implement a number of unsupported func calls. * Simplify the fragment scraper flow * Pass the context Eliminate a round of context.TODO() in the scraper code by passing the calling context down into the subsystem. 
This will gracefully allow for termination of remote calls if the client goes away for some reason in GraphQL requests. * Improve listScrapers in the schema Support lists of types we accept. * Be graceful on nil values in conversion Supporting nil-values makes the API more robust in the case of partial results in a multi-scrape situation. * Improve listScrapers: output at-most-once Use the ID of a scraper to reduce the output set. If a scraper has been included, don't include it again. * Consolidate all API level errors into resolver.go * Reorder files and functions: scrapers.go -> cache.go: It almost contains nothing but the cache code. Move errors into scraper.go from here because it is a better place to have them living right now group.go: All of the group structure. This can now go from scraper.go, making it more lean. Move group create from config_scraper to here. config.go: Move the `(c config) spec()` call to here. config_scraper.go: Empty file by now * Name-update the scraper interfaces Use 'via' rather than 'loadBy'. The scrape happens via a given scrape method, so I think this is a nice name for it. * Rename scrapers for consistency. While here, improve the error formatting, so different errors come back differently. * Nuke the freeones field from the GraphQL schema * Fix autotag interfacing, refactor The autotag scraper uses a pointer receiver, but the rest of the code we use for scraping doesn't expect a pointer-receiver. Hence, to fix the autotag scraper, we change it to be a value receiver, like the rest of the code. Fix: viaScene, and viaGallery. While here, remove a couple of pointer-receiver methods which can be trivially rewritten into plain functions. * Protect against pointer interfaces The underlying code can be a bit inconsistent in what it returns. Introduce pointer-types in the postprocessing layer and handle them accordingly for now. Once the lower levels are better understood, we can lift this. 
* Move ErrConversion into the models package. The conversion error pertains to the logic of converting models. Because of this, it should move there, so it is centralized. * Be consistent in scraper resolver error handling If we have a static error Err = errors.New(..) Then use it wrapped at the start: fmt.Errorf("%w: ...context...", Err) This reads better. While here, avoid using the underlying Atoi errors: they are verbose, and like 99% of the time, the user knows what is wrong from the input string, so just give that back. Also, remove the scraper id from the error contexts: it is implicit, and the error wouldn't change if we used a different scraper, which the error message would imply. * Mark the list*Scrapers() API as deprecated The same functionality is now present in listScrapers. * Improve error formatting Think about how each error is going to be used and tweak them to be nicer. * Return a sorted list of scrapers This helps testing, it's closer to what we had, caches like stable data, and it is easier for humans. It also makes the output stable, because map iteration is randomized. * Fix listScrapers calls to return in ID-order Since we need the ordering to be by ID in all situations, it is easier to just generalize the cache listScrapers call to support multiple scraper types. This avoids a de-dupe map up the chain, since every scraper is only considered once. Sorting now happens in the cache listScrapers call. Use this generalized function in all resolvers, which are now simple passthroughs. * Remove UpdateConfig from the scraper cache. This isn't needed, so get rid of it. * Pull a context into identify Scraping scenes in the identify tasks now use a context from up the call chain. * Do not store the scraper cache in the resolver. Scraper caches are updated through manager.singleton•RefreshScraperCache, so we can't keep a pointer to it in the resolver. Instead, solve this by adding a fetcher method to the resolver type. 
This keeps it local to the resolver, while handling the problem of updating caches in the configuration.
This commit is contained in:
parent
8d94392cfb
commit
c1f89611e2
|
@ -1,27 +1,3 @@
|
|||
query ScrapeFreeones($performer_name: String!) {
|
||||
scrapeFreeones(performer_name: $performer_name) {
|
||||
name
|
||||
url
|
||||
twitter
|
||||
instagram
|
||||
birthdate
|
||||
ethnicity
|
||||
country
|
||||
eye_color
|
||||
height
|
||||
measurements
|
||||
fake_tits
|
||||
career_length
|
||||
tattoos
|
||||
piercings
|
||||
aliases
|
||||
details
|
||||
death_date
|
||||
hair_color
|
||||
weight
|
||||
}
|
||||
}
|
||||
|
||||
query ScrapeFreeonesPerformers($q: String!) {
|
||||
scrapeFreeonesPerformerList(query: $q)
|
||||
}
|
|
@ -67,10 +67,12 @@ type Query {
|
|||
# Scrapers
|
||||
|
||||
"""List available scrapers"""
|
||||
listPerformerScrapers: [Scraper!]!
|
||||
listSceneScrapers: [Scraper!]!
|
||||
listGalleryScrapers: [Scraper!]!
|
||||
listMovieScrapers: [Scraper!]!
|
||||
listScrapers(types: [ScrapeContentType!]!): [Scraper!]!
|
||||
listPerformerScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [PERFORMER])")
|
||||
listSceneScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [SCENE])")
|
||||
listGalleryScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [GALLERY])")
|
||||
listMovieScrapers: [Scraper!]! @deprecated(reason: "Use listScrapers(types: [MOVIE])")
|
||||
|
||||
|
||||
"""Scrape for a single scene"""
|
||||
scrapeSingleScene(source: ScraperSourceInput!, input: ScrapeSingleSceneInput!): [ScrapedScene!]!
|
||||
|
@ -88,6 +90,9 @@ type Query {
|
|||
"""Scrape for a single movie"""
|
||||
scrapeSingleMovie(source: ScraperSourceInput!, input: ScrapeSingleMovieInput!): [ScrapedMovie!]!
|
||||
|
||||
"Scrapes content based on a URL"
|
||||
scrapeURL(url: String!, ty: ScrapeContentType!): ScrapedContent
|
||||
|
||||
"""Scrapes a complete performer record based on a URL"""
|
||||
scrapePerformerURL(url: String!): ScrapedPerformer
|
||||
"""Scrapes a complete performer record based on a URL"""
|
||||
|
@ -106,8 +111,6 @@ type Query {
|
|||
"""Scrapes a complete gallery record based on an existing gallery"""
|
||||
scrapeGallery(scraper_id: ID!, gallery: GalleryUpdateInput!): ScrapedGallery @deprecated(reason: "use scrapeSingleGallery")
|
||||
|
||||
"""Scrape a performer using Freeones"""
|
||||
scrapeFreeones(performer_name: String!): ScrapedPerformer @deprecated(reason: "use scrapeSinglePerformer with scraper_id = builtin_freeones")
|
||||
"""Scrape a list of performers from a query"""
|
||||
scrapeFreeonesPerformerList(query: String!): [String!]! @deprecated(reason: "use scrapeSinglePerformer with scraper_id = builtin_freeones")
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
enum ScrapeType {
|
||||
"""From text query"""
|
||||
"""From text query"""
|
||||
NAME
|
||||
"""From existing object"""
|
||||
FRAGMENT
|
||||
|
@ -7,6 +7,22 @@ enum ScrapeType {
|
|||
URL
|
||||
}
|
||||
|
||||
"Type of the content a scraper generates"
|
||||
enum ScrapeContentType {
|
||||
GALLERY
|
||||
MOVIE
|
||||
PERFORMER
|
||||
SCENE
|
||||
}
|
||||
|
||||
"Scraped Content is the forming union over the different scrapers"
|
||||
union ScrapedContent = ScrapedStudio
|
||||
| ScrapedTag
|
||||
| ScrapedScene
|
||||
| ScrapedGallery
|
||||
| ScrapedMovie
|
||||
| ScrapedPerformer
|
||||
|
||||
type ScraperSpec {
|
||||
"""URLs matching these can be scraped with"""
|
||||
urls: [String!]
|
||||
|
@ -26,6 +42,7 @@ type Scraper {
|
|||
movie: ScraperSpec
|
||||
}
|
||||
|
||||
|
||||
type ScrapedStudio {
|
||||
"""Set if studio matched"""
|
||||
stored_id: ID
|
||||
|
|
|
@ -7,13 +7,22 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/manager"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/plugin"
|
||||
"github.com/stashapp/stash/pkg/scraper"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotImplemented is an error which means the given functionality isn't implemented by the API.
|
||||
ErrNotImplemented = errors.New("not implemented")
|
||||
ErrNotSupported = errors.New("not supported")
|
||||
|
||||
// ErrNotSupported is returned whenever there's a test, which can be used to guard against the error,
|
||||
// but the given parameters aren't supported by the system.
|
||||
ErrNotSupported = errors.New("not supported")
|
||||
|
||||
// ErrInput signifies errors where the input isn't valid for some reason. And no more specific error exists.
|
||||
ErrInput = errors.New("input error")
|
||||
)
|
||||
|
||||
type hookExecutor interface {
|
||||
|
@ -25,6 +34,10 @@ type Resolver struct {
|
|||
hookExecutor hookExecutor
|
||||
}
|
||||
|
||||
// scraperCache returns the scraper cache currently held by the manager
// singleton. It is looked up on every call rather than stored on the Resolver.
func (r *Resolver) scraperCache() *scraper.Cache {
|
||||
return manager.GetInstance().ScraperCache
|
||||
}
|
||||
|
||||
// Gallery returns the gallery field resolver, which wraps this Resolver.
func (r *Resolver) Gallery() models.GalleryResolver {
|
||||
return &galleryResolver{r}
|
||||
}
|
||||
|
|
|
@ -6,53 +6,57 @@ import (
|
|||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/stashapp/stash/pkg/manager"
|
||||
"github.com/stashapp/stash/pkg/manager/config"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/scraper"
|
||||
"github.com/stashapp/stash/pkg/scraper/stashbox"
|
||||
)
|
||||
|
||||
// deprecated
|
||||
func (r *queryResolver) ScrapeFreeones(ctx context.Context, performer_name string) (*models.ScrapedPerformer, error) {
|
||||
scrapedPerformer := models.ScrapedPerformerInput{
|
||||
Name: &performer_name,
|
||||
}
|
||||
return manager.GetInstance().ScraperCache.ScrapePerformer(scraper.FreeonesScraperID, scrapedPerformer)
|
||||
// ScrapeURL resolves the scrapeURL query by passing the request context, the
// URL and the requested content type straight through to the scraper cache.
func (r *queryResolver) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
return r.scraperCache().ScrapeURL(ctx, url, ty)
|
||||
}
|
||||
|
||||
// deprecated
|
||||
func (r *queryResolver) ScrapeFreeonesPerformerList(ctx context.Context, query string) ([]string, error) {
|
||||
scrapedPerformers, err := manager.GetInstance().ScraperCache.ScrapePerformerList(scraper.FreeonesScraperID, query)
|
||||
content, err := r.scraperCache().ScrapeName(ctx, scraper.FreeonesScraperID, query, models.ScrapeContentTypePerformer)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
performers, err := marshalScrapedPerformers(content)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var ret []string
|
||||
for _, v := range scrapedPerformers {
|
||||
if v.Name != nil {
|
||||
ret = append(ret, *v.Name)
|
||||
for _, p := range performers {
|
||||
if p.Name != nil {
|
||||
ret = append(ret, *p.Name)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// ListScrapers resolves the listScrapers query by asking the scraper cache for
// the scrapers matching the given content types. It never returns an error.
func (r *queryResolver) ListScrapers(ctx context.Context, types []models.ScrapeContentType) ([]*models.Scraper, error) {
|
||||
return r.scraperCache().ListScrapers(types), nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) ListPerformerScrapers(ctx context.Context) ([]*models.Scraper, error) {
|
||||
return manager.GetInstance().ScraperCache.ListPerformerScrapers(), nil
|
||||
return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypePerformer}), nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) ListSceneScrapers(ctx context.Context) ([]*models.Scraper, error) {
|
||||
return manager.GetInstance().ScraperCache.ListSceneScrapers(), nil
|
||||
return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeScene}), nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) ListGalleryScrapers(ctx context.Context) ([]*models.Scraper, error) {
|
||||
return manager.GetInstance().ScraperCache.ListGalleryScrapers(), nil
|
||||
return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeGallery}), nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) ListMovieScrapers(ctx context.Context) ([]*models.Scraper, error) {
|
||||
return manager.GetInstance().ScraperCache.ListMovieScrapers(), nil
|
||||
return r.scraperCache().ListScrapers([]models.ScrapeContentType{models.ScrapeContentTypeMovie}), nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID string, query string) ([]*models.ScrapedPerformer, error) {
|
||||
|
@ -60,15 +64,29 @@ func (r *queryResolver) ScrapePerformerList(ctx context.Context, scraperID strin
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
return manager.GetInstance().ScraperCache.ScrapePerformerList(scraperID, query)
|
||||
content, err := r.scraperCache().ScrapeName(ctx, scraperID, query, models.ScrapeContentTypePerformer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedPerformers(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapePerformer(ctx context.Context, scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
||||
return manager.GetInstance().ScraperCache.ScrapePerformer(scraperID, scrapedPerformer)
|
||||
content, err := r.scraperCache().ScrapeFragment(ctx, scraperID, scraper.Input{Performer: &scrapedPerformer})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return marshalScrapedPerformer(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapePerformerURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
|
||||
return manager.GetInstance().ScraperCache.ScrapePerformerURL(url)
|
||||
content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypePerformer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedPerformer(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string, query string) ([]*models.ScrapedScene, error) {
|
||||
|
@ -76,44 +94,74 @@ func (r *queryResolver) ScrapeSceneQuery(ctx context.Context, scraperID string,
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
return manager.GetInstance().ScraperCache.ScrapeSceneQuery(scraperID, query)
|
||||
content, err := r.scraperCache().ScrapeName(ctx, scraperID, query, models.ScrapeContentTypeScene)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedScenes(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeScene(ctx context.Context, scraperID string, scene models.SceneUpdateInput) (*models.ScrapedScene, error) {
|
||||
id, err := strconv.Atoi(scene.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: scene.ID is not an integer: '%s'", ErrInput, scene.ID)
|
||||
}
|
||||
|
||||
content, err := r.scraperCache().ScrapeID(ctx, scraperID, id, models.ScrapeContentTypeScene)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return manager.GetInstance().ScraperCache.ScrapeScene(scraperID, id)
|
||||
return marshalScrapedScene(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeSceneURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||
return manager.GetInstance().ScraperCache.ScrapeSceneURL(url)
|
||||
content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeScene)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedScene(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeGallery(ctx context.Context, scraperID string, gallery models.GalleryUpdateInput) (*models.ScrapedGallery, error) {
|
||||
id, err := strconv.Atoi(gallery.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: gallery id is not an integer: '%s'", ErrInput, gallery.ID)
|
||||
}
|
||||
|
||||
content, err := r.scraperCache().ScrapeID(ctx, scraperID, id, models.ScrapeContentTypeGallery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return manager.GetInstance().ScraperCache.ScrapeGallery(scraperID, id)
|
||||
return marshalScrapedGallery(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
|
||||
return manager.GetInstance().ScraperCache.ScrapeGalleryURL(url)
|
||||
content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeGallery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedGallery(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
|
||||
return manager.GetInstance().ScraperCache.ScrapeMovieURL(url)
|
||||
content, err := r.scraperCache().ScrapeURL(ctx, url, models.ScrapeContentTypeMovie)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedMovie(content)
|
||||
}
|
||||
|
||||
func (r *queryResolver) QueryStashBoxScene(ctx context.Context, input models.StashBoxSceneQueryInput) ([]*models.ScrapedScene, error) {
|
||||
boxes := config.GetInstance().GetStashBoxes()
|
||||
|
||||
if input.StashBoxIndex < 0 || input.StashBoxIndex >= len(boxes) {
|
||||
return nil, fmt.Errorf("invalid stash_box_index %d", input.StashBoxIndex)
|
||||
return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, input.StashBoxIndex)
|
||||
}
|
||||
|
||||
client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager)
|
||||
|
@ -133,7 +181,7 @@ func (r *queryResolver) QueryStashBoxPerformer(ctx context.Context, input models
|
|||
boxes := config.GetInstance().GetStashBoxes()
|
||||
|
||||
if input.StashBoxIndex < 0 || input.StashBoxIndex >= len(boxes) {
|
||||
return nil, fmt.Errorf("invalid stash_box_index %d", input.StashBoxIndex)
|
||||
return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, input.StashBoxIndex)
|
||||
}
|
||||
|
||||
client := stashbox.NewClient(*boxes[input.StashBoxIndex], r.txnManager)
|
||||
|
@ -153,7 +201,7 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) {
|
|||
boxes := config.GetInstance().GetStashBoxes()
|
||||
|
||||
if index < 0 || index >= len(boxes) {
|
||||
return nil, fmt.Errorf("invalid stash_box_index %d", index)
|
||||
return nil, fmt.Errorf("%w: invalid stash_box_index %d", ErrInput, index)
|
||||
}
|
||||
|
||||
return stashbox.NewClient(*boxes[index], r.txnManager), nil
|
||||
|
@ -161,7 +209,8 @@ func (r *queryResolver) getStashBoxClient(index int) (*stashbox.Client, error) {
|
|||
|
||||
func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleSceneInput) ([]*models.ScrapedScene, error) {
|
||||
if source.ScraperID != nil {
|
||||
var singleScene *models.ScrapedScene
|
||||
var c models.ScrapedContent
|
||||
var content []models.ScrapedContent
|
||||
var err error
|
||||
|
||||
switch {
|
||||
|
@ -169,26 +218,24 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
|
|||
var sceneID int
|
||||
sceneID, err = strconv.Atoi(*input.SceneID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("%w: sceneID is not an integer: '%s'", ErrInput, *input.SceneID)
|
||||
}
|
||||
singleScene, err = manager.GetInstance().ScraperCache.ScrapeScene(*source.ScraperID, sceneID)
|
||||
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, sceneID, models.ScrapeContentTypeScene)
|
||||
content = []models.ScrapedContent{c}
|
||||
case input.SceneInput != nil:
|
||||
singleScene, err = manager.GetInstance().ScraperCache.ScrapeSceneFragment(*source.ScraperID, *input.SceneInput)
|
||||
c, err = r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Scene: input.SceneInput})
|
||||
content = []models.ScrapedContent{c}
|
||||
case input.Query != nil:
|
||||
return manager.GetInstance().ScraperCache.ScrapeSceneQuery(*source.ScraperID, *input.Query)
|
||||
content, err = r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypeScene)
|
||||
default:
|
||||
err = errors.New("scene_id, scene_input or query must be set")
|
||||
err = fmt.Errorf("%w: scene_id, scene_input, or query must be set", ErrInput)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if singleScene != nil {
|
||||
return []*models.ScrapedScene{singleScene}, nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
return marshalScrapedScenes(content)
|
||||
} else if source.StashBoxIndex != nil {
|
||||
client, err := r.getStashBoxClient(*source.StashBoxIndex)
|
||||
if err != nil {
|
||||
|
@ -201,10 +248,10 @@ func (r *queryResolver) ScrapeSingleScene(ctx context.Context, source models.Scr
|
|||
return client.QueryStashBoxScene(ctx, *input.Query)
|
||||
}
|
||||
|
||||
return nil, errors.New("scene_id or query must be set")
|
||||
return nil, fmt.Errorf("%w: scene_id or query must be set", ErrInput)
|
||||
}
|
||||
|
||||
return nil, errors.New("scraper_id or stash_box_index must be set")
|
||||
return nil, fmt.Errorf("%w: scraper_id or stash_box_index must be set", ErrInput)
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeMultiScenesInput) ([][]*models.ScrapedScene, error) {
|
||||
|
@ -225,20 +272,21 @@ func (r *queryResolver) ScrapeMultiScenes(ctx context.Context, source models.Scr
|
|||
func (r *queryResolver) ScrapeSinglePerformer(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSinglePerformerInput) ([]*models.ScrapedPerformer, error) {
|
||||
if source.ScraperID != nil {
|
||||
if input.PerformerInput != nil {
|
||||
singlePerformer, err := manager.GetInstance().ScraperCache.ScrapePerformer(*source.ScraperID, *input.PerformerInput)
|
||||
performer, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Performer: input.PerformerInput})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if singlePerformer != nil {
|
||||
return []*models.ScrapedPerformer{singlePerformer}, nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
return marshalScrapedPerformers([]models.ScrapedContent{performer})
|
||||
}
|
||||
|
||||
if input.Query != nil {
|
||||
return manager.GetInstance().ScraperCache.ScrapePerformerList(*source.ScraperID, *input.Query)
|
||||
content, err := r.scraperCache().ScrapeName(ctx, *source.ScraperID, *input.Query, models.ScrapeContentTypePerformer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return marshalScrapedPerformers(content)
|
||||
}
|
||||
|
||||
return nil, ErrNotImplemented
|
||||
|
@ -288,38 +336,36 @@ func (r *queryResolver) ScrapeMultiPerformers(ctx context.Context, source models
|
|||
}
|
||||
|
||||
func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleGalleryInput) ([]*models.ScrapedGallery, error) {
|
||||
if source.ScraperID != nil {
|
||||
var singleGallery *models.ScrapedGallery
|
||||
var err error
|
||||
|
||||
switch {
|
||||
case input.GalleryID != nil:
|
||||
var galleryID int
|
||||
galleryID, err = strconv.Atoi(*input.GalleryID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
singleGallery, err = manager.GetInstance().ScraperCache.ScrapeGallery(*source.ScraperID, galleryID)
|
||||
case input.GalleryInput != nil:
|
||||
singleGallery, err = manager.GetInstance().ScraperCache.ScrapeGalleryFragment(*source.ScraperID, *input.GalleryInput)
|
||||
default:
|
||||
return nil, ErrNotImplemented
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if singleGallery != nil {
|
||||
return []*models.ScrapedGallery{singleGallery}, nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
} else if source.StashBoxIndex != nil {
|
||||
if source.StashBoxIndex != nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
return nil, errors.New("scraper_id must be set")
|
||||
if source.ScraperID == nil {
|
||||
return nil, fmt.Errorf("%w: scraper_id must be set", ErrInput)
|
||||
}
|
||||
|
||||
var c models.ScrapedContent
|
||||
|
||||
switch {
|
||||
case input.GalleryID != nil:
|
||||
galleryID, err := strconv.Atoi(*input.GalleryID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: gallery id is not an integer: '%s'", ErrInput, *input.GalleryID)
|
||||
}
|
||||
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, galleryID, models.ScrapeContentTypeGallery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return marshalScrapedGalleries([]models.ScrapedContent{c})
|
||||
case input.GalleryInput != nil:
|
||||
c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Gallery: input.GalleryInput})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return marshalScrapedGalleries([]models.ScrapedContent{c})
|
||||
default:
|
||||
return nil, ErrNotImplemented
|
||||
}
|
||||
}
|
||||
|
||||
func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source models.ScraperSourceInput, input models.ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
// marshalScrapedScenes converts ScrapedContent into ScrapedScene. If conversion fails, an
|
||||
// error is returned to the caller.
|
||||
func marshalScrapedScenes(content []models.ScrapedContent) ([]*models.ScrapedScene, error) {
|
||||
var ret []*models.ScrapedScene
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
continue
|
||||
}
|
||||
|
||||
if s, ok := c.(*models.ScrapedScene); ok {
|
||||
ret = append(ret, s)
|
||||
} else {
|
||||
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedScene", models.ErrConversion)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// marshalScrapedPerformers converts ScrapedContent into ScrapedPerformer. If conversion
|
||||
// fails, an error is returned to the caller.
|
||||
func marshalScrapedPerformers(content []models.ScrapedContent) ([]*models.ScrapedPerformer, error) {
|
||||
var ret []*models.ScrapedPerformer
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
continue
|
||||
}
|
||||
|
||||
if p, ok := c.(*models.ScrapedPerformer); ok {
|
||||
ret = append(ret, p)
|
||||
} else {
|
||||
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedPerformer", models.ErrConversion)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// marshalScrapedGalleries converts ScrapedContent into ScrapedGallery. If
|
||||
// conversion fails, an error is returned.
|
||||
func marshalScrapedGalleries(content []models.ScrapedContent) ([]*models.ScrapedGallery, error) {
|
||||
var ret []*models.ScrapedGallery
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
continue
|
||||
}
|
||||
|
||||
if g, ok := c.(*models.ScrapedGallery); ok {
|
||||
ret = append(ret, g)
|
||||
} else {
|
||||
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedGallery", models.ErrConversion)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// marshalScrapedMovies converts ScrapedContent into ScrapedMovie. If conversion
|
||||
// fails, an error is returned.
|
||||
func marshalScrapedMovies(content []models.ScrapedContent) ([]*models.ScrapedMovie, error) {
|
||||
var ret []*models.ScrapedMovie
|
||||
for _, c := range content {
|
||||
if c == nil {
|
||||
ret = append(ret, nil)
|
||||
continue
|
||||
}
|
||||
|
||||
if m, ok := c.(*models.ScrapedMovie); ok {
|
||||
ret = append(ret, m)
|
||||
} else {
|
||||
return nil, fmt.Errorf("%w: cannot turn ScrapedConetnt into ScrapedMovie", models.ErrConversion)
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// marshalScrapedPerformer will marshal a single performer
|
||||
func marshalScrapedPerformer(content models.ScrapedContent) (*models.ScrapedPerformer, error) {
|
||||
p, err := marshalScrapedPerformers([]models.ScrapedContent{content})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return p[0], nil
|
||||
}
|
||||
|
||||
// marshalScrapedScene will marshal a single scraped scene
|
||||
func marshalScrapedScene(content models.ScrapedContent) (*models.ScrapedScene, error) {
|
||||
s, err := marshalScrapedScenes([]models.ScrapedContent{content})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s[0], nil
|
||||
}
|
||||
|
||||
// marshalScrapedGallery will marshal a single scraped gallery
|
||||
func marshalScrapedGallery(content models.ScrapedContent) (*models.ScrapedGallery, error) {
|
||||
g, err := marshalScrapedGalleries([]models.ScrapedContent{content})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return g[0], nil
|
||||
}
|
||||
|
||||
// marshalScrapedMovie will marshal a single scraped movie
|
||||
func marshalScrapedMovie(content models.ScrapedContent) (*models.ScrapedMovie, error) {
|
||||
m, err := marshalScrapedMovies([]models.ScrapedContent{content})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return m[0], nil
|
||||
}
|
|
@ -12,7 +12,7 @@ import (
|
|||
)
|
||||
|
||||
type SceneScraper interface {
|
||||
ScrapeScene(sceneID int) (*models.ScrapedScene, error)
|
||||
ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error)
|
||||
}
|
||||
|
||||
type SceneUpdatePostHookExecutor interface {
|
||||
|
@ -34,7 +34,7 @@ type SceneIdentifier struct {
|
|||
}
|
||||
|
||||
func (t *SceneIdentifier) Identify(ctx context.Context, txnManager models.TransactionManager, scene *models.Scene) error {
|
||||
result, err := t.scrapeScene(scene)
|
||||
result, err := t.scrapeScene(ctx, scene)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -57,11 +57,11 @@ type scrapeResult struct {
|
|||
source ScraperSource
|
||||
}
|
||||
|
||||
func (t *SceneIdentifier) scrapeScene(scene *models.Scene) (*scrapeResult, error) {
|
||||
func (t *SceneIdentifier) scrapeScene(ctx context.Context, scene *models.Scene) (*scrapeResult, error) {
|
||||
// iterate through the input sources
|
||||
for _, source := range t.Sources {
|
||||
// scrape using the source
|
||||
scraped, err := source.Scraper.ScrapeScene(scene.ID)
|
||||
scraped, err := source.Scraper.ScrapeScene(ctx, scene.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error scraping from %v: %v", source.Scraper, err)
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ type mockSceneScraper struct {
|
|||
results map[int]*models.ScrapedScene
|
||||
}
|
||||
|
||||
func (s mockSceneScraper) ScrapeScene(sceneID int) (*models.ScrapedScene, error) {
|
||||
func (s mockSceneScraper) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) {
|
||||
if utils.IntInclude(s.errIDs, sceneID) {
|
||||
return nil, errors.New("scrape scene error")
|
||||
}
|
||||
|
|
|
@ -211,7 +211,7 @@ type stashboxSource struct {
|
|||
endpoint string
|
||||
}
|
||||
|
||||
func (s stashboxSource) ScrapeScene(sceneID int) (*models.ScrapedScene, error) {
|
||||
func (s stashboxSource) ScrapeScene(_ context.Context, sceneID int) (*models.ScrapedScene, error) {
|
||||
results, err := s.FindStashBoxScenesByFingerprintsFlat([]string{strconv.Itoa(sceneID)})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error querying stash-box using scene ID %d: %w", sceneID, err)
|
||||
|
@ -233,8 +233,17 @@ type scraperSource struct {
|
|||
scraperID string
|
||||
}
|
||||
|
||||
func (s scraperSource) ScrapeScene(sceneID int) (*models.ScrapedScene, error) {
|
||||
return s.cache.ScrapeScene(s.scraperID, sceneID)
|
||||
func (s scraperSource) ScrapeScene(ctx context.Context, sceneID int) (*models.ScrapedScene, error) {
|
||||
content, err := s.cache.ScrapeID(ctx, s.scraperID, sceneID, models.ScrapeContentTypeScene)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if scene, ok := content.(*models.ScrapedScene); ok {
|
||||
return scene, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("could not convert content to scene")
|
||||
}
|
||||
|
||||
func (s scraperSource) String() string {
|
||||
|
|
|
@ -2,4 +2,10 @@ package models
|
|||
|
||||
import "errors"
|
||||
|
||||
var ErrNotFound = errors.New("not found")
|
||||
var (
|
||||
// ErrNotFound signifies entities which are not found
|
||||
ErrNotFound = errors.New("not found")
|
||||
|
||||
// ErrConversion signifies conversion errors
|
||||
ErrConversion = errors.New("conversion error")
|
||||
)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
|
@ -24,20 +25,20 @@ func (e scraperAction) IsValid() bool {
|
|||
}
|
||||
|
||||
type scraperActionImpl interface {
|
||||
scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error)
|
||||
scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error)
|
||||
scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
|
||||
scrapePerformerByURL(url string) (*models.ScrapedPerformer, error)
|
||||
scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error)
|
||||
|
||||
scrapeScenesByName(name string) ([]*models.ScrapedScene, error)
|
||||
scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error)
|
||||
scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
|
||||
scrapeSceneByURL(url string) (*models.ScrapedScene, error)
|
||||
scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error)
|
||||
scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error)
|
||||
scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
|
||||
scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error)
|
||||
|
||||
scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error)
|
||||
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error)
|
||||
scrapeGalleryByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error)
|
||||
scrapeGalleryByURL(url string) (*models.ScrapedGallery, error)
|
||||
scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error)
|
||||
|
||||
scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
|
||||
scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error)
|
||||
}
|
||||
|
||||
func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraperActionImpl {
|
||||
|
|
|
@ -2,8 +2,8 @@ package scraper
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/stashapp/stash/pkg/match"
|
||||
|
@ -16,14 +16,12 @@ const (
|
|||
autoTagScraperName = "Auto Tag"
|
||||
)
|
||||
|
||||
var errNotSupported = errors.New("not supported")
|
||||
|
||||
type autotagScraper struct {
|
||||
txnManager models.TransactionManager
|
||||
globalConfig GlobalConfig
|
||||
}
|
||||
|
||||
func (s *autotagScraper) matchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) {
|
||||
func autotagMatchPerformers(path string, performerReader models.PerformerReader) ([]*models.ScrapedPerformer, error) {
|
||||
p, err := match.PathToPerformers(path, performerReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error matching performers: %w", err)
|
||||
|
@ -47,7 +45,7 @@ func (s *autotagScraper) matchPerformers(path string, performerReader models.Per
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (s *autotagScraper) matchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) {
|
||||
func autotagMatchStudio(path string, studioReader models.StudioReader) (*models.ScrapedStudio, error) {
|
||||
st, err := match.PathToStudios(path, studioReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error matching studios: %w", err)
|
||||
|
@ -64,7 +62,7 @@ func (s *autotagScraper) matchStudio(path string, studioReader models.StudioRead
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) {
|
||||
func autotagMatchTags(path string, tagReader models.TagReader) ([]*models.ScrapedTag, error) {
|
||||
t, err := match.PathToTags(path, tagReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error matching tags: %w", err)
|
||||
|
@ -85,32 +83,24 @@ func (s *autotagScraper) matchTags(path string, tagReader models.TagReader) ([]*
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
type autotagSceneScraper struct {
|
||||
*autotagScraper
|
||||
}
|
||||
|
||||
func (c *autotagSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
|
||||
return nil, errNotSupported
|
||||
}
|
||||
|
||||
func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s autotagScraper) viaScene(ctx context.Context, _client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
var ret *models.ScrapedScene
|
||||
|
||||
// populate performers, studio and tags based on scene path
|
||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
if err := s.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
path := scene.Path
|
||||
performers, err := c.matchPerformers(path, r.Performer())
|
||||
performers, err := autotagMatchPerformers(path, r.Performer())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaScene: %w", err)
|
||||
}
|
||||
studio, err := c.matchStudio(path, r.Studio())
|
||||
studio, err := autotagMatchStudio(path, r.Studio())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaScene: %w", err)
|
||||
}
|
||||
|
||||
tags, err := c.matchTags(path, r.Tag())
|
||||
tags, err := autotagMatchTags(path, r.Tag())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaScene: %w", err)
|
||||
}
|
||||
|
||||
if len(performers) > 0 || studio != nil || len(tags) > 0 {
|
||||
|
@ -129,19 +119,7 @@ func (c *autotagSceneScraper) scrapeByScene(scene *models.Scene) (*models.Scrape
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (c *autotagSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
return nil, errNotSupported
|
||||
}
|
||||
|
||||
func (c *autotagSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
|
||||
return nil, errNotSupported
|
||||
}
|
||||
|
||||
type autotagGalleryScraper struct {
|
||||
*autotagScraper
|
||||
}
|
||||
|
||||
func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s autotagScraper) viaGallery(ctx context.Context, _client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
if !gallery.Path.Valid {
|
||||
// not valid for non-path-based galleries
|
||||
return nil, nil
|
||||
|
@ -150,20 +128,20 @@ func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*model
|
|||
var ret *models.ScrapedGallery
|
||||
|
||||
// populate performers, studio and tags based on scene path
|
||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
if err := s.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
path := gallery.Path.String
|
||||
performers, err := c.matchPerformers(path, r.Performer())
|
||||
performers, err := autotagMatchPerformers(path, r.Performer())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaGallery: %w", err)
|
||||
}
|
||||
studio, err := c.matchStudio(path, r.Studio())
|
||||
studio, err := autotagMatchStudio(path, r.Studio())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaGallery: %w", err)
|
||||
}
|
||||
|
||||
tags, err := c.matchTags(path, r.Tag())
|
||||
tags, err := autotagMatchTags(path, r.Tag())
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("autotag scraper viaGallery: %w", err)
|
||||
}
|
||||
|
||||
if len(performers) > 0 || studio != nil || len(tags) > 0 {
|
||||
|
@ -182,12 +160,36 @@ func (c *autotagGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*model
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (c *autotagGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
||||
return nil, errNotSupported
|
||||
func (s autotagScraper) supports(ty models.ScrapeContentType) bool {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypeScene:
|
||||
return true
|
||||
case models.ScrapeContentTypeGallery:
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *autotagGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
|
||||
return nil, errNotSupported
|
||||
func (s autotagScraper) supportsURL(url string, ty models.ScrapeContentType) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (s autotagScraper) spec() models.Scraper {
|
||||
supportedScrapes := []models.ScrapeType{
|
||||
models.ScrapeTypeFragment,
|
||||
}
|
||||
|
||||
return models.Scraper{
|
||||
ID: autoTagScraperID,
|
||||
Name: autoTagScraperName,
|
||||
Scene: &models.ScraperSpec{
|
||||
SupportedScrapes: supportedScrapes,
|
||||
},
|
||||
Gallery: &models.ScraperSpec{
|
||||
SupportedScrapes: supportedScrapes,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func getAutoTagScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||
|
@ -196,23 +198,5 @@ func getAutoTagScraper(txnManager models.TransactionManager, globalConfig Global
|
|||
globalConfig: globalConfig,
|
||||
}
|
||||
|
||||
supportedScrapes := []models.ScrapeType{
|
||||
models.ScrapeTypeFragment,
|
||||
}
|
||||
|
||||
return scraper{
|
||||
ID: autoTagScraperID,
|
||||
Spec: &models.Scraper{
|
||||
ID: autoTagScraperID,
|
||||
Name: autoTagScraperName,
|
||||
Scene: &models.ScraperSpec{
|
||||
SupportedScrapes: supportedScrapes,
|
||||
},
|
||||
Gallery: &models.ScraperSpec{
|
||||
SupportedScrapes: supportedScrapes,
|
||||
},
|
||||
},
|
||||
Scene: &autotagSceneScraper{&base},
|
||||
Gallery: &autotagGalleryScraper{&base},
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
|
|
@ -0,0 +1,298 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
// scrapeGetTimeout is the timeout for scraper HTTP requests. Includes transfer time.
|
||||
// We may want to bump this at some point and use local context-timeouts if more granularity
|
||||
// is needed.
|
||||
scrapeGetTimeout = time.Second * 60
|
||||
|
||||
// maxIdleConnsPerHost is the maximum number of idle connections the HTTP client will
|
||||
// keep on a per-host basis.
|
||||
maxIdleConnsPerHost = 8
|
||||
|
||||
// maxRedirects defines the maximum number of redirects the HTTP client will follow
|
||||
maxRedirects = 20
|
||||
)
|
||||
|
||||
// GlobalConfig contains the global scraper options.
type GlobalConfig interface {
	// GetScraperUserAgent presumably returns the User-Agent string used for
	// scraper requests; it is not used in this file — TODO confirm.
	GetScraperUserAgent() string
	// GetScrapersPath returns the directory that is walked for scraper .yml
	// configuration files.
	GetScrapersPath() string
	// GetScraperCDPPath returns the configured CDP path. It is tested for
	// http://, https:// and ws:// prefixes by isCDPPathHTTP and isCDPPathWS.
	GetScraperCDPPath() string
	// GetScraperCertCheck reports whether the scraper HTTP client verifies
	// TLS certificates.
	GetScraperCertCheck() bool
}
|
||||
|
||||
func isCDPPathHTTP(c GlobalConfig) bool {
|
||||
return strings.HasPrefix(c.GetScraperCDPPath(), "http://") || strings.HasPrefix(c.GetScraperCDPPath(), "https://")
|
||||
}
|
||||
|
||||
func isCDPPathWS(c GlobalConfig) bool {
|
||||
return strings.HasPrefix(c.GetScraperCDPPath(), "ws://")
|
||||
}
|
||||
|
||||
// Cache stores the database of scrapers
|
||||
type Cache struct {
|
||||
client *http.Client
|
||||
scrapers map[string]scraper // Scraper ID -> Scraper
|
||||
globalConfig GlobalConfig
|
||||
txnManager models.TransactionManager
|
||||
}
|
||||
|
||||
// newClient creates a scraper-local http client we use throughout the scraper subsystem.
|
||||
func newClient(gc GlobalConfig) *http.Client {
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{ // ignore insecure certificates
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: !gc.GetScraperCertCheck()},
|
||||
MaxIdleConnsPerHost: maxIdleConnsPerHost,
|
||||
},
|
||||
Timeout: scrapeGetTimeout,
|
||||
// defaultCheckRedirect code with max changed from 10 to maxRedirects
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) >= maxRedirects {
|
||||
return fmt.Errorf("%w: gave up after %d redirects", ErrMaxRedirects, maxRedirects)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
return client
|
||||
}
|
||||
|
||||
// NewCache returns a new Cache loading scraper configurations from the
|
||||
// scraper path provided in the global config object. It returns a new
|
||||
// instance and an error if the scraper directory could not be loaded.
|
||||
//
|
||||
// Scraper configurations are loaded from yml files in the provided scrapers
|
||||
// directory and any subdirectories.
|
||||
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
|
||||
// HTTP Client setup
|
||||
client := newClient(globalConfig)
|
||||
|
||||
scrapers, err := loadScrapers(globalConfig, txnManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Cache{
|
||||
client: client,
|
||||
globalConfig: globalConfig,
|
||||
scrapers: scrapers,
|
||||
txnManager: txnManager,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func loadScrapers(globalConfig GlobalConfig, txnManager models.TransactionManager) (map[string]scraper, error) {
|
||||
path := globalConfig.GetScrapersPath()
|
||||
scrapers := make(map[string]scraper)
|
||||
|
||||
// Add built-in scrapers
|
||||
freeOnes := getFreeonesScraper(txnManager, globalConfig)
|
||||
autoTag := getAutoTagScraper(txnManager, globalConfig)
|
||||
scrapers[freeOnes.spec().ID] = freeOnes
|
||||
scrapers[autoTag.spec().ID] = autoTag
|
||||
|
||||
logger.Debugf("Reading scraper configs from %s", path)
|
||||
|
||||
scraperFiles := []string{}
|
||||
err := utils.SymWalk(path, func(fp string, f os.FileInfo, err error) error {
|
||||
if filepath.Ext(fp) == ".yml" {
|
||||
c, err := loadConfigFromYAMLFile(fp)
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading scraper %s: %v", fp, err)
|
||||
} else {
|
||||
scraper := newGroupScraper(*c, txnManager, globalConfig)
|
||||
scrapers[scraper.spec().ID] = scraper
|
||||
}
|
||||
scraperFiles = append(scraperFiles, fp)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("Error reading scraper configs: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return scrapers, nil
|
||||
}
|
||||
|
||||
// ReloadScrapers clears the scraper cache and reloads from the scraper path.
|
||||
// In the event of an error during loading, the cache will be left empty.
|
||||
func (c *Cache) ReloadScrapers() error {
|
||||
c.scrapers = nil
|
||||
scrapers, err := loadScrapers(c.globalConfig, c.txnManager)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.scrapers = scrapers
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListScrapers lists scrapers matching one of the given types.
|
||||
// Returns a list of scrapers, sorted by their ID.
|
||||
func (c Cache) ListScrapers(tys []models.ScrapeContentType) []*models.Scraper {
|
||||
var ret []*models.Scraper
|
||||
for _, s := range c.scrapers {
|
||||
for _, t := range tys {
|
||||
if s.supports(t) {
|
||||
spec := s.spec()
|
||||
ret = append(ret, &spec)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(ret, func(i, j int) bool {
|
||||
return ret[i].ID < ret[j].ID
|
||||
})
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetScraper returns the scraper matching the provided id.
|
||||
func (c Cache) GetScraper(scraperID string) *models.Scraper {
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil {
|
||||
spec := s.spec()
|
||||
return &spec
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) findScraper(scraperID string) scraper {
|
||||
s, ok := c.scrapers[scraperID]
|
||||
if ok {
|
||||
return s
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) ScrapeName(ctx context.Context, id, query string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(id)
|
||||
if s == nil {
|
||||
return nil, fmt.Errorf("%w: id %s", ErrNotFound, id)
|
||||
}
|
||||
if !s.supports(ty) {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s as a %v scraper", ErrNotSupported, id, ty)
|
||||
}
|
||||
|
||||
ns, ok := s.(nameScraper)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s to scrape by name", ErrNotSupported, id)
|
||||
}
|
||||
|
||||
return ns.viaName(ctx, c.client, query, ty)
|
||||
}
|
||||
|
||||
// ScrapeFragment uses the given fragment input to scrape
|
||||
func (c Cache) ScrapeFragment(ctx context.Context, id string, input Input) (models.ScrapedContent, error) {
|
||||
s := c.findScraper(id)
|
||||
if s == nil {
|
||||
return nil, fmt.Errorf("%w: id %s", ErrNotFound, id)
|
||||
}
|
||||
|
||||
fs, ok := s.(fragmentScraper)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s as a fragment scraper", ErrNotSupported, id)
|
||||
}
|
||||
|
||||
content, err := fs.viaFragment(ctx, c.client, input)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while fragment scraping with scraper %s: %w", id, err)
|
||||
}
|
||||
|
||||
return c.postScrape(ctx, content)
|
||||
}
|
||||
|
||||
// ScrapeURL scrapes a given url for the given content. Searches the scraper cache
|
||||
// and picks the first scraper capable of scraping the given url into the desired
|
||||
// content. Returns the scraped content or an error if the scrape fails.
|
||||
func (c Cache) ScrapeURL(ctx context.Context, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
for _, s := range c.scrapers {
|
||||
if s.supportsURL(url, ty) {
|
||||
ul, ok := s.(urlScraper)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s as an url scraper", ErrNotSupported, s.spec().ID)
|
||||
}
|
||||
ret, err := ul.viaURL(ctx, c.client, url, ty)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret == nil {
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return c.postScrape(ctx, ret)
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
s := c.findScraper(scraperID)
|
||||
if s == nil {
|
||||
return nil, fmt.Errorf("%w: id %s", ErrNotFound, scraperID)
|
||||
}
|
||||
|
||||
if !s.supports(ty) {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s to scrape %v content", ErrNotSupported, scraperID, ty)
|
||||
}
|
||||
|
||||
var ret models.ScrapedContent
|
||||
switch ty {
|
||||
case models.ScrapeContentTypeScene:
|
||||
ss, ok := s.(sceneScraper)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s as a scene scraper", ErrNotSupported, scraperID)
|
||||
}
|
||||
|
||||
scene, err := getScene(id, c.txnManager)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: unable to load scene id %v: %w", scraperID, id, err)
|
||||
}
|
||||
|
||||
ret, err = ss.viaScene(ctx, c.client, scene)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
|
||||
}
|
||||
case models.ScrapeContentTypeGallery:
|
||||
gs, ok := s.(galleryScraper)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%w: cannot use scraper %s as a gallery scraper", ErrNotSupported, scraperID)
|
||||
}
|
||||
|
||||
gallery, err := getGallery(id, c.txnManager)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: unable to load gallery id %v: %w", scraperID, id, err)
|
||||
}
|
||||
|
||||
ret, err = gs.viaGallery(ctx, c.client, gallery)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
|
||||
}
|
||||
}
|
||||
|
||||
return c.postScrape(ctx, ret)
|
||||
}
|
|
@ -8,6 +8,7 @@ import (
|
|||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
|
@ -232,55 +233,118 @@ func loadConfigFromYAMLFile(path string) (*config, error) {
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (c config) supportsPerformers() bool {
|
||||
return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
|
||||
func (c config) spec() models.Scraper {
|
||||
ret := models.Scraper{
|
||||
ID: c.ID,
|
||||
Name: c.Name,
|
||||
}
|
||||
|
||||
performer := models.ScraperSpec{}
|
||||
if c.PerformerByName != nil {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
|
||||
}
|
||||
if c.PerformerByFragment != nil {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if len(c.PerformerByURL) > 0 {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.PerformerByURL {
|
||||
performer.Urls = append(performer.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(performer.SupportedScrapes) > 0 {
|
||||
ret.Performer = &performer
|
||||
}
|
||||
|
||||
scene := models.ScraperSpec{}
|
||||
if c.SceneByFragment != nil {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if c.SceneByName != nil && c.SceneByQueryFragment != nil {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName)
|
||||
}
|
||||
if len(c.SceneByURL) > 0 {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.SceneByURL {
|
||||
scene.Urls = append(scene.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(scene.SupportedScrapes) > 0 {
|
||||
ret.Scene = &scene
|
||||
}
|
||||
|
||||
gallery := models.ScraperSpec{}
|
||||
if c.GalleryByFragment != nil {
|
||||
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if len(c.GalleryByURL) > 0 {
|
||||
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.GalleryByURL {
|
||||
gallery.Urls = append(gallery.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(gallery.SupportedScrapes) > 0 {
|
||||
ret.Gallery = &gallery
|
||||
}
|
||||
|
||||
movie := models.ScraperSpec{}
|
||||
if len(c.MovieByURL) > 0 {
|
||||
movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.MovieByURL {
|
||||
movie.Urls = append(movie.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(movie.SupportedScrapes) > 0 {
|
||||
ret.Movie = &movie
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (c config) matchesPerformerURL(url string) bool {
|
||||
for _, scraper := range c.PerformerByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (c config) supportsScenes() bool {
|
||||
return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0
|
||||
}
|
||||
|
||||
func (c config) supportsGalleries() bool {
|
||||
return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0
|
||||
}
|
||||
|
||||
func (c config) matchesSceneURL(url string) bool {
|
||||
for _, scraper := range c.SceneByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (c config) matchesGalleryURL(url string) bool {
|
||||
for _, scraper := range c.GalleryByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (c config) supportsMovies() bool {
|
||||
return len(c.MovieByURL) > 0
|
||||
}
|
||||
|
||||
func (c config) matchesMovieURL(url string) bool {
|
||||
for _, scraper := range c.MovieByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
func (c config) supports(ty models.ScrapeContentType) bool {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
return c.PerformerByName != nil || c.PerformerByFragment != nil || len(c.PerformerByURL) > 0
|
||||
case models.ScrapeContentTypeScene:
|
||||
return (c.SceneByName != nil && c.SceneByQueryFragment != nil) || c.SceneByFragment != nil || len(c.SceneByURL) > 0
|
||||
case models.ScrapeContentTypeGallery:
|
||||
return c.GalleryByFragment != nil || len(c.GalleryByURL) > 0
|
||||
case models.ScrapeContentTypeMovie:
|
||||
return len(c.MovieByURL) > 0
|
||||
}
|
||||
|
||||
panic("Unhandled ScrapeContentType")
|
||||
}
|
||||
|
||||
func (c config) matchesURL(url string, ty models.ScrapeContentType) bool {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
for _, scraper := range c.PerformerByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case models.ScrapeContentTypeScene:
|
||||
for _, scraper := range c.SceneByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case models.ScrapeContentTypeGallery:
|
||||
for _, scraper := range c.GalleryByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case models.ScrapeContentTypeMovie:
|
||||
for _, scraper := range c.MovieByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,289 +0,0 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
// configSceneScraper provides the scene scraping operations on top of the
// embedded configScraper.
type configSceneScraper struct {
|
||||
*configScraper
|
||||
}
|
||||
|
||||
func (c *configSceneScraper) matchesURL(url string) bool {
|
||||
return c.config.matchesSceneURL(url)
|
||||
}
|
||||
|
||||
func (c *configSceneScraper) scrapeByName(name string) ([]*models.ScrapedScene, error) {
|
||||
if c.config.SceneByName != nil {
|
||||
s := c.config.getScraper(*c.config.SceneByName, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapeScenesByName(name)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configSceneScraper) scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
if c.config.SceneByFragment != nil {
|
||||
s := c.config.getScraper(*c.config.SceneByFragment, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapeSceneByScene(scene)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configSceneScraper) scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
if c.config.SceneByQueryFragment != nil {
|
||||
s := c.config.getScraper(*c.config.SceneByQueryFragment, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapeSceneByFragment(scene)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configSceneScraper) scrapeByURL(url string) (*models.ScrapedScene, error) {
|
||||
for _, scraper := range c.config.SceneByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||
ret, err := s.scrapeSceneByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// configPerformerScraper provides the performer scraping operations on top of
// the embedded configScraper.
type configPerformerScraper struct {
|
||||
*configScraper
|
||||
}
|
||||
|
||||
func (c *configPerformerScraper) matchesURL(url string) bool {
|
||||
return c.config.matchesPerformerURL(url)
|
||||
}
|
||||
|
||||
func (c *configPerformerScraper) scrapeByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||
if c.config.PerformerByName != nil {
|
||||
s := c.config.getScraper(*c.config.PerformerByName, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapePerformersByName(name)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configPerformerScraper) scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
||||
if c.config.PerformerByFragment != nil {
|
||||
s := c.config.getScraper(*c.config.PerformerByFragment, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapePerformerByFragment(scrapedPerformer)
|
||||
}
|
||||
|
||||
// try to match against URL if present
|
||||
if scrapedPerformer.URL != nil && *scrapedPerformer.URL != "" {
|
||||
return c.scrapeByURL(*scrapedPerformer.URL)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configPerformerScraper) scrapeByURL(url string) (*models.ScrapedPerformer, error) {
|
||||
for _, scraper := range c.config.PerformerByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||
ret, err := s.scrapePerformerByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// configGalleryScraper provides the gallery scraping operations on top of the
// embedded configScraper.
type configGalleryScraper struct {
|
||||
*configScraper
|
||||
}
|
||||
|
||||
func (c *configGalleryScraper) matchesURL(url string) bool {
|
||||
return c.config.matchesGalleryURL(url)
|
||||
}
|
||||
|
||||
func (c *configGalleryScraper) scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
if c.config.GalleryByFragment != nil {
|
||||
s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapeGalleryByGallery(gallery)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configGalleryScraper) scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
||||
if c.config.GalleryByFragment != nil {
|
||||
// TODO - this should be galleryByQueryFragment
|
||||
s := c.config.getScraper(*c.config.GalleryByFragment, c.client, c.txnManager, c.globalConfig)
|
||||
return s.scrapeGalleryByFragment(gallery)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *configGalleryScraper) scrapeByURL(url string) (*models.ScrapedGallery, error) {
|
||||
for _, scraper := range c.config.GalleryByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||
ret, err := s.scrapeGalleryByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// configMovieScraper provides the movie scraping operations on top of the
// embedded configScraper.
type configMovieScraper struct {
|
||||
*configScraper
|
||||
}
|
||||
|
||||
func (c *configMovieScraper) matchesURL(url string) bool {
|
||||
return c.config.matchesMovieURL(url)
|
||||
}
|
||||
|
||||
func (c *configMovieScraper) scrapeByURL(url string) (*models.ScrapedMovie, error) {
|
||||
for _, scraper := range c.config.MovieByURL {
|
||||
if scraper.matchesURL(url) {
|
||||
s := c.config.getScraper(scraper.scraperTypeConfig, c.client, c.txnManager, c.globalConfig)
|
||||
ret, err := s.scrapeMovieByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// configScraper bundles a scraper config with the HTTP client, transaction
// manager and global configuration that are passed to config.getScraper
// whenever a concrete scraper implementation is needed.
type configScraper struct {
|
||||
config config
|
||||
client *http.Client
|
||||
txnManager models.TransactionManager
|
||||
globalConfig GlobalConfig
|
||||
}
|
||||
|
||||
func createScraperFromConfig(c config, client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||
base := configScraper{
|
||||
client: client,
|
||||
config: c,
|
||||
txnManager: txnManager,
|
||||
globalConfig: globalConfig,
|
||||
}
|
||||
|
||||
ret := scraper{
|
||||
ID: c.ID,
|
||||
Spec: configScraperSpec(c),
|
||||
}
|
||||
|
||||
// only set fields if supported
|
||||
if c.supportsPerformers() {
|
||||
ret.Performer = &configPerformerScraper{&base}
|
||||
}
|
||||
if c.supportsGalleries() {
|
||||
ret.Gallery = &configGalleryScraper{&base}
|
||||
}
|
||||
if c.supportsMovies() {
|
||||
ret.Movie = &configMovieScraper{&base}
|
||||
}
|
||||
if c.supportsScenes() {
|
||||
ret.Scene = &configSceneScraper{&base}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func configScraperSpec(c config) *models.Scraper {
|
||||
ret := models.Scraper{
|
||||
ID: c.ID,
|
||||
Name: c.Name,
|
||||
}
|
||||
|
||||
performer := models.ScraperSpec{}
|
||||
if c.PerformerByName != nil {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeName)
|
||||
}
|
||||
if c.PerformerByFragment != nil {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if len(c.PerformerByURL) > 0 {
|
||||
performer.SupportedScrapes = append(performer.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.PerformerByURL {
|
||||
performer.Urls = append(performer.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(performer.SupportedScrapes) > 0 {
|
||||
ret.Performer = &performer
|
||||
}
|
||||
|
||||
scene := models.ScraperSpec{}
|
||||
if c.SceneByFragment != nil {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if c.SceneByName != nil && c.SceneByQueryFragment != nil {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeName)
|
||||
}
|
||||
if len(c.SceneByURL) > 0 {
|
||||
scene.SupportedScrapes = append(scene.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.SceneByURL {
|
||||
scene.Urls = append(scene.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(scene.SupportedScrapes) > 0 {
|
||||
ret.Scene = &scene
|
||||
}
|
||||
|
||||
gallery := models.ScraperSpec{}
|
||||
if c.GalleryByFragment != nil {
|
||||
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeFragment)
|
||||
}
|
||||
if len(c.GalleryByURL) > 0 {
|
||||
gallery.SupportedScrapes = append(gallery.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.GalleryByURL {
|
||||
gallery.Urls = append(gallery.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(gallery.SupportedScrapes) > 0 {
|
||||
ret.Gallery = &gallery
|
||||
}
|
||||
|
||||
movie := models.ScraperSpec{}
|
||||
if len(c.MovieByURL) > 0 {
|
||||
movie.SupportedScrapes = append(movie.SupportedScrapes, models.ScrapeTypeURL)
|
||||
for _, v := range c.MovieByURL {
|
||||
movie.Urls = append(movie.Urls, v.URL...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(movie.SupportedScrapes) > 0 {
|
||||
ret.Movie = &movie
|
||||
}
|
||||
|
||||
return &ret
|
||||
}
|
|
@ -1,7 +1,6 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
|
@ -47,7 +46,7 @@ xPathScrapers:
|
|||
- regex: \sBio\s*$
|
||||
with: ""
|
||||
URL: //link[@rel="alternate" and @hreflang="x-default"]/@href
|
||||
Twitter: //a[not(starts-with(@href,'https://twitter.com/FreeOnes'))][contains(@href,'twitter.com/')]/@href
|
||||
Twitter: //a[not(starts-with(@href,'https://twitter.com/FreeOnes'))][contains(@href,'twitter.com/')]/@href
|
||||
Instagram: //a[contains(@href,'instagram.com/')]/@href
|
||||
Birthdate:
|
||||
selector: //span[contains(text(),'Born On')]
|
||||
|
@ -124,7 +123,7 @@ xPathScrapers:
|
|||
# Last updated April 13, 2021
|
||||
`
|
||||
|
||||
func getFreeonesScraper(client *http.Client, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||
func getFreeonesScraper(txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||
yml := freeonesScraperConfig
|
||||
|
||||
c, err := loadConfigFromYAML(FreeonesScraperID, strings.NewReader(yml))
|
||||
|
@ -132,5 +131,5 @@ func getFreeonesScraper(client *http.Client, txnManager models.TransactionManage
|
|||
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
|
||||
}
|
||||
|
||||
return createScraperFromConfig(*c, client, txnManager, globalConfig)
|
||||
return newGroupScraper(*c, txnManager, globalConfig)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
// group is a scraper driven by a single scraper config. It keeps the
// transaction manager and global configuration that are passed to
// config.getScraper when a concrete scraper implementation is created.
type group struct {
|
||||
config config
|
||||
|
||||
txnManager models.TransactionManager
|
||||
globalConf GlobalConfig
|
||||
}
|
||||
|
||||
func newGroupScraper(c config, txnManager models.TransactionManager, globalConfig GlobalConfig) scraper {
|
||||
return group{
|
||||
config: c,
|
||||
txnManager: txnManager,
|
||||
globalConf: globalConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (g group) spec() models.Scraper {
|
||||
return g.config.spec()
|
||||
}
|
||||
|
||||
// fragmentScraper finds an appropriate fragment scraper based on input.
|
||||
func (g group) fragmentScraper(input Input) *scraperTypeConfig {
|
||||
switch {
|
||||
case input.Performer != nil:
|
||||
return g.config.PerformerByFragment
|
||||
case input.Gallery != nil:
|
||||
// TODO - this should be galleryByQueryFragment
|
||||
return g.config.GalleryByFragment
|
||||
case input.Scene != nil:
|
||||
return g.config.SceneByQueryFragment
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// scrapeFragmentInput analyzes the input and calls an appropriate scraperActionImpl
|
||||
func scrapeFragmentInput(ctx context.Context, input Input, s scraperActionImpl) (models.ScrapedContent, error) {
|
||||
switch {
|
||||
case input.Performer != nil:
|
||||
return s.scrapePerformerByFragment(*input.Performer)
|
||||
case input.Gallery != nil:
|
||||
return s.scrapeGalleryByFragment(*input.Gallery)
|
||||
case input.Scene != nil:
|
||||
return s.scrapeSceneByFragment(ctx, *input.Scene)
|
||||
}
|
||||
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error) {
|
||||
stc := g.fragmentScraper(input)
|
||||
if stc == nil {
|
||||
// If there's no performer fragment scraper in the group, we try to use
|
||||
// the URL scraper. Check if there's an URL in the input, and then shift
|
||||
// to an URL scrape if it's present.
|
||||
if input.Performer != nil && input.Performer.URL != nil && *input.Performer.URL != "" {
|
||||
return g.viaURL(ctx, client, *input.Performer.URL, models.ScrapeContentTypePerformer)
|
||||
}
|
||||
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*stc, client, g.txnManager, g.globalConf)
|
||||
return scrapeFragmentInput(ctx, input, s)
|
||||
}
|
||||
|
||||
func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
if g.config.SceneByFragment == nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.SceneByFragment, client, g.txnManager, g.globalConf)
|
||||
return s.scrapeSceneByScene(ctx, scene)
|
||||
}
|
||||
|
||||
func (g group) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
if g.config.GalleryByFragment == nil {
|
||||
return nil, ErrNotSupported
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.GalleryByFragment, client, g.txnManager, g.globalConf)
|
||||
return s.scrapeGalleryByGallery(ctx, gallery)
|
||||
}
|
||||
|
||||
func loadUrlCandidates(c config, ty models.ScrapeContentType) []*scrapeByURLConfig {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
return c.PerformerByURL
|
||||
case models.ScrapeContentTypeScene:
|
||||
return c.SceneByURL
|
||||
case models.ScrapeContentTypeMovie:
|
||||
return c.MovieByURL
|
||||
case models.ScrapeContentTypeGallery:
|
||||
return c.GalleryByURL
|
||||
}
|
||||
|
||||
panic("loadUrlCandidates: unreachable")
|
||||
}
|
||||
|
||||
func scrapeByUrl(ctx context.Context, url string, s scraperActionImpl, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
return s.scrapePerformerByURL(ctx, url)
|
||||
case models.ScrapeContentTypeScene:
|
||||
return s.scrapeSceneByURL(ctx, url)
|
||||
case models.ScrapeContentTypeMovie:
|
||||
return s.scrapeMovieByURL(ctx, url)
|
||||
case models.ScrapeContentTypeGallery:
|
||||
return s.scrapeGalleryByURL(ctx, url)
|
||||
}
|
||||
|
||||
panic("scrapeByUrl: unreachable")
|
||||
}
|
||||
|
||||
func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error) {
|
||||
candidates := loadUrlCandidates(g.config, ty)
|
||||
for _, scraper := range candidates {
|
||||
if scraper.matchesURL(url) {
|
||||
s := g.config.getScraper(scraper.scraperTypeConfig, client, g.txnManager, g.globalConf)
|
||||
ret, err := scrapeByUrl(ctx, url, s, ty)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (g group) viaName(ctx context.Context, client *http.Client, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error) {
|
||||
switch ty {
|
||||
case models.ScrapeContentTypePerformer:
|
||||
if g.config.PerformerByName == nil {
|
||||
break
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.PerformerByName, client, g.txnManager, g.globalConf)
|
||||
performers, err := s.scrapePerformersByName(ctx, name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
content := make([]models.ScrapedContent, len(performers))
|
||||
for i := range performers {
|
||||
content[i] = performers[i]
|
||||
}
|
||||
return content, nil
|
||||
case models.ScrapeContentTypeScene:
|
||||
if g.config.SceneByName == nil {
|
||||
break
|
||||
}
|
||||
|
||||
s := g.config.getScraper(*g.config.SceneByName, client, g.txnManager, g.globalConf)
|
||||
scenes, err := s.scrapeScenesByName(ctx, name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
content := make([]models.ScrapedContent, len(scenes))
|
||||
for i := range scenes {
|
||||
content[i] = scenes[i]
|
||||
}
|
||||
return content, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty)
|
||||
}
|
||||
|
||||
func (g group) supports(ty models.ScrapeContentType) bool {
|
||||
return g.config.supports(ty)
|
||||
}
|
||||
|
||||
func (g group) supportsURL(url string, ty models.ScrapeContentType) bool {
|
||||
return g.config.matchesURL(url, ty)
|
||||
}
|
|
@ -3,6 +3,7 @@ package scraper
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
@ -74,9 +75,9 @@ func (s *jsonScraper) loadURL(ctx context.Context, url string) (string, error) {
|
|||
return docStr, err
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||
func (s *jsonScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
||||
doc, scraper, err := s.scrapeURL(context.TODO(), u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -85,9 +86,9 @@ func (s *jsonScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer
|
|||
return scraper.scrapePerformer(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||
func (s *jsonScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
|
||||
doc, scraper, err := s.scrapeURL(context.TODO(), u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -96,9 +97,9 @@ func (s *jsonScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error)
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||
func (s *jsonScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
|
||||
doc, scraper, err := s.scrapeURL(context.TODO(), u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -107,9 +108,9 @@ func (s *jsonScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, er
|
|||
return scraper.scrapeGallery(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||
func (s *jsonScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
|
||||
doc, scraper, err := s.scrapeURL(context.TODO(), u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -118,7 +119,7 @@ func (s *jsonScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error)
|
|||
return scraper.scrapeMovie(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||
func (s *jsonScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
|
@ -147,7 +148,7 @@ func (s *jsonScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedP
|
|||
return nil, errors.New("scrapePerformerByFragment not supported for json scraper")
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
|
||||
func (s *jsonScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
|
||||
scraper := s.getJsonScraper()
|
||||
|
||||
if scraper == nil {
|
||||
|
@ -162,7 +163,7 @@ func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, e
|
|||
url := s.scraper.QueryURL
|
||||
url = strings.ReplaceAll(url, placeholder, escapedName)
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -172,7 +173,7 @@ func (s *jsonScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, e
|
|||
return scraper.scrapeScenes(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s *jsonScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -186,7 +187,7 @@ func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedSc
|
|||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -196,7 +197,7 @@ func (s *jsonScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedSc
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
func (s *jsonScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScrapedScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -210,7 +211,7 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*mo
|
|||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -220,7 +221,7 @@ func (s *jsonScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*mo
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *jsonScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *jsonScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromGallery(gallery)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -234,7 +235,7 @@ func (s *jsonScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.S
|
|||
return nil, errors.New("json scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -260,12 +261,11 @@ type jsonQuery struct {
|
|||
scraper *jsonScraper
|
||||
}
|
||||
|
||||
func (q *jsonQuery) runQuery(selector string) []string {
|
||||
func (q *jsonQuery) runQuery(selector string) ([]string, error) {
|
||||
value := gjson.Get(q.doc, selector)
|
||||
|
||||
if !value.Exists() {
|
||||
logger.Warnf("Could not find json path '%s' in json object", selector)
|
||||
return nil
|
||||
return nil, fmt.Errorf("could not find json path '%s' in json object", selector)
|
||||
}
|
||||
|
||||
var ret []string
|
||||
|
@ -278,7 +278,7 @@ func (q *jsonQuery) runQuery(selector string) []string {
|
|||
ret = append(ret, value.String())
|
||||
}
|
||||
|
||||
return ret
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (q *jsonQuery) subScrape(value string) mappedQuery {
|
||||
|
|
|
@ -17,7 +17,7 @@ import (
|
|||
)
|
||||
|
||||
type mappedQuery interface {
|
||||
runQuery(selector string) []string
|
||||
runQuery(selector string) ([]string, error)
|
||||
subScrape(value string) mappedQuery
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,10 @@ func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedRe
|
|||
selector := attrConfig.Selector
|
||||
selector = s.applyCommon(common, selector)
|
||||
|
||||
found := q.runQuery(selector)
|
||||
found, err := q.runQuery(selector)
|
||||
if err != nil {
|
||||
logger.Warnf("key '%v': %v", k, err)
|
||||
}
|
||||
|
||||
if len(found) > 0 {
|
||||
result := s.postProcess(q, attrConfig, found)
|
||||
|
@ -423,7 +426,10 @@ func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
|
|||
ss := q.subScrape(value)
|
||||
|
||||
if ss != nil {
|
||||
found := ss.runQuery(subScrapeConfig.Selector)
|
||||
found, err := ss.runQuery(subScrapeConfig.Selector)
|
||||
if err != nil {
|
||||
logger.Warnf("subscrape for '%v': %v", value, err)
|
||||
}
|
||||
|
||||
if len(found) > 0 {
|
||||
// check if we're concatenating the results into a single result
|
||||
|
|
|
@ -0,0 +1,224 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
||||
"github.com/stashapp/stash/pkg/match"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
// postScrape handles post-processing of scraped content. If the content
|
||||
// requires post-processing, this function fans out to the given content
|
||||
// type and post-processes it.
|
||||
func (c Cache) postScrape(ctx context.Context, content models.ScrapedContent) (models.ScrapedContent, error) {
|
||||
// Analyze the concrete type, call the right post-processing function
|
||||
switch v := content.(type) {
|
||||
case *models.ScrapedPerformer:
|
||||
return c.postScrapePerformer(ctx, v)
|
||||
case models.ScrapedPerformer:
|
||||
return c.postScrapePerformer(ctx, &v)
|
||||
case *models.ScrapedScene:
|
||||
return c.postScrapeScene(ctx, v)
|
||||
case models.ScrapedScene:
|
||||
return c.postScrapeScene(ctx, &v)
|
||||
case *models.ScrapedGallery:
|
||||
return c.postScrapeGallery(ctx, v)
|
||||
case models.ScrapedGallery:
|
||||
return c.postScrapeGallery(ctx, &v)
|
||||
case *models.ScrapedMovie:
|
||||
return c.postScrapeMovie(ctx, v)
|
||||
case models.ScrapedMovie:
|
||||
return c.postScrapeMovie(ctx, &v)
|
||||
}
|
||||
|
||||
// If nothing matches, pass the content through
|
||||
return content, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) (models.ScrapedContent, error) {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeMovie(ctx context.Context, ret *models.ScrapedMovie) (models.ScrapedContent, error) {
|
||||
if ret.Studio != nil {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
return match.ScrapedStudio(r.Studio(), ret.Studio, nil)
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setMovieFrontImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set front image using URL %s: %v", *ret.FrontImage, err)
|
||||
}
|
||||
if err := setMovieBackImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("could not set back image using URL %s: %v", *ret.BackImage, err)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScenePerformer(ctx context.Context, ret *models.ScrapedPerformer) error {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) (models.ScrapedContent, error) {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
pqb := r.Performer()
|
||||
mqb := r.Movie()
|
||||
tqb := r.Tag()
|
||||
sqb := r.Studio()
|
||||
|
||||
for _, p := range ret.Performers {
|
||||
if err := c.postScrapeScenePerformer(ctx, p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := match.ScrapedPerformer(pqb, p, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, p := range ret.Movies {
|
||||
err := match.ScrapedMovie(mqb, p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := match.ScrapedStudio(sqb, ret.Studio, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeGallery(ctx context.Context, ret *models.ScrapedGallery) (models.ScrapedContent, error) {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
pqb := r.Performer()
|
||||
tqb := r.Tag()
|
||||
sqb := r.Studio()
|
||||
|
||||
for _, p := range ret.Performers {
|
||||
err := match.ScrapedPerformer(pqb, p, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := match.ScrapedStudio(sqb, ret.Studio, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
|
||||
var ret []*models.ScrapedTag
|
||||
|
||||
excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns()
|
||||
var excludeRegexps []*regexp.Regexp
|
||||
|
||||
for _, excludePattern := range excludePatterns {
|
||||
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
||||
if err != nil {
|
||||
logger.Errorf("Invalid tag exclusion pattern :%v", err)
|
||||
} else {
|
||||
excludeRegexps = append(excludeRegexps, reg)
|
||||
}
|
||||
}
|
||||
|
||||
var ignoredTags []string
|
||||
ScrapeTag:
|
||||
for _, t := range scrapedTags {
|
||||
for _, reg := range excludeRegexps {
|
||||
if reg.MatchString(strings.ToLower(t.Name)) {
|
||||
ignoredTags = append(ignoredTags, t.Name)
|
||||
continue ScrapeTag
|
||||
}
|
||||
}
|
||||
|
||||
err := match.ScrapedTag(tqb, t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret = append(ret, t)
|
||||
}
|
||||
|
||||
if len(ignoredTags) > 0 {
|
||||
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
|
@ -1,51 +1,77 @@
|
|||
package scraper
|
||||
|
||||
import "github.com/stashapp/stash/pkg/models"
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
type urlMatcher interface {
|
||||
matchesURL(url string) bool
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrMaxRedirects is returned if the max number of HTTP redirects are reached.
|
||||
ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached")
|
||||
|
||||
// ErrNotFound is returned when an entity isn't found
|
||||
ErrNotFound = errors.New("scraper not found")
|
||||
|
||||
// ErrNotSupported is returned when a given invocation isn't supported, and there
|
||||
// is a guard function which should be able to guard against it.
|
||||
ErrNotSupported = errors.New("scraper operation not supported")
|
||||
)
|
||||
|
||||
// Input coalesces inputs of different types into a single structure.
|
||||
// The system expects one of these to be set, and the remaining to be
|
||||
// set to nil.
|
||||
type Input struct {
|
||||
Performer *models.ScrapedPerformerInput
|
||||
Scene *models.ScrapedSceneInput
|
||||
Gallery *models.ScrapedGalleryInput
|
||||
}
|
||||
|
||||
type performerScraper interface {
|
||||
scrapeByName(name string) ([]*models.ScrapedPerformer, error)
|
||||
scrapeByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error)
|
||||
scrapeByURL(url string) (*models.ScrapedPerformer, error)
|
||||
// scraper is the generic interface to the scraper subsystems
|
||||
type scraper interface {
|
||||
// spec returns the scraper specification, suitable for graphql
|
||||
spec() models.Scraper
|
||||
// supports tests if the scraper supports a given content type
|
||||
supports(models.ScrapeContentType) bool
|
||||
// supportsURL tests if the scraper supports scrapes of a given url, producing a given content type
|
||||
supportsURL(url string, ty models.ScrapeContentType) bool
|
||||
}
|
||||
|
||||
// urlScraper is the interface of scrapers supporting url loads
|
||||
type urlScraper interface {
|
||||
scraper
|
||||
|
||||
viaURL(ctx context.Context, client *http.Client, url string, ty models.ScrapeContentType) (models.ScrapedContent, error)
|
||||
}
|
||||
|
||||
// nameScraper is the interface of scrapers supporting name loads
|
||||
type nameScraper interface {
|
||||
scraper
|
||||
|
||||
viaName(ctx context.Context, client *http.Client, name string, ty models.ScrapeContentType) ([]models.ScrapedContent, error)
|
||||
}
|
||||
|
||||
// fragmentScraper is the interface of scrapers supporting fragment loads
|
||||
type fragmentScraper interface {
|
||||
scraper
|
||||
|
||||
viaFragment(ctx context.Context, client *http.Client, input Input) (models.ScrapedContent, error)
|
||||
}
|
||||
|
||||
// sceneScraper is a scraper which supports scene scrapes with
|
||||
// scene data as the input.
|
||||
type sceneScraper interface {
|
||||
scrapeByName(name string) ([]*models.ScrapedScene, error)
|
||||
scrapeByScene(scene *models.Scene) (*models.ScrapedScene, error)
|
||||
scrapeByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error)
|
||||
scrapeByURL(url string) (*models.ScrapedScene, error)
|
||||
scraper
|
||||
|
||||
viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error)
|
||||
}
|
||||
|
||||
// galleryScraper is a scraper which supports gallery scrapes with
|
||||
// gallery data as the input.
|
||||
type galleryScraper interface {
|
||||
scrapeByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error)
|
||||
scrapeByFragment(gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error)
|
||||
scrapeByURL(url string) (*models.ScrapedGallery, error)
|
||||
}
|
||||
|
||||
type movieScraper interface {
|
||||
scrapeByURL(url string) (*models.ScrapedMovie, error)
|
||||
}
|
||||
|
||||
type scraper struct {
|
||||
ID string
|
||||
Spec *models.Scraper
|
||||
|
||||
Performer performerScraper
|
||||
Scene sceneScraper
|
||||
Gallery galleryScraper
|
||||
Movie movieScraper
|
||||
}
|
||||
|
||||
func matchesURL(maybeURLMatcher interface{}, url string) bool {
|
||||
if maybeURLMatcher != nil {
|
||||
matcher, ok := maybeURLMatcher.(urlMatcher)
|
||||
if ok {
|
||||
return matcher.matchesURL(url)
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
scraper
|
||||
|
||||
viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error)
|
||||
}
|
||||
|
|
|
@ -1,660 +0,0 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
||||
"github.com/stashapp/stash/pkg/match"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/utils"
|
||||
)
|
||||
|
||||
var ErrMaxRedirects = errors.New("maximum number of HTTP redirects reached")
|
||||
|
||||
const (
|
||||
// scrapeGetTimeout is the timeout for scraper HTTP requests. Includes transfer time.
|
||||
// We may want to bump this at some point and use local context-timeouts if more granularity
|
||||
// is needed.
|
||||
scrapeGetTimeout = time.Second * 60
|
||||
|
||||
// maxIdleConnsPerHost is the maximum number of idle connections the HTTP client will
|
||||
// keep on a per-host basis.
|
||||
maxIdleConnsPerHost = 8
|
||||
|
||||
// maxRedirects defines the maximum number of redirects the HTTP client will follow
|
||||
maxRedirects = 20
|
||||
)
|
||||
|
||||
// GlobalConfig is the view of the global configuration that the scraper
// subsystem depends on.
type GlobalConfig interface {
	// GetScraperUserAgent presumably returns the User-Agent used for scraper
	// requests; it is not called in this part of the file (confirm at the
	// call sites).
	GetScraperUserAgent() string
	// GetScrapersPath returns the directory that is walked for scraper
	// .yml configuration files.
	GetScrapersPath() string
	// GetScraperCDPPath returns the CDP path setting; its prefix is inspected
	// to tell http(s):// and ws:// endpoints apart.
	GetScraperCDPPath() string
	// GetScraperCertCheck reports whether TLS certificates are verified;
	// when false the scraper HTTP client skips verification.
	GetScraperCertCheck() bool
}
|
||||
|
||||
func isCDPPathHTTP(c GlobalConfig) bool {
|
||||
return strings.HasPrefix(c.GetScraperCDPPath(), "http://") || strings.HasPrefix(c.GetScraperCDPPath(), "https://")
|
||||
}
|
||||
|
||||
func isCDPPathWS(c GlobalConfig) bool {
|
||||
return strings.HasPrefix(c.GetScraperCDPPath(), "ws://")
|
||||
}
|
||||
|
||||
// Cache stores scraper details.
|
||||
type Cache struct {
|
||||
client *http.Client
|
||||
scrapers []scraper
|
||||
globalConfig GlobalConfig
|
||||
txnManager models.TransactionManager
|
||||
}
|
||||
|
||||
// newClient creates a scraper-local http client we use throughout the scraper subsystem.
|
||||
func newClient(gc GlobalConfig) *http.Client {
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{ // ignore insecure certificates
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: !gc.GetScraperCertCheck()},
|
||||
MaxIdleConnsPerHost: maxIdleConnsPerHost,
|
||||
},
|
||||
Timeout: scrapeGetTimeout,
|
||||
// defaultCheckRedirect code with max changed from 10 to maxRedirects
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) >= maxRedirects {
|
||||
return fmt.Errorf("after %d redirects: %w", maxRedirects, ErrMaxRedirects)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
return client
|
||||
}
|
||||
|
||||
// NewCache returns a new Cache loading scraper configurations from the
|
||||
// scraper path provided in the global config object. It returns a new
|
||||
// instance and an error if the scraper directory could not be loaded.
|
||||
//
|
||||
// Scraper configurations are loaded from yml files in the provided scrapers
|
||||
// directory and any subdirectories.
|
||||
func NewCache(globalConfig GlobalConfig, txnManager models.TransactionManager) (*Cache, error) {
|
||||
// HTTP Client setup
|
||||
client := newClient(globalConfig)
|
||||
|
||||
scrapers, err := loadScrapers(globalConfig, client, txnManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Cache{
|
||||
client: client,
|
||||
globalConfig: globalConfig,
|
||||
scrapers: scrapers,
|
||||
txnManager: txnManager,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func loadScrapers(globalConfig GlobalConfig, client *http.Client, txnManager models.TransactionManager) ([]scraper, error) {
|
||||
path := globalConfig.GetScrapersPath()
|
||||
scrapers := make([]scraper, 0)
|
||||
|
||||
logger.Debugf("Reading scraper configs from %s", path)
|
||||
scraperFiles := []string{}
|
||||
err := utils.SymWalk(path, func(fp string, f os.FileInfo, err error) error {
|
||||
if filepath.Ext(fp) == ".yml" {
|
||||
scraperFiles = append(scraperFiles, fp)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("Error reading scraper configs: %s", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// add built-in freeones scraper
|
||||
scrapers = append(scrapers, getFreeonesScraper(client, txnManager, globalConfig), getAutoTagScraper(txnManager, globalConfig))
|
||||
|
||||
for _, file := range scraperFiles {
|
||||
c, err := loadConfigFromYAMLFile(file)
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading scraper %s: %s", file, err.Error())
|
||||
} else {
|
||||
scraper := createScraperFromConfig(*c, client, txnManager, globalConfig)
|
||||
scrapers = append(scrapers, scraper)
|
||||
}
|
||||
}
|
||||
|
||||
return scrapers, nil
|
||||
}
|
||||
|
||||
// ReloadScrapers clears the scraper cache and reloads from the scraper path.
|
||||
// In the event of an error during loading, the cache will be left empty.
|
||||
func (c *Cache) ReloadScrapers() error {
|
||||
c.scrapers = nil
|
||||
scrapers, err := loadScrapers(c.globalConfig, c.client, c.txnManager)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.scrapers = scrapers
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO - don't think this is needed
|
||||
// UpdateConfig updates the global config for the cache. If the scraper path
|
||||
// has changed, ReloadScrapers will need to be called separately.
|
||||
func (c *Cache) UpdateConfig(globalConfig GlobalConfig) {
|
||||
c.globalConfig = globalConfig
|
||||
}
|
||||
|
||||
// ListPerformerScrapers returns a list of scrapers that are capable of
|
||||
// scraping performers.
|
||||
func (c Cache) ListPerformerScrapers() []*models.Scraper {
|
||||
var ret []*models.Scraper
|
||||
for _, s := range c.scrapers {
|
||||
// filter on type
|
||||
if s.Performer != nil {
|
||||
ret = append(ret, s.Spec)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// ListSceneScrapers returns a list of scrapers that are capable of
|
||||
// scraping scenes.
|
||||
func (c Cache) ListSceneScrapers() []*models.Scraper {
|
||||
var ret []*models.Scraper
|
||||
for _, s := range c.scrapers {
|
||||
// filter on type
|
||||
if s.Scene != nil {
|
||||
ret = append(ret, s.Spec)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// ListGalleryScrapers returns a list of scrapers that are capable of
|
||||
// scraping galleries.
|
||||
func (c Cache) ListGalleryScrapers() []*models.Scraper {
|
||||
var ret []*models.Scraper
|
||||
for _, s := range c.scrapers {
|
||||
// filter on type
|
||||
if s.Gallery != nil {
|
||||
ret = append(ret, s.Spec)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// ListMovieScrapers returns a list of scrapers that are capable of
|
||||
// scraping scenes.
|
||||
func (c Cache) ListMovieScrapers() []*models.Scraper {
|
||||
var ret []*models.Scraper
|
||||
for _, s := range c.scrapers {
|
||||
// filter on type
|
||||
if s.Movie != nil {
|
||||
ret = append(ret, s.Spec)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// GetScraper returns the scraper matching the provided id.
|
||||
func (c Cache) GetScraper(scraperID string) *models.Scraper {
|
||||
ret := c.findScraper(scraperID)
|
||||
if ret != nil {
|
||||
return ret.Spec
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) findScraper(scraperID string) *scraper {
|
||||
for _, s := range c.scrapers {
|
||||
if s.ID == scraperID {
|
||||
return &s
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ScrapePerformerList uses the scraper with the provided ID to query for
|
||||
// performers using the provided query string. It returns a list of
|
||||
// scraped performer data.
|
||||
func (c Cache) ScrapePerformerList(scraperID string, query string) ([]*models.ScrapedPerformer, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Performer != nil {
|
||||
return s.Performer.scrapeByName(query)
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapePerformer uses the scraper with the provided ID to scrape a
|
||||
// performer using the provided performer fragment.
|
||||
func (c Cache) ScrapePerformer(scraperID string, scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Performer != nil {
|
||||
ret, err := s.Performer.scrapeByFragment(scrapedPerformer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapePerformer(context.TODO(), ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapePerformerURL uses the first scraper it finds that matches the URL
|
||||
// provided to scrape a performer. If no scrapers are found that matches
|
||||
// the URL, then nil is returned.
|
||||
func (c Cache) ScrapePerformerURL(url string) (*models.ScrapedPerformer, error) {
|
||||
for _, s := range c.scrapers {
|
||||
if matchesURL(s.Performer, url) {
|
||||
ret, err := s.Performer.scrapeByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapePerformer(context.TODO(), ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapePerformer(ctx context.Context, ret *models.ScrapedPerformer) error {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setPerformerImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %s", *ret.Image, err.Error())
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScenePerformer(ret *models.ScrapedPerformer) error {
|
||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeScene(ctx context.Context, ret *models.ScrapedScene) error {
|
||||
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
||||
pqb := r.Performer()
|
||||
mqb := r.Movie()
|
||||
tqb := r.Tag()
|
||||
sqb := r.Studio()
|
||||
|
||||
for _, p := range ret.Performers {
|
||||
if err := c.postScrapeScenePerformer(p); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := match.ScrapedPerformer(pqb, p, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, p := range ret.Movies {
|
||||
err := match.ScrapedMovie(mqb, p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := match.ScrapedStudio(sqb, ret.Studio, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setSceneImage(ctx, c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set image using URL %s: %v", *ret.Image, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
|
||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
pqb := r.Performer()
|
||||
tqb := r.Tag()
|
||||
sqb := r.Studio()
|
||||
|
||||
for _, p := range ret.Performers {
|
||||
err := match.ScrapedPerformer(pqb, p, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := match.ScrapedStudio(sqb, ret.Studio, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ScrapeScene uses the scraper with the provided ID to scrape a scene using existing data.
|
||||
func (c Cache) ScrapeScene(scraperID string, sceneID int) (*models.ScrapedScene, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Scene != nil {
|
||||
// get scene from id
|
||||
scene, err := getScene(sceneID, c.txnManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret, err := s.Scene.scrapeByScene(scene)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapeScene(context.TODO(), ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapeSceneQuery uses the scraper with the provided ID to query for
|
||||
// scenes using the provided query string. It returns a list of
|
||||
// scraped scene data.
|
||||
func (c Cache) ScrapeSceneQuery(scraperID string, query string) ([]*models.ScrapedScene, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Scene != nil {
|
||||
return s.Scene.scrapeByName(query)
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapeSceneFragment uses the scraper with the provided ID to scrape a scene.
|
||||
func (c Cache) ScrapeSceneFragment(scraperID string, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
// find scraper with the provided id
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Scene != nil {
|
||||
ret, err := s.Scene.scrapeByFragment(scene)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapeScene(context.TODO(), ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraper with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapeSceneURL uses the first scraper it finds that matches the URL
|
||||
// provided to scrape a scene. If no scrapers are found that matches
|
||||
// the URL, then nil is returned.
|
||||
func (c Cache) ScrapeSceneURL(url string) (*models.ScrapedScene, error) {
|
||||
for _, s := range c.scrapers {
|
||||
if matchesURL(s.Scene, url) {
|
||||
ret, err := s.Scene.scrapeByURL(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = c.postScrapeScene(context.TODO(), ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ScrapeGallery uses the scraper with the provided ID to scrape a gallery using existing data.
|
||||
func (c Cache) ScrapeGallery(scraperID string, galleryID int) (*models.ScrapedGallery, error) {
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Gallery != nil {
|
||||
// get gallery from id
|
||||
gallery, err := getGallery(galleryID, c.txnManager)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret, err := s.Gallery.scrapeByGallery(gallery)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapeGallery(ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraped with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapeGalleryFragment uses the scraper with the provided ID to scrape a gallery.
|
||||
func (c Cache) ScrapeGalleryFragment(scraperID string, gallery models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
|
||||
s := c.findScraper(scraperID)
|
||||
if s != nil && s.Gallery != nil {
|
||||
ret, err := s.Gallery.scrapeByFragment(gallery)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
err = c.postScrapeGallery(ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("Scraped with ID " + scraperID + " not found")
|
||||
}
|
||||
|
||||
// ScrapeGalleryURL uses the first scraper it finds that matches the URL
|
||||
// provided to scrape a scene. If no scrapers are found that matches
|
||||
// the URL, then nil is returned.
|
||||
func (c Cache) ScrapeGalleryURL(url string) (*models.ScrapedGallery, error) {
|
||||
for _, s := range c.scrapers {
|
||||
if matchesURL(s.Gallery, url) {
|
||||
ret, err := s.Gallery.scrapeByURL(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = c.postScrapeGallery(ret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ScrapeMovieURL uses the first scraper it finds that matches the URL
|
||||
// provided to scrape a movie. If no scrapers are found that matches
|
||||
// the URL, then nil is returned.
|
||||
func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
|
||||
for _, s := range c.scrapers {
|
||||
if s.Movie != nil && matchesURL(s.Movie, url) {
|
||||
ret, err := s.Movie.scrapeByURL(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ret.Studio != nil {
|
||||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
return match.ScrapedStudio(r.Studio(), ret.Studio, nil)
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// post-process - set the image if applicable
|
||||
if err := setMovieFrontImage(context.TODO(), c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set front image using URL %s: %s", *ret.FrontImage, err.Error())
|
||||
}
|
||||
if err := setMovieBackImage(context.TODO(), c.client, ret, c.globalConfig); err != nil {
|
||||
logger.Warnf("Could not set back image using URL %s: %s", *ret.BackImage, err.Error())
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedTag) ([]*models.ScrapedTag, error) {
|
||||
var ret []*models.ScrapedTag
|
||||
|
||||
excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns()
|
||||
var excludeRegexps []*regexp.Regexp
|
||||
|
||||
for _, excludePattern := range excludePatterns {
|
||||
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
||||
if err != nil {
|
||||
logger.Errorf("Invalid tag exclusion pattern :%v", err)
|
||||
} else {
|
||||
excludeRegexps = append(excludeRegexps, reg)
|
||||
}
|
||||
}
|
||||
|
||||
var ignoredTags []string
|
||||
ScrapeTag:
|
||||
for _, t := range scrapedTags {
|
||||
for _, reg := range excludeRegexps {
|
||||
if reg.MatchString(strings.ToLower(t.Name)) {
|
||||
ignoredTags = append(ignoredTags, t.Name)
|
||||
continue ScrapeTag
|
||||
}
|
||||
}
|
||||
|
||||
err := match.ScrapedTag(tqb, t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret = append(ret, t)
|
||||
}
|
||||
|
||||
if len(ignoredTags) > 0 {
|
||||
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
@ -89,7 +90,7 @@ func (s *scriptScraper) runScraperScript(inString string, out interface{}) error
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||
func (s *scriptScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
|
||||
inString := `{"name": "` + name + `"}`
|
||||
|
||||
var performers []models.ScrapedPerformer
|
||||
|
@ -121,7 +122,7 @@ func (s *scriptScraper) scrapePerformerByFragment(scrapedPerformer models.Scrape
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||
func (s *scriptScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
|
||||
inString := `{"url": "` + url + `"}`
|
||||
|
||||
var ret models.ScrapedPerformer
|
||||
|
@ -131,7 +132,7 @@ func (s *scriptScraper) scrapePerformerByURL(url string) (*models.ScrapedPerform
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s *scriptScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
inString, err := json.Marshal(sceneToUpdateInput(scene))
|
||||
|
||||
if err != nil {
|
||||
|
@ -145,7 +146,7 @@ func (s *scriptScraper) scrapeSceneByScene(scene *models.Scene) (*models.Scraped
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
|
||||
func (s *scriptScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
|
||||
inString := `{"name": "` + name + `"}`
|
||||
|
||||
var scenes []models.ScrapedScene
|
||||
|
@ -163,7 +164,7 @@ func (s *scriptScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene,
|
|||
return ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
func (s *scriptScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
inString, err := json.Marshal(scene)
|
||||
|
||||
if err != nil {
|
||||
|
@ -177,7 +178,7 @@ func (s *scriptScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *scriptScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
inString, err := json.Marshal(galleryToUpdateInput(gallery))
|
||||
|
||||
if err != nil {
|
||||
|
@ -205,7 +206,7 @@ func (s *scriptScraper) scrapeGalleryByFragment(gallery models.ScrapedGalleryInp
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||
func (s *scriptScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||
inString := `{"url": "` + url + `"}`
|
||||
|
||||
var ret models.ScrapedScene
|
||||
|
@ -215,7 +216,7 @@ func (s *scriptScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, erro
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||
func (s *scriptScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
|
||||
inString := `{"url": "` + url + `"}`
|
||||
|
||||
var ret models.ScrapedGallery
|
||||
|
@ -225,7 +226,7 @@ func (s *scriptScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery,
|
|||
return &ret, err
|
||||
}
|
||||
|
||||
func (s *scriptScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||
func (s *scriptScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
|
||||
inString := `{"url": "` + url + `"}`
|
||||
|
||||
var ret models.ScrapedMovie
|
||||
|
|
|
@ -54,7 +54,7 @@ type stashFindPerformerNamesResultType struct {
|
|||
Performers []*stashFindPerformerNamePerformer `graphql:"performers"`
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||
func (s *stashScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
|
||||
client := s.getStashClient()
|
||||
|
||||
var q struct {
|
||||
|
@ -72,7 +72,7 @@ func (s *stashScraper) scrapePerformersByName(name string) ([]*models.ScrapedPer
|
|||
},
|
||||
}
|
||||
|
||||
err := client.Query(context.TODO(), &q, vars)
|
||||
err := client.Query(ctx, &q, vars)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -175,7 +175,7 @@ func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash)
|
|||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
|
||||
func (s *stashScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
|
||||
client := s.getStashClient()
|
||||
|
||||
var q struct {
|
||||
|
@ -193,7 +193,7 @@ func (s *stashScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene,
|
|||
},
|
||||
}
|
||||
|
||||
err := client.Query(context.TODO(), &q, vars)
|
||||
err := client.Query(ctx, &q, vars)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -222,7 +222,7 @@ type scrapedSceneStash struct {
|
|||
Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"`
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s *stashScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
// query by MD5
|
||||
var q struct {
|
||||
FindScene *scrapedSceneStash `graphql:"findSceneByHash(input: $c)"`
|
||||
|
@ -243,7 +243,7 @@ func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS
|
|||
}
|
||||
|
||||
client := s.getStashClient()
|
||||
if err := client.Query(context.TODO(), &q, vars); err != nil {
|
||||
if err := client.Query(ctx, &q, vars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -262,7 +262,7 @@ func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
func (s *stashScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
return nil, errors.New("scrapeSceneByFragment not supported for stash scraper")
|
||||
}
|
||||
|
||||
|
@ -278,7 +278,7 @@ type scrapedGalleryStash struct {
|
|||
Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"`
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *stashScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
var q struct {
|
||||
FindGallery *scrapedGalleryStash `graphql:"findGalleryByHash(input: $c)"`
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ func (s *stashScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.
|
|||
}
|
||||
|
||||
client := s.getStashClient()
|
||||
if err := client.Query(context.TODO(), &q, vars); err != nil {
|
||||
if err := client.Query(ctx, &q, vars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
@ -313,19 +313,19 @@ func (s *stashScraper) scrapeGalleryByFragment(scene models.ScrapedGalleryInput)
|
|||
return nil, errors.New("scrapeGalleryByFragment not supported for stash scraper")
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||
func (s *stashScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
|
||||
return nil, errors.New("scrapePerformerByURL not supported for stash scraper")
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||
func (s *stashScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||
return nil, errors.New("scrapeSceneByURL not supported for stash scraper")
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||
func (s *stashScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
|
||||
return nil, errors.New("scrapeGalleryByURL not supported for stash scraper")
|
||||
}
|
||||
|
||||
func (s *stashScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||
func (s *stashScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
|
||||
return nil, errors.New("scrapeMovieByURL not supported for stash scraper")
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
@ -39,14 +40,14 @@ func (s *xpathScraper) getXpathScraper() *mappedScraper {
|
|||
return s.config.XPathScrapers[s.scraper.Scraper]
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeURL(url string) (*html.Node, *mappedScraper, error) {
|
||||
func (s *xpathScraper) scrapeURL(ctx context.Context, url string) (*html.Node, *mappedScraper, error) {
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
return nil, nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
@ -55,9 +56,9 @@ func (s *xpathScraper) scrapeURL(url string) (*html.Node, *mappedScraper, error)
|
|||
return doc, scraper, nil
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
|
||||
func (s *xpathScraper) scrapePerformerByURL(ctx context.Context, url string) (*models.ScrapedPerformer, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for performer by URL queries
|
||||
doc, scraper, err := s.scrapeURL(u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -66,9 +67,9 @@ func (s *xpathScraper) scrapePerformerByURL(url string) (*models.ScrapedPerforme
|
|||
return scraper.scrapePerformer(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
|
||||
func (s *xpathScraper) scrapeSceneByURL(ctx context.Context, url string) (*models.ScrapedScene, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for scene by URL queries
|
||||
doc, scraper, err := s.scrapeURL(u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -77,9 +78,9 @@ func (s *xpathScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
|
||||
func (s *xpathScraper) scrapeGalleryByURL(ctx context.Context, url string) (*models.ScrapedGallery, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for gallery by URL queries
|
||||
doc, scraper, err := s.scrapeURL(u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -88,9 +89,9 @@ func (s *xpathScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, e
|
|||
return scraper.scrapeGallery(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
|
||||
func (s *xpathScraper) scrapeMovieByURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
|
||||
u := replaceURL(url, s.scraper) // allow a URL Replace for movie by URL queries
|
||||
doc, scraper, err := s.scrapeURL(u)
|
||||
doc, scraper, err := s.scrapeURL(ctx, u)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -99,7 +100,7 @@ func (s *xpathScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error
|
|||
return scraper.scrapeMovie(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
|
||||
func (s *xpathScraper) scrapePerformersByName(ctx context.Context, name string) ([]*models.ScrapedPerformer, error) {
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
|
@ -114,7 +115,7 @@ func (s *xpathScraper) scrapePerformersByName(name string) ([]*models.ScrapedPer
|
|||
url := s.scraper.QueryURL
|
||||
url = strings.ReplaceAll(url, placeholder, escapedName)
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -128,7 +129,7 @@ func (s *xpathScraper) scrapePerformerByFragment(scrapedPerformer models.Scraped
|
|||
return nil, errors.New("scrapePerformerByFragment not supported for xpath scraper")
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
|
||||
func (s *xpathScraper) scrapeScenesByName(ctx context.Context, name string) ([]*models.ScrapedScene, error) {
|
||||
scraper := s.getXpathScraper()
|
||||
|
||||
if scraper == nil {
|
||||
|
@ -143,7 +144,7 @@ func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene,
|
|||
url := s.scraper.QueryURL
|
||||
url = strings.ReplaceAll(url, placeholder, escapedName)
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -153,7 +154,7 @@ func (s *xpathScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene,
|
|||
return scraper.scrapeScenes(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
func (s *xpathScraper) scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -167,7 +168,7 @@ func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS
|
|||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -177,7 +178,7 @@ func (s *xpathScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedS
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
func (s *xpathScraper) scrapeSceneByFragment(ctx context.Context, scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromScrapedScene(scene)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -191,7 +192,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*m
|
|||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -201,7 +202,7 @@ func (s *xpathScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*m
|
|||
return scraper.scrapeScene(q)
|
||||
}
|
||||
|
||||
func (s *xpathScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
func (s *xpathScraper) scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error) {
|
||||
// construct the URL
|
||||
queryURL := queryURLParametersFromGallery(gallery)
|
||||
if s.scraper.QueryURLReplacements != nil {
|
||||
|
@ -215,7 +216,7 @@ func (s *xpathScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.
|
|||
return nil, errors.New("xpath scraper with name " + s.scraper.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := s.loadURL(context.TODO(), url)
|
||||
doc, err := s.loadURL(ctx, url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -260,11 +261,10 @@ type xpathQuery struct {
|
|||
scraper *xpathScraper
|
||||
}
|
||||
|
||||
func (q *xpathQuery) runQuery(selector string) []string {
|
||||
func (q *xpathQuery) runQuery(selector string) ([]string, error) {
|
||||
found, err := htmlquery.QueryAll(q.doc, selector)
|
||||
if err != nil {
|
||||
logger.Warnf("Error parsing xpath expression '%s': %s", selector, err.Error())
|
||||
return nil
|
||||
return nil, fmt.Errorf("selector '%s': parse error: %v", selector, err)
|
||||
}
|
||||
|
||||
var ret []string
|
||||
|
@ -276,7 +276,7 @@ func (q *xpathQuery) runQuery(selector string) []string {
|
|||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (q *xpathQuery) nodeText(n *html.Node) string {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
@ -875,13 +876,23 @@ xPathScrapers:
|
|||
globalConfig := mockGlobalConfig{}
|
||||
|
||||
client := &http.Client{}
|
||||
s := createScraperFromConfig(*c, client, nil, globalConfig)
|
||||
performer, err := s.Performer.scrapeByURL(ts.URL)
|
||||
ctx := context.Background()
|
||||
s := newGroupScraper(*c, nil, globalConfig)
|
||||
us, ok := s.(urlScraper)
|
||||
if !ok {
|
||||
t.Error("couldn't convert scraper into url scraper")
|
||||
}
|
||||
content, err := us.viaURL(ctx, client, ts.URL, models.ScrapeContentTypePerformer)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error scraping performer: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
performer, ok := content.(*models.ScrapedPerformer)
|
||||
if !ok {
|
||||
t.Error("couldn't convert scraped content into a performer")
|
||||
}
|
||||
|
||||
verifyField(t, "The name", performer.Name, "Name")
|
||||
}
|
||||
|
|
|
@ -822,15 +822,6 @@ export const useDLNAStatus = () =>
|
|||
fetchPolicy: "no-cache",
|
||||
});
|
||||
|
||||
export const queryScrapeFreeones = (performerName: string) =>
|
||||
client.query<GQL.ScrapeFreeonesQuery>({
|
||||
query: GQL.ScrapeFreeonesDocument,
|
||||
variables: {
|
||||
performer_name: performerName,
|
||||
},
|
||||
fetchPolicy: "network-only",
|
||||
});
|
||||
|
||||
export const queryScrapePerformer = (
|
||||
scraperId: string,
|
||||
scrapedPerformer: GQL.ScrapedPerformerInput
|
||||
|
|
Loading…
Reference in New Issue