2020-09-17 09:57:18 +00:00
|
|
|
package stashbox
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2021-09-27 00:55:23 +00:00
|
|
|
"io"
|
2020-09-17 09:57:18 +00:00
|
|
|
"net/http"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/Yamashou/gqlgenc/client"
|
|
|
|
|
|
|
|
"github.com/stashapp/stash/pkg/logger"
|
2021-10-11 12:06:06 +00:00
|
|
|
"github.com/stashapp/stash/pkg/match"
|
2020-09-17 09:57:18 +00:00
|
|
|
"github.com/stashapp/stash/pkg/models"
|
|
|
|
"github.com/stashapp/stash/pkg/scraper/stashbox/graphql"
|
|
|
|
"github.com/stashapp/stash/pkg/utils"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Client represents the client interface to a stash-box server instance.
|
|
|
|
type Client struct {
|
2021-01-18 01:23:20 +00:00
|
|
|
client *graphql.Client
|
|
|
|
txnManager models.TransactionManager
|
2021-11-14 20:51:52 +00:00
|
|
|
box models.StashBox
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewClient returns a new instance of a stash-box client.
|
2021-01-18 01:23:20 +00:00
|
|
|
func NewClient(box models.StashBox, txnManager models.TransactionManager) *Client {
|
2020-09-17 09:57:18 +00:00
|
|
|
authHeader := func(req *http.Request) {
|
|
|
|
req.Header.Set("ApiKey", box.APIKey)
|
|
|
|
}
|
|
|
|
|
|
|
|
client := &graphql.Client{
|
|
|
|
Client: client.NewClient(http.DefaultClient, box.Endpoint, authHeader),
|
|
|
|
}
|
|
|
|
|
|
|
|
return &Client{
|
2021-01-18 01:23:20 +00:00
|
|
|
client: client,
|
|
|
|
txnManager: txnManager,
|
2021-11-14 20:51:52 +00:00
|
|
|
box: box,
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Cache and reuse the scraper HTTP client (#1855)
* Add Cookies directly to the request
Rather than maintaining a cookie jar on a one-shot HTTP client, maintain
the jar ourselves: make a new jar, then use it to select the right
cookies.
The cookies are set on the request rather than on the client. This will
retain the current behavior as we are always throwing the client away
after each use.
This patch enables the lifting of the http client as well over time.
* Introduce a cached scraper HTTP client
The scraper cache is augmented with an *http.Client. These are safe for
concurrent use, so the pointer can safely be passed around. Push this
into scraper configurations where applicable, next to the txnManagers.
When we issue a loadUrl request, do so on the cached *http.Client,
which will reuse existing idle connections in the client if any are
present.
* Set MaxIdleConnsPerHost. Closes #1850
We allow for up to 8 idle connections to a single host. This should
make concurrent operation toward the same host reuse connections, even
for sizeable concurrency.
The number isn't bumped excessively high. We should probably limit
concurrency toward a single site anyway, since we'll be able to overrun
a site with queries quite easily if we have many concurrent goroutines
issuing requests at the same time.
* Reinstate driverOptions / useCDP check
Use DeMorgan's laws to invert the logic and exit early. Fixes tests
breaking.
* Documentation fixup.
* Use the scraper http.Client when fetching images
Fold image fetchers onto the cached scraper http.Client as well. This
makes the scraper have a single http.Client cache for all its
operations.
Thread the client upwards to the relevant attachment points: either the
cache, or a stash_box instance, which is extended to include a pointer
to the client.
Style roughly follows that of txnManagers.
* Use the same http Client as the GraphQL client use
Rather than using http.DefaultClient, use the same client as the
GraphQL client use in the stash_box subsystem. This localizes the
client used in the subsystem into the constructing New.. call.
* Hoist HTTP client construction
Create a function for initializaing the HTTP Client we use. While here
hoist magic numbers into constants. Introduce a proper static redirect
error and use it in the client code as well.
* Reinstate printCookies
This is a debugging function, and it might still come in handy in the
future at some point.
* Nitpick comment.
* Minor tidy
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2021-10-20 05:12:24 +00:00
|
|
|
func (c Client) getHTTPClient() *http.Client {
|
|
|
|
return c.client.Client.Client
|
|
|
|
}
|
|
|
|
|
2020-09-17 09:57:18 +00:00
|
|
|
// QueryStashBoxScene queries stash-box for scenes using a query string.
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
func (c Client) QueryStashBoxScene(ctx context.Context, queryStr string) ([]*models.ScrapedScene, error) {
|
|
|
|
scenes, err := c.client.SearchScene(ctx, queryStr)
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
sceneFragments := scenes.SearchScene
|
|
|
|
|
|
|
|
var ret []*models.ScrapedScene
|
|
|
|
for _, s := range sceneFragments {
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
ss, err := c.sceneFragmentToScrapedScene(ctx, s)
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
ret = append(ret, ss)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStashBoxScenesByFingerprints queries stash-box for scenes using every
|
2021-09-07 01:54:22 +00:00
|
|
|
// scene's MD5/OSHASH checksum, or PHash, and returns results in the same order
|
|
|
|
// as the input slice.
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxScenesByFingerprints(ctx context.Context, sceneIDs []string) ([][]*models.ScrapedScene, error) {
|
2021-09-07 01:54:22 +00:00
|
|
|
ids, err := utils.StringSliceToIntSlice(sceneIDs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-10-20 06:22:25 +00:00
|
|
|
var fingerprints []*graphql.FingerprintQueryInput
|
2021-09-07 01:54:22 +00:00
|
|
|
// map fingerprints to their scene index
|
|
|
|
fpToScene := make(map[string][]int)
|
|
|
|
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-09-07 01:54:22 +00:00
|
|
|
qb := r.Scene()
|
|
|
|
|
|
|
|
for index, sceneID := range ids {
|
|
|
|
scene, err := qb.Find(sceneID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if scene == nil {
|
|
|
|
return fmt.Errorf("scene with id %d not found", sceneID)
|
|
|
|
}
|
|
|
|
|
|
|
|
if scene.Checksum.Valid {
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: scene.Checksum.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmMd5,
|
|
|
|
})
|
2021-09-07 01:54:22 +00:00
|
|
|
fpToScene[scene.Checksum.String] = append(fpToScene[scene.Checksum.String], index)
|
|
|
|
}
|
|
|
|
|
|
|
|
if scene.OSHash.Valid {
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: scene.OSHash.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmOshash,
|
|
|
|
})
|
2021-09-07 01:54:22 +00:00
|
|
|
fpToScene[scene.OSHash.String] = append(fpToScene[scene.OSHash.String], index)
|
|
|
|
}
|
|
|
|
|
|
|
|
if scene.Phash.Valid {
|
|
|
|
phashStr := utils.PhashToString(scene.Phash.Int64)
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: phashStr,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmPhash,
|
|
|
|
})
|
2021-09-07 01:54:22 +00:00
|
|
|
fpToScene[phashStr] = append(fpToScene[phashStr], index)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
allScenes, err := c.findStashBoxScenesByFingerprints(ctx, fingerprints)
|
2021-09-07 01:54:22 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// set the matched scenes back in their original order
|
|
|
|
ret := make([][]*models.ScrapedScene, len(sceneIDs))
|
|
|
|
for _, s := range allScenes {
|
|
|
|
var addedTo []int
|
|
|
|
for _, fp := range s.Fingerprints {
|
|
|
|
sceneIndexes := fpToScene[fp.Hash]
|
|
|
|
for _, index := range sceneIndexes {
|
|
|
|
if !utils.IntInclude(addedTo, index) {
|
|
|
|
addedTo = append(addedTo, index)
|
|
|
|
ret[index] = append(ret[index], s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStashBoxScenesByFingerprintsFlat queries stash-box for scenes using every
|
|
|
|
// scene's MD5/OSHASH checksum, or PHash, and returns results a flat slice.
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxScenesByFingerprintsFlat(ctx context.Context, sceneIDs []string) ([]*models.ScrapedScene, error) {
|
2021-01-18 01:23:20 +00:00
|
|
|
ids, err := utils.StringSliceToIntSlice(sceneIDs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-10-20 06:22:25 +00:00
|
|
|
var fingerprints []*graphql.FingerprintQueryInput
|
2020-09-17 09:57:18 +00:00
|
|
|
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-01-18 01:23:20 +00:00
|
|
|
qb := r.Scene()
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
for _, sceneID := range ids {
|
|
|
|
scene, err := qb.Find(sceneID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
if scene == nil {
|
|
|
|
return fmt.Errorf("scene with id %d not found", sceneID)
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
if scene.Checksum.Valid {
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: scene.Checksum.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmMd5,
|
|
|
|
})
|
2021-01-18 01:23:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if scene.OSHash.Valid {
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: scene.OSHash.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmOshash,
|
|
|
|
})
|
2021-01-18 01:23:20 +00:00
|
|
|
}
|
2021-04-11 23:04:40 +00:00
|
|
|
|
|
|
|
if scene.Phash.Valid {
|
2021-10-20 06:22:25 +00:00
|
|
|
fingerprints = append(fingerprints, &graphql.FingerprintQueryInput{
|
|
|
|
Hash: utils.PhashToString(scene.Phash.Int64),
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmPhash,
|
|
|
|
})
|
2021-04-11 23:04:40 +00:00
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
2021-01-18 01:23:20 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
return c.findStashBoxScenesByFingerprints(ctx, fingerprints)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-10-20 06:22:25 +00:00
|
|
|
func (c Client) findStashBoxScenesByFingerprints(ctx context.Context, fingerprints []*graphql.FingerprintQueryInput) ([]*models.ScrapedScene, error) {
|
2020-09-17 09:57:18 +00:00
|
|
|
var ret []*models.ScrapedScene
|
2020-11-04 21:28:58 +00:00
|
|
|
for i := 0; i < len(fingerprints); i += 100 {
|
|
|
|
end := i + 100
|
|
|
|
if end > len(fingerprints) {
|
|
|
|
end = len(fingerprints)
|
|
|
|
}
|
2021-10-20 06:22:25 +00:00
|
|
|
scenes, err := c.client.FindScenesByFullFingerprints(ctx, fingerprints[i:end])
|
2020-11-04 21:28:58 +00:00
|
|
|
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-11-04 21:28:58 +00:00
|
|
|
|
2021-10-20 06:22:25 +00:00
|
|
|
sceneFragments := scenes.FindScenesByFullFingerprints
|
2020-11-04 21:28:58 +00:00
|
|
|
|
|
|
|
for _, s := range sceneFragments {
|
2021-11-14 20:51:52 +00:00
|
|
|
ss, err := c.sceneFragmentToScrapedScene(ctx, s)
|
2020-11-04 21:28:58 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
ret = append(ret, ss)
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) SubmitStashBoxFingerprints(ctx context.Context, sceneIDs []string, endpoint string) (bool, error) {
|
2021-01-18 01:23:20 +00:00
|
|
|
ids, err := utils.StringSliceToIntSlice(sceneIDs)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2020-10-24 03:31:39 +00:00
|
|
|
|
|
|
|
var fingerprints []graphql.FingerprintSubmission
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-01-18 01:23:20 +00:00
|
|
|
qb := r.Scene()
|
2020-10-24 03:31:39 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
for _, sceneID := range ids {
|
|
|
|
scene, err := qb.Find(sceneID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-10-24 03:31:39 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
if scene == nil {
|
|
|
|
continue
|
|
|
|
}
|
2020-10-24 03:31:39 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
stashIDs, err := qb.GetStashIDs(sceneID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
sceneStashID := ""
|
|
|
|
for _, stashID := range stashIDs {
|
|
|
|
if stashID.Endpoint == endpoint {
|
|
|
|
sceneStashID = stashID.StashID
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
if sceneStashID != "" {
|
|
|
|
if scene.Checksum.Valid && scene.Duration.Valid {
|
|
|
|
fingerprint := graphql.FingerprintInput{
|
|
|
|
Hash: scene.Checksum.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmMd5,
|
|
|
|
Duration: int(scene.Duration.Float64),
|
|
|
|
}
|
|
|
|
fingerprints = append(fingerprints, graphql.FingerprintSubmission{
|
|
|
|
SceneID: sceneStashID,
|
|
|
|
Fingerprint: &fingerprint,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if scene.OSHash.Valid && scene.Duration.Valid {
|
|
|
|
fingerprint := graphql.FingerprintInput{
|
|
|
|
Hash: scene.OSHash.String,
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmOshash,
|
|
|
|
Duration: int(scene.Duration.Float64),
|
|
|
|
}
|
|
|
|
fingerprints = append(fingerprints, graphql.FingerprintSubmission{
|
|
|
|
SceneID: sceneStashID,
|
|
|
|
Fingerprint: &fingerprint,
|
|
|
|
})
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
2021-04-11 23:04:40 +00:00
|
|
|
|
|
|
|
if scene.Phash.Valid && scene.Duration.Valid {
|
|
|
|
fingerprint := graphql.FingerprintInput{
|
|
|
|
Hash: utils.PhashToString(scene.Phash.Int64),
|
|
|
|
Algorithm: graphql.FingerprintAlgorithmPhash,
|
|
|
|
Duration: int(scene.Duration.Float64),
|
|
|
|
}
|
|
|
|
fingerprints = append(fingerprints, graphql.FingerprintSubmission{
|
|
|
|
SceneID: sceneStashID,
|
|
|
|
Fingerprint: &fingerprint,
|
|
|
|
})
|
|
|
|
}
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
|
|
|
}
|
2021-01-18 01:23:20 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return false, err
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
return c.submitStashBoxFingerprints(ctx, fingerprints)
|
2020-10-24 03:31:39 +00:00
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) submitStashBoxFingerprints(ctx context.Context, fingerprints []graphql.FingerprintSubmission) (bool, error) {
|
2020-10-24 03:31:39 +00:00
|
|
|
for _, fingerprint := range fingerprints {
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
_, err := c.client.SubmitFingerprint(ctx, fingerprint)
|
2020-10-24 03:31:39 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2021-05-03 04:21:20 +00:00
|
|
|
// QueryStashBoxPerformer queries stash-box for performers using a query string.
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) QueryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.StashBoxPerformerQueryResult, error) {
|
|
|
|
performers, err := c.queryStashBoxPerformer(ctx, queryStr)
|
2021-05-03 04:21:20 +00:00
|
|
|
|
|
|
|
res := []*models.StashBoxPerformerQueryResult{
|
|
|
|
{
|
|
|
|
Query: queryStr,
|
|
|
|
Results: performers,
|
|
|
|
},
|
|
|
|
}
|
2021-09-07 01:54:22 +00:00
|
|
|
|
|
|
|
// set the deprecated image field
|
|
|
|
for _, p := range res[0].Results {
|
|
|
|
if len(p.Images) > 0 {
|
|
|
|
p.Image = &p.Images[0]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-03 04:21:20 +00:00
|
|
|
return res, err
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) queryStashBoxPerformer(ctx context.Context, queryStr string) ([]*models.ScrapedPerformer, error) {
|
|
|
|
performers, err := c.client.SearchPerformer(ctx, queryStr)
|
2021-05-03 04:21:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
performerFragments := performers.SearchPerformer
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
var ret []*models.ScrapedPerformer
|
2021-05-03 04:21:20 +00:00
|
|
|
for _, fragment := range performerFragments {
|
|
|
|
performer := performerFragmentToScrapedScenePerformer(*fragment)
|
|
|
|
ret = append(ret, performer)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStashBoxPerformersByNames queries stash-box for performers by name
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxPerformersByNames(ctx context.Context, performerIDs []string) ([]*models.StashBoxPerformerQueryResult, error) {
|
2021-05-03 04:21:20 +00:00
|
|
|
ids, err := utils.StringSliceToIntSlice(performerIDs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var performers []*models.Performer
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-05-03 04:21:20 +00:00
|
|
|
qb := r.Performer()
|
|
|
|
|
|
|
|
for _, performerID := range ids {
|
|
|
|
performer, err := qb.Find(performerID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if performer == nil {
|
|
|
|
return fmt.Errorf("performer with id %d not found", performerID)
|
|
|
|
}
|
|
|
|
|
|
|
|
if performer.Name.Valid {
|
|
|
|
performers = append(performers, performer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
return c.findStashBoxPerformersByNames(ctx, performers)
|
2021-05-03 04:21:20 +00:00
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxPerformersByPerformerNames(ctx context.Context, performerIDs []string) ([][]*models.ScrapedPerformer, error) {
|
2021-09-07 01:54:22 +00:00
|
|
|
ids, err := utils.StringSliceToIntSlice(performerIDs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var performers []*models.Performer
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-09-07 01:54:22 +00:00
|
|
|
qb := r.Performer()
|
|
|
|
|
|
|
|
for _, performerID := range ids {
|
|
|
|
performer, err := qb.Find(performerID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if performer == nil {
|
|
|
|
return fmt.Errorf("performer with id %d not found", performerID)
|
|
|
|
}
|
|
|
|
|
|
|
|
if performer.Name.Valid {
|
|
|
|
performers = append(performers, performer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
results, err := c.findStashBoxPerformersByNames(ctx, performers)
|
2021-09-07 01:54:22 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var ret [][]*models.ScrapedPerformer
|
|
|
|
for _, r := range results {
|
|
|
|
ret = append(ret, r.Results)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) findStashBoxPerformersByNames(ctx context.Context, performers []*models.Performer) ([]*models.StashBoxPerformerQueryResult, error) {
|
2021-05-03 04:21:20 +00:00
|
|
|
var ret []*models.StashBoxPerformerQueryResult
|
|
|
|
for _, performer := range performers {
|
|
|
|
if performer.Name.Valid {
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
performerResults, err := c.queryStashBoxPerformer(ctx, performer.Name.String)
|
2021-05-03 04:21:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
result := models.StashBoxPerformerQueryResult{
|
|
|
|
Query: strconv.Itoa(performer.ID),
|
|
|
|
Results: performerResults,
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = append(ret, &result)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
2020-09-17 09:57:18 +00:00
|
|
|
func findURL(urls []*graphql.URLFragment, urlType string) *string {
|
|
|
|
for _, u := range urls {
|
|
|
|
if u.Type == urlType {
|
|
|
|
ret := u.URL
|
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-05-03 04:21:20 +00:00
|
|
|
func enumToStringPtr(e fmt.Stringer, titleCase bool) *string {
|
2020-09-17 09:57:18 +00:00
|
|
|
if e != nil {
|
|
|
|
ret := e.String()
|
2021-05-03 04:21:20 +00:00
|
|
|
if titleCase {
|
|
|
|
ret = strings.Title(strings.ToLower(ret))
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func formatMeasurements(m graphql.MeasurementsFragment) *string {
|
|
|
|
if m.BandSize != nil && m.CupSize != nil && m.Hip != nil && m.Waist != nil {
|
|
|
|
ret := fmt.Sprintf("%d%s-%d-%d", *m.BandSize, *m.CupSize, *m.Waist, *m.Hip)
|
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func formatCareerLength(start, end *int) *string {
|
|
|
|
if start == nil && end == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var ret string
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
switch {
|
|
|
|
case end == nil:
|
2020-09-17 09:57:18 +00:00
|
|
|
ret = fmt.Sprintf("%d -", *start)
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
case start == nil:
|
2021-05-03 04:21:20 +00:00
|
|
|
ret = fmt.Sprintf("- %d", *end)
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
default:
|
2020-09-17 09:57:18 +00:00
|
|
|
ret = fmt.Sprintf("%d - %d", *start, *end)
|
|
|
|
}
|
|
|
|
|
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func formatBodyModifications(m []*graphql.BodyModificationFragment) *string {
|
|
|
|
if len(m) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var retSlice []string
|
|
|
|
for _, f := range m {
|
|
|
|
if f.Description == nil {
|
|
|
|
retSlice = append(retSlice, f.Location)
|
|
|
|
} else {
|
|
|
|
retSlice = append(retSlice, fmt.Sprintf("%s, %s", f.Location, *f.Description))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ret := strings.Join(retSlice, "; ")
|
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
|
Cache and reuse the scraper HTTP client (#1855)
* Add Cookies directly to the request
Rather than maintaining a cookie jar on a one-shot HTTP client, maintain
the jar ourselves: make a new jar, then use it to select the right
cookies.
The cookies are set on the request rather than on the client. This will
retain the current behavior as we are always throwing the client away
after each use.
This patch enables the lifting of the http client as well over time.
* Introduce a cached scraper HTTP client
The scraper cache is augmented with an *http.Client. These are safe for
concurrent use, so the pointer can safely be passed around. Push this
into scraper configurations where applicable, next to the txnManagers.
When we issue a loadUrl request, do so on the cached *http.Client,
which will reuse existing idle connections in the client if any are
present.
* Set MaxIdleConnsPerHost. Closes #1850
We allow for up to 8 idle connections to a single host. This should
make concurrent operation toward the same host reuse connections, even
for sizeable concurrency.
The number isn't bumped excessively high. We should probably limit
concurrency toward a single site anyway, since we'll be able to overrun
a site with queries quite easily if we have many concurrent goroutines
issuing requests at the same time.
* Reinstate driverOptions / useCDP check
Use DeMorgan's laws to invert the logic and exit early. Fixes tests
breaking.
* Documentation fixup.
* Use the scraper http.Client when fetching images
Fold image fetchers onto the cached scraper http.Client as well. This
makes the scraper have a single http.Client cache for all its
operations.
Thread the client upwards to the relevant attachment points: either the
cache, or a stash_box instance, which is extended to include a pointer
to the client.
Style roughly follows that of txnManagers.
* Use the same http Client as the GraphQL client use
Rather than using http.DefaultClient, use the same client as the
GraphQL client use in the stash_box subsystem. This localizes the
client used in the subsystem into the constructing New.. call.
* Hoist HTTP client construction
Create a function for initializaing the HTTP Client we use. While here
hoist magic numbers into constants. Introduce a proper static redirect
error and use it in the client code as well.
* Reinstate printCookies
This is a debugging function, and it might still come in handy in the
future at some point.
* Nitpick comment.
* Minor tidy
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2021-10-20 05:12:24 +00:00
|
|
|
func fetchImage(ctx context.Context, client *http.Client, url string) (*string, error) {
|
Toward better context handling (#1835)
* Use the request context
The code uses context.Background() in a flow where there is a
http.Request. Use the requests context instead.
* Use a true context in the plugin example
Let AddTag/RemoveTag take a context and use that context throughout
the example.
* Avoid the use of context.Background
Prefer context.TODO over context.Background deep in the call chain.
This marks the site as something which we need to context-handle
later, and also makes it clear to the reader that the context is
sort-of temporary in the code base.
While here, be consistent in handling the `act` variable in each
branch of the if .. { .. } .. check.
* Prefer context.TODO over context.Background
For the different scraping operations here, there is a context
higher up the call chain, which we ought to use. Mark the call-sites
as TODO for now, so we can come back later on a sweep of which parts
can be context-lifted.
* Thread context upwards
Initialization requires context for transactions. Thread the context
upward the call chain.
At the intialization call, add a context.TODO since we can't break this
yet. The singleton assumption prevents us from pulling it up into main for
now.
* make tasks context-aware
Change the task interface to understand contexts.
Pass the context down in some of the branches where it is needed.
* Make QueryStashBoxScene context-aware
This call naturally sits inside the request-context. Use it.
* Introduce a context in the JS plugin code
This allows us to use a context for HTTP calls inside the system.
Mark the context with a TODO at top level for now.
* Nitpick error formatting
Use %v rather than %s for error interfaces.
Do not begin an error strong with a capital letter.
* Avoid the use of http.Get in FFMPEG download chain
Since http.Get has no context, it isn't possible to break out or have
policy induced. The call will block until the GET completes. Rewrite
to use a http Request and provide a context.
Thread the context through the call chain for now. provide
context.TODO() at the top level of the initialization chain.
* Make getRemoteCDPWSAddress aware of contexts
Eliminate a call to http.Get and replace it with a context-aware
variant.
Push the context upwards in the call chain, but plug it before the
scraper interface so we don't have to rewrite said interface yet.
Plugged with context.TODO()
* Scraper: make the getImage function context-aware
Use a context, and pass it upwards. Plug it with context.TODO()
up the chain before the rewrite gets too much out of hand for now.
Minor tweaks along the way, remove a call to context.Background()
deep in the call chain.
* Make NOTIFY request context-aware
The call sits inside a Request-handler. So it's natural to use the
requests context as the context for the outgoing HTTP request.
* Use a context in the url scraper code
We are sitting in code which has a context, so utilize it for the
request as well.
* Use a context when checking versions
When we check the version of stash on Github, use a context. Thread
the context up to the initialization routine of the HTTP/GraphQL
server and plug it with a context.TODO() for now.
This paves the way for providing a context to the HTTP server code in a
future patch.
* Make utils func ReadImage context-aware
In almost all of the cases, there is a context in the call chain which
is a natural use. This is true for all the GraphQL mutations.
The exception is in task_stash_box_tag, so plug that task with
context.TODO() for now.
* Make stash-box get context-aware
Thread a context through the call chain until we hit the Client API.
Plug it with context.TODO() there for now.
* Enable the noctx linter
The code is now free of any uncontexted HTTP request. This means we
pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := client.Do(req)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
2021-09-27 00:55:23 +00:00
|
|
|
body, err := io.ReadAll(resp.Body)
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// determine the image type and set the base64 type
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
|
|
if contentType == "" {
|
|
|
|
contentType = http.DetectContentType(body)
|
|
|
|
}
|
|
|
|
|
|
|
|
img := "data:" + contentType + ";base64," + utils.GetBase64StringFromData(body)
|
|
|
|
return &img, nil
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
func performerFragmentToScrapedScenePerformer(p graphql.PerformerFragment) *models.ScrapedPerformer {
|
2020-10-24 03:31:39 +00:00
|
|
|
id := p.ID
|
|
|
|
images := []string{}
|
|
|
|
for _, image := range p.Images {
|
|
|
|
images = append(images, image.URL)
|
|
|
|
}
|
2021-09-07 01:54:22 +00:00
|
|
|
sp := &models.ScrapedPerformer{
|
|
|
|
Name: &p.Name,
|
2020-09-17 09:57:18 +00:00
|
|
|
Country: p.Country,
|
|
|
|
Measurements: formatMeasurements(p.Measurements),
|
|
|
|
CareerLength: formatCareerLength(p.CareerStartYear, p.CareerEndYear),
|
|
|
|
Tattoos: formatBodyModifications(p.Tattoos),
|
|
|
|
Piercings: formatBodyModifications(p.Piercings),
|
|
|
|
Twitter: findURL(p.Urls, "TWITTER"),
|
2020-10-24 03:31:39 +00:00
|
|
|
RemoteSiteID: &id,
|
|
|
|
Images: images,
|
2021-03-10 01:25:51 +00:00
|
|
|
// TODO - tags not currently supported
|
2020-09-17 09:57:18 +00:00
|
|
|
// graphql schema change to accommodate this. Leave off for now.
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
if len(sp.Images) > 0 {
|
|
|
|
sp.Image = &sp.Images[0]
|
|
|
|
}
|
|
|
|
|
2020-10-24 03:31:39 +00:00
|
|
|
if p.Height != nil && *p.Height > 0 {
|
2020-09-17 09:57:18 +00:00
|
|
|
hs := strconv.Itoa(*p.Height)
|
|
|
|
sp.Height = &hs
|
|
|
|
}
|
|
|
|
|
|
|
|
if p.Birthdate != nil {
|
|
|
|
b := p.Birthdate.Date
|
|
|
|
sp.Birthdate = &b
|
|
|
|
}
|
|
|
|
|
|
|
|
if p.Gender != nil {
|
2021-05-03 04:21:20 +00:00
|
|
|
sp.Gender = enumToStringPtr(p.Gender, false)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if p.Ethnicity != nil {
|
2021-05-03 04:21:20 +00:00
|
|
|
sp.Ethnicity = enumToStringPtr(p.Ethnicity, true)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if p.EyeColor != nil {
|
2021-05-03 04:21:20 +00:00
|
|
|
sp.EyeColor = enumToStringPtr(p.EyeColor, true)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-11-11 00:34:46 +00:00
|
|
|
if p.HairColor != nil {
|
|
|
|
sp.HairColor = enumToStringPtr(p.HairColor, true)
|
|
|
|
}
|
|
|
|
|
2020-09-17 09:57:18 +00:00
|
|
|
if p.BreastType != nil {
|
2021-05-03 04:21:20 +00:00
|
|
|
sp.FakeTits = enumToStringPtr(p.BreastType, true)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-12-07 22:36:06 +00:00
|
|
|
if len(p.Aliases) > 0 {
|
|
|
|
alias := strings.Join(p.Aliases, ", ")
|
|
|
|
sp.Aliases = &alias
|
|
|
|
}
|
|
|
|
|
2020-09-17 09:57:18 +00:00
|
|
|
return sp
|
|
|
|
}
|
|
|
|
|
Cache and reuse the scraper HTTP client (#1855)
* Add Cookies directly to the request
Rather than maintaining a cookie jar on a one-shot HTTP client, maintain
the jar ourselves: make a new jar, then use it to select the right
cookies.
The cookies are set on the request rather than on the client. This will
retain the current behavior as we are always throwing the client away
after each use.
This patch enables the lifting of the http client as well over time.
* Introduce a cached scraper HTTP client
The scraper cache is augmented with an *http.Client. These are safe for
concurrent use, so the pointer can safely be passed around. Push this
into scraper configurations where applicable, next to the txnManagers.
When we issue a loadUrl request, do so on the cached *http.Client,
which will reuse existing idle connections in the client if any are
present.
* Set MaxIdleConnsPerHost. Closes #1850
We allow for up to 8 idle connections to a single host. This should
make concurrent operation toward the same host reuse connections, even
for sizeable concurrency.
The number isn't bumped excessively high. We should probably limit
concurrency toward a single site anyway, since we'll be able to overrun
a site with queries quite easily if we have many concurrent goroutines
issuing requests at the same time.
* Reinstate driverOptions / useCDP check
Use DeMorgan's laws to invert the logic and exit early. Fixes tests
breaking.
* Documentation fixup.
* Use the scraper http.Client when fetching images
Fold image fetchers onto the cached scraper http.Client as well. This
makes the scraper have a single http.Client cache for all its
operations.
Thread the client upwards to the relevant attachment points: either the
cache, or a stash_box instance, which is extended to include a pointer
to the client.
Style roughly follows that of txnManagers.
* Use the same http Client as the GraphQL client use
Rather than using http.DefaultClient, use the same client as the
GraphQL client use in the stash_box subsystem. This localizes the
client used in the subsystem into the constructing New.. call.
* Hoist HTTP client construction
Create a function for initializaing the HTTP Client we use. While here
hoist magic numbers into constants. Introduce a proper static redirect
error and use it in the client code as well.
* Reinstate printCookies
This is a debugging function, and it might still come in handy in the
future at some point.
* Nitpick comment.
* Minor tidy
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2021-10-20 05:12:24 +00:00
|
|
|
func getFirstImage(ctx context.Context, client *http.Client, images []*graphql.ImageFragment) *string {
|
|
|
|
ret, err := fetchImage(ctx, client, images[0].URL)
|
2020-09-17 09:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
logger.Warnf("Error fetching image %s: %s", images[0].URL, err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
2020-10-24 03:31:39 +00:00
|
|
|
func getFingerprints(scene *graphql.SceneFragment) []*models.StashBoxFingerprint {
|
|
|
|
fingerprints := []*models.StashBoxFingerprint{}
|
|
|
|
for _, fp := range scene.Fingerprints {
|
|
|
|
fingerprint := models.StashBoxFingerprint{
|
|
|
|
Algorithm: fp.Algorithm.String(),
|
|
|
|
Hash: fp.Hash,
|
|
|
|
Duration: fp.Duration,
|
|
|
|
}
|
|
|
|
fingerprints = append(fingerprints, &fingerprint)
|
|
|
|
}
|
|
|
|
return fingerprints
|
|
|
|
}
|
|
|
|
|
2021-11-14 20:51:52 +00:00
|
|
|
func (c Client) sceneFragmentToScrapedScene(ctx context.Context, s *graphql.SceneFragment) (*models.ScrapedScene, error) {
|
2020-10-24 03:31:39 +00:00
|
|
|
stashID := s.ID
|
2020-09-17 09:57:18 +00:00
|
|
|
ss := &models.ScrapedScene{
|
2020-10-24 03:31:39 +00:00
|
|
|
Title: s.Title,
|
|
|
|
Date: s.Date,
|
|
|
|
Details: s.Details,
|
|
|
|
URL: findURL(s.Urls, "STUDIO"),
|
|
|
|
Duration: s.Duration,
|
|
|
|
RemoteSiteID: &stashID,
|
|
|
|
Fingerprints: getFingerprints(s),
|
2020-09-17 09:57:18 +00:00
|
|
|
// Image
|
|
|
|
// stash_id
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(s.Images) > 0 {
|
|
|
|
// TODO - #454 code sorts images by aspect ratio according to a wanted
|
|
|
|
// orientation. I'm just grabbing the first for now
|
2021-11-14 20:51:52 +00:00
|
|
|
ss.Image = getFirstImage(ctx, c.getHTTPClient(), s.Images)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-11-14 20:51:52 +00:00
|
|
|
if err := c.txnManager.WithReadTxn(ctx, func(r models.ReaderRepository) error {
|
2021-01-18 01:23:20 +00:00
|
|
|
pqb := r.Performer()
|
|
|
|
tqb := r.Tag()
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
if s.Studio != nil {
|
|
|
|
studioID := s.Studio.ID
|
2021-09-07 01:54:22 +00:00
|
|
|
ss.Studio = &models.ScrapedStudio{
|
2021-01-18 01:23:20 +00:00
|
|
|
Name: s.Studio.Name,
|
|
|
|
URL: findURL(s.Studio.Urls, "HOME"),
|
|
|
|
RemoteSiteID: &studioID,
|
|
|
|
}
|
|
|
|
|
2021-11-14 20:51:52 +00:00
|
|
|
err := match.ScrapedStudio(r.Studio(), ss.Studio, &c.box.Endpoint)
|
2021-01-18 01:23:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
for _, p := range s.Performers {
|
|
|
|
sp := performerFragmentToScrapedScenePerformer(p.Performer)
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-11-14 20:51:52 +00:00
|
|
|
err := match.ScrapedPerformer(pqb, sp, &c.box.Endpoint)
|
2021-01-18 01:23:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ss.Performers = append(ss.Performers, sp)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
for _, t := range s.Tags {
|
2021-09-07 01:54:22 +00:00
|
|
|
st := &models.ScrapedTag{
|
2021-01-18 01:23:20 +00:00
|
|
|
Name: t.Name,
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-10-11 12:06:06 +00:00
|
|
|
err := match.ScrapedTag(tqb, st)
|
2021-01-18 01:23:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-09-17 09:57:18 +00:00
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
ss.Tags = append(ss.Tags, st)
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
2021-01-18 01:23:20 +00:00
|
|
|
return nil
|
|
|
|
}); err != nil {
|
|
|
|
return nil, err
|
2020-09-17 09:57:18 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ss, nil
|
|
|
|
}
|
2021-05-03 04:21:20 +00:00
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxPerformerByID(ctx context.Context, id string) (*models.ScrapedPerformer, error) {
|
|
|
|
performer, err := c.client.FindPerformerByID(ctx, id)
|
2021-05-03 04:21:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ret := performerFragmentToScrapedScenePerformer(*performer.FindPerformer)
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
Scraper refactor middle (#2043)
* Push scrapeByURL into scrapers
Replace ScrapePerfomerByURL, ScrapeMovie..., ... with ScrapeByURL in
the scraperActionImpl interface. This allows us to delete a lot of
repeated code in the scrapers and replace the central part with a
switch on the scraper type.
* Fold name scraping into one call
Follow up on scraper refactoring. Name scrapers use the same code path.
This allows us to restructure some code and kill some functions, adding
variance to the name scraping code. It allows us to remove some code
repetition as well.
* Do not export loop refs.
* Simplify fragment scraping
Generalize fragment scrapers into ScrapeByFragment. This simplifies
fragment code flows into a simpler pathing which should be easier
to handle in the future.
* Eliminate more context.TODO()
In a number of cases, we have a context now. Use the context rather than
TODO() for those cases in order to make those operations cancellable.
* Pass the context for the stashbox scraper
This removes all context.TODO() in the path of the stashbox scraper,
and replaces it with the context that's present on each of the paths.
* Pass the context into subscrapers
Mostly a mechanical update, where we pass in the context for
subscraping. This removes the final context.TODO() in the scraper
code.
* Warn on unknown fields from scripts
A common mistake for new script writers are that they return fields
not known to stash. For instance the name "description" is used rather
than "details".
Decode disallowing unknown fields. If this fails, use a tee-reader to
fall back to the old behavior, but print a warning for the user in this
case. Thus, we retain the old behavior, but print warnings for scripts
which fails the more strict unknown-fields detection.
* Nil-check before running the postprocessing chain
Fixes panics when scraping returns nil values.
* Lift nil-ness in post-postprocessing
If the struct we are trying to post-process is nil, we shouldn't
enter the postprocessing flow at all. Pass the struct as a value
rather than a pointer, eliminating nil-checks as we go. Use the
top-level postProcess call to make the nil-check and then abort there
if the object we are looking at is nil.
* Allow conversion routines to handle values
If we have a non-pointer type in the interface, we should also convert
those into ScrapedContent. Otherwise we get errors on deprecated
functions.
2021-11-26 00:20:06 +00:00
|
|
|
func (c Client) FindStashBoxPerformerByName(ctx context.Context, name string) (*models.ScrapedPerformer, error) {
|
|
|
|
performers, err := c.client.SearchPerformer(ctx, name)
|
2021-05-03 04:21:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-09-07 01:54:22 +00:00
|
|
|
var ret *models.ScrapedPerformer
|
2021-05-03 04:21:20 +00:00
|
|
|
for _, performer := range performers.SearchPerformer {
|
2021-05-25 01:03:09 +00:00
|
|
|
if strings.EqualFold(performer.Name, name) {
|
2021-05-03 04:21:20 +00:00
|
|
|
ret = performerFragmentToScrapedScenePerformer(*performer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|