stash/pkg/scraper/stash.go

404 lines
11 KiB
Go
Raw Normal View History

package scraper
import (
"context"
"database/sql"
"errors"
"strconv"
"github.com/jinzhu/copier"
"github.com/shurcooL/graphql"
"github.com/stashapp/stash/pkg/models"
)
type stashScraper struct {
scraper scraperTypeConfig
config config
globalConfig GlobalConfig
txnManager models.TransactionManager
}
func newStashScraper(scraper scraperTypeConfig, txnManager models.TransactionManager, config config, globalConfig GlobalConfig) *stashScraper {
return &stashScraper{
scraper: scraper,
config: config,
globalConfig: globalConfig,
txnManager: txnManager,
}
}
func (s *stashScraper) getStashClient() *graphql.Client {
url := s.config.StashServer.URL
return graphql.NewClient(url+"/graphql", nil)
}
type stashFindPerformerNamePerformer struct {
ID string `json:"id" graphql:"id"`
Name string `json:"name" graphql:"name"`
}
func (p stashFindPerformerNamePerformer) toPerformer() *models.ScrapedPerformer {
return &models.ScrapedPerformer{
Name: &p.Name,
// put id into the URL field
URL: &p.ID,
}
}
type stashFindPerformerNamesResultType struct {
Count int `graphql:"count"`
Performers []*stashFindPerformerNamePerformer `graphql:"performers"`
}
func (s *stashScraper) scrapePerformersByName(name string) ([]*models.ScrapedPerformer, error) {
client := s.getStashClient()
var q struct {
FindPerformers stashFindPerformerNamesResultType `graphql:"findPerformers(filter: $f)"`
}
page := 1
perPage := 10
vars := map[string]interface{}{
"f": models.FindFilterType{
Q: &name,
Page: &page,
PerPage: &perPage,
},
}
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
err := client.Query(context.TODO(), &q, vars)
if err != nil {
return nil, err
}
var ret []*models.ScrapedPerformer
for _, p := range q.FindPerformers.Performers {
ret = append(ret, p.toPerformer())
}
return ret, nil
}
// need a separate for scraped stash performers - does not include remote_site_id or image
type scrapedTagStash struct {
Name string `graphql:"name" json:"name"`
}
type scrapedPerformerStash struct {
Name *string `graphql:"name" json:"name"`
Gender *string `graphql:"gender" json:"gender"`
URL *string `graphql:"url" json:"url"`
Twitter *string `graphql:"twitter" json:"twitter"`
Instagram *string `graphql:"instagram" json:"instagram"`
Birthdate *string `graphql:"birthdate" json:"birthdate"`
Ethnicity *string `graphql:"ethnicity" json:"ethnicity"`
Country *string `graphql:"country" json:"country"`
EyeColor *string `graphql:"eye_color" json:"eye_color"`
Height *string `graphql:"height" json:"height"`
Measurements *string `graphql:"measurements" json:"measurements"`
FakeTits *string `graphql:"fake_tits" json:"fake_tits"`
CareerLength *string `graphql:"career_length" json:"career_length"`
Tattoos *string `graphql:"tattoos" json:"tattoos"`
Piercings *string `graphql:"piercings" json:"piercings"`
Aliases *string `graphql:"aliases" json:"aliases"`
Tags []*scrapedTagStash `graphql:"tags" json:"tags"`
Details *string `graphql:"details" json:"details"`
DeathDate *string `graphql:"death_date" json:"death_date"`
HairColor *string `graphql:"hair_color" json:"hair_color"`
Weight *string `graphql:"weight" json:"weight"`
}
func (s *stashScraper) scrapePerformerByFragment(scrapedPerformer models.ScrapedPerformerInput) (*models.ScrapedPerformer, error) {
client := s.getStashClient()
var q struct {
FindPerformer *scrapedPerformerStash `graphql:"findPerformer(id: $f)"`
}
performerID := *scrapedPerformer.URL
// get the id from the URL field
vars := map[string]interface{}{
"f": performerID,
}
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
err := client.Query(context.TODO(), &q, vars)
if err != nil {
return nil, err
}
// need to copy back to a scraped performer
ret := models.ScrapedPerformer{}
err = copier.Copy(&ret, q.FindPerformer)
if err != nil {
return nil, err
}
// get the performer image directly
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
ret.Image, err = getStashPerformerImage(context.TODO(), s.config.StashServer.URL, performerID, s.globalConfig)
if err != nil {
return nil, err
}
return &ret, nil
}
type scrapedStudioStash struct {
Name string `graphql:"name" json:"name"`
URL *string `graphql:"url" json:"url"`
}
type stashFindSceneNamesResultType struct {
Count int `graphql:"count"`
Scenes []*scrapedSceneStash `graphql:"scenes"`
}
func (s *stashScraper) scrapedStashSceneToScrapedScene(scene *scrapedSceneStash) (*models.ScrapedScene, error) {
ret := models.ScrapedScene{}
err := copier.Copy(&ret, scene)
if err != nil {
return nil, err
}
// get the performer image directly
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, scene.ID, s.globalConfig)
if err != nil {
return nil, err
}
return &ret, nil
}
func (s *stashScraper) scrapeScenesByName(name string) ([]*models.ScrapedScene, error) {
client := s.getStashClient()
var q struct {
FindScenes stashFindSceneNamesResultType `graphql:"findScenes(filter: $f)"`
}
page := 1
perPage := 10
vars := map[string]interface{}{
"f": models.FindFilterType{
Q: &name,
Page: &page,
PerPage: &perPage,
},
}
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
err := client.Query(context.TODO(), &q, vars)
if err != nil {
return nil, err
}
var ret []*models.ScrapedScene
for _, scene := range q.FindScenes.Scenes {
converted, err := s.scrapedStashSceneToScrapedScene(scene)
if err != nil {
return nil, err
}
ret = append(ret, converted)
}
return ret, nil
}
type scrapedSceneStash struct {
ID string `graphql:"id" json:"id"`
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
File *models.SceneFileType `graphql:"file" json:"file"`
Studio *scrapedStudioStash `graphql:"studio" json:"studio"`
Tags []*scrapedTagStash `graphql:"tags" json:"tags"`
Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"`
}
func (s *stashScraper) scrapeSceneByScene(scene *models.Scene) (*models.ScrapedScene, error) {
// query by MD5
var q struct {
FindScene *scrapedSceneStash `graphql:"findSceneByHash(input: $c)"`
2020-08-06 01:21:14 +00:00
}
type SceneHashInput struct {
Checksum *string `graphql:"checksum" json:"checksum"`
Oshash *string `graphql:"oshash" json:"oshash"`
}
input := SceneHashInput{
Checksum: &scene.Checksum.String,
Oshash: &scene.OSHash.String,
}
vars := map[string]interface{}{
2020-08-06 01:21:14 +00:00
"c": &input,
}
client := s.getStashClient()
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
if err := client.Query(context.TODO(), &q, vars); err != nil {
return nil, err
}
// need to copy back to a scraped scene
ret, err := s.scrapedStashSceneToScrapedScene(q.FindScene)
if err != nil {
return nil, err
}
// get the performer image directly
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
ret.Image, err = getStashSceneImage(context.TODO(), s.config.StashServer.URL, q.FindScene.ID, s.globalConfig)
if err != nil {
return nil, err
}
return ret, nil
}
func (s *stashScraper) scrapeSceneByFragment(scene models.ScrapedSceneInput) (*models.ScrapedScene, error) {
return nil, errors.New("scrapeSceneByFragment not supported for stash scraper")
}
type scrapedGalleryStash struct {
ID string `graphql:"id" json:"id"`
Title *string `graphql:"title" json:"title"`
Details *string `graphql:"details" json:"details"`
URL *string `graphql:"url" json:"url"`
Date *string `graphql:"date" json:"date"`
File *models.SceneFileType `graphql:"file" json:"file"`
Studio *scrapedStudioStash `graphql:"studio" json:"studio"`
Tags []*scrapedTagStash `graphql:"tags" json:"tags"`
Performers []*scrapedPerformerStash `graphql:"performers" json:"performers"`
}
2020-10-20 22:24:32 +00:00
func (s *stashScraper) scrapeGalleryByGallery(gallery *models.Gallery) (*models.ScrapedGallery, error) {
2020-10-20 22:24:32 +00:00
var q struct {
FindGallery *scrapedGalleryStash `graphql:"findGalleryByHash(input: $c)"`
2020-10-20 22:24:32 +00:00
}
type GalleryHashInput struct {
Checksum *string `graphql:"checksum" json:"checksum"`
}
input := GalleryHashInput{
Checksum: &gallery.Checksum,
2020-10-20 22:24:32 +00:00
}
vars := map[string]interface{}{
"c": &input,
}
client := s.getStashClient()
Toward better context handling (#1835) * Use the request context The code uses context.Background() in a flow where there is a http.Request. Use the requests context instead. * Use a true context in the plugin example Let AddTag/RemoveTag take a context and use that context throughout the example. * Avoid the use of context.Background Prefer context.TODO over context.Background deep in the call chain. This marks the site as something which we need to context-handle later, and also makes it clear to the reader that the context is sort-of temporary in the code base. While here, be consistent in handling the `act` variable in each branch of the if .. { .. } .. check. * Prefer context.TODO over context.Background For the different scraping operations here, there is a context higher up the call chain, which we ought to use. Mark the call-sites as TODO for now, so we can come back later on a sweep of which parts can be context-lifted. * Thread context upwards Initialization requires context for transactions. Thread the context upward the call chain. At the intialization call, add a context.TODO since we can't break this yet. The singleton assumption prevents us from pulling it up into main for now. * make tasks context-aware Change the task interface to understand contexts. Pass the context down in some of the branches where it is needed. * Make QueryStashBoxScene context-aware This call naturally sits inside the request-context. Use it. * Introduce a context in the JS plugin code This allows us to use a context for HTTP calls inside the system. Mark the context with a TODO at top level for now. * Nitpick error formatting Use %v rather than %s for error interfaces. Do not begin an error strong with a capital letter. * Avoid the use of http.Get in FFMPEG download chain Since http.Get has no context, it isn't possible to break out or have policy induced. The call will block until the GET completes. Rewrite to use a http Request and provide a context. Thread the context through the call chain for now. provide context.TODO() at the top level of the initialization chain. * Make getRemoteCDPWSAddress aware of contexts Eliminate a call to http.Get and replace it with a context-aware variant. Push the context upwards in the call chain, but plug it before the scraper interface so we don't have to rewrite said interface yet. Plugged with context.TODO() * Scraper: make the getImage function context-aware Use a context, and pass it upwards. Plug it with context.TODO() up the chain before the rewrite gets too much out of hand for now. Minor tweaks along the way, remove a call to context.Background() deep in the call chain. * Make NOTIFY request context-aware The call sits inside a Request-handler. So it's natural to use the requests context as the context for the outgoing HTTP request. * Use a context in the url scraper code We are sitting in code which has a context, so utilize it for the request as well. * Use a context when checking versions When we check the version of stash on Github, use a context. Thread the context up to the initialization routine of the HTTP/GraphQL server and plug it with a context.TODO() for now. This paves the way for providing a context to the HTTP server code in a future patch. * Make utils func ReadImage context-aware In almost all of the cases, there is a context in the call chain which is a natural use. This is true for all the GraphQL mutations. The exception is in task_stash_box_tag, so plug that task with context.TODO() for now. * Make stash-box get context-aware Thread a context through the call chain until we hit the Client API. Plug it with context.TODO() there for now. * Enable the noctx linter The code is now free of any uncontexted HTTP request. This means we pass the noctx linter, and we can enable it in the code base.
2021-10-14 04:32:41 +00:00
if err := client.Query(context.TODO(), &q, vars); err != nil {
2020-10-20 22:24:32 +00:00
return nil, err
}
// need to copy back to a scraped scene
ret := models.ScrapedGallery{}
if err := copier.Copy(&ret, q.FindGallery); err != nil {
2020-10-20 22:24:32 +00:00
return nil, err
}
return &ret, nil
}
func (s *stashScraper) scrapeGalleryByFragment(scene models.ScrapedGalleryInput) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByFragment not supported for stash scraper")
}
func (s *stashScraper) scrapePerformerByURL(url string) (*models.ScrapedPerformer, error) {
return nil, errors.New("scrapePerformerByURL not supported for stash scraper")
}
func (s *stashScraper) scrapeSceneByURL(url string) (*models.ScrapedScene, error) {
return nil, errors.New("scrapeSceneByURL not supported for stash scraper")
}
2020-10-20 22:24:32 +00:00
func (s *stashScraper) scrapeGalleryByURL(url string) (*models.ScrapedGallery, error) {
return nil, errors.New("scrapeGalleryByURL not supported for stash scraper")
}
func (s *stashScraper) scrapeMovieByURL(url string) (*models.ScrapedMovie, error) {
return nil, errors.New("scrapeMovieByURL not supported for stash scraper")
}
func getScene(sceneID int, txnManager models.TransactionManager) (*models.Scene, error) {
var ret *models.Scene
if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
var err error
ret, err = r.Scene().Find(sceneID)
return err
}); err != nil {
return nil, err
}
return ret, nil
}
2020-10-20 22:24:32 +00:00
func sceneToUpdateInput(scene *models.Scene) models.SceneUpdateInput {
toStringPtr := func(s sql.NullString) *string {
if s.Valid {
return &s.String
}
return nil
2020-10-20 22:24:32 +00:00
}
dateToStringPtr := func(s models.SQLiteDate) *string {
if s.Valid {
return &s.String
}
return nil
}
return models.SceneUpdateInput{
ID: strconv.Itoa(scene.ID),
Title: toStringPtr(scene.Title),
Details: toStringPtr(scene.Details),
URL: toStringPtr(scene.URL),
Date: dateToStringPtr(scene.Date),
}
}
func getGallery(galleryID int, txnManager models.TransactionManager) (*models.Gallery, error) {
var ret *models.Gallery
if err := txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
var err error
ret, err = r.Gallery().Find(galleryID)
return err
}); err != nil {
return nil, err
}
return ret, nil
2020-10-20 22:24:32 +00:00
}
func galleryToUpdateInput(gallery *models.Gallery) models.GalleryUpdateInput {
toStringPtr := func(s sql.NullString) *string {
if s.Valid {
return &s.String
}
return nil
}
dateToStringPtr := func(s models.SQLiteDate) *string {
if s.Valid {
return &s.String
}
return nil
}
return models.GalleryUpdateInput{
ID: strconv.Itoa(gallery.ID),
Title: toStringPtr(gallery.Title),
Details: toStringPtr(gallery.Details),
URL: toStringPtr(gallery.URL),
Date: dateToStringPtr(gallery.Date),
}
}