2020-07-21 04:06:25 +00:00
|
|
|
package scraper
|
|
|
|
|
|
|
|
import (
	"errors"
	"fmt"
	"math"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/utils"
	"gopkg.in/yaml.v2"
)
|
|
|
|
|
|
|
|
// mappedQuery is the query backend the mapped scraper configuration runs
// against.
type mappedQuery interface {
	// runQuery returns the values found for selector.
	runQuery(selector string) []string
	// subScrape returns a query for the resource referenced by value. Callers
	// in this file treat a nil result as "nothing to scrape".
	subScrape(value string) mappedQuery
}
|
|
|
|
|
|
|
|
// commonMappedConfig maps placeholder strings to the text that replaces them
// when a selector is expanded by applyCommon.
type commonMappedConfig map[string]string
|
|
|
|
|
|
|
|
// mappedConfig maps an output key to the attribute configuration used to
// produce the value(s) stored under that key.
type mappedConfig map[string]mappedScraperAttrConfig
|
|
|
|
|
|
|
|
func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string {
|
|
|
|
if c == nil {
|
|
|
|
return src
|
|
|
|
}
|
|
|
|
|
|
|
|
ret := src
|
|
|
|
for commonKey, commonVal := range c {
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
ret = strings.ReplaceAll(ret, commonKey, commonVal)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedConfig) process(q mappedQuery, common commonMappedConfig) mappedResults {
|
|
|
|
var ret mappedResults
|
|
|
|
|
|
|
|
for k, attrConfig := range s {
|
|
|
|
|
|
|
|
if attrConfig.Fixed != "" {
|
|
|
|
// TODO - not sure if this needs to set _all_ indexes for the key
|
|
|
|
const i = 0
|
|
|
|
ret = ret.setKey(i, k, attrConfig.Fixed)
|
|
|
|
} else {
|
|
|
|
selector := attrConfig.Selector
|
|
|
|
selector = s.applyCommon(common, selector)
|
|
|
|
|
|
|
|
found := q.runQuery(selector)
|
|
|
|
|
|
|
|
if len(found) > 0 {
|
|
|
|
result := s.postProcess(q, attrConfig, found)
|
|
|
|
for i, text := range result {
|
|
|
|
ret = ret.setKey(i, k, text)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedConfig) postProcess(q mappedQuery, attrConfig mappedScraperAttrConfig, found []string) []string {
|
|
|
|
// check if we're concatenating the results into a single result
|
|
|
|
var ret []string
|
|
|
|
if attrConfig.hasConcat() {
|
|
|
|
result := attrConfig.concatenateResults(found)
|
|
|
|
result = attrConfig.postProcess(result, q)
|
|
|
|
if attrConfig.hasSplit() {
|
2021-04-29 01:38:55 +00:00
|
|
|
results := attrConfig.splitString(result)
|
2021-05-16 06:40:54 +00:00
|
|
|
results = attrConfig.cleanResults(results)
|
2021-04-29 01:38:55 +00:00
|
|
|
return results
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ret = []string{result}
|
|
|
|
} else {
|
|
|
|
for _, text := range found {
|
|
|
|
text = attrConfig.postProcess(text, q)
|
|
|
|
if attrConfig.hasSplit() {
|
|
|
|
return attrConfig.splitString(text)
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = append(ret, text)
|
|
|
|
}
|
2021-05-16 06:40:54 +00:00
|
|
|
ret = attrConfig.cleanResults(ret)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
type mappedSceneScraperConfig struct {
|
|
|
|
mappedConfig
|
|
|
|
|
2021-03-10 01:25:51 +00:00
|
|
|
Tags mappedConfig `yaml:"Tags"`
|
|
|
|
Performers mappedPerformerScraperConfig `yaml:"Performers"`
|
|
|
|
Studio mappedConfig `yaml:"Studio"`
|
|
|
|
Movies mappedConfig `yaml:"Movies"`
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
type _mappedSceneScraperConfig mappedSceneScraperConfig
|
|
|
|
|
|
|
|
// YAML keys of the nested sub-configurations that UnmarshalYAML separates from
// the free-form attribute entries.
const (
	mappedScraperConfigSceneTags       = "Tags"
	mappedScraperConfigScenePerformers = "Performers"
	mappedScraperConfigSceneStudio     = "Studio"
	mappedScraperConfigSceneMovies     = "Movies"
)
|
|
|
|
|
|
|
|
func (s *mappedSceneScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|
|
|
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
|
|
|
// remarshal to yaml and pass that down to the base map
|
|
|
|
parentMap := make(map[string]interface{})
|
|
|
|
if err := unmarshal(parentMap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// move the known sub-fields to a separate map
|
|
|
|
thisMap := make(map[string]interface{})
|
|
|
|
|
|
|
|
thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
|
|
|
|
thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
|
|
|
|
thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]
|
|
|
|
thisMap[mappedScraperConfigSceneMovies] = parentMap[mappedScraperConfigSceneMovies]
|
|
|
|
|
|
|
|
delete(parentMap, mappedScraperConfigSceneTags)
|
|
|
|
delete(parentMap, mappedScraperConfigScenePerformers)
|
|
|
|
delete(parentMap, mappedScraperConfigSceneStudio)
|
|
|
|
delete(parentMap, mappedScraperConfigSceneMovies)
|
|
|
|
|
|
|
|
// re-unmarshal the sub-fields
|
|
|
|
yml, err := yaml.Marshal(thisMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// needs to be a different type to prevent infinite recursion
|
|
|
|
c := _mappedSceneScraperConfig{}
|
|
|
|
if err := yaml.Unmarshal(yml, &c); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
*s = mappedSceneScraperConfig(c)
|
|
|
|
|
|
|
|
yml, err = yaml.Marshal(parentMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-10-20 22:24:32 +00:00
|
|
|
type mappedGalleryScraperConfig struct {
|
|
|
|
mappedConfig
|
|
|
|
|
|
|
|
Tags mappedConfig `yaml:"Tags"`
|
|
|
|
Performers mappedConfig `yaml:"Performers"`
|
|
|
|
Studio mappedConfig `yaml:"Studio"`
|
|
|
|
}
|
|
|
|
type _mappedGalleryScraperConfig mappedGalleryScraperConfig
|
|
|
|
|
|
|
|
func (s *mappedGalleryScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|
|
|
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
|
|
|
// remarshal to yaml and pass that down to the base map
|
|
|
|
parentMap := make(map[string]interface{})
|
|
|
|
if err := unmarshal(parentMap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// move the known sub-fields to a separate map
|
|
|
|
thisMap := make(map[string]interface{})
|
|
|
|
|
|
|
|
thisMap[mappedScraperConfigSceneTags] = parentMap[mappedScraperConfigSceneTags]
|
|
|
|
thisMap[mappedScraperConfigScenePerformers] = parentMap[mappedScraperConfigScenePerformers]
|
|
|
|
thisMap[mappedScraperConfigSceneStudio] = parentMap[mappedScraperConfigSceneStudio]
|
|
|
|
|
|
|
|
delete(parentMap, mappedScraperConfigSceneTags)
|
|
|
|
delete(parentMap, mappedScraperConfigScenePerformers)
|
|
|
|
delete(parentMap, mappedScraperConfigSceneStudio)
|
|
|
|
|
|
|
|
// re-unmarshal the sub-fields
|
|
|
|
yml, err := yaml.Marshal(thisMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// needs to be a different type to prevent infinite recursion
|
|
|
|
c := _mappedGalleryScraperConfig{}
|
|
|
|
if err := yaml.Unmarshal(yml, &c); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
*s = mappedGalleryScraperConfig(c)
|
|
|
|
|
|
|
|
yml, err = yaml.Marshal(parentMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
type mappedPerformerScraperConfig struct {
|
|
|
|
mappedConfig
|
2021-03-10 01:25:51 +00:00
|
|
|
|
|
|
|
Tags mappedConfig `yaml:"Tags"`
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
2021-03-10 01:25:51 +00:00
|
|
|
type _mappedPerformerScraperConfig mappedPerformerScraperConfig
|
|
|
|
|
|
|
|
// YAML key of the nested tag configuration inside a performer configuration.
const (
	mappedScraperConfigPerformerTags = "Tags"
)
|
2020-07-21 04:06:25 +00:00
|
|
|
|
|
|
|
func (s *mappedPerformerScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
2021-03-10 01:25:51 +00:00
|
|
|
// HACK - unmarshal to map first, then remove known scene sub-fields, then
|
|
|
|
// remarshal to yaml and pass that down to the base map
|
|
|
|
parentMap := make(map[string]interface{})
|
|
|
|
if err := unmarshal(parentMap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// move the known sub-fields to a separate map
|
|
|
|
thisMap := make(map[string]interface{})
|
|
|
|
|
|
|
|
thisMap[mappedScraperConfigPerformerTags] = parentMap[mappedScraperConfigPerformerTags]
|
|
|
|
|
|
|
|
delete(parentMap, mappedScraperConfigPerformerTags)
|
|
|
|
|
|
|
|
// re-unmarshal the sub-fields
|
|
|
|
yml, err := yaml.Marshal(thisMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// needs to be a different type to prevent infinite recursion
|
|
|
|
c := _mappedPerformerScraperConfig{}
|
|
|
|
if err := yaml.Unmarshal(yml, &c); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
*s = mappedPerformerScraperConfig(c)
|
|
|
|
|
|
|
|
yml, err = yaml.Marshal(parentMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
2020-08-10 05:34:15 +00:00
|
|
|
type mappedMovieScraperConfig struct {
|
|
|
|
mappedConfig
|
|
|
|
|
|
|
|
Studio mappedConfig `yaml:"Studio"`
|
|
|
|
}
|
|
|
|
type _mappedMovieScraperConfig mappedMovieScraperConfig
|
|
|
|
|
|
|
|
// YAML key of the nested studio configuration inside a movie configuration.
const (
	mappedScraperConfigMovieStudio = "Studio"
)
|
|
|
|
|
|
|
|
func (s *mappedMovieScraperConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|
|
|
// HACK - unmarshal to map first, then remove known movie sub-fields, then
|
|
|
|
// remarshal to yaml and pass that down to the base map
|
|
|
|
parentMap := make(map[string]interface{})
|
|
|
|
if err := unmarshal(parentMap); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// move the known sub-fields to a separate map
|
|
|
|
thisMap := make(map[string]interface{})
|
|
|
|
|
|
|
|
thisMap[mappedScraperConfigMovieStudio] = parentMap[mappedScraperConfigMovieStudio]
|
|
|
|
|
|
|
|
delete(parentMap, mappedScraperConfigMovieStudio)
|
|
|
|
|
|
|
|
// re-unmarshal the sub-fields
|
|
|
|
yml, err := yaml.Marshal(thisMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// needs to be a different type to prevent infinite recursion
|
|
|
|
c := _mappedMovieScraperConfig{}
|
|
|
|
if err := yaml.Unmarshal(yml, &c); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
*s = mappedMovieScraperConfig(c)
|
|
|
|
|
|
|
|
yml, err = yaml.Marshal(parentMap)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := yaml.Unmarshal(yml, &s.mappedConfig); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
// mappedRegexConfig describes a single regular-expression replacement: every
// match of Regex is replaced with With.
type mappedRegexConfig struct {
	Regex string `yaml:"regex"`
	With  string `yaml:"with"`
}

// mappedRegexConfigs is a list of replacements that are applied in order.
type mappedRegexConfigs []mappedRegexConfig
|
|
|
|
|
|
|
|
func (c mappedRegexConfig) apply(value string) string {
|
|
|
|
if c.Regex != "" {
|
|
|
|
re, err := regexp.Compile(c.Regex)
|
|
|
|
if err != nil {
|
|
|
|
logger.Warnf("Error compiling regex '%s': %s", c.Regex, err.Error())
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
ret := re.ReplaceAllString(value, c.With)
|
|
|
|
|
|
|
|
// trim leading and trailing whitespace
|
|
|
|
// this is done to maintain backwards compatibility with existing
|
|
|
|
// scrapers
|
|
|
|
ret = strings.TrimSpace(ret)
|
|
|
|
|
|
|
|
logger.Debugf(`Replace: '%s' with '%s'`, c.Regex, c.With)
|
|
|
|
logger.Debugf("Before: %s", value)
|
|
|
|
logger.Debugf("After: %s", ret)
|
|
|
|
return ret
|
|
|
|
}
|
|
|
|
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c mappedRegexConfigs) apply(value string) string {
|
|
|
|
// apply regex in order
|
|
|
|
for _, config := range c {
|
|
|
|
value = config.apply(value)
|
|
|
|
}
|
|
|
|
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
type postProcessAction interface {
|
|
|
|
Apply(value string, q mappedQuery) string
|
|
|
|
}
|
|
|
|
|
|
|
|
type postProcessParseDate string
|
|
|
|
|
|
|
|
func (p *postProcessParseDate) Apply(value string, q mappedQuery) string {
|
|
|
|
parseDate := string(*p)
|
|
|
|
|
2021-04-06 23:09:04 +00:00
|
|
|
const internalDateFormat = "2006-01-02"
|
|
|
|
|
2021-05-25 21:29:51 +00:00
|
|
|
valueLower := strings.ToLower(value)
|
|
|
|
if valueLower == "today" || valueLower == "yesterday" { // handle today, yesterday
|
2021-04-06 23:09:04 +00:00
|
|
|
dt := time.Now()
|
2021-05-25 21:29:51 +00:00
|
|
|
if valueLower == "yesterday" { // subtract 1 day from now
|
2021-04-06 23:09:04 +00:00
|
|
|
dt = dt.AddDate(0, 0, -1)
|
|
|
|
}
|
|
|
|
return dt.Format(internalDateFormat)
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
if parseDate == "" {
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
// try to parse the date using the pattern
|
|
|
|
// if it fails, then just fall back to the original value
|
|
|
|
parsedValue, err := time.Parse(parseDate, value)
|
|
|
|
if err != nil {
|
|
|
|
logger.Warnf("Error parsing date string '%s' using format '%s': %s", value, parseDate, err.Error())
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert it into our date format
|
|
|
|
return parsedValue.Format(internalDateFormat)
|
|
|
|
}
|
|
|
|
|
2021-05-21 02:20:12 +00:00
|
|
|
type postProcessSubtractDays bool
|
|
|
|
|
|
|
|
func (p *postProcessSubtractDays) Apply(value string, q mappedQuery) string {
|
|
|
|
const internalDateFormat = "2006-01-02"
|
|
|
|
|
|
|
|
i, err := strconv.Atoi(value)
|
|
|
|
if err != nil {
|
|
|
|
logger.Warnf("Error parsing day string %s: %s", value, err)
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
dt := time.Now()
|
|
|
|
dt = dt.AddDate(0, 0, -i)
|
|
|
|
return dt.Format(internalDateFormat)
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
type postProcessReplace mappedRegexConfigs
|
|
|
|
|
|
|
|
func (c *postProcessReplace) Apply(value string, q mappedQuery) string {
|
|
|
|
replace := mappedRegexConfigs(*c)
|
|
|
|
return replace.apply(value)
|
|
|
|
}
|
|
|
|
|
|
|
|
type postProcessSubScraper mappedScraperAttrConfig
|
|
|
|
|
|
|
|
func (p *postProcessSubScraper) Apply(value string, q mappedQuery) string {
|
|
|
|
subScrapeConfig := mappedScraperAttrConfig(*p)
|
|
|
|
|
|
|
|
logger.Debugf("Sub-scraping for: %s", value)
|
|
|
|
ss := q.subScrape(value)
|
|
|
|
|
|
|
|
if ss != nil {
|
|
|
|
found := ss.runQuery(subScrapeConfig.Selector)
|
|
|
|
|
|
|
|
if len(found) > 0 {
|
|
|
|
// check if we're concatenating the results into a single result
|
|
|
|
var result string
|
|
|
|
if subScrapeConfig.hasConcat() {
|
|
|
|
result = subScrapeConfig.concatenateResults(found)
|
|
|
|
} else {
|
|
|
|
result = found[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
result = subScrapeConfig.postProcess(result, ss)
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
type postProcessMap map[string]string
|
|
|
|
|
|
|
|
func (p *postProcessMap) Apply(value string, q mappedQuery) string {
|
|
|
|
// return the mapped value if present
|
|
|
|
m := *p
|
|
|
|
mapped, ok := m[value]
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
return mapped
|
|
|
|
}
|
|
|
|
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
2020-08-12 01:17:43 +00:00
|
|
|
type postProcessFeetToCm bool
|
|
|
|
|
|
|
|
func (p *postProcessFeetToCm) Apply(value string, q mappedQuery) string {
|
|
|
|
const foot_in_cm = 30.48
|
|
|
|
const inch_in_cm = 2.54
|
|
|
|
|
|
|
|
reg := regexp.MustCompile("[0-9]+")
|
|
|
|
filtered := reg.FindAllString(value, -1)
|
|
|
|
|
|
|
|
var feet float64
|
|
|
|
var inches float64
|
|
|
|
if len(filtered) > 0 {
|
|
|
|
feet, _ = strconv.ParseFloat(filtered[0], 64)
|
|
|
|
}
|
|
|
|
if len(filtered) > 1 {
|
|
|
|
inches, _ = strconv.ParseFloat(filtered[1], 64)
|
|
|
|
}
|
|
|
|
|
|
|
|
var centimeters = feet*foot_in_cm + inches*inch_in_cm
|
|
|
|
|
|
|
|
// Return rounded integer string
|
|
|
|
return strconv.Itoa(int(math.Round(centimeters)))
|
|
|
|
}
|
|
|
|
|
2021-04-26 03:31:25 +00:00
|
|
|
type postProcessLbToKg bool
|
|
|
|
|
|
|
|
func (p *postProcessLbToKg) Apply(value string, q mappedQuery) string {
|
|
|
|
const lb_in_kg = 0.45359237
|
|
|
|
w, err := strconv.ParseFloat(value, 64)
|
|
|
|
if err == nil {
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
w *= lb_in_kg
|
2021-04-26 03:31:25 +00:00
|
|
|
value = strconv.Itoa(int(math.Round(w)))
|
|
|
|
}
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
type mappedPostProcessAction struct {
|
2021-05-21 02:20:12 +00:00
|
|
|
ParseDate string `yaml:"parseDate"`
|
|
|
|
SubtractDays bool `yaml:"subtractDays"`
|
|
|
|
Replace mappedRegexConfigs `yaml:"replace"`
|
|
|
|
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
|
|
|
Map map[string]string `yaml:"map"`
|
|
|
|
FeetToCm bool `yaml:"feetToCm"`
|
|
|
|
LbToKg bool `yaml:"lbToKg"`
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (a mappedPostProcessAction) ToPostProcessAction() (postProcessAction, error) {
|
|
|
|
var found string
|
|
|
|
var ret postProcessAction
|
|
|
|
|
|
|
|
if a.ParseDate != "" {
|
|
|
|
found = "parseDate"
|
|
|
|
action := postProcessParseDate(a.ParseDate)
|
|
|
|
ret = &action
|
|
|
|
}
|
|
|
|
if len(a.Replace) > 0 {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "replace")
|
|
|
|
}
|
|
|
|
found = "replace"
|
|
|
|
action := postProcessReplace(a.Replace)
|
|
|
|
ret = &action
|
|
|
|
}
|
|
|
|
if a.SubScraper != nil {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "subScraper")
|
|
|
|
}
|
|
|
|
found = "subScraper"
|
|
|
|
action := postProcessSubScraper(*a.SubScraper)
|
|
|
|
ret = &action
|
|
|
|
}
|
|
|
|
if a.Map != nil {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "map")
|
|
|
|
}
|
|
|
|
found = "map"
|
|
|
|
action := postProcessMap(a.Map)
|
|
|
|
ret = &action
|
|
|
|
}
|
2020-08-12 01:17:43 +00:00
|
|
|
if a.FeetToCm {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "feetToCm")
|
|
|
|
}
|
|
|
|
found = "feetToCm"
|
|
|
|
action := postProcessFeetToCm(a.FeetToCm)
|
|
|
|
ret = &action
|
|
|
|
}
|
2021-04-26 03:31:25 +00:00
|
|
|
if a.LbToKg {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "lbToKg")
|
|
|
|
}
|
|
|
|
found = "lbToKg"
|
|
|
|
action := postProcessLbToKg(a.LbToKg)
|
|
|
|
ret = &action
|
|
|
|
}
|
2021-05-21 02:20:12 +00:00
|
|
|
if a.SubtractDays {
|
|
|
|
if found != "" {
|
|
|
|
return nil, fmt.Errorf("post-process actions must have a single field, found %s and %s", found, "subtractDays")
|
|
|
|
}
|
2021-09-09 04:10:08 +00:00
|
|
|
// found = "subtractDays"
|
2021-05-21 02:20:12 +00:00
|
|
|
action := postProcessSubtractDays(a.SubtractDays)
|
|
|
|
ret = &action
|
|
|
|
}
|
2020-07-21 04:06:25 +00:00
|
|
|
|
|
|
|
if ret == nil {
|
|
|
|
return nil, errors.New("invalid post-process action")
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type mappedScraperAttrConfig struct {
|
|
|
|
Selector string `yaml:"selector"`
|
|
|
|
Fixed string `yaml:"fixed"`
|
|
|
|
PostProcess []mappedPostProcessAction `yaml:"postProcess"`
|
|
|
|
Concat string `yaml:"concat"`
|
|
|
|
Split string `yaml:"split"`
|
|
|
|
|
|
|
|
postProcessActions []postProcessAction
|
|
|
|
|
Enable gocritic (#1848)
* Don't capitalize local variables
ValidCodecs -> validCodecs
* Capitalize deprecation markers
A deprecated marker should be capitalized.
* Use re.MustCompile for static regexes
If the regex fails to compile, it's a programmer error, and should be
treated as such. The regex is entirely static.
* Simplify else-if constructions
Rewrite
else { if cond {}}
to
else if cond {}
* Use a switch statement to analyze formats
Break an if-else chain. While here, simplify code flow.
Also introduce a proper static error for unsupported image formats,
paving the way for being able to check against the error.
* Rewrite ifElse chains into switch statements
The "Effective Go" https://golang.org/doc/effective_go#switch document
mentions it is more idiomatic to write if-else chains as switches when
it is possible.
Find all the plain rewrite occurrences in the code base and rewrite.
In some cases, the if-else chains are replaced by a switch scrutinizer.
That is, the code sequence
if x == 1 {
..
} else if x == 2 {
..
} else if x == 3 {
...
}
can be rewritten into
switch x {
case 1:
..
case 2:
..
case 3:
..
}
which is clearer for the compiler: it can decide if the switch is
better served by a jump-table then a branch-chain.
* Rewrite switches, introduce static errors
Introduce two new static errors:
* `ErrNotImplmented`
* `ErrNotSupported`
And use these rather than forming new generative errors whenever the
code is called. Code can now test on the errors (since they are static
and the pointers to them wont change).
Also rewrite ifElse chains into switches in this part of the code base.
* Introduce a StashBoxError in configuration
Since all stashbox errors are the same, treat them as such in the code
base. While here, rewrite an ifElse chain.
In the future, it might be beneifical to refactor configuration errors
into one error which can handle missing fields, which context the error
occurs in and so on. But for now, try to get an overview of the error
categories by hoisting them into static errors.
* Get rid of an else-block in transaction handling
If we succesfully `recover()`, we then always `panic()`. This means the
rest of the code is not reachable, so we can avoid having an else-block
here.
It also solves an ifElse-chain style check in the code base.
* Use strings.ReplaceAll
Rewrite
strings.Replace(s, o, n, -1)
into
strings.ReplaceAll(s, o, n)
To make it consistent and clear that we are doing an all-replace in the
string rather than replacing parts of it. It's more of a nitpick since
there are no implementation differences: the stdlib implementation is
just to supply -1.
* Rewrite via gocritic's assignOp
Statements of the form
x = x + e
is rewritten into
x += e
where applicable.
* Formatting
* Review comments handled
Stash-box is a proper noun.
Rewrite a switch into an if-chain which returns on the first error
encountered.
* Use context.TODO() over context.Background()
Patch in the same vein as everything else: use the TODO() marker so we
can search for it later and link it into the context tree/tentacle once
it reaches down to this level in the code base.
* Tell the linter to ignore a section in manager_tasks.go
The section is less readable, so mark it with a nolint for now. Because
the rewrite enables a ifElseChain, also mark that as nolint for now.
* Use strings.ReplaceAll over strings.Replace
* Apply an ifElse rewrite
else { if .. { .. } } rewrite into else if { .. }
* Use switch-statements over ifElseChains
Rewrite chains of if-else into switch statements. Where applicable,
add an early nil-guard to simplify case analysis. Also, in
ScanTask's Start(..), invert the logic to outdent the whole block, and
help the reader: if it's not a scene, the function flow is now far more
local to the top of the function, and it's clear that the rest of the
function has to do with scene management.
* Enable gocritic on the code base.
Disable appendAssign for now since we aren't passing that check yet.
* Document the nolint additions
* Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
|
|
|
// Deprecated: use PostProcess instead
|
2020-07-21 04:06:25 +00:00
|
|
|
ParseDate string `yaml:"parseDate"`
|
|
|
|
Replace mappedRegexConfigs `yaml:"replace"`
|
|
|
|
SubScraper *mappedScraperAttrConfig `yaml:"subScraper"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// _mappedScraperAttrConfig has the same underlying struct as
// mappedScraperAttrConfig but, being a separately defined type, none of its
// methods. UnmarshalYAML decodes into it so that decoding the full object does
// not re-enter UnmarshalYAML.
type _mappedScraperAttrConfig mappedScraperAttrConfig
|
|
|
|
|
|
|
|
func (c *mappedScraperAttrConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|
|
|
// try unmarshalling into a string first
|
|
|
|
if err := unmarshal(&c.Selector); err != nil {
|
|
|
|
// if it's a type error then we try to unmarshall to the full object
|
Errorlint sweep + minor linter tweaks (#1796)
* Replace error assertions with Go 1.13 style
Use `errors.As(..)` over type assertions. This enables better use of
wrapped errors in the future, and lets us pass some errorlint checks
in the process.
The rewrite is entirely mechanical, and uses a standard idiom for
doing so.
* Use Go 1.13's errors.Is(..)
Rather than directly checking for error equality, use errors.Is(..).
This protects against error wrapping issues in the future.
Even though something like sql.ErrNoRows doesn't need the wrapping, do
so anyway, for the sake of consistency throughout the code base.
The change almost lets us pass the `errorlint` Go checker except for
a missing case in `js.go` which is to be handled separately; it isn't
mechanical, like these changes are.
* Remove goconst
goconst isn't a useful linter in many cases, because it's false positive
rate is high. It's 100% for the current code base.
* Avoid direct comparison of errors in recover()
Assert that we are catching an error from recover(). If we are,
check that the error caught matches errStop.
* Enable the "errorlint" checker
Configure the checker to avoid checking for errorf wraps. These are
often false positives since the suggestion is to blanket wrap errors
with %w, and that exposes the underlying API which you might not want
to do.
The other warnings are good however, and with the current patch stack,
the code base passes all these checks as well.
* Configure rowserrcheck
The project uses sqlx. Configure rowserrcheck to include said package.
* Mechanically rewrite a large set of errors
Mechanically search for errors that look like
fmt.Errorf("...%s", err.Error())
and rewrite those into
fmt.Errorf("...%v", err)
The `fmt` package is error-aware and knows how to call err.Error()
itself.
The rationale is that this is more idiomatic Go; it paves the
way for using error wrapping later with %w in some sites.
This patch only addresses the entirely mechanical rewriting caught by
a project-side search/replace. There are more individual sites not
addressed by this patch.
2021-10-12 03:03:08 +00:00
|
|
|
var typeErr *yaml.TypeError
|
|
|
|
if !errors.As(err, &typeErr) {
|
2020-07-21 04:06:25 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// unmarshall to full object
|
|
|
|
// need it as a separate object
|
|
|
|
t := _mappedScraperAttrConfig{}
|
|
|
|
if err = unmarshal(&t); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
*c = mappedScraperAttrConfig(t)
|
|
|
|
}
|
|
|
|
|
|
|
|
return c.convertPostProcessActions()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *mappedScraperAttrConfig) convertPostProcessActions() error {
|
|
|
|
// ensure we don't have the old deprecated fields and the new post process field
|
|
|
|
if len(c.PostProcess) > 0 {
|
|
|
|
if c.ParseDate != "" || len(c.Replace) > 0 || c.SubScraper != nil {
|
|
|
|
return errors.New("cannot include postProcess and (parseDate, replace, subScraper) deprecated fields")
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert xpathPostProcessAction actions to postProcessActions
|
|
|
|
for _, a := range c.PostProcess {
|
|
|
|
action, err := a.ToPostProcessAction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
c.postProcessActions = append(c.postProcessActions, action)
|
|
|
|
}
|
|
|
|
|
|
|
|
c.PostProcess = nil
|
|
|
|
} else {
|
|
|
|
// convert old deprecated fields if present
|
|
|
|
// in same order as they used to be executed
|
|
|
|
if len(c.Replace) > 0 {
|
|
|
|
action := postProcessReplace(c.Replace)
|
|
|
|
c.postProcessActions = append(c.postProcessActions, &action)
|
|
|
|
c.Replace = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.SubScraper != nil {
|
|
|
|
action := postProcessSubScraper(*c.SubScraper)
|
|
|
|
c.postProcessActions = append(c.postProcessActions, &action)
|
|
|
|
c.SubScraper = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.ParseDate != "" {
|
|
|
|
action := postProcessParseDate(c.ParseDate)
|
|
|
|
c.postProcessActions = append(c.postProcessActions, &action)
|
|
|
|
c.ParseDate = ""
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c mappedScraperAttrConfig) hasConcat() bool {
|
|
|
|
return c.Concat != ""
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c mappedScraperAttrConfig) hasSplit() bool {
|
|
|
|
return c.Split != ""
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c mappedScraperAttrConfig) concatenateResults(nodes []string) string {
|
|
|
|
separator := c.Concat
|
2021-05-25 01:03:09 +00:00
|
|
|
return strings.Join(nodes, separator)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
2021-05-16 06:40:54 +00:00
|
|
|
func (c mappedScraperAttrConfig) cleanResults(nodes []string) []string {
|
|
|
|
cleaned := utils.StrUnique(nodes) // remove duplicate values
|
|
|
|
cleaned = utils.StrDelete(cleaned, "") // remove empty values
|
|
|
|
return cleaned
|
2021-04-29 01:38:55 +00:00
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
func (c mappedScraperAttrConfig) splitString(value string) []string {
|
|
|
|
separator := c.Split
|
|
|
|
var res []string
|
|
|
|
|
|
|
|
if separator == "" {
|
|
|
|
return []string{value}
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, str := range strings.Split(value, separator) {
|
|
|
|
if str != "" {
|
|
|
|
res = append(res, str)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c mappedScraperAttrConfig) postProcess(value string, q mappedQuery) string {
|
|
|
|
for _, action := range c.postProcessActions {
|
|
|
|
value = action.Apply(value, q)
|
|
|
|
}
|
|
|
|
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
// mappedScrapers is a collection of mapped scraper definitions indexed by a
// string key (presumably the scraper's name — confirm against callers).
type mappedScrapers map[string]*mappedScraper
|
|
|
|
|
|
|
|
type mappedScraper struct {
|
|
|
|
Common commonMappedConfig `yaml:"common"`
|
|
|
|
Scene *mappedSceneScraperConfig `yaml:"scene"`
|
2020-10-20 22:24:32 +00:00
|
|
|
Gallery *mappedGalleryScraperConfig `yaml:"gallery"`
|
2020-07-21 04:06:25 +00:00
|
|
|
Performer *mappedPerformerScraperConfig `yaml:"performer"`
|
2020-08-10 05:34:15 +00:00
|
|
|
Movie *mappedMovieScraperConfig `yaml:"movie"`
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// mappedResult holds the scraped values for a single object. Each key is the
// name of the destination struct field that apply writes the value to.
type mappedResult map[string]string
|
|
|
|
// mappedResults is an ordered list of mappedResult values, one per scraped
// object; setKey addresses its elements by index.
type mappedResults []mappedResult
|
|
|
|
|
|
|
|
func (r mappedResult) apply(dest interface{}) {
|
|
|
|
destVal := reflect.ValueOf(dest)
|
|
|
|
|
|
|
|
// dest should be a pointer
|
|
|
|
destVal = destVal.Elem()
|
|
|
|
|
|
|
|
for key, value := range r {
|
|
|
|
field := destVal.FieldByName(key)
|
|
|
|
|
|
|
|
if field.IsValid() {
|
|
|
|
var reflectValue reflect.Value
|
|
|
|
if field.Kind() == reflect.Ptr {
|
|
|
|
// need to copy the value, otherwise everything is set to the
|
|
|
|
// same pointer
|
|
|
|
localValue := value
|
|
|
|
reflectValue = reflect.ValueOf(&localValue)
|
|
|
|
} else {
|
|
|
|
reflectValue = reflect.ValueOf(value)
|
|
|
|
}
|
|
|
|
|
|
|
|
field.Set(reflectValue)
|
|
|
|
} else {
|
|
|
|
logger.Errorf("Field %s does not exist in %T", key, dest)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r mappedResults) setKey(index int, key string, value string) mappedResults {
|
|
|
|
if index >= len(r) {
|
|
|
|
r = append(r, make(mappedResult))
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.Debugf(`[%d][%s] = %s`, index, key, value)
|
|
|
|
r[index][key] = value
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedScraper) scrapePerformer(q mappedQuery) (*models.ScrapedPerformer, error) {
|
|
|
|
var ret models.ScrapedPerformer
|
|
|
|
|
|
|
|
performerMap := s.Performer
|
|
|
|
if performerMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2021-03-10 01:25:51 +00:00
|
|
|
performerTagsMap := performerMap.Tags
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
results := performerMap.process(q, s.Common)
|
|
|
|
if len(results) > 0 {
|
|
|
|
results[0].apply(&ret)
|
2021-03-10 01:25:51 +00:00
|
|
|
|
|
|
|
// now apply the tags
|
|
|
|
if performerTagsMap != nil {
|
|
|
|
logger.Debug(`Processing performer tags:`)
|
|
|
|
tagResults := performerTagsMap.process(q, s.Common)
|
|
|
|
|
|
|
|
for _, p := range tagResults {
|
2021-09-07 01:54:22 +00:00
|
|
|
tag := &models.ScrapedTag{}
|
2021-03-10 01:25:51 +00:00
|
|
|
p.apply(tag)
|
|
|
|
ret.Tags = append(ret.Tags, tag)
|
|
|
|
}
|
|
|
|
}
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedScraper) scrapePerformers(q mappedQuery) ([]*models.ScrapedPerformer, error) {
|
|
|
|
var ret []*models.ScrapedPerformer
|
|
|
|
|
|
|
|
performerMap := s.Performer
|
|
|
|
if performerMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
results := performerMap.process(q, s.Common)
|
|
|
|
for _, r := range results {
|
|
|
|
var p models.ScrapedPerformer
|
|
|
|
r.apply(&p)
|
|
|
|
ret = append(ret, &p)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
func (s mappedScraper) processScene(q mappedQuery, r mappedResult) *models.ScrapedScene {
|
2020-07-21 04:06:25 +00:00
|
|
|
var ret models.ScrapedScene
|
|
|
|
|
|
|
|
sceneScraperConfig := s.Scene
|
|
|
|
|
|
|
|
scenePerformersMap := sceneScraperConfig.Performers
|
|
|
|
sceneTagsMap := sceneScraperConfig.Tags
|
|
|
|
sceneStudioMap := sceneScraperConfig.Studio
|
|
|
|
sceneMoviesMap := sceneScraperConfig.Movies
|
|
|
|
|
2021-03-10 01:25:51 +00:00
|
|
|
scenePerformerTagsMap := scenePerformersMap.Tags
|
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
r.apply(&ret)
|
2021-03-10 01:25:51 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
// process performer tags once
|
|
|
|
var performerTagResults mappedResults
|
|
|
|
if scenePerformerTagsMap != nil {
|
|
|
|
performerTagResults = scenePerformerTagsMap.process(q, s.Common)
|
|
|
|
}
|
2021-03-10 01:25:51 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
// now apply the performers and tags
|
|
|
|
if scenePerformersMap.mappedConfig != nil {
|
|
|
|
logger.Debug(`Processing scene performers:`)
|
|
|
|
performerResults := scenePerformersMap.process(q, s.Common)
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
for _, p := range performerResults {
|
|
|
|
performer := &models.ScrapedPerformer{}
|
|
|
|
p.apply(performer)
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
for _, p := range performerTagResults {
|
2021-09-07 01:54:22 +00:00
|
|
|
tag := &models.ScrapedTag{}
|
2020-07-21 04:06:25 +00:00
|
|
|
p.apply(tag)
|
|
|
|
ret.Tags = append(ret.Tags, tag)
|
|
|
|
}
|
2021-09-14 04:54:53 +00:00
|
|
|
|
|
|
|
ret.Performers = append(ret.Performers, performer)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
2021-09-14 04:54:53 +00:00
|
|
|
}
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
if sceneTagsMap != nil {
|
|
|
|
logger.Debug(`Processing scene tags:`)
|
|
|
|
tagResults := sceneTagsMap.process(q, s.Common)
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
for _, p := range tagResults {
|
|
|
|
tag := &models.ScrapedTag{}
|
|
|
|
p.apply(tag)
|
|
|
|
ret.Tags = append(ret.Tags, tag)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
2021-09-14 04:54:53 +00:00
|
|
|
}
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
if sceneStudioMap != nil {
|
|
|
|
logger.Debug(`Processing scene studio:`)
|
|
|
|
studioResults := sceneStudioMap.process(q, s.Common)
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
if len(studioResults) > 0 {
|
|
|
|
studio := &models.ScrapedStudio{}
|
|
|
|
studioResults[0].apply(studio)
|
|
|
|
ret.Studio = studio
|
|
|
|
}
|
|
|
|
}
|
2020-07-21 04:06:25 +00:00
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
if sceneMoviesMap != nil {
|
|
|
|
logger.Debug(`Processing scene movies:`)
|
|
|
|
movieResults := sceneMoviesMap.process(q, s.Common)
|
|
|
|
|
|
|
|
for _, p := range movieResults {
|
|
|
|
movie := &models.ScrapedMovie{}
|
|
|
|
p.apply(movie)
|
|
|
|
ret.Movies = append(ret.Movies, movie)
|
2020-07-21 04:06:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-14 04:54:53 +00:00
|
|
|
return &ret
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedScraper) scrapeScenes(q mappedQuery) ([]*models.ScrapedScene, error) {
|
|
|
|
var ret []*models.ScrapedScene
|
|
|
|
|
|
|
|
sceneScraperConfig := s.Scene
|
|
|
|
sceneMap := sceneScraperConfig.mappedConfig
|
|
|
|
if sceneMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.Debug(`Processing scenes:`)
|
|
|
|
results := sceneMap.process(q, s.Common)
|
|
|
|
for _, r := range results {
|
|
|
|
logger.Debug(`Processing scene:`)
|
|
|
|
ret = append(ret, s.processScene(q, r))
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s mappedScraper) scrapeScene(q mappedQuery) (*models.ScrapedScene, error) {
|
|
|
|
var ret models.ScrapedScene
|
|
|
|
|
|
|
|
sceneScraperConfig := s.Scene
|
|
|
|
sceneMap := sceneScraperConfig.mappedConfig
|
|
|
|
if sceneMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
logger.Debug(`Processing scene:`)
|
|
|
|
results := sceneMap.process(q, s.Common)
|
|
|
|
if len(results) > 0 {
|
|
|
|
ss := s.processScene(q, results[0])
|
|
|
|
ret = *ss
|
|
|
|
}
|
|
|
|
|
2020-07-21 04:06:25 +00:00
|
|
|
return &ret, nil
|
|
|
|
}
|
2020-08-10 05:34:15 +00:00
|
|
|
|
2020-10-20 22:24:32 +00:00
|
|
|
func (s mappedScraper) scrapeGallery(q mappedQuery) (*models.ScrapedGallery, error) {
|
|
|
|
var ret models.ScrapedGallery
|
|
|
|
|
|
|
|
galleryScraperConfig := s.Gallery
|
|
|
|
galleryMap := galleryScraperConfig.mappedConfig
|
|
|
|
if galleryMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
galleryPerformersMap := galleryScraperConfig.Performers
|
|
|
|
galleryTagsMap := galleryScraperConfig.Tags
|
|
|
|
galleryStudioMap := galleryScraperConfig.Studio
|
|
|
|
|
|
|
|
logger.Debug(`Processing gallery:`)
|
|
|
|
results := galleryMap.process(q, s.Common)
|
|
|
|
if len(results) > 0 {
|
|
|
|
results[0].apply(&ret)
|
|
|
|
|
|
|
|
// now apply the performers and tags
|
|
|
|
if galleryPerformersMap != nil {
|
|
|
|
logger.Debug(`Processing gallery performers:`)
|
|
|
|
performerResults := galleryPerformersMap.process(q, s.Common)
|
|
|
|
|
|
|
|
for _, p := range performerResults {
|
2021-09-07 01:54:22 +00:00
|
|
|
performer := &models.ScrapedPerformer{}
|
2020-10-20 22:24:32 +00:00
|
|
|
p.apply(performer)
|
|
|
|
ret.Performers = append(ret.Performers, performer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if galleryTagsMap != nil {
|
|
|
|
logger.Debug(`Processing gallery tags:`)
|
|
|
|
tagResults := galleryTagsMap.process(q, s.Common)
|
|
|
|
|
|
|
|
for _, p := range tagResults {
|
2021-09-07 01:54:22 +00:00
|
|
|
tag := &models.ScrapedTag{}
|
2020-10-20 22:24:32 +00:00
|
|
|
p.apply(tag)
|
|
|
|
ret.Tags = append(ret.Tags, tag)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if galleryStudioMap != nil {
|
|
|
|
logger.Debug(`Processing gallery studio:`)
|
|
|
|
studioResults := galleryStudioMap.process(q, s.Common)
|
|
|
|
|
|
|
|
if len(studioResults) > 0 {
|
2021-09-07 01:54:22 +00:00
|
|
|
studio := &models.ScrapedStudio{}
|
2020-10-20 22:24:32 +00:00
|
|
|
studioResults[0].apply(studio)
|
|
|
|
ret.Studio = studio
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &ret, nil
|
|
|
|
}
|
|
|
|
|
2020-08-10 05:34:15 +00:00
|
|
|
func (s mappedScraper) scrapeMovie(q mappedQuery) (*models.ScrapedMovie, error) {
|
|
|
|
var ret models.ScrapedMovie
|
|
|
|
|
|
|
|
movieScraperConfig := s.Movie
|
|
|
|
movieMap := movieScraperConfig.mappedConfig
|
|
|
|
if movieMap == nil {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
movieStudioMap := movieScraperConfig.Studio
|
|
|
|
|
|
|
|
results := movieMap.process(q, s.Common)
|
|
|
|
if len(results) > 0 {
|
|
|
|
results[0].apply(&ret)
|
|
|
|
|
|
|
|
if movieStudioMap != nil {
|
|
|
|
logger.Debug(`Processing movie studio:`)
|
|
|
|
studioResults := movieStudioMap.process(q, s.Common)
|
|
|
|
|
|
|
|
if len(studioResults) > 0 {
|
2021-09-07 01:54:22 +00:00
|
|
|
studio := &models.ScrapedStudio{}
|
2020-08-10 05:34:15 +00:00
|
|
|
studioResults[0].apply(studio)
|
|
|
|
ret.Studio = studio
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &ret, nil
|
|
|
|
}
|