// stash/pkg/match/path.go

package match
import (
"fmt"
"path/filepath"
"regexp"
"strings"
"unicode"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/image"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/scene"
"github.com/stashapp/stash/pkg/sliceutil/stringslice"
)
// Building blocks for the regular expressions that match names against paths.
const (
// separatorChars lists the characters that, once wrapped in a character
// class, are accepted in a path wherever a name contains a space.
separatorChars = `.\-_ `
// separatorPattern matches a run of one or more word-separating characters:
// an underscore, or any character that is not a unicode letter, a word
// character or a digit. It is compiled into separatorRE.
separatorPattern = `(?:_|[^\p{L}\w\d])+`
// reNotLetterWordUnicode matches one character that is not a unicode letter,
// a word character or a digit. nameToRegexp uses it as the name delimiter
// when useUnicode is true.
reNotLetterWordUnicode = `[^\p{L}\w\d]`
// reNotLetterWord is the variant without \p{L}, used by nameToRegexp when
// useUnicode is false.
reNotLetterWord = `[^\w\d]`
)
// separatorRE is separatorPattern compiled once at package initialisation.
// getPathWords uses it to turn separator runs into single spaces.
var separatorRE = regexp.MustCompile(separatorPattern)
func getPathQueryRegex(name string) string {
// escape specific regex characters
name = regexp.QuoteMeta(name)
// handle path separators
const separator = `[` + separatorChars + `]`
Enable gocritic (#1848) * Don't capitalize local variables ValidCodecs -> validCodecs * Capitalize deprecation markers A deprecated marker should be capitalized. * Use re.MustCompile for static regexes If the regex fails to compile, it's a programmer error, and should be treated as such. The regex is entirely static. * Simplify else-if constructions Rewrite else { if cond {}} to else if cond {} * Use a switch statement to analyze formats Break an if-else chain. While here, simplify code flow. Also introduce a proper static error for unsupported image formats, paving the way for being able to check against the error. * Rewrite ifElse chains into switch statements The "Effective Go" https://golang.org/doc/effective_go#switch document mentions it is more idiomatic to write if-else chains as switches when it is possible. Find all the plain rewrite occurrences in the code base and rewrite. In some cases, the if-else chains are replaced by a switch scrutinizer. That is, the code sequence if x == 1 { .. } else if x == 2 { .. } else if x == 3 { ... } can be rewritten into switch x { case 1: .. case 2: .. case 3: .. } which is clearer for the compiler: it can decide if the switch is better served by a jump-table then a branch-chain. * Rewrite switches, introduce static errors Introduce two new static errors: * `ErrNotImplmented` * `ErrNotSupported` And use these rather than forming new generative errors whenever the code is called. Code can now test on the errors (since they are static and the pointers to them wont change). Also rewrite ifElse chains into switches in this part of the code base. * Introduce a StashBoxError in configuration Since all stashbox errors are the same, treat them as such in the code base. While here, rewrite an ifElse chain. In the future, it might be beneifical to refactor configuration errors into one error which can handle missing fields, which context the error occurs in and so on. 
But for now, try to get an overview of the error categories by hoisting them into static errors. * Get rid of an else-block in transaction handling If we succesfully `recover()`, we then always `panic()`. This means the rest of the code is not reachable, so we can avoid having an else-block here. It also solves an ifElse-chain style check in the code base. * Use strings.ReplaceAll Rewrite strings.Replace(s, o, n, -1) into strings.ReplaceAll(s, o, n) To make it consistent and clear that we are doing an all-replace in the string rather than replacing parts of it. It's more of a nitpick since there are no implementation differences: the stdlib implementation is just to supply -1. * Rewrite via gocritic's assignOp Statements of the form x = x + e is rewritten into x += e where applicable. * Formatting * Review comments handled Stash-box is a proper noun. Rewrite a switch into an if-chain which returns on the first error encountered. * Use context.TODO() over context.Background() Patch in the same vein as everything else: use the TODO() marker so we can search for it later and link it into the context tree/tentacle once it reaches down to this level in the code base. * Tell the linter to ignore a section in manager_tasks.go The section is less readable, so mark it with a nolint for now. Because the rewrite enables a ifElseChain, also mark that as nolint for now. * Use strings.ReplaceAll over strings.Replace * Apply an ifElse rewrite else { if .. { .. } } rewrite into else if { .. } * Use switch-statements over ifElseChains Rewrite chains of if-else into switch statements. Where applicable, add an early nil-guard to simplify case analysis. Also, in ScanTask's Start(..), invert the logic to outdent the whole block, and help the reader: if it's not a scene, the function flow is now far more local to the top of the function, and it's clear that the rest of the function has to do with scene management. * Enable gocritic on the code base. 
Disable appendAssign for now since we aren't passing that check yet. * Document the nolint additions * Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
ret := strings.ReplaceAll(name, " ", separator+"*")
ret = `(?:^|_|[^\p{L}\d])` + ret + `(?:$|_|[^\p{L}\d])`
return ret
}
// getPathWords breaks path into the distinct word prefixes used to look up
// candidate names.
//
// The file extension is removed, every run of separator characters is
// collapsed to a single space, and the result is split on spaces. Words
// shorter than two characters are discarded; for every other word only its
// first two characters are kept, added to the result through
// stringslice.StrAppendUnique so that each prefix appears once.
func getPathWords(path string) []string {
	// remove the extension
	retStr := strings.TrimSuffix(path, filepath.Ext(path))

	// handle path separators
	retStr = separatorRE.ReplaceAllString(retStr, " ")
	words := strings.Split(retStr, " ")

	var ret []string
	for _, w := range words {
		// #2293 - need to convert to unicode runes for the substring,
		// otherwise the resulting string is corrupted.
		runes := []rune(w)

		// remove any single letter words. The length is measured in runes,
		// not bytes: a single multi-byte character has len(w) > 1 but holds
		// only one rune, so taking two runes from it would slice beyond the
		// word's length.
		if len(runes) < 2 {
			continue
		}

		// #1450 - we need to open up the criteria for matching so that we
		// can match where path has no space between subject names -
		// ie name = "foo bar" - path = "foobar"
		// we post-match afterwards, so we can afford to be a little loose
		// with the query
		// just use the first two characters
		ret = stringslice.StrAppendUnique(ret, string(runes[:2]))
	}

	return ret
}
// allASCII reports whether every byte of s lies within the ASCII range.
// The empty string is reported as all-ASCII.
//
// See https://stackoverflow.com/a/53069799
func allASCII(s string) bool {
	for _, b := range []byte(s) {
		if b > unicode.MaxASCII {
			return false
		}
	}
	return true
}
// nameMatchesPath returns the index in the path for the right-most match.
// Returns -1 if not found.
func nameMatchesPath(name, path string) int {
// #2363 - optimisation: only use unicode character regexp if path contains
// unicode characters
re := nameToRegexp(name, !allASCII(path))
return regexpMatchesPath(re, path)
}
// nameToRegexp compiles a regexp pattern to match paths from the given name.
// Set useUnicode to true if this regexp is to be used on any strings with unicode characters.
func nameToRegexp(name string, useUnicode bool) *regexp.Regexp {
// escape specific regex characters
name = regexp.QuoteMeta(name)
name = strings.ToLower(name)
// handle path separators
const separator = `[` + separatorChars + `]`
// performance optimisation: only use \p{L} is useUnicode is true
notWord := reNotLetterWord
if useUnicode {
notWord = reNotLetterWordUnicode
}
Enable gocritic (#1848) * Don't capitalize local variables ValidCodecs -> validCodecs * Capitalize deprecation markers A deprecated marker should be capitalized. * Use re.MustCompile for static regexes If the regex fails to compile, it's a programmer error, and should be treated as such. The regex is entirely static. * Simplify else-if constructions Rewrite else { if cond {}} to else if cond {} * Use a switch statement to analyze formats Break an if-else chain. While here, simplify code flow. Also introduce a proper static error for unsupported image formats, paving the way for being able to check against the error. * Rewrite ifElse chains into switch statements The "Effective Go" https://golang.org/doc/effective_go#switch document mentions it is more idiomatic to write if-else chains as switches when it is possible. Find all the plain rewrite occurrences in the code base and rewrite. In some cases, the if-else chains are replaced by a switch scrutinizer. That is, the code sequence if x == 1 { .. } else if x == 2 { .. } else if x == 3 { ... } can be rewritten into switch x { case 1: .. case 2: .. case 3: .. } which is clearer for the compiler: it can decide if the switch is better served by a jump-table then a branch-chain. * Rewrite switches, introduce static errors Introduce two new static errors: * `ErrNotImplmented` * `ErrNotSupported` And use these rather than forming new generative errors whenever the code is called. Code can now test on the errors (since they are static and the pointers to them wont change). Also rewrite ifElse chains into switches in this part of the code base. * Introduce a StashBoxError in configuration Since all stashbox errors are the same, treat them as such in the code base. While here, rewrite an ifElse chain. In the future, it might be beneifical to refactor configuration errors into one error which can handle missing fields, which context the error occurs in and so on. 
But for now, try to get an overview of the error categories by hoisting them into static errors. * Get rid of an else-block in transaction handling If we succesfully `recover()`, we then always `panic()`. This means the rest of the code is not reachable, so we can avoid having an else-block here. It also solves an ifElse-chain style check in the code base. * Use strings.ReplaceAll Rewrite strings.Replace(s, o, n, -1) into strings.ReplaceAll(s, o, n) To make it consistent and clear that we are doing an all-replace in the string rather than replacing parts of it. It's more of a nitpick since there are no implementation differences: the stdlib implementation is just to supply -1. * Rewrite via gocritic's assignOp Statements of the form x = x + e is rewritten into x += e where applicable. * Formatting * Review comments handled Stash-box is a proper noun. Rewrite a switch into an if-chain which returns on the first error encountered. * Use context.TODO() over context.Background() Patch in the same vein as everything else: use the TODO() marker so we can search for it later and link it into the context tree/tentacle once it reaches down to this level in the code base. * Tell the linter to ignore a section in manager_tasks.go The section is less readable, so mark it with a nolint for now. Because the rewrite enables a ifElseChain, also mark that as nolint for now. * Use strings.ReplaceAll over strings.Replace * Apply an ifElse rewrite else { if .. { .. } } rewrite into else if { .. } * Use switch-statements over ifElseChains Rewrite chains of if-else into switch statements. Where applicable, add an early nil-guard to simplify case analysis. Also, in ScanTask's Start(..), invert the logic to outdent the whole block, and help the reader: if it's not a scene, the function flow is now far more local to the top of the function, and it's clear that the rest of the function has to do with scene management. * Enable gocritic on the code base. 
Disable appendAssign for now since we aren't passing that check yet. * Document the nolint additions * Document StashBoxBatchPerformerTagInput
2021-10-18 03:12:40 +00:00
reStr := strings.ReplaceAll(name, " ", separator+"*")
reStr = `(?:^|_|` + notWord + `)` + reStr + `(?:$|_|` + notWord + `)`
re := regexp.MustCompile(reStr)
return re
}
// regexpMatchesPath runs r against the lower-cased path and returns the start
// index of the last match found, or -1 when r does not match at all.
// The index refers to a position in the lower-cased path.
func regexpMatchesPath(r *regexp.Regexp, path string) int {
	matches := r.FindAllStringIndex(strings.ToLower(path), -1)
	if len(matches) == 0 {
		return -1
	}

	last := matches[len(matches)-1]
	return last[0]
}
// getPerformers gathers the candidate performers for the given path words.
// It returns the result of performerReader.QueryForAutoTag(words) followed by
// the performers returned by getSingleLetterPerformers (presumably those with
// single-letter names, which the word query cannot find - confirm against
// that helper). The first error encountered is returned as-is.
func getPerformers(words []string, performerReader models.PerformerReader, cache *Cache) ([]*models.Performer, error) {
	candidates, err := performerReader.QueryForAutoTag(words)
	if err != nil {
		return nil, err
	}

	singleLetter, err := getSingleLetterPerformers(cache, performerReader)
	if err != nil {
		return nil, err
	}

	candidates = append(candidates, singleLetter...)
	return candidates, nil
}
// PathToPerformers returns the performers whose name matches the given path.
// Candidates are loaded using the words extracted from the path and are then
// checked one by one with nameMatchesPath. The result is nil when nothing
// matches.
func PathToPerformers(path string, reader models.PerformerReader, cache *Cache) ([]*models.Performer, error) {
	candidates, err := getPerformers(getPathWords(path), reader, cache)
	if err != nil {
		return nil, err
	}

	var matched []*models.Performer
	for _, candidate := range candidates {
		// TODO - alias handling is commented out until both sides work
		// correctly: || nameMatchesPath(candidate.Aliases.String, path) != -1
		if nameMatchesPath(candidate.Name.String, path) == -1 {
			continue
		}
		matched = append(matched, candidate)
	}

	return matched, nil
}
// getStudios gathers the candidate studios for the given path words.
// It returns the result of reader.QueryForAutoTag(words) followed by the
// studios returned by getSingleLetterStudios (presumably those with
// single-letter names - confirm against that helper). The first error
// encountered is returned as-is.
func getStudios(words []string, reader models.StudioReader, cache *Cache) ([]*models.Studio, error) {
	candidates, err := reader.QueryForAutoTag(words)
	if err != nil {
		return nil, err
	}

	singleLetter, err := getSingleLetterStudios(cache, reader)
	if err != nil {
		return nil, err
	}

	candidates = append(candidates, singleLetter...)
	return candidates, nil
}
// PathToStudio returns the Studio that matches the given path.
// Both the studio name and each of its aliases are tried. Where multiple
// matching studios are found, the one that matches the latest position in the
// path is returned; on equal positions the earlier candidate is kept.
// It returns nil when no studio matches.
func PathToStudio(path string, reader models.StudioReader, cache *Cache) (*models.Studio, error) {
	candidates, err := getStudios(getPathWords(path), reader, cache)
	if err != nil {
		return nil, err
	}

	var best *models.Studio
	// bestIndex never drops below -1, so a failed match (-1) can never
	// compare greater than it.
	bestIndex := -1

	for _, candidate := range candidates {
		if idx := nameMatchesPath(candidate.Name.String, path); idx > bestIndex {
			best, bestIndex = candidate, idx
		}

		aliases, err := reader.GetAliases(candidate.ID)
		if err != nil {
			return nil, err
		}

		for _, alias := range aliases {
			if idx := nameMatchesPath(alias, path); idx > bestIndex {
				best, bestIndex = candidate, idx
			}
		}
	}

	return best, nil
}
// getTags gathers the candidate tags for the given path words.
// It returns the result of reader.QueryForAutoTag(words) followed by the tags
// returned by getSingleLetterTags (presumably those with single-letter names -
// confirm against that helper). The first error encountered is returned as-is.
func getTags(words []string, reader models.TagReader, cache *Cache) ([]*models.Tag, error) {
	candidates, err := reader.QueryForAutoTag(words)
	if err != nil {
		return nil, err
	}

	singleLetter, err := getSingleLetterTags(cache, reader)
	if err != nil {
		return nil, err
	}

	candidates = append(candidates, singleLetter...)
	return candidates, nil
}
// PathToTags returns the tags that match the given path, either through the
// tag name or through one of its aliases. Aliases are only loaded for a tag
// whose name did not match. The result is nil when nothing matches.
func PathToTags(path string, reader models.TagReader, cache *Cache) ([]*models.Tag, error) {
	candidates, err := getTags(getPathWords(path), reader, cache)
	if err != nil {
		return nil, err
	}

	var matched []*models.Tag
	for _, tag := range candidates {
		found := nameMatchesPath(tag.Name, path) != -1

		if !found {
			aliases, err := reader.GetAliases(tag.ID)
			if err != nil {
				return nil, err
			}

			// stop at the first alias that matches
			for _, alias := range aliases {
				if found = nameMatchesPath(alias, path) != -1; found {
					break
				}
			}
		}

		if found {
			matched = append(matched, tag)
		}
	}

	return matched, nil
}
// PathToScenes returns the scenes whose path matches name.
//
// Scenes are queried with a case-insensitive regexp filter on the path built
// from name, with the Organized filter set to false and combined with
// scene.PathsFilter(paths). Each returned scene is then re-checked against the
// regexp produced by nameToRegexp before being included in the result.
//
// A query failure is returned wrapped, so the underlying error stays
// reachable through errors.Is and errors.As.
func PathToScenes(name string, paths []string, sceneReader models.SceneReader) ([]*models.Scene, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.SceneFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = scene.PathsFilter(paths)

	pp := models.PerPageAll
	scenes, err := scene.Query(sceneReader, &filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		return nil, fmt.Errorf("error querying scenes with regex '%s': %w", regex, err)
	}

	// paths may have unicode characters
	const useUnicode = true
	r := nameToRegexp(name, useUnicode)

	var ret []*models.Scene
	for _, p := range scenes {
		if regexpMatchesPath(r, p.Path) != -1 {
			ret = append(ret, p)
		}
	}

	return ret, nil
}
// PathToImages returns the images whose path matches name.
//
// Images are queried with a case-insensitive regexp filter on the path built
// from name, with the Organized filter set to false and combined with
// image.PathsFilter(paths). Each returned image is then re-checked against the
// regexp produced by nameToRegexp before being included in the result.
//
// A query failure is returned wrapped, so the underlying error stays
// reachable through errors.Is and errors.As.
func PathToImages(name string, paths []string, imageReader models.ImageReader) ([]*models.Image, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.ImageFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = image.PathsFilter(paths)

	pp := models.PerPageAll
	images, err := image.Query(imageReader, &filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		return nil, fmt.Errorf("error querying images with regex '%s': %w", regex, err)
	}

	// paths may have unicode characters
	const useUnicode = true
	r := nameToRegexp(name, useUnicode)

	var ret []*models.Image
	for _, p := range images {
		if regexpMatchesPath(r, p.Path) != -1 {
			ret = append(ret, p)
		}
	}

	return ret, nil
}
// PathToGalleries returns the galleries whose path matches name.
//
// Galleries are queried with a case-insensitive regexp filter on the path
// built from name, with the Organized filter set to false and combined with
// gallery.PathsFilter(paths). Each returned gallery is then re-checked against
// the regexp produced by nameToRegexp before being included in the result.
//
// A query failure is returned wrapped, so the underlying error stays
// reachable through errors.Is and errors.As.
func PathToGalleries(name string, paths []string, galleryReader models.GalleryReader) ([]*models.Gallery, error) {
	regex := getPathQueryRegex(name)
	organized := false
	filter := models.GalleryFilterType{
		Path: &models.StringCriterionInput{
			Value:    "(?i)" + regex,
			Modifier: models.CriterionModifierMatchesRegex,
		},
		Organized: &organized,
	}

	filter.And = gallery.PathsFilter(paths)

	pp := models.PerPageAll
	// the second return value of Query is not needed here
	galleries, _, err := galleryReader.Query(&filter, &models.FindFilterType{
		PerPage: &pp,
	})
	if err != nil {
		return nil, fmt.Errorf("error querying gallerys with regex '%s': %w", regex, err)
	}

	// paths may have unicode characters
	const useUnicode = true
	r := nameToRegexp(name, useUnicode)

	var ret []*models.Gallery
	for _, p := range galleries {
		if regexpMatchesPath(r, p.Path.String) != -1 {
			ret = append(ret, p)
		}
	}

	return ret, nil
}