mirror of https://github.com/stashapp/stash.git
Scrape tag exclusions (#1617)
* Add config option for scraper tag exclusion patterns Add a config option for excluding tags / tag patterns from the scraper results. * Handle tag exclusion patterns during scraping
This commit is contained in:
parent
404eaa32d2
commit
dfd55346b2
|
@ -69,6 +69,7 @@ fragment ConfigScrapingData on ConfigScrapingResult {
|
|||
scraperUserAgent
|
||||
scraperCertCheck
|
||||
scraperCDPPath
|
||||
excludeTagPatterns
|
||||
}
|
||||
|
||||
fragment ConfigData on ConfigResult {
|
||||
|
|
|
@ -255,6 +255,8 @@ input ConfigScrapingInput {
|
|||
scraperCDPPath: String
|
||||
"""Whether the scraper should check for invalid certificates"""
|
||||
scraperCertCheck: Boolean!
|
||||
"""Tags blacklist during scraping"""
|
||||
excludeTagPatterns: [String!]
|
||||
}
|
||||
|
||||
type ConfigScrapingResult {
|
||||
|
@ -264,6 +266,8 @@ type ConfigScrapingResult {
|
|||
scraperCDPPath: String
|
||||
"""Whether the scraper should check for invalid certificates"""
|
||||
scraperCertCheck: Boolean!
|
||||
"""Tags blacklist during scraping"""
|
||||
excludeTagPatterns: [String!]!
|
||||
}
|
||||
|
||||
"""All configuration settings"""
|
||||
|
|
|
@ -312,6 +312,10 @@ func (r *mutationResolver) ConfigureScraping(ctx context.Context, input models.C
|
|||
refreshScraperCache = true
|
||||
}
|
||||
|
||||
if input.ExcludeTagPatterns != nil {
|
||||
c.Set(config.ScraperExcludeTagPatterns, input.ExcludeTagPatterns)
|
||||
}
|
||||
|
||||
c.Set(config.ScraperCertCheck, input.ScraperCertCheck)
|
||||
if refreshScraperCache {
|
||||
manager.GetInstance().RefreshScraperCache()
|
||||
|
|
|
@ -144,8 +144,9 @@ func makeConfigScrapingResult() *models.ConfigScrapingResult {
|
|||
scraperCDPPath := config.GetScraperCDPPath()
|
||||
|
||||
return &models.ConfigScrapingResult{
|
||||
ScraperUserAgent: &scraperUserAgent,
|
||||
ScraperCertCheck: config.GetScraperCertCheck(),
|
||||
ScraperCDPPath: &scraperCDPPath,
|
||||
ScraperUserAgent: &scraperUserAgent,
|
||||
ScraperCertCheck: config.GetScraperCertCheck(),
|
||||
ScraperCDPPath: &scraperCDPPath,
|
||||
ExcludeTagPatterns: config.GetScraperExcludeTagPatterns(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,6 +95,7 @@ const ScrapersPath = "scrapers_path"
|
|||
const ScraperUserAgent = "scraper_user_agent"
|
||||
const ScraperCertCheck = "scraper_cert_check"
|
||||
const ScraperCDPPath = "scraper_cdp_path"
|
||||
const ScraperExcludeTagPatterns = "scraper_exclude_tag_patterns"
|
||||
|
||||
// stash-box options
|
||||
const StashBoxes = "stash_boxes"
|
||||
|
@ -368,6 +369,15 @@ func (i *Instance) GetScraperCertCheck() bool {
|
|||
return ret
|
||||
}
|
||||
|
||||
func (i *Instance) GetScraperExcludeTagPatterns() []string {
|
||||
var ret []string
|
||||
if viper.IsSet(ScraperExcludeTagPatterns) {
|
||||
ret = viper.GetStringSlice(ScraperExcludeTagPatterns)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (i *Instance) GetStashBoxes() []*models.StashBox {
|
||||
var boxes []*models.StashBox
|
||||
viper.UnmarshalKey(StashBoxes, &boxes)
|
||||
|
|
|
@ -5,10 +5,12 @@ import (
|
|||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
stash_config "github.com/stashapp/stash/pkg/manager/config"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"github.com/stashapp/stash/pkg/utils"
|
||||
)
|
||||
|
@ -239,12 +241,11 @@ func (c Cache) postScrapePerformer(ret *models.ScrapedPerformer) error {
|
|||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
for _, t := range ret.Tags {
|
||||
err := MatchScrapedSceneTag(tqb, t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
|
@ -263,12 +264,11 @@ func (c Cache) postScrapeScenePerformer(ret *models.ScrapedScenePerformer) error
|
|||
if err := c.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
|
||||
tqb := r.Tag()
|
||||
|
||||
for _, t := range ret.Tags {
|
||||
err := MatchScrapedSceneTag(tqb, t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
|
@ -302,12 +302,11 @@ func (c Cache) postScrapeScene(ret *models.ScrapedScene) error {
|
|||
}
|
||||
}
|
||||
|
||||
for _, t := range ret.Tags {
|
||||
err := MatchScrapedSceneTag(tqb, t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
||||
|
@ -342,12 +341,11 @@ func (c Cache) postScrapeGallery(ret *models.ScrapedGallery) error {
|
|||
}
|
||||
}
|
||||
|
||||
for _, t := range ret.Tags {
|
||||
err := MatchScrapedSceneTag(tqb, t)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tags, err := postProcessTags(tqb, ret.Tags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ret.Tags = tags
|
||||
|
||||
if ret.Studio != nil {
|
||||
err := MatchScrapedSceneStudio(sqb, ret.Studio)
|
||||
|
@ -509,3 +507,42 @@ func (c Cache) ScrapeMovieURL(url string) (*models.ScrapedMovie, error) {
|
|||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func postProcessTags(tqb models.TagReader, scrapedTags []*models.ScrapedSceneTag) ([]*models.ScrapedSceneTag, error) {
|
||||
var ret []*models.ScrapedSceneTag
|
||||
|
||||
excludePatterns := stash_config.GetInstance().GetScraperExcludeTagPatterns()
|
||||
var excludeRegexps []*regexp.Regexp
|
||||
|
||||
for _, excludePattern := range excludePatterns {
|
||||
reg, err := regexp.Compile(strings.ToLower(excludePattern))
|
||||
if err != nil {
|
||||
logger.Errorf("Invalid tag exclusion pattern :%v", err)
|
||||
} else {
|
||||
excludeRegexps = append(excludeRegexps, reg)
|
||||
}
|
||||
}
|
||||
|
||||
var ignoredTags []string
|
||||
ScrapeTag:
|
||||
for _, t := range scrapedTags {
|
||||
for _, reg := range excludeRegexps {
|
||||
if reg.MatchString(strings.ToLower(t.Name)) {
|
||||
ignoredTags = append(ignoredTags, t.Name)
|
||||
continue ScrapeTag
|
||||
}
|
||||
}
|
||||
|
||||
err := MatchScrapedSceneTag(tqb, t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret = append(ret, t)
|
||||
}
|
||||
|
||||
if len(ignoredTags) > 0 {
|
||||
logger.Infof("Scraping ignored tags: %s", strings.Join(ignoredTags, ", "))
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
### ✨ New Features
|
||||
* Support excluding tag patterns when scraping. ([#1617](https://github.com/stashapp/stash/pull/1617))
|
||||
* Support setting a custom directory for default performer images. ([#1489](https://github.com/stashapp/stash/pull/1489))
|
||||
* Added filtering and sorting on scene marker count for tags. ([#1603](https://github.com/stashapp/stash/pull/1603))
|
||||
* Support excluding fields and editing tags when saving from scene tagger view. ([#1605](https://github.com/stashapp/stash/pull/1605))
|
||||
|
|
|
@ -17,9 +17,10 @@ import StashConfiguration from "./StashConfiguration";
|
|||
interface IExclusionPatternsProps {
|
||||
excludes: string[];
|
||||
setExcludes: (value: string[]) => void;
|
||||
demo: string;
|
||||
}
|
||||
|
||||
const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
||||
export const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
||||
function excludeRegexChanged(idx: number, value: string) {
|
||||
const newExcludes = props.excludes.map((regex, i) => {
|
||||
const ret = idx !== i ? regex : value;
|
||||
|
@ -35,8 +36,7 @@ const ExclusionPatterns: React.FC<IExclusionPatternsProps> = (props) => {
|
|||
}
|
||||
|
||||
function excludeAddRegex() {
|
||||
const demo = "sample\\.mp4$";
|
||||
const newExcludes = props.excludes.concat(demo);
|
||||
const newExcludes = props.excludes.concat(props.demo);
|
||||
|
||||
props.setExcludes(newExcludes);
|
||||
}
|
||||
|
@ -490,7 +490,11 @@ export const SettingsConfigurationPanel: React.FC = () => {
|
|||
id: "config.general.excluded_video_patterns_head",
|
||||
})}
|
||||
</h6>
|
||||
<ExclusionPatterns excludes={excludes} setExcludes={setExcludes} />
|
||||
<ExclusionPatterns
|
||||
excludes={excludes}
|
||||
setExcludes={setExcludes}
|
||||
demo="sample\.mp4$"
|
||||
/>
|
||||
<Form.Text className="text-muted">
|
||||
{intl.formatMessage({
|
||||
id: "config.general.excluded_video_patterns_desc",
|
||||
|
@ -514,6 +518,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
|
|||
<ExclusionPatterns
|
||||
excludes={imageExcludes}
|
||||
setExcludes={setImageExcludes}
|
||||
demo="sample\.jpg$"
|
||||
/>
|
||||
<Form.Text className="text-muted">
|
||||
{intl.formatMessage({
|
||||
|
|
|
@ -14,6 +14,7 @@ import { useToast } from "src/hooks";
|
|||
import { TextUtils } from "src/utils";
|
||||
import { CollapseButton, Icon, LoadingIndicator } from "src/components/Shared";
|
||||
import { ScrapeType } from "src/core/generated-graphql";
|
||||
import { ExclusionPatterns } from "./SettingsConfigurationPanel";
|
||||
|
||||
interface IURLList {
|
||||
urls: string[];
|
||||
|
@ -96,6 +97,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||
undefined
|
||||
);
|
||||
const [scraperCertCheck, setScraperCertCheck] = useState<boolean>(true);
|
||||
const [excludeTagPatterns, setExcludeTagPatterns] = useState<string[]>([]);
|
||||
|
||||
const { data, error } = useConfiguration();
|
||||
|
||||
|
@ -103,6 +105,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||
scraperUserAgent,
|
||||
scraperCDPPath,
|
||||
scraperCertCheck,
|
||||
excludeTagPatterns,
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
|
@ -113,6 +116,7 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||
setScraperUserAgent(conf.scraping.scraperUserAgent ?? undefined);
|
||||
setScraperCDPPath(conf.scraping.scraperCDPPath ?? undefined);
|
||||
setScraperCertCheck(conf.scraping.scraperCertCheck);
|
||||
setExcludeTagPatterns(conf.scraping.excludeTagPatterns);
|
||||
}
|
||||
}, [data, error]);
|
||||
|
||||
|
@ -398,6 +402,24 @@ export const SettingsScrapingPanel: React.FC = () => {
|
|||
</Form.Group>
|
||||
</Form.Group>
|
||||
|
||||
<Form.Group>
|
||||
<h6>
|
||||
{intl.formatMessage({
|
||||
id: "config.scraping.excluded_tag_patterns_head",
|
||||
})}
|
||||
</h6>
|
||||
<ExclusionPatterns
|
||||
excludes={excludeTagPatterns}
|
||||
setExcludes={setExcludeTagPatterns}
|
||||
demo="4K"
|
||||
/>
|
||||
<Form.Text className="text-muted">
|
||||
{intl.formatMessage({
|
||||
id: "config.scraping.excluded_tag_patterns_desc",
|
||||
})}
|
||||
</Form.Text>
|
||||
</Form.Group>
|
||||
|
||||
<hr />
|
||||
|
||||
<h4>{intl.formatMessage({ id: "config.scraping.scrapers" })}</h4>
|
||||
|
|
|
@ -243,6 +243,8 @@
|
|||
"scraping": {
|
||||
"entity_metadata": "{entityType} Metadata",
|
||||
"entity_scrapers": "{entityType} scrapers",
|
||||
"excluded_tag_patterns_desc": "Regexps of tag names to exclude from scraping results",
|
||||
"excluded_tag_patterns_head": "Excluded Tag Patterns",
|
||||
"scrapers": "Scrapers",
|
||||
"search_by_name": "Search by name",
|
||||
"supported_types": "Supported types",
|
||||
|
|
Loading…
Reference in New Issue