mirror of https://github.com/stashapp/stash.git
Configurable scraper user agent string (#409)
* Add debug scrape option.

Co-authored-by: HiddenPants255 <>
parent ff495361d9
commit abf2b49803
@@ -11,6 +11,7 @@ fragment ConfigGeneralData on ConfigGeneralResult {
   logLevel
   logAccess
   excludes
+  scraperUserAgent
 }
 
 fragment ConfigInterfaceData on ConfigInterfaceResult {
@@ -32,6 +32,8 @@ input ConfigGeneralInput {
   logAccess: Boolean!
   """Array of file regexp to exclude from Scan"""
   excludes: [String!]
+  """Scraper user agent string"""
+  scraperUserAgent: String
 }
 
 type ConfigGeneralResult {
@@ -59,6 +61,8 @@ type ConfigGeneralResult {
   logAccess: Boolean!
   """Array of file regexp to exclude from Scan"""
   excludes: [String!]!
+  """Scraper user agent string"""
+  scraperUserAgent: String
 }
 
 input ConfigInterfaceInput {
@@ -76,6 +76,10 @@ func (r *mutationResolver) ConfigureGeneral(ctx context.Context, input models.Co
 		config.Set(config.Exclude, input.Excludes)
 	}
 
+	if input.ScraperUserAgent != nil {
+		config.Set(config.ScraperUserAgent, input.ScraperUserAgent)
+	}
+
 	if err := config.Write(); err != nil {
 		return makeConfigGeneralResult(), err
 	}
@@ -33,6 +33,8 @@ func makeConfigGeneralResult() *models.ConfigGeneralResult {
 	maxTranscodeSize := config.GetMaxTranscodeSize()
 	maxStreamingTranscodeSize := config.GetMaxStreamingTranscodeSize()
 
+	scraperUserAgent := config.GetScraperUserAgent()
+
 	return &models.ConfigGeneralResult{
 		Stashes:      config.GetStashPaths(),
 		DatabasePath: config.GetDatabasePath(),
@@ -46,6 +48,7 @@ func makeConfigGeneralResult() *models.ConfigGeneralResult {
 		LogLevel:         config.GetLogLevel(),
 		LogAccess:        config.GetLogAccess(),
 		Excludes:         config.GetExcludes(),
+		ScraperUserAgent: &scraperUserAgent,
 	}
 }
 
@@ -59,7 +62,6 @@ func makeConfigInterfaceResult() *models.ConfigInterfaceResult {
 	cssEnabled := config.GetCSSEnabled()
 	language := config.GetLanguage()
 
-
 	return &models.ConfigInterfaceResult{
 		SoundOnPreview: &soundOnPreview,
 		WallShowTitle:  &wallShowTitle,
@@ -22,7 +22,6 @@ const Password = "password"
 
 const Database = "database"
 
-const ScrapersPath = "scrapers_path"
 const Exclude = "exclude"
 
 const MaxTranscodeSize = "max_transcode_size"
@@ -32,6 +31,10 @@ const Host = "host"
 const Port = "port"
 const ExternalHost = "external_host"
 
+// scraping options
+const ScrapersPath = "scrapers_path"
+const ScraperUserAgent = "scraper_user_agent"
+
 // i18n
 const Language = "language"
 
@@ -115,6 +118,10 @@ func GetScrapersPath() string {
 	return viper.GetString(ScrapersPath)
 }
 
+func GetScraperUserAgent() string {
+	return viper.GetString(ScraperUserAgent)
+}
+
 func GetHost() string {
 	return viper.GetString(Host)
 }
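Note that the new `scraper_user_agent` value is read through viper like the neighbouring settings, so besides the UI mutation it can also be set directly in stash's configuration file. A minimal sketch, assuming a YAML config file; only the `scraper_user_agent` key comes from this commit, the other key and both values are illustrative:

    # stash configuration file (YAML, read by viper) -- sketch
    scrapers_path: /path/to/scrapers   # pre-existing key, path illustrative
    # New in this commit: sent as the User-Agent header on scrape requests
    scraper_user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"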
@@ -139,6 +139,10 @@ func (c *scrapeSceneByURLConfig) resolveFn() {
 		}
 	}
 }
 
+type scraperDebugOptions struct {
+	PrintHTML bool `yaml:"printHTML"`
+}
+
 type scraperConfig struct {
 	ID   string
 	Name string `yaml:"name"`
@@ -148,8 +152,9 @@ type scraperConfig struct {
 	SceneByFragment *sceneByFragmentConfig    `yaml:"sceneByFragment"`
 	SceneByURL      []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
 
-	StashServer   *stashServer  `yaml:"stashServer"`
-	XPathScrapers xpathScrapers `yaml:"xPathScrapers"`
+	DebugOptions  *scraperDebugOptions `yaml:"debug"`
+	StashServer   *stashServer         `yaml:"stashServer"`
+	XPathScrapers xpathScrapers        `yaml:"xPathScrapers"`
 }
 
 func loadScraperFromYAML(path string) (*scraperConfig, error) {
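The `yaml:"debug"` tag on scraperConfig together with scraperDebugOptions means an individual scraper definition can opt into dumping the fetched HTML. A minimal sketch of a scraper YAML using the new block; everything except the `debug`/`printHTML` keys is illustrative and not taken from this commit:

    # example scraper definition (hypothetical)
    name: Example
    sceneByURL:
      - url:
          - example.com/scenes/
        scraper: sceneScraper
    xPathScrapers:
      sceneScraper:
        scene:
          Title: //h1
    # New in this commit: loadURL logs the full fetched page HTML when true
    debug:
      printHTML: true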
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/stashapp/stash/pkg/manager/config"
 	"github.com/stashapp/stash/pkg/models"
 	"github.com/stashapp/stash/pkg/utils"
 )
@@ -52,8 +53,18 @@ func getImage(url string) (*string, error) {
 		Timeout: imageGetTimeout,
 	}
 
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	userAgent := config.GetScraperUserAgent()
+	if userAgent != "" {
+		req.Header.Set("User-Agent", userAgent)
+	}
+
 	// assume is a URL for now
-	resp, err := client.Get(url)
+	resp, err := client.Do(req)
 	if err != nil {
 		return nil, err
 	}
@@ -1,7 +1,9 @@
 package scraper
 
 import (
+	"bytes"
 	"errors"
+	"net/http"
 	"net/url"
 	"reflect"
 	"regexp"
@@ -10,11 +12,17 @@ import (
 
 	"github.com/antchfx/htmlquery"
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/charset"
 
 	"github.com/stashapp/stash/pkg/logger"
+	"github.com/stashapp/stash/pkg/manager/config"
 	"github.com/stashapp/stash/pkg/models"
 )
 
+// Timeout for the scrape http request. Includes transfer time. May want to make this
+// configurable at some point.
+const scrapeGetTimeout = time.Second * 30
 
 type commonXPathConfig map[string]string
 
 func (c commonXPathConfig) applyCommon(src string) string {
@@ -197,7 +205,7 @@ func (c xpathScraperAttrConfig) applySubScraper(value string) string {
 		return value
 	}
 
-	doc, err := htmlquery.LoadURL(value)
+	doc, err := loadURL(value, nil)
 
 	if err != nil {
 		logger.Warnf("Error getting URL '%s' for sub-scraper: %s", value, err.Error())
@@ -504,6 +512,42 @@ func (r xPathResults) setKey(index int, key string, value string) xPathResults {
 	return r
 }
 
+func loadURL(url string, c *scraperConfig) (*html.Node, error) {
+	client := &http.Client{
+		Timeout: scrapeGetTimeout,
+	}
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	userAgent := config.GetScraperUserAgent()
+	if userAgent != "" {
+		req.Header.Set("User-Agent", userAgent)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
+	if err != nil {
+		return nil, err
+	}
+
+	ret, err := html.Parse(r)
+
+	if err == nil && c != nil && c.DebugOptions != nil && c.DebugOptions.PrintHTML {
+		var b bytes.Buffer
+		html.Render(&b, ret)
+		logger.Infof("loadURL (%s) response: \n%s", url, b.String())
+	}
+
+	return ret, err
+}
+
 func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
 	scraper := c.scraperConfig.XPathScrapers[c.Scraper]
 
@@ -511,7 +555,7 @@ func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPe
 		return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
 	}
 
-	doc, err := htmlquery.LoadURL(url)
+	doc, err := loadURL(url, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -527,7 +571,7 @@ func scrapeSceneURLXPath(c scraperTypeConfig, url string) (*models.ScrapedScene,
 		return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
 	}
 
-	doc, err := htmlquery.LoadURL(url)
+	doc, err := loadURL(url, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -551,7 +595,7 @@ func scrapePerformerNamesXPath(c scraperTypeConfig, name string) ([]*models.Scra
 	u := c.QueryURL
 	u = strings.Replace(u, placeholder, escapedName, -1)
 
-	doc, err := htmlquery.LoadURL(u)
+	doc, err := loadURL(u, c.scraperConfig)
 
 	if err != nil {
 		return nil, err
@@ -29,6 +29,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
   const [logLevel, setLogLevel] = useState<string>("Info");
   const [logAccess, setLogAccess] = useState<boolean>(true);
   const [excludes, setExcludes] = useState<string[]>([]);
+  const [scraperUserAgent, setScraperUserAgent] = useState<string | undefined>(undefined);
 
   const { data, error, loading } = StashService.useConfiguration();
 
@@ -44,7 +45,8 @@ export const SettingsConfigurationPanel: React.FC = () => {
     logOut,
     logLevel,
     logAccess,
-    excludes
+    excludes,
+    scraperUserAgent
   });
 
   useEffect(() => {
@@ -66,6 +68,7 @@ export const SettingsConfigurationPanel: React.FC = () => {
       setLogLevel(conf.general.logLevel);
       setLogAccess(conf.general.logAccess);
       setExcludes(conf.general.excludes);
+      setScraperUserAgent(conf.general.scraperUserAgent ?? undefined);
     }
   }, [data, error]);
 
@@ -289,6 +292,22 @@ export const SettingsConfigurationPanel: React.FC = () => {
 
       <hr />
 
+      <Form.Group id="generated-path">
+        <h6>Scraping</h6>
+        <Form.Control
+          className="col col-sm-6 text-input"
+          defaultValue={scraperUserAgent}
+          onChange={(e: React.FormEvent<HTMLInputElement>) =>
+            setScraperUserAgent(e.currentTarget.value)
+          }
+        />
+        <Form.Text className="text-muted">
+          User-Agent string used during scrape http requests
+        </Form.Text>
+      </Form.Group>
+
+      <hr />
+
       <Form.Group>
        <h4>Authentication</h4>
        <Form.Group id="username">
@@ -32,6 +32,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
   const [logLevel, setLogLevel] = useState<string>("Info");
   const [logAccess, setLogAccess] = useState<boolean>(true);
   const [excludes, setExcludes] = useState<(string)[]>([]);
+  const [scraperUserAgent, setScraperUserAgent] = useState<string | undefined>(undefined);
 
   const { data, error, loading } = StashService.useConfiguration();
 
@@ -48,7 +49,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
     logLevel,
     logAccess,
     excludes,
-
+    scraperUserAgent,
   });
 
   useEffect(() => {
@@ -67,6 +68,7 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
       setLogLevel(conf.general.logLevel);
       setLogAccess(conf.general.logAccess);
      setExcludes(conf.general.excludes);
+      setScraperUserAgent(conf.general.scraperUserAgent);
     }
   }, [data, error]);
 
@@ -229,6 +231,18 @@ export const SettingsConfigurationPanel: FunctionComponent<IProps> = (props: IPr
       </FormGroup>
       <Divider />
 
+      <FormGroup>
+        <H4>Scraping</H4>
+        <FormGroup
+          label="Scraper User-Agent string"
+          helperText="User-Agent string used during scrape http requests"
+        >
+          <InputGroup value={scraperUserAgent} onChange={(e: any) => setScraperUserAgent(e.target.value)} />
+        </FormGroup>
+      </FormGroup>
+
+      <Divider />
+
       <FormGroup>
         <H4>Authentication</H4>
         <FormGroup