Scraper fixes (#332)

* Fix panic on invalid xpath

* Add missing attrs to scraped performer fragment
This commit is contained in:
WithoutPants 2020-01-25 14:36:24 +11:00 committed by Leopere
parent 2dc1b42a0a
commit 78eb527ec4
3 changed files with 25 additions and 1 deletions

View File

@@ -1,6 +1,8 @@
fragment ScrapedPerformerData on ScrapedPerformer {
name
url
twitter
instagram
birthdate
ethnicity
country

View File

@@ -55,7 +55,12 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) []
asStr = common.applyCommon(asStr)
}
found := htmlquery.Find(doc, asStr)
found, err := htmlquery.QueryAll(doc, asStr)
if err != nil {
logger.Warnf("Error parsing xpath expression '%s': %s", asStr, err.Error())
continue
}
if len(found) > 0 {
for i, elem := range found {
if i >= len(ret) {

View File

@@ -730,3 +730,20 @@ xPathScrapers:
return
}
}
func TestLoadInvalidXPath(t *testing.T) {
config := make(xpathScraperConfig)
config["Name"] = `//a[id=']/span`
reader := strings.NewReader(htmlDoc1)
doc, err := htmlquery.Parse(reader)
if err != nil {
t.Errorf("Error loading document: %s", err.Error())
return
}
common := make(commonXPathConfig)
config.process(doc, common)
}