Patch summary (free text ahead of the first "diff --git" header):
  * graphql/documents/data/scrapers.graphql: adds the twitter and instagram
    fields to the ScrapedPerformerData fragment.
  * pkg/scraper/xpath.go: xpathScraperConfig.process now calls
    htmlquery.QueryAll instead of htmlquery.Find; when QueryAll returns an
    error, a warning naming the expression is logged and that entry is
    skipped with continue.
  * pkg/scraper/xpath_test.go: adds TestLoadInvalidXPath, which runs process
    with a malformed XPath expression; it makes no assertions on the result.
  NOTE(review): line breaks were restored from the diff markers and hunk
  counts; indentation of context lines is conventional and should be
  confirmed against the target files before applying.

diff --git a/graphql/documents/data/scrapers.graphql b/graphql/documents/data/scrapers.graphql
index 292789454..616465d0d 100644
--- a/graphql/documents/data/scrapers.graphql
+++ b/graphql/documents/data/scrapers.graphql
@@ -1,6 +1,8 @@
 fragment ScrapedPerformerData on ScrapedPerformer {
   name
   url
+  twitter
+  instagram
   birthdate
   ethnicity
   country
diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go
index ab4902b83..9b69026de 100644
--- a/pkg/scraper/xpath.go
+++ b/pkg/scraper/xpath.go
@@ -55,7 +55,12 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) []
 				asStr = common.applyCommon(asStr)
 			}
 
-			found := htmlquery.Find(doc, asStr)
+			found, err := htmlquery.QueryAll(doc, asStr)
+			if err != nil {
+				logger.Warnf("Error parsing xpath expression '%s': %s", asStr, err.Error())
+				continue
+			}
+
 			if len(found) > 0 {
 				for i, elem := range found {
 					if i >= len(ret) {
diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go
index 45b6eca2b..7268bb6dc 100644
--- a/pkg/scraper/xpath_test.go
+++ b/pkg/scraper/xpath_test.go
@@ -730,3 +730,20 @@ xPathScrapers:
 		return
 	}
 }
+
+func TestLoadInvalidXPath(t *testing.T) {
+	config := make(xpathScraperConfig)
+
+	config["Name"] = `//a[id=']/span`
+
+	reader := strings.NewReader(htmlDoc1)
+	doc, err := htmlquery.Parse(reader)
+
+	if err != nil {
+		t.Errorf("Error loading document: %s", err.Error())
+		return
+	}
+
+	common := make(commonXPathConfig)
+	config.process(doc, common)
+}