diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go index 95291e1de..6219b10e8 100644 --- a/pkg/scraper/xpath.go +++ b/pkg/scraper/xpath.go @@ -189,8 +189,9 @@ func (q *xpathQuery) nodeText(n *html.Node) string { var ret string if n != nil && n.Type == html.CommentNode { ret = htmlquery.OutputHTML(n, true) + } else { + ret = htmlquery.InnerText(n) } - ret = htmlquery.InnerText(n) // trim all leading and trailing whitespace ret = strings.TrimSpace(ret) diff --git a/pkg/scraper/xpath_test.go b/pkg/scraper/xpath_test.go index 748928563..bd81b121c 100644 --- a/pkg/scraper/xpath_test.go +++ b/pkg/scraper/xpath_test.go @@ -138,7 +138,7 @@ const htmlDoc1 = ` Piercings: - None  + ; @@ -193,7 +193,7 @@ func makeXPathConfig() mappedPerformerScraperConfig { config.mappedConfig["Measurements"] = makeSimpleAttrConfig(makeCommonXPath("Measurements:")) config.mappedConfig["FakeTits"] = makeSimpleAttrConfig(makeCommonXPath("Fake boobs:")) config.mappedConfig["Tattoos"] = makeSimpleAttrConfig(makeCommonXPath("Tattoos:")) - config.mappedConfig["Piercings"] = makeSimpleAttrConfig(makeCommonXPath("Piercings:")) + config.mappedConfig["Piercings"] = makeSimpleAttrConfig(makeCommonXPath("Piercings:") + "/comment()") // special handling for birthdate birthdateAttrConfig := makeSimpleAttrConfig(makeCommonXPath("Date of Birth:")) @@ -295,7 +295,8 @@ func TestScrapePerformerXPath(t *testing.T) { const measurements = "34C-26-36" const fakeTits = "No" const careerLength = "2012 - 2019" - const tattoosPiercings = "None" + const tattoos = "None" + const piercings = "" const gender = "Female" const height = "170" @@ -313,8 +314,8 @@ func TestScrapePerformerXPath(t *testing.T) { verifyField(t, careerLength, performer.CareerLength, "CareerLength") - verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos") - verifyField(t, tattoosPiercings, performer.Piercings, "Piercings") + verifyField(t, tattoos, performer.Tattoos, "Tattoos") + verifyField(t, piercings, performer.Piercings, "Piercings") verifyField(t, height, performer.Height, "Height") }