package scraper
import (
"strings"
"testing"
"github.com/antchfx/htmlquery"
"github.com/stashapp/stash/pkg/models"
"gopkg.in/yaml.v2"
)
// htmlDoc1 is the performer biography fixture parsed by
// TestScrapePerformerXPath. It was adapted from
// https://www.freeones.com/html/m_links/bio_Mia_Malkova.php
//
// NOTE(review): the selectors built by makeCommonXPath target a table with
// id "biographyTable" and paramname/paramvalue cells; confirm this fixture
// still carries that markup.
const htmlDoc1 = `
Freeones: Mia Malkova Biography
Babe Name:
|
Mia Malkova
|
Profession:
|
Porn Star
|
Ethnicity:
|
Caucasian
|
Country of Origin:
|
United States
|
Date of Birth:
|
July 1, 1992 (27 years old)
|
Aliases:
|
Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica
|
Eye Color:
|
Hazel
|
Hair Color:
|
Blonde
|
Height:
|
|
Measurements:
|
34C-26-36
|
Fake boobs:
|
No
|
Career Start And End
|
2012 - 2019
(7 Years In The Business)
|
Tattoos:
|
None
|
Piercings:
|
None
|
Social Network Links:
|
|
`
// makeCommonXPath returns the XPath selecting the "paramvalue" cell of the
// biography table row whose "paramname" cell contains a <b> element with
// text exactly equal to attr. attr is inserted verbatim between single
// quotes, so it must not itself contain a single quote.
func makeCommonXPath(attr string) string {
	const (
		headerMatch = `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '`
		valueCell   = `']/ancestor::tr/td[@class="paramvalue"]`
	)
	return headerMatch + attr + valueCell
}
// makeReplaceRegex builds a single replacement entry as an untyped map with
// the keys "regex" (the pattern) and "with" (the replacement text).
func makeReplaceRegex(regex string, with string) map[interface{}]interface{} {
	return map[interface{}]interface{}{
		"regex": regex,
		"with":  with,
	}
}
// makeXPathConfig assembles the performer scraper configuration used by
// TestScrapePerformerXPath. Most fields are plain selector strings produced
// by makeCommonXPath; Birthdate and CareerLength use the map form so that
// "replace" (and, for Birthdate, "parseDate") settings accompany the selector.
func makeXPathConfig() xpathScraperConfig {
	cfg := make(xpathScraperConfig)

	// Name additionally selects the <a> child of the value cell.
	cfg["Name"] = makeCommonXPath("Babe Name:") + `/a`

	// Fields whose selector is just the value cell of the named row.
	plainFields := []struct{ field, header string }{
		{"Ethnicity", "Ethnicity:"},
		{"Country", "Country of Origin:"},
		{"Aliases", "Aliases:"},
		{"EyeColor", "Eye Color:"},
		{"Measurements", "Measurements:"},
		{"FakeTits", "Fake boobs:"},
		{"Height", "Height:"},
		{"Tattoos", "Tattoos:"},
		{"Piercings", "Piercings:"},
	}
	for _, f := range plainFields {
		cfg[f.field] = makeCommonXPath(f.header)
	}

	// Birthdate: the regex matches the " (NN years old)" suffix, and the
	// layout describes dates written like "July 1, 1992".
	cfg["Birthdate"] = map[interface{}]interface{}{
		"selector":  makeCommonXPath("Date of Birth:"),
		"replace":   []interface{}{makeReplaceRegex(` \(.* years old\)`, "")},
		"parseDate": "January 2, 2006",
	}

	// CareerLength: this row header has no trailing colon; the regex matches
	// whitespace followed by a parenthesised remark.
	cfg["CareerLength"] = map[interface{}]interface{}{
		"selector": makeCommonXPath("Career Start And End"),
		"replace":  []interface{}{makeReplaceRegex(`\s+\(.*\)`, "")},
	}

	return cfg
}
// verifyField reports a test error when actual is nil or points to a string
// different from expected. field names the value in the failure message.
// It is marked as a test helper, so failures are attributed to the caller.
func verifyField(t *testing.T, expected string, actual *string, field string) {
	t.Helper()

	switch {
	case actual == nil:
		t.Errorf("Expected %s to be set to %s, instead got nil", field, expected)
	case *actual != expected:
		t.Errorf("Expected %s to be set to %s, instead got %s", field, expected, *actual)
	}
}
func TestScrapePerformerXPath(t *testing.T) {
reader := strings.NewReader(htmlDoc1)
doc, err := htmlquery.Parse(reader)
if err != nil {
t.Errorf("Error loading document: %s", err.Error())
return
}
xpathConfig := makeXPathConfig()
scraper := xpathScraper{
Performer: xpathConfig,
}
performer, err := scraper.scrapePerformer(doc)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())
return
}
const performerName = "Mia Malkova"
const ethnicity = "Caucasian"
const country = "United States"
const birthdate = "1992-07-01"
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
const eyeColor = "Hazel"
const measurements = "34C-26-36"
const fakeTits = "No"
const careerLength = "2012 - 2019"
const tattoosPiercings = "None"
verifyField(t, performerName, performer.Name, "Name")
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
verifyField(t, country, performer.Country, "Country")
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
verifyField(t, aliases, performer.Aliases, "Aliases")
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
verifyField(t, measurements, performer.Measurements, "Measurements")
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
verifyField(t, careerLength, performer.CareerLength, "CareerLength")
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
}
func TestConcatXPath(t *testing.T) {
const firstName = "FirstName"
const lastName = "LastName"
const eyeColor = "EyeColor"
const separator = " "
const testDoc = `
` + firstName + `
` + lastName + `
` + eyeColor + `
`
reader := strings.NewReader(testDoc)
doc, err := htmlquery.Parse(reader)
if err != nil {
t.Errorf("Error loading document: %s", err.Error())
return
}
xpathConfig := make(xpathScraperConfig)
nameAttrConfig := make(map[interface{}]interface{})
nameAttrConfig["selector"] = "//div"
nameAttrConfig["concat"] = separator
xpathConfig["Name"] = nameAttrConfig
xpathConfig["EyeColor"] = "//span"
scraper := xpathScraper{
Performer: xpathConfig,
}
performer, err := scraper.scrapePerformer(doc)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())
return
}
const performerName = firstName + separator + lastName
verifyField(t, performerName, performer.Name, "Name")
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
}
const sceneHTML = `
Test Video - Pornhub.com
Test Video
Added on: 2 months ago
Featured on: 1 month ago
Jump to your favorite action