package scraper
import (
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/antchfx/htmlquery"
"github.com/stashapp/stash/pkg/models"
"github.com/stretchr/testify/assert"
"gopkg.in/yaml.v2"
)
// htmlDoc1 is the performer biography fixture that TestScrapePerformerXPath
// parses and scrapes.
// Adapted from https://www.freeones.com/html/m_links/bio_Mia_Malkova.php
const htmlDoc1 = `
Freeones: Mia Malkova Biography
Babe Name:
|
Mia Malkova
|
Profession:
|
Porn Star
|
Ethnicity:
|
Caucasian
|
Country of Origin:
|
United States
|
Date of Birth:
|
July 1, 1992 (27 years old)
|
Aliases:
|
Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica
|
Eye Color:
|
Hazel
|
Hair Color:
|
Blonde
|
Height:
|
5ft7
|
Measurements:
|
34C-26-36
|
Fake boobs:
|
No
|
Career Start And End
|
2012 - 2019
(7 Years In The Business)
|
Tattoos:
|
None
|
Piercings:
|
;
|
Social Network Links:
|
|
`
// makeCommonXPath returns the XPath expression used to read one attribute from
// the biography table. The expression looks inside the table with id
// "biographyTable" for a "paramname" cell containing a <b> element whose text
// is exactly attr, then selects the "paramvalue" cells of the enclosing row.
//
// attr is spliced into a single-quoted XPath string literal without escaping,
// so it must not contain a single quote.
func makeCommonXPath(attr string) string {
	const (
		labelMatchPrefix = `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '`
		valueCellSuffix  = `']/ancestor::tr/td[@class="paramvalue"]`
	)

	return labelMatchPrefix + attr + valueCellSuffix
}
// makeSimpleAttrConfig returns an attribute config whose only populated field
// is Selector, set to str.
func makeSimpleAttrConfig(str string) mappedScraperAttrConfig {
	var cfg mappedScraperAttrConfig
	cfg.Selector = str
	return cfg
}
// makeReplaceRegex returns a regex replacement config pairing the pattern
// regex with the replacement text with.
func makeReplaceRegex(regex string, with string) mappedRegexConfig {
	return mappedRegexConfig{Regex: regex, With: with}
}
// makeXPathConfig builds the performer scraper configuration exercised by
// TestScrapePerformerXPath. Most attributes are plain selectors produced by
// makeCommonXPath; Birthdate, CareerLength, Gender and Height additionally
// carry post-process actions, and Country is a fixed value.
func makeXPathConfig() mappedPerformerScraperConfig {
	attrs := make(mappedConfig)

	// Attributes that need no post-processing: the selector is the common
	// value-cell XPath for the row label, plus an optional trailing step.
	plainAttrs := []struct {
		field string
		label string
		step  string
	}{
		{"Name", "Babe Name:", `/a`},
		{"Ethnicity", "Ethnicity:", ""},
		{"Aliases", "Aliases:", ""},
		{"EyeColor", "Eye Color:", ""},
		{"Measurements", "Measurements:", ""},
		{"FakeTits", "Fake boobs:", ""},
		{"Tattoos", "Tattoos:", ""},
		{"Piercings", "Piercings:", "/comment()"},
	}
	for _, a := range plainAttrs {
		attrs[a.field] = makeSimpleAttrConfig(makeCommonXPath(a.label) + a.step)
	}

	// Birthdate: remove the "(NN years old)" suffix, then run the parse-date
	// action with the layout "January 2, 2006". The regex deliberately leaves
	// the trailing space behind ("July 1, 1992 ") to test existing scrapers
	// that do so.
	stripAge := postProcessReplace(mappedRegexConfigs{
		makeReplaceRegex(`\(.* years old\)`, ""),
	})
	parseBirthdate := postProcessParseDate("January 2, 2006")
	birthdate := makeSimpleAttrConfig(makeCommonXPath("Date of Birth:"))
	birthdate.postProcessActions = []postProcessAction{
		&stripAge,
		&parseBirthdate,
	}
	attrs["Birthdate"] = birthdate

	// Career length: this row's header has no trailing colon. The replace
	// removes the whitespace and parenthesised text that follow the year range.
	stripYearsInBusiness := postProcessReplace(mappedRegexConfigs{
		makeReplaceRegex(`\s+\(.*\)`, ""),
	})
	careerLength := makeSimpleAttrConfig(makeCommonXPath("Career Start And End"))
	careerLength.postProcessActions = []postProcessAction{
		&stripYearsInBusiness,
	}
	attrs["CareerLength"] = careerLength

	// Gender: read the Profession row and translate it with a map
	// post-process action.
	professionToGender := postProcessMap{
		"Porn Star": "Female",
	}
	gender := makeSimpleAttrConfig(makeCommonXPath("Profession:"))
	gender.postProcessActions = []postProcessAction{
		&professionToGender,
	}
	attrs["Gender"] = gender

	// Country: a fixed value rather than a selector.
	attrs["Country"] = mappedScraperAttrConfig{
		Fixed: "United States",
	}

	// Height: read the Height row and apply the feet-to-cm action.
	feetToCm := postProcessFeetToCm(true)
	height := makeSimpleAttrConfig(makeCommonXPath("Height:"))
	height.postProcessActions = []postProcessAction{
		&feetToCm,
	}
	attrs["Height"] = height

	return mappedPerformerScraperConfig{
		mappedConfig: attrs,
	}
}
// verifyField reports a test error when actual is nil or points to a string
// other than expected. field names the attribute in the failure message.
// It is marked as a test helper so failures are attributed to the caller.
func verifyField(t *testing.T, expected string, actual *string, field string) {
	t.Helper()

	switch {
	case actual == nil:
		t.Errorf("Expected %s to be set to %s, instead got nil", field, expected)
	case *actual != expected:
		t.Errorf("Expected %s to be set to %s, instead got %s", field, expected, *actual)
	}
}
func TestScrapePerformerXPath(t *testing.T) {
reader := strings.NewReader(htmlDoc1)
doc, err := htmlquery.Parse(reader)
if err != nil {
t.Errorf("Error loading document: %s", err.Error())
return
}
xpathConfig := makeXPathConfig()
scraper := mappedScraper{
Performer: &xpathConfig,
}
q := &xpathQuery{
doc: doc,
}
performer, err := scraper.scrapePerformer(q)
if err != nil {
t.Errorf("Error scraping performer: %s", err.Error())
return
}
const performerName = "Mia Malkova"
const ethnicity = "Caucasian"
const country = "United States"
const birthdate = "1992-07-01"
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
const eyeColor = "Hazel"
const measurements = "34C-26-36"
const fakeTits = "No"
const careerLength = "2012 - 2019"
const tattoos = "None"
const piercings = ""
const gender = "Female"
const height = "170"
verifyField(t, performerName, performer.Name, "Name")
verifyField(t, gender, performer.Gender, "Gender")
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
verifyField(t, country, performer.Country, "Country")
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
verifyField(t, aliases, performer.Aliases, "Aliases")
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
verifyField(t, measurements, performer.Measurements, "Measurements")
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
verifyField(t, careerLength, performer.CareerLength, "CareerLength")
verifyField(t, tattoos, performer.Tattoos, "Tattoos")
verifyField(t, piercings, performer.Piercings, "Piercings")
verifyField(t, height, performer.Height, "Height")
}
// TestConcatXPath checks the Concat option of an attribute config: the Name
// attribute selects "//div" with Concat set to a single space and is expected
// to come back as firstName + separator + lastName, while EyeColor is read
// from "//span" with a plain selector.
func TestConcatXPath(t *testing.T) {
	const (
		firstName = "FirstName"
		lastName  = "LastName"
		eyeColor  = "EyeColor"
		separator = " "
	)
	const testDoc = `
` + firstName + `
` + lastName + `
` + eyeColor + `
`

	doc, err := htmlquery.Parse(strings.NewReader(testDoc))
	if err != nil {
		t.Fatalf("Error loading document: %s", err.Error())
	}

	s := mappedScraper{
		Performer: &mappedPerformerScraperConfig{
			mappedConfig: mappedConfig{
				"Name": mappedScraperAttrConfig{
					Selector: "//div",
					Concat:   separator,
				},
				"EyeColor": makeSimpleAttrConfig("//span"),
			},
		},
	}

	performer, err := s.scrapePerformer(&xpathQuery{doc: doc})
	if err != nil {
		t.Fatalf("Error scraping performer: %s", err.Error())
	}

	verifyField(t, firstName+separator+lastName, performer.Name, "Name")
	verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
}
const sceneHTML = `
Test Video - Pornhub.com