package scraper

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/antchfx/htmlquery"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stretchr/testify/assert"
	"gopkg.in/yaml.v2"
)

// htmlDoc1 is the performer biography fixture parsed by
// TestScrapePerformerXPath and TestLoadInvalidXPath.
//
// adapted from https://www.freeones.com/html/m_links/bio_Mia_Malkova.php
//
// NOTE(review): as written this fixture holds only text, while the selectors
// built by makeXPathConfig target table/td/ul elements - confirm that the
// markup was not lost from the fixture.
const htmlDoc1 = ` Freeones: Mia Malkova Biography
Babe Name:
Mia Malkova 
Profession:
Porn Star
Ethnicity: Caucasian 
Country of Origin: United States
Date of Birth: July 1, 1992 (27 years old) 
Aliases: Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica 
Eye Color: Hazel 
Hair Color: Blonde 
Height: 5ft7
Weight: 126
Measurements: 34C-26-36
Fake boobs: No 
Career Start And End 2012 - 2019 (7 Years In The Business)
Tattoos: None 
Piercings: ;
Details: Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies – trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe it’s all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But it’s not all about the body. Mia’s also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know she’s only going for top-notch scenes with top-game performers, men, and women. 
Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova.
Social Network Links:
`

// makeCommonXPath builds the XPath for a biography table value: within the
// table with id "biographyTable" it finds the row whose "paramname" cell has
// a <b> element with text exactly equal to attr, and selects that row's
// "paramvalue" cell. attr is concatenated verbatim, so it must not contain a
// single quote.
func makeCommonXPath(attr string) string {
	return `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '` + attr + `']/ancestor::tr/td[@class="paramvalue"]`
}

// makeSimpleAttrConfig returns an attribute config that only sets Selector.
func makeSimpleAttrConfig(str string) mappedScraperAttrConfig {
	return mappedScraperAttrConfig{
		Selector: str,
	}
}

// makeReplaceRegex returns a regex replacement config mapping regex to with.
func makeReplaceRegex(regex string, with string) mappedRegexConfig {
	return mappedRegexConfig{
		Regex: regex,
		With:  with,
	}
}

// makeXPathConfig assembles the performer scraper config used by
// TestScrapePerformerXPath. Besides plain selectors it configures a
// replace + parseDate chain (Birthdate), a replace (CareerLength), a map
// (Gender), a fixed value (Country), unit conversions (Height, Weight) and a
// tag name selector.
func makeXPathConfig() mappedPerformerScraperConfig {
	config := mappedPerformerScraperConfig{
		mappedConfig: make(mappedConfig),
	}

	config.mappedConfig["Name"] = makeSimpleAttrConfig(makeCommonXPath("Babe Name:") + `/a`)
	config.mappedConfig["Ethnicity"] = makeSimpleAttrConfig(makeCommonXPath("Ethnicity:"))
	config.mappedConfig["Aliases"] = makeSimpleAttrConfig(makeCommonXPath("Aliases:"))
	config.mappedConfig["EyeColor"] = makeSimpleAttrConfig(makeCommonXPath("Eye Color:"))
	config.mappedConfig["Measurements"] = makeSimpleAttrConfig(makeCommonXPath("Measurements:"))
	config.mappedConfig["FakeTits"] = makeSimpleAttrConfig(makeCommonXPath("Fake boobs:"))
	config.mappedConfig["Tattoos"] = makeSimpleAttrConfig(makeCommonXPath("Tattoos:"))
	config.mappedConfig["Piercings"] = makeSimpleAttrConfig(makeCommonXPath("Piercings:") + "/comment()")
	config.mappedConfig["Details"] = makeSimpleAttrConfig(makeCommonXPath("Details:"))
	config.mappedConfig["HairColor"] = makeSimpleAttrConfig(makeCommonXPath("Hair Color:"))

	// special handling for birthdate
	birthdateAttrConfig := makeSimpleAttrConfig(makeCommonXPath("Date of Birth:"))

	var birthdateReplace mappedRegexConfigs
	// make this leave the trailing space to test existing scrapers that do so
	birthdateReplace = append(birthdateReplace, makeReplaceRegex(`\(.* years old\)`, ""))

	birthdateReplaceAction := postProcessReplace(birthdateReplace)
	birthdateParseDate := postProcessParseDate("January 2, 2006") // "July 1, 1992 (27 years old) "
	birthdateAttrConfig.postProcessActions = []postProcessAction{
		&birthdateReplaceAction,
		&birthdateParseDate,
	}
	config.mappedConfig["Birthdate"] = birthdateAttrConfig

	// special handling for career length
	// no colon in attribute header
	careerLengthAttrConfig := makeSimpleAttrConfig(makeCommonXPath("Career Start And End"))

	var careerLengthReplace mappedRegexConfigs
	careerLengthReplace = append(careerLengthReplace, makeReplaceRegex(`\s+\(.*\)`, ""))
	careerLengthReplaceAction := postProcessReplace(careerLengthReplace)
	careerLengthAttrConfig.postProcessActions = []postProcessAction{
		&careerLengthReplaceAction,
	}
	config.mappedConfig["CareerLength"] = careerLengthAttrConfig

	// use map post-process action for gender
	genderConfig := makeSimpleAttrConfig(makeCommonXPath("Profession:"))
	genderMapAction := make(postProcessMap)
	genderMapAction["Porn Star"] = "Female"
	genderConfig.postProcessActions = []postProcessAction{
		&genderMapAction,
	}
	config.mappedConfig["Gender"] = genderConfig

	// use fixed for Country
	config.mappedConfig["Country"] = mappedScraperAttrConfig{
		Fixed: "United States",
	}

	heightConfig := makeSimpleAttrConfig(makeCommonXPath("Height:"))
	heightConvAction := postProcessFeetToCm(true)
	heightConfig.postProcessActions = []postProcessAction{
		&heightConvAction,
	}
	config.mappedConfig["Height"] = heightConfig

	weightConfig := makeSimpleAttrConfig(makeCommonXPath("Weight:"))
	weightConvAction := postProcessLbToKg(true)
	weightConfig.postProcessActions = []postProcessAction{
		&weightConvAction,
	}
	config.mappedConfig["Weight"] = weightConfig

	tagConfig := mappedScraperAttrConfig{
		Selector: `//ul[@id="socialmedia"]//a`,
	}
	config.Tags = make(mappedConfig)
	config.Tags["Name"] = tagConfig

	return config
}

// verifyField reports a test error when actual is nil or does not point at a
// string equal to expected. field names the attribute in the error message.
func verifyField(t *testing.T, expected string, actual *string, field string) {
	t.Helper()

	switch {
	case actual == nil:
		t.Errorf("Expected %s to be set to %s, instead got nil", field, expected)
	case *actual != expected:
		t.Errorf("Expected %s to be set to %s, instead got %s", field, expected, *actual)
	}
}

// TestScrapePerformerXPath scrapes htmlDoc1 with the config from
// makeXPathConfig and checks every mapped performer field and the tag names.
func TestScrapePerformerXPath(t *testing.T) {
	reader := strings.NewReader(htmlDoc1)
	doc, err := htmlquery.Parse(reader)
	if err != nil {
		t.Fatalf("Error loading document: %s", err.Error())
	}

	xpathConfig := makeXPathConfig()

	scraper := mappedScraper{
		Performer: &xpathConfig,
	}

	q := &xpathQuery{
		doc: doc,
	}

	performer, err := scraper.scrapePerformer(context.Background(), q)
	if err != nil {
		t.Fatalf("Error scraping performer: %s", err.Error())
	}

	const performerName = "Mia Malkova"
	const ethnicity = "Caucasian"
	const country = "United States"
	const birthdate = "1992-07-01"
	const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
	const eyeColor = "Hazel"
	const measurements = "34C-26-36"
	const fakeTits = "No"
	const careerLength = "2012 - 2019"
	const tattoos = "None"
	const piercings = ""
	const gender = "Female"
	const height = "170" // 5ft7
	const details = "Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the ... arrow_drop_down Some girls are so damn hot that they can get you bent out of shape, and you will not even be mad at them for doing so. Well, tawny blonde Mia Malkova can bend her body into any shape she pleases, and that’s sure to satisfy all of the horny cocks and wet pussies out there. This girl has acrobatic and contortionist abilities that could even twist a pretzel into a new knot, which can be very helpful in the VR Porn movies – trust us. Ankles behind her neck and feet over her back so she can kiss her toes, turned, twisted and gyrating, she can fuck any which way she wants (and that ass!), will surely make you fall in love with this hot Virtual Reality Porn slut, as she is one of the finest of them all. Talking about perfection, maybe it’s all the acrobatic work that keeps it in such gorgeous shape? Who cares really, because you just want to take a big bite out of it and never let go. But it’s not all about the body. Mia’s also got a great smile, which might not sound kinky, but believe us, it is a smile that will heat up your innards and drop your pants. Is it her golden skin, her innocent pink lips or that heart-shaped face? There is just too much good stuff going on with Mia Malkova, which is maybe why these past few years have heaped awards upon awards on this Southern California native. Mia came to VR Bangers for her first VR Porn video, so you know she’s only going for top-notch scenes with top-game performers, men, and women. Better hit up that yoga studio if you ever dream of being able to bang a flexible and talented chick like lady Malkova."
	const hairColor = "Blonde"
	const weight = "57" // 126 lb

	verifyField(t, performerName, performer.Name, "Name")
	verifyField(t, gender, performer.Gender, "Gender")
	verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
	verifyField(t, country, performer.Country, "Country")
	verifyField(t, birthdate, performer.Birthdate, "Birthdate")
	verifyField(t, aliases, performer.Aliases, "Aliases")
	verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
	verifyField(t, measurements, performer.Measurements, "Measurements")
	verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
	verifyField(t, careerLength, performer.CareerLength, "CareerLength")
	verifyField(t, tattoos, performer.Tattoos, "Tattoos")
	verifyField(t, piercings, performer.Piercings, "Piercings")
	verifyField(t, height, performer.Height, "Height")
	verifyField(t, details, performer.Details, "Details")
	verifyField(t, hairColor, performer.HairColor, "HairColor")
	verifyField(t, weight, performer.Weight, "Weight")

	expectedTagNames := []string{
		"Twitter",
		"Facebook",
		"YouTube",
		"Instagram",
	}

	for i, expected := range expectedTagNames {
		// Report a missing tag as a failure instead of panicking on an
		// out-of-range index when fewer tags were scraped than expected.
		if i >= len(performer.Tags) {
			t.Errorf("Expected %s to be set to %s, instead got nil", "TagName", expected)
			continue
		}
		verifyField(t, expected, &performer.Tags[i].Name, "TagName")
	}
}

// TestConcatXPath checks that a selector with Concat set joins the matched
// values with the separator, alongside a plain selector for EyeColor.
func TestConcatXPath(t *testing.T) {
	const firstName = "FirstName"
	const lastName = "LastName"
	const eyeColor = "EyeColor"
	const separator = " "
	// NOTE(review): the selectors below are //div and //span, but this
	// document contains no elements - confirm the markup was not lost.
	const testDoc = `
` + firstName + `
` + lastName + `
` + eyeColor + ` `

	reader := strings.NewReader(testDoc)
	doc, err := htmlquery.Parse(reader)
	if err != nil {
		t.Fatalf("Error loading document: %s", err.Error())
	}

	xpathConfig := make(mappedConfig)
	nameAttrConfig := mappedScraperAttrConfig{
		Selector: "//div",
		Concat:   separator,
	}
	xpathConfig["Name"] = nameAttrConfig
	xpathConfig["EyeColor"] = makeSimpleAttrConfig("//span")

	scraper := mappedScraper{
		Performer: &mappedPerformerScraperConfig{
			mappedConfig: xpathConfig,
		},
	}

	q := &xpathQuery{
		doc: doc,
	}

	performer, err := scraper.scrapePerformer(context.Background(), q)
	if err != nil {
		t.Fatalf("Error scraping performer: %s", err.Error())
	}

	const performerName = firstName + separator + lastName

	verifyField(t, performerName, performer.Name, "Name")
	verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
}

// sceneHTML is the scene page fixture parsed by TestApplySceneXPathConfig.
//
// NOTE(review): as written this fixture holds only a title text, while
// makeSceneXPathConfig selects meta/div/script/i elements - confirm that the
// markup was not lost from the fixture.
const sceneHTML = ` Test Video - Pornhub.com
`

// makeSceneXPathConfig builds the scene scraper used by
// TestApplySceneXPathConfig. It defines two common selector fragments
// ($performerElem, $studioElem) and maps title, date, tags, performers,
// studio and movies; the movie name selector uses Split with a space.
func makeSceneXPathConfig() mappedScraper {
	common := make(commonMappedConfig)

	common["$performerElem"] = `//div[@class="pornstarsWrapper"]/a[@data-mxptype="Pornstar"]`
	common["$studioElem"] = `//div[@data-type="channel"]/a`

	config := mappedSceneScraperConfig{
		mappedConfig: make(mappedConfig),
	}

	config.mappedConfig["Title"] = makeSimpleAttrConfig(`//meta[@property="og:title"]/@content`)
	// this needs post-processing
	config.mappedConfig["Date"] = makeSimpleAttrConfig(`//script[@type="application/ld+json"]`)

	tagConfig := make(mappedConfig)
	tagConfig["Name"] = makeSimpleAttrConfig(`//div[@class="categoriesWrapper"]//a[not(@class="add-btn-small ")]`)
	config.Tags = tagConfig

	performerConfig := make(mappedConfig)
	performerConfig["Name"] = makeSimpleAttrConfig(`$performerElem/@data-mxptext`)
	performerConfig["URL"] = makeSimpleAttrConfig(`$performerElem/@href`)
	config.Performers.mappedConfig = performerConfig

	studioConfig := make(mappedConfig)
	studioConfig["Name"] = makeSimpleAttrConfig(`$studioElem`)
	studioConfig["URL"] = makeSimpleAttrConfig(`$studioElem/@href`)
	config.Studio = studioConfig

	const sep = " "
	moviesNameConfig := mappedScraperAttrConfig{
		Selector: `//i[@class="isMe tooltipTrig"]/@data-title`,
		Split:    sep,
	}
	moviesConfig := make(mappedConfig)
	moviesConfig["Name"] = moviesNameConfig
	config.Movies = moviesConfig

	scraper := mappedScraper{
		Scene:  &config,
		Common: common,
	}

	return scraper
}

// verifyTags compares tag names position by position. A missing entry on
// either side is compared as the empty string, so length mismatches are
// reported as errors too.
func verifyTags(t *testing.T, expectedTagNames []string, actualTags []*models.ScrapedTag) {
	t.Helper()

	for i := 0; i < len(expectedTagNames) || i < len(actualTags); i++ {
		expectedTag := ""
		actualTag := ""
		if i < len(expectedTagNames) {
			expectedTag = expectedTagNames[i]
		}
		if i < len(actualTags) {
			actualTag = actualTags[i].Name
		}

		if expectedTag != actualTag {
			t.Errorf("Expected tag %s, got %s", expectedTag, actualTag)
		}
	}
}

// verifyMovies compares movie names position by position. A missing entry on
// either side, or a movie whose Name is nil, is compared as the empty string.
func verifyMovies(t *testing.T, expectedMovieNames []string, actualMovies []*models.ScrapedMovie) {
	t.Helper()

	for i := 0; i < len(expectedMovieNames) || i < len(actualMovies); i++ {
		expectedMovie := ""
		actualMovie := ""
		if i < len(expectedMovieNames) {
			expectedMovie = expectedMovieNames[i]
		}
		// Guard the dereference so an unset name fails the comparison
		// instead of panicking.
		if i < len(actualMovies) && actualMovies[i].Name != nil {
			actualMovie = *actualMovies[i].Name
		}

		if expectedMovie != actualMovie {
			t.Errorf("Expected movie %s, got %s", expectedMovie, actualMovie)
		}
	}
}

// verifyPerformers compares performer names and URLs position by position.
// A missing entry on either side, or a nil Name/URL, is compared as the
// empty string.
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedPerformer) {
	t.Helper()

	for i := 0; i < len(expectedNames) || i < len(actualPerformers); i++ {
		expectedName := ""
		actualName := ""
		expectedURL := ""
		actualURL := ""
		if i < len(expectedNames) {
			expectedName = expectedNames[i]
		}
		if i < len(expectedURLs) {
			expectedURL = expectedURLs[i]
		}
		if i < len(actualPerformers) {
			// Guard both dereferences so unset values fail the
			// comparison instead of panicking.
			if actualPerformers[i].Name != nil {
				actualName = *actualPerformers[i].Name
			}
			if actualPerformers[i].URL != nil {
				actualURL = *actualPerformers[i].URL
			}
		}

		if expectedName != actualName {
			t.Errorf("Expected performer name %s, got %s", expectedName, actualName)
		}
		if expectedURL != actualURL {
			// Report the URLs that were compared, not the names.
			t.Errorf("Expected performer URL %s, got %s", expectedURL, actualURL)
		}
	}
}

// TestApplySceneXPathConfig scrapes sceneHTML with the scraper from
// makeSceneXPathConfig and checks title, tags, movies, performers and studio.
func TestApplySceneXPathConfig(t *testing.T) {
	reader := strings.NewReader(sceneHTML)
	doc, err := htmlquery.Parse(reader)
	if err != nil {
		t.Fatalf("Error loading document: %s", err.Error())
	}

	scraper := makeSceneXPathConfig()

	q := &xpathQuery{
		doc: doc,
	}
	scene, err := scraper.scrapeScene(context.Background(), q)
	if err != nil {
		t.Fatalf("Error scraping scene: %s", err.Error())
	}

	const title = "Test Video"

	verifyField(t, title, scene.Title, "Title")

	// verify tags
	expectedTags := []string{
		"Amateur",
		"Babe",
		"Blowjob",
		"Exclusive",
		"HD Porn",
		"Pornstar",
		"Public",
		"Pussy Licking",
		"Threesome",
		"Verified Models",
	}
	verifyTags(t, expectedTags, scene.Tags)

	// verify movies
	expectedMovies := []string{
		"Video",
		"of",
		"verified",
		"member",
	}
	verifyMovies(t, expectedMovies, scene.Movies)

	expectedPerformerNames := []string{
		"Alex D",
		"Mia Malkova",
		"Riley Reid",
	}

	expectedPerformerURLs := []string{
		"/pornstar/alex-d",
		"/pornstar/mia-malkova",
		"/pornstar/riley-reid",
	}

	verifyPerformers(t, expectedPerformerNames, expectedPerformerURLs, scene.Performers)

	const expectedStudioName = "Sis Loves Me"
	const expectedStudioURL = "/channels/sis-loves-me"

	verifyField(t, expectedStudioName, &scene.Studio.Name, "Studio.Name")
	verifyField(t, expectedStudioURL, scene.Studio.URL, "Studio.URL")
}

// TestLoadXPathScraperFromYAML unmarshals a scraper definition and checks
// that the scene selectors and the parseDate post-process action are loaded.
func TestLoadXPathScraperFromYAML(t *testing.T) {
	// NOTE(review): this YAML document sits on a single line - confirm that
	// its line breaks and indentation were not lost.
	const yamlStr = `name: Test performerByURL: - action: scrapeXPath url: - test.com scraper: performerScraper xPathScrapers: performerScraper: performer: name: //h1[@itemprop="name"] sceneScraper: scene: Title: selector: //title postProcess: - parseDate: January 2, 2006 Tags: Name: //tags Movies: Name: //movies Performers: Name: //performers Studio: Name: //studio `

	c := &config{}
	err := yaml.Unmarshal([]byte(yamlStr), &c)

	if err != nil {
		t.Fatalf("Error loading yaml: %s", err.Error())
	}

	// ensure fields are filled in correctly
	sceneScraper := c.XPathScrapers["sceneScraper"]
	sceneConfig := sceneScraper.Scene

	assert.Equal(t, "//title", sceneConfig.mappedConfig["Title"].Selector)
	assert.Equal(t, "//tags", sceneConfig.Tags["Name"].Selector)
	assert.Equal(t, "//movies", sceneConfig.Movies["Name"].Selector)
	assert.Equal(t, "//performers", sceneConfig.Performers.mappedConfig["Name"].Selector)
	assert.Equal(t, "//studio", sceneConfig.Studio["Name"].Selector)

	// Fail with a message rather than panic if the action is missing or
	// has an unexpected type.
	postProcess := sceneConfig.mappedConfig["Title"].postProcessActions
	if len(postProcess) == 0 {
		t.Fatal("expected a post-process action on Title, got none")
	}
	parseDate, ok := postProcess[0].(*postProcessParseDate)
	if !ok {
		t.Fatalf("expected a parseDate post-process action, got %T", postProcess[0])
	}
	assert.Equal(t, "January 2, 2006", string(*parseDate))
}

// TestLoadInvalidXPath runs a config with a malformed selector against
// htmlDoc1. The result is discarded and nothing is asserted, so the test
// passes as long as processing returns.
func TestLoadInvalidXPath(t *testing.T) {
	config := make(mappedConfig)

	config["Name"] = makeSimpleAttrConfig(`//a[id=']/span`)

	reader := strings.NewReader(htmlDoc1)
	doc, err := htmlquery.Parse(reader)
	if err != nil {
		t.Fatalf("Error loading document: %s", err.Error())
	}

	q := &xpathQuery{
		doc: doc,
	}

	config.process(context.Background(), q, nil)
}

// mockGlobalConfig is the global scraper configuration stub passed to
// newGroupScraper in TestSubScrape; every getter returns its zero value.
type mockGlobalConfig struct{}

// GetScraperUserAgent returns an empty user agent.
func (mockGlobalConfig) GetScraperUserAgent() string {
	return ""
}

// GetScrapersPath returns an empty scrapers path.
func (mockGlobalConfig) GetScrapersPath() string {
	return ""
}

// GetScraperCDPPath returns an empty CDP path.
func (mockGlobalConfig) GetScraperCDPPath() string {
	return ""
}

// GetScraperCertCheck returns false.
func (mockGlobalConfig) GetScraperCertCheck() bool {
	return false
}

// TestSubScrape serves two pages from a local test server and checks that a
// subScraper post-process action scrapes the performer name from the second
// page, whose URL is built from a link on the first.
func TestSubScrape(t *testing.T) {
	// NOTE(review): the selectors below are //div/a/@href and //span, but
	// neither page contains elements - confirm the markup was not lost.
	retHTML := `
A link
`

	ssHTML := ` The name `

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/getName" {
			fmt.Fprint(w, ssHTML)
		} else {
			fmt.Fprint(w, retHTML)
		}
	}))
	defer ts.Close()

	// NOTE(review): this YAML document sits on a single line - confirm that
	// its line breaks and indentation were not lost.
	yamlStr := `name: Test performerByURL: - action: scrapeXPath url: - ` + ts.URL + ` scraper: performerScraper xPathScrapers: performerScraper: performer: Name: selector: //div/a/@href postProcess: - replace: - regex: ^ with: ` + ts.URL + ` - subScraper: selector: //span `

	c := &config{}
	err := yaml.Unmarshal([]byte(yamlStr), &c)

	if err != nil {
		t.Fatalf("Error loading yaml: %s", err.Error())
	}

	globalConfig := mockGlobalConfig{}

	client := &http.Client{}
	ctx := context.Background()
	s := newGroupScraper(*c, nil, globalConfig)
	us, ok := s.(urlScraper)
	if !ok {
		// Stop here: us is unusable when the assertion fails.
		t.Fatal("couldn't convert scraper into url scraper")
	}
	content, err := us.viaURL(ctx, client, ts.URL, models.ScrapeContentTypePerformer)

	if err != nil {
		t.Fatalf("Error scraping performer: %s", err.Error())
	}

	performer, ok := content.(*models.ScrapedPerformer)
	if !ok {
		// Stop here: performer is nil when the assertion fails.
		t.Fatal("couldn't convert scraped content into a performer")
	}

	verifyField(t, "The name", performer.Name, "Name")
}