mirror of https://github.com/stashapp/stash.git
733 lines
31 KiB
Go
733 lines
31 KiB
Go
package scraper
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/antchfx/htmlquery"
|
|
"github.com/stashapp/stash/pkg/models"
|
|
"gopkg.in/yaml.v2"
|
|
)
|
|
|
|
// adapted from https://www.freeones.com/html/m_links/bio_Mia_Malkova.php
|
|
const htmlDoc1 = `
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
|
|
<head>
|
|
<title>Freeones: Mia Malkova Biography</title>
|
|
</head>
|
|
<body data-babe="Mia Malkova">
|
|
<div class="ContentBlock Block1">
|
|
<div class="ContentBlockBody" style="padding: 0px;">
|
|
<table id="biographyTable" border="0" cellspacing="0" cellpadding="0" width="100%">
|
|
<tbody>
|
|
<tr>
|
|
<td class="paramname">
|
|
<div><b>Babe Name:</b></div>
|
|
</td>
|
|
<td class="paramvalue">
|
|
<a href="/html/m_links/Mia_Malkova/">Mia Malkova</a>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<div><b>Profession:</b></div>
|
|
</td>
|
|
<td class="paramvalue">Porn Star
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Ethnicity:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
Caucasian
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Country of Origin:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
|
|
<span class="country-us">
|
|
|
|
United States
|
|
<span>
|
|
</span></span></td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Date of Birth:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
July 1, 1992 (27 years old)
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Aliases:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Eye Color:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
Hazel
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Hair Color:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
Blonde
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Height:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
<script type="text/javascript">
|
|
<!--
|
|
heightcm = "171";
|
|
morethenone = 'inch';
|
|
feet = heightcm / 30.48;
|
|
inches = (feet - Math.floor(feet)) * 30.48 / 2.54;
|
|
|
|
feet = Math.floor(feet);
|
|
inches = inches.toFixed(0);
|
|
|
|
if (inches > 1) {
|
|
morethenone = 'inches';
|
|
}
|
|
|
|
if (heightcm == 0) {
|
|
message = 'Unknown';
|
|
} else {
|
|
message = '171 cm - ' + feet + ' feet and ' + inches + ' ' + morethenone;
|
|
}
|
|
document.write(message);
|
|
// -->
|
|
</script>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Measurements:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
34C-26-36
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Fake boobs:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
No
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Career Start And End</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
2012 - 2019
|
|
(7 Years In The Business)
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Tattoos:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
None
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<b>Piercings:</b>
|
|
</td>
|
|
<td class="paramvalue">
|
|
None
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="paramname">
|
|
<div><b>Social Network Links:</b></div>
|
|
</td>
|
|
<td class="paramvalue">
|
|
<ul id="socialmedia">
|
|
<li class="twitter"><a href="https://twitter.com/MiaMalkova" target="_blank" alt="Mia Malkova Twitter" title="Mia Malkova Twitter">Twitter</a></li>
|
|
<li class="facebook"><a href="https://www.facebook.com/MiaMalcove" target="_blank" alt="Mia Malkova Facebook" title="Mia Malkova Facebook">Facebook</a></li>
|
|
<li class="youtube"><a href="https://www.youtube.com/channel/UCEPR0sZKa_ScMoyhemfB7nA" target="_blank" alt="Mia Malkova YouTube" title="Mia Malkova YouTube">YouTube</a></li>
|
|
<li class="instagram"><a href="https://www.instagram.com/mia_malkova/" target="_blank" alt="Mia Malkova Instagram" title="Mia Malkova Instagram">Instagram</a></li>
|
|
</ul>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
</body>
|
|
</html>
|
|
`
|
|
|
|
// makeCommonXPath builds the XPath that selects the "paramvalue" cell of the
// biographyTable row whose "paramname" cell contains a <b> element with text
// exactly equal to attr.
//
// attr is embedded as an XPath string literal. XPath 1.0 has no escape
// sequence inside literals, so the quoting is chosen from the content of
// attr: single quotes when attr has no apostrophe (the historical output),
// double quotes when it has an apostrophe but no double quote, and a concat()
// expression when it contains both.
func makeCommonXPath(attr string) string {
	var literal string
	switch {
	case !strings.Contains(attr, "'"):
		literal = "'" + attr + "'"
	case !strings.Contains(attr, `"`):
		literal = `"` + attr + `"`
	default:
		// Split on apostrophes and splice each one back in as its own
		// double-quoted piece: a'b -> concat('a', "'", 'b').
		literal = `concat('` + strings.Replace(attr, "'", `', "'", '`, -1) + `')`
	}

	return `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = ` + literal + `]/ancestor::tr/td[@class="paramvalue"]`
}
|
|
|
|
func makeXPathConfig() xpathScraperConfig {
|
|
config := make(xpathScraperConfig)
|
|
|
|
config["Name"] = makeCommonXPath("Babe Name:") + `/a`
|
|
config["Ethnicity"] = makeCommonXPath("Ethnicity:")
|
|
config["Country"] = makeCommonXPath("Country of Origin:")
|
|
config["Birthdate"] = makeCommonXPath("Date of Birth:")
|
|
config["Aliases"] = makeCommonXPath("Aliases:")
|
|
config["EyeColor"] = makeCommonXPath("Eye Color:")
|
|
config["Measurements"] = makeCommonXPath("Measurements:")
|
|
config["FakeTits"] = makeCommonXPath("Fake boobs:")
|
|
config["Height"] = makeCommonXPath("Height:")
|
|
// no colon in attribute header
|
|
config["CareerLength"] = makeCommonXPath("Career Start And End")
|
|
config["Tattoos"] = makeCommonXPath("Tattoos:")
|
|
config["Piercings"] = makeCommonXPath("Piercings:")
|
|
|
|
return config
|
|
}
|
|
|
|
// verifyField records a test error unless actual is non-nil and points to a
// string equal to expected. field names the value in the failure message.
// Failures are reported with t.Errorf, so the calling test keeps running.
func verifyField(t *testing.T, expected string, actual *string, field string) {
	t.Helper()

	switch {
	case actual == nil:
		t.Errorf("Expected %s to be set to %s, instead got nil", field, expected)
	case *actual != expected:
		t.Errorf("Expected %s to be set to %s, instead got %s", field, expected, *actual)
	}
}
|
|
|
|
func TestScrapePerformerXPath(t *testing.T) {
|
|
reader := strings.NewReader(htmlDoc1)
|
|
doc, err := htmlquery.Parse(reader)
|
|
|
|
if err != nil {
|
|
t.Errorf("Error loading document: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
xpathConfig := makeXPathConfig()
|
|
|
|
scraper := xpathScraper{
|
|
Performer: xpathConfig,
|
|
}
|
|
|
|
performer, err := scraper.scrapePerformer(doc)
|
|
|
|
if err != nil {
|
|
t.Errorf("Error scraping performer: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
const performerName = "Mia Malkova"
|
|
const ethnicity = "Caucasian"
|
|
const country = "United States"
|
|
const birthdate = "July 1, 1992 (27 years old)"
|
|
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
|
|
const eyeColor = "Hazel"
|
|
const measurements = "34C-26-36"
|
|
const fakeTits = "No"
|
|
const careerLength = "2012 - 2019"
|
|
const tattoosPiercings = "None"
|
|
|
|
verifyField(t, performerName, performer.Name, "Name")
|
|
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
|
|
verifyField(t, country, performer.Country, "Country")
|
|
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
|
|
verifyField(t, aliases, performer.Aliases, "Aliases")
|
|
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
|
|
verifyField(t, measurements, performer.Measurements, "Measurements")
|
|
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
|
|
|
|
// TODO - this needs post-processing
|
|
//verifyField(t, careerLength, performer.CareerLength, "CareerLength")
|
|
|
|
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
|
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
|
}
|
|
|
|
const sceneHTML = `
|
|
<!DOCTYPE html>
|
|
|
|
<head>
|
|
<title>Test Video - Pornhub.com</title>
|
|
|
|
<meta property="og:title" content="Test Video" />
|
|
<meta property="og:description"
|
|
content="Watch Test Video on Pornhub.com, the best hardcore porn site. Pornhub is home to the widest selection of free Babe sex videos full of the hottest pornstars. If you're craving 3some XXX movies you'll find them here." />
|
|
<meta property="og:image"
|
|
content="https://di.phncdn.com/videos/201910/13/254476211/thumbs_80/(m=eaAaGwObaaaa)(mh=_V1YEGdMFS1rEYoW)9.jpg" />
|
|
|
|
<script type="application/ld+json">
|
|
{
|
|
"@context": "http://schema.org/",
|
|
"@type": "VideoObject",
|
|
"name": "Test Video",
|
|
"embedUrl": "https://www.pornhub.com/embed/ph5da270596459c",
|
|
"duration": "PT00H33M27S",
|
|
"thumbnailUrl": "https://di.phncdn.com/videos/201910/13/254476211/thumbs_80/(m=eaAaGwObaaaa)(mh=_V1YEGdMFS1rEYoW)9.jpg",
|
|
"uploadDate": "2019-10-13T00:33:51+00:00",
|
|
"description": "Watch Test Video on Pornhub.com, the best hardcore porn site. Pornhub is home to the widest selection of free Babe sex videos full of the hottest pornstars. If you're craving 3some XXX movies you'll find them here.",
|
|
"author" : "Mia Malkova", "interactionStatistic": [
|
|
{
|
|
"@type": "InteractionCounter",
|
|
"interactionType": "http://schema.org/WatchAction",
|
|
"userInteractionCount": "5,908,861"
|
|
},
|
|
{
|
|
"@type": "InteractionCounter",
|
|
"interactionType": "http://schema.org/LikeAction",
|
|
"userInteractionCount": "22,090"
|
|
}
|
|
]
|
|
}
|
|
</script>
|
|
</head>
|
|
|
|
<body class="logged-out">
|
|
<div class="container ">
|
|
|
|
|
|
<div id="main-container" class="clearfix" data-delete-check="1" data-is-private="1" data-is-premium=""
|
|
data-liu="0" data-next-shuffle="ph5da270596459c" data-pkey="" data-platform-pc="1" data-playlist-check="0"
|
|
data-playlist-id-check="0" data-playlist-geo-check="0" data-friend="0" data-playlist-user-check="0"
|
|
data-playlist-video-check="0" data-playlist-shuffle="0" data-shuffle-forward="ph5da270596459c"
|
|
data-shuffle-back="ph5da270596459c" data-min-large="1350"
|
|
data-video-title="Test Video">
|
|
|
|
<div id="vpContentContainer">
|
|
<div id="hd-leftColVideoPage">
|
|
<div class="video-wrapper">
|
|
<div class="title-container">
|
|
<i class="isMe tooltipTrig" data-title="Video of verified member"></i>
|
|
<h1 class="title">
|
|
<span class="inlineFree">Test Video</span>
|
|
</h1>
|
|
</div>
|
|
|
|
<div class="video-actions-container">
|
|
<div class="video-actions-tabs">
|
|
<div class="video-action-tab about-tab active">
|
|
<div class="video-detailed-info">
|
|
<div class="video-info-row">
|
|
From:
|
|
|
|
<div class="usernameWrap clearfix" data-type="channel" data-userid="492538092"
|
|
data-liu-user="0"
|
|
data-json-url="/user/box?id=492538092&token=MTU3NzA1NTkzNIqATol8v_WrhmNTXkeflvG09C2U7UUT_NyoZUFa7iKq0mlzBkmdgAH1aNHZkJmIOHbbwmho1BehHDoA63K5Wn4."
|
|
data-disable-popover="0">
|
|
|
|
<a rel="" href="/channels/sis-loves-me" class="bolded">Sis Loves Me</a>
|
|
<div class="avatarPosition"></div>
|
|
</div>
|
|
|
|
<span class="verified-icon flag tooltipTrig"
|
|
data-title="Verified member"></span>
|
|
- 87 videos
|
|
<span class="subscribers-count"> 459466</span>
|
|
</div>
|
|
|
|
<div class="video-info-row">
|
|
<div class="pornstarsWrapper">
|
|
Pornstars:
|
|
<a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
|
data-mxptext="Alex D" data-id="251341" data-login="1"
|
|
href="/pornstar/alex-d">Alex D <span
|
|
class="psbox-link-container display-none"></span>
|
|
</a>
|
|
, <a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
|
data-mxptext="Mia Malkova" data-id="10641" data-login="1"
|
|
href="/pornstar/mia-malkova">Mia Malkova <span
|
|
class="psbox-link-container display-none"></span>
|
|
</a>
|
|
, <a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
|
data-mxptext="Riley Reid" data-id="5343" data-login="1"
|
|
href="/pornstar/riley-reid">Riley Reid <span
|
|
class="psbox-link-container display-none"></span>
|
|
</a>
|
|
<div class="tooltipTrig suggestBtn" data-title="Add a pornstar">
|
|
<a class="add-btn-small add-pornstar-btn-2">+
|
|
<span>Suggest</span></a>
|
|
</div>
|
|
<div id="deletePornstarResult" class="suggest-result"></div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="video-info-row showLess">
|
|
<div class="categoriesWrapper">
|
|
Categories:
|
|
<a href="/video?c=3"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Amateur</a>,
|
|
<a href="/categories/babe"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Babe</a>,
|
|
<a href="/video?c=13"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Blowjob</a>,
|
|
<a href="/video?c=115"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Exclusive</a>,
|
|
<a href="/hd"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">HD
|
|
Porn</a>, <a href="/categories/pornstar"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Pornstar</a>,
|
|
<a href="/video?c=24"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Public</a>,
|
|
<a href="/video?c=131"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Pussy
|
|
Licking</a>, <a href="/video?c=65"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Threesome</a>,
|
|
<a href="/video?c=139"
|
|
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Verified
|
|
Models</a>
|
|
<div class="tooltipTrig suggestBtn" data-title="Suggest Categories">
|
|
<a id="categoryLink" class="add-btn-small ">+
|
|
<span>Suggest</span></a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="video-info-row showLess">
|
|
<div class="productionWrapper">
|
|
Production:
|
|
<a href="/video?p=professional" rel="nofollow"
|
|
class="production">professional</a>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="video-info-row showLess">
|
|
<div class="tagsWrapper">
|
|
Tags:
|
|
<a href="/video/search?search=3some">3some</a>, <a
|
|
href="/video?c=9">blonde</a>, <a href="/video?c=59">small tits</a>,
|
|
<a href="/video/search?search=butt">butt</a>, <a
|
|
href="/video/search?search=natural+tits">natural tits</a>, <a
|
|
href="/video/search?search=petite">petite</a>, <a
|
|
href="/video?c=24">public</a>, <a
|
|
href="/video/search?search=outside">outside</a>, <a
|
|
href="/video/search?search=car">car</a>, <a
|
|
href="/video/search?search=garage">garage</a>, <a
|
|
href="/video?c=65">threesome</a>, <a
|
|
href="/video/search?search=bgg">bgg</a>, <a
|
|
href="/video/search?search=girlfrien+d">girlfrien d</a>, <a
|
|
href="/video/search?search=parking">parking</a>, <a
|
|
href="/video/search?search=sex">sex</a>, <a
|
|
href="/video/search?search=gagging">gagging</a>, <a
|
|
href="/video?c=13">blowjob</a>, <a
|
|
href="/video/search?search=bj">bj</a>, <a
|
|
href="/video/search?search=double">double</a>, <a
|
|
href="/video/search?search=ass">ass</a>
|
|
<div class="tooltipTrig suggestBtn" data-title="Suggest Tags">
|
|
<a id="tagLink" class="add-btn-small">+ <span>Suggest</span></a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="video-info-row showLess">
|
|
Added on: <span class="white">2 months ago</span>
|
|
</div>
|
|
|
|
<div class="video-info-row showLess">
|
|
Featured on: <span class="white">1 month ago</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="video-action-tab jump-to-tab">
|
|
<div class="title">Jump to your favorite action</div>
|
|
|
|
<div class="filters mainFilter float-right">
|
|
<div class="dropdownTrigger">
|
|
<div>
|
|
<span class="textFilter" id="tagSort">Sequence</span>
|
|
<span class="arrowFilters"></span>
|
|
</div>
|
|
<ul class="filterListItem dropdownWrapper">
|
|
<li class="active"><a class="actionTagSort"
|
|
data-sort="seconds">Sequence</a></li>
|
|
<li><a class="actionTagSort" data-sort="tag">Alphabetical</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="reset"></div>
|
|
<div class="display-grid col-4 gap-row-none sortBy seconds">
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(862), ga('send', 'event', 'Video Page', 'click', 'Jump to Blowjob');">
|
|
Blowjob </a>
|
|
|
|
<var>14:22</var>
|
|
</li>
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1117), ga('send', 'event', 'Video Page', 'click', 'Jump to Reverse Cowgirl');">
|
|
Reverse Cowgirl </a>
|
|
|
|
<var>18:37</var>
|
|
</li>
|
|
</ul>
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1182), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
|
Cowgirl </a>
|
|
|
|
<var>19:42</var>
|
|
</li>
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1625), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
|
Cowgirl </a>
|
|
|
|
<var>27:05</var>
|
|
</li>
|
|
</ul>
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1822), ga('send', 'event', 'Video Page', 'click', 'Jump to Doggystyle');">
|
|
Doggystyle </a>
|
|
|
|
<var>30:22</var>
|
|
</li>
|
|
</ul>
|
|
|
|
</div>
|
|
<div class="display-grid col-4 gap-row-none sortBy tag">
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(862), ga('send', 'event', 'Video Page', 'click', 'Jump to Blowjob');">
|
|
Blowjob </a>
|
|
|
|
<var>14:22</var>
|
|
</li>
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1117), ga('send', 'event', 'Video Page', 'click', 'Jump to Reverse Cowgirl');">
|
|
Reverse Cowgirl </a>
|
|
|
|
<var>18:37</var>
|
|
</li>
|
|
</ul>
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1182), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
|
Cowgirl </a>
|
|
|
|
<var>19:42</var>
|
|
</li>
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1625), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
|
Cowgirl </a>
|
|
|
|
<var>27:05</var>
|
|
</li>
|
|
</ul>
|
|
<ul class="actionTagList full-width margin-none">
|
|
<li>
|
|
<a class="js-triggerJumpCat"
|
|
onclick="jumpToAction(1822), ga('send', 'event', 'Video Page', 'click', 'Jump to Doggystyle');">
|
|
Doggystyle </a>
|
|
|
|
<var>30:22</var>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</body>
|
|
</html>`
|
|
|
|
func makeSceneXPathConfig() xpathScraper {
|
|
common := make(commonXPathConfig)
|
|
|
|
common["$performerElem"] = `//div[@class="pornstarsWrapper"]/a[@data-mxptype="Pornstar"]`
|
|
common["$studioElem"] = `//div[@data-type="channel"]/a`
|
|
|
|
config := make(xpathScraperConfig)
|
|
|
|
config["Title"] = `//meta[@property="og:title"]/@content`
|
|
// this needs post-processing
|
|
config["Date"] = `//script[@type="application/ld+json"]`
|
|
|
|
tagConfig := make(map[interface{}]interface{})
|
|
tagConfig["Name"] = `//div[@class="categoriesWrapper"]//a[not(@class="add-btn-small ")]`
|
|
config["Tags"] = tagConfig
|
|
|
|
performerConfig := make(map[interface{}]interface{})
|
|
performerConfig["Name"] = `$performerElem/@data-mxptext`
|
|
performerConfig["URL"] = `$performerElem/@href`
|
|
config["Performers"] = performerConfig
|
|
|
|
studioConfig := make(map[interface{}]interface{})
|
|
studioConfig["Name"] = `$studioElem`
|
|
studioConfig["URL"] = `$studioElem/@href`
|
|
config["Studio"] = studioConfig
|
|
|
|
scraper := xpathScraper{
|
|
Scene: config,
|
|
Common: common,
|
|
}
|
|
|
|
return scraper
|
|
}
|
|
|
|
func verifyTags(t *testing.T, expectedTagNames []string, actualTags []*models.ScrapedSceneTag) {
|
|
t.Helper()
|
|
|
|
i := 0
|
|
for i < len(expectedTagNames) || i < len(actualTags) {
|
|
expectedTag := ""
|
|
actualTag := ""
|
|
if i < len(expectedTagNames) {
|
|
expectedTag = expectedTagNames[i]
|
|
}
|
|
if i < len(actualTags) {
|
|
actualTag = actualTags[i].Name
|
|
}
|
|
|
|
if expectedTag != actualTag {
|
|
t.Errorf("Expected tag %s, got %s", expectedTag, actualTag)
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedScenePerformer) {
|
|
t.Helper()
|
|
|
|
i := 0
|
|
for i < len(expectedNames) || i < len(actualPerformers) {
|
|
expectedName := ""
|
|
actualName := ""
|
|
expectedURL := ""
|
|
actualURL := ""
|
|
if i < len(expectedNames) {
|
|
expectedName = expectedNames[i]
|
|
}
|
|
if i < len(expectedURLs) {
|
|
expectedURL = expectedURLs[i]
|
|
}
|
|
if i < len(actualPerformers) {
|
|
actualName = actualPerformers[i].Name
|
|
if actualPerformers[i].URL != nil {
|
|
actualURL = *actualPerformers[i].URL
|
|
}
|
|
}
|
|
|
|
if expectedName != actualName {
|
|
t.Errorf("Expected performer name %s, got %s", expectedName, actualName)
|
|
}
|
|
if expectedURL != actualURL {
|
|
t.Errorf("Expected perfromer URL %s, got %s", expectedName, actualName)
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
func TestApplySceneXPathConfig(t *testing.T) {
|
|
reader := strings.NewReader(sceneHTML)
|
|
doc, err := htmlquery.Parse(reader)
|
|
|
|
if err != nil {
|
|
t.Errorf("Error loading document: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
scraper := makeSceneXPathConfig()
|
|
|
|
scene, err := scraper.scrapeScene(doc)
|
|
|
|
if err != nil {
|
|
t.Errorf("Error scraping scene: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
const title = "Test Video"
|
|
|
|
verifyField(t, title, scene.Title, "Title")
|
|
|
|
// verify tags
|
|
expectedTags := []string{
|
|
"Amateur",
|
|
"Babe",
|
|
"Blowjob",
|
|
"Exclusive",
|
|
"HD Porn",
|
|
"Pornstar",
|
|
"Public",
|
|
"Pussy Licking",
|
|
"Threesome",
|
|
"Verified Models",
|
|
}
|
|
verifyTags(t, expectedTags, scene.Tags)
|
|
|
|
expectedPerformerNames := []string{
|
|
"Alex D",
|
|
"Mia Malkova",
|
|
"Riley Reid",
|
|
}
|
|
|
|
expectedPerformerURLs := []string{
|
|
"/pornstar/alex-d",
|
|
"/pornstar/mia-malkova",
|
|
"/pornstar/riley-reid",
|
|
}
|
|
|
|
verifyPerformers(t, expectedPerformerNames, expectedPerformerURLs, scene.Performers)
|
|
|
|
const expectedStudioName = "Sis Loves Me"
|
|
const expectedStudioURL = "/channels/sis-loves-me"
|
|
|
|
verifyField(t, expectedStudioName, &scene.Studio.Name, "Studio.Name")
|
|
verifyField(t, expectedStudioURL, scene.Studio.URL, "Studio.URL")
|
|
}
|
|
|
|
func TestLoadXPathScraperFromYAML(t *testing.T) {
|
|
const yamlStr = `name: Test
|
|
performerByURL:
|
|
- action: scrapeXPath
|
|
url:
|
|
- test.com
|
|
scraper: performerScraper
|
|
xPathScrapers:
|
|
performerScraper:
|
|
performer:
|
|
name: //h1[@itemprop="name"]
|
|
`
|
|
|
|
config := &scraperConfig{}
|
|
err := yaml.Unmarshal([]byte(yamlStr), &config)
|
|
|
|
if err != nil {
|
|
t.Errorf("Error loading yaml: %s", err.Error())
|
|
return
|
|
}
|
|
}
|