// Source: stash/pkg/scraper/freeones.go (135 lines, 3.9 KiB, Go).
// Captured from a repository history view; first recorded change 2019-02-09 12:30:49 +00:00.

package scraper
import (
"strings"
"github.com/stashapp/stash/pkg/logger"
2019-02-09 12:30:49 +00:00
)
// FreeonesScraperID is the scraper ID for the built-in Freeones scraper.
// It is the identifier passed to loadScraperFromYAML when getFreeonesScraper
// loads the embedded configuration.
const FreeonesScraperID = "builtin_freeones"
2020-06-17 01:02:06 +00:00
// freeonesScraperConfig is the YAML definition of the built-in Freeones
// scraper. getFreeonesScraper feeds it to loadScraperFromYAML.
//
// It declares a performer-by-name search and a performer-by-URL scraper, both
// using the scrapeXPath action, plus the XPath selectors and post-processing
// steps for each performer field.
//
// The text is a raw string, so every character is runtime data: YAML nesting
// is expressed with leading spaces (never tabs) and must be preserved exactly.
//
// 537: taken from: https://github.com/stashapp/CommunityScrapers/blob/master/scrapers/FreeonesCommunity.yml
const freeonesScraperConfig = `
name: Freeones
performerByName:
  action: scrapeXPath
  queryURL: https://www.freeones.com/babes?q={}&v=teasers&s=relevance&l=96&m%5BcanPreviewFeatures%5D=0
  scraper: performerSearch
performerByURL:
  - action: scrapeXPath
    url:
      - freeones.xxx
      - freeones.com
    scraper: performerScraper

xPathScrapers:
  performerSearch:
    performer:
      Name: //div[@id="search-result"]//p[@data-test="subject-name"]/text()
      URL:
        selector: //div[@id="search-result"]//div[@data-test="teaser-subject"]/a/@href
        postProcess:
          - replace:
              - regex: ^
                with: https://www.freeones.com
              - regex: /feed$
                with: /bio

  performerScraper:
    performer:
      Name:
        selector: //h1
        postProcess:
          - replace:
              - regex: \sBio\s*$
                with: ""
      URL: //link[@rel="alternate" and @hreflang="x-default"]/@href
      Twitter: //a[contains(@href,'twitter.com/')]/@href
      Instagram: //a[contains(@href,'instagram.com/')]/@href
      Birthdate:
        selector: //span[contains(text(),'Born On')]
        postProcess:
          - replace:
              - regex: Born On
                with:
          - parseDate: January 2, 2006
      Ethnicity:
        selector: //a[@data-test="link_ethnicity"]/span/text()
        postProcess:
          - map:
              Asian: Asian
              Caucasian: White
              Black: Black
              Latin: Hispanic
      Country: //a[@data-test="link-country"]/span/text()
      EyeColor: //span[text()='Eye Color']/following-sibling::span/a
      Height:
        selector: //span[text()='Height']/following-sibling::span/a
        postProcess:
          - replace:
              - regex: \D+[\s\S]+
                with: ""
          - map:
              Unknown: ""
      Measurements:
        selector: //span[text()='Measurements']/following-sibling::span/span/a
        concat: " - "
        postProcess:
          - map:
              Unknown: ""
      FakeTits:
        selector: //span[text()='Boobs']/following-sibling::span/a
        postProcess:
          - map:
              Unknown: ""
              Fake: "Yes"
              Natural: "No"
      CareerLength:
        selector: //div[contains(@class,'timeline-horizontal')]//p[@class='m-0']
        concat: "-"
      Aliases: //p[@data-test='p_aliases']/text()
      Tattoos:
        selector: //span[text()='Tattoos']/following-sibling::span/span
        postProcess:
          - map:
              Unknown: ""
      Piercings:
        selector: //span[text()='Piercings']/following-sibling::span/span
        postProcess:
          - map:
              Unknown: ""
      Image:
        selector: //div[contains(@class,'image-container')]//a/img/@src
      Gender:
        fixed: "Female"
      Details: //div[@data-test="biography"]
      DeathDate:
        selector: //div[contains(text(),'Passed away on')]
        postProcess:
          - replace:
              - regex: Passed away on (.+) at the age of \d+
                with: $1
          - parseDate: January 2, 2006
      HairColor: //span[text()='Hair Color']/following-sibling::span/a
      Weight:
        selector: //span[text()='Weight']/following-sibling::span/a
        postProcess:
          - replace:
              - regex: \D+[\s\S]+
                with: ""
# Last updated April 13, 2021
`
func getFreeonesScraper() config {
yml := freeonesScraperConfig
2019-02-09 12:30:49 +00:00
scraper, err := loadScraperFromYAML(FreeonesScraperID, strings.NewReader(yml))
if err != nil {
logger.Fatalf("Error loading builtin freeones scraper: %s", err.Error())
}
return *scraper
}