Update builtin freeones scraper (#5171)

This commit is contained in:
WithoutPants 2024-08-28 09:00:14 +10:00 committed by GitHub
parent 996dfb1c2f
commit 10341fba58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 30 additions and 25 deletions

View File

@@ -42,44 +42,48 @@ xPathScrapers:
selector: //h1 selector: //h1
postProcess: postProcess:
- replace: - replace:
- regex: \sBio\s*$ - regex: (.+)\sidentifies.+
with: "" with: $1
URL: //link[@rel="alternate" and @hreflang="x-default"]/@href URL: //link[@rel="alternate" and @hreflang="x-default"]/@href
Twitter: //a[not(starts-with(@href,'https://twitter.com/FreeOnes'))][contains(@href,'twitter.com/')]/@href Twitter: //form//a[contains(@href,'twitter.com/')]/@href
Instagram: //a[contains(@href,'instagram.com/')]/@href Instagram: //form//a[contains(@href,'instagram.com/')]/@href
Birthdate: Birthdate:
selector: //span[contains(text(),'Born On')] selector: //span[@data-test="link_span_dateOfBirth"]/text()
postProcess: postProcess:
- replace:
- regex: Born On
with:
- parseDate: January 2, 2006 - parseDate: January 2, 2006
Ethnicity: Ethnicity:
selector: //a[@data-test="link_ethnicity"]/span/text() selector: //span[@data-test="link_span_ethnicity"]
postProcess: postProcess:
- map: - map:
Asian: Asian Asian: Asian
Caucasian: White Caucasian: White
Black: Black Black: Black
Latin: Hispanic Latin: Hispanic
Country: //a[@data-test="link-country"]/span/text() Country:
EyeColor: //span[text()='Eye Color']/following-sibling::span/a selector: //a[@data-test="link_placeOfBirth"][contains(@href, 'country')]/span/text()
postProcess:
- map:
United States: "USA"
EyeColor: //span[text()='Eye Color:']/following-sibling::span/a/span/text()
Height: Height:
selector: //span[text()='Height']/following-sibling::span/a selector: //span[text()='Height:']/following-sibling::span/a
postProcess: postProcess:
- replace: - replace:
- regex: \D+[\s\S]+ - regex: \scm
with: "" with: ""
- map: - map:
Unknown: "" Unknown: ""
Measurements: Measurements:
selector: //span[text()='Measurements']/following-sibling::span/span/a selector: //span[(@data-test='link_span_bra') or (@data-test='link_span_waist') or (@data-test='link_span_hip')]
concat: " - " concat: " - "
postProcess: postProcess:
- replace:
- regex: \sIn
with: ""
- map: - map:
Unknown: "" Unknown: ""
FakeTits: FakeTits:
selector: //span[text()='Boobs']/following-sibling::span/a selector: //span[text()='Boobs:']/following-sibling::span/a
postProcess: postProcess:
- map: - map:
Unknown: "" Unknown: ""
@@ -88,14 +92,16 @@ xPathScrapers:
CareerLength: CareerLength:
selector: //div[contains(@class,'timeline-horizontal')]//p[@class='m-0'] selector: //div[contains(@class,'timeline-horizontal')]//p[@class='m-0']
concat: "-" concat: "-"
Aliases: //p[@data-test='p_aliases']/text() Aliases:
selector: //span[@data-test='link_span_aliases']/text()
concat: ", "
Tattoos: Tattoos:
selector: //span[text()='Tattoos']/following-sibling::span/span selector: //span[text()='Tattoo locations:']/following-sibling::span
postProcess: postProcess:
- map: - map:
Unknown: "" Unknown: ""
Piercings: Piercings:
selector: //span[text()='Piercings']/following-sibling::span/span selector: //span[text()='Piercing locations:']/following-sibling::span
postProcess: postProcess:
- map: - map:
Unknown: "" Unknown: ""
@@ -103,7 +109,6 @@ xPathScrapers:
selector: //div[contains(@class,'image-container')]//a/img/@src selector: //div[contains(@class,'image-container')]//a/img/@src
Gender: Gender:
fixed: "Female" fixed: "Female"
Details: //div[@data-test="biography"]
DeathDate: DeathDate:
selector: //div[contains(text(),'Passed away on')] selector: //div[contains(text(),'Passed away on')]
postProcess: postProcess:
@@ -111,15 +116,15 @@ xPathScrapers:
- regex: Passed away on (.+) at the age of \d+ - regex: Passed away on (.+) at the age of \d+
with: $1 with: $1
- parseDate: January 2, 2006 - parseDate: January 2, 2006
HairColor: //span[text()='Hair Color']/following-sibling::span/a HairColor: //span[@data-test="link_span_hair_color"]
Weight: Weight:
selector: //span[text()='Weight']/following-sibling::span/a selector: //span[@data-test="link_span_weight"]
postProcess: postProcess:
- replace: - replace:
- regex: \D+[\s\S]+ - regex: \skg
with: "" with: ""
# Last updated April 13, 2021 # Last Updated January 2, 2024
` `
func getFreeonesScraper(globalConfig GlobalConfig) scraper { func getFreeonesScraper(globalConfig GlobalConfig) scraper {