mirror of https://github.com/explosion/spaCy.git
Port over URL pattern changes from #1411
This commit is contained in:
parent
09aed58140
commit
a4d974d97b
|
@@ -36,11 +36,11 @@ URL_PATTERN = (
|
|||
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
|
||||
r"|"
|
||||
# host name
|
||||
r"(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)"
|
||||
r"(?:(?:[a-z0-9\-]*)?[a-z0-9]+)"
|
||||
# domain name
|
||||
r"(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*"
|
||||
r"(?:\.(?:[a-z0-9\-])*[a-z0-9]+)*"
|
||||
# TLD identifier
|
||||
r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
|
||||
r"(?:\.(?:[a-z]{2,}))"
|
||||
r")"
|
||||
# port number
|
||||
r"(?::\d{2,5})?"
|
||||
|
|
|
@@ -33,13 +33,10 @@ URLS_SHOULD_MATCH = [
|
|||
"http://userid:password@example.com/",
|
||||
"http://142.42.1.1/",
|
||||
"http://142.42.1.1:8080/",
|
||||
"http://⌘.ws",
|
||||
"http://⌘.ws/",
|
||||
"http://foo.com/blah_(wikipedia)#cite-1",
|
||||
"http://foo.com/blah_(wikipedia)_blah#cite-1",
|
||||
"http://foo.com/unicode_(✪)_in_parens",
|
||||
"http://foo.com/(something)?after=parens",
|
||||
"http://☺.damowmow.com/",
|
||||
"http://code.google.com/events/#&product=browser",
|
||||
"http://j.mp",
|
||||
"ftp://foo.bar/baz",
|
||||
|
@@ -49,14 +46,17 @@ URLS_SHOULD_MATCH = [
|
|||
"http://a.b-c.de",
|
||||
"http://223.255.255.254",
|
||||
"http://a.b--c.de/", # this is a legit domain name see: https://gist.github.com/dperini/729294 comment on 9/9/2014
|
||||
"http://✪df.ws/123",
|
||||
"http://➡.ws/䨹",
|
||||
"http://مثال.إختبار",
|
||||
"http://例子.测试",
|
||||
"http://उदाहरण.परीक्षा",
|
||||
|
||||
pytest.mark.xfail("http://foo.com/blah_blah_(wikipedia)"),
|
||||
pytest.mark.xfail("http://foo.com/blah_blah_(wikipedia)_(again)"),
|
||||
pytest.mark.xfail("http://⌘.ws"),
|
||||
pytest.mark.xfail("http://⌘.ws/"),
|
||||
pytest.mark.xfail("http://☺.damowmow.com/"),
|
||||
pytest.mark.xfail("http://✪df.ws/123"),
|
||||
pytest.mark.xfail("http://➡.ws/䨹"),
|
||||
pytest.mark.xfail("http://مثال.إختبار"),
|
||||
pytest.mark.xfail("http://例子.测试"),
|
||||
pytest.mark.xfail("http://उदाहरण.परीक्षा"),
|
||||
]
|
||||
|
||||
URLS_SHOULD_NOT_MATCH = [
|
||||
|
@@ -83,7 +83,6 @@ URLS_SHOULD_NOT_MATCH = [
|
|||
"http://foo.bar/foo(bar)baz quux",
|
||||
"ftps://foo.bar/",
|
||||
"http://-error-.invalid/",
|
||||
"http://-a.b.co",
|
||||
"http://a.b-.co",
|
||||
"http://0.0.0.0",
|
||||
"http://10.1.1.0",
|
||||
|
@@ -99,6 +98,7 @@ URLS_SHOULD_NOT_MATCH = [
|
|||
pytest.mark.xfail("foo.com"),
|
||||
pytest.mark.xfail("http://1.1.1.1.1"),
|
||||
pytest.mark.xfail("http://www.foo.bar./"),
|
||||
pytest.mark.xfail("http://-a.b.co"),
|
||||
]
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue