mirror of https://github.com/explosion/spaCy.git
* Fix https for url detection
This commit is contained in:
parent
aa12b374c0
commit
3879d28457
|
@@ -69,7 +69,7 @@ TLDs = set("com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|mu
|
||||||
cpdef bint like_url(unicode string):
|
cpdef bint like_url(unicode string):
|
||||||
# We're looking for things that function in text like URLs. So, valid URL
|
# We're looking for things that function in text like URLs. So, valid URL
|
||||||
# or not, anything that says http:// is going to be good.
|
# or not, anything that says http:// is going to be good.
|
||||||
if string.startswith('http://'):
|
if string.startswith('http://') or string.startswith('https://'):
|
||||||
return True
|
return True
|
||||||
elif string.startswith('www.') and len(string) >= 5:
|
elif string.startswith('www.') and len(string) >= 5:
|
||||||
return True
|
return True
|
||||||
|
|
Loading…
Reference in New Issue