diff --git a/spacy/orth.pyx b/spacy/orth.pyx index 6ffac839b..ca4bbd9ba 100644 --- a/spacy/orth.pyx +++ b/spacy/orth.pyx @@ -69,7 +69,7 @@ TLDs = set("com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|mu cpdef bint like_url(unicode string): # We're looking for things that function in text like URLs. So, valid URL # or not, anything they say http:// is going to be good. - if string.startswith('http://'): + if string.startswith('http://') or string.startswith('https://'): return True elif string.startswith('www.') and len(string) >= 5: return True