From 3879d28457ecf66f37e1e9c0da8ec29661144e52 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 23 Aug 2015 02:40:35 +0200
Subject: [PATCH] * Fix https for url detection

---
 spacy/orth.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/orth.pyx b/spacy/orth.pyx
index 6ffac839b..ca4bbd9ba 100644
--- a/spacy/orth.pyx
+++ b/spacy/orth.pyx
@@ -69,7 +69,7 @@ TLDs = set("com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|mu
 cpdef bint like_url(unicode string):
     # We're looking for things that function in text like URLs. So, valid URL
     # or not, anything they say http:// is going to be good.
-    if string.startswith('http://'):
+    if string.startswith('http://') or string.startswith('https://'):
         return True
     elif string.startswith('www.') and len(string) >= 5:
         return True