From 81d3a1edb11a914e8854193137900d3cd80ad3dd Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Tue, 27 Jul 2021 12:07:01 +0200
Subject: [PATCH] Use tokenizer URL_MATCH pattern in LIKE_URL (#8765)

---
 spacy/lang/lex_attrs.py        | 3 +++
 spacy/tests/lang/test_attrs.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index 12016c273..6ed981a06 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -3,6 +3,7 @@ import unicodedata
 import re
 
 from .. import attrs
+from .tokenizer_exceptions import URL_MATCH
 
 
 _like_email = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)").match
@@ -109,6 +110,8 @@ def like_url(text: str) -> bool:
         return True
     if tld.isalpha() and tld in _tlds:
         return True
+    if URL_MATCH(text):
+        return True
     return False
 
 
diff --git a/spacy/tests/lang/test_attrs.py b/spacy/tests/lang/test_attrs.py
index b39109455..6a7a404fd 100644
--- a/spacy/tests/lang/test_attrs.py
+++ b/spacy/tests/lang/test_attrs.py
@@ -58,9 +58,10 @@ def test_lex_attrs_is_currency(text, match):
         ("www.google.com", True),
         ("google.com", True),
         ("sydney.com", True),
-        ("2girls1cup.org", True),
+        ("1abc2def.org", True),
         ("http://stupid", True),
         ("www.hi", True),
+        ("example.com/example", True),
         ("dog", False),
         ("1.2", False),
         ("1.a", False),
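
Note (not part of the patch itself): a short, illustrative sketch of how like_url
behaves once the URL_MATCH fallback above is in place. The import path comes from
the file touched by this patch; the expected results mirror the test cases in
spacy/tests/lang/test_attrs.py, and the standalone script around them is only an
assumption for demonstration.

    from spacy.lang.lex_attrs import like_url

    # A URL with a path is now recognized via the URL_MATCH fallback; the
    # TLD allowlist check alone does not cover it, which is why this exact
    # case is added to the tests in this patch.
    print(like_url("example.com/example"))  # True
    print(like_url("www.google.com"))       # True (www. prefix check)
    print(like_url("dog"))                  # False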