mirror of https://github.com/explosion/spaCy.git
Fix raw strings in URL pattern (#5972)
Add missing raw string specifiers.
This commit is contained in:
parent
332803eda9
commit
7d7b65ffd4
|
@@ -37,13 +37,13 @@ URL_PATTERN = (
|
|||
r"|"
|
||||
# host & domain names
|
||||
# mods: match is case-sensitive, so include [A-Z]
|
||||
"(?:" # noqa
|
||||
"(?:"
|
||||
"[A-Za-z0-9\u00a1-\uffff]"
|
||||
"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
||||
")?"
|
||||
"[A-Za-z0-9\u00a1-\uffff]\."
|
||||
")+"
|
||||
r"(?:" # noqa
|
||||
r"(?:"
|
||||
r"[A-Za-z0-9\u00a1-\uffff]"
|
||||
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
||||
r")?"
|
||||
r"[A-Za-z0-9\u00a1-\uffff]\."
|
||||
r")+"
|
||||
# TLD identifier
|
||||
# mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
|
||||
# strings like "lower.Upper", which can be split on "." by infixes in some
|
||||
|
|
Loading…
Reference in New Issue