mirror of https://github.com/explosion/spaCy.git
Fix raw strings in URL pattern (#5972)
Add missing raw string specifiers.
This commit is contained in:
parent
332803eda9
commit
7d7b65ffd4
|
@ -37,13 +37,13 @@ URL_PATTERN = (
|
||||||
r"|"
|
r"|"
|
||||||
# host & domain names
|
# host & domain names
|
||||||
# mods: match is case-sensitive, so include [A-Z]
|
# mods: match is case-sensitive, so include [A-Z]
|
||||||
"(?:" # noqa
|
r"(?:" # noqa
|
||||||
"(?:"
|
r"(?:"
|
||||||
"[A-Za-z0-9\u00a1-\uffff]"
|
r"[A-Za-z0-9\u00a1-\uffff]"
|
||||||
"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
||||||
")?"
|
r")?"
|
||||||
"[A-Za-z0-9\u00a1-\uffff]\."
|
r"[A-Za-z0-9\u00a1-\uffff]\."
|
||||||
")+"
|
r")+"
|
||||||
# TLD identifier
|
# TLD identifier
|
||||||
# mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
|
# mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
|
||||||
# strings like "lower.Upper", which can be split on "." by infixes in some
|
# strings like "lower.Upper", which can be split on "." by infixes in some
|
||||||
|
|
Loading…
Reference in New Issue