From 83dca920d4ad68b11428b65007e5fde43b05935f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 7 Apr 2017 15:54:25 +0200 Subject: [PATCH] Rename test #913 -> #957, comment Make test for #957 reference correct bug. Add comment. Previous commit closes #957. --- spacy/language_data/tokenizer_exceptions.py | 4 ++++ spacy/tests/regression/{test_issue913.py => test_issue957.py} | 0 2 files changed, 4 insertions(+) rename spacy/tests/regression/{test_issue913.py => test_issue957.py} (100%) diff --git a/spacy/language_data/tokenizer_exceptions.py b/spacy/language_data/tokenizer_exceptions.py index 1208e1219..b84adb2c4 100644 --- a/spacy/language_data/tokenizer_exceptions.py +++ b/spacy/language_data/tokenizer_exceptions.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +# The use of this module turns out to be important, to avoid pathological +# back-tracking. See Issue #957 import regex # URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex @@ -23,6 +25,8 @@ _URL_PATTERN = ( # excludes reserved space >= 224.0.0.0 # excludes network & broadcast addresses # (first & last IP address of each class) + # MH: Do we really need this? Seems excessive, and seems to have caused + # Issue #957 r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" diff --git a/spacy/tests/regression/test_issue913.py b/spacy/tests/regression/test_issue957.py similarity index 100% rename from spacy/tests/regression/test_issue913.py rename to spacy/tests/regression/test_issue957.py