Maintaining compatibility with other spaCy tokenizers.

This commit is contained in:
Gyorgy Orosz 2017-01-14 16:19:15 +01:00
parent be7a7aeb1a
commit f77c0284d6
2 changed files with 2 additions and 2 deletions

View File

@@ -540,7 +540,7 @@ ORD_NUM_OR_DATE = "([A-Z0-9]+[./-])*(\d+\.?)"
_NUM = "[+\-]?\d+([,.]\d+)*"
_OPS = "[=<>+\-\*/^()÷%²]"
_SUFFIES = "-[{a}]+".format(a=ALPHA_LOWER)
NUMERIC_EXP = "\(?({n})(({o})({n}))*[)%]?".format(n=_NUM, o=_OPS)
NUMERIC_EXP = "({n})(({o})({n}))*[%]?".format(n=_NUM, o=_OPS)
TIME_EXP = "\d+(:\d+)*(\.\d+)?"
NUMS = "(({ne})|({t})|({on})|({c}))({s})?".format(

View File

@@ -94,7 +94,7 @@ NUMBER_TESTS = [
('A 2<3 van.', ['A', '2<3', 'van', '.']),
('A 2=3 van.', ['A', '2=3', 'van', '.']),
('A 2÷3 van.', ['A', '2÷3', 'van', '.']),
('A (2÷3)-2/5=1 van.', ['A', '(2÷3)-2/5=1', 'van', '.']),
('A 1=(2÷3)-2/5 van.', ['A', '1=(2÷3)-2/5', 'van', '.']),
('A 2 +3 van.', ['A', '2', '+3', 'van', '.']),
('A 2+ 3 van.', ['A', '2', '+', '3', 'van', '.']),
('A 2 + 3 van.', ['A', '2', '+', '3', 'van', '.']),