diff --git a/lang_data/en/infix.txt b/lang_data/en/infix.txt
index 37eca7350..cb9cc0a78 100644
--- a/lang_data/en/infix.txt
+++ b/lang_data/en/infix.txt
@@ -1,3 +1,4 @@
 \.\.\.
 (?<=[a-z])\.(?=[A-Z])
 (?<=[a-zA-Z])-(?=[a-zA-z])
+(?<=[0-9])-(?=[0-9])
diff --git a/tests/tokenizer/test_infix.py b/tests/tokenizer/test_infix.py
index f274e3dd7..d703682cf 100644
--- a/tests/tokenizer/test_infix.py
+++ b/tests/tokenizer/test_infix.py
@@ -7,6 +7,10 @@ def test_hyphen(en_tokenizer):
     assert len(tokens) == 3
 
 
+def test_numeric_range(en_tokenizer):
+    tokens = en_tokenizer('0.1-13.5')
+    assert len(tokens) == 3
+
 def test_period(en_tokenizer):
     tokens = en_tokenizer('best.Known')
     assert len(tokens) == 3