From 9505c6a72ba92287d6cb708cecd0072860264b01 Mon Sep 17 00:00:00 2001
From: Gyorgy Orosz
Date: Sat, 14 Jan 2017 20:26:32 +0100
Subject: [PATCH] Passing all old tests.

---
 spacy/hu/punctuation.py          | 3 +--
 spacy/tests/hu/test_tokenizer.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/spacy/hu/punctuation.py b/spacy/hu/punctuation.py
index af0c2b559..ca1656a18 100644
--- a/spacy/hu/punctuation.py
+++ b/spacy/hu/punctuation.py
@@ -35,8 +35,7 @@ TOKENIZER_INFIXES = (
         r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
         r'(?<=[{a}])--(?=[{a}])'.format(a=ALPHA),
         r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
-        r'(?<=[0-9{a}])({q})(?=[\-{a}])'.format(a=ALPHA, q=QUOTES),
+        r'(?<=[0-9{a}])(({q})|[\)\]])(?=\-[{a}])'.format(a=ALPHA, q=QUOTES),
     ]
 )
-
 __all__ = ["TOKENIZER_PREFIXES", "TOKENIZER_SUFFIXES", "TOKENIZER_INFIXES"]
diff --git a/spacy/tests/hu/test_tokenizer.py b/spacy/tests/hu/test_tokenizer.py
index 4536d6658..e4d40c195 100644
--- a/spacy/tests/hu/test_tokenizer.py
+++ b/spacy/tests/hu/test_tokenizer.py
@@ -248,7 +248,7 @@ WIKI_TESTS = [
     ('"(...)"–sokkal ', ['"', '(', '...', ')', '"', '–sokkal']),
 ]
 
-TESTCASES = DEFAULT_TESTS + DOT_TESTS + QUOTE_TESTS + NUMBER_TESTS + HYPHEN_TESTS # + WIKI_TESTS
+TESTCASES = DEFAULT_TESTS + DOT_TESTS + QUOTE_TESTS + NUMBER_TESTS + HYPHEN_TESTS + WIKI_TESTS
 
 
 @pytest.mark.parametrize('text,expected_tokens', TESTCASES)