From 5b6ed0575275e86762cc58dab7b01b7fb2a97b63 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 26 Jun 2020 11:38:37 +0200 Subject: [PATCH] attempt to fix _guess_spaces --- spacy/gold/example.pyx | 4 +--- spacy/tests/test_gold.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 76247100d..4834eba83 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -73,7 +73,7 @@ cdef class Example: tok_dict["ORTH"] = [tok.text for tok in predicted] tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted] if not _has_field(tok_dict, "SPACY"): - spaces = _guess_spaces(predicted.text, tok_dict["ORTH"]) + tok_dict["SPACY"] = _guess_spaces(predicted.text, tok_dict["ORTH"]) return Example( predicted, annotations2doc(predicted.vocab, tok_dict, doc_dict) @@ -333,8 +333,6 @@ def _fix_legacy_dict_data(example_dict): else: raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=remapping.keys())) text = example_dict.get("text", example_dict.get("raw")) - if not _has_field(token_dict, "SPACY"): - token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"]) if "HEAD" in token_dict and "SENT_START" in token_dict: # If heads are set, we don't also redundantly specify SENT_START. token_dict.pop("SENT_START") diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py index 463066406..0bde8c990 100644 --- a/spacy/tests/test_gold.py +++ b/spacy/tests/test_gold.py @@ -161,6 +161,7 @@ def test_example_from_dict_no_ner(en_vocab): ner_tags = example.get_aligned_ner() assert ner_tags == [None, None, None, None] + def test_example_from_dict_some_ner(en_vocab): words = ["a", "b", "c", "d"] spaces = [True, True, False, True]