From a005ccd6d7b0d62018481cd5f0ffe34d7fb51ab3 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Sun, 31 May 2020 19:57:54 +0200
Subject: [PATCH] Preserve _SP when filtering tag map in Tagger

To allow "SP" as a tag (for Chinese OntoNotes), preserve "_SP" if
present as the reference `SPACE` POS in the tag map in
`Tagger.begin_training()`.
---
 spacy/pipeline/pipes.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index ccd847ef1..105ce00e6 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -526,6 +526,8 @@ class Tagger(Pipe):
                     new_tag_map[tag] = orig_tag_map[tag]
                 else:
                     new_tag_map[tag] = {POS: X}
+        if "_SP" in orig_tag_map:
+            new_tag_map["_SP"] = orig_tag_map["_SP"]
         cdef Vocab vocab = self.vocab
         if new_tag_map:
             vocab.morphology = Morphology(vocab.strings, new_tag_map,