From b6b5908f5e9ca4e6a2e46ca42a2d370b00119d44 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 26 May 2020 14:50:53 +0200 Subject: [PATCH] Prefer _SP over SP for default tag map space attrs If `_SP` is already in the tag map, use the mapping from `_SP` instead of `SP` so that `SP` can be a valid non-space tag. (Chinese has a non-space tag `SP` which was overriding the mapping of `_SP` to `SPACE`.) --- spacy/morphology.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index c146094a9..a9bab38ed 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -152,7 +152,10 @@ cdef class Morphology: self.tags = PreshMap() # Add special space symbol. We prefix with underscore, to make sure it # always sorts to the end. - space_attrs = tag_map.get('SP', {POS: SPACE}) + if '_SP' in tag_map: + space_attrs = tag_map.get('_SP') + else: + space_attrs = tag_map.get('SP', {POS: SPACE}) if '_SP' not in tag_map: self.strings.add('_SP') tag_map = dict(tag_map)