From 3cfeb518ee5a54742366ea5ad60ead420dcd8e3d Mon Sep 17 00:00:00 2001 From: Andrew Janco Date: Tue, 21 Dec 2021 09:46:33 -0500 Subject: [PATCH] Handle "_" value for token pos in conllu data (#9903) * Change '_' to '' when the conllu data has no value for a token's pos, so that Token.pos can still be set * Minor code style Co-authored-by: Adriane Boyd --- spacy/training/converters/conllu_to_docs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/training/converters/conllu_to_docs.py b/spacy/training/converters/conllu_to_docs.py index 66156b6e5..7a4f44d3b 100644 --- a/spacy/training/converters/conllu_to_docs.py +++ b/spacy/training/converters/conllu_to_docs.py @@ -188,6 +188,7 @@ def conllu_sentence_to_doc( id_ = int(id_) - 1 head = (int(head) - 1) if head not in ("0", "_") else id_ tag = pos if tag == "_" else tag + pos = pos if pos != "_" else "" morph = morph if morph != "_" else "" dep = "ROOT" if dep == "root" else dep lemmas.append(lemma)