From 68d1cdad625e6e5387912ac08f48f0d9b2f7058c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 7 Nov 2014 04:42:31 +1100
Subject: [PATCH] * When encoding POS/NER tags, accept '-' as a missing value

---
 spacy/pos_util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/pos_util.py b/spacy/pos_util.py
index 039e6b15d..e5716665e 100644
--- a/spacy/pos_util.py
+++ b/spacy/pos_util.py
@@ -36,6 +36,8 @@ def read_gold(file_, tag_list, col):
     return golds
 
 def _encode_pos(tag, tag_ids, tag_list):
+    if tag == '-':
+        return 0
     if tag not in tag_ids:
         tag_ids[tag] = len(tag_list)
         tag_list.append(tag)