From d8d0ce081b9bc438ad9243b286a320200a4f21c9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 30 Dec 2018 15:48:10 +0100 Subject: [PATCH] Fix clobber of doc.is_tagged in doc.from_array() If doc.from_array() was called with, say, only entity information, this would cause doc.is_tagged to be set to False, even if tags were set. This caused tags to be dropped from serialisation. The same was true for doc.is_parsed. Closes #3012. --- spacy/tokens/doc.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index b3b137cbe..9e10c8d1f 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -708,8 +708,8 @@ cdef class Doc: if array[i, col] != 0: self.vocab.morphology.assign_tag(&tokens[i], array[i, col]) # set flags - self.is_parsed = bool(HEAD in attrs or DEP in attrs) - self.is_tagged = bool(TAG in attrs or POS in attrs) + self.is_parsed = bool(self.is_parsed or HEAD in attrs or DEP in attrs) + self.is_tagged = bool(self.is_tagged or TAG in attrs or POS in attrs) # if document is parsed, set children if self.is_parsed: set_children_from_heads(self.c, self.length)