* Fix tag handling in doc.merge, and assign sent_start when setting heads.

This commit is contained in:
Matthew Honnibal 2015-11-03 18:14:53 +11:00
parent 068222c09a
commit 09664177d7
1 changed file with 10 additions and 1 deletion

View File

@@ -471,7 +471,10 @@ cdef class Doc:
# Update fields
token.lex = lex
token.spacy = self.data[end-1].spacy
self.vocab.morphology.assign_tag(token, self.vocab.strings[tag])
if tag in self.vocab.morphology.tag_map:
self.vocab.morphology.assign_tag(token, self.vocab.strings[tag])
else:
token.tag = self.vocab.strings[tag]
token.tag = self.vocab.strings[tag]
token.lemma = self.vocab.strings[lemma]
if ent_type == 'O':
@@ -545,3 +548,9 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
if child.r_edge > head.r_edge:
head.r_edge = child.r_edge
head.r_kids += 1
# Set sentence starts
for i in range(length):
if tokens[i].head == 0 and tokens[i].dep != 0:
tokens[tokens[i].l_edge].sent_start = True