From 834dfb0e9da3d2aef4b0aa4af9464983ee625ca6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 25 Sep 2018 21:32:05 +0200 Subject: [PATCH] Add morph attribute to GoldParse --- spacy/gold.pxd | 1 + spacy/gold.pyx | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/spacy/gold.pxd b/spacy/gold.pxd index a1550b1ef..fdf6f5440 100644 --- a/spacy/gold.pxd +++ b/spacy/gold.pxd @@ -24,6 +24,7 @@ cdef class GoldParse: cdef public int loss cdef public list words cdef public list tags + cdef public list morph cdef public list heads cdef public list labels cdef public dict orths diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 20a319f5d..c9be6d6f1 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -399,7 +399,7 @@ cdef class GoldParse: return cls(doc, words=words, tags=tags, heads=heads, deps=deps, entities=entities, make_projective=make_projective) - def __init__(self, doc, annot_tuples=None, words=None, tags=None, + def __init__(self, doc, annot_tuples=None, words=None, tags=None, morph=None, heads=None, deps=None, entities=None, make_projective=False, cats=None, **_): """Create a GoldParse. 
@@ -436,6 +436,8 @@ cdef class GoldParse: deps = [None for _ in doc] + if morph is None: + morph = [None for _ in doc] if entities is None: entities = [None for _ in doc] elif len(entities) == 0: entities = ['O' for _ in doc] elif not isinstance(entities[0], basestring): @@ -460,6 +462,7 @@ cdef class GoldParse: self.heads = [None] * len(doc) self.labels = [None] * len(doc) self.ner = [None] * len(doc) + self.morph = [None] * len(doc) # This needs to be done before we align the words if make_projective and heads is not None and deps is not None: @@ -487,10 +490,12 @@ cdef class GoldParse: self.heads[i] = None self.labels[i] = None self.ner[i] = 'O' + self.morph[i] = set() if gold_i is None: if i in i2j_multi: self.words[i] = words[i2j_multi[i]] self.tags[i] = tags[i2j_multi[i]] + self.morph[i] = morph[i2j_multi[i]] is_last = i2j_multi[i] != i2j_multi.get(i+1) is_first = i2j_multi[i] != i2j_multi.get(i-1) # Set next word in multi-token span as head, until last