Add support for sent_start to GoldParse

This commit is contained in:
Matthew Honnibal 2017-08-25 20:03:14 -05:00
parent 44589fb38c
commit 4bb6bc3f9e
2 changed files with 6 additions and 0 deletions

View File

@ -9,6 +9,7 @@ cdef struct GoldParseC:
int* tags
int* heads
int* has_dep
int* sent_start
attr_t* labels
int** brackets
Transition* ner

View File

@ -426,6 +426,7 @@ cdef class GoldParse:
self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
self.c.sent_start = <int*>self.mem.alloc(len(doc), sizeof(int))
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
self.cats = list(cats)
@ -482,6 +483,10 @@ cdef class GoldParse:
"""
return not nonproj.is_nonproj_tree(self.heads)
@property
def sent_starts(self):
    """Return the gold `sent_start` values as a new Python list.

    Reads `self.c.sent_start[i]` for every `i` in `range(self.length)`
    and collects the values in index order, so the result has exactly
    `self.length` entries.
    """
    starts = []
    for token_index in range(self.length):
        starts.append(self.c.sent_start[token_index])
    return starts
def biluo_tags_from_offsets(doc, entities, missing='O'):
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out