mirror of https://github.com/explosion/spaCy.git
Add support for sent_start to GoldParse
This commit is contained in:
parent
44589fb38c
commit
4bb6bc3f9e
|
@ -9,6 +9,7 @@ cdef struct GoldParseC:
|
|||
int* tags
|
||||
int* heads
|
||||
int* has_dep
|
||||
int* sent_start
|
||||
attr_t* labels
|
||||
int** brackets
|
||||
Transition* ner
|
||||
|
|
|
@ -426,6 +426,7 @@ cdef class GoldParse:
|
|||
self.c.heads = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||
self.c.labels = <attr_t*>self.mem.alloc(len(doc), sizeof(attr_t))
|
||||
self.c.has_dep = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||
self.c.sent_start = <int*>self.mem.alloc(len(doc), sizeof(int))
|
||||
self.c.ner = <Transition*>self.mem.alloc(len(doc), sizeof(Transition))
|
||||
|
||||
self.cats = list(cats)
|
||||
|
@ -482,6 +483,10 @@ cdef class GoldParse:
|
|||
"""
|
||||
return not nonproj.is_nonproj_tree(self.heads)
|
||||
|
||||
@property
|
||||
def sent_starts(self):
|
||||
"""Return the gold sentence-start values as a Python list.

Copies `self.c.sent_start[i]` for every index in `range(self.length)`
into a new list, so the result has `self.length` entries.
NOTE(review): the meaning of the individual integer values is not
visible in this hunk -- confirm against the code that fills
`self.c.sent_start`.
"""
return [self.c.sent_start[i] for i in range(self.length)]
|
||||
|
||||
|
||||
def biluo_tags_from_offsets(doc, entities, missing='O'):
|
||||
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out
|
||||
|
|
Loading…
Reference in New Issue