* Work on parser

This commit is contained in:
Matthew Honnibal 2014-12-17 21:10:12 +11:00
parent 9d7d97978d
commit 87e9487d76
3 changed files with 17 additions and 2 deletions

View File

@@ -57,6 +57,16 @@ exts = [
Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes), Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes),
Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++", Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++",
include_dirs=includes), include_dirs=includes),
Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++",
include_dirs=includes),
Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++",
include_dirs=includes),
#Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes), #Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes),
#Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes),
#Extension("spacy.ner.bilou_moves", ["spacy/ner/bilou_moves.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner.bilou_moves", ["spacy/ner/bilou_moves.pyx"], language="c++", include_dirs=includes),

View File

@@ -56,6 +56,8 @@ cdef class Token:
cdef public int idx cdef public int idx
cdef int pos cdef int pos
cdef int lemma cdef int lemma
cdef public int head
cdef public int dep_tag
cdef public atom_t id cdef public atom_t id
cdef public atom_t cluster cdef public atom_t cluster

View File

@@ -51,7 +51,8 @@ cdef class Tokens:
def __getitem__(self, i): def __getitem__(self, i):
bounds_check(i, self.length, PADDING) bounds_check(i, self.length, PADDING)
return Token(self.lang, i, self.data[i].idx, self.data[i].pos, return Token(self.lang, i, self.data[i].idx, self.data[i].pos,
self.data[i].lemma, self.data[i].lex[0]) self.data[i].lemma, self.data[i].head, self.data[i].dep_tag,
self.data[i].lex[0])
def __iter__(self): def __iter__(self):
for i in range(self.length): for i in range(self.length):
@@ -115,11 +116,13 @@ cdef class Tokens:
@cython.freelist(64) @cython.freelist(64)
cdef class Token: cdef class Token:
def __init__(self, Language lang, int i, int idx, def __init__(self, Language lang, int i, int idx,
int pos, int lemma, dict lex): int pos, int lemma, int head, int dep_tag, dict lex):
self.lang = lang self.lang = lang
self.idx = idx self.idx = idx
self.pos = pos self.pos = pos
self.i = i self.i = i
self.head = head
self.dep_tag = dep_tag
self.id = lex['id'] self.id = lex['id']
self.lemma = lemma self.lemma = lemma