From 87e9487d76c1eabd722fbcc10b4fcae50dff3107 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 17 Dec 2014 21:10:12 +1100 Subject: [PATCH] * Work on parser --- setup.py | 10 ++++++++++ spacy/tokens.pxd | 2 ++ spacy/tokens.pyx | 7 +++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 827d44fc6..f8d9ad120 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,16 @@ exts = [ Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes), Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++", include_dirs=includes), + + Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++", + include_dirs=includes), + Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++", + include_dirs=includes), + Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++", + include_dirs=includes), + Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++", + include_dirs=includes), + #Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner.bilou_moves", ["spacy/ner/bilou_moves.pyx"], language="c++", include_dirs=includes), diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index 1b1b43cef..9a0e09f92 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -56,6 +56,8 @@ cdef class Token: cdef public int idx cdef int pos cdef int lemma + cdef public int head + cdef public int dep_tag cdef public atom_t id cdef public atom_t cluster diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index 617feb269..29ee28c32 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -51,7 +51,8 @@ cdef class Tokens: def __getitem__(self, i): bounds_check(i, self.length, PADDING) return Token(self.lang, i, self.data[i].idx, self.data[i].pos, - self.data[i].lemma, self.data[i].lex[0]) + self.data[i].lemma, self.data[i].head, self.data[i].dep_tag, + self.data[i].lex[0]) def __iter__(self): for i in range(self.length): @@ -115,11 +116,13 @@ cdef class Tokens: @cython.freelist(64) cdef class Token: def __init__(self, Language lang, int i, int idx, - int pos, int lemma, dict lex): + int pos, int lemma, int head, int dep_tag, dict lex): self.lang = lang self.idx = idx self.pos = pos self.i = i + self.head = head + self.dep_tag = dep_tag self.id = lex['id'] self.lemma = lemma