From e170faf5b0d949dde0beb35e02e790d9678f8b67 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 3 Dec 2014 11:05:15 +1100 Subject: [PATCH] * Hack Tokens to work without tagger.pyx --- spacy/tokens.pxd | 7 +++---- spacy/tokens.pyx | 13 +++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index 36dee698e..2c97a3163 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -4,9 +4,8 @@ cimport numpy as np from cymem.cymem cimport Pool from .lexeme cimport Lexeme -from .typedefs cimport flag_t +from .typedefs cimport flags_t from .utf8string cimport StringStore -from .tagger cimport TagType from thinc.typedefs cimport atom_t @@ -29,7 +28,7 @@ cdef class Tokens: cdef int extend(self, int i, Lexeme** lexemes, int n) except -1 cdef int push_back(self, int i, Lexeme* lexeme) except -1 - cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1 + cpdef int set_tag(self, int i, int tag_type, int tag) except -1 cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features) @@ -56,4 +55,4 @@ cdef class Token: cdef public float prob - cdef public flag_t flags + cdef public flags_t flags diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index ba8812f2e..e8e016944 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -1,7 +1,9 @@ # cython: profile=True from .lexeme cimport * cimport cython -from .tagger cimport POS, ENTITY + +POS = 0 +ENTITY = 0 DEF PADDING = 5 @@ -96,7 +98,7 @@ cdef class Tokens: idx = self.push_back(idx, lexemes[i]) return idx - cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1: + cpdef int set_tag(self, int i, int tag_type, int tag) except -1: if tag_type == POS: self.pos[i] = tag elif tag_type == ENTITY: @@ -108,8 +110,7 @@ cdef class Tokens: output = np.ndarray(shape=(self.length, len(features)), dtype=int) for i in range(self.length): for j, feature in enumerate(features): - output[i, j] = self.lex[i].sic - #output[i, j] = lexeme_get_feature(self.lex[i], feature) + output[i, j] = get_attr(self.lex[i], feature) return output def _realloc(self, new_size): @@ -140,8 +141,8 @@ cdef class Token: self.cluster = lex['cluster'] self.length = lex['length'] - self.postype = lex['postype'] - self.sensetype = lex['supersense'] + self.postype = lex['pos_type'] + self.sensetype = lex['sense_type'] self.sic = lex['sic'] self.norm = lex['norm'] self.shape = lex['shape']