2014-07-07 02:29:24 +00:00
|
|
|
from libcpp.vector cimport vector
|
|
|
|
|
|
|
|
from spacy.spacy cimport StringHash
|
2014-07-07 10:47:21 +00:00
|
|
|
from spacy.spacy cimport Language
|
2014-07-07 14:58:48 +00:00
|
|
|
from spacy.spacy cimport Lexeme
|
|
|
|
from spacy.spacy cimport Lexeme_addr
|
2014-07-07 10:47:21 +00:00
|
|
|
from spacy.tokens cimport Tokens
|
2014-07-07 02:29:24 +00:00
|
|
|
|
|
|
|
|
2014-07-07 10:47:21 +00:00
|
|
|
# Declares the EnglishPTB extension type, a subclass of Language
# (cimported from spacy.spacy above).
cdef class EnglishPTB(Language):
|
2014-08-18 17:14:00 +00:00
|
|
|
# C-level (cdef) method: takes a unicode word and returns a C int.
# NOTE(review): presumably the returned int is the position at which
# `word` should be split -- confirm against the matching implementation file.
cdef int find_split(self, unicode word)
|
2014-07-07 10:47:21 +00:00
|
|
|
|
2014-07-07 02:29:24 +00:00
|
|
|
|
2014-07-07 10:47:21 +00:00
|
|
|
# Module-level variable typed as EnglishPTB.
# NOTE(review): looks like a shared instance used by the module-level
# functions declared below -- confirm in the implementation file.
cdef EnglishPTB EN_PTB
|
2014-07-07 02:29:24 +00:00
|
|
|
|
|
|
|
# Takes a unicode word and returns a Lexeme_addr; callable from both C and
# Python (cpdef).
# `except 0` reserves 0 as the error return value: a return of 0 tells the
# caller that an exception was raised, so 0 can never be a valid result.
cpdef Lexeme_addr lookup(unicode word) except 0
|
2014-07-07 10:47:21 +00:00
|
|
|
# Takes a unicode string and returns a Tokens object (cimported from
# spacy.tokens); callable from both C and Python (cpdef).
cpdef Tokens tokenize(unicode string)
|
2014-07-07 02:29:24 +00:00
|
|
|
# Takes a StringHash value and returns a unicode string; callable from both
# C and Python (cpdef).
# NOTE(review): presumably returns the string that hashed to `hash_value` --
# confirm in the implementation file.
cpdef unicode unhash(StringHash hash_value)
|