2014-12-19 19:54:03 +00:00
|
|
|
from libcpp.vector cimport vector
|
|
|
|
from preshed.maps cimport PreshMap
|
|
|
|
from cymem.cymem cimport Pool
|
|
|
|
from murmurhash.mrmr cimport hash64
|
|
|
|
|
2015-07-22 02:49:39 +00:00
|
|
|
from .structs cimport LexemeC, TokenC
|
2015-07-22 23:18:19 +00:00
|
|
|
from .typedefs cimport utf8_t, attr_t, hash_t
|
2014-12-19 19:54:03 +00:00
|
|
|
from .strings cimport StringStore
|
2015-08-26 17:21:03 +00:00
|
|
|
from .morphology cimport Morphology
|
2014-12-19 19:54:03 +00:00
|
|
|
|
|
|
|
|
2015-01-11 23:26:22 +00:00
|
|
|
# Module-level C variable of type LexemeC. Only the declaration is visible
# here; no initializer appears in this file.
# NOTE(review): presumably a blank placeholder lexeme -- confirm against the
# implementation module that defines it.
cdef LexemeC EMPTY_LEXEME
|
2014-12-24 06:42:00 +00:00
|
|
|
|
|
|
|
|
2014-12-19 19:54:03 +00:00
|
|
|
# Union giving two alternative pointer views over the same storage:
# either lexeme pointers or tokens. Only one member is meaningful at a time;
# the union itself does not record which one.
cdef union LexemesOrTokens:
    # Pointer to const pointer(s) to const LexemeC.
    # NOTE(review): presumably the start of an array of lexeme pointers -- confirm.
    const LexemeC* const* lexemes
    # Pointer to const TokenC.
    # NOTE(review): presumably the start of an array of tokens -- confirm.
    const TokenC* tokens
|
2014-12-19 19:54:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Plain C struct bundling a LexemesOrTokens union with a flag and a length.
cdef struct _Cached:
    # The stored pointer, viewed either as lexemes or as tokens.
    LexemesOrTokens data
    # NOTE(review): presumably true when `data.lexemes` is the valid member and
    # false when `data.tokens` is -- confirm against the code that fills this in.
    bint is_lex
    # NOTE(review): presumably the number of entries reachable through `data`
    # -- confirm.
    int length
|
|
|
|
|
|
|
|
|
|
|
|
# C-level declaration of the Vocab extension type: its attributes and the
# signatures of its C methods. Only declarations live here; method bodies are
# expected in the matching implementation module.
#
# Attribute visibility follows Cython's rules:
#   * plain `cdef`      -> accessible from Cython code only
#   * `cdef readonly`   -> additionally readable from Python
#   * `cdef public`     -> additionally readable and writable from Python
cdef class Vocab:
    # Memory pool (cymem). Not exposed to Python.
    cdef Pool mem
    # String store; readable but not re-assignable from Python.
    cdef readonly StringStore strings
    cdef public Morphology morphology
    # The following are untyped Python objects, fully accessible from Python.
    cdef public object vectors
    cdef public object lookups
    cdef public object writing_system
    cdef public object get_noun_chunks
    # NOTE(review): presumably the number of lexemes held -- confirm.
    cdef readonly int length
    cdef public object data_dir
    cdef public object lex_attr_getters
    cdef public object cfg

    # Lookup helpers. Each returns a const pointer; `except NULL` means a NULL
    # return value signals that a Python exception has been raised.
    cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
    cdef const TokenC* make_fused_token(self, substrings) except NULL

    # Internal helpers. `_new_lexeme` is declared exactly once; it stays ahead
    # of `_add_lex_to_vocab` so the order of C methods is unchanged.
    cdef const LexemeC* _new_lexeme(self, Pool mem, unicode string) except NULL
    # `except -1`: a return value of -1 signals a raised Python exception.
    cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1

    # Hash map (preshed). Not exposed to Python.
    # NOTE(review): presumably keyed by orth id and mapping to LexemeC
    # pointers -- confirm.
    cdef PreshMap _by_orth
|