2015-08-28 00:02:33 +00:00
|
|
|
from cymem.cymem cimport Pool
|
2018-09-24 22:35:59 +00:00
|
|
|
from preshed.maps cimport PreshMap, PreshMapArray
|
2015-08-28 00:02:33 +00:00
|
|
|
from libc.stdint cimport uint64_t
|
2018-09-24 21:57:41 +00:00
|
|
|
from murmurhash cimport mrmr
|
2015-08-28 00:02:33 +00:00
|
|
|
|
2019-03-07 13:03:07 +00:00
|
|
|
from .structs cimport TokenC, MorphAnalysisC
|
2015-08-27 07:16:11 +00:00
|
|
|
from .strings cimport StringStore
|
2018-09-24 21:57:41 +00:00
|
|
|
from .typedefs cimport hash_t, attr_t, flags_t
|
2015-08-28 00:02:33 +00:00
|
|
|
from .parts_of_speech cimport univ_pos_t
|
|
|
|
|
2015-10-10 11:10:58 +00:00
|
|
|
from . cimport symbols
|
2015-08-28 00:02:33 +00:00
|
|
|
|
2015-08-26 17:17:35 +00:00
|
|
|
# Declaration of the Morphology extension type: its C-level attributes and
# method signatures only. No method bodies appear in this declaration section.
# Visibility keywords used below:
#   - `readonly`: attribute can be read from Python but not assigned.
#   - `public`:   attribute can be read and assigned from Python.
#   - (none):     attribute is only reachable from Cython/C code.
cdef class Morphology:
|
2015-08-28 00:02:33 +00:00
|
|
|
# cymem Pool instance, read-only from Python.
# NOTE(review): presumably owns the C allocations made by this object -- confirm in the implementation.
cdef readonly Pool mem
|
2015-08-28 01:44:54 +00:00
|
|
|
# StringStore (cimported from .strings), read-only from Python.
cdef readonly StringStore strings
|
2018-09-24 21:57:41 +00:00
|
|
|
# No visibility keyword: this map is not exposed to Python code.
cdef PreshMap tags # Keyed by hash, value is pointer to tag
|
|
|
|
|
2015-08-27 07:16:11 +00:00
|
|
|
# Untyped Python object; the only attribute here that Python code may reassign.
cdef public object lemmatizer
|
2015-10-12 04:27:47 +00:00
|
|
|
# The following four attributes are untyped Python objects, read-only from
# Python. Their concrete types and contents are not visible in these
# declarations -- TODO confirm against the implementation.
cdef readonly object tag_map
|
2018-09-24 22:35:59 +00:00
|
|
|
cdef readonly object tag_names
|
|
|
|
cdef readonly object reverse_index
|
|
|
|
cdef readonly object exc
|
2018-09-25 08:57:33 +00:00
|
|
|
# PreshMapArray (cimported from preshed.maps), read-only from Python.
cdef readonly PreshMapArray _cache
|
2018-09-24 22:35:59 +00:00
|
|
|
# C int, read-only from Python.
# NOTE(review): presumably the number of tags -- confirm.
cdef readonly int n_tags
|
2015-08-28 00:02:33 +00:00
|
|
|
|
|
2018-09-25 19:07:08 +00:00
|
|
|
# `cpdef`: callable from both Python and Cython. No return type is declared,
# so it returns a Python object. `morph` is a hash_t; `features` is untyped.
cpdef update(self, hash_t morph, features)
|
2019-03-07 13:03:07 +00:00
|
|
|
# C-only method. Takes a MorphAnalysisC struct by value and returns a hash_t.
# `except 0`: a return value of 0 signals that a Python exception was raised,
# so 0 is never a valid result.
cdef hash_t insert(self, MorphAnalysisC tag) except 0
|
2018-09-24 21:57:41 +00:00
|
|
|
|
2017-10-11 01:22:49 +00:00
|
|
|
# The remaining methods are C-only, take a pointer to a TokenC struct, and
# return a C int. `except -1`: a return value of -1 signals that a Python
# exception was raised.
# NOTE(review): the token is presumably modified in place through the pointer -- confirm.
cdef int assign_untagged(self, TokenC* token) except -1
|
2015-08-28 00:02:33 +00:00
|
|
|
# `tag` is an untyped Python object here.
cdef int assign_tag(self, TokenC* token, tag) except -1
|
2016-11-04 18:19:09 +00:00
|
|
|
# Same shape as assign_tag, but the tag is given as a C int `tag_id`.
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
2015-08-26 17:17:35 +00:00
|
|
|
|
2018-09-25 13:18:21 +00:00
|
|
|
# Leading underscore marks this as a non-public helper by naming convention.
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
2019-03-07 17:33:06 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level C function (no `self`). Declared `nogil`, so it may be called
# without holding the GIL. `tag` is a pointer to a const MorphAnalysisC, so
# the struct cannot be modified through it. Returns a C int.
# No `except` clause is declared; error propagation follows the default of
# the Cython version in use.
cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil
|
2019-03-07 23:08:35 +00:00
|
|
|
# Module-level C function, callable without the GIL (`nogil`). Takes a pointer
# to a const MorphAnalysisC and a C int `field`; returns an attr_t.
# NOTE(review): the valid range/meaning of `field` is not visible here -- confirm in the implementation.
cdef attr_t get_field(const MorphAnalysisC* tag, int field) nogil
|
|
|
|
# Module-level C function returning a Python list. It is not declared `nogil`
# (it produces a Python object, which requires the GIL). `tag` is a pointer to
# a const MorphAnalysisC.
cdef list list_features(const MorphAnalysisC* tag)
|
2019-03-08 00:38:15 +00:00
|
|
|
# Module-level C function taking a unicode string and returning a C int.
# No `except` clause is declared; error propagation follows the default of
# the Cython version in use.
# NOTE(review): presumably the returned int is usable as the `field` argument of get_field -- confirm.
cdef int attribute_to_field(unicode attribute)
|
2019-03-07 17:33:06 +00:00
|
|
|
|
2019-03-07 23:08:35 +00:00
|
|
|
# Module-level C function with no declared return type, so it returns a
# Python object. `tag` is a pointer to a const MorphAnalysisC.
# NOTE(review): the concrete type of the returned object is not visible here -- confirm.
cdef tag_to_json(const MorphAnalysisC* tag)
|