mirror of https://github.com/explosion/spaCy.git
Fix loading of morphology exceptions
This commit is contained in:
parent bb98d45a63
commit b78cc318c3
|
@ -30,6 +30,7 @@ cdef class Morphology:
|
|||
cdef public object n_tags
|
||||
cdef public object reverse_index
|
||||
cdef public object tag_names
|
||||
cdef public object exc
|
||||
|
||||
cdef RichTagC* rich_tags
|
||||
cdef PreshMapArray _cache
|
||||
|
|
|
@ -33,7 +33,7 @@ def _normalize_props(props):
|
|||
|
||||
|
||||
cdef class Morphology:
|
||||
def __init__(self, StringStore string_store, tag_map, lemmatizer):
|
||||
def __init__(self, StringStore string_store, tag_map, lemmatizer, exc=None):
|
||||
self.mem = Pool()
|
||||
self.strings = string_store
|
||||
self.tag_map = {}
|
||||
|
@ -53,9 +53,14 @@ cdef class Morphology:
|
|||
self.rich_tags[i].pos = attrs[POS]
|
||||
self.reverse_index[self.rich_tags[i].name] = i
|
||||
self._cache = PreshMapArray(self.n_tags)
|
||||
self.exc = {}
|
||||
if exc is not None:
|
||||
for (tag_str, orth_str), attrs in exc.items():
|
||||
self.add_special_case(tag_str, orth_str, attrs)
|
||||
|
||||
def __reduce__(self):
|
||||
return (Morphology, (self.strings, self.tag_map, self.lemmatizer), None, None)
|
||||
return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
|
||||
self.exc), None, None)
|
||||
|
||||
cdef int assign_tag(self, TokenC* token, tag) except -1:
|
||||
if isinstance(tag, basestring):
|
||||
|
@ -106,6 +111,7 @@ cdef class Morphology:
|
|||
tag (unicode): The part-of-speech tag to key the exception.
|
||||
orth (unicode): The word-form to key the exception.
|
||||
"""
|
||||
self.exc[(tag_str, orth_str)] = dict(attrs)
|
||||
tag = self.strings.add(tag_str)
|
||||
tag_id = self.reverse_index[tag]
|
||||
orth = self.strings[orth_str]
|
||||
|
|
|
@ -286,7 +286,8 @@ class NeuralTagger(object):
|
|||
cdef Vocab vocab = self.vocab
|
||||
if new_tag_map:
|
||||
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
||||
vocab.morphology.lemmatizer)
|
||||
vocab.morphology.lemmatizer,
|
||||
exc=vocab.morphology.exc)
|
||||
token_vector_width = pipeline[0].model.nO
|
||||
if self.model is True:
|
||||
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
|
||||
|
@ -322,7 +323,9 @@ class NeuralTagger(object):
|
|||
tag_map = msgpack.loads(b, encoding='utf8')
|
||||
self.vocab.morphology = Morphology(
|
||||
self.vocab.strings, tag_map=tag_map,
|
||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||
exc=self.vocab.morphology.exc)
|
||||
|
||||
deserialize = OrderedDict((
|
||||
('vocab', lambda b: self.vocab.from_bytes(b)),
|
||||
('tag_map', load_tag_map),
|
||||
|
@ -354,7 +357,9 @@ class NeuralTagger(object):
|
|||
tag_map = msgpack.loads(file_.read(), encoding='utf8')
|
||||
self.vocab.morphology = Morphology(
|
||||
self.vocab.strings, tag_map=tag_map,
|
||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||
exc=self.vocab.morphology.exc)
|
||||
|
||||
|
||||
deserialize = OrderedDict((
|
||||
('vocab', lambda p: self.vocab.from_disk(p)),
|
||||
|
|
Loading…
Reference in New Issue