mirror of https://github.com/explosion/spaCy.git
Fix loading of morphology exceptions
This commit is contained in:
parent
bb98d45a63
commit
b78cc318c3
|
@ -30,6 +30,7 @@ cdef class Morphology:
|
||||||
cdef public object n_tags
|
cdef public object n_tags
|
||||||
cdef public object reverse_index
|
cdef public object reverse_index
|
||||||
cdef public object tag_names
|
cdef public object tag_names
|
||||||
|
cdef public object exc
|
||||||
|
|
||||||
cdef RichTagC* rich_tags
|
cdef RichTagC* rich_tags
|
||||||
cdef PreshMapArray _cache
|
cdef PreshMapArray _cache
|
||||||
|
|
|
@ -33,7 +33,7 @@ def _normalize_props(props):
|
||||||
|
|
||||||
|
|
||||||
cdef class Morphology:
|
cdef class Morphology:
|
||||||
def __init__(self, StringStore string_store, tag_map, lemmatizer):
|
def __init__(self, StringStore string_store, tag_map, lemmatizer, exc=None):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self.strings = string_store
|
self.strings = string_store
|
||||||
self.tag_map = {}
|
self.tag_map = {}
|
||||||
|
@ -53,9 +53,14 @@ cdef class Morphology:
|
||||||
self.rich_tags[i].pos = attrs[POS]
|
self.rich_tags[i].pos = attrs[POS]
|
||||||
self.reverse_index[self.rich_tags[i].name] = i
|
self.reverse_index[self.rich_tags[i].name] = i
|
||||||
self._cache = PreshMapArray(self.n_tags)
|
self._cache = PreshMapArray(self.n_tags)
|
||||||
|
self.exc = {}
|
||||||
|
if exc is not None:
|
||||||
|
for (tag_str, orth_str), attrs in exc.items():
|
||||||
|
self.add_special_case(tag_str, orth_str, attrs)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
return (Morphology, (self.strings, self.tag_map, self.lemmatizer), None, None)
|
return (Morphology, (self.strings, self.tag_map, self.lemmatizer,
|
||||||
|
self.exc), None, None)
|
||||||
|
|
||||||
cdef int assign_tag(self, TokenC* token, tag) except -1:
|
cdef int assign_tag(self, TokenC* token, tag) except -1:
|
||||||
if isinstance(tag, basestring):
|
if isinstance(tag, basestring):
|
||||||
|
@ -106,6 +111,7 @@ cdef class Morphology:
|
||||||
tag (unicode): The part-of-speech tag to key the exception.
|
tag (unicode): The part-of-speech tag to key the exception.
|
||||||
orth (unicode): The word-form to key the exception.
|
orth (unicode): The word-form to key the exception.
|
||||||
"""
|
"""
|
||||||
|
self.exc[(tag_str, orth_str)] = dict(attrs)
|
||||||
tag = self.strings.add(tag_str)
|
tag = self.strings.add(tag_str)
|
||||||
tag_id = self.reverse_index[tag]
|
tag_id = self.reverse_index[tag]
|
||||||
orth = self.strings[orth_str]
|
orth = self.strings[orth_str]
|
||||||
|
|
|
@ -286,7 +286,8 @@ class NeuralTagger(object):
|
||||||
cdef Vocab vocab = self.vocab
|
cdef Vocab vocab = self.vocab
|
||||||
if new_tag_map:
|
if new_tag_map:
|
||||||
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
||||||
vocab.morphology.lemmatizer)
|
vocab.morphology.lemmatizer,
|
||||||
|
exc=vocab.morphology.exc)
|
||||||
token_vector_width = pipeline[0].model.nO
|
token_vector_width = pipeline[0].model.nO
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
|
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
|
||||||
|
@ -322,7 +323,9 @@ class NeuralTagger(object):
|
||||||
tag_map = msgpack.loads(b, encoding='utf8')
|
tag_map = msgpack.loads(b, encoding='utf8')
|
||||||
self.vocab.morphology = Morphology(
|
self.vocab.morphology = Morphology(
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
|
exc=self.vocab.morphology.exc)
|
||||||
|
|
||||||
deserialize = OrderedDict((
|
deserialize = OrderedDict((
|
||||||
('vocab', lambda b: self.vocab.from_bytes(b)),
|
('vocab', lambda b: self.vocab.from_bytes(b)),
|
||||||
('tag_map', load_tag_map),
|
('tag_map', load_tag_map),
|
||||||
|
@ -354,7 +357,9 @@ class NeuralTagger(object):
|
||||||
tag_map = msgpack.loads(file_.read(), encoding='utf8')
|
tag_map = msgpack.loads(file_.read(), encoding='utf8')
|
||||||
self.vocab.morphology = Morphology(
|
self.vocab.morphology = Morphology(
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer)
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
|
exc=self.vocab.morphology.exc)
|
||||||
|
|
||||||
|
|
||||||
deserialize = OrderedDict((
|
deserialize = OrderedDict((
|
||||||
('vocab', lambda p: self.vocab.from_disk(p)),
|
('vocab', lambda p: self.vocab.from_disk(p)),
|
||||||
|
|
Loading…
Reference in New Issue