From 9bf22a94aa45c09033c13660139d4b8df60c6292 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 29 May 2017 17:52:36 -0500 Subject: [PATCH] Fix tag set serialisation --- spacy/vocab.pyx | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 5659d7181..45c9e1a07 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -292,10 +292,11 @@ cdef class Vocab: **exclude: Named attributes to prevent from being serialized. RETURNS (bytes): The serialized form of the `Vocab` object. """ - getters = { - 'strings': lambda: self.strings.to_bytes(), - 'lexemes': lambda: self.lexemes_to_bytes() - } + getters = OrderedDict(( + ('strings', lambda: self.strings.to_bytes()), + ('lexemes', lambda: self.lexemes_to_bytes()), + ('tag_map', lambda: self.morphology.tag_map), + )) return util.to_bytes(getters, exclude) def from_bytes(self, bytes_data, **exclude): @@ -305,9 +306,13 @@ cdef class Vocab: **exclude: Named attributes to prevent from being loaded. RETURNS (Vocab): The `Vocab` object. """ + def set_tag_map(tag_map): + self.morphology = Morphology(self.strings, tag_map, + self.morphology.lemmatizer) setters = OrderedDict(( ('strings', lambda b: self.strings.from_bytes(b)), - ('lexemes', lambda b: self.lexemes_from_bytes(b)) + ('lexemes', lambda b: self.lexemes_from_bytes(b)), + ('tag_map', lambda b: set_tag_map(b)) )) return util.from_bytes(bytes_data, setters, exclude)