* Ensure Morphology can be pickled, to address Issue #125.

This commit is contained in:
Matthew Honnibal 2015-10-12 15:27:47 +11:00
parent dfe0ad51ff
commit 5ca57bd859
3 changed files with 22 additions and 0 deletions

View File

@ -25,6 +25,7 @@ cdef class Morphology:
cdef readonly Pool mem cdef readonly Pool mem
cdef readonly StringStore strings cdef readonly StringStore strings
cdef public object lemmatizer cdef public object lemmatizer
cdef readonly object tag_map
cdef public object n_tags cdef public object n_tags
cdef public object reverse_index cdef public object reverse_index
cdef public object tag_names cdef public object tag_names

View File

@ -14,6 +14,7 @@ cdef class Morphology:
def __init__(self, StringStore string_store, tag_map, lemmatizer): def __init__(self, StringStore string_store, tag_map, lemmatizer):
self.mem = Pool() self.mem = Pool()
self.strings = string_store self.strings = string_store
self.tag_map = tag_map
self.lemmatizer = lemmatizer self.lemmatizer = lemmatizer
self.n_tags = len(tag_map) + 1 self.n_tags = len(tag_map) + 1
self.tag_names = tuple(sorted(tag_map.keys())) self.tag_names = tuple(sorted(tag_map.keys()))
@ -28,6 +29,9 @@ cdef class Morphology:
self.reverse_index[self.rich_tags[i].name] = i self.reverse_index[self.rich_tags[i].name] = i
self._cache = PreshMapArray(self.n_tags) self._cache = PreshMapArray(self.n_tags)
def __reduce__(self):
    """Describe how pickle should rebuild this object.

    Returns the 4-tuple form of the reduce protocol: the ``Morphology``
    callable, the positional arguments to call it with (the string store,
    tag map and lemmatizer held on this instance), and ``None`` for both
    the state slot and the list-items slot.
    """
    constructor_args = (self.strings, self.tag_map, self.lemmatizer)
    return (Morphology, constructor_args, None, None)
cdef int assign_tag(self, TokenC* token, tag) except -1: cdef int assign_tag(self, TokenC* token, tag) except -1:
cdef int tag_id cdef int tag_id
if isinstance(tag, basestring): if isinstance(tag, basestring):

View File

@ -0,0 +1,17 @@
import pytest
import pickle
import StringIO
from spacy.morphology import Morphology
from spacy.lemmatizer import Lemmatizer
from spacy.strings import StringStore
def test_pickle():
    """Check that a Morphology instance survives a pickle round trip.

    Serialising alone does not exercise the reconstruction side of
    ``Morphology.__reduce__``, so the pickled bytes are loaded again and
    the restored object is inspected.
    """
    morphology = Morphology(StringStore(), {}, Lemmatizer({}, {}, {}))

    # dumps/loads keeps the payload as a byte string, so no text-mode
    # buffer is involved and no Python-2-only module is needed here.
    payload = pickle.dumps(morphology)
    restored = pickle.loads(payload)

    assert isinstance(restored, Morphology)
    # n_tags is derived from the tag map in __init__, so matching values
    # show the constructor arguments came back intact.
    assert restored.n_tags == morphology.n_tags