From 8008e2f75b93505734018a072e4131650459ec3a Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Thu, 8 Apr 2021 13:22:38 +0200
Subject: [PATCH] Use morph hash in lemmatizer cache key (#7690)

Use the morph hash rather than the `MorphAnalysis` object in the cache
key so that the `Lemmatizer` can be pickled.
---
 spacy/pipeline/lemmatizer.py            | 2 +-
 spacy/tests/pipeline/test_lemmatizer.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 21f1a8a8b..cfe405efa 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -175,7 +175,7 @@ class Lemmatizer(Pipe):
 
         DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
         """
-        cache_key = (token.orth, token.pos, token.morph)
+        cache_key = (token.orth, token.pos, token.morph.key)
         if cache_key in self.cache:
             return self.cache[cache_key]
         string = token.text
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 1943d3dd7..3c16d3bcb 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -1,6 +1,7 @@
 import pytest
 import logging
 import mock
+import pickle
 from spacy import util, registry
 from spacy.lang.en import English
 from spacy.lookups import Lookups
@@ -106,6 +107,9 @@ def test_lemmatizer_serialize(nlp):
     doc2 = nlp2.make_doc("coping")
     doc2[0].pos_ = "VERB"
     assert doc2[0].lemma_ == ""
-    doc2 = lemmatizer(doc2)
+    doc2 = lemmatizer2(doc2)
     assert doc2[0].text == "coping"
     assert doc2[0].lemma_ == "cope"
+
+    # Make sure that lemmatizer cache can be pickled
+    b = pickle.dumps(lemmatizer2)