From b132b3caa69e7f0e35c46f4b6c1d536a189c158f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 5 Jan 2015 11:53:54 +1100 Subject: [PATCH] * Fix unicode error in lemmatizer --- spacy/en/lemmatizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/en/lemmatizer.py b/spacy/en/lemmatizer.py index 5883e12c8..2c77168ab 100644 --- a/spacy/en/lemmatizer.py +++ b/spacy/en/lemmatizer.py @@ -68,6 +68,7 @@ class Lemmatizer(object): def lemmatize(string, index, exceptions, rules): + assert isinstance(string, unicode) string = string.lower() forms = [] if string in index: @@ -77,6 +78,7 @@ def lemmatize(string, index, exceptions, rules): if string.endswith(old): form = string[:len(string) - len(old)] + new if form in index: + assert isinstance(form, unicode) forms.append(form) if not forms: forms.append(string)