mirror of https://github.com/explosion/spaCy.git
* Fix unicode error in lemmatizer
This commit is contained in:
parent
775a66e2b6
commit
b132b3caa6
|
@ -68,6 +68,7 @@ class Lemmatizer(object):
|
||||||
|
|
||||||
|
|
||||||
def lemmatize(string, index, exceptions, rules):
|
def lemmatize(string, index, exceptions, rules):
|
||||||
|
assert isinstance(string, unicode)
|
||||||
string = string.lower()
|
string = string.lower()
|
||||||
forms = []
|
forms = []
|
||||||
if string in index:
|
if string in index:
|
||||||
|
@ -77,6 +78,7 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
if string.endswith(old):
|
if string.endswith(old):
|
||||||
form = string[:len(string) - len(old)] + new
|
form = string[:len(string) - len(old)] + new
|
||||||
if form in index:
|
if form in index:
|
||||||
|
assert isinstance(form, unicode)
|
||||||
forms.append(form)
|
forms.append(form)
|
||||||
if not forms:
|
if not forms:
|
||||||
forms.append(string)
|
forms.append(string)
|
||||||
|
|
Loading…
Reference in New Issue