Fix #719: Lemmatizer can no longer output empty string

This commit is contained in:
Matthew Honnibal 2017-03-18 16:02:06 +01:00
parent ff277140f9
commit 413138de79
2 changed files with 3 additions and 2 deletions

View File

@@ -78,7 +78,9 @@ def lemmatize(string, index, exceptions, rules):
for old, new in rules: for old, new in rules:
if string.endswith(old): if string.endswith(old):
form = string[:len(string) - len(old)] + new form = string[:len(string) - len(old)] + new
if form in index or not form.isalpha(): if not form:
pass
elif form in index or not form.isalpha():
forms.append(form) forms.append(form)
else: else:
oov_forms.append(form) oov_forms.append(form)

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.xfail
@pytest.mark.models @pytest.mark.models
@pytest.mark.parametrize('text', ["s..."]) @pytest.mark.parametrize('text', ["s..."])
def test_issue719(EN, text): def test_issue719(EN, text):