diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index 960467a0b..630334bf7 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -86,13 +86,16 @@ def lemmatize(string, index, exceptions, rules): #if string in index: # forms.append(string) forms.extend(exceptions.get(string, [])) + oov_forms = [] for old, new in rules: if string.endswith(old): form = string[:len(string) - len(old)] + new if form in index or not form.isalpha(): forms.append(form) + else: + oov_forms.append(form) if not forms: - forms.append(string) + forms.extend(oov_forms) return set(forms)