From 7b09a4ca499aaf27119933d5896682713e003876 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 5 Jul 2018 13:56:02 +0200 Subject: [PATCH] Fix lemmatization --- spacy/lemmatizer.py | 1 - spacy/morphology.pyx | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index 1738b5e5e..93121a0c5 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -105,7 +105,6 @@ def lemmatize(string, index, exceptions, rules): oov_forms.append(form) # Remove duplicates, and sort forms generated by rules alphabetically. forms = list(set(forms)) - forms.sort() # Put exceptions at the front of the list, so they get priority. # This is a dodgy heuristic -- but it's the best we can do until we get # frequencies on this. We can at least prune out problematic exceptions, diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index da6443b46..bd821d76f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -176,7 +176,7 @@ cdef class Morphology: cdef list lemma_strings cdef unicode lemma_string lemma_strings = self.lemmatizer(py_string, univ_pos, morphology) - lemma_string = sorted(lemma_strings)[0] + lemma_string = lemma_strings[0] lemma = self.strings.add(lemma_string) return lemma