mirror of https://github.com/explosion/spaCy.git
Fix lemmatization
This commit is contained in:
parent
ec41ceb383
commit
7b09a4ca49
|
@ -105,7 +105,6 @@ def lemmatize(string, index, exceptions, rules):
|
|||
oov_forms.append(form)
|
||||
# Remove duplicates, and sort forms generated by rules alphabetically.
|
||||
forms = list(set(forms))
|
||||
forms.sort()
|
||||
# Put exceptions at the front of the list, so they get priority.
|
||||
# This is a dodgy heuristic -- but it's the best we can do until we get
|
||||
# frequencies on this. We can at least prune out problematic exceptions,
|
||||
|
|
|
@ -176,7 +176,7 @@ cdef class Morphology:
|
|||
cdef list lemma_strings
|
||||
cdef unicode lemma_string
|
||||
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
|
||||
lemma_string = sorted(lemma_strings)[0]
|
||||
lemma_string = lemma_strings[0]
|
||||
lemma = self.strings.add(lemma_string)
|
||||
return lemma
|
||||
|
||||
|
|
Loading…
Reference in New Issue