mirror of https://github.com/explosion/spaCy.git
Fix non-clobbering lemmatization
This commit is contained in:
parent
63c6ae4191
commit
31babe3c3f
|
@@ -109,8 +109,7 @@ cdef class Morphology:
|
||||||
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
||||||
self.tag_map.get(tag_str, {}))
|
self.tag_map.get(tag_str, {}))
|
||||||
self._cache.set(tag_id, token.lex.orth, analysis)
|
self._cache.set(tag_id, token.lex.orth, analysis)
|
||||||
if token.lemma == 0:
|
token.lemma = analysis.lemma
|
||||||
token.lemma = analysis.lemma
|
|
||||||
token.pos = analysis.tag.pos
|
token.pos = analysis.tag.pos
|
||||||
token.tag = analysis.tag.name
|
token.tag = analysis.tag.name
|
||||||
token.morph = analysis.tag.morph
|
token.morph = analysis.tag.morph
|
||||||
|
|
|
@@ -412,7 +412,11 @@ class Tagger(Pipe):
|
||||||
for j, tag_id in enumerate(doc_tag_ids):
|
for j, tag_id in enumerate(doc_tag_ids):
|
||||||
# Don't clobber preset POS tags
|
# Don't clobber preset POS tags
|
||||||
if doc.c[j].tag == 0 and doc.c[j].pos == 0:
|
if doc.c[j].tag == 0 and doc.c[j].pos == 0:
|
||||||
|
# Don't clobber preset lemmas
|
||||||
|
lemma = doc.c[j].lemma
|
||||||
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
||||||
|
if lemma != 0:
|
||||||
|
doc.c[j].lemma = lemma
|
||||||
idx += 1
|
idx += 1
|
||||||
if tensors is not None:
|
if tensors is not None:
|
||||||
if isinstance(doc.tensor, numpy.ndarray) \
|
if isinstance(doc.tensor, numpy.ndarray) \
|
||||||
|
|
Loading…
Reference in New Issue