From c91577db028e3343e7280f0614f7bd89451f93f0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 8 Mar 2019 19:03:17 +0100
Subject: [PATCH] Add set_morphology cfg option for Tagger

---
Review note: the set_morphology setter below is declared with
@set_morphology.setter and stores `value` in self.cfg. The version first
submitted used `@property.setter` and
`return self.cfg["set_morphology"] = True`, which is not valid syntax and
ignored the argument. The blob ids on the `index` line are those of the
original submission.

 spacy/pipeline/pipes.pyx | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index b3c3db04d..237d36a12 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -357,6 +357,14 @@ class Tagger(Pipe):
         self.cfg = OrderedDict(sorted(cfg.items()))
         self.cfg.setdefault("cnn_maxout_pieces", 2)
 
+    @property
+    def set_morphology(self):
+        return self.cfg.get("set_morphology", True)
+
+    @set_morphology.setter
+    def set_morphology(self, value):
+        self.cfg["set_morphology"] = value
+
     @property
     def labels(self):
         return tuple(self.vocab.morphology.tag_names)
@@ -410,12 +418,13 @@ class Tagger(Pipe):
                 doc_tag_ids = doc_tag_ids.get()
             for j, tag_id in enumerate(doc_tag_ids):
                 # Don't clobber preset POS tags
-                if doc.c[j].tag == 0 and doc.c[j].pos == 0:
-                    # Don't clobber preset lemmas
-                    lemma = doc.c[j].lemma
-                    vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
-                    if lemma != 0 and lemma != doc.c[j].lex.orth:
-                        doc.c[j].lemma = lemma
+                if doc.c[j].tag == 0:
+                    if doc.c[j].pos == 0 and self.set_morphology:
+                        # Don't clobber preset lemmas
+                        lemma = doc.c[j].lemma
+                        vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
+                        if lemma != 0 and lemma != doc.c[j].lex.orth:
+                            doc.c[j].lemma = lemma
                 idx += 1
             if tensors is not None and len(tensors):
                 if isinstance(doc.tensor, numpy.ndarray) \