From 0384f0821817014972b5bf8f062d94cd6ea22c2b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 7 Oct 2017 02:00:47 +0200 Subject: [PATCH] Trigger nonproj.deprojectivize as a postprocess --- spacy/language.py | 2 +- spacy/pipeline.pyx | 14 ++++++++++++++ spacy/syntax/nn_parser.pyx | 8 ++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/spacy/language.py b/spacy/language.py index a3152aea3..d40aee3ca 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -97,7 +97,7 @@ class Language(object): 'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp), 'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg), 'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg), - 'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg), # nonproj.deprojectivize, + 'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg), 'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg), 'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg), 'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg) diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 8d935335c..4d9adc609 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -28,6 +28,7 @@ from thinc.neural._classes.difference import Siamese, CauchySimilarity from .tokens.doc cimport Doc from .syntax.parser cimport Parser as LinearParser from .syntax.nn_parser cimport Parser as NeuralParser +from .syntax import nonproj from .syntax.parser import get_templates as get_feature_templates from .syntax.beam_parser cimport BeamParser from .syntax.ner cimport BiluoPushDown @@ -773,11 +774,19 @@ cdef class DependencyParser(LinearParser): if isinstance(label, basestring): label = self.vocab.strings[label] + @property + def postprocesses(self): + return [nonproj.deprojectivize] + cdef class NeuralDependencyParser(NeuralParser): name = 'parser' TransitionSystem = ArcEager + @property + def postprocesses(self): + return [nonproj.deprojectivize] + def init_multitask_objectives(self, gold_tuples, pipeline, **cfg): for target in []: labeller = NeuralLabeller(self.vocab, target=target) @@ -818,6 +827,11 @@ cdef class BeamDependencyParser(BeamParser): if isinstance(label, basestring): label = self.vocab.strings[label] + @property + def postprocesses(self): + return [nonproj.deprojectivize] + + __all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer', 'BeamDependencyParser', 'BeamEntityRecognizer', 'TokenVectorEnoder'] diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 459c94463..f2c72a639 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -739,6 +739,14 @@ cdef class Parser: for i in range(doc.length): doc.c[i] = state.c._sent[i] self.moves.finalize_doc(doc) + for hook in self.postprocesses: + for doc in docs: + hook(doc) + + @property + def postprocesses(self): + # Available for subclasses, e.g. to deprojectivize + return [] def add_label(self, label): for action in self.moves.action_types: