From f415e9b7d1d7e2eda64e99653b734387005a5bb0 Mon Sep 17 00:00:00 2001
From: adrianeboyd
Date: Mon, 11 Nov 2019 16:25:03 +0100
Subject: [PATCH] Set extensions when write_conllu() is called in UD train script (#4618)

* Set extensions when write_conllu() is called

`run_eval.py` uses the `write_conllu()` function from `ud_train.py` by
itself, so it needs to set the token extensions if necessary.

* Switch from try to if
---
 bin/ud/ud_train.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/bin/ud/ud_train.py b/bin/ud/ud_train.py
index 2784d7c3c..ddd87a31c 100644
--- a/bin/ud/ud_train.py
+++ b/bin/ud/ud_train.py
@@ -221,6 +221,13 @@ def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
 
 
 def write_conllu(docs, file_):
+    if not Token.has_extension("get_conllu_lines"):
+        Token.set_extension("get_conllu_lines", method=get_token_conllu)
+    if not Token.has_extension("begins_fused"):
+        Token.set_extension("begins_fused", default=False)
+    if not Token.has_extension("inside_fused"):
+        Token.set_extension("inside_fused", default=False)
+
     merger = Matcher(docs[0].vocab)
     merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
     for i, doc in enumerate(docs):