diff --git a/bin/ud/ud_train.py b/bin/ud/ud_train.py index 2784d7c3c..ddd87a31c 100644 --- a/bin/ud/ud_train.py +++ b/bin/ud/ud_train.py @@ -221,6 +221,13 @@ def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None): def write_conllu(docs, file_): + if not Token.has_extension("get_conllu_lines"): + Token.set_extension("get_conllu_lines", method=get_token_conllu) + if not Token.has_extension("begins_fused"): + Token.set_extension("begins_fused", default=False) + if not Token.has_extension("inside_fused"): + Token.set_extension("inside_fused", default=False) + merger = Matcher(docs[0].vocab) merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}]) for i, doc in enumerate(docs):