Set extensions when write_conllu() is called in UD train script (#4618)

* Set extensions when write_conllu() is called

`run_eval.py` calls the `write_conllu()` function from `ud_train.py` on its own,
so `write_conllu()` must register the token extensions itself if they are not already set.

* Switch from try/except to an explicit `if` check
This commit is contained in:
adrianeboyd 2019-11-11 16:25:03 +01:00 committed by Ines Montani
parent 0b9a5f4074
commit f415e9b7d1
1 changed file with 7 additions and 0 deletions

View File

@@ -221,6 +221,13 @@ def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
def write_conllu(docs, file_):
if not Token.has_extension("get_conllu_lines"):
Token.set_extension("get_conllu_lines", method=get_token_conllu)
if not Token.has_extension("begins_fused"):
Token.set_extension("begins_fused", default=False)
if not Token.has_extension("inside_fused"):
Token.set_extension("inside_fused", default=False)
merger = Matcher(docs[0].vocab)
merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
for i, doc in enumerate(docs):