Fix tag and filename conversion for conllu

This commit is contained in:
Matthew Honnibal 2017-11-01 21:25:33 +01:00
parent e5eb5e5bf6
commit e237472cdc
1 changed file with 2 additions and 1 deletion

View File

@@ -28,7 +28,7 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
sentences = []
output_filename = input_path.parts[-1].replace(".conllu", ".json")
-output_filename = input_path.parts[-1].replace(".conll", ".json")
+output_filename = output_filename.parts[-1].replace(".conll", ".json")
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs))
@@ -55,6 +55,7 @@ def read_conllx(input_path, use_morphology=False, n=0):
id_ = int(id_) - 1
head = (int(head) - 1) if head != '0' else id_
dep = 'ROOT' if dep == 'root' else dep
+tag = pos if tag == '_' else tag
tag = tag+'__'+morph if use_morphology else tag
tokens.append((id_, word, tag, head, dep, 'O'))
except: