From e237472cdcc32276d042ce56ed0ce3cef560b37e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 1 Nov 2017 21:25:33 +0100
Subject: [PATCH] Fix tag and filename conversion for conllu

---
 spacy/cli/converters/conllu2json.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py
index 4d3fb58e4..4dc789010 100644
--- a/spacy/cli/converters/conllu2json.py
+++ b/spacy/cli/converters/conllu2json.py
@@ -28,7 +28,7 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
             sentences = []
 
     output_filename = input_path.parts[-1].replace(".conllu", ".json")
-    output_filename = input_path.parts[-1].replace(".conll", ".json")
+    output_filename = output_filename.parts[-1].replace(".conll", ".json")
     output_file = output_path / output_filename
     with output_file.open('w', encoding='utf-8') as f:
         f.write(json_dumps(docs))
@@ -55,6 +55,7 @@ def read_conllx(input_path, use_morphology=False, n=0):
                     id_ = int(id_) - 1
                     head = (int(head) - 1) if head != '0' else id_
                     dep = 'ROOT' if dep == 'root' else dep
+                    tag = pos if tag == '_' else tag
                     tag = tag+'__'+morph if use_morphology else tag
                     tokens.append((id_, word, tag, head, dep, 'O'))
                 except: