From 55dab77de88d60640bc553297c1206106157110d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 17 May 2017 13:13:48 +0200
Subject: [PATCH] Add conversion rule for .conll

---
Review notes (text between the "---" separator and the diffstat is not part
of the commit message):

* Registers the ".conll" extension in CONVERTERS, mapped to the existing
  conllu2json converter.
* conllu2json(): the added line now chains onto `output_filename` instead of
  recomputing it from `input_path`. Recomputing discarded the preceding
  ".conllu" replacement, and because ".conll" is a prefix of ".conllu" an
  input named "train.conllu" was written out as "train.jsonu". With the
  chained form both "train.conllu" and "train.conll" yield "train.json".
* read_conllx(): the split fields are bound to `parts` before unpacking, so
  the backslash line continuation is no longer needed.
* The post-image blob id on the conllu2json.py "index" line predates the
  one-line correction above.
* NOTE(review): indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; confirm against the tree before
  applying.

 spacy/cli/convert.py                | 3 ++-
 spacy/cli/converters/conllu2json.py | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 6672995c4..c9a0510a8 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -12,7 +12,8 @@ from ..util import prints
 # from /converters.
 
 CONVERTERS = {
-    '.conllu': conllu2json
+    '.conllu': conllu2json,
+    '.conll': conllu2json
 }
 
 
diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py
index 5ea01e711..618810584 100644
--- a/spacy/cli/converters/conllu2json.py
+++ b/spacy/cli/converters/conllu2json.py
@@ -28,6 +28,7 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
             sentences = []
 
     output_filename = input_path.parts[-1].replace(".conllu", ".json")
+    output_filename = output_filename.replace(".conll", ".json")
     output_file = output_path / output_filename
     with output_file.open('w', encoding='utf-8') as f:
         f.write(json_dumps(docs))
@@ -46,8 +47,8 @@ def read_conllx(input_path, use_morphology=False, n=0):
             tokens = []
             for line in lines:
 
-                id_, word, lemma, pos, tag, morph, head, dep, _1, \
-                _2 = line.split('\t')
+                parts = line.split('\t')
+                id_, word, lemma, pos, tag, morph, head, dep, _1, _2 = parts
                 if '-' in id_ or '.' in id_:
                     continue
                 try: