mirror of https://github.com/explosion/spaCy.git
Minor refactor to conversion of output docs (#5718)
Minor refactor of conversion of docs to output format to avoid duplicate conversion steps.
This commit is contained in:
parent
c1ea55307b
commit
ac4297ee39
|
@ -120,8 +120,12 @@ def convert(
|
||||||
no_print=silent,
|
no_print=silent,
|
||||||
ner_map=ner_map,
|
ner_map=ner_map,
|
||||||
)
|
)
|
||||||
|
if file_type == "json":
|
||||||
|
data = [docs_to_json(docs)]
|
||||||
|
else:
|
||||||
|
data = DocBin(docs=docs, store_user_data=True).to_bytes()
|
||||||
if output_dir == "-":
|
if output_dir == "-":
|
||||||
_print_docs_to_stdout(docs, file_type)
|
_print_docs_to_stdout(data, file_type)
|
||||||
else:
|
else:
|
||||||
if input_loc != input_path:
|
if input_loc != input_path:
|
||||||
subpath = input_loc.relative_to(input_path)
|
subpath = input_loc.relative_to(input_path)
|
||||||
|
@ -129,24 +133,23 @@ def convert(
|
||||||
else:
|
else:
|
||||||
output_file = Path(output_dir) / input_loc.parts[-1]
|
output_file = Path(output_dir) / input_loc.parts[-1]
|
||||||
output_file = output_file.with_suffix(f".{file_type}")
|
output_file = output_file.with_suffix(f".{file_type}")
|
||||||
_write_docs_to_file(docs, output_file, file_type)
|
_write_docs_to_file(data, output_file, file_type)
|
||||||
msg.good(f"Generated output file ({len(docs)} documents): {output_file}")
|
msg.good(f"Generated output file ({len(docs)} documents): {output_file}")
|
||||||
|
|
||||||
|
|
||||||
def _print_docs_to_stdout(docs, output_type):
|
def _print_docs_to_stdout(data, output_type):
|
||||||
if output_type == "json":
|
if output_type == "json":
|
||||||
srsly.write_json("-", [docs_to_json(docs)])
|
srsly.write_json("-", data)
|
||||||
else:
|
else:
|
||||||
sys.stdout.buffer.write(DocBin(docs=docs, store_user_data=True).to_bytes())
|
sys.stdout.buffer.write(data)
|
||||||
|
|
||||||
|
|
||||||
def _write_docs_to_file(docs, output_file, output_type):
|
def _write_docs_to_file(data, output_file, output_type):
|
||||||
if not output_file.parent.exists():
|
if not output_file.parent.exists():
|
||||||
output_file.parent.mkdir(parents=True)
|
output_file.parent.mkdir(parents=True)
|
||||||
if output_type == "json":
|
if output_type == "json":
|
||||||
srsly.write_json(output_file, [docs_to_json(docs)])
|
srsly.write_json(output_file, data)
|
||||||
else:
|
else:
|
||||||
data = DocBin(docs=docs, store_user_data=True).to_bytes()
|
|
||||||
with output_file.open("wb") as file_:
|
with output_file.open("wb") as file_:
|
||||||
file_.write(data)
|
file_.write(data)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue