Use null raw for has_unknown_spaces in docs_to_json

This commit is contained in:
Adriane Boyd 2020-10-15 09:44:21 +02:00
parent 1f49300862
commit a93d42861d
1 changed file with 2 additions and 1 deletion

View File

@@ -20,7 +20,8 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
docs = [docs]
json_doc = {"id": doc_id, "paragraphs": []}
for i, doc in enumerate(docs):
-json_para = {'raw': doc.text, "sentences": [], "cats": [], "entities": [], "links": []}
+raw = None if doc.has_unknown_spaces else doc.text
+json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []}
for cat, val in doc.cats.items():
json_cat = {"label": cat, "value": val}
json_para["cats"].append(json_cat)