mirror of https://github.com/explosion/spaCy.git
Use null raw for has_unknown_spaces in docs_to_json
This commit is contained in:
parent
1f49300862
commit
a93d42861d
|
@@ -20,7 +20,8 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
json_doc = {"id": doc_id, "paragraphs": []}
|
json_doc = {"id": doc_id, "paragraphs": []}
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
json_para = {'raw': doc.text, "sentences": [], "cats": [], "entities": [], "links": []}
|
raw = None if doc.has_unknown_spaces else doc.text
|
||||||
|
json_para = {'raw': raw, "sentences": [], "cats": [], "entities": [], "links": []}
|
||||||
for cat, val in doc.cats.items():
|
for cat, val in doc.cats.items():
|
||||||
json_cat = {"label": cat, "value": val}
|
json_cat = {"label": cat, "value": val}
|
||||||
json_para["cats"].append(json_cat)
|
json_para["cats"].append(json_cat)
|
||||||
|
|
Loading…
Reference in New Issue