From 4d7f393fae83cb3a39e483740c10190f03effdd3 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Fri, 22 Apr 2016 16:32:27 +0200 Subject: [PATCH 1/2] don't require json-files to have syntactic annotation --- spacy/gold.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 12c18b1c4..8f326d234 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -151,9 +151,9 @@ def read_json_file(loc, docs_filter=None): for i, token in enumerate(sent['tokens']): words.append(token['orth']) ids.append(i) - tags.append(token['tag']) - heads.append(token['head'] + i) - labels.append(token['dep']) + tags.append(token.get('tag','-')) + heads.append(token.get('head',0) + i) + labels.append(token.get('dep','ROOT')) # Ensure ROOT label is case-insensitive if labels[-1].lower() == 'root': labels[-1] = 'ROOT' From b6b96b233c5e7f708dad13e1cbe6691eea278ebe Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Mon, 2 May 2016 15:29:30 +0200 Subject: [PATCH 2/2] don't require read_json_file to expect particular annotations --- spacy/gold.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 8f326d234..c3badc60d 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -153,7 +153,7 @@ def read_json_file(loc, docs_filter=None): ids.append(i) tags.append(token.get('tag','-')) heads.append(token.get('head',0) + i) - labels.append(token.get('dep','ROOT')) + labels.append(token.get('dep','')) # Ensure ROOT label is case-insensitive if labels[-1].lower() == 'root': labels[-1] = 'ROOT'