* Allow training documents to be filtered in gold.pyx

This commit is contained in:
Matthew Honnibal 2015-06-12 02:42:08 +02:00
parent 15e177d7a1
commit b643cb3d5c
1 changed file with 3 additions and 1 deletion

View File

@@ -121,7 +121,7 @@ def _min_edit_path(cand_words, gold_words):
return prev_costs[n_gold], previous_row[-1]
def read_json_file(loc):
def read_json_file(loc, docs_filter=None):
print loc
if path.isdir(loc):
for filename in os.listdir(loc):
@@ -130,6 +130,8 @@ def read_json_file(loc):
with open(loc) as file_:
docs = ujson.load(file_)
for doc in docs:
if docs_filter is not None and not docs_filter(doc):
continue
paragraphs = []
for paragraph in doc['paragraphs']:
sents = []