mirror of https://github.com/explosion/spaCy.git
* Allow training documents to be filtered in gold.pyx
This commit is contained in:
parent
15e177d7a1
commit
b643cb3d5c
|
@ -121,7 +121,7 @@ def _min_edit_path(cand_words, gold_words):
|
||||||
return prev_costs[n_gold], previous_row[-1]
|
return prev_costs[n_gold], previous_row[-1]
|
||||||
|
|
||||||
|
|
||||||
def read_json_file(loc):
|
def read_json_file(loc, docs_filter=None):
|
||||||
print loc
|
print loc
|
||||||
if path.isdir(loc):
|
if path.isdir(loc):
|
||||||
for filename in os.listdir(loc):
|
for filename in os.listdir(loc):
|
||||||
|
@ -130,6 +130,8 @@ def read_json_file(loc):
|
||||||
with open(loc) as file_:
|
with open(loc) as file_:
|
||||||
docs = ujson.load(file_)
|
docs = ujson.load(file_)
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
|
if docs_filter is not None and not docs_filter(doc):
|
||||||
|
continue
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
for paragraph in doc['paragraphs']:
|
for paragraph in doc['paragraphs']:
|
||||||
sents = []
|
sents = []
|
||||||
|
|
Loading…
Reference in New Issue