* Exclude empty sentences in prepare_treebank

This commit is contained in:
Matthew Honnibal 2015-05-31 01:12:46 +02:00
parent 87d6551d19
commit 4d8d490547
1 changed file with 7 additions and 6 deletions

View File

@@ -60,12 +60,13 @@ def format_doc(file_id, raw_paras, ptb_text, dep_text, ner_text):
else: else:
doc['paragraphs'] = [] doc['paragraphs'] = []
for raw_sents in raw_paras: for raw_sents in raw_paras:
doc['paragraphs'].append( para = format_para(
format_para(
' '.join(raw_sents).replace('<SEP>', ''), ' '.join(raw_sents).replace('<SEP>', ''),
ptb_sents[i:i+len(raw_sents)], ptb_sents[i:i+len(raw_sents)],
dep_sents[i:i+len(raw_sents)], dep_sents[i:i+len(raw_sents)],
ner_sents[i:i+len(raw_sents)])) ner_sents[i:i+len(raw_sents)])
if para['sentences']:
doc['paragraphs'].append(para)
i += len(raw_sents) i += len(raw_sents)
return doc return doc