Fix item counting during training

This commit is contained in:
Matthew Honnibal 2017-06-04 20:18:20 -05:00
parent a0f4592f0a
commit a053b1218e
1 changed file with 6 additions and 2 deletions

View File

@@ -177,7 +177,7 @@ class GoldCorpus(object):
gold_tuples = read_json_file(loc) gold_tuples = read_json_file(loc)
for item in gold_tuples: for item in gold_tuples:
yield item yield item
i += 1 i += len(item[1])
if self.limit and i >= self.limit: if self.limit and i >= self.limit:
break break
@@ -194,8 +194,12 @@ class GoldCorpus(object):
def count_train(self): def count_train(self):
n = 0 n = 0
i = 0
for raw_text, paragraph_tuples in self.train_tuples: for raw_text, paragraph_tuples in self.train_tuples:
n += len(paragraph_tuples) n += sum([len(s[0][1]) for s in paragraph_tuples])
if self.limit and i >= self.limit:
break
i += len(paragraph_tuples)
return n return n
def train_docs(self, nlp, gold_preproc=False, def train_docs(self, nlp, gold_preproc=False,