From a053b1218e577b2471e0c20db8f0e7df3643229e Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 4 Jun 2017 20:18:20 -0500 Subject: [PATCH] Fix item counting during training --- spacy/gold.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 6b07592cc..0e5db8329 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -177,7 +177,7 @@ class GoldCorpus(object): gold_tuples = read_json_file(loc) for item in gold_tuples: yield item - i += 1 + i += len(item[1]) if self.limit and i >= self.limit: break @@ -194,8 +194,12 @@ class GoldCorpus(object): def count_train(self): n = 0 + i = 0 for raw_text, paragraph_tuples in self.train_tuples: - n += len(paragraph_tuples) + n += sum([len(s[0][1]) for s in paragraph_tuples]) + if self.limit and i >= self.limit: + break + i += len(paragraph_tuples) return n def train_docs(self, nlp, gold_preproc=False,