From f2e162fc60dab95e16efbb7310e4745689cb886c Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Tue, 2 Jun 2020 19:59:04 +0200
Subject: [PATCH] it's only oversized if the tolerance level is also exceeded

---
 spacy/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/util.py b/spacy/util.py
index 3f7a96a19..598545b84 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -678,7 +678,7 @@ def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_o
         n_words = count_words(example.doc)
         # if the current example exceeds the batch size, it is returned separately
         # but only if discard_oversize=False.
-        if n_words > target_size:
+        if n_words > target_size + tol_size:
             if not discard_oversize:
                 yield [example]