From 6208d322d383455ea91c1e30b2c834a08e2cbbf0 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Tue, 2 Jun 2020 19:47:30 +0200
Subject: [PATCH] slightly more challenging unit test

---
 spacy/tests/test_util.py | 4 ++--
 spacy/util.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py
index 93201eb4b..a0c6ab6c0 100644
--- a/spacy/tests/test_util.py
+++ b/spacy/tests/test_util.py
@@ -12,8 +12,8 @@ from spacy.util import minibatch_by_words
         ([400, 400, 199], [3]),
         ([400, 400, 199, 3], [4]),
         ([400, 400, 199, 3, 1], [5]),
-        ([400, 400, 199, 3, 250], [3, 2]),
-        ([400, 400, 199, 3, 1, 250], [3, 3]),
+        ([400, 400, 199, 3, 200], [3, 2]),
+        ([400, 400, 199, 3, 1, 200], [3, 3]),
     ],
 )
 def test_util_minibatch(doc_sizes, expected_batches):
diff --git a/spacy/util.py b/spacy/util.py
index 8ac2fd370..b4e6f7fb1 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -682,13 +682,13 @@ def minibatch_by_words(examples, size, count_words=len, tolerance=0.2, discard_o
             if not discard_oversize:
                 yield [example]
 
-        # add the example to the current batch if it still fits
-        elif (current_size + n_words) < target_size:
+        # add the example to the current batch if it still fits and there's no overflow yet
+        elif overflow_size == 0 and (current_size + n_words) < target_size:
             batch.append(example)
             current_size += n_words
 
         # add the example to the overflow buffer if it fits in the tolerance margins
-        elif (current_size + n_words) < (target_size + tol_size):
+        elif (current_size + overflow_size + n_words) < (target_size + tol_size):
             overflow.append(example)
             overflow_size += n_words
 