diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py
new file mode 100644
index 000000000..382a8f548
--- /dev/null
+++ b/spacy/tests/test_util.py
@@ -0,0 +1,23 @@
+import pytest
+from spacy.gold import Example
+
+from .util import get_doc
+
+from spacy.util import minibatch_by_words
+
+
+@pytest.mark.parametrize(
+    "doc_sizes, expected_batches",
+    [
+        ([400, 400, 199], [3]),
+        ([400, 400, 199, 3], [4]),
+        ([400, 400, 199, 3, 250], [3, 2]),
+    ],
+)
+def test_util_minibatch(doc_sizes, expected_batches):
+    """Check per-batch example counts from minibatch_by_words at size=1000."""
+    # Wrap one synthetic doc of each requested word count in an Example.
+    examples = [Example(doc=get_doc(n_words)) for n_words in doc_sizes]
+    batches = list(minibatch_by_words(examples=examples, size=1000))
+    batch_lengths = list(map(len, batches))
+    assert batch_lengths == expected_batches
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index e29342268..73650a6f7 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -92,6 +92,13 @@ def get_batch(batch_size):
     return docs
 
 
+def get_doc(n_words):
+    """Build a Doc on a fresh Vocab with the words "0" .. str(n_words - 1)."""
+    # Numeric word texts make individual words easy to track in tests.
+    word_texts = list(map(str, range(n_words)))
+    return Doc(Vocab(), words=word_texts)
+
+
 def apply_transition_sequence(parser, doc, sequence):
     """Perform a series of pre-specified transitions, to put the parser in a
     desired state."""