diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index c9a20f6c0..2d4e9af9d 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -171,7 +171,7 @@ class DocBin: "tokens": tokens.tobytes("C"), "spaces": spaces.tobytes("C"), "lengths": numpy.asarray(lengths, dtype="int32").tobytes("C"), - "strings": list(self.strings), + "strings": list(sorted(self.strings)), "cats": self.cats, "flags": self.flags, }