From 092ce4648e959453cbc25843f7d9afcb234b540e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 25 Sep 2020 22:20:44 +0200
Subject: [PATCH] Make DocBin output stable data (set iteration)

---
 spacy/tokens/_serialize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index c9a20f6c0..2d4e9af9d 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -171,7 +171,7 @@ class DocBin:
             "tokens": tokens.tobytes("C"),
             "spaces": spaces.tobytes("C"),
             "lengths": numpy.asarray(lengths, dtype="int32").tobytes("C"),
-            "strings": list(self.strings),
+            "strings": list(sorted(self.strings)),
             "cats": self.cats,
             "flags": self.flags,
         }