From 0f7fe5e7a7fff016597a38e73b223e9170ab1ea8 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 18 Sep 2019 19:18:30 +0200 Subject: [PATCH] Auto-format and fix typo and consistency --- spacy/tokens/_serialize.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 54078fe60..8e4e24d46 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -13,7 +13,7 @@ from ..attrs import SPACY, ORTH class DocBin(object): """Pack Doc objects for binary serialization. - + The DocBin class lets you efficiently serialize the information from a collection of Doc objects. You can control which information is serialized by passing a list of attribute IDs, and optionally also specify whether the @@ -23,7 +23,7 @@ class DocBin(object): The serialization format is gzipped msgpack, where the msgpack object has the following structure: - + { "attrs": List[uint64], # e.g. [TAG, HEAD, ENT_IOB, ENT_TYPE] "tokens": bytes, # Serialized numpy uint64 array with the token data @@ -40,6 +40,7 @@ class DocBin(object): A notable downside to this format is that you can't easily extract just one document from the pallet. """ + def __init__(self, attrs=None, store_user_data=False): """Create a DocBin object, to hold serialized annotations. @@ -145,8 +146,8 @@ def merge_bins(bins): return b"" -def pickle_bin(docbin): - return (unpickle_bin, (bin_.to_bytes(),)) +def pickle_bin(doc_bin): + return (unpickle_bin, (doc_bin.to_bytes(),)) def unpickle_bin(byte_string):