Auto-format and fix typo and consistency

This commit is contained in:
Ines Montani 2019-09-18 19:18:30 +02:00
parent 931e96b6c7
commit 0f7fe5e7a7
1 changed file with 5 additions and 4 deletions

View File

@@ -13,7 +13,7 @@ from ..attrs import SPACY, ORTH
class DocBin(object): class DocBin(object):
"""Pack Doc objects for binary serialization. """Pack Doc objects for binary serialization.
The DocBin class lets you efficiently serialize the information from a The DocBin class lets you efficiently serialize the information from a
collection of Doc objects. You can control which information is serialized collection of Doc objects. You can control which information is serialized
by passing a list of attribute IDs, and optionally also specify whether the by passing a list of attribute IDs, and optionally also specify whether the
@@ -23,7 +23,7 @@ class DocBin(object):
The serialization format is gzipped msgpack, where the msgpack object has The serialization format is gzipped msgpack, where the msgpack object has
the following structure: the following structure:
{ {
"attrs": List[uint64], # e.g. [TAG, HEAD, ENT_IOB, ENT_TYPE] "attrs": List[uint64], # e.g. [TAG, HEAD, ENT_IOB, ENT_TYPE]
"tokens": bytes, # Serialized numpy uint64 array with the token data "tokens": bytes, # Serialized numpy uint64 array with the token data
@@ -40,6 +40,7 @@ class DocBin(object):
A notable downside to this format is that you can't easily extract just one A notable downside to this format is that you can't easily extract just one
document from the pallet. document from the pallet.
""" """
def __init__(self, attrs=None, store_user_data=False): def __init__(self, attrs=None, store_user_data=False):
"""Create a DocBin object, to hold serialized annotations. """Create a DocBin object, to hold serialized annotations.
@@ -145,8 +146,8 @@ def merge_bins(bins):
return b"" return b""
def pickle_bin(docbin): def pickle_bin(doc_bin):
return (unpickle_bin, (bin_.to_bytes(),)) return (unpickle_bin, (doc_bin.to_bytes(),))
def unpickle_bin(byte_string): def unpickle_bin(byte_string):