diff --git a/setup.py b/setup.py
index 5cae257b4..ec4ea52c2 100644
--- a/setup.py
+++ b/setup.py
@@ -94,6 +94,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
                                    "data/vocab/lexemes.bin",
                                    "data/vocab/strings.txt"],
                       "spacy.tokens": ["*.pxd"],
+                      "spacy.serialize": ["*.pxd"],
                       "spacy.syntax": ["*.pxd"]},
         ext_modules=exts,
         cmdclass={'build_ext': Cython.Distutils.build_ext},
@@ -158,8 +159,9 @@ MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings',
              'spacy.syntax.transition_system',
              'spacy.syntax.arc_eager',
              'spacy.syntax._parse_features',
-             'spacy.gold', 'spacy.orth', 'spacy.serialize',
+             'spacy.gold', 'spacy.orth',
              'spacy.tokens.doc', 'spacy.tokens.spans', 'spacy.tokens.token',
+             'spacy.serialize.packer', 'spacy.serialize.huffman', 'spacy.serialize.bits',
              'spacy.syntax.ner']
 
 
diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py
index d63fe92a2..2ee5e4d84 100644
--- a/spacy/en/__init__.py
+++ b/spacy/en/__init__.py
@@ -70,6 +70,7 @@ class English(object):
       Tagger=EnPosTagger,
       Parser=ParserFactory(ParserTransitionSystem),
       Entity=ParserFactory(EntityTransitionSystem),
+      Packer=None,
       load_vectors=True
     ):
         
@@ -101,10 +102,10 @@ class English(object):
             self.entity = Entity(self.vocab.strings, path.join(data_dir, 'ner'))
         else:
             self.entity = None
-        if Serializer:
-            self.bitter = Serializer(self.vocab, data_dir)
+        if Packer:
+            self.packer = Packer(self.vocab, data_dir)
         else:
-            self.bitter = None
+            self.packer = None
         self.mwe_merger = RegexMerger([
             ('IN', 'O', regexes.MW_PREPOSITIONS_RE),
             ('CD', 'TIME', regexes.TIME_RE),
diff --git a/spacy/serialize.pyx b/spacy/serialize.pyx
deleted file mode 100644
index de217f74e..000000000
--- a/spacy/serialize.pyx
+++ /dev/null
@@ -1,334 +0,0 @@
-from libcpp.vector cimport vector
-from libc.stdint cimport uint32_t
-from libc.stdint cimport int64_t
-from libc.stdint cimport int32_t
-from libc.stdint cimport uint64_t
-from libcpp.queue cimport priority_queue
-from libcpp.pair cimport pair
-
-from preshed.maps cimport PreshMap
-from murmurhash.mrmr cimport hash64
-from .tokens.doc cimport Doc
-from .vocab cimport Vocab
-
-from os import path
-import numpy
-
-cimport cython
-
-ctypedef unsigned char uchar
-
-# Format
-# - Total number of bytes in message (32 bit int)
-# - Words, terminating in an EOL symbol, huffman coded ~12 bits per word
-# - Spaces ~1 bit per word
-# - Parse: Huffman coded head offset / dep label / POS tag / entity IOB tag
-#          combo. ? bits per word. 40 * 80 * 40 * 12 = 1.5m symbol vocab
-
-
-# Note that we're setting the most significant bits here first, when in practice
-# we're actually wanting the last bit to be most significant (for Huffman coding,
-# anyway).
-cdef Code bit_append(Code code, bint bit) nogil:
-    cdef uint64_t one = 1
-    if bit:
-        code.bits |= one << code.length
-    else:
-        code.bits &= ~(one << code.length)
-    code.length += 1
-    return code
-
-
-cdef class BitArray:
-    cdef bytes data
-    cdef unsigned char byte
-    cdef unsigned char bit_of_byte
-    cdef uint32_t i
-    def __init__(self):
-        self.data = b''
-        self.byte = 0
-        self.bit_of_byte = 0
-        self.i = 0
-
-    def __iter__(self):
-        cdef uchar byte, i
-        cdef uchar one = 1
-        start_byte = self.i // 8
-        if (self.i % 8) != 0:
-            for i in range(self.i % 8):
-                yield 1 if (self.data[start_byte] & (one << i)) else 0
-            start_byte += 1
-        for byte in self.data[start_byte:]:
-            for i in range(8):
-                yield 1 if byte & (one << i) else 0
-        for i in range(self.bit_of_byte):
-            yield 1 if self.byte & (one << i) else 0
-
-    def as_bytes(self):
-        if self.bit_of_byte != 0:
-            return self.data + chr(self.byte)
-        else:
-            return self.data
-
-    def append(self, bint bit):
-        cdef uint64_t one = 1
-        if bit:
-            self.byte |= one << self.bit_of_byte
-        else:
-            self.byte &= ~(one << self.bit_of_byte)
-        self.bit_of_byte += 1
-        if self.bit_of_byte == 8:
-            self.data += chr(self.byte)
-            self.byte = 0
-            self.bit_of_byte = 0
-
-    cdef int extend(self, uint64_t code, char n_bits) except -1:
-        cdef uint64_t one = 1
-        cdef unsigned char bit_of_code
-        for bit_of_code in range(n_bits):
-            if code & (one << bit_of_code):
-                self.byte |= one << self.bit_of_byte
-            else:
-                self.byte &= ~(one << self.bit_of_byte)
-            self.bit_of_byte += 1
-            if self.bit_of_byte == 8:
-                self.data += chr(self.byte)
-                self.byte = 0
-                self.bit_of_byte = 0
-    
-
-cdef class Serializer:
-    # Manage codecs, maintain consistent format for io
-    def __init__(self, Vocab vocab, data_dir):
-        model_dir = path.join(data_dir, 'bitter')
-        self.vocab = vocab # Vocab owns the word codec, the big one
-        #self.cfg = Config.read(model_dir, 'config')
-        self.codecs = tuple([CodecWrapper(attr) for attr in self.cfg.attrs])
-
-    def __call__(self, doc_or_bits):
-        if isinstance(doc_or_bits, Doc):
-            return self.serialize(doc_or_bits)
-        elif isinstance(doc_or_bits, BitArray):
-            return self.deserialize(doc_or_bits)
-        else:
-            raise ValueError(doc_or_bits)
-
-    def train(self, doc):
-        array = doc.to_array([codec.id for codec in self.codecs])
-        for i, codec in enumerate(self.codecs):
-            codec.count(array[i]) 
-
-    def serialize(self, doc):
-        bits = BitArray()
-        array = doc.to_array(self.attrs)
-        for i, codec in enumerate(self.codecs):
-            codec.encode(array[i,], bits)
-        return bits
-
-    @cython.boundscheck(False)
-    def deserialize(self, bits):
-        biterator = iter(bits)
-        cdef Doc doc = Doc(self.vocab)
-        ids = self.vocab.codec.decode(biterator)
-        cdef int id_
-        cdef bint is_spacy
-        for id_ in ids:
-            is_spacy = biterator.next()
-            doc.push_back(self.vocab.lexemes.at(id_), is_spacy)
-
-        cdef int length = doc.length
-        array = numpy.zeros(shape=(length, len(self.codecs)), dtype=numpy.int)
-        for i, codec in enumerate(self.codecs):
-            array[i] = codec.decode(biterator)
-        doc.from_array([c.id for c in self.codecs], array)
-        return doc
-
-
-cdef class CodecWrapper:
-    """Wrapper around HuffmanCodec"""
-    def __init__(self, freqs, id=0):
-        cdef uint64_t key
-        cdef uint64_t count
-        cdef pair[uint64_t, uint64_t] item
-        cdef priority_queue[pair[uint64_t, uint64_t]] items
-        for key, count in freqs:
-            item.first = count
-            item.second = key
-            items.push(item)
-        
-        weights = [] #array('f')
-        keys = [] #array('i')
-        key_to_i = PreshMap()
-        i = 0
-        while not items.empty():
-            item = items.top()
-            weights.append(item.first)
-            keys.append(item.second)
-            key_to_i[item.second] = i
-            i += 1
-            items.pop()
-
-    def encode(self, symbols):
-        indices = [self.table[symbol] for symbol in symbols]
-        return self._codec.encode(indices)
-
-    def decode(self, bits):
-        indices = self._codec.decode(bits)
-        return [self.symbols[i] for i in indices]
-
-
-cdef class HuffmanCodec:
-    """Create a Huffman code table, and use it to pack and unpack sequences into
-    byte strings. Emphasis is on efficiency, so API is quite strict:
-
-    Messages will be encoded/decoded as indices that refer to the probability sequence.
-    For instance, the sequence [5, 10, 8] indicates the 5th most frequent item,
-    the 10th most frequent item, the 8th most frequent item.  The codec will add
-    the EOL symbol to your message. An exception will be raised if you include
-    the EOL symbol in your message.
-
-    Arguments:
-        weights (float[:]): A descending-sorted sequence of probabilities/weights.
-          Must include a weight for an EOL symbol.
-
-        eol (uint32_t): The index of the weight of the EOL symbol.
-    """
-    def __init__(self, float[:] weights, uint32_t eol):
-        self.codes.resize(len(weights))
-        for i in range(len(self.codes)):
-            self.codes[i].bits = 0
-            self.codes[i].length = 0
-        populate_nodes(self.nodes, weights)
-        cdef Code path
-        path.bits = 0
-        path.length = 0
-        assign_codes(self.nodes, self.codes, len(self.nodes) - 1, path)
-
-    def encode(self, uint32_t[:] sequence, BitArray bits=None):
-        if bits is None:
-            bits = BitArray()
-        for i in sequence:
-            bits.extend(self.codes[i].bits, self.codes[i].length) 
-        bits.extend(self.codes[self.eol].bits, self.codes[self.eol].length)
-        return bits
-
-    def decode(self, bits):
-        node = self.nodes.back()
-        symbols = []
-        for bit in bits:
-            branch = node.right if bit else node.left
-            if branch >= 0:
-                node = self.nodes.at(branch)
-            else:
-                symbol = -(branch + 1)
-                if symbol == self.eol:
-                    return symbols
-                else:
-                    symbols.append(symbol)
-                node = self.nodes.back()
-        return symbols
-
-    property strings:
-        @cython.boundscheck(False)
-        @cython.wraparound(False)
-        @cython.nonecheck(False)
-        def __get__(self):
-            output = []
-            cdef int i, j
-            cdef bytes string
-            cdef Code code
-            for i in range(self.codes.size()):
-                code = self.codes[i]
-                string = b'{0:b}'.format(code.bits).rjust(code.length, '0')
-                string = string[::-1]
-                output.append(string)
-            return output
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.nonecheck(False)
-cdef int populate_nodes(vector[Node]& nodes, float[:] probs) except -1:
-    assert len(probs) >= 3
-    cdef int size = len(probs)
-    cdef int i = size - 1
-    cdef int j = 0
-    
-    while i >= 0 or (j+1) < nodes.size():
-        if i < 0:
-            _cover_two_nodes(nodes, j)
-            j += 2
-        elif j >= nodes.size():
-            _cover_two_words(nodes, i, i-1, probs[i] + probs[i-1])
-            i -= 2
-        elif i >= 1 and (j == nodes.size() or probs[i-1] < nodes[j].prob):
-            _cover_two_words(nodes, i, i-1, probs[i] + probs[i-1])
-            i -= 2
-        elif (j+1) < nodes.size() and nodes[j+1].prob < probs[i]:
-            _cover_two_nodes(nodes, j)
-            j += 2
-        else:
-            _cover_one_word_one_node(nodes, j, i, probs[i])
-            i -= 1
-            j += 1
-    return 0
-
-cdef int _cover_two_nodes(vector[Node]& nodes, int j) nogil:
-    """Introduce a new non-terminal, over two non-terminals)"""
-    cdef Node node
-    node.left = j
-    node.right = j+1
-    node.prob = nodes[j].prob + nodes[j+1].prob
-    nodes.push_back(node)
-
-
-cdef int _cover_one_word_one_node(vector[Node]& nodes, int j, int id_, float prob) nogil:
-    """Introduce a new non-terminal, over one terminal and one non-terminal."""
-    cdef Node node
-    # Encode leaves as negative integers, where the integer is the index of the
-    # word in the vocabulary.
-    cdef int64_t leaf_id = - <int64_t>(id_ + 1)
-    cdef float new_prob = prob + nodes[j].prob
-    if prob < nodes[j].prob:
-        node.left = leaf_id
-        node.right = j
-        node.prob = new_prob
-    else:
-        node.left = j
-        node.right = leaf_id
-        node.prob = new_prob
-    nodes.push_back(node)
-
-
-cdef int _cover_two_words(vector[Node]& nodes, int id1, int id2, float prob) nogil:
-    """Introduce a new node, over two non-terminals."""
-    cdef Node node
-    node.left = -(id1+1)
-    node.right = -(id2+1)
-    node.prob = prob
-    nodes.push_back(node)
-
-
-cdef int assign_codes(vector[Node]& nodes, vector[Code]& codes, int i, Code path) except -1:
-    """Recursively assign paths, from the top down. At the end, the entry codes[i]
-    knows the bit-address of the node[j] that points to entry i in the vocabulary.
-    So, to encode i, we go to codes[i] and read its bit-string. To decode, we
-    navigate nodes recursively.
-    """
-    cdef Code left_path = bit_append(path, 0)
-    cdef Code right_path = bit_append(path, 1)
-    
-    # Assign down left branch
-    if nodes[i].left >= 0:
-        assign_codes(nodes, codes, nodes[i].left, left_path)
-    else:
-        # Leaf on left
-        id_ = -(nodes[i].left + 1)
-        codes[id_] = left_path
-    # Assign down right branch
-    if nodes[i].right >= 0:
-        assign_codes(nodes, codes, nodes[i].right, right_path)
-    else:
-        # Leaf on right
-        id_ = -(nodes[i].right + 1)
-        codes[id_] = right_path
diff --git a/spacy/serialize/bits.pxd b/spacy/serialize/bits.pxd
new file mode 100644
index 000000000..51ecf4c63
--- /dev/null
+++ b/spacy/serialize/bits.pxd
@@ -0,0 +1,28 @@
+from libc.stdint cimport uint64_t
+from libc.stdint cimport uint32_t
+
+ctypedef unsigned char uchar
+
+
+# A bit string of `length` bits held in `bits`. bit_append() stores bit k of
+# the string in bit k of `bits`, so the first bit is the least significant.
+cdef struct Code:
+    uint64_t bits
+    char length
+
+
+# Return a copy of `code` with `bit` written at position `code.length` and
+# the length increased by one.
+cdef Code bit_append(Code code, bint bit) nogil
+
+
+# Growable bit buffer. Bits are written into `byte` (the partially filled
+# byte), which is moved into `data` each time eight bits have been collected.
+cdef class BitArray:
+    cdef bytes data         # completed bytes
+    cdef uchar byte         # byte currently being filled
+    cdef uchar bit_of_byte  # number of bits already written into `byte`
+    cdef uint32_t i         # bit offset at which iteration starts
+    
+    # Write the lowest `n_bits` bits of `code`, least significant bit first.
+    cdef int extend(self, uint64_t code, char n_bits) except -1
diff --git a/spacy/serialize/bits.pyx b/spacy/serialize/bits.pyx
new file mode 100644
index 000000000..7df236537
--- /dev/null
+++ b/spacy/serialize/bits.pyx
@@ -0,0 +1,85 @@
+
+
+# Codes are built least significant bit first: the first bit appended is stored
+# in bit 0 and every later bit one position higher, so the last bit appended is
+# the most significant one.
+cdef Code bit_append(Code code, bint bit) nogil:
+    """Return `code` extended by one bit, written at position `code.length`."""
+    cdef uint64_t one = 1
+    if bit:
+        code.bits |= one << code.length
+    else:
+        code.bits &= ~(one << code.length)
+    code.length += 1
+    return code
+
+
+cdef class BitArray:
+    """Append-only buffer of bits that can be iterated over bit by bit.
+
+    Completed bytes are kept in `data`; `byte` collects the bits of the byte
+    being filled and `bit_of_byte` counts them. Within a byte, bits are
+    stored least significant bit first.
+    """
+    def __init__(self):
+        self.data = b''
+        self.byte = 0
+        self.bit_of_byte = 0
+        self.i = 0
+
+    def __iter__(self):
+        """Yield the stored bits as 0/1 integers, starting at bit offset `i`."""
+        cdef uchar byte, i
+        cdef uchar one = 1
+        start_byte = self.i // 8
+        if (self.i % 8) != 0:
+            # The offset falls inside a byte: yield the bits from the offset to
+            # the end of that byte, not the ones that precede the offset.
+            for i in range(self.i % 8, 8):
+                yield 1 if (self.data[start_byte] & (one << i)) else 0
+            start_byte += 1
+        for byte in self.data[start_byte:]:
+            for i in range(8):
+                yield 1 if byte & (one << i) else 0
+        # Last, the bits of the byte that is not complete yet.
+        for i in range(self.bit_of_byte):
+            yield 1 if self.byte & (one << i) else 0
+
+    def as_bytes(self):
+        """Return the contents as a byte string. An incomplete last byte is
+        included, with its unused high bits left at zero."""
+        if self.bit_of_byte != 0:
+            return self.data + chr(self.byte)
+        else:
+            return self.data
+
+    def append(self, bint bit):
+        """Append a single bit."""
+        cdef uint64_t one = 1
+        if bit:
+            self.byte |= one << self.bit_of_byte
+        else:
+            self.byte &= ~(one << self.bit_of_byte)
+        self.bit_of_byte += 1
+        if self.bit_of_byte == 8:
+            # The byte is full: move it into `data` and start a new one.
+            self.data += chr(self.byte)
+            self.byte = 0
+            self.bit_of_byte = 0
+
+    cdef int extend(self, uint64_t code, char n_bits) except -1:
+        """Append the lowest `n_bits` bits of `code`, least significant first."""
+        cdef uint64_t one = 1
+        cdef unsigned char bit_of_code
+        for bit_of_code in range(n_bits):
+            if code & (one << bit_of_code):
+                self.byte |= one << self.bit_of_byte
+            else:
+                self.byte &= ~(one << self.bit_of_byte)
+            self.bit_of_byte += 1
+            if self.bit_of_byte == 8:
+                self.data += chr(self.byte)
+                self.byte = 0
+                self.bit_of_byte = 0
+
+
diff --git a/spacy/serialize.pxd b/spacy/serialize/huffman.pxd
similarity index 63%
rename from spacy/serialize.pxd
rename to spacy/serialize/huffman.pxd
index d060382a4..c559c2c51 100644
--- a/spacy/serialize.pxd
+++ b/spacy/serialize/huffman.pxd
@@ -4,7 +4,7 @@ from libc.stdint cimport int64_t
 from libc.stdint cimport int32_t
 from libc.stdint cimport uint64_t
 
-from .vocab cimport Vocab
+from .bits cimport Code
 
 
 cdef struct Node:
@@ -13,19 +13,6 @@ cdef struct Node:
     int32_t right
 
 
-cdef struct Code:
-    uint64_t bits
-    char length
-
-
-cdef class Serializer:
-    cdef list codecs
-    cdef Vocab vocab
-
-
 cdef class HuffmanCodec:
     cdef vector[Node] nodes
     cdef vector[Code] codes
-    cdef uint32_t eol
-    cdef int id
-
diff --git a/spacy/serialize/huffman.pyx b/spacy/serialize/huffman.pyx
new file mode 100644
index 000000000..826ee4e29
--- /dev/null
+++ b/spacy/serialize/huffman.pyx
@@ -0,0 +1,184 @@
+cimport cython
+
+from .bits cimport bit_append
+from .bits cimport BitArray
+
+
+cdef class HuffmanCodec:
+    """Create a Huffman code table, and use it to pack and unpack sequences into
+    byte strings. Emphasis is on efficiency, so API is quite strict:
+
+    Messages will be encoded/decoded as indices that refer to the probability sequence.
+    For instance, the sequence [5, 10, 8] indicates the 5th most frequent item,
+    the 10th most frequent item, the 8th most frequent item.
+
+    No end-of-message symbol is written. The caller has to keep track of how
+    many symbols a message holds and pass decode() a buffer of that size.
+
+    Arguments:
+        weights (float[:]): A descending-sorted sequence of probabilities/weights.
+          At least three weights are required.
+    """
+    def __init__(self, float[:] weights):
+        self.codes.resize(len(weights))
+        for i in range(len(self.codes)):
+            self.codes[i].bits = 0
+            self.codes[i].length = 0
+        populate_nodes(self.nodes, weights)
+        cdef Code path
+        path.bits = 0
+        path.length = 0
+        # The root of the tree is the node that was added last.
+        assign_codes(self.nodes, self.codes, len(self.nodes) - 1, path)
+
+    def encode(self, uint32_t[:] msg, BitArray into_bits):
+        """Append the code of every symbol index in `msg` to `into_bits`."""
+        cdef uint32_t i
+        for i in range(len(msg)):
+            into_bits.extend(self.codes[msg[i]].bits, self.codes[msg[i]].length)
+
+    def decode(self, bits, uint32_t[:] into_msg):
+        """Decode exactly len(into_msg) symbol indices from `bits`.
+
+        `bits` is any iterable of bits. Iteration stops as soon as the buffer
+        is full, so an iterator can be reused afterwards to read whatever
+        follows the message.
+
+        Raises ValueError if `bits` runs out before `into_msg` is full.
+        """
+        cdef int i = 0
+        cdef int n = len(into_msg)
+        if n == 0:
+            # Nothing to decode: consume no bits and never touch into_msg[0].
+            return
+        node = self.nodes.back()
+        for bit in bits:
+            branch = node.right if bit else node.left
+            if branch >= 0:
+                node = self.nodes.at(branch)
+            else:
+                # A negative branch is a leaf holding symbol -(branch + 1).
+                into_msg[i] = -(branch + 1)
+                node = self.nodes.back()
+                i += 1
+                if i == n:
+                    break
+        else:
+            raise ValueError(
+                "Bit stream ended after %d of %d symbols" % (i, n))
+
+    property strings:
+        @cython.boundscheck(False)
+        @cython.wraparound(False)
+        @cython.nonecheck(False)
+        def __get__(self):
+            """The code of every symbol as a string of '0'/'1' characters, with
+            the bit that is written first coming first."""
+            output = []
+            cdef int i
+            cdef bytes string
+            cdef Code code
+            for i in range(self.codes.size()):
+                code = self.codes[i]
+                string = b'{0:b}'.format(code.bits).rjust(code.length, '0')
+                # format() puts the highest bit first; reverse the string so
+                # that bit 0 leads.
+                string = string[::-1]
+                output.append(string)
+            return output
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+cdef int populate_nodes(vector[Node]& nodes, float[:] probs) except -1:
+    """Build the Huffman tree for `probs` bottom-up, appending to `nodes`.
+
+    Every step puts a new node over two items that have no parent yet: two
+    leaves, two nodes, or one of each, depending on how their weights
+    compare. `i` walks the leaves from the last entry of `probs` towards the
+    first; `j` is the index of the oldest node without a parent.
+    """
+    assert len(probs) >= 3
+    cdef int size = len(probs)
+    cdef int i = size - 1
+    cdef int j = 0
+    
+    while i >= 0 or (j+1) < nodes.size():
+        if i < 0:
+            _cover_two_nodes(nodes, j)
+            j += 2
+        elif j >= nodes.size():
+            _cover_two_words(nodes, i, i-1, probs[i] + probs[i-1])
+            i -= 2
+        elif i >= 1 and (j == nodes.size() or probs[i-1] < nodes[j].prob):
+            _cover_two_words(nodes, i, i-1, probs[i] + probs[i-1])
+            i -= 2
+        elif (j+1) < nodes.size() and nodes[j+1].prob < probs[i]:
+            _cover_two_nodes(nodes, j)
+            j += 2
+        else:
+            _cover_one_word_one_node(nodes, j, i, probs[i])
+            i -= 1
+            j += 1
+    return 0
+
+cdef int _cover_two_nodes(vector[Node]& nodes, int j) nogil:
+    """Introduce a new non-terminal, over two non-terminals."""
+    cdef Node node
+    node.left = j
+    node.right = j+1
+    node.prob = nodes[j].prob + nodes[j+1].prob
+    nodes.push_back(node)
+
+
+cdef int _cover_one_word_one_node(vector[Node]& nodes, int j, int id_, float prob) nogil:
+    """Introduce a new non-terminal, over one terminal and one non-terminal."""
+    cdef Node node
+    # Encode leaves as negative integers, where the integer is the index of the
+    # word in the vocabulary.
+    cdef int64_t leaf_id = - <int64_t>(id_ + 1)
+    cdef float new_prob = prob + nodes[j].prob
+    if prob < nodes[j].prob:
+        node.left = leaf_id
+        node.right = j
+        node.prob = new_prob
+    else:
+        node.left = j
+        node.right = leaf_id
+        node.prob = new_prob
+    nodes.push_back(node)
+
+
+cdef int _cover_two_words(vector[Node]& nodes, int id1, int id2, float prob) nogil:
+    """Introduce a new non-terminal, over two terminals."""
+    cdef Node node
+    node.left = -(id1+1)
+    node.right = -(id2+1)
+    node.prob = prob
+    nodes.push_back(node)
+
+
+cdef int assign_codes(vector[Node]& nodes, vector[Code]& codes, int i, Code path) except -1:
+    """Recursively assign paths, from the top down. At the end, the entry codes[i]
+    knows the bit-address of the node[j] that points to entry i in the vocabulary.
+    So, to encode i, we go to codes[i] and read its bit-string. To decode, we
+    navigate nodes recursively.
+    """
+    cdef Code left_path = bit_append(path, 0)
+    cdef Code right_path = bit_append(path, 1)
+    
+    # Assign down left branch
+    if nodes[i].left >= 0:
+        assign_codes(nodes, codes, nodes[i].left, left_path)
+    else:
+        # Leaf on left
+        id_ = -(nodes[i].left + 1)
+        codes[id_] = left_path
+    # Assign down right branch
+    if nodes[i].right >= 0:
+        assign_codes(nodes, codes, nodes[i].right, right_path)
+    else:
+        # Leaf on right
+        id_ = -(nodes[i].right + 1)
+        codes[id_] = right_path
diff --git a/spacy/serialize/packer.pxd b/spacy/serialize/packer.pxd
new file mode 100644
index 000000000..f6fca5c9e
--- /dev/null
+++ b/spacy/serialize/packer.pxd
@@ -0,0 +1,9 @@
+from ..vocab cimport Vocab
+
+
+cdef class Packer:
+    cdef tuple _codecs
+    cdef Vocab vocab
+    # packer.pyx assigns self.attrs in Packer.__init__; as an attribute of a
+    # cdef class it has to be declared here.
+    cdef list attrs
diff --git a/spacy/serialize/packer.pyx b/spacy/serialize/packer.pyx
new file mode 100644
index 000000000..2f9305646
--- /dev/null
+++ b/spacy/serialize/packer.pyx
@@ -0,0 +1,223 @@
+from libc.stdint cimport uint32_t
+from libc.stdint cimport uint64_t
+from libc.math cimport exp as c_exp
+from libcpp.queue cimport priority_queue
+from libcpp.pair cimport pair
+
+from cymem.cymem cimport Address, Pool
+from preshed.maps cimport PreshMap
+
+from ..attrs cimport ID, SPACY, TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
+from ..tokens.doc cimport Doc
+from ..vocab cimport Vocab
+from ..typedefs cimport attr_t
+from .bits cimport BitArray
+from .huffman cimport HuffmanCodec
+
+from os import path
+import numpy
+
+cimport cython
+
+
+# Format
+# - Total number of bytes in message (32 bit int) --- handled outside this
+# - Number of words (32 bit int)
+# - One section per attribute, in the order the attributes were given to
+#   Packer. Sections have no terminator; the word count delimits them.
+#       Words: huffman coded, ~12 bits per word
+#       Spaces: 1 bit per word
+#       Other attributes (POS tag, head offset, dep label, entity IOB,
+#       entity tag): huffman coded
+
+
+def make_vocab_codec(Vocab vocab):
+    """Build a HuffmanCodec over the lexemes of `vocab`.
+
+    Symbol i of the codec is lexeme i, weighted by exp(lexeme.prob).
+    NOTE(review): HuffmanCodec documents its weights as sorted in descending
+    order; presumably the lexemes are ordered by probability -- confirm.
+    """
+    cdef int length = len(vocab)
+    cdef Address mem = Address(length, sizeof(float))
+    probs = <float*>mem.ptr
+    cdef int i
+    for i in range(length):
+        probs[i] = <float>c_exp(vocab.lexemes[i].prob)
+    cdef float[:] cv_probs = <float[:len(vocab)]>probs
+    return HuffmanCodec(cv_probs)
+
+
+cdef class _BinaryCodec:
+    """Codec for boolean attributes: one bit per value, no compression."""
+    def encode(self, src, bits):
+        """Append one bit to `bits` for each element of `src`."""
+        cdef int i
+        for i in range(len(src)):
+            bits.append(src[i])
+
+    def decode(self, bits, into_msg):
+        """Fill `into_msg` with the next len(into_msg) bits read from `bits`.
+
+        Takes (bits, into_msg) like the other codecs, so that Packer can
+        drive every codec through the same call.
+        """
+        cdef int i
+        biterator = iter(bits)
+        for i in range(len(into_msg)):
+            into_msg[i] = next(biterator)
+
+
+cdef class _AttributeCodec:
+    """Huffman codec for the values of one token attribute.
+
+    Values are replaced by their frequency rank (0 = most frequent) and the
+    ranks are handed to a HuffmanCodec.
+    """
+    cdef Pool mem
+    cdef attr_t* _keys      # rank -> attribute value
+    cdef PreshMap _map      # attribute value -> rank + 1
+    cdef HuffmanCodec _codec
+
+    def __init__(self, freqs):
+        """Build the codec from `freqs`, a sized sequence of (value, count)
+        pairs. HuffmanCodec needs at least three entries."""
+        cdef uint64_t key
+        cdef uint64_t count
+        cdef pair[uint64_t, uint64_t] item
+        # Max-heap on (count, key): popping yields descending counts, the
+        # order in which HuffmanCodec expects its weights.
+        cdef priority_queue[pair[uint64_t, uint64_t]] items
+
+        for key, count in freqs:
+            item.first = count
+            item.second = key
+            items.push(item)
+        weights = numpy.ndarray(shape=(len(freqs),), dtype=numpy.float32)
+        self.mem = Pool()
+        self._keys = <attr_t*>self.mem.alloc(len(freqs), sizeof(attr_t))
+        self._map = PreshMap()
+        cdef int i = 0
+        while not items.empty():
+            item = items.top()
+            weights[i] = item.first
+            self._keys[i] = item.second
+            # Store rank + 1. NOTE(review): PreshMap appears to report a stored
+            # 0 as a missing key, which would lose rank 0 -- confirm.
+            self._map[self._keys[i]] = i + 1
+            i += 1
+            items.pop()
+        self._codec = HuffmanCodec(weights)
+
+    def encode(self, msg, BitArray into_bits):
+        """Append the Huffman codes of the values in `msg` to `into_bits`.
+
+        `msg` is left unchanged. Raises KeyError for a value that has no
+        entry in the frequency table.
+        """
+        cdef int i
+        ranks = numpy.ndarray(shape=(len(msg),), dtype=numpy.uint32)
+        for i in range(len(msg)):
+            rank = self._map[msg[i]]
+            if rank is None:
+                raise KeyError(msg[i])
+            ranks[i] = rank - 1
+        self._codec.encode(ranks, into_bits)
+
+    def decode(self, bits, into_msg):
+        """Decode len(into_msg) values from `bits` into `into_msg`.
+
+        `bits` may be a BitArray or an iterator over bits; with an iterator,
+        several codecs can read consecutive sections of one stream.
+        """
+        cdef int i
+        ranks = numpy.ndarray(shape=(len(into_msg),), dtype=numpy.uint32)
+        self._codec.decode(bits, ranks)
+        for i in range(len(into_msg)):
+            into_msg[i] = self._keys[ranks[i]]
+
+
+cdef class Packer:
+    """Pack the attribute columns of a Doc into a BitArray and back.
+
+    NOTE(review): `attrs` has to be declared on Packer in packer.pxd, next to
+    `_codecs` and `vocab`; a cdef class cannot take undeclared attributes.
+    """
+    def __init__(self, Vocab vocab, list_of_attr_freqs):
+        """Create one codec per (attr, freqs) pair of `list_of_attr_freqs`.
+
+        ID gets a codec over the whole vocab and SPACY one plain bit per token
+        (`freqs` is ignored for both); every other attribute gets a Huffman
+        codec built from its `freqs`.
+        """
+        self.vocab = vocab
+        codecs = []
+        self.attrs = []
+
+        for attr, freqs in list_of_attr_freqs:
+            if attr == ID:
+                codecs.append(make_vocab_codec(vocab))
+            elif attr == SPACY:
+                codecs.append(_BinaryCodec())
+            else:
+                codecs.append(_AttributeCodec(freqs))
+            self.attrs.append(attr)
+        self._codecs = tuple(codecs)
+
+    def __call__(self, msg_or_bits):
+        """Unpack a BitArray into a Doc; pack any other argument."""
+        if isinstance(msg_or_bits, BitArray):
+            array = self.deserialize(msg_or_bits)
+            # from_array() is an instance method, so a Doc has to exist first.
+            # NOTE(review): it presumably expects the tokens to be present
+            # already; creating them from the ID column is not done here.
+            doc = Doc(self.vocab)
+            doc.from_array(self.attrs, array)
+            return doc
+        else:
+            msg = msg_or_bits
+            return self.serialize(msg.to_array(self.attrs))
+
+    def serialize(self, array):
+        """Pack `array` (one row per token, one column per attribute, columns
+        in the order of `self.attrs`) into a new BitArray.
+
+        NOTE(review): the ID codec is a bare HuffmanCodec whose encode() takes
+        uint32_t[:]; confirm that the columns arrive with a matching dtype.
+        """
+        cdef BitArray bits = BitArray()
+        cdef uint32_t length = len(array)
+        bits.extend(length, 32)
+        for i, codec in enumerate(self._codecs):
+            # Columns, not rows: the row count was just written as the number
+            # of words, and Doc.from_array() reads array[:, col] as well.
+            codec.encode(array[:, i], bits)
+        return bits
+
+    def deserialize(self, bits):
+        """Unpack `bits` into an array with one row per token and one column
+        per attribute, the layout serialize() consumes.
+
+        Raises ValueError if `bits` ends inside the 32 bit word count.
+        """
+        cdef uint32_t length = 0
+        cdef uint32_t one = 1
+        cdef int i
+        # Every section is read from one iterator, so each codec carries on
+        # where the previous one stopped.
+        biterator = iter(bits)
+        # BitArray.extend() wrote the count least significant bit first.
+        for i in range(32):
+            bit = next(biterator, None)
+            if bit is None:
+                raise ValueError("Bit stream ends inside the word count")
+            if bit:
+                length |= one << i
+        # uint32 because HuffmanCodec.decode() fills a uint32_t[:] buffer.
+        array = numpy.zeros(shape=(length, len(self._codecs)),
+                            dtype=numpy.uint32)
+        if length == 0:
+            return array
+        for i, codec in enumerate(self._codecs):
+            codec.decode(biterator, array[:, i])
+        return array
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 6bf37cf36..392c78a45 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -4,7 +4,6 @@ from libc.string cimport memcpy, memset
 import numpy
 
 from ..lexeme cimport EMPTY_LEXEME
-from ..serialize import BitArray
 from ..strings cimport slice_unicode
 from ..typedefs cimport attr_t, flags_t
 from ..attrs cimport attr_id_t
@@ -371,10 +370,12 @@ cdef class Doc:
         return self[start]
 
     def from_array(self, attrs, array):
-        cdef int i
+        cdef int i, col
         cdef attr_id_t attr_id
         cdef TokenC* tokens = self.data
-        for attr_id in attrs:
+        cdef int length = len(array)
+        for col, attr_id in enumerate(attrs): 
+            values = array[:, col]
             if attr_id == HEAD:
                 for i in range(length):
                     tokens[i].head = values[i]
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 04db8fa30..df0f001be 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -35,5 +35,3 @@ cdef class Vocab:
 
     cdef PreshMap _map
     cdef readonly int repvec_length
-
-    cdef public object _codec
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 60719a9fe..018a42929 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -1,7 +1,6 @@
 from libc.stdio cimport fopen, fclose, fread, fwrite, FILE
 from libc.string cimport memset
 from libc.stdint cimport int32_t
-from libc.math cimport exp as c_exp
 
 import bz2
 from os import path
@@ -15,7 +14,6 @@ from .strings cimport slice_unicode
 from .strings cimport hash_string
 from .orth cimport word_shape
 from .typedefs cimport attr_t
-from .serialize cimport HuffmanCodec
 
 from cymem.cymem cimport Address
 
@@ -227,22 +225,6 @@ cdef class Vocab:
                 lex.repvec = EMPTY_VEC
         return vec_len
 
-    property codec:
-        def __get__(self):
-            cdef Address mem
-            cdef int i
-            cdef float[:] cv_probs
-            if self._codec is not None:
-                return self._codec
-            else:
-                mem = Address(len(self), sizeof(float))
-                probs = <float*>mem.ptr
-                for i in range(len(self)):
-                    probs[i] = <float>c_exp(self.lexemes[i].prob)
-                cv_probs = <float[:len(self)]>probs
-                self._codec = HuffmanCodec(cv_probs, 0)
-                return self._codec
-
 
 def write_binary_vectors(in_loc, out_loc):
     cdef _CFile out_file = _CFile(out_loc, 'wb')
diff --git a/tests/vocab/test_huffman.py b/tests/vocab/test_huffman.py
index 124431a66..188ebbc58 100644
--- a/tests/vocab/test_huffman.py
+++ b/tests/vocab/test_huffman.py
@@ -3,14 +3,15 @@ from __future__ import division
 
 import pytest
 
-from spacy.serialize import HuffmanCodec
+from spacy.serialize.huffman import HuffmanCodec
+from spacy.serialize.bits import BitArray
 import numpy
 
 from heapq import heappush, heappop, heapify
 from collections import defaultdict
 
 
-class Vocab(object):
+class MockPacker(object):
     def __init__(self, freqs):
         freqs['-eol-'] = 5
         total = sum(freqs.values())
@@ -19,15 +20,19 @@ class Vocab(object):
         self.symbols = [sym for sym, freq in by_freq]
         self.probs = numpy.array([item[1] / total for item in by_freq], dtype=numpy.float32)
         self.table = {sym: i for i, sym in enumerate(self.symbols)}
-        self.codec = HuffmanCodec(self.probs, self.table['-eol-'])
+        self.codec = HuffmanCodec(self.probs)
 
     def pack(self, message):
         seq = [self.table[sym] for sym in message]
-        return self.codec.encode(numpy.array(seq, dtype=numpy.uint32))
+        msg = numpy.array(seq, dtype=numpy.uint32)
+        bits = BitArray()
+        self.codec.encode(msg, bits)
+        return bits
 
-    def unpack(self, packed):
-        ids = self.codec.decode(packed)
-        return [self.symbols[i] for i in ids]
+    def unpack(self, bits, n):
+        msg = numpy.array(range(n), dtype=numpy.uint32)
+        self.codec.decode(bits, msg)
+        return [self.symbols[i] for i in msg]
 
  
 def py_encode(symb2freq):
@@ -60,7 +65,7 @@ def test1():
     probs[8] = 0.0001
     probs[9] = 0.000001
     
-    codec = HuffmanCodec(probs, 9)
+    codec = HuffmanCodec(probs)
     
     py_codes = py_encode(dict(enumerate(probs)))
     py_codes = py_codes.items()
@@ -71,19 +76,19 @@ def test1():
 def test_round_trip():
     freqs = {'the': 10, 'quick': 3, 'brown': 4, 'fox': 1, 'jumped': 5, 'over': 8,
             'lazy': 1, 'dog': 2, '.': 9}
-    vocab = Vocab(freqs)
+    packer = MockPacker(freqs)
 
     message = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the',
                 'the', 'lazy', 'dog', '.']
-    strings = list(vocab.codec.strings)
-    codes = {vocab.symbols[i]: strings[i] for i in range(len(vocab.symbols))}
-    packed = vocab.pack(message)
-    string = b''.join(b'{0:b}'.format(ord(c)).rjust(8, b'0')[::-1] for c in packed.as_bytes())
+    strings = list(packer.codec.strings)
+    codes = {packer.symbols[i]: strings[i] for i in range(len(packer.symbols))}
+    bits = packer.pack(message)
+    string = b''.join(b'{0:b}'.format(ord(c)).rjust(8, b'0')[::-1] for c in bits.as_bytes())
     for word in message:
         code = codes[word]
         assert string[:len(code)] == code
         string = string[len(code):]
-    unpacked = vocab.unpack(packed)
+    unpacked = packer.unpack(bits, len(message))
     assert message == unpacked
 
 
@@ -92,13 +97,12 @@ def test_rosetta():
     symb2freq = defaultdict(int)
     for ch in txt:
         symb2freq[ch] += 1
-    symb2freq['-eol-'] = 1
     by_freq = symb2freq.items()
     by_freq.sort(reverse=True, key=lambda item: item[1])
     symbols = [sym for sym, prob in by_freq]
     probs = numpy.array([prob for sym, prob in by_freq], dtype=numpy.float32)
 
-    codec = HuffmanCodec(probs, symbols.index('-eol-'))
+    codec = HuffmanCodec(probs)
     py_codec = py_encode(symb2freq)
 
     my_lengths = defaultdict(int)
@@ -112,6 +116,7 @@ def test_rosetta():
     assert my_exp_len == py_exp_len
 
 
+"""
 def test_vocab(EN):
     codec = EN.vocab.codec
     expected_length = 0
@@ -137,3 +142,4 @@ def test_freqs():
     for i, code in enumerate(codec.strings):
         expected_length += len(code) * freqs[i]
     assert 8 < expected_length < 14
+"""