mirror of https://github.com/explosion/spaCy.git
* Draft docstring for HuffmanCodec
This commit is contained in:
parent
281f1faefb
commit
8d0f1d98da
|
@ -44,14 +44,26 @@ cdef Code bit_append(Code code, bint bit) nogil:
|
||||||
|
|
||||||
|
|
||||||
cdef class HuffmanCodec:
|
cdef class HuffmanCodec:
|
||||||
|
"""Create a Huffman code table, and use it to pack and unpack sequences into
|
||||||
|
byte strings. Emphasis is on efficiency, so API is quite strict:
|
||||||
|
|
||||||
|
Messages will be encoded/decoded as indices that refer to the probability sequence.
|
||||||
|
For instance, the sequence [5, 10, 8] indicates the 5th most frequent item,
|
||||||
|
the 10th most frequent item, the 8th most frequent item. The codec will add
|
||||||
|
the EOL symbol to your message. An exception will be raised if you include
|
||||||
|
the EOL symbol in your message.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
probs (float[:]): A descending-sorted sequence of probabilities/weights.
|
||||||
|
Must include a weight for an EOL symbol.
|
||||||
|
|
||||||
|
eol (uint32_t): The index of the weight of the EOL symbol.
|
||||||
|
"""
|
||||||
cdef vector[Node] nodes
|
cdef vector[Node] nodes
|
||||||
cdef vector[Code] codes
|
cdef vector[Code] codes
|
||||||
cdef readonly float[:] probs
|
|
||||||
cdef PreshMap table
|
|
||||||
cdef uint32_t eol
|
cdef uint32_t eol
|
||||||
def __init__(self, probs, eol):
|
def __init__(self, float[:] probs, uint32_t eol):
|
||||||
self.eol = eol
|
self.eol = eol
|
||||||
self.probs = probs
|
|
||||||
self.codes.resize(len(probs))
|
self.codes.resize(len(probs))
|
||||||
for i in range(len(self.codes)):
|
for i in range(len(self.codes)):
|
||||||
self.codes[i].bits = 0
|
self.codes[i].bits = 0
|
||||||
|
@ -69,7 +81,7 @@ cdef class HuffmanCodec:
|
||||||
cdef uint64_t one = 1
|
cdef uint64_t one = 1
|
||||||
cdef unsigned char i_of_byte = 0
|
cdef unsigned char i_of_byte = 0
|
||||||
cdef unsigned char i_of_code = 0
|
cdef unsigned char i_of_code = 0
|
||||||
for index in sequence:
|
for index in list(sequence) + [self.eol]:
|
||||||
code = self.codes[index]
|
code = self.codes[index]
|
||||||
for i_of_code in range(code.length):
|
for i_of_code in range(code.length):
|
||||||
if code.bits & (one << i_of_code):
|
if code.bits & (one << i_of_code):
|
||||||
|
|
Loading…
Reference in New Issue