* Improve de/serialize functions

This commit is contained in:
Matthew Honnibal 2015-07-16 01:26:35 +02:00
parent 0e07c1ed2a
commit 01fab6bb90
1 changed file with 18 additions and 17 deletions

View File

@@ -380,7 +380,7 @@ cdef class Doc:
@staticmethod @staticmethod
def deserialize(Vocab vocab, bits): def deserialize(Vocab vocab, bits):
biterator = iter(bits) biterator = iter(bits)
ids = vocab.lex_codec.decode(bits) ids = vocab.codecs[0].decode(bits)
cdef Doc doc = Doc(vocab) cdef Doc doc = Doc(vocab)
cdef int id_ cdef int id_
for id_ in ids: for id_ in ids:
@@ -388,21 +388,22 @@ cdef class Doc:
doc.push_back(vocab.lexemes.at(id_), is_spacy) doc.push_back(vocab.lexemes.at(id_), is_spacy)
cdef int i cdef int i
for codec in vocab.annotation_codecs: cdef attr_t value
for codec in vocab.codecs[1:]:
values = codec.decode(biterator) values = codec.decode(biterator)
if codec.attr_id == HEAD: if codec.id == HEAD:
for i, head in enumerate(values): for i, value in enumerate(values):
doc.data[i].head = head doc.data[i].head = value
elif codec.attr_id == TAG: elif codec.id == TAG:
for i, tag in enumerate(values): for i, value in enumerate(values):
doc.data[i].tag = tag doc.data[i].tag = value
elif codec.attr_id == DEP: elif codec.id == DEP:
for i, dep in enumerate(values): for i, value in enumerate(values):
doc.data[i].dep = dep doc.data[i].dep = value
elif codec.attr_id == ENT_IOB: elif codec.id == ENT_IOB:
for i, ent_iob in enumerate(values): for i, value in enumerate(values):
doc.data[i].ent_iob = ent_iob doc.data[i].ent_iob = value
elif codec.attr_id == ENT_TYPE: elif codec.id == ENT_TYPE:
for i, ent_type in enumerate(values): for i, value in enumerate(values):
doc.data[i].ent_type = ent_type doc.data[i].ent_type = value
return doc return doc