mirror of https://github.com/explosion/spaCy.git
Fix vocab deserialization when loading already present lexemes (#3383)
* Fix vocab deserialization bug. Closes #2153
* Un-xfail test for #2153
This commit is contained in:
parent
d6eaa71afc
commit
27dd820753
|
@@ -68,7 +68,6 @@ def test_serialize_vocab_lex_attrs_bytes(strings, lex_attr):
|
|||
assert vocab2[strings[0]].norm_ == lex_attr
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.parametrize("strings,lex_attr", test_strings_attrs)
|
||||
def test_deserialize_vocab_seen_entries(strings, lex_attr):
|
||||
# Reported in #2153
|
||||
|
|
|
@@ -1,6 +1,7 @@
|
|||
# coding: utf8
|
||||
# cython: profile=True
|
||||
from __future__ import unicode_literals
|
||||
from libc.string cimport memcpy
|
||||
|
||||
import numpy
|
||||
import srsly
|
||||
|
@@ -518,7 +519,10 @@ cdef class Vocab:
|
|||
for j in range(sizeof(lex_data.data)):
|
||||
lex_data.data[j] = bytes_ptr[i+j]
|
||||
Lexeme.c_from_bytes(lexeme, lex_data)
|
||||
|
||||
prev_entry = self._by_orth.get(lexeme.orth)
|
||||
if prev_entry != NULL:
|
||||
memcpy(prev_entry, lexeme, sizeof(LexemeC))
|
||||
continue
|
||||
ptr = self.strings._map.get(lexeme.orth)
|
||||
if ptr == NULL:
|
||||
continue
|
||||
|
|
Loading…
Reference in New Issue