mirror of https://github.com/explosion/spaCy.git
Fix allocation of non-transient strings in StringStore
This commit is contained in:
parent
628c973db5
commit
ec6fbbfe34
|
@@ -57,16 +57,20 @@ cdef class Morphology:
|
||||||
field_feature_pairs = []
|
field_feature_pairs = []
|
||||||
for field in sorted(string_features):
|
for field in sorted(string_features):
|
||||||
values = string_features[field]
|
values = string_features[field]
|
||||||
|
self.strings.add(field, allow_transient=False),
|
||||||
|
field_id = self.strings[field]
|
||||||
for value in values.split(self.VALUE_SEP):
|
for value in values.split(self.VALUE_SEP):
|
||||||
|
field_sep_value = field + self.FIELD_SEP + value
|
||||||
|
self.strings.add(field_sep_value, allow_transient=False),
|
||||||
field_feature_pairs.append((
|
field_feature_pairs.append((
|
||||||
self.strings.add(field),
|
field_id,
|
||||||
self.strings.add(field + self.FIELD_SEP + value),
|
self.strings[field_sep_value]
|
||||||
))
|
))
|
||||||
cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs)
|
cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs)
|
||||||
# the hash key for the tag is either the hash of the normalized UFEATS
|
# the hash key for the tag is either the hash of the normalized UFEATS
|
||||||
# string or the hash of an empty placeholder
|
# string or the hash of an empty placeholder
|
||||||
norm_feats_string = self.normalize_features(features)
|
norm_feats_string = self.normalize_features(features)
|
||||||
tag.key = self.strings.add(norm_feats_string)
|
tag.key = self.strings.add(norm_feats_string, allow_transient=False)
|
||||||
self.insert(tag)
|
self.insert(tag)
|
||||||
return tag.key
|
return tag.key
|
||||||
|
|
||||||
|
|
|
@@ -222,6 +222,8 @@ cdef class StringStore:
|
||||||
internally should not.
|
internally should not.
|
||||||
RETURNS (uint64): The string's hash value.
|
RETURNS (uint64): The string's hash value.
|
||||||
"""
|
"""
|
||||||
|
if not string:
|
||||||
|
return 0
|
||||||
if allow_transient is None:
|
if allow_transient is None:
|
||||||
allow_transient = self.mem is not self._non_temp_mem
|
allow_transient = self.mem is not self._non_temp_mem
|
||||||
cdef hash_t str_hash
|
cdef hash_t str_hash
|
||||||
|
@@ -383,7 +385,10 @@ cdef class StringStore:
|
||||||
cdef Utf8Str* value = <Utf8Str*>self._map.get(key)
|
cdef Utf8Str* value = <Utf8Str*>self._map.get(key)
|
||||||
if value is not NULL:
|
if value is not NULL:
|
||||||
return value
|
return value
|
||||||
|
if allow_transient:
|
||||||
value = _allocate(self.mem, <unsigned char*>utf8_string, length)
|
value = _allocate(self.mem, <unsigned char*>utf8_string, length)
|
||||||
|
else:
|
||||||
|
value = _allocate(self._non_temp_mem, <unsigned char*>utf8_string, length)
|
||||||
self._map.set(key, value)
|
self._map.set(key, value)
|
||||||
if allow_transient and self.mem is not self._non_temp_mem:
|
if allow_transient and self.mem is not self._non_temp_mem:
|
||||||
self._transient_keys.push_back(key)
|
self._transient_keys.push_back(key)
|
||||||
|
|
Loading…
Reference in New Issue