mirror of https://github.com/explosion/spaCy.git
Fix memory leak when adding empty morph (#6581)
Fix the lookup of the empty morph in the morphology table. Previously the lookup never matched the existing entry, so a new morphology tag was allocated each time the empty morph tag was added, which leaked memory.
This commit is contained in:
parent
fd640afcd8
commit
e10295c9fd
|
@ -29,7 +29,7 @@ cdef class Morphology:
|
||||||
FEATURE_SEP = "|"
|
FEATURE_SEP = "|"
|
||||||
FIELD_SEP = "="
|
FIELD_SEP = "="
|
||||||
VALUE_SEP = ","
|
VALUE_SEP = ","
|
||||||
# not an empty string so that the PreshMap key is not 0
|
# not an empty string so we can distinguish unset morph from empty morph
|
||||||
EMPTY_MORPH = symbols.NAMES[symbols._]
|
EMPTY_MORPH = symbols.NAMES[symbols._]
|
||||||
|
|
||||||
def __init__(self, StringStore strings):
|
def __init__(self, StringStore strings):
|
||||||
|
@ -50,8 +50,8 @@ cdef class Morphology:
|
||||||
"""
|
"""
|
||||||
cdef MorphAnalysisC* tag_ptr
|
cdef MorphAnalysisC* tag_ptr
|
||||||
if isinstance(features, str):
|
if isinstance(features, str):
|
||||||
if features == self.EMPTY_MORPH:
|
if features == "":
|
||||||
features = ""
|
features = self.EMPTY_MORPH
|
||||||
tag_ptr = <MorphAnalysisC*>self.tags.get(<hash_t>self.strings[features])
|
tag_ptr = <MorphAnalysisC*>self.tags.get(<hash_t>self.strings[features])
|
||||||
if tag_ptr != NULL:
|
if tag_ptr != NULL:
|
||||||
return tag_ptr.key
|
return tag_ptr.key
|
||||||
|
@ -71,13 +71,9 @@ cdef class Morphology:
|
||||||
))
|
))
|
||||||
cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs)
|
cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs)
|
||||||
# the hash key for the tag is either the hash of the normalized UFEATS
|
# the hash key for the tag is either the hash of the normalized UFEATS
|
||||||
# string or the hash of an empty placeholder (using the empty string
|
# string or the hash of an empty placeholder
|
||||||
# would give a hash key of 0, which is not good for PreshMap)
|
|
||||||
norm_feats_string = self.normalize_features(features)
|
norm_feats_string = self.normalize_features(features)
|
||||||
if norm_feats_string:
|
tag.key = self.strings.add(norm_feats_string)
|
||||||
tag.key = self.strings.add(norm_feats_string)
|
|
||||||
else:
|
|
||||||
tag.key = self.strings.add(self.EMPTY_MORPH)
|
|
||||||
self.insert(tag)
|
self.insert(tag)
|
||||||
return tag.key
|
return tag.key
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue