Simplify lookup hashing

Use get_string_id directly: it already does everything ensure_hash was supposed to do, so the ensure_hash wrapper is removed.
This commit is contained in:
Ines Montani 2019-09-18 20:24:41 +02:00
parent dd1810f05a
commit f2c8b1e362
2 changed files with 11 additions and 17 deletions

View File

@ -7,16 +7,9 @@ from preshed.bloom import BloomFilter
from .errors import Errors
from .util import SimpleFrozenDict, ensure_path
from .compat import basestring_
from .strings import get_string_id
def ensure_hash(key):
    """Return the string ID for a string key; pass any other key through.

    key (unicode / int): The key to normalize.
    RETURNS: get_string_id(key) if key is an instance of basestring_,
        otherwise the key itself, unchanged.
    """
    # Only string keys are converted; everything else is returned as given.
    return get_string_id(key) if isinstance(key, basestring_) else key
class Lookups(object):
"""Container for large lookup tables and dictionaries, e.g. lemmatization
data or tokenizer exception lists. Lookups are available via vocab.lookups,
@ -202,7 +195,7 @@ class Table(OrderedDict):
key (unicode / int): The key to set.
value: The value to set.
"""
key = ensure_hash(key)
key = get_string_id(key)
OrderedDict.__setitem__(self, key, value)
self.bloom.add(key)
@ -221,7 +214,7 @@ class Table(OrderedDict):
key (unicode / int): The key to get.
RETURNS: The value.
"""
key = ensure_hash(key)
key = get_string_id(key)
return OrderedDict.__getitem__(self, key)
def get(self, key, default=None):
@ -231,7 +224,7 @@ class Table(OrderedDict):
default: The default value to return.
RETURNS: The value.
"""
key = ensure_hash(key)
key = get_string_id(key)
return OrderedDict.get(self, key, default)
def __contains__(self, key):
@ -240,7 +233,7 @@ class Table(OrderedDict):
key (unicode / int): The key to check.
RETURNS (bool): Whether the key is in the table.
"""
key = ensure_hash(key)
key = get_string_id(key)
# This can give a false positive, so we need to check it after
if key not in self.bloom:
return False

View File

@ -2,7 +2,8 @@
from __future__ import unicode_literals
import pytest
from spacy.lookups import Lookups, Table, ensure_hash
from spacy.lookups import Lookups, Table
from spacy.strings import get_string_id
from spacy.vocab import Vocab
from ..util import make_tempdir
@ -45,17 +46,17 @@ def test_table_api():
table = Table(name="table", data=data)
assert len(table) == len(data)
assert "foo" in table
assert ensure_hash("foo") in table
assert get_string_id("foo") in table
assert table["foo"] == "bar"
assert table[ensure_hash("foo")] == "bar"
assert table[get_string_id("foo")] == "bar"
assert table.get("foo") == "bar"
assert table.get("abc") is None
table["abc"] = 123
assert table["abc"] == 123
assert table[ensure_hash("abc")] == 123
assert table[get_string_id("abc")] == 123
table.set("def", 456)
assert table["def"] == 456
assert table[ensure_hash("def")] == 456
assert table[get_string_id("def")] == 456
def test_table_api_to_from_bytes():
@ -66,7 +67,7 @@ def test_table_api_to_from_bytes():
assert new_table.name == "table"
assert len(new_table) == 3
assert new_table["foo"] == "bar"
assert new_table[ensure_hash("foo")] == "bar"
assert new_table[get_string_id("foo")] == "bar"
new_table2 = Table(data={"def": 456})
new_table2.from_bytes(table_bytes)
assert len(new_table2) == 3