mirror of https://github.com/explosion/spaCy.git
Simplify lookup hashing
Just use `get_string_id`, which already does everything `ensure_hash` was supposed to do.
This commit is contained in:
parent
dd1810f05a
commit
f2c8b1e362
|
@ -7,16 +7,9 @@ from preshed.bloom import BloomFilter
|
|||
|
||||
from .errors import Errors
|
||||
from .util import SimpleFrozenDict, ensure_path
|
||||
from .compat import basestring_
|
||||
from .strings import get_string_id
|
||||
|
||||
|
||||
def ensure_hash(key):
    """Normalize a lookup key: hash string keys, pass anything else through.

    key: The key to normalize.
    RETURNS: The result of get_string_id(key) if key is an instance of
        basestring_, otherwise key itself, unchanged.
    """
    return get_string_id(key) if isinstance(key, basestring_) else key
|
||||
|
||||
|
||||
class Lookups(object):
|
||||
"""Container for large lookup tables and dictionaries, e.g. lemmatization
|
||||
data or tokenizer exception lists. Lookups are available via vocab.lookups,
|
||||
|
@ -202,7 +195,7 @@ class Table(OrderedDict):
|
|||
key (unicode / int): The key to set.
|
||||
value: The value to set.
|
||||
"""
|
||||
key = ensure_hash(key)
|
||||
key = get_string_id(key)
|
||||
OrderedDict.__setitem__(self, key, value)
|
||||
self.bloom.add(key)
|
||||
|
||||
|
@ -221,7 +214,7 @@ class Table(OrderedDict):
|
|||
key (unicode / int): The key to get.
|
||||
RETURNS: The value.
|
||||
"""
|
||||
key = ensure_hash(key)
|
||||
key = get_string_id(key)
|
||||
return OrderedDict.__getitem__(self, key)
|
||||
|
||||
def get(self, key, default=None):
|
||||
|
@ -231,7 +224,7 @@ class Table(OrderedDict):
|
|||
default: The default value to return.
|
||||
RETURNS: The value.
|
||||
"""
|
||||
key = ensure_hash(key)
|
||||
key = get_string_id(key)
|
||||
return OrderedDict.get(self, key, default)
|
||||
|
||||
def __contains__(self, key):
|
||||
|
@ -240,7 +233,7 @@ class Table(OrderedDict):
|
|||
key (unicode / int): The key to check.
|
||||
RETURNS (bool): Whether the key is in the table.
|
||||
"""
|
||||
key = ensure_hash(key)
|
||||
key = get_string_id(key)
|
||||
# This can give a false positive, so we need to check it after
|
||||
if key not in self.bloom:
|
||||
return False
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
from spacy.lookups import Lookups, Table, ensure_hash
|
||||
from spacy.lookups import Lookups, Table
|
||||
from spacy.strings import get_string_id
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
@ -45,17 +46,17 @@ def test_table_api():
|
|||
table = Table(name="table", data=data)
|
||||
assert len(table) == len(data)
|
||||
assert "foo" in table
|
||||
assert ensure_hash("foo") in table
|
||||
assert get_string_id("foo") in table
|
||||
assert table["foo"] == "bar"
|
||||
assert table[ensure_hash("foo")] == "bar"
|
||||
assert table[get_string_id("foo")] == "bar"
|
||||
assert table.get("foo") == "bar"
|
||||
assert table.get("abc") is None
|
||||
table["abc"] = 123
|
||||
assert table["abc"] == 123
|
||||
assert table[ensure_hash("abc")] == 123
|
||||
assert table[get_string_id("abc")] == 123
|
||||
table.set("def", 456)
|
||||
assert table["def"] == 456
|
||||
assert table[ensure_hash("def")] == 456
|
||||
assert table[get_string_id("def")] == 456
|
||||
|
||||
|
||||
def test_table_api_to_from_bytes():
|
||||
|
@ -66,7 +67,7 @@ def test_table_api_to_from_bytes():
|
|||
assert new_table.name == "table"
|
||||
assert len(new_table) == 3
|
||||
assert new_table["foo"] == "bar"
|
||||
assert new_table[ensure_hash("foo")] == "bar"
|
||||
assert new_table[get_string_id("foo")] == "bar"
|
||||
new_table2 = Table(data={"def": 456})
|
||||
new_table2.from_bytes(table_bytes)
|
||||
assert len(new_table2) == 3
|
||||
|
|
Loading…
Reference in New Issue