making cacheutils more consistent between LRU and LRI, adding some cacheutils docs

This commit is contained in:
Mahmoud Hashemi 2015-03-23 00:18:45 -07:00
parent 690dff1e77
commit c971dd50b2
1 changed files with 71 additions and 67 deletions

View File

@ -1,10 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """\
Contains basic cache types, including LRU. Contains fundamental cache types, including LRU (Least-recently
used) and LRI (Least-recently inserted).
Learn more about `caching algorithms on Wikipedia
<https://en.wikipedia.org/wiki/Cache_algorithms#Examples>`_.
""" """
import itertools
from collections import deque from collections import deque
try: try:
@ -23,49 +25,15 @@ DEFAULT_MAX_SIZE = 128
_MISSING = object() _MISSING = object()
__all__ = ['BasicCache', 'LRU', 'DefaultLRU'] __all__ = ['LRI', 'LRU']
# TODO: rename to LRI?
# TODO: on_miss to default_factory
class BasicCache(dict):
"""\
a.k.a, SizeLimitedDefaultDict. LRI/Least Recently Inserted.
`on_miss` is a callable that accepts the missing key (as opposed
to default_factory which accepts no arguments.
"""
def __init__(self, on_miss, max_size=DEFAULT_MAX_SIZE):
super(BasicCache, self).__init__()
self.max_size = max_size
self.on_miss = on_miss
self._queue = deque()
def __missing__(self, key):
ret = self.on_miss(key)
self[key] = ret
self._queue.append(key)
if len(self._queue) > self.max_size:
old = self._queue.popleft()
del self[old]
return ret
try:
from collections import defaultdict
except ImportError:
# no defaultdict means that __missing__ isn't supported in
# this version of python, so we define __getitem__
def __getitem__(self, key):
try:
return super(BasicCache, self).__getitem__(key)
except KeyError:
return self.__missing__(key)
else:
del defaultdict
class LRU(dict): class LRU(dict):
def __init__(self, max_size=DEFAULT_MAX_SIZE, values=None): """\
The ``LRU`` implements the Least Recently Used caching strategy as a size-limited ``dict`` subtype.
"""
def __init__(self, max_size=DEFAULT_MAX_SIZE, values=None,
on_miss=None):
if max_size <= 0: if max_size <= 0:
raise ValueError('expected max_size > 0, not %r' % max_size) raise ValueError('expected max_size > 0, not %r' % max_size)
self.hit_count = self.miss_count = self.soft_miss_count = 0 self.hit_count = self.miss_count = self.soft_miss_count = 0
@ -76,6 +44,11 @@ class LRU(dict):
self.root = root self.root = root
self.lock = RLock() self.lock = RLock()
if on_miss is not None and not callable(on_miss):
raise TypeError('expected on_miss to be a callable'
' (or None), not %r' % on_miss)
self.on_miss = on_miss
if values: if values:
self.update(values) self.update(values)
@ -202,26 +175,13 @@ class LRU(dict):
def __repr__(self):
    """Return a constructor-style representation of the cache.

    The result has the form ``ClassName(max_size=..., on_miss=...,
    values={...})``. The runtime class name is used so that subclasses
    are labelled with their own name.
    """
    cn = self.__class__.__name__
    # Plain dict repr of the stored items. It is already a repr string,
    # so it is interpolated with %s; %r would wrap it in another layer
    # of quotes.
    val_map = super(LRU, self).__repr__()
    # The tuple must follow the placeholder order: max_size, then on_miss.
    return ('%s(max_size=%r, on_miss=%r, values=%s)'
            % (cn, self.max_size, self.on_miss, val_map))
class DefaultLRU(LRU):
"""\
Like a defaultdict, but for the LRU cache. If set, the
`default_factory` is called on misses and assigned to the missing
key.
"""
def __init__(self, default_factory=None, *args, **kwargs):
if default_factory is not None and not callable(default_factory):
raise TypeError('expected default_factory to be a callable'
' (or None), not %r' % default_factory)
self.default_factory = default_factory
super(DefaultLRU, self).__init__(*args, **kwargs)
def __missing__(self, key):
    """Produce, store and return a value for a key that is not cached.

    Called by ``dict`` item lookup when *key* is absent.

    Args:
        key: the key that was looked up and not found.

    Returns:
        The value returned by ``self.on_miss(key)``, which is also
        stored under *key*.

    Raises:
        KeyError: if no ``on_miss`` callable is configured.
    """
    if not self.on_miss:
        raise KeyError(key)
    ret = self.on_miss(key)
    # The counter initialised in __init__ is ``soft_miss_count``;
    # incrementing a non-existent ``soft_miss`` raised AttributeError.
    self.soft_miss_count += 1
    self[key] = ret
    return ret
@ -233,25 +193,68 @@ class DefaultLRU(LRU):
# this version of python, so we define __getitem__ # this version of python, so we define __getitem__
def __getitem__(self, key): def __getitem__(self, key):
try: try:
return super(DefaultLRU, self).__getitem__(key) return super(LRU, self).__getitem__(key)
except KeyError: except KeyError:
if self.default_factory: if self.on_miss:
return self.__missing__(key) return self.__missing__(key)
raise raise
else: else:
del defaultdict del defaultdict
class LRI(dict):
    """\
    The ``LRI`` implements the basic *Least Recently Inserted* strategy
    to caching. One could also think of this as a
    ``SizeLimitedDefaultDict``.

    Args:
        max_size (int): number of ``on_miss``-generated keys to keep
            before the oldest of them is dropped. Must be greater
            than zero.
        values: optional mapping or iterable of ``(key, value)`` pairs,
            passed to ``dict.update()`` to seed the cache.
        on_miss (callable): optional. When set, looking up an absent
            key calls ``on_miss(key)``, stores the result under that
            key and returns it. Note that it accepts the missing key,
            as opposed to ``defaultdict``'s ``default_factory``, which
            accepts no arguments. When unset, a miss raises
            ``KeyError`` like a plain ``dict``.

    Raises:
        ValueError: if ``max_size`` is not greater than zero.
        TypeError: if ``on_miss`` is neither ``None`` nor callable.

    Only keys created through ``on_miss`` are queued for eviction.
    Items supplied through ``values`` or assigned directly are stored
    but are never evicted and do not count towards ``max_size``.
    """
    def __init__(self, max_size=DEFAULT_MAX_SIZE, values=None,
                 on_miss=None):
        # Same argument checks (and messages) as the LRU constructor.
        if max_size <= 0:
            raise ValueError('expected max_size > 0, not %r' % max_size)
        if on_miss is not None and not callable(on_miss):
            raise TypeError('expected on_miss to be a callable'
                            ' (or None), not %r' % on_miss)
        super(LRI, self).__init__()
        self.max_size = max_size
        self.on_miss = on_miss
        # Keys generated by on_miss, oldest on the left.
        self._queue = deque()

        if values:
            self.update(values)

    def __missing__(self, key):
        """Generate, store and return the value for an absent *key*.

        Raises ``KeyError`` when no ``on_miss`` callable is set. After
        storing the new value, the oldest queued key is dropped if the
        queue has grown past ``max_size``.
        """
        if not self.on_miss:
            raise KeyError(key)
        ret = self.on_miss(key)
        self[key] = ret
        self._queue.append(key)
        if len(self._queue) > self.max_size:
            old = self._queue.popleft()
            # pop() rather than del: the caller may already have removed
            # the key, and a lookup must not fail because of that.
            self.pop(old, None)
        return ret

    try:
        from collections import defaultdict
    except ImportError:
        # no defaultdict means that __missing__ isn't supported in
        # this version of python, so we define __getitem__
        def __getitem__(self, key):
            try:
                return super(LRI, self).__getitem__(key)
            except KeyError:
                return self.__missing__(key)
    else:
        # The import was only a feature probe; keep it out of the
        # class namespace.
        del defaultdict
def _test_lri():
    """Smoke-test ``LRI``: misses are filled by ``on_miss`` and the
    cache never holds more than ``max_size`` generated entries.
    """
    import string
    bc = LRI(10, on_miss=lambda k: k.upper())
    # ascii_letters exists on both Python 2 and 3 and, unlike
    # string.letters, does not vary with the locale.
    for char in string.ascii_letters:
        x = bc[char]
        assert x == char.upper()
    assert len(bc) == 10
def test_lru_cache(): def _test_lru():
lru = LRU(max_size=1) lru = LRU(max_size=1)
lru['hi'] = 0 lru['hi'] = 0
lru['bye'] = 1 lru['bye'] = 1
@ -264,4 +267,5 @@ def test_lru_cache():
if __name__ == '__main__': if __name__ == '__main__':
test_lru_cache() _test_lri()
_test_lru()