boltons/boltons/listutils.py

329 lines
10 KiB
Python

# -*- coding: utf-8 -*-
"""Python's builtin :class:`list` is a very fast and efficient
sequence type, but it could be better for certain access patterns,
such as non-sequential insertion into a large lists. ``listutils``
provides a pure-Python solution to this problem.
For utilities for working with iterables and lists, check out
:mod:`iterutils`. For the a :class:`list`-based version of
:class:`collections.namedtuple`, check out :mod:`namedutils`.
"""
from __future__ import print_function, division
import operator
from math import log as math_log
from itertools import chain, islice
try:
from typeutils import make_sentinel
_MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
_MISSING = object()
try:
xrange
except NameError:
# Python 3 compat
xrange = range
# TODO: expose splaylist?
__all__ = ['BList', 'BarrelList']
# TODO: comparators
# TODO: keep track of list lengths and bisect to the right list for
# faster getitem (and slightly slower setitem and delitem ops)
class BarrelList(list):
"""The ``BarrelList`` is a :class:`list` subtype backed by many
dynamically-scaled sublists, to provide better scaling and random
insertion/deletion characteristics. It is a subtype of the builtin
:class:`list` and has an identical API, supporting indexing,
slicing, sorting, etc. If application requirements call for
something more performant, consider the `blist module available on
PyPI`_.
The name comes by way of Kurt Rose, who said it reminded him of
barrel shifters. Not sure how, but it's BList-like, so the name
stuck. BList is of course a reference to `B-trees`_.
Args:
iterable: An optional iterable of initial values for the list.
>>> blist = BList(xrange(100000))
>>> blist.pop(50000)
50000
>>> len(blist)
99999
>>> len(blist.lists) # how many underlying lists
8
>>> slice_idx = blist.lists[0][-1]
>>> blist[slice_idx:slice_idx + 2]
BarrelList([11637, 11638])
Slicing is supported and works just fine across list borders,
returning another instance of the BarrelList.
.. _blist module available on PyPI: https://pypi.python.org/pypi/blist
.. _B-trees: https://en.wikipedia.org/wiki/B-tree
"""
_size_factor = 1520
"This size factor is the result of tuning using the tune() function below."
def __init__(self, iterable=None):
self.lists = [[]]
if iterable:
self.extend(iterable)
@property
def _cur_size_limit(self):
len_self, size_factor = len(self), self._size_factor
return int(round(size_factor * math_log(len_self + 2, 2)))
def _translate_index(self, index):
if index < 0:
index += len(self)
rel_idx, lists = index, self.lists
for list_idx in range(len(lists)):
len_list = len(lists[list_idx])
if rel_idx < len_list:
break
rel_idx -= len_list
if rel_idx < 0:
return None, None
return list_idx, rel_idx
def _balance_list(self, list_idx):
if list_idx < 0:
list_idx += len(self.lists)
cur_list, len_self = self.lists[list_idx], len(self)
size_limit = self._cur_size_limit
if len(cur_list) > size_limit:
half_limit = size_limit // 2
while len(cur_list) > half_limit:
next_list_idx = list_idx + 1
self.lists.insert(next_list_idx, cur_list[-half_limit:])
del cur_list[-half_limit:]
return True
return False
def insert(self, index, item):
if len(self.lists) == 1:
self.lists[0].insert(index, item)
self._balance_list(0)
else:
list_idx, rel_idx = self._translate_index(index)
if list_idx is None:
raise IndexError()
self.lists[list_idx].insert(rel_idx, item)
self._balance_list(list_idx)
return
def append(self, item):
self.lists[-1].append(item)
def extend(self, iterable):
self.lists[-1].extend(iterable)
def pop(self, *a):
lists = self.lists
if len(lists) == 1 and not a:
return self.lists[0].pop()
index = a and a[0]
if index == () or index is None or index == -1:
ret = lists[-1].pop()
if len(lists) > 1 and not lists[-1]:
lists.pop()
else:
list_idx, rel_idx = self._translate_index(index)
if list_idx is None:
raise IndexError()
ret = lists[list_idx].pop(rel_idx)
self._balance_list(list_idx)
return ret
def iter_slice(self, start, stop, step=None):
iterable = self # TODO: optimization opportunities abound
# start_list_idx, stop_list_idx = 0, len(self.lists)
if start is None:
start = 0
if stop is None:
stop = len(self)
if step is not None and step < 0:
step = -step
start, stop = -start, -stop - 1
iterable = reversed(self)
if start < 0:
start += len(self)
# start_list_idx, start_rel_idx = self._translate_index(start)
if stop < 0:
stop += len(self)
# stop_list_idx, stop_rel_idx = self._translate_index(stop)
return islice(iterable, start, stop, step)
def del_slice(self, start, stop, step=None):
if step is not None and abs(step) > 1: # punt
new_list = chain(self.iter_slice(0, start, step),
self.iter_slice(stop, None, step))
self.lists[0][:] = new_list
self._balance_list(0)
return
if start is None:
start = 0
if stop is None:
stop = len(self)
start_list_idx, start_rel_idx = self._translate_index(start)
stop_list_idx, stop_rel_idx = self._translate_index(stop)
if start_list_idx is None:
raise IndexError()
if stop_list_idx is None:
raise IndexError()
if start_list_idx == stop_list_idx:
del self.lists[start_list_idx][start_rel_idx:stop_rel_idx]
elif start_list_idx < stop_list_idx:
del self.lists[start_list_idx + 1:stop_list_idx]
del self.lists[start_list_idx][start_rel_idx:]
del self.lists[stop_list_idx][:stop_rel_idx]
else:
assert False, ('start list index should never translate to'
' greater than stop list index')
__delslice__ = del_slice
@classmethod
def from_iterable(cls, it):
return cls(it)
def __iter__(self):
return chain(*self.lists)
def __reversed__(self):
return chain.from_iterable(reversed(l) for l in reversed(self.lists))
def __len__(self):
return sum([len(l) for l in self.lists])
def __contains__(self, item):
for cur in self.lists:
if item in cur:
return True
return False
def __getitem__(self, index):
try:
start, stop, step = index.start, index.stop, index.step
except AttributeError:
index = operator.index(index)
else:
iter_slice = self.iter_slice(start, stop, step)
ret = self.from_iterable(iter_slice)
return ret
list_idx, rel_idx = self._translate_index(index)
if list_idx is None:
raise IndexError()
return self.lists[list_idx][rel_idx]
def __delitem__(self, index):
try:
start, stop, step = index.start, index.stop, index.step
except AttributeError:
index = operator.index(index)
else:
self.del_slice(start, stop, step)
return
list_idx, rel_idx = self._translate_index(index)
if list_idx is None:
raise IndexError()
del self.lists[list_idx][rel_idx]
def __setitem__(self, index, item):
try:
start, stop, step = index.start, index.stop, index.step
except AttributeError:
index = operator.index(index)
else:
if len(self.lists) == 1:
self.lists[0][index] = item
else:
tmp = list(self)
tmp[index] = item
self.lists[:] = [tmp]
self._balance_list(0)
return
list_idx, rel_idx = self._translate_index(index)
if list_idx is None:
raise IndexError()
self.lists[list_idx][rel_idx] = item
def __getslice__(self, start, stop):
iter_slice = self.iter_slice(start, stop, 1)
return self.from_iterable(iter_slice)
def __setslice__(self, start, stop, sequence):
if len(self.lists) == 1:
self.lists[0][start:stop] = sequence
else:
tmp = list(self)
tmp[start:stop] = sequence
self.lists[:] = [tmp]
self._balance_list(0)
return
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, list(self))
def sort(self):
# poor pythonist's mergesort, it's faster than sorted(self)
# when the lists' average length is greater than 512.
if len(self.lists) == 1:
self.lists[0].sort()
else:
for li in self.lists:
li.sort()
tmp_sorted = sorted(chain.from_iterable(self.lists))
del self.lists[:]
self.lists[0] = tmp_sorted
self._balance_list(0)
def reverse(self):
for cur in self.lists:
cur.reverse()
self.lists.reverse()
def count(self, item):
return sum([cur.count(item) for cur in self.lists])
def index(self, item):
len_accum = 0
for cur in self.lists:
try:
rel_idx = cur.index(item)
return len_accum + rel_idx
except ValueError:
len_accum += len(cur)
raise ValueError('%r is not in list' % (item,))
BList = BarrelList
class SplayList(list):
"""Like a `splay tree`_, the SplayList facilitates moving higher
utility items closer to the front of the list for faster access.
.. _splay tree: https://en.wikipedia.org/wiki/Splay_tree
"""
def shift(self, item_index, dest_index=0):
if item_index == dest_index:
return
item = self.pop(item_index)
self.insert(dest_index, item)
def swap(self, item_index, dest_index):
self[dest_index], self[item_index] = self[item_index], self[dest_index]