mirror of https://github.com/python/cpython.git
Issue #16098: Update heapq.nsmallest to use the same algorithm as nlargest.
This removes the dependency on bisect and it bring the pure Python code in-sync with the C code.
This commit is contained in:
parent
dce969d2b0
commit
3e6aafe209
84
Lib/heapq.py
84
Lib/heapq.py
|
@ -129,9 +129,8 @@
|
|||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
||||
'nlargest', 'nsmallest', 'heappushpop']
|
||||
|
||||
from itertools import islice, repeat, count, imap, izip, tee, chain
|
||||
from itertools import islice, count, imap, izip, tee, chain
|
||||
from operator import itemgetter
|
||||
import bisect
|
||||
|
||||
def cmp_lt(x, y):
|
||||
# Use __lt__ if available; otherwise, try __le__.
|
||||
|
@ -188,6 +187,19 @@ def heapify(x):
|
|||
for i in reversed(xrange(n//2)):
|
||||
_siftup(x, i)
|
||||
|
||||
def _heappushpop_max(heap, item):
|
||||
"""Maxheap version of a heappush followed by a heappop."""
|
||||
if heap and cmp_lt(item, heap[0]):
|
||||
item, heap[0] = heap[0], item
|
||||
_siftup_max(heap, 0)
|
||||
return item
|
||||
|
||||
def _heapify_max(x):
|
||||
"""Transform list into a maxheap, in-place, in O(len(x)) time."""
|
||||
n = len(x)
|
||||
for i in reversed(range(n//2)):
|
||||
_siftup_max(x, i)
|
||||
|
||||
def nlargest(n, iterable):
|
||||
"""Find the n largest elements in a dataset.
|
||||
|
||||
|
@ -213,30 +225,16 @@ def nsmallest(n, iterable):
|
|||
"""
|
||||
if n < 0:
|
||||
return []
|
||||
if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
|
||||
# For smaller values of n, the bisect method is faster than a minheap.
|
||||
# It is also memory efficient, consuming only n elements of space.
|
||||
it = iter(iterable)
|
||||
result = sorted(islice(it, 0, n))
|
||||
if not result:
|
||||
return result
|
||||
insort = bisect.insort
|
||||
pop = result.pop
|
||||
los = result[-1] # los --> Largest of the nsmallest
|
||||
for elem in it:
|
||||
if cmp_lt(elem, los):
|
||||
insort(result, elem)
|
||||
pop()
|
||||
los = result[-1]
|
||||
it = iter(iterable)
|
||||
result = list(islice(it, n))
|
||||
if not result:
|
||||
return result
|
||||
# An alternative approach manifests the whole iterable in memory but
|
||||
# saves comparisons by heapifying all at once. Also, saves time
|
||||
# over bisect.insort() which has O(n) data movement time for every
|
||||
# insertion. Finding the n smallest of an m length iterable requires
|
||||
# O(m) + O(n log m) comparisons.
|
||||
h = list(iterable)
|
||||
heapify(h)
|
||||
return map(heappop, repeat(h, min(n, len(h))))
|
||||
_heapify_max(result)
|
||||
_heappushpop = _heappushpop_max
|
||||
for elem in it:
|
||||
_heappushpop(result, elem)
|
||||
result.sort()
|
||||
return result
|
||||
|
||||
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
||||
# is the index of a leaf with a possibly out-of-order value. Restore the
|
||||
|
@ -314,6 +312,42 @@ def _siftup(heap, pos):
|
|||
heap[pos] = newitem
|
||||
_siftdown(heap, startpos, pos)
|
||||
|
||||
def _siftdown_max(heap, startpos, pos):
|
||||
'Maxheap variant of _siftdown'
|
||||
newitem = heap[pos]
|
||||
# Follow the path to the root, moving parents down until finding a place
|
||||
# newitem fits.
|
||||
while pos > startpos:
|
||||
parentpos = (pos - 1) >> 1
|
||||
parent = heap[parentpos]
|
||||
if cmp_lt(parent, newitem):
|
||||
heap[pos] = parent
|
||||
pos = parentpos
|
||||
continue
|
||||
break
|
||||
heap[pos] = newitem
|
||||
|
||||
def _siftup_max(heap, pos):
|
||||
'Maxheap variant of _siftup'
|
||||
endpos = len(heap)
|
||||
startpos = pos
|
||||
newitem = heap[pos]
|
||||
# Bubble up the larger child until hitting a leaf.
|
||||
childpos = 2*pos + 1 # leftmost child position
|
||||
while childpos < endpos:
|
||||
# Set childpos to index of larger child.
|
||||
rightpos = childpos + 1
|
||||
if rightpos < endpos and not cmp_lt(heap[rightpos], heap[childpos]):
|
||||
childpos = rightpos
|
||||
# Move the larger child up.
|
||||
heap[pos] = heap[childpos]
|
||||
pos = childpos
|
||||
childpos = 2*pos + 1
|
||||
# The leaf at pos is empty now. Put newitem there, and bubble it up
|
||||
# to its final resting place (by sifting its parents down).
|
||||
heap[pos] = newitem
|
||||
_siftdown_max(heap, startpos, pos)
|
||||
|
||||
# If available, use C implementation
|
||||
try:
|
||||
from _heapq import *
|
||||
|
|
Loading…
Reference in New Issue