From 0c4102760c440af3e7b575b0fd27fe25549641a2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 5 Jan 2004 10:13:35 +0000 Subject: [PATCH] SF Patch #864863: Bisect C implementation (Contributed by Dmitry Vasiliev.) --- Doc/whatsnew/whatsnew24.tex | 4 + Lib/bisect.py | 6 + Lib/test/test_bisect.py | 27 ++--- Misc/ACKS | 1 + Misc/NEWS | 3 +- Modules/_bisectmodule.c | 228 ++++++++++++++++++++++++++++++++++++ PC/VC6/pythoncore.dsp | 4 + PC/config.c | 2 + setup.py | 2 + 9 files changed, 261 insertions(+), 16 deletions(-) create mode 100644 Modules/_bisectmodule.c diff --git a/Doc/whatsnew/whatsnew24.tex b/Doc/whatsnew/whatsnew24.tex index 5967d6d2b2c..b7e404b3c67 100644 --- a/Doc/whatsnew/whatsnew24.tex +++ b/Doc/whatsnew/whatsnew24.tex @@ -305,6 +305,10 @@ details. supports transparency, this makes it possible to use a transparent background. (Contributed by J\"org Lehmann.) +\item The \module{bisect} module now has an underlying C implementation + for improved performance. + (Contributed by Dmitry Vasiliev.) + \item The \module{heapq} module has been converted to C. The resulting ten-fold improvement in speed makes the module suitable for handling high volumes of data. diff --git a/Lib/bisect.py b/Lib/bisect.py index c9e6c60cd37..152f6c7854f 100644 --- a/Lib/bisect.py +++ b/Lib/bisect.py @@ -76,3 +76,9 @@ def bisect_left(a, x, lo=0, hi=None): if a[mid] < x: lo = mid+1 else: hi = mid return lo + +# Overwrite above definitions with a fast C implementation +try: + from _bisect import bisect_right, bisect_left, insort_left, insort_right, insort, bisect +except ImportError: + pass diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py index 549978d0f7b..809d8afcb24 100644 --- a/Lib/test/test_bisect.py +++ b/Lib/test/test_bisect.py @@ -1,6 +1,7 @@ import unittest from test import test_support from bisect import bisect_right, bisect_left, insort_left, insort_right, insort, bisect +from UserList import UserList class TestBisect(unittest.TestCase): @@ -89,6 +90,7 @@ class TestBisect(unittest.TestCase): def test_precomputed(self): for func, data, elem, expected in self.precomputedCases: self.assertEqual(func(data, elem), expected) + self.assertEqual(func(UserList(data), elem), expected) def test_random(self, n=25): from random import randrange @@ -132,22 +134,17 @@ def test_backcompatibility(self): class TestInsort(unittest.TestCase): - def test_vsListSort(self, n=500): + def test_vsBuiltinSort(self, n=500): from random import choice - digits = "0123456789" - raw = [] - insorted = [] - for i in range(n): - digit = choice(digits) - raw.append(digit) - if digit in "02468": - f = insort_left - else: - f = insort_right - f(insorted, digit) - sorted = raw[:] - sorted.sort() - self.assertEqual(sorted, insorted) + for insorted in (list(), UserList()): + for i in xrange(n): + digit = choice("0123456789") + if digit in "02468": + f = insort_left + else: + f = insort_right + f(insorted, digit) + self.assertEqual(sorted(insorted), insorted) def test_backcompatibility(self): self.assertEqual(insort, insort_right) diff --git a/Misc/ACKS b/Misc/ACKS index 4ddd6a3ec87..eea665aced2 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -565,6 +565,7 @@ Bill Tutt Doobee R. Tzeck Lionel Ulmer Hector Urtubia +Dmitry Vasiliev Frank Vercruesse Jaap Vermeulen Al Vezza diff --git a/Misc/NEWS b/Misc/NEWS index 18f1348b5e9..63b33217480 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -210,7 +210,8 @@ Library - Plugged a minor hole in tempfile.mktemp() due to the use of os.path.exists(), switched to using os.lstat() directly if possible. -- heapq.py has been converted to C for improved performance +- bisect.py and heapq.py now have underlying C implementations + for better performance - traceback.format_exc has been added (similar to print_exc but it returns a string). diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c new file mode 100644 index 00000000000..d3361586488 --- /dev/null +++ b/Modules/_bisectmodule.c @@ -0,0 +1,228 @@ +/* Bisection algorithms. Drop in replacement for bisect.py + +Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru). +*/ + +#include "Python.h" + +static int +internal_bisect_right(PyObject *list, PyObject *item, int lo, int hi) +{ + PyObject *litem; + int mid, res; + + if (hi == -1) { + hi = PySequence_Size(list); + if (hi < 0) + return -1; + } + while (lo < hi) { + mid = (lo + hi) / 2; + litem = PySequence_GetItem(list, mid); + if (litem == NULL) + return -1; + res = PyObject_RichCompareBool(item, litem, Py_LT); + Py_DECREF(litem); + if (res < 0) + return -1; + if (res) + hi = mid; + else + lo = mid + 1; + } + return lo; +} + +static PyObject * +bisect_right(PyObject *self, PyObject *args) +{ + PyObject *list, *item; + int lo = 0; + int hi = -1; + int index; + + if (!PyArg_ParseTuple(args, "OO|ii:bisect_right", + &list, &item, &lo, &hi)) + return NULL; + index = internal_bisect_right(list, item, lo, hi); + if (index < 0) + return NULL; + return PyInt_FromLong(index); +} + +PyDoc_STRVAR(bisect_right_doc, +"bisect_right(list, item[, lo[, hi]]) -> index\n\ +\n\ +Return the index where to insert item x in list a, assuming a is sorted.\n\ +\n\ +The return value i is such that all e in a[:i] have e <= x, and all e in\n\ +a[i:] have e > x. So if x already appears in the list, i points just\n\ +beyond the rightmost x already there\n\ +\n\ +Optional args lo (default 0) and hi (default len(a)) bound the\n\ +slice of a to be searched.\n"); + +static PyObject * +insort_right(PyObject *self, PyObject *args) +{ + PyObject *list, *item; + int lo = 0; + int hi = -1; + int index; + + if (!PyArg_ParseTuple(args, "OO|ii:insort_right", + &list, &item, &lo, &hi)) + return NULL; + index = internal_bisect_right(list, item, lo, hi); + if (index < 0) + return NULL; + if (PyList_Check(list)) { + if (PyList_Insert(list, index, item) < 0) + return NULL; + } else { + if (PyObject_CallMethod(list, "insert", "iO", index, item) + == NULL) + return NULL; + } + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(insort_right_doc, +"insort_right(list, item[, lo[, hi]])\n\ +\n\ +Insert item x in list a, and keep it sorted assuming a is sorted.\n\ +\n\ +If x is already in a, insert it to the right of the rightmost x.\n\ +\n\ +Optional args lo (default 0) and hi (default len(a)) bound the\n\ +slice of a to be searched.\n"); + +static int +internal_bisect_left(PyObject *list, PyObject *item, int lo, int hi) +{ + PyObject *litem; + int mid, res; + + if (hi == -1) { + hi = PySequence_Size(list); + if (hi < 0) + return -1; + } + while (lo < hi) { + mid = (lo + hi) / 2; + litem = PySequence_GetItem(list, mid); + if (litem == NULL) + return -1; + res = PyObject_RichCompareBool(litem, item, Py_LT); + Py_DECREF(litem); + if (res < 0) + return -1; + if (res) + lo = mid + 1; + else + hi = mid; + } + return lo; +} + +static PyObject * +bisect_left(PyObject *self, PyObject *args) +{ + PyObject *list, *item; + int lo = 0; + int hi = -1; + int index; + + if (!PyArg_ParseTuple(args, "OO|ii:bisect_left", + &list, &item, &lo, &hi)) + return NULL; + index = internal_bisect_left(list, item, lo, hi); + if (index < 0) + return NULL; + return PyInt_FromLong(index); +} + +PyDoc_STRVAR(bisect_left_doc, +"bisect_left(list, item[, lo[, hi]]) -> index\n\ +\n\ +Return the index where to insert item x in list a, assuming a is sorted.\n\ +\n\ +The return value i is such that all e in a[:i] have e < x, and all e in\n\ +a[i:] have e >= x. So if x already appears in the list, i points just\n\ +before the leftmost x already there.\n\ +\n\ +Optional args lo (default 0) and hi (default len(a)) bound the\n\ +slice of a to be searched.\n"); + +static PyObject * +insort_left(PyObject *self, PyObject *args) +{ + PyObject *list, *item; + int lo = 0; + int hi = -1; + int index; + + if (!PyArg_ParseTuple(args, "OO|ii:insort_left", + &list, &item, &lo, &hi)) + return NULL; + index = internal_bisect_left(list, item, lo, hi); + if (index < 0) + return NULL; + if (PyList_Check(list)) { + if (PyList_Insert(list, index, item) < 0) + return NULL; + } else { + if (PyObject_CallMethod(list, "insert", "iO", index, item) + == NULL) + return NULL; + } + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(insort_left_doc, +"insort_left(list, item[, lo[, hi]])\n\ +\n\ +Insert item x in list a, and keep it sorted assuming a is sorted.\n\ +\n\ +If x is already in a, insert it to the left of the leftmost x.\n\ +\n\ +Optional args lo (default 0) and hi (default len(a)) bound the\n\ +slice of a to be searched.\n"); + +PyDoc_STRVAR(bisect_doc, "Alias for bisect_right().\n"); +PyDoc_STRVAR(insort_doc, "Alias for insort_right().\n"); + +static PyMethodDef bisect_methods[] = { + {"bisect_right", (PyCFunction)bisect_right, + METH_VARARGS, bisect_right_doc}, + {"bisect", (PyCFunction)bisect_right, + METH_VARARGS, bisect_doc}, + {"insort_right", (PyCFunction)insort_right, + METH_VARARGS, insort_right_doc}, + {"insort", (PyCFunction)insort_right, + METH_VARARGS, insort_doc}, + {"bisect_left", (PyCFunction)bisect_left, + METH_VARARGS, bisect_left_doc}, + {"insort_left", (PyCFunction)insort_left, + METH_VARARGS, insort_left_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyDoc_STRVAR(module_doc, +"Bisection algorithms.\n\ +\n\ +This module provides support for maintaining a list in sorted order without\n\ +having to sort the list after each insertion. For long lists of items with\n\ +expensive comparison operations, this can be an improvement over the more\n\ +common approach.\n"); + +PyMODINIT_FUNC +init_bisect(void) +{ + PyObject *m; + + m = Py_InitModule3("_bisect", bisect_methods, module_doc); +} + diff --git a/PC/VC6/pythoncore.dsp b/PC/VC6/pythoncore.dsp index dfc2bad6626..0e8b42afe5a 100644 --- a/PC/VC6/pythoncore.dsp +++ b/PC/VC6/pythoncore.dsp @@ -93,6 +93,10 @@ LINK32=link.exe # Name "pythoncore - Win32 Debug" # Begin Source File +SOURCE=..\..\Modules\_bisectmodule.c +# End Source File +# Begin Source File + SOURCE=..\..\Modules\_codecsmodule.c # End Source File # Begin Source File diff --git a/PC/config.c b/PC/config.c index 7bf31b0e153..15671f68a3d 100644 --- a/PC/config.c +++ b/PC/config.c @@ -47,6 +47,7 @@ extern void initzipimport(void); extern void init_random(void); extern void inititertools(void); extern void initheapq(void); +extern void init_bisect(void); extern void init_symtable(void); extern void initmmap(void); extern void init_csv(void); @@ -106,6 +107,7 @@ struct _inittab _PyImport_Inittab[] = { {"_weakref", init_weakref}, {"_hotshot", init_hotshot}, {"_random", init_random}, + {"_bisect", init_bisect}, {"heapq", initheapq}, {"itertools", inititertools}, {"_symtable", init_symtable}, diff --git a/setup.py b/setup.py index 2c3eae6873a..39873d3ed13 100644 --- a/setup.py +++ b/setup.py @@ -322,6 +322,8 @@ def detect_modules(self): exts.append( Extension("_random", ["_randommodule.c"]) ) # fast iterator tools implemented in C exts.append( Extension("itertools", ["itertoolsmodule.c"]) ) + # bisect + exts.append( Extension("_bisect", ["_bisectmodule.c"]) ) # heapq exts.append( Extension("heapq", ["heapqmodule.c"]) ) # operator.add() and similar goodies