]> granicus.if.org Git - python/commitdiff
Install C version of heapq.nsmallest().
authorRaymond Hettinger <python@rcn.com>
Sun, 13 Jun 2004 05:26:33 +0000 (05:26 +0000)
committerRaymond Hettinger <python@rcn.com>
Sun, 13 Jun 2004 05:26:33 +0000 (05:26 +0000)
Doc/lib/libheapq.tex
Lib/heapq.py
Lib/test/test_heapq.py
Modules/_heapqmodule.c

index 4585058b321a7184359f301404bfdd5955ff45e8..5a140ab81c8a545cecbdff4903156e51950c8a9f 100644 (file)
@@ -97,11 +97,6 @@ by \var{iterable}. Equivalent to:  \code{sorted(iterable)[:n]}
 \versionadded{2.4}              
 \end{funcdesc}
 
-Though the above functions appear symmetrical, they each have different
-speed and space requirements.  In particular, \function{nsmallest()}
-operates on a full copy of the dataset.  In contrast, \function{nlargest()}
-only requires storage space for \var{n} elements.
-
 Both functions perform best for smaller values of \var{n}.  For larger
 values, it is more efficient to use the \function{sorted()} function.  Also,
 when \code{n==1}, it is more efficient to use the builtin \function{min()}
index 65f415504201096aecf9c1d957096e064f167b30..09f996aff80be633d60a0e00e553e8b288fc69cf 100644 (file)
@@ -300,7 +300,7 @@ def _siftup(heap, pos):
 
 # If available, use C implementation
 try:
-    from _heapq import heappush, heappop, heapify, heapreplace
+    from _heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
 except ImportError:
     pass
 
index 1cdaabe886154e23b6982596ebfd759abe35562d..b6fec9f47a8f4bdf045f0a7d8509eb0dddaba57e 100644 (file)
@@ -4,6 +4,7 @@ from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
 import random
 import unittest
 from test import test_support
+import sys
 
 
 def heapiter(heap):
@@ -91,16 +92,28 @@ class TestHeap(unittest.TestCase):
 
     def test_nsmallest(self):
         data = [random.randrange(2000) for i in range(1000)]
-        self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
-        self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
+        for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
+            self.assertEqual(nsmallest(data, i), sorted(data)[:i])
 
     def test_largest(self):
         data = [random.randrange(2000) for i in range(1000)]
-        self.assertEqual(nlargest(data, 400), sorted(data, reverse=True)[:400])
+        for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
+            self.assertEqual(nlargest(data, i), sorted(data, reverse=True)[:i])
 
-def test_main():
-    test_support.run_unittest(TestHeap)
+def test_main(verbose=None):
+    test_classes = [TestHeap]
+    test_support.run_unittest(*test_classes)
+
+    # verify reference counting
+    if verbose and hasattr(sys, "gettotalrefcount"):
+        import gc
+        counts = [None] * 5
+        for i in xrange(len(counts)):
+            test_support.run_unittest(*test_classes)
+            gc.collect()
+            counts[i] = sys.gettotalrefcount()
+        print counts
 
 if __name__ == "__main__":
-    test_main()
+    test_main(verbose=True)
 
index 8fe742ff0eb6c1fc2f016361cc441e9122c72507..61ee41343923fafc04e4711d1ba61b7df00588d5 100644 (file)
@@ -219,7 +219,7 @@ PyDoc_STRVAR(heapify_doc,
 static PyObject *
 nlargest(PyObject *self, PyObject *args)
 {
-       PyObject *heap=NULL, *elem, *rv, *iterable, *sol, *it, *oldelem;
+       PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
        int i, n;
 
        if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n))
@@ -246,10 +246,9 @@ nlargest(PyObject *self, PyObject *args)
        if (PyList_GET_SIZE(heap) == 0)
                goto sortit;
 
-       rv = heapify(self, heap);
-       if (rv == NULL)
-               goto fail;
-       Py_DECREF(rv);
+       for (i=n/2-1 ; i>=0 ; i--)
+               if(_siftup((PyListObject *)heap, i) == -1)
+                       goto fail;
 
        sol = PyList_GET_ITEM(heap, 0);
        while (1) {
@@ -290,6 +289,162 @@ PyDoc_STRVAR(nlargest_doc,
 \n\
 Equivalent to:  sorted(iterable, reverse=True)[:n]\n");
 
+static int
+_siftdownmax(PyListObject *heap, int startpos, int pos)
+{
+       PyObject *newitem, *parent;
+       int cmp, parentpos;
+
+       assert(PyList_Check(heap));
+       if (pos >= PyList_GET_SIZE(heap)) {
+               PyErr_SetString(PyExc_IndexError, "index out of range");
+               return -1;
+       }
+
+       newitem = PyList_GET_ITEM(heap, pos);
+       Py_INCREF(newitem);
+       /* Follow the path to the root, moving parents down until finding
+          a place newitem fits. */
+       while (pos > startpos){
+               parentpos = (pos - 1) >> 1;
+               parent = PyList_GET_ITEM(heap, parentpos);
+               cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
+               if (cmp == -1)
+                       return -1;
+               if (cmp == 1)
+                       break;
+               Py_INCREF(parent);
+               Py_DECREF(PyList_GET_ITEM(heap, pos));
+               PyList_SET_ITEM(heap, pos, parent);
+               pos = parentpos;
+       }
+       Py_DECREF(PyList_GET_ITEM(heap, pos));
+       PyList_SET_ITEM(heap, pos, newitem);
+       return 0;
+}
+
+static int
+_siftupmax(PyListObject *heap, int pos)
+{
+       int startpos, endpos, childpos, rightpos;
+       int cmp;
+       PyObject *newitem, *tmp;
+
+       assert(PyList_Check(heap));
+       endpos = PyList_GET_SIZE(heap);
+       startpos = pos;
+       if (pos >= endpos) {
+               PyErr_SetString(PyExc_IndexError, "index out of range");
+               return -1;
+       }
+       newitem = PyList_GET_ITEM(heap, pos);
+       Py_INCREF(newitem);
+
+       /* Bubble up the smaller child until hitting a leaf. */
+       childpos = 2*pos + 1;    /* leftmost child position  */
+       while (childpos < endpos) {
+               /* Set childpos to index of smaller child.   */
+               rightpos = childpos + 1;
+               if (rightpos < endpos) {
+                       cmp = PyObject_RichCompareBool(
+                               PyList_GET_ITEM(heap, childpos),
+                               PyList_GET_ITEM(heap, rightpos),
+                               Py_LE);
+                       if (cmp == -1)
+                               return -1;
+                       if (cmp == 1)
+                               childpos = rightpos;
+               }
+               /* Move the smaller child up. */
+               tmp = PyList_GET_ITEM(heap, childpos);
+               Py_INCREF(tmp);
+               Py_DECREF(PyList_GET_ITEM(heap, pos));
+               PyList_SET_ITEM(heap, pos, tmp);
+               pos = childpos;
+               childpos = 2*pos + 1;
+       }
+
+       /* The leaf at pos is empty now.  Put newitem there, and and bubble
+          it up to its final resting place (by sifting its parents down). */
+       Py_DECREF(PyList_GET_ITEM(heap, pos));
+       PyList_SET_ITEM(heap, pos, newitem);
+       return _siftdownmax(heap, startpos, pos);
+}
+
+static PyObject *
+nsmallest(PyObject *self, PyObject *args)
+{
+       PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
+       int i, n;
+
+       if (!PyArg_ParseTuple(args, "Oi:nsmallest", &iterable, &n))
+               return NULL;
+
+       it = PyObject_GetIter(iterable);
+       if (it == NULL)
+               return NULL;
+
+       heap = PyList_New(0);
+       if (it == NULL)
+               goto fail;
+
+       for (i=0 ; i<n ; i++ ){
+               elem = PyIter_Next(it);
+               if (elem == NULL)
+                       goto sortit;
+               if (PyList_Append(heap, elem) == -1) {
+                       Py_DECREF(elem);
+                       goto fail;
+               }
+               Py_DECREF(elem);
+       }
+       n = PyList_GET_SIZE(heap);
+       if (n == 0)
+               goto sortit;
+
+       for (i=n/2-1 ; i>=0 ; i--)
+               if(_siftupmax((PyListObject *)heap, i) == -1)
+                       goto fail;
+
+       los = PyList_GET_ITEM(heap, 0);
+       while (1) {
+               elem = PyIter_Next(it);
+               if (elem == NULL) {
+                       if (PyErr_Occurred())
+                               goto fail;
+                       else
+                               goto sortit;
+               }
+               if (PyObject_RichCompareBool(los, elem, Py_LE)) {
+                       Py_DECREF(elem);
+                       continue;
+               }
+
+               oldelem = PyList_GET_ITEM(heap, 0);
+               PyList_SET_ITEM(heap, 0, elem);
+               Py_DECREF(oldelem);
+               if (_siftupmax((PyListObject *)heap, 0) == -1)
+                       goto fail;
+               los = PyList_GET_ITEM(heap, 0);
+       }
+
+sortit:
+       Py_DECREF(it);
+       if (PyList_Sort(heap) == -1)
+               goto fail;
+       return heap;
+
+fail:
+       Py_DECREF(it);
+       Py_XDECREF(heap);
+       return NULL;
+}
+
+PyDoc_STRVAR(nsmallest_doc,
+"Find the n smallest elements in a dataset.\n\
+\n\
+Equivalent to:  sorted(iterable)[:n]\n");
+
 static PyMethodDef heapq_methods[] = {
        {"heappush",    (PyCFunction)heappush,          
                METH_VARARGS,   heappush_doc},
@@ -301,6 +456,8 @@ static PyMethodDef heapq_methods[] = {
                METH_O,         heapify_doc},
        {"nlargest",    (PyCFunction)nlargest,
                METH_VARARGS,   nlargest_doc},
+       {"nsmallest",   (PyCFunction)nsmallest,
+               METH_VARARGS,   nsmallest_doc},
        {NULL,          NULL}           /* sentinel */
 };