]> granicus.if.org Git - python/commitdiff
Add itertools.izip_longest().
author Raymond Hettinger <python@rcn.com>
Wed, 21 Feb 2007 05:20:38 +0000 (05:20 +0000)
committer Raymond Hettinger <python@rcn.com>
Wed, 21 Feb 2007 05:20:38 +0000 (05:20 +0000)
Doc/lib/libitertools.tex
Lib/test/test_itertools.py
Misc/NEWS
Modules/itertoolsmodule.c

index 59fbd98844e8ad2f46e990bd8ef247a71cc9ab48..ac6028b31a747321a35e19ec66b378d4fb926a3c 100644 (file)
@@ -302,6 +302,33 @@ by functions or loops that truncate the stream.
   don't care about trailing, unmatched values from the longer iterables.
 \end{funcdesc}
 
+\begin{funcdesc}{izip_longest}{*iterables\optional{, fillvalue}}
+  Make an iterator that aggregates elements from each of the iterables.
+  If the iterables are of uneven length, missing values are filled in
+  with \var{fillvalue}, which defaults to \code{None}.  Iteration continues
+  until the longest iterable is exhausted.  Equivalent to:
+
+  \begin{verbatim}
+    def izip_longest(*args, **kwds):
+        # Pure-Python sketch: pad the shorter iterables with fillvalue
+        # (None when the keyword is absent) until the longest one ends.
+        fillvalue = kwds.get('fillvalue')
+        # The default argument is evaluated once, so every sentinel() shares
+        # a single list of len(args)-1 fill values.  Each exhausted iterable
+        # pops one; the last iterable to finish finds the list empty.
+        def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
+            yield counter()         # yields the fillvalue, or raises IndexError
+        fillers = repeat(fillvalue)
+        iters = [chain(it, sentinel(), fillers) for it in args]
+        try:
+            for tup in izip(*iters):
+                yield tup
+        except IndexError:
+            # Raised by the final sentinel: every input is exhausted.
+            pass
+  \end{verbatim}
+
+  If one of the iterables is potentially infinite, then the
+  \function{izip_longest()} function should be wrapped with something
+  that limits the number of calls (for example \function{islice()} or
+  \function{take()}).
+  \versionadded{2.6}
+\end{funcdesc}
+
 \begin{funcdesc}{repeat}{object\optional{, times}}
   Make an iterator that returns \var{object} over and over again.
   Runs indefinitely unless the \var{times} argument is specified.
index c965d4c14efc3c126dbc71008069fa1bf7f49345..93fdab7998f2671316fe1c95416a229d5af83c06 100644 (file)
@@ -198,6 +198,51 @@ class TestBasicOps(unittest.TestCase):
         ids = map(id, list(izip('abc', 'def')))
         self.assertEqual(len(dict.fromkeys(ids)), len(ids))
 
+    def test_iziplongest(self):
+        for args in [
+                ['abc', range(6)],
+                [range(6), 'abc'],
+                [range(1000), range(2000,2100), range(3000,3050)],
+                [range(1000), range(0), range(3000,3050), range(1200), range(1500)],
+                [range(1000), range(0), range(3000,3050), range(1200), range(1500), range(0)],
+            ]:
+            target = map(None, *args)
+            self.assertEqual(list(izip_longest(*args)), target)
+            self.assertEqual(list(izip_longest(*args, **{})), target)
+            target = [tuple((e is None and 'X' or e) for e in t) for t in target]   # Replace None fills with 'X'
+            self.assertEqual(list(izip_longest(*args, **dict(fillvalue='X'))), target)
+        
+        self.assertEqual(take(3,izip_longest('abcdef', count())), zip('abcdef', range(3))) # take 3 from infinite input
+
+        self.assertEqual(list(izip_longest()), zip())
+        self.assertEqual(list(izip_longest([])), zip([]))
+        self.assertEqual(list(izip_longest('abcdef')), zip('abcdef'))
+    
+        self.assertEqual(list(izip_longest('abc', 'defg', **{})), map(None, 'abc', 'defg')) # empty keyword dict
+        self.assertRaises(TypeError, izip_longest, 3)
+        self.assertRaises(TypeError, izip_longest, range(3), 3)
+
+        for stmt in [
+            "izip_longest('abc', fv=1)",
+            "izip_longest('abc', fillvalue=1, bogus_keyword=None)",            
+        ]:
+            try:
+                eval(stmt, globals(), locals())
+            except TypeError:
+                pass
+            else:
+                self.fail('Did not raise Type in:  ' + stmt)
+        
+        # Check tuple re-use (implementation detail)
+        self.assertEqual([tuple(list(pair)) for pair in izip_longest('abc', 'def')],
+                         zip('abc', 'def'))
+        self.assertEqual([pair for pair in izip_longest('abc', 'def')],
+                         zip('abc', 'def'))
+        ids = map(id, izip_longest('abc', 'def'))
+        self.assertEqual(min(ids), max(ids))
+        ids = map(id, list(izip_longest('abc', 'def')))
+        self.assertEqual(len(dict.fromkeys(ids)), len(ids))
+
     def test_repeat(self):
         self.assertEqual(zip(xrange(3),repeat('a')),
                          [(0, 'a'), (1, 'a'), (2, 'a')])
@@ -611,6 +656,15 @@ class TestVariousIteratorArgs(unittest.TestCase):
             self.assertRaises(TypeError, list, izip(N(s)))
             self.assertRaises(ZeroDivisionError, list, izip(E(s)))
 
+    def test_iziplongest(self):
+        for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
+            for g in (G, I, Ig, S, L, R):
+                self.assertEqual(list(izip_longest(g(s))), zip(g(s)))
+                self.assertEqual(list(izip_longest(g(s), g(s))), zip(g(s), g(s)))
+            self.assertRaises(TypeError, izip_longest, X(s))
+            self.assertRaises(TypeError, list, izip_longest(N(s)))
+            self.assertRaises(ZeroDivisionError, list, izip_longest(E(s)))
+
     def test_imap(self):
         for s in (range(10), range(0), range(100), (7,11), xrange(20,50,5)):
             for g in (G, I, Ig, S, L, R):
index 72d58320a7159f0407fa9c0556541f683d6dd35d..d7eab9cbc0fc2da8d1111368e72812072862946e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -127,6 +127,8 @@ Library
 
 - Added heapq.merge() for merging sorted input streams.
 
+- Added itertools.izip_longest().
+
 - Have the encoding package's search function dynamically import using absolute
   import semantics.
 
index 70f787f784b2b6660bab88e5d79bb51027a4fbe7..1c91a1959dfe05fedc77abe590c8db831d97251d 100644 (file)
@@ -2472,6 +2472,238 @@ static PyTypeObject repeat_type = {
        PyObject_GC_Del,                /* tp_free */
 };
 
+/* iziplongest object ************************************************************/
+
+#include "Python.h"
+
+/* Instance layout for itertools.izip_longest objects. */
+typedef struct {
+       PyObject_HEAD
+       Py_ssize_t tuplesize;   /* number of iterables given to the constructor */
+       Py_ssize_t numactive;   /* input iterators not yet replaced by the filler */
+       PyObject *ittuple;              /* tuple of iterators */
+       PyObject *result;       /* output tuple, reused while its refcount is 1 */
+       PyObject *fillvalue;    /* value substituted for exhausted iterators */
+       PyObject *filler;               /* repeat(fillvalue) */
+} iziplongestobject;
+
+static PyTypeObject iziplongest_type;
+
+/* tp_new for itertools.izip_longest.
+ *
+ * args holds the iterables to aggregate.  kwds may hold a single
+ * "fillvalue" entry (default: None); any other keyword, or more than one
+ * keyword, raises TypeError.  Each positional argument must support
+ * iteration.
+ *
+ * Returns a new iziplongestobject, or NULL with an exception set.
+ */
+static PyObject *
+izip_longest_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+       iziplongestobject *lz;
+       Py_ssize_t i;
+       PyObject *ittuple;  /* tuple of iterators */
+       PyObject *result;
+       PyObject *fillvalue = Py_None;
+       PyObject *filler;
+       Py_ssize_t tuplesize = PySequence_Length(args);
+
+       if (kwds != NULL && PyDict_CheckExact(kwds) && PyDict_Size(kwds) > 0) {
+               /* borrowed reference; NULL when the key is absent */
+               fillvalue = PyDict_GetItemString(kwds, "fillvalue");
+               if (fillvalue == NULL  ||  PyDict_Size(kwds) > 1) {
+                       PyErr_SetString(PyExc_TypeError,
+                               "izip_longest() got an unexpected keyword argument");
+                       return NULL;
+               }
+       }
+
+       /* args must be a tuple */
+       assert(PyTuple_Check(args));
+
+       /* obtain iterators */
+       ittuple = PyTuple_New(tuplesize);
+       if (ittuple == NULL)
+               return NULL;
+       for (i=0; i < tuplesize; ++i) {
+               PyObject *item = PyTuple_GET_ITEM(args, i);
+               PyObject *it = PyObject_GetIter(item);
+               if (it == NULL) {
+                       /* replace the generic TypeError with one naming the argument */
+                       if (PyErr_ExceptionMatches(PyExc_TypeError))
+                               PyErr_Format(PyExc_TypeError,
+                                   "izip_longest argument #%zd must support iteration",
+                                   i+1);
+                       Py_DECREF(ittuple);
+                       return NULL;
+               }
+               PyTuple_SET_ITEM(ittuple, i, it);
+       }
+
+       /* repeat(fillvalue): stands in for each iterator once it is exhausted */
+       filler = PyObject_CallFunctionObjArgs((PyObject *)(&repeat_type), fillvalue, NULL);
+       if (filler == NULL) {
+               Py_DECREF(ittuple);
+               return NULL;
+       }
+
+       /* create a result holder */
+       result = PyTuple_New(tuplesize);
+       if (result == NULL) {
+               Py_DECREF(ittuple);
+               Py_DECREF(filler);
+               return NULL;
+       }
+       for (i=0 ; i < tuplesize ; i++) {
+               Py_INCREF(Py_None);
+               PyTuple_SET_ITEM(result, i, Py_None);
+       }
+
+       /* create iziplongestobject structure */
+       lz = (iziplongestobject *)type->tp_alloc(type, 0);
+       if (lz == NULL) {
+               Py_DECREF(ittuple);
+               Py_DECREF(filler);
+               Py_DECREF(result);
+               return NULL;
+       }
+       lz->ittuple = ittuple;
+       lz->tuplesize = tuplesize;
+       lz->numactive = tuplesize;
+       lz->result = result;
+       /* fillvalue is borrowed (Py_None or a dict item), so take a reference */
+       Py_INCREF(fillvalue);
+       lz->fillvalue = fillvalue;
+       /* filler is already a new reference from the call above; the object
+          takes ownership of it.  An extra INCREF here would leak it, since
+          the destructor releases it only once. */
+       lz->filler = filler;
+       return (PyObject *)lz;
+}
+
+/* Destructor: untrack the object from the GC, release the references it
+   holds, then free it through the type's tp_free slot. */
+static void
+izip_longest_dealloc(iziplongestobject *lz)
+{
+       PyObject_GC_UnTrack(lz);
+       Py_XDECREF(lz->ittuple);
+       Py_XDECREF(lz->result);
+       Py_XDECREF(lz->fillvalue);
+       Py_XDECREF(lz->filler);
+       lz->ob_type->tp_free(lz);
+}
+
+/* GC traversal: report each PyObject* member to the visit callback.
+   Returns 0 once all members have been visited. */
+static int
+izip_longest_traverse(iziplongestobject *lz, visitproc visit, void *arg)
+{
+       Py_VISIT(lz->ittuple);
+       Py_VISIT(lz->result);
+       Py_VISIT(lz->fillvalue);
+       Py_VISIT(lz->filler);
+       return 0;
+}
+
+/* Fetch the next value for slot i of the output tuple.
+ *
+ * Returns a new reference, or NULL when iteration must stop: either the
+ * last active iterator is exhausted (no exception set) or the iterator
+ * raised an error (exception left set for the caller).  When an iterator
+ * is exhausted while others remain active, it is replaced in ittuple by
+ * the shared filler and fillvalue is returned for this round.
+ */
+static PyObject *
+izip_longest_fetch(iziplongestobject *lz, Py_ssize_t i)
+{
+       PyObject *it = PyTuple_GET_ITEM(lz->ittuple, i);
+       PyObject *item;
+
+       assert(PyIter_Check(it));
+       item = (*it->ob_type->tp_iternext)(it);
+       if (item != NULL)
+               return item;
+
+       /* NULL means exhaustion only if no exception, or StopIteration,
+          is pending; any other exception must reach the caller instead
+          of being masked by a fill value. */
+       if (PyErr_Occurred()) {
+               if (!PyErr_ExceptionMatches(PyExc_StopIteration))
+                       return NULL;
+               PyErr_Clear();
+       }
+       if (lz->numactive <= 1)
+               return NULL;
+
+       /* install the filler before dropping the exhausted iterator */
+       Py_INCREF(lz->filler);
+       PyTuple_SET_ITEM(lz->ittuple, i, lz->filler);
+       Py_DECREF(it);
+       lz->numactive -= 1;
+       Py_INCREF(lz->fillvalue);
+       return lz->fillvalue;
+}
+
+/* tp_iternext for itertools.izip_longest.
+ *
+ * Returns a tuple holding the next value from each iterator, with
+ * fillvalue standing in for exhausted ones.  Returns NULL when every
+ * iterator is exhausted, when there are no iterators at all, or when an
+ * iterator raises (exception set).
+ */
+static PyObject *
+izip_longest_next(iziplongestobject *lz)
+{
+       Py_ssize_t i;
+       Py_ssize_t tuplesize = lz->tuplesize;
+       PyObject *result = lz->result;
+       PyObject *item;
+       PyObject *olditem;
+
+       if (tuplesize == 0)
+               return NULL;
+       if (result->ob_refcnt == 1) {
+               /* nobody else holds the previous tuple: refill it in place */
+               Py_INCREF(result);
+               for (i=0 ; i < tuplesize ; i++) {
+                       item = izip_longest_fetch(lz, i);
+                       if (item == NULL) {
+                               Py_DECREF(result);
+                               return NULL;
+                       }
+                       olditem = PyTuple_GET_ITEM(result, i);
+                       PyTuple_SET_ITEM(result, i, item);
+                       Py_DECREF(olditem);
+               }
+       } else {
+               /* the previous tuple is still referenced: build a fresh one */
+               result = PyTuple_New(tuplesize);
+               if (result == NULL)
+                       return NULL;
+               for (i=0 ; i < tuplesize ; i++) {
+                       item = izip_longest_fetch(lz, i);
+                       if (item == NULL) {
+                               Py_DECREF(result);
+                               return NULL;
+                       }
+                       PyTuple_SET_ITEM(result, i, item);
+               }
+       }
+       return result;
+}
+
+/* Docstring for the izip_longest type (installed in its tp_doc slot). */
+PyDoc_STRVAR(izip_longest_doc,
+"izip_longest(iter1 [,iter2 [...]], [fillvalue=None]) --> izip_longest object\n\
+\n\
+Return an izip_longest object whose .next() method returns a tuple where\n\
+the i-th element comes from the i-th iterable argument.  The .next()\n\
+method continues until the longest iterable in the argument sequence\n\
+is exhausted and then it raises StopIteration.  When the shorter iterables\n\
+are exhausted, the fillvalue is substituted in their place.  The fillvalue\n\
+defaults to None or can be specified by a keyword argument.\n\
+");
+
+/* Type object for itertools.izip_longest: a GC-tracked, subclassable
+   iterator type.  It is its own iterator (PyObject_SelfIter) and yields
+   values through izip_longest_next.  ob_type is left NULL here --
+   presumably filled in when the module is initialized; confirm against
+   inititertools(). */
+static PyTypeObject iziplongest_type = {
+       PyObject_HEAD_INIT(NULL)
+       0,                              /* ob_size */
+       "itertools.izip_longest",       /* tp_name */
+       sizeof(iziplongestobject),      /* tp_basicsize */
+       0,                              /* tp_itemsize */
+       /* methods */
+       (destructor)izip_longest_dealloc,       /* tp_dealloc */
+       0,                              /* tp_print */
+       0,                              /* tp_getattr */
+       0,                              /* tp_setattr */
+       0,                              /* tp_compare */
+       0,                              /* tp_repr */
+       0,                              /* tp_as_number */
+       0,                              /* tp_as_sequence */
+       0,                              /* tp_as_mapping */
+       0,                              /* tp_hash */
+       0,                              /* tp_call */
+       0,                              /* tp_str */
+       PyObject_GenericGetAttr,        /* tp_getattro */
+       0,                              /* tp_setattro */
+       0,                              /* tp_as_buffer */
+       Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+               Py_TPFLAGS_BASETYPE,    /* tp_flags */
+       izip_longest_doc,                       /* tp_doc */
+       (traverseproc)izip_longest_traverse,    /* tp_traverse */
+       0,                              /* tp_clear */
+       0,                              /* tp_richcompare */
+       0,                              /* tp_weaklistoffset */
+       PyObject_SelfIter,              /* tp_iter */
+       (iternextfunc)izip_longest_next,        /* tp_iternext */
+       0,                              /* tp_methods */
+       0,                              /* tp_members */
+       0,                              /* tp_getset */
+       0,                              /* tp_base */
+       0,                              /* tp_dict */
+       0,                              /* tp_descr_get */
+       0,                              /* tp_descr_set */
+       0,                              /* tp_dictoffset */
+       0,                              /* tp_init */
+       0,                              /* tp_alloc */
+       izip_longest_new,                       /* tp_new */
+       PyObject_GC_Del,                /* tp_free */
+};
 
 /* module level code ********************************************************/
 
@@ -2485,6 +2717,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
 \n\
 Iterators terminating on the shortest input sequence:\n\
 izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
+izip_longest(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
 ifilter(pred, seq) --> elements of seq where pred(elem) is True\n\
 ifilterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
 islice(seq, [start,] stop [, step]) --> elements from\n\
@@ -2522,6 +2755,7 @@ inititertools(void)
                &ifilterfalse_type,
                &count_type,
                &izip_type,
+               &iziplongest_type,                
                &repeat_type,
                &groupby_type,
                NULL