A number of tools in Python accept key functions to control how elements
are ordered or grouped. They include :func:`min`, :func:`max`,
- :func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`,
- :func:`heapq.nlargest`, and :func:`itertools.groupby`.
+ :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`,
+ :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and
+ :func:`itertools.groupby`.
There are several ways to create a key function. For example. the
:meth:`str.lower` method can serve as a key function for case insensitive
- sorts. Alternatively, an ad-hoc key function can be built from a
+ sorts. Alternatively, a key function can be built from a
:keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also,
the :mod:`operator` module provides three key function constructors:
:func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and
The module also offers three general purpose functions based on heaps.
-.. function:: merge(*iterables)
+.. function:: merge(*iterables, key=None, reverse=False)
Merge multiple sorted inputs into a single sorted output (for example, merge
timestamped entries from multiple log files). Returns an :term:`iterator`
not pull the data into memory all at once, and assumes that each of the input
streams is already sorted (smallest to largest).
+ Has two optional arguments which must be specified as keyword arguments.
+
+ *key* specifies a :term:`key function` of one argument that is used to
+ extract a comparison key from each input element. The default value is
+ ``None`` (compare the elements directly).
+
+ *reverse* is a boolean value. If set to ``True``, then the input elements
+ are merged as if each comparison were reversed.
+
+ .. versionchanged:: 3.5
+ Added the optional *key* and *reverse* parameters.
+
.. function:: nlargest(n, iterable, key=None)
for i in reversed(range(n//2)):
_siftup(x, i)
+def _heappop_max(heap):
+ """Maxheap version of a heappop."""
+ lastelt = heap.pop() # raises appropriate IndexError if heap is empty
+ if heap:
+ returnitem = heap[0]
+ heap[0] = lastelt
+ _siftup_max(heap, 0)
+ return returnitem
+ return lastelt
+
def _heapreplace_max(heap, item):
"""Maxheap version of a heappop followed by a heappush."""
returnitem = heap[0] # raises appropriate IndexError if heap is empty
except ImportError:
pass
-def merge(*iterables):
+def merge(*iterables, key=None, reverse=False):
'''Merge multiple sorted inputs into a single sorted output.
Similar to sorted(itertools.chain(*iterables)) but returns a generator,
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+ If *key* is not None, applies a key function to each element to determine
+ its sort order.
+
+ >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
+ ['dog', 'cat', 'fish', 'horse', 'kangaroo']
+
'''
h = []
h_append = h.append
+
+ if reverse:
+ _heapify = _heapify_max
+ _heappop = _heappop_max
+ _heapreplace = _heapreplace_max
+ direction = -1
+ else:
+ _heapify = heapify
+ _heappop = heappop
+ _heapreplace = heapreplace
+ direction = 1
+
+ if key is None:
+ for order, it in enumerate(map(iter, iterables)):
+ try:
+ next = it.__next__
+ h_append([next(), order * direction, next])
+ except StopIteration:
+ pass
+ _heapify(h)
+ while len(h) > 1:
+ try:
+ while True:
+ value, order, next = s = h[0]
+ yield value
+ s[0] = next() # raises StopIteration when exhausted
+ _heapreplace(h, s) # restore heap condition
+ except StopIteration:
+ _heappop(h) # remove empty iterator
+ if h:
+ # fast case when only a single iterator remains
+ value, order, next = h[0]
+ yield value
+ yield from next.__self__
+ return
+
for order, it in enumerate(map(iter, iterables)):
try:
next = it.__next__
- h_append([next(), order, next])
+ value = next()
+ h_append([key(value), order * direction, value, next])
except StopIteration:
pass
- heapify(h)
-
- _heapreplace = heapreplace
+ _heapify(h)
while len(h) > 1:
try:
while True:
- value, order, next = s = h[0]
+ key_value, order, value, next = s = h[0]
yield value
- s[0] = next() # raises StopIteration when exhausted
- _heapreplace(h, s) # restore heap condition
+ value = next()
+ s[0] = key(value)
+ s[2] = value
+ _heapreplace(h, s)
except StopIteration:
- heappop(h) # remove empty iterator
+ _heappop(h)
if h:
- # fast case when only a single iterator remains
- value, order, next = h[0]
+ key_value, order, value, next = h[0]
yield value
yield from next.__self__
from test import support
from unittest import TestCase, skipUnless
+from operator import itemgetter
py_heapq = support.import_fresh_module('heapq', blocked=['_heapq'])
c_heapq = support.import_fresh_module('heapq', fresh=['_heapq'])
def test_merge(self):
inputs = []
- for i in range(random.randrange(5)):
- row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
+ for i in range(random.randrange(25)):
+ row = []
+ for j in range(random.randrange(100)):
+ tup = random.choice('ABC'), random.randrange(-500, 500)
+ row.append(tup)
inputs.append(row)
- self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs)))
- self.assertEqual(list(self.module.merge()), [])
+
+ for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]:
+ for reverse in [False, True]:
+ seqs = []
+ for seq in inputs:
+ seqs.append(sorted(seq, key=key, reverse=reverse))
+ self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse),
+ list(self.module.merge(*seqs, key=key, reverse=reverse)))
+ self.assertEqual(list(self.module.merge()), [])
def test_merge_does_not_suppress_index_error(self):
# Issue 19018: Heapq.merge suppresses IndexError from user generator
error bubble up as this "bad data" appears in many real world zip files in
the wild and is ignored by other zip tools.
+- Issue #13742: Added "key" and "reverse" parameters to heapq.merge().
+ (First draft of patch contributed by Simon Sapin.)
+
- Issue #21402: tkinter.ttk now works when default root window is not set.
- Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by