From aaa6e630f8440c7a29621863df35fb4c427e5968 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 13 Jan 2009 01:05:03 +0000 Subject: [PATCH] Simplify Counter() API. Replace items keyword argument with a mapping. Makes Counter() idempotent, makes update() API the same as Counter.__init__(), makes a more readable repr, makes the API more dict-like, and allows Steven Bethard's update() example to work. --- Doc/library/collections.rst | 32 ++++++++++++------------ Lib/collections.py | 48 +++++++++++++++++++----------------- Lib/test/test_collections.py | 8 +++--- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 4aced1e500..e7cced105a 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -161,12 +161,12 @@ A counter tool is provided to support convenient and rapid tallies. For example:: # Tally repeated words in a list - >>> words = ['red', 'blue', 'red', 'green', 'blue', blue'] + >>> words = ['red', 'blue', 'red', 'green', 'blue', 'blue'] >>> cnt = Counter() >>> for word in words: ... cnt[word] += 1 >>> cnt - Counter(items=[('blue', 3), ('red', 2), ('green', 1)]) + Counter({'blue': 3, 'red': 2, 'green': 1}) # Find the ten most common words in Hamlet >>> import re @@ -175,21 +175,20 @@ For example:: [('the', 1143), ('and', 966), ('to', 762), ('of', 669), ('i', 631), ('you', 554), ('a', 546), ('my', 514), ('hamlet', 471), ('in', 451)] -.. class:: Counter([iterable[, items]]) +.. class:: Counter([iterable]) A :class:`Counter` is a :class:`dict` subclass for counting hashable items. - Elements are stored as dictionary keys and their counts are stored as - dictionary values. Counts are allowed to be any integer value including - zero or negative counts. The :class:`Counter` class is similar to bags - or multisets in other languages. + It is an unordered collection where elements are stored as dictionary keys + and their counts are stored as dictionary values. 
Counts are allowed to be + any integer value including zero or negative counts. The :class:`Counter` + class is similar to bags or multisets in other languages. Elements are counted from the *iterable* if given. Also, the counts - can be initialized from an *items* list of *(element, count)* pairs. - If provided, *items* must be a keyword argument:: + can be initialized from another mapping of elements to their counts:: >>> c = Counter() # a new, empty counter >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter(items=[('a', 4), ('b', 2)]) # a new counter from an items list + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping The returned object has a dictionary style interface except that it returns a zero count for missing items (instead of raising a :exc:`KeyError` like a @@ -222,7 +221,7 @@ For example:: Elements are returned in arbitrary order. If an element's count has been set to zero or a negative number, :meth:`elements` will ignore it. - >>> c = Counter(items=[('a', 4), ('b', 2), ('d', 0), ('e', -2)]) + >>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2}) >>> list(c.elements()) ['a', 'a', 'a', 'a', 'b', 'b'] @@ -245,19 +244,20 @@ For example:: There is no equivalent class method for :class:`Counter` objects. Raises a :exc:`NotImplementedError` when called. - .. method:: update(mapping) + .. method:: update(iterable) Like :meth:`dict.update` but adds-in counts instead of replacing them. - Used for combining two independent counts. Accepts a *mapping* object - which can be another counter or can be a :class:`dict` that maps - elements to element counts:: + + Elements are counted from the *iterable* if given. 
Also, the counts + can be taken from another counter or mapping of elements to their + counts:: >>> c = Counter('which') # count letters in a word >>> d = Counter('witch') # count letters in another word >>> c.update(d) # add counts from d to those in c >>> c['h'] # count of 'h' is now three 3 - >>> c.update(Counter('watch')) # add in letters from another word + >>> c.update('watch') # add in letters from another word >>> c['h'] # count of 'h' is now four 4 diff --git a/Lib/collections.py b/Lib/collections.py index ff498442b0..349f884b46 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -167,21 +167,17 @@ class Counter(dict): # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 - def __init__(self, iterable=None, items=None): + def __init__(self, iterable=None): '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from an items list - of (element, count) pairs. + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. - >>> c = Counter('hocus pocus') # count elements in an iterable - >>> c = Counter(items=[('a', 4), ('b', 2)]) # take counts from an items list + >>> c = Counter() # a new, empty counter + >>> c = Counter('hocus pocus') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping ''' - if iterable is not None: - for elem in iterable: - self[elem] += 1 - if items is not None: - for elem, count in items: - self[elem] += count + self.update(iterable) def __missing__(self, key): 'The count of elements not in the Counter is zero.' 
@@ -210,7 +206,7 @@ class Counter(dict): # Knuth's example of prime factors of 1836: 2**2 * 3**3 * 17**1 >>> import operator - >>> prime_factors = Counter(items=[(2,2), (3,3), (17,1)]) + >>> prime_factors = Counter(dict([(2,2), (3,3), (17,1)])) >>> sorted(prime_factors.elements()) # list individual factors [2, 2, 3, 3, 3, 17] >>> reduce(operator.mul, prime_factors.elements(), 1) # multiply them @@ -234,16 +230,19 @@ class Counter(dict): raise NotImplementedError( 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - def update(self, mapping): + def update(self, iterable=None): '''Like dict.update() but add counts instead of replacing them. - Source can be another dictionary or a Counter.instance(). + Source can be an iterable, a dictionary, or another Counter instance. >>> c = Counter('which') >>> d = Counter('witch') - >>> c.update(d) # Add counts from d to those in c - >>> c['h'] # Count of 'h' is now three + >>> c.update(d) # add counts from d to those in c + >>> c['h'] # count of 'h' is now three 3 + >>> c.update('watch') + >>> c['h'] + 4 ''' # The regular dict.update() operation makes no sense here because the @@ -254,19 +253,24 @@ class Counter(dict): # multisets and implement the union-add operation discussed in # TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for # multisets calls that operation a sum or join. - for elem, count in mapping.iteritems(): - self[elem] += count + + if iterable is not None: + if isinstance(iterable, Mapping): + for elem, count in iterable.iteritems(): + self[elem] += count + else: + for elem in iterable: + self[elem] += 1 def copy(self): 'Like dict.copy() but returns a Counter instance instead of a dict.' 
- c = Counter() - c.update(self) - return c + return Counter(self) def __repr__(self): if not self: return '%s()' % self.__class__.__name__ - return '%s(items=%r)' % (self.__class__.__name__, self.most_common()) + items = ', '.join('%r: %r' % item for item in self.most_common()) + return '%s({%s})' % (self.__class__.__name__, items) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 00882e21fe..80ee2c58b1 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -370,8 +370,7 @@ class TestCounter(unittest.TestCase): self.assertEqual(c.get('b', 10), 2) self.assertEqual(c.get('z', 10), 10) self.assertEqual(c, dict(a=3, b=2, c=1)) - self.assertEqual(repr(c), - "Counter(items=[('a', 3), ('b', 2), ('c', 1)])") + self.assertEqual(repr(c), "Counter({'a': 3, 'b': 2, 'c': 1})") self.assertEqual(c.most_common(), [('a', 3), ('b', 2), ('c', 1)]) for i in range(5): self.assertEqual(c.most_common(i), @@ -396,8 +395,8 @@ class TestCounter(unittest.TestCase): self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc') self.assertRaises(TypeError, hash, c) c.update(dict(a=5, b=3, c=1)) - c.update(Counter(items=[('a', 50), ('b', 30)])) - c.__init__(items=[('a', 500), ('b', 300)]) + c.update(Counter('a' * 50 + 'b' * 30)) + c.__init__('a' * 500 + 'b' * 300) c.__init__('cdc') self.assertEqual(c, dict(a=555, b=333, c=3, d=1)) self.assertEqual(c.setdefault('d', 5), 1) @@ -425,6 +424,7 @@ class TestCounter(unittest.TestCase): cPickle.loads(cPickle.dumps(words, -1)), eval(repr(words)), update_test, + Counter(words), ]): msg = (i, dup, words) self.assert_(dup is not words) -- 2.50.1