Issue 10586: change the new functools.lru_cache implementation to expose the maximum...

author Nick Coghlan <ncoghlan@gmail.com>

Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)

committer Nick Coghlan <ncoghlan@gmail.com>

Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)
author Nick Coghlan <ncoghlan@gmail.com>
Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)
committer Nick Coghlan <ncoghlan@gmail.com>
Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)
diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst

index 82238fda72bcea07d1d01b4fe1238760a898de9f..a8fc8569a1dbc9d7d624c0975931b7b629e6b7c0 100644 (file)
--- a/Doc/library/functools.rst
+++ b/Doc/library/functools.rst
@@ -52,10 +52,16 @@ The :mod:`functools` module defines the following functions:
     results, the positional and keyword arguments to the function must be
     hashable.
  
-   The wrapped function is instrumented with two attributes, :attr:`cache_hits`
-   and :attr:`cache_misses` which count the number of successful or unsuccessful
-   cache lookups.  These statistics are helpful for tuning the *maxsize*
-   parameter and for measuring the cache's effectiveness.
+   The wrapped function is instrumented with a :attr:`cache_info` attribute that
+   can be called to retrieve a named tuple with the following fields:
+
+      - :attr:`maxsize`: maximum cache size (as set by the *maxsize* parameter)
+      - :attr:`size`: current number of entries in the cache
+      - :attr:`hits`: number of successful cache lookups
+      - :attr:`misses`: number of unsuccessful cache lookups.
+
+   These statistics are helpful for tuning the *maxsize* parameter and for measuring
+   the effectiveness of the cache.
  
     The wrapped function also has a :attr:`cache_clear` attribute which can be
     called (with no arguments) to clear the cache.
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst

index 1edbf5095ab56fadcfb113b4beedea25861e19e4..91ab849a65ac125be17ebb8f73422e8113396927 100644 (file)
--- a/Doc/whatsnew/3.2.rst
+++ b/Doc/whatsnew/3.2.rst
@@ -332,6 +332,8 @@ New, Improved, and Deprecated Modules
           c.execute('SELECT phonenumber FROM phonelist WHERE name=?', (name,))
           return c.fetchone()[0]
  
+  XXX: update for Issue 10586 changes to cache statistics API
+
    To help with choosing an effective cache size, the wrapped function is
    instrumented with two attributes *cache_hits* and *cache_misses*:
  
diff --git a/Lib/functools.py b/Lib/functools.py

index c1fa170d72f6abfff9534b25332b47144d6b84b2..c223a629d03f86db3a74dcf794d038602619c9ca 100644 (file)
--- a/Lib/functools.py
+++ b/Lib/functools.py
@@ -12,7 +12,7 @@ __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
             'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial']
  
  from _functools import partial, reduce
-from collections import OrderedDict
+from collections import OrderedDict, namedtuple
  try:
      from _thread import allocate_lock as Lock
  except:
@@ -114,12 +114,15 @@ def cmp_to_key(mycmp):
              raise TypeError('hash not implemented')
      return K
  
+_CacheInfo = namedtuple("CacheInfo", "maxsize, size, hits, misses")
+
  def lru_cache(maxsize=100):
      """Least-recently-used cache decorator.
  
      Arguments to the cached function must be hashable.
  
-    Performance statistics stored in f.cache_hits and f.cache_misses.
+    Significant statistics (maxsize, size, hits, misses) are
+      available through the f.cache_info() named tuple.
      Clear the cache and statistics using f.cache_clear().
      The underlying function is stored in f.__wrapped__.
  
@@ -127,7 +130,7 @@ def lru_cache(maxsize=100):
  
      """
      # Users should only access the lru_cache through its public API:
-    #   cache_hits, cache_misses, cache_clear(), and __wrapped__
+    #   cache_info, cache_clear, and f.__wrapped__
      # The internals of the lru_cache are encapsulated for thread safety and
      # to allow the implementation to change (including a possible C version).
  
@@ -137,11 +140,13 @@ def lru_cache(maxsize=100):
          cache = OrderedDict()           # ordered least recent to most recent
          cache_popitem = cache.popitem
          cache_renew = cache.move_to_end
+        hits = misses = 0
          kwd_mark = object()             # separate positional and keyword args
          lock = Lock()
  
          @wraps(user_function)
          def wrapper(*args, **kwds):
+            nonlocal hits, misses
              key = args
              if kwds:
                  key += (kwd_mark,) + tuple(sorted(kwds.items()))
@@ -149,23 +154,29 @@ def lru_cache(maxsize=100):
                  with lock:
                      result = cache[key]
                      cache_renew(key)            # record recent use of this key
-                    wrapper.cache_hits += 1
+                    hits += 1
              except KeyError:
                  result = user_function(*args, **kwds)
                  with lock:
                      cache[key] = result         # record recent use of this key
-                    wrapper.cache_misses += 1
+                    misses += 1
                      if len(cache) > maxsize:
                          cache_popitem(0)        # purge least recently used cache entry
              return result
  
+        def cache_info():
+            """Report significant cache statistics"""
+            with lock:
+                return _CacheInfo(maxsize, len(cache), hits, misses)
+
          def cache_clear():
              """Clear the cache and cache statistics"""
+            nonlocal hits, misses
              with lock:
                  cache.clear()
-                wrapper.cache_hits = wrapper.cache_misses = 0
+                hits = misses = 0
  
-        wrapper.cache_hits = wrapper.cache_misses = 0
+        wrapper.cache_info = cache_info
          wrapper.cache_clear = cache_clear
          return wrapper
  
diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py

index 65ef9b4a7f43e1c53e45a7bb574061a37bbb1277..c877f889d8636d8fd464654c572b0cd0e3874d0e 100644 (file)
--- a/Lib/test/test_functools.py
+++ b/Lib/test/test_functools.py
@@ -501,6 +501,11 @@ class TestLRU(unittest.TestCase):
          def orig(x, y):
              return 3*x+y
          f = functools.lru_cache(maxsize=20)(orig)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(maxsize, 20)
+        self.assertEqual(currsize, 0)
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 0)
  
          domain = range(5)
          for i in range(1000):
@@ -508,21 +513,29 @@ class TestLRU(unittest.TestCase):
              actual = f(x, y)
              expected = orig(x, y)
              self.assertEqual(actual, expected)
-        self.assertTrue(f.cache_hits > f.cache_misses)
-        self.assertEqual(f.cache_hits + f.cache_misses, 1000)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertTrue(hits > misses)
+        self.assertEqual(hits + misses, 1000)
+        self.assertEqual(currsize, 20)
  
          f.cache_clear()   # test clearing
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 0)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 0)
+        self.assertEqual(currsize, 0)
          f(x, y)
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
  
          # Test bypassing the cache
          self.assertIs(f.__wrapped__, orig)
          f.__wrapped__(x, y)
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
  
          # test size zero (which means "never-cache")
          @functools.lru_cache(0)
@@ -530,10 +543,15 @@ class TestLRU(unittest.TestCase):
              nonlocal f_cnt
              f_cnt += 1
              return 20
+        self.assertEqual(f.cache_info().maxsize, 0)
          f_cnt = 0
          for i in range(5):
              self.assertEqual(f(), 20)
          self.assertEqual(f_cnt, 5)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 5)
+        self.assertEqual(currsize, 0)
  
          # test size one
          @functools.lru_cache(1)
@@ -541,10 +559,15 @@ class TestLRU(unittest.TestCase):
              nonlocal f_cnt
              f_cnt += 1
              return 20
+        self.assertEqual(f.cache_info().maxsize, 1)
          f_cnt = 0
          for i in range(5):
              self.assertEqual(f(), 20)
          self.assertEqual(f_cnt, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 4)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
  
          # test size two
          @functools.lru_cache(2)
@@ -552,11 +575,16 @@ class TestLRU(unittest.TestCase):
              nonlocal f_cnt
              f_cnt += 1
              return x*10
+        self.assertEqual(f.cache_info().maxsize, 2)
          f_cnt = 0
          for x in 7, 9, 7, 9, 7, 9, 8, 8, 8, 9, 9, 9, 8, 8, 8, 7:
              #    *  *              *                          *
              self.assertEqual(f(x), x*10)
          self.assertEqual(f_cnt, 4)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 12)
+        self.assertEqual(misses, 4)
+        self.assertEqual(currsize, 2)
  
  def test_main(verbose=None):
      test_classes = (
diff --git a/Misc/NEWS b/Misc/NEWS

index 320aaaa4a897711be5e7c0f55f612b3a1a4cec14..f39b2e2d9061392701cfabb0beb75b4ddba963c6 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -43,6 +43,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #10586: The statistics API for the new functools.lru_cache has
+  been changed to a single cache_info() method returning a named tuple.
+
  - Issue #10323: itertools.islice() now consumes the minimum number of
    inputs before stopping.  Formerly, the final state of the underlying
    iterator was undefined.
author	Nick Coghlan <ncoghlan@gmail.com>
	Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)
committer	Nick Coghlan <ncoghlan@gmail.com>
	Tue, 30 Nov 2010 06:19:46 +0000 (06:19 +0000)
Doc/library/functools.rst		patch \| blob \| history
Doc/whatsnew/3.2.rst		patch \| blob \| history
Lib/functools.py		patch \| blob \| history
Lib/test/test_functools.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history