Implement random.sample() using sets instead of dicts.

author Raymond Hettinger <python@rcn.com>

Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)

committer Raymond Hettinger <python@rcn.com>

Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)
author Raymond Hettinger <python@rcn.com>
Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)
committer Raymond Hettinger <python@rcn.com>
Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)
diff --git a/Lib/random.py b/Lib/random.py

index 80af32b0f1f116178206a60a09e80cd616983ee6..3e6941e17828a5c1d125ce3b40ebe58a240a797e 100644 (file)
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -41,7 +41,7 @@ General notes on the underlying Mersenne Twister core generator:
  
  from warnings import warn as _warn
  from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
-from math import log as _log, exp as _exp, pi as _pi, e as _e
+from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
  from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
  from os import urandom as _urandom
  from binascii import hexlify as _hexlify
@@ -286,15 +286,14 @@ class Random(_random.Random):
          """
  
          # Sampling without replacement entails tracking either potential
-        # selections (the pool) in a list or previous selections in a
-        # dictionary.
+        # selections (the pool) in a list or previous selections in a set.
  
          # When the number of selections is small compared to the
          # population, then tracking selections is efficient, requiring
-        # only a small dictionary and an occasional reselection.  For
+        # only a small set and an occasional reselection.  For
          # a larger number of selections, the pool tracking method is
          # preferred since the list takes less space than the
-        # dictionary and it doesn't suffer from frequent reselections.
+        # set and it doesn't suffer from frequent reselections.
  
          n = len(population)
          if not 0 <= k <= n:
@@ -302,7 +301,10 @@ class Random(_random.Random):
          random = self.random
          _int = int
          result = [None] * k
-        if n < 6 * k:     # if n len list takes less space than a k len dict
+        setsize = 21        # size of a small set minus size of an empty list
+        if k > 5:
+              setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
+        if n <= setsize:    # is an n-length list smaller than a k-length set
              pool = list(population)
              for i in xrange(k):         # invariant:  non-selected at [0,n-i)
                  j = _int(random() * (n-i))
@@ -311,14 +313,16 @@ class Random(_random.Random):
          else:
              try:
                  n > 0 and (population[0], population[n//2], population[n-1])
-            except (TypeError, KeyError):   # handle sets and dictionaries
+            except (TypeError, KeyError):   # handle non-sequence iterables
                  population = tuple(population)
-            selected = {}
+            selected = set()
+            selected_add = selected.add
              for i in xrange(k):
                  j = _int(random() * n)
                  while j in selected:
                      j = _int(random() * n)
-                result[i] = selected[j] = population[j]
+                selected_add(j)
+                result[i] = population[j]
          return result
  
  ## -------------------- real-valued distributions  -------------------
author	Raymond Hettinger <python@rcn.com>
	Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)
committer	Raymond Hettinger <python@rcn.com>
	Fri, 19 Aug 2005 01:36:35 +0000 (01:36 +0000)