Issue #13703: add a way to randomize the hash values of basic types (str, bytes,...

author Georg Brandl <georg@python.org>

Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)

committer Georg Brandl <georg@python.org>

Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)
author Georg Brandl <georg@python.org>
Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)
committer Georg Brandl <georg@python.org>
Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst

index bb9f92058789bb5c51cbc463af8b7cec513ffbf1..95947560f21ce8e987a0391b01186cd267bb4a27 100644 (file)
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -220,8 +220,12 @@ always available.
     :const:`ignore_environment`   :option:`-E`
     :const:`verbose`              :option:`-v`
     :const:`bytes_warning`        :option:`-b`
+   :const:`hash_randomization`   :option:`-R`
     ============================= =============================
  
+   .. versionadded:: 3.1.5
+      The ``hash_randomization`` attribute.
+
  
  .. data:: float_info
  
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst

index 6f874b6764a5312410370c1c384c4d3748f50df0..7ded851ab6f2073a81ef43eb81f77430ce01e887 100644 (file)
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -1265,6 +1265,8 @@ Basic customization
     inheritance of :meth:`__hash__` will be blocked, just as if :attr:`__hash__`
     had been explicitly set to :const:`None`.
  
+   See also the :option:`-R` command-line option.
+
  
  .. method:: object.__bool__(self)
  
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst

index 3fe0c7aedd3beb1afc1d2dc1fc1aef4a33128ac5..11e2d7d9c0c8c0524ee1ce03eab74dab18b667f8 100644 (file)
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -21,7 +21,7 @@ Command line
  
  When invoking Python, you may specify any of these options::
  
-    python [-bBdEhiOsSuvVWx?] [-c command | -m module-name | script | - ] [args]
+    python [-bBdEhiORsSuvVWx?] [-c command | -m module-name | script | - ] [args]
  
  The most common use case is, of course, a simple invocation of a script::
  
@@ -215,6 +215,29 @@ Miscellaneous options
     Discard docstrings in addition to the :option:`-O` optimizations.
  
  
+.. cmdoption:: -R
+
+   Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes
+   and datetime objects are "salted" with an unpredictable random value.
+   Although they remain constant within an individual Python process, they are
+   not predictable between repeated invocations of Python.
+
+   This is intended to provide protection against a denial-of-service caused by
+   carefully-chosen inputs that exploit the worst case performance of a dict
+   insertion, O(n^2) complexity.  See
+   http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+   Changing hash values affects the order in which keys are retrieved from a
+   dict.  Although Python has never made guarantees about this ordering (and it
+   typically varies between 32-bit and 64-bit builds), enough real-world code
+   implicitly relies on this non-guaranteed behavior that the randomization is
+   disabled by default.
+
+   See also :envvar:`PYTHONHASHSEED`.
+
+   .. versionadded:: 3.1.5
+
+
  .. cmdoption:: -s
  
     Don't add user site directory to sys.path
@@ -314,6 +337,7 @@ Miscellaneous options
  
     .. note:: The line numbers in error messages will be off by one.
  
+
  Options you shouldn't use
  ~~~~~~~~~~~~~~~~~~~~~~~~~
  
@@ -328,6 +352,7 @@ Options you shouldn't use
      Reserved for alternative implementations of Python to use for their own
      purposes.
  
+
  .. _using-on-envvars:
  
  Environment variables
@@ -435,6 +460,27 @@ These environment variables influence Python's behavior.
     import of source modules.
  
  
+.. envvar:: PYTHONHASHSEED
+
+   If this variable is set to ``random``, the effect is the same as specifying
+   the :option:`-R` option: a random value is used to seed the hashes of str,
+   bytes and datetime objects.
+
+   If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
+   seed for generating the hash() of the types covered by the hash
+   randomization.
+
+   Its purpose is to allow repeatable hashing, such as for selftests for the
+   interpreter itself, or to allow a cluster of Python processes to share hash
+   values.
+
+   The integer must be a decimal number in the range [0,4294967295].  Specifying
+   the value 0 will lead to the same hash values as when hash randomization is
+   disabled.
+
+   .. versionadded:: 3.1.5
+
+
  .. envvar:: PYTHONIOENCODING
  
     Overrides the encoding used for stdin/stdout/stderr, in the syntax
diff --git a/Include/object.h b/Include/object.h

index ef73a213d16f1e5aae85e6a32dcc753d34c27605..7848cf4c6ebdf8b89ecf13a62ea8d01e1bd8f528 100644 (file)
--- a/Include/object.h
+++ b/Include/object.h
@@ -473,6 +473,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
  PyAPI_FUNC(long) _Py_HashDouble(double);
  PyAPI_FUNC(long) _Py_HashPointer(void*);
  
+typedef struct {
+    long prefix;
+    long suffix;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
+
  /* Helper for passing objects to printf and the like */
  #define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
  
diff --git a/Include/pydebug.h b/Include/pydebug.h

index 0a31f5ba01e1aa47aa01b1f845b53d544232c47d..5969e4441959fe4373ae753a11d63d93ff64a46d 100644 (file)
--- a/Include/pydebug.h
+++ b/Include/pydebug.h
@@ -19,6 +19,7 @@ PyAPI_DATA(int) Py_DivisionWarningFlag;
  PyAPI_DATA(int) Py_DontWriteBytecodeFlag;
  PyAPI_DATA(int) Py_NoUserSiteDirectory;
  PyAPI_DATA(int) Py_UnbufferedStdioFlag;
+PyAPI_DATA(int) Py_HashRandomizationFlag;
  
  /* this is a wrapper around getenv() that pays attention to
     Py_IgnoreEnvironmentFlag.  It should be used for getting variables like
diff --git a/Include/pythonrun.h b/Include/pythonrun.h

index 96a0e232f9dadbb96a707da0990a3179bd1fc495..af4aa0877ed9e456f492a77fe8395a2365cb3085 100644 (file)
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -174,6 +174,8 @@ typedef void (*PyOS_sighandler_t)(int);
  PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
  PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
  
+/* Random */
+PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
  
  #ifdef __cplusplus
  }
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py

index 6d88931dcbdce308adbf9e4894d1e64f51db168c..ba2bc1d3426ad269d7cdc6fba61bb6255a758a37 100644 (file)
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -31,7 +31,9 @@ Encoding basic Python object hierarchies::
  Compact encoding::
  
      >>> import json
-    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',', ':'))
+    >>> from collections import OrderedDict
+    >>> mydict = OrderedDict([('4', 5), ('6', 7)])
+    >>> json.dumps([1,2,3,mydict], separators=(',', ':'))
      '[1,2,3,{"4":5,"6":7}]'
  
  Pretty printing::
diff --git a/Lib/os.py b/Lib/os.py

index b46c02f5800fda85e444b1e6ccb405db64010612..8f66472390a3f7f5bcf34181b8e4ed9072775b59 100644 (file)
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -611,23 +611,6 @@ try:
  except NameError: # statvfs_result may not exist
      pass
  
-if not _exists("urandom"):
-    def urandom(n):
-        """urandom(n) -> str
-
-        Return a string of n random bytes suitable for cryptographic use.
-
-        """
-        try:
-            _urandomfd = open("/dev/urandom", O_RDONLY)
-        except (OSError, IOError):
-            raise NotImplementedError("/dev/urandom (or equivalent) not found")
-        bs = b""
-        while len(bs) < n:
-            bs += read(_urandomfd, n - len(bs))
-        close(_urandomfd)
-        return bs
-
  # Supply os.popen()
  def popen(cmd, mode="r", buffering=-1):
      if not isinstance(cmd, str):
diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py

index c34bd59e9cc20b7a8a4f9e4b30249262c8bb4d35..592a78be41b6cd4ee0ebf0fa3c9707dcd0d39359 100644 (file)
--- a/Lib/test/mapping_tests.py
+++ b/Lib/test/mapping_tests.py
@@ -14,7 +14,7 @@ class BasicTestMappingProtocol(unittest.TestCase):
      def _reference(self):
          """Return a dictionary of values which are invariant by storage
          in the object under test."""
-        return {1:2, "key1":"value1", "key2":(1,2,3)}
+        return {"1": "2", "key1":"value1", "key2":(1,2,3)}
      def _empty_mapping(self):
          """Return an empty mapping object"""
          return self.type2test()
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py

index 98d68bd1b483ba81f5061e24d4e4f7a23b58ade5..d203600e295413c336bf735c9a7cec21937cbe1b 100755 (executable)
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -428,6 +428,11 @@ def main(tests=None, testdir=None, verbose=0, quiet=False, generate=False,
          except ValueError:
              print("Couldn't find starting test (%s), using all tests" % start)
      if randomize:
+        hashseed = os.getenv('PYTHONHASHSEED')
+        if not hashseed:
+            os.environ['PYTHONHASHSEED'] = str(random_seed)
+            os.execv(sys.executable, [sys.executable] + sys.argv)
+            return
          random.seed(random_seed)
          print("Using random seed", random_seed)
          random.shuffle(tests)
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py

index 0699cf61a2154065d5645e4f224dc68bdafd9003..fca28d37c36beed997074514bbd78281ce58f96e 100644 (file)
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -3,7 +3,6 @@
  
  import sys
  import os
-import re
  import os.path
  import tempfile
  import subprocess
@@ -19,11 +18,15 @@ def _assert_python(expected_success, *args, **env_vars):
      cmd_line = [sys.executable]
      if not env_vars:
          cmd_line.append('-E')
-    cmd_line.extend(args)
      # Need to preserve the original environment, for in-place testing of
      # shared library builds.
      env = os.environ.copy()
+    # But a special flag can be set to override this -- in that case, the
+    # caller is responsible for passing the full environment.
+    if env_vars.pop('__cleanenv', None):
+        env = {}
      env.update(env_vars)
+    cmd_line.extend(args)
      p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                           env=env)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py

index c4b8be5f6cbf4c8788c78ad79e25b2cba15b866f..eacd7a6ae436969853bd4d2a694889f374dd9938 100644 (file)
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -4,7 +4,6 @@
  
  import os
  import test.support, unittest
-import os
  import sys
  import subprocess
  
@@ -190,6 +189,22 @@ sys.stdout.buffer.write(path)"""
              self.assertTrue(path1.encode('ascii') in stdout)
              self.assertTrue(path2.encode('ascii') in stdout)
  
+    def test_hash_randomization(self):
+        # Verify that -R enables hash randomization:
+        self.verify_valid_flag('-R')
+        hashes = []
+        for i in range(2):
+            code = 'print(hash("spam"))'
+            data, rc = self.start_python_and_exit_code('-R', '-c', code)
+            self.assertEqual(rc, 0)
+            hashes.append(data)
+        self.assertNotEqual(hashes[0], hashes[1])
+
+        # Verify that sys.flags contains hash_randomization
+        code = 'import sys; print("random is", sys.flags.hash_randomization)'
+        data, rc = self.start_python_and_exit_code('-R', '-c', code)
+        self.assertEqual(rc, 0)
+        self.assertIn(b'random is 1', data)
  
  def test_main():
      test.support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py

index 0ce85f0b6975ff81ab3261f0cb596571c27bb256..077f5da5075d3426fa2616625a4b5d90de042c56 100644 (file)
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -4300,8 +4300,18 @@ class DictProxyTests(unittest.TestCase):
  
      def test_repr(self):
          # Testing dict_proxy.__repr__
+        def sorted_dict_repr(repr_):
+            # Given the repr of a dict, sort the keys
+            assert repr_.startswith('{')
+            assert repr_.endswith('}')
+            kvs = repr_[1:-1].split(', ')
+            return '{' + ', '.join(sorted(kvs)) + '}'
          dict_ = {k: v for k, v in self.C.__dict__.items()}
-        self.assertEqual(repr(self.C.__dict__), 'dict_proxy({!r})'.format(dict_))
+        repr_ = repr(self.C.__dict__)
+        self.assert_(repr_.startswith('dict_proxy('))
+        self.assert_(repr_.endswith(')'))
+        self.assertEqual(sorted_dict_repr(repr_[len('dict_proxy('):-len(')')]),
+                         sorted_dict_repr('{!r}'.format(dict_)))
  
  
  class PTypesLongInitTest(unittest.TestCase):
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py

index 569e5e054c4451a0ef3c2fb5bb17a19962e509f1..f5736b2de7fa6af8ff4d95bc99e9a2836ab3c31e 100644 (file)
--- a/Lib/test/test_hash.py
+++ b/Lib/test/test_hash.py
@@ -3,10 +3,16 @@
  #
  # Also test that hash implementations are inherited as expected
  
+import datetime
+import os
+import struct
  import unittest
  from test import support
+from test.script_helper import assert_python_ok
  from collections import Hashable
  
+IS_64BIT = (struct.calcsize('l') == 8)
+
  
  class HashEqualityTestCase(unittest.TestCase):
  
@@ -118,10 +124,92 @@ class HashBuiltinsTestCase(unittest.TestCase):
          for obj in self.hashes_to_check:
              self.assertEqual(hash(obj), _default_hash(obj))
  
+class HashRandomizationTests(unittest.TestCase):
+
+    # Each subclass should define a field "repr_", containing the repr() of
+    # an object to be tested
+
+    def get_hash_command(self, repr_):
+        return 'print(hash(%s))' % repr_
+
+    def get_hash(self, repr_, seed=None):
+        env = os.environ.copy()
+        env['__cleanenv'] = True  # signal to assert_python not to do a copy
+                                  # of os.environ on its own
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        else:
+            env.pop('PYTHONHASHSEED', None)
+        out = assert_python_ok(
+            '-c', self.get_hash_command(repr_),
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash(self.repr_, seed='random')
+        run2 = self.get_hash(self.repr_, seed='random')
+        self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        if IS_64BIT:
+            known_hash_of_obj = 1453079729188098211
+        else:
+            known_hash_of_obj = -1600925533
+
+        # Randomization is disabled by default:
+        self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+        # It can also be disabled by setting the seed to 0:
+        self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        # Note that all types share the same values:
+        if IS_64BIT:
+            h = -4410911502303878509
+        else:
+            h = -206076799
+        self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr('abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests):
+    repr_ = repr(b'abc')
+
+    def test_empty_string(self):
+        self.assertEqual(hash(b""), 0)
+
+class DatetimeTests(HashRandomizationTests):
+    def get_hash_command(self, repr_):
+        return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests):
+    repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests):
+    repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests):
+    repr_ = repr(datetime.time(0))
+
+
  def test_main():
      support.run_unittest(HashEqualityTestCase,
-                              HashInheritanceTestCase,
-                              HashBuiltinsTestCase)
+                         HashInheritanceTestCase,
+                         HashBuiltinsTestCase,
+                         StrHashRandomizationTests,
+                         BytesHashRandomizationTests,
+                         DatetimeDateTests,
+                         DatetimeDatetimeTests,
+                         DatetimeTimeTests)
  
  
  if __name__ == "__main__":
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py

index cbf0c1647d34dc3e283858bd17019b07b8ef6606..bff4f0bdea090bdddd0f1f3d44a8769ce10899dd 100644 (file)
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -9,6 +9,7 @@ import warnings
  import sys
  import shutil
  from test import support
+from test.script_helper import assert_python_ok
  
  # Detect whether we're on a Linux system that uses the (now outdated
  # and unmaintained) linuxthreads threading library.  There's an issue
@@ -574,14 +575,33 @@ class DevNullTests(unittest.TestCase):
          f.close()
  
  class URandomTests(unittest.TestCase):
-    def test_urandom(self):
-        try:
-            self.assertEqual(len(os.urandom(1)), 1)
-            self.assertEqual(len(os.urandom(10)), 10)
-            self.assertEqual(len(os.urandom(100)), 100)
-            self.assertEqual(len(os.urandom(1000)), 1000)
-        except NotImplementedError:
-            pass
+    def test_urandom_length(self):
+        self.assertEqual(len(os.urandom(0)), 0)
+        self.assertEqual(len(os.urandom(1)), 1)
+        self.assertEqual(len(os.urandom(10)), 10)
+        self.assertEqual(len(os.urandom(100)), 100)
+        self.assertEqual(len(os.urandom(1000)), 1000)
+
+    def test_urandom_value(self):
+        data1 = os.urandom(16)
+        data2 = os.urandom(16)
+        self.assertNotEqual(data1, data2)
+
+    def get_urandom_subprocess(self, count):
+        """Return `count` bytes read from os.urandom() in a child process."""
+        code = '\n'.join((
+            'import os, sys',
+            'data = os.urandom(%s)' % count,
+            'sys.stdout.buffer.write(data)',
+            'sys.stdout.buffer.flush()'))
+        stdout = assert_python_ok('-c', code)[1]
+        self.assertEqual(len(stdout), count)  # must track the requested size
+        return stdout
+
+    def test_urandom_subprocess(self):
+        data1 = self.get_urandom_subprocess(16)
+        data2 = self.get_urandom_subprocess(16)
+        self.assertNotEqual(data1, data2)
  
  class ExecTests(unittest.TestCase):
      @unittest.skipIf(USING_LINUXTHREADS,
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py

index 99d5c70e0a362f3860111abf42823a75a7424843..5d5e2324e9df465c30c4959c42c1424c3b6e59c8 100644 (file)
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -734,6 +734,17 @@ class TestBasicOps(unittest.TestCase):
          if self.repr is not None:
              self.assertEqual(repr(self.set), self.repr)
  
+    def check_repr_against_values(self):
+        text = repr(self.set)
+        self.assertTrue(text.startswith('{'))
+        self.assertTrue(text.endswith('}'))
+
+        result = text[1:-1].split(', ')
+        result.sort()
+        sorted_repr_values = [repr(value) for value in self.values]
+        sorted_repr_values.sort()
+        self.assertEqual(result, sorted_repr_values)
+
      def test_print(self):
          try:
              fo = open(support.TESTFN, "w")
@@ -892,7 +903,9 @@ class TestBasicOpsString(TestBasicOps):
          self.set    = set(self.values)
          self.dup    = set(self.values)
          self.length = 3
-        self.repr   = "{'a', 'c', 'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
  
  #------------------------------------------------------------------------------
  
@@ -903,7 +916,9 @@ class TestBasicOpsBytes(TestBasicOps):
          self.set    = set(self.values)
          self.dup    = set(self.values)
          self.length = 3
-        self.repr   = "{b'a', b'c', b'b'}"
+
+    def test_repr(self):
+        self.check_repr_against_values()
  
  #------------------------------------------------------------------------------
  
@@ -916,11 +931,13 @@ class TestBasicOpsMixedStringBytes(TestBasicOps):
          self.set    = set(self.values)
          self.dup    = set(self.values)
          self.length = 4
-        self.repr   = "{'a', b'a', 'b', b'b'}"
  
      def tearDown(self):
          warnings.filters = self.warning_filters
  
+    def test_repr(self):
+        self.check_repr_against_values()
+
  #==============================================================================
  
  def baditer():
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py

index 11685a420230c8f13bccddcefe4966120d8ed5ec..7732c4c325f35f2fe36b8d62dd75df30743ec48f 100644 (file)
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -446,7 +446,7 @@ class SysModuleTest(unittest.TestCase):
          attrs = ("debug", "division_warning",
                   "inspect", "interactive", "optimize", "dont_write_bytecode",
                   "no_user_site", "no_site", "ignore_environment", "verbose",
-                 "bytes_warning")
+                 "bytes_warning", "hash_randomization")
          for attr in attrs:
              self.assertTrue(hasattr(sys.flags, attr), attr)
              self.assertEqual(type(getattr(sys.flags, attr)), int, attr)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py

index 4d3509ae7d2f1726910745f4a9a9f1a06ed39af8..482acc1c0f224d7fa1f52025ce3e039426053d63 100644 (file)
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -12,6 +12,7 @@ import os
  import sys
  import tempfile
  import warnings
+import collections
  
  def hexescape(char):
      """Escape char as RFC 2396 specifies"""
@@ -840,8 +841,9 @@ class urlencode_Tests(unittest.TestCase):
          self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
          self.assertEqual("a=None&a=a",
                           urllib.parse.urlencode({"a": [None, "a"]}, True))
+        data = collections.OrderedDict([("a", 1), ("b", 1)])
          self.assertEqual("a=a&a=b",
-                         urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
+                         urllib.parse.urlencode({"a": data}, True))
  
      def test_urlencode_encoding(self):
          # ASCII encoding. Expect %3F with errors="replace'
diff --git a/Lib/tkinter/test/test_ttk/test_functions.py b/Lib/tkinter/test/test_ttk/test_functions.py

index df593cd710920aa11e2131e35f784247453a5f85..2303e4cd468950e949dbe27dadfdd69cc30bcab4 100644 (file)
--- a/Lib/tkinter/test/test_ttk/test_functions.py
+++ b/Lib/tkinter/test/test_ttk/test_functions.py
@@ -143,7 +143,7 @@ class InternalFunctionsTest(unittest.TestCase):
              ('a', 'b', 'c')), ("test {a b} c", ()))
          # state spec and options
          self.assertEqual(ttk._format_elemcreate('image', False, 'test',
-            ('a', 'b'), a='x', b='y'), ("test a b", ("-a", "x", "-b", "y")))
+            ('a', 'b'), a='x'), ("test a b", ("-a", "x")))
          # format returned values as a tcl script
          # state spec with multiple states and an option with a multivalue
          self.assertEqual(ttk._format_elemcreate('image', True, 'test',
diff --git a/Makefile.pre.in b/Makefile.pre.in

index ed1dc33509eadbbe37f105dc1f195a2cdfd090af..e4470bdb88da95ddf342c5c8d8befdaf785041a0 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -305,6 +305,7 @@ PYTHON_OBJS=        \
                 Python/pymath.o \
                 Python/pystate.o \
                 Python/pythonrun.o \
+               Python/random.o \
                 Python/structmember.o \
                 Python/symtable.o \
                 Python/sysmodule.o \
diff --git a/Misc/NEWS b/Misc/NEWS

index 6e9569733778e115a572270b68c639c8e0d20308..486da13674d91ea03da37e23dead43d391e2282d 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@ What's New in Python 3.1.5?
  Core and Builtins
  -----------------
  
+- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
+  environment variable, to provide an opt-in way to protect against denial of
+  service attacks due to hash collisions within the dict and set types.  Patch
+  by David Malcolm, based on work by Victor Stinner.
+
  Library
  -------
  
diff --git a/Misc/python.man b/Misc/python.man

index 411a43a3e7710f72a387c69c95859b3457a3ed1d..5b4eeef6546e89f6eb8249f03f17dd82e316d62d 100644 (file)
--- a/Misc/python.man
+++ b/Misc/python.man
@@ -34,6 +34,9 @@ python \- an interpreted, interactive, object-oriented programming language
  .B \-OO
  ]
  [
+.B \-R
+]
+[
  .B -Q
  .I argument
  ]
@@ -145,6 +148,18 @@ to \fI.pyo\fP.  Given twice, causes docstrings to be discarded.
  .B \-OO
  Discard docstrings in addition to the \fB-O\fP optimizations.
  .TP
+.B \-R
+Turn on "hash randomization", so that the hash() values of str, bytes and
+datetime objects are "salted" with an unpredictable pseudo-random value.
+Although they remain constant within an individual Python process, they are
+not predictable between repeated invocations of Python.
+.IP
+This is intended to provide protection against a denial of service
+caused by carefully-chosen inputs that exploit the worst case performance
+of a dict insertion, O(n^2) complexity.  See
+http://www.ocert.org/advisories/ocert-2011-003.html
+for details.
+.TP
  .BI "\-Q " argument
  Division control; see PEP 238.  The argument must be one of "old" (the
  default, int/int and long/long return an int or long), "new" (new
@@ -403,6 +418,20 @@ the \fB\-u\fP option.
  If this is set to a non-empty string it is equivalent to specifying
  the \fB\-v\fP option. If set to an integer, it is equivalent to
  specifying \fB\-v\fP multiple times. 
+.IP PYTHONHASHSEED
+If this variable is set to "random", the effect is the same as specifying
+the \fB-R\fP option: a random value is used to seed the hashes of str,
+bytes and datetime objects.
+
+If PYTHONHASHSEED is set to an integer value, it is used as a fixed seed for
+generating the hash() of the types covered by the hash randomization.  Its
+purpose is to allow repeatable hashing, such as for selftests for the
+interpreter itself, or to allow a cluster of Python processes to share hash
+values.
+
+The integer must be a decimal number in the range [0,4294967295].  Specifying
+the value 0 will lead to the same hash values as when hash randomization is
+disabled.
  .SH AUTHOR
  The Python Software Foundation: http://www.python.org/psf
  .SH INTERNET RESOURCES
diff --git a/Modules/datetimemodule.c b/Modules/datetimemodule.c

index 0ac51aaa7ff319c1c81ef3d4f67c42f4d749dc21..f3103eaf69956a93e9ee0492de4f69f0ab22e4b2 100644 (file)
--- a/Modules/datetimemodule.c
+++ b/Modules/datetimemodule.c
@@ -2566,10 +2566,12 @@ generic_hash(unsigned char *data, int len)
      register long x;
  
      p = (unsigned char *) data;
-    x = *p << 7;
+    x = _Py_HashSecret.prefix;
+    x ^= *p << 7;
      while (--len >= 0)
          x = (1000003*x) ^ *p++;
      x ^= len;
+    x ^= _Py_HashSecret.suffix;
      if (x == -1)
          x = -2;
  
diff --git a/Modules/main.c b/Modules/main.c

index eb9bb5451723559372ee145a4344ed73b98edb29..9607cb334db25a74d36b037e43ec334691c79b0e 100644 (file)
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -47,7 +47,7 @@ static wchar_t **orig_argv;
  static int  orig_argc;
  
  /* command line options */
-#define BASE_OPTS L"bBc:dEhiJm:OsStuvVW:xX?"
+#define BASE_OPTS L"bBc:dEhiJm:ORsStuvVW:xX?"
  
  #define PROGRAM_OPTS BASE_OPTS
  
@@ -72,6 +72,9 @@ static char *usage_2 = "\
  -m mod : run library module as a script (terminates option list)\n\
  -O     : optimize generated bytecode slightly; also PYTHONOPTIMIZE=x\n\
  -OO    : remove doc-strings in addition to the -O optimizations\n\
+-R     : use a pseudo-random salt to make hash() values of various types be\n\
+         unpredictable between separate invocations of the interpreter, as\n\
+         a defence against denial-of-service attacks\n\
  -s     : don't add user site directory to sys.path; also PYTHONNOUSERSITE\n\
  -S     : don't imply 'import site' on initialization\n\
  ";
@@ -99,6 +102,12 @@ PYTHONHOME   : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
  PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
  PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
  ";
+static char *usage_6 = "\
+PYTHONHASHSEED: if this variable is set to 'random', the effect is the same\n\
+   as specifying the -R option: a random value is used to seed the hashes of\n\
+   str, bytes and datetime objects.  It can also be set to an integer\n\
+   in the range [0,4294967295] to get hash values with a predictable seed.\n\
+";
  
  #ifndef MS_WINDOWS
  static FILE*
@@ -136,6 +145,7 @@ usage(int exitcode, wchar_t* program)
          fputs(usage_3, f);
          fprintf(f, usage_4, DELIM);
          fprintf(f, usage_5, DELIM, PYTHONHOMEHELP);
+        fputs(usage_6, f);
      }
  #if defined(__VMS)
      if (exitcode == 0) {
@@ -373,6 +383,10 @@ Py_Main(int argc, wchar_t **argv)
              PySys_AddWarnOption(_PyOS_optarg);
              break;
  
+        case 'R':
+            Py_HashRandomizationFlag++;
+            break;
+
          /* This space reserved for other options */
  
          default:
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c

index a836af6cd05d272f2125a220c1edd0d44978bac2..dbbc29f95efdddda70e397c3f66b22c30a967b97 100644 (file)
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -4022,7 +4022,7 @@ posix_getgroups(PyObject *self, PyObject *noargs)
  #endif
      gid_t grouplist[MAX_GROUPS];
  
-    /* On MacOSX getgroups(2) can return more than MAX_GROUPS results 
+    /* On MacOSX getgroups(2) can return more than MAX_GROUPS results
       * This is a helper variable to store the intermediate result when
       * that happens.
       *
@@ -6942,82 +6942,6 @@ posix_getloadavg(PyObject *self, PyObject *noargs)
  }
  #endif
  
-#ifdef MS_WINDOWS
-
-PyDoc_STRVAR(win32_urandom__doc__,
-"urandom(n) -> str\n\n\
-Return n random bytes suitable for cryptographic use.");
-
-typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
-              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
-              DWORD dwFlags );
-typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
-              BYTE *pbBuffer );
-
-static CRYPTGENRANDOM pCryptGenRandom = NULL;
-/* This handle is never explicitly released. Instead, the operating
-   system will release it when the process terminates. */
-static HCRYPTPROV hCryptProv = 0;
-
-static PyObject*
-win32_urandom(PyObject *self, PyObject *args)
-{
-    int howMany;
-    PyObject* result;
-
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
-        return NULL;
-    if (howMany < 0)
-        return PyErr_Format(PyExc_ValueError,
-                            "negative argument not allowed");
-
-    if (hCryptProv == 0) {
-        HINSTANCE hAdvAPI32 = NULL;
-        CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
-
-        /* Obtain handle to the DLL containing CryptoAPI
-           This should not fail         */
-        hAdvAPI32 = GetModuleHandle("advapi32.dll");
-        if(hAdvAPI32 == NULL)
-            return win32_error("GetModuleHandle", NULL);
-
-        /* Obtain pointers to the CryptoAPI functions
-           This will fail on some early versions of Win95 */
-        pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
-                                        hAdvAPI32,
-                                        "CryptAcquireContextA");
-        if (pCryptAcquireContext == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptAcquireContextA not found");
-
-        pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(
-                                        hAdvAPI32, "CryptGenRandom");
-        if (pCryptGenRandom == NULL)
-            return PyErr_Format(PyExc_NotImplementedError,
-                                "CryptGenRandom not found");
-
-        /* Acquire context */
-        if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
-                                   PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
-            return win32_error("CryptAcquireContext", NULL);
-    }
-
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        memset(PyBytes_AS_STRING(result), 0, howMany); /* zero seed */
-        if (! pCryptGenRandom(hCryptProv, howMany, (unsigned char*)
-                              PyBytes_AS_STRING(result))) {
-            Py_DECREF(result);
-            return win32_error("CryptGenRandom", NULL);
-        }
-    }
-    return result;
-}
-#endif
-
  PyDoc_STRVAR(device_encoding__doc__,
  "device_encoding(fd) -> str\n\n\
  Return a string describing the encoding of the device\n\
@@ -7055,41 +6979,35 @@ device_encoding(PyObject *self, PyObject *args)
      return Py_None;
  }
  
-#ifdef __VMS
-/* Use openssl random routine */
-#include <openssl/rand.h>
-PyDoc_STRVAR(vms_urandom__doc__,
+PyDoc_STRVAR(posix_urandom__doc__,
  "urandom(n) -> str\n\n\
  Return n random bytes suitable for cryptographic use.");
  
-static PyObject*
-vms_urandom(PyObject *self, PyObject *args)
+static PyObject *
+posix_urandom(PyObject *self, PyObject *args)
  {
-    int howMany;
-    PyObject* result;
+    Py_ssize_t size;
+    PyObject *result;
+    int ret;
  
-    /* Read arguments */
-    if (! PyArg_ParseTuple(args, "i:urandom", &howMany))
+     /* Read arguments */
+    if (!PyArg_ParseTuple(args, "n:urandom", &size))
          return NULL;
-    if (howMany < 0)
+    if (size < 0)
          return PyErr_Format(PyExc_ValueError,
                              "negative argument not allowed");
+    result = PyBytes_FromStringAndSize(NULL, size);
+    if (result == NULL)
+        return NULL;
  
-    /* Allocate bytes */
-    result = PyBytes_FromStringAndSize(NULL, howMany);
-    if (result != NULL) {
-        /* Get random data */
-        if (RAND_pseudo_bytes((unsigned char*)
-                              PyBytes_AS_STRING(result),
-                              howMany) < 0) {
-            Py_DECREF(result);
-            return PyErr_Format(PyExc_ValueError,
-                                "RAND_pseudo_bytes");
-        }
+    ret = _PyOS_URandom(PyBytes_AS_STRING(result),
+                        PyBytes_GET_SIZE(result));
+    if (ret == -1) {
+        Py_DECREF(result);
+        return NULL;
      }
      return result;
  }
-#endif
  
  static PyMethodDef posix_methods[] = {
      {"access",          posix_access, METH_VARARGS, posix_access__doc__},
@@ -7374,12 +7292,7 @@ static PyMethodDef posix_methods[] = {
  #ifdef HAVE_GETLOADAVG
      {"getloadavg",      posix_getloadavg, METH_NOARGS, posix_getloadavg__doc__},
  #endif
- #ifdef MS_WINDOWS
-    {"urandom", win32_urandom, METH_VARARGS, win32_urandom__doc__},
- #endif
- #ifdef __VMS
-    {"urandom", vms_urandom, METH_VARARGS, vms_urandom__doc__},
- #endif
+    {"urandom",         posix_urandom,   METH_VARARGS, posix_urandom__doc__},
      {NULL,              NULL}            /* Sentinel */
  };
  
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c

index f2ee1310fa4fc151f28d8dbaa2a9a89a5bcf5d53..e6ab440caa952d74c05d19d42fb4d996652aa054 100644 (file)
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -899,11 +899,21 @@ bytes_hash(PyBytesObject *a)
      if (a->ob_shash != -1)
          return a->ob_shash;
      len = Py_SIZE(a);
+    /*
+      We make the hash of the empty string be 0, rather than using
+      (prefix ^ suffix), since this slightly obfuscates the hash secret
+    */
+    if (len == 0) {
+        a->ob_shash = 0;
+        return 0;
+    }
      p = (unsigned char *) a->ob_sval;
-    x = *p << 7;
+    x = _Py_HashSecret.prefix;
+    x ^= *p << 7;
      while (--len >= 0)
          x = (1000003*x) ^ *p++;
      x ^= Py_SIZE(a);
+    x ^= _Py_HashSecret.suffix;
      if (x == -1)
          x = -2;
      a->ob_shash = x;
diff --git a/Objects/object.c b/Objects/object.c

index ac57cd7e93c36d72cfcbfe87c5935ca0eda8f2fb..0b1c656cc6004cab6d16d67068d3dd6468cf715e 100644 (file)
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -712,6 +712,8 @@ PyObject_HashNotImplemented(PyObject *v)
      return -1;
  }
  
+_Py_HashSecret_t _Py_HashSecret;
+
  long
  PyObject_Hash(PyObject *v)
  {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 2cdbc0e6890d0072323f6646de2d632060846477..5986fb8ea072b47e51fbcd0017d968215098f55b 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7344,11 +7344,21 @@ unicode_hash(PyUnicodeObject *self)
      if (self->hash != -1)
          return self->hash;
      len = Py_SIZE(self);
+    /*
+      We make the hash of the empty string be 0, rather than using
+      (prefix ^ suffix), since this slightly obfuscates the hash secret
+    */
+    if (len == 0) {
+        self->hash = 0;
+        return 0;
+    }
      p = self->str;
-    x = *p << 7;
+    x = _Py_HashSecret.prefix;
+    x ^= *p << 7;
      while (--len >= 0)
          x = (1000003*x) ^ *p++;
      x ^= Py_SIZE(self);
+    x ^= _Py_HashSecret.suffix;
      if (x == -1)
          x = -2;
      self->hash = x;
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj

index 11cd3118a24b74b97600fa3dc07637910ca53112..045300ace9eda1850702330cb0256a8aa77bc496 100644 (file)
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -1778,6 +1778,10 @@
                                 RelativePath="..\Python\pythonrun.c"\r
                                 >\r
                         </File>\r
+                       <File\r
+                               RelativePath="..\Python\random.c"\r
+                               >\r
+                       </File>\r
                         <File\r
                                 RelativePath="..\Python\structmember.c"\r
                                 >\r
diff --git a/Python/pythonrun.c b/Python/pythonrun.c

index c4ae9211ffa1d4ea2c44e25fe29e670d5d801110..4474e79b0f4eef313fbaba88631caf3772fa4304 100644 (file)
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -73,6 +73,7 @@ extern void _PyUnicode_Init(void);
  extern void _PyUnicode_Fini(void);
  extern int _PyLong_Init(void);
  extern void PyLong_Fini(void);
+extern void _PyRandom_Init(void);
  
  #ifdef WITH_THREAD
  extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -91,6 +92,7 @@ int Py_FrozenFlag; /* Needed by getpath.c */
  int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */
  int Py_NoUserSiteDirectory = 0; /* for -s and site.py */
  int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */
+int Py_HashRandomizationFlag = 0; /* for -R and PYTHONHASHSEED */
  
  /* PyModule_GetWarningsModule is no longer necessary as of 2.6
  since _warnings is builtin.  This API should not be used. */
@@ -195,6 +197,12 @@ Py_InitializeEx(int install_sigs)
          Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p);
      if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0')
          Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p);
+    /* The variable is only tested for existence here; _PyRandom_Init will
+       check its value further. */
+    if ((p = Py_GETENV("PYTHONHASHSEED")) && *p != '\0')
+        Py_HashRandomizationFlag = add_flag(Py_HashRandomizationFlag, p);
+
+    _PyRandom_Init();
  
      interp = PyInterpreterState_New();
      if (interp == NULL)
diff --git a/Python/random.c b/Python/random.c

new file mode 100644 (file)

index 0000000..327166e
--- /dev/null
+++ b/Python/random.c
@@ -0,0 +1,302 @@
+#include "Python.h"
+#ifdef MS_WINDOWS
+#include <windows.h>
+#else
+#include <fcntl.h>
+#endif
+
+static int random_initialized = 0;
+
+#ifdef MS_WINDOWS
+typedef BOOL (WINAPI *CRYPTACQUIRECONTEXTA)(HCRYPTPROV *phProv,\
+              LPCSTR pszContainer, LPCSTR pszProvider, DWORD dwProvType,\
+              DWORD dwFlags );
+typedef BOOL (WINAPI *CRYPTGENRANDOM)(HCRYPTPROV hProv, DWORD dwLen,\
+              BYTE *pbBuffer );
+
+static CRYPTGENRANDOM pCryptGenRandom = NULL;
+/* This handle is never explicitly released. Instead, the operating
+   system will release it when the process terminates. */
+static HCRYPTPROV hCryptProv = 0;
+
+/* Look up the CryptoAPI functions in advapi32.dll and acquire hCryptProv.
+   Return 0 on success, -1 on error (exception if raise, else fatal error). */
+static int
+win32_urandom_init(int raise)
+{
+    HINSTANCE hAdvAPI32 = NULL;
+    CRYPTACQUIRECONTEXTA pCryptAcquireContext = NULL;
+
+    /* Obtain handle to the DLL containing CryptoAPI. This should not fail. */
+    hAdvAPI32 = GetModuleHandle("advapi32.dll");
+    if(hAdvAPI32 == NULL)
+        goto error;
+
+    /* Obtain the CryptoAPI functions. Fails on some early Win95 versions. */
+    pCryptAcquireContext = (CRYPTACQUIRECONTEXTA)GetProcAddress(
+                               hAdvAPI32, "CryptAcquireContextA");
+    if (pCryptAcquireContext == NULL)
+        goto error;
+
+    /* kept in a file-level static: win32_urandom() calls through it later */
+    pCryptGenRandom = (CRYPTGENRANDOM)GetProcAddress(hAdvAPI32,
+                                                     "CryptGenRandom");
+    if (pCryptGenRandom == NULL)
+        goto error;
+
+    /* Acquire context; on success the handle is stored in hCryptProv */
+    if (! pCryptAcquireContext(&hCryptProv, NULL, NULL,
+                               PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
+        goto error;
+    return 0;
+
+error:
+    if (raise)
+        PyErr_SetFromWindowsErr(0);  /* caller asked for a Python exception */
+    else
+        Py_FatalError("Failed to initialize Windows random API (CryptoGen)");
+    return -1;
+}
+
+/* Fill buffer with size pseudo-random bytes generated by the Windows CryptoGen
+   API. Return 0 on success, or -1 on error.  On error, a Python exception is
+   set if raise is non-zero; otherwise Py_FatalError() is called. */
+static int
+win32_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    Py_ssize_t chunk;
+
+    /* the provider handle is acquired lazily, on first use */
+    if (hCryptProv == 0 && win32_urandom_init(raise) == -1)
+        return -1;
+
+    while (size > 0)
+    {
+        /* the length argument is a DWORD: feed large requests in chunks */
+        chunk = size > INT_MAX ? INT_MAX : size;
+        if (!pCryptGenRandom(hCryptProv, (DWORD)chunk, buffer))
+        {
+            /* CryptGenRandom() failed */
+            if (raise)
+                PyErr_SetFromWindowsErr(0);
+            else
+                Py_FatalError("Failed to initialize the randomized hash "
+                              "secret using CryptoGen");
+            return -1;
+        }
+        buffer += chunk;
+        size -= chunk;
+    }
+    return 0;
+}
+#endif /* MS_WINDOWS */
+
+
+#ifdef __VMS
+/* Use openssl random routine */
+#include <openssl/rand.h>
+static int
+vms_urandom(unsigned char *buffer, Py_ssize_t size, int raise)
+{
+    /* RAND_pseudo_bytes() signals failure with a negative result */
+    if (RAND_pseudo_bytes(buffer, size) >= 0)
+        return 0;
+
+    if (!raise) {
+        Py_FatalError("Failed to initialize the randomized hash "
+                      "secret using RAND_pseudo_bytes");
+    } else {
+        PyErr_Format(PyExc_ValueError, "RAND_pseudo_bytes");
+    }
+    return -1;
+}
+#endif /* __VMS */
+
+
+#if !defined(MS_WINDOWS) && !defined(__VMS)
+
+/* Fill buffer with size bytes read from /dev/urandom.
+   Any failure is reported through Py_FatalError(). */
+static void
+dev_urandom_noraise(char *buffer, Py_ssize_t size)
+{
+    Py_ssize_t nread;
+    int fd;
+
+    assert (size > 0);
+
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0)
+        Py_FatalError("Failed to open /dev/urandom");
+
+    while (size > 0)
+    {
+        do {  /* retry reads that were interrupted by a signal */
+            nread = read(fd, buffer, (size_t)size);
+        } while (nread < 0 && errno == EINTR);
+        if (nread <= 0)
+        {
+            /* read error, or no more data before size bytes arrived */
+            Py_FatalError("Failed to read bytes from /dev/urandom");
+            break;
+        }
+        size -= (Py_ssize_t)nread;
+        buffer += nread;
+    }
+    close(fd);
+}
+
+/* Read size bytes from /dev/urandom into buffer; a size <= 0 reads nothing.
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+dev_urandom_python(char *buffer, Py_ssize_t size)
+{
+    int fd;
+    Py_ssize_t n;  /* result of the most recent read() */
+
+    if (size <= 0)
+        return 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    fd = open("/dev/urandom", O_RDONLY);
+    Py_END_ALLOW_THREADS
+    if (fd < 0)
+    {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, "/dev/urandom");
+        return -1;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    do {
+        do {
+            n = read(fd, buffer, (size_t)size);
+        } while (n < 0 && errno == EINTR);  /* retry interrupted reads */
+        if (n <= 0)
+            break;  /* failure or no data: reported after the loop */
+        buffer += n;
+        size -= (Py_ssize_t)n;
+    } while (0 < size);
+    Py_END_ALLOW_THREADS
+
+    if (n <= 0)
+    {
+        /* n < 0: read() failed (OSError); n == 0: no data (RuntimeError) */
+        if (n < 0)
+            PyErr_SetFromErrno(PyExc_OSError);  /* done before close(fd) */
+        else
+            PyErr_Format(PyExc_RuntimeError,
+                         "Failed to read %zi bytes from /dev/urandom",
+                         size);  /* size is the count still missing */
+        close(fd);
+        return -1;
+    }
+    close(fd);
+    return 0;
+}
+#endif /* !defined(MS_WINDOWS) && !defined(__VMS) */
+
+/* Fill buffer with size pseudo-random bytes produced by a linear
+   congruential generator (LCG) seeded with x0:
+
+       x(n+1) = (x(n) * 214013 + 2531011) % 2^32
+
+   Each output byte is bits 23..16 of the freshly advanced state. */
+static void
+lcg_urandom(unsigned int x0, unsigned char *buffer, size_t size)
+{
+    unsigned int state = x0;
+    unsigned char *out = buffer;
+    unsigned char *end = buffer + size;
+
+    while (out < end) {
+        /* unsigned wrap-around gives the modulo 2 ^ (8 * sizeof(int)) */
+        state = state * 214013 + 2531011;
+
+        *out++ = (state >> 16) & 0xff;
+    }
+}
+
+/* Fill buffer with size random bytes from the operating system random number
+   generator (RNG). This is the backend of os.urandom().
+
+   Return 0 on success, raise an exception and return -1 on error. */
+int
+_PyOS_URandom(void *buffer, Py_ssize_t size)
+{
+    /* nothing to do for an empty request */
+    if (size == 0)
+        return 0;
+    if (size < 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "negative argument not allowed");
+        return -1;
+    }
+
+    /* dispatch to the platform backend, asking it to raise on error */
+#if defined(MS_WINDOWS)
+    return win32_urandom((unsigned char *)buffer, size, 1);
+#elif defined(__VMS)
+    return vms_urandom((unsigned char *)buffer, size, 1);
+#else
+    return dev_urandom_python((char*)buffer, size);
+#endif
+}
+
+void
+_PyRandom_Init(void)
+{
+    char *env;
+    void *secret = &_Py_HashSecret;
+    Py_ssize_t secret_size = sizeof(_Py_HashSecret);
+
+    if (random_initialized)
+        return;
+    random_initialized = 1;
+
+    /*
+      By default, hash randomization is disabled, and only
+      enabled if PYTHONHASHSEED is set to non-empty or if
+      "-R" is provided at the command line:
+    */
+    if (!Py_HashRandomizationFlag) {
+        /* Disable the randomized hash: */
+        memset(secret, 0, secret_size);
+        return;
+    }
+
+    /*
+      Hash randomization is enabled.  Generate a per-process secret,
+      using PYTHONHASHSEED if provided.
+    */
+
+    env = Py_GETENV("PYTHONHASHSEED");
+    if (env && *env != '\0' & strcmp(env, "random") != 0) {
+        char *endptr = env;
+        unsigned long seed;
+        seed = strtoul(env, &endptr, 10);
+        if (*endptr != '\0'
+            || seed > 4294967295UL
+            || (errno == ERANGE && seed == ULONG_MAX))
+        {
+            Py_FatalError("PYTHONHASHSEED must be \"random\" or an integer "
+                          "in range [0; 4294967295]");
+        }
+        if (seed == 0) {
+            /* disable the randomized hash */
+            memset(secret, 0, secret_size);
+        }
+        else {
+            lcg_urandom(seed, (unsigned char*)secret, secret_size);
+        }
+    }
+    else {
+#ifdef MS_WINDOWS
+        (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+#else /* #ifdef MS_WINDOWS */
+# ifdef __VMS
+        vms_urandom((unsigned char *)secret, secret_size, 0);
+# else
+        dev_urandom_noraise((char*)secret, secret_size);
+# endif
+#endif
+    }
+}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c

index c688172dac3cac7f51df6b49de8b44b30a8c2d58..6a7e91432c0b5178f380201ef806f72d66b712df 100644 (file)
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1126,6 +1126,7 @@ static PyStructSequence_Field flags_fields[] = {
      /* {"unbuffered",                   "-u"}, */
      /* {"skip_first",                   "-x"}, */
      {"bytes_warning", "-b"},
+    {"hash_randomization", "-R"},
      {0}
  };
  
@@ -1134,9 +1135,9 @@ static PyStructSequence_Desc flags_desc = {
      flags__doc__,       /* doc */
      flags_fields,       /* fields */
  #ifdef RISCOS
-    12
+    13
  #else
-    11
+    12
  #endif
  };
  
@@ -1169,6 +1170,7 @@ make_flags(void)
      /* SetFlag(saw_unbuffered_flag); */
      /* SetFlag(skipfirstline); */
      SetFlag(Py_BytesWarningFlag);
+    SetFlag(Py_HashRandomizationFlag);
  #undef SetFlag
  
      if (PyErr_Occurred()) {
author	Georg Brandl <georg@python.org>
	Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)
committer	Georg Brandl <georg@python.org>
	Mon, 20 Feb 2012 18:54:16 +0000 (19:54 +0100)
Doc/library/sys.rst		patch \| blob \| history
Doc/reference/datamodel.rst		patch \| blob \| history
Doc/using/cmdline.rst		patch \| blob \| history
Include/object.h		patch \| blob \| history
Include/pydebug.h		patch \| blob \| history
Include/pythonrun.h		patch \| blob \| history
Lib/json/__init__.py		patch \| blob \| history
Lib/os.py		patch \| blob \| history
Lib/test/mapping_tests.py		patch \| blob \| history
Lib/test/regrtest.py		patch \| blob \| history
Lib/test/script_helper.py		patch \| blob \| history
Lib/test/test_cmd_line.py		patch \| blob \| history
Lib/test/test_descr.py		patch \| blob \| history
Lib/test/test_hash.py		patch \| blob \| history
Lib/test/test_os.py		patch \| blob \| history
Lib/test/test_set.py		patch \| blob \| history
Lib/test/test_sys.py		patch \| blob \| history
Lib/test/test_urllib.py		patch \| blob \| history
Lib/tkinter/test/test_ttk/test_functions.py		patch \| blob \| history
Makefile.pre.in		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Misc/python.man		patch \| blob \| history
Modules/datetimemodule.c		patch \| blob \| history
Modules/main.c		patch \| blob \| history
Modules/posixmodule.c		patch \| blob \| history
Objects/bytesobject.c		patch \| blob \| history
Objects/object.c		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history
PCbuild/pythoncore.vcproj		patch \| blob \| history
Python/pythonrun.c		patch \| blob \| history
Python/random.c	[new file with mode: 0644]	patch \| blob
Python/sysmodule.c		patch \| blob \| history