granicus.if.org Git - python/commitdiff
Issue #19254: Provide an optimized Python implementation of PBKDF2_HMAC
author Christian Heimes <christian@cheimes.de>
Sat, 19 Oct 2013 12:12:02 +0000 (14:12 +0200)
committer Christian Heimes <christian@cheimes.de>
Sat, 19 Oct 2013 12:12:02 +0000 (14:12 +0200)
Doc/library/hashlib.rst
Lib/hashlib.py
Lib/test/test_hashlib.py
Misc/NEWS

index 3bf30bb22e35d43225c95c3a86cf32816592b677..677d530dfcae55eee87c37403eea5bd91bfbc628 100644 (file)
@@ -212,7 +212,11 @@ slow and include a salt.
 
    .. versionadded:: 3.4
 
-   .. note:: *pbkdf2_hmac* is only available with OpenSSL 1.0 and newer.
+   .. note:: A fast implementation of *pbkdf2_hmac* is only available with
+      OpenSSL 1.0 and newer. The Python implementation uses an inline
+      version of :mod:`hmac` and is about three times slower. Contrary to
+      OpenSSL's current code, the length of the password has only a minimal
+      impact on the runtime of the Python implementation.
 
 
 .. seealso::
index 73882d1872b10b79802d8a2ebe731aa40ef78fd4..56a9360eac60ff5f31b50e6200c6eda0e16c2a2c 100644 (file)
@@ -1,4 +1,4 @@
-#  Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
+#.  Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
 #  Licensed to PSF under a Contributor Agreement.
 #
 
@@ -61,7 +61,7 @@ algorithms_guaranteed = set(__always_supported)
 algorithms_available = set(__always_supported)
 
 __all__ = __always_supported + ('new', 'algorithms_guaranteed',
-                                'algorithms_available')
+                                'algorithms_available', 'pbkdf2_hmac')
 
 
 def __get_builtin_constructor(name):
@@ -147,13 +147,70 @@ except ImportError:
     new = __py_new
     __get_hash = __get_builtin_constructor
 
-# PBKDF2 requires OpenSSL 1.0+ with HMAC and SHA
 try:
+    # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
     from _hashlib import pbkdf2_hmac
 except ImportError:
-    pass
-else:
-    __all__ += ('pbkdf2_hmac',)
+    _trans_5C = bytes((x ^ 0x5C) for x in range(256))
+    _trans_36 = bytes((x ^ 0x36) for x in range(256))
+
+    def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
+        """Password based key derivation function 2 (PKCS #5 v2.0)
+
+        This Python implementation, based on the hmac module, is about as
+        fast as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much
+        faster for long passwords.
+        """
+        if not isinstance(hash_name, str):
+            raise TypeError(hash_name)
+
+        if not isinstance(password, (bytes, bytearray)):
+            password = bytes(memoryview(password))
+        if not isinstance(salt, (bytes, bytearray)):
+            salt = bytes(memoryview(salt))
+
+        # Fast inline HMAC implementation
+        inner = new(hash_name)
+        outer = new(hash_name)
+        blocksize = getattr(inner, 'block_size', 64)
+        if len(password) > blocksize:
+            password = new(hash_name, password).digest()
+        password = password + b'\x00' * (blocksize - len(password))
+        inner.update(password.translate(_trans_36))
+        outer.update(password.translate(_trans_5C))
+
+        def prf(msg, inner=inner, outer=outer):
+            # PBKDF2_HMAC uses the password as key. We can re-use the same
+            # digest objects and just update copies to skip initialization.
+            icpy = inner.copy()
+            ocpy = outer.copy()
+            icpy.update(msg)
+            ocpy.update(icpy.digest())
+            return ocpy.digest()
+
+        if iterations < 1:
+            raise ValueError(iterations)
+        if dklen is None:
+            dklen = outer.digest_size
+        if dklen < 1:
+            raise ValueError(dklen)
+
+        dkey = b''
+        loop = 1
+        from_bytes = int.from_bytes
+        while len(dkey) < dklen:
+            prev = prf(salt + loop.to_bytes(4, 'big'))
+            # endianness doesn't matter here as long as to_bytes() and
+            # from_bytes() use the same byte order
+            rkey = int.from_bytes(prev, 'big')
+            for i in range(iterations - 1):
+                prev = prf(prev)
+                # rkey = rkey ^ prev
+                rkey ^= from_bytes(prev, 'big')
+            loop += 1
+            dkey += rkey.to_bytes(inner.digest_size, 'big')
+
+        return dkey[:dklen]
+
 
 for __func_name in __always_supported:
     # try them all, some may not work due to the OpenSSL
index 3eadee1cc7f0c6b31478cb0a35a56df9eff1491b..18fe4b50de73f6ced2a6447d81309d57b940a5e7 100644 (file)
@@ -18,11 +18,13 @@ except ImportError:
 import unittest
 import warnings
 from test import support
-from test.support import _4G, bigmemtest
+from test.support import _4G, bigmemtest, import_fresh_module
 
 # Were we compiled --with-pydebug or with #define Py_DEBUG?
 COMPILED_WITH_PYDEBUG = hasattr(sys, 'gettotalrefcount')
 
+c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib'])
+py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib'])
 
 def hexstr(s):
     assert isinstance(s, bytes), repr(s)
@@ -545,6 +547,10 @@ class HashLibTestCase(unittest.TestCase):
 
         self.assertEqual(expected_hash, hasher.hexdigest())
 
+
+class KDFTests:
+    hashlibmod = None
+
     pbkdf2_test_vectors = [
         (b'password', b'salt', 1, None),
         (b'password', b'salt', 2, None),
@@ -594,10 +600,8 @@ class HashLibTestCase(unittest.TestCase):
             (bytes.fromhex('9d9e9c4cd21fe4be24d5b8244c759665'), None),],
     }
 
-    @unittest.skipUnless(hasattr(hashlib, 'pbkdf2_hmac'),
-                         'pbkdf2_hmac required for this test.')
     def test_pbkdf2_hmac(self):
-        pbkdf2 = hashlib.pbkdf2_hmac
+        pbkdf2 = self.hashlibmod.pbkdf2_hmac
 
         for digest_name, results in self.pbkdf2_results.items():
             for i, vector in enumerate(self.pbkdf2_test_vectors):
@@ -628,5 +632,13 @@ class HashLibTestCase(unittest.TestCase):
             pbkdf2('unknown', b'pass', b'salt', 1)
 
 
+class PyKDFTests(KDFTests, unittest.TestCase):
+    hashlibmod = py_hashlib
+
+
+class CKDFTests(KDFTests, unittest.TestCase):
+    hashlibmod = c_hashlib
+
+
 if __name__ == "__main__":
     unittest.main()
index f083832398dae9c2c434c80c66976b0d141f6090..527f3e7ba032affd8e3cda06be2948e3306620d6 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -57,6 +57,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #19254: Provide an optimized Python implementation of PBKDF2_HMAC.
+
 - Issues #19201, #19222, #19223: Add "x" mode (exclusive creation) in opening
   file to bz2, gzip and lzma modules. Patches by Tim Heaney and Vajrasky Kok.