From 995b5d38e7cc24cac3de8dfd516115f86b0bcf80 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Fri, 13 Sep 2019 15:31:19 +0200 Subject: [PATCH] bpo-38153: Normalize hashlib algorithm names (GH-16083) Signed-off-by: Christian Heimes --- Lib/hashlib.py | 29 +-- Lib/test/test_hashlib.py | 12 ++ .../2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst | 3 + Modules/_hashopenssl.c | 167 +++++++++++++++--- 4 files changed, 179 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 4e783a86a3..56873b7278 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -70,37 +70,44 @@ __all__ = __always_supported + ('new', 'algorithms_guaranteed', __builtin_constructor_cache = {} +__block_openssl_constructor = { + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256', + 'blake2b', 'blake2s', +} + def __get_builtin_constructor(name): cache = __builtin_constructor_cache constructor = cache.get(name) if constructor is not None: return constructor try: - if name in ('SHA1', 'sha1'): + if name in {'SHA1', 'sha1'}: import _sha1 cache['SHA1'] = cache['sha1'] = _sha1.sha1 - elif name in ('MD5', 'md5'): + elif name in {'MD5', 'md5'}: import _md5 cache['MD5'] = cache['md5'] = _md5.md5 - elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): + elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}: import _sha256 cache['SHA224'] = cache['sha224'] = _sha256.sha224 cache['SHA256'] = cache['sha256'] = _sha256.sha256 - elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): + elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}: import _sha512 cache['SHA384'] = cache['sha384'] = _sha512.sha384 cache['SHA512'] = cache['sha512'] = _sha512.sha512 - elif name in ('blake2b', 'blake2s'): + elif name in {'blake2b', 'blake2s'}: import _blake2 cache['blake2b'] = _blake2.blake2b cache['blake2s'] = _blake2.blake2s - elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', - 
'shake_128', 'shake_256'}: + elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}: import _sha3 cache['sha3_224'] = _sha3.sha3_224 cache['sha3_256'] = _sha3.sha3_256 cache['sha3_384'] = _sha3.sha3_384 cache['sha3_512'] = _sha3.sha3_512 + elif name in {'shake_128', 'shake_256'}: + import _sha3 cache['shake_128'] = _sha3.shake_128 cache['shake_256'] = _sha3.shake_256 except ImportError: @@ -114,8 +121,8 @@ def __get_builtin_constructor(name): def __get_openssl_constructor(name): - if name in {'blake2b', 'blake2s'}: - # Prefer our blake2 implementation. + if name in __block_openssl_constructor: + # Prefer our blake2 and sha3 implementation. return __get_builtin_constructor(name) try: f = getattr(_hashlib, 'openssl_' + name) @@ -140,8 +147,8 @@ def __hash_new(name, data=b'', **kwargs): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ - if name in {'blake2b', 'blake2s'}: - # Prefer our blake2 implementation. + if name in __block_openssl_constructor: + # Prefer our blake2 and sha3 implementation # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. # It does neither support keyed blake2 nor advanced features like # salt, personal, tree hashing or SSE. 
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 46088e52dc..9204b44bf4 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -26,6 +26,11 @@ COMPILED_WITH_PYDEBUG = hasattr(sys, 'gettotalrefcount') c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib']) +try: + from _hashlib import HASH +except ImportError: + HASH = None + try: import _blake2 except ImportError: @@ -386,6 +391,9 @@ class HashLibTestCase(unittest.TestCase): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: m = hash_object_constructor() + if HASH is not None and isinstance(m, HASH): + # _hashopenssl's variant does not have extra SHA3 attributes + continue self.assertEqual(capacity + rate, 1600) self.assertEqual(m._capacity_bits, capacity) self.assertEqual(m._rate_bits, rate) @@ -985,6 +993,10 @@ class KDFTests(unittest.TestCase): hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1, dklen=dklen) + def test_normalized_name(self): + self.assertNotIn("blake2b512", hashlib.algorithms_available) + self.assertNotIn("sha3-512", hashlib.algorithms_available) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst new file mode 100644 index 0000000000..8a483c760a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-13-12-18-51.bpo-38153.nHAbuJ.rst @@ -0,0 +1,3 @@ +Names of hashing algorithms from OpenSSL are now normalized to follow +Python's naming conventions. For example OpenSSL uses sha3-512 instead of +sha3_512 or blake2b512 instead of blake2b. 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index c65c698cbb..29ebec77a4 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -34,6 +34,14 @@ #define MUNCH_SIZE INT_MAX +#if defined(NID_sha3_224) && defined(EVP_MD_FLAG_XOF) +#define PY_OPENSSL_HAS_SHA3 1 +#endif + +#ifdef NID_blake2b512 +#define PY_OPENSSL_HAS_BLAKE2 1 +#endif + typedef struct { PyObject_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ @@ -82,6 +90,135 @@ _setException(PyObject *exc) } /* LCOV_EXCL_STOP */ +static PyObject* +py_digest_name(const EVP_MD *md) +{ + int nid = EVP_MD_nid(md); + const char *name = NULL; + + /* Hard-coded names for well-known hashing algorithms. + * OpenSSL uses slightly different names for algorithms like SHA3. + */ + switch (nid) { + case NID_md5: + name = "md5"; + break; + case NID_sha1: + name = "sha1"; + break; + case NID_sha224: + name ="sha224"; + break; + case NID_sha256: + name ="sha256"; + break; + case NID_sha384: + name ="sha384"; + break; + case NID_sha512: + name ="sha512"; + break; +#ifdef NID_sha512_224 + case NID_sha512_224: + name ="sha512_224"; + break; + case NID_sha512_256: + name ="sha512_256"; + break; +#endif +#ifdef PY_OPENSSL_HAS_SHA3 + case NID_sha3_224: + name ="sha3_224"; + break; + case NID_sha3_256: + name ="sha3_256"; + break; + case NID_sha3_384: + name ="sha3_384"; + break; + case NID_sha3_512: + name ="sha3_512"; + break; + case NID_shake128: + name ="shake_128"; + break; + case NID_shake256: + name ="shake_256"; + break; +#endif +#ifdef PY_OPENSSL_HAS_BLAKE2 + case NID_blake2s256: + name ="blake2s"; + break; + case NID_blake2b512: + name ="blake2b"; + break; +#endif + default: + /* Ignore aliased names and only use long, lowercase name. The aliases + * pollute the list and OpenSSL appears to have its own definition of + * alias as the resulting list still contains duplicate and alternate + * names for several algorithms. 
+ */ + name = OBJ_nid2ln(nid); + if (name == NULL) + name = OBJ_nid2sn(nid); + break; + } + + return PyUnicode_FromString(name); +} + +static const EVP_MD* +py_digest_by_name(const char *name) +{ + const EVP_MD *digest = EVP_get_digestbyname(name); + + /* OpenSSL uses dash instead of underscore in names of some algorithms + * like SHA3 and SHAKE. Detect different spellings. */ + if (digest == NULL) { +#ifdef NID_sha512_224 + if (!strcmp(name, "sha512_224") || !strcmp(name, "SHA512_224")) { + digest = EVP_sha512_224(); + } + else if (!strcmp(name, "sha512_256") || !strcmp(name, "SHA512_256")) { + digest = EVP_sha512_256(); + } +#endif +#ifdef PY_OPENSSL_HAS_SHA3 + /* could be sha3_ or shake_, Python never defined upper case */ + else if (!strcmp(name, "sha3_224")) { + digest = EVP_sha3_224(); + } + else if (!strcmp(name, "sha3_256")) { + digest = EVP_sha3_256(); + } + else if (!strcmp(name, "sha3_384")) { + digest = EVP_sha3_384(); + } + else if (!strcmp(name, "sha3_512")) { + digest = EVP_sha3_512(); + } + else if (!strcmp(name, "shake_128")) { + digest = EVP_shake128(); + } + else if (!strcmp(name, "shake_256")) { + digest = EVP_shake256(); + } +#endif +#ifdef PY_OPENSSL_HAS_BLAKE2 + else if (!strcmp(name, "blake2s256")) { + digest = EVP_blake2s256(); + } + else if (!strcmp(name, "blake2b512")) { + digest = EVP_blake2b512(); + } +#endif + } + + return digest; +} + static EVPobject * newEVPobject(void) { @@ -304,16 +441,7 @@ EVP_get_digest_size(EVPobject *self, void *closure) static PyObject * EVP_get_name(EVPobject *self, void *closure) { - const char *name = EVP_MD_name(EVP_MD_CTX_md(self->ctx)); - PyObject *name_obj, *name_lower; - - name_obj = PyUnicode_FromString(name); - if (!name_obj) { - return NULL; - } - name_lower = PyObject_CallMethod(name_obj, "lower", NULL); - Py_DECREF(name_obj); - return name_lower; + return py_digest_name(EVP_MD_CTX_md(self->ctx)); } static PyGetSetDef EVP_getseters[] = { @@ -337,7 +465,7 @@ static PyObject * EVP_repr(EVPobject 
*self) { PyObject *name_obj, *repr; - name_obj = EVP_get_name(self, NULL); + name_obj = py_digest_name(EVP_MD_CTX_md(self->ctx)); if (!name_obj) { return NULL; } @@ -403,6 +531,7 @@ static PyTypeObject EVPtype = { 0, /* tp_dictoffset */ }; +\ static PyObject * EVPnew(const EVP_MD *digest, const unsigned char *cp, Py_ssize_t len, int usedforsecurity) @@ -485,7 +614,7 @@ EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, if (data_obj) GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view); - digest = EVP_get_digestbyname(name); + digest = py_digest_by_name(name); ret_obj = EVPnew(digest, (unsigned char*)view.buf, view.len, @@ -922,21 +1051,17 @@ typedef struct _internal_name_mapper_state { /* A callback function to pass to OpenSSL's OBJ_NAME_do_all(...) */ static void -_openssl_hash_name_mapper(const OBJ_NAME *openssl_obj_name, void *arg) +_openssl_hash_name_mapper(const EVP_MD *md, const char *from, + const char *to, void *arg) { _InternalNameMapperState *state = (_InternalNameMapperState *)arg; PyObject *py_name; assert(state != NULL); - if (openssl_obj_name == NULL) - return; - /* Ignore aliased names, they pollute the list and OpenSSL appears to - * have its own definition of alias as the resulting list still - * contains duplicate and alternate names for several algorithms. */ - if (openssl_obj_name->alias) + if (md == NULL) return; - py_name = PyUnicode_FromString(openssl_obj_name->name); + py_name = py_digest_name(md); if (py_name == NULL) { state->error = 1; } else { @@ -958,7 +1083,7 @@ generate_hash_name_list(void) return NULL; state.error = 0; - OBJ_NAME_do_all(OBJ_NAME_TYPE_MD_METH, &_openssl_hash_name_mapper, &state); + EVP_MD_do_all(&_openssl_hash_name_mapper, &state); if (state.error) { Py_DECREF(state.set); -- 2.40.0