Merge refactoring I did when committing r72267 to trunk into the

author Gregory P. Smith <greg@mad-scientist.com>

Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)

committer Gregory P. Smith <greg@mad-scientist.com>

Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)
author Gregory P. Smith <greg@mad-scientist.com>
Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)
committer Gregory P. Smith <greg@mad-scientist.com>
Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)
diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst

index 955afb8fae8be5313d41a1aa89244bc4edb3f0fb..36f386cb57839427101b34d1784f1aa3791d8a82 100644 (file)
--- a/Doc/library/hashlib.rst
+++ b/Doc/library/hashlib.rst
@@ -105,6 +105,12 @@ A hash object has the following methods:
     concatenation of all the arguments: ``m.update(a); m.update(b)`` is
     equivalent to ``m.update(a+b)``.
  
+   .. versionchanged:: 2.7
+
+      The Python GIL is released to allow other threads to run while
+      hash updates on data of 2048 bytes or more are taking place when
+      using hash algorithms supplied by OpenSSL.
+
  
  .. method:: hash.digest()
  
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py

index 9b51459892ff4fd26d509b190b7919e7244f8289..594f5dd8850c35213ffd499681f7a2c5f1ca8447 100644 (file)
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -2,11 +2,16 @@
  #
  # $Id$
  #
-#  Copyright (C) 2005   Gregory P. Smith (greg@krypto.org)
+#  Copyright (C) 2005-2009   Gregory P. Smith (greg@krypto.org)
  #  Licensed to PSF under a Contributor Agreement.
  #
  
  import hashlib
+from io import StringIO
+try:
+    import threading
+except ImportError:
+    threading = None
  import unittest
  from test import support
  from test.support import _4G, precisionbigmemtest
@@ -224,6 +229,45 @@ class HashLibTestCase(unittest.TestCase):
          m = hashlib.md5(b'x' * gil_minsize)
          self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958')
  
+    def test_threaded_hashing(self):
+        if not threading:
+            raise unittest.SkipTest('No threading module.')
+
+        # Updating the same hash object from several threads at once
+        # using data chunk sizes containing the same byte sequences.
+        #
+        # If the internal locks are working to prevent multiple
+        # updates on the same object from running at once, the resulting
+        # hash will be the same as doing it single threaded upfront.
+        hasher = hashlib.sha1()
+        num_threads = 5
+        smallest_data = b'swineflu'
+        data = smallest_data*200000
+        expected_hash = hashlib.sha1(data*num_threads).hexdigest()
+
+        def hash_in_chunks(chunk_size, event):
+            index = 0
+            while index < len(data):
+                hasher.update(data[index:index+chunk_size])
+                index += chunk_size
+            event.set()
+
+        events = []
+        for threadnum in range(num_threads):
+            chunk_size = len(data) // (10**threadnum)
+            assert chunk_size > 0
+            assert chunk_size % len(smallest_data) == 0
+            event = threading.Event()
+            events.append(event)
+            threading.Thread(target=hash_in_chunks,
+                             args=(chunk_size, event)).start()
+
+        for event in events:
+            event.wait()
+
+        self.assertEqual(expected_hash, hasher.hexdigest())
+
+
  def test_main():
      support.run_unittest(HashLibTestCase)
  
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c

index 569d441c99e2129011fbefd8ffabe7cd2797c7b9..0dae5158d51665b10b218f2b772623cd7573041e 100644 (file)
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -1,7 +1,7 @@
  /* Module that wraps all OpenSSL hash algorithms */
  
  /*
- * Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
+ * Copyright (C) 2005-2009   Gregory P. Smith (greg@krypto.org)
   * Licensed to PSF under a Contributor Agreement.
   *
   * Derived from a skeleton of shamodule.c containing work performed by:
@@ -17,21 +17,8 @@
  #include "structmember.h"
  #include "hashlib.h"
  
-/* EVP is the preferred interface to hashing in OpenSSL */
-#include <openssl/evp.h>
-
-#define MUNCH_SIZE INT_MAX
-
-
-#ifndef HASH_OBJ_CONSTRUCTOR
-#define HASH_OBJ_CONSTRUCTOR 0
-#endif
-
-#define HASHLIB_GIL_MINSIZE 2048
-
  #ifdef WITH_THREAD
-    #include "pythread.h"
-
+#include "pythread.h"
      #define ENTER_HASHLIB(obj) \
          if ((obj)->lock) { \
              if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -49,6 +36,20 @@
      #define LEAVE_HASHLIB(obj)
  #endif
  
+/* EVP is the preferred interface to hashing in OpenSSL */
+#include <openssl/evp.h>
+
+#define MUNCH_SIZE INT_MAX
+
+/* TODO(gps): We should probably make this a module or EVPobject attribute
+ * to allow the user to optimize based on the platform they're using. */
+#define HASHLIB_GIL_MINSIZE 2048
+
+#ifndef HASH_OBJ_CONSTRUCTOR
+#define HASH_OBJ_CONSTRUCTOR 0
+#endif
+
+
  typedef struct {
      PyObject_HEAD
      PyObject            *name;  /* name of this hash algorithm */
@@ -122,11 +123,18 @@ EVP_dealloc(EVPobject *self)
      PyObject_Del(self);
  }
  
+static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
+{
+    ENTER_HASHLIB(self);
+    EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
+    LEAVE_HASHLIB(self);
+}
  
  /* External methods for a hash object */
  
  PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
  
+
  static PyObject *
  EVP_copy(EVPobject *self, PyObject *unused)
  {
@@ -135,9 +143,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
      if ( (newobj = newEVPobject(self->name))==NULL)
          return NULL;
  
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&newobj->ctx, self);
      return (PyObject *)newobj;
  }
  
@@ -152,9 +158,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
      PyObject *retval;
      unsigned int digest_size;
  
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&temp_ctx, self);
      digest_size = EVP_MD_CTX_size(&temp_ctx);
      EVP_DigestFinal(&temp_ctx, digest, NULL);
  
@@ -176,9 +180,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
      unsigned int i, j, digest_size;
  
      /* Get the raw (binary) digest value */
-    ENTER_HASHLIB(self);
-    EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
-    LEAVE_HASHLIB(self);
+    locked_EVP_MD_CTX_copy(&temp_ctx, self);
      digest_size = EVP_MD_CTX_size(&temp_ctx);
      EVP_DigestFinal(&temp_ctx, digest, NULL);
  
@@ -221,11 +223,7 @@ EVP_update(EVPobject *self, PyObject *args)
  #ifdef WITH_THREAD
      if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
          self->lock = PyThread_allocate_lock();
-        if (self->lock == NULL) {
-            PyBuffer_Release(&view);
-            PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
-            return NULL;
-        }
+        /* On failure self->lock stays NULL and we fall back to the non-threaded code. */
      }
  
      if (self->lock != NULL) {
@@ -257,9 +255,7 @@ static PyObject *
  EVP_get_block_size(EVPobject *self, void *closure)
  {
      long block_size;
-    ENTER_HASHLIB(self);
      block_size = EVP_MD_CTX_block_size(&self->ctx);
-    LEAVE_HASHLIB(self);
      return PyLong_FromLong(block_size);
  }
  
@@ -267,9 +263,7 @@ static PyObject *
  EVP_get_digest_size(EVPobject *self, void *closure)
  {
      long size;
-    ENTER_HASHLIB(self);
      size = EVP_MD_CTX_size(&self->ctx);
-    LEAVE_HASHLIB(self);
      return PyLong_FromLong(size);
  }
author	Gregory P. Smith <greg@mad-scientist.com>
	Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)
committer	Gregory P. Smith <greg@mad-scientist.com>
	Mon, 4 May 2009 00:45:33 +0000 (00:45 +0000)
Doc/library/hashlib.rst		patch \| blob \| history
Lib/test/test_hashlib.py		patch \| blob \| history
Modules/_hashopenssl.c		patch \| blob \| history