Fixes issue #6838: use a list to accumulate the value instead of repeatedly concatenating strings in http.client's HTTPResponse._read_chunked.

author Chris Withers <chris@simplistix.co.uk>

Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)

committer Chris Withers <chris@simplistix.co.uk>

Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)
author Chris Withers <chris@simplistix.co.uk>
Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)
committer Chris Withers <chris@simplistix.co.uk>
Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)
diff --git a/Lib/http/client.py b/Lib/http/client.py

index f73cd9ef124814a8b4ddd352ac16b8346e819458..2418340e74a39ed2e0164c01c72413d92cb58ee0 100644 (file)
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -518,10 +518,7 @@ class HTTPResponse(io.RawIOBase):
      def _read_chunked(self, amt):
          assert self.chunked != _UNKNOWN
          chunk_left = self.chunk_left
-        value = b""
-
-        # XXX This accumulates chunks by repeated string concatenation,
-        # which is not efficient as the number or size of chunks gets big.
+        value = []
          while True:
              if chunk_left is None:
                  line = self.fp.readline()
@@ -534,22 +531,22 @@ class HTTPResponse(io.RawIOBase):
                      # close the connection as protocol synchronisation is
                      # probably lost
                      self.close()
-                    raise IncompleteRead(value)
+                    raise IncompleteRead(b''.join(value))
                  if chunk_left == 0:
                      break
              if amt is None:
-                value += self._safe_read(chunk_left)
+                value.append(self._safe_read(chunk_left))
              elif amt < chunk_left:
-                value += self._safe_read(amt)
+                value.append(self._safe_read(amt))
                  self.chunk_left = chunk_left - amt
-                return value
+                return b''.join(value)
              elif amt == chunk_left:
-                value += self._safe_read(amt)
+                value.append(self._safe_read(amt))
                  self._safe_read(2)  # toss the CRLF at the end of the chunk
                  self.chunk_left = None
-                return value
+                return b''.join(value)
              else:
-                value += self._safe_read(chunk_left)
+                value.append(self._safe_read(chunk_left))
                  amt -= chunk_left
  
              # we read the whole chunk, get another
@@ -570,7 +567,7 @@ class HTTPResponse(io.RawIOBase):
          # we read everything; close the "file"
          self.close()
  
-        return value
+        return b''.join(value)
  
      def _safe_read(self, amt):
          """Read the number of bytes requested, compensating for partial reads.
diff --git a/Misc/NEWS b/Misc/NEWS

index 000f2c7c77bab19c2a6039f3a1de61f6feed3c80..80c636f7cfe55b581448175bcc8983f315fc6dd2 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -50,6 +50,11 @@ C-API
  Library
  -------
  
+- Issue #6838: Use a list to accumulate the value instead of
+  repeatedly concatenating strings in http.client's
+  HTTPResponse._read_chunked providing a significant speed increase
+  when downloading large files served with a Transfer-Encoding of 'chunked'.
+
  - Have importlib raise ImportError if None is found in sys.modules for a
    module.
author	Chris Withers <chris@simplistix.co.uk>
	Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)
committer	Chris Withers <chris@simplistix.co.uk>
	Fri, 4 Sep 2009 17:15:46 +0000 (17:15 +0000)
Lib/http/client.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history