From: Inada Naoki Date: Sat, 6 Apr 2019 09:06:19 +0000 (+0900) Subject: bpo-36050: optimize HTTPResponse.read() (GH-12698) X-Git-Tag: v3.8.0a4~257 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d6bf6f2d0c83f0c64ce86e7b9340278627798090;p=python bpo-36050: optimize HTTPResponse.read() (GH-12698) * No need to chunking for now. * No need to partial read caused by EINTR for now. --- diff --git a/Lib/http/client.py b/Lib/http/client.py index 1de151c38e..5a2225276b 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -105,9 +105,6 @@ globals().update(http.HTTPStatus.__members__) # Mapping status codes to official W3C names responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()} -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - # maximal line length when calling readline(). _MAXLINE = 65536 _MAXHEADERS = 100 @@ -592,43 +589,24 @@ class HTTPResponse(io.BufferedIOBase): raise IncompleteRead(bytes(b[0:total_bytes])) def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. IncompleteRead() will be raised in this - situation. + """Read the number of bytes requested. This function should be used when bytes "should" be present for reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(b''.join(s), amt) - s.append(chunk) - amt -= len(chunk) - return b"".join(s) + data = self.fp.read(amt) + if len(data) < amt: + raise IncompleteRead(data, amt-len(data)) + return data def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" - total_bytes = 0 - mvb = memoryview(b) - while total_bytes < len(b): - if MAXAMOUNT < len(mvb): - temp_mvb = mvb[0:MAXAMOUNT] - n = self.fp.readinto(temp_mvb) - else: - n = self.fp.readinto(mvb) - if not n: - raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) - mvb = mvb[n:] - total_bytes += n - return total_bytes + amt = len(b) + n = self.fp.readinto(b) + if n < amt: + raise IncompleteRead(bytes(b[:n]), amt-n) + return n def read1(self, n=-1): """Read with at most one underlying system call. If at least one diff --git a/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst b/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst new file mode 100644 index 0000000000..92318f877b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst @@ -0,0 +1,2 @@ +Optimized ``http.client.HTTPResponse.read()`` for large response. Patch by +Inada Naoki.