Revert gzip readline performance patch #1281707 until a more generic performance...

author Bob Ippolito <bob@redivi.com>
Mon, 22 May 2006 15:22:46 +0000 (15:22 +0000)
committer Bob Ippolito <bob@redivi.com>
Mon, 22 May 2006 15:22:46 +0000 (15:22 +0000)
diff --git a/Lib/gzip.py b/Lib/gzip.py

index 8c7870e6adc284472df4518358a8cd8033b79dee..3c1ebf25142c925a8d76264fde3c196bfd522652 100644 (file)
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -107,7 +107,6 @@ class GzipFile:
              self.extrabuf = ""
              self.extrasize = 0
              self.filename = filename
-            self.min_readsize = 64       # Starts small, scales exponentially
  
          elif mode[0:1] == 'w' or mode[0:1] == 'a':
              self.mode = WRITE
@@ -382,39 +381,32 @@ class GzipFile:
              self.read(count % 1024)
  
      def readline(self, size=-1):
-        if size < 0:
-            size = sys.maxint              # Line can be as long as maxint
-            readsize = self.min_readsize   # Read from file in small chunks
-        else:
-            readsize = size                # Only read in as much as specified
-
-        bufs = ""
-
+        if size < 0: size = sys.maxint
+        bufs = []
+        readsize = min(100, size)    # Read from the file in small chunks
          while True:
-            if size == 0: return bufs      # Return line (reached max len)
+            if size == 0:
+                return "".join(bufs) # Return resulting line
  
              c = self.read(readsize)
              i = c.find('\n')
+            if size is not None:
+                # We set i=size to break out of the loop under two
+                # conditions: 1) there's no newline, and the chunk is
+                # larger than size, or 2) there is a newline, but the
+                # resulting line would be longer than 'size'.
+                if i==-1 and len(c) > size: i=size-1
+                elif size <= i: i = size -1
  
-            # If there is a newline, or the string is empty
              if i >= 0 or c == '':
-                if size <= i: i = size - 1 # Another larger than size check
-
-                self._unread(c[i+1:])      # Push back rest of chunk
-
-                return bufs + c[:i+1]      # Stored line, plus new segment
-
-            # If there is no newline
-            else:
-                if len(c) > size: i = size - 1   # If lineis larger than size
-
-                bufs = bufs + c
-                size = size - len(c)
-                readsize = min(size, int(readsize * 1.1))
-
-                # Optimize future readline() calls
-                if readsize > self.min_readsize:
-                    self.min_readsize = readsize
+                bufs.append(c[:i+1])    # Add portion of last chunk
+                self._unread(c[i+1:])   # Push back rest of chunk
+                return ''.join(bufs)    # Return resulting line
+
+            # Append chunk to list, decrease 'size',
+            bufs.append(c)
+            size = size - len(c)
+            readsize = min(size, readsize * 2)
  
      def readlines(self, sizehint=0):
          # Negative numbers result in reading all the lines
author	Bob Ippolito <bob@redivi.com>
	Mon, 22 May 2006 15:22:46 +0000 (15:22 +0000)
committer	Bob Ippolito <bob@redivi.com>
	Mon, 22 May 2006 15:22:46 +0000 (15:22 +0000)