granicus.if.org Git - python/commitdiff
Fixed issue #4533: File read operation was dreadfully slow
author Christian Heimes <christian@cheimes.de>
Fri, 5 Dec 2008 08:26:55 +0000 (08:26 +0000)
committer Christian Heimes <christian@cheimes.de>
Fri, 5 Dec 2008 08:26:55 +0000 (08:26 +0000)
Misc/NEWS
Modules/_fileio.c

index 87af173215b6686cc882993635716dc197abd01b..3dd58e6cfdcc38a38a2167321d700ed4081cf7fa 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's New in Python 3.1 alpha 0
 Core and Builtins
 -----------------
 
+- Issue #4533: File read operation was dreadfully slow due to a slowly
+  growing read buffer. Fixed by using the same growth rate algorithm as
+  Python 2.x.
+
 
 Library
 -------
index 0a34eb3be5ffc22be453dfe2d937fa1d766edb6a..c23d5a3c1636c81a79608bb668ed27519003371b 100644 (file)
 #include <windows.h>
 #endif
 
+#if BUFSIZ < (8*1024)
+#define SMALLCHUNK (8*1024)
+#elif (BUFSIZ >= (2 << 25))
+#error "unreasonable BUFSIZ > 64MB defined"
+#else
+#define SMALLCHUNK BUFSIZ
+#endif
+
+#if SIZEOF_INT < 4
+#define BIGCHUNK  (512 * 32)
+#else
+#define BIGCHUNK  (512 * 1024)
+#endif
+
 typedef struct {
        PyObject_HEAD
        int fd;
@@ -387,8 +401,6 @@ fileio_readinto(PyFileIOObject *self, PyObject *args)
        return PyLong_FromSsize_t(n);
 }
 
-#define DEFAULT_BUFFER_SIZE (8*1024)
-
 static PyObject *
 fileio_readall(PyFileIOObject *self)
 {
@@ -396,12 +408,23 @@ fileio_readall(PyFileIOObject *self)
        Py_ssize_t total = 0;
        int n;
 
-       result = PyBytes_FromStringAndSize(NULL, DEFAULT_BUFFER_SIZE);
+       result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
        if (result == NULL)
                return NULL;
 
        while (1) {
-               Py_ssize_t newsize = total + DEFAULT_BUFFER_SIZE;
+               Py_ssize_t newsize = (total < SMALLCHUNK) ? SMALLCHUNK : total;
+
+               /* Keep doubling until we reach BIGCHUNK;
+                  then keep adding BIGCHUNK. */
+               if (newsize <= BIGCHUNK) {
+                       newsize += newsize;
+               }
+               else {
+                       /* NOTE: overflow impossible due to limits on BUFSIZ */
+                       newsize += BIGCHUNK;
+               }
+
                if (PyBytes_GET_SIZE(result) < newsize) {
                        if (_PyBytes_Resize(&result, newsize) < 0) {
                                if (total == 0) {