]> granicus.if.org Git - python/commitdiff
Issue #15609: Optimize str%args for integer argument
author Victor Stinner <victor.stinner@gmail.com>
Mon, 1 Oct 2012 22:33:47 +0000 (00:33 +0200)
committer Victor Stinner <victor.stinner@gmail.com>
Mon, 1 Oct 2012 22:33:47 +0000 (00:33 +0200)
 - Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid
   a temporary buffer
 - Enable the fast path when the width is smaller than or equal to the length,
   and when the precision is greater than or equal to the length
 - Add unit tests!
 - formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII()
   to resize the output string

Lib/test/test_format.py
Objects/unicodeobject.c
Python/formatter_unicode.c

index b6e25409db6649faad149679880853e4426f7066..e6b0d20b873228e33fd04779a8f8741ec76a55b2 100644 (file)
@@ -307,6 +307,22 @@ class FormatTest(unittest.TestCase):
         finally:
             locale.setlocale(locale.LC_ALL, oldloc)
 
+    @support.cpython_only
+    def test_optimisations(self):
+        text = "abcde" # 5 characters
+
+        self.assertIs("%s" % text, text)
+        self.assertIs("%.5s" % text, text)
+        self.assertIs("%.10s" % text, text)
+        self.assertIs("%1s" % text, text)
+        self.assertIs("%5s" % text, text)
+
+        self.assertIs("{0}".format(text), text)
+        self.assertIs("{0:s}".format(text), text)
+        self.assertIs("{0:.5s}".format(text), text)
+        self.assertIs("{0:.10s}".format(text), text)
+        self.assertIs("{0:1s}".format(text), text)
+        self.assertIs("{0:5s}".format(text), text)
 
 
 def test_main():
index 0da565a612b33c9741d5d104e5b73e922f3fe6f7..606aa3327c9fd1135de1ae6edbe1f02696c0f935 100644 (file)
@@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type)
     assert(PyLong_Check(val));
 
     switch (type) {
+    default:
+        assert(!"'type' not in [diuoxX]");
     case 'd':
+    case 'i':
     case 'u':
         /* Special-case boolean: we want 0/1 */
         if (PyBool_Check(val))
@@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
         numnondigits = 2;
         result = PyNumber_ToBase(val, 16);
         break;
-    default:
-        assert(!"'type' not in [duoxX]");
     }
     if (!result)
         return NULL;
@@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type)
             if (buf[i] >= 'a' && buf[i] <= 'x')
                 buf[i] -= 'a'-'A';
     }
-    if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
+    if (!PyUnicode_Check(result)
+        || buf != PyUnicode_DATA(result)) {
         PyObject *unicode;
         unicode = _PyUnicode_FromASCII(buf, len);
         Py_DECREF(result);
         result = unicode;
     }
+    else if (len != PyUnicode_GET_LENGTH(result)) {
+        if (PyUnicode_Resize(&result, len) < 0)
+            Py_CLEAR(result);
+    }
     return result;
 }
 
+/* Format the number v for a '%' conversion of type c (one of "diuoxX").
+ *
+ * v is converted with PyNumber_Long() when it is not already an int.
+ * When v is an exact int and no width, no precision, no sign/blank flag
+ * and no uppercase hex is requested, the digits are written directly into
+ * writer by _PyLong_FormatWriter(). Otherwise the string returned by
+ * formatlong() is stored in *p_result.
+ *
+ * Return 1 if the number has been formatted into the writer,
+ *        0 if the number has been formatted into *p_result
+ *       -1 and raise an exception on error (TypeError if v is not a
+ *          number or cannot be converted to an int) */
+static int
+mainformatlong(_PyUnicodeWriter *writer, PyObject *v,
+               int c, Py_ssize_t width, int prec, int flags,
+               PyObject **p_result)
+{
+    PyObject *iobj, *res;
+
+    if (!PyNumber_Check(v))
+        goto wrongtype;
+
+    if (!PyLong_Check(v)) {
+        iobj = PyNumber_Long(v);
+        if (iobj == NULL) {
+            /* A TypeError from the conversion is replaced by the
+               "a number is required" message below; any other
+               exception is propagated unchanged. */
+            if (PyErr_ExceptionMatches(PyExc_TypeError))
+                goto wrongtype;
+            return -1;
+        }
+        assert(PyLong_Check(iobj));
+    }
+    else {
+        /* Take our own reference so that both branches leave iobj owned
+           by this function. */
+        iobj = v;
+        Py_INCREF(iobj);
+    }
+
+    if (PyLong_CheckExact(v)
+        && width == -1 && prec == -1
+        && !(flags & (F_SIGN | F_BLANK))
+        && c != 'X')
+    {
+        /* Fast path */
+        int alternate = flags & F_ALT;
+        int base;
+        int ret;
+
+        switch(c)
+        {
+            default:
+                assert(0 && "'type' not in [diuoxX]");
+                /* fall through */
+            case 'd':
+            case 'i':
+            case 'u':
+                base = 10;
+                break;
+            case 'o':
+                base = 8;
+                break;
+            case 'x':
+            case 'X':
+                base = 16;
+                break;
+        }
+
+        ret = _PyLong_FormatWriter(writer, v, base, alternate);
+        /* Release the reference taken above: without this, every exit of
+           the fast path (success or error) leaks one reference to v. */
+        Py_DECREF(iobj);
+        if (ret == -1)
+            return -1;
+        return 1;
+    }
+
+    res = formatlong(iobj, flags, prec, c);
+    Py_DECREF(iobj);
+    if (res == NULL)
+        return -1;
+    *p_result = res;
+    return 0;
+
+wrongtype:
+    PyErr_Format(PyExc_TypeError,
+            "%%%c format: a number is required, "
+            "not %.200s", (char)c, Py_TYPE(v)->tp_name);
+    return -1;
+}
+
 static Py_UCS4
 formatchar(PyObject *v)
 {
@@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
             Py_UCS4 fill;
             int sign;
             Py_UCS4 signchar;
-            int isnumok;
             PyObject *v = NULL;
             void *pbuf = NULL;
             Py_ssize_t pindex, len;
@@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args)
             case 'o':
             case 'x':
             case 'X':
-                if (PyLong_CheckExact(v)
-                    && width == -1 && prec == -1
-                    && !(flags & (F_SIGN | F_BLANK)))
-                {
-                    /* Fast path */
-                    switch(c)
-                    {
-                    case 'd':
-                    case 'i':
-                    case 'u':
-                        if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'x':
-                        if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'o':
-                        if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    default:
-                        break;
-                    }
-                }
-
-                isnumok = 0;
-                if (PyNumber_Check(v)) {
-                    PyObject *iobj=NULL;
-
-                    if (PyLong_Check(v)) {
-                        iobj = v;
-                        Py_INCREF(iobj);
-                    }
-                    else {
-                        iobj = PyNumber_Long(v);
-                    }
-                    if (iobj!=NULL) {
-                        if (PyLong_Check(iobj)) {
-                            isnumok = 1;
-                            sign = 1;
-                            temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
-                            Py_DECREF(iobj);
-                        }
-                        else {
-                            Py_DECREF(iobj);
-                        }
-                    }
-                }
-                if (!isnumok) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "%%%c format: a number is required, "
-                                 "not %.200s", (char)c, Py_TYPE(v)->tp_name);
+            {
+                int ret = mainformatlong(&writer, v, c, width, prec,
+                                         flags, &temp);
+                if (ret == 1)
+                    goto nextarg;
+                if (ret == -1)
                     goto onError;
-                }
+                sign = 1;
                 if (flags & F_ZERO)
                     fill = '0';
                 break;
+            }
 
             case 'e':
             case 'E':
@@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                 goto onError;
             assert (PyUnicode_Check(temp));
 
-            if (width == -1 && prec == -1
+            if (PyUnicode_READY(temp) == -1) {
+                Py_CLEAR(temp);
+                goto onError;
+            }
+
+            len = PyUnicode_GET_LENGTH(temp);
+            if ((width == -1 || width <= len)
+                && (prec == -1 || prec >= len)
                 && !(flags & (F_SIGN | F_BLANK)))
             {
                 /* Fast path */
@@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                 goto nextarg;
             }
 
-            if (PyUnicode_READY(temp) == -1) {
-                Py_CLEAR(temp);
-                goto onError;
-            }
-            kind = PyUnicode_KIND(temp);
-            pbuf = PyUnicode_DATA(temp);
-            len = PyUnicode_GET_LENGTH(temp);
-
             if (c == 's' || c == 'r' || c == 'a') {
                 if (prec >= 0 && len > prec)
                     len = prec;
             }
 
             /* pbuf is initialized here. */
+            kind = PyUnicode_KIND(temp);
+            pbuf = PyUnicode_DATA(temp);
             pindex = 0;
             if (sign) {
                 Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
index aa62502dbef118294c4f8f238ecc098c14b10283..0ce9862a20c9873877a35e11dc3e4591c6f517be 100644 (file)
@@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
         goto done;
     }
 
-    if (format->width == -1 && format->precision == -1) {
+    if ((format->width == -1 || format->width <= len)
+        && (format->precision == -1 || format->precision >= len)) {
         /* Fast path */
         return _PyUnicodeWriter_WriteStr(writer, value);
     }