]> granicus.if.org Git - python/commitdiff
SF patch #980695: efficient string concatenation
authorRaymond Hettinger <python@rcn.com>
Fri, 6 Aug 2004 18:43:09 +0000 (18:43 +0000)
committerRaymond Hettinger <python@rcn.com>
Fri, 6 Aug 2004 18:43:09 +0000 (18:43 +0000)
(Original patch by Armin Rigo).

Doc/lib/libstdtypes.tex
Misc/NEWS
Python/ceval.c

index c33360d3befce2dae3b88bfa6fa292cd76bd9059..938dc6ed473f6cc9b8b9f0da247c1fd1d6a667d5 100644 (file)
@@ -455,7 +455,7 @@ and \var{j} are integers:
   \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
 equal to \var{x}, else \code{1}}{(1)}
   \hline
-  \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
+  \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{(6)}
   \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
   \hline
   \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
@@ -536,6 +536,16 @@ In Python 2.3 and beyond, \var{x} may be a string of any length.
   (which end depends on the sign of \var{k}).  Note, \var{k} cannot
   be zero.
 
+\item[(6)] If \var{s} and \var{t} are both strings, some Python
+implementations such as CPython can usally perform an inplace optimization
+for assignments of the form \code{\var{s}=\var{s}+\var{t}} or
+\code{\var{s}+=\var{t}}.  When applicable, this optimization makes
+quadratic run-time much less likely.  This optimization is both version
+and implementation dependent.  For performance sensitive code, it is
+preferrable to use the \method{str.join()} method which assures consistent
+linear concatenation performance across versions and implementations.
+\versionchanged[Formerly, string concatenation never occurred inplace]{2.4}
+
 \end{description}
 
 
index d0276ffc89a3281b9c4d60957b625c03f1fc7388..cbe3e6ee6cec41e7d5150304cf4de78775b28d26 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,11 @@ What's New in Python 2.4 alpha 2?
 Core and builtins
 -----------------
 
+- Patch #980695:  Implements efficient string concatenation for statements
+  of the form s=s+t and s+=t.  This will vary across implementations.
+  Accordingly, the str.join() method is strongly preferred for performance
+  sensitive code.
+
 - PEP-0318, Function Decorators have been added to the language. These are
   implemented using the Java-style @decorator syntax, like so:
      @staticmethod
index 6c457e125c1405ff866570c27666ba0b28afa18b..4dd31ab9bab122bd11b50f661b402d81d028eda4 100644 (file)
@@ -85,6 +85,8 @@ static int exec_statement(PyFrameObject *,
 static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *);
 static void reset_exc_info(PyThreadState *);
 static void format_exc_check_arg(PyObject *, char *, PyObject *);
+static PyObject *string_concatenate(PyObject *, PyObject *,
+                                   PyFrameObject *, unsigned char *);
 
 #define NAME_ERROR_MSG \
        "name '%.200s' is not defined"
@@ -550,6 +552,7 @@ PyEval_EvalFrame(PyFrameObject *f)
 #define INSTR_OFFSET() (next_instr - first_instr)
 #define NEXTOP()       (*next_instr++)
 #define NEXTARG()      (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
+#define PEEKARG()      ((next_instr[2]<<8) + next_instr[1])
 #define JUMPTO(x)      (next_instr = first_instr + (x))
 #define JUMPBY(x)      (next_instr += (x))
 
@@ -580,8 +583,7 @@ PyEval_EvalFrame(PyFrameObject *f)
 #endif
 
 #define PREDICTED(op)          PRED_##op: next_instr++
-#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = (next_instr[2]<<8) + \
-                               next_instr[1]; next_instr += 3
+#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3
 
 /* Stack manipulation macros */
 
@@ -1066,11 +1068,18 @@ PyEval_EvalFrame(PyFrameObject *f)
                                        goto slow_add;
                                x = PyInt_FromLong(i);
                        }
+                       else if (PyString_CheckExact(v) &&
+                                PyString_CheckExact(w)) {
+                               x = string_concatenate(v, w, f, next_instr);
+                               /* string_concatenate consumed the ref to v */
+                               goto skip_decref_vx;
+                       }
                        else {
                          slow_add:
                                x = PyNumber_Add(v, w);
                        }
                        Py_DECREF(v);
+                 skip_decref_vx:
                        Py_DECREF(w);
                        SET_TOP(x);
                        if (x != NULL) continue;
@@ -1261,11 +1270,18 @@ PyEval_EvalFrame(PyFrameObject *f)
                                        goto slow_iadd;
                                x = PyInt_FromLong(i);
                        }
+                       else if (PyString_CheckExact(v) &&
+                                PyString_CheckExact(w)) {
+                               x = string_concatenate(v, w, f, next_instr);
+                               /* string_concatenate consumed the ref to v */
+                               goto skip_decref_v;
+                       }
                        else {
                          slow_iadd:
                                x = PyNumber_InPlaceAdd(v, w);
                        }
                        Py_DECREF(v);
+                 skip_decref_v:
                        Py_DECREF(w);
                        SET_TOP(x);
                        if (x != NULL) continue;
@@ -4191,6 +4207,79 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj)
        PyErr_Format(exc, format_str, obj_str);
 }
 
+static PyObject *
+string_concatenate(PyObject *v, PyObject *w,
+                  PyFrameObject *f, unsigned char *next_instr)
+{
+       /* This function implements 'variable += expr' when both arguments
+          are strings. */
+       
+       if (v->ob_refcnt == 2) {
+               /* In the common case, there are 2 references to the value
+                * stored in 'variable' when the += is performed: one on the
+                * value stack (in 'v') and one still stored in the 'variable'.
+                * We try to delete the variable now to reduce the refcnt to 1.
+                */
+               switch (*next_instr) {
+               case STORE_FAST:
+               {
+                       int oparg = PEEKARG();
+                       PyObject **fastlocals = f->f_localsplus;
+                       if (GETLOCAL(oparg) == v)
+                               SETLOCAL(oparg, NULL);
+                       break;
+               }
+               case STORE_DEREF:
+               {
+                       PyObject **freevars = f->f_localsplus + f->f_nlocals;
+                       PyObject *c = freevars[PEEKARG()];
+                       if (PyCell_GET(c) == v)
+                               PyCell_Set(c, NULL);
+                       break;
+               }
+               case STORE_NAME:
+               {
+                       PyObject *names = f->f_code->co_names;
+                       PyObject *name = GETITEM(names, PEEKARG());
+                       PyObject *locals = f->f_locals;
+                       if (PyDict_CheckExact(locals) &&
+                           PyDict_GetItem(locals, name) == v) {
+                               if (PyDict_DelItem(locals, name) != 0) {
+                                       PyErr_Clear();
+                               }
+                       }
+                       break;
+               }
+               }
+       }
+
+       if (v->ob_refcnt == 1) {
+               /* Now we own the last reference to 'v', so we can resize it
+                * in-place.
+                */
+               int v_len = PyString_GET_SIZE(v);
+               int w_len = PyString_GET_SIZE(w);
+               if (_PyString_Resize(&v, v_len + w_len) != 0) {
+                       /* XXX if _PyString_Resize() fails, 'v' has been
+                        * deallocated so it cannot be put back into 'variable'.
+                        * The MemoryError is raised when there is no value in
+                        * 'variable', which might (very remotely) be a cause
+                        * of incompatibilities.
+                        */
+                       return NULL;
+               }
+               /* copy 'w' into the newly allocated area of 'v' */
+               memcpy(PyString_AS_STRING(v) + v_len,
+                      PyString_AS_STRING(w), w_len);
+               return v;
+       }
+       else {
+               /* When in-place resizing is not an option. */
+               PyString_Concat(&v, w);
+               return v;
+       }
+}
+
 #ifdef DYNAMIC_EXECUTION_PROFILE
 
 static PyObject *