Issue #16061: Speed up str.replace() for replacing 1-character strings.

author Serhiy Storchaka <storchaka@gmail.com>

Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)
diff --git a/Makefile.pre.in b/Makefile.pre.in

index 534ddc0dc550c3b5ebc6d44681dadc2b9bb168ed..ab56e3f4e5d5e2baa374369a4886210052ab51b3 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -726,6 +726,7 @@ UNICODE_DEPS = \
                 $(srcdir)/Objects/stringlib/find_max_char.h \
                 $(srcdir)/Objects/stringlib/localeutil.h \
                 $(srcdir)/Objects/stringlib/partition.h \
+               $(srcdir)/Objects/stringlib/replace.h \
                 $(srcdir)/Objects/stringlib/split.h \
                 $(srcdir)/Objects/stringlib/ucs1lib.h \
                 $(srcdir)/Objects/stringlib/ucs2lib.h \
diff --git a/Misc/NEWS b/Misc/NEWS

index c6188de24b0d8afc8cb2874c3e3be39a106f9757..1889ac25d0cddb630488db468170049dacac8299 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1?
  Core and Builtins
  -----------------
  
+- Issue #16061: Speed up str.replace() for replacing 1-character strings.
+
  - Issue #17715: Fix segmentation fault from raising an exception in a __trunc__
    method.
  
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h

new file mode 100644 (file)

index 0000000..ef318ed
--- /dev/null
+++ b/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(void)   /* in-place replacement of u1 by u2 in [s, end) */
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    *s = u2;    /* unconditional: s is assumed to already point at u1 */
+    while (--maxcount && ++s != end) {  /* count limit or buffer end */
+        /* Find the next character to be replaced.
+
+           If it occurs often, it is faster to scan for it using an inline
+           loop.  If it occurs seldom, it is faster to scan for it using a
+           function call; the overhead of the function call is amortized
+           across the many characters that call covers.  We start with an
+           inline loop and use a heuristic to determine whether to fall back
+           to a function call. */
+        if (*s != u1) {
+            int attempts = 10;  /* inline probes before the fallback */
+            /* scan for u1 inline; left only via break or return */
+            while (1) {
+                if (++s == end)
+                    return;     /* reached end without seeing u1 */
+                if (*s == u1)
+                    break;
+                if (!--attempts) {
+                    /* u1 not found in `attempts` inline probes: search the
+                       rest with memchr() (1-byte chars) or FASTSEARCH() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    s++;    /* the current *s was already compared */
+                    s = memchr(s, u1, end - s);
+                    if (s == NULL)
+                        return;
+#else
+                    Py_ssize_t i;
+                    STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+                    s++;    /* the current *s was already compared */
+                    i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+                    if (i < 0)
+                        return;
+                    s += i; /* i is used as an offset from s */
+#endif
+                    /* leave the scan loop; s is used as the match position */
+                    break;
+                }
+            }
+        }
+        *s = u2;
+    }
+}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index e52571db2e83d89c036951de64cf0a4e9625145f..3688f4a789f01b74aa8e8ec2d49a87e90b325062 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -605,6 +605,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
  #include "stringlib/split.h"
  #include "stringlib/count.h"
  #include "stringlib/find.h"
+#include "stringlib/replace.h"
  #include "stringlib/find_max_char.h"
  #include "stringlib/localeutil.h"
  #include "stringlib/undef.h"
@@ -615,6 +616,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
  #include "stringlib/split.h"
  #include "stringlib/count.h"
  #include "stringlib/find.h"
+#include "stringlib/replace.h"
  #include "stringlib/find_max_char.h"
  #include "stringlib/localeutil.h"
  #include "stringlib/undef.h"
@@ -625,6 +627,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
  #include "stringlib/split.h"
  #include "stringlib/count.h"
  #include "stringlib/find.h"
+#include "stringlib/replace.h"
  #include "stringlib/find_max_char.h"
  #include "stringlib/localeutil.h"
  #include "stringlib/undef.h"
@@ -9927,6 +9930,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
      return 0;
  }
  
+static void   /* dispatch to the ucs1/ucs2/ucs4 helper matching u's kind */
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+                      Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    int kind = PyUnicode_KIND(u);
+    void *data = PyUnicode_DATA(u);
+    Py_ssize_t len = PyUnicode_GET_LENGTH(u);
+    if (kind == PyUnicode_1BYTE_KIND) {
+        /* each helper receives the range [data + pos, data + len) */
+        ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos,
+                                      (Py_UCS1 *)data + len,
+                                      u1, u2, maxcount);
+    }
+    else if (kind == PyUnicode_2BYTE_KIND) {
+        ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos,
+                                      (Py_UCS2 *)data + len,
+                                      u1, u2, maxcount);
+    }
+    else {  /* only the 4-byte kind is expected here (asserted below) */
+        assert(kind == PyUnicode_4BYTE_KIND);
+        ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos,
+                                      (Py_UCS4 *)data + len,
+                                      u1, u2, maxcount);
+    }
+}
+
  static PyObject *
  replace(PyObject *self, PyObject *str1,
          PyObject *str2, Py_ssize_t maxcount)
@@ -9943,7 +9971,7 @@ replace(PyObject *self, PyObject *str1,
      Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
      Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
      int mayshrink;
-    Py_UCS4 maxchar, maxchar_str2;
+    Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
  
      if (maxcount < 0)
          maxcount = PY_SSIZE_T_MAX;
@@ -9952,15 +9980,16 @@ replace(PyObject *self, PyObject *str1,
  
      if (str1 == str2)
          goto nothing;
-    if (skind < kind1)
-        /* substring too wide to be present */
-        goto nothing;
  
      maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+    maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+    if (maxchar < maxchar_str1)
+        /* substring too wide to be present */
+        goto nothing;
      maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
      /* Replacing str1 with str2 may cause a maxchar reduction in the
         result string. */
-    mayshrink = (maxchar_str2 < maxchar);
+    mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
      maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
  
      if (len1 == len2) {
@@ -9970,36 +9999,19 @@ replace(PyObject *self, PyObject *str1,
          if (len1 == 1) {
              /* replace characters */
              Py_UCS4 u1, u2;
-            int rkind;
-            Py_ssize_t index, pos;
-            char *src, *rbuf;
+            Py_ssize_t pos;
  
              u1 = PyUnicode_READ(kind1, buf1, 0);
-            pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+            pos = findchar(sbuf, skind, slen, u1, 1);
              if (pos < 0)
                  goto nothing;
              u2 = PyUnicode_READ(kind2, buf2, 0);
              u = PyUnicode_New(slen, maxchar);
              if (!u)
                  goto error;
-            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
-            rkind = PyUnicode_KIND(u);
-            rbuf = PyUnicode_DATA(u);
  
-            PyUnicode_WRITE(rkind, rbuf, pos, u2);
-            index = 0;
-            src = sbuf;
-            while (--maxcount)
-            {
-                pos++;
-                src += pos * PyUnicode_KIND(self);
-                slen -= pos;
-                index += pos;
-                pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
-                if (pos < 0)
-                    break;
-                PyUnicode_WRITE(rkind, rbuf, index + pos, u2);
-            }
+            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
+            replace_1char_inplace(u, pos, u1, u2, maxcount);
          }
          else {
              int rkind = skind;
diff --git a/PC/VS9.0/pythoncore.vcproj b/PC/VS9.0/pythoncore.vcproj

index 9fb63ff105b0d7105d5eec104f11e779f33a828e..ab7feca6796ff6a3efd6fe930618af4e7c4d2cdf 100644 (file)
--- a/PC/VS9.0/pythoncore.vcproj
+++ b/PC/VS9.0/pythoncore.vcproj
@@ -1586,6 +1586,10 @@
                                 RelativePath="..\..\Objects\rangeobject.c"\r
                                 >\r
                         </File>\r
+                       <File\r
+                               RelativePath="..\..\Objects\stringlib\replace.h"\r
+                               >\r
+                       </File>\r
                         <File\r
                                 RelativePath="..\..\Objects\setobject.c"\r
                                 >\r
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj

index 29493e399b73c963160a7c0f69f17ea3b2c7f639..b00991e0c7027df0b36cb18d68563c9f1ba41ad8 100644 (file)
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -475,6 +475,7 @@
      <ClInclude Include="..\Objects\stringlib\fastsearch.h" />
      <ClInclude Include="..\Objects\stringlib\find.h" />
      <ClInclude Include="..\Objects\stringlib\partition.h" />
+    <ClInclude Include="..\Objects\stringlib\replace.h" />
      <ClInclude Include="..\Objects\stringlib\split.h" />
      <ClInclude Include="..\Objects\unicodetype_db.h" />
      <ClInclude Include="..\Parser\parser.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters

index 09b4bb4d027ee8363e6766cb8a8bf232291d8808..915fec5049146250ba707f5326098853a87c8337 100644 (file)
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -378,6 +378,9 @@
      <ClInclude Include="..\Objects\stringlib\partition.h">
        <Filter>Objects</Filter>
      </ClInclude>
+    <ClInclude Include="..\Objects\stringlib\replace.h">
+      <Filter>Objects</Filter>
+    </ClInclude>
      <ClInclude Include="..\Objects\stringlib\split.h">
        <Filter>Objects</Filter>
      </ClInclude>
author	Serhiy Storchaka <storchaka@gmail.com>
	Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sat, 13 Apr 2013 19:45:04 +0000 (22:45 +0300)
Makefile.pre.in		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/stringlib/replace.h	[new file with mode: 0644]	patch \| blob
Objects/unicodeobject.c		patch \| blob \| history
PC/VS9.0/pythoncore.vcproj		patch \| blob \| history
PCbuild/pythoncore.vcxproj		patch \| blob \| history
PCbuild/pythoncore.vcxproj.filters		patch \| blob \| history