Issue #15026: utf-16 encoding is now significantly faster (up to 10x).

author Antoine Pitrou <solipsis@pitrou.net>

Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)

committer Antoine Pitrou <solipsis@pitrou.net>

Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)
author Antoine Pitrou <solipsis@pitrou.net>
Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)
committer Antoine Pitrou <solipsis@pitrou.net>
Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index e40cc98d258bdde4e8670bdc0d16701bba5070cc..135e4694b0f8ea2393326f336da08ecb13e13a15 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -188,9 +188,9 @@ typedef unsigned char Py_UCS1;
      (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
        ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
  /* high surrogate = top 10 bits added to D800 */
-#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 | (((ch) - 0x10000) >> 10))
+#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
  /* low surrogate = bottom 10 bits added to DC00 */
-#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 | (((ch) - 0x10000) & 0x3FF))
+#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
  
  /* Check if substring matches at given offset.  The offset must be
     valid, and the substring must not be empty. */
diff --git a/Misc/NEWS b/Misc/NEWS

index d7010443506ba33eccb63882f3918102b76a82ff..6acf02b1a7756aed8efa83913f580c8d15a3bd51 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3.0 Beta 1?
  Core and Builtins
  -----------------
  
+- Issue #15026: utf-16 encoding is now significantly faster (up to 10x).
+  Patch by Serhiy Storchaka.
+
  - Issue #11022: open() and io.TextIOWrapper are now calling
    locale.getpreferredencoding(False) instead of locale.getpreferredencoding()
    in text mode if the encoding is not specified. Don't change temporary the
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h

index 07627d6ff9977a6b5fd22f9151c8af2e006e194c..fb35493b1b915836a818912eb273d5cd2339df77 100644 (file)
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -562,4 +562,68 @@ IllegalSurrogate:
  #undef STRIPPED_MASK
  #undef SWAB
  #undef LONG_PTR_MASK
+
+/* Encode `len` characters read from `in` as UTF-16 code units written to `out`.
+ *
+ * out:             destination, written as 16-bit units.  No bounds check is
+ *                  done here: the caller must provide room for one unit per
+ *                  input character, plus one extra unit for every character
+ *                  >= 0x10000 (only reachable when STRINGLIB_MAX_CHAR is
+ *                  >= 0x10000).
+ * in, len:         source characters of type STRINGLIB_CHAR and their count.
+ * native_ordering: non-zero stores each unit as a plain unsigned short value;
+ *                  zero stores each unit with its two bytes swapped (SWAB2).
+ *
+ * Returns nothing.  The body is selected at compile time from
+ * STRINGLIB_SIZEOF_CHAR and STRINGLIB_MAX_CHAR, so each stringlib
+ * instantiation only contains the branch that applies to its character width.
+ */
+Py_LOCAL_INLINE(void)
+STRINGLIB(utf16_encode)(unsigned short *out,
+                        const STRINGLIB_CHAR *in,
+                        Py_ssize_t len,
+                        int native_ordering)
+{
+    const STRINGLIB_CHAR *end = in + len;
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define SWAB2(CH)  ((CH) << 8)  /* 1-byte char: upper byte is zero, so the shift alone is a full swap */
+#else
+# define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  /* bits above 16 are dropped when stored to *out */
+#endif
+#if STRINGLIB_MAX_CHAR < 0x10000  /* no surrogate pairs possible: exactly one unit per char */
+    if (native_ordering) {
+# if STRINGLIB_SIZEOF_CHAR == 2
+        Py_MEMCPY(out, in, 2 * len);  /* same width and byte order: raw copy of 2*len bytes */
+# else
+        _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out);  /* NOTE(review): presumably widens each char to unsigned short; macro is defined elsewhere */
+# endif
+    } else {
+        const STRINGLIB_CHAR *unrolled_end = in + (len & ~ (Py_ssize_t) 3);  /* len rounded down to a multiple of 4 */
+        while (in < unrolled_end) {  /* swap four chars per iteration */
+            out[0] = SWAB2(in[0]);
+            out[1] = SWAB2(in[1]);
+            out[2] = SWAB2(in[2]);
+            out[3] = SWAB2(in[3]);
+            in += 4; out += 4;
+        }
+        while (in < end) {  /* remaining 0-3 chars */
+            *out++ = SWAB2(*in);
+            ++in;
+        }
+    }
+#else  /* chars >= 0x10000 may occur and are written as a high/low surrogate pair */
+    if (native_ordering) {
+        while (in < end) {
+            Py_UCS4 ch = *in++;
+            if (ch < 0x10000)
+                *out++ = ch;
+            else {
+                out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+                out[1] = Py_UNICODE_LOW_SURROGATE(ch);
+                out += 2;
+            }
+        }
+    } else {
+        while (in < end) {
+            Py_UCS4 ch = *in++;
+            if (ch < 0x10000)
+                *out++ = SWAB2((Py_UCS2)ch);
+            else {
+                Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  /* narrow to 16 bits before swapping */
+                Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
+                out[0] = SWAB2(ch1);
+                out[1] = SWAB2(ch2);
+                out += 2;
+            }
+        }
+    }
+#endif
+#undef SWAB2  /* keep the helper macro local to this function */
+}
  #endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index a1efec081107a8d8ee83ce05a30274db2a7f5c4b..c974ffe5ded716ad6494a257c5a17cf2a49d0a6b 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5359,27 +5359,19 @@ _PyUnicode_EncodeUTF16(PyObject *str,
                         const char *errors,
                         int byteorder)
  {
-    int kind;
-    void *data;
+    enum PyUnicode_Kind kind;
+    const void *data;
      Py_ssize_t len;
      PyObject *v;
-    unsigned char *p;
-    Py_ssize_t nsize, bytesize;
-    Py_ssize_t i, pairs;
-    /* Offsets from p for storing byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int ihi = 1, ilo = 0;
+    unsigned short *out;
+    Py_ssize_t bytesize;
+    Py_ssize_t pairs;
+#ifdef WORDS_BIGENDIAN
+    int native_ordering = byteorder >= 0;
  #else
-    int ihi = 0, ilo = 1;
+    int native_ordering = byteorder <= 0;
  #endif
  
-#define STORECHAR(CH)                           \
-    do {                                        \
-        p[ihi] = ((CH) >> 8) & 0xff;            \
-        p[ilo] = (CH) & 0xff;                   \
-        p += 2;                                 \
-    } while(0)
-
      if (!PyUnicode_Check(str)) {
          PyErr_BadArgument();
          return NULL;
@@ -5391,53 +5383,47 @@ _PyUnicode_EncodeUTF16(PyObject *str,
      len = PyUnicode_GET_LENGTH(str);
  
      pairs = 0;
-    if (kind == PyUnicode_4BYTE_KIND)
-        for (i = 0; i < len; i++)
-            if (PyUnicode_READ(kind, data, i) >= 0x10000)
+    if (kind == PyUnicode_4BYTE_KIND) {
+        const Py_UCS4 *in = (const Py_UCS4 *)data;
+        const Py_UCS4 *end = in + len;
+        while (in < end)
+            if (*in++ >= 0x10000)
                  pairs++;
-    /* 2 * (len + pairs + (byteorder == 0)) */
-    if (len > PY_SSIZE_T_MAX - pairs - (byteorder == 0))
-        return PyErr_NoMemory();
-    nsize = len + pairs + (byteorder == 0);
-    bytesize = nsize * 2;
-    if (bytesize / 2 != nsize)
+    }
+    if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0))
          return PyErr_NoMemory();
+    bytesize = (len + pairs + (byteorder == 0)) * 2;
      v = PyBytes_FromStringAndSize(NULL, bytesize);
      if (v == NULL)
          return NULL;
  
-    p = (unsigned char *)PyBytes_AS_STRING(v);
+    /* output buffer is 2-bytes aligned */
+    assert(((Py_uintptr_t)PyBytes_AS_STRING(v) & 1) == 0);
+    out = (unsigned short *)PyBytes_AS_STRING(v);
      if (byteorder == 0)
-        STORECHAR(0xFEFF);
+        *out++ = 0xFEFF;
      if (len == 0)
          goto done;
  
-    if (byteorder == -1) {
-        /* force LE */
-        ihi = 1;
-        ilo = 0;
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+        ucs1lib_utf16_encode(out, (const Py_UCS1 *)data, len, native_ordering);
+        break;
      }
-    else if (byteorder == 1) {
-        /* force BE */
-        ihi = 0;
-        ilo = 1;
+    case PyUnicode_2BYTE_KIND: {
+        ucs2lib_utf16_encode(out, (const Py_UCS2 *)data, len, native_ordering);
+        break;
      }
-
-    for (i = 0; i < len; i++) {
-        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        Py_UCS4 ch2 = 0;
-        if (ch >= 0x10000) {
-            ch2 = Py_UNICODE_LOW_SURROGATE(ch);
-            ch  = Py_UNICODE_HIGH_SURROGATE(ch);
-        }
-        STORECHAR(ch);
-        if (ch2)
-            STORECHAR(ch2);
+    case PyUnicode_4BYTE_KIND: {
+        ucs4lib_utf16_encode(out, (const Py_UCS4 *)data, len, native_ordering);
+        break;
+    }
+    default:
+        assert(0);
      }
  
    done:
      return v;
-#undef STORECHAR
  }
  
  PyObject *
author	Antoine Pitrou <solipsis@pitrou.net>
	Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)
committer	Antoine Pitrou <solipsis@pitrou.net>
	Fri, 15 Jun 2012 20:15:23 +0000 (22:15 +0200)
Include/unicodeobject.h		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/stringlib/codecs.h		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history