granicus.if.org Git - python/commitdiff
Issue #4916: fix little-endian UTF-16 decoding bug on big-endian UCS-4 builds, introd...
author: Antoine Pitrou <solipsis@pitrou.net>
Sun, 11 Jan 2009 23:56:55 +0000 (23:56 +0000)
committer: Antoine Pitrou <solipsis@pitrou.net>
Sun, 11 Jan 2009 23:56:55 +0000 (23:56 +0000)
Objects/unicodeobject.c

index bc1612dcf6551655ec154d5abc144f080840262a..c668d0cf162de79166ca64c23c06343828528f70 100644 (file)
@@ -2791,16 +2791,24 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
                     _p[3] = 0;
 #endif
 #endif
-                    ((unsigned char *) _p)[1] = _q[0];
-                    ((unsigned char *) _p)[0] = _q[1];
-                    ((unsigned char *) _p)[1 + Py_UNICODE_SIZE] = _q[2];
-                    ((unsigned char *) _p)[0 + Py_UNICODE_SIZE] = _q[3];
+                    /* Issue #4916: UCS-4 builds on big-endian machines must
+                       fill the last two bytes of each 4-byte unit. */
+#if (!defined(BYTEORDER_IS_LITTLE_ENDIAN) && Py_UNICODE_SIZE > 2)
+# define OFF 2
+#else
+# define OFF 0
+#endif
+                    ((unsigned char *) _p)[OFF + 1] = _q[0];
+                    ((unsigned char *) _p)[OFF + 0] = _q[1];
+                    ((unsigned char *) _p)[OFF + 1 + Py_UNICODE_SIZE] = _q[2];
+                    ((unsigned char *) _p)[OFF + 0 + Py_UNICODE_SIZE] = _q[3];
 #if (SIZEOF_LONG == 8)
-                    ((unsigned char *) _p)[1 + 2 * Py_UNICODE_SIZE] = _q[4];
-                    ((unsigned char *) _p)[0 + 2 * Py_UNICODE_SIZE] = _q[5];
-                    ((unsigned char *) _p)[1 + 3 * Py_UNICODE_SIZE] = _q[6];
-                    ((unsigned char *) _p)[0 + 3 * Py_UNICODE_SIZE] = _q[7];
+                    ((unsigned char *) _p)[OFF + 1 + 2 * Py_UNICODE_SIZE] = _q[4];
+                    ((unsigned char *) _p)[OFF + 0 + 2 * Py_UNICODE_SIZE] = _q[5];
+                    ((unsigned char *) _p)[OFF + 1 + 3 * Py_UNICODE_SIZE] = _q[6];
+                    ((unsigned char *) _p)[OFF + 0 + 3 * Py_UNICODE_SIZE] = _q[7];
 #endif
+#undef OFF
                     _q += SIZEOF_LONG;
                     _p += SIZEOF_LONG / 2;
                 }