Issue #3280: like chr() already does, the "%c" format now accepts the full unicode range (code points above 0xffff), even on "narrow Unicode" builds.

author Amaury Forgeot d'Arc <amauryfa@gmail.com>

Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)

committer Amaury Forgeot d'Arc <amauryfa@gmail.com>

Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)
author Amaury Forgeot d'Arc <amauryfa@gmail.com>
Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)
committer Amaury Forgeot d'Arc <amauryfa@gmail.com>
Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 4c812056ff468e06fe7580f30304b54b37408fad..fb904bf2a462b4dec05a97dd09e13cb2ec5b67c9 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -717,7 +717,10 @@ class UnicodeTest(
          self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
  
          self.assertEqual('%c' % 0x1234, '\u1234')
-        self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))
+        self.assertEqual('%c' % 0x21483, '\U00021483')
+        self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
+        self.assertEqual('%c' % '\U00021483', '\U00021483')
+        self.assertRaises(TypeError, "%c".__mod__, "aa")
  
          # formatting jobs delegated from the string implementation:
          self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS

index 76e95527a4546eaaae3e89a67aed54e9d949859c..40249914736324eb99e7e55b82d7d8824d26eb1e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,11 @@ What's new in Python 3.0b2?
  Core and Builtins
  -----------------
  
+- Issue #3280: like chr(), the "%c" format now accepts unicode code points
+  beyond the Basic Multilingual Plane (above 0xffff) on all configurations. On
+  "narrow Unicode" builds, the result is a string of 2 code units, forming a
+  UTF-16 surrogate pair.
+
  - Issue #3282: str.isprintable() should return False for undefined
    Unicode characters.
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 53dbe5513a221ff3844fc8e8282b76e1e14b8592..9dead636dcbd23ec480e59ae8f8868e7d0335c59 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8730,11 +8730,28 @@ formatchar(Py_UNICODE *buf,
             size_t buflen,
             PyObject *v)
  {
-    /* presume that the buffer is at least 2 characters long */
+    /* presume that the buffer is at least 3 characters long */
      if (PyUnicode_Check(v)) {
-       if (PyUnicode_GET_SIZE(v) != 1)
-           goto onError;
-       buf[0] = PyUnicode_AS_UNICODE(v)[0];
+       if (PyUnicode_GET_SIZE(v) == 1) {
+           buf[0] = PyUnicode_AS_UNICODE(v)[0];
+           buf[1] = '\0';
+           return 1;
+       }
+#ifndef Py_UNICODE_WIDE
+       if (PyUnicode_GET_SIZE(v) == 2) {
+           /* Decode a valid surrogate pair */
+           int c0 = PyUnicode_AS_UNICODE(v)[0];
+           int c1 = PyUnicode_AS_UNICODE(v)[1];
+           if (0xD800 <= c0 && c0 <= 0xDBFF &&
+               0xDC00 <= c1 && c1 <= 0xDFFF) {
+               buf[0] = c0;
+               buf[1] = c1;
+               buf[2] = '\0';
+               return 2;
+           }
+       }
+#endif
+       goto onError;
      }
      else {
         /* Integer input truncated to a character */
@@ -8742,25 +8759,25 @@ formatchar(Py_UNICODE *buf,
         x = PyLong_AsLong(v);
         if (x == -1 && PyErr_Occurred())
             goto onError;
-#ifdef Py_UNICODE_WIDE
+
         if (x < 0 || x > 0x10ffff) {
             PyErr_SetString(PyExc_OverflowError,
-                           "%c arg not in range(0x110000) "
-                           "(wide Python build)");
+                           "%c arg not in range(0x110000)");
             return -1;
         }
-#else
-       if (x < 0 || x > 0xffff) {
-           PyErr_SetString(PyExc_OverflowError,
-                           "%c arg not in range(0x10000) "
-                           "(narrow Python build)");
-           return -1;
+
+#ifndef Py_UNICODE_WIDE
+       if (x > 0xffff) {
+           x -= 0x10000;
+           buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
+           buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
+           return 2;
         }
  #endif
         buf[0] = (Py_UNICODE) x;
+       buf[1] = '\0';
+       return 1;
      }
-    buf[1] = '\0';
-    return 1;
  
   onError:
      PyErr_SetString(PyExc_TypeError,
diff --git a/Python/modsupport.c b/Python/modsupport.c

index b88c1edff287fae2a7642e5d4d71778c041a19b1..e39c315459ec87db6ed9bc3a891f36e3d0f0ae4b 100644 (file)
--- a/Python/modsupport.c
+++ b/Python/modsupport.c
@@ -294,21 +294,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
                 case 'C':
                 {
                         int i = va_arg(*p_va, int);
-                       Py_UNICODE c;
                         if (i < 0 || i > PyUnicode_GetMax()) {
-#ifdef Py_UNICODE_WIDE
                                 PyErr_SetString(PyExc_OverflowError,
-                                               "%c arg not in range(0x110000) "
-                                               "(wide Python build)");
-#else
-                               PyErr_SetString(PyExc_OverflowError,
-                                               "%c arg not in range(0x10000) "
-                                               "(narrow Python build)");
-#endif
+                                               "%c arg not in range(0x110000)");
                                 return NULL;
                         }
-                       c = i;
-                       return PyUnicode_FromUnicode(&c, 1);
+                       return PyUnicode_FromOrdinal(i);
                 }
  
                 case 's':
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>
	Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>
	Fri, 4 Jul 2008 21:26:43 +0000 (21:26 +0000)
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history
Python/modsupport.c		patch \| blob \| history