Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF

author Victor Stinner <victor.stinner@haypocalc.com>

Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)

committer Victor Stinner <victor.stinner@haypocalc.com>

Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)
author Victor Stinner <victor.stinner@haypocalc.com>
Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index debcddcf95cdb5bacc5ccc9a1d828bfa2d8edf43..5eb331a73cb21963e14f27bfbaf679453e9775d7 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -395,6 +395,19 @@ class UnicodeTest(
          self.assertEqual(u'%c' % 0x1234, u'\u1234')
          self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
  
+        for num in range(0x00,0x80):
+            char = chr(num)
+            self.assertEqual(u"%c" % char, char)
+            self.assertEqual(u"%c" % num, char)
+        # Issue 7649
+        for num in range(0x80,0x100):
+            uchar = unichr(num)
+            self.assertEqual(uchar, u"%c" % num)   # works only with ints
+            self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
+            # the implicit decoding should fail for non-ascii chars
+            self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+            self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
          # formatting jobs delegated from the string implementation:
          self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
          self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS

index dd2346be0b9ba74ef4039ce946a6b597aad6a2a2..5dc3a7b610b42f9c2c96562ed89b00d2b174f3f5 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 4?
  Core and Builtins
  -----------------
  
+- Issue #7649: Fix u'%c' % char for a byte string char in range 0x80..0xFF:
+  it now raises a UnicodeDecodeError
+
  - Issue #6902: Fix problem with built-in types format incorrectly with
    0 padding.
  
@@ -249,7 +252,7 @@ Tests
  
  Documentation
  -------------
- 
+
  - Updating `Using Python` documentation to include description of CPython's
    -J, -U and -X options.
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 130ca48463c7dd2963afb7485bf90435d4bea206..d80ff714ae240bec8d4c403c94daaee4491f81c4 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8170,6 +8170,7 @@ formatchar(Py_UNICODE *buf,
             size_t buflen,
             PyObject *v)
  {
+    PyObject *s;
      /* presume that the buffer is at least 2 characters long */
      if (PyUnicode_Check(v)) {
          if (PyUnicode_GET_SIZE(v) != 1)
@@ -8180,7 +8181,14 @@ formatchar(Py_UNICODE *buf,
      else if (PyString_Check(v)) {
          if (PyString_GET_SIZE(v) != 1)
              goto onError;
-        buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+        /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+           string, "u'%c' % char" should fail with a UnicodeDecodeError */
+        s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+        /* if the char is not decodable return -1 */
+        if (s == NULL)
+            return -1;
+        buf[0] = PyUnicode_AS_UNICODE(s)[0];
+        Py_DECREF(s);
      }
  
      else {
author	Victor Stinner <victor.stinner@haypocalc.com>
	Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Tue, 23 Feb 2010 23:16:07 +0000 (23:16 +0000)
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history