granicus.if.org Git - python/commitdiff
Issue #13333: The UTF-7 decoder now accepts lone surrogates
author Antoine Pitrou <solipsis@pitrou.net>
Tue, 15 Nov 2011 00:44:16 +0000 (01:44 +0100)
committer Antoine Pitrou <solipsis@pitrou.net>
Tue, 15 Nov 2011 00:44:16 +0000 (01:44 +0100)
(the encoder already accepts them).

1  2 
Lib/test/test_unicode.py
Misc/NEWS
Objects/unicodeobject.c

Simple merge
diff --cc Misc/NEWS
index 082da2a003ced3a387be6658ef00e61e9fc24333,4fb9ff6305a135d9074696e768545442d14ae73e..d03f761e83754fe2b3db74394ef6a6b371551f55
+++ b/Misc/NEWS
@@@ -10,18 -10,9 +10,21 @@@ What's New in Python 3.3 Alpha 1
  Core and Builtins
  -----------------
  
+ - Issue #13333: The UTF-7 decoder now accepts lone surrogates (the encoder
+   already accepts them).
 +- Issue #13389: Full garbage collection passes now clear the freelists for
 +  list and dict objects.  They already cleared other freelists in the
 +  interpreter.
 +
 +- Issue #13327: Remove the need for an explicit None as the second argument
 +  to os.utime, os.lutimes, os.futimes, os.futimens, os.futimesat, in
 +  order to update to the current time. Also added keyword argument
 +  handling to os.utimensat in order to remove the need for explicit None.
 +
 +- Issue #13350: Simplify some C code by replacing most usages of
 +  PyUnicode_Format by PyUnicode_FromFormat.
 +
  - Issue #13342: input() used to ignore sys.stdin's and sys.stdout's unicode
    error handler in interactive mode (when calling into PyOS_Readline()).
  
index 6b245aa1254c48c92fda943575ea4fae0f43a8bb,8680726275e7cb2334ff12ddef63f1f499ac5e4d..cdad738d4795ff3f47512d24b61a6afc4864915c
@@@ -3879,29 -2274,27 +3879,26 @@@ PyUnicode_DecodeUTF7Stateful(const cha
                      if (surrogate) {
                          /* expecting a second surrogate */
                          if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
 -#ifdef Py_UNICODE_WIDE
 -                            *p++ = (((surrogate & 0x3FF)<<10)
 -                                    | (outCh & 0x3FF)) + 0x10000;
 -#else
 -                            *p++ = surrogate;
 -                            *p++ = outCh;
 -#endif
 +                            Py_UCS4 ch2 = (((surrogate & 0x3FF)<<10)
 +                                           | (outCh & 0x3FF)) + 0x10000;
 +                            if (unicode_putchar(&unicode, &outpos, ch2) < 0)
 +                                goto onError;
                              surrogate = 0;
+                             continue;
                          }
                          else {
 -                            *p++ = surrogate;
++                            if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
++                                goto onError;
                              surrogate = 0;
-                             errmsg = "second surrogate missing";
-                             goto utf7Error;
                          }
                      }
-                     else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
+                     if (outCh >= 0xD800 && outCh <= 0xDBFF) {
                          /* first surrogate */
                          surrogate = outCh;
                      }
-                     else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
-                         errmsg = "unexpected second surrogate";
-                         goto utf7Error;
-                     }
                      else {
 -                        *p++ = outCh;
 +                        if (unicode_putchar(&unicode, &outpos, outCh) < 0)
 +                            goto onError;
                      }
                  }
              }
                  inShift = 0;
                  s++;
                  if (surrogate) {
-                     errmsg = "second surrogate missing at end of shift sequence";
-                     goto utf7Error;
 -                    *p++ = surrogate;
++                    if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
++                        goto onError;
+                     surrogate = 0;
                  }
                  if (base64bits > 0) { /* left-over bits */
                      if (base64bits >= 6) {