granicus.if.org Git - python/commitdiff
Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 15 Jan 2013 13:01:20 +0000 (15:01 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 15 Jan 2013 13:01:20 +0000 (15:01 +0200)
in any mapping, not only in a unicode string.

1  2 
Lib/test/test_codecs.py
Misc/NEWS
Objects/unicodeobject.c

Simple merge
diff --cc Misc/NEWS
index ae3a67b8aa744af6e1da99251be687225496ca50,45c059817c86758a6179c788a0a46b382b0ccf71..c1ff58d5a7d383d91045d231d0eb2e1d25342d2f
+++ b/Misc/NEWS
@@@ -12,14 -10,9 +12,17 @@@ What's New in Python 3.3.1
  Core and Builtins
  -----------------
  
+ - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
+   in any mapping, not only in a string.
 +- Issue #16730: importlib.machinery.FileFinder now no longer raises an
 +  exception when trying to populate its cache and it finds out the directory is
 +  unreadable or has turned into a file. Reported and diagnosed by
 +  David Pritchard.
 +
 +- Issue #16906: Fix a logic error that prevented most static strings from being
 +  cleared.
 +
  - Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
    Amaury Forgeot d'Arc.
  
index 16d59292eff2e0586ae1a4df8967c20680a79ffe,e1df874c0b1eba0bb5922e278d64a5b4dcf9c742..044b26e7fc109a9a906bb4c7d1c4c00e2af3aecb
@@@ -7518,45 -5251,53 +7517,36 @@@ Error
              }
  
              /* Apply mapping */
+             if (x == Py_None)
+                 goto Undefined;
              if (PyLong_Check(x)) {
                  long value = PyLong_AS_LONG(x);
 -                if (value < 0 || value > 0x10FFFF) {
 -                    PyErr_SetString(PyExc_TypeError,
 -                                    "character mapping must be in range(0x110000)");
+                 if (value == 0xFFFE)
+                     goto Undefined;
 +                if (value < 0 || value > MAX_UNICODE) {
 +                    PyErr_Format(PyExc_TypeError,
 +                                 "character mapping must be in range(0x%lx)",
 +                                 (unsigned long)MAX_UNICODE + 1);
                      Py_DECREF(x);
                      goto onError;
                  }
 -
 -#ifndef Py_UNICODE_WIDE
 -                if (value > 0xFFFF) {
 -                    /* see the code for 1-n mapping below */
 -                    if (extrachars < 2) {
 -                        /* resize first */
 -                        Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
 -                        Py_ssize_t needed = 10 - extrachars;
 -                        extrachars += needed;
 -                        /* XXX overflow detection missing */
 -                        if (_PyUnicode_Resize(&v,
 -                                              PyUnicode_GET_SIZE(v) + needed) < 0) {
 -                            Py_DECREF(x);
 -                            goto onError;
 -                        }
 -                        p = PyUnicode_AS_UNICODE(v) + oldpos;
 -                    }
 -                    value -= 0x10000;
 -                    *p++ = 0xD800 | (value >> 10);
 -                    *p++ = 0xDC00 | (value & 0x3FF);
 -                    extrachars -= 2;
 -                }
 -                else
 -#endif
 -                *p++ = (Py_UNICODE)value;
 +                if (unicode_putchar(&v, &outpos, value) < 0)
 +                    goto onError;
              }
-             else if (x == Py_None) {
-                 /* undefined mapping */
-                 startinpos = s-starts;
-                 endinpos = startinpos+1;
-                 if (unicode_decode_call_errorhandler(
-                         errors, &errorHandler,
-                         "charmap", "character maps to <undefined>",
-                         &starts, &e, &startinpos, &endinpos, &exc, &s,
-                         &v, &outpos)) {
-                     Py_DECREF(x);
-                     goto onError;
-                 }
-                 Py_DECREF(x);
-                 continue;
-             }
              else if (PyUnicode_Check(x)) {
 -                Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
 +                Py_ssize_t targetsize;
 +
 +                if (PyUnicode_READY(x) == -1)
 +                    goto onError;
 +                targetsize = PyUnicode_GET_LENGTH(x);
  
                  if (targetsize == 1) {
                      /* 1-1 mapping */
-                     if (unicode_putchar(&v, &outpos,
-                                         PyUnicode_READ_CHAR(x, 0)) < 0)
 -                    Py_UNICODE value = *PyUnicode_AS_UNICODE(x);
++                    Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
+                     if (value == 0xFFFE)
+                         goto Undefined;
 -                    *p++ = value;
++                    if (unicode_putchar(&v, &outpos, value) < 0)
 +                        goto onError;
                  }
                  else if (targetsize > 1) {
                      /* 1-n mapping */
              }
              Py_DECREF(x);
              ++s;
 -            outpos = p-PyUnicode_AS_UNICODE(v);
+             continue;
+ Undefined:
+             /* undefined mapping */
+             Py_XDECREF(x);
 -                    &v, &outpos, &p)) {
+             startinpos = s-starts;
+             endinpos = startinpos+1;
+             if (unicode_decode_call_errorhandler(
+                     errors, &errorHandler,
+                     "charmap", "character maps to <undefined>",
+                     &starts, &e, &startinpos, &endinpos, &exc, &s,
++                    &v, &outpos)) {
+                 goto onError;
+             }
          }
      }
 -    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
 -        if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
 -            goto onError;
 +    if (unicode_resize(&v, outpos) < 0)
 +        goto onError;
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
 -    return (PyObject *)v;
 +    return unicode_result(v);
  
    onError:
      Py_XDECREF(errorHandler);