granicus.if.org Git - python/commitdiff
Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping"
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 15 Jan 2013 13:30:04 +0000 (15:30 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 15 Jan 2013 13:30:04 +0000 (15:30 +0200)
in any mapping, not only in a unicode string.

1  2 
Lib/test/test_codecs.py
Misc/NEWS
Objects/unicodeobject.c

Simple merge
diff --cc Misc/NEWS
Simple merge
index 65393d2efadda0794dd442670d5b2664171fab35,044b26e7fc109a9a906bb4c7d1c4c00e2af3aecb..bd0dbb47ad033ee17918dd5b32ed4ff225c9b9a1
@@@ -7409,31 -7530,46 +7412,29 @@@ Error
                      Py_DECREF(x);
                      goto onError;
                  }
 -                if (unicode_putchar(&v, &outpos, value) < 0)
 +
 +                if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1)
                      goto onError;
 +                PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value);
 +                writer.pos++;
              }
-             else if (x == Py_None) {
-                 /* undefined mapping */
-                 startinpos = s-starts;
-                 endinpos = startinpos+1;
-                 if (unicode_decode_call_errorhandler_writer(
-                         errors, &errorHandler,
-                         "charmap", "character maps to <undefined>",
-                         &starts, &e, &startinpos, &endinpos, &exc, &s,
-                         &writer)) {
-                     Py_DECREF(x);
-                     goto onError;
-                 }
-                 Py_DECREF(x);
-                 continue;
-             }
              else if (PyUnicode_Check(x)) {
-                 writer.overallocate = 1;
-                 if (_PyUnicodeWriter_WriteStr(&writer, x) == -1)
 -                Py_ssize_t targetsize;
 -
+                 if (PyUnicode_READY(x) == -1)
                      goto onError;
 -                targetsize = PyUnicode_GET_LENGTH(x);
 -
 -                if (targetsize == 1) {
 -                    /* 1-1 mapping */
++                if (PyUnicode_GET_LENGTH(x) == 1) {
+                     Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
+                     if (value == 0xFFFE)
+                         goto Undefined;
 -                    if (unicode_putchar(&v, &outpos, value) < 0)
++                    if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1)
+                         goto onError;
++                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value);
++                    writer.pos++;
+                 }
 -                else if (targetsize > 1) {
 -                    /* 1-n mapping */
 -                    if (targetsize > extrachars) {
 -                        /* resize first */
 -                        Py_ssize_t needed = (targetsize - extrachars) + \
 -                            (targetsize << 2);
 -                        extrachars += needed;
 -                        /* XXX overflow detection missing */
 -                        if (unicode_resize(&v,
 -                                           PyUnicode_GET_LENGTH(v) + needed) < 0)
 -                        {
 -                            Py_DECREF(x);
 -                            goto onError;
 -                        }
 -                    }
 -                    if (unicode_widen(&v, outpos, PyUnicode_MAX_CHAR_VALUE(x)) < 0)
++                else {
++                    writer.overallocate = 1;
++                    if (_PyUnicodeWriter_WriteStr(&writer, x) == -1)
+                         goto onError;
 -                    PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize);
 -                    outpos += targetsize;
 -                    extrachars -= targetsize;
+                 }
 -                /* 1-0 mapping: skip the character */
              }
              else {
                  /* wrong return value */
              }
              Py_DECREF(x);
              ++s;
 -            if (unicode_decode_call_errorhandler(
+             continue;
+ Undefined:
+             /* undefined mapping */
+             Py_XDECREF(x);
+             startinpos = s-starts;
+             endinpos = startinpos+1;
 -                    &v, &outpos)) {
++            if (unicode_decode_call_errorhandler_writer(
+                     errors, &errorHandler,
+                     "charmap", "character maps to <undefined>",
+                     &starts, &e, &startinpos, &endinpos, &exc, &s,
++                    &writer)) {
+                 goto onError;
+             }
          }
      }
 -    if (unicode_resize(&v, outpos) < 0)
 -        goto onError;
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
 -    return unicode_result(v);
 +    return _PyUnicodeWriter_Finish(&writer);
  
    onError:
      Py_XDECREF(errorHandler);