]> granicus.if.org Git - python/commitdiff
Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
authorVictor Stinner <victor.stinner@haypocalc.com>
Sat, 4 Dec 2010 17:24:33 +0000 (17:24 +0000)
committerVictor Stinner <victor.stinner@haypocalc.com>
Sat, 4 Dec 2010 17:24:33 +0000 (17:24 +0000)
UnicodeEncodeError.

Doc/library/sys.rst
Lib/test/test_cmd_line.py
Misc/NEWS
Python/sysmodule.c

index c7aa2142260e183f066c1897244b8e15ceddea53..95d1cf91f50354317e1dbf6e445733da628db3cb 100644 (file)
@@ -99,13 +99,39 @@ always available.
 
 .. function:: displayhook(value)
 
-   If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
-   it in ``builtins._``.
+   If *value* is not ``None``, this function prints ``repr(value)`` to
+   ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` cannot
+   be encoded to ``sys.stdout.encoding`` with the ``sys.stdout.errors`` error
+   handler (which is probably ``'strict'``), it is encoded to
+   ``sys.stdout.encoding`` with the ``'backslashreplace'`` error handler instead.
 
    ``sys.displayhook`` is called on the result of evaluating an :term:`expression`
    entered in an interactive Python session.  The display of these values can be
    customized by assigning another one-argument function to ``sys.displayhook``.
 
+   Pseudo-code::
+
+       def displayhook(value):
+           # None results are not echoed and leave builtins._ untouched
+           if value is None:
+               return
+           # Set '_' to None to avoid recursion
+           builtins._ = None
+           text = repr(value)
+           try:
+               sys.stdout.write(text)
+           except UnicodeEncodeError:
+               # Retry with unencodable characters written as backslash escapes
+               bytes = text.encode(sys.stdout.encoding, 'backslashreplace')
+               if hasattr(sys.stdout, 'buffer'):
+                   # Binary layer available: write the encoded bytes directly
+                   sys.stdout.buffer.write(bytes)
+               else:
+                   # Text-only stream: decode the escaped bytes back to str
+                   text = bytes.decode(sys.stdout.encoding, 'strict')
+                   sys.stdout.write(text)
+           sys.stdout.write("\n")
+           builtins._ = value
+
+   .. versionchanged:: 3.2
+      Use the ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
+
 
 .. function:: excepthook(type, value, traceback)
 
index e1fe7f5bdee77981626016619d38607c87864307..b21b61e6590e1db378aad4c5f978e49d8a562796 100644 (file)
@@ -221,6 +221,24 @@ class CmdLineTest(unittest.TestCase):
         self.assertIn(path1.encode('ascii'), out)
         self.assertIn(path2.encode('ascii'), out)
 
+    def test_displayhook_unencodable(self):
+        for encoding in ('ascii', 'latin1', 'utf8'):
+            env = os.environ.copy()
+            env['PYTHONIOENCODING'] = encoding
+            p = subprocess.Popen(
+                [sys.executable, '-i'],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                env=env)
+            # non-ascii, surrogate, non-BMP printable, non-BMP unprintable
+            text = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
+            p.stdin.write(ascii(text).encode('ascii') + b"\n")
+            p.stdin.write(b'exit()\n')
+            data = kill_python(p)
+            escaped = repr(text).encode(encoding, 'backslashreplace')
+            self.assertIn(escaped, data)
+
 
 def test_main():
     test.support.run_unittest(CmdLineTest)
index 0d1c1d513a97e87d9c2dfc7ac4a4e465c46ab893..124fc1c5413745b38b2591792e864f2ee501bdf3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
+  UnicodeEncodeError.
+
 - Add the "display" and "undisplay" pdb commands.
 
 - Issue #7245: Add a SIGINT handler in pdb that allows to break a program
index 204c8c8cc93d17d166712ab7f2681380cd8ed1c8..0a14f0eb36add9b8188485b251ae3ad26d21cbd4 100644 (file)
@@ -65,6 +65,68 @@ PySys_SetObject(const char *name, PyObject *v)
         return PyDict_SetItemString(sd, name, v);
 }
 
+/* Write repr(o) to sys.stdout using sys.stdout.encoding and the
+   'backslashreplace' error handler. If sys.stdout has a buffer attribute, use
+   sys.stdout.buffer.write(encoded), otherwise redecode the string and use
+   sys.stdout.write(redecoded).
+
+   outf is the sys.stdout object and o is the object whose repr is written.
+   Returns 0 on success and -1 on failure.
+
+   Helper function for sys_displayhook(). */
+static int
+sys_displayhook_unencodable(PyObject *outf, PyObject *o)
+{
+    PyObject *stdout_encoding = NULL;
+    PyObject *encoded, *escaped_str, *repr_str, *buffer, *result;
+    char *stdout_encoding_str;
+    int ret;
+
+    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
+    if (stdout_encoding == NULL)
+        goto error;
+    /* stdout_encoding_str is used below while stdout_encoding is still
+       referenced; the reference is only released at 'finally'. */
+    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
+    if (stdout_encoding_str == NULL)
+        goto error;
+
+    repr_str = PyObject_Repr(o);
+    if (repr_str == NULL)
+        goto error;
+    encoded = PyUnicode_AsEncodedString(repr_str,
+                                        stdout_encoding_str,
+                                        "backslashreplace");
+    Py_DECREF(repr_str);
+    if (encoded == NULL)
+        goto error;
+
+    buffer = PyObject_GetAttrString(outf, "buffer");
+    if (buffer) {
+        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
+        Py_DECREF(buffer);
+        Py_DECREF(encoded);
+        if (result == NULL)
+            goto error;
+        Py_DECREF(result);
+    }
+    else {
+        /* Only a missing attribute selects the text fallback; any other
+           failure while looking up 'buffer' is reported to the caller
+           instead of being silently discarded. */
+        if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            Py_DECREF(encoded);
+            goto error;
+        }
+        PyErr_Clear();
+        escaped_str = PyUnicode_FromEncodedObject(encoded,
+                                                  stdout_encoding_str,
+                                                  "strict");
+        Py_DECREF(encoded);
+        /* Decoding can fail; do not pass NULL on to the write and
+           Py_DECREF() calls below. */
+        if (escaped_str == NULL)
+            goto error;
+        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) {
+            Py_DECREF(escaped_str);
+            goto error;
+        }
+        Py_DECREF(escaped_str);
+    }
+    ret = 0;
+    goto finally;
+
+error:
+    ret = -1;
+finally:
+    Py_XDECREF(stdout_encoding);
+    return ret;
+}
+
+
 static PyObject *
 sys_displayhook(PyObject *self, PyObject *o)
 {
@@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o)
     PyInterpreterState *interp = PyThreadState_GET()->interp;
     PyObject *modules = interp->modules;
     PyObject *builtins = PyDict_GetItemString(modules, "builtins");
+    int err;
 
     if (builtins == NULL) {
         PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o)
         PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
         return NULL;
     }
-    if (PyFile_WriteObject(o, outf, 0) != 0)
-        return NULL;
+    if (PyFile_WriteObject(o, outf, 0) != 0) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+            /* repr(o) is not encodable to sys.stdout.encoding with
+             * sys.stdout.errors error handler (which is probably 'strict') */
+            PyErr_Clear();
+            err = sys_displayhook_unencodable(outf, o);
+            if (err)
+                return NULL;
+        }
+        else {
+            return NULL;
+        }
+    }
     if (PyFile_WriteString("\n", outf) != 0)
         return NULL;
     if (PyObject_SetAttrString(builtins, "_", o) != 0)