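Implement the changes proposed in patch #413333. unicode(obj) now mimics str(obj) for objects that are neither strings nor char buffers: it falls back to the type's tp_str slot (or repr() if tp_str is not set), which also covers instances that implement __str__.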

author Marc-André Lemburg <mal@egenix.com>

Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)

committer Marc-André Lemburg <mal@egenix.com>

Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)
author Marc-André Lemburg <mal@egenix.com>
Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)
committer Marc-André Lemburg <mal@egenix.com>
Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index d57328d52c9f284dc322df4fb764011b8ca214a6..d508bef7574dcdb6c164b6b14358291434a347c3 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -429,6 +429,7 @@ verify(unicode('hello','utf-8') == u'hello')
  verify(unicode('hello','utf8') == u'hello')
  verify(unicode('hello','latin-1') == u'hello')
  
+# Compatibility to str():
  class String:
      x = ''
      def __str__(self):
@@ -444,6 +445,10 @@ o.x = u'abc'
  verify(unicode(o) == u'abc')
  verify(str(o) == 'abc')
  
+for obj in (123, 123.45, 123L):
+    verify(unicode(obj) == unicode(str(obj)))
+
+# Error handling
  try:
      u'Andr\202 x'.encode('ascii')
      u'Andr\202 x'.encode('ascii','strict')
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 50f2f5c627349cd521cab1716686f24d64a4cd03..896e80f794993621c085dee594548ab7b653907d 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -398,10 +398,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
                                       const char *encoding,
                                       const char *errors)
  {
-    const char *s;
+    const char *s = NULL;
      int len;
      int owned = 0;
      PyObject *v;
+    int reclevel;
      
      if (obj == NULL) {
         PyErr_BadInternalCall();
@@ -409,53 +410,65 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
      }
  
      /* Coerce object */
-    if (PyInstance_Check(obj)) {
-       PyObject *func;
-       func = PyObject_GetAttrString(obj, "__str__");
-       if (func == NULL) {
-           PyErr_SetString(PyExc_TypeError,
-                 "coercing to Unicode: instance doesn't define __str__");
-           return NULL;
+    for (reclevel = 0; reclevel < 2; reclevel++) {
+
+       if (PyUnicode_Check(obj)) {
+           if (encoding) {
+               PyErr_SetString(PyExc_TypeError,
+                               "decoding Unicode is not supported");
+               goto onError;
+           }
+           if (PyUnicode_CheckExact(obj)) {
+               Py_INCREF(obj);
+               v = obj;
+           }
+           else {
+               /* For a subclass of unicode, return a true unicode object
+                  with the same string value. */
+               v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+                                         PyUnicode_GET_SIZE(obj));
+           }
+           goto done;
         }
-       obj = PyEval_CallObject(func, NULL);
-       Py_DECREF(func);
-       if (obj == NULL)
-           return NULL;
-       owned = 1;
-    }
-    if (PyUnicode_Check(obj)) {
-       if (encoding) {
-            PyErr_SetString(PyExc_TypeError,
-                           "decoding Unicode is not supported");
-            return NULL;
+       else if (PyString_Check(obj)) {
+           s = PyString_AS_STRING(obj);
+           len = PyString_GET_SIZE(obj);
+           break;
         }
-        if (PyUnicode_CheckExact(obj)) {
-           Py_INCREF(obj);
-            v = obj;
+       else {
+           PyObject *w;
+
+           /* Try char buffer interface */
+            if (PyObject_AsCharBuffer(obj, &s, &len))
+               PyErr_Clear();
+           else
+               break;
+    
+           /* Mimic the behaviour of str(object) if everything else
+              fails (see PyObject_Str()); this also covers instances
+              which implement __str__. */
+           if (obj->ob_type->tp_str == NULL)
+               w = PyObject_Repr(obj);
+           else
+               w = (*obj->ob_type->tp_str)(obj);
+           if (w == NULL)
+               goto onError;
+           if (owned) {
+               Py_DECREF(obj);
+           }
+           obj = w;
+           owned = 1;
         }
-        else {
-            /* For a subclass of unicode, return a true unicode object
-               with the same string value. */
-            v = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
-                                      PyUnicode_GET_SIZE(obj));
-        }
-       goto done;
      }
-    else if (PyString_Check(obj)) {
-       s = PyString_AS_STRING(obj);
-       len = PyString_GET_SIZE(obj);
-    }
-    else if (PyObject_AsCharBuffer(obj, &s, &len)) {
-       /* Overwrite the error message with something more useful in
-          case of a TypeError. */
-       if (PyErr_ExceptionMatches(PyExc_TypeError))
-           PyErr_Format(PyExc_TypeError,
-                        "coercing to Unicode: need string or buffer, "
-                        "%.80s found",
-                        obj->ob_type->tp_name);
+
+    if (s == NULL) {
+       PyErr_Format(PyExc_TypeError,
+                    "coercing to Unicode: __str__ recursion limit exceeded "
+                    "(last type: %.80s)",
+                    obj->ob_type->tp_name);
         goto onError;
      }
-
+    
      /* Convert to Unicode */
      if (len == 0) {
         Py_INCREF(unicode_empty);
author	Marc-André Lemburg <mal@egenix.com>
	Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)
committer	Marc-André Lemburg <mal@egenix.com>
	Thu, 20 Sep 2001 12:53:16 +0000 (12:53 +0000)
Lib/test/test_unicode.py		patch | blob | history
Objects/unicodeobject.c		patch | blob | history