granicus.if.org Git - python/commitdiff
Change filterstring() and filterunicode(): If the
author Walter Dörwald <walter@livinglogic.de>
Mon, 10 Feb 2003 13:19:13 +0000 (13:19 +0000)
committer Walter Dörwald <walter@livinglogic.de>
Mon, 10 Feb 2003 13:19:13 +0000 (13:19 +0000)
object is not a real str or unicode but an instance
of a subclass, construct the output via looping
over __getitem__. This guarantees that the result
is the same for function==None and function==lambda x:x

This doesn't happen for tuples, because filtertuple()
uses PyTuple_GetItem().

(This was discussed on SF bug #665835).

Lib/test/test_builtin.py
Python/bltinmodule.c

index 047e93eee6c8f15aeef3f0f123602e9b622ab9cc..9af3233331cc00f9286e9f6480736639281dfddc 100644 (file)
@@ -418,26 +418,40 @@ class BuiltinTest(unittest.TestCase):
 
     def test_filter_subclasses(self):
         # test, that filter() never returns tuple, str or unicode subclasses
+        # and that the result always goes through __getitem__
+        # FIXME: For tuple currently it doesn't go through __getitem__
         funcs = (None, lambda x: True)
         class tuple2(tuple):
-            pass
+            def __getitem__(self, index):
+                return 2*tuple.__getitem__(self, index)
         class str2(str):
-            pass
+            def __getitem__(self, index):
+                return 2*str.__getitem__(self, index)
         inputs = {
-            tuple2: [(), (1,2,3)],
-            str2:   ["", "123"]
+            tuple2: {(): (), (1, 2, 3): (1, 2, 3)}, # FIXME
+            str2:   {"": "", "123": "112233"}
         }
         if have_unicode:
             class unicode2(unicode):
-                pass
-            inputs[unicode2] = [unicode(), unicode("123")]
-
-        for func in funcs:
-            for (cls, inps) in inputs.iteritems():
-                for inp in inps:
-                    out = filter(func, cls(inp))
-                    self.assertEqual(inp, out)
-                    self.assert_(not isinstance(out, cls))
+                def __getitem__(self, index):
+                    return 2*unicode.__getitem__(self, index)
+            inputs[unicode2] = {
+                unicode(): unicode(),
+                unicode("123"): unicode("112233")
+            }
+
+        for (cls, inps) in inputs.iteritems():
+            for (inp, exp) in inps.iteritems():
+                 # make sure the output goes through __getitem__
+                 # even if func is None
+                 self.assertEqual(
+                     filter(funcs[0], cls(inp)),
+                     filter(funcs[1], cls(inp))
+                 )
+                 for func in funcs:
+                    outp = filter(func, cls(inp))
+                    self.assertEqual(outp, exp)
+                    self.assert_(not isinstance(outp, cls))
 
     def test_float(self):
         self.assertEqual(float(3.14), 3.14)
index 0b43905999bf97c14278bc2ee93e6dc969e7f3a7..19af6f7873a4cf39f938d81f329b647f401cafc5 100644 (file)
@@ -1934,40 +1934,43 @@ filterstring(PyObject *func, PyObject *strobj)
        int outlen = len;
 
        if (func == Py_None) {
-               /* No character is ever false -- share input string
-                * (if it's not a subclass) */
-               if (PyString_CheckExact(strobj))
+               /* If it's a real string we can return the original,
+                * as no character is ever false and __getitem__
+                * does return this character. If it's a subclass
+                * we must go through the __getitem__ loop */
+               if (PyString_CheckExact(strobj)) {
                        Py_INCREF(strobj);
-               else
-                       strobj = PyString_FromStringAndSize(
-                               PyString_AS_STRING(strobj),
-                               len
-                       );
-               return strobj;
+                       return strobj;
+               }
        }
        if ((result = PyString_FromStringAndSize(NULL, len)) == NULL)
                return NULL;
 
        for (i = j = 0; i < len; ++i) {
-               PyObject *item, *arg, *good;
+               PyObject *item;
                int ok;
 
                item = (*strobj->ob_type->tp_as_sequence->sq_item)(strobj, i);
                if (item == NULL)
                        goto Fail_1;
-               arg = Py_BuildValue("(O)", item);
-               if (arg == NULL) {
-                       Py_DECREF(item);
-                       goto Fail_1;
-               }
-               good = PyEval_CallObject(func, arg);
-               Py_DECREF(arg);
-               if (good == NULL) {
-                       Py_DECREF(item);
-                       goto Fail_1;
+               if (func==Py_None) {
+                       ok = 1;
+               } else {
+                       PyObject *arg, *good;
+                       arg = Py_BuildValue("(O)", item);
+                       if (arg == NULL) {
+                               Py_DECREF(item);
+                               goto Fail_1;
+                       }
+                       good = PyEval_CallObject(func, arg);
+                       Py_DECREF(arg);
+                       if (good == NULL) {
+                               Py_DECREF(item);
+                               goto Fail_1;
+                       }
+                       ok = PyObject_IsTrue(good);
+                       Py_DECREF(good);
                }
-               ok = PyObject_IsTrue(good);
-               Py_DECREF(good);
                if (ok) {
                        int reslen;
                        if (!PyString_Check(item)) {
@@ -2026,16 +2029,14 @@ filterunicode(PyObject *func, PyObject *strobj)
        int outlen = len;
 
        if (func == Py_None) {
-               /* No character is ever false -- share input string
-                * (it if's not a subclass) */
-               if (PyUnicode_CheckExact(strobj))
+               /* If it's a real string we can return the original,
+                * as no character is ever false and __getitem__
+                * does return this character. If it's a subclass
+                * we must go through the __getitem__ loop */
+               if (PyUnicode_CheckExact(strobj)) {
                        Py_INCREF(strobj);
-               else
-                       strobj = PyUnicode_FromUnicode(
-                               PyUnicode_AS_UNICODE(strobj),
-                               len
-                       );
-               return strobj;
+                       return strobj;
+               }
        }
        if ((result = PyUnicode_FromUnicode(NULL, len)) == NULL)
                return NULL;
@@ -2047,19 +2048,23 @@ filterunicode(PyObject *func, PyObject *strobj)
                item = (*strobj->ob_type->tp_as_sequence->sq_item)(strobj, i);
                if (item == NULL)
                        goto Fail_1;
-               arg = Py_BuildValue("(O)", item);
-               if (arg == NULL) {
-                       Py_DECREF(item);
-                       goto Fail_1;
-               }
-               good = PyEval_CallObject(func, arg);
-               Py_DECREF(arg);
-               if (good == NULL) {
-                       Py_DECREF(item);
-                       goto Fail_1;
+               if (func == Py_None) {
+                       ok = 1;
+               } else {
+                       arg = Py_BuildValue("(O)", item);
+                       if (arg == NULL) {
+                               Py_DECREF(item);
+                               goto Fail_1;
+                       }
+                       good = PyEval_CallObject(func, arg);
+                       Py_DECREF(arg);
+                       if (good == NULL) {
+                               Py_DECREF(item);
+                               goto Fail_1;
+                       }
+                       ok = PyObject_IsTrue(good);
+                       Py_DECREF(good);
                }
-               ok = PyObject_IsTrue(good);
-               Py_DECREF(good);
                if (ok) {
                        int reslen;
                        if (!PyUnicode_Check(item)) {