Patch #636005: Filter unicode into unicode.
author Martin v. Löwis <martin@v.loewis.de>
Sat, 25 Jan 2003 22:46:11 +0000 (22:46 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Sat, 25 Jan 2003 22:46:11 +0000 (22:46 +0000)
Lib/test/test_builtin.py
Misc/NEWS
Python/bltinmodule.c

index 60358a45f19d93101c542d239c38a0fe72a4c179..eb4b3d2a820dbaa9d0bdfb505835e29f59752b00 100644 (file)
@@ -365,6 +365,12 @@ class BuiltinTest(unittest.TestCase):
             def __getitem__(self, index):
                 raise ValueError
         self.assertRaises(ValueError, filter, lambda x: x >="3", badstr("1234"))
+        if have_unicode:
+            # test bltinmodule.c::filterunicode()
+            self.assertEqual(filter(None, unicode("12")), unicode("12"))
+            self.assertEqual(filter(lambda x: x>="3", unicode("1234")), unicode("34"))
+            self.assertRaises(TypeError, filter, 42, unicode("12"))
+            self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(unicode("1234")))
 
     def test_float(self):
         self.assertEqual(float(3.14), 3.14)
index d63d5227c2b4c145bd52fa7125e1ab22c119cc5c..4223ce11cf10dde957d2fdb95b3b97947b140a50 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.3 alpha 2?
 Core and builtins
 -----------------
 
+- filter now returns Unicode results for Unicode arguments.
+
 - raw_input can now return Unicode objects.
 
 - List objects' sort() method now accepts None as the comparison function.
index 45ea4dcf8787e56ba2ab89d90ca7d1830f493200..54a9afd2e8b7c96f140f786ec2f984919a6feaca 100644 (file)
@@ -24,6 +24,9 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
 
 /* Forward */
 static PyObject *filterstring(PyObject *, PyObject *);
+#ifdef Py_USING_UNICODE
+static PyObject *filterunicode(PyObject *, PyObject *); /* declared only when Py_USING_UNICODE is defined */
+#endif /* Py_USING_UNICODE */
 static PyObject *filtertuple (PyObject *, PyObject *);
 
 static PyObject *
@@ -132,6 +135,10 @@ builtin_filter(PyObject *self, PyObject *args)
        /* Strings and tuples return a result of the same type. */
        if (PyString_Check(seq))
                return filterstring(func, seq);
+#ifdef Py_USING_UNICODE
+       if (PyUnicode_Check(seq))
+               return filterunicode(func, seq);
+#endif
        if (PyTuple_Check(seq))
                return filtertuple(func, seq);
 
@@ -1926,3 +1933,58 @@ Fail_1:
        Py_DECREF(result);
        return NULL;
 }
+
+#ifdef Py_USING_UNICODE
+/* Helper for filter(): filter a Unicode object through a function.
+ *
+ * func   -- Py_None, or a callable invoked as func(item) for every item
+ *           of strobj.
+ * strobj -- the Unicode object to filter; the caller keeps its reference.
+ *
+ * Returns a new reference: strobj itself when func is Py_None, otherwise
+ * a new Unicode object made of the items for which func(item) is true.
+ * Returns NULL with an exception set if fetching an item, calling func,
+ * testing the result for truth, or resizing the result fails, or if a
+ * kept item is not a Unicode object of length 1.
+ */
+
+static PyObject *
+filterunicode(PyObject *func, PyObject *strobj)
+{
+       PyObject *result;
+       int i, j;
+       int len = PyUnicode_GetSize(strobj);
+
+       if (func == Py_None) {
+               /* No character is ever false -- share input string */
+               Py_INCREF(strobj);
+               return strobj;
+       }
+       /* Allocate for the worst case (every item kept); the buffer is
+          trimmed down to the j items actually kept after the loop. */
+       if ((result = PyUnicode_FromUnicode(NULL, len)) == NULL)
+               return NULL;
+
+       for (i = j = 0; i < len; ++i) {
+               PyObject *item, *arg, *good;
+               int ok;
+
+               item = (*strobj->ob_type->tp_as_sequence->sq_item)(strobj, i);
+               if (item == NULL)
+                       goto Fail_1;
+               arg = Py_BuildValue("(O)", item);
+               if (arg == NULL) {
+                       Py_DECREF(item);
+                       goto Fail_1;
+               }
+               good = PyEval_CallObject(func, arg);
+               Py_DECREF(arg);
+               if (good == NULL) {
+                       Py_DECREF(item);
+                       goto Fail_1;
+               }
+               ok = PyObject_IsTrue(good);
+               Py_DECREF(good);
+               if (ok < 0) {
+                       /* The truth test itself raised; do not treat the
+                          error code as "true". */
+                       Py_DECREF(item);
+                       goto Fail_1;
+               }
+               if (ok) {
+                       /* sq_item may be overridden by a subclass, so the
+                          item is not guaranteed to be a one-character
+                          Unicode object; verify before reading its buffer. */
+                       if (!PyUnicode_Check(item) ||
+                           PyUnicode_GET_SIZE(item) != 1) {
+                               PyErr_SetString(PyExc_TypeError,
+                                       "can't filter unicode to unicode: "
+                                       "__getitem__ must return a unicode "
+                                       "object of length 1");
+                               Py_DECREF(item);
+                               goto Fail_1;
+                       }
+                       PyUnicode_AS_UNICODE(result)[j++] =
+                               PyUnicode_AS_UNICODE(item)[0];
+               }
+               Py_DECREF(item);
+       }
+
+       /* Shrink to the number of items kept; propagate a failed resize
+          instead of returning a result with an exception pending. */
+       if (j < len && PyUnicode_Resize(&result, j) < 0)
+               goto Fail_1;
+
+       return result;
+
+Fail_1:
+       /* XDECREF: defensive in case a failed resize cleared result
+          (NOTE(review): confirm against PyUnicode_Resize). */
+       Py_XDECREF(result);
+       return NULL;
+}
+#endif