Issue #28822: Adjust indices handling of PyUnicode_FindChar().

author Xiang Zhang <angwerzx@126.com>

Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)

committer Xiang Zhang <angwerzx@126.com>

Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)
author Xiang Zhang <angwerzx@126.com>
Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)
committer Xiang Zhang <angwerzx@126.com>
Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst

index b31d6895b6b1f42f99fa96b02b572b58018d1363..b57d70a154b10bf5ab57016a8a58e8bad54c993b 100644 (file)
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1625,6 +1625,9 @@ They all return *NULL* or ``-1`` if an exception occurs.
  
     .. versionadded:: 3.3
  
+   .. versionchanged:: 3.7
+      *start* and *end* are now adjusted to behave like ``str[start:end]``.
+
  
  .. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \
                                 Py_ssize_t start, Py_ssize_t end)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 883c362a03545f6b4b10d9b628811b6002a02f5e..fb77ffb47067f4337ecbcf1de72e1100f8f4b9e4 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2728,6 +2728,29 @@ class CAPITest(unittest.TestCase):
              self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
              self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
  
+    # Test PyUnicode_FindChar()
+    @support.cpython_only
+    def test_findchar(self):
+        from _testcapi import unicode_findchar
+
+        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for i, ch in enumerate(str):
+                self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
+                self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
+
+        str = "!>_<!"
+        self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
+        self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
+        # start < end; an end past len(str) is clipped to len(str)
+        self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
+        self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
+        # start >= end: the search range is empty, so nothing is found
+        self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
+        self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
+        # negative indices count from the end, like str[start:end]
+        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
+        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
+
      # Test PyUnicode_CopyCharacters()
      @support.cpython_only
      def test_copycharacters(self):
diff --git a/Misc/NEWS b/Misc/NEWS

index 24285d56a9e427f5ecd43b93c096f7141d86e6ec..cfc9bd5bb39551e8dae5c82477a2759cb9b4fb22 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -545,6 +545,9 @@ Windows
  C API
  -----
  
+- Issue #28822: The index parameters *start* and *end* of PyUnicode_FindChar()
+  are now adjusted to behave like ``str[start:end]``.
+
  - Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
  
  - Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef,
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c

index 8d4346cff3f7a7061c47a9897360ae4fbd96a088..ef5f9d4d13cedf5d7b4e08080d9f648a1e8f21ff 100644 (file)
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1887,6 +1887,27 @@ unicode_asucs4(PyObject *self, PyObject *args)
      return result;
  }
  
+static PyObject *
+unicode_findchar(PyObject *self, PyObject *args)
+{   /* Test-only wrapper exposing PyUnicode_FindChar() to Python code. */
+    PyObject *str;
+    int direction;
+    unsigned int ch;
+    Py_ssize_t result;
+    Py_ssize_t start, end;
+    /* Unpack (str, ch, start, end, direction) from the Python arguments. */
+    if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
+                          &start, &end, &direction)) {
+        return NULL;
+    }
+    /* -2 is the error return; -1 (not found) or an index becomes an int. */
+    result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
+    if (result == -2)
+        return NULL;
+    else
+        return PyLong_FromSsize_t(result);
+}
+
  static PyObject *
  unicode_copycharacters(PyObject *self, PyObject *args)
  {
@@ -4121,6 +4142,7 @@ static PyMethodDef TestMethods[] = {
      {"unicode_aswidechar",      unicode_aswidechar,              METH_VARARGS},
      {"unicode_aswidecharstring",unicode_aswidecharstring,        METH_VARARGS},
      {"unicode_asucs4",          unicode_asucs4,                  METH_VARARGS},
+    {"unicode_findchar",        unicode_findchar,                METH_VARARGS},
      {"unicode_copycharacters",  unicode_copycharacters,          METH_VARARGS},
      {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},
      {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 3fdce82ae6bbe4293ea9b1a9da338cd6a52b4d79..bbda4d884ca9b12ee6f821c5cff209660109dddf 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
                     int direction)
  {
      int kind;
-    Py_ssize_t result;
+    Py_ssize_t len, result;
      if (PyUnicode_READY(str) == -1)
          return -2;
-    if (start < 0 || end < 0) {
-        PyErr_SetString(PyExc_IndexError, "string index out of range");
-        return -2;
-    }
-    if (end > PyUnicode_GET_LENGTH(str))
-        end = PyUnicode_GET_LENGTH(str);
-    if (start >= end)
+    len = PyUnicode_GET_LENGTH(str);
+    ADJUST_INDICES(start, end, len);
+    if (end - start < 1)
          return -1;
      kind = PyUnicode_KIND(str);
      result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
author	Xiang Zhang <angwerzx@126.com>
	Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)
committer	Xiang Zhang <angwerzx@126.com>
	Tue, 20 Dec 2016 14:52:33 +0000 (22:52 +0800)
Doc/c-api/unicode.rst		patch \| blob \| history
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/_testcapimodule.c		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history