bpo-32677: Add .isascii() to str, bytes and bytearray (GH-5342)

author INADA Naoki <methane@users.noreply.github.com>

Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)

committer GitHub <noreply@github.com>

Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)
author INADA Naoki <methane@users.noreply.github.com>
Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)
committer GitHub <noreply@github.com>
Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst

index 120b0d3399c8b5e58eb2068f0ea07f211b2f2ad1..ad7f578e08606940033a4351e5ad7a1eee61ac02 100644 (file)
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -1653,6 +1653,15 @@ expression support in the :mod:`re` module).
     from the "Alphabetic" property defined in the Unicode Standard.
  
  
+.. method:: str.isascii()
+
+   Return true if the string is empty or all characters in the string are ASCII,
+   false otherwise.
+   ASCII characters have code points in the range U+0000-U+007F.
+
+   .. versionadded:: 3.7
+
+
  .. method:: str.isdecimal()
  
     Return true if all characters in the string are decimal
@@ -2941,6 +2950,16 @@ place, and instead produce new objects.
        False
  
  
+.. method:: bytes.isascii()
+            bytearray.isascii()
+
+   Return true if the sequence is empty or all bytes in the sequence are ASCII,
+   false otherwise.
+   ASCII bytes are in the range 0-0x7F.
+
+   .. versionadded:: 3.7
+
+
  .. method:: bytes.isdigit()
              bytearray.isdigit()
  
diff --git a/Include/bytes_methods.h b/Include/bytes_methods.h

index 7fa7540c38b7313f26a601a5f8d1e66b05738ca8..8434a50a4bba717ee6c3a4428de3be3677e5c928 100644 (file)
--- a/Include/bytes_methods.h
+++ b/Include/bytes_methods.h
@@ -9,6 +9,7 @@
  extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len);
  extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len);
  extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len);
+extern PyObject* _Py_bytes_isascii(const char *cptr, Py_ssize_t len);
  extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len);
  extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len);
  extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len);
@@ -37,6 +38,7 @@ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to);
  extern const char _Py_isspace__doc__[];
  extern const char _Py_isalpha__doc__[];
  extern const char _Py_isalnum__doc__[];
+extern const char _Py_isascii__doc__[];
  extern const char _Py_isdigit__doc__[];
  extern const char _Py_islower__doc__[];
  extern const char _Py_isupper__doc__[];
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py

index 7088b88e04a7886b42c710a9aaee0dde08c8f01d..21d91fd61d607c6d8606a8729dd936d046a5e9fb 100644 (file)
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -1214,6 +1214,7 @@ class UserString(Sequence):
          return self.data.index(sub, start, end)
      def isalpha(self): return self.data.isalpha()
      def isalnum(self): return self.data.isalnum()
+    def isascii(self): return self.data.isascii()
      def isdecimal(self): return self.data.isdecimal()
      def isdigit(self): return self.data.isdigit()
      def isidentifier(self): return self.data.isidentifier()
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py

index cd3ee48a92bb7d3b6dc7516cee8c0369f121884a..4be1d2118978a98f05b3a82a77c8cfa3e36407f0 100644 (file)
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -909,6 +909,14 @@ class BaseTest:
          self.checkequal(False, 'abc\n', 'isalnum')
          self.checkraises(TypeError, 'abc', 'isalnum', 42)
  
+    def test_isascii(self):
+        self.checkequal(True, '', 'isascii')
+        self.checkequal(True, '\x00', 'isascii')
+        self.checkequal(True, '\x7f', 'isascii')
+        self.checkequal(True, '\x00\x7f', 'isascii')
+        self.checkequal(False, '\x80', 'isascii')
+        self.checkequal(False, '\xe9', 'isascii')
+
      def test_isdigit(self):
          self.checkequal(False, '', 'isdigit')
          self.checkequal(False, 'a', 'isdigit')
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py

index 2258c6b1baed26bd930bc0bfd202838001a7d1d3..5ad94aba6492ab0ee360c541fe79df5cfbdcd838 100644 (file)
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -659,7 +659,7 @@ plain ol' Python and is guaranteed to be available.
  
      >>> import builtins
      >>> tests = doctest.DocTestFinder().find(builtins)
-    >>> 790 < len(tests) < 810 # approximate number of objects with docstrings
+    >>> 800 < len(tests) < 820 # approximate number of objects with docstrings
      True
      >>> real_tests = [t for t in tests if len(t.examples) > 0]
      >>> len(real_tests) # objects that actually have doctests
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 2b77863e52e4902b0a652bf34b955028fc5d979c..3cc018c0cc2caa8c3125233fde2a4d314dc4e0d9 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -638,6 +638,11 @@ class UnicodeTest(string_tests.CommonTest,
          self.assertFalse('\U0001F40D'.isalpha())
          self.assertFalse('\U0001F46F'.isalpha())
  
+    def test_isascii(self):
+        super().test_isascii()
+        self.assertFalse("\u20ac".isascii())
+        self.assertFalse("\U0010ffff".isascii())
+
      def test_isdecimal(self):
          self.checkequalnofix(False, '', 'isdecimal')
          self.checkequalnofix(False, 'a', 'isdecimal')
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst b/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst

new file mode 100644 (file)

index 0000000..947c74f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst
@@ -0,0 +1,2 @@
+Add ``.isascii()`` method to ``str``, ``bytes`` and ``bytearray``.
+It can be used to test whether a string contains only ASCII characters.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c

index dc1515a059cb940f8d0117ad7f7265214b5456c0..692b7be739245579607c6c7b05cac88ed288355f 100644 (file)
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2159,6 +2159,8 @@ bytearray_methods[] = {
       _Py_isalnum__doc__},
      {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
       _Py_isalpha__doc__},
+    {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
+     _Py_isascii__doc__},
      {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
       _Py_isdigit__doc__},
      {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c

index bd79773a54eabff285796caedf79954d400c6239..149650f018fcb691eeb851b0945f2007bf798ddf 100644 (file)
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -92,6 +92,26 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
  }
  
  
+PyDoc_STRVAR_shared(_Py_isascii__doc__,
+"B.isascii() -> bool\n\
+\n\
+Return True if B is empty or all characters in B are ASCII,\n\
+False otherwise.");
+
+PyObject*
+_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
+{
+    const unsigned char *p = (const unsigned char *) cptr;  /* keep const */
+    const unsigned char *e = p + len;  /* one past the last byte */
+    for (; p < e; p++) {
+        if (*p >= 128) {  /* unsigned view: 0x80-0xFF are not ASCII */
+            Py_RETURN_FALSE;
+        }
+    }
+    Py_RETURN_TRUE;  /* also reached when len == 0 (loop never runs) */
+}
+
+
  PyDoc_STRVAR_shared(_Py_isdigit__doc__,
  "B.isdigit() -> bool\n\
  \n\
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c

index a921d9cb8c5e98117b79401ba816aa83d42446d5..c358756bfea8e65de342b1c148fe76eb76671db6 100644 (file)
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -2459,6 +2459,8 @@ bytes_methods[] = {
       _Py_isalnum__doc__},
      {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
       _Py_isalpha__doc__},
+    {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS,
+     _Py_isascii__doc__},
      {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
       _Py_isdigit__doc__},
      {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h

index 643ef0491b349d9f37a844fd3cc070f6e860f9d4..8072516a8a36d142f1408e065cc626006460e4e2 100644 (file)
--- a/Objects/clinic/unicodeobject.c.h
+++ b/Objects/clinic/unicodeobject.c.h
@@ -165,6 +165,27 @@ exit:
      return return_value;
  }
  
+PyDoc_STRVAR(unicode_isascii__doc__,
+"isascii($self, /)\n"
+"--\n"
+"\n"
+"Return True if all characters in the string are ASCII, False otherwise.\n"
+"\n"
+"ASCII characters have code points in the range U+0000-U+007F.\n"
+"Empty string is ASCII too.");
+
+#define UNICODE_ISASCII_METHODDEF    \
+    {"isascii", (PyCFunction)unicode_isascii, METH_NOARGS, unicode_isascii__doc__},
+
+static PyObject *
+unicode_isascii_impl(PyObject *self);
+
+static PyObject *
+unicode_isascii(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return unicode_isascii_impl(self);
+}
+
  PyDoc_STRVAR(unicode_islower__doc__,
  "islower($self, /)\n"
  "--\n"
@@ -930,4 +951,4 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
  {
      return unicode_sizeof_impl(self);
  }
-/*[clinic end generated code: output=1ad4e81b68194264 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=561c88c912b8fe3b input=a9049054013a1b77]*/
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h

index f0546256ed911cdaecbb40568ba18f5d6ee1b25f..fd7b1bd49e54f73b40b352351d301e7b62803df4 100644 (file)
--- a/Objects/stringlib/ctype.h
+++ b/Objects/stringlib/ctype.h
@@ -22,6 +22,12 @@ stringlib_isalnum(PyObject *self)
      return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
  }
  
+static PyObject*
+stringlib_isascii(PyObject *self)
+{   /* Pass this object's data pointer and length to _Py_bytes_isascii(). */
+    return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
  static PyObject*
  stringlib_isdigit(PyObject *self)
  {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 07330119dc30380950c3b8896bfff4c474d5d74d..4b90cc364b8c02af4e39bcef3b02ee9eaccf85e4 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11611,6 +11611,25 @@ unicode_index(PyObject *self, PyObject *args)
      return PyLong_FromSsize_t(result);
  }
  
+/*[clinic input]
+str.isascii as unicode_isascii
+
+Return True if all characters in the string are ASCII, False otherwise.
+
+ASCII characters have code points in the range U+0000-U+007F.
+Empty string is ASCII too.
+[clinic start generated code]*/
+
+static PyObject *
+unicode_isascii_impl(PyObject *self)
+/*[clinic end generated code: output=c5910d64b5a8003f input=5a43cbc6399621d5]*/
+{
+    if (PyUnicode_READY(self) == -1) {  /* -1 is treated as failure */
+        return NULL;
+    }
+    return PyBool_FromLong(PyUnicode_IS_ASCII(self));  /* as a Python bool */
+}
+
  /*[clinic input]
  str.islower as unicode_islower
  
@@ -13801,6 +13820,7 @@ static PyMethodDef unicode_methods[] = {
      UNICODE_UPPER_METHODDEF
      {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
      {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
+    UNICODE_ISASCII_METHODDEF
      UNICODE_ISLOWER_METHODDEF
      UNICODE_ISUPPER_METHODDEF
      UNICODE_ISTITLE_METHODDEF
author	INADA Naoki <methane@users.noreply.github.com>
	Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)
committer	GitHub <noreply@github.com>
	Sat, 27 Jan 2018 05:06:21 +0000 (14:06 +0900)
Doc/library/stdtypes.rst		patch \| blob \| history
Include/bytes_methods.h		patch \| blob \| history
Lib/collections/__init__.py		patch \| blob \| history
Lib/test/string_tests.py		patch \| blob \| history
Lib/test/test_doctest.py		patch \| blob \| history
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst	[new file with mode: 0644]	patch \| blob
Objects/bytearrayobject.c		patch \| blob \| history
Objects/bytes_methods.c		patch \| blob \| history
Objects/bytesobject.c		patch \| blob \| history
Objects/clinic/unicodeobject.c.h		patch \| blob \| history
Objects/stringlib/ctype.h		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history