Issue 27080: PEP 515: add '_' formatting option.

author Eric V. Smith <eric@trueblade.com>

Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)

committer Eric V. Smith <eric@trueblade.com>

Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)
author Eric V. Smith <eric@trueblade.com>
Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)
committer Eric V. Smith <eric@trueblade.com>
Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)
diff --git a/Doc/library/string.rst b/Doc/library/string.rst

index c421c72d758f0cae47048d4e51e115c7257d0de2..b5d5ed1901910713459bef2ad813258d7a5a8174 100644 (file)
--- a/Doc/library/string.rst
+++ b/Doc/library/string.rst
@@ -300,7 +300,7 @@ non-empty format string typically modifies the result.
  The general form of a *standard format specifier* is:
  
  .. productionlist:: sf
-   format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][.`precision`][`type`]
+   format_spec: [[`fill`]`align`][`sign`][#][0][`width`][, | _][.`precision`][`type`]
     fill: <any character>
     align: "<" | ">" | "=" | "^"
     sign: "+" | "-" | " "
@@ -378,6 +378,16 @@ instead.
  .. versionchanged:: 3.1
     Added the ``','`` option (see also :pep:`378`).
  
+The ``'_'`` option signals the use of an underscore as a thousands
+separator for floating point presentation types and for integer
+presentation type ``'d'``.  For integer presentation types ``'b'``,
+``'o'``, ``'x'``, and ``'X'``, underscores are inserted every 4 digits.
+For other presentation types, specifying this option is an error, as
+is combining it with the ``','`` option.
+
+.. versionchanged:: 3.6
+   Added the ``'_'`` option (see also :pep:`515`).
+
  *width* is a decimal integer defining the minimum field width.  If not
  specified, then the field width will be determined by the content.
  
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py

index 4cc4b05c7f3c199db7a3c4c5e23ef4ab1c38f3a4..fd15f04aceca8fc7f15298a8fe0e40d9add08a06 100644 (file)
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -621,6 +621,8 @@ class LongTest(unittest.TestCase):
      def test__format__(self):
          self.assertEqual(format(123456789, 'd'), '123456789')
          self.assertEqual(format(123456789, 'd'), '123456789')
+        self.assertEqual(format(123456789, ','), '123,456,789')
+        self.assertEqual(format(123456789, '_'), '123_456_789')
  
          # sign and aligning are interdependent
          self.assertEqual(format(1, "-"), '1')
@@ -649,8 +651,25 @@ class LongTest(unittest.TestCase):
          self.assertEqual(format(int('be', 16), "X"), "BE")
          self.assertEqual(format(-int('be', 16), "x"), "-be")
          self.assertEqual(format(-int('be', 16), "X"), "-BE")
+        self.assertRaises(ValueError, format, 1234567890, ',x')
+        self.assertEqual(format(1234567890, '_x'), '4996_02d2')
+        self.assertEqual(format(1234567890, '_X'), '4996_02D2')
  
          # octal
+        self.assertEqual(format(3, "o"), "3")
+        self.assertEqual(format(-3, "o"), "-3")
+        self.assertEqual(format(1234, "o"), "2322")
+        self.assertEqual(format(-1234, "o"), "-2322")
+        self.assertEqual(format(1234, "-o"), "2322")
+        self.assertEqual(format(-1234, "-o"), "-2322")
+        self.assertEqual(format(1234, " o"), " 2322")
+        self.assertEqual(format(-1234, " o"), "-2322")
+        self.assertEqual(format(1234, "+o"), "+2322")
+        self.assertEqual(format(-1234, "+o"), "-2322")
+        self.assertRaises(ValueError, format, 1234567890, ',o')
+        self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
+
+        # binary
          self.assertEqual(format(3, "b"), "11")
          self.assertEqual(format(-3, "b"), "-11")
          self.assertEqual(format(1234, "b"), "10011010010")
@@ -661,12 +680,21 @@ class LongTest(unittest.TestCase):
          self.assertEqual(format(-1234, " b"), "-10011010010")
          self.assertEqual(format(1234, "+b"), "+10011010010")
          self.assertEqual(format(-1234, "+b"), "-10011010010")
+        self.assertRaises(ValueError, format, 1234567890, ',b')
+        self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
  
          # make sure these are errors
          self.assertRaises(ValueError, format, 3, "1.3")  # precision disallowed
+        self.assertRaises(ValueError, format, 3, "_c")   # underscore,
+        self.assertRaises(ValueError, format, 3, ",c")   # comma, and
          self.assertRaises(ValueError, format, 3, "+c")   # sign not allowed
                                                           # with 'c'
  
+        self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,')
+        self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_')
+        self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,d')
+        self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_d')
+
          # ensure that only int and float type specifiers work
          for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
                              [chr(x) for x in range(ord('A'), ord('Z')+1)]):
diff --git a/Misc/NEWS b/Misc/NEWS

index b8cc778de117d60db3b7b6c58b730cc54132a6c2..5628dc9cd2ad58bd6e9a4ec81aa6b9c22e825d2c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
  Core and Builtins
  -----------------
  
+- Issue #27080: Implement formatting support for PEP 515.  Initial patch
+  by Chris Angelico.
+
  - Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
    Patch by Jason Fried.
  
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c

index db9f5b831631819f97f226277c7a50cd2baa32cf..95c507ec6ea8f26b91f9d6bf74f94fedcdeef3e6 100644 (file)
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type)
  {
      if (presentation_type > 32 && presentation_type < 128)
          PyErr_Format(PyExc_ValueError,
-                     "Cannot specify ',' with '%c'.",
+                     "Cannot specify ',' or '_' with '%c'.",
                       (char)presentation_type);
      else
          PyErr_Format(PyExc_ValueError,
-                     "Cannot specify ',' with '\\x%x'.",
+                     "Cannot specify ',' or '_' with '\\x%x'.",
                       (unsigned int)presentation_type);
  }
  
+static void  /* Sets ValueError: ',' and '_' cannot be combined. */
+invalid_comma_and_underscore(void)
+{
+    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
+}
+
  /*
      get_integer consumes 0 or more decimal digit characters from an
      input string, updates *result with the corresponding positive
@@ -108,6 +114,12 @@ is_sign_element(Py_UCS4 c)
      }
  }
  
+/* Locale type codes. LT_NO_LOCALE must be zero. */
+#define LT_NO_LOCALE 0  /* no separator; matches thousands_separators == 0 */
+#define LT_DEFAULT_LOCALE 1  /* ',' separator, groups of 3 */
+#define LT_UNDERSCORE_LOCALE 2  /* '_' separator, groups of 3 */
+#define LT_UNDER_FOUR_LOCALE 3  /* '_' separator, groups of 4 (b/o/x/X) */
+#define LT_CURRENT_LOCALE 4  /* current locale; selected for type 'n' */
  
  typedef struct {
      Py_UCS4 fill_char;
@@ -223,9 +235,22 @@ parse_internal_render_format_spec(PyObject *format_spec,
  
      /* Comma signifies add thousands separators */
      if (end-pos && READ_spec(pos) == ',') {
-        format->thousands_separators = 1;
+        format->thousands_separators = LT_DEFAULT_LOCALE;
          ++pos;
      }
+    /* Underscore signifies add thousands separators */
+    if (end-pos && READ_spec(pos) == '_') {
+        if (format->thousands_separators != 0) {
+            invalid_comma_and_underscore();
+            return 0;
+        }
+        format->thousands_separators = LT_UNDERSCORE_LOCALE;
+        ++pos;
+    }
+    if (end-pos && READ_spec(pos) == ',') {
+        invalid_comma_and_underscore();
+        return 0;
+    }
  
      /* Parse field precision */
      if (end-pos && READ_spec(pos) == '.') {
@@ -275,6 +300,16 @@ parse_internal_render_format_spec(PyObject *format_spec,
          case '\0':
              /* These are allowed. See PEP 378.*/
              break;
+        case 'b':
+        case 'o':
+        case 'x':
+        case 'X':
+            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
+            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
+                /* Every four digits, not every three, in bin/oct/hex. */
+                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
+                break;
+            }
          default:
              invalid_comma_type(format->type);
              return 0;
@@ -351,11 +386,6 @@ fill_padding(_PyUnicodeWriter *writer,
  /*********** common routines for numeric formatting *********************/
  /************************************************************************/
  
-/* Locale type codes. */
-#define LT_CURRENT_LOCALE 0
-#define LT_DEFAULT_LOCALE 1
-#define LT_NO_LOCALE 2
-
  /* Locale info needed for formatting integers and the part of floats
     before and including the decimal. Note that locales only support
     8-bit chars, not unicode. */
@@ -667,8 +697,8 @@ static const char no_grouping[1] = {CHAR_MAX};
  
  /* Find the decimal point character(s?), thousands_separator(s?), and
     grouping description, either for the current locale if type is
-   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
-   none if LT_NO_LOCALE. */
+   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
+   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
  static int
  get_locale_info(int type, LocaleInfo *locale_info)
  {
@@ -691,16 +721,22 @@ get_locale_info(int type, LocaleInfo *locale_info)
          break;
      }
      case LT_DEFAULT_LOCALE:
+    case LT_UNDERSCORE_LOCALE:
+    case LT_UNDER_FOUR_LOCALE:
          locale_info->decimal_point = PyUnicode_FromOrdinal('.');
-        locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
+        locale_info->thousands_sep = PyUnicode_FromOrdinal(
+            type == LT_DEFAULT_LOCALE ? ',' : '_');
          if (!locale_info->decimal_point || !locale_info->thousands_sep) {
              Py_XDECREF(locale_info->decimal_point);
              Py_XDECREF(locale_info->thousands_sep);
              return -1;
          }
-        locale_info->grouping = "\3"; /* Group every 3 characters.  The
+        if (type != LT_UNDER_FOUR_LOCALE)
+            locale_info->grouping = "\3"; /* Group every 3 characters.  The
                                           (implicit) trailing 0 means repeat
                                           infinitely. */
+        else
+            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
          break;
      case LT_NO_LOCALE:
          locale_info->decimal_point = PyUnicode_FromOrdinal('.');
@@ -952,9 +988,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
  
      /* Determine the grouping, separator, and decimal point, if any. */
      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                          &locale) == -1)
          goto done;
  
@@ -1099,9 +1133,7 @@ format_float_internal(PyObject *value,
  
      /* Determine the grouping, separator, and decimal point, if any. */
      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                          &locale) == -1)
          goto done;
  
@@ -1277,9 +1309,7 @@ format_complex_internal(PyObject *value,
  
      /* Determine the grouping, separator, and decimal point, if any. */
      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                          &locale) == -1)
          goto done;
author	Eric V. Smith <eric@trueblade.com>
	Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)
committer	Eric V. Smith <eric@trueblade.com>
	Sat, 10 Sep 2016 03:06:47 +0000 (23:06 -0400)
Doc/library/string.rst		patch \| blob \| history
Lib/test/test_long.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Python/formatter_unicode.c		patch \| blob \| history