Addresses issue 2802: 'n' formatting for integers.

author Eric Smith <eric@trueblade.com>

Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)

committer Eric Smith <eric@trueblade.com>

Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)
author Eric Smith <eric@trueblade.com>
Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)
committer Eric Smith <eric@trueblade.com>
Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)
diff --git a/Include/stringobject.h b/Include/stringobject.h

index e3f880a066d78c2d94a897c2d2370f0e2a96ecc5..405736285c4f3c870c7d5b5a1615bc62d39f13f6 100644 (file)
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -176,7 +176,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
                                    (only possible for 0-terminated
                                    strings) */
      );
-    
+
+/* Using the current locale, insert the thousands grouping
+   into the string pointed to by buffer.  For the argument descriptions,
+   see Objects/stringlib/localeutil.h */
+
+PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
+                                                 Py_ssize_t len,
+                                                 char *plast,
+                                                 Py_ssize_t buf_size,
+                                                 Py_ssize_t *count,
+                                                 int append_zero_char);
  
  #ifdef __cplusplus
  }
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py

index 4b620c509f42c5f269abed8373990db57e1a7d90..aca5ff28c86c8838fe7548431117f5b6b51a0a46 100644 (file)
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -377,7 +377,7 @@ class TypesTests(unittest.TestCase):
  
          # ensure that float type specifiers work; format converts
          #  the int to a float
-        for format_spec in 'eEfFgGn%':
+        for format_spec in 'eEfFgG%':
              for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
                  self.assertEqual(value.__format__(format_spec),
                                   float(value).__format__(format_spec))
@@ -472,7 +472,7 @@ class TypesTests(unittest.TestCase):
  
          # ensure that float type specifiers work; format converts
          #  the long to a float
-        for format_spec in 'eEfFgGn%':
+        for format_spec in 'eEfFgG%':
              for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
                  self.assertEqual(value.__format__(format_spec),
                                   float(value).__format__(format_spec))
@@ -486,6 +486,17 @@ class TypesTests(unittest.TestCase):
              self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
              self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
  
+    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
+    def test_int__format__locale(self):
+        # test locale support for __format__ code 'n' for integers
+
+        x = 123456789012345678901234567890
+        for i in range(0, 30):
+            self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
+
+            # move to the next integer to test
+            x = x // 10
+
      def test_float__format__(self):
          # these should be rewritten to use both format(x, spec) and
          # x.__format__(spec)
diff --git a/Makefile.pre.in b/Makefile.pre.in

index b3f1cf06c2259449c29a0b07108896e9d60e50fc..daae2baf683cc5d50f84cb0dae7afa34d1ff6d4b 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \
                 $(srcdir)/Objects/stringlib/stringdefs.h \
                 $(srcdir)/Objects/stringlib/string_format.h \
                 $(srcdir)/Objects/stringlib/transmogrify.h \
-               $(srcdir)/Objects/stringlib/unicodedefs.h
+               $(srcdir)/Objects/stringlib/unicodedefs.h \
+               $(srcdir)/Objects/stringlib/localeutil.h
  
  Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
                                 $(STRINGLIB_HEADERS)
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h

index 531bc223ff3241e7f969b2b2b4d68931e80fa92f..22dd292ce9ea41313ce19d802b6139473cd4ee4b 100644 (file)
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
      Py_ssize_t n_digits;       /* count of digits need from the computed
                                    string */
      Py_ssize_t n_leading_chars;
+    Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
+                                       allocate, used for 'n'
+                                       formatting. */
      NumberFieldWidths spec;
      long x;
  
@@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
              break;
          default:  /* shouldn't be needed, but stops a compiler warning */
          case 'd':
+        case 'n':
              base = 10;
              leading_chars_to_skip = 0;
              break;
@@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
      /* Calculate the widths of the various leading and trailing parts */
      calc_number_widths(&spec, sign, n_digits, format);
  
+    if (format->type == 'n')
+           /* Compute how many additional chars we need to allocate
+              to hold the thousands grouping. */
+           STRINGLIB_GROUPING(pnumeric_chars, n_digits,
+                              pnumeric_chars+n_digits,
+                              0, &n_grouping_chars, 0);
+
      /* Allocate a new string to hold the result */
-    result = STRINGLIB_NEW(NULL, spec.n_total);
+    result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
      if (!result)
         goto done;
      p = STRINGLIB_STR(result);
@@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
             pnumeric_chars,
             n_digits * sizeof(STRINGLIB_CHAR));
  
-    /* if X, convert to uppercase */
+    /* If type is 'X', convert to uppercase */
      if (format->type == 'X') {
         Py_ssize_t t;
         for (t = 0; t < n_digits; ++t)
             p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
      }
  
+    /* Insert the grouping, if any, after the uppercasing of 'X', so we can
+       ensure that grouping chars won't be affected. */
+    if (n_grouping_chars && format->type == 'n') {
+           /* We know this can't fail, since we've already
+              reserved enough space. */
+           STRINGLIB_CHAR *pstart = p + n_leading_chars;
+           int r = STRINGLIB_GROUPING(pstart, n_digits,
+                                      pstart + n_digits,
+                                      spec.n_total+n_grouping_chars-n_leading_chars,
+                                      NULL, 0);
+           assert(r);
+    }
+
      /* Fill in the non-digit parts */
      fill_number(p, &spec, n_digits,
                  format->fill_char == '\0' ? ' ' : format->fill_char);
@@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
      case 'o':
      case 'x':
      case 'X':
+    case 'n':
          /* no type conversion needed, already an int (or long).  do
            the formatting */
             result = format_int_or_long_internal(value, &format, tostring);
@@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
      case 'F':
      case 'g':
      case 'G':
-    case 'n':
      case '%':
          /* convert to float */
          tmp = PyNumber_Float(value);
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h

new file mode 100644 (file)

index 0000000..5cab0bb
--- /dev/null
+++ b/Objects/stringlib/localeutil.h
@@ -0,0 +1,121 @@
+/* stringlib: locale related helpers implementation */
+
+#ifndef STRINGLIB_LOCALEUTIL_H
+#define STRINGLIB_LOCALEUTIL_H
+
+#include <locale.h>
+
+/**
+ * _Py_InsertThousandsGrouping:
+ * @buffer: A pointer to the start of a string.
+ * @len: The length of the string.
+ * @plast: A pointer to the end of of the digits in the string.  This
+ *         may be before the end of the string (if the string contains
+ *         decimals, for example).
+ * @buf_size: The maximum size of the buffer pointed to by buffer.
+ * @count: If non-NULL, points to a variable that will receive the
+ *         number of characters we need to insert (and no formatting
+ *         will actually occur).
+ * @append_zero_char: If non-zero, put a trailing zero at the end of
+ *         of the resulting string, if and only if we modified the
+ *         string.
+ *
+ * Inserts thousand grouping characters (as defined in the current
+ *  locale) into the string between buffer and plast.  If count is
+ *  non-NULL, don't do any formatting, just count the number of
+ *  characters to insert.  This is used by the caller to appropriately
+ *  resize the buffer, if needed.
+ *
+ * Return value: 0 on error, else 1.  Note that no error can occur if
+ *  count is non-NULL.
+ *
+ * This name won't be used, the includer of this file should define
+ *  it to be the actual function name, based on unicode or string.
+ **/
+int
+_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
+                           Py_ssize_t len,
+                           STRINGLIB_CHAR *plast,
+                           Py_ssize_t buf_size,
+                           Py_ssize_t *count,
+                           int append_zero_char)
+{
+       struct lconv *locale_data = localeconv();
+       const char *grouping = locale_data->grouping;
+       const char *thousands_sep = locale_data->thousands_sep;
+       Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+       STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */
+       STRINGLIB_CHAR *pmax = buffer + buf_size;       /* max of buffer */
+       char current_grouping;
+
+       /* Initialize the character count, if we're just counting. */
+       if (count)
+               *count = 0;
+
+       /* Starting at plast and working right-to-left, keep track of
+          what grouping needs to be added and insert that. */
+       current_grouping = *grouping++;
+
+       /* If the first character is 0, perform no grouping at all. */
+       if (current_grouping == 0)
+               return 1;
+
+       while (plast - buffer > current_grouping) {
+               /* Always leave buffer and pend valid at the end of this
+                  loop, since we might leave with a return statement. */
+
+               plast -= current_grouping;
+               if (count) {
+                       /* We're only counting, not touching the memory. */
+                       *count += thousands_sep_len;
+               }
+               else {
+                       /* Do the formatting. */
+
+                       /* Is there room to insert thousands_sep_len chars? */
+                       if (pmax - pend < thousands_sep_len)
+                               /* No room. */
+                               return 0;
+
+                       /* Move the rest of the string down. */
+                       memmove(plast + thousands_sep_len,
+                               plast,
+                               (pend - plast) * sizeof(STRINGLIB_CHAR));
+                       /* Copy the thousands_sep chars into the buffer. */
+#if STRINGLIB_IS_UNICODE
+                       /* Convert from the char's of the thousands_sep from
+                          the locale into unicode. */
+                       {
+                               Py_ssize_t i;
+                               for (i = 0; i < thousands_sep_len; ++i)
+                                       plast[i] = thousands_sep[i];
+                       }
+#else
+                       /* No conversion, just memcpy the thousands_sep. */
+                       memcpy(plast, thousands_sep, thousands_sep_len);
+#endif
+               }
+
+               /* Adjust end pointer. */
+               pend += thousands_sep_len;
+
+               /* Move to the next grouping character, unless we're
+                  repeating (which is designated by a grouping of 0). */
+               if (*grouping != 0) {
+                       current_grouping = *grouping++;
+                       if (current_grouping == CHAR_MAX)
+                               /* We're done. */
+                               break;
+               }
+       }
+       if (append_zero_char) {
+               /* Append a zero character to mark the end of the string,
+                  if there's room. */
+               if (pend - plast < 1)
+                       /* No room, error. */
+                       return 0;
+               *pend = 0;
+       }
+       return 1;
+}
+#endif /* STRINGLIB_LOCALEUTIL_H */
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h

index 1e0df0f9cdc51fccef5cf85752e6903b7e038323..daaa2e2b0f7be7ad91c1996b386881dcbb3d92be 100644 (file)
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -23,5 +23,6 @@
  #define STRINGLIB_CHECK          PyString_Check
  #define STRINGLIB_CMP            memcmp
  #define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping
  
  #endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h

index f402a9878835ecc7d330a880e0bfc18b46bc6983..8f87fe0f7d1a697ba0a74c05eb65d82734e6061e 100644 (file)
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -21,6 +21,7 @@
  #define STRINGLIB_NEW            PyUnicode_FromUnicode
  #define STRINGLIB_RESIZE         PyUnicode_Resize
  #define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping
  
  #if PY_VERSION_HEX < 0x03000000
  #define STRINGLIB_TOSTR          PyObject_Unicode
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 9d518541ec7f9760686240b58870a16c696b3cb9..b96aaf85b62d2a1c21f905bdb950ee30b477a4d6 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj,
  #include "stringlib/find.h"
  #include "stringlib/partition.h"
  
+#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
+#include "stringlib/localeutil.h"
+
+
  
  static int
  string_print(PyStringObject *op, FILE *fp, int flags)
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index 0912cec57ed34695b98251caf9cd98397b9f8eed..3f0328e06b31564e5d4eb2f744c50a8de7b245ae 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
  Py_LOCAL_INLINE(int)
  add_thousands_grouping(char* buffer, size_t buf_size)
  {
+       Py_ssize_t len = strlen(buffer);
         struct lconv *locale_data = localeconv();
-       const char *grouping = locale_data->grouping;
-       const char *thousands_sep = locale_data->thousands_sep;
-       size_t thousands_sep_len = strlen(thousands_sep);
         const char *decimal_point = locale_data->decimal_point;
-       char *pend = buffer + strlen(buffer); /* current end of buffer */
-       char *pmax = buffer + buf_size;       /* max of buffer */
-       char current_grouping;
  
         /* Find the decimal point, if any.  We're only concerned
            about the characters to the left of the decimal when
@@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
                 if (!p)
                         /* No exponent and no decimal.  Use the entire
                            string. */
-                       p = pend;
+                       p = buffer + len;
         }
         /* At this point, p points just past the right-most character we
            want to format.  We need to add the grouping string for the
            characters between buffer and p. */
-
-       /* Starting at p and working right-to-left, keep track of
-          what grouping needs to be added and insert that. */
-       current_grouping = *grouping++;
-
-       /* If the first character is 0, perform no grouping at all. */
-       if (current_grouping == 0)
-               return 1;
-
-       while (p - buffer > current_grouping) {
-               /* Always leave buffer and pend valid at the end of this
-                  loop, since we might leave with a return statement. */
-
-               /* Is there room to insert thousands_sep_len chars?. */
-               if (pmax - pend <= thousands_sep_len)
-                       /* No room. */
-                       return 0;
-
-               /* Move the rest of the string down. */
-               p -= current_grouping;
-               memmove(p + thousands_sep_len,
-                       p,
-                       pend - p + 1);
-               /* Adjust end pointer. */
-               pend += thousands_sep_len;
-               /* Copy the thousands_sep chars into the buffer. */
-               memcpy(p, thousands_sep, thousands_sep_len);
-
-               /* Move to the next grouping character, unless we're
-                  repeating (which is designated by a grouping of 0). */
-               if (*grouping != 0) {
-                       current_grouping = *grouping++;
-                       if (current_grouping == CHAR_MAX)
-                               /* We're done. */
-                               return 1;
-               }
-       }
-       return 1;
+       return _PyString_InsertThousandsGrouping(buffer, len, p,
+                                                buf_size, NULL, 1);
  }
  
  /* see FORMATBUFLEN in unicodeobject.c */
author	Eric Smith <eric@trueblade.com>
	Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)
committer	Eric Smith <eric@trueblade.com>
	Sun, 11 May 2008 19:52:48 +0000 (19:52 +0000)
Include/stringobject.h		patch \| blob \| history
Lib/test/test_types.py		patch \| blob \| history
Makefile.pre.in		patch \| blob \| history
Objects/stringlib/formatter.h		patch \| blob \| history
Objects/stringlib/localeutil.h	[new file with mode: 0644]	patch \| blob
Objects/stringlib/stringdefs.h		patch \| blob \| history
Objects/stringlib/unicodedefs.h		patch \| blob \| history
Objects/stringobject.c		patch \| blob \| history
Python/pystrtod.c		patch \| blob \| history