Merged revisions 62586 via svnmerge from

author Eric Smith <eric@trueblade.com>

Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)

committer Eric Smith <eric@trueblade.com>

Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)
author Eric Smith <eric@trueblade.com>
Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)
committer Eric Smith <eric@trueblade.com>
Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py

index dae250e9cc5321ea01cdaaa942884c83725fdd4f..1c7a8cd2b784f3e7c4c2c495e3e94368750a3fb7 100644 (file)
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -1,8 +1,9 @@
  # Python test set -- part 6, built-in types
  
-from test.test_support import run_unittest
+from test.test_support import run_unittest, run_with_locale
  import unittest
  import sys
+import locale
  
  class TypesTests(unittest.TestCase):
  
@@ -407,6 +408,15 @@ class TypesTests(unittest.TestCase):
                  self.assertEqual(value.__format__(format_spec),
                                   float(value).__format__(format_spec))
  
+    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
+    def test_float__format__locale(self):
+        # test locale support for __format__ code 'n'
+
+        for i in range(-10, 10):
+            x = 1234567890.0 * (10.0 ** i)
+            self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
+            self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
+
      def test_float__format__(self):
          # these should be rewritten to use both format(x, spec) and
          # x.__format__(spec)
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index 2ca84021d8b29a3485a49df266723dd12a2ada63..0912cec57ed34695b98251caf9cd98397b9f8eed 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
         return val;
  }
  
+/* Given a string that may have a decimal point in the current
+   locale, change it back to a dot.  Since the string cannot get
+   longer, no need for a maximum buffer size parameter. */
+Py_LOCAL_INLINE(void)
+change_decimal_from_locale_to_dot(char* buffer)
+{
+       struct lconv *locale_data = localeconv();
+       const char *decimal_point = locale_data->decimal_point;
+
+       if (decimal_point[0] != '.' || decimal_point[1] != 0) {
+               size_t decimal_point_len = strlen(decimal_point);
+
+               if (*buffer == '+' || *buffer == '-')
+                       buffer++;
+               while (isdigit(Py_CHARMASK(*buffer)))
+                       buffer++;
+               if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
+                       *buffer = '.';
+                       buffer++;
+                       if (decimal_point_len > 1) {
+                               /* buffer needs to get smaller */
+                               size_t rest_len = strlen(buffer +
+                                                    (decimal_point_len - 1));
+                               memmove(buffer,
+                                       buffer + (decimal_point_len - 1),
+                                       rest_len);
+                               buffer[rest_len] = 0;
+                       }
+               }
+       }
+}
+
  
  /* From the C99 standard, section 7.19.6:
  The exponent always contains at least two digits, and only as many more digits
@@ -194,6 +226,189 @@ as necessary to represent the exponent.
  */
  #define MIN_EXPONENT_DIGITS 2
  
+/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
+   in length. */
+Py_LOCAL_INLINE(void)
+ensure_minumim_exponent_length(char* buffer, size_t buf_size)
+{
+       char *p = strpbrk(buffer, "eE");
+       if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
+               char *start = p + 2;
+               int exponent_digit_cnt = 0;
+               int leading_zero_cnt = 0;
+               int in_leading_zeros = 1;
+               int significant_digit_cnt;
+
+               /* Skip over the exponent and the sign. */
+               p += 2;
+
+               /* Find the end of the exponent, keeping track of leading
+                  zeros. */
+               while (*p && isdigit(Py_CHARMASK(*p))) {
+                       if (in_leading_zeros && *p == '0')
+                               ++leading_zero_cnt;
+                       if (*p != '0')
+                               in_leading_zeros = 0;
+                       ++p;
+                       ++exponent_digit_cnt;
+               }
+
+               significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
+               if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
+                       /* If there are 2 exactly digits, we're done,
+                          regardless of what they contain */
+               }
+               else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
+                       int extra_zeros_cnt;
+
+                       /* There are more than 2 digits in the exponent.  See
+                          if we can delete some of the leading zeros */
+                       if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
+                               significant_digit_cnt = MIN_EXPONENT_DIGITS;
+                       extra_zeros_cnt = exponent_digit_cnt -
+                               significant_digit_cnt;
+
+                       /* Delete extra_zeros_cnt worth of characters from the
+                          front of the exponent */
+                       assert(extra_zeros_cnt >= 0);
+
+                       /* Add one to significant_digit_cnt to copy the
+                          trailing 0 byte, thus setting the length */
+                       memmove(start,
+                               start + extra_zeros_cnt,
+                               significant_digit_cnt + 1);
+               }
+               else {
+                       /* If there are fewer than 2 digits, add zeros
+                          until there are 2, if there's enough room */
+                       int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
+                       if (start + zeros + exponent_digit_cnt + 1
+                             < buffer + buf_size) {
+                               memmove(start + zeros, start,
+                                       exponent_digit_cnt + 1);
+                               memset(start, '0', zeros);
+                       }
+               }
+       }
+}
+
+/* Ensure that buffer has a decimal point in it.  The decimal point
+   will not be in the current locale, it will always be '.' */
+Py_LOCAL_INLINE(void)
+ensure_decimal_point(char* buffer, size_t buf_size)
+{
+       int insert_count = 0;
+       char* chars_to_insert;
+
+       /* search for the first non-digit character */
+       char *p = buffer;
+       while (*p && isdigit(Py_CHARMASK(*p)))
+               ++p;
+
+       if (*p == '.') {
+               if (isdigit(Py_CHARMASK(*(p+1)))) {
+                       /* Nothing to do, we already have a decimal
+                          point and a digit after it */
+               }
+               else {
+                       /* We have a decimal point, but no following
+                          digit.  Insert a zero after the decimal. */
+                       ++p;
+                       chars_to_insert = "0";
+                       insert_count = 1;
+               }
+       }
+       else {
+               chars_to_insert = ".0";
+               insert_count = 2;
+       }
+       if (insert_count) {
+               size_t buf_len = strlen(buffer);
+               if (buf_len + insert_count + 1 >= buf_size) {
+                       /* If there is not enough room in the buffer
+                          for the additional text, just skip it.  It's
+                          not worth generating an error over. */
+               }
+               else {
+                       memmove(p + insert_count, p,
+                               buffer + strlen(buffer) - p + 1);
+                       memcpy(p, chars_to_insert, insert_count);
+               }
+       }
+}
+
+/* Add the locale specific grouping characters to buffer.  Note
+   that any decimal point (if it's present) in buffer is already
+   locale-specific.  Return 0 on error, else 1. */
+Py_LOCAL_INLINE(int)
+add_thousands_grouping(char* buffer, size_t buf_size)
+{
+       struct lconv *locale_data = localeconv();
+       const char *grouping = locale_data->grouping;
+       const char *thousands_sep = locale_data->thousands_sep;
+       size_t thousands_sep_len = strlen(thousands_sep);
+       const char *decimal_point = locale_data->decimal_point;
+       char *pend = buffer + strlen(buffer); /* current end of buffer */
+       char *pmax = buffer + buf_size;       /* max of buffer */
+       char current_grouping;
+
+       /* Find the decimal point, if any.  We're only concerned
+          about the characters to the left of the decimal when
+          adding grouping. */
+       char *p = strstr(buffer, decimal_point);
+       if (!p) {
+               /* No decimal, use the entire string. */
+
+               /* If any exponent, adjust p. */
+               p = strpbrk(buffer, "eE");
+               if (!p)
+                       /* No exponent and no decimal.  Use the entire
+                          string. */
+                       p = pend;
+       }
+       /* At this point, p points just past the right-most character we
+          want to format.  We need to add the grouping string for the
+          characters between buffer and p. */
+
+       /* Starting at p and working right-to-left, keep track of
+          what grouping needs to be added and insert that. */
+       current_grouping = *grouping++;
+
+       /* If the first character is 0, perform no grouping at all. */
+       if (current_grouping == 0)
+               return 1;
+
+       while (p - buffer > current_grouping) {
+               /* Always leave buffer and pend valid at the end of this
+                  loop, since we might leave with a return statement. */
+
+               /* Is there room to insert thousands_sep_len chars?. */
+               if (pmax - pend <= thousands_sep_len)
+                       /* No room. */
+                       return 0;
+
+               /* Move the rest of the string down. */
+               p -= current_grouping;
+               memmove(p + thousands_sep_len,
+                       p,
+                       pend - p + 1);
+               /* Adjust end pointer. */
+               pend += thousands_sep_len;
+               /* Copy the thousands_sep chars into the buffer. */
+               memcpy(p, thousands_sep, thousands_sep_len);
+
+               /* Move to the next grouping character, unless we're
+                  repeating (which is designated by a grouping of 0). */
+               if (*grouping != 0) {
+                       current_grouping = *grouping++;
+                       if (current_grouping == CHAR_MAX)
+                               /* We're done. */
+                               return 1;
+               }
+       }
+       return 1;
+}
+
  /* see FORMATBUFLEN in unicodeobject.c */
  #define FLOAT_FORMATBUFLEN 120
  
@@ -222,7 +437,6 @@ PyOS_ascii_formatd(char       *buffer,
                    const char *format, 
                    double      d)
  {
-       char *p;
         char format_char;
         size_t format_len = strlen(format);
  
@@ -277,144 +491,31 @@ PyOS_ascii_formatd(char       *buffer,
         /* Have PyOS_snprintf do the hard work */
         PyOS_snprintf(buffer, buf_size, format, d);
  
-       /* Get the current local, and find the decimal point character (or
-          string?).  Convert that string back to a dot.  Do not do this if
-          using the 'n' (number) format code. */
-       if (format_char != 'n') {
-               struct lconv *locale_data = localeconv();
-               const char *decimal_point = locale_data->decimal_point;
-               size_t decimal_point_len = strlen(decimal_point);
-               size_t rest_len;
-
-               assert(decimal_point_len != 0);
+       /* Do various fixups on the return string */
  
-               if (decimal_point[0] != '.' || decimal_point[1] != 0) {
-                       p = buffer;
-
-                       if (*p == '+' || *p == '-')
-                               p++;
-
-                       while (isdigit(Py_CHARMASK(*p)))
-                               p++;
-
-                       if (strncmp(p, decimal_point,
-                                   decimal_point_len) == 0) {
-                               *p = '.';
-                               p++;
-                               if (decimal_point_len > 1) {
-                                       rest_len = strlen(p +
-                                                     (decimal_point_len - 1));
-                                       memmove(p, p + (decimal_point_len - 1),
-                                               rest_len);
-                                       p[rest_len] = 0;
-                               }
-                       }
-               }
-       }
+       /* Get the current locale, and find the decimal point string.
+          Convert that string back to a dot.  Do not do this if using the
+          'n' (number) format code, since we want to keep the localized
+          decimal point in that case. */
+       if (format_char != 'n')
+               change_decimal_from_locale_to_dot(buffer);
  
         /* If an exponent exists, ensure that the exponent is at least
            MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
            for the extra zeros.  Also, if there are more than
            MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
            back to MIN_EXPONENT_DIGITS */
-       p = strpbrk(buffer, "eE");
-       if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
-               char *start = p + 2;
-               int exponent_digit_cnt = 0;
-               int leading_zero_cnt = 0;
-               int in_leading_zeros = 1;
-               int significant_digit_cnt;
-
-               p += 2;
-               while (*p && isdigit(Py_CHARMASK(*p))) {
-                       if (in_leading_zeros && *p == '0')
-                               ++leading_zero_cnt;
-                       if (*p != '0')
-                               in_leading_zeros = 0;
-                       ++p;
-                       ++exponent_digit_cnt;
-               }
-
-               significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
-               if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
-                       /* If there are 2 exactly digits, we're done,
-                          regardless of what they contain */
-               }
-               else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
-                       int extra_zeros_cnt;
-
-                       /* There are more than 2 digits in the exponent.  See
-                          if we can delete some of the leading zeros */
-                       if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
-                               significant_digit_cnt = MIN_EXPONENT_DIGITS;
-                       extra_zeros_cnt = exponent_digit_cnt -
-                               significant_digit_cnt;
-
-                       /* Delete extra_zeros_cnt worth of characters from the
-                          front of the exponent */
-                       assert(extra_zeros_cnt >= 0);
-
-                       /* Add one to significant_digit_cnt to copy the
-                          trailing 0 byte, thus setting the length */
-                       memmove(start,
-                               start + extra_zeros_cnt,
-                               significant_digit_cnt + 1);
-               }
-               else {
-                       /* If there are fewer than 2 digits, add zeros
-                          until there are 2, if there's enough room */
-                       int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
-                       if (start + zeros + exponent_digit_cnt + 1
-                             < buffer + buf_size) {
-                               memmove(start + zeros, start,
-                                       exponent_digit_cnt + 1);
-                               memset(start, '0', zeros);
-                       }
-               }
-       }
+       ensure_minumim_exponent_length(buffer, buf_size);
  
         /* If format_char is 'Z', make sure we have at least one character
            after the decimal point (and make sure we have a decimal point). */
-       if (format_char == 'Z') {
-               int insert_count = 0;
-               char* chars_to_insert;
-
-               /* search for the first non-digit character */
-               p = buffer;
-               while (*p && isdigit(Py_CHARMASK(*p)))
-                       ++p;
+       if (format_char == 'Z')
+               ensure_decimal_point(buffer, buf_size);
  
-               if (*p == '.') {
-                       if (isdigit(Py_CHARMASK(*(p+1)))) {
-                               /* Nothing to do, we already have a decimal
-                                  point and a digit after it */
-                       }
-                       else {
-                               /* We have a decimal point, but no following
-                                  digit.  Insert a zero after the decimal. */
-                               ++p;
-                               chars_to_insert = "0";
-                               insert_count = 1;
-                       }
-               }
-               else {
-                       chars_to_insert = ".0";
-                       insert_count = 2;
-               }
-               if (insert_count) {
-                       size_t buf_len = strlen(buffer);
-                       if (buf_len + insert_count + 1 >= buf_size) {
-                               /* If there is not enough room in the buffer
-                                  for the additional text, just skip it.  It's
-                                  not worth generating an error over. */
-                       }
-                       else {
-                               memmove(p + insert_count, p,
-                                       buffer + strlen(buffer) - p + 1);
-                               memcpy(p, chars_to_insert, insert_count);
-                       }
-               }
-       }
+       /* If format_char is 'n', add the thousands grouping. */
+       if (format_char == 'n')
+               if (!add_thousands_grouping(buffer, buf_size))
+                       return NULL;
  
         return buffer;
  }
author	Eric Smith <eric@trueblade.com>
	Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)
committer	Eric Smith <eric@trueblade.com>
	Wed, 30 Apr 2008 02:12:09 +0000 (02:12 +0000)
Lib/test/test_types.py		patch \| blob \| history
Python/pystrtod.c		patch \| blob \| history