Trim leading zeros from a floating point exponent, per C99. See issue 1600. As...

author Eric Smith <eric@trueblade.com>

Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)

committer Eric Smith <eric@trueblade.com>

Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)
author Eric Smith <eric@trueblade.com>
Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)
committer Eric Smith <eric@trueblade.com>
Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py

index fe2950acedd0684029f95b43748e179eced938c4..bf218db3b5927c1b2b7f6cac0ad699f89059f794 100644 (file)
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -89,6 +89,29 @@ class TypesTests(unittest.TestCase):
          if float(1) == 1.0 and float(-1) == -1.0 and float(0) == 0.0: pass
          else: self.fail('float() does not work properly')
  
+    def test_float_to_string(self):
+        def test(f, result):
+            self.assertEqual(f.__format__('e'), result)
+            self.assertEqual('%e' % f, result)
+
+        # test all 2 digit exponents, both with __format__ and with
+        #  '%' formatting
+        for i in range(-99, 100):
+            test(float('1.5e'+str(i)), '1.500000e{0:+03d}'.format(i))
+
+        # test some 3 digit exponents
+        self.assertEqual(1.5e100.__format__('e'), '1.500000e+100')
+        self.assertEqual('%e' % 1.5e100, '1.500000e+100')
+
+        self.assertEqual(1.5e101.__format__('e'), '1.500000e+101')
+        self.assertEqual('%e' % 1.5e101, '1.500000e+101')
+
+        self.assertEqual(1.5e-100.__format__('e'), '1.500000e-100')
+        self.assertEqual('%e' % 1.5e-100, '1.500000e-100')
+
+        self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
+        self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
+
      def test_normal_integers(self):
          # Ensure the first 256 integers are shared
          a = 256
@@ -486,16 +509,17 @@ class TypesTests(unittest.TestCase):
          test(-1.0, ' f', '-1.000000')
          test( 1.0, '+f', '+1.000000')
          test(-1.0, '+f', '-1.000000')
+        test(1.1234e90, 'f', '1.1234e+90')
+        test(1.1234e90, 'F', '1.1234e+90')
          test(1.1234e200, 'f', '1.1234e+200')
          test(1.1234e200, 'F', '1.1234e+200')
  
-        # temporarily removed.  see issue 1600
- #       test( 1.0, 'e', '1.000000e+00')
- #       test(-1.0, 'e', '-1.000000e+00')
- #       test( 1.0, 'E', '1.000000E+00')
- #       test(-1.0, 'E', '-1.000000E+00')
- #       test(1.1234e20, 'e', '1.123400e+20')
- #       test(1.1234e20, 'E', '1.123400E+20')
+        test( 1.0, 'e', '1.000000e+00')
+        test(-1.0, 'e', '-1.000000e+00')
+        test( 1.0, 'E', '1.000000E+00')
+        test(-1.0, 'E', '-1.000000E+00')
+        test(1.1234e20, 'e', '1.123400e+20')
+        test(1.1234e20, 'E', '1.123400E+20')
  
          # % formatting
          test(-1.0, '%', '-100.000000%')
diff --git a/Misc/NEWS b/Misc/NEWS

index 40b78fc12dfd7f7952c2ab02d55a99addfd3069c..f4d1cc50e2e61e31afdf92e346a56c5adf055553 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,13 @@ What's New in Python 2.6 alpha 1?
  Core and builtins
  -----------------
  
+- Issue #1600: Modified PyOS_ascii_formatd to use at most 2 digit
+  exponents for exponents with absolute value < 100.  Follows C99
+  standard.  This is a change on Windows, which would use 3 digits.
+  Also, added 'n' to the formats that PyOS_ascii_formatd understands,
+  so that any alterations it does to the resulting string will be
+  available in stringlib/formatter.h (for float.__format__).
+
  - Implemented PEP 3101, Advanced String Formatting.  This adds a new
    builtin format(); a format() method for str and unicode; a
    __format__() method to object, str, unicode, int, long, float, and
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index 14fb84ba715b8c5e9d59cfe7fc871316ce10e9d3..16efa9d3ab8c2987d56a0c6436316ab3ee121e09 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -186,6 +186,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
  }
  
  
+/* From the C99 standard, section 7.19.6:
+The exponent always contains at least two digits, and only as many more digits
+as necessary to represent the exponent.
+*/
+#define MIN_EXPONENT_DIGITS 2
+
+/* see FORMATBUFLEN in unicodeobject.c */
+#define FLOAT_FORMATBUFLEN 120
+
  /**
   * PyOS_ascii_formatd:
   * @buffer: A buffer to place the resulting string in
@@ -197,8 +206,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
   * Converts a #gdouble to a string, using the '.' as
   * decimal point. To format the number you pass in
   * a printf()-style format string. Allowed conversion
- * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. 
+ * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
   * 
+ * 'n' is the same as 'g', except it uses the current locale.
+ *
   * Return value: The pointer to the buffer with the converted string.
   **/
  char *
@@ -207,17 +218,23 @@ PyOS_ascii_formatd(char       *buffer,
                    const char *format, 
                    double      d)
  {
-       struct lconv *locale_data;
-       const char *decimal_point;
-       size_t decimal_point_len, rest_len;
         char *p;
         char format_char;
+       size_t format_len = strlen(format);
+
+       /* For type 'n', we need to make a copy of the format string, because
+          we're going to modify 'n' -> 'g', and format is const char*, so we
+          can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
+          we ever need this to be.  There's an upcoming check to ensure it's
+          big enough. */
+       char tmp_format[FLOAT_FORMATBUFLEN];
  
  /*     g_return_val_if_fail (buffer != NULL, NULL); */
  /*     g_return_val_if_fail (format[0] == '%', NULL); */
  /*     g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */
  
-       format_char = format[strlen(format) - 1];
+       /* The last character in the format string must be the format char */
+       format_char = format[format_len - 1];
  
  /*     g_return_val_if_fail (format_char == 'e' || format_char == 'E' || */
  /*                           format_char == 'f' || format_char == 'F' || */
@@ -227,43 +244,126 @@ PyOS_ascii_formatd(char       *buffer,
         if (format[0] != '%')
                 return NULL;
  
+       /* I'm not sure why this test is here.  It's ensuring that the format
+          string after the first character doesn't have a single quote, a
+          lowercase l, or a percent. This is the reverse of the commented-out
+          test about 10 lines ago. */
         if (strpbrk(format + 1, "'l%"))
                 return NULL;
  
         if (!(format_char == 'e' || format_char == 'E' || 
               format_char == 'f' || format_char == 'F' || 
-             format_char == 'g' || format_char == 'G'))
+             format_char == 'g' || format_char == 'G' ||
+             format_char == 'n'))
                 return NULL;
  
+       /* Map 'n' format_char to 'g', by copying the format string and
+          replacing the final 'n' with a 'g' */
+       if (format_char == 'n') {
+               if (format_len + 1 >= sizeof(tmp_format)) {
+                       /* The format won't fit in our copy.  Error out.  In
+                          practice this should never happen; if it does, the
+                          caller detects it by the NULL return value */
+                       return NULL;
+               }
+               strcpy(tmp_format, format);
+               tmp_format[format_len - 1] = 'g';
+               format = tmp_format;
+       }
  
+       /* Have PyOS_snprintf do the hard work */
         PyOS_snprintf(buffer, buf_len, format, d);
  
-       locale_data = localeconv();
-       decimal_point = locale_data->decimal_point;
-       decimal_point_len = strlen(decimal_point);
+       /* Get the current locale, and find the decimal point character (or
+          string?).  Convert that string back to a dot.  Do not do this if
+          using the 'n' (number) format code. */
+       if (format_char != 'n') {
+               struct lconv *locale_data = localeconv();
+               const char *decimal_point = locale_data->decimal_point;
+               size_t decimal_point_len = strlen(decimal_point);
+               size_t rest_len;
  
-       assert(decimal_point_len != 0);
+               assert(decimal_point_len != 0);
  
-       if (decimal_point[0] != '.' || 
-           decimal_point[1] != 0)
-       {
-               p = buffer;
+               if (decimal_point[0] != '.' || decimal_point[1] != 0) {
+                       p = buffer;
  
-               if (*p == '+' || *p == '-')
-                       p++;
+                       if (*p == '+' || *p == '-')
+                               p++;
  
-               while (isdigit((unsigned char)*p))
-                       p++;
+                       while (isdigit(Py_CHARMASK(*p)))
+                               p++;
  
-               if (strncmp(p, decimal_point, decimal_point_len) == 0)
-               {
-                       *p = '.';
-                       p++;
-                       if (decimal_point_len > 1) {
-                               rest_len = strlen(p + (decimal_point_len - 1));
-                               memmove(p, p + (decimal_point_len - 1), 
-                                       rest_len);
-                               p[rest_len] = 0;
+                       if (strncmp(p, decimal_point, decimal_point_len) == 0) {
+                               *p = '.';
+                               p++;
+                               if (decimal_point_len > 1) {
+                                       rest_len = strlen(p +
+                                                     (decimal_point_len - 1));
+                                       memmove(p, p + (decimal_point_len - 1),
+                                               rest_len);
+                                       p[rest_len] = 0;
+                               }
+                       }
+               }
+       }
+
+       /* If an exponent exists, ensure that the exponent is at least
+          MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
+          for the extra zeros.  Also, if there are more than
+          MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
+          back to MIN_EXPONENT_DIGITS */
+       p = strpbrk(buffer, "eE");
+       if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
+               char *start = p + 2;
+               int exponent_digit_cnt = 0;
+               int leading_zero_cnt = 0;
+               int in_leading_zeros = 1;
+               int significant_digit_cnt;
+
+               p += 2;
+               while (*p && isdigit(Py_CHARMASK(*p))) {
+                       if (in_leading_zeros && *p == '0')
+                               ++leading_zero_cnt;
+                       if (*p != '0')
+                               in_leading_zeros = 0;
+                       ++p;
+                       ++exponent_digit_cnt;
+               }
+
+               significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
+               if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
+                       /* If there are exactly 2 digits, we're done,
+                          regardless of what they contain */
+               }
+               else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
+                       int extra_zeros_cnt;
+
+                       /* There are more than 2 digits in the exponent.  See
+                          if we can delete some of the leading zeros */
+                       if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
+                               significant_digit_cnt = MIN_EXPONENT_DIGITS;
+                       extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt;
+
+                       /* Delete extra_zeros_cnt worth of characters from the
+                          front of the exponent */
+                       assert(extra_zeros_cnt >= 0);
+
+                       /* Add one to significant_digit_cnt to copy the
+                          trailing 0 byte, thus setting the length */
+                       memmove(start,
+                               start + extra_zeros_cnt,
+                               significant_digit_cnt + 1);
+               }
+               else {
+                       /* If there are fewer than 2 digits, add zeros
+                          until there are 2, if there's enough room */
+                       int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
+                       if (start + zeros + exponent_digit_cnt + 1
+                             < buffer + buf_len) {
+                               memmove(start + zeros, start,
+                                       exponent_digit_cnt + 1);
+                               memset(start, '0', zeros);
                         }
                 }
         }
author	Eric Smith <eric@trueblade.com>
	Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)
committer	Eric Smith <eric@trueblade.com>
	Wed, 20 Feb 2008 23:34:22 +0000 (23:34 +0000)
Lib/test/test_types.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Python/pystrtod.c		patch \| blob \| history