Internal plumbing changes for float parsing:

author Mark Dickinson <dickinsm@gmail.com>

Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
diff --git a/Objects/floatobject.c b/Objects/floatobject.c

index b7b52207e36b67ee4a2523b4c88009d65c7572f7..fdca3bef3640b5526c934d4b5a3f439bb29f61e0 100644 (file)
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
  PyObject *
  PyFloat_FromString(PyObject *v)
  {
-       const char *s, *last, *end, *sp;
+       const char *s, *last, *end;
         double x;
         char buffer[256]; /* for errors */
         char *s_buffer = NULL;
@@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
                                 "float() argument must be a string or a number");
                 return NULL;
         }
-
         last = s + len;
+
         while (*s && isspace(Py_CHARMASK(*s)))
                 s++;
-       if (*s == '\0') {
-               PyErr_SetString(PyExc_ValueError, "empty string for float()");
-               goto error;
-       }
-       sp = s;
-       /* We don't care about overflow or underflow.  If the platform supports
-        * them, infinities and signed zeroes (on underflow) are fine.
-        * However, strtod can return 0 for denormalized numbers.  Note that
-        * whether strtod sets errno on underflow is not defined, so we can't
-        * key off errno.
-         */
+       /* We don't care about overflow or underflow.  If the platform
+        * supports them, infinities and signed zeroes (on underflow) are
+        * fine. */
+       errno = 0;
         PyFPE_START_PROTECT("strtod", goto error)
         x = PyOS_ascii_strtod(s, (char **)&end);
         PyFPE_END_PROTECT(x)
-       errno = 0;
-       /* Believe it or not, Solaris 2.6 can move end *beyond* the null
-          byte at the end of the string, when the input is inf(inity). */
-       if (end > last)
-               end = last;
-       /* Check for inf and nan. This is done late because it rarely happens. */
         if (end == s) {
-               char *p = (char*)sp;
-               int sign = 1;
-
-               if (*p == '-') {
-                       sign = -1;
-                       p++;
-               }
-               if (*p == '+') {
-                       p++;
-               }
-               if (PyOS_strnicmp(p, "inf", 4) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_INF(sign);
-               }
-               if (PyOS_strnicmp(p, "infinity", 9) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_INF(sign);
-               }
-#ifdef Py_NAN
-               if(PyOS_strnicmp(p, "nan", 4) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_NAN;
+               if (errno == ENOMEM)
+                       PyErr_NoMemory();
+               else {
+                       PyOS_snprintf(buffer, sizeof(buffer),
+                               "invalid literal for float(): %.200s", s);
+                       PyErr_SetString(PyExc_ValueError, buffer);
                 }
-#endif
-               PyOS_snprintf(buffer, sizeof(buffer),
-                             "invalid literal for float(): %.200s", s);
-               PyErr_SetString(PyExc_ValueError, buffer);
                 goto error;
         }
         /* Since end != s, the platform made *some* kind of sense out
            of the input.  Trust it. */
         while (*end && isspace(Py_CHARMASK(*end)))
                 end++;
-       if (*end != '\0') {
-               PyOS_snprintf(buffer, sizeof(buffer),
-                             "invalid literal for float(): %.200s", s);
-               PyErr_SetString(PyExc_ValueError, buffer);
-               goto error;
-       }
-       else if (end != last) {
-               PyErr_SetString(PyExc_ValueError,
-                               "null byte in argument for float()");
+       if (end != last) {
+               if (*end == '\0')
+                       PyErr_SetString(PyExc_ValueError,
+                                       "null byte in argument for float()");
+               else {
+                       PyOS_snprintf(buffer, sizeof(buffer),
+                               "invalid literal for float(): %.200s", s);
+                       PyErr_SetString(PyExc_ValueError, buffer);
+               }
                 goto error;
         }
         result = PyFloat_FromDouble(x);
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index 002714f7c2917e8507d8b6f2a33fc73e6e1d5166..b9a33a10c754647b7703714f8d6628789c7d1216 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
  
         decimal_point_pos = NULL;
  
+       /* Set errno to zero, so that we can distinguish zero results
+          and underflows */
+       errno = 0;
+
         /* We process any leading whitespace and the optional sign manually,
            then pass the remainder to the system strtod.  This ensures that
            the result of an underflow has the correct sign. (bug #1725)  */
@@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
         if (*p == '-') {
                 negate = 1;
                 p++;
-       } else if (*p == '+') {
+       }
+       else if (*p == '+') {
                 p++;
         }
  
-       /* What's left should begin with a digit, a decimal point, or one of
-          the letters i, I, n, N. It should not begin with 0x or 0X */
-       if ((!ISDIGIT(*p) &&
-            *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
-           ||
-           (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
-       {
-               if (endptr)
-                       *endptr = (char*)nptr;
-               errno = EINVAL;
-               return val;
+       /* Parse infinities and nans */
+       if (*p == 'i' || *p == 'I') {
+               if (PyOS_strnicmp(p, "inf", 3) == 0) {
+                       val = Py_HUGE_VAL;
+                       if (PyOS_strnicmp(p+3, "inity", 5) == 0)
+                               fail_pos = (char *)p+8;
+                       else
+                               fail_pos = (char *)p+3;
+                       goto got_val;
+               }
+               else
+                       goto invalid_string;
         }
-       digits_pos = p;
+#ifdef Py_NAN
+       if (*p == 'n' || *p == 'N') {
+               if (PyOS_strnicmp(p, "nan", 3) == 0) {
+                       val = Py_NAN;
+                       fail_pos = (char *)p+3;
+                       goto got_val;
+               }
+               else
+                       goto invalid_string;
+       }
+#endif
+
+       /* Some platform strtods accept hex floats; Python shouldn't (at the
+          moment), so we check explicitly for strings starting with '0x'. */
+       if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
+               goto invalid_string;
  
-       if (decimal_point[0] != '.' || 
+       /* Check that what's left begins with a digit or decimal point */
+       if (!ISDIGIT(*p) && *p != '.')
+               goto invalid_string;
+
+       digits_pos = p;
+       if (decimal_point[0] != '.' ||
             decimal_point[1] != 0)
         {
+               /* Look for a '.' in the input; if present, it'll need to be
+                  swapped for the current locale's decimal point before we
+                  call strtod.  On the other hand, if we find the current
+                  locale's decimal point then the input is invalid. */
                 while (ISDIGIT(*p))
                         p++;
  
@@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                 {
                         decimal_point_pos = p++;
  
+                       /* locate end of number */
                         while (ISDIGIT(*p))
                                 p++;
  
@@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                         end = p;
                 }
                 else if (strncmp(p, decimal_point, decimal_point_len) == 0)
-               {
                         /* Python bug #1417699 */
-                       if (endptr)
-                               *endptr = (char*)nptr;
-                       errno = EINVAL;
-                       return val;
-               }
+                       goto invalid_string;
                 /* For the other cases, we need not convert the decimal
                    point */
         }
  
-       /* Set errno to zero, so that we can distinguish zero results
-          and underflows */
-       errno = 0;
-
-       if (decimal_point_pos)
-       {
+       if (decimal_point_pos) {
                 char *copy, *c;
-
-               /* We need to convert the '.' to the locale specific decimal
-                  point */
+               /* Create a copy of the input, with the '.' converted to the
+                  locale-specific decimal point */
                 copy = (char *)PyMem_MALLOC(end - digits_pos +
                                             1 + decimal_point_len);
                 if (copy == NULL) {
@@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
         }
  
         if (fail_pos == digits_pos)
-               fail_pos = (char *)nptr;
+               goto invalid_string;
  
+  got_val:
         if (negate && fail_pos != nptr)
                 val = -val;
  
@@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                 *endptr = fail_pos;
  
         return val;
+
+  invalid_string:
+       if (endptr)
+               *endptr = (char*)nptr;
+       errno = EINVAL;
+       return -1.0;
  }
  
  #endif
author	Mark Dickinson <dickinsm@gmail.com>
	Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
Objects/floatobject.c		patch | blob | history
Python/pystrtod.c		patch | blob | history