granicus.if.org Git - python/commitdiff
Internal plumbing changes for float parsing:
author Mark Dickinson <dickinsm@gmail.com>
Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Sun, 26 Apr 2009 15:30:47 +0000 (15:30 +0000)
 - check for nans and infs within PyOS_ascii_strtod
 - simplify parsing in PyFloat_FromString, and handle out-of-memory
   errors properly

Objects/floatobject.c
Python/pystrtod.c

index b7b52207e36b67ee4a2523b4c88009d65c7572f7..fdca3bef3640b5526c934d4b5a3f439bb29f61e0 100644 (file)
@@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-       const char *s, *last, *end, *sp;
+       const char *s, *last, *end;
        double x;
        char buffer[256]; /* for errors */
        char *s_buffer = NULL;
@@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
                                "float() argument must be a string or a number");
                return NULL;
        }
-
        last = s + len;
+
        while (*s && isspace(Py_CHARMASK(*s)))
                s++;
-       if (*s == '\0') {
-               PyErr_SetString(PyExc_ValueError, "empty string for float()");
-               goto error;
-       }
-       sp = s;
-       /* We don't care about overflow or underflow.  If the platform supports
-        * them, infinities and signed zeroes (on underflow) are fine.
-        * However, strtod can return 0 for denormalized numbers.  Note that
-        * whether strtod sets errno on underflow is not defined, so we can't
-        * key off errno.
-         */
+       /* We don't care about overflow or underflow.  If the platform
+        * supports them, infinities and signed zeroes (on underflow) are
+        * fine. */
+       errno = 0;
        PyFPE_START_PROTECT("strtod", goto error)
        x = PyOS_ascii_strtod(s, (char **)&end);
        PyFPE_END_PROTECT(x)
-       errno = 0;
-       /* Believe it or not, Solaris 2.6 can move end *beyond* the null
-          byte at the end of the string, when the input is inf(inity). */
-       if (end > last)
-               end = last;
-       /* Check for inf and nan. This is done late because it rarely happens. */
        if (end == s) {
-               char *p = (char*)sp;
-               int sign = 1;
-
-               if (*p == '-') {
-                       sign = -1;
-                       p++;
-               }
-               if (*p == '+') {
-                       p++;
-               }
-               if (PyOS_strnicmp(p, "inf", 4) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_INF(sign);
-               }
-               if (PyOS_strnicmp(p, "infinity", 9) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_INF(sign);
-               }
-#ifdef Py_NAN
-               if(PyOS_strnicmp(p, "nan", 4) == 0) {
-                       if (s_buffer != NULL)
-                               PyMem_FREE(s_buffer);
-                       Py_RETURN_NAN;
+               if (errno == ENOMEM)
+                       PyErr_NoMemory();
+               else {
+                       PyOS_snprintf(buffer, sizeof(buffer),
+                               "invalid literal for float(): %.200s", s);
+                       PyErr_SetString(PyExc_ValueError, buffer);
                }
-#endif
-               PyOS_snprintf(buffer, sizeof(buffer),
-                             "invalid literal for float(): %.200s", s);
-               PyErr_SetString(PyExc_ValueError, buffer);
                goto error;
        }
        /* Since end != s, the platform made *some* kind of sense out
           of the input.  Trust it. */
        while (*end && isspace(Py_CHARMASK(*end)))
                end++;
-       if (*end != '\0') {
-               PyOS_snprintf(buffer, sizeof(buffer),
-                             "invalid literal for float(): %.200s", s);
-               PyErr_SetString(PyExc_ValueError, buffer);
-               goto error;
-       }
-       else if (end != last) {
-               PyErr_SetString(PyExc_ValueError,
-                               "null byte in argument for float()");
+       if (end != last) {
+               if (*end == '\0')
+                       PyErr_SetString(PyExc_ValueError,
+                                       "null byte in argument for float()");
+               else {
+                       PyOS_snprintf(buffer, sizeof(buffer),
+                               "invalid literal for float(): %.200s", s);
+                       PyErr_SetString(PyExc_ValueError, buffer);
+               }
                goto error;
        }
        result = PyFloat_FromDouble(x);
index 002714f7c2917e8507d8b6f2a33fc73e6e1d5166..b9a33a10c754647b7703714f8d6628789c7d1216 100644 (file)
@@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 
        decimal_point_pos = NULL;
 
+       /* Set errno to zero, so that we can distinguish zero results
+          and underflows */
+       errno = 0;
+
        /* We process any leading whitespace and the optional sign manually,
           then pass the remainder to the system strtod.  This ensures that
           the result of an underflow has the correct sign. (bug #1725)  */
@@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
        if (*p == '-') {
                negate = 1;
                p++;
-       } else if (*p == '+') {
+       }
+       else if (*p == '+') {
                p++;
        }
 
-       /* What's left should begin with a digit, a decimal point, or one of
-          the letters i, I, n, N. It should not begin with 0x or 0X */
-       if ((!ISDIGIT(*p) &&
-            *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
-           ||
-           (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
-       {
-               if (endptr)
-                       *endptr = (char*)nptr;
-               errno = EINVAL;
-               return val;
+       /* Parse infinities and nans */
+       if (*p == 'i' || *p == 'I') {
+               if (PyOS_strnicmp(p, "inf", 3) == 0) {
+                       val = Py_HUGE_VAL;
+                       if (PyOS_strnicmp(p+3, "inity", 5) == 0)
+                               fail_pos = (char *)p+8;
+                       else
+                               fail_pos = (char *)p+3;
+                       goto got_val;
+               }
+               else
+                       goto invalid_string;
        }
-       digits_pos = p;
+#ifdef Py_NAN
+       if (*p == 'n' || *p == 'N') {
+               if (PyOS_strnicmp(p, "nan", 3) == 0) {
+                       val = Py_NAN;
+                       fail_pos = (char *)p+3;
+                       goto got_val;
+               }
+               else
+                       goto invalid_string;
+       }
+#endif
+
+       /* Some platform strtods accept hex floats; Python shouldn't (at the
+          moment), so we check explicitly for strings starting with '0x'. */
+       if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
+               goto invalid_string;
 
-       if (decimal_point[0] != '.' || 
+       /* Check that what's left begins with a digit or decimal point */
+       if (!ISDIGIT(*p) && *p != '.')
+               goto invalid_string;
+
+       digits_pos = p;
+       if (decimal_point[0] != '.' ||
            decimal_point[1] != 0)
        {
+               /* Look for a '.' in the input; if present, it'll need to be
+                  swapped for the current locale's decimal point before we
+                  call strtod.  On the other hand, if we find the current
+                  locale's decimal point then the input is invalid. */
                while (ISDIGIT(*p))
                        p++;
 
@@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                {
                        decimal_point_pos = p++;
 
+                       /* locate end of number */
                        while (ISDIGIT(*p))
                                p++;
 
@@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                        end = p;
                }
                else if (strncmp(p, decimal_point, decimal_point_len) == 0)
-               {
                        /* Python bug #1417699 */
-                       if (endptr)
-                               *endptr = (char*)nptr;
-                       errno = EINVAL;
-                       return val;
-               }
+                       goto invalid_string;
                /* For the other cases, we need not convert the decimal
                   point */
        }
 
-       /* Set errno to zero, so that we can distinguish zero results
-          and underflows */
-       errno = 0;
-
-       if (decimal_point_pos)
-       {
+       if (decimal_point_pos) {
                char *copy, *c;
-
-               /* We need to convert the '.' to the locale specific decimal
-                  point */
+               /* Create a copy of the input, with the '.' converted to the
+                  locale-specific decimal point */
                copy = (char *)PyMem_MALLOC(end - digits_pos +
                                            1 + decimal_point_len);
                if (copy == NULL) {
@@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
        }
 
        if (fail_pos == digits_pos)
-               fail_pos = (char *)nptr;
+               goto invalid_string;
 
+  got_val:
        if (negate && fail_pos != nptr)
                val = -val;
 
@@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
                *endptr = fail_pos;
 
        return val;
+
+  invalid_string:
+       if (endptr)
+               *endptr = (char*)nptr;
+       errno = EINVAL;
+       return -1.0;
 }
 
 #endif