Refactor to remove duplicated nan/inf parsing code in pystrtod.c, floatobject.c and dtoa.c.

author Mark Dickinson <dickinsm@gmail.com>

Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)
diff --git a/Include/pystrtod.h b/Include/pystrtod.h

index abf9930caf4d775d95d22456ad280ed7a44bf5c0..b1d8786e509d6959940b06d8f2e83f95fa53c758 100644 (file)
--- a/Include/pystrtod.h
+++ b/Include/pystrtod.h
@@ -21,6 +21,8 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                           int flags,
                                           int *type);
  
+PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
+
  
  /* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
  #define Py_DTSF_SIGN      0x01 /* always add the sign */
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py

index b4b62f0445d168d4070a6372e31500d026d2211c..e74e8922030a67083909025d65b1cf5f1b4bb5a6 100644 (file)
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -532,6 +532,11 @@ class InfNanTest(unittest.TestCase):
          self.assertRaises(ValueError, float, "-INFI")
          self.assertRaises(ValueError, float, "infinitys")
  
+        self.assertRaises(ValueError, float, "++Inf")
+        self.assertRaises(ValueError, float, "-+inf")
+        self.assertRaises(ValueError, float, "+-infinity")
+        self.assertRaises(ValueError, float, "--Infinity")
+
      def test_inf_as_str(self):
          self.assertEqual(repr(1e300 * 1e300), "inf")
          self.assertEqual(repr(-1e300 * 1e300), "-inf")
@@ -563,6 +568,11 @@ class InfNanTest(unittest.TestCase):
          self.assertRaises(ValueError, float, "+na")
          self.assertRaises(ValueError, float, "-na")
  
+        self.assertRaises(ValueError, float, "++nan")
+        self.assertRaises(ValueError, float, "-+NAN")
+        self.assertRaises(ValueError, float, "+-NaN")
+        self.assertRaises(ValueError, float, "--nAn")
+
      def test_nan_as_str(self):
          self.assertEqual(repr(1e300 * 1e300 * 0), "nan")
          self.assertEqual(repr(-1e300 * 1e300 * 0), "nan")
diff --git a/Objects/floatobject.c b/Objects/floatobject.c

index cd8c14fb2d5174c87ea74487c9287d848d54c470..341072eaababc8765e1b052e2e1326e355bef2fe 100644 (file)
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -1157,20 +1157,6 @@ Return a hexadecimal representation of a floating-point number.\n\
  >>> 3.14159.hex()\n\
  '0x1.921f9f01b866ep+1'");
  
-/* Case-insensitive locale-independent string match used for nan and inf
-   detection. t should be lower-case and null-terminated.  Return a nonzero
-   result if the first strlen(t) characters of s match t and 0 otherwise. */
-
-static int
-case_insensitive_match(const char *s, const char *t)
-{
-       while(*t && Py_TOLOWER(*s) == *t) {
-               s++;
-               t++;
-       }
-       return *t ? 0 : 1;
-}
-
  /* Convert a hexadecimal string to a float. */
  
  static PyObject *
@@ -1180,7 +1166,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
         double x;
         long exp, top_exp, lsb, key_digit;
         char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end;
-       int half_eps, digit, round_up, sign=1;
+       int half_eps, digit, round_up, negate=0;
         Py_ssize_t length, ndigits, fdigits, i;
  
         /*
@@ -1237,33 +1223,24 @@ float_fromhex(PyObject *cls, PyObject *arg)
          * Parse the string *
          ********************/
  
-       /* leading whitespace and optional sign */
+       /* leading whitespace */
         while (Py_ISSPACE(*s))
                 s++;
-       if (*s == '-') {
-               s++;
-               sign = -1;
-       }
-       else if (*s == '+')
-               s++;
  
         /* infinities and nans */
-       if (*s == 'i' || *s == 'I') {
-               if (!case_insensitive_match(s+1, "nf"))
-                       goto parse_error;
-               s += 3;
-               x = Py_HUGE_VAL;
-               if (case_insensitive_match(s, "inity"))
-                       s += 5;
+       x = _Py_parse_inf_or_nan(s, &coeff_end);
+       if (coeff_end != s) {
+               s = coeff_end;
                 goto finished;
         }
-       if (*s == 'n' || *s == 'N') {
-               if (!case_insensitive_match(s+1, "an"))
-                       goto parse_error;
-               s += 3;
-               x = Py_NAN;
-               goto finished;
+
+       /* optional sign */
+       if (*s == '-') {
+               s++;
+               negate = 1;
         }
+       else if (*s == '+')
+               s++;
  
         /* [0x] */
         s_store = s;
@@ -1400,7 +1377,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
                 s++;
         if (s != s_end)
                 goto parse_error;
-       result_as_float = Py_BuildValue("(d)", sign * x);
+       result_as_float = Py_BuildValue("(d)", negate ? -x : x);
         if (result_as_float == NULL)
                 return NULL;
         result = PyObject_CallObject(cls, result_as_float);
diff --git a/Python/dtoa.c b/Python/dtoa.c

index 82434bccc2ff42bf335c352493ed96b8f8d0e8f8..1cac9417487492a7ee18d3f66defca6895b51be0 100644 (file)
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@@ -264,15 +264,6 @@ extern int strtod_diglim;
  #define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1))
  #define Big1 0xffffffff
  
-#ifndef NAN_WORD0
-#define NAN_WORD0 0x7ff80000
-#endif
-
-#ifndef NAN_WORD1
-#define NAN_WORD1 0
-#endif
-
-
  /* struct BCinfo is used to pass information from _Py_dg_strtod to bigcomp */
  
  typedef struct BCinfo BCinfo;
@@ -1026,25 +1017,6 @@ static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
  #define Scale_Bit 0x10
  #define n_bigtens 5
  
-/* case insensitive string match, for recognising 'inf[inity]' and
-   'nan' strings. */
-
-static int
-match(const char **sp, char *t)
-{
-    int c, d;
-    const char *s = *sp;
-
-    while((d = *t++)) {
-        if ((c = *++s) >= 'A' && c <= 'Z')
-            c += 'a' - 'A';
-        if (c != d)
-            return 0;
-    }
-    *sp = s + 1;
-    return 1;
-}
-
  #define ULbits 32
  #define kshift 5
  #define kmask 31
@@ -1459,28 +1431,6 @@ _Py_dg_strtod(const char *s00, char **se)
      }
      if (!nd) {
          if (!nz && !nz0) {
-            /* Check for Nan and Infinity */
-            if (!bc.dplen)
-                switch(c) {
-                case 'i':
-                case 'I':
-                    if (match(&s,"nf")) {
-                        --s;
-                        if (!match(&s,"inity"))
-                            ++s;
-                        word0(&rv) = 0x7ff00000;
-                        word1(&rv) = 0;
-                        goto ret;
-                    }
-                    break;
-                case 'n':
-                case 'N':
-                    if (match(&s, "an")) {
-                        word0(&rv) = NAN_WORD0;
-                        word1(&rv) = NAN_WORD1;
-                        goto ret;
-                    }
-                }
            ret0:
              s = s00;
              sign = 0;
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index d36f9310882808a50aee44116365f8c0c1f0009a..95c0ff63760517f52c98487cc5f245bef38a6fa7 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -3,6 +3,57 @@
  #include <Python.h>
  #include <locale.h>
  
+/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
+   "infinity", with an optional leading sign of "+" or "-".  On success,
+   return the NaN or Infinity as a double and set *endptr to point just beyond
+   the successfully parsed portion of the string.  On failure, return -1.0 and
+   set *endptr to point to the start of the string. */
+
+static int
+case_insensitive_match(const char *s, const char *t)
+{
+       while(*t && Py_TOLOWER(*s) == *t) {
+               s++;
+               t++;
+       }
+       return *t ? 0 : 1;
+}
+
+double
+_Py_parse_inf_or_nan(const char *p, char **endptr)
+{
+       double retval;
+       const char *s;
+       int negate = 0;
+
+       s = p;
+       if (*s == '-') {
+               negate = 1;
+               s++;
+       }
+       else if (*s == '+') {
+               s++;
+       }
+       if (case_insensitive_match(s, "inf")) {
+               s += 3;
+               if (case_insensitive_match(s, "inity"))
+                       s += 5;
+               retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
+       }
+#ifdef Py_NAN
+       else if (case_insensitive_match(s, "nan")) {
+               s += 3;
+               retval = negate ? -Py_NAN : Py_NAN;
+       }
+#endif
+       else {
+               s = p;
+               retval = -1.0;
+       }
+       *endptr = (char *)s;
+       return retval;
+}
+
  /**
   * PyOS_ascii_strtod:
   * @nptr:    the string to convert to a numeric value.
@@ -49,6 +100,10 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
         result = _Py_dg_strtod(nptr, endptr);
         _Py_SET_53BIT_PRECISION_END;
  
+       if (*endptr == nptr)
+               /* string might represent an inf or nan */
+               result = _Py_parse_inf_or_nan(nptr, endptr);
+
         return result;
  
  }
@@ -63,19 +118,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
     correctly rounded results.
  */
  
-/* Case-insensitive string match used for nan and inf detection; t should be
-   lower-case.  Returns 1 for a successful match, 0 otherwise. */
-
-static int
-case_insensitive_match(const char *s, const char *t)
-{
-       while(*t && Py_TOLOWER(*s) == *t) {
-               s++;
-               t++;
-       }
-       return *t ? 0 : 1;
-}
-
  double
  _PyOS_ascii_strtod(const char *nptr, char **endptr)
  {
@@ -101,6 +143,11 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
  
         decimal_point_pos = NULL;
  
+       /* Parse infinities and nans */
+       val = _Py_parse_inf_or_nan(nptr, endptr);
+       if (*endptr != nptr)
+               return val;
+
         /* Set errno to zero, so that we can distinguish zero results
            and underflows */
         errno = 0;
@@ -118,31 +165,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
                 p++;
         }
  
-       /* Parse infinities and nans */
-       if (*p == 'i' || *p == 'I') {
-               if (case_insensitive_match(p+1, "nf")) {
-                       val = Py_HUGE_VAL;
-                       if (case_insensitive_match(p+3, "inity"))
-                               fail_pos = (char *)p+8;
-                       else
-                               fail_pos = (char *)p+3;
-                       goto got_val;
-               }
-               else
-                       goto invalid_string;
-       }
-#ifdef Py_NAN
-       if (*p == 'n' || *p == 'N') {
-               if (case_insensitive_match(p+1, "an")) {
-                       val = Py_NAN;
-                       fail_pos = (char *)p+3;
-                       goto got_val;
-               }
-               else
-                       goto invalid_string;
-       }
-#endif
-
         /* Some platform strtods accept hex floats; Python shouldn't (at the
            moment), so we check explicitly for strings starting with '0x'. */
         if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
@@ -231,7 +253,6 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
         if (fail_pos == digits_pos)
                 goto invalid_string;
  
-  got_val:
         if (negate && fail_pos != nptr)
                 val = -val;
         *endptr = fail_pos;
author	Mark Dickinson <dickinsm@gmail.com>
	Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Wed, 20 May 2009 22:05:25 +0000 (22:05 +0000)
Include/pystrtod.h		patch \| blob \| history
Lib/test/test_float.py		patch \| blob \| history
Objects/floatobject.c		patch \| blob \| history
Python/dtoa.c		patch \| blob \| history
Python/pystrtod.c		patch \| blob \| history