Issue #10557: Fixed error messages from float() and other numeric

author Alexander Belopolsky <alexander.belopolsky@gmail.com>

Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)

committer Alexander Belopolsky <alexander.belopolsky@gmail.com>

Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)
author Alexander Belopolsky <alexander.belopolsky@gmail.com>
Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)
committer Alexander Belopolsky <alexander.belopolsky@gmail.com>
Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst

index 45304222e9eca4b6ae741e334fef5b196d125452..9edbcbb8c7cb3b4baf8cedef8844f367a2765874 100644 (file)
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -328,6 +328,13 @@ APIs:
     Identical to :c:func:`PyUnicode_FromFormat` except that it takes exactly two
     arguments.
  
+.. c:function:: PyObject* PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t size)
+
+   Create a Unicode object by replacing all decimal digits in the
+   :c:type:`Py_UNICODE` buffer of the given size by ASCII digits 0--9
+   according to their decimal value.  Return *NULL* if an exception
+   occurs.
+
  
  .. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode)
  
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index 116bb8258faa0f501bae9a252dba8272962de796..abd286db4f79e011d93f11fdb27128feafc9db5c 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1225,6 +1225,17 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
      );
  #endif
  
+/* Transforms code points that have decimal digit property to the
+   corresponding ASCII digit code points.
+
+   Returns a new Unicode string on success, NULL on failure.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
+    Py_UNICODE *s,              /* Unicode buffer */
+    Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
+    );
+
  /* --- File system encoding ---------------------------------------------- */
  
  /* ParseTuple converter: encode str objects to bytes using
diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py

index cc21aa7a622140ba9e2cbe96e01e072b66e93b89..2352ef1e2c7519ec2154141866acd80a8157c0b1 100644 (file)
--- a/Lib/test/test_complex.py
+++ b/Lib/test/test_complex.py
@@ -220,6 +220,7 @@ class ComplexTest(unittest.TestCase):
          self.assertEqual(complex(NS(1+10j)), 1+10j)
          self.assertRaises(TypeError, complex, OS(None))
          self.assertRaises(TypeError, complex, NS(None))
+        self.assertRaises(TypeError, complex, {})
  
          self.assertAlmostEqual(complex("1+10j"), 1+10j)
          self.assertAlmostEqual(complex(10), 10+0j)
@@ -325,6 +326,8 @@ class ComplexTest(unittest.TestCase):
  
          # check that complex accepts long unicode strings
          self.assertEqual(type(complex("1"*500)), complex)
+        # check whitespace processing
+        self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j)
  
          class EvilExc(Exception):
              pass
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py

index 0072133aae3bdfaea57333fa52c44710df79a37c..9bcd63dd2302cb84ef703cc28bbd51c09824072c 100644 (file)
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -43,14 +43,30 @@ class GeneralFloatCases(unittest.TestCase):
          self.assertRaises(ValueError, float, "+.inf")
          self.assertRaises(ValueError, float, ".")
          self.assertRaises(ValueError, float, "-.")
+        self.assertRaises(ValueError, float, b"-")
+        self.assertRaises(TypeError, float, {})
+        # Lone surrogate
+        self.assertRaises(UnicodeEncodeError, float, '\uD8F0')
          # check that we don't accept alternate exponent markers
          self.assertRaises(ValueError, float, "-1.7d29")
          self.assertRaises(ValueError, float, "3D-14")
-        self.assertEqual(float(b"  \u0663.\u0661\u0664  ".decode('raw-unicode-escape')), 3.14)
+        self.assertEqual(float("  \u0663.\u0661\u0664  "), 3.14)
+        self.assertEqual(float("\N{EM SPACE}3.14\N{EN SPACE}"), 3.14)
          # extra long strings should not be a problem
          float(b'.' + b'1'*1000)
          float('.' + '1'*1000)
  
+    def test_error_message(self):
+        # The ValueError raised by float() must quote the stripped input.
+        for s in ('\xbd', '123\xbd', '  123 456  '):
+            try:
+                float(s)
+            except ValueError as e:
+                self.assertIn(s.strip(), e.args[0])
+            else:
+                self.fail("Expected float(%r) to raise a ValueError" % s)
+
+
      @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
      def test_float_with_comma(self):
          # set locale to something that doesn't use '.' for the decimal point
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py

index 86c4dd791561796e9497069f5ffceedfba4d64a3..437e323cbccf8d0ddbe27fc3df6e8e3edd83aa6b 100644 (file)
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -20,7 +20,8 @@ L = [
          ('  1\02  ', ValueError),
          ('', ValueError),
          (' ', ValueError),
-        ('  \t\t  ', ValueError)
+        ('  \t\t  ', ValueError),
+        ("\u0200", ValueError)
  ]
  
  class IntTestCases(unittest.TestCase):
@@ -35,6 +36,8 @@ class IntTestCases(unittest.TestCase):
          self.assertEqual(int(3.5), 3)
          self.assertEqual(int(-3.5), -3)
          self.assertEqual(int("-3"), -3)
+        self.assertEqual(int(" -3 "), -3)
+        self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
          # Different base:
          self.assertEqual(int("10",16), 16)
          # Test conversion from strings and various anomalies
@@ -302,6 +305,16 @@ class IntTestCases(unittest.TestCase):
                      self.fail("Failed to raise TypeError with %s" %
                                ((base, trunc_result_base),))
  
+    def test_error_message(self):
+        # The ValueError raised by int() must quote the stripped input.
+        for s in ('\xbd', '123\xbd', '  123 456  '):
+            try:
+                int(s)
+            except ValueError as e:
+                self.assertIn(s.strip(), e.args[0])
+            else:
+                self.fail("Expected int(%r) to raise a ValueError" % s)
+
  def test_main():
      run_unittest(IntTestCases)
  
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index c5a0f803a0c786382abd2212be23655bc53982f4..2de9e7f1c5e935aa942a53f1e1ee579632d681df 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1168,8 +1168,13 @@ class UnicodeTest(string_tests.CommonTest,
          # Error handling (wrong arguments)
          self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
  
-        # Error handling (PyUnicode_EncodeDecimal())
-        self.assertRaises(UnicodeError, int, "\u0200")
+        # Error handling (lone surrogate in PyUnicode_TransformDecimalToASCII())
+        self.assertRaises(UnicodeError, int, "\ud800")
+        self.assertRaises(UnicodeError, int, "\udf00")
+        self.assertRaises(UnicodeError, float, "\ud800")
+        self.assertRaises(UnicodeError, float, "\udf00")
+        self.assertRaises(UnicodeError, complex, "\ud800")
+        self.assertRaises(UnicodeError, complex, "\udf00")
  
      def test_codecs(self):
          # Encoding
diff --git a/Misc/NEWS b/Misc/NEWS

index 59946bdc289b7bf62de95299fe8f950fc38d295c..f53a48689291cf0c424e2c089428efe0d9651a86 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -222,6 +222,10 @@ Library
  C-API
  -----
  
+- Issue #10557: Added a new API function, PyUnicode_TransformDecimalToASCII(),
+  which transforms non-ASCII decimal digits in a Unicode string to their
+  ASCII equivalents. 
+
  - Issue #9518: Extend the PyModuleDef_HEAD_INIT macro to explicitly
    zero-initialize all fields, fixing compiler warnings seen when building
    extension modules with gcc with "-Wmissing-field-initializers" (implied by
diff --git a/Objects/complexobject.c b/Objects/complexobject.c

index 59997962cef123a194e97eab12c54762a71da301..ec529d5bbf950be8d51b3f25bb8552a1b7933459 100644 (file)
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -766,20 +766,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
      char *end;
      double x=0.0, y=0.0, z;
      int got_bracket=0;
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
      Py_ssize_t len;
  
      if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
+        Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
+        Py_UNICODE *bufptr;
+        s_buffer = PyUnicode_TransformDecimalToASCII(
+            PyUnicode_AS_UNICODE(v), buflen);
          if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
+            return NULL;
+        /* Replace non-ASCII whitespace with ' ' */
+        bufptr = PyUnicode_AS_UNICODE(s_buffer);
+        for (i = 0; i < buflen; i++) {
+            Py_UNICODE ch = bufptr[i];
+            if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+                bufptr[i] = ' ';
+        }
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL)
              goto error;
-        s = s_buffer;
-        len = strlen(s);
      }
      else if (PyObject_AsCharBuffer(v, &s, &len)) {
          PyErr_SetString(PyExc_TypeError,
@@ -894,16 +900,14 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
      if (s-start != len)
          goto parse_error;
  
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
      return complex_subtype_from_doubles(type, x, y);
  
    parse_error:
      PyErr_SetString(PyExc_ValueError,
                      "complex() arg is a malformed string");
    error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
      return NULL;
  }
  
diff --git a/Objects/floatobject.c b/Objects/floatobject.c

index 4decb0b6278d70d67c893829ee4937707b7343f1..8409f0a13aae5e73ba0fcd7675ec9ae723cbe798 100644 (file)
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -174,22 +174,30 @@ PyFloat_FromString(PyObject *v)
  {
      const char *s, *last, *end;
      double x;
-    char buffer[256]; /* for errors */
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
      Py_ssize_t len;
      PyObject *result = NULL;
  
      if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
+        Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
+        Py_UNICODE *bufptr;
+        s_buffer = PyUnicode_TransformDecimalToASCII(
+            PyUnicode_AS_UNICODE(v), buflen);
          if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
-            goto error;
-        s = s_buffer;
-        len = strlen(s);
+            return NULL;
+        /* Replace non-ASCII whitespace with ' ' */
+        bufptr = PyUnicode_AS_UNICODE(s_buffer);
+        for (i = 0; i < buflen; i++) {
+            Py_UNICODE ch = bufptr[i];
+            if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+                bufptr[i] = ' ';
+        }
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL) {
+            Py_DECREF(s_buffer);
+            return NULL;
+        }
+        last = s + len;
      }
      else if (PyObject_AsCharBuffer(v, &s, &len)) {
          PyErr_SetString(PyExc_TypeError,
@@ -197,29 +205,27 @@ PyFloat_FromString(PyObject *v)
          return NULL;
      }
      last = s + len;
-
-    while (Py_ISSPACE(*s))
+    /* strip space */
+    while (s < last && Py_ISSPACE(*s))
          s++;
+    while (s < last - 1 && Py_ISSPACE(last[-1]))
+        last--;
      /* We don't care about overflow or underflow.  If the platform
       * supports them, infinities and signed zeroes (on underflow) are
       * fine. */
      x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (x == -1.0 && PyErr_Occurred())
-        goto error;
-    while (Py_ISSPACE(*end))
-        end++;
-    if (end == last)
-        result = PyFloat_FromDouble(x);
-    else {
-        PyOS_snprintf(buffer, sizeof(buffer),
-                      "invalid literal for float(): %.200s", s);
-        PyErr_SetString(PyExc_ValueError, buffer);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%R", v);
          result = NULL;
      }
+    else if (x == -1.0 && PyErr_Occurred())
+        result = NULL;
+    else
+        result = PyFloat_FromDouble(x);
  
-  error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
      return result;
  }
  
diff --git a/Objects/longobject.c b/Objects/longobject.c

index e8a728489b605621fcae31c090bdf459761c9f36..534e52dfd38370214784c20cc6aa97c5f27e1f55 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2133,17 +2133,34 @@ PyObject *
  PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
  {
      PyObject *result;
-    char *buffer = (char *)PyMem_MALLOC(length+1);
+    PyObject *asciidig;
+    char *buffer, *end;
+    Py_ssize_t i, buflen;
+    Py_UNICODE *ptr;
  
-    if (buffer == NULL)
+    asciidig = PyUnicode_TransformDecimalToASCII(u, length);
+    if (asciidig == NULL)
          return NULL;
-
-    if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) {
-        PyMem_FREE(buffer);
+    /* Replace non-ASCII whitespace with ' ' */
+    ptr = PyUnicode_AS_UNICODE(asciidig);
+    for (i = 0; i < length; i++) {
+      Py_UNICODE ch = ptr[i];
+      if (ch > 127 && Py_UNICODE_ISSPACE(ch))
+        ptr[i] = ' ';
+    }
+    buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen);
+    if (buffer == NULL) {
+        Py_DECREF(asciidig);
          return NULL;
      }
-    result = PyLong_FromString(buffer, NULL, base);
-    PyMem_FREE(buffer);
+    result = PyLong_FromString(buffer, &end, base);
+    if (result != NULL && end != buffer + buflen) {
+        PyErr_SetString(PyExc_ValueError,
+                        "null byte in argument for int()");
+        Py_DECREF(result);
+        result = NULL;
+    }
+    Py_DECREF(asciidig);
      return result;
  }
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index d3a2d1b715c10d17992ed58ac0d281f8b9fe9445..751da30e42353155c20bed3a3588603c607ba546 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6206,6 +6206,30 @@ PyObject *PyUnicode_Translate(PyObject *str,
      return NULL;
  }
  
+/* Copy s[0:length], mapping non-ASCII decimal digits to '0'-'9'. */
+PyObject *
+PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t length)
+{
+    PyObject *result;
+    Py_UNICODE *p; /* write pointer into result */
+    Py_ssize_t i;
+    /* Allocate first and bail out before touching a NULL result */
+    result = (PyObject *)_PyUnicode_New(length);
+    if (result == NULL)
+        return NULL;
+    p = PyUnicode_AS_UNICODE(result);
+    Py_UNICODE_COPY(p, s, length);
+    /* Overwrite non-ASCII decimal digits with their ASCII equivalents */
+    for (i = 0; i < length; i++) {
+        Py_UNICODE ch = s[i];
+        if (ch > 127) {
+            int decimal = Py_UNICODE_TODECIMAL(ch);
+            if (decimal >= 0)
+                p[i] = '0' + decimal;
+        }
+    }
+    return result;
+}
  /* --- Decimal Encoder ---------------------------------------------------- */
  
  int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@@ -8967,6 +8991,13 @@ unicode_freelistsize(PyUnicodeObject *self)
  {
      return PyLong_FromLong(numfree);
  }
+
+static PyObject *
+unicode__decimal2ascii(PyObject *self)
+{
+    return PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(self),
+                                             PyUnicode_GET_SIZE(self));
+}
  #endif
  
  PyDoc_STRVAR(startswith__doc__,
@@ -9108,7 +9139,6 @@ unicode_getnewargs(PyUnicodeObject *v)
      return Py_BuildValue("(u#)", v->str, v->length);
  }
  
-
  static PyMethodDef unicode_methods[] = {
  
      /* Order is according to common usage: often used methods should
@@ -9170,8 +9200,9 @@ static PyMethodDef unicode_methods[] = {
  #endif
  
  #if 0
-    /* This one is just used for debugging the implementation. */
+    /* These methods are just used for debugging the implementation. */
      {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
+    {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
  #endif
  
      {"__getnewargs__",  (PyCFunction)unicode_getnewargs, METH_NOARGS},
author	Alexander Belopolsky <alexander.belopolsky@gmail.com>
	Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)
committer	Alexander Belopolsky <alexander.belopolsky@gmail.com>
	Sat, 4 Dec 2010 03:38:46 +0000 (03:38 +0000)
Doc/c-api/unicode.rst		patch \| blob \| history
Include/unicodeobject.h		patch \| blob \| history
Lib/test/test_complex.py		patch \| blob \| history
Lib/test/test_float.py		patch \| blob \| history
Lib/test/test_int.py		patch \| blob \| history
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/complexobject.c		patch \| blob \| history
Objects/floatobject.c		patch \| blob \| history
Objects/longobject.c		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history