Add C API PyUnicode_FromOrdinal() which exposes unichr() at C level.

author Marc-André Lemburg <mal@egenix.com>

Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)

committer Marc-André Lemburg <mal@egenix.com>

Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)
author Marc-André Lemburg <mal@egenix.com>
Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)
committer Marc-André Lemburg <mal@egenix.com>
Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index d0a2885ccfe64bbd47606e670529eb60dcd0b1fc..2a92fe52acac02366aa319628bd7231e5dbc61d9 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -517,6 +517,18 @@ extern DL_IMPORT(int) PyUnicode_AsWideChar(
  
  #endif
  
+/* --- Unicode ordinals --------------------------------------------------- */
+
+/* Create a Unicode Object from the given Unicode code point ordinal.
+
+   The ordinal must be in range(0x10000) on narrow Python builds
+   (UCS2), and range(0x110000) on wide builds (UCS4). If it is not,
+   a ValueError is raised and NULL is returned.
+
+*/
+
+extern DL_IMPORT(PyObject*) PyUnicode_FromOrdinal(int ordinal);
+
  /* === Builtin Codecs ===================================================== 
  
     Many of these APIs take two arguments encoding and errors. These
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index a915b2e35862210da8281a74f4b2822acbed60de..f5f4245ca21a336821a79fe588a11fc728db52ed 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -453,6 +453,14 @@ except KeyError:
  else:
      verify(value == u'abc, def')
  
+# u"%c" must reject integers that are not valid code point ordinals.
+# 0x200000 lies above 0x10ffff, so it is out of range on narrow (UCS2)
+# and wide (UCS4) builds alike; 0x20000 would be accepted on wide builds.
+for ordinal in (-100, 0x200000):
+    try:
+        u"%c" % ordinal
+    except ValueError:
+        pass
+    else:
+        # "%%" keeps the percent signs literal so that only %i consumes
+        # the argument.
+        print '*** formatting u"%%c" %% %i should give a ValueError' % ordinal
+
  # formatting jobs delegated from the string implementation:
  verify('...%(foo)s...' % {'foo':u"abc"} == u'...abc...')
  verify('...%(foo)s...' % {'foo':"abc"} == '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS

index e12120dbc6caba546e519fc8aa417debe2e16514..a454e1840a16bbd11ba5a077ace389006b2fc28c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -6,6 +6,9 @@ Type/class unification and new-style classes
  
  Core and builtins
  
+- u'%c' will now raise a ValueError in case the argument is an
+  integer outside the valid range of Unicode code point ordinals.
+
  - The tempfile module has been overhauled for enhanced security.  The
    mktemp() function is now deprecated; new, safe replacements are
    mkstemp() (for files) and mkdtemp() (for directories), and the
@@ -437,6 +440,9 @@ Build
  
  C API
  
+- New C API PyUnicode_FromOrdinal() which exposes unichr() at C
+  level.
+
  - New functions PyErr_SetExcFromWindowsErr() and
    PyErr_SetExcFromWindowsErrWithFilename(). Similar to
    PyErr_SetFromWindowsErrWithFilename() and
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 145186e0dffb2093ca1ac5f9280e938383e67cca..d0fe24c9dc192bd2d41e66f10815b1766ebd00c7 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -390,6 +390,45 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
  
  #endif
  
+/* Create a one-character Unicode object for the code point `ordinal`.
+
+   Valid ordinals are range(0x110000) on wide (Py_UNICODE_WIDE) builds
+   and range(0x10000) on narrow builds. For any other value a
+   ValueError is set and NULL is returned.
+*/
+PyObject *PyUnicode_FromOrdinal(int ordinal)
+{
+    Py_UNICODE s[1];
+
+#ifdef Py_UNICODE_WIDE
+    if (ordinal < 0 || ordinal > 0x10ffff) {
+       PyErr_SetString(PyExc_ValueError,
+                       "unichr() arg not in range(0x110000) "
+                       "(wide Python build)");
+       return NULL;
+    }
+#else
+    if (ordinal < 0 || ordinal > 0xffff) {
+       PyErr_SetString(PyExc_ValueError,
+                       "unichr() arg not in range(0x10000) "
+                       "(narrow Python build)");
+       return NULL;
+    }
+#endif
+
+    /* The range checks above guarantee that the ordinal fits into a
+       single Py_UNICODE on either build, so no surrogate pair is ever
+       needed here. */
+    s[0] = (Py_UNICODE) ordinal;
+    return PyUnicode_FromUnicode(s, 1);
+}
+
  PyObject *PyUnicode_FromObject(register PyObject *obj)
  {
      /* XXX Perhaps we should make this API an alias of
@@ -5373,7 +5412,22 @@ formatchar(Py_UNICODE *buf,
         x = PyInt_AsLong(v);
         if (x == -1 && PyErr_Occurred())
             goto onError;
-       buf[0] = (char) x;
+#ifdef Py_UNICODE_WIDE
+       if (x < 0 || x > 0x10ffff) {
+           PyErr_SetString(PyExc_ValueError,
+                           "%c arg not in range(0x110000) "
+                           "(wide Python build)");
+           return -1;
+       }
+#else
+       if (x < 0 || x > 0xffff) {
+           PyErr_SetString(PyExc_ValueError,
+                           "%c arg not in range(0x10000) "
+                           "(narrow Python build)");
+           return -1;
+       }
+#endif
+       buf[0] = (Py_UNICODE) x;
      }
      buf[1] = '\0';
      return 1;
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c

index fec7554f32c7b981c8fa74fc114e4f36f48a2fac..7a53065a9d1880f0aea5090e7985b4292d68fd3f 100644 (file)
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -260,44 +260,11 @@ static PyObject *
  builtin_unichr(PyObject *self, PyObject *args)
  {
         long x;
-       Py_UNICODE s[2];
  
         if (!PyArg_ParseTuple(args, "l:unichr", &x))
                 return NULL;
  
-#ifdef Py_UNICODE_WIDE
-       if (x < 0 || x > 0x10ffff) {
-               PyErr_SetString(PyExc_ValueError,
-                               "unichr() arg not in range(0x110000) "
-                               "(wide Python build)");
-               return NULL;
-       }
-#else
-       if (x < 0 || x > 0xffff) {
-               PyErr_SetString(PyExc_ValueError,
-                               "unichr() arg not in range(0x10000) "
-                               "(narrow Python build)");
-               return NULL;
-       }
-#endif
-
-       if (x <= 0xffff) {
-               /* UCS-2 character */
-               s[0] = (Py_UNICODE) x;
-               return PyUnicode_FromUnicode(s, 1);
-       }
-       else {
-#ifndef Py_UNICODE_WIDE
-               /* UCS-4 character.  store as two surrogate characters */
-               x -= 0x10000L;
-               s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
-               s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
-               return PyUnicode_FromUnicode(s, 2);
-#else
-               s[0] = (Py_UNICODE)x;
-               return PyUnicode_FromUnicode(s, 1);
-#endif
-       }
+       return PyUnicode_FromOrdinal(x);
  }
  
  PyDoc_STRVAR(unichr_doc,
author	Marc-André Lemburg <mal@egenix.com>
	Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)
committer	Marc-André Lemburg <mal@egenix.com>
	Sun, 11 Aug 2002 12:23:04 +0000 (12:23 +0000)
Include/unicodeobject.h		patch \| blob \| history
Lib/test/test_unicode.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history
Python/bltinmodule.c		patch \| blob \| history