Add PyUnicode_FromString(), which creates a Unicode object from a 0-terminated, Latin-1 encoded char buffer.

author Walter Dörwald <walter@livinglogic.de>

Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)

committer Walter Dörwald <walter@livinglogic.de>

Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)
author Walter Dörwald <walter@livinglogic.de>
Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)
committer Walter Dörwald <walter@livinglogic.de>
Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex

index cdf6856a3542c35c2006a83da33fb0b6941c8516..e1ab3ec6f3fb21659f0dbabf8f8962f69a10ae5d 100644 (file)
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -995,6 +995,17 @@ use these APIs:
    \var{u} is \NULL{}.
  \end{cfuncdesc}
  
+\begin{cfuncdesc}{PyObject*}{PyUnicode_FromString}{const char *u}
+  Create a Unicode Object from the char buffer \var{u}.
+  \var{u} must be 0-terminated, the bytes will be interpreted as
+  being latin-1 encoded. \var{u} may also be \NULL{} which causes the
+  contents to be undefined. It is the user's responsibility to fill
+  in the needed data.  The buffer is copied into the new object.
+  If the buffer is not \NULL{}, the return value might be a shared object.
+  Therefore, modification of the resulting Unicode object is only allowed
+  when \var{u} is \NULL{}.
+\end{cfuncdesc}
+
  \begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AsUnicode}{PyObject *unicode}
    Return a read-only pointer to the Unicode object's internal
    \ctype{Py_UNICODE} buffer, \NULL{} if \var{unicode} is not a Unicode
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index c12cb96af2c7f58288d8fd94f32d7c6c23a16006..9d0cabf6d3fc513ec9ff8e3827a7917429a92127 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -172,6 +172,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
  # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
  # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
  # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
+# define PyUnicode_FromString PyUnicodeUCS2_FromString
  # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
  # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
  # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
@@ -250,6 +251,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
  # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
  # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
  # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
+# define PyUnicode_FromString PyUnicodeUCS4_FromString
  # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
  # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
  # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
@@ -427,6 +429,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
      Py_ssize_t size             /* size of buffer */
      );
  
+/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
+   Latin-1 encoded bytes */
+PyAPI_FUNC(PyObject*) PyUnicode_FromString(
+    const char *u        /* string */
+    );
+
  /* Return a read-only pointer to the Unicode object's internal
     Py_UNICODE buffer. */
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 45c52cc77c1649a8159cd3596cd759c3e254faa2..c9a922dd80c82663fb956ef3e69b35c89178484e 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -393,6 +393,51 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
      return (PyObject *)unicode;
  }
  
+/* Create a Unicode object from the 0-terminated Latin-1 bytes at u; a NULL
+   u is treated as length 0.  Returns NULL when _PyUnicode_New() fails. */
+PyObject *PyUnicode_FromString(const char *u)
+{
+    PyUnicodeObject *unicode;
+    Py_ssize_t size = (u != NULL) ? (Py_ssize_t)strlen(u) : 0;
+    Py_ssize_t i;
+
+    /* If the Unicode data is known at construction time, we can apply
+       some optimizations which share commonly used objects. */
+    if (u != NULL) {
+       /* Optimization for empty strings */
+       if (size == 0 && unicode_empty != NULL) {
+           Py_INCREF(unicode_empty);
+           return (PyObject *)unicode_empty;
+       }
+
+       /* Single characters are shared.  Plain char may be signed, so the
+          cache index goes through unsigned char to stay within 0..255. */
+       if (size == 1) {
+           const unsigned char ch = (unsigned char)*u;
+           unicode = unicode_latin1[ch];
+           if (!unicode) {
+               unicode = _PyUnicode_New(1);
+               if (!unicode)
+                   return NULL;
+               unicode->str[0] = ch;
+               unicode_latin1[ch] = unicode;
+           }
+           Py_INCREF(unicode);
+           return (PyObject *)unicode;
+       }
+    }
+
+    unicode = _PyUnicode_New(size);
+    if (!unicode)
+        return NULL;
+
+    /* Widen each Latin-1 byte into one str element; str[size] is untouched */
+    for (i = 0; i < size; i++)
+        unicode->str[i] = (unsigned char)u[i];
+
+    return (PyObject *)unicode;
+}
+
  #ifdef HAVE_WCHAR_H
  
  PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
author	Walter Dörwald <walter@livinglogic.de>
	Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)
committer	Walter Dörwald <walter@livinglogic.de>
	Sat, 5 May 2007 12:00:46 +0000 (12:00 +0000)
Doc/api/concrete.tex		patch \| blob \| history
Include/unicodeobject.h		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history