From 137c34c0274954b2cdd1fd5b490c654528ff30f7 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Wed, 29 Sep 2010 10:25:54 +0000
Subject: [PATCH] Issue #9979: Create function PyUnicode_AsWideCharString().

---
 Doc/c-api/unicode.rst   | 13 +++++++++
 Include/unicodeobject.h | 19 +++++++++++--
 Misc/NEWS               |  2 ++
 Objects/unicodeobject.c | 70 +++++++++++++++++++++++++++++++++--------
 4 files changed, 88 insertions(+), 16 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 2e274ad636..50a1254192 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -462,6 +462,19 @@ wchar_t support for platforms which support it:
    required by the application.
 
 
+.. cfunction:: wchar_t* PyUnicode_AsWideCharString(PyUnicodeObject *unicode, Py_ssize_t *size)
+
+   Convert the Unicode object to a wide character string. The output string
+   always ends with a nul character. If *size* is not *NULL*, write the number
+   of wide characters (including the nul character) into *\*size*.
+
+   Returns a buffer allocated by :cfunc:`PyMem_Malloc` (use :cfunc:`PyMem_Free`
+   to free it) on success. On error, returns *NULL* with an exception set
+   (:exc:`MemoryError` on allocation failure) and *\*size* is undefined.
+
+   .. versionadded:: 3.2
+
+
 .. _builtincodecs:
 
 Built-in Codecs
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 111d7e230e..f206895a10 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -99,8 +99,8 @@ Copyright (c) Corporation for National Research Initiatives.
 #endif
 
 /* If the compiler provides a wchar_t type we try to support it
-   through the interface functions PyUnicode_FromWideChar() and
-   PyUnicode_AsWideChar(). */
+   through the interface functions PyUnicode_FromWideChar(),
+   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
 
 #ifdef HAVE_USABLE_WCHAR_T
 # ifndef HAVE_WCHAR_H
@@ -156,6 +156,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
 # define PyUnicode_Compare PyUnicodeUCS2_Compare
 # define PyUnicode_CompareWithASCII PyUnicodeUCS2_CompareASCII
@@ -239,6 +240,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
 # define PyUnicode_Compare PyUnicodeUCS4_Compare
 # define PyUnicode_CompareWithASCII PyUnicodeUCS4_CompareWithASCII
@@ -570,6 +572,19 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
     Py_ssize_t size             /* size of buffer */
     );
 
+/* Convert the Unicode object to a wide character string. The output string
+   always ends with a nul character. If size is not NULL, write the number of
+   wide characters (including the nul character) into *size.
+
+   Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
+   on success. On error, returns NULL with an exception set (MemoryError on
+   allocation failure) and *size is undefined. */
+
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
+    PyUnicodeObject *unicode,   /* Unicode object */
+    Py_ssize_t *size            /* number of characters of the result */
+    );
+
 #endif
 
 /* --- Unicode ordinals --------------------------------------------------- */
diff --git a/Misc/NEWS b/Misc/NEWS
index 03ca4a5c60..f774926069 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.2 Alpha 3?
 Core and Builtins
 -----------------
 
+- Issue #9979: Create function PyUnicode_AsWideCharString().
+
 - Issue #7397: Mention that importlib.import_module() is probably what someone
   really wants to be using in __import__'s docstring.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 489c98cd83..527e219896 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1153,9 +1153,26 @@ PyUnicode_FromFormat(const char *format, ...)
     return ret;
 }
 
-Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
-                                wchar_t *w,
-                                Py_ssize_t size)
+static void
+unicode_aswidechar(PyUnicodeObject *unicode,
+                   wchar_t *w,
+                   Py_ssize_t size)
+{
+#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
+    memcpy(w, unicode->str, size * sizeof(wchar_t));
+#else
+    register Py_UNICODE *u;
+    register Py_ssize_t i;
+    u = PyUnicode_AS_UNICODE(unicode);
+    for (i = size; i > 0; i--)
+        *w++ = *u++;
+#endif
+}
+
+Py_ssize_t
+PyUnicode_AsWideChar(PyUnicodeObject *unicode,
+                     wchar_t *w,
+                     Py_ssize_t size)
 {
     if (unicode == NULL) {
         PyErr_BadInternalCall();
@@ -1166,17 +1183,7 @@ Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
     if (size > PyUnicode_GET_SIZE(unicode))
         size = PyUnicode_GET_SIZE(unicode) + 1;
 
-#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
-    memcpy(w, unicode->str, size * sizeof(wchar_t));
-#else
-    {
-        register Py_UNICODE *u;
-        register Py_ssize_t i;
-        u = PyUnicode_AS_UNICODE(unicode);
-        for (i = size; i > 0; i--)
-            *w++ = *u++;
-    }
-#endif
+    unicode_aswidechar(unicode, w, size);
 
     if (size > PyUnicode_GET_SIZE(unicode))
         return PyUnicode_GET_SIZE(unicode);
@@ -1184,6 +1191,41 @@ Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
         return size;
 }
 
+/* Return a newly allocated, nul-terminated wchar_t copy of unicode.
+   If size is not NULL, *size receives the number of wide characters
+   written, including the trailing nul. The caller owns the buffer and
+   must release it with PyMem_Free(). Returns NULL with an exception set
+   on error. */
+wchar_t*
+PyUnicode_AsWideCharString(PyUnicodeObject *unicode,
+                           Py_ssize_t *size)
+{
+    wchar_t* buffer;
+    Py_ssize_t buflen;
+
+    if (unicode == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    /* Reject lengths for which (length + 1) * sizeof(wchar_t) would overflow */
+    if ((PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) < PyUnicode_GET_SIZE(unicode)) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    buflen = PyUnicode_GET_SIZE(unicode) + 1; /* copy L'\0' */
+    buffer = PyMem_MALLOC(buflen * sizeof(wchar_t));
+    if (buffer == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    unicode_aswidechar(unicode, buffer, buflen);
+    if (size != NULL)
+        *size = buflen;
+    return buffer;
+}
+
 #endif
 
 PyObject *PyUnicode_FromOrdinal(int ordinal)
-- 
2.40.0