From 90d1fcd1011b7b5633eaeeec9c59779bc357c5b4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Fri, 31 Aug 2007 11:01:23 +0000 Subject: [PATCH] Change %s argument for PyUnicode_FromFormat to be UTF-8. Fixes #1070. --- Objects/unicodeobject.c | 48 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f9d3068edf..e9ce08c2e2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -621,8 +621,39 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) abuffersize = width; break; case 's': - n += strlen(va_arg(count, char*)); + { + /* UTF-8 */ + unsigned char*s; + s = va_arg(count, unsigned char*); + while (*s) { + if (*s < 128) { + n++; s++; + } else if (*s < 0xc0) { + /* invalid UTF-8 */ + n++; s++; + } else if (*s < 0xc0) { + n++; + s++; if(!*s)break; + s++; + } else if (*s < 0xe0) { + n++; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } else { + #ifdef Py_UNICODE_WIDE + n++; + #else + n+=2; + #endif + s++; if(!*s)break; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } + } break; + } case 'U': { PyObject *obj = va_arg(count, PyObject *); @@ -775,9 +806,22 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) appendstring(realbuffer); break; case 's': + { + /* Parameter must be UTF-8 encoded. + In case of encoding errors, use + the replacement character. */ + PyObject *u; p = va_arg(vargs, char*); - appendstring(p); + u = PyUnicode_DecodeUTF8(p, strlen(p), + "replace"); + if (!u) + goto fail; + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), + PyUnicode_GET_SIZE(u)); + s += PyUnicode_GET_SIZE(u); + Py_DECREF(u); break; + } case 'U': { PyObject *obj = va_arg(vargs, PyObject *); -- 2.50.0