From: Heikki Linnakangas Date: Thu, 5 Jul 2012 18:45:24 +0000 (+0300) Subject: Fix mapping of PostgreSQL encodings to Python encodings. X-Git-Tag: REL9_3_BETA1~1233 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b66de4c6d7208d9ec420b912758377a3533c7a7d;p=postgresql Fix mapping of PostgreSQL encodings to Python encodings. Windows encodings, "win1252" and so forth, are named differently in Python, like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails for some reason, use a plain ereport(), not a PLy_elog(), to report that error. That avoids recursion and crash, if PLy_elog() tries to call PLyUnicode_Bytes() again. This fixes bug reported by Asif Naeem. Backpatch down to 9.0, before that plpython didn't even try these conversions. Jan Urbański, with minor comment improvements by me. --- diff --git a/src/pl/plpython/plpy_util.c b/src/pl/plpython/plpy_util.c index 9a4901ecb2..bf2953226f 100644 --- a/src/pl/plpython/plpy_util.c +++ b/src/pl/plpython/plpy_util.c @@ -65,16 +65,71 @@ PLyUnicode_Bytes(PyObject *unicode) const char *serverenc; /* - * Python understands almost all PostgreSQL encoding names, but it doesn't - * know SQL_ASCII. + * Map PostgreSQL encoding to a Python encoding name. */ - if (GetDatabaseEncoding() == PG_SQL_ASCII) - serverenc = "ascii"; - else - serverenc = GetDatabaseEncodingName(); + switch (GetDatabaseEncoding()) + { + case PG_SQL_ASCII: + /* + * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's + * 'ascii' means true 7-bit only ASCII, while PostgreSQL's + * SQL_ASCII means that anything is allowed, and the system doesn't + * try to interpret the bytes in any way. But not sure what else + * to do, and we haven't heard any complaints... 
+ */ + serverenc = "ascii"; + break; + case PG_WIN1250: + serverenc = "cp1250"; + break; + case PG_WIN1251: + serverenc = "cp1251"; + break; + case PG_WIN1252: + serverenc = "cp1252"; + break; + case PG_WIN1253: + serverenc = "cp1253"; + break; + case PG_WIN1254: + serverenc = "cp1254"; + break; + case PG_WIN1255: + serverenc = "cp1255"; + break; + case PG_WIN1256: + serverenc = "cp1256"; + break; + case PG_WIN1257: + serverenc = "cp1257"; + break; + case PG_WIN1258: + serverenc = "cp1258"; + break; + case PG_WIN866: + serverenc = "cp866"; + break; + case PG_WIN874: + serverenc = "cp874"; + break; + default: + /* Other encodings have the same name in Python. */ + serverenc = GetDatabaseEncodingName(); + break; + } + rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict"); if (rv == NULL) - PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding"); + { + /* + * Use a plain ereport instead of PLy_elog to avoid recursion, if + * the traceback formatting functions try to do unicode to bytes + * conversion again. + */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not convert Python Unicode object to PostgreSQL server encoding"))); + } return rv; }