From 138313ebaa985030d75d429c3b3cb7138e62b10f Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 5 Jul 2012 22:16:29 +0300 Subject: [PATCH] Fix mapping of PostgreSQL encodings to Python encodings. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Windows encodings, "win1252" and so forth, are named differently in Python, like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails for some reason, use a plain ereport(), not a PLy_elog(), to report that error. That avoids recursion and a crash if PLy_elog() tries to call PLyUnicode_Bytes() again. This fixes the bug reported by Asif Naeem. Backpatch down to 9.0; before that, plpython didn't even try these conversions. Jan Urbański, with minor comment improvements by me. --- src/pl/plpython/plpython.c | 69 ++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c index 95efd0b42e..2f67caba6d 100644 --- a/src/pl/plpython/plpython.c +++ b/src/pl/plpython/plpython.c @@ -4873,16 +4873,71 @@ PLyUnicode_Bytes(PyObject *unicode) const char *serverenc; /* - * Python understands almost all PostgreSQL encoding names, but it doesn't - * know SQL_ASCII. + * Map PostgreSQL encoding to a Python encoding name. */ - if (GetDatabaseEncoding() == PG_SQL_ASCII) - serverenc = "ascii"; - else - serverenc = GetDatabaseEncodingName(); + switch (GetDatabaseEncoding()) + { + case PG_SQL_ASCII: + /* + * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's + * 'ascii' means true 7-bit only ASCII, while PostgreSQL's + * SQL_ASCII means that anything is allowed, and the system doesn't + * try to interpret the bytes in any way. But not sure what else + * to do, and we haven't heard any complaints... 
+ */ + serverenc = "ascii"; + break; + case PG_WIN1250: + serverenc = "cp1250"; + break; + case PG_WIN1251: + serverenc = "cp1251"; + break; + case PG_WIN1252: + serverenc = "cp1252"; + break; + case PG_WIN1253: + serverenc = "cp1253"; + break; + case PG_WIN1254: + serverenc = "cp1254"; + break; + case PG_WIN1255: + serverenc = "cp1255"; + break; + case PG_WIN1256: + serverenc = "cp1256"; + break; + case PG_WIN1257: + serverenc = "cp1257"; + break; + case PG_WIN1258: + serverenc = "cp1258"; + break; + case PG_WIN866: + serverenc = "cp866"; + break; + case PG_WIN874: + serverenc = "cp874"; + break; + default: + /* Other encodings have the same name in Python. */ + serverenc = GetDatabaseEncodingName(); + break; + } + rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict"); if (rv == NULL) - PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding"); + { + /* + * Use a plain ereport instead of PLy_elog to avoid recursion, if + * the traceback formatting functions try to do unicode to bytes + * conversion again. + */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not convert Python Unicode object to PostgreSQL server encoding"))); + } return rv; } -- 2.40.0