]> granicus.if.org Git - postgresql/commitdiff
Fix mapping of PostgreSQL encodings to Python encodings.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 5 Jul 2012 18:45:24 +0000 (21:45 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 5 Jul 2012 19:31:29 +0000 (22:31 +0300)
Windows encodings, "win1252" and so forth, are named differently in Python,
like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails
for some reason, use a plain ereport(), not a PLy_elog(), to report that
error. That avoids recursion and a crash if PLy_elog() tries to call
PLyUnicode_Bytes() again.

This fixes bug reported by Asif Naeem. Backpatch down to 9.0, before that
plpython didn't even try these conversions.

Jan Urbański, with minor comment improvements by me.

src/pl/plpython/plpy_util.c

index 9a4901ecb2f5e2de38b5e9bc781a7bbe6f62d9ea..bf2953226f49275b022d4222c93f7754c0e403d1 100644 (file)
@@ -65,16 +65,71 @@ PLyUnicode_Bytes(PyObject *unicode)
        const char *serverenc;
 
        /*
-        * Python understands almost all PostgreSQL encoding names, but it doesn't
-        * know SQL_ASCII.
+        * Map PostgreSQL encoding to a Python encoding name.
         */
-       if (GetDatabaseEncoding() == PG_SQL_ASCII)
-               serverenc = "ascii";
-       else
-               serverenc = GetDatabaseEncodingName();
+       switch (GetDatabaseEncoding())
+       {
+               case PG_SQL_ASCII:
+                       /*
+                        * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
+                        * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
+                        * SQL_ASCII means that anything is allowed, and the system doesn't
+                        * try to interpret the bytes in any way. But not sure what else
+                        * to do, and we haven't heard any complaints...
+                        */
+                       serverenc = "ascii";
+                       break;
+               case PG_WIN1250:
+                       serverenc = "cp1250";
+                       break;
+               case PG_WIN1251:
+                       serverenc = "cp1251";
+                       break;
+               case PG_WIN1252:
+                       serverenc = "cp1252";
+                       break;
+               case PG_WIN1253:
+                       serverenc = "cp1253";
+                       break;
+               case PG_WIN1254:
+                       serverenc = "cp1254";
+                       break;
+               case PG_WIN1255:
+                       serverenc = "cp1255";
+                       break;
+               case PG_WIN1256:
+                       serverenc = "cp1256";
+                       break;
+               case PG_WIN1257:
+                       serverenc = "cp1257";
+                       break;
+               case PG_WIN1258:
+                       serverenc = "cp1258";
+                       break;
+               case PG_WIN866:
+                       serverenc = "cp866";
+                       break;
+               case PG_WIN874:
+                       serverenc = "cp874";
+                       break;
+               default:
+                       /* Other encodings have the same name in Python. */
+                       serverenc = GetDatabaseEncodingName();
+                       break;
+       }
+
        rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
        if (rv == NULL)
-               PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+       {
+               /*
+                * Use a plain ereport instead of PLy_elog to avoid recursion, if
+                * the traceback formatting functions try to do unicode to bytes
+                * conversion again.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INTERNAL_ERROR),
+                                errmsg("could not convert Python Unicode object to PostgreSQL server encoding")));
+       }
        return rv;
 }