+++ /dev/null
---
--- Unicode handling
---
-SET client_encoding TO UTF8;
-CREATE TABLE unicode_test (
- testvalue text NOT NULL
-);
-CREATE FUNCTION unicode_return() RETURNS text AS E'
-return u"\\x80"
-' LANGUAGE plpythonu;
-CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
-TD["new"]["testvalue"] = u"\\x80"
-return "MODIFY"
-' LANGUAGE plpythonu;
-CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
- FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
-CREATE FUNCTION unicode_plan1() RETURNS text AS E'
-plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
-rv = plpy.execute(plan, [u"\\x80"], 1)
-return rv[0]["testvalue"]
-' LANGUAGE plpythonu;
-CREATE FUNCTION unicode_plan2() RETURNS text AS E'
-plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
-rv = plpy.execute(plan, ["foo", "bar"], 1)
-return rv[0]["testvalue"]
-' LANGUAGE plpythonu;
-SELECT unicode_return();
-ERROR: could not convert Python Unicode object to PostgreSQL server encoding
-DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
-CONTEXT: while creating return value
-PL/Python function "unicode_return"
-INSERT INTO unicode_test (testvalue) VALUES ('test');
-ERROR: could not convert Python Unicode object to PostgreSQL server encoding
-DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
-CONTEXT: while modifying trigger row
-PL/Python function "unicode_trigger"
-SELECT * FROM unicode_test;
- testvalue
------------
-(0 rows)
-
-SELECT unicode_plan1();
-ERROR: spiexceptions.InternalError: could not convert Python Unicode object to PostgreSQL server encoding
-DETAIL: UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
-CONTEXT: Traceback (most recent call last):
- PL/Python function "unicode_plan1", line 3, in <module>
- rv = plpy.execute(plan, [u"\x80"], 1)
-PL/Python function "unicode_plan1"
-SELECT unicode_plan2();
- unicode_plan2
----------------
- foobar
-(1 row)
-
PyObject *
PLyUnicode_Bytes(PyObject *unicode)
{
- PyObject *rv;
- const char *serverenc;
+ PyObject *bytes, *rv;
+ char *utf8string, *encoded;
+
+ /* First encode the Python unicode object with UTF-8. */
+ bytes = PyUnicode_AsUTF8String(unicode);
+ if (bytes == NULL)
+ PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
+
+ utf8string = PyBytes_AsString(bytes);
+ if (utf8string == NULL) {
+ Py_DECREF(bytes);
+ PLy_elog(ERROR, "could not extract bytes from encoded string");
+ }
/*
- * Map PostgreSQL encoding to a Python encoding name.
+ * Then convert to server encoding if necessary.
+ *
+ * PyUnicode_AsEncodedString could be used to encode the object directly
+ * in the server encoding, but Python doesn't support all the encodings
+ * that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
+ * intermediary in PLyUnicode_FromString as well.
*/
- switch (GetDatabaseEncoding())
+ if (GetDatabaseEncoding() != PG_UTF8)
{
- case PG_SQL_ASCII:
- /*
- * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
- * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
- * SQL_ASCII means that anything is allowed, and the system doesn't
- * try to interpret the bytes in any way. But not sure what else
- * to do, and we haven't heard any complaints...
- */
- serverenc = "ascii";
- break;
- case PG_WIN1250:
- serverenc = "cp1250";
- break;
- case PG_WIN1251:
- serverenc = "cp1251";
- break;
- case PG_WIN1252:
- serverenc = "cp1252";
- break;
- case PG_WIN1253:
- serverenc = "cp1253";
- break;
- case PG_WIN1254:
- serverenc = "cp1254";
- break;
- case PG_WIN1255:
- serverenc = "cp1255";
- break;
- case PG_WIN1256:
- serverenc = "cp1256";
- break;
- case PG_WIN1257:
- serverenc = "cp1257";
- break;
- case PG_WIN1258:
- serverenc = "cp1258";
- break;
- case PG_WIN866:
- serverenc = "cp866";
- break;
- case PG_WIN874:
- serverenc = "cp874";
- break;
- default:
- /* Other encodings have the same name in Python. */
- serverenc = GetDatabaseEncodingName();
- break;
+ PG_TRY();
+ {
+ encoded = (char *) pg_do_encoding_conversion(
+ (unsigned char *) utf8string,
+ strlen(utf8string),
+ PG_UTF8,
+ GetDatabaseEncoding());
+ }
+ PG_CATCH();
+ {
+ Py_DECREF(bytes);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
}
+ else
+ encoded = utf8string;
+
+ /* finally, build a bytes object in the server encoding */
+ rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
+
+ /* if pg_do_encoding_conversion allocated memory, free it now */
+ if (utf8string != encoded)
+ pfree(encoded);
- rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
- if (rv == NULL)
- PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+ Py_DECREF(bytes);
return rv;
}