]> granicus.if.org Git - postgresql/commitdiff
Force strings passed to and from plperl to be in UTF8 encoding.
authorAndrew Dunstan <andrew@dunslane.net>
Sun, 6 Feb 2011 22:29:26 +0000 (17:29 -0500)
committerAndrew Dunstan <andrew@dunslane.net>
Sun, 6 Feb 2011 22:29:26 +0000 (17:29 -0500)
String are converted to UTF8 on the way into perl and to the
database encoding on the way back. This avoids a number of
observed anomalies, and ensures Perl a consistent view of the
world.

Some minor code cleanups are also accomplished.

Alex Hunsaker, reviewed by Andy Colson.

doc/src/sgml/plperl.sgml
src/pl/plperl/SPI.xs
src/pl/plperl/Util.xs
src/pl/plperl/plperl.c
src/pl/plperl/plperl_helpers.h [new file with mode: 0644]

index dd8695834fe3b8c8702711cdcbe5a42fc889ee61..4150998808c0414655cb1b07de16e57fa6c62a05 100644 (file)
@@ -125,6 +125,14 @@ $$ LANGUAGE plperl;
 </programlisting>
   </para>
 
+  <note>
+    <para>
+      Arguments will be converted from the database's encoding to UTF-8 
+      for use inside plperl, and then converted from UTF-8 back to the 
+      database encoding upon return. 
+    </para>
+  </note>
+
   <para>
    If an SQL null value<indexterm><primary>null value</><secondary
    sortas="PL/Perl">in PL/Perl</></indexterm> is passed to a function,
index afcfe211c8d44ea8760c2c4dd869ca0a0fec6ebd..6b8dcf62990ef0ed670bd8ed7117a67f48bbe269 100644 (file)
@@ -9,11 +9,14 @@
 
 /* this must be first: */
 #include "postgres.h"
+#include "mb/pg_wchar.h"       /* for GetDatabaseEncoding */
+
 /* Defined by Perl */
 #undef _
 
 /* perl stuff */
 #include "plperl.h"
+#include "plperl_helpers.h"
 
 
 /*
@@ -50,18 +53,21 @@ PROTOTYPES: ENABLE
 VERSIONCHECK: DISABLE
 
 SV*
-spi_spi_exec_query(query, ...)
-       char* query;
+spi_spi_exec_query(sv, ...)
+       SV* sv;
        PREINIT:
                HV *ret_hash;
                int limit = 0;
+               char *query;
        CODE:
                if (items > 2)
                        croak("Usage: spi_exec_query(query, limit) "
                                  "or spi_exec_query(query)");
                if (items == 2)
                        limit = SvIV(ST(1));
+               query = sv2cstr(sv);
                ret_hash = plperl_spi_exec(query, limit);
+               pfree(query);
                RETVAL = newRV_noinc((SV*) ret_hash);
        OUTPUT:
                RETVAL
@@ -73,27 +79,32 @@ spi_return_next(rv)
                do_plperl_return_next(rv);
 
 SV *
-spi_spi_query(query)
-       char *query;
+spi_spi_query(sv)
+       SV *sv;
        CODE:
+               char* query = sv2cstr(sv);
                RETVAL = plperl_spi_query(query);
+               pfree(query);
        OUTPUT:
                RETVAL
 
 SV *
-spi_spi_fetchrow(cursor)
-       char *cursor;
+spi_spi_fetchrow(sv)
+       SV* sv;
        CODE:
+               char* cursor = sv2cstr(sv);
                RETVAL = plperl_spi_fetchrow(cursor);
+               pfree(cursor);
        OUTPUT:
                RETVAL
 
 SV*
-spi_spi_prepare(query, ...)
-       char* query;
+spi_spi_prepare(sv, ...)
+       SV* sv;
        CODE:
                int i;
                SV** argv;
+               char* query = sv2cstr(sv);
                if (items < 1)
                        Perl_croak(aTHX_ "Usage: spi_prepare(query, ...)");
                argv = ( SV**) palloc(( items - 1) * sizeof(SV*));
@@ -101,18 +112,20 @@ spi_spi_prepare(query, ...)
                        argv[i - 1] = ST(i);
                RETVAL = plperl_spi_prepare(query, items - 1, argv);
                pfree( argv);
+               pfree(query);
        OUTPUT:
                RETVAL
 
 SV*
-spi_spi_exec_prepared(query, ...)
-       char * query;
+spi_spi_exec_prepared(sv, ...)
+       SV* sv;
        PREINIT:
                HV *ret_hash;
        CODE:
                HV *attr = NULL;
                int i, offset = 1, argc;
                SV ** argv;
+               char *query = sv2cstr(sv);
                if ( items < 1)
                        Perl_croak(aTHX_ "Usage: spi_exec_prepared(query, [\\%%attr,] "
                                           "[\\@bind_values])");
@@ -128,15 +141,17 @@ spi_spi_exec_prepared(query, ...)
                ret_hash = plperl_spi_exec_prepared(query, attr, argc, argv);
                RETVAL = newRV_noinc((SV*)ret_hash);
                pfree( argv);
+               pfree(query);
        OUTPUT:
                RETVAL
 
 SV*
-spi_spi_query_prepared(query, ...)
-       char * query;
+spi_spi_query_prepared(sv, ...)
+       SV * sv;
        CODE:
                int i;
                SV ** argv;
+               char *query = sv2cstr(sv);
                if ( items < 1)
                        Perl_croak(aTHX_ "Usage: spi_query_prepared(query, "
                                           "[\\@bind_values])");
@@ -145,20 +160,25 @@ spi_spi_query_prepared(query, ...)
                        argv[i - 1] = ST(i);
                RETVAL = plperl_spi_query_prepared(query, items - 1, argv);
                pfree( argv);
+               pfree(query);
        OUTPUT:
                RETVAL
 
 void
-spi_spi_freeplan(query)
-       char *query;
+spi_spi_freeplan(sv)
+       SV *sv;
        CODE:
+               char *query = sv2cstr(sv);
                plperl_spi_freeplan(query);
+               pfree(query);
 
 void
-spi_spi_cursor_close(cursor)
-       char *cursor;
+spi_spi_cursor_close(sv)
+       SV *sv;
        CODE:
+               char *cursor = sv2cstr(sv);
                plperl_spi_cursor_close(cursor);
+               pfree(cursor);
 
 
 BOOT:
index 6b96107444d4b12491d04ab19215318fdb9220c0..6c6e90faa771ea68dd70393ff3e63aa5115b8905 100644 (file)
@@ -21,7 +21,7 @@
 
 /* perl stuff */
 #include "plperl.h"
-
+#include "plperl_helpers.h"
 
 /*
  * Implementation of plperl's elog() function
  * This is out-of-line to suppress "might be clobbered by longjmp" warnings.
  */
 static void
-do_util_elog(int level, char *message)
+do_util_elog(int level, SV *msg)
 {
     MemoryContext oldcontext = CurrentMemoryContext;
+       char *cmsg = NULL;
 
     PG_TRY();
     {
-        elog(level, "%s", message);
+               cmsg = sv2cstr(msg);
+        elog(level, "%s", cmsg);
+               pfree(cmsg);
     }
     PG_CATCH();
     {
@@ -51,35 +54,20 @@ do_util_elog(int level, char *message)
         edata = CopyErrorData();
         FlushErrorState();
 
+               if (cmsg)
+                       pfree(cmsg);
+
         /* Punt the error to Perl */
         croak("%s", edata->message);
     }
     PG_END_TRY();
 }
 
-static SV  *
-newSVstring_len(const char *str, STRLEN len)
-{
-    SV         *sv;
-
-    sv = newSVpvn(str, len);
-#if PERL_BCDVERSION >= 0x5006000L
-    if (GetDatabaseEncoding() == PG_UTF8)
-        SvUTF8_on(sv);
-#endif
-    return sv;
-}
-
 static text *
 sv2text(SV *sv)
 {
-    STRLEN    sv_len;
-    char     *sv_pv;
-
-    if (!sv)
-        sv = &PL_sv_undef;
-    sv_pv = SvPV(sv, sv_len);
-    return cstring_to_text_with_len(sv_pv, sv_len);
+       char *str = sv2cstr(sv);
+       return cstring_to_text(str);
 }
 
 MODULE = PostgreSQL::InServer::Util PREFIX = util_
@@ -105,15 +93,15 @@ _aliased_constants()
 
 
 void
-util_elog(level, message)
+util_elog(level, msg)
     int level
-    char* message
+    SV *msg
     CODE:
         if (level > ERROR)      /* no PANIC allowed thanks */
             level = ERROR;
         if (level < DEBUG5)
             level = DEBUG5;
-        do_util_elog(level, message);
+        do_util_elog(level, msg);
 
 SV *
 util_quote_literal(sv)
@@ -125,7 +113,9 @@ util_quote_literal(sv)
     else {
         text *arg = sv2text(sv);
         text *ret = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg)));
-        RETVAL = newSVstring_len(VARDATA(ret), (VARSIZE(ret) - VARHDRSZ));
+               char *str = text_to_cstring(ret);
+               RETVAL = cstr2sv(str);
+               pfree(str);
     }
     OUTPUT:
     RETVAL
@@ -136,13 +126,15 @@ util_quote_nullable(sv)
     CODE:
     if (!sv || !SvOK(sv))
        {
-        RETVAL = newSVstring_len("NULL", 4);
+        RETVAL = cstr2sv("NULL");
     }
     else
        {
         text *arg = sv2text(sv);
         text *ret = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg)));
-        RETVAL = newSVstring_len(VARDATA(ret), (VARSIZE(ret) - VARHDRSZ));
+               char *str = text_to_cstring(ret);
+               RETVAL = cstr2sv(str);
+               pfree(str);
     }
     OUTPUT:
     RETVAL
@@ -153,10 +145,13 @@ util_quote_ident(sv)
     PREINIT:
         text *arg;
         text *ret;
+               char *str;
     CODE:
         arg = sv2text(sv);
         ret = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg)));
-        RETVAL = newSVstring_len(VARDATA(ret), (VARSIZE(ret) - VARHDRSZ));
+               str = text_to_cstring(ret);
+               RETVAL = cstr2sv(str);
+               pfree(str);
     OUTPUT:
     RETVAL
 
@@ -167,9 +162,9 @@ util_decode_bytea(sv)
         char *arg;
         text *ret;
     CODE:
-        arg = SvPV_nolen(sv);
+        arg = SvPVbyte_nolen(sv);
         ret = DatumGetTextP(DirectFunctionCall1(byteain, PointerGetDatum(arg)));
-        /* not newSVstring_len because this is raw bytes not utf8'able */
+        /* not cstr2sv because this is raw bytes not utf8'able */
         RETVAL = newSVpvn(VARDATA(ret), (VARSIZE(ret) - VARHDRSZ));
     OUTPUT:
     RETVAL
@@ -180,10 +175,13 @@ util_encode_bytea(sv)
     PREINIT:
         text *arg;
         char *ret;
+               STRLEN len;
     CODE:
-        arg = sv2text(sv);
+        /* not sv2text because this is raw bytes not utf8'able */
+        ret = SvPVbyte(sv, len);
+               arg = cstring_to_text_with_len(ret, len);
         ret = DatumGetCString(DirectFunctionCall1(byteaout, PointerGetDatum(arg)));
-        RETVAL = newSVstring_len(ret, strlen(ret));
+        RETVAL = cstr2sv(ret);
     OUTPUT:
     RETVAL
 
index 2ac716855892a7c47147faacaadfe2ad3fb545fc..48a1f8ec09e039e34d49e08e6f704c133a3f3167 100644 (file)
@@ -43,6 +43,7 @@
 
 /* perl stuff */
 #include "plperl.h"
+#include "plperl_helpers.h"
 
 /* string literal macros defining chunks of perl code */
 #include "perlchunks.h"
@@ -222,7 +223,7 @@ static void plperl_init_shared_libs(pTHX);
 static void plperl_trusted_init(void);
 static void plperl_untrusted_init(void);
 static HV  *plperl_spi_execute_fetch_result(SPITupleTable *, int, int);
-static SV  *newSVstring(const char *str);
+static char *hek2cstr(HE *he);
 static SV **hv_store_string(HV *hv, const char *key, SV *val);
 static SV **hv_fetch_string(HV *hv, const char *key);
 static void plperl_create_sub(plperl_proc_desc *desc, char *s, Oid fn_oid);
@@ -239,24 +240,39 @@ static char *setlocale_perl(int category, char *locale);
 #endif
 
 /*
- * Convert an SV to char * and verify the encoding via pg_verifymbstr()
+ * convert a HE (hash entry) key to a cstr in the current database encoding
  */
-static inline char *
-sv2text_mbverified(SV *sv)
+static char *
+hek2cstr(HE *he)
 {
-       char       *val;
-       STRLEN          len;
-
        /*
-        * The value returned here might include an embedded nul byte, because
-        * perl allows such things. That's OK, because pg_verifymbstr will choke
-        * on it,  If we just used strlen() instead of getting perl's idea of the
-        * length, whatever uses the "verified" value might get something quite
-        * weird.
+        * Unfortunately,  while HeUTF8 is true for most things > 256, for
+        * values 128..255 it's not, but perl will treat them as
+        * unicode code points if the utf8 flag is not set ( see
+        * The "Unicode Bug" in perldoc perlunicode for more)
+        *
+        * So if we did the expected:
+        *    if (HeUTF8(he))
+        *                utf_u2e(key...);
+        *        else // must be ascii
+        *            return HePV(he);
+        * we won't match columns with codepoints from 128..255
+        *
+        * For a more concrete example given a column with the
+        * name of the unicode codepoint U+00ae (registered sign)
+        * and a UTF8 database and the perl return_next {
+        * "\N{U+00ae}=>'text } would always fail as heUTF8
+        * returns 0 and HePV() would give us a char * with 1 byte
+        * contains the decimal value 174
+        *
+        * Perl has the brains to know when it should utf8 encode
+        * 174 properly, so here we force it into an SV so that
+        * perl will figure it out and do the right thing
         */
-       val = SvPV(sv, len);
-       pg_verifymbstr(val, len, false);
-       return val;
+       SV *sv = HeSVKEY_force(he);
+       if (HeUTF8(he))
+               SvUTF8_on(sv);
+       return sv2cstr(sv);
 }
 
 /*
@@ -568,7 +584,7 @@ select_perl_context(bool trusted)
        eval_pv("PostgreSQL::InServer::SPI::bootstrap()", FALSE);
        if (SvTRUE(ERRSV))
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                errcontext("while executing PostgreSQL::InServer::SPI::bootstrap")));
 
        /* Fully initialized, so mark the hashtable entry valid */
@@ -609,7 +625,6 @@ static PerlInterpreter *
 plperl_init_interp(void)
 {
        PerlInterpreter *plperl;
-       static int      perl_sys_init_done;
 
        static char *embedding[3 + 2] = {
                "", "-e", PLC_PERLBOOT
@@ -678,15 +693,19 @@ plperl_init_interp(void)
         * true when MYMALLOC is set.
         */
 #if defined(PERL_SYS_INIT3) && !defined(MYMALLOC)
-       /* only call this the first time through, as per perlembed man page */
-       if (!perl_sys_init_done)
        {
-               char       *dummy_env[1] = {NULL};
+               static int      perl_sys_init_done;
 
-               PERL_SYS_INIT3(&nargs, (char ***) &embedding, (char ***) &dummy_env);
-               perl_sys_init_done = 1;
-               /* quiet warning if PERL_SYS_INIT3 doesn't use the third argument */
-               dummy_env[0] = NULL;
+               /* only call this the first time through, as per perlembed man page */
+               if (!perl_sys_init_done)
+               {
+                       char       *dummy_env[1] = {NULL};
+
+                       PERL_SYS_INIT3(&nargs, (char ***) &embedding, (char ***) &dummy_env);
+                       perl_sys_init_done = 1;
+                       /* quiet warning if PERL_SYS_INIT3 doesn't use the third argument */
+                       dummy_env[0] = NULL;
+               }
        }
 #endif
 
@@ -727,12 +746,12 @@ plperl_init_interp(void)
        if (perl_parse(plperl, plperl_init_shared_libs,
                                   nargs, embedding, NULL) != 0)
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                                 errcontext("while parsing Perl initialization")));
 
        if (perl_run(plperl) != 0)
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                                 errcontext("while running Perl initialization")));
 
 #ifdef PLPERL_RESTORE_LOCALE
@@ -836,22 +855,19 @@ plperl_trusted_init(void)
        eval_pv(PLC_TRUSTED, FALSE);
        if (SvTRUE(ERRSV))
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                                 errcontext("while executing PLC_TRUSTED")));
 
-       if (GetDatabaseEncoding() == PG_UTF8)
-       {
-               /*
-                * Force loading of utf8 module now to prevent errors that can arise
-                * from the regex code later trying to load utf8 modules. See
-                * http://rt.perl.org/rt3/Ticket/Display.html?id=47576
-                */
-               eval_pv("my $a=chr(0x100); return $a =~ /\\xa9/i", FALSE);
-               if (SvTRUE(ERRSV))
-                       ereport(ERROR,
-                                       (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
-                                        errcontext("while executing utf8fix")));
-       }
+       /*
+        * Force loading of utf8 module now to prevent errors that can arise
+        * from the regex code later trying to load utf8 modules. See
+        * http://rt.perl.org/rt3/Ticket/Display.html?id=47576
+        */
+       eval_pv("my $a=chr(0x100); return $a =~ /\\xa9/i", FALSE);
+       if (SvTRUE(ERRSV))
+               ereport(ERROR,
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
+                                errcontext("while executing utf8fix")));
 
        /*
         * Lock down the interpreter
@@ -891,7 +907,7 @@ plperl_trusted_init(void)
                eval_pv(plperl_on_plperl_init, FALSE);
                if (SvTRUE(ERRSV))
                        ereport(ERROR,
-                                       (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                                       (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                                         errcontext("while executing plperl.on_plperl_init")));
 
        }
@@ -912,7 +928,7 @@ plperl_untrusted_init(void)
                eval_pv(plperl_on_plperlu_init, FALSE);
                if (SvTRUE(ERRSV))
                        ereport(ERROR,
-                                       (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV))),
+                                       (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV))),
                                         errcontext("while executing plperl.on_plperlu_init")));
        }
 }
@@ -940,17 +956,18 @@ plperl_build_tuple_result(HV *perlhash, AttInMetadata *attinmeta)
 {
        TupleDesc       td = attinmeta->tupdesc;
        char      **values;
-       SV                 *val;
-       char       *key;
-       I32                     klen;
+       HE                      *he;
        HeapTuple       tup;
+       int                     i;
 
        values = (char **) palloc0(td->natts * sizeof(char *));
 
        hv_iterinit(perlhash);
-       while ((val = hv_iternextsv(perlhash, &key, &klen)))
+       while ((he = hv_iternext(perlhash)))
        {
-               int                     attn = SPI_fnumber(td, key);
+               SV              *val = HeVAL(he);
+               char    *key = hek2cstr(he);
+               int             attn = SPI_fnumber(td, key);
 
                if (attn <= 0 || td->attrs[attn - 1]->attisdropped)
                        ereport(ERROR,
@@ -959,13 +976,22 @@ plperl_build_tuple_result(HV *perlhash, AttInMetadata *attinmeta)
                                                        key)));
                if (SvOK(val))
                {
-                       values[attn - 1] = sv2text_mbverified(val);
+                       values[attn - 1] = sv2cstr(val);
                }
+
+               pfree(key);
        }
        hv_iterinit(perlhash);
 
        tup = BuildTupleFromCStrings(attinmeta, values);
+
+       for(i = 0; i < td->natts; i++)
+       {
+               if (values[i])
+                       pfree(values[i]);
+       }
        pfree(values);
+
        return tup;
 }
 
@@ -1025,8 +1051,8 @@ plperl_trigger_build_args(FunctionCallInfo fcinfo)
                                                                                                )
                );
 
-       hv_store_string(hv, "name", newSVstring(tdata->tg_trigger->tgname));
-       hv_store_string(hv, "relid", newSVstring(relid));
+       hv_store_string(hv, "name", cstr2sv(tdata->tg_trigger->tgname));
+       hv_store_string(hv, "relid", cstr2sv(relid));
 
        if (TRIGGER_FIRED_BY_INSERT(tdata->tg_event))
        {
@@ -1062,7 +1088,7 @@ plperl_trigger_build_args(FunctionCallInfo fcinfo)
        else
                event = "UNKNOWN";
 
-       hv_store_string(hv, "event", newSVstring(event));
+       hv_store_string(hv, "event", cstr2sv(event));
        hv_store_string(hv, "argc", newSViv(tdata->tg_trigger->tgnargs));
 
        if (tdata->tg_trigger->tgnargs > 0)
@@ -1071,18 +1097,18 @@ plperl_trigger_build_args(FunctionCallInfo fcinfo)
 
                av_extend(av, tdata->tg_trigger->tgnargs);
                for (i = 0; i < tdata->tg_trigger->tgnargs; i++)
-                       av_push(av, newSVstring(tdata->tg_trigger->tgargs[i]));
+                       av_push(av, cstr2sv(tdata->tg_trigger->tgargs[i]));
                hv_store_string(hv, "args", newRV_noinc((SV *) av));
        }
 
        hv_store_string(hv, "relname",
-                                       newSVstring(SPI_getrelname(tdata->tg_relation)));
+                                       cstr2sv(SPI_getrelname(tdata->tg_relation)));
 
        hv_store_string(hv, "table_name",
-                                       newSVstring(SPI_getrelname(tdata->tg_relation)));
+                                       cstr2sv(SPI_getrelname(tdata->tg_relation)));
 
        hv_store_string(hv, "table_schema",
-                                       newSVstring(SPI_getnspname(tdata->tg_relation)));
+                                       cstr2sv(SPI_getnspname(tdata->tg_relation)));
 
        if (TRIGGER_FIRED_BEFORE(tdata->tg_event))
                when = "BEFORE";
@@ -1092,7 +1118,7 @@ plperl_trigger_build_args(FunctionCallInfo fcinfo)
                when = "INSTEAD OF";
        else
                when = "UNKNOWN";
-       hv_store_string(hv, "when", newSVstring(when));
+       hv_store_string(hv, "when", cstr2sv(when));
 
        if (TRIGGER_FIRED_FOR_ROW(tdata->tg_event))
                level = "ROW";
@@ -1100,7 +1126,7 @@ plperl_trigger_build_args(FunctionCallInfo fcinfo)
                level = "STATEMENT";
        else
                level = "UNKNOWN";
-       hv_store_string(hv, "level", newSVstring(level));
+       hv_store_string(hv, "level", cstr2sv(level));
 
        return newRV_noinc((SV *) hv);
 }
@@ -1113,10 +1139,8 @@ plperl_modify_tuple(HV *hvTD, TriggerData *tdata, HeapTuple otup)
 {
        SV                **svp;
        HV                 *hvNew;
+       HE                 *he;
        HeapTuple       rtup;
-       SV                 *val;
-       char       *key;
-       I32                     klen;
        int                     slotsused;
        int                *modattrs;
        Datum      *modvalues;
@@ -1143,13 +1167,15 @@ plperl_modify_tuple(HV *hvTD, TriggerData *tdata, HeapTuple otup)
        slotsused = 0;
 
        hv_iterinit(hvNew);
-       while ((val = hv_iternextsv(hvNew, &key, &klen)))
+       while ((he = hv_iternext(hvNew)))
        {
-               int                     attn = SPI_fnumber(tupdesc, key);
                Oid                     typinput;
                Oid                     typioparam;
                int32           atttypmod;
                FmgrInfo        finfo;
+               SV                      *val = HeVAL(he);
+               char            *key = hek2cstr(he);
+               int                     attn = SPI_fnumber(tupdesc, key);
 
                if (attn <= 0 || tupdesc->attrs[attn - 1]->attisdropped)
                        ereport(ERROR,
@@ -1163,11 +1189,13 @@ plperl_modify_tuple(HV *hvTD, TriggerData *tdata, HeapTuple otup)
                atttypmod = tupdesc->attrs[attn - 1]->atttypmod;
                if (SvOK(val))
                {
+                       char    *str = sv2cstr(val);
                        modvalues[slotsused] = InputFunctionCall(&finfo,
-                                                                                                        sv2text_mbverified(val),
+                                                                                                        str,
                                                                                                         typioparam,
                                                                                                         atttypmod);
                        modnulls[slotsused] = ' ';
+                       pfree(str);
                }
                else
                {
@@ -1179,6 +1207,8 @@ plperl_modify_tuple(HV *hvTD, TriggerData *tdata, HeapTuple otup)
                }
                modattrs[slotsused] = attn;
                slotsused++;
+
+               pfree(key);
        }
        hv_iterinit(hvNew);
 
@@ -1420,7 +1450,7 @@ plperl_create_sub(plperl_proc_desc *prodesc, char *s, Oid fn_oid)
        SAVETMPS;
        PUSHMARK(SP);
        EXTEND(SP, 4);
-       PUSHs(sv_2mortal(newSVstring(subname)));
+       PUSHs(sv_2mortal(cstr2sv(subname)));
        PUSHs(sv_2mortal(newRV_noinc((SV *) pragma_hv)));
        /* 
         * Use 'false' for $prolog in mkfunc, which is kept for compatibility
@@ -1428,7 +1458,7 @@ plperl_create_sub(plperl_proc_desc *prodesc, char *s, Oid fn_oid)
         * the function compiler.
         */
        PUSHs(&PL_sv_no); 
-       PUSHs(sv_2mortal(newSVstring(s)));
+       PUSHs(sv_2mortal(cstr2sv(s)));
        PUTBACK;
 
        /*
@@ -1457,7 +1487,7 @@ plperl_create_sub(plperl_proc_desc *prodesc, char *s, Oid fn_oid)
        if (SvTRUE(ERRSV))
                ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV)))));
+                                errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV)))));
 
        if (!subref)
                ereport(ERROR,
@@ -1533,7 +1563,7 @@ plperl_call_perl_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo)
 
                        tmp = OutputFunctionCall(&(desc->arg_out_func[i]),
                                                                         fcinfo->arg[i]);
-                       sv = newSVstring(tmp);
+                       sv = cstr2sv(tmp);
                        PUSHs(sv_2mortal(sv));
                        pfree(tmp);
                }
@@ -1561,7 +1591,7 @@ plperl_call_perl_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo)
                LEAVE;
                /* XXX need to find a way to assign an errcode here */
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV)))));
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV)))));
        }
 
        retval = newSVsv(POPs);
@@ -1594,7 +1624,7 @@ plperl_call_perl_trigger_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo,
        EXTEND(sp, tg_trigger->tgnargs);
 
        for (i = 0; i < tg_trigger->tgnargs; i++)
-               PUSHs(sv_2mortal(newSVstring(tg_trigger->tgargs[i])));
+               PUSHs(sv_2mortal(cstr2sv(tg_trigger->tgargs[i])));
        PUTBACK;
 
        /* Do NOT use G_KEEPERR here */
@@ -1618,7 +1648,7 @@ plperl_call_perl_trigger_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo,
                LEAVE;
                /* XXX need to find a way to assign an errcode here */
                ereport(ERROR,
-                               (errmsg("%s", strip_trailing_ws(SvPV_nolen(ERRSV)))));
+                               (errmsg("%s", strip_trailing_ws(sv2cstr(ERRSV)))));
        }
 
        retval = newSVsv(POPs);
@@ -1766,6 +1796,7 @@ plperl_func_handler(PG_FUNCTION_ARGS)
        else
        {
                /* Return a perl string converted to a Datum */
+               char    *str;
 
                if (prodesc->fn_retisarray && SvROK(perlret) &&
                        SvTYPE(SvRV(perlret)) == SVt_PVAV)
@@ -1775,9 +1806,11 @@ plperl_func_handler(PG_FUNCTION_ARGS)
                        perlret = array_ret;
                }
 
+               str = sv2cstr(perlret);
                retval = InputFunctionCall(&prodesc->result_in_func,
-                                                                  sv2text_mbverified(perlret),
+                                                                  str,
                                                                   prodesc->result_typioparam, -1);
+               pfree(str);
        }
 
        /* Restore the previous error callback */
@@ -1857,7 +1890,7 @@ plperl_trigger_handler(PG_FUNCTION_ARGS)
                HeapTuple       trv;
                char       *tmp;
 
-               tmp = SvPV_nolen(perlret);
+               tmp = sv2cstr(perlret);
 
                if (pg_strcasecmp(tmp, "SKIP") == 0)
                        trv = NULL;
@@ -1888,6 +1921,7 @@ plperl_trigger_handler(PG_FUNCTION_ARGS)
                        trv = NULL;
                }
                retval = PointerGetDatum(trv);
+               pfree(tmp);
        }
 
        /* Restore the previous error callback */
@@ -2231,7 +2265,7 @@ plperl_hash_from_tuple(HeapTuple tuple, TupleDesc tupdesc)
 
                outputstr = OidOutputFunctionCall(typoutput, attr);
 
-               hv_store_string(hv, attname, newSVstring(outputstr));
+               hv_store_string(hv, attname, cstr2sv(outputstr));
 
                pfree(outputstr);
        }
@@ -2336,7 +2370,7 @@ plperl_spi_execute_fetch_result(SPITupleTable *tuptable, int processed,
        result = newHV();
 
        hv_store_string(result, "status",
-                                       newSVstring(SPI_result_code_string(status)));
+                                       cstr2sv(SPI_result_code_string(status)));
        hv_store_string(result, "processed",
                                        newSViv(processed));
 
@@ -2466,16 +2500,20 @@ plperl_return_next(SV *sv)
 
                if (SvOK(sv))
                {
+                       char    *str;
+
                        if (prodesc->fn_retisarray && SvROK(sv) &&
                                SvTYPE(SvRV(sv)) == SVt_PVAV)
                        {
                                sv = plperl_convert_to_pg_array(sv);
                        }
 
+                       str = sv2cstr(sv);
                        ret = InputFunctionCall(&prodesc->result_in_func,
-                                                                       sv2text_mbverified(sv),
+                                                                       str,
                                                                        prodesc->result_typioparam, -1);
                        isNull = false;
+                       pfree(str);
                }
                else
                {
@@ -2531,7 +2569,7 @@ plperl_spi_query(char *query)
                if (portal == NULL)
                        elog(ERROR, "SPI_cursor_open() failed:%s",
                                 SPI_result_code_string(SPI_result));
-               cursor = newSVstring(portal->name);
+               cursor = cstr2sv(portal->name);
 
                /* Commit the inner transaction, return to outer xact context */
                ReleaseCurrentSubTransaction();
@@ -2716,8 +2754,11 @@ plperl_spi_prepare(char *query, int argc, SV **argv)
                                                typInput,
                                                typIOParam;
                        int32           typmod;
+                       char            *typstr;
 
-                       parseTypeString(SvPV_nolen(argv[i]), &typId, &typmod);
+                       typstr = sv2cstr(argv[i]);
+                       parseTypeString(typstr, &typId, &typmod);
+                       pfree(typstr);
 
                        getTypeInputInfo(typId, &typInput, &typIOParam);
 
@@ -2804,7 +2845,7 @@ plperl_spi_prepare(char *query, int argc, SV **argv)
                                                         HASH_ENTER, &found);
        hash_entry->query_data = qdesc;
 
-       return newSVstring(qdesc->qname);
+       return cstr2sv(qdesc->qname);
 }
 
 HV *
@@ -2881,11 +2922,13 @@ plperl_spi_exec_prepared(char *query, HV *attr, int argc, SV **argv)
                {
                        if (SvOK(argv[i]))
                        {
+                               char    *str = sv2cstr(argv[i]);
                                argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
-                                                                                                sv2text_mbverified(argv[i]),
+                                                                                                str,
                                                                                                 qdesc->argtypioparams[i],
                                                                                                 -1);
                                nulls[i] = ' ';
+                               pfree(str);
                        }
                        else
                        {
@@ -3014,11 +3057,13 @@ plperl_spi_query_prepared(char *query, int argc, SV **argv)
                {
                        if (SvOK(argv[i]))
                        {
+                               char    *str = sv2cstr(argv[i]);
                                argvalues[i] = InputFunctionCall(&qdesc->arginfuncs[i],
-                                                                                                sv2text_mbverified(argv[i]),
+                                                                                                str,
                                                                                                 qdesc->argtypioparams[i],
                                                                                                 -1);
                                nulls[i] = ' ';
+                               pfree(str);
                        }
                        else
                        {
@@ -3044,7 +3089,7 @@ plperl_spi_query_prepared(char *query, int argc, SV **argv)
                        elog(ERROR, "SPI_cursor_open() failed:%s",
                                 SPI_result_code_string(SPI_result));
 
-               cursor = newSVstring(portal->name);
+               cursor = cstr2sv(portal->name);
 
                /* Commit the inner transaction, return to outer xact context */
                ReleaseCurrentSubTransaction();
@@ -3124,23 +3169,6 @@ plperl_spi_freeplan(char *query)
        SPI_freeplan(plan);
 }
 
-/*
- * Create a new SV from a string assumed to be in the current database's
- * encoding.
- */
-static SV  *
-newSVstring(const char *str)
-{
-       SV                 *sv;
-
-       sv = newSVpv(str, 0);
-#if PERL_BCDVERSION >= 0x5006000L
-       if (GetDatabaseEncoding() == PG_UTF8)
-               SvUTF8_on(sv);
-#endif
-       return sv;
-}
-
 /*
  * Store an SV into a hash table under a key that is a string assumed to be
  * in the current database's encoding.
@@ -3148,7 +3176,11 @@ newSVstring(const char *str)
 static SV **
 hv_store_string(HV *hv, const char *key, SV *val)
 {
-       int32           klen = strlen(key);
+       int32           hlen;
+       char            *hkey;
+       SV                      **ret;
+
+       hkey = (char*)pg_do_encoding_conversion((unsigned char *)key, strlen(key), GetDatabaseEncoding(), PG_UTF8);
 
        /*
         * This seems nowhere documented, but under Perl 5.8.0 and up, hv_store()
@@ -3156,11 +3188,13 @@ hv_store_string(HV *hv, const char *key, SV *val)
         * does not appear that hashes track UTF-8-ness of keys at all in Perl
         * 5.6.
         */
-#if PERL_BCDVERSION >= 0x5008000L
-       if (GetDatabaseEncoding() == PG_UTF8)
-               klen = -klen;
-#endif
-       return hv_store(hv, key, klen, val, 0);
+       hlen = -strlen(hkey);
+       ret = hv_store(hv, hkey, hlen, val, 0);
+
+       if (hkey != key)
+               pfree(hkey);
+
+       return ret;
 }
 
 /*
@@ -3170,14 +3204,20 @@ hv_store_string(HV *hv, const char *key, SV *val)
 static SV **
 hv_fetch_string(HV *hv, const char *key)
 {
-       int32           klen = strlen(key);
+       int32           hlen;
+       char            *hkey;
+       SV                      **ret;
+
+       hkey = (char*)pg_do_encoding_conversion((unsigned char *)key, strlen(key), GetDatabaseEncoding(), PG_UTF8);
 
        /* See notes in hv_store_string */
-#if PERL_BCDVERSION >= 0x5008000L
-       if (GetDatabaseEncoding() == PG_UTF8)
-               klen = -klen;
-#endif
-       return hv_fetch(hv, key, klen, 0);
+       hlen = -strlen(hkey);
+       ret = hv_fetch(hv, hkey, hlen, 0);
+
+       if(hkey != key)
+               pfree(hkey);
+
+       return ret;
 }
 
 /*
diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h
new file mode 100644 (file)
index 0000000..4480ce8
--- /dev/null
@@ -0,0 +1,69 @@
+#ifndef PL_PERL_HELPERS_H
+#define PL_PERL_HELPERS_H
+
+/*
+ * convert from utf8 to database encoding
+ */
+static inline char *
+utf_u2e(const char *utf8_str, size_t len)
+{
+       char *ret = (char*)pg_do_encoding_conversion((unsigned char*)utf8_str, len, PG_UTF8, GetDatabaseEncoding());
+       if (ret == utf8_str)
+               ret = pstrdup(ret);
+       return ret;
+}
+
+/*
+ * convert from database encoding to utf8
+ */
+static inline char *
+utf_e2u(const char *str)
+{
+       char *ret = (char*)pg_do_encoding_conversion((unsigned char*)str, strlen(str), GetDatabaseEncoding(), PG_UTF8);
+       if (ret == str)
+               ret = pstrdup(ret);
+       return ret;
+}
+
+
+/*
+ * Convert an SV to a char * in the current database encoding
+ */
+static inline char *
+sv2cstr(SV *sv)
+{
+       char *val;
+       STRLEN len;
+
+       /*
+        * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
+        */
+       val = SvPVutf8(sv, len);
+
+       /*
+        * we use perls length in the event we had an embedded null byte to ensure
+        * we error out properly
+        */
+       return utf_u2e(val, len);
+}
+
+/*
+ * Create a new SV from a string assumed to be in the current database's
+ * encoding.
+ */
+
+static inline SV *
+cstr2sv(const char *str)
+{
+       SV *sv;
+       char *utf8_str = utf_e2u(str);
+
+       sv = newSVpv(utf8_str, 0);
+       SvUTF8_on(sv);
+
+       pfree(utf8_str);
+
+       return sv;
+}
+
+#endif   /* PL_PERL_HELPERS_H */