plperl: Skip setting UTF8 flag when in SQL_ASCII encoding

author Alvaro Herrera <alvherre@alvh.no-ip.org>

Mon, 9 Jul 2012 21:36:29 +0000 (17:36 -0400)

committer Alvaro Herrera <alvherre@alvh.no-ip.org>

Tue, 10 Jul 2012 19:15:16 +0000 (15:15 -0400)
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 9 Jul 2012 21:36:29 +0000 (17:36 -0400)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Tue, 10 Jul 2012 19:15:16 +0000 (15:15 -0400)
diff --git a/src/pl/plperl/GNUmakefile b/src/pl/plperl/GNUmakefile

index 188d7d234bce47af85187684db0c3a2669a1ca53..b469b269749afda18b20a5202eae3aa4e4c26d26 100644 (file)
--- a/src/pl/plperl/GNUmakefile
+++ b/src/pl/plperl/GNUmakefile
@@ -44,7 +44,7 @@ PERLCHUNKS = plc_perlboot.pl plc_trusted.pl
  SHLIB_LINK = $(perl_embed_ldflags)
  
  REGRESS_OPTS = --dbname=$(PL_TESTDB) --load-extension=plperl  --load-extension=plperlu
-REGRESS = plperl plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array
+REGRESS = plperl plperl_lc plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array
  # if Perl can support two interpreters in one backend,
  # test plperl-and-plperlu cases
  ifneq ($(PERL),)
diff --git a/src/pl/plperl/Util.xs b/src/pl/plperl/Util.xs

index 7d0102b837f8bd0b5149180321019f4f5ba45a63..b2e0dfcf75d30fd9bff1fabd6f3c46928e4f8656 100644 (file)
--- a/src/pl/plperl/Util.xs
+++ b/src/pl/plperl/Util.xs
@@ -67,8 +67,11 @@ static text *
  sv2text(SV *sv)
  {
         char       *str = sv2cstr(sv);
+       text       *text;
  
-       return cstring_to_text(str);
+       text = cstring_to_text(str);
+       pfree(str);
+       return text;
  }
  
  MODULE = PostgreSQL::InServer::Util PREFIX = util_
@@ -113,8 +116,11 @@ util_quote_literal(sv)
      }
      else {
          text *arg = sv2text(sv);
-        text *ret = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg)));
-               char *str = text_to_cstring(ret);
+               text *quoted = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg)));
+               char *str;
+
+               pfree(arg);
+               str = text_to_cstring(quoted);
                 RETVAL = cstr2sv(str);
                 pfree(str);
      }
@@ -132,8 +138,11 @@ util_quote_nullable(sv)
      else
         {
          text *arg = sv2text(sv);
-        text *ret = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg)));
-               char *str = text_to_cstring(ret);
+               text *quoted = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg)));
+               char *str;
+
+               pfree(arg);
+               str = text_to_cstring(quoted);
                 RETVAL = cstr2sv(str);
                 pfree(str);
      }
@@ -145,12 +154,14 @@ util_quote_ident(sv)
      SV *sv
      PREINIT:
          text *arg;
-        text *ret;
+               text *quoted;
                 char *str;
      CODE:
          arg = sv2text(sv);
-        ret = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg)));
-               str = text_to_cstring(ret);
+               quoted = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg)));
+
+               pfree(arg);
+               str = text_to_cstring(quoted);
                 RETVAL = cstr2sv(str);
                 pfree(str);
      OUTPUT:
diff --git a/src/pl/plperl/expected/plperl.out b/src/pl/plperl/expected/plperl.out

index df54937f49406feb26190a20b914d0fb58a5dfa0..906dc15e0ca097ec962c6dce9a08b29cb31d35b5 100644 (file)
--- a/src/pl/plperl/expected/plperl.out
+++ b/src/pl/plperl/expected/plperl.out
@@ -650,16 +650,6 @@ CONTEXT:  PL/Perl anonymous code block
  DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl;
  ERROR:  Useless use of sort in scalar context at line 1.
  CONTEXT:  PL/Perl anonymous code block
---
--- Make sure strings are validated
--- Should fail for all encodings, as nul bytes are never permitted.
---
-CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
-  return "abcd\0efg";
-$$ LANGUAGE plperl;
-SELECT perl_zerob();
-ERROR:  invalid byte sequence for encoding "UTF8": 0x00
-CONTEXT:  PL/Perl function "perl_zerob"
  -- make sure functions marked as VOID without an explicit return work
  CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$
     $_SHARED{myquote} = sub {
diff --git a/src/pl/plperl/expected/plperl_lc.out b/src/pl/plperl/expected/plperl_lc.out

new file mode 100644 (file)

index 0000000..8557b46
--- /dev/null
+++ b/src/pl/plperl/expected/plperl_lc.out
@@ -0,0 +1,33 @@
+--
+-- Make sure strings are validated
+-- Should fail for all encodings, as nul bytes are never permitted.
+--
+CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
+  return "abcd\0efg";
+$$ LANGUAGE plperl;
+SELECT perl_zerob();
+ERROR:  invalid byte sequence for encoding "UTF8": 0x00
+CONTEXT:  PL/Perl function "perl_zerob"
+CREATE OR REPLACE FUNCTION perl_0x80_in(text) RETURNS BOOL AS $$
+  return ($_[0] eq "abc\x80de" ? "true" : "false");
+$$ LANGUAGE plperl;
+SELECT perl_0x80_in(E'abc\x80de');
+ERROR:  invalid byte sequence for encoding "UTF8": 0x80
+CREATE OR REPLACE FUNCTION perl_0x80_out() RETURNS TEXT AS $$
+  return "abc\x80de";
+$$ LANGUAGE plperl;
+SELECT perl_0x80_out() = E'abc\x80de';
+ERROR:  invalid byte sequence for encoding "UTF8": 0x80
+CREATE OR REPLACE FUNCTION perl_utf_inout(text) RETURNS TEXT AS $$
+  $str = $_[0]; $code = "NotUTF8:"; $match = "ab\xe5\xb1\xb1cd";
+  if (utf8::is_utf8($str)) {
+    $code = "UTF8:"; utf8::decode($str); $match="ab\x{5c71}cd";
+  }
+  return ($str ne $match ? $code."DIFFER" : $code."ab\x{5ddd}cd");
+$$ LANGUAGE plperl;
+SELECT encode(perl_utf_inout(E'ab\xe5\xb1\xb1cd')::bytea, 'escape')
+        encode         
+-----------------------
+ UTF8:ab\345\267\235cd
+(1 row)
+
diff --git a/src/pl/plperl/expected/plperl_lc_1.out b/src/pl/plperl/expected/plperl_lc_1.out

new file mode 100644 (file)

index 0000000..c454c44
--- /dev/null
+++ b/src/pl/plperl/expected/plperl_lc_1.out
@@ -0,0 +1,41 @@
+--
+-- Make sure strings are validated
+-- Should fail for all encodings, as nul bytes are never permitted.
+--
+CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
+  return "abcd\0efg";
+$$ LANGUAGE plperl;
+SELECT perl_zerob();
+ERROR:  invalid byte sequence for encoding "SQL_ASCII": 0x00
+CONTEXT:  PL/Perl function "perl_zerob"
+CREATE OR REPLACE FUNCTION perl_0x80_in(text) RETURNS BOOL AS $$
+  return ($_[0] eq "abc\x80de" ? "true" : "false");
+$$ LANGUAGE plperl;
+SELECT perl_0x80_in(E'abc\x80de');
+ perl_0x80_in 
+--------------
+ t
+(1 row)
+
+CREATE OR REPLACE FUNCTION perl_0x80_out() RETURNS TEXT AS $$
+  return "abc\x80de";
+$$ LANGUAGE plperl;
+SELECT perl_0x80_out() = E'abc\x80de';
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE OR REPLACE FUNCTION perl_utf_inout(text) RETURNS TEXT AS $$
+  $str = $_[0]; $code = "NotUTF8:"; $match = "ab\xe5\xb1\xb1cd";
+  if (utf8::is_utf8($str)) {
+    $code = "UTF8:"; utf8::decode($str); $match="ab\x{5c71}cd";
+  }
+  return ($str ne $match ? $code."DIFFER" : $code."ab\x{5ddd}cd");
+$$ LANGUAGE plperl;
+SELECT encode(perl_utf_inout(E'ab\xe5\xb1\xb1cd')::bytea, 'escape')
+          encode          
+--------------------------
+ NotUTF8:ab\345\267\235cd
+(1 row)
+
diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h

index 1b6648be1d169daf3869ca4135598af7e37d23ca..ed99194ed1e074826453f2c9e53e42d7ce87c607 100644 (file)
--- a/src/pl/plperl/plperl_helpers.h
+++ b/src/pl/plperl/plperl_helpers.h
@@ -3,21 +3,29 @@
  
  /*
   * convert from utf8 to database encoding
+ *
+ * Returns a palloc'ed copy of the original string
   */
  static inline char *
-utf_u2e(const char *utf8_str, size_t len)
+utf_u2e(char *utf8_str, size_t len)
  {
         int                     enc = GetDatabaseEncoding();
-
-       char       *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc);
+       char       *ret;
  
         /*
-        * when we are a PG_UTF8 or SQL_ASCII database pg_do_encoding_conversion()
-        * will not do any conversion or verification. we need to do it manually
-        * instead.
+        * When we are in a PG_UTF8 or SQL_ASCII database
+        * pg_do_encoding_conversion() will not do any conversion (which is good)
+        * or verification (not so much), so we need to run the verification step
+        * separately.
          */
         if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
-               pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false);
+       {
+               pg_verify_mbstr_len(enc, utf8_str, len, false);
+               ret = utf8_str;
+       }
+       else
+               ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
+                                                                                                len, PG_UTF8, enc);
  
         if (ret == utf8_str)
                 ret = pstrdup(ret);
@@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len)
  
  /*
   * convert from database encoding to utf8
+ *
+ * Returns a palloc'ed copy of the original string
   */
  static inline char *
  utf_e2u(const char *str)
  {
-       char       *ret = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), GetDatabaseEncoding(), PG_UTF8);
+       char       *ret =
+               (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
+                                                                                  GetDatabaseEncoding(), PG_UTF8);
  
         if (ret == str)
                 ret = pstrdup(ret);
@@ -41,6 +53,8 @@ utf_e2u(const char *str)
  
  /*
   * Convert an SV to a char * in the current database encoding
+ *
+ * Returns a palloc'ed copy of the original string
   */
  static inline char *
  sv2cstr(SV *sv)
@@ -51,7 +65,9 @@ sv2cstr(SV *sv)
  
         /*
          * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
-        *
+        */
+
+       /*
          * SvPVutf8() croaks nastily on certain things, like typeglobs and
          * readonly objects such as $^V. That's a perl bug - it's not supposed to
          * happen. To avoid crashing the backend, we make a copy of the sv before
@@ -63,18 +79,27 @@ sv2cstr(SV *sv)
                 (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM))
                 sv = newSVsv(sv);
         else
-
+       {
                 /*
                  * increase the reference count so we can just SvREFCNT_dec() it when
                  * we are done
                  */
                 SvREFCNT_inc_simple_void(sv);
+       }
  
-       val = SvPVutf8(sv, len);
+       /*
+        * Request the string from Perl, in UTF-8 encoding; but if we're in a
+        * SQL_ASCII database, just request the byte soup without trying to make it
+        * UTF8, because that might fail.
+        */
+       if (GetDatabaseEncoding() == PG_SQL_ASCII)
+               val = SvPV(sv, len);
+       else
+               val = SvPVutf8(sv, len);
  
         /*
-        * we use perl's length in the event we had an embedded null byte to
-        * ensure we error out properly
+        * Now convert to database encoding.  We use perl's length in the event we
+        * had an embedded null byte to ensure we error out properly.
          */
         res = utf_u2e(val, len);
  
@@ -88,16 +113,20 @@ sv2cstr(SV *sv)
   * Create a new SV from a string assumed to be in the current database's
   * encoding.
   */
-
  static inline SV *
  cstr2sv(const char *str)
  {
         SV                 *sv;
-       char       *utf8_str = utf_e2u(str);
+       char       *utf8_str;
+
+       /* no conversion when SQL_ASCII */
+       if (GetDatabaseEncoding() == PG_SQL_ASCII)
+               return newSVpv(str, 0);
+
+       utf8_str = utf_e2u(str);
  
         sv = newSVpv(utf8_str, 0);
         SvUTF8_on(sv);
-
         pfree(utf8_str);
  
         return sv;
diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql

index 84af1fd73fb78474b94b27176b463963d0e73a76..a5e3840dac23667ff2d599c46b6e6887cc6875a8 100644 (file)
--- a/src/pl/plperl/sql/plperl.sql
+++ b/src/pl/plperl/sql/plperl.sql
@@ -423,15 +423,6 @@ DO $do$ use strict; my $name = "foo"; my $ref = $$name; $do$ LANGUAGE plperl;
  -- yields "ERROR:  Useless use of sort in scalar context."
  DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl;
  
---
--- Make sure strings are validated
--- Should fail for all encodings, as nul bytes are never permitted.
---
-CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
-  return "abcd\0efg";
-$$ LANGUAGE plperl;
-SELECT perl_zerob();
-
  -- make sure functions marked as VOID without an explicit return work
  CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$
     $_SHARED{myquote} = sub {
diff --git a/src/pl/plperl/sql/plperl_lc.sql b/src/pl/plperl/sql/plperl_lc.sql

new file mode 100644 (file)

index 0000000..fd75bc0
--- /dev/null
+++ b/src/pl/plperl/sql/plperl_lc.sql
@@ -0,0 +1,24 @@
+--
+-- Make sure strings are validated
+-- Should fail for all encodings, as nul bytes are never permitted.
+--
+CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$
+  return "abcd\0efg";
+$$ LANGUAGE plperl;
+SELECT perl_zerob();
+CREATE OR REPLACE FUNCTION perl_0x80_in(text) RETURNS BOOL AS $$
+  return ($_[0] eq "abc\x80de" ? "true" : "false");
+$$ LANGUAGE plperl;
+SELECT perl_0x80_in(E'abc\x80de');
+CREATE OR REPLACE FUNCTION perl_0x80_out() RETURNS TEXT AS $$
+  return "abc\x80de";
+$$ LANGUAGE plperl;
+SELECT perl_0x80_out() = E'abc\x80de';
+CREATE OR REPLACE FUNCTION perl_utf_inout(text) RETURNS TEXT AS $$
+  $str = $_[0]; $code = "NotUTF8:"; $match = "ab\xe5\xb1\xb1cd";
+  if (utf8::is_utf8($str)) {
+    $code = "UTF8:"; utf8::decode($str); $match="ab\x{5c71}cd";
+  }
+  return ($str ne $match ? $code."DIFFER" : $code."ab\x{5ddd}cd");
+$$ LANGUAGE plperl;
+SELECT encode(perl_utf_inout(E'ab\xe5\xb1\xb1cd')::bytea, 'escape')
author	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Mon, 9 Jul 2012 21:36:29 +0000 (17:36 -0400)
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Tue, 10 Jul 2012 19:15:16 +0000 (15:15 -0400)
src/pl/plperl/GNUmakefile		patch \| blob \| history
src/pl/plperl/Util.xs		patch \| blob \| history
src/pl/plperl/expected/plperl.out		patch \| blob \| history
src/pl/plperl/expected/plperl_lc.out	[new file with mode: 0644]	patch \| blob
src/pl/plperl/expected/plperl_lc_1.out	[new file with mode: 0644]	patch \| blob
src/pl/plperl/plperl_helpers.h		patch \| blob \| history
src/pl/plperl/sql/plperl.sql		patch \| blob \| history
src/pl/plperl/sql/plperl_lc.sql	[new file with mode: 0644]	patch \| blob