From: Andrew Dunstan Date: Sat, 26 Nov 2011 17:16:27 +0000 (-0500) Subject: Ensure plperl strings are always correctly UTF8 encoded. X-Git-Tag: REL9_1_2~19 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e75d41f0c3e69fefe24a98514b897716d60a83dd;p=postgresql Ensure plperl strings are always correctly UTF8 encoded. Amit Khandekar and Alex Hunsaker. Backpatched to 9.1 where the problem first occurred. --- diff --git a/src/pl/plperl/GNUmakefile b/src/pl/plperl/GNUmakefile index aa44827bfa..58d5f357b4 100644 --- a/src/pl/plperl/GNUmakefile +++ b/src/pl/plperl/GNUmakefile @@ -57,7 +57,7 @@ PSQLDIR = $(bindir) include $(top_srcdir)/src/Makefile.shlib -plperl.o: perlchunks.h plperl_opmask.h +plperl.o: perlchunks.h plperl_opmask.h plperl_helpers.h plperl_opmask.h: plperl_opmask.pl @if [ x"$(perl_privlibexp)" = x"" ]; then echo "configure switch --with-perl was not specified."; exit 1; fi diff --git a/src/pl/plperl/expected/plperl.out b/src/pl/plperl/expected/plperl.out index 906dc15e0c..6e2d5a6f7e 100644 --- a/src/pl/plperl/expected/plperl.out +++ b/src/pl/plperl/expected/plperl.out @@ -650,6 +650,16 @@ CONTEXT: PL/Perl anonymous code block DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; ERROR: Useless use of sort in scalar context at line 1. CONTEXT: PL/Perl anonymous code block +-- +-- Make sure strings are validated +-- Should fail for all encodings, as nul bytes are never permitted. 
+-- +CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ + return "abcd\0efg"; +$$ LANGUAGE plperlu; +SELECT perl_zerob(); +ERROR: invalid byte sequence for encoding "UTF8": 0x00 +CONTEXT: PL/Perl function "perl_zerob" -- make sure functions marked as VOID without an explicit return work CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ $_SHARED{myquote} = sub { diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h index 81c177b164..ac0a97d7aa 100644 --- a/src/pl/plperl/plperl_helpers.h +++ b/src/pl/plperl/plperl_helpers.h @@ -7,10 +7,21 @@ static inline char * utf_u2e(const char *utf8_str, size_t len) { - char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, GetDatabaseEncoding()); + int enc = GetDatabaseEncoding(); + + char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc); + + /* + * When we are in a PG_UTF8 or SQL_ASCII database, + * pg_do_encoding_conversion() will not do any conversion or + * verification, so we need to do the verification manually instead. + */ + if (enc == PG_UTF8 || enc == PG_SQL_ASCII) + pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false); if (ret == utf8_str) ret = pstrdup(ret); + return ret; } diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql index a5e3840dac..ecb59cdb77 100644 --- a/src/pl/plperl/sql/plperl.sql +++ b/src/pl/plperl/sql/plperl.sql @@ -423,6 +423,15 @@ DO $do$ use strict; my $name = "foo"; my $ref = $$name; $do$ LANGUAGE plperl; -- yields "ERROR: Useless use of sort in scalar context." DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; +-- +-- Make sure strings are validated +-- Should fail for all encodings, as nul bytes are never permitted. 
+-- +CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ + return "abcd\0efg"; +$$ LANGUAGE plperlu; +SELECT perl_zerob(); + -- make sure functions marked as VOID without an explicit return work CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ $_SHARED{myquote} = sub {