From b631a46ed83b7eebf5cde16b41d842596cbcc69d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Sep 2015 10:52:22 -0400 Subject: [PATCH] Fix plperl to handle non-ASCII error message texts correctly. We were passing error message texts to croak() verbatim, which turns out not to work if the text contains non-ASCII characters; Perl mangles their encoding, as reported in bug #13638 from Michal Leinweber. To fix, convert the text into a UTF8-encoded SV first. It's hard to test this without risking failures in different database encodings; but we can follow the lead of plpython, which is already assuming that no-break space (U+00A0) has an equivalent in all encodings we care about running the regression tests in (cf commit 2dfa15de5). Back-patch to 9.1. The code is quite different in 9.0, and anyway it seems too risky to put something like this into 9.0's final minor release. Alex Hunsaker, with suggestions from Tim Bunce and Tom Lane --- src/pl/plperl/SPI.xs | 2 +- src/pl/plperl/Util.xs | 2 +- src/pl/plperl/expected/plperl_elog.out | 13 ++++++++ src/pl/plperl/expected/plperl_elog_1.out | 13 ++++++++ src/pl/plperl/plperl.c | 12 ++++---- src/pl/plperl/plperl_helpers.h | 38 ++++++++++++++++++++++++ src/pl/plperl/sql/plperl_elog.sql | 15 ++++++++++ 7 files changed, 87 insertions(+), 8 deletions(-) diff --git a/src/pl/plperl/SPI.xs b/src/pl/plperl/SPI.xs index 6b8dcf6299..0447c50df1 100644 --- a/src/pl/plperl/SPI.xs +++ b/src/pl/plperl/SPI.xs @@ -41,7 +41,7 @@ do_plperl_return_next(SV *sv) FlushErrorState(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); } PG_END_TRY(); } diff --git a/src/pl/plperl/Util.xs b/src/pl/plperl/Util.xs index b2e0dfcf75..8c3c47fec9 100644 --- a/src/pl/plperl/Util.xs +++ b/src/pl/plperl/Util.xs @@ -58,7 +58,7 @@ do_util_elog(int level, SV *msg) pfree(cmsg); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); } PG_END_TRY(); } diff --git a/src/pl/plperl/expected/plperl_elog.out b/src/pl/plperl/expected/plperl_elog.out index 3f9449a965..a6d35cb79c 100644 --- a/src/pl/plperl/expected/plperl_elog.out +++ b/src/pl/plperl/expected/plperl_elog.out @@ -97,3 +97,16 @@ NOTICE: caught die 2 (1 row) +-- Test non-ASCII error messages +-- +-- Note: this test case is known to fail if the database encoding is +-- EUC_CN, EUC_JP, EUC_KR, or EUC_TW, for lack of any equivalent to +-- U+00A0 (no-break space) in those encodings. However, testing with +-- plain ASCII data would be rather useless, so we must live with that. +SET client_encoding TO UTF8; +create or replace function error_with_nbsp() returns void language plperl as $$ + elog(ERROR, "this message contains a no-break space"); +$$; +select error_with_nbsp(); +ERROR: this message contains a no-break space at line 2. +CONTEXT: PL/Perl function "error_with_nbsp" diff --git a/src/pl/plperl/expected/plperl_elog_1.out b/src/pl/plperl/expected/plperl_elog_1.out index 34d5d5836d..85aa460ec4 100644 --- a/src/pl/plperl/expected/plperl_elog_1.out +++ b/src/pl/plperl/expected/plperl_elog_1.out @@ -97,3 +97,16 @@ NOTICE: caught die 2 (1 row) +-- Test non-ASCII error messages +-- +-- Note: this test case is known to fail if the database encoding is +-- EUC_CN, EUC_JP, EUC_KR, or EUC_TW, for lack of any equivalent to +-- U+00A0 (no-break space) in those encodings. However, testing with +-- plain ASCII data would be rather useless, so we must live with that. +SET client_encoding TO UTF8; +create or replace function error_with_nbsp() returns void language plperl as $$ + elog(ERROR, "this message contains a no-break space"); +$$; +select error_with_nbsp(); +ERROR: this message contains a no-break space at line 2. +CONTEXT: PL/Perl function "error_with_nbsp" diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 296d17dbbb..65f2d242a0 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -3066,7 +3066,7 @@ plperl_spi_exec(char *query, int limit) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; @@ -3299,7 +3299,7 @@ plperl_spi_query(char *query) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; @@ -3385,7 +3385,7 @@ plperl_spi_fetchrow(char *cursor) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; @@ -3560,7 +3560,7 @@ plperl_spi_prepare(char *query, int argc, SV **argv) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; @@ -3701,7 +3701,7 @@ plperl_spi_exec_prepared(char *query, HV *attr, int argc, SV **argv) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; @@ -3830,7 +3830,7 @@ plperl_spi_query_prepared(char *query, int argc, SV **argv) SPI_restore_connection(); /* Punt the error to Perl */ - croak("%s", edata->message); + croak_cstr(edata->message); /* Can't get here, but keep compiler quiet */ return NULL; diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h index fab0a7ba08..f8aa06835c 100644 --- a/src/pl/plperl/plperl_helpers.h +++ b/src/pl/plperl/plperl_helpers.h @@ -123,4 +123,42 @@ cstr2sv(const char *str) return sv; } +/* + * croak() with specified message, which is given in the database encoding. + * + * Ideally we'd just write croak("%s", str), but plain croak() does not play + * nice with non-ASCII data. In modern Perl versions we can call cstr2sv() + * and pass the result to croak_sv(); in versions that don't have croak_sv(), + * we have to work harder. + */ +static inline void +croak_cstr(const char *str) +{ +#ifdef croak_sv + /* Use sv_2mortal() to be sure the transient SV gets freed */ + croak_sv(sv_2mortal(cstr2sv(str))); +#else + + /* + * The older way to do this is to assign a UTF8-marked value to ERRSV and + * then call croak(NULL). But if we leave it to croak() to append the + * error location, it does so too late (only after popping the stack) in + * some Perl versions. Hence, use mess() to create an SV with the error + * location info already appended. + */ + SV *errsv = get_sv("@", GV_ADD); + char *utf8_str = utf_e2u(str); + SV *ssv; + + ssv = mess("%s", utf8_str); + SvUTF8_on(ssv); + + pfree(utf8_str); + + sv_setsv(errsv, ssv); + + croak(NULL); +#endif /* croak_sv */ +} + #endif /* PL_PERL_HELPERS_H */ diff --git a/src/pl/plperl/sql/plperl_elog.sql b/src/pl/plperl/sql/plperl_elog.sql index 032fd8b8ba..9ea1350069 100644 --- a/src/pl/plperl/sql/plperl_elog.sql +++ b/src/pl/plperl/sql/plperl_elog.sql @@ -76,3 +76,18 @@ return $a + $b; $$; select indirect_die_caller(); + +-- Test non-ASCII error messages +-- +-- Note: this test case is known to fail if the database encoding is +-- EUC_CN, EUC_JP, EUC_KR, or EUC_TW, for lack of any equivalent to +-- U+00A0 (no-break space) in those encodings. However, testing with +-- plain ASCII data would be rather useless, so we must live with that. + +SET client_encoding TO UTF8; + +create or replace function error_with_nbsp() returns void language plperl as $$ + elog(ERROR, "this message contains a no-break space"); +$$; + +select error_with_nbsp(); -- 2.40.0