From a0b2bb8ced532daf8ea173d280334109fe87e88a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 19 Jul 2007 20:34:34 +0000 Subject: [PATCH] Make replace(), split_part(), and string_to_array() behave somewhat sanely when handed an invalidly-encoded pattern. The previous coding could get into an infinite loop if pg_mb2wchar_with_len() returned a zero-length string after we'd tested for nonempty pattern; which is exactly what it will do if the string consists only of an incomplete multibyte character. This led to either an out-of-memory error or a backend crash depending on platform. Per report from Wiktor Wodecki. --- src/backend/utils/adt/varlena.c | 68 ++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 26ba15b189..7c4e313217 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.139.2.4 2006/10/07 00:11:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.139.2.5 2007/07/19 20:34:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2076,8 +2076,8 @@ replace_text(PG_FUNCTION_ARGS) text *src_text = PG_GETARG_TEXT_P(0); text *from_sub_text = PG_GETARG_TEXT_P(1); text *to_sub_text = PG_GETARG_TEXT_P(2); - int src_text_len = TEXTLEN(src_text); - int from_sub_text_len = TEXTLEN(from_sub_text); + int src_text_len; + int from_sub_text_len; TextPositionState state; text *chunk_text; text *ret_text; @@ -2085,11 +2085,22 @@ replace_text(PG_FUNCTION_ARGS) int curr_posn; StringInfoData str; - if (src_text_len == 0 || from_sub_text_len == 0) - PG_RETURN_TEXT_P(src_text); - text_position_setup(src_text, from_sub_text, &state); + /* + * Note: we check the converted string length, not the original, because + * they could be different if the input contained invalid encoding. + */ + src_text_len = state.len1; + from_sub_text_len = state.len2; + + /* Return unmodified source string if empty source or pattern */ + if (src_text_len < 1 || from_sub_text_len < 1) + { + text_position_cleanup(&state); + PG_RETURN_TEXT_P(src_text); + } + start_posn = 1; curr_posn = text_position_next(1, &state); @@ -2104,6 +2115,9 @@ replace_text(PG_FUNCTION_ARGS) do { + CHECK_FOR_INTERRUPTS(); + + /* copy the data skipped over by last text_position_next() */ chunk_text = text_substring(PointerGetDatum(src_text), start_posn, curr_posn - start_posn, @@ -2396,8 +2410,8 @@ split_text(PG_FUNCTION_ARGS) text *inputstring = PG_GETARG_TEXT_P(0); text *fldsep = PG_GETARG_TEXT_P(1); int fldnum = PG_GETARG_INT32(2); - int inputstring_len = TEXTLEN(inputstring); - int fldsep_len = TEXTLEN(fldsep); + int inputstring_len; + int fldsep_len; TextPositionState state; int start_posn; int end_posn; @@ -2409,13 +2423,26 @@ split_text(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("field position must be greater than zero"))); + text_position_setup(inputstring, fldsep, &state); + + /* + * Note: we check the converted string length, not the original, because + * they could be different if the input contained invalid encoding. + */ + inputstring_len = state.len1; + fldsep_len = state.len2; + /* return empty string for empty input string */ if (inputstring_len < 1) + { + text_position_cleanup(&state); PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); + } /* empty field separator */ if (fldsep_len < 1) { + text_position_cleanup(&state); /* if first field, return input string, else empty string */ if (fldnum == 1) PG_RETURN_TEXT_P(inputstring); @@ -2423,8 +2450,6 @@ split_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(PG_STR_GET_TEXT("")); } - text_position_setup(inputstring, fldsep, &state); - /* identify bounds of first field */ start_posn = 1; end_posn = text_position_next(1, &state); @@ -2484,8 +2509,8 @@ text_to_array(PG_FUNCTION_ARGS) { text *inputstring = PG_GETARG_TEXT_P(0); text *fldsep = PG_GETARG_TEXT_P(1); - int inputstring_len = TEXTLEN(inputstring); - int fldsep_len = TEXTLEN(fldsep); + int inputstring_len; + int fldsep_len; TextPositionState state; int fldnum; int start_posn; @@ -2493,23 +2518,38 @@ text_to_array(PG_FUNCTION_ARGS) text *result_text; ArrayBuildState *astate = NULL; + text_position_setup(inputstring, fldsep, &state); + + /* + * Note: we check the converted string length, not the original, because + * they could be different if the input contained invalid encoding. + */ + inputstring_len = state.len1; + fldsep_len = state.len2; + /* return NULL for empty input string */ if (inputstring_len < 1) + { + text_position_cleanup(&state); PG_RETURN_NULL(); + } /* * empty field separator return one element, 1D, array using the input * string */ if (fldsep_len < 1) + { + text_position_cleanup(&state); PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID, PointerGetDatum(inputstring), 1)); - - text_position_setup(inputstring, fldsep, &state); + } start_posn = 1; for (fldnum = 1;; fldnum++) /* field number is 1 based */ { + CHECK_FOR_INTERRUPTS(); + end_posn = text_position_next(start_posn, &state); if (end_posn == 0) -- 2.50.0