From f3eb76b399efb88882b2984fccdefcea540b2749 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 31 Jul 2018 13:00:07 -0400 Subject: [PATCH] Further fixes for quoted-list GUC values in pg_dump and ruleutils.c. Commits 742869946 et al turn out to be a couple bricks shy of a load. We were dumping the stored values of GUC_LIST_QUOTE variables as they appear in proconfig or setconfig catalog columns. However, although that quoting rule looks a lot like SQL-identifier double quotes, there are two critical differences: empty strings ("") are legal, and depending on which variable you're considering, values longer than NAMEDATALEN might be valid too. So the current technique fails altogether on empty-string list entries (as reported by Steven Winfield in bug #15248) and it also risks truncating file pathnames during dump/reload of GUC values that are lists of pathnames. To fix, split the stored value without any downcasing or truncation, and then emit each element as a SQL string literal. This is a tad annoying, because we now have three copies of the comma-separated-string splitting logic in varlena.c as well as a fourth one in dumputils.c. (Not to mention the randomly-different-from-those splitting logic in libpq...) I looked at unifying these, but it would be rather a mess unless we're willing to tweak the API definitions of SplitIdentifierString, SplitDirectoriesString, or both. That might be worth doing in future; but it seems pretty unsafe for a back-patched bug fix, so for now accept the duplication. Back-patch to all supported branches, as the previous fix was. Discussion: https://postgr.es/m/7585.1529435872@sss.pgh.pa.us --- src/backend/utils/adt/ruleutils.c | 37 +++++-- src/backend/utils/adt/varlena.c | 112 ++++++++++++++++++++++ src/bin/pg_dump/dumputils.c | 144 ++++++++++++++++++++++++++-- src/bin/pg_dump/dumputils.h | 3 + src/bin/pg_dump/pg_dump.c | 30 +++++- src/include/utils/varlena.h | 2 + src/test/regress/expected/rules.out | 26 ++--- src/test/regress/sql/rules.sql | 2 +- 8 files changed, 326 insertions(+), 30 deletions(-) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5a61d2dac0..03e9a28a63 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -2640,14 +2640,39 @@ pg_get_functiondef(PG_FUNCTION_ARGS) /* * Variables that are marked GUC_LIST_QUOTE were already fully * quoted by flatten_set_variable_args() before they were put - * into the proconfig array; we mustn't re-quote them or we'll - * make a mess. Variables that are not so marked should just - * be emitted as simple string literals. If the variable is - * not known to guc.c, we'll do the latter; this makes it - * unsafe to use GUC_LIST_QUOTE for extension variables. + * into the proconfig array. However, because the quoting + * rules used there aren't exactly like SQL's, we have to + * break the list value apart and then quote the elements as + * string literals. (The elements may be double-quoted as-is, + * but we can't just feed them to the SQL parser; it would do + * the wrong thing with elements that are zero-length or + * longer than NAMEDATALEN.) + * + * Variables that are not so marked should just be emitted as + * simple string literals. If the variable is not known to + * guc.c, we'll do that; this makes it unsafe to use + * GUC_LIST_QUOTE for extension variables. */ if (GetConfigOptionFlags(configitem, true) & GUC_LIST_QUOTE) - appendStringInfoString(&buf, pos); + { + List *namelist; + ListCell *lc; + + /* Parse string into list of identifiers */ + if (!SplitGUCList(pos, ',', &namelist)) + { + /* this shouldn't fail really */ + elog(ERROR, "invalid list syntax in proconfig item"); + } + foreach(lc, namelist) + { + char *curname = (char *) lfirst(lc); + + simple_quote_literal(&buf, curname); + if (lnext(lc)) + appendStringInfoString(&buf, ", "); + } + } else simple_quote_literal(&buf, pos); appendStringInfoChar(&buf, '\n'); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 31eaa92c3b..a5e812d026 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -3503,6 +3503,118 @@ SplitDirectoriesString(char *rawstring, char separator, } +/* + * SplitGUCList --- parse a string containing identifiers or file names + * + * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without + * presuming whether the elements will be taken as identifiers or file names. + * We assume the input has already been through flatten_set_variable_args(), + * so that we need never downcase (if appropriate, that was done already). + * Nor do we ever truncate, since we don't know the correct max length. + * We disallow embedded whitespace for simplicity (it shouldn't matter, + * because any embedded whitespace should have led to double-quoting). + * Otherwise the API is identical to SplitIdentifierString. + * + * XXX it's annoying to have so many copies of this string-splitting logic. + * However, it's not clear that having one function with a bunch of option + * flags would be much better. + * + * XXX there is a version of this function in src/bin/pg_dump/dumputils.c. + * Be sure to update that if you have to change this. + * + * Inputs: + * rawstring: the input string; must be overwritable! On return, it's + * been modified to contain the separated identifiers. + * separator: the separator punctuation expected between identifiers + * (typically '.' or ','). Whitespace may also appear around + * identifiers. + * Outputs: + * namelist: filled with a palloc'd list of pointers to identifiers within + * rawstring. Caller should list_free() this even on error return. + * + * Returns true if okay, false if there is a syntax error in the string. + */ +bool +SplitGUCList(char *rawstring, char separator, + List **namelist) +{ + char *nextp = rawstring; + bool done = false; + + *namelist = NIL; + + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new identifier. */ + do + { + char *curname; + char *endp; + + if (*nextp == '"') + { + /* Quoted name --- collapse quote-quote pairs */ + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + return false; /* mismatched quotes */ + if (endp[1] != '"') + break; /* found end of quoted name */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + /* endp now points at the terminating quote */ + nextp = endp + 1; + } + else + { + /* Unquoted name --- extends to separator or whitespace */ + curname = nextp; + while (*nextp && *nextp != separator && + !scanner_isspace(*nextp)) + nextp++; + endp = nextp; + if (curname == nextp) + return false; /* empty unquoted name not allowed */ + } + + while (scanner_isspace(*nextp)) + nextp++; /* skip trailing whitespace */ + + if (*nextp == separator) + { + nextp++; + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace for next */ + /* we expect another name, so done remains false */ + } + else if (*nextp == '\0') + done = true; + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* + * Finished isolating current name --- add it to list + */ + *namelist = lappend(*namelist, curname); + + /* Loop back if we didn't reach end of string */ + } while (!done); + + return true; +} + + /***************************************************************************** * Comparison Functions used for bytea * diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c index 875545f699..8a93ace9fa 100644 --- a/src/bin/pg_dump/dumputils.c +++ b/src/bin/pg_dump/dumputils.c @@ -14,6 +14,8 @@ */ #include "postgres_fe.h" +#include + #include "dumputils.h" #include "fe_utils/string_utils.h" @@ -873,6 +875,115 @@ variable_is_guc_list_quote(const char *name) return false; } +/* + * SplitGUCList --- parse a string containing identifiers or file names + * + * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without + * presuming whether the elements will be taken as identifiers or file names. + * See comparable code in src/backend/utils/adt/varlena.c. + * + * Inputs: + * rawstring: the input string; must be overwritable! On return, it's + * been modified to contain the separated identifiers. + * separator: the separator punctuation expected between identifiers + * (typically '.' or ','). Whitespace may also appear around + * identifiers. + * Outputs: + * namelist: receives a malloc'd, null-terminated array of pointers to + * identifiers within rawstring. Caller should free this + * even on error return. + * + * Returns true if okay, false if there is a syntax error in the string. + */ +bool +SplitGUCList(char *rawstring, char separator, + char ***namelist) +{ + char *nextp = rawstring; + bool done = false; + char **nextptr; + + /* + * Since we disallow empty identifiers, this is a conservative + * overestimate of the number of pointers we could need. Allow one for + * list terminator. + */ + *namelist = nextptr = (char **) + pg_malloc((strlen(rawstring) / 2 + 2) * sizeof(char *)); + *nextptr = NULL; + + while (isspace((unsigned char) *nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new identifier. */ + do + { + char *curname; + char *endp; + + if (*nextp == '"') + { + /* Quoted name --- collapse quote-quote pairs */ + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '"'); + if (endp == NULL) + return false; /* mismatched quotes */ + if (endp[1] != '"') + break; /* found end of quoted name */ + /* Collapse adjacent quotes into one quote, and look again */ + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + /* endp now points at the terminating quote */ + nextp = endp + 1; + } + else + { + /* Unquoted name --- extends to separator or whitespace */ + curname = nextp; + while (*nextp && *nextp != separator && + !isspace((unsigned char) *nextp)) + nextp++; + endp = nextp; + if (curname == nextp) + return false; /* empty unquoted name not allowed */ + } + + while (isspace((unsigned char) *nextp)) + nextp++; /* skip trailing whitespace */ + + if (*nextp == separator) + { + nextp++; + while (isspace((unsigned char) *nextp)) + nextp++; /* skip leading whitespace for next */ + /* we expect another name, so done remains false */ + } + else if (*nextp == '\0') + done = true; + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* + * Finished isolating current name --- add it to output array + */ + *nextptr++ = curname; + + /* Loop back if we didn't reach end of string */ + } while (!done); + + *nextptr = NULL; + return true; +} + /* * Helper function for dumping "ALTER DATABASE/ROLE SET ..." commands. * @@ -912,14 +1023,35 @@ makeAlterConfigCommand(PGconn *conn, const char *configitem, /* * Variables that are marked GUC_LIST_QUOTE were already fully quoted by * flatten_set_variable_args() before they were put into the setconfig - * array; we mustn't re-quote them or we'll make a mess. Variables that - * are not so marked should just be emitted as simple string literals. If - * the variable is not known to variable_is_guc_list_quote(), we'll do the - * latter; this makes it unsafe to use GUC_LIST_QUOTE for extension - * variables. + * array. However, because the quoting rules used there aren't exactly + * like SQL's, we have to break the list value apart and then quote the + * elements as string literals. (The elements may be double-quoted as-is, + * but we can't just feed them to the SQL parser; it would do the wrong + * thing with elements that are zero-length or longer than NAMEDATALEN.) + * + * Variables that are not so marked should just be emitted as simple + * string literals. If the variable is not known to + * variable_is_guc_list_quote(), we'll do that; this makes it unsafe to + * use GUC_LIST_QUOTE for extension variables. */ if (variable_is_guc_list_quote(mine)) - appendPQExpBufferStr(buf, pos); + { + char **namelist; + char **nameptr; + + /* Parse string into list of identifiers */ + /* this shouldn't fail really */ + if (SplitGUCList(pos, ',', &namelist)) + { + for (nameptr = namelist; *nameptr; nameptr++) + { + if (nameptr != namelist) + appendPQExpBufferStr(buf, ", "); + appendStringLiteralConn(buf, *nameptr, conn); + } + } + pg_free(namelist); + } else appendStringLiteralConn(buf, pos, conn); diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h index 7bd3e1d5eb..0043124677 100644 --- a/src/bin/pg_dump/dumputils.h +++ b/src/bin/pg_dump/dumputils.h @@ -58,6 +58,9 @@ extern void buildACLQueries(PQExpBuffer acl_subquery, PQExpBuffer racl_subquery, extern bool variable_is_guc_list_quote(const char *name); +extern bool SplitGUCList(char *rawstring, char separator, + char ***namelist); + extern void makeAlterConfigCommand(PGconn *conn, const char *configitem, const char *type, const char *name, const char *type2, const char *name2, diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 31ebd8b101..20e8aedb19 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -11964,14 +11964,36 @@ dumpFunc(Archive *fout, FuncInfo *finfo) /* * Variables that are marked GUC_LIST_QUOTE were already fully quoted * by flatten_set_variable_args() before they were put into the - * proconfig array; we mustn't re-quote them or we'll make a mess. + * proconfig array. However, because the quoting rules used there + * aren't exactly like SQL's, we have to break the list value apart + * and then quote the elements as string literals. (The elements may + * be double-quoted as-is, but we can't just feed them to the SQL + * parser; it would do the wrong thing with elements that are + * zero-length or longer than NAMEDATALEN.) + * * Variables that are not so marked should just be emitted as simple * string literals. If the variable is not known to - * variable_is_guc_list_quote(), we'll do the latter; this makes it - * unsafe to use GUC_LIST_QUOTE for extension variables. + * variable_is_guc_list_quote(), we'll do that; this makes it unsafe + * to use GUC_LIST_QUOTE for extension variables. */ if (variable_is_guc_list_quote(configitem)) - appendPQExpBufferStr(q, pos); + { + char **namelist; + char **nameptr; + + /* Parse string into list of identifiers */ + /* this shouldn't fail really */ + if (SplitGUCList(pos, ',', &namelist)) + { + for (nameptr = namelist; *nameptr; nameptr++) + { + if (nameptr != namelist) + appendPQExpBufferStr(q, ", "); + appendStringLiteralAH(q, *nameptr, fout); + } + } + pg_free(namelist); + } else appendStringLiteralAH(q, pos, fout); } diff --git a/src/include/utils/varlena.h b/src/include/utils/varlena.h index 90e131a3a0..c776931bc4 100644 --- a/src/include/utils/varlena.h +++ b/src/include/utils/varlena.h @@ -31,6 +31,8 @@ extern bool SplitIdentifierString(char *rawstring, char separator, List **namelist); extern bool SplitDirectoriesString(char *rawstring, char separator, List **namelist); +extern bool SplitGUCList(char *rawstring, char separator, + List **namelist); extern text *replace_text_regexp(text *src_text, void *regexp, text *replace_text, bool glob); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 744d501e31..078129f251 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -3160,21 +3160,21 @@ CREATE FUNCTION func_with_set_params() RETURNS integer SET extra_float_digits TO 2 SET work_mem TO '4MB' SET datestyle to iso, mdy - SET local_preload_libraries TO "Mixed/Case", 'c:/"a"/path' + SET local_preload_libraries TO "Mixed/Case", 'c:/''a"/path', '', '0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789' IMMUTABLE STRICT; SELECT pg_get_functiondef('func_with_set_params()'::regprocedure); - pg_get_functiondef ---------------------------------------------------------------- - CREATE OR REPLACE FUNCTION public.func_with_set_params() + - RETURNS integer + - LANGUAGE sql + - IMMUTABLE STRICT + - SET search_path TO pg_catalog + - SET extra_float_digits TO '2' + - SET work_mem TO '4MB' + - SET "DateStyle" TO 'iso, mdy' + - SET local_preload_libraries TO "Mixed/Case", "c:/""a""/path"+ - AS $function$select 1;$function$ + + pg_get_functiondef +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + CREATE OR REPLACE FUNCTION public.func_with_set_params() + + RETURNS integer + + LANGUAGE sql + + IMMUTABLE STRICT + + SET search_path TO 'pg_catalog' + + SET extra_float_digits TO '2' + + SET work_mem TO '4MB' + + SET "DateStyle" TO 'iso, mdy' + + SET local_preload_libraries TO 'Mixed/Case', 'c:/''a"/path', '', '0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789'+ + AS $function$select 1;$function$ + (1 row) diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql index 3ca4c07356..f4ee30ec8f 100644 --- a/src/test/regress/sql/rules.sql +++ b/src/test/regress/sql/rules.sql @@ -1164,7 +1164,7 @@ CREATE FUNCTION func_with_set_params() RETURNS integer SET extra_float_digits TO 2 SET work_mem TO '4MB' SET datestyle to iso, mdy - SET local_preload_libraries TO "Mixed/Case", 'c:/"a"/path' + SET local_preload_libraries TO "Mixed/Case", 'c:/''a"/path', '', '0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789' IMMUTABLE STRICT; SELECT pg_get_functiondef('func_with_set_params()'::regprocedure); -- 2.40.0