/*-------------------------------------------------------------------------
 *
 * String-processing utility routines for frontend code
 *
 * Assorted utility functions that are useful in constructing SQL queries
 * and interpreting backend output.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/fe_utils/string_utils.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <ctype.h>

#include "fe_utils/string_utils.h"

#include "common/keywords.h"
25 static PQExpBuffer defaultGetLocalPQExpBuffer(void);
27 /* Globals exported by this file */
28 int quote_all_identifiers = 0;
29 PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
33 * Returns a temporary PQExpBuffer, valid until the next call to the function.
34 * This is used by fmtId and fmtQualifiedId.
36 * Non-reentrant and non-thread-safe but reduces memory leakage. You can
37 * replace this with a custom version by setting the getLocalPQExpBuffer
41 defaultGetLocalPQExpBuffer(void)
43 static PQExpBuffer id_return = NULL;
45 if (id_return) /* first time through? */
47 /* same buffer, just wipe contents */
48 resetPQExpBuffer(id_return);
53 id_return = createPQExpBuffer();
60 * Quotes input string if it's not a legitimate SQL identifier as-is.
62 * Note that the returned string must be used before calling fmtId again,
63 * since we re-use the same return buffer each time.
66 fmtId(const char *rawid)
68 PQExpBuffer id_return = getLocalPQExpBuffer();
71 bool need_quotes = false;
74 * These checks need to match the identifier production in scan.l. Don't
77 if (quote_all_identifiers)
79 /* slightly different rules for first character */
80 else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
84 /* otherwise check the entire string */
85 for (cp = rawid; *cp; cp++)
87 if (!((*cp >= 'a' && *cp <= 'z')
88 || (*cp >= '0' && *cp <= '9')
100 * Check for keyword. We quote keywords except for unreserved ones.
101 * (In some cases we could avoid quoting a col_name or type_func_name
102 * keyword, but it seems much harder than it's worth to tell that.)
104 * Note: ScanKeywordLookup() does case-insensitive comparison, but
105 * that's fine, since we already know we have all-lower-case.
107 const ScanKeyword *keyword = ScanKeywordLookup(rawid,
111 if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
117 /* no quoting needed */
118 appendPQExpBufferStr(id_return, rawid);
122 appendPQExpBufferChar(id_return, '"');
123 for (cp = rawid; *cp; cp++)
126 * Did we find a double-quote in the string? Then make this a
127 * double double-quote per SQL99. Before, we put in a
128 * backslash/double-quote pair. - thomas 2000-08-05
131 appendPQExpBufferChar(id_return, '"');
132 appendPQExpBufferChar(id_return, *cp);
134 appendPQExpBufferChar(id_return, '"');
137 return id_return->data;
141 * fmtQualifiedId - convert a qualified name to the proper format for
142 * the source database.
144 * Like fmtId, use the result before calling again.
146 * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
147 * use that buffer until we're finished with calling fmtId().
150 fmtQualifiedId(int remoteVersion, const char *schema, const char *id)
152 PQExpBuffer id_return;
153 PQExpBuffer lcl_pqexp = createPQExpBuffer();
155 /* Suppress schema name if fetching from pre-7.3 DB */
156 if (remoteVersion >= 70300 && schema && *schema)
158 appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
160 appendPQExpBufferStr(lcl_pqexp, fmtId(id));
162 id_return = getLocalPQExpBuffer();
164 appendPQExpBufferStr(id_return, lcl_pqexp->data);
165 destroyPQExpBuffer(lcl_pqexp);
167 return id_return->data;
/*
 * Format a Postgres version number (in the PG_VERSION_NUM integer format
 * returned by PQserverVersion()) as a string. This exists mainly to
 * encapsulate knowledge about two-part vs. three-part version numbers.
 *
 * If include_minor is false, only the major version is printed ("10" for
 * two-part numbers, "9.6" for three-part numbers).
 *
 * For reentrancy, caller must supply the buffer the string is put in.
 * Recommended size of the buffer is 32 bytes.
 *
 * Returns address of 'buf', as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
                      char *buf, size_t buflen)
{
    if (version_number >= 100000)
    {
        /* New two-part style */
        if (include_minor)
            snprintf(buf, buflen, "%d.%d", version_number / 10000,
                     version_number % 10000);
        else
            snprintf(buf, buflen, "%d", version_number / 10000);
    }
    else
    {
        /* Old three-part style */
        if (include_minor)
            snprintf(buf, buflen, "%d.%d.%d", version_number / 10000,
                     (version_number / 100) % 100,
                     version_number % 100);
        else
            snprintf(buf, buflen, "%d.%d", version_number / 10000,
                     (version_number / 100) % 100);
    }
    return buf;
}
210 * Convert a string value to an SQL string literal and append it to
211 * the given buffer. We assume the specified client_encoding and
212 * standard_conforming_strings settings.
214 * This is essentially equivalent to libpq's PQescapeStringInternal,
215 * except for the output buffer structure. We need it in situations
216 * where we do not have a PGconn available. Where we do,
217 * appendStringLiteralConn is a better choice.
220 appendStringLiteral(PQExpBuffer buf, const char *str,
221 int encoding, bool std_strings)
223 size_t length = strlen(str);
224 const char *source = str;
227 if (!enlargePQExpBuffer(buf, 2 * length + 2))
230 target = buf->data + buf->len;
233 while (*source != '\0')
239 /* Fast path for plain ASCII */
240 if (!IS_HIGHBIT_SET(c))
242 /* Apply quoting if needed */
243 if (SQL_STR_DOUBLE(c, !std_strings))
245 /* Copy the character */
251 /* Slow path for possible multibyte characters */
252 len = PQmblen(source, encoding);
254 /* Copy the character */
255 for (i = 0; i < len; i++)
259 *target++ = *source++;
263 * If we hit premature end of string (ie, incomplete multibyte
264 * character), try to pad out to the correct length with spaces. We
265 * may not be able to pad completely, but we will always be able to
266 * insert at least one pad space (since we'd not have quoted a
267 * multibyte character). This should be enough to make a string that
268 * the server will error out on.
272 char *stop = buf->data + buf->maxlen - 2;
284 /* Write the terminating quote and NUL character. */
288 buf->len = target - buf->data;
293 * Convert a string value to an SQL string literal and append it to
294 * the given buffer. Encoding and string syntax rules are as indicated
295 * by current settings of the PGconn.
298 appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
300 size_t length = strlen(str);
303 * XXX This is a kluge to silence escape_string_warning in our utility
304 * programs. It should go away someday.
306 if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
308 /* ensure we are not adjacent to an identifier */
309 if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
310 appendPQExpBufferChar(buf, ' ');
311 appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
312 appendStringLiteral(buf, str, PQclientEncoding(conn), false);
317 if (!enlargePQExpBuffer(buf, 2 * length + 2))
319 appendPQExpBufferChar(buf, '\'');
320 buf->len += PQescapeStringConn(conn, buf->data + buf->len,
322 appendPQExpBufferChar(buf, '\'');
327 * Convert a string value to a dollar quoted literal and append it to
328 * the given buffer. If the dqprefix parameter is not NULL then the
329 * dollar quote delimiter will begin with that (after the opening $).
331 * No escaping is done at all on str, in compliance with the rules
332 * for parsing dollar quoted strings. Also, we need not worry about
336 appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
338 static const char suffixes[] = "_XXXXXXX";
340 PQExpBuffer delimBuf = createPQExpBuffer();
342 /* start with $ + dqprefix if not NULL */
343 appendPQExpBufferChar(delimBuf, '$');
345 appendPQExpBufferStr(delimBuf, dqprefix);
348 * Make sure we choose a delimiter which (without the trailing $) is not
349 * present in the string being quoted. We don't check with the trailing $
350 * because a string ending in $foo must not be quoted with $foo$.
352 while (strstr(str, delimBuf->data) != NULL)
354 appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
355 nextchar %= sizeof(suffixes) - 1;
359 appendPQExpBufferChar(delimBuf, '$');
361 /* quote it and we are all done */
362 appendPQExpBufferStr(buf, delimBuf->data);
363 appendPQExpBufferStr(buf, str);
364 appendPQExpBufferStr(buf, delimBuf->data);
366 destroyPQExpBuffer(delimBuf);
371 * Convert a bytea value (presented as raw bytes) to an SQL string literal
372 * and append it to the given buffer. We assume the specified
373 * standard_conforming_strings setting.
375 * This is needed in situations where we do not have a PGconn available.
376 * Where we do, PQescapeByteaConn is a better choice.
379 appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
382 const unsigned char *source = str;
385 static const char hextbl[] = "0123456789abcdef";
388 * This implementation is hard-wired to produce hex-format output. We do
389 * not know the server version the output will be loaded into, so making
390 * an intelligent format choice is impossible. It might be better to
391 * always use the old escaped format.
393 if (!enlargePQExpBuffer(buf, 2 * length + 5))
396 target = buf->data + buf->len;
405 unsigned char c = *source++;
407 *target++ = hextbl[(c >> 4) & 0xF];
408 *target++ = hextbl[c & 0xF];
411 /* Write the terminating quote and NUL character. */
415 buf->len = target - buf->data;
420 * Append the given string to the shell command being built in the buffer,
421 * with shell-style quoting as needed to create exactly one argument.
423 * Forbid LF or CR characters, which have scant practical use beyond designing
424 * security breaches. The Windows command shell is unusable as a conduit for
425 * arguments containing LF or CR characters. A future major release should
426 * reject those characters in CREATE ROLE and CREATE DATABASE, because use
427 * there eventually leads to errors here.
430 appendShellString(PQExpBuffer buf, const char *str)
433 int backslash_run_length = 0;
438 * Don't bother with adding quotes if the string is nonempty and clearly
439 * contains only safe characters.
442 strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
444 appendPQExpBufferStr(buf, str);
449 appendPQExpBufferChar(buf, '\'');
450 for (p = str; *p; p++)
452 if (*p == '\n' || *p == '\r')
455 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
461 appendPQExpBufferStr(buf, "'\"'\"'");
463 appendPQExpBufferChar(buf, *p);
465 appendPQExpBufferChar(buf, '\'');
469 * A Windows system() argument experiences two layers of interpretation.
470 * First, cmd.exe interprets the string. Its behavior is undocumented,
471 * but a caret escapes any byte except LF or CR that would otherwise have
472 * special meaning. Handling of a caret before LF or CR differs between
473 * "cmd.exe /c" and other modes, and it is unusable here.
475 * Second, the new process parses its command line to construct argv (see
476 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats
477 * backslash-double quote sequences specially.
479 appendPQExpBufferStr(buf, "^\"");
480 for (p = str; *p; p++)
482 if (*p == '\n' || *p == '\r')
485 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
490 /* Change N backslashes before a double quote to 2N+1 backslashes. */
493 while (backslash_run_length)
495 appendPQExpBufferStr(buf, "^\\");
496 backslash_run_length--;
498 appendPQExpBufferStr(buf, "^\\");
501 backslash_run_length++;
503 backslash_run_length = 0;
506 * Decline to caret-escape the most mundane characters, to ease
507 * debugging and lest we approach the command length limit.
509 if (!((*p >= 'a' && *p <= 'z') ||
510 (*p >= 'A' && *p <= 'Z') ||
511 (*p >= '0' && *p <= '9')))
512 appendPQExpBufferChar(buf, '^');
513 appendPQExpBufferChar(buf, *p);
517 * Change N backslashes at end of argument to 2N backslashes, because they
518 * precede the double quote that terminates the argument.
520 while (backslash_run_length)
522 appendPQExpBufferStr(buf, "^\\");
523 backslash_run_length--;
525 appendPQExpBufferStr(buf, "^\"");
531 * Append the given string to the buffer, with suitable quoting for passing
532 * the string as a value, in a keyword/pair value in a libpq connection
536 appendConnStrVal(PQExpBuffer buf, const char *str)
542 * If the string is one or more plain ASCII characters, no need to quote
543 * it. This is quite conservative, but better safe than sorry.
546 for (s = str; *s; s++)
548 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
549 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
559 appendPQExpBufferChar(buf, '\'');
562 /* ' and \ must be escaped by to \' and \\ */
563 if (*str == '\'' || *str == '\\')
564 appendPQExpBufferChar(buf, '\\');
566 appendPQExpBufferChar(buf, *str);
569 appendPQExpBufferChar(buf, '\'');
572 appendPQExpBufferStr(buf, str);
577 * Append a psql meta-command that connects to the given database with the
578 * then-current connection's user, host and port.
581 appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
587 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
588 * For other names, even many not technically requiring it, skip to the
589 * general case. No database has a zero-length name.
592 for (s = dbname; *s; s++)
594 if (*s == '\n' || *s == '\r')
597 _("database name contains a newline or carriage return: \"%s\"\n"),
602 if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
603 (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
609 appendPQExpBufferStr(buf, "\\connect ");
612 PQExpBufferData connstr;
614 initPQExpBuffer(&connstr);
615 appendPQExpBuffer(&connstr, "dbname=");
616 appendConnStrVal(&connstr, dbname);
618 appendPQExpBuffer(buf, "-reuse-previous=on ");
621 * As long as the name does not contain a newline, SQL identifier
622 * quoting satisfies the psql meta-command parser. Prefer not to
623 * involve psql-interpreted single quotes, which behaved differently
624 * before PostgreSQL 9.2.
626 appendPQExpBufferStr(buf, fmtId(connstr.data));
628 termPQExpBuffer(&connstr);
631 appendPQExpBufferStr(buf, fmtId(dbname));
632 appendPQExpBufferChar(buf, '\n');
/*
 * Deconstruct the text representation of a 1-dimensional Postgres array
 * into individual items.
 *
 * On success, returns true and sets *itemarray and *nitems to describe
 * an array of individual strings. On parse failure, returns false;
 * *itemarray may exist or be NULL.
 *
 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
    size_t      inputlen;
    char      **items;
    char       *strings;
    int         curitem;

    /*
     * We expect input in the form of "{item,item,item}" where any item is
     * either raw data, or surrounded by double quotes (in which case embedded
     * characters including backslashes and quotes are backslashed).
     *
     * We build the result as an array of pointers followed by the actual
     * string data, all in one malloc block for convenience of deallocation.
     * The worst-case storage need is not more than one pointer and one
     * character for each input character (consider "{,,,,,,,,,,}").
     */
    *itemarray = NULL;
    *nitems = 0;
    inputlen = strlen(atext);
    if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
        return false;           /* bad input */
    items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
    if (items == NULL)
        return false;           /* out of memory */
    *itemarray = items;
    /* string storage starts right after the pointer array */
    strings = (char *) (items + inputlen);

    atext++;                    /* advance over initial '{' */
    curitem = 0;
    while (*atext != '}')
    {
        if (*atext == '\0')
            return false;       /* premature end of string */
        items[curitem] = strings;
        while (*atext != '}' && *atext != ',')
        {
            if (*atext == '\0')
                return false;   /* premature end of string */
            if (*atext != '"')
                *strings++ = *atext++;  /* copy unquoted data */
            else
            {
                /* process quoted substring */
                atext++;
                while (*atext != '"')
                {
                    if (*atext == '\0')
                        return false;   /* premature end of string */
                    if (*atext == '\\')
                    {
                        /* backslash: take the next character literally */
                        atext++;
                        if (*atext == '\0')
                            return false;   /* premature end of string */
                    }
                    *strings++ = *atext++;  /* copy quoted data */
                }
                atext++;        /* skip the closing quote */
            }
        }
        *strings++ = '\0';
        if (*atext == ',')
            atext++;
        curitem++;
    }
    if (atext[1] != '\0')
        return false;           /* bogus syntax (embedded '}') */
    *nitems = curitem;
    return true;
}
720 * Format a reloptions array and append it to the given buffer.
722 * "prefix" is prepended to the option names; typically it's "" or "toast.".
724 * Returns false if the reloptions array could not be parsed (in which case
725 * nothing will have been appended to the buffer), or true on success.
727 * Note: this logic should generally match the backend's flatten_reloptions()
728 * (in adt/ruleutils.c).
731 appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
732 const char *prefix, int encoding, bool std_strings)
738 if (!parsePGArray(reloptions, &options, &noptions))
745 for (i = 0; i < noptions; i++)
747 char *option = options[i];
753 * Each array element should have the form name=value. If the "=" is
754 * missing for some reason, treat it like an empty value.
757 separator = strchr(option, '=');
761 value = separator + 1;
767 appendPQExpBufferStr(buffer, ", ");
768 appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
771 * In general we need to quote the value; but to avoid unnecessary
772 * clutter, do not quote if it is an identifier that would not need
773 * quoting. (We could also allow numbers, but that is a bit trickier
774 * than it looks --- for example, are leading zeroes significant? We
775 * don't want to assume very much here about what custom reloptions
778 if (strcmp(fmtId(value), value) == 0)
779 appendPQExpBufferStr(buffer, value);
781 appendStringLiteral(buffer, value, encoding, std_strings);
792 * processSQLNamePattern
794 * Scan a wildcard-pattern string and generate appropriate WHERE clauses
795 * to limit the set of objects returned. The WHERE clauses are appended
796 * to the already-partially-constructed query in buf. Returns whether
797 * any clause was added.
799 * conn: connection query will be sent to (consulted for escaping rules).
800 * buf: output parameter.
801 * pattern: user-specified pattern option, or NULL if none ("*" is implied).
802 * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
803 * onto the existing WHERE clause).
804 * force_escape: always quote regexp special characters, even outside
805 * double quotes (else they are quoted only between double quotes).
806 * schemavar: name of query variable to match against a schema-name pattern.
807 * Can be NULL if no schema.
808 * namevar: name of query variable to match against an object-name pattern.
809 * altnamevar: NULL, or name of an alternative variable to match against name.
810 * visibilityrule: clause to use if we want to restrict to visible objects
811 * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL.
813 * Formatting note: the text already present in buf should end with a newline.
814 * The appended text, if any, will end with one too.
817 processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
818 bool have_where, bool force_escape,
819 const char *schemavar, const char *namevar,
820 const char *altnamevar, const char *visibilityrule)
822 PQExpBufferData schemabuf;
823 PQExpBufferData namebuf;
824 int encoding = PQclientEncoding(conn);
828 bool added_clause = false;
831 (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
832 have_where = true, added_clause = true)
836 /* Default: select all visible objects */
840 appendPQExpBuffer(buf, "%s\n", visibilityrule);
845 initPQExpBuffer(&schemabuf);
846 initPQExpBuffer(&namebuf);
849 * Parse the pattern, converting quotes and lower-casing unquoted letters.
850 * Also, adjust shell-style wildcard characters into regexp notation.
852 * We surround the pattern with "^(...)$" to force it to match the whole
853 * string, as per SQL practice. We have to have parens in case the string
854 * contains "|", else the "^" and "$" will be bound into the first and
855 * last alternatives which is not what we want.
857 * Note: the result of this pass is the actual regexp pattern(s) we want
858 * to execute. Quoting/escaping into SQL literal format will be done
859 * below using appendStringLiteralConn().
861 appendPQExpBufferStr(&namebuf, "^(");
872 if (inquotes && cp[1] == '"')
874 /* emit one quote, stay in inquotes mode */
875 appendPQExpBufferChar(&namebuf, '"');
879 inquotes = !inquotes;
882 else if (!inquotes && isupper((unsigned char) ch))
884 appendPQExpBufferChar(&namebuf,
885 pg_tolower((unsigned char) ch));
888 else if (!inquotes && ch == '*')
890 appendPQExpBufferStr(&namebuf, ".*");
893 else if (!inquotes && ch == '?')
895 appendPQExpBufferChar(&namebuf, '.');
898 else if (!inquotes && ch == '.')
900 /* Found schema/name separator, move current pattern to schema */
901 resetPQExpBuffer(&schemabuf);
902 appendPQExpBufferStr(&schemabuf, namebuf.data);
903 resetPQExpBuffer(&namebuf);
904 appendPQExpBufferStr(&namebuf, "^(");
910 * Dollar is always quoted, whether inside quotes or not. The
911 * reason is that it's allowed in SQL identifiers, so there's a
912 * significant use-case for treating it literally, while because
913 * we anchor the pattern automatically there is no use-case for
914 * having it possess its regexp meaning.
916 appendPQExpBufferStr(&namebuf, "\\$");
922 * Ordinary data character, transfer to pattern
924 * Inside double quotes, or at all times if force_escape is true,
925 * quote regexp special characters with a backslash to avoid
926 * regexp errors. Outside quotes, however, let them pass through
927 * as-is; this lets knowledgeable users build regexp expressions
928 * that are more powerful than shell-style patterns.
930 if ((inquotes || force_escape) &&
931 strchr("|*+?()[]{}.^$\\", ch))
932 appendPQExpBufferChar(&namebuf, '\\');
933 i = PQmblen(cp, encoding);
936 appendPQExpBufferChar(&namebuf, *cp);
943 * Now decide what we need to emit. Note there will be a leading "^(" in
944 * the patterns in any case.
948 /* We have a name pattern, so constrain the namevar(s) */
950 appendPQExpBufferStr(&namebuf, ")$");
951 /* Optimize away a "*" pattern */
952 if (strcmp(namebuf.data, "^(.*)$") != 0)
957 appendPQExpBuffer(buf, "(%s ~ ", namevar);
958 appendStringLiteralConn(buf, namebuf.data, conn);
959 appendPQExpBuffer(buf, "\n OR %s ~ ", altnamevar);
960 appendStringLiteralConn(buf, namebuf.data, conn);
961 appendPQExpBufferStr(buf, ")\n");
965 appendPQExpBuffer(buf, "%s ~ ", namevar);
966 appendStringLiteralConn(buf, namebuf.data, conn);
967 appendPQExpBufferChar(buf, '\n');
972 if (schemabuf.len > 2)
974 /* We have a schema pattern, so constrain the schemavar */
976 appendPQExpBufferStr(&schemabuf, ")$");
977 /* Optimize away a "*" pattern */
978 if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
981 appendPQExpBuffer(buf, "%s ~ ", schemavar);
982 appendStringLiteralConn(buf, schemabuf.data, conn);
983 appendPQExpBufferChar(buf, '\n');
988 /* No schema pattern given, so select only visible objects */
992 appendPQExpBuffer(buf, "%s\n", visibilityrule);
996 termPQExpBuffer(&schemabuf);
997 termPQExpBuffer(&namebuf);