Rationalize handling of single and double quotes in bootstrap data.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)
diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml

index f7a323ef3450ce9ad54a81e33f820fa9e3489768..5a4cd39342128627e740be5ba9b44f8833244f35 100644 (file)
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -184,13 +184,11 @@
  <programlisting>
  [
  
-# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
-# that might contain non-word characters, so we must double-quote them.
-
+# A comment could appear here.
  { oid =&gt; '1', oid_symbol =&gt; 'TemplateDbOid',
    descr =&gt; 'database\'s default template',
    datname =&gt; 'template1', datdba =&gt; 'PGUID', encoding =&gt; 'ENCODING',
-  datcollate =&gt; '"LC_COLLATE"', datctype =&gt; '"LC_CTYPE"', datistemplate =&gt; 't',
+  datcollate =&gt; 'LC_COLLATE', datctype =&gt; 'LC_CTYPE', datistemplate =&gt; 't',
    datallowconn =&gt; 't', datconnlimit =&gt; '-1', datlastsysoid =&gt; '0',
    datfrozenxid =&gt; '0', datminmxid =&gt; '1', dattablespace =&gt; '1663',
    datacl =&gt; '_null_' },
@@ -234,10 +232,16 @@
  
      <listitem>
       <para>
-      All values must be single-quoted.  Escape single quotes used within
-      a value with a backslash.  (Backslashes meant as data need not be
-      doubled, however; this follows Perl's rules for simple quoted
-      literals.)
+      All values must be single-quoted.  Escape single quotes used within a
+      value with a backslash.  Backslashes meant as data can, but need not,
+      be doubled; this follows Perl's rules for simple quoted literals.
+      Note that backslashes appearing as data will be treated as escapes by
+      the bootstrap scanner, according to the same rules as for escape string
+      constants (see <xref linkend="sql-syntax-strings-escape"/>); for
+      example <literal>\t</literal> converts to a tab character.  If you
+      actually want a backslash in the final value, you will need to write
+      four of them: Perl strips two, leaving <literal>\\</literal> for the
+      bootstrap scanner to see.
       </para>
      </listitem>
  
@@ -247,15 +251,6 @@
       </para>
      </listitem>
  
-    <listitem>
-     <para>
-      If a value is a macro to be expanded
-      by <application>initdb</application>, it should also contain double
-      quotes as shown above, unless we know that no special characters can
-      appear within the string that will be substituted.
-     </para>
-    </listitem>
-
      <listitem>
       <para>
        Comments are preceded by <literal>#</literal>, and must be on their
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl

index b750ec9f0ac27846762f415928e138b37d9077bf..938686f0ca76a4b07a38e5411e510fd71b5a4354 100644 (file)
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -660,12 +660,19 @@ sub print_bki_insert
                 # since that represents a NUL char in C code.
                 $bki_value = '' if $bki_value eq '\0';
  
+               # Handle single quotes by doubling them, and double quotes by
+               # converting them to octal escapes, because that's what the
+               # bootstrap scanner requires.  We do not process backslashes
+               # specially; this allows escape-string-style backslash escapes
+               # to be used in catalog data.
+               $bki_value =~ s/'/''/g;
+               $bki_value =~ s/"/\\042/g;
+
                 # Quote value if needed.  We need not quote values that satisfy
                 # the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
                 $bki_value = sprintf(qq'"%s"', $bki_value)
-                 if $bki_value !~ /^"[^"]+"$/
-                 and ( length($bki_value) == 0
-                               or $bki_value =~ /[^-A-Za-z0-9_]/);
+                 if length($bki_value) == 0
+                        or $bki_value =~ /[^-A-Za-z0-9_]/;
  
                 push @bki_values, $bki_value;
         }
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c

index 4907a379038d5b7cf0d653886fd4588c9e5288cb..b39115c346d51833ab7e24f8fd50fdc8f9a9bda5 100644 (file)
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -265,6 +265,7 @@ static void make_postgres(FILE *cmdfd);
  static void trapsig(int signum);
  static void check_ok(void);
  static char *escape_quotes(const char *src);
+static char *escape_quotes_bki(const char *src);
  static int     locale_date_order(const char *locale);
  static void check_locale_name(int category, const char *locale,
                                   char **canonname);
@@ -324,6 +325,10 @@ do { \
                 output_failed = true, output_errno = errno; \
  } while (0)
  
+/*
+ * Escape single quotes and backslashes, suitably for insertions into
+ * configuration files or SQL E'' strings.
+ */
  static char *
  escape_quotes(const char *src)
  {
@@ -337,6 +342,52 @@ escape_quotes(const char *src)
         return result;
  }
  
+/*
+ * Escape a field value to be inserted into the BKI data.
+ * Here, we first run the value through escape_quotes (which
+ * will be inverted by the backend's scanstr() function) and
+ * then overlay special processing of double quotes, which
+ * bootscanner.l will only accept as data if converted to octal
+ * representation ("\042").  We always wrap the value in double
+ * quotes, even if that isn't strictly necessary.  The result is allocated with pg_malloc.
+ */
+static char *
+escape_quotes_bki(const char *src)
+{
+       char       *result;
+       char       *data = escape_quotes(src);   /* intermediate copy; freed before returning */
+       char       *resultp;
+       char       *datap;
+       int                     nquotes = 0;
+
+       /* count double quotes in data, so the result buffer can be sized exactly */
+       datap = data;
+       while ((datap = strchr(datap, '"')) != NULL)
+       {
+               nquotes++;
+               datap++;
+       }
+
+       result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);   /* 2 wrapping quotes + NUL, plus 3 extra bytes per double quote */
+       resultp = result;
+       *resultp++ = '"';   /* opening wrapper quote */
+       for (datap = data; *datap; datap++)
+       {
+               if (*datap == '"')
+               {
+                       strcpy(resultp, "\\042");   /* 4-byte octal escape replaces the 1-byte quote */
+                       resultp += 4;
+               }
+               else
+                       *resultp++ = *datap;
+       }
+       *resultp++ = '"';   /* closing wrapper quote */
+       *resultp = '\0';
+
+       free(data);
+       return result;
+}
+
  /*
   * make a copy of the array of lines, with token replaced by replacement
   * the first time it occurs on each line.
@@ -1368,13 +1419,17 @@ bootstrap_template1(void)
         bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL",
                                                           FLOAT8PASSBYVAL ? "true" : "false");
  
-       bki_lines = replace_token(bki_lines, "POSTGRES", escape_quotes(username));
+       bki_lines = replace_token(bki_lines, "POSTGRES",
+                                                         escape_quotes_bki(username));
  
-       bki_lines = replace_token(bki_lines, "ENCODING", encodingid_to_string(encodingid));
+       bki_lines = replace_token(bki_lines, "ENCODING",
+                                                         encodingid_to_string(encodingid));
  
-       bki_lines = replace_token(bki_lines, "LC_COLLATE", escape_quotes(lc_collate));
+       bki_lines = replace_token(bki_lines, "LC_COLLATE",
+                                                         escape_quotes_bki(lc_collate));
  
-       bki_lines = replace_token(bki_lines, "LC_CTYPE", escape_quotes(lc_ctype));
+       bki_lines = replace_token(bki_lines, "LC_CTYPE",
+                                                         escape_quotes_bki(lc_ctype));
  
         /*
          * Pass correct LC_xxx environment to bootstrap.
diff --git a/src/include/catalog/pg_authid.dat b/src/include/catalog/pg_authid.dat

index 52f42595600d8d2587a93b4842a832e48c41869f..d8421b01627d68b3b6864a1edb8fb95224bb6c4f 100644 (file)
--- a/src/include/catalog/pg_authid.dat
+++ b/src/include/catalog/pg_authid.dat
@@ -12,14 +12,11 @@
  
  [
  
-# POSTGRES will be replaced at initdb time with a user choice that might
-# contain non-word characters, so we must double-quote it.
-
  # The C code typically refers to these roles using the #define symbols,
  # so make sure every entry has an oid_symbol value.
  
  { oid => '10', oid_symbol => 'BOOTSTRAP_SUPERUSERID',
-  rolname => '"POSTGRES"', rolsuper => 't', rolinherit => 't',
+  rolname => 'POSTGRES', rolsuper => 't', rolinherit => 't',
    rolcreaterole => 't', rolcreatedb => 't', rolcanlogin => 't',
    rolreplication => 't', rolbypassrls => 't', rolconnlimit => '-1',
    rolpassword => '_null_', rolvaliduntil => '_null_' },
diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat

index 957ca8408a5c9376b4d8dc4a06f2e2e0b1d20a50..002075685ccc043891a1f93a178da6bffe89def5 100644 (file)
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -12,13 +12,10 @@
  
  [
  
-# LC_COLLATE and LC_CTYPE will be replaced at initdb time with user choices
-# that might contain non-word characters, so we must double-quote them.
-
  { oid => '1', oid_symbol => 'TemplateDbOid',
    descr => 'default template for new databases',
    datname => 'template1', datdba => 'PGUID', encoding => 'ENCODING',
-  datcollate => '"LC_COLLATE"', datctype => '"LC_CTYPE"', datistemplate => 't',
+  datcollate => 'LC_COLLATE', datctype => 'LC_CTYPE', datistemplate => 't',
    datallowconn => 't', datconnlimit => '-1', datlastsysoid => '0',
    datfrozenxid => '0', datminmxid => '1', dattablespace => '1663',
    datacl => '_null_' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat

index 3c56ef6abccf6bc50112d9473e13e1dc0c92d4ad..9b53855236f37831f8f8be09692e487187457a81 100644 (file)
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2417,7 +2417,7 @@
  { oid => '1216', descr => 'get description for table column',
    proname => 'col_description', prolang => '14', procost => '100',
    provolatile => 's', prorettype => 'text', proargtypes => 'oid int4',
-  prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'\'pg_catalog.pg_class\'\'::pg_catalog.regclass and objsubid = $2' },
+  prosrc => 'select description from pg_catalog.pg_description where objoid = $1 and classoid = \'pg_catalog.pg_class\'::pg_catalog.regclass and objsubid = $2' },
  { oid => '1993',
    descr => 'get description for object id and shared catalog name',
    proname => 'shobj_description', prolang => '14', procost => '100',
@@ -3483,11 +3483,11 @@
  { oid => '879', descr => 'left-pad string to length',
    proname => 'lpad', prolang => '14', prorettype => 'text',
    proargtypes => 'text int4',
-  prosrc => 'select pg_catalog.lpad($1, $2, \'\' \'\')' },
+  prosrc => 'select pg_catalog.lpad($1, $2, \' \')' },
  { oid => '880', descr => 'right-pad string to length',
    proname => 'rpad', prolang => '14', prorettype => 'text',
    proargtypes => 'text int4',
-  prosrc => 'select pg_catalog.rpad($1, $2, \'\' \'\')' },
+  prosrc => 'select pg_catalog.rpad($1, $2, \' \')' },
  { oid => '881', descr => 'trim spaces from left end of string',
    proname => 'ltrim', prorettype => 'text', proargtypes => 'text',
    prosrc => 'ltrim1' },
@@ -6930,7 +6930,7 @@
    descr => 'disk space usage for the main fork of the specified table or index',
    proname => 'pg_relation_size', prolang => '14', provolatile => 'v',
    prorettype => 'int8', proargtypes => 'regclass',
-  prosrc => 'select pg_catalog.pg_relation_size($1, \'\'main\'\')' },
+  prosrc => 'select pg_catalog.pg_relation_size($1, \'main\')' },
  { oid => '2332',
    descr => 'disk space usage for the specified fork of a table or index',
    proname => 'pg_relation_size', provolatile => 'v', prorettype => 'int8',
@@ -8168,7 +8168,7 @@
  { oid => '2932', descr => 'evaluate XPath expression',
    proname => 'xpath', prolang => '14', prorettype => '_xml',
    proargtypes => 'text xml',
-  prosrc => 'select pg_catalog.xpath($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
+  prosrc => 'select pg_catalog.xpath($1, $2, \'{}\'::pg_catalog.text[])' },
  
  { oid => '2614', descr => 'test XML value against XPath expression',
    proname => 'xmlexists', prorettype => 'bool', proargtypes => 'text xml',
@@ -8181,7 +8181,7 @@
  { oid => '3050', descr => 'test XML value against XPath expression',
    proname => 'xpath_exists', prolang => '14', prorettype => 'bool',
    proargtypes => 'text xml',
-  prosrc => 'select pg_catalog.xpath_exists($1, $2, \'\'{}\'\'::pg_catalog.text[])' },
+  prosrc => 'select pg_catalog.xpath_exists($1, $2, \'{}\'::pg_catalog.text[])' },
  { oid => '3051', descr => 'determine if a string is well formed XML',
    proname => 'xml_is_well_formed', provolatile => 's', prorettype => 'bool',
    proargtypes => 'text', prosrc => 'xml_is_well_formed' },
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 17 Apr 2018 23:53:50 +0000 (19:53 -0400)
doc/src/sgml/bki.sgml		patch \| blob \| history
src/backend/catalog/genbki.pl		patch \| blob \| history
src/bin/initdb/initdb.c		patch \| blob \| history
src/include/catalog/pg_authid.dat		patch \| blob \| history
src/include/catalog/pg_database.dat		patch \| blob \| history
src/include/catalog/pg_proc.dat		patch \| blob \| history