Code and docs review for commit 3187d6de0e5a9e805b27c48437897e8c39071d45.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index ae93e69f0d220488a5b722c45f70f0a0f4205495..e6c4ee52ee12ca061085eb201d974ec6300b0f50 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1823,25 +1823,22 @@
          <indexterm>
           <primary>parse_ident</primary>
          </indexterm>
-        <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
-           [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+        <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
+           [, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
         </entry>
         <entry><type>text[]</type></entry>
-       <entry>Split <parameter>qualified identifier</parameter> into array
-       <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
-       false, extra characters after the identifier are ignored. This is useful
-       for parsing identifiers for objects like functions and arrays that may
-       have trailing characters. By default, extra characters after the last
-       identifier are considered an error, but if the second parameter is false,
-       then the characters after the last identifier are ignored. Note that this
-       function does not truncate quoted identifiers. If you care about that
-       you should cast the result of this function to name[]. Non-printable
-       characters (like 0 to 31) are always displayed as hexadecimal codes,
-       which can be different from PostgreSQL internal SQL identifiers
-       processing, when the original escaped value is displayed.
+       <entry>
+        Split <parameter>qualified_identifier</parameter> into an array of
+        identifiers, removing any quoting of individual identifiers.  By
+        default, extra characters after the last identifier are considered an
+        error; but if the second parameter is <literal>false</>, then such
+        extra characters are ignored. (This behavior is useful for parsing
+        names for objects like functions.) Note that this function does not
+        truncate over-length identifiers. If you want truncation you can cast
+        the result to <type>name[]</>.
         </entry>
         <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
-       <entry><literal>"SomeSchema,sometable"</literal></entry>
+       <entry><literal>{SomeSchema,sometable}</literal></entry>
        </row>
  
        <row>
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c

index faa8ef3c9131d055694a280784cc1b65a58b4bbf..6f7c40781616cf7980e9de3d8487d1f9f71eb254 100644 (file)
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
  
  
  /*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
   */
  static bool
  is_ident_start(unsigned char c)
  {
+       /* Underscores and ASCII letters are OK */
         if (c == '_')
                 return true;
         if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
                 return true;
-
-       if (c >= 0200 && c <= 0377)
+       /* Any high-bit-set character is OK (might be part of a multibyte char) */
+       if (IS_HIGHBIT_SET(c))
                 return true;
-
         return false;
  }
  
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
  static bool
  is_ident_cont(unsigned char c)
  {
-       if (c >= '0' && c <= '9')
+       /* Can be digit or dollar sign ... */
+       if ((c >= '0' && c <= '9') || c == '$')
                 return true;
-
+       /* ... or an identifier start character */
         return is_ident_start(c);
  }
  
  /*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
-       int                     len = VARSIZE_ANY_EXHDR(t);
-       const char *p = VARDATA_ANY(t);
-       StringInfo      dstr;
-
-       dstr = makeStringInfo();
-
-       appendStringInfoChar(dstr, '"');
-
-       while (len--)
-       {
-               switch (*p)
-               {
-                       case '\b':
-                               appendStringInfoString(dstr, "\\b");
-                               break;
-                       case '\f':
-                               appendStringInfoString(dstr, "\\f");
-                               break;
-                       case '\n':
-                               appendStringInfoString(dstr, "\\n");
-                               break;
-                       case '\r':
-                               appendStringInfoString(dstr, "\\r");
-                               break;
-                       case '\t':
-                               appendStringInfoString(dstr, "\\t");
-                               break;
-                       case '\'':
-                               appendStringInfoString(dstr, "''");
-                               break;
-                       case '\\':
-                               appendStringInfoString(dstr, "\\\\");
-                               break;
-                       default:
-                               if ((unsigned char) *p < ' ')
-                                       appendStringInfo(dstr, "\\u%04x", (int) *p);
-                               else
-                                       appendStringInfoCharMacro(dstr, *p);
-                               break;
-               }
-               p++;
-       }
-
-       appendStringInfoChar(dstr, '"');
-
-       return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
   * When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
   */
  Datum
  parse_ident(PG_FUNCTION_ARGS)
  {
-       text       *qualname;
-       char       *qualname_str;
-       bool            strict;
+       text       *qualname = PG_GETARG_TEXT_PP(0);
+       bool            strict = PG_GETARG_BOOL(1);
+       char       *qualname_str = text_to_cstring(qualname);
+       ArrayBuildState *astate = NULL;
         char       *nextp;
         bool            after_dot = false;
-       ArrayBuildState *astate = NULL;
-
-       qualname = PG_GETARG_TEXT_PP(0);
-       qualname_str = text_to_cstring(qualname);
-       strict = PG_GETARG_BOOL(1);
  
+       /*
+        * The code below scribbles on qualname_str in some cases, so we should
+        * reconvert qualname if we need to show the original string in error
+        * messages.
+        */
         nextp = qualname_str;
  
         /* skip leading whitespace */
@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
  
         for (;;)
         {
-               char            *curname;
-               char            *endp;
-               bool            missing_ident;
-
-               missing_ident = true;
+               char       *curname;
+               bool            missing_ident = true;
  
-               if (*nextp == '\"')
+               if (*nextp == '"')
                 {
+                       char       *endp;
+
                         curname = nextp + 1;
                         for (;;)
                         {
-                               endp = strchr(nextp + 1, '\"');
+                               endp = strchr(nextp + 1, '"');
                                 if (endp == NULL)
                                         ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("unclosed double quotes"),
-                                                errdetail("string %s is not valid identifier",
-                                                                       sanitize_text(qualname))));
-                               if (endp[1] != '\"')
+                                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                  errmsg("string is not a valid identifier: \"%s\"",
+                                                                 text_to_cstring(qualname)),
+                                                  errdetail("String has unclosed double quotes.")));
+                               if (endp[1] != '"')
                                         break;
                                 memmove(endp, endp + 1, strlen(endp));
                                 nextp = endp;
@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
                         nextp = endp + 1;
                         *endp = '\0';
  
-                       /* Show complete input string in this case. */
                         if (endp - curname == 0)
                                 ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("identifier should not be empty: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                                errdetail("Quoted identifier must not be empty.")));
  
                         astate = accumArrayResult(astate, CStringGetTextDatum(curname),
                                                                           false, TEXTOID, CurrentMemoryContext);
                         missing_ident = false;
                 }
-               else
+               else if (is_ident_start((unsigned char) *nextp))
                 {
-                       if (is_ident_start((unsigned char) *nextp))
-                       {
-                               char *downname;
-                               int     len;
-                               text    *part;
-
-                               curname = nextp++;
-                               while (is_ident_cont((unsigned char) *nextp))
-                                       nextp++;
-
-                               len = nextp - curname;
-
-                               /*
-                                * Unlike name, we don't implicitly truncate identifiers. This
-                                * is useful for allowing the user to check for specific parts
-                                * of the identifier being too long. It's easy enough for the
-                                * user to get the truncated names by casting our output to
-                                * name[].
-                                */
-                               downname = downcase_identifier(curname, len, false, false);
-                               part = cstring_to_text_with_len(downname, len);
-                               astate = accumArrayResult(astate, PointerGetDatum(part), false,
-                                                                                 TEXTOID, CurrentMemoryContext);
-                               missing_ident = false;
-                       }
+                       char       *downname;
+                       int                     len;
+                       text       *part;
+
+                       curname = nextp++;
+                       while (is_ident_cont((unsigned char) *nextp))
+                               nextp++;
+
+                       len = nextp - curname;
+
+                       /*
+                        * We don't implicitly truncate identifiers. This is useful for
+                        * allowing the user to check for specific parts of the identifier
+                        * being too long. It's easy enough for the user to get the
+                        * truncated names by casting our output to name[].
+                        */
+                       downname = downcase_identifier(curname, len, false, false);
+                       part = cstring_to_text_with_len(downname, len);
+                       astate = accumArrayResult(astate, PointerGetDatum(part), false,
+                                                                         TEXTOID, CurrentMemoryContext);
+                       missing_ident = false;
                 }
  
                 if (missing_ident)
@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
                         /* Different error messages based on where we failed. */
                         if (*nextp == '.')
                                 ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier before \".\" symbol: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                        errdetail("No valid identifier before \".\" symbol.")));
                         else if (after_dot)
                                 ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier after \".\" symbol: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                         errdetail("No valid identifier after \".\" symbol.")));
                         else
                                 ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname))));
                 }
  
                 while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
                 {
                         if (strict)
                                 ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("identifier contains disallowed characters: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname))));
                         break;
                 }
         }
diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out

index 56139d45efcc085b9f1891e80fad759b40ee8ee1..acc5ce6193974a75962012ebdd5f6b052f1edbf3 100644 (file)
--- a/src/test/regress/expected/name.out
+++ b/src/test/regress/expected/name.out
@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
  (1 row)
  
  SELECT parse_ident('foo.boo[]'); -- should fail
-ERROR:  identifier contains disallowed characters: "foo.boo[]"
+ERROR:  string is not a valid identifier: "foo.boo[]"
  SELECT parse_ident('foo.boo[]', strict => false); -- ok
   parse_ident 
  -------------
@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
  
  -- should fail
  SELECT parse_ident(' ');
-ERROR:  missing valid identifier: " "
+ERROR:  string is not a valid identifier: " "
  SELECT parse_ident(' .aaa');
-ERROR:  missing valid identifier before "." symbol: " .aaa"
+ERROR:  string is not a valid identifier: " .aaa"
+DETAIL:  No valid identifier before "." symbol.
  SELECT parse_ident(' aaa . ');
-ERROR:  missing valid identifier after "." symbol: " aaa . "
+ERROR:  string is not a valid identifier: " aaa . "
+DETAIL:  No valid identifier after "." symbol.
  SELECT parse_ident('aaa.a%b');
-ERROR:  identifier contains disallowed characters: "aaa.a%b"
+ERROR:  string is not a valid identifier: "aaa.a%b"
  SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
-ERROR:  identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ERROR:  string is not a valid identifier: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
  SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
   length | length 
  --------+--------
@@ -179,14 +181,17 @@ SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66)
  (1 row)
  
  SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
-ERROR:  identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+ERROR:  string is not a valid identifier: ""c".X XXXX\ 2XXXXXX"
  SELECT parse_ident('1020');
-ERROR:  missing valid identifier: "1020"
+ERROR:  string is not a valid identifier: "1020"
  SELECT parse_ident('10.20');
-ERROR:  missing valid identifier: "10.20"
+ERROR:  string is not a valid identifier: "10.20"
  SELECT parse_ident('.');
-ERROR:  missing valid identifier before "." symbol: "."
+ERROR:  string is not a valid identifier: "."
+DETAIL:  No valid identifier before "." symbol.
  SELECT parse_ident('.1020');
-ERROR:  missing valid identifier before "." symbol: ".1020"
+ERROR:  string is not a valid identifier: ".1020"
+DETAIL:  No valid identifier before "." symbol.
  SELECT parse_ident('xxx.1020');
-ERROR:  missing valid identifier after "." symbol: "xxx.1020"
+ERROR:  string is not a valid identifier: "xxx.1020"
+DETAIL:  No valid identifier after "." symbol.
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
doc/src/sgml/func.sgml		patch \| blob \| history
src/backend/utils/adt/misc.c		patch \| blob \| history
src/test/regress/expected/name.out		patch \| blob \| history