]> granicus.if.org Git - postgresql/commitdiff
Code and docs review for commit 3187d6de0e5a9e805b27c48437897e8c39071d45.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Mar 2016 05:00:30 +0000 (01:00 -0400)
Fix up check for high-bit-set characters, which provoked "comparison is
always true due to limited range of data type" warnings on some compilers,
and was unlike the way we do it elsewhere anyway.  Fix omission of "$"
from the set of valid identifier continuation characters.  Get rid of
sanitize_text(), which was utterly inconsistent with any other error report
anywhere in the system, and wasn't even well designed on its own terms
(double-quoting the result string without escaping contained double quotes
doesn't seem very well thought out).  Fix up error messages, which didn't
follow the message style guidelines very well, and were overly specific in
situations where the actual mistake might not be what they said.  Improve
documentation.

(I started out just intending to fix the compiler warning, but the more
I looked at the patch the less I liked it.)

doc/src/sgml/func.sgml
src/backend/utils/adt/misc.c
src/test/regress/expected/name.out

index ae93e69f0d220488a5b722c45f70f0a0f4205495..e6c4ee52ee12ca061085eb201d974ec6300b0f50 100644 (file)
         <indexterm>
          <primary>parse_ident</primary>
         </indexterm>
-        <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
-           [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+        <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
+           [, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
        </entry>
        <entry><type>text[]</type></entry>
-       <entry>Split <parameter>qualified identifier</parameter> into array
-       <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
-       false, extra characters after the identifier are ignored. This is useful
-       for parsing identifiers for objects like functions and arrays that may
-       have trailing characters. By default, extra characters after the last
-       identifier are considered an error, but if the second parameter is false,
-       then the characters after the last identifier are ignored. Note that this
-       function does not truncate quoted identifiers. If you care about that
-       you should cast the result of this function to name[]. Non-printable
-       characters (like 0 to 31) are always displayed as hexadecimal codes,
-       which can be different from PostgreSQL internal SQL identifiers
-       processing, when the original escaped value is displayed.
+       <entry>
+        Split <parameter>qualified_identifier</parameter> into an array of
+        identifiers, removing any quoting of individual identifiers.  By
+        default, extra characters after the last identifier are considered an
+        error; but if the second parameter is <literal>false</>, then such
+        extra characters are ignored. (This behavior is useful for parsing
+        names for objects like functions.) Note that this function does not
+        truncate over-length identifiers. If you want truncation you can cast
+        the result to <type>name[]</>.
        </entry>
        <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
-       <entry><literal>"SomeSchema,sometable"</literal></entry>
+       <entry><literal>{SomeSchema,sometable}</literal></entry>
       </row>
 
       <row>
index faa8ef3c9131d055694a280784cc1b65a58b4bbf..6f7c40781616cf7980e9de3d8487d1f9f71eb254 100644 (file)
@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
 
 
 /*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
  */
 static bool
 is_ident_start(unsigned char c)
 {
+       /* Underscores and ASCII letters are OK */
        if (c == '_')
                return true;
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
                return true;
-
-       if (c >= 0200 && c <= 0377)
+       /* Any high-bit-set character is OK (might be part of a multibyte char) */
+       if (IS_HIGHBIT_SET(c))
                return true;
-
        return false;
 }
 
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
 static bool
 is_ident_cont(unsigned char c)
 {
-       if (c >= '0' && c <= '9')
+       /* Can be digit or dollar sign ... */
+       if ((c >= '0' && c <= '9') || c == '$')
                return true;
-
+       /* ... or an identifier start character */
        return is_ident_start(c);
 }
 
 /*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
-       int                     len = VARSIZE_ANY_EXHDR(t);
-       const char *p = VARDATA_ANY(t);
-       StringInfo      dstr;
-
-       dstr = makeStringInfo();
-
-       appendStringInfoChar(dstr, '"');
-
-       while (len--)
-       {
-               switch (*p)
-               {
-                       case '\b':
-                               appendStringInfoString(dstr, "\\b");
-                               break;
-                       case '\f':
-                               appendStringInfoString(dstr, "\\f");
-                               break;
-                       case '\n':
-                               appendStringInfoString(dstr, "\\n");
-                               break;
-                       case '\r':
-                               appendStringInfoString(dstr, "\\r");
-                               break;
-                       case '\t':
-                               appendStringInfoString(dstr, "\\t");
-                               break;
-                       case '\'':
-                               appendStringInfoString(dstr, "''");
-                               break;
-                       case '\\':
-                               appendStringInfoString(dstr, "\\\\");
-                               break;
-                       default:
-                               if ((unsigned char) *p < ' ')
-                                       appendStringInfo(dstr, "\\u%04x", (int) *p);
-                               else
-                                       appendStringInfoCharMacro(dstr, *p);
-                               break;
-               }
-               p++;
-       }
-
-       appendStringInfoChar(dstr, '"');
-
-       return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
  * When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
  */
 Datum
 parse_ident(PG_FUNCTION_ARGS)
 {
-       text       *qualname;
-       char       *qualname_str;
-       bool            strict;
+       text       *qualname = PG_GETARG_TEXT_PP(0);
+       bool            strict = PG_GETARG_BOOL(1);
+       char       *qualname_str = text_to_cstring(qualname);
+       ArrayBuildState *astate = NULL;
        char       *nextp;
        bool            after_dot = false;
-       ArrayBuildState *astate = NULL;
-
-       qualname = PG_GETARG_TEXT_PP(0);
-       qualname_str = text_to_cstring(qualname);
-       strict = PG_GETARG_BOOL(1);
 
+       /*
+        * The code below scribbles on qualname_str in some cases, so we should
+        * reconvert qualname if we need to show the original string in error
+        * messages.
+        */
        nextp = qualname_str;
 
        /* skip leading whitespace */
@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
 
        for (;;)
        {
-               char            *curname;
-               char            *endp;
-               bool            missing_ident;
-
-               missing_ident = true;
+               char       *curname;
+               bool            missing_ident = true;
 
-               if (*nextp == '\"')
+               if (*nextp == '"')
                {
+                       char       *endp;
+
                        curname = nextp + 1;
                        for (;;)
                        {
-                               endp = strchr(nextp + 1, '\"');
+                               endp = strchr(nextp + 1, '"');
                                if (endp == NULL)
                                        ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                                errmsg("unclosed double quotes"),
-                                                errdetail("string %s is not valid identifier",
-                                                                       sanitize_text(qualname))));
-                               if (endp[1] != '\"')
+                                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                  errmsg("string is not a valid identifier: \"%s\"",
+                                                                 text_to_cstring(qualname)),
+                                                  errdetail("String has unclosed double quotes.")));
+                               if (endp[1] != '"')
                                        break;
                                memmove(endp, endp + 1, strlen(endp));
                                nextp = endp;
@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
                        nextp = endp + 1;
                        *endp = '\0';
 
-                       /* Show complete input string in this case. */
                        if (endp - curname == 0)
                                ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("identifier should not be empty: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                                errdetail("Quoted identifier must not be empty.")));
 
                        astate = accumArrayResult(astate, CStringGetTextDatum(curname),
                                                                          false, TEXTOID, CurrentMemoryContext);
                        missing_ident = false;
                }
-               else
+               else if (is_ident_start((unsigned char) *nextp))
                {
-                       if (is_ident_start((unsigned char) *nextp))
-                       {
-                               char *downname;
-                               int     len;
-                               text    *part;
-
-                               curname = nextp++;
-                               while (is_ident_cont((unsigned char) *nextp))
-                                       nextp++;
-
-                               len = nextp - curname;
-
-                               /*
-                                * Unlike name, we don't implicitly truncate identifiers. This
-                                * is useful for allowing the user to check for specific parts
-                                * of the identifier being too long. It's easy enough for the
-                                * user to get the truncated names by casting our output to
-                                * name[].
-                                */
-                               downname = downcase_identifier(curname, len, false, false);
-                               part = cstring_to_text_with_len(downname, len);
-                               astate = accumArrayResult(astate, PointerGetDatum(part), false,
-                                                                                 TEXTOID, CurrentMemoryContext);
-                               missing_ident = false;
-                       }
+                       char       *downname;
+                       int                     len;
+                       text       *part;
+
+                       curname = nextp++;
+                       while (is_ident_cont((unsigned char) *nextp))
+                               nextp++;
+
+                       len = nextp - curname;
+
+                       /*
+                        * We don't implicitly truncate identifiers. This is useful for
+                        * allowing the user to check for specific parts of the identifier
+                        * being too long. It's easy enough for the user to get the
+                        * truncated names by casting our output to name[].
+                        */
+                       downname = downcase_identifier(curname, len, false, false);
+                       part = cstring_to_text_with_len(downname, len);
+                       astate = accumArrayResult(astate, PointerGetDatum(part), false,
+                                                                         TEXTOID, CurrentMemoryContext);
+                       missing_ident = false;
                }
 
                if (missing_ident)
@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
                        /* Different error messages based on where we failed. */
                        if (*nextp == '.')
                                ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier before \".\" symbol: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                        errdetail("No valid identifier before \".\" symbol.")));
                        else if (after_dot)
                                ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier after \".\" symbol: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname)),
+                                         errdetail("No valid identifier after \".\" symbol.")));
                        else
                                ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("missing valid identifier: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname))));
                }
 
                while (isspace((unsigned char) *nextp))
@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
                {
                        if (strict)
                                ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                        errmsg("identifier contains disallowed characters: %s",
-                                                       sanitize_text(qualname))));
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("string is not a valid identifier: \"%s\"",
+                                                               text_to_cstring(qualname))));
                        break;
                }
        }
index 56139d45efcc085b9f1891e80fad759b40ee8ee1..acc5ce6193974a75962012ebdd5f6b052f1edbf3 100644 (file)
@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
 (1 row)
 
 SELECT parse_ident('foo.boo[]'); -- should fail
-ERROR:  identifier contains disallowed characters: "foo.boo[]"
+ERROR:  string is not a valid identifier: "foo.boo[]"
 SELECT parse_ident('foo.boo[]', strict => false); -- ok
  parse_ident 
 -------------
@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
 
 -- should fail
 SELECT parse_ident(' ');
-ERROR:  missing valid identifier: " "
+ERROR:  string is not a valid identifier: " "
 SELECT parse_ident(' .aaa');
-ERROR:  missing valid identifier before "." symbol: " .aaa"
+ERROR:  string is not a valid identifier: " .aaa"
+DETAIL:  No valid identifier before "." symbol.
 SELECT parse_ident(' aaa . ');
-ERROR:  missing valid identifier after "." symbol: " aaa . "
+ERROR:  string is not a valid identifier: " aaa . "
+DETAIL:  No valid identifier after "." symbol.
 SELECT parse_ident('aaa.a%b');
-ERROR:  identifier contains disallowed characters: "aaa.a%b"
+ERROR:  string is not a valid identifier: "aaa.a%b"
 SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
-ERROR:  identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ERROR:  string is not a valid identifier: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
 SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
  length | length 
 --------+--------
@@ -179,14 +181,17 @@ SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66)
 (1 row)
 
 SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
-ERROR:  identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+ERROR:  string is not a valid identifier: ""c".X XXXX\ 2XXXXXX"
 SELECT parse_ident('1020');
-ERROR:  missing valid identifier: "1020"
+ERROR:  string is not a valid identifier: "1020"
 SELECT parse_ident('10.20');
-ERROR:  missing valid identifier: "10.20"
+ERROR:  string is not a valid identifier: "10.20"
 SELECT parse_ident('.');
-ERROR:  missing valid identifier before "." symbol: "."
+ERROR:  string is not a valid identifier: "."
+DETAIL:  No valid identifier before "." symbol.
 SELECT parse_ident('.1020');
-ERROR:  missing valid identifier before "." symbol: ".1020"
+ERROR:  string is not a valid identifier: ".1020"
+DETAIL:  No valid identifier before "." symbol.
 SELECT parse_ident('xxx.1020');
-ERROR:  missing valid identifier after "." symbol: "xxx.1020"
+ERROR:  string is not a valid identifier: "xxx.1020"
+DETAIL:  No valid identifier after "." symbol.