<indexterm>
<primary>parse_ident</primary>
</indexterm>
- <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
- [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+ <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
+ [, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
</entry>
<entry><type>text[]</type></entry>
- <entry>Split <parameter>qualified identifier</parameter> into array
- <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
- false, extra characters after the identifier are ignored. This is useful
- for parsing identifiers for objects like functions and arrays that may
- have trailing characters. By default, extra characters after the last
- identifier are considered an error, but if the second parameter is false,
- then the characters after the last identifier are ignored. Note that this
- function does not truncate quoted identifiers. If you care about that
- you should cast the result of this function to name[]. Non-printable
- characters (like 0 to 31) are always displayed as hexadecimal codes,
- which can be different from PostgreSQL internal SQL identifiers
- processing, when the original escaped value is displayed.
+ <entry>
+ Split <parameter>qualified_identifier</parameter> into an array of
+ identifiers, removing any quoting of individual identifiers. By
+ default, extra characters after the last identifier are considered an
+ error; but if the second parameter is <literal>false</>, then such
+ extra characters are ignored. (This behavior is useful for parsing
+ names for objects like functions.) Note that this function does not
+ truncate over-length identifiers. If you want truncation you can cast
+ the result to <type>name[]</>.
</entry>
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
- <entry><literal>"SomeSchema,sometable"</literal></entry>
+ <entry><literal>{SomeSchema,sometable}</literal></entry>
</row>
<row>
/*
- * This simple parser utility are compatible with lexer implementation,
- * used only in parse_ident function
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
*/
static bool
is_ident_start(unsigned char c)
{
+ /* Underscores and ASCII letters are OK */
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
-
- if (c >= 0200 && c <= 0377)
+ /* Any high-bit-set character is OK (might be part of a multibyte char) */
+ if (IS_HIGHBIT_SET(c))
return true;
-
return false;
}
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
static bool
is_ident_cont(unsigned char c)
{
- if (c >= '0' && c <= '9')
+ /* Can be digit or dollar sign ... */
+ if ((c >= '0' && c <= '9') || c == '$')
return true;
-
+ /* ... or an identifier start character */
return is_ident_start(c);
}
/*
- * Sanitize SQL string for using in error message.
- */
-static char *
-sanitize_text(text *t)
-{
- int len = VARSIZE_ANY_EXHDR(t);
- const char *p = VARDATA_ANY(t);
- StringInfo dstr;
-
- dstr = makeStringInfo();
-
- appendStringInfoChar(dstr, '"');
-
- while (len--)
- {
- switch (*p)
- {
- case '\b':
- appendStringInfoString(dstr, "\\b");
- break;
- case '\f':
- appendStringInfoString(dstr, "\\f");
- break;
- case '\n':
- appendStringInfoString(dstr, "\\n");
- break;
- case '\r':
- appendStringInfoString(dstr, "\\r");
- break;
- case '\t':
- appendStringInfoString(dstr, "\\t");
- break;
- case '\'':
- appendStringInfoString(dstr, "''");
- break;
- case '\\':
- appendStringInfoString(dstr, "\\\\");
- break;
- default:
- if ((unsigned char) *p < ' ')
- appendStringInfo(dstr, "\\u%04x", (int) *p);
- else
- appendStringInfoCharMacro(dstr, *p);
- break;
- }
- p++;
- }
-
- appendStringInfoChar(dstr, '"');
-
- return dstr->data;
-}
-
-/*
- * parse_ident - parse SQL composed identifier to separate identifiers.
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
- * last identifiers are disallowed.
+ * the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
- text *qualname;
- char *qualname_str;
- bool strict;
+ text *qualname = PG_GETARG_TEXT_PP(0);
+ bool strict = PG_GETARG_BOOL(1);
+ char *qualname_str = text_to_cstring(qualname);
+ ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
- ArrayBuildState *astate = NULL;
-
- qualname = PG_GETARG_TEXT_PP(0);
- qualname_str = text_to_cstring(qualname);
- strict = PG_GETARG_BOOL(1);
+ /*
+ * The code below scribbles on qualname_str in some cases, so we should
+ * reconvert qualname if we need to show the original string in error
+ * messages.
+ */
nextp = qualname_str;
/* skip leading whitespace */
for (;;)
{
- char *curname;
- char *endp;
- bool missing_ident;
-
- missing_ident = true;
+ char *curname;
+ bool missing_ident = true;
- if (*nextp == '\"')
+ if (*nextp == '"')
{
+ char *endp;
+
curname = nextp + 1;
for (;;)
{
- endp = strchr(nextp + 1, '\"');
+ endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unclosed double quotes"),
- errdetail("string %s is not valid identifier",
- sanitize_text(qualname))));
- if (endp[1] != '\"')
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("String has unclosed double quotes.")));
+ if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
nextp = endp + 1;
*endp = '\0';
- /* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier should not be empty: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
- else
+ else if (is_ident_start((unsigned char) *nextp))
{
- if (is_ident_start((unsigned char) *nextp))
- {
- char *downname;
- int len;
- text *part;
-
- curname = nextp++;
- while (is_ident_cont((unsigned char) *nextp))
- nextp++;
-
- len = nextp - curname;
-
- /*
- * Unlike name, we don't implicitly truncate identifiers. This
- * is useful for allowing the user to check for specific parts
- * of the identifier being too long. It's easy enough for the
- * user to get the truncated names by casting our output to
- * name[].
- */
- downname = downcase_identifier(curname, len, false, false);
- part = cstring_to_text_with_len(downname, len);
- astate = accumArrayResult(astate, PointerGetDatum(part), false,
- TEXTOID, CurrentMemoryContext);
- missing_ident = false;
- }
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * We don't implicitly truncate identifiers. This is useful for
+ * allowing the user to check for specific parts of the identifier
+ * being too long. It's easy enough for the user to get the
+ * truncated names by casting our output to name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
}
if (missing_ident)
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier before \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier after \".\" symbol: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier after \".\" symbol.")));
else
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing valid identifier: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
{
if (strict)
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("identifier contains disallowed characters: %s",
- sanitize_text(qualname))));
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
break;
}
}
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
-ERROR: identifier contains disallowed characters: "foo.boo[]"
+ERROR: string is not a valid identifier: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
-- should fail
SELECT parse_ident(' ');
-ERROR: missing valid identifier: " "
+ERROR: string is not a valid identifier: " "
SELECT parse_ident(' .aaa');
-ERROR: missing valid identifier before "." symbol: " .aaa"
+ERROR: string is not a valid identifier: " .aaa"
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident(' aaa . ');
-ERROR: missing valid identifier after "." symbol: " aaa . "
+ERROR: string is not a valid identifier: " aaa . "
+DETAIL: No valid identifier after "." symbol.
SELECT parse_ident('aaa.a%b');
-ERROR: identifier contains disallowed characters: "aaa.a%b"
+ERROR: string is not a valid identifier: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
-ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ERROR: string is not a valid identifier: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
-ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+ERROR: string is not a valid identifier: ""c".X XXXX\ 2XXXXXX"
SELECT parse_ident('1020');
-ERROR: missing valid identifier: "1020"
+ERROR: string is not a valid identifier: "1020"
SELECT parse_ident('10.20');
-ERROR: missing valid identifier: "10.20"
+ERROR: string is not a valid identifier: "10.20"
SELECT parse_ident('.');
-ERROR: missing valid identifier before "." symbol: "."
+ERROR: string is not a valid identifier: "."
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('.1020');
-ERROR: missing valid identifier before "." symbol: ".1020"
+ERROR: string is not a valid identifier: ".1020"
+DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('xxx.1020');
-ERROR: missing valid identifier after "." symbol: "xxx.1020"
+ERROR: string is not a valid identifier: "xxx.1020"
+DETAIL: No valid identifier after "." symbol.