SQL-layer function to split qualified identifier into array parts.
Author: Pavel Stehule with minor editorization by me and Jim Nasby
<entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>parse_ident</primary>
+ </indexterm>
+ <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
+ [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+ </entry>
+ <entry><type>text[]</type></entry>
+ <entry>Split <parameter>qualified identifier</parameter> into array
+ <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
+ false, extra characters after the identifier are ignored. This is useful
+ for parsing identifiers for objects like functions and arrays that may
+ have trailing characters. By default, extra characters after the last
+ identifier are considered an error, but if second parameter is false,
+ then chararacters after last identifier are ignored. Note that this
+ function does not truncate quoted identifiers. If you care about that
+ you should cast the result of this function to name[]. A non-printable
+ chararacters (like 0 to 31) are displayed as hexadecimal codes always,
+ what can be different from PostgreSQL internal SQL identifiers
+ processing, when the original escaped value is displayed.
+ </entry>
+ <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+ <entry><literal>"SomeSchema,sometable"</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'jsonb_set';
+
+CREATE OR REPLACE FUNCTION
+ parse_ident(str text, strict boolean DEFAULT true)
+RETURNS text[]
+LANGUAGE INTERNAL
+STRICT IMMUTABLE
+AS 'parse_ident';
*/
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
+{
+ return downcase_identifier(ident, len, warn, true);
+}
+
+/*
+ * a workhorse for downcase_truncate_identifier
+ */
+char *
+downcase_identifier(const char *ident, int len, bool warn, bool truncate)
{
char *result;
int i;
}
result[i] = '\0';
- if (i >= NAMEDATALEN)
+ if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn);
return result;
}
+
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
+#include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
+
+
+/*
+ * This simple parser utility are compatible with lexer implementation,
+ * used only in parse_ident function
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+ if (c == '_')
+ return true;
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ return true;
+
+ if (c >= 0200 && c <= 0377)
+ return true;
+
+ return false;
+}
+
+static bool
+is_ident_cont(unsigned char c)
+{
+ if (c >= '0' && c <= '9')
+ return true;
+
+ return is_ident_start(c);
+}
+
+/*
+ * Sanitize SQL string for using in error message.
+ */
+static char *
+sanitize_text(text *t)
+{
+ int len = VARSIZE_ANY_EXHDR(t);
+ const char *p = VARDATA_ANY(t);
+ StringInfo dstr;
+
+ dstr = makeStringInfo();
+
+ appendStringInfoChar(dstr, '"');
+
+ while (len--)
+ {
+ switch (*p)
+ {
+ case '\b':
+ appendStringInfoString(dstr, "\\b");
+ break;
+ case '\f':
+ appendStringInfoString(dstr, "\\f");
+ break;
+ case '\n':
+ appendStringInfoString(dstr, "\\n");
+ break;
+ case '\r':
+ appendStringInfoString(dstr, "\\r");
+ break;
+ case '\t':
+ appendStringInfoString(dstr, "\\t");
+ break;
+ case '\'':
+ appendStringInfoString(dstr, "''");
+ break;
+ case '\\':
+ appendStringInfoString(dstr, "\\\\");
+ break;
+ default:
+ if ((unsigned char) *p < ' ')
+ appendStringInfo(dstr, "\\u%04x", (int) *p);
+ else
+ appendStringInfoCharMacro(dstr, *p);
+ break;
+ }
+ p++;
+ }
+
+ appendStringInfoChar(dstr, '"');
+
+ return dstr->data;
+}
+
+/*
+ * parse_ident - parse SQL composed identifier to separate identifiers.
+ * When strict mode is active (second parameter), then any chars after
+ * last identifiers are disallowed.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+ text *qualname;
+ char *qualname_str;
+ bool strict;
+ char *nextp;
+ bool after_dot = false;
+ ArrayBuildState *astate = NULL;
+
+ qualname = PG_GETARG_TEXT_PP(0);
+ qualname_str = text_to_cstring(qualname);
+ strict = PG_GETARG_BOOL(1);
+
+ nextp = qualname_str;
+
+ /* skip leading whitespace */
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ for (;;)
+ {
+ char *curname;
+ char *endp;
+ bool missing_ident;
+
+ missing_ident = true;
+
+ if (*nextp == '\"')
+ {
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '\"');
+ if (endp == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unclosed double quotes"),
+ errdetail("string %s is not valid identifier",
+ sanitize_text(qualname))));
+ if (endp[1] != '\"')
+ break;
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ nextp = endp + 1;
+ *endp = '\0';
+
+ /* Show complete input string in this case. */
+ if (endp - curname == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier should not be empty: %s",
+ sanitize_text(qualname))));
+
+ astate = accumArrayResult(astate, CStringGetTextDatum(curname),
+ false, TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+ else
+ {
+ if (is_ident_start((unsigned char) *nextp))
+ {
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * Unlike name, we don't implicitly truncate identifiers. This
+ * is useful for allowing the user to check for specific parts
+ * of the identifier being too long. It's easy enough for the
+ * user to get the truncated names by casting our output to
+ * name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+ }
+
+ if (missing_ident)
+ {
+ /* Different error messages based on where we failed. */
+ if (*nextp == '.')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing valid identifier before \".\" symbol: %s",
+ sanitize_text(qualname))));
+ else if (after_dot)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing valid identifier after \".\" symbol: %s",
+ sanitize_text(qualname))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing valid identifier: %s",
+ sanitize_text(qualname))));
+ }
+
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ if (*nextp == '.')
+ {
+ after_dot = true;
+ nextp++;
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+ }
+ else if (*nextp == '\0')
+ {
+ break;
+ }
+ else
+ {
+ if (strict)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier contains disallowed characters: %s",
+ sanitize_text(qualname))));
+ break;
+ }
+ }
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201603151
+#define CATALOG_VERSION_NO 201603181
#endif
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
DESCR("convert namespace name to regnamespace");
+DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
+DESCR("parse qualified identifier to array of identifiers");
+
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn);
+extern char *downcase_identifier(const char *ident, int len,
+ bool warn, bool truncate);
+
extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch);
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS);
(2 rows)
DROP TABLE NAME_TBL;
+DO $$
+DECLARE r text[];
+BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+NOTICE: schemax.tabley
+NOTICE: "SchemaX"."TableY"
+SELECT parse_ident('foo.boo');
+ parse_ident
+-------------
+ {foo,boo}
+(1 row)
+
+SELECT parse_ident('foo.boo[]'); -- should fail
+ERROR: identifier contains disallowed characters: "foo.boo[]"
+SELECT parse_ident('foo.boo[]', strict => false); -- ok
+ parse_ident
+-------------
+ {foo,boo}
+(1 row)
+
+-- should fail
+SELECT parse_ident(' ');
+ERROR: missing valid identifier: " "
+SELECT parse_ident(' .aaa');
+ERROR: missing valid identifier before "." symbol: " .aaa"
+SELECT parse_ident(' aaa . ');
+ERROR: missing valid identifier after "." symbol: " aaa . "
+SELECT parse_ident('aaa.a%b');
+ERROR: identifier contains disallowed characters: "aaa.a%b"
+SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
+ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+ length | length
+--------+--------
+ 414 | 289
+(1 row)
+
+SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
+ parse_ident
+-----------------------------------------------------------------------------------------------------------
+ {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
+ parse_ident
+------------------------------------------------------------------------------------------------------
+ {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
+ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+SELECT parse_ident('1020');
+ERROR: missing valid identifier: "1020"
+SELECT parse_ident('10.20');
+ERROR: missing valid identifier: "10.20"
+SELECT parse_ident('.');
+ERROR: missing valid identifier before "." symbol: "."
+SELECT parse_ident('.1020');
+ERROR: missing valid identifier before "." symbol: ".1020"
+SELECT parse_ident('xxx.1020');
+ERROR: missing valid identifier after "." symbol: "xxx.1020"
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL;
+
+DO $$
+DECLARE r text[];
+BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+
+SELECT parse_ident('foo.boo');
+SELECT parse_ident('foo.boo[]'); -- should fail
+SELECT parse_ident('foo.boo[]', strict => false); -- ok
+
+-- should fail
+SELECT parse_ident(' ');
+SELECT parse_ident(' .aaa');
+SELECT parse_ident(' aaa . ');
+SELECT parse_ident('aaa.a%b');
+SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
+
+SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+
+SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
+SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
+
+SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
+SELECT parse_ident('1020');
+SELECT parse_ident('10.20');
+SELECT parse_ident('.');
+SELECT parse_ident('.1020');
+SELECT parse_ident('xxx.1020');