]> granicus.if.org Git - postgresql/commitdiff
Introduce parse_ident()
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 18 Mar 2016 15:16:14 +0000 (18:16 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 18 Mar 2016 15:16:14 +0000 (18:16 +0300)
SQL-layer function to split qualified identifier into array parts.

Author: Pavel Stehule with minor editorization by me and Jim Nasby

doc/src/sgml/func.sgml
src/backend/catalog/system_views.sql
src/backend/parser/scansup.c
src/backend/utils/adt/misc.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/parser/scansup.h
src/include/utils/builtins.h
src/test/regress/expected/name.out
src/test/regress/sql/name.sql

index 000489d961b53703750b047bff6b962938bcdad4..918356c075d08e635a9ba908f6ee228c4a364a99 100644 (file)
        <entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
       </row>
 
+      <row>
+       <entry>
+        <indexterm>
+         <primary>parse_ident</primary>
+        </indexterm>
+        <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
+           [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+       </entry>
+       <entry><type>text[]</type></entry>
+       <entry>Split <parameter>qualified identifier</parameter> into array
+       <parameter>parts</parameter>. When <parameter>strictmode</parameter> is
+       false, extra characters after the identifier are ignored. This is useful
+       for parsing identifiers for objects like functions and arrays that may
+       have trailing characters. By default, extra characters after the last
+       identifier are considered an error, but if second parameter is false,
+       then chararacters after last identifier are ignored. Note that this
+       function does not truncate quoted identifiers. If you care about that
+       you should cast the result of this function to name[]. A non-printable
+       chararacters (like 0 to 31) are displayed as hexadecimal codes always,
+       what can be different from PostgreSQL internal SQL identifiers
+       processing, when the original escaped value is displayed.
+       </entry>
+       <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+       <entry><literal>"SomeSchema,sometable"</literal></entry>
+      </row>
+
       <row>
        <entry>
         <indexterm>
index fef67bdd4cdc1f9c7012fa5a96c2abd81499ac62..9ae1ef4efa68ea1a71e557d15f8b3975d21e87ca 100644 (file)
@@ -990,3 +990,10 @@ RETURNS jsonb
 LANGUAGE INTERNAL
 STRICT IMMUTABLE
 AS 'jsonb_set';
+
+CREATE OR REPLACE FUNCTION
+  parse_ident(str text, strict boolean DEFAULT true)
+RETURNS text[]
+LANGUAGE INTERNAL
+STRICT IMMUTABLE
+AS 'parse_ident';
index 2b4ab202c34e36a50d267b618380ec24859f7a1d..7aa5b768411297f61da2964f8930d081d721f18a 100644 (file)
@@ -129,6 +129,15 @@ scanstr(const char *s)
  */
 char *
 downcase_truncate_identifier(const char *ident, int len, bool warn)
+{
+       return downcase_identifier(ident, len, warn, true);
+}
+
+/*
+ * a workhorse for downcase_truncate_identifier
+ */
+char *
+downcase_identifier(const char *ident, int len, bool warn, bool truncate)
 {
        char       *result;
        int                     i;
@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
        }
        result[i] = '\0';
 
-       if (i >= NAMEDATALEN)
+       if (i >= NAMEDATALEN && truncate)
                truncate_identifier(result, i, warn);
 
        return result;
 }
 
+
 /*
  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
  *
index 43f36db47bb1864351e9e14e4ba168f3d7b4774d..4dcc5a63be772b581464c1a93fc89ba90c7986c4 100644 (file)
@@ -27,6 +27,7 @@
 #include "commands/dbcommands.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "parser/scansup.h"
 #include "parser/keywords.h"
 #include "postmaster/syslogger.h"
 #include "rewrite/rewriteHandler.h"
@@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
 
        PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
 }
+
+
+/*
+ * This simple parser utility are compatible with lexer implementation,
+ * used only in parse_ident function
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+       if (c == '_')
+               return true;
+       if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+               return true;
+
+       if (c >= 0200 && c <= 0377)
+               return true;
+
+       return false;
+}
+
+static bool
+is_ident_cont(unsigned char c)
+{
+       if (c >= '0' && c <= '9')
+               return true;
+
+       return is_ident_start(c);
+}
+
+/*
+ * Sanitize SQL string for using in error message.
+ */
+static char *
+sanitize_text(text *t)
+{
+       int                     len = VARSIZE_ANY_EXHDR(t);
+       const char *p = VARDATA_ANY(t);
+       StringInfo      dstr;
+
+       dstr = makeStringInfo();
+
+       appendStringInfoChar(dstr, '"');
+
+       while (len--)
+       {
+               switch (*p)
+               {
+                       case '\b':
+                               appendStringInfoString(dstr, "\\b");
+                               break;
+                       case '\f':
+                               appendStringInfoString(dstr, "\\f");
+                               break;
+                       case '\n':
+                               appendStringInfoString(dstr, "\\n");
+                               break;
+                       case '\r':
+                               appendStringInfoString(dstr, "\\r");
+                               break;
+                       case '\t':
+                               appendStringInfoString(dstr, "\\t");
+                               break;
+                       case '\'':
+                               appendStringInfoString(dstr, "''");
+                               break;
+                       case '\\':
+                               appendStringInfoString(dstr, "\\\\");
+                               break;
+                       default:
+                               if ((unsigned char) *p < ' ')
+                                       appendStringInfo(dstr, "\\u%04x", (int) *p);
+                               else
+                                       appendStringInfoCharMacro(dstr, *p);
+                               break;
+               }
+               p++;
+       }
+
+       appendStringInfoChar(dstr, '"');
+
+       return dstr->data;
+}
+
+/*
+ * parse_ident - parse SQL composed identifier to separate identifiers.
+ * When strict mode is active (second parameter), then any chars after
+ * last identifiers are disallowed.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+       text       *qualname;
+       char       *qualname_str;
+       bool            strict;
+       char       *nextp;
+       bool            after_dot = false;
+       ArrayBuildState *astate = NULL;
+
+       qualname = PG_GETARG_TEXT_PP(0);
+       qualname_str = text_to_cstring(qualname);
+       strict = PG_GETARG_BOOL(1);
+
+       nextp = qualname_str;
+
+       /* skip leading whitespace */
+       while (isspace((unsigned char) *nextp))
+               nextp++;
+
+       for (;;)
+       {
+               char            *curname;
+               char            *endp;
+               bool            missing_ident;
+
+               missing_ident = true;
+
+               if (*nextp == '\"')
+               {
+                       curname = nextp + 1;
+                       for (;;)
+                       {
+                               endp = strchr(nextp + 1, '\"');
+                               if (endp == NULL)
+                                       ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("unclosed double quotes"),
+                                                errdetail("string %s is not valid identifier",
+                                                                       sanitize_text(qualname))));
+                               if (endp[1] != '\"')
+                                       break;
+                               memmove(endp, endp + 1, strlen(endp));
+                               nextp = endp;
+                       }
+                       nextp = endp + 1;
+                       *endp = '\0';
+
+                       /* Show complete input string in this case. */
+                       if (endp - curname == 0)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("identifier should not be empty: %s",
+                                                       sanitize_text(qualname))));
+
+                       astate = accumArrayResult(astate, CStringGetTextDatum(curname),
+                                                                         false, TEXTOID, CurrentMemoryContext);
+                       missing_ident = false;
+               }
+               else
+               {
+                       if (is_ident_start((unsigned char) *nextp))
+                       {
+                               char *downname;
+                               int     len;
+                               text    *part;
+
+                               curname = nextp++;
+                               while (is_ident_cont((unsigned char) *nextp))
+                                       nextp++;
+
+                               len = nextp - curname;
+
+                               /*
+                                * Unlike name, we don't implicitly truncate identifiers. This
+                                * is useful for allowing the user to check for specific parts
+                                * of the identifier being too long. It's easy enough for the
+                                * user to get the truncated names by casting our output to
+                                * name[].
+                                */
+                               downname = downcase_identifier(curname, len, false, false);
+                               part = cstring_to_text_with_len(downname, len);
+                               astate = accumArrayResult(astate, PointerGetDatum(part), false,
+                                                                                 TEXTOID, CurrentMemoryContext);
+                               missing_ident = false;
+                       }
+               }
+
+               if (missing_ident)
+               {
+                       /* Different error messages based on where we failed. */
+                       if (*nextp == '.')
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing valid identifier before \".\" symbol: %s",
+                                                       sanitize_text(qualname))));
+                       else if (after_dot)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing valid identifier after \".\" symbol: %s",
+                                                       sanitize_text(qualname))));
+                       else
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("missing valid identifier: %s",
+                                                       sanitize_text(qualname))));
+               }
+
+               while (isspace((unsigned char) *nextp))
+                       nextp++;
+
+               if (*nextp == '.')
+               {
+                       after_dot = true;
+                       nextp++;
+                       while (isspace((unsigned char) *nextp))
+                               nextp++;
+               }
+               else if (*nextp == '\0')
+               {
+                       break;
+               }
+               else
+               {
+                       if (strict)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("identifier contains disallowed characters: %s",
+                                                       sanitize_text(qualname))));
+                       break;
+               }
+       }
+
+       PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
index 7f410c178c54c85f8b3e14729229c7f7de000547..7c9e9eb19f675b048346f24cdc512f4bd43e224c 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201603151
+#define CATALOG_VERSION_NO     201603181
 
 #endif
index ceb8129e9af490cec3f9ec3fe12bd7b15e021f31..a59532732c0433a95bc35fe06ae1dc828389564e 100644 (file)
@@ -3463,6 +3463,9 @@ DESCR("I/O");
 DATA(insert OID = 4086 (  to_regnamespace      PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
 DESCR("convert namespace name to regnamespace");
 
+DATA(insert OID = 1268 (  parse_ident          PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
+DESCR("parse qualified identifier to array of identifiers");
+
 DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
 DESCR("(internal)");
 DATA(insert OID = 2247 ( fmgr_c_validator      PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
index 4f4164bb8c47c5c0bb50e353f03774575555431d..4f95c81f7c99269a6a5a1bf9ec168f376e0df0fd 100644 (file)
@@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
 extern char *downcase_truncate_identifier(const char *ident, int len,
                                                         bool warn);
 
+extern char *downcase_identifier(const char *ident, int len,
+                                                        bool warn, bool truncate);
+
 extern void truncate_identifier(char *ident, int len, bool warn);
 
 extern bool scanner_isspace(char ch);
index 59a00bbcbcf8291c92f7c9b13b53861b1be2372c..206288da8100699ca07de94f15084e70433e8643 100644 (file)
@@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
 extern Datum pg_collation_for(PG_FUNCTION_ARGS);
 extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
 extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+extern Datum parse_ident(PG_FUNCTION_ARGS);
 
 /* oid.c */
 extern Datum oidin(PG_FUNCTION_ARGS);
index b359d52822571f522d1110435014b04b7f0659da..56139d45efcc085b9f1891e80fad759b40ee8ee1 100644 (file)
@@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
 (2 rows)
 
 DROP TABLE NAME_TBL;
+DO $$
+DECLARE r text[];
+BEGIN
+  r := parse_ident('Schemax.Tabley');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+  r := parse_ident('"SchemaX"."TableY"');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+NOTICE:  schemax.tabley
+NOTICE:  "SchemaX"."TableY"
+SELECT parse_ident('foo.boo');
+ parse_ident 
+-------------
+ {foo,boo}
+(1 row)
+
+SELECT parse_ident('foo.boo[]'); -- should fail
+ERROR:  identifier contains disallowed characters: "foo.boo[]"
+SELECT parse_ident('foo.boo[]', strict => false); -- ok
+ parse_ident 
+-------------
+ {foo,boo}
+(1 row)
+
+-- should fail
+SELECT parse_ident(' ');
+ERROR:  missing valid identifier: " "
+SELECT parse_ident(' .aaa');
+ERROR:  missing valid identifier before "." symbol: " .aaa"
+SELECT parse_ident(' aaa . ');
+ERROR:  missing valid identifier after "." symbol: " aaa . "
+SELECT parse_ident('aaa.a%b');
+ERROR:  identifier contains disallowed characters: "aaa.a%b"
+SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
+ERROR:  identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+ length | length 
+--------+--------
+    414 |    289
+(1 row)
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66) || '"');
+                                                parse_ident                                                
+-----------------------------------------------------------------------------------------------------------
+ {first,"  second  ","   third   ","  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66) || '"')::name[];
+                                             parse_ident                                              
+------------------------------------------------------------------------------------------------------
+ {first,"  second  ","   third   ","  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
+ERROR:  identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
+SELECT parse_ident('1020');
+ERROR:  missing valid identifier: "1020"
+SELECT parse_ident('10.20');
+ERROR:  missing valid identifier: "10.20"
+SELECT parse_ident('.');
+ERROR:  missing valid identifier before "." symbol: "."
+SELECT parse_ident('.1020');
+ERROR:  missing valid identifier before "." symbol: ".1020"
+SELECT parse_ident('xxx.1020');
+ERROR:  missing valid identifier after "." symbol: "xxx.1020"
index 1c7a6716eea935f1f21345ddac30b2c53b49cd95..602bf26a48c6894c19bf2e3745183c6a6fed07a2 100644 (file)
@@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]';
 SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
 
 DROP TABLE NAME_TBL;
+
+DO $$
+DECLARE r text[];
+BEGIN
+  r := parse_ident('Schemax.Tabley');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+  r := parse_ident('"SchemaX"."TableY"');
+  RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+
+SELECT parse_ident('foo.boo');
+SELECT parse_ident('foo.boo[]'); -- should fail
+SELECT parse_ident('foo.boo[]', strict => false); -- ok
+
+-- should fail
+SELECT parse_ident(' ');
+SELECT parse_ident(' .aaa');
+SELECT parse_ident(' aaa . ');
+SELECT parse_ident('aaa.a%b');
+SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
+
+SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66) || '"');
+SELECT parse_ident(' first . "  second  " ."   third   ". "  ' || repeat('x',66) || '"')::name[];
+
+SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
+SELECT parse_ident('1020');
+SELECT parse_ident('10.20');
+SELECT parse_ident('.');
+SELECT parse_ident('.1020');
+SELECT parse_ident('xxx.1020');