Fix some minor spec-compliance issues in jsonpath lexer.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l

index 2165ffcc254b60db7f93cbca41c5abec690ffcb2..e35636883a1e9c6a9e3721445f862cb283248053 100644 (file)
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -59,25 +59,24 @@ fprintf_to_ereport(const char *fmt, const char *msg)
  %option noyyfree
  
  /*
- * We use exclusive states for quoted, signle-quoted and non-quoted strings,
- * quoted variable names and C-tyle comments.
+ * We use exclusive states for quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
   * Exclusive states:
   *  <xq> - quoted strings
   *  <xnq> - non-quoted strings
   *  <xvq> - quoted variable names
- *  <xsq> - single-quoted strings
   *  <xc> - C-style comment
   */
  
  %x xq
  %x xnq
  %x xvq
-%x xsq
  %x xc
  
-special                 [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
-any                    [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
+special                [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
  blank          [ \t\n\r\f]
+/* "other" means anything that's not special, blank, or '\' or '"' */
+other          [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
  
  digit          [0-9]
  integer                (0|[1-9]{digit}*)
@@ -95,7 +94,7 @@ hex_fail      \\x{hex_dig}{0,1}
  
  %%
  
-<xnq>{any}+                                            {
+<xnq>{other}+                                  {
                                                                         addstring(false, yytext, yyleng);
                                                                 }
  
@@ -105,13 +104,12 @@ hex_fail  \\x{hex_dig}{0,1}
                                                                         return checkKeyword();
                                                                 }
  
-
  <xnq>\/\*                                              {
                                                                         yylval->str = scanstring;
                                                                         BEGIN xc;
                                                                 }
  
-<xnq>({special}|\"|\')                 {
+<xnq>({special}|\")                            {
                                                                         yylval->str = scanstring;
                                                                         yyless(0);
                                                                         BEGIN INITIAL;
@@ -124,39 +122,37 @@ hex_fail  \\x{hex_dig}{0,1}
                                                                         return checkKeyword();
                                                                 }
  
-<xnq,xq,xvq,xsq>\\[\"\'\\]             { addchar(false, yytext[1]); }
-
-<xnq,xq,xvq,xsq>\\b                            { addchar(false, '\b'); }
+<xnq,xq,xvq>\\b                                { addchar(false, '\b'); }
  
-<xnq,xq,xvq,xsq>\\f                            { addchar(false, '\f'); }
+<xnq,xq,xvq>\\f                                { addchar(false, '\f'); }
  
-<xnq,xq,xvq,xsq>\\n                            { addchar(false, '\n'); }
+<xnq,xq,xvq>\\n                                { addchar(false, '\n'); }
  
-<xnq,xq,xvq,xsq>\\r                            { addchar(false, '\r'); }
+<xnq,xq,xvq>\\r                                { addchar(false, '\r'); }
  
-<xnq,xq,xvq,xsq>\\t                            { addchar(false, '\t'); }
+<xnq,xq,xvq>\\t                                { addchar(false, '\t'); }
  
-<xnq,xq,xvq,xsq>\\v                            { addchar(false, '\v'); }
+<xnq,xq,xvq>\\v                                { addchar(false, '\v'); }
  
-<xnq,xq,xvq,xsq>{unicode}+             { parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+         { parseUnicode(yytext, yyleng); }
  
-<xnq,xq,xvq,xsq>{hex_char}             { parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char}         { parseHexChar(yytext); }
  
-<xnq,xq,xvq,xsq>{unicode}*{unicodefail}        { yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail}    { yyerror(NULL, "invalid unicode sequence"); }
  
-<xnq,xq,xvq,xsq>{hex_fail}             { yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail}         { yyerror(NULL, "invalid hex character sequence"); }
  
-<xnq,xq,xvq,xsq>{unicode}+\\   {
-                                                                       /* throw back the \\, and treat as unicode */
-                                                                       yyless(yyleng - 1);
-                                                                       parseUnicode(yytext, yyleng);
-                                                               }
+<xnq,xq,xvq>{unicode}+\\       {
+                                                               /* throw back the \\, and treat as unicode */
+                                                               yyless(yyleng - 1);
+                                                               parseUnicode(yytext, yyleng);
+                                                       }
  
-<xnq,xq,xvq,xsq>\\.                            { yyerror(NULL, "escape sequence is invalid"); }
+<xnq,xq,xvq>\\.                                { addchar(false, yytext[1]); }
  
-<xnq,xq,xvq,xsq>\\                             { yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\                         { yyerror(NULL, "unexpected end after backslash"); }
  
-<xq,xvq,xsq><<EOF>>                            { yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>>                                { yyerror(NULL, "unexpected end of quoted string"); }
  
  <xq>\"                                                 {
                                                                         yylval->str = scanstring;
@@ -170,16 +166,8 @@ hex_fail   \\x{hex_dig}{0,1}
                                                                         return VARIABLE_P;
                                                                 }
  
-<xsq>\'                                                        {
-                                                                       yylval->str = scanstring;
-                                                                       BEGIN INITIAL;
-                                                                       return STRING_P;
-                                                               }
-
  <xq,xvq>[^\\\"]+                               { addstring(false, yytext, yyleng); }
  
-<xsq>[^\\\']+                                  { addstring(false, yytext, yyleng); }
-
  <xc>\*\/                                               { BEGIN INITIAL; }
  
  <xc>[^\*]+                                             { }
@@ -210,7 +198,7 @@ hex_fail    \\x{hex_dig}{0,1}
  
  \>                                                             { return GREATER_P; }
  
-\${any}+                                               {
+\${other}+                                             {
                                                                         addstring(true, yytext + 1, yyleng - 1);
                                                                         addchar(false, '\0');
                                                                         yylval->str = scanstring;
@@ -263,27 +251,22 @@ hex_fail  \\x{hex_dig}{0,1}
  
  ({realfail1}|{realfail2})              { yyerror(NULL, "invalid floating point number"); }
  
-{any}+                                                 {
-                                                                       addstring(true, yytext, yyleng);
-                                                                       BEGIN xnq;
-                                                               }
-
  \"                                                             {
                                                                         addchar(true, '\0');
                                                                         BEGIN xq;
                                                                 }
  
-\'                                                             {
-                                                                       addchar(true, '\0');
-                                                                       BEGIN xsq;
-                                                               }
-
  \\                                                             {
                                                                         yyless(0);
                                                                         addchar(true, '\0');
                                                                         BEGIN xnq;
                                                                 }
  
+{other}+                                               {
+                                                                       addstring(true, yytext, yyleng);
+                                                                       BEGIN xnq;
+                                                               }
+
  <<EOF>>                                                        { yyterminate(); }
  
  %%
diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out

index ea42ae367a3a6073bd875e444b04ecd4ba6aebc0..fc971dc408e4e46b1864b5ce3cea64fe60d99722 100644 (file)
--- a/src/test/regress/expected/jsonpath.out
+++ b/src/test/regress/expected/jsonpath.out
@@ -171,30 +171,24 @@ select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
   "\b\f\r\n\t\u000b\"'\\"
  (1 row)
  
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
-        jsonpath         
--------------------------
- "\b\f\r\n\t\u000b\"'\\"
-(1 row)
-
  select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
   jsonpath 
  ----------
   "PgSQL"
  (1 row)
  
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
- jsonpath 
-----------
- "PgSQL"
-(1 row)
-
  select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
        jsonpath       
  ---------------------
   $."fooPgSQL\t\"bar"
  (1 row)
  
+select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
+ jsonpath 
+----------
+ "z"
+(1 row)
+
  select '$.g ? ($.a == 1)'::jsonpath;
        jsonpath      
  --------------------
diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out

index 8db6e47dbbc2303908f76d5afa10b54ccc7b2f2c..ecffe095b59277ce0e3075f1c24b48fe380f6a34 100644 (file)
--- a/src/test/regress/expected/jsonpath_encoding.out
+++ b/src/test/regress/expected/jsonpath_encoding.out
@@ -81,84 +81,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
   "null \\u0000 escape"
  (1 row)
  
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;            -- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath;  -- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath;        -- ERROR, g is not a hex digit
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath;        -- OK, legal escape
-ERROR:  invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
-               ^
-SELECT E'\'\uaBcD\''::jsonpath;        -- OK, uppercase and lower case both OK
- jsonpath 
-----------
- "ꯍ"
-(1 row)
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
- correct_in_utf8 
------------------
- "😄🐶"
-(1 row)
-
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
-               ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
-               ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
-               ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
-               ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-    correct_in_utf8     
-------------------------
- "the Copyright © sign"
-(1 row)
-
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-  correct_everywhere  
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-    not_an_escape     
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR:  invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-               ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR:  unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
-               ^
-DETAIL:  \u0000 cannot be converted to text.
  -- checks for quoted key names
  -- basic unicode input
  SELECT '$."\u"'::jsonpath;             -- ERROR, incomplete escape
diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out

index e6dff25d45b5c0863a9c5a4c3e8a4fc34190adcd..c8cc2173a8ca8685c70a78e54c67438c57eafec1 100644 (file)
--- a/src/test/regress/expected/jsonpath_encoding_1.out
+++ b/src/test/regress/expected/jsonpath_encoding_1.out
@@ -78,78 +78,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
   "null \\u0000 escape"
  (1 row)
  
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;            -- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath;  -- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath;        -- ERROR, g is not a hex digit
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath;        -- OK, legal escape
-ERROR:  invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
-               ^
-SELECT E'\'\uaBcD\''::jsonpath;        -- OK, uppercase and lower case both OK
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD"
-LINE 1: SELECT E'\'\uaBcD\''::jsonpath;
-               ^
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04"
-LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_...
-               ^
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
-               ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
-               ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
-               ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
-               ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9"
-LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct...
-               ^
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-  correct_everywhere  
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-    not_an_escape     
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR:  invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-               ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR:  unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
-               ^
-DETAIL:  \u0000 cannot be converted to text.
  -- checks for quoted key names
  -- basic unicode input
  SELECT '$."\u"'::jsonpath;             -- ERROR, incomplete escape
diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql

index 29ea77a4858094e9f9ed485dd33ce8f739ee092c..7afe2528c3b238b407213dd31cd74a07629e268c 100644 (file)
--- a/src/test/regress/sql/jsonpath.sql
+++ b/src/test/regress/sql/jsonpath.sql
@@ -30,10 +30,9 @@ select '$.a/+-1'::jsonpath;
  select '1 * 2 + 4 % -3 != false'::jsonpath;
  
  select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
  select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
  select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
+select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
  
  select '$.g ? ($.a == 1)'::jsonpath;
  select '$.g ? (@ == 1)'::jsonpath;
diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql

index a3b5bc39a1c4601b2a4d1af9001b214c13bd844e..3a23b7281825018efa9811f2c6ca2dd8dbcc71cc 100644 (file)
--- a/src/test/regress/sql/jsonpath_encoding.sql
+++ b/src/test/regress/sql/jsonpath_encoding.sql
@@ -24,29 +24,6 @@ select '"dollar \\u0024 character"'::jsonpath as not_an_escape;
  select '"null \u0000 escape"'::jsonpath as not_unescaped;
  select '"null \\u0000 escape"'::jsonpath as not_an_escape;
  
--- checks for single-quoted values
-
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;            -- ERROR, incomplete escape
-SELECT E'\'\u00\''::jsonpath;  -- ERROR, incomplete escape
-SELECT E'\'\u000g\''::jsonpath;        -- ERROR, g is not a hex digit
-SELECT E'\'\u0000\''::jsonpath;        -- OK, legal escape
-SELECT E'\'\uaBcD\''::jsonpath;        -- OK, uppercase and lower case both OK
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-
  -- checks for quoted key names
  
  -- basic unicode input
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 20 Sep 2019 18:22:58 +0000 (14:22 -0400)
src/backend/utils/adt/jsonpath_scan.l		patch \| blob \| history
src/test/regress/expected/jsonpath.out		patch \| blob \| history
src/test/regress/expected/jsonpath_encoding.out		patch \| blob \| history
src/test/regress/expected/jsonpath_encoding_1.out		patch \| blob \| history
src/test/regress/sql/jsonpath.sql		patch \| blob \| history
src/test/regress/sql/jsonpath_encoding.sql		patch \| blob \| history