%option noyyfree
/*
- * We use exclusive states for quoted, signle-quoted and non-quoted strings,
- * quoted variable names and C-tyle comments.
+ * We use exclusive states for quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
* Exclusive states:
* <xq> - quoted strings
* <xnq> - non-quoted strings
* <xvq> - quoted variable names
- * <xsq> - single-quoted strings
* <xc> - C-style comment
*/
%x xq
%x xnq
%x xvq
-%x xsq
%x xc
-special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
-any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
+special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
blank [ \t\n\r\f]
+/*
+ * "other" matches any single character that is not in {special}, not in
+ * {blank}, and is neither a backslash nor a double quote.  A single quote
+ * is not excluded, so it counts as an ordinary "other" character.
+ */
+other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
digit [0-9]
integer (0|[1-9]{digit}*)
%%
-<xnq>{any}+ {
+<xnq>{other}+ {
addstring(false, yytext, yyleng);
}
return checkKeyword();
}
-
<xnq>\/\* {
yylval->str = scanstring;
BEGIN xc;
}
-<xnq>({special}|\"|\') {
+<xnq>({special}|\") {
yylval->str = scanstring;
yyless(0);
BEGIN INITIAL;
return checkKeyword();
}
-<xnq,xq,xvq,xsq>\\[\"\'\\] { addchar(false, yytext[1]); }
-
-<xnq,xq,xvq,xsq>\\b { addchar(false, '\b'); }
+<xnq,xq,xvq>\\b { addchar(false, '\b'); }
-<xnq,xq,xvq,xsq>\\f { addchar(false, '\f'); }
+<xnq,xq,xvq>\\f { addchar(false, '\f'); }
-<xnq,xq,xvq,xsq>\\n { addchar(false, '\n'); }
+<xnq,xq,xvq>\\n { addchar(false, '\n'); }
-<xnq,xq,xvq,xsq>\\r { addchar(false, '\r'); }
+<xnq,xq,xvq>\\r { addchar(false, '\r'); }
-<xnq,xq,xvq,xsq>\\t { addchar(false, '\t'); }
+<xnq,xq,xvq>\\t { addchar(false, '\t'); }
-<xnq,xq,xvq,xsq>\\v { addchar(false, '\v'); }
+<xnq,xq,xvq>\\v { addchar(false, '\v'); }
-<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); }
-<xnq,xq,xvq,xsq>{hex_char} { parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char} { parseHexChar(yytext); }
-<xnq,xq,xvq,xsq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); }
-<xnq,xq,xvq,xsq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); }
-<xnq,xq,xvq,xsq>{unicode}+\\ {
- /* throw back the \\, and treat as unicode */
- yyless(yyleng - 1);
- parseUnicode(yytext, yyleng);
- }
+<xnq,xq,xvq>{unicode}+\\ {
+ /*
+ * One or more {unicode} matches followed by a backslash: give the
+ * trailing backslash back to the input with yyless(), which also
+ * shortens yytext/yyleng, so parseUnicode() sees only the part
+ * before that backslash.
+ */
+ yyless(yyleng - 1);
+ parseUnicode(yytext, yyleng);
+ }
-<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "escape sequence is invalid"); }
+<xnq,xq,xvq>\\. { /* backslash + any char not claimed by an earlier escape rule: keep the char after the backslash as-is */ addchar(false, yytext[1]); }
-<xnq,xq,xvq,xsq>\\ { yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\ { yyerror(NULL, "unexpected end after backslash"); }
-<xq,xvq,xsq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); }
<xq>\" {
yylval->str = scanstring;
return VARIABLE_P;
}
-<xsq>\' {
- yylval->str = scanstring;
- BEGIN INITIAL;
- return STRING_P;
- }
-
<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
-<xsq>[^\\\']+ { addstring(false, yytext, yyleng); }
-
<xc>\*\/ { BEGIN INITIAL; }
<xc>[^\*]+ { }
\> { return GREATER_P; }
-\${any}+ {
+\${other}+ {
addstring(true, yytext + 1, yyleng - 1);
addchar(false, '\0');
yylval->str = scanstring;
({realfail1}|{realfail2}) { yyerror(NULL, "invalid floating point number"); }
-{any}+ {
- addstring(true, yytext, yyleng);
- BEGIN xnq;
- }
-
\" {
addchar(true, '\0');
BEGIN xq;
}
-\' {
- addchar(true, '\0');
- BEGIN xsq;
- }
-
\\ {
yyless(0);
addchar(true, '\0');
BEGIN xnq;
}
+{other}+ {
+ /*
+ * Start of a non-quoted string: hand the matched text to
+ * addstring() and switch to <xnq> to scan the rest of it.
+ * NOTE(review): the leading "true" presumably asks addstring() to
+ * start a fresh buffer -- confirm against its definition.
+ */
+ addstring(true, yytext, yyleng);
+ BEGIN xnq;
+ }
+
<<EOF>> { yyterminate(); }
%%
"\b\f\r\n\t\u000b\"'\\"
(1 row)
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
- jsonpath
--------------------------
- "\b\f\r\n\t\u000b\"'\\"
-(1 row)
-
select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
jsonpath
----------
"PgSQL"
(1 row)
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
- jsonpath
-----------
- "PgSQL"
-(1 row)
-
select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
jsonpath
---------------------
$."fooPgSQL\t\"bar"
(1 row)
+select '"\z"'::jsonpath; -- unrecognized escape is just the literal char
+ jsonpath
+----------
+ "z"
+(1 row)
+
select '$.g ? ($.a == 1)'::jsonpath;
jsonpath
--------------------
"null \\u0000 escape"
(1 row)
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape
-ERROR: invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
- ^
-SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK
- jsonpath
-----------
- "ꯍ"
-(1 row)
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
- correct_in_utf8
------------------
- "😄🐶"
-(1 row)
-
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
- ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
- ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
- ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
- ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
- correct_in_utf8
-------------------------
- "the Copyright © sign"
-(1 row)
-
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
- correct_everywhere
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
- not_an_escape
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR: invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
- ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR: unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
- ^
-DETAIL: \u0000 cannot be converted to text.
-- checks for quoted key names
-- basic unicode input
SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape
"null \\u0000 escape"
(1 row)
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit
-ERROR: invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
- ^
-HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape
-ERROR: invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
- ^
-SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK
-ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD"
-LINE 1: SELECT E'\'\uaBcD\''::jsonpath;
- ^
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04"
-LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_...
- ^
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
- ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
- ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
- ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
- ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9"
-LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct...
- ^
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
- correct_everywhere
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
- not_an_escape
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR: invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
- ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR: unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
- ^
-DETAIL: \u0000 cannot be converted to text.
-- checks for quoted key names
-- basic unicode input
SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape
select '1 * 2 + 4 % -3 != false'::jsonpath;
select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
+select '"\z"'::jsonpath; -- unrecognized escape is just the literal char
select '$.g ? ($.a == 1)'::jsonpath;
select '$.g ? (@ == 1)'::jsonpath;
select '"null \u0000 escape"'::jsonpath as not_unescaped;
select '"null \\u0000 escape"'::jsonpath as not_an_escape;
--- checks for single-quoted values
-
--- basic unicode input
-SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape
-SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape
-SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit
-SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape
-SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-
-- checks for quoted key names
-- basic unicode input