From 1d88a75c424664cc85f307a876cde85191d27272 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 25 Mar 2019 15:43:56 +0300 Subject: [PATCH] Get rid of backtracking in jsonpath_scan.l Non-backtracking flex parsers work faster than backtracking ones. So, this commit gets rid of backtracking in jsonpath_scan.l. That required explicit handling of some cases as well as manual backtracking for some cases. More regression tests for numerics are added. Discussion: https://mail.google.com/mail/u/0?ik=a20b091faa&view=om&permmsgid=msg-f%3A1628425344167939063 Author: John Naylor, Nikita Glukhov, Alexander Korotkov --- src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/jsonpath_scan.l | 56 ++-- src/test/regress/expected/jsonb_jsonpath.out | 2 +- src/test/regress/expected/jsonpath.out | 168 ++++++++++++ .../regress/expected/jsonpath_encoding.out | 249 ++++++++++++++++++ .../regress/expected/jsonpath_encoding_1.out | 237 +++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/jsonb_jsonpath.sql | 2 +- src/test/regress/sql/jsonpath.sql | 30 +++ src/test/regress/sql/jsonpath_encoding.sql | 71 +++++ 11 files changed, 795 insertions(+), 24 deletions(-) create mode 100644 src/test/regress/expected/jsonpath_encoding.out create mode 100644 src/test/regress/expected/jsonpath_encoding_1.out create mode 100644 src/test/regress/sql/jsonpath_encoding.sql diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index b64ab4ed88..4ef769749d 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -34,6 +34,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ windowfuncs.o xid.o xml.o jsonpath_scan.c: FLEXFLAGS = -CF -p -p +jsonpath_scan.c: FLEX_NO_BACKUP=yes # Force these dependencies to be known even without dependency info built: jsonpath_gram.o: jsonpath_scan.c diff --git a/src/backend/utils/adt/jsonpath_scan.l 
b/src/backend/utils/adt/jsonpath_scan.l index e93307f407..4b913c3bee 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -31,7 +31,7 @@ static void addstring(bool init, char *s, int l); static void addchar(bool init, char s); static enum yytokentype checkKeyword(void); static void parseUnicode(char *s, int l); -static void parseHexChars(char *s, int l); +static void parseHexChar(char *s); /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ #undef fprintf @@ -78,9 +78,20 @@ fprintf_to_ereport(const char *fmt, const char *msg) special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] blank [ \t\n\r\f] + +digit [0-9] +integer {digit}+ +decimal {digit}*\.{digit}+ +decimalfail {digit}+\. +real ({integer}|{decimal})[Ee][-+]?{digit}+ +realfail1 ({integer}|{decimal})[Ee] +realfail2 ({integer}|{decimal})[Ee][-+] + hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) +unicodefail \\u({hex_dig}{0,3}|\{{hex_dig}{0,6}) hex_char \\x{hex_dig}{2} +hex_fail \\x{hex_dig}{0,1} %% @@ -129,11 +140,17 @@ hex_char \\x{hex_dig}{2} {unicode}+ { parseUnicode(yytext, yyleng); } -{hex_char}+ { parseHexChars(yytext, yyleng); } +{hex_char} { parseHexChar(yytext); } + +{unicode}*{unicodefail} { yyerror(NULL, "Unicode sequence is invalid"); } -\\x { yyerror(NULL, "Hex character sequence is invalid"); } +{hex_fail} { yyerror(NULL, "Hex character sequence is invalid"); } -\\u { yyerror(NULL, "Unicode sequence is invalid"); } +{unicode}+\\ { + /* throw back the \\, and treat as unicode */ + yyless(yyleng - 1); + parseUnicode(yytext, yyleng); + } \\. 
{ yyerror(NULL, "Escape sequence is invalid"); } @@ -214,34 +231,38 @@ hex_char \\x{hex_dig}{2} BEGIN xc; } -[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ +{real} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -\.[0-9]+[eE][+-]?[0-9]+ { /* float */ +{decimal} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -([0-9]+)?\.[0-9]+ { +{integer} { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; - return NUMERIC_P; + return INT_P; } -[0-9]+ { +{decimalfail} { + /* throw back the ., and treat as integer */ + yyless(yyleng - 1); addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return INT_P; } +({realfail1}|{realfail2}) { yyerror(NULL, "Floating point number is invalid"); } + {any}+ { addstring(true, yytext, yyleng); BEGIN xnq; @@ -571,7 +592,7 @@ addUnicode(int ch, int *hi_surrogate) static void parseUnicode(char *s, int l) { - int i; + int i = 2; int hi_surrogate = -1; for (i = 2; i < l; i += 2) /* skip '\u' */ @@ -606,19 +627,12 @@ parseUnicode(char *s, int l) /* Parse sequence of hex-encoded characters */ static void -parseHexChars(char *s, int l) +parseHexChar(char *s) { - int i; - - Assert(l % 4 /* \xXX */ == 0); - - for (i = 0; i < l / 4; i++) - { - int ch = (hexval(s[i * 4 + 2]) << 4) | - hexval(s[i * 4 + 3]); + int ch = (hexval(s[2]) << 4) | + hexval(s[3]); - addUnicodeChar(ch); - } + addUnicodeChar(ch); } /* diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index e604bae6a3..4a84d9157f 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -1297,7 +1297,7 @@ select jsonb_path_query('null', 'true.type()'); "boolean" (1 row) -select jsonb_path_query('null', '123.type()'); +select jsonb_path_query('null', '(123).type()'); jsonb_path_query ------------------ "number" diff --git 
a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out index baaf9e3667..b7de491503 100644 --- a/src/test/regress/expected/jsonpath.out +++ b/src/test/regress/expected/jsonpath.out @@ -365,6 +365,18 @@ select '1.type()'::jsonpath; 1.type() (1 row) +select '(1).type()'::jsonpath; + jsonpath +---------- + 1.type() +(1 row) + +select '1.2.type()'::jsonpath; + jsonpath +------------ + 1.2.type() +(1 row) + select '"aaa".type()'::jsonpath; jsonpath -------------- @@ -804,3 +816,159 @@ select '$ ? (@.a < +10.1e+1)'::jsonpath; $?(@."a" < 101) (1 row) +select '0'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '00'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '0.0'::jsonpath; + jsonpath +---------- + 0.0 +(1 row) + +select '0.000'::jsonpath; + jsonpath +---------- + 0.000 +(1 row) + +select '0.000e1'::jsonpath; + jsonpath +---------- + 0.00 +(1 row) + +select '0.000e2'::jsonpath; + jsonpath +---------- + 0.0 +(1 row) + +select '0.000e3'::jsonpath; + jsonpath +---------- + 0 +(1 row) + +select '0.0010'::jsonpath; + jsonpath +---------- + 0.0010 +(1 row) + +select '0.0010e-1'::jsonpath; + jsonpath +---------- + 0.00010 +(1 row) + +select '0.0010e+1'::jsonpath; + jsonpath +---------- + 0.010 +(1 row) + +select '0.0010e+2'::jsonpath; + jsonpath +---------- + 0.10 +(1 row) + +select '1e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1e'::jsonpath; + ^ +DETAIL: Floating point number is invalid at or near "1e" +select '1.e'::jsonpath; + jsonpath +---------- + 1."e" +(1 row) + +select '1.2e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1.2e'::jsonpath; + ^ +DETAIL: Floating point number is invalid at or near "1.2e" +select '1.2.e'::jsonpath; + jsonpath +---------- + 1.2."e" +(1 row) + +select '(1.2).e'::jsonpath; + jsonpath +---------- + 1.2."e" +(1 row) + +select '1e3'::jsonpath; + jsonpath +---------- + 1000 +(1 row) + +select '1.e3'::jsonpath; + jsonpath +---------- + 1."e3" +(1 row) + +select 
'1.e3.e'::jsonpath; + jsonpath +------------ + 1."e3"."e" +(1 row) + +select '1.e3.e4'::jsonpath; + jsonpath +------------- + 1."e3"."e4" +(1 row) + +select '1.2e3'::jsonpath; + jsonpath +---------- + 1200 +(1 row) + +select '1.2.e3'::jsonpath; + jsonpath +---------- + 1.2."e3" +(1 row) + +select '(1.2).e3'::jsonpath; + jsonpath +---------- + 1.2."e3" +(1 row) + +select '1..e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1..e'::jsonpath; + ^ +DETAIL: syntax error, unexpected '.' at or near "." +select '1..e3'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '1..e3'::jsonpath; + ^ +DETAIL: syntax error, unexpected '.' at or near "." +select '(1.).e'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '(1.).e'::jsonpath; + ^ +DETAIL: syntax error, unexpected ')' at or near ")" +select '(1.).e3'::jsonpath; +ERROR: bad jsonpath representation +LINE 1: select '(1.).e3'::jsonpath; + ^ +DETAIL: syntax error, unexpected ')' at or near ")" diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out new file mode 100644 index 0000000000..6d828d1724 --- /dev/null +++ b/src/test/regress/expected/jsonpath_encoding.out @@ -0,0 +1,249 @@ +-- encoding-sensitive tests for jsonpath +-- checks for double-quoted values +-- basic unicode input +SELECT '"\u"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u" +SELECT '"\u00"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u00"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u00" +SELECT '"\u000g"'::jsonpath; -- ERROR, g is not a hex digit +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u000g"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u000" +SELECT '"\u0000"'::jsonpath; -- OK, legal escape +ERROR: unsupported Unicode escape sequence 
+LINE 1: SELECT '"\u0000"'::jsonpath; + ^ +DETAIL: \u0000 cannot be converted to text. +SELECT '"\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK + jsonpath +---------- + "ꯍ" +(1 row) + +-- handling of unicode surrogate pairs +select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; + correct_in_utf8 +----------------- + "😄🐶" +(1 row) + +select '"\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ud83d\ud83d"'::jsonpath; + ^ +DETAIL: Unicode high surrogate must not follow a high surrogate. +select '"\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ude04\ud83d"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '"\ud83dX"'::jsonpath; -- orphan high surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ud83dX"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '"\ude04X"'::jsonpath; -- orphan low surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ude04X"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +--handling of simple unicode escapes +select '"the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; + correct_in_utf8 +------------------------ + "the Copyright © sign" +(1 row) + +select '"dollar \u0024 character"'::jsonpath as correct_everywhere; + correct_everywhere +---------------------- + "dollar $ character" +(1 row) + +select '"dollar \\u0024 character"'::jsonpath as not_an_escape; + not_an_escape +---------------------------- + "dollar \\u0024 character" +(1 row) + +select '"null \u0000 escape"'::jsonpath as not_unescaped; +ERROR: unsupported Unicode escape sequence +LINE 1: select '"null \u0000 escape"'::jsonpath as not_unescaped; + ^ +DETAIL: \u0000 cannot be converted to text. 
+select '"null \\u0000 escape"'::jsonpath as not_an_escape; + not_an_escape +----------------------- + "null \\u0000 escape" +(1 row) + +-- checks for single-quoted values +-- basic unicode input +SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u00\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u000g\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape +ERROR: invalid Unicode escape value at or near "E'\'\u0000" +LINE 1: SELECT E'\'\u0000\''::jsonpath; + ^ +SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK + jsonpath +---------- + "ꯍ" +(1 row) + +-- handling of unicode surrogate pairs +select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; + correct_in_utf8 +----------------- + "😄🐶" +(1 row) + +select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row +ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d" +LINE 1: select E'\'\ud83d\ud83d\''::jsonpath; + ^ +select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order +ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" +LINE 1: select E'\'\ude04\ud83d\''::jsonpath; + ^ +select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate +ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX" +LINE 1: select E'\'\ud83dX\''::jsonpath; + ^ +select E'\'\ude04X\''::jsonpath; -- orphan low surrogate +ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" +LINE 1: select E'\'\ude04X\''::jsonpath; + ^ +--handling of simple unicode escapes +select E'\'the Copyright \u00a9 sign\''::jsonpath as 
correct_in_utf8; + correct_in_utf8 +------------------------ + "the Copyright © sign" +(1 row) + +select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; + correct_everywhere +---------------------- + "dollar $ character" +(1 row) + +select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; + not_an_escape +---------------------- + "dollar $ character" +(1 row) + +select E'\'null \u0000 escape\''::jsonpath as not_unescaped; +ERROR: invalid Unicode escape value at or near "E'\'null \u0000" +LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped; + ^ +select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; +ERROR: unsupported Unicode escape sequence +LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape... + ^ +DETAIL: \u0000 cannot be converted to text. +-- checks for quoted key names +-- basic unicode input +SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u" +SELECT '$."\u00"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u00"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u00" +SELECT '$."\u000g"'::jsonpath; -- ERROR, g is not a hex digit +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u000g"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u000" +SELECT '$."\u0000"'::jsonpath; -- OK, legal escape +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '$."\u0000"'::jsonpath; + ^ +DETAIL: \u0000 cannot be converted to text. 
+SELECT '$."\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK + jsonpath +---------- + $."ꯍ" +(1 row) + +-- handling of unicode surrogate pairs +select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; + correct_in_utf8 +----------------- + $."😄🐶" +(1 row) + +select '$."\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ud83d\ud83d"'::jsonpath; + ^ +DETAIL: Unicode high surrogate must not follow a high surrogate. +select '$."\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ude04\ud83d"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '$."\ud83dX"'::jsonpath; -- orphan high surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ud83dX"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '$."\ude04X"'::jsonpath; -- orphan low surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ude04X"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +--handling of simple unicode escapes +select '$."the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; + correct_in_utf8 +-------------------------- + $."the Copyright © sign" +(1 row) + +select '$."dollar \u0024 character"'::jsonpath as correct_everywhere; + correct_everywhere +------------------------ + $."dollar $ character" +(1 row) + +select '$."dollar \\u0024 character"'::jsonpath as not_an_escape; + not_an_escape +------------------------------ + $."dollar \\u0024 character" +(1 row) + +select '$."null \u0000 escape"'::jsonpath as not_unescaped; +ERROR: unsupported Unicode escape sequence +LINE 1: select '$."null \u0000 escape"'::jsonpath as not_unescaped; + ^ +DETAIL: \u0000 cannot be converted to text. 
+select '$."null \\u0000 escape"'::jsonpath as not_an_escape; + not_an_escape +------------------------- + $."null \\u0000 escape" +(1 row) + diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out new file mode 100644 index 0000000000..04179a8df7 --- /dev/null +++ b/src/test/regress/expected/jsonpath_encoding_1.out @@ -0,0 +1,237 @@ +-- encoding-sensitive tests for jsonpath +-- checks for double-quoted values +-- basic unicode input +SELECT '"\u"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u" +SELECT '"\u00"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u00"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u00" +SELECT '"\u000g"'::jsonpath; -- ERROR, g is not a hex digit +ERROR: bad jsonpath representation +LINE 1: SELECT '"\u000g"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u000" +SELECT '"\u0000"'::jsonpath; -- OK, legal escape +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '"\u0000"'::jsonpath; + ^ +DETAIL: \u0000 cannot be converted to text. +SELECT '"\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK +ERROR: invalid input syntax for type jsonpath +LINE 1: SELECT '"\uaBcD"'::jsonpath; + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +-- handling of unicode surrogate pairs +select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. 
+select '"\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ud83d\ud83d"'::jsonpath; + ^ +DETAIL: Unicode high surrogate must not follow a high surrogate. +select '"\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ude04\ud83d"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '"\ud83dX"'::jsonpath; -- orphan high surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ud83dX"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '"\ude04X"'::jsonpath; -- orphan low surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"\ude04X"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +--handling of simple unicode escapes +select '"the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; +ERROR: invalid input syntax for type jsonpath +LINE 1: select '"the Copyright \u00a9 sign"'::jsonpath as correct_in... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +select '"dollar \u0024 character"'::jsonpath as correct_everywhere; + correct_everywhere +---------------------- + "dollar $ character" +(1 row) + +select '"dollar \\u0024 character"'::jsonpath as not_an_escape; + not_an_escape +---------------------------- + "dollar \\u0024 character" +(1 row) + +select '"null \u0000 escape"'::jsonpath as not_unescaped; +ERROR: unsupported Unicode escape sequence +LINE 1: select '"null \u0000 escape"'::jsonpath as not_unescaped; + ^ +DETAIL: \u0000 cannot be converted to text. 
+select '"null \\u0000 escape"'::jsonpath as not_an_escape; + not_an_escape +----------------------- + "null \\u0000 escape" +(1 row) + +-- checks for single-quoted values +-- basic unicode input +SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u00\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit +ERROR: invalid Unicode escape +LINE 1: SELECT E'\'\u000g\''::jsonpath; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape +ERROR: invalid Unicode escape value at or near "E'\'\u0000" +LINE 1: SELECT E'\'\u0000\''::jsonpath; + ^ +SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK +ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD" +LINE 1: SELECT E'\'\uaBcD\''::jsonpath; + ^ +-- handling of unicode surrogate pairs +select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; +ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04" +LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_... 
+ ^ +select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row +ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d" +LINE 1: select E'\'\ud83d\ud83d\''::jsonpath; + ^ +select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order +ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" +LINE 1: select E'\'\ude04\ud83d\''::jsonpath; + ^ +select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate +ERROR: invalid Unicode surrogate pair at or near "E'\'\ud83dX" +LINE 1: select E'\'\ud83dX\''::jsonpath; + ^ +select E'\'\ude04X\''::jsonpath; -- orphan low surrogate +ERROR: invalid Unicode surrogate pair at or near "E'\'\ude04" +LINE 1: select E'\'\ude04X\''::jsonpath; + ^ +--handling of simple unicode escapes +select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8; +ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9" +LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct... + ^ +select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; + correct_everywhere +---------------------- + "dollar $ character" +(1 row) + +select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; + not_an_escape +---------------------- + "dollar $ character" +(1 row) + +select E'\'null \u0000 escape\''::jsonpath as not_unescaped; +ERROR: invalid Unicode escape value at or near "E'\'null \u0000" +LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped; + ^ +select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; +ERROR: unsupported Unicode escape sequence +LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape... + ^ +DETAIL: \u0000 cannot be converted to text. 
+-- checks for quoted key names +-- basic unicode input +SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u" +SELECT '$."\u00"'::jsonpath; -- ERROR, incomplete escape +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u00"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u00" +SELECT '$."\u000g"'::jsonpath; -- ERROR, g is not a hex digit +ERROR: bad jsonpath representation +LINE 1: SELECT '$."\u000g"'::jsonpath; + ^ +DETAIL: Unicode sequence is invalid at or near "\u000" +SELECT '$."\u0000"'::jsonpath; -- OK, legal escape +ERROR: unsupported Unicode escape sequence +LINE 1: SELECT '$."\u0000"'::jsonpath; + ^ +DETAIL: \u0000 cannot be converted to text. +SELECT '$."\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK +ERROR: invalid input syntax for type jsonpath +LINE 1: SELECT '$."\uaBcD"'::jsonpath; + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +-- handling of unicode surrogate pairs +select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_i... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +select '$."\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ud83d\ud83d"'::jsonpath; + ^ +DETAIL: Unicode high surrogate must not follow a high surrogate. +select '$."\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ude04\ud83d"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. 
+select '$."\ud83dX"'::jsonpath; -- orphan high surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ud83dX"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +select '$."\ude04X"'::jsonpath; -- orphan low surrogate +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."\ude04X"'::jsonpath; + ^ +DETAIL: Unicode low surrogate must follow a high surrogate. +--handling of simple unicode escapes +select '$."the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; +ERROR: invalid input syntax for type jsonpath +LINE 1: select '$."the Copyright \u00a9 sign"'::jsonpath as correct_... + ^ +DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. +select '$."dollar \u0024 character"'::jsonpath as correct_everywhere; + correct_everywhere +------------------------ + $."dollar $ character" +(1 row) + +select '$."dollar \\u0024 character"'::jsonpath as not_an_escape; + not_an_escape +------------------------------ + $."dollar \\u0024 character" +(1 row) + +select '$."null \u0000 escape"'::jsonpath as not_unescaped; +ERROR: unsupported Unicode escape sequence +LINE 1: select '$."null \u0000 escape"'::jsonpath as not_unescaped; + ^ +DETAIL: \u0000 cannot be converted to text. 
+select '$."null \\u0000 escape"'::jsonpath as not_an_escape; + not_an_escape +------------------------- + $."null \\u0000 escape" +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index de4989ff94..908fbf650a 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -109,7 +109,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo # ---------- # Another group of parallel tests (JSON related) # ---------- -test: json jsonb json_encoding jsonpath jsonb_jsonpath +test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 175ee263b6..fa754d1c6b 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -160,6 +160,7 @@ test: json test: jsonb test: json_encoding test: jsonpath +test: jsonpath_encoding test: jsonb_jsonpath test: indirect_toast test: equivclass diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index 41b346b2d4..28c861bb17 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -269,7 +269,7 @@ select jsonb_path_query('[null,1,true,"a",[],{}]', 'lax $.type()'); select jsonb_path_query('[null,1,true,"a",[],{}]', '$[*].type()'); select jsonb_path_query('null', 'null.type()'); select jsonb_path_query('null', 'true.type()'); -select jsonb_path_query('null', '123.type()'); +select jsonb_path_query('null', '(123).type()'); select jsonb_path_query('null', '"123".type()'); select jsonb_path_query('{"a": 2}', '($.a - 5).abs() + 10'); diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql index e5f3391a66..9171ddbc6c 100644 --- a/src/test/regress/sql/jsonpath.sql +++ b/src/test/regress/sql/jsonpath.sql @@ -66,6 +66,8 @@ select '$[$[0] ? 
(last > 0)]'::jsonpath; select 'null.type()'::jsonpath; select '1.type()'::jsonpath; +select '(1).type()'::jsonpath; +select '1.2.type()'::jsonpath; select '"aaa".type()'::jsonpath; select 'true.type()'::jsonpath; select '$.double().floor().ceiling().abs()'::jsonpath; @@ -145,3 +147,31 @@ select '$ ? (@.a < +0.1e+1)'::jsonpath; select '$ ? (@.a < 10.1e+1)'::jsonpath; select '$ ? (@.a < -10.1e+1)'::jsonpath; select '$ ? (@.a < +10.1e+1)'::jsonpath; + +select '0'::jsonpath; +select '00'::jsonpath; +select '0.0'::jsonpath; +select '0.000'::jsonpath; +select '0.000e1'::jsonpath; +select '0.000e2'::jsonpath; +select '0.000e3'::jsonpath; +select '0.0010'::jsonpath; +select '0.0010e-1'::jsonpath; +select '0.0010e+1'::jsonpath; +select '0.0010e+2'::jsonpath; +select '1e'::jsonpath; +select '1.e'::jsonpath; +select '1.2e'::jsonpath; +select '1.2.e'::jsonpath; +select '(1.2).e'::jsonpath; +select '1e3'::jsonpath; +select '1.e3'::jsonpath; +select '1.e3.e'::jsonpath; +select '1.e3.e4'::jsonpath; +select '1.2e3'::jsonpath; +select '1.2.e3'::jsonpath; +select '(1.2).e3'::jsonpath; +select '1..e'::jsonpath; +select '1..e3'::jsonpath; +select '(1.).e'::jsonpath; +select '(1.).e3'::jsonpath; diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql new file mode 100644 index 0000000000..a3b5bc39a1 --- /dev/null +++ b/src/test/regress/sql/jsonpath_encoding.sql @@ -0,0 +1,71 @@ + +-- encoding-sensitive tests for jsonpath + +-- checks for double-quoted values + +-- basic unicode input +SELECT '"\u"'::jsonpath; -- ERROR, incomplete escape +SELECT '"\u00"'::jsonpath; -- ERROR, incomplete escape +SELECT '"\u000g"'::jsonpath; -- ERROR, g is not a hex digit +SELECT '"\u0000"'::jsonpath; -- OK, legal escape +SELECT '"\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK + +-- handling of unicode surrogate pairs +select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; +select '"\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row 
+select '"\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +select '"\ud83dX"'::jsonpath; -- orphan high surrogate +select '"\ude04X"'::jsonpath; -- orphan low surrogate + +--handling of simple unicode escapes +select '"the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; +select '"dollar \u0024 character"'::jsonpath as correct_everywhere; +select '"dollar \\u0024 character"'::jsonpath as not_an_escape; +select '"null \u0000 escape"'::jsonpath as not_unescaped; +select '"null \\u0000 escape"'::jsonpath as not_an_escape; + +-- checks for single-quoted values + +-- basic unicode input +SELECT E'\'\u\''::jsonpath; -- ERROR, incomplete escape +SELECT E'\'\u00\''::jsonpath; -- ERROR, incomplete escape +SELECT E'\'\u000g\''::jsonpath; -- ERROR, g is not a hex digit +SELECT E'\'\u0000\''::jsonpath; -- OK, legal escape +SELECT E'\'\uaBcD\''::jsonpath; -- OK, uppercase and lower case both OK + +-- handling of unicode surrogate pairs +select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8; +select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row +select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order +select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate +select E'\'\ude04X\''::jsonpath; -- orphan low surrogate + +--handling of simple unicode escapes +select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8; +select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere; +select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape; +select E'\'null \u0000 escape\''::jsonpath as not_unescaped; +select E'\'null \\u0000 escape\''::jsonpath as not_an_escape; + +-- checks for quoted key names + +-- basic unicode input +SELECT '$."\u"'::jsonpath; -- ERROR, incomplete escape +SELECT '$."\u00"'::jsonpath; -- ERROR, incomplete escape +SELECT '$."\u000g"'::jsonpath; -- ERROR, g is not a hex digit +SELECT '$."\u0000"'::jsonpath; -- OK, legal escape +SELECT '$."\uaBcD"'::jsonpath; -- OK, uppercase and lower 
case both OK + +-- handling of unicode surrogate pairs +select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; +select '$."\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row +select '$."\ude04\ud83d"'::jsonpath; -- surrogates in wrong order +select '$."\ud83dX"'::jsonpath; -- orphan high surrogate +select '$."\ude04X"'::jsonpath; -- orphan low surrogate + +--handling of simple unicode escapes +select '$."the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; +select '$."dollar \u0024 character"'::jsonpath as correct_everywhere; +select '$."dollar \\u0024 character"'::jsonpath as not_an_escape; +select '$."null \u0000 escape"'::jsonpath as not_unescaped; +select '$."null \\u0000 escape"'::jsonpath as not_an_escape; -- 2.40.0