2 /*-------------------------------------------------------------------------
5 * Grammar definitions for jsonpath datatype
7 * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
9 * Copyright (c) 2019, PostgreSQL Global Development Group
12 * src/backend/utils/adt/jsonpath_gram.y
14 *-------------------------------------------------------------------------
19 #include "catalog/pg_collation.h"
21 #include "miscadmin.h"
22 #include "nodes/pg_list.h"
23 #include "regex/regex.h"
24 #include "utils/builtins.h"
25 #include "utils/jsonpath.h"
27 /* struct JsonPathString is shared between scan and gram */
28 typedef struct JsonPathString
37 /* flex 2.5.4 doesn't bother with a decl for this */
38 int jsonpath_yylex(union YYSTYPE *yylval_param);
39 int jsonpath_yyparse(JsonPathParseResult **result);
40 void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
42 static JsonPathParseItem *makeItemType(JsonPathItemType type);
43 static JsonPathParseItem *makeItemString(JsonPathString *s);
44 static JsonPathParseItem *makeItemVariable(JsonPathString *s);
45 static JsonPathParseItem *makeItemKey(JsonPathString *s);
46 static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
47 static JsonPathParseItem *makeItemBool(bool val);
48 static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
49 JsonPathParseItem *la,
50 JsonPathParseItem *ra);
51 static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
52 JsonPathParseItem *a);
53 static JsonPathParseItem *makeItemList(List *list);
54 static JsonPathParseItem *makeIndexArray(List *list);
55 static JsonPathParseItem *makeAny(int first, int last);
56 static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
57 JsonPathString *pattern,
58 JsonPathString *flags);
61 * Bison doesn't allocate anything that needs to live across parser calls,
62 * so we can easily have it use palloc instead of malloc. This prevents
63 * memory leaks if we error out during parsing. Note this only works with
64 * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
65 * if possible, so there's not really much problem anyhow, at least if
66 * you're building with gcc.
68 #define YYMALLOC palloc
73 /* BISON Declarations */
76 %name-prefix="jsonpath_yy"
78 %parse-param {JsonPathParseResult **result}
82 List *elems; /* list of JsonPathParseItem */
83 List *indexs; /* list of integers */
84 JsonPathParseItem *value;
85 JsonPathParseResult *result;
86 JsonPathItemType optype;
91 %token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
92 %token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
93 %token <str> OR_P AND_P NOT_P
94 %token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
95 %token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
96 %token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
97 %token <str> DATETIME_P
101 %type <value> scalar_value path_primary expr array_accessor
102 any_path accessor_op key predicate delimited_predicate
103 index_elem starts_with_initial expr_or_predicate
104 datetime_template opt_datetime_template
106 %type <elems> accessor_expr
108 %type <indexs> index_list
110 %type <optype> comp_op method
116 %type <integer> any_level
126 /* Grammar follows */
/*
 * Top-level rule.  For a non-empty path, allocate the JsonPathParseResult
 * that is handed back through the %parse-param pointer and store the parsed
 * expression ($2) in it.  For empty input the result pointer is set to NULL.
 */
130 mode expr_or_predicate {
131 *result = palloc(sizeof(JsonPathParseResult));
132 (*result)->expr = $2;
135 | /* EMPTY */ { *result = NULL; }
/* A bare predicate is passed through unchanged as the value of this rule. */
140 | predicate { $$ = $1; }
/*
 * Mode keyword: STRICT_P yields false, while LAX_P and a missing keyword
 * both yield true, so omitting the keyword behaves like LAX_P.
 */
144 STRICT_P { $$ = false; }
145 | LAX_P { $$ = true; }
146 | /* EMPTY */ { $$ = true; }
/*
 * Scalar literals and variables.  NULL_P is built by passing NULL to
 * makeItemString(); NUMERIC_P and INT_P both go through makeItemNumeric().
 */
150 STRING_P { $$ = makeItemString(&$1); }
151 | NULL_P { $$ = makeItemString(NULL); }
152 | TRUE_P { $$ = makeItemBool(true); }
153 | FALSE_P { $$ = makeItemBool(false); }
154 | NUMERIC_P { $$ = makeItemNumeric(&$1); }
155 | INT_P { $$ = makeItemNumeric(&$1); }
156 | VARIABLE_P { $$ = makeItemVariable(&$1); }
/* Map each comparison operator token to its JsonPathItemType value. */
160 EQUAL_P { $$ = jpiEqual; }
161 | NOTEQUAL_P { $$ = jpiNotEqual; }
162 | LESS_P { $$ = jpiLess; }
163 | GREATER_P { $$ = jpiGreater; }
164 | LESSEQUAL_P { $$ = jpiLessOrEqual; }
165 | GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
/*
 * Predicates with explicit delimiters: a parenthesized predicate (its value
 * is passed through) or EXISTS_P '(' expr ')', which wraps the expression in
 * a jpiExists unary item.
 */
169 '(' predicate ')' { $$ = $2; }
170 | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
/*
 * Boolean predicates: comparisons, AND/OR combinations, negation, the
 * IS UNKNOWN test, STARTS WITH, and LIKE_REGEX with or without a flag string.
 */
174 delimited_predicate { $$ = $1; }
175 | expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
176 | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
177 | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
/* NOT_P only accepts a delimited predicate as its operand */
178 | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
179 | '(' predicate ')' IS_P UNKNOWN_P
180 { $$ = makeItemUnary(jpiIsUnknown, $2); }
181 | expr STARTS_P WITH_P starts_with_initial
182 { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
/* without a FLAG_P clause, NULL is passed as the flags string */
183 | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
184 | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
185 { $$ = makeItemLikeRegex($1, &$3, &$5); }
/* Right-hand operand of STARTS WITH: a string literal or a variable. */
189 STRING_P { $$ = makeItemString(&$1); }
190 | VARIABLE_P { $$ = makeItemVariable(&$1); }
/*
 * Starting point of a path: a scalar value, '$' (jpiRoot), '@' (jpiCurrent)
 * or LAST_P (jpiLast).
 */
194 scalar_value { $$ = $1; }
195 | '$' { $$ = makeItemType(jpiRoot); }
196 | '@' { $$ = makeItemType(jpiCurrent); }
197 | LAST_P { $$ = makeItemType(jpiLast); }
/*
 * Collect a primary (or a parenthesized expr/predicate followed by an
 * accessor) into a List, then append each further accessor_op to that List.
 */
201 path_primary { $$ = list_make1($1); }
202 | '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
203 | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
204 | accessor_expr accessor_op { $$ = lappend($1, $2); }
/*
 * Expressions: an accessor list converted to a single item by
 * makeItemList(), a parenthesized expression, unary plus/minus, and the
 * binary arithmetic operators.
 */
208 accessor_expr { $$ = makeItemList($1); }
209 | '(' expr ')' { $$ = $2; }
/* both unary operators take their precedence from UMINUS */
210 | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
211 | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
212 | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
213 | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
214 | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
215 | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
216 | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
/*
 * One array subscript.  A single expression becomes a jpiSubscript item with
 * a NULL right argument; "expr TO_P expr" stores both bounds.
 */
220 expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
221 | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
/* Comma-separated subscripts, accumulated into a List in source order. */
225 index_elem { $$ = list_make1($1); }
226 | index_list ',' index_elem { $$ = lappend($1, $3); }
/*
 * Array accessors: '[' '*' ']' yields a jpiAnyArray item, while an explicit
 * subscript list is converted by makeIndexArray().
 */
230 '[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
231 | '[' index_list ']' { $$ = makeIndexArray($2); }
/*
 * Level bound used with ANY_P: an integer literal converted by pg_atoi(), or
 * LAST_P, represented here as -1 (makeAny() maps negative bounds to
 * PG_UINT32_MAX).
 */
235 INT_P { $$ = pg_atoi($1.val, 4, 0); }
236 | LAST_P { $$ = -1; }
/*
 * ANY_P accessor with optional level bounds: no bounds gives (0, -1), a
 * single level is used for both bounds, and "level TO_P level" gives an
 * explicit range.
 */
240 ANY_P { $$ = makeAny(0, -1); }
241 | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
242 | ANY_P '{' any_level TO_P any_level '}'
243 { $$ = makeAny($3, $5); }
/*
 * Accessor alternatives: wildcard key, array accessor, ANY path, a
 * parameterless item method, the datetime method with an optional template,
 * and a '?' filter over a predicate.
 */
248 | '.' '*' { $$ = makeItemType(jpiAnyKey); }
249 | array_accessor { $$ = $1; }
250 | '.' any_path { $$ = $2; }
/* $2 is the JsonPathItemType chosen by the method rule */
251 | '.' method '(' ')' { $$ = makeItemType($2); }
/* $4 is NULL when no template string was given */
252 | '.' DATETIME_P '(' opt_datetime_template ')'
253 { $$ = makeItemUnary(jpiDatetime, $4); }
254 | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
/* A string literal, turned into a string item with makeItemString(). */
258 STRING_P { $$ = makeItemString(&$1); }
/* Optional datetime template: the template item if present, else NULL. */
261 opt_datetime_template:
262 datetime_template { $$ = $1; }
263 | /* EMPTY */ { $$ = NULL; }
/* An object key: the key_name string is wrapped by makeItemKey(). */
267 key_name { $$ = makeItemKey(&$1); }
/* Map each item-method keyword token to its JsonPathItemType value. */
298 ABS_P { $$ = jpiAbs; }
299 | SIZE_P { $$ = jpiSize; }
300 | TYPE_P { $$ = jpiType; }
301 | FLOOR_P { $$ = jpiFloor; }
302 | DOUBLE_P { $$ = jpiDouble; }
303 | CEILING_P { $$ = jpiCeiling; }
304 | KEYVALUE_P { $$ = jpiKeyValue; }
309 * The helper functions below allocate and fill JsonPathParseItem structs of various
/*
 * Base constructor used by the other make* helpers: allocates a
 * JsonPathParseItem with palloc for the requested item type.
 */
313 static JsonPathParseItem *
314 makeItemType(JsonPathItemType type)
316 JsonPathParseItem *v = palloc(sizeof(*v));
/* every item allocation passes through here, so check for interrupts */
318 CHECK_FOR_INTERRUPTS();
/*
 * Build either a jpiNull item or a jpiString item.
 *
 * For a string item the val pointer and len of s are stored in the item; the
 * character data itself is not copied here.
 * NOTE(review): presumably a NULL s selects the jpiNull case (the grammar
 * passes NULL for NULL_P) -- confirm.
 */
326 static JsonPathParseItem *
327 makeItemString(JsonPathString *s)
329 JsonPathParseItem *v;
333 v = makeItemType(jpiNull);
337 v = makeItemType(jpiString);
338 v->value.string.val = s->val;
339 v->value.string.len = s->len;
/*
 * Build a jpiVariable item whose string value refers to the val pointer and
 * len of s (the character data is not copied here).
 */
345 static JsonPathParseItem *
346 makeItemVariable(JsonPathString *s)
348 JsonPathParseItem *v;
350 v = makeItemType(jpiVariable);
351 v->value.string.val = s->val;
352 v->value.string.len = s->len;
/*
 * Build an item for an object key, starting from the item that
 * makeItemString() produces for s.
 */
357 static JsonPathParseItem *
358 makeItemKey(JsonPathString *s)
360 JsonPathParseItem *v;
362 v = makeItemString(s);
/*
 * Build a jpiNumeric item from the text in s->val, which is parsed by
 * calling numeric_in() through DirectFunctionCall3.
 */
368 static JsonPathParseItem *
369 makeItemNumeric(JsonPathString *s)
371 JsonPathParseItem *v;
373 v = makeItemType(jpiNumeric);
375 DatumGetNumeric(DirectFunctionCall3(numeric_in,
376 CStringGetDatum(s->val),
377 ObjectIdGetDatum(InvalidOid),
/* Build a jpiBool item holding the given boolean value. */
383 static JsonPathParseItem *
384 makeItemBool(bool val)
386 JsonPathParseItem *v = makeItemType(jpiBool);
388 v->value.boolean = val;
/*
 * Build an item of the given type with two operands: la is stored as the
 * left argument and ra as the right one.  The grammar passes a NULL ra for a
 * single-expression subscript.
 */
393 static JsonPathParseItem *
394 makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
396 JsonPathParseItem *v = makeItemType(type);
398 v->value.args.left = la;
399 v->value.args.right = ra;
/*
 * Build a unary item of the given type over operand a.
 *
 * Unary plus or minus applied to a lone numeric literal (a is jpiNumeric and
 * has no following item) is detected separately.  For minus, a new jpiNumeric
 * item is built from numeric_uminus() applied to the operand's value.  Other
 * cases get a fresh item of the requested type.
 * NOTE(review): the plus case presumably hands back a unchanged -- confirm.
 */
404 static JsonPathParseItem *
405 makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
407 JsonPathParseItem *v;
409 if (type == jpiPlus && a->type == jpiNumeric && !a->next)
412 if (type == jpiMinus && a->type == jpiNumeric && !a->next)
414 v = makeItemType(jpiNumeric);
416 DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
417 NumericGetDatum(a->value.numeric)));
421 v = makeItemType(type);
/*
 * Turn a List of JsonPathParseItems into a single item chain, starting from
 * the first list element.
 *
 * A one-element list is tested for explicitly; otherwise the cells from the
 * second one onward are visited in list order.
 * NOTE(review): presumably the head of the chain is the result -- confirm.
 */
428 static JsonPathParseItem *
429 makeItemList(List *list)
431 JsonPathParseItem *head,
435 head = end = (JsonPathParseItem *) linitial(list);
437 if (list_length(list) == 1)
440 /* append items to the end of already existing list */
444 for_each_cell(cell, list, list_second_cell(list))
446 JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
/*
 * Build a jpiIndexArray item from a non-empty List of jpiSubscript items.
 *
 * An elems array with one entry per list element is allocated with palloc;
 * each entry takes its from/to bounds from the left/right arguments of the
 * corresponding subscript item.
 */
455 static JsonPathParseItem *
456 makeIndexArray(List *list)
458 JsonPathParseItem *v = makeItemType(jpiIndexArray);
462 Assert(list_length(list) > 0);
463 v->value.array.nelems = list_length(list);
465 v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
466 v->value.array.nelems);
470 JsonPathParseItem *jpi = lfirst(cell);
472 Assert(jpi->type == jpiSubscript);
/* "to" is taken from the right argument, which may be NULL for a single index */
474 v->value.array.elems[i].from = jpi->value.args.left;
475 v->value.array.elems[i++].to = jpi->value.args.right;
/*
 * Build a jpiAny item with the given level bounds.  A negative bound (the
 * grammar uses -1 for LAST_P and for "no upper bound") is stored as
 * PG_UINT32_MAX.
 */
481 static JsonPathParseItem *
482 makeAny(int first, int last)
484 JsonPathParseItem *v = makeItemType(jpiAny);
486 v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
487 v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
/*
 * Build a jpiLikeRegex item for "expr LIKE_REGEX pattern [FLAG flags]".
 *
 * expr is the value being matched and pattern the regex text; the pattern's
 * val pointer and len are stored in the item.  flags may be NULL, in which
 * case the flag bitmask stays 0.  An unrecognized flag character raises a
 * syntax error.  The pattern is also compiled once so that an invalid regex
 * is reported at parse time; the compiled result is discarded here.
 */
492 static JsonPathParseItem *
493 makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
494 JsonPathString *flags)
496 JsonPathParseItem *v = makeItemType(jpiLikeRegex);
500 v->value.like_regex.expr = expr;
501 v->value.like_regex.pattern = pattern->val;
502 v->value.like_regex.patternlen = pattern->len;
504 /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
505 v->value.like_regex.flags = 0;
/* the "flags &&" guard makes the loop a no-op when no flags were given */
506 for (i = 0; flags && i < flags->len; i++)
508 switch (flags->val[i])
511 v->value.like_regex.flags |= JSP_REGEX_ICASE;
514 v->value.like_regex.flags |= JSP_REGEX_DOTALL;
517 v->value.like_regex.flags |= JSP_REGEX_MLINE;
520 v->value.like_regex.flags |= JSP_REGEX_WSPACE;
523 v->value.like_regex.flags |= JSP_REGEX_QUOTE;
527 (errcode(ERRCODE_SYNTAX_ERROR),
528 errmsg("invalid input syntax for type %s", "jsonpath"),
529 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
535 /* Convert flags to what RE_compile_and_cache needs */
536 cflags = jspConvertRegexFlags(v->value.like_regex.flags);
538 /* check regex validity */
539 (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
541 cflags, DEFAULT_COLLATION_OID);
547 * Convert from XQuery regex flags to those recognized by our regex library.
/*
 * xflags is a bitmask of JSP_REGEX_* bits.  The regex-library flags are
 * accumulated in cflags, starting from REG_ADVANCED:
 *   - JSP_REGEX_QUOTE clears REG_ADVANCED;
 *   - REG_NLSTOP is added unless JSP_REGEX_DOTALL is set;
 *   - REG_NLANCH is added when JSP_REGEX_MLINE is set;
 *   - JSP_REGEX_WSPACE is rejected with ERRCODE_FEATURE_NOT_SUPPORTED.
 */
550 jspConvertRegexFlags(uint32 xflags)
552 /* By default, XQuery is very nearly the same as Spencer's AREs */
553 int cflags = REG_ADVANCED;
555 /* Ignore-case means the same thing, too, modulo locale issues */
556 if (xflags & JSP_REGEX_ICASE)
559 /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
560 if (xflags & JSP_REGEX_QUOTE)
562 cflags &= ~REG_ADVANCED;
567 /* Note that dotall mode is the default in POSIX */
568 if (!(xflags & JSP_REGEX_DOTALL))
569 cflags |= REG_NLSTOP;
570 if (xflags & JSP_REGEX_MLINE)
571 cflags |= REG_NLANCH;
574 * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
575 * not really enough alike to justify treating JSP_REGEX_WSPACE as
576 * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
577 * future we'll modify the regex library to have an option for
578 * XQuery-style ignore-whitespace mode.
580 if (xflags & JSP_REGEX_WSPACE)
582 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
583 errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
590 * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
591 * unavoidable because jsonpath_gram does not create a .h file to export its
592 * token symbols. If these files ever grow large enough to be worth compiling
593 * separately, that could be fixed; but for now it seems like useless
597 #include "jsonpath_scan.c"