From fe29d3d3fa4a147a5103f314bc09287c4c32a81d Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Mon, 17 Feb 2014 12:28:26 -0600 Subject: [PATCH] Add `?`, `.[]?`, and `..` operators Make XPath-like `//a/b` recursive structure traversal easier in jq, which then becomes: ..|.a?.b? The `?` operator suppresses errors about . not being an array or object. The `..` operator is equivalent to calling the new `recurse_down` built-in, which in turn is equivalent to recurse(.[]?) Note that `..a` is not supported; neither is `...a`. That could be added, but it doesn't seem worth the trouble of saving the need to type a '|'. --- builtin.c | 1 + docs/content/3.manual/manual.yml | 48 ++++++++++++++++++++++++++++++++ execute.c | 25 +++++++++++------ lexer.l | 3 +- opcode_list.h | 2 ++ parser.y | 46 ++++++++++++++++++++++++++---- tests/all.test | 19 +++++++++++++ 7 files changed, 130 insertions(+), 14 deletions(-) diff --git a/builtin.c b/builtin.c index ddf66d0..e60fa85 100644 --- a/builtin.c +++ b/builtin.c @@ -672,6 +672,7 @@ static const char* const jq_builtins[] = { "def _assign(paths; value): value as $v | reduce path(paths) as $p (.; setpath($p; $v));", "def _modify(paths; update): reduce path(paths) as $p (.; setpath($p; getpath($p) | update));", "def recurse(f): ., (f | select(. != null) | recurse(f));", + "def recurse_down: recurse(.[]?);", "def to_entries: [keys[] as $k | {key: $k, value: .[$k]}];", "def from_entries: map({(.key): .value}) | add;", "def with_entries(f): to_entries | map(f) | from_entries;", diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml index fe1d8d4..a175b05 100644 --- a/docs/content/3.manual/manual.yml +++ b/docs/content/3.manual/manual.yml @@ -217,6 +217,26 @@ sections: input: '{"foo": 42}' output: [42] + - title: "`.foo?`" + body: | + + Just like `.foo`, but does not output an error when `.` + is not an array or an object. + + examples: + - program: '.foo?' 
+ input: '{"foo": 42, "bar": "less interesting data"}' + output: [42] + - program: '.foo?' + input: '{"notfoo": true, "alsonotfoo": false}' + output: ['null'] + - program: '.["foo"]?' + input: '{"foo": 42}' + output: [42] + - program: '[.foo?]' + input: '[1,2]' + output: ['[]'] + - title: "`.[]`, `.[2]`, `.[10:15]`" body: | @@ -234,6 +254,10 @@ sections: the array), or omitted (in which case it refers to the start or end of the array). + The `?` "operator" can also be used with the slice operator, + as in `.[10:15]?`, which outputs values where the inputs are + slice-able. + examples: - program: '.[0]' input: '[{"name":"JSON", "good":true}, {"name":"XML", "good":false}]' @@ -286,6 +310,12 @@ sections: input: '{"a": 1, "b": 1}' output: ['1', '1'] + - title: "`.[]?`" + body: | + + Like `.[]`, but no errors will be output if . is not an array + or object. + - title: "`,`" body: | @@ -1022,6 +1052,24 @@ sections: - '{"foo":[{"foo":[]}]}' - '{"foo":[]}' + - title: "`recurse_down`" + body: | + + A quieter version of `recurse(.[])`, equivalent to: + + def recurse_down: recurse(.[]?); + + - title: "`..`" + body: | + + Short-hand for `recurse_down`. This is intended to resemble + the XPath `//` operator. Note that `..a` does not work; use + `..|.a` instead. + + examples: + - program: '..|.a?' 
+ input: '[[{"a":1}]]' + output: ['1'] - title: "String interpolation - `\(foo)`" body: | diff --git a/execute.c b/execute.c index 387bf38..6e0ddc1 100644 --- a/execute.c +++ b/execute.c @@ -503,7 +503,8 @@ jv jq_next(jq_state *jq) { goto do_backtrack; } - case INDEX: { + case INDEX: + case INDEX_OPT: { jv t = stack_pop(jq); jv k = stack_pop(jq); path_append(jq, jv_copy(k)); @@ -511,7 +512,10 @@ jv jq_next(jq_state *jq) { if (jv_is_valid(v)) { stack_push(jq, v); } else { - print_error(jq, v); + if (opcode == INDEX) + print_error(jq, v); + else + jv_free(v); goto do_backtrack; } break; @@ -536,16 +540,18 @@ jv jq_next(jq_state *jq) { } case EACH: + case EACH_OPT: stack_push(jq, jv_number(-1)); // fallthrough - case ON_BACKTRACK(EACH): { + case ON_BACKTRACK(EACH): + case ON_BACKTRACK(EACH_OPT): { int idx = jv_number_value(stack_pop(jq)); jv container = stack_pop(jq); int keep_going, is_last = 0; jv key, value; if (jv_get_kind(container) == JV_KIND_ARRAY) { - if (opcode == EACH) idx = 0; + if (opcode == EACH || opcode == EACH_OPT) idx = 0; else idx = idx + 1; int len = jv_array_length(jv_copy(container)); keep_going = idx < len; @@ -555,7 +561,7 @@ jv jq_next(jq_state *jq) { value = jv_array_get(jv_copy(container), idx); } } else if (jv_get_kind(container) == JV_KIND_OBJECT) { - if (opcode == EACH) idx = jv_object_iter(container); + if (opcode == EACH || opcode == EACH_OPT) idx = jv_object_iter(container); else idx = jv_object_iter_next(container, idx); keep_going = jv_object_iter_valid(container, idx); if (keep_going) { @@ -563,9 +569,12 @@ jv jq_next(jq_state *jq) { value = jv_object_iter_value(container, idx); } } else { - assert(opcode == EACH); - print_error(jq, jv_invalid_with_msg(jv_string_fmt("Cannot iterate over %s", - jv_kind_name(jv_get_kind(container))))); + assert(opcode == EACH || opcode == EACH_OPT); + if (opcode == EACH) { + print_error(jq, + jv_invalid_with_msg(jv_string_fmt("Cannot iterate over %s", + jv_kind_name(jv_get_kind(container))))); + } 
keep_going = 0; } diff --git a/lexer.l b/lexer.l index 3c318bc..22d1d94 100644 --- a/lexer.l +++ b/lexer.l @@ -61,7 +61,8 @@ struct lexer_param; "//=" { return SETDEFINEDOR; } "<=" { return LESSEQ; } ">=" { return GREATEREQ; } -"."|"="|";"|","|":"|"|"|"+"|"-"|"*"|"/"|"%"|"\$"|"<"|">" { return yytext[0];} +".." { return REC; } +"."|"?"|"="|";"|","|":"|"|"|"+"|"-"|"*"|"/"|"%"|"\$"|"<"|">" { return yytext[0];} "["|"{"|"(" { return enter(yytext[0], YY_START, yyscanner); diff --git a/opcode_list.h b/opcode_list.h index c9730ba..2b27f78 100644 --- a/opcode_list.h +++ b/opcode_list.h @@ -6,7 +6,9 @@ OP(LOADV, VARIABLE, 1, 1) OP(LOADVN, VARIABLE, 1, 1) OP(STOREV, VARIABLE, 1, 0) OP(INDEX, NONE, 2, 1) +OP(INDEX_OPT, NONE, 2, 1) OP(EACH, NONE, 1, 1) +OP(EACH_OPT, NONE, 1, 1) OP(FORK, BRANCH, 0, 0) OP(JUMP, BRANCH, 0, 0) OP(JUMP_F,BRANCH, 1, 0) diff --git a/parser.y b/parser.y index bc7bba9..4474fc8 100644 --- a/parser.y +++ b/parser.y @@ -50,6 +50,8 @@ struct lexer_param; %token FIELD %token LITERAL %token FORMAT +%token Q "?" +%token REC ".." %token SETMOD "%=" %token EQ "==" %token NEQ "!=" @@ -137,7 +139,11 @@ static block gen_index(block obj, block key) { return BLOCK(gen_subexp(key), obj, gen_op_simple(INDEX)); } -static block gen_slice_index(block obj, block start, block end) { +static block gen_index_opt(block obj, block key) { + return BLOCK(gen_subexp(key), obj, gen_op_simple(INDEX_OPT)); +} + +static block gen_slice_index(block obj, block start, block end, opcode idx_op) { block key = BLOCK(gen_subexp(gen_const(jv_object())), gen_subexp(gen_const(jv_string("start"))), gen_subexp(start), @@ -145,7 +151,7 @@ static block gen_slice_index(block obj, block start, block end) { gen_subexp(gen_const(jv_string("end"))), gen_subexp(end), gen_op_simple(INSERT)); - return BLOCK(key, obj, gen_op_simple(INDEX)); + return BLOCK(key, obj, gen_op_simple(idx_op)); } static block gen_binop(block a, block b, int op) { @@ -470,6 +476,21 @@ Term: '.' 
{ $$ = gen_noop(); } | +REC { + $$ = gen_call("recurse_down", gen_noop()); +} | +Term FIELD '?' { + $$ = gen_index_opt($1, gen_const($2)); +} | +FIELD '?' { + $$ = gen_index_opt(gen_noop(), gen_const($1)); +} | +Term '.' String '?' { + $$ = gen_index_opt($1, $3); +} | +'.' String '?' { + $$ = gen_index_opt(gen_noop(), $2); +} | Term FIELD { $$ = gen_index($1, gen_const($2)); } | @@ -492,20 +513,35 @@ Term '.' String { $$ = gen_noop(); } | /* FIXME: string literals */ +Term '[' Exp ']' '?' { + $$ = gen_index_opt($1, $3); +} | Term '[' Exp ']' { $$ = gen_index($1, $3); } | +Term '[' ']' '?' { + $$ = block_join($1, gen_op_simple(EACH_OPT)); +} | Term '[' ']' { $$ = block_join($1, gen_op_simple(EACH)); } | +Term '[' Exp ':' Exp ']' '?' { + $$ = gen_slice_index($1, $3, $5, INDEX_OPT); +} | +Term '[' Exp ':' ']' '?' { + $$ = gen_slice_index($1, $3, gen_const(jv_null()), INDEX_OPT); +} | +Term '[' ':' Exp ']' '?' { + $$ = gen_slice_index($1, gen_const(jv_null()), $4, INDEX_OPT); +} | Term '[' Exp ':' Exp ']' { - $$ = gen_slice_index($1, $3, $5); + $$ = gen_slice_index($1, $3, $5, INDEX); } | Term '[' Exp ':' ']' { - $$ = gen_slice_index($1, $3, gen_const(jv_null())); + $$ = gen_slice_index($1, $3, gen_const(jv_null()), INDEX); } | Term '[' ':' Exp ']' { - $$ = gen_slice_index($1, gen_const(jv_null()), $4); + $$ = gen_slice_index($1, gen_const(jv_null()), $4, INDEX); } | LITERAL { $$ = gen_const($1); diff --git a/tests/all.test b/tests/all.test index 96a08da..a2c097c 100644 --- a/tests/all.test +++ b/tests/all.test @@ -127,6 +127,25 @@ null {"foo": {"bar": 20}} 20 +[.[]|.foo?] +[1,[2],{"foo":3,"bar":4},{},{"foo":5}] +[3,null,5] + +[.[]|.foo?.bar?] +[1,[2],[],{"foo":3},{"foo":{"bar":4}},{}] +[4,null] + +[..] +[1,[[2]],{ "a":[1]}] +[[1,[[2]],{"a":[1]}],1,[[2]],[2],2,{"a":[1]},[1],1] + +[.[]|.[]?] +[1,null,[],[1,[2,[[3]]]],[{}],[{"a":[1,[2]]}]] +[1,[2,[[3]]],{},{"a":[1,[2]]}] + +[.[]|.[1:3]?] 
+[1,null,true,false,"abcdef",{},{"a":1,"b":2},[],[1,2,3,4,5],[1,2]] +[null,"bc",[],[2,3],[2]] # # Multiple outputs, iteration -- 2.40.0