From 7dc34b92aff7a38e16c0ef608238d03e1ac3d213 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Sun, 28 Dec 2014 00:32:06 -0600 Subject: [PATCH] Add `label $name | EXP`; fix `break` This is to fix the problem where `break` is dynamic, not lexical. With this it should be possible to do this sort of thing: label $break | inputs | if ... then $break|error else . end This is a backwards-incompatible change for master, but the previous `break` hadn't shipped yet. Still needed: - testing --- builtin.c | 17 +++--- bytecode.h | 3 ++ compile.c | 93 ++++++++++++++++++++++++++------ compile.h | 2 + docs/content/3.manual/manual.yml | 62 +++++++++++++++++++-- execute.c | 7 +++ lexer.l | 4 ++ main.c | 1 + opcode_list.h | 1 + parser.y | 44 ++++++++++++++- tests/all.test | 24 ++++++++- 11 files changed, 225 insertions(+), 33 deletions(-) diff --git a/builtin.c b/builtin.c index 6399aff..1b1cd65 100644 --- a/builtin.c +++ b/builtin.c @@ -989,7 +989,6 @@ static block bind_bytecoded_builtins(block b) { static const char* const jq_builtins[] = { "def error: error(.);", - "def break: error(\"break\");", "def map(f): [.[] | f];", "def map_values(f): .[] |= f;", "def select(f): if f then . else empty end;", @@ -1024,14 +1023,14 @@ static const char* const jq_builtins[] = { "def paths: path(recurse(if (type|. == \"array\" or . == \"object\") then .[] else empty end))|select(length > 0);", "def paths(node_filter): . as $dot|paths|select(. as $p|$dot|getpath($p)|node_filter);", "def any(generator; condition):" - " [foreach generator as $i" + " [label | foreach generator as $i" " (false;" " if . then break elif $i | condition then true else . end;" " if . then . else empty end)] | length == 1;", "def any(condition): any(.[]; condition);", "def any: any(.);", "def all(generator; condition): " - " [foreach generator as $i" + " [label | foreach generator as $i" " (true;" " if .|not then break elif $i | condition then . else false end;" " if .|not then . else empty end)] | length == 0;", @@ -1133,13 +1132,13 @@ static const char* const jq_builtins[] = { "def while(cond; update): " " def _while: " " if cond then ., (update | _while) else empty end; " - " try _while catch if .==\"break\" then empty else . end;", + " _while;", "def until(cond; next): " " def _until: " " if cond then . else (next|_until) end;" " _until;", - "def limit($n; exp): if $n < 0 then exp else foreach exp as $item ([$n, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1]) end;", - "def first(g): foreach g as $item ([false, null]; if .[0]==true then break else [true, $item] end; .[1]);", + "def limit($n; exp): if $n < 0 then exp else label | foreach exp as $item ([$n, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1]) end;", + "def first(g): label | foreach g as $item ([false, null]; if .[0]==true then break else [true, $item] end; .[1]);", "def last(g): reduce g as $item (null; $item);", "def nth($n; g): if $n < 0 then error(\"nth doesn't support negative indices\") else last(limit($n + 1; g)) end;", "def first: .[0];", @@ -1157,12 +1156,12 @@ static const char* const jq_builtins[] = { " end;", "def in(xs): . as $x | xs | has($x);", "def inside(xs): . as $x | xs | contains($x);", - "def input: try _input catch if .==\"break\" then empty else . end;", + "def input: _input;", "def repeat(exp): " " def _repeat: " " exp, _repeat;" - " try _repeat catch if .==\"break\" then empty else . end;", - "def inputs: repeat(_input);", + " _repeat;", + "def inputs: try repeat(_input) catch if .==\"break\" then empty else .|error end;", // # like ruby's downcase - only characters A to Z are affected "def ascii_downcase:" " explode | map( if 65 <= . and . <= 90 then . + 32 else . end) | implode;", diff --git a/bytecode.h b/bytecode.h index ee50d2c..8662c4d 100644 --- a/bytecode.h +++ b/bytecode.h @@ -25,6 +25,9 @@ enum { OP_HAS_UFUNC = 64, OP_IS_CALL_PSEUDO = 128, OP_HAS_BINDING = 1024, + // NOTE: Not actually part of any op -- a pseudo-op flag for special + // handling of `break`. + OP_BIND_WILDCARD = 2048, }; struct opcode_description { opcode op; diff --git a/compile.c b/compile.c index 042161d..431f66e 100644 --- a/compile.c +++ b/compile.c @@ -241,6 +241,7 @@ int block_has_only_binders_and_imports(block binders, int bindflags) { int block_has_only_binders(block binders, int bindflags) { bindflags |= OP_HAS_BINDING; + bindflags &= ~OP_BIND_WILDCARD; for (inst* curr = binders.first; curr; curr = curr->next) { if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) { return 0; @@ -291,11 +292,12 @@ static int block_count_refs(block binder, block body) { return nrefs; } -static int block_bind_subblock(block binder, block body, int bindflags) { +static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { assert(block_is_single(binder)); - assert((opcode_describe(binder.first->op)->flags & bindflags) == bindflags); + assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); assert(binder.first->symbol); assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first); + assert(break_distance >= 0); binder.first->bound_by = binder.first; if (binder.first->nformals == -1) @@ -303,9 +305,16 @@ static int block_bind_subblock(block binder, block body, int bindflags) { int nrefs = 0; for (inst* i = body.first; i; i = i->next) { int flags = opcode_describe(i->op)->flags; - if ((flags & bindflags) == bindflags && - i->bound_by == 0 && - !strcmp(i->symbol, binder.first->symbol)) { + if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 && + (!strcmp(i->symbol, binder.first->symbol) || + // Check for break/break2/break3; see parser.y + ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' && + break_distance <= 3 && (i->symbol[1] == '1' + break_distance) && + i->symbol[2] == '\0'))) { + if (bindflags & OP_BIND_WILDCARD) { + jv_mem_free(i->symbol); + i->symbol = strdup(binder.first->symbol); + } // bind this instruction if (i->op == CALL_JQ && i->nactuals == -1) i->nactuals = block_count_actuals(i->arglist); @@ -313,11 +322,17 @@ static int block_bind_subblock(block binder, block body, int bindflags) { i->bound_by = binder.first; nrefs++; } + } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 && + !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) && + !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) { + // Increment the break distance required for this binder to match + // a break whenever we come across a STOREV of *anonlabel... + break_distance++; } // binding recurses into closures - nrefs += block_bind_subblock(binder, i->subfn, bindflags); + nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance); // binding recurses into argument list - nrefs += block_bind_subblock(binder, i->arglist, bindflags); + nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance); } return nrefs; } @@ -327,7 +342,7 @@ static int block_bind_each(block binder, block body, int bindflags) { bindflags |= OP_HAS_BINDING; int nrefs = 0; for (inst* curr = binder.first; curr; curr = curr->next) { - nrefs += block_bind_subblock(inst_block(curr), body, bindflags); + nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0); } return nrefs; } @@ -354,7 +369,7 @@ block block_bind_library(block binder, block body, int bindflags, const char* li strcpy(tname, matchname); strcpy(tname+matchlen,cname); curr->symbol = tname; - nrefs += block_bind_subblock(inst_block(curr), body, bindflags); + nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0); curr->symbol = cname; free(tname); } @@ -483,13 +498,13 @@ block gen_function(const char* name, block formals, block body) { i->op = CLOSURE_PARAM; body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body); } - block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING); + block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); } i->subfn = body; i->symbol = strdup(name); i->arglist = formals; block b = inst_block(i); - block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING); + block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); return b; } @@ -707,10 +722,7 @@ block gen_foreach(const char* varname, block source, block init, block update, b // want to output it, so we backtrack. gen_op_simple(BACKTRACK)); inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop - block handler = gen_cond(gen_call("_equal", BLOCK(gen_lambda(gen_const(jv_string("break"))), gen_lambda(gen_noop()))), - gen_op_simple(BACKTRACK), - gen_call("error", gen_noop())); - return gen_try(foreach, handler); + return foreach; } block gen_definedor(block a, block b) { @@ -790,20 +802,45 @@ block gen_var_binding(block var, const char* name, block body) { body, OP_HAS_VARIABLE)); } +// Like gen_var_binding(), but bind `break`'s wildcard unbound variable +static block gen_wildvar_binding(block var, const char* name, block body) { + return BLOCK(gen_op_simple(DUP), var, + block_bind(gen_op_unbound(STOREV, name), + body, OP_HAS_VARIABLE | OP_BIND_WILDCARD)); +} + block gen_cond(block cond, block iftrue, block iffalse) { return BLOCK(gen_op_simple(DUP), cond, gen_condbranch(BLOCK(gen_op_simple(POP), iftrue), BLOCK(gen_op_simple(POP), iffalse))); } +block gen_try_handler(block handler) { + // Quite a pain just to hide jq's internal errors. + return gen_cond(// `if type=="object" and .__jq + gen_and(gen_call("_equal", + BLOCK(gen_lambda(gen_const(jv_string("object"))), + gen_lambda(gen_noop()))), + BLOCK(gen_subexp(gen_const(jv_string("__jq"))), + gen_noop(), + gen_op_simple(INDEX))), + // `then error` + gen_call("error", gen_noop()), + // `else HANDLER end` + handler); +} + block gen_try(block exp, block handler) { /* - * Produce: + * Produce something like: * FORK_OPT
* * JUMP * * + * If this is not an internal try/catch, then catch and re-raise + * internal errors to prevent them from leaking. + * * The handler will only execute if we backtrack to the FORK_OPT with * an error (exception). If produces no value then FORK_OPT * will backtrack (propagate the `empty`, as it were. If @@ -817,6 +854,25 @@ block gen_try(block exp, block handler) { return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler); } +block gen_label(const char *label, block exp) { + block cond = gen_call("_equal", + BLOCK(gen_lambda(gen_noop()), + gen_lambda(gen_op_unbound(LOADV, label)))); + return gen_wildvar_binding(gen_op_simple(GENLABEL), label, + BLOCK(gen_op_simple(POP), + // try exp catch if . == $label + // then empty + // else error end + // + // Can't use gen_binop(), as that's firmly + // stuck in parser.y as it refers to things + // like EQ. + gen_try(exp, + gen_cond(cond, + gen_op_simple(BACKTRACK), + gen_call("error", gen_noop()))))); +} + block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) { for (int cfunc=0; cfuncop)->flags & OP_HAS_BINDING) { if (!curr->bound_by) { - locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist)); + if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0') + locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure"); + else + locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist)); errors++; // don't process this instruction if it's not well-defined ret = BLOCK(ret, inst_block(curr)); diff --git a/compile.h b/compile.h index 7d5e6c1..25544e1 100644 --- a/compile.h +++ b/compile.h @@ -50,7 +50,9 @@ block gen_or(block a, block b); block gen_var_binding(block var, const char* name, block body); block gen_cond(block cond, block iftrue, block iffalse); +block gen_try_handler(block handler); block gen_try(block exp, block handler); +block gen_label(const char *label, block exp); block gen_cbinding(const struct cfunction* functions, int nfunctions, block b); diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml index 354a595..0355371 100644 --- a/docs/content/3.manual/manual.yml +++ b/docs/content/3.manual/manual.yml @@ -1740,6 +1740,55 @@ sections: input: 'true' output: ['"some exception"'] + - title: Breaking out of control structures + body: | + + A convenient use of try/catch is to break out of control + structures like `reduce`, `foreach`, `while`, and so on. + + For example: + + # Repeat an expression until it raises "break" as an + # error, then stop repeating without re-raising the error. + # But if the error caught is not "break" then re-raise it. + try repeat(exp) catch .=="break" then empty else error; + + jq has a syntax for lexical labels to "break" or "go (back) to": + + label | ... break ... + + The `break` builtin will cause the program to return to act as + though the nearest `label` (to the left) produced `empty`. + + The `break2` builtin will return the program to the second + nearest `label`, then `empty`. + + The `break3` builtin will return the program to the third + nearest `label`, then `empty`. + + A variation on this is named labels: + + label $name | ... break $name + + The `break $name` form returns to the named label and produces + `empty`. + + In all cases the relationship between the `break` and + corresponding `label` is lexical: the label has to be + "visible" from the break. + + To break out of a `reduce`, for example: + + label | reduce .[] as $item (null; if .==false then break else ... end) + + The following jq program produces a syntax error: + + break + + because: + + jq: error: break used outside labeled control structure + - title: "`?` operator" body: | @@ -2171,8 +2220,7 @@ sections: This is mostly useful only for constructing `reduce`- and `limit`-like functions. But it is much more general, as it - allows for partial reductions (see the example below), as well - as breaking out of the "loop" with `break`. + allows for partial reductions (see the example below). examples: - program: '[foreach .[] as $item @@ -2199,8 +2247,14 @@ sections: def recurse(f): def r: ., (f | select(. != null) | r); r; def while(cond; update): - def w: if cond then ., (update | _while) else empty end; - try _while catch if . == "break" then empty else . end; + def _while: + if cond then ., (update | _while) else empty end; + _while; + + def repeat(exp): + def _repeat: + exp, _repeat; + _repeat; - title: Generators and iterators body: | diff --git a/execute.c b/execute.c index 67a1629..3be6abf 100644 --- a/execute.c +++ b/execute.c @@ -37,6 +37,7 @@ struct jq_state { int subexp_nest; int debug_trace_enabled; int initial_execution; + unsigned next_label; jv attrs; jq_input_cb input_cb; @@ -351,6 +352,11 @@ jv jq_next(jq_state *jq) { break; } + case GENLABEL: { + stack_push(jq, JV_OBJECT(jv_string("__jq"), jv_number(jq->next_label++))); + break; + } + case DUP: { jv v = stack_pop(jq); stack_push(jq, jv_copy(v)); @@ -838,6 +844,7 @@ jq_state *jq_init(void) { return NULL; jq->bc = 0; + jq->next_label = 0; stack_init(&jq->stk); jq->stk_top = 0; diff --git a/lexer.l b/lexer.l index 70ad050..065ee72 100644 --- a/lexer.l +++ b/lexer.l @@ -57,6 +57,10 @@ struct lexer_param; "//" { return DEFINEDOR; } "try" { return TRY; } "catch" { return CATCH; } +"label" { return LABEL; } +"break" { return BREAK; } +"break2" { return BREAK2; } +"break3" { return BREAK3; } "|=" { return SETPIPE; } "+=" { return SETPLUS; } "-=" { return SETMINUS; } diff --git a/main.c b/main.c index 5b12f48..4457c58 100644 --- a/main.c +++ b/main.c @@ -351,6 +351,7 @@ int main(int argc, char* argv[]) { options |= EXIT_STATUS; if (!short_opts) continue; } + // FIXME: For --arg* we should check that the varname is acceptable if (isoption(argv[i], 0, "arg", &short_opts)) { if (i >= argc - 2) { fprintf(stderr, "%s: --arg takes two parameters (e.g. -a varname value)\n", progname); diff --git a/opcode_list.h b/opcode_list.h index 38e6385..bdc4f48 100644 --- a/opcode_list.h +++ b/opcode_list.h @@ -40,3 +40,4 @@ OP(TOP, NONE, 0, 0) OP(CLOSURE_PARAM_REGULAR, DEFINITION, 0, 0) OP(DEPS, CONSTANT, 0, 0) OP(MODULEMETA, CONSTANT, 0, 0) +OP(GENLABEL, NONE, 0, 1) diff --git a/parser.y b/parser.y index 44ae009..72aad3a 100644 --- a/parser.y +++ b/parser.y @@ -70,6 +70,10 @@ struct lexer_param; %token OR "or" %token TRY "try" %token CATCH "catch" +%token LABEL "label" +%token BREAK "break" +%token BREAK2 "break2" +%token BREAK3 "break3" %token SETPIPE "|=" %token SETPLUS "+=" %token SETMINUS "-=" @@ -149,6 +153,8 @@ int yylex(YYSTYPE* yylval, YYLTYPE* yylloc, block* answer, int* errors, return tok; } +static unsigned int next_label = 0; + static block gen_dictpair(block k, block v) { return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT)); } @@ -335,7 +341,7 @@ Term "as" '$' IDENT '|' Exp { "try" Exp "catch" Exp { //$$ = BLOCK(gen_op_target(FORK_OPT, $2), $2, $4); - $$ = gen_try($2, $4); + $$ = gen_try($2, gen_try_handler($4)); } | "try" Exp { //$$ = BLOCK(gen_op_target(FORK_OPT, $2), $2, gen_op_simple(BACKTRACK)); @@ -346,6 +352,19 @@ Term "as" '$' IDENT '|' Exp { $$ = $2; } | +"label" '|' Exp { + jv v = jv_string_fmt("*anonlabel%u", next_label++); + $$ = gen_location(@$, locations, gen_label(jv_string_value(v), $3)); + jv_free(v); +} | + +"label" '$' IDENT '|' Exp { + jv v = jv_string_fmt("*label-%s", jv_string_value($3)); + $$ = gen_location(@$, locations, gen_label(jv_string_value(v), $5)); + jv_free($3); + jv_free(v); +} | + // This rule conflicts with all the other rules using the '?' operator. // It doesn't matter which bison prefers: all of them result in the same // syntax and semantics, but the more specific rules optimize better @@ -570,6 +589,29 @@ Term: REC { $$ = gen_call("recurse", gen_noop()); } | +BREAK { + $$ = gen_location(@$, locations, + BLOCK(gen_op_unbound(LOADV, "*1"), // impossible symbol + gen_call("error", gen_noop()))); +} | +BREAK2 { + $$ = gen_location(@$, locations, + BLOCK(gen_op_unbound(LOADV, "*2"), // impossible symbol + gen_call("error", gen_noop()))); +} | +BREAK3 { + $$ = gen_location(@$, locations, + BLOCK(gen_op_unbound(LOADV, "*3"), // impossible symbol + gen_call("error", gen_noop()))); +} | +BREAK '$' IDENT { + jv v = jv_string_fmt("*label-%s", jv_string_value($3)); // impossible symbol + $$ = gen_location(@$, locations, + BLOCK(gen_op_unbound(LOADV, jv_string_value(v)), + gen_call("error", gen_noop()))); + jv_free(v); + jv_free($3); +} | Term FIELD '?' { $$ = gen_index_opt($1, gen_const($2)); } | diff --git a/tests/all.test b/tests/all.test index 4477ec2..67b8076 100644 --- a/tests/all.test +++ b/tests/all.test @@ -247,15 +247,35 @@ null 1 [1,2,4,8,16,32,64] -[while(.<100; .*2|if . > 10 then break else . end)] +[label | while(.<100; .*2|if . > 10 then break else . end)] 1 [1,2,4,8] +[(label $here | .[] | if .>1 then break $here else . end), "hi!"] +[0,1,2] +[0,1,"hi!"] + +[(label $here | .[] | if .>1 then break $here else . end), "hi!"] +[0,2,1] +[0,"hi!"] + +(label | (label | 2 | break2)), 1 +null +1 + +%%FAIL +break +jq: error: break used outside labeled control structure + +%%FAIL +. as $foo | break $foo +jq: error: *label-foo/0 is not defined + [.[]|[.,1]|until(.[0] < 1; [.[0] - 1, .[1] * .[0]])|.[1]] [1,2,3,4,5] [1,2,6,24,120] -[foreach .[] as $item ([3, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1])] +[label | foreach .[] as $item ([3, null]; if .[0] < 1 then break else [.[0] -1, $item] end; .[1])] [11,22,33,44,55,66,77,88,99] [11,22,33] -- 2.40.0