From c128c2029e863a6f06dbf90a811e221a8ffdccaa Mon Sep 17 00:00:00 2001 From: Stephen Dolan Date: Tue, 21 Aug 2012 18:14:13 +0100 Subject: [PATCH] First pass at functions + tests --- c/Makefile | 2 +- c/bytecode.c | 43 ++++++++-- c/bytecode.h | 13 ++- c/compile.c | 208 +++++++++++++++++++++++++++++++++------------ c/compile.h | 5 +- c/execute.c | 42 +++++++-- c/forkable_stack.h | 15 ++++ c/frame_layout.h | 37 ++++++-- c/lexer.l | 1 + c/main.c | 5 +- c/opcode.c | 6 +- c/opcode.h | 4 + c/opcode_list.h | 5 ++ c/parser.y | 15 +++- c/testdata | 143 +++++++++++++++++++++++++++++++ 15 files changed, 458 insertions(+), 86 deletions(-) create mode 100644 c/testdata diff --git a/c/Makefile b/c/Makefile index ca20397..01c3b54 100644 --- a/c/Makefile +++ b/c/Makefile @@ -14,7 +14,7 @@ lexer.yy.c: lexer.l lexer.yy.h: lexer.yy.c parser.tab.c: parser.y lexer.yy.h - bison -W -d parser.y + bison -W -d parser.y -v --report-file=parser.info parser.tab.h: parser.tab.c parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c diff --git a/c/bytecode.c b/c/bytecode.c index 477d3aa..82e38f4 100644 --- a/c/bytecode.c +++ b/c/bytecode.c @@ -4,12 +4,29 @@ #include "bytecode.h" #include "opcode.h" -void dump_disassembly(struct bytecode* bc) { +static int bytecode_operation_length(uint16_t* codeptr) { + if (opcode_describe(*codeptr)->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST) { + return 2 + codeptr[1] * 2; + } else { + return opcode_length(*codeptr); + } +} + +void dump_disassembly(int indent, struct bytecode* bc) { + dump_code(indent, bc); + for (int i=0; insubfunctions; i++) { + printf("%*ssubfn[%d]:\n", indent, "", i); + dump_disassembly(indent+2, bc->subfunctions[i]); + } +} + +void dump_code(int indent, struct bytecode* bc) { int pc = 0; while (pc < bc->codelen) { + printf("%*s", indent, ""); dump_operation(bc, bc->code + pc); printf("\n"); - pc += opcode_length(bc->code[pc]); + pc += bytecode_operation_length(bc->code + pc); } } @@ -20,16 +37,28 @@ void dump_operation(struct bytecode* bc, uint16_t* codeptr) { printf("%s", op->name); if (op->flags & OP_HAS_IMMEDIATE) { uint16_t imm = bc->code[pc++]; - printf(" "); - if (op->flags & OP_HAS_BRANCH) { - printf("%04d", pc + imm); + if (op->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST) { + for (int i=0; icode[pc++]; + uint16_t idx = bc->code[pc++]; + if (idx & ARG_NEWCLOSURE) { + printf(" subfn[%d]", idx & ~ARG_NEWCLOSURE); + } else { + printf(" param[%d]", idx); + } + if (level) { + printf("^%d", level); + } + } + } else if (op->flags & OP_HAS_BRANCH) { + printf(" %04d", pc + imm); } else if (op->flags & OP_HAS_CONSTANT) { json_dumpf(json_array_get(bc->constants, imm), stdout, JSON_ENCODE_ANY); } else if (op->flags & OP_HAS_VARIABLE) { - printf("v%d", imm); + printf(" v%d", imm); } else { - printf("%d", imm); + printf(" %d", imm); } } } diff --git a/c/bytecode.h b/c/bytecode.h index a7141e4..b1146ad 100644 --- a/c/bytecode.h +++ b/c/bytecode.h @@ -19,6 +19,12 @@ struct symbol_table { int ncfunctions; }; +// The bytecode format matters in: +// execute.c - interpreter +// compile.c - compiler +// bytecode.c - disassembler + +#define ARG_NEWCLOSURE 0x1000 struct bytecode { uint16_t* code; @@ -30,11 +36,14 @@ struct bytecode { json_t* constants; struct symbol_table* globals; - struct bytecode* subfunctions; + struct bytecode** subfunctions; int nsubfunctions; + + struct bytecode* parent; }; -void dump_disassembly(struct bytecode* code); +void dump_disassembly(int, struct bytecode* code); +void dump_code(int, struct bytecode* code); void dump_operation(struct bytecode* bc, uint16_t* op); #endif diff --git a/c/compile.c b/c/compile.c index b102eee..8944633 100644 --- a/c/compile.c +++ b/c/compile.c @@ -14,11 +14,23 @@ struct inst { uint16_t intval; struct inst* target; json_t* constant; - char* symbol; } imm; - struct inst* var_binding; - int var_frame_idx; + // Binding + // An instruction requiring binding (for parameters/variables) + // is in one of three states: + // bound_by = NULL - Unbound free variable + // bound_by = self - This instruction binds a variable + // bound_by = other - Uses variable bound by other instruction + // The immediate field is generally not meaningful until instructions + // are bound, and even then only for instructions which bind. + struct inst* bound_by; + char* symbol; + block subfn; + + // This instruction is compiled as part of which function? + // (only used during block_compile) + struct bytecode* compiled; int bytecode_pos; // position just after this insn }; @@ -28,15 +40,16 @@ static inst* inst_new(opcode op) { i->next = i->prev = 0; i->op = op; i->bytecode_pos = -1; - i->var_binding = 0; - i->var_frame_idx = 0; + i->bound_by = 0; + i->symbol = 0; + i->subfn = gen_noop(); return i; } static void inst_free(struct inst* i) { - if (opcode_describe(i->op)->flags & - (OP_HAS_SYMBOL | OP_HAS_VARIABLE)) { - free(i->imm.symbol); + free(i->symbol); + if (opcode_describe(i->op)->flags & OP_HAS_BLOCK) { + block_free(i->subfn); } free(i); } @@ -89,25 +102,56 @@ void inst_set_target(block b, block target) { block gen_op_var_unbound(opcode op, const char* name) { assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); inst* i = inst_new(op); - i->imm.symbol = strdup(name); + i->symbol = strdup(name); return inst_block(i); } block gen_op_var_bound(opcode op, block binder) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); assert(binder.first); assert(binder.first == binder.last); - block b = gen_op_var_unbound(op, binder.first->imm.symbol); - b.first->var_binding = binder.first; + block b = gen_op_var_unbound(op, binder.first->symbol); + b.first->bound_by = binder.first; return b; } block gen_op_symbol(opcode op, const char* sym) { assert(opcode_describe(op)->flags & OP_HAS_SYMBOL); inst* i = inst_new(op); - i->imm.symbol = strdup(sym); + i->symbol = strdup(sym); + return inst_block(i); +} + +block gen_op_block_defn(opcode op, const char* name, block block) { + assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO); + assert(opcode_describe(op)->flags & OP_HAS_BLOCK); + inst* i = inst_new(op); + i->subfn = block; + i->symbol = strdup(name); return inst_block(i); } +block gen_op_block_unbound(opcode op, const char* name) { + assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO); + inst* i = inst_new(op); + i->symbol = strdup(name); + return inst_block(i); +} + + +block gen_op_call(opcode op, block arglist) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST); + inst* i = inst_new(op); + int nargs = 0; + for (inst* curr = arglist.first; curr; curr = curr->next) { + assert(opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO); + nargs++; + } + assert(nargs < 100); //FIXME + i->imm.intval = nargs; + return block_join(inst_block(i), arglist); +} + static void inst_join(inst* a, inst* b) { assert(a && b); assert(!a->next); @@ -133,22 +177,31 @@ block block_join(block a, block b) { return c; } -block block_bind(block binder, block body) { +static void block_bind_subblock(block binder, block body, int bindflags) { assert(binder.first); assert(binder.first == binder.last); - assert(opcode_describe(binder.first->op)->flags & OP_HAS_VARIABLE); - assert(binder.first->imm.symbol); - assert(binder.first->var_binding == 0); + assert((opcode_describe(binder.first->op)->flags & bindflags) == bindflags); + assert(binder.first->symbol); + assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first); - binder.first->var_binding = binder.first; + binder.first->bound_by = binder.first; for (inst* i = body.first; i; i = i->next) { - if (opcode_describe(i->op)->flags & OP_HAS_VARIABLE && - i->var_binding == 0 && - !strcmp(i->imm.symbol, binder.first->imm.symbol)) { - // bind this variable - i->var_binding = binder.first; + int flags = opcode_describe(i->op)->flags; + if ((flags & bindflags) == bindflags && + i->bound_by == 0 && + !strcmp(i->symbol, binder.first->symbol)) { + // bind this instruction + i->bound_by = binder.first; + } + if (flags & OP_HAS_BLOCK) { + block_bind_subblock(binder, i->subfn, bindflags); } } +} + +block block_bind(block binder, block body, int bindflags) { + bindflags |= OP_HAS_BINDING; + block_bind_subblock(binder, body, bindflags); return block_join(binder, body); } @@ -180,7 +233,7 @@ block gen_collect(block expr) { block_append(&c, gen_op_simple(DUP)); block_append(&c, gen_op_const(LOADK, json_array())); block array_var = block_bind(gen_op_var_unbound(STOREV, "collect"), - gen_noop()); + gen_noop(), OP_HAS_VARIABLE); block_append(&c, array_var); block tail = {0}; @@ -204,64 +257,108 @@ block gen_else(block a, block b) { assert(0); } +static uint16_t nesting_level(struct bytecode* bc, inst* target) { + uint16_t level = 0; + assert(bc && target->compiled); + while (bc && target->compiled != bc) { + level++; + bc = bc->parent; + } + assert(bc && bc == target->compiled); + return level; +} -struct bytecode* block_compile(struct symbol_table* syms, block b) { - inst* curr = b.first; +static void compile(struct bytecode* bc, block b) { int pos = 0; int var_frame_idx = 0; - for (; curr; curr = curr->next) { + bc->nsubfunctions = 0; + for (inst* curr = b.first; curr; curr = curr->next) { if (!curr->next) assert(curr == b.last); pos += opcode_length(curr->op); curr->bytecode_pos = pos; - if (opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) { - assert(curr->var_binding && "unbound variable"); - if (curr->var_binding == curr) { - curr->var_frame_idx = var_frame_idx++; - } + curr->compiled = bc; + + int opflags = opcode_describe(curr->op)->flags; + if (opflags & OP_HAS_BINDING) { + assert(curr->bound_by && "unbound term"); + } + if ((opflags & OP_HAS_VARIABLE) && + curr->bound_by == curr) { + curr->imm.intval = var_frame_idx++; + } + if (opflags & OP_HAS_BLOCK) { + assert(curr->bound_by == curr); + curr->imm.intval = bc->nsubfunctions++; } } - struct bytecode* bc = malloc(sizeof(struct bytecode)); + if (bc->nsubfunctions) { + bc->subfunctions = malloc(sizeof(struct bytecode*) * bc->nsubfunctions); + for (inst* curr = b.first; curr; curr = curr->next) { + if (!(opcode_describe(curr->op)->flags & OP_HAS_BLOCK)) + continue; + struct bytecode* subfn = malloc(sizeof(struct bytecode)); + bc->subfunctions[curr->imm.intval] = subfn; + subfn->globals = bc->globals; + subfn->parent = bc; + compile(subfn, curr->subfn); + } + } else { + bc->subfunctions = 0; + } bc->codelen = pos; uint16_t* code = malloc(sizeof(uint16_t) * bc->codelen); bc->code = code; - int* stack_height = malloc(sizeof(int) * (bc->codelen + 1)); - for (int i = 0; icodelen + 1; i++) stack_height[i] = -1; pos = 0; json_t* constant_pool = json_array(); int maxvar = -1; - int curr_stack_height = 1; - for (curr = b.first; curr; curr = curr->next) { - const struct opcode_description* op = opcode_describe(curr->op); - if (curr_stack_height < op->stack_in) { - printf("Stack underflow at %04d\n", curr->bytecode_pos); + for (inst* curr = b.first; curr; curr = curr->next) { + if (curr->op == CLOSURE_CREATE) { + // CLOSURE_CREATE opcodes define closures for use later in the + // codestream. They generate no code. + + // FIXME: make the above true :) + code[pos++] = DUP; + code[pos++] = POP; + continue; } - if (stack_height[curr->bytecode_pos] != -1 && - stack_height[curr->bytecode_pos] != curr_stack_height) { - // FIXME: not sure this is right at all :( - printf("Inconsistent stack heights at %04d %s\n", curr->bytecode_pos, op->name); - } - curr_stack_height -= op->stack_in; - curr_stack_height += op->stack_out; + const struct opcode_description* op = opcode_describe(curr->op); code[pos++] = curr->op; int opflags = op->flags; - if (opflags & OP_HAS_CONSTANT) { + assert(!(op->flags & OP_IS_CALL_PSEUDO)); + if (opflags & OP_HAS_VARIABLE_LENGTH_ARGLIST) { + int nargs = curr->imm.intval; + assert(nargs > 0); + code[pos++] = (uint16_t)nargs; + for (int i=0; inext; + assert(curr && opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO); + code[pos++] = nesting_level(bc, curr->bound_by); + switch (curr->bound_by->op) { + default: assert(0 && "Unknown type of argument"); + case CLOSURE_CREATE: + code[pos++] = curr->bound_by->imm.intval | ARG_NEWCLOSURE; + break; + } + } + } else if (opflags & OP_HAS_CONSTANT) { code[pos++] = json_array_size(constant_pool); json_array_append(constant_pool, curr->imm.constant); } else if (opflags & OP_HAS_VARIABLE) { - uint16_t var = (uint16_t)curr->var_binding->var_frame_idx; + // no closing over variables yet + assert(curr->bound_by->compiled == bc); + uint16_t var = (uint16_t)curr->bound_by->imm.intval; code[pos++] = var; if (var > maxvar) maxvar = var; } else if (opflags & OP_HAS_BRANCH) { assert(curr->imm.target->bytecode_pos != -1); assert(curr->imm.target->bytecode_pos > pos); // only forward branches code[pos] = curr->imm.target->bytecode_pos - (pos + 1); - stack_height[curr->imm.target->bytecode_pos] = curr_stack_height; pos++; } else if (opflags & OP_HAS_CFUNC) { - assert(curr->imm.symbol); + assert(curr->symbol); int found = 0; - for (int i=0; incfunctions; i++) { - if (!strcmp(curr->imm.symbol, syms->cfunctions[i].name)) { + for (int i=0; iglobals->ncfunctions; i++) { + if (!strcmp(curr->symbol, bc->globals->cfunctions[i].name)) { code[pos++] = i; found = 1; break; @@ -272,11 +369,16 @@ struct bytecode* block_compile(struct symbol_table* syms, block b) { code[pos++] = curr->imm.intval; } } - free(stack_height); bc->constants = constant_pool; bc->nlocals = maxvar + 2; // FIXME: frames of size zero? bc->nclosures = 0; +} + +struct bytecode* block_compile(struct symbol_table* syms, block b) { + struct bytecode* bc = malloc(sizeof(struct bytecode)); + bc->parent = 0; bc->globals = syms; + compile(bc, b); return bc; } diff --git a/c/compile.h b/c/compile.h index 0ba54f1..d72ddce 100644 --- a/c/compile.h +++ b/c/compile.h @@ -17,6 +17,9 @@ block gen_op_const(opcode op, json_t* constant); block gen_op_target(opcode op, block target); block gen_op_var_unbound(opcode op, const char* name); block gen_op_var_bound(opcode op, block binder); +block gen_op_block_defn(opcode op, const char* name, block block); +block gen_op_block_unbound(opcode op, const char* name); +block gen_op_call(opcode op, block arglist); block gen_op_symbol(opcode op, const char* name); block gen_subexp(block a); @@ -27,7 +30,7 @@ block gen_else(block a, block b); void block_append(block* b, block b2); block block_join(block a, block b); -block block_bind(block binder, block body); +block block_bind(block binder, block body, int bindflags); struct bytecode* block_compile(struct symbol_table*, block); diff --git a/c/execute.c b/c/execute.c index a97d80f..8bd8ec3 100644 --- a/c/execute.c +++ b/c/execute.c @@ -95,6 +95,18 @@ void stack_restore(){ forkable_stack_pop(&fork_stk); } +static struct closure make_closure(struct forkable_stack* stk, frame_ptr fr, uint16_t* pc) { + uint16_t level = *pc++; + uint16_t idx = *pc++; + fr = frame_get_level(stk, fr, level); + if (idx & ARG_NEWCLOSURE) { + int subfn_idx = idx & ~ARG_NEWCLOSURE; + assert(subfn_idx < frame_self(fr)->bc->nsubfunctions); + return closure_new(stk, frame_self(fr)->bc->subfunctions[subfn_idx]); + } else { + return *frame_closure_arg(fr, idx); + } +} #define stack_push stk_push #define stack_pop stk_pop @@ -267,7 +279,7 @@ json_t* jq_next() { stack_save(); stack_push(array); stack_push(stackval_root(json_integer(idx+1))); - frame_push_backtrack(&frame_stk, frame_current_bytecode(&frame_stk), pc - 1); + frame_push_backtrack(&frame_stk, pc - 1); stack_switch(); stackval sv = {json_array_get(array.value, idx), @@ -291,7 +303,7 @@ json_t* jq_next() { case FORK: { stack_save(); - frame_push_backtrack(&frame_stk, frame_current_bytecode(&frame_stk), pc - 1); + frame_push_backtrack(&frame_stk, pc - 1); stack_switch(); pc++; // skip offset this time break; @@ -332,13 +344,27 @@ json_t* jq_next() { break; } - /* case CALL_1_1: { - uint16_t nargs = *pc++; - + uint16_t nclosures = *pc++; + *frame_current_pc(&frame_stk) = pc + nclosures * 2; + frame_ptr new_frame = frame_push(&frame_stk, + make_closure(&frame_stk, frame_current(&frame_stk), pc)); + pc += 2; + frame_ptr old_frame = forkable_stack_peek_next(&frame_stk, new_frame); + for (int i=0; isavedlimit = state->prevlimit; forkable_stack_check(s); } + +typedef int stack_idx; + +static stack_idx forkable_stack_to_idx(struct forkable_stack* s, void* ptr) { + char* item = ptr; + int pos = item - s->stk; + assert(pos >= 0 && pos < s->length); + return s->length - pos; +} + +static void* forkable_stack_from_idx(struct forkable_stack* s, stack_idx idx) { + assert(idx >= 1 && idx <= s->length); + return &s->stk[s->length - idx]; +} + #endif diff --git a/c/frame_layout.h b/c/frame_layout.h index 55919f8..373c2b8 100644 --- a/c/frame_layout.h +++ b/c/frame_layout.h @@ -6,6 +6,7 @@ struct closure { struct bytecode* bc; uint16_t* pc; + stack_idx env; }; typedef union frame_elem { @@ -57,23 +58,41 @@ static uint16_t** frame_current_pc(struct forkable_stack* stk) { return &frame_self(frame_current(stk))->pc; } +static frame_ptr frame_get_parent(struct forkable_stack* stk, frame_ptr fr) { + return forkable_stack_from_idx(stk, frame_self(fr)->env); +} +static frame_ptr frame_get_level(struct forkable_stack* stk, frame_ptr fr, int level) { + for (int i=0; icode, -1}; + return cl; +} +static struct closure closure_new(struct forkable_stack* stk, struct bytecode* bc) { + struct closure cl = {bc, bc->code, + forkable_stack_to_idx(stk, frame_current(stk))}; + return cl; +} -static void frame_push(struct forkable_stack* stk, struct bytecode* bc) { - frame_ptr fp = forkable_stack_push(stk, frame_size(bc)); - frame_self(fp)->bc = bc; - frame_self(fp)->pc = bc->code; +static frame_ptr frame_push(struct forkable_stack* stk, struct closure cl) { + frame_ptr fp = forkable_stack_push(stk, frame_size(cl.bc)); + *frame_self(fp) = cl; + return fp; } -static void frame_push_backtrack(struct forkable_stack* stk, - struct bytecode* bc, uint16_t* pc) { +static frame_ptr frame_push_backtrack(struct forkable_stack* stk, uint16_t* pc) { + struct closure curr = *frame_self(frame_current(stk)); frame_ptr fp = forkable_stack_push(stk, sizeof(union frame_elem) * 2); - frame_self(fp)->bc = bc; - frame_self(fp)->pc = pc; + curr.pc = pc; + *frame_self(fp) = curr; + return fp; } - static void frame_pop(struct forkable_stack* stk) { forkable_stack_pop(stk); } diff --git a/c/lexer.l b/c/lexer.l index 42103c4..abf3326 100644 --- a/c/lexer.l +++ b/c/lexer.l @@ -11,6 +11,7 @@ "==" { return EQ; } "as" { return AS; } +"def" { return DEF; } "."|"="|";"|"["|"]"|","|":"|"("|")"|"{"|"}"|"|"|"+"|"\$" { return yytext[0];} [[:digit:]]+ { yylval->num = atoi(yytext); return NUMBER;} diff --git a/c/main.c b/c/main.c index 5017d9f..f8ea214 100644 --- a/c/main.c +++ b/c/main.c @@ -32,6 +32,9 @@ void run_tests() { block_append(&program, gen_op_simple(BACKTRACK)); struct bytecode* bc = block_compile(&builtins, program); block_free(program); + printf("Disassembly:\n"); + dump_disassembly(2, bc); + printf("\n"); fgets(buf, sizeof(buf), testdata); json_t* input = json_loads(buf, JSON_DECODE_ANY, 0); jq_init(bc, input); @@ -76,7 +79,7 @@ int main(int argc, char* argv[]) { block_append(&blk, block_join(gen_op_simple(YIELD), gen_op_simple(BACKTRACK))); struct bytecode* bc = block_compile(&builtins, blk); block_free(blk); - dump_disassembly(bc); + dump_disassembly(0, bc); printf("\n"); run_program(bc); } diff --git a/c/opcode.c b/c/opcode.c index 92bbcd2..ff41f86 100644 --- a/c/opcode.c +++ b/c/opcode.c @@ -2,10 +2,12 @@ #define NONE 0 #define CONSTANT (OP_HAS_IMMEDIATE | OP_HAS_CONSTANT) -#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE) +#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE | OP_HAS_BINDING) #define BRANCH (OP_HAS_IMMEDIATE | OP_HAS_BRANCH) #define CFUNC (OP_HAS_IMMEDIATE | OP_HAS_SYMBOL | OP_HAS_CFUNC) -#define UFUNC (OP_HAS_IMMEDIATE | OP_HAS_UFUNC) +#define UFUNC (OP_HAS_IMMEDIATE | OP_HAS_UFUNC | OP_HAS_VARIABLE_LENGTH_ARGLIST) +#define CLOSURE_DEFINE (OP_HAS_IMMEDIATE | OP_HAS_BLOCK | OP_IS_CALL_PSEUDO | OP_HAS_BINDING) +#define CLOSURE_REF (OP_HAS_IMMEDIATE | OP_IS_CALL_PSEUDO | OP_HAS_BINDING) #define OP(name, imm, in, out) \ {name, #name, imm, in, out}, diff --git a/c/opcode.h b/c/opcode.h index 36d4192..262ecb3 100644 --- a/c/opcode.h +++ b/c/opcode.h @@ -21,6 +21,10 @@ enum { OP_HAS_SYMBOL = 16, OP_HAS_CFUNC = 32, OP_HAS_UFUNC = 64, + OP_IS_CALL_PSEUDO = 128, + OP_HAS_VARIABLE_LENGTH_ARGLIST = 256, + OP_HAS_BLOCK = 512, + OP_HAS_BINDING = 1024, }; struct opcode_description { opcode op; diff --git a/c/opcode_list.h b/c/opcode_list.h index d3bfb98..c54e1fd 100644 --- a/c/opcode_list.h +++ b/c/opcode_list.h @@ -18,3 +18,8 @@ OP(CALL_BUILTIN_1_1, CFUNC, 1, 1) OP(CALL_BUILTIN_3_1, CFUNC, 3, 1) OP(CALL_1_1, UFUNC, 1, 1) +OP(RET, NONE, 1, 1) + +OP(CLOSURE_PARAM, CLOSURE_REF, 0, 0) +OP(CLOSURE_REF, CLOSURE_REF, 0, 0) +OP(CLOSURE_CREATE, CLOSURE_DEFINE, 0, 0) diff --git a/c/parser.y b/c/parser.y index 393f7a8..ca82fcd 100644 --- a/c/parser.y +++ b/c/parser.y @@ -20,10 +20,14 @@ %token IDENT %token NUMBER + /* revolting hack */ +%left ';' + %left '|' %left ',' %token EQ "==" %token AS "as" +%token DEF "def" %nonassoc EQ %left '+' @@ -55,12 +59,16 @@ static block gen_index(block obj, block key) { %% program: Exp { *answer = $1; } - Exp: +"def" IDENT ':' Exp ';' Exp { + block body = block_join($4, gen_op_simple(RET)); + $$ = block_bind(gen_op_block_defn(CLOSURE_CREATE, $2, body), $6, OP_IS_CALL_PSEUDO); +} | + Term "as" '$' IDENT '|' Exp { $$ = gen_op_simple(DUP); block_append(&$$, $1); - block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6)); + block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6, OP_HAS_VARIABLE)); } | Exp '|' Exp { @@ -132,6 +140,9 @@ IDENT { } | '$' IDENT { $$ = gen_op_var_unbound(LOADV, $2); +} | +'$' '$' IDENT { + $$ = gen_op_call(CALL_1_1, gen_op_block_unbound(CLOSURE_REF, $3)); } MkDict: diff --git a/c/testdata b/c/testdata new file mode 100644 index 0000000..cc4d17b --- /dev/null +++ b/c/testdata @@ -0,0 +1,143 @@ +# Tests are groups of three lines: program, input, expected output +# Blank lines and lines starting with # are ignored + +# +# Simple value tests to check parser. Input is irrelevant +# + +true +null +true + +false +null +false + +# null +# 42 +# null + +1 +null +1 + +# FIXME: much more number testing needed + +{} +null +{} + +[] +null +[] + +# FIXME: string literals + +# +# Dictionary construction syntax +# + +{a: 1} +null +{"a":1} + +# FIXME: string literals + +# +# Field access, piping +# + +.foo +{"foo": 42, "bar": 43} +42 + +.foo | .bar +{"foo": {"bar": 42}, "bar": "badvalue"} +42 + +.foo.bar +{"foo": {"bar": 42}, "bar": "badvalue"} +42 + + +# FIXME strings +# .["foo"].bar +# {"foo": {"bar": 42}, "bar": "badvalue"} +# 42 + + +# +# Multiple outputs, iteration +# + +.[] +[1,2,3] +1 +2 +3 + +[(.,1),((.,.[]),(2,3))] +["a","b"] +[["a","b"],1,["a","b"],"a","b",2,3] + +[([5,5][]),.,.[]] +[1,2,3] +[5,5,[1,2,3],1,2,3] + +{x: (1,2)},{x:3} | .x +null +1 +2 +3 + +# +# Variables +# + +1 as $x | 2 as $y | [$x,$y,$x] +null +[1,2,1] + +[1,2,3][] as $x | [[4,5,6,7][$x]] +null +[5] +[6] +[7] + +1 as $x | [$x,$x,$x as $x | $x] +null +[1,1,1] + +# [.,(.[] | {x:.},.),.,.[]] + +# +# Builtin functions +# + +# FIXME: floats vs. integer + +1+1 +null +2.0 + +.+4 +15 +19.0 + +[1,2,3] + [.] +null +[1,2,3,null] + +# +# User-defined functions +# Oh god. +# + +def f: . + 1; def g: def g: . + 100; $$f | $$g | $$f; ($$f | $$g), $$g +3.0 +106.0 +105.0 + +[[100,200][] as $x | def f: . + $x; $$f | $$f | $$f] +1 +[300.0, 600.0] \ No newline at end of file -- 2.40.0