]> granicus.if.org Git - jq/commitdiff
Refactor function argument passing into what it always should have been.
authorStephen Dolan <mu@netsoc.tcd.ie>
Tue, 4 Dec 2012 00:39:21 +0000 (00:39 +0000)
committerStephen Dolan <mu@netsoc.tcd.ie>
Tue, 4 Dec 2012 00:39:21 +0000 (00:39 +0000)
Most visible change is that error messages when a function is called
with the wrong number of arguments are much better.

builtin.c
bytecode.c
compile.c
compile.h
opcode.c
opcode.h
opcode_list.h
parser.y

index ef44793577e8a424a3824d66c37d7fa7cd71931f..2c5787da94ff2ccd4e3ea81e38e5404a15c68876 100644 (file)
--- a/builtin.c
+++ b/builtin.c
@@ -285,7 +285,8 @@ static block bind_bytecoded_builtins(block b) {
   };
   block builtins = gen_noop();
   for (unsigned i=0; i<sizeof(builtin_defs)/sizeof(builtin_defs[0]); i++) {
-    builtins = BLOCK(builtins, gen_function(builtin_defs[i].name, builtin_defs[i].code));
+    builtins = BLOCK(builtins, gen_function(builtin_defs[i].name, gen_noop(),
+                                            builtin_defs[i].code));
   }
   return block_bind(builtins, b, OP_IS_CALL_PSEUDO);
 }
index 26f8d36debe5022f8f7f275163155c4a00db3c75..e66bffbb291d43f6466589491feecc10afabe95a 100644 (file)
@@ -6,11 +6,11 @@
 #include "opcode.h"
 
 static int bytecode_operation_length(uint16_t* codeptr) {
+  int length = opcode_describe(*codeptr)->length;
   if (*codeptr == CALL_JQ) {
-    return 4 + codeptr[1] * 2;
-  } else {
-    return opcode_length(*codeptr);
+    length += codeptr[1] * 2;
   }
+  return length;
 }
 
 void dump_disassembly(int indent, struct bytecode* bc) {
index 8a493b45a1954fee6159eb6c0e4dc78562561a79..eb0fe78d3d368d10ab4c1baa03b2a021dd4922e8 100644 (file)
--- a/compile.c
+++ b/compile.c
@@ -12,7 +12,7 @@ struct inst {
 
   opcode op;
   
-  union {
+  struct {
     uint16_t intval;
     struct inst* target;
     jv constant;
@@ -222,33 +222,38 @@ static void block_bind_subblock(block binder, block body, int bindflags) {
   }
 }
 
-block block_bind(block binder, block body, int bindflags) {
+static void block_bind_each(block binder, block body, int bindflags) {
   assert(block_has_only_binders(binder, bindflags));
   bindflags |= OP_HAS_BINDING;
   for (inst* curr = binder.first; curr; curr = curr->next) {
     block_bind_subblock(inst_block(curr), body, bindflags);
   }
-  return block_join(binder, body);
 }
 
+block block_bind(block binder, block body, int bindflags) {
+  block_bind_each(binder, body, bindflags);
+  return block_join(binder, body);
+}
 
-block gen_function(const char* name, block body) {
+block gen_function(const char* name, block formals, block body) {
+  block_bind_each(formals, body, OP_IS_CALL_PSEUDO);
   inst* i = inst_new(CLOSURE_CREATE);
   i->subfn = body;
   i->symbol = strdup(name);
+  i->arglist = formals;
   block b = inst_block(i);
   block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING);
   return b;
 }
 
 block gen_lambda(block body) {
-  return gen_function("@lambda", body);
+  return gen_function("@lambda", gen_noop(), body);
 }
 
 block gen_call(const char* name, block args) {
-  inst* i = inst_new(CALL_JQ);
-  i->arglist = BLOCK(gen_op_block_unbound(CLOSURE_REF, name), args);
-  return BLOCK(inst_block(i), inst_block(inst_new(CALLSEQ_END)));
+  block b = gen_op_block_unbound(CALL_JQ, name);
+  b.first->arglist = args;
+  return b;
 }
 
 
@@ -397,28 +402,30 @@ static int count_cfunctions(block b) {
 
 
 // Expands call instructions into a calling sequence
-// Checking for argument count compatibility happens later
-static block expand_call_arglist(block b) {
+static int expand_call_arglist(struct locfile* locations, block* b) {
+  int errors = 0;
   block ret = gen_noop();
-  for (inst* curr; (curr = block_take(&b));) {
+  for (inst* curr; (curr = block_take(b));) {
+    if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
+      if (!curr->bound_by) {
+        locfile_locate(locations, curr->source, "error: %s is not defined", curr->symbol);
+        errors++;
+        // don't process this instruction if it's not well-defined
+        ret = BLOCK(ret, inst_block(curr));
+        continue;
+      }
+    }
+
+    block prelude = gen_noop();
     if (curr->op == CALL_JQ) {
-      inst* seq_end = block_take(&b);
-      assert(seq_end && seq_end->op == CALLSEQ_END);
+      int actual_args = 0, desired_args = 0;
       // We expand the argument list as a series of instructions
-      block arglist = curr->arglist;
-      curr->arglist = gen_noop();
-      assert(arglist.first && "zeroth argument (function to call) must be present");
-      inst* function = arglist.first->bound_by;
-      assert(function); // FIXME better errors
-
-      switch (function->op) {
-      default: assert(0 && "Unknown parameter type"); break;
+      switch (curr->bound_by->op) {
+      default: assert(0 && "Unknown function type"); break;
       case CLOSURE_CREATE: 
       case CLOSURE_PARAM: {
-        block prelude = gen_noop();
         block callargs = gen_noop();
-        int nargs = 0;
-        for (inst* i; (i = block_take(&arglist));) {
+        for (inst* i; (i = block_take(&curr->arglist));) {
           assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
           block b = inst_block(i);
           switch (i->op) {
@@ -431,44 +438,54 @@ static block expand_call_arglist(block b) {
             block_append(&callargs, gen_op_block_bound(CLOSURE_REF, b));
             break;
           }
-          nargs++;
+          actual_args++;
         }
+        curr->imm.intval = actual_args;
+        curr->arglist = callargs;
 
-        assert(!arglist.first);
-        assert(nargs > 0);
-        curr->imm.intval = nargs - 1;
-        ret = BLOCK(ret, prelude, inst_block(curr), callargs, inst_block(seq_end));
+        if (curr->bound_by->op == CLOSURE_CREATE) {
+          for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
+            assert(i->op == CLOSURE_PARAM);
+            desired_args++;
+          }
+        }
         break;
       }
 
       case CLOSURE_CREATE_C: {
-        // Arguments to C functions not yet supported
-        inst* cfunction_ref = block_take(&arglist);
-        block prelude = gen_noop();
-        int nargs = 1;
-        for (inst* i; (i = block_take(&arglist)); ) {
+        for (inst* i; (i = block_take(&curr->arglist)); ) {
           assert(i->op == CLOSURE_CREATE); // FIXME
           block body = i->subfn;
           i->subfn = gen_noop();
           inst_free(i);
           // arguments should be pushed in reverse order, prepend them to prelude
-          prelude = BLOCK(gen_subexp(expand_call_arglist(body)), prelude);
-          nargs++;
+          errors += expand_call_arglist(locations, &body);
+          prelude = BLOCK(gen_subexp(body), prelude);
+          actual_args++;
         }
         assert(curr->op == CALL_JQ);
         curr->op = CALL_BUILTIN;
-        curr->imm.intval = nargs;
+        curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
+        assert(curr->bound_by->op == CLOSURE_CREATE_C);
+        desired_args = curr->bound_by->imm.cfunc->nargs - 1;
         assert(!curr->arglist.first);
-        curr->arglist = inst_block(cfunction_ref);
-        ret = BLOCK(ret, prelude, inst_block(curr), inst_block(seq_end));
         break;
       }
       }
-    } else {
-      ret = BLOCK(ret, inst_block(curr));
+
+      if (actual_args != desired_args) {
+        locfile_locate(locations, curr->source, 
+                       "error: %s arguments to %s (expected %d but got %d)",
+                       actual_args > desired_args ? "too many" : "too few",
+                       curr->symbol, desired_args, actual_args);
+        errors++;
+      }
+
     }
+    ret = BLOCK(ret, prelude, inst_block(curr));
   }
-  return ret;
+  *b = ret;
+  return errors;
 }
 
 static int compile(struct locfile* locations, struct bytecode* bc, block b) {
@@ -476,8 +493,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
   int pos = 0;
   int var_frame_idx = 0;
   bc->nsubfunctions = 0;
-  bc->nclosures = 0;
-  b = expand_call_arglist(b);
+  errors += expand_call_arglist(locations, &b);
   if (bc->parent) {
     // functions should end in a return
     b = BLOCK(b, gen_op_simple(RET));
@@ -487,18 +503,19 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
   }
   for (inst* curr = b.first; curr; curr = curr->next) {
     if (!curr->next) assert(curr == b.last);
-    pos += opcode_length(curr->op);
+    int length = opcode_describe(curr->op)->length;
+    if (curr->op == CALL_JQ) {
+      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
+        length += 2;
+      }
+    }
+    pos += length;
     curr->bytecode_pos = pos;
     curr->compiled = bc;
 
-    int opflags = opcode_describe(curr->op)->flags;
-    if (opflags & OP_HAS_BINDING) {
-      if (!curr->bound_by) {
-        locfile_locate(locations, curr->source, "error: %s is not defined", curr->symbol);
-        errors++;
-      }
-    }
-    if ((opflags & OP_HAS_VARIABLE) &&
+    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
+
+    if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
         curr->bound_by == curr) {
       curr->imm.intval = var_frame_idx++;
     }
@@ -507,10 +524,6 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
       assert(curr->bound_by == curr);
       curr->imm.intval = bc->nsubfunctions++;
     }
-    if (curr->op == CLOSURE_PARAM) {
-      assert(curr->bound_by == curr);
-      curr->imm.intval = bc->nclosures++;
-    }
     if (curr->op == CLOSURE_CREATE_C) {
       assert(curr->bound_by == curr);
       int idx = bc->globals->ncfunctions++;
@@ -526,6 +539,13 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
         bc->subfunctions[curr->imm.intval] = subfn;
         subfn->globals = bc->globals;
         subfn->parent = bc;
+        subfn->nclosures = 0;
+        for (inst* param = curr->arglist.first; param; param = param->next) {
+          assert(param->op == CLOSURE_PARAM);
+          assert(param->bound_by == param);
+          param->imm.intval = subfn->nclosures++;
+          param->compiled = subfn;
+        }
         errors += compile(locations, subfn, curr->subfn);
         curr->subfn = gen_noop();
       }
@@ -544,42 +564,23 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
     if (op->length == 0)
       continue;
     code[pos++] = curr->op;
-    assert(!(op->flags & OP_IS_CALL_PSEUDO));
+    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
     if (curr->op == CALL_BUILTIN) {
-      int nargs = curr->imm.intval;
-      code[pos++] = (uint16_t)nargs;
-      assert(block_is_single(curr->arglist));
-      inst* cfunc = curr->arglist.first;
-      assert(cfunc && cfunc->bound_by->op == CLOSURE_CREATE_C);
-      code[pos++] = cfunc->bound_by->imm.intval;
-      // FIXME arg errors
-      assert(nargs == bc->globals->cfunctions[cfunc->bound_by->imm.intval].nargs);
+      assert(curr->bound_by->op == CLOSURE_CREATE_C);
+      assert(!curr->arglist.first);
+      code[pos++] = (uint16_t)curr->imm.intval;
+      code[pos++] = curr->bound_by->imm.intval;
     } else if (curr->op == CALL_JQ) {
-      int nargs = curr->imm.intval;
-      assert(nargs >= 0 && nargs < 100); //FIXME
-      code[pos++] = (uint16_t)nargs;
-      curr = curr->next;
-      assert(curr && opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO);
       assert(curr->bound_by->op == CLOSURE_CREATE ||
              curr->bound_by->op == CLOSURE_PARAM);
+      code[pos++] = (uint16_t)curr->imm.intval;
       code[pos++] = nesting_level(bc, curr->bound_by);
-      if (curr->bound_by->op == CLOSURE_CREATE) {
-        code[pos++] = curr->bound_by->imm.intval | ARG_NEWCLOSURE;
-        inst* i = curr->bound_by;
-        // FIXME arg errors
-        assert(nargs == i->compiled->subfunctions[i->imm.intval]->nclosures);
-      } else {
-        code[pos++] = curr->bound_by->imm.intval;
-        // FIXME arg errors
-        assert(nargs == 0);
-      }
-      for (int i=0; i<nargs; i++) {
-        curr = curr->next;
-        assert(curr && opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO);
-        assert(curr->op == CLOSURE_REF);
-        code[pos++] = nesting_level(bc, curr->bound_by);
-        assert(curr->bound_by->op == CLOSURE_CREATE);
-        code[pos++] = curr->bound_by->imm.intval | ARG_NEWCLOSURE;
+      code[pos++] = curr->bound_by->imm.intval | 
+        (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
+      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
+        assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
+        code[pos++] = nesting_level(bc, arg->bound_by);
+        code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
       }
     } else if (op->flags & OP_HAS_CONSTANT) {
       code[pos++] = jv_array_length(jv_copy(constant_pool));
@@ -607,6 +608,7 @@ static int compile(struct locfile* locations, struct bytecode* bc, block b) {
 int block_compile(block b, struct locfile* locations, struct bytecode** out) {
   struct bytecode* bc = malloc(sizeof(struct bytecode));
   bc->parent = 0;
+  bc->nclosures = 0;
   bc->globals = malloc(sizeof(struct symbol_table));
   int ncfunc = count_cfunctions(b);
   bc->globals->ncfunctions = 0;
index 7ac634baeba4c43b54d089c1ae2d10712947cd36..e74afce49638ecfbb096dca44ea8327c98d3f040 100644 (file)
--- a/compile.h
+++ b/compile.h
@@ -27,7 +27,7 @@ block gen_op_var_unbound(opcode op, const char* name);
 block gen_op_var_bound(opcode op, block binder);
 block gen_op_block_unbound(opcode op, const char* name);
 
-block gen_function(const char* name, block body);
+block gen_function(const char* name, block formals, block body);
 block gen_lambda(block body);
 block gen_call(const char* name, block body);
 block gen_subexp(block a);
index 9b3e5ca33bc3a1992ca57f277574899ff0813850..4e785b8f49f24ab9ef648b7a56f3da91fc063ebd 100644 (file)
--- a/opcode.c
+++ b/opcode.c
@@ -5,9 +5,8 @@
 #define CONSTANT OP_HAS_CONSTANT, 2
 #define VARIABLE (OP_HAS_VARIABLE | OP_HAS_BINDING), 3
 #define BRANCH OP_HAS_BRANCH, 2
-#define CFUNC OP_HAS_CFUNC, 3
-#define UFUNC OP_HAS_UFUNC, 2
-#define CALLSEQ_END_IMM (OP_IS_CALL_PSEUDO), 0
+#define CFUNC (OP_HAS_CFUNC | OP_HAS_BINDING), 3
+#define UFUNC (OP_HAS_UFUNC | OP_HAS_BINDING | OP_IS_CALL_PSEUDO), 4
 #define DEFINITION (OP_IS_CALL_PSEUDO | OP_HAS_BINDING), 0
 #define CLOSURE_REF_IMM (OP_IS_CALL_PSEUDO | OP_HAS_BINDING), 2
 
index 916247880b479d19f222f66976d51b9a853eaabc..ee956e275c48f4d83fc817a0e893c971dfe5b1b2 100644 (file)
--- a/opcode.h
+++ b/opcode.h
@@ -38,8 +38,4 @@ struct opcode_description {
 
 const struct opcode_description* opcode_describe(opcode op);
 
-static inline int opcode_length(opcode op) {
-  return opcode_describe(op)->length;
-}
-
 #endif
index 03cdbc8ba262a64482585411df07bc9326ee1b10..e6dece6e238144d406924ac86ad3298e91c350f1 100644 (file)
@@ -22,7 +22,6 @@ OP(CALL_BUILTIN, CFUNC, -1, 1)
 OP(CALL_JQ, UFUNC, 1, 1)
 OP(RET, NONE, 1, 1)
 
-OP(CALLSEQ_END, CALLSEQ_END_IMM, 0, 0)
 OP(CLOSURE_PARAM, DEFINITION, 0, 0)
 OP(CLOSURE_REF, CLOSURE_REF_IMM, 0, 0)
 OP(CLOSURE_CREATE, DEFINITION, 0, 0)
index e3d32e45ca2369f41e7f63a8f44ae544851ceae6..f77d6d9293c73f9d4b1b72b4f4cbc932829e6495 100644 (file)
--- a/parser.y
+++ b/parser.y
@@ -299,13 +299,14 @@ Term {
 
 FuncDef:
 "def" IDENT ':' Exp ';' {
-  $$ = gen_function(jv_string_value($2), $4);
+  $$ = gen_function(jv_string_value($2), gen_noop(), $4);
   jv_free($2);
 } |
 
 "def" IDENT '(' IDENT ')' ':' Exp ';' {
-  block body = block_bind(gen_op_block_unbound(CLOSURE_PARAM, jv_string_value($4)), $7, OP_IS_CALL_PSEUDO);
-  $$ = gen_function(jv_string_value($2), body);
+  $$ = gen_function(jv_string_value($2), 
+                    gen_op_block_unbound(CLOSURE_PARAM, jv_string_value($4)), 
+                    $7);
   jv_free($2);
   jv_free($4);
 }