From: Stephen Dolan Date: Mon, 10 Sep 2012 23:04:47 +0000 (+0100) Subject: Much, much better error reporting from the parser. X-Git-Tag: jq-1.1~56 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95dd64b0f0f5bcf1aff780caa229bc592703faa1;p=jq Much, much better error reporting from the parser. Getting proper locations through flex/bison was more effort than was advertised. --- diff --git a/c/execute.c b/c/execute.c index f59e156..9f7b63b 100644 --- a/c/execute.c +++ b/c/execute.c @@ -470,23 +470,3 @@ void jq_teardown() { pathbuf = 0; pathsize = 0; } - -void run_program(struct bytecode* bc) { -#if JQ_DEBUG - dump_disassembly(0, bc); - printf("\n"); -#endif - char buf[409600]; - fgets(buf, sizeof(buf), stdin); - jq_init(bc, jv_parse(buf)); - jv result; - while (jv_is_valid(result = jq_next())) { - jv_dump(result); - printf("\n"); - } - jv_free(result); - #if JQ_DEBUG - printf("end of results\n"); - #endif - jq_teardown(); -} diff --git a/c/jv_parse.c b/c/jv_parse.c index 9300f31..7fd4d2d 100644 --- a/c/jv_parse.c +++ b/c/jv_parse.c @@ -331,20 +331,18 @@ jv jv_parse_sized(const char* string, int length) { const char* p = string; char ch; - while (p < string + length) { + presult msg = 0; + while (msg == 0 && p < string + length) { ch = *p++; - presult msg = scan(&parser, ch); - if (msg){ - printf("ERROR: %s (parsing '%s')\n", msg, string); - return jv_invalid(); - } + msg = scan(&parser, ch); } - presult msg = finish(&parser); + if (msg == 0) msg = finish(&parser); + jv value; if (msg) { - printf("ERROR: %s (parsing '%s')\n", msg, string); - return jv_invalid(); + value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')", msg, string)); + } else { + value = jv_copy(parser.next); } - jv value = jv_copy(parser.next); jv_parser_free(&parser); return value; } diff --git a/c/lexer.l b/c/lexer.l index 1794b9f..bb7f57d 100644 --- a/c/lexer.l +++ b/c/lexer.l @@ -1,10 +1,19 @@ %{ #include "compile.h" #include "parser.tab.h" /* Generated by bison. */ + +#define YY_USER_ACTION \ + do { \ + yylloc->start = yyget_extra(yyscanner); \ + yylloc->end = yylloc->start + yyleng; \ + yyset_extra(yylloc->end, yyscanner); \ + } while (0); + %} %option noyywrap nounput noinput nodefault %option reentrant +%option extra-type="int" %option bison-bridge bison-locations %% diff --git a/c/locfile.h b/c/locfile.h new file mode 100644 index 0000000..0f605bb --- /dev/null +++ b/c/locfile.h @@ -0,0 +1,62 @@ +#include +#include +#include +typedef struct { + int start, end; +} location; + +struct locfile { + const char* data; + int length; + int* linemap; + int nlines; +}; + +static void locfile_init(struct locfile* l, const char* data, int length) { + l->data = data; + l->length = length; + l->nlines = 1; + for (int i=0; inlines++; + } + l->linemap = malloc(sizeof(int) * (l->nlines + 1)); + l->linemap[0] = 0; + int line = 1; + for (int i=0; ilinemap[line] = i; + line++; + } + } + l->linemap[l->nlines] = length; +} + +static void locfile_free(struct locfile* l) { + free(l->linemap); +} + +static int locfile_get_line(struct locfile* l, int pos) { + assert(pos < l->length); + int line = 0; + while (l->linemap[line+1] < pos) line++; + assert(line < l->nlines); + return line; +} + +static int locfile_line_length(struct locfile* l, int line) { + assert(line < l->nlines); + return l->linemap[line+1] - l->linemap[line]; +} + +static void locfile_locate(struct locfile* l, location loc) { + int startline = locfile_get_line(l, loc.start); + int offset = l->linemap[startline]; + printf("%.*s\n", locfile_line_length(l, startline), l->data + offset); + printf("%*s", loc.start - offset, ""); + for (int i = loc.start; + i < loc.end && i < offset + locfile_line_length(l, startline); + i++){ + printf("^"); + } + printf("\n"); +} diff --git a/c/main.c b/c/main.c index 7c341d8..4faf2a0 100644 --- a/c/main.c +++ b/c/main.c @@ -4,13 +4,36 @@ #include "builtin.h" #include "jv.h" -block compile(const char* str); +int compile(const char* str, block* answer); void jq_init(struct bytecode* bc, jv value); jv jq_next(); void jq_teardown(); -void run_program(struct bytecode* bc); + +void run_program(struct bytecode* bc) { +#if JQ_DEBUG + dump_disassembly(0, bc); + printf("\n"); +#endif + char buf[409600]; + fgets(buf, sizeof(buf), stdin); + jv value = jv_parse(buf); + if (!jv_is_valid(value)) { + assert(0 && "couldn't parse input"); //FIXME + } + jq_init(bc, value); + jv result; + while (jv_is_valid(result = jq_next())) { + jv_dump(result); + printf("\n"); + } + jv_free(result); + #if JQ_DEBUG + printf("end of results\n"); + #endif + jq_teardown(); +} int skipline(const char* buf) { int p = 0; @@ -29,7 +52,9 @@ void run_tests() { if (skipline(buf)) continue; printf("Testing %s\n", buf); int pass = 1; - block program = compile(buf); + block program; + int nerrors = compile(buf, &program); + assert(nerrors == 0); block_append(&program, gen_op_simple(YIELD)); block_append(&program, gen_op_simple(BACKTRACK)); program = gen_cbinding(&builtins, program); @@ -86,11 +111,17 @@ void run_tests() { int main(int argc, char* argv[]) { if (argc == 1) { run_tests(); return 0; } - block blk = compile(argv[1]); + block blk; + int nerrors = compile(argv[1], &blk); + if (nerrors > 0) { + printf("%d compile %s\n", nerrors, nerrors > 1 ? "errors" : "error"); + return 1; + } block_append(&blk, block_join(gen_op_simple(YIELD), gen_op_simple(BACKTRACK))); blk = gen_cbinding(&builtins, blk); struct bytecode* bc = block_compile(blk); block_free(blk); run_program(bc); bytecode_free(bc); + return 0; } diff --git a/c/parser.y b/c/parser.y index 0f1e42c..11b88a7 100644 --- a/c/parser.y +++ b/c/parser.y @@ -4,21 +4,40 @@ #include "compile.h" %} +%code requires { +#include "locfile.h" +#define YYLTYPE location +#define YYLLOC_DEFAULT(Loc, Rhs, N) \ + do { \ + if (N) { \ + (Loc).start = YYRHSLOC(Rhs, 1).start; \ + (Loc).end = YYRHSLOC(Rhs, N).end; \ + } else { \ + (Loc).start = YYRHSLOC(Rhs, 0).end; \ + (Loc).end = YYRHSLOC(Rhs, 0).end; \ + } \ + } while (0) + } + %locations +%error-verbose %define api.pure %union { jv literal; block blk; } +%destructor { jv_free($$); } +%destructor { block_free($$); } + %parse-param {block* answer} +%parse-param {int* errors} +%parse-param {struct locfile* locations} %parse-param {yyscan_t lexer} %lex-param {yyscan_t lexer} - %token IDENT %token LITERAL - %token EQ "==" %token DEFINEDOR "//" %token AS "as" @@ -50,8 +69,18 @@ %type Exp Term MkDict MkDictPair ExpD ElseBody %{ #include "lexer.yy.h" -void yyerror(YYLTYPE* loc, block* answer, yyscan_t lexer, const char *s){ - printf("ERROR: %s\n", s); +#define FAIL(loc, msg) \ + do { \ + location l = loc; \ + yyerror(&l, answer, errors, locations, lexer, msg); \ + /*YYERROR*/; \ + } while (0) + +void yyerror(YYLTYPE* loc, block* answer, int* errors, + struct locfile* locations, yyscan_t lexer, const char *s){ + (*errors)++; + printf("error: %s\n", s); + locfile_locate(locations, *loc); } static block gen_dictpair(block k, block v) { @@ -263,7 +292,9 @@ MkDictPair $$ = gen_dictpair(gen_op_const(LOADK, $1), $3); } | LITERAL ':' ExpD { - assert(jv_get_kind($1) == JV_KIND_STRING); + if (jv_get_kind($1) != JV_KIND_STRING) { + FAIL(@1, "Object keys must be strings"); + } $$ = gen_dictpair(gen_op_const(LOADK, $1), $3); } | IDENT { @@ -275,14 +306,22 @@ MkDictPair } %% -block compile(const char* str) { +int compile(const char* str, block* answer) { yyscan_t scanner; YY_BUFFER_STATE buf; - block answer = gen_noop(); - yylex_init(&scanner); + yylex_init_extra(0, &scanner); buf = yy_scan_string(str, scanner); - yyparse(&answer, scanner); + int errors = 0; + struct locfile locations; + locfile_init(&locations, str, strlen(str)); + *answer = gen_noop(); + yyparse(answer, &errors, &locations, scanner); + locfile_free(&locations); yy_delete_buffer(buf, scanner); yylex_destroy(scanner); - return answer; + if (errors > 0) { + block_free(*answer); + *answer = gen_noop(); + } + return errors; }