From: Nicolas Williams Date: Wed, 4 Jun 2014 23:01:47 +0000 (-0500) Subject: Revert "Add -I / --online-input for huge top-level arrays" X-Git-Tag: jq-1.4~17 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ae625d0de74c2966e627e3d5e498ab72529c4251;p=jq Revert "Add -I / --online-input for huge top-level arrays" This reverts commit 77936a594d797c480f26bfcef3636a74588a6918. There are too many odd bugs in this mode, and it turns out to be a bad idea anyway. Instead, in the future a better option will be to pursue alternative parsers, such as: - a streaming parser that outputs only when a new leaf value is added or an array/object is opened/closed; options here include whether to include a path in each output; - parsers for binary JSON encodings (there's a variety of them). Then one might run jq with a streaming parser and use `reduce` to coalesce inputs from some depth down (instead of from one level down as the reverted commit had intended). Besides, a fully streaming parser is desirable in some cases; therefore we should have such a thing as an option. I've explored modifying the current parser to support a streaming option, but it only makes the code very difficult to follow, which is one reason that alternative parsers make sense. At any rate, this is all for the future. For now there's no streaming of individual texts, just text sequences. 
--- diff --git a/jv.h b/jv.h index f58690a..33a6982 100644 --- a/jv.h +++ b/jv.h @@ -139,11 +139,8 @@ void jv_nomem_handler(jv_nomem_handler_f, void *); jv jv_load_file(const char *, int); -typedef enum { - JV_PARSE_EXPLODE_TOPLEVEL_ARRAY = 1 -} jv_parser_flags; struct jv_parser; -struct jv_parser* jv_parser_new(jv_parser_flags); +struct jv_parser* jv_parser_new(); void jv_parser_set_buf(struct jv_parser*, const char*, int, int); jv jv_parser_next(struct jv_parser*); void jv_parser_free(struct jv_parser*); diff --git a/jv_file.c b/jv_file.c index a633160..54ed36b 100644 --- a/jv_file.c +++ b/jv_file.c @@ -18,7 +18,7 @@ jv jv_load_file(const char* filename, int raw) { data = jv_string(""); } else { data = jv_array(); - parser = jv_parser_new(0); + parser = jv_parser_new(); } while (!feof(file) && !ferror(file)) { char buf[4096]; diff --git a/jv_parse.c b/jv_parse.c index 4667f1f..cc1e7b9 100644 --- a/jv_parse.c +++ b/jv_parse.c @@ -28,8 +28,6 @@ struct jv_parser { int stackpos; int stacklen; jv next; - - jv_parser_flags flags; char* tokenbuf; int tokenpos; @@ -47,8 +45,7 @@ struct jv_parser { }; -static void parser_init(struct jv_parser* p, jv_parser_flags flags) { - p->flags = flags; +static void parser_init(struct jv_parser* p) { p->stack = 0; p->stacklen = p->stackpos = 0; p->next = jv_invalid(); @@ -113,9 +110,6 @@ static pfunc token(struct jv_parser* p, char ch) { break; case ',': - if (p->stackpos == 1 && (p->flags & JV_PARSE_EXPLODE_TOPLEVEL_ARRAY) && - jv_get_kind(p->stack[0]) == JV_KIND_ARRAY) - return 0; if (!jv_is_valid(p->next)) return "Expected value before ','"; if (p->stackpos == 0) @@ -139,22 +133,16 @@ static pfunc token(struct jv_parser* p, char ch) { if (p->stackpos == 0 || jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_ARRAY) return "Unmatched ']'"; if (jv_is_valid(p->next)) { - if (p->stackpos != 1 || !(p->flags & JV_PARSE_EXPLODE_TOPLEVEL_ARRAY)) { - p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next); - p->next = 
jv_invalid(); - } + p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next); + p->next = jv_invalid(); } else { if (jv_array_length(jv_copy(p->stack[p->stackpos-1])) != 0) { // this case hits on input like [1,2,3,] return "Expected another array element"; } } - if (p->stackpos == 1 && (p->flags & JV_PARSE_EXPLODE_TOPLEVEL_ARRAY)) { - jv_free(p->stack[--p->stackpos]); - } else { - jv_free(p->next); - p->next = p->stack[--p->stackpos]; - } + jv_free(p->next); + p->next = p->stack[--p->stackpos]; break; case '}': @@ -327,9 +315,7 @@ static chclass classify(char c) { static const presult OK = "output produced"; static int check_done(struct jv_parser* p, jv* out) { - if ((p->stackpos == 0 && jv_is_valid(p->next)) || - (p->stackpos == 1 && (p->flags & JV_PARSE_EXPLODE_TOPLEVEL_ARRAY) && - jv_get_kind(p->stack[0]) == JV_KIND_ARRAY && jv_is_valid(p->next))) { + if (p->stackpos == 0 && jv_is_valid(p->next)) { *out = p->next; p->next = jv_invalid(); return 1; @@ -384,9 +370,9 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) { return answer; } -struct jv_parser* jv_parser_new(jv_parser_flags flags) { +struct jv_parser* jv_parser_new() { struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser)); - parser_init(p, flags); + parser_init(p); return p; } @@ -458,7 +444,7 @@ jv jv_parser_next(struct jv_parser* p) { jv jv_parse_sized(const char* string, int length) { struct jv_parser parser; - parser_init(&parser, 0); + parser_init(&parser); jv_parser_set_buf(&parser, string, length, 0); jv value = jv_parser_next(&parser); if (jv_is_valid(value)) { diff --git a/main.c b/main.c index e128b2e..b0b4a3b 100644 --- a/main.c +++ b/main.c @@ -154,7 +154,6 @@ int main(int argc, char* argv[]) { ninput_files = 0; int further_args_are_files = 0; int jq_flags = 0; - jv_parser_flags parser_flags = 0; jv program_arguments = jv_array(); for (int i=1; i= argc - 2) { fprintf(stderr, "%s: --arg takes two parameters (e.g. 
-a varname value)\n", progname); @@ -283,7 +280,7 @@ int main(int argc, char* argv[]) { slurped = jv_array(); } } - struct jv_parser* parser = jv_parser_new(parser_flags); + struct jv_parser* parser = jv_parser_new(); char buf[4096]; while (read_more(buf, sizeof(buf))) { if (options & RAW_INPUT) {