#define pfunc presult
#endif
+enum last_seen { // most recent token recorded by the streaming code paths (value() and stream_token())
+ JV_LAST_NONE = 0, // initial/reset state; also restored when a value or container ends at nesting depth zero
+ JV_LAST_OPEN_ARRAY = '[', // a '[' was just consumed
+ JV_LAST_OPEN_OBJECT = '{', // a '{' was just consumed
+ JV_LAST_COLON = ':', // a ':' following an object key was just consumed
+ JV_LAST_COMMA = ',', // a ',' separating elements or key:value pairs was just consumed
+ JV_LAST_VALUE = 'V', // a value (or a closed array/object) was just seen inside a container
+};
+
struct jv_parser {
const char* curr_buf;
int curr_buf_length;
int flags;
- jv* stack;
- int stackpos;
- int stacklen;
- jv next;
+ jv* stack; // parser
+ int stackpos; // parser
+ int stacklen; // both (optimization; it's really pathlen for streaming)
+ jv path; // streamer
+ enum last_seen last_seen; // streamer
+ jv output; // streamer
+ jv next; // both
char* tokenbuf;
int tokenpos;
};
-static void parser_init(struct jv_parser* p) {
- p->flags = 0;
+static void parser_init(struct jv_parser* p, int flags) {
+ p->flags = flags;
+ if ((p->flags & JV_PARSE_STREAMING)) {
+ p->path = jv_array();
+ } else {
+ p->path = jv_invalid();
+ p->flags &= ~(JV_PARSE_STREAM_ERRORS);
+ }
p->stack = 0;
p->stacklen = p->stackpos = 0;
+ p->last_seen = JV_LAST_NONE;
+ p->output = jv_invalid();
p->next = jv_invalid();
p->tokenbuf = 0;
p->tokenlen = p->tokenpos = 0;
}
static void parser_reset(struct jv_parser* p) {
+ if ((p->flags & JV_PARSE_STREAMING)) {
+ jv_free(p->path);
+ p->path = jv_array();
+ }
+ p->last_seen = JV_LAST_NONE;
+ jv_free(p->output);
+ p->output = jv_invalid();
jv_free(p->next);
p->next = jv_invalid();
for (int i=0; i<p->stackpos; i++)
static void parser_free(struct jv_parser* p) {
parser_reset(p);
+ jv_free(p->path);
jv_mem_free(p->stack);
jv_mem_free(p->tokenbuf);
jvp_dtoa_context_free(&p->dtoa);
}
static pfunc value(struct jv_parser* p, jv val) {
- if (jv_is_valid(p->next)) return "Expected separator between values";
+ if ((p->flags & JV_PARSE_STREAMING)) {
+ if (jv_is_valid(p->next) || p->last_seen == JV_LAST_VALUE)
+ return "Expected separator between values";
+ if (p->stacklen > 0)
+ p->last_seen = JV_LAST_VALUE;
+ else
+ p->last_seen = JV_LAST_NONE;
+ } else {
+ if (jv_is_valid(p->next)) return "Expected separator between values";
+ }
jv_free(p->next);
p->next = val;
return 0;
p->stack[p->stackpos++] = v;
}
-static pfunc token(struct jv_parser* p, char ch) {
+static pfunc parse_token(struct jv_parser* p, char ch) {
switch (ch) {
case '[':
if (jv_is_valid(p->next)) return "Expected separator between values";
return 0;
}
+// Streaming-mode token handler: consumes one structural character ('[', '{',
+// ':', ',', ']' or '}') and updates p->path, p->stacklen and p->last_seen.
+// When an event becomes ready it is stored in p->output as [path, leaf],
+// as [path, leaf, true] when the leaf is the last one before a container
+// closes, or as [path] when a container closes without a pending leaf.
+// Any other character is ignored.
+// Returns 0 on success, or a static error message string on a syntax error.
+static pfunc stream_token(struct jv_parser* p, char ch) {
+  jv_kind k;
+  jv last;
+
+  switch (ch) {
+  case '[':
+    if (jv_is_valid(p->next))
+      return "Expected a separator between values";
+    p->path = jv_array_append(p->path, jv_number(0)); // push
+    p->last_seen = JV_LAST_OPEN_ARRAY;
+    p->stacklen++;
+    break;
+
+  case '{':
+    if (p->last_seen == JV_LAST_VALUE)
+      return "Expected a separator between values";
+    // Push object key: null, since we don't know it yet
+    p->path = jv_array_append(p->path, jv_null()); // push
+    p->last_seen = JV_LAST_OPEN_OBJECT;
+    p->stacklen++;
+    break;
+
+  case ':':
+    if (p->stacklen == 0)
+      return "':' not as part of an object";
+    // Fetch the top path element into `last` so the reference returned by
+    // jv_array_get() is released; a string element would otherwise leak.
+    last = jv_array_get(jv_copy(p->path), p->stacklen - 1);
+    k = jv_get_kind(last);
+    jv_free(last);
+    if (k == JV_KIND_NUMBER)
+      return "':' not as part of an object";
+    if (!jv_is_valid(p->next) || p->last_seen == JV_LAST_NONE)
+      return "Expected string key before ':'";
+    if (jv_get_kind(p->next) != JV_KIND_STRING)
+      return "Object keys must be strings";
+    if (p->last_seen != JV_LAST_VALUE)
+      return "':' should follow a key";
+    p->last_seen = JV_LAST_COLON;
+    // The pending string becomes the key at the top of the path
+    p->path = jv_array_set(p->path, p->stacklen - 1, p->next);
+    p->next = jv_invalid();
+    break;
+
+  case ',':
+    if (p->last_seen != JV_LAST_VALUE)
+      return "Expected value before ','";
+    if (p->stacklen == 0)
+      return "',' not as part of an object or array";
+    last = jv_array_get(jv_copy(p->path), p->stacklen - 1);
+    k = jv_get_kind(last);
+    if (k == JV_KIND_NUMBER) {
+      int idx = jv_number_value(last);
+
+      if (jv_is_valid(p->next)) {
+        p->output = JV_ARRAY(jv_copy(p->path), p->next);
+        p->next = jv_invalid();
+      }
+      // Advance the array index at the top of the path
+      p->path = jv_array_set(p->path, p->stacklen - 1, jv_number(idx + 1));
+      p->last_seen = JV_LAST_COMMA;
+    } else if (k == JV_KIND_STRING) {
+      if (jv_is_valid(p->next)) {
+        p->output = JV_ARRAY(jv_copy(p->path), p->next);
+        p->next = jv_invalid();
+      }
+      p->path = jv_array_set(p->path, p->stacklen - 1, jv_true()); // ready for another name:value pair
+      p->last_seen = JV_LAST_COMMA;
+    } else {
+      assert(k == JV_KIND_NULL);
+      // this case hits on input like {,}
+      // make sure to handle input like {"a", "b"} and {"a":, ...}
+      jv_free(last);
+      return "Objects must consist of key:value pairs";
+    }
+    jv_free(last);
+    break;
+
+  case ']':
+    if (p->stacklen == 0)
+      return "Unmatched ']' at the top-level";
+    if (p->last_seen == JV_LAST_COMMA)
+      return "Expected another array element";
+    if (p->last_seen == JV_LAST_OPEN_ARRAY)
+      assert(!jv_is_valid(p->next));
+
+    last = jv_array_get(jv_copy(p->path), p->stacklen - 1);
+    k = jv_get_kind(last);
+    jv_free(last);
+
+    if (k != JV_KIND_NUMBER)
+      return "Unmatched ']' in the middle of an object";
+    if (jv_is_valid(p->next)) {
+      // Last element: the third item marks that a closing event must follow
+      p->output = JV_ARRAY(jv_copy(p->path), p->next, jv_true());
+      p->next = jv_invalid();
+    } else if (p->last_seen != JV_LAST_OPEN_ARRAY) {
+      p->output = JV_ARRAY(jv_copy(p->path));
+    }
+
+    p->path = jv_array_slice(p->path, 0, --(p->stacklen)); // pop
+    //assert(!jv_is_valid(p->next));
+    jv_free(p->next);
+    p->next = jv_invalid();
+
+    if (p->last_seen == JV_LAST_OPEN_ARRAY)
+      p->output = JV_ARRAY(jv_copy(p->path), jv_array()); // Empty arrays are leaves
+
+    if (p->stacklen == 0)
+      p->last_seen = JV_LAST_NONE;
+    else
+      p->last_seen = JV_LAST_VALUE;
+    break;
+
+  case '}':
+    if (p->stacklen == 0)
+      return "Unmatched '}' at the top-level";
+    if (p->last_seen == JV_LAST_COMMA)
+      return "Expected another key:value pair";
+    if (p->last_seen == JV_LAST_OPEN_OBJECT)
+      assert(!jv_is_valid(p->next));
+
+    last = jv_array_get(jv_copy(p->path), p->stacklen - 1);
+    k = jv_get_kind(last);
+    jv_free(last);
+    if (k == JV_KIND_NUMBER)
+      return "Unmatched '}' in the middle of an array";
+
+    if (jv_is_valid(p->next)) {
+      if (k != JV_KIND_STRING)
+        return "Objects must consist of key:value pairs";
+      // Last pair: the third item marks that a closing event must follow
+      p->output = JV_ARRAY(jv_copy(p->path), p->next, jv_true());
+      p->next = jv_invalid();
+    } else {
+      // Perhaps {"a":[]}
+      if (p->last_seen == JV_LAST_COLON)
+        // Looks like {"a":}
+        return "Missing value in key:value pair";
+      if (p->last_seen == JV_LAST_COMMA)
+        // Looks like {"a":0,}
+        return "Expected another key-value pair";
+      if (p->last_seen == JV_LAST_OPEN_ARRAY)
+        return "Unmatched '}' in the middle of an array";
+      if (p->last_seen != JV_LAST_VALUE && p->last_seen != JV_LAST_OPEN_OBJECT)
+        return "Unmatched '}'";
+      if (p->last_seen != JV_LAST_OPEN_OBJECT)
+        p->output = JV_ARRAY(jv_copy(p->path));
+    }
+    p->path = jv_array_slice(p->path, 0, --(p->stacklen)); // pop
+    jv_free(p->next);
+    p->next = jv_invalid();
+
+    if (p->last_seen == JV_LAST_OPEN_OBJECT)
+      p->output = JV_ARRAY(jv_copy(p->path), jv_object()); // Empty objects are leaves
+
+    if (p->stacklen == 0)
+      p->last_seen = JV_LAST_NONE;
+    else
+      p->last_seen = JV_LAST_VALUE;
+    break;
+  }
+  return 0;
+}
static void tokenadd(struct jv_parser* p, char c) {
assert(p->tokenpos <= p->tokenlen);
}
assert(p->tokenpos < p->tokenlen);
p->tokenbuf[p->tokenpos++] = c;
+ p->tokenbuf[p->tokenpos] = '\0'; // for debugging
}
static int unhex4(char* hex) {
static const presult OK = "output produced";
-static int check_done(struct jv_parser* p, jv* out) {
+static int parse_check_done(struct jv_parser* p, jv* out) {
if (p->stackpos == 0 && jv_is_valid(p->next)) {
*out = p->next;
p->next = jv_invalid();
}
}
+// Streaming counterpart of parse_check_done(): hands the next ready event to
+// the caller through *out. Returns 1 when *out was written, 0 otherwise.
+static int stream_check_done(struct jv_parser* p, jv* out) {
+  // A value is pending at nesting depth zero: emit it as [path, value].
+  if (p->stacklen == 0 && jv_is_valid(p->next)) {
+    *out = JV_ARRAY(jv_copy(p->path), p->next);
+    p->next = jv_invalid();
+    return 1;
+  }
+
+  // Nothing queued by stream_token().
+  if (!jv_is_valid(p->output))
+    return 0;
+
+  if (jv_array_length(jv_copy(p->output)) <= 2) {
+    // Ordinary event: hand it over as-is and clear the slot.
+    *out = p->output;
+    p->output = jv_invalid();
+    return 1;
+  }
+
+  // Three-element event queued when a container closes: emit [path, leaf] now
+  // and keep [path] queued so the next call reports the close.
+  *out = jv_array_slice(jv_copy(p->output), 0, 2);
+  p->output = jv_array_slice(p->output, 0, 1);
+  return 1;
+}
+
+// Non-streaming truncation check: true only when the JV_PARSE_SEQ flag is
+// set, the last character was not whitespace, and `out` is a number.
+static int parse_check_truncation(struct jv_parser* p, jv out) {
+  if (!(p->flags & JV_PARSE_SEQ))
+    return 0;
+  if (p->last_ch_was_ws)
+    return 0;
+  return jv_get_kind(out) == JV_KIND_NUMBER;
+}
+
+// Streaming truncation check: inspects element 1 of the event `out` and
+// reports 1 when it is a number, true, false or null; 0 otherwise, including
+// when `out` itself is invalid.
+static int stream_check_truncation(struct jv_parser* p, jv out) {
+  if (!jv_is_valid(out))
+    return 0;
+
+  jv leaf = jv_array_get(jv_copy(out), 1);
+  jv_kind kind = jv_get_kind(leaf);
+  jv_free(leaf);
+
+  switch (kind) {
+  case JV_KIND_NUMBER:
+  case JV_KIND_TRUE:
+  case JV_KIND_FALSE:
+  case JV_KIND_NULL:
+    return 1;
+  default:
+    return 0;
+  }
+}
+
+#define check_done(p, out) /* JV_PARSE_STREAMING set: stream_check_done(); otherwise parse_check_done() */ \
+ (((p)->flags & JV_PARSE_STREAMING) ? stream_check_done((p), (out)) : parse_check_done((p), (out)))
+
+#define token(p, ch) /* JV_PARSE_STREAMING set: stream_token(); otherwise parse_token() */ \
+ (((p)->flags & JV_PARSE_STREAMING) ? stream_token((p), (ch)) : parse_token((p), (ch)))
+
+#define check_truncation(p, o) /* JV_PARSE_STREAMING set: stream_check_truncation(); otherwise parse_check_truncation() */ \
+ (((p)->flags & JV_PARSE_STREAMING) ? stream_check_truncation((p), (o)) : parse_check_truncation((p), (o)))
+
static pfunc scan(struct jv_parser* p, char ch, jv* out) {
p->column++;
if (ch == '\n') {
if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) {
TRY(check_literal(p));
if (p->st == JV_PARSER_NORMAL && check_done(p, out)) {
- if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(*out) == JV_KIND_NUMBER) {
+ if (check_truncation(p, *out)) {
jv_free(*out);
*out = jv_invalid();
return "Potentially truncated top-level numeric value";
struct jv_parser* jv_parser_new(int flags) {
struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser));
- parser_init(p);
+ // parser_init() stores the flags itself and clears JV_PARSE_STREAM_ERRORS
+ // when JV_PARSE_STREAMING is not set; re-assigning p->flags afterwards
+ // would undo that masking, so the old assignment is dropped.
+ parser_init(p, flags);
- p->flags = flags;
return p;
}
static const unsigned char UTF8_BOM[] = {0xEF,0xBB,0xBF};
+// Number of bytes of the current buffer not yet consumed by the parser;
+// 0 when no buffer is attached.
+int jv_parser_remaining(struct jv_parser* p) {
+  return (p->curr_buf == 0) ? 0 : (p->curr_buf_length - p->curr_buf_pos);
+}
+
void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) {
assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length)
&& "previous buffer not exhausted");
p->curr_buf_is_partial = is_partial;
}
+static jv make_error(struct jv_parser*, const char *, ...) JV_PRINTF_LIKE(2, 3);
+
+// Builds a parse-error result from a printf-style format. With
+// JV_PARSE_STREAM_ERRORS set the error is returned as the array
+// [message, path]; otherwise as an invalid jv carrying the message.
+static jv make_error(struct jv_parser* p, const char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  jv message = jv_string_vfmt(fmt, args);
+  va_end(args);
+
+  if (!(p->flags & JV_PARSE_STREAM_ERRORS))
+    return jv_invalid_with_msg(message);
+  return JV_ARRAY(message, jv_copy(p->path));
+}
+
jv jv_parser_next(struct jv_parser* p) {
- assert(p->curr_buf && "a buffer must be provided");
+ if (!p->curr_buf)
+ return jv_invalid(); // Need a buffer
if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM"));
- jv value;
+ jv value = jv_invalid();
+ if ((p->flags & JV_PARSE_STREAMING) && stream_check_done(p, &value))
+ return value;
char ch;
presult msg = 0;
while (!msg && p->curr_buf_pos < p->curr_buf_length) {
if (msg == OK) {
return value;
} else if (msg) {
- parser_reset(p);
if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) {
// Skip to the next RS
p->st = JV_PARSER_WAITING_FOR_RS;
- return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d (need RS to resync)", msg, p->line, p->column));
+ value = make_error(p, "%s at line %d, column %d (need RS to resync)", msg, p->line, p->column);
+ parser_reset(p);
+ return value;
}
+ value = make_error(p, "%s at line %d, column %d", msg, p->line, p->column);
+ parser_reset(p);
if (!(p->flags & JV_PARSE_SEQ)) {
// We're not parsing a JSON text sequence; throw this buffer away.
p->curr_buf = 0;
p->curr_buf_pos = 0;
} // Else ch must be RS; don't clear buf so we can start parsing again after this ch
- return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d", msg, p->line, p->column));
+ return value;
} else if (p->curr_buf_is_partial) {
assert(p->curr_buf_pos == p->curr_buf_length);
// need another buffer
// at EOF
if (p->st != JV_PARSER_WAITING_FOR_RS) {
if (p->st != JV_PARSER_NORMAL) {
+ value = make_error(p, "Unfinished string at EOF at line %d, column %d", p->line, p->column);
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
- return jv_invalid_with_msg(jv_string("Unfinished string"));
+ return value;
}
if ((msg = check_literal(p))) {
+ value = make_error(p, "%s at EOF at line %d, column %d", msg, p->line, p->column);
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
- return jv_invalid_with_msg(jv_string(msg));
+ return value;
}
- if (p->stackpos != 0) {
+ if (((p->flags & JV_PARSE_STREAMING) && p->stacklen != 0) ||
+ (!(p->flags & JV_PARSE_STREAMING) && p->stackpos != 0)) {
+ value = make_error(p, "Unfinished JSON term at EOF at line %d, column %d", p->line, p->column);
parser_reset(p);
p->st = JV_PARSER_WAITING_FOR_RS;
- return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
+ return value;
}
}
// p->next is either invalid (nothing here but no syntax error)
// or valid (this is the value). either way it's the thing to return
- value = p->next;
+ if ((p->flags & JV_PARSE_STREAMING) && jv_is_valid(p->next)) {
+ value = JV_ARRAY(jv_copy(p->path), p->next); // except in streaming mode we've got to make it [path,value]
+ } else {
+ value = p->next;
+ }
p->next = jv_invalid();
if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) {
jv_free(value);
- return jv_invalid_with_msg(jv_string("Potentially truncated top-level numeric value"));
+ return make_error(p, "Potentially truncated top-level numeric value at EOF at line %d, column %d", p->line, p->column);
}
return value;
}
jv jv_parse_sized(const char* string, int length) {
struct jv_parser parser;
- parser_init(&parser);
+ parser_init(&parser, 0);
jv_parser_set_buf(&parser, string, length, 0);
jv value = jv_parser_next(&parser);
if (jv_is_valid(value)) {
return ret;
}
+// XXX Move all this into the next_input_state struct
FILE* current_input;
const char** input_filenames = NULL;
int ninput_files;
}
}
+ buf[0] = 0;
if (current_input) {
if (!fgets(buf, size, current_input))
buf[0] = 0;
return next_input_idx == ninput_files && (!current_input || feof(current_input));
}
+struct next_input_state { // state passed to next_input() through its void* data argument
+ struct jv_parser *parser; // parser fed from buf when RAW_INPUT is not set
+ jv slurped; // SLURP accumulator: a string with RAW_INPUT, an array without it, invalid when not slurping
+ char buf[4096]; // read buffer filled by read_more()
+};
+
+// Blocks to read one more input from stdin and/or given files.
+// Returns the next input value, an invalid jv carrying a message on a parse
+// error, or a plain invalid jv once all input has been consumed.
+// When slurping, inputs are accumulated in state->slurped rather than
+// returned; only parse errors are returned before the end of input.
+// `data` must point to a struct next_input_state; `jq` is unused here.
+static jv next_input(jq_state *jq, void *data) {
+  struct next_input_state *state = data;
+  int is_last = 0;
+  jv value = jv_invalid(); // need more input
+  do {
+    if (options & RAW_INPUT) {
+      is_last = read_more(state->buf, sizeof(state->buf));
+      if (state->buf[0] == '\0')
+        continue;
+      int len = strlen(state->buf); // Raw input doesn't support NULs
+      if (len > 0) {
+        if (options & SLURP) {
+          state->slurped = jv_string_concat(state->slurped, jv_string(state->buf));
+        } else {
+          // Start a new line on the first chunk. Without this seed `value`
+          // stays invalid and no raw line would ever be returned.
+          if (!jv_is_valid(value))
+            value = jv_string("");
+          if (state->buf[len-1] == '\n') {
+            // whole line
+            state->buf[len-1] = 0;
+            return jv_string_concat(value, jv_string(state->buf));
+          }
+          // Partial line (longer than buf, or unterminated at EOF): keep accumulating
+          value = jv_string_concat(value, jv_string(state->buf));
+        }
+      }
+    } else {
+      // Only refill once the parser has consumed the whole previous buffer
+      if (jv_parser_remaining(state->parser) == 0) {
+        is_last = read_more(state->buf, sizeof(state->buf));
+        jv_parser_set_buf(state->parser, state->buf, strlen(state->buf), !is_last); // NULs also not supported here
+      }
+      value = jv_parser_next(state->parser);
+      if (options & SLURP) {
+        if (jv_is_valid(value)) {
+          state->slurped = jv_array_append(state->slurped, value);
+          value = jv_invalid();
+        } else if (jv_invalid_has_msg(jv_copy(value)))
+          return value; // parse error
+      } else if (jv_is_valid(value) || jv_invalid_has_msg(jv_copy(value))) {
+        return value;
+      }
+    }
+  } while (!is_last);
+  return value;
+}
+
int main(int argc, char* argv[]) {
jq_state *jq = NULL;
int ret = 0;
int compiled = 0;
+ int parser_flags = 0;
char *t = NULL;
if (argc) progname = argv[0];
options |= SEQ;
if (!short_opts) continue;
}
+ if (isoption(argv[i], 0, "stream", &short_opts)) {
+ parser_flags |= JV_PARSE_STREAMING;
+ if (!short_opts) continue;
+ }
+ if (isoption(argv[i], 0, "stream-errors", &short_opts)) {
+ parser_flags |= JV_PARSE_STREAM_ERRORS;
+ if (!short_opts) continue;
+ }
if (isoption(argv[i], 'e', "exit-status", &short_opts)) {
options |= EXIT_STATUS;
if (!short_opts) continue;
printf("\n");
}
+ // XXX Refactor this and input_filenames[] and related setup into a
+ // function to setup struct next_input_state.
+ if ((options & SEQ))
+ parser_flags |= JV_PARSE_SEQ;
+
+ struct next_input_state input_state;
+ input_state.parser = jv_parser_new(parser_flags);
+ if ((options & RAW_INPUT) && (options & SLURP))
+ input_state.slurped = jv_string("");
+ else if ((options & SLURP))
+ input_state.slurped = jv_array();
+ else
+ input_state.slurped = jv_invalid();
+
+ // Let jq program read from inputs
+ jq_set_input_cb(jq, next_input, &input_state);
+
if (options & PROVIDE_NULL) {
ret = process(jq, jv_null(), jq_flags);
} else {
- jv slurped;
- if (options & SLURP) {
- if (options & RAW_INPUT) {
- slurped = jv_string("");
- } else {
- slurped = jv_array();
+ jv value;
+ while (jv_is_valid((value = next_input(jq, &input_state))) || jv_invalid_has_msg(jv_copy(value))) {
+ if (jv_is_valid(value)) {
+ ret = process(jq, value, jq_flags);
+ continue;
}
- }
- struct jv_parser* parser = jv_parser_new((options & SEQ) ? JV_PARSE_SEQ : 0);
- char buf[4096];
- int is_last = 0;
- do {
- is_last = read_more(buf, sizeof(buf));
- if (options & RAW_INPUT) {
- int len = strlen(buf);
- if (len > 0) {
- if (options & SLURP) {
- slurped = jv_string_concat(slurped, jv_string(buf));
- } else {
- if (buf[len-1] == '\n') buf[len-1] = 0;
- ret = process(jq, jv_string(buf), jq_flags);
- }
- }
- } else {
- jv_parser_set_buf(parser, buf, strlen(buf), !is_last);
- jv value;
- while (jv_is_valid(value = jv_parser_next(parser)) || jv_invalid_has_msg(jv_copy(value))) {
- if (!jv_is_valid(value)) {
- jv msg = jv_invalid_get_msg(value);
- if (!(options & SEQ)) {
- // We used to treat parse errors as fatal...
- ret = 4;
- fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
- jv_free(msg);
- break;
- }
- fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg));
- jv_free(msg);
- // ...but with --seq we attempt to recover.
- continue;
- }
- if (options & SLURP) {
- slurped = jv_array_append(slurped, value);
- } else {
- ret = process(jq, value, jq_flags);
- value = jv_invalid();
- }
- }
+
+ // Parse error
+ jv msg = jv_invalid_get_msg(value);
+ if (!(options & SEQ)) {
+ // --seq -> errors are not fatal
+ ret = 4;
+ fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
+ jv_free(msg);
+ break;
}
- } while (!is_last);
- jv_parser_free(parser);
- if (ret != 0)
- goto out;
+ fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg));
+ jv_free(msg);
+ }
if (options & SLURP) {
- ret = process(jq, slurped, jq_flags);
+ ret = process(jq, input_state.slurped, jq_flags);
+ input_state.slurped = jv_invalid();
}
}
+
+ jv_free(input_state.slurped);
+ jv_parser_free(input_state.parser);
+
+ if (ret != 0)
+ goto out;
+
if ((options & IN_PLACE)) {
FILE *devnull;
#ifdef WIN32