Fixes part of #47 and #48.
#include "parser.h"
#include "locfile.h"
#include "jv_aux.h"
+#include "jv_unicode.h"
+
typedef jv (*func_1)(jv);
}
}
+#define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+
+/*
+ * Returns a new string in which selected ASCII characters of `input`
+ * have been replaced by their escape sequences; `input` is freed.
+ *
+ * `escapings` is a packed table of entries, each laid out as
+ *   <char to escape> <replacement text> '\0'
+ * and the table ends at an entry whose first byte is '\0' (in practice
+ * the terminator of the string literal holding the table).
+ */
+static jv escape_string(jv input, const char* escapings) {
+  assert(jv_get_kind(input) == JV_KIND_STRING);
+
+  /* Index the table by the character being escaped (ASCII only). */
+  const char* table[128] = {0};
+  for (const char* entry = escapings; *entry; entry += strlen(entry) + 1) {
+    table[(int)*entry] = entry + 1;
+  }
+
+  jv result = jv_string("");
+  const char* cursor = jv_string_value(input);
+  const char* limit = cursor + jv_string_length(jv_copy(input));
+  int codepoint = 0;
+  for (;;) {
+    /* Decode one codepoint; `start` remembers where its bytes begin. */
+    const char* start = cursor;
+    cursor = jvp_utf8_next(start, limit, &codepoint);
+    if (!cursor) {
+      break;
+    }
+    assert(codepoint != -1);
+    if (codepoint < 128 && table[codepoint]) {
+      result = jv_string_append_str(result, table[codepoint]);
+    } else {
+      /* No escape registered: copy the original bytes unchanged. */
+      result = jv_string_append_buf(result, start, cursor - start);
+    }
+  }
+
+  jv_free(input);
+  return result;
+}
+
+/*
+ * Builtin behind the `@name` format syntax: renders `input` as a string
+ * according to the format named by `fmt`.
+ *
+ * Formats handled here: "json", "text", "csv", "html", "uri", "sh" and
+ * "base64". A non-string `fmt`, an unknown format name, or an input the
+ * chosen format cannot represent produces an error value instead.
+ *
+ * Both `input` and `fmt` are freed or handed on to a callee on every
+ * path through the function.
+ */
+static jv f_format(jv input, jv fmt) {
+  if (jv_get_kind(fmt) != JV_KIND_STRING) {
+    jv_free(input);
+    return type_error(fmt, "is not a valid format");
+  }
+  const char* fmt_s = jv_string_value(fmt);
+  if (!strcmp(fmt_s, "json")) {
+    jv_free(fmt);
+    return jv_dump_string(input, 0);
+  } else if (!strcmp(fmt_s, "text")) {
+    jv_free(fmt);
+    return f_tostring(input);
+  } else if (!strcmp(fmt_s, "csv")) {
+    jv_free(fmt);
+    if (jv_get_kind(input) != JV_KIND_ARRAY)
+      return type_error(input, "cannot be csv-formatted, only array");
+    jv line = jv_string("");
+    for (int i=0; i<jv_array_length(jv_copy(input)); i++) {
+      if (i) line = jv_string_append_str(line, ",");
+      jv x = jv_array_get(jv_copy(input), i);
+      switch (jv_get_kind(x)) {
+      case JV_KIND_NULL:
+        /* null rendered as empty string */
+        jv_free(x);
+        break;
+      case JV_KIND_TRUE:
+      case JV_KIND_FALSE:
+        line = jv_string_concat(line, jv_dump_string(x, 0));
+        break;
+      case JV_KIND_NUMBER:
+        if (jv_number_value(x) != jv_number_value(x)) {
+          /* NaN, render as empty string */
+          jv_free(x);
+        } else {
+          line = jv_string_concat(line, jv_dump_string(x, 0));
+        }
+        break;
+      case JV_KIND_STRING: {
+        /* Quote the field; embedded quotes are escaped by doubling. */
+        line = jv_string_append_str(line, "\"");
+        line = jv_string_concat(line, escape_string(x, "\"\"\"\0"));
+        line = jv_string_append_str(line, "\"");
+        break;
+      }
+      default:
+        jv_free(input);
+        jv_free(line);
+        return type_error(x, "is not valid in a csv row");
+      }
+    }
+    jv_free(input);
+    return line;
+  } else if (!strcmp(fmt_s, "html")) {
+    jv_free(fmt);
+    /* Each table entry is <char><entity>\0: & < > ' " map to entities. */
+    return escape_string(f_tostring(input), "&&amp;\0<&lt;\0>&gt;\0'&#39;\0\"&quot;\0");
+  } else if (!strcmp(fmt_s, "uri")) {
+    jv_free(fmt);
+    input = f_tostring(input);
+
+    int unreserved[128] = {0};
+    const char* p = CHARS_ALPHANUM "-_.!~*'()";
+    while (*p) unreserved[(int)*p++] = 1;
+
+    jv line = jv_string("");
+    const char* s = jv_string_value(input);
+    int len = jv_string_length(jv_copy(input));
+    for (int i=0; i<len; i++) {
+      /* Read the byte as unsigned char first: where plain char is signed,
+         a byte >= 0x80 would otherwise sign-extend and print as %ffffffxx. */
+      unsigned ch = (unsigned char)*s;
+      if (ch < 128 && unreserved[ch]) {
+        line = jv_string_append_buf(line, s, 1);
+      } else {
+        line = jv_string_concat(line, jv_string_fmt("%%%02x", ch));
+      }
+      s++;
+    }
+    jv_free(input);
+    return line;
+  } else if (!strcmp(fmt_s, "sh")) {
+    jv_free(fmt);
+    /* A lone value is treated as a one-element array. */
+    if (jv_get_kind(input) != JV_KIND_ARRAY)
+      input = jv_array_set(jv_array(), 0, input);
+    jv line = jv_string("");
+    for (int i=0; i<jv_array_length(jv_copy(input)); i++) {
+      if (i) line = jv_string_append_str(line, " ");
+      jv x = jv_array_get(jv_copy(input), i);
+      switch (jv_get_kind(x)) {
+      case JV_KIND_NULL:
+      case JV_KIND_TRUE:
+      case JV_KIND_FALSE:
+      case JV_KIND_NUMBER:
+        line = jv_string_concat(line, jv_dump_string(x, 0));
+        break;
+
+      case JV_KIND_STRING: {
+        /* Single-quote the word; an embedded ' becomes '\'' */
+        line = jv_string_append_str(line, "'");
+        line = jv_string_concat(line, escape_string(x, "''\\''\0"));
+        line = jv_string_append_str(line, "'");
+        break;
+      }
+
+      default:
+        jv_free(input);
+        jv_free(line);
+        return type_error(x, "can not be escaped for shell");
+      }
+    }
+    jv_free(input);
+    return line;
+  } else if (!strcmp(fmt_s, "base64")) {
+    jv_free(fmt);
+    input = f_tostring(input);
+    jv line = jv_string("");
+    const char b64[64 + 1] = CHARS_ALPHANUM "+/";
+    const char* data = jv_string_value(input);
+    int len = jv_string_length(jv_copy(input));
+    for (int i=0; i<len; i+=3) {
+      /* Pack up to three input bytes into a 24-bit group, zero-padded. */
+      uint32_t code = 0;
+      int n = len - i >= 3 ? 3 : len-i;
+      for (int j=0; j<3; j++) {
+        code <<= 8;
+        /* unsigned char avoids sign-extending bytes >= 0x80, which would
+           set the high bits of code and corrupt the group. */
+        code |= j < n ? (unsigned char)data[i+j] : 0;
+      }
+      /* Emit four 6-bit digits, then overwrite the unused ones with '='. */
+      char buf[4];
+      for (int j=0; j<4; j++) {
+        buf[j] = b64[(code >> (18 - j*6)) & 0x3f];
+      }
+      if (n < 3) buf[3] = '=';
+      if (n < 2) buf[2] = '=';
+      line = jv_string_append_buf(line, buf, sizeof(buf));
+    }
+    jv_free(input);
+    return line;
+  } else {
+    jv_free(input);
+    return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format")));
+  }
+}
+
static jv f_keys(jv input) {
if (jv_get_kind(input) == JV_KIND_OBJECT || jv_get_kind(input) == JV_KIND_ARRAY) {
return jv_keys(input);
{(cfunction_ptr)f_min_by_impl, "_min_by_impl", 2},
{(cfunction_ptr)f_max_by_impl, "_max_by_impl", 2},
{(cfunction_ptr)f_error, "error", 2},
+ {(cfunction_ptr)f_format, "format", 2},
};
static struct symbol_table cbuiltins =
You can affect how jq reads and writes its input and output
using some command-line options:
- * `--slurp`/`-s`:
+ * `--slurp`/`-s`:
- Instead of running the filter for each JSON object in the
- input, read the entire input stream into a large array and run
- the filter just once.
-
- * `--raw-input`/`-R`:
-
- Don't parse the input as JSON. Instead, each line of text is
- passed to the filter as a string. If combined with `--slurp`,
- then the entire input is passed to the filter as a single long
- string.
+ Instead of running the filter for each JSON object in the
+ input, read the entire input stream into a large array and run
+ the filter just once.
+
+ * `--raw-input`/`-R`:
+
+ Don't parse the input as JSON. Instead, each line of text is
+ passed to the filter as a string. If combined with `--slurp`,
+ then the entire input is passed to the filter as a single long
+ string.
- * `--null-input`/`-n`:
+ * `--null-input`/`-n`:
- Don't read any input at all! Instead, the filter is run once
- using `null` as the input. This is useful when using jq as a
- simple calculator or to construct JSON data from scratch.
+ Don't read any input at all! Instead, the filter is run once
+ using `null` as the input. This is useful when using jq as a
+ simple calculator or to construct JSON data from scratch.
- * `--compact-output` / `-c`:
+ * `--compact-output` / `-c`:
- By default, jq pretty-prints JSON output. Using this option
- will result in more compact output by instead putting each
- JSON object on a single line.
+ By default, jq pretty-prints JSON output. Using this option
+ will result in more compact output by instead putting each
+ JSON object on a single line.
- * `--colour-output` / `-C` and `--monochrome-output` / `-M`:
-
- By default, jq outputs colored JSON if writing to a
- terminal. You can force it to produce color even if writing to
- a pipe or a file using `-C`, and disable color with `-M`.
+ * `--colour-output` / `-C` and `--monochrome-output` / `-M`:
+
+ By default, jq outputs colored JSON if writing to a
+ terminal. You can force it to produce color even if writing to
+ a pipe or a file using `-C`, and disable color with `-M`.
- * `--ascii-output` / `-a`:
+ * `--ascii-output` / `-a`:
- jq usually outputs non-ASCII Unicode codepoints as UTF-8, even
- if the input specified them as escape sequences (like
- "\u03bc"). Using this option, you can force jq to produce pure
- ASCII output with every non-ASCII character replaced with the
- equivalent escape sequence.
+ jq usually outputs non-ASCII Unicode codepoints as UTF-8, even
+ if the input specified them as escape sequences (like
+ "\u03bc"). Using this option, you can force jq to produce pure
+ ASCII output with every non-ASCII character replaced with the
+ equivalent escape sequence.
- * `--raw-output` / `-r`:
+ * `--raw-output` / `-r`:
- With this option, if the filter's result is a string then it
- will be written directly to standard output rather than being
- formatted as a JSON string with quotes. This can be useful for
- making jq filters talk to non-JSON-based systems.
+ With this option, if the filter's result is a string then it
+ will be written directly to standard output rather than being
+ formatted as a JSON string with quotes. This can be useful for
+ making jq filters talk to non-JSON-based systems.
- title: Basic filters
entries:
input: '42'
output: ['"The input was 42, which is one less than 43"']
-
+ - title: "Format strings and escaping"
+ body: |
-
+ The `@foo` syntax is used to format and escape strings,
+ which is useful for building URLs, documents in a language
+ like HTML or XML, and so forth. `@foo` can be used as a
+ filter on its own; the possible escapings are:
+
+ * `@text`:
+
+ Calls `tostring`, see that function for details.
+
+ * `@json`:
+
+ Serialises the input as JSON.
+
+ * `@html`:
+
+ Applies HTML/XML escaping, by mapping the characters
+ `<>&'"` to their entity equivalents `&lt;`, `&gt;`,
+ `&amp;`, `&#39;`, `&quot;`.
+
+ * `@uri`:
+
+ Applies percent-encoding, by mapping all reserved URI
+ characters to a `%xx` sequence.
+
+ * `@csv`:
+
+ The input must be an array, and it is rendered as CSV
+ with double quotes for strings, and quotes escaped by
+ repetition.
+
+ * `@sh`:
+
+ The input is escaped so that it is suitable for use in a
+ command line for a POSIX shell. If the input is an array, the output
+ will be a series of space-separated strings.
+
+ * `@base64`:
+
+ The input is converted to base64 as specified by RFC 4648.
+
+ This syntax can be combined with string interpolation in a
+ useful way. You can follow a `@foo` token with a string
+ literal. The contents of the string literal will *not* be
+ escaped. However, all interpolations made inside that string
+ literal will be escaped. For instance,
+
+ @uri "http://www.google.com/search?q=\(.search)"
+
+ will produce the following output for the input
+ `{"search":"jq!"}`:
+
+ http://www.google.com/search?q=jq%21
+
+ Note that the slashes, question mark, etc. in the URL are
+ not escaped, as they were part of the string literal.
+
+ examples:
+ - program: '@html'
+ input: '"This works if x < y"'
+ output: ['"This works if x &lt; y"']
+
+# - program: '@html "<span>Anonymous said: \(.)</span>"'
+# input: '"<script>alert(\"lol hax\");</script>"'
+# output: ["<span>Anonymous said: &lt;script&gt;alert(&quot;lol hax&quot;);&lt;/script&gt;</span>"]
+ - program: '@sh "echo \(.)"'
+ input: "\"O'Hara's Ale\""
+ output: ["\"echo 'O'\\''Hara'\\''s Ale\""]
- title: Conditionals and Comparisons
entries:
return try_exit(yytext[0], YY_START, yyscanner);
}
+"@"[a-zA-Z0-9_]+ {
+ yylval->literal = jv_string_sized(yytext + 1, yyleng - 1); return FORMAT;
+}
-?[0-9.]+([eE][+-]?[0-9]+)? {
yylval->literal = jv_parse_sized(yytext, yyleng); return LITERAL;
%token INVALID_CHARACTER
%token <literal> IDENT
%token <literal> LITERAL
+%token <literal> FORMAT
%token EQ "=="
%token NEQ "!="
%token DEFINEDOR "//"
return gen_call(funcname, BLOCK(gen_lambda(a), gen_lambda(b)));
}
-static block gen_format(block a) {
- return BLOCK(a, gen_call("tostring", gen_noop()));
+static block gen_format(block a, jv fmt) {
+ return BLOCK(a, gen_call("format", BLOCK(gen_lambda(gen_const(fmt)))));
}
static block gen_update(block a, block op, int optype) {
String:
-QQSTRING_START QQString QQSTRING_END {
- $$ = $2;
+QQSTRING_START { $<literal>$ = jv_string("text"); } QQString QQSTRING_END {
+ $$ = $3;
+ jv_free($<literal>2);
+} |
+FORMAT QQSTRING_START { $<literal>$ = $1; } QQString QQSTRING_END {
+ $$ = $4;
+ jv_free($<literal>3);
}
+
QQString:
/* empty */ {
$$ = gen_const(jv_string(""));
$$ = gen_binop($1, gen_const($2), '+');
} |
QQString QQSTRING_INTERP_START Exp QQSTRING_INTERP_END {
- $$ = gen_binop($1, gen_format($3), '+');
+ $$ = gen_binop($1, gen_format($3, jv_copy($<literal>0)), '+');
}
String {
$$ = $1;
} |
+FORMAT {
+ $$ = gen_format(gen_noop(), $1);
+} |
'(' Exp ')' {
$$ = $2;
} |
null
"interpolation"
+@text,@json,([1,.] | @csv),@html,@uri,@sh,@base64
+"<>&'\""
+"<>&'\""
+"\"<>&'\\\"\""
+"1,\"<>&'\"\"\""
+"&lt;&gt;&amp;&#39;&quot;"
+"%3c%3e%26'%22"
+"'<>&'\\''\"'"
+"PD4mJyI="
+
+@html "<b>\(.)</b>"
+"<script>hax</script>"
+"<b>&lt;script&gt;hax&lt;/script&gt;</b>"
+
#
# Dictionary construction syntax
#