// ascii_upcase: explode the input string into codepoints, subtract 32 from
// every codepoint in 97..122 (ASCII 'a'..'z', giving 'A'..'Z'), leave all
// other codepoints unchanged, and implode back into a string.
"def ascii_upcase:"
" explode | map( if 97 <= . and . <= 122 then . - 32 else . end) | implode;",
+ // Streaming utilities
+ // truncate_stream(stream): the input is a depth, bound to $n.  `stream` is
+ // evaluated with null as its input.  Events whose path is longer than $n are
+ // emitted with the first $n path elements removed; all other events are
+ // dropped.  The slice has to be [$n:] rather than [1:], otherwise any depth
+ // other than 1 strips the wrong number of path elements.
+ "def truncate_stream(stream):"
+ " . as $n | null | stream | . as $input | if (.[0]|length) > $n then setpath([0];$input[0][$n:]) else empty end;",
+ // fromstream(i): rebuild values from the stream events produced by `i`.
+ // Each event is [path, leaf] or a one-element closing event [path].
+ // The foreach state is a four-slot array:
+ //   .[0] value currently being built     .[1] true once a leaf was stored
+ //   .[2] previously completed value      .[3] the .[1] flag of that value
+ // NOTE: these strings carry no "\n", so comments must stay outside them.
+ "def fromstream(i):"
+ " foreach i as $item ("
+ // Initial state: nothing built yet, nothing completed yet.
+ " [null,false,null,false];"
+ // Update, step 1: an event with an empty path resets slots 0/1 and keeps
+ // slots 2/3; a closing event with path length < 2 moves slots 0/1 into
+ // slots 2/3 and resets slots 0/1; any other event leaves the state alone.
+ " if ($item[0]|length) == 0 then [null,false,.[2],.[3]]"
+ " elif ($item|length) == 1 and ($item[0]|length) < 2 then [null,false,.[0],.[1]]"
+ " else . end |"
+ " . as $state |"
+ // Update, step 2: an event with a leaf and a non-empty path stores the leaf
+ // at that path in slot 0 and sets slot 1 to true; slots 2/3 are carried over.
+ " if ($item|length) > 1 and ($item[0]|length) > 0 then"
+ " [.[0]|setpath(($item|.[0]); ($item|.[1])), "
+ " true, "
+ " $state[2], "
+ " $state[3]] "
+ " else ."
+ " end;"
+ // Extract: on a closing event whose path has length 1, emit slot 2 when
+ // slot 3 is truthy; on an event with an empty path, emit its leaf directly.
+ " if ($item[0]|length) == 1 and ($item|length == 1) and .[3] then .[2] else empty end,"
+ " if ($item[0]|length) == 0 then $item[1] else empty end"
+ " );",
+ // tostream: emit the streamed form of the input.  Strings, numbers, booleans,
+ // null, and any input whose length is 0 produce a single [[], value] event.
+ // Otherwise each key is visited in `keys` order, the events of its value are
+ // emitted with that key prepended to their path, and finally a closing event
+ // [[last_key]] is emitted.
+ "def tostream:\n"
+ " {string:true,number:true,boolean:true,null:true} as $leaf_types |\n"
+ " . as $dot |\n"
+ " if $leaf_types[$dot|type] or length==0 then [[],$dot]\n"
+ " else\n"
+ " # We really need a _streaming_ form of `keys`.\n"
+ " # We can use `range` for arrays, but not for objects.\n"
+ " keys as $keys |\n"
+ " $keys[-1] as $last|\n"
+ " ((# for each key\n"
+ " $keys[] | . as $key |\n"
+ " $dot[$key] | . as $dot |\n"
+ " # recurse on each key/value\n"
+ " tostream|.[0]|=[$key]+.),\n"
+ " # then add the closing marker\n"
+ " [[$last]])\n"
+ " end;",
+
+
// # Assuming the input array is sorted, bsearch/1 returns
// # the index of the target if the target is in the input array; and otherwise
// # (-1 - ix), where ix is the insertion point that would leave the array sorted.
Returns the line number of the input currently being filtered.
+ - title: 'Streaming'
+ body: |
+
+ With the `--stream` option jq can parse input texts in a streaming
+ fashion, allowing jq programs to start processing large JSON texts
+ immediately rather than after the parse completes. If you have a
+ single JSON text that is 1GB in size, streaming it will allow you
+ to process it much more quickly.
+
+ However, streaming isn't easy to deal with as the jq program will
+ have `[<path>, <leaf-value>]` (and a few other forms) as inputs.
+
+ Several builtins are provided to make handling streams easier.
+
+ The examples below use the streamed form of `[1,[2]]`, which
+ is `[[0],1],[[1,0],2],[[1,0]],[[1]]`.
+
+ Streaming forms include `[<path>, <leaf-value>]` (to indicate any
+ scalar value, empty array, or empty object), and `[<path>]` (to
+ indicate the end of an array or object). Future versions of jq
+ run with `--stream` and `--seq` may output additional forms such as
+ `["error message"]` when an input text fails to parse.
+
+ entries:
+ - title: "`truncate_stream(stream_expression)`"
+ body: |
+
+ Consumes a number as input and truncates the corresponding
+ number of path elements from the left of the outputs of the
+ given streaming expression.
+
+ examples:
+ - program: '[1|truncate_stream([[0],1],[[1,0],2],[[1,0]],[[1]])]'
+ input: '1'
+ output: ['[[[0],2],[[0]]]']
+
+ - title: "`fromstream(stream_expression)`"
+ body: |
+
+ Outputs values corresponding to the stream expression's
+ outputs.
+
+ examples:
+ - program: 'fromstream(1|truncate_stream([[0],1],[[1,0],2],[[1,0]],[[1]]))'
+ input: 'null'
+ output: ['[2]']
+
+ - title: "`tostream`"
+ body: |
+
+ The `tostream` builtin outputs the streamed form of its input.
+
+ examples:
+ - program: '. as $dot|fromstream($dot|tostream)|.==$dot'
+ input: '[0,[1,{"a":1},{"b":2}]]'
+ output: ['true']
+
- title: Assignment
body: |
+++ /dev/null
-
-# Filter and adjust streamed values so that only values from the .th
-# level are output.
-def trunc(stream):
- . as $n | stream | . as $input | if (.[0]|length) > $n then setpath([0];$input[0][$n:]) else empty end;
-
-# Reduce streamed values back to normal
-def tovalues(i):
- def debug(msg): . as $dot | [msg, .] | debug | $dot;
- foreach i as $item (
- [null,false,null];
-
- # Updator
- #
- # If the new $item is a top-level value,
- # then clear out the current value
- . as [$cur, $cur_isvalid, $prev] |
- $item as [$path, $leaf] |
- ($item|length > 1) as $has_leaf |
- ($item|length == 1) as $closing |
- ($path|length) as $plen |
- # if the new $item terminates the current value, then cur is ready
- # for extraction and we'll start building a new value with the next
- # inputs
- if ($plen == 0) or # top-level scalar
- ($closing and $plen < 2) then [null,false,$cur]
- # else continue building up cur
- else . end |
- . as [$cur, $cur_isvalid, $prev] |
- # If the new $item has a leaf, upate the current value
- if $has_leaf and $plen > 0 then
- [$cur|setpath(($path); $leaf), # update current value
- true, # current value is now valid (if, perhaps, incomplete)
- $prev] # previous value is unchanged
- else .
- end;
-
- # Extractor
- #
- . as [$cur, $cur_isvalid, $prev] |
- $item as [$path, $leaf] |
- ($item|length > 1) as $has_leaf |
- ($item|length == 1) as $closing |
- ($path|length) as $plen |
- # If previous value is valid, output it
- if $plen == 1 and $closing then $prev else empty end,
- # and/or if the new $item is a top-level scalar, output it
- if $plen == 0 then $leaf else empty end
- );
## Test JSON sequence support
-## XXX If we add a `stream_fromjson` builtin then we can move these tests
-## into tests/all.test
-
cat > $d/expected <<EOF
ignoring parse error: Truncated value at line 2, column 5
ignoring parse error: Truncated value at line 2, column 25
fi
dd "if=tests/torture/input0.json" bs=$i count=1 2>/dev/null |
- $VALGRIND $JQ -cn --stream -L "$mods" 'import "streaming" as streaming; streaming::tovalues(inputs)' > $d/out1 2>$d/err || true
+ $VALGRIND $JQ -cn --stream 'fromstream(inputs)' > $d/out1 2>$d/err || true
if [ -n "$VALGRIND" ]; then
grep '^==[0-9][0-9]*== ERROR SUMMARY: 0 errors' $d/err > /dev/null
else