Add @base64d for decoding base64 #47

author Shaun Guth <sguth@practicefusion.com>

Sun, 24 Jan 2016 23:30:28 +0000 (23:30 +0000)

committer William Langford <wlangfor@gmail.com>

Sun, 12 Feb 2017 21:03:59 +0000 (16:03 -0500)
author Shaun Guth <sguth@practicefusion.com>
Sun, 24 Jan 2016 23:30:28 +0000 (23:30 +0000)
committer William Langford <wlangfor@gmail.com>
Sun, 12 Feb 2017 21:03:59 +0000 (16:03 -0500)
diff --git a/AUTHORS b/AUTHORS

index da04d5c4d10f2af953ed3dd5fd796658981264da..e64a2358f8aa1de787a2697584f7dee1edce0a00 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -58,6 +58,7 @@ Ryoichi KATO         <ryo1kato@gmail.com>             - doc fixes
  Rémy Léone           <remy.leone@gmail.com>           - add .travis.yml
  Santiago Lapresta    <santiago.lapresta@gmail.com>    - join, arrays, all, any, other filters
  Sebastian Freundt    <freundt@ga-group.nl>            - build
+Shaun Guth           <shaun.guth@gmail.com>           - base64d
  Shay Elkin           <shay@everything.me>
  Simon Elsbrock       <simon@iodev.org>                - Debian
  Stefan Seemayer      <stefan@seemayer.de>
diff --git a/Makefile.am b/Makefile.am

index c1eaf6decabc9fc2e64e604cc22ef394f264f05d..cf5d74dc5bd47122042bdf9b70bfc8f975e43c66 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -115,7 +115,7 @@ endif
  
  ### Tests (make check)
  
-TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test
+TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test tests/base64test
  TESTS_ENVIRONMENT = NO_VALGRIND=$(NO_VALGRIND)
  
  
diff --git a/appveyor.yml b/appveyor.yml

index 719b4dce8ee4aead0124865d7a8bfa600d4a66c0..e58981e7b233f4aec9d112158a13d7445f526131 100644 (file)
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -26,7 +26,7 @@ build_script:
  test_script:
    # tests/optionaltest and tests/shtest fail on Windows; run them
    # anyways but ignore their failures.  Also, trace shtest.
-  - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make -j3 'TESTS=tests/mantest tests/jqtest tests/onigtest' check"
+  - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make -j3 'TESTS=tests/mantest tests/jqtest tests/onigtest tests/base64test' check"
    - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make TESTS=tests/optionaltest check || cat test-suite.log"
    - bash -lc "exec 0</dev/null && cd $APPVEYOR_BUILD_FOLDER && make TRACE_TESTS=1 TESTS=tests/shtest check || cat test-suite.log"
        
diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml

index baad2ad3ce0fb7f1c6a925e4717785def47248af..0f917426622349986708ba56d81b355497269ae9 100644 (file)
--- a/docs/content/3.manual/manual.yml
+++ b/docs/content/3.manual/manual.yml
@@ -1770,6 +1770,11 @@ sections:
  
              The input is converted to base64 as specified by RFC 4648.
  
+          * `@base64d`:
+
+            The inverse of `@base64`: the input is decoded as specified by RFC 4648.
+            Note\: If the decoded string is not UTF-8, the results are undefined.
+
            This syntax can be combined with string interpolation in a
            useful way. You can follow a `@foo` token with a string
            literal. The contents of the string literal will *not* be
@@ -1799,6 +1804,14 @@ sections:
              input: "\"O'Hara's Ale\""
              output: ["\"echo 'O'\\\\''Hara'\\\\''s Ale'\""]
  
+          - program: '@base64'
+            input: '"This is a message"'
+            output: ['"VGhpcyBpcyBhIG1lc3NhZ2U="']
+
+          - program: '@base64d'
+            input: '"VGhpcyBpcyBhIG1lc3NhZ2U="'
+            output: ['"This is a message"']
+
        - title: "Dates"
          body: |
  
diff --git a/src/builtin.c b/src/builtin.c

index aa0ab4d5ebd21eb910d24e35c766705cefda9ebb..24e311e11390a0ab647ee53720a33ac7498b1f9e 100644 (file)
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -396,6 +396,24 @@ static jv f_utf8bytelength(jq_state *jq, jv input) {
  
  #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  
+static const unsigned char BASE64_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "+/";
+static const unsigned char BASE64_INVALID_ENTRY = 0xFF;
+static const unsigned char BASE64_DECODE_TABLE[255] = {
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  62, // +
+  0xFF, 0xFF, 0xFF,
+  63, // /
+  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
+  0xFF, 0xFF, 0xFF,
+  99, // =
+  0xFF, 0xFF, 0xFF,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // A-Z
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // a-z
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+
  static jv escape_string(jv input, const char* escapings) {
  
    assert(jv_get_kind(input) == JV_KIND_STRING);
@@ -548,7 +566,6 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
      jv_free(fmt);
      input = f_tostring(jq, input);
      jv line = jv_string("");
-    const char b64[64 + 1] = CHARS_ALPHANUM "+/";
      const unsigned char* data = (const unsigned char*)jv_string_value(input);
      int len = jv_string_length_bytes(jv_copy(input));
      for (int i=0; i<len; i+=3) {
@@ -560,7 +577,7 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
        }
        char buf[4];
        for (int j=0; j<4; j++) {
-        buf[j] = b64[(code >> (18 - j*6)) & 0x3f];
+        buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
        }
        if (n < 3) buf[3] = '=';
        if (n < 2) buf[2] = '=';
@@ -568,6 +585,49 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
      }
      jv_free(input);
      return line;
+  } else if (!strcmp(fmt_s, "base64d")) {
+    jv_free(fmt);
+    input = f_tostring(jq, input);
+    const unsigned char* data = (const unsigned char*)jv_string_value(input);
+    int len = jv_string_length_bytes(jv_copy(input));
+    size_t decoded_len = (3 * len) / 4; // 3 usable bytes for every 4 bytes of input
+    char *result = malloc(decoded_len * sizeof(char));
+    memset(result, 0, decoded_len * sizeof(char));
+    uint32_t ri = 0;
+    int input_bytes_read=0;
+    uint32_t code = 0;
+    for (int i=0; i<len && data[i] != '='; i++) {
+      if (BASE64_DECODE_TABLE[data[i]] == BASE64_INVALID_ENTRY) {
+        free(result);
+        return type_error(input, "is not valid base64 data");
+      }
+
+      code <<= 6;
+      code |= BASE64_DECODE_TABLE[data[i]];
+      input_bytes_read++;
+
+      if (input_bytes_read == 4) {
+        result[ri++] = (code >> 16) & 0xFF;
+        result[ri++] = (code >> 8) & 0xFF;
+        result[ri++] = code & 0xFF;
+        input_bytes_read = 0;
+        code = 0;
+      }
+    }
+    if (input_bytes_read == 3) {
+      result[ri++] = (code >> 10) & 0xFF;
+      result[ri++] = (code >> 2) & 0xFF;
+    } else if (input_bytes_read == 2) {
+      result[ri++] = (code >> 4) & 0xFF;
+    } else if (input_bytes_read == 1) {
+      free(result);
+      return type_error(input, "trailing base64 byte found");
+    }
+
+    jv line = jv_string_sized(result, ri);
+    jv_free(input);
+    free(result);
+    return line;
    } else {
      jv_free(input);
      return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format")));
diff --git a/tests/base64.test b/tests/base64.test

new file mode 100644 (file)

index 0000000..0f82b0b
--- /dev/null
+++ b/tests/base64.test
@@ -0,0 +1,35 @@
+# Tests are groups of three lines: program, input, expected output
+# Blank lines and lines starting with # are ignored
+
+@base64
+"<>&'\"\t"
+"PD4mJyIJ"
+
+# decoding encoded output results in same text
+(@base64|@base64d)
+"<>&'\"\t"
+"<>&'\"\t"
+
+# regression test for #436
+@base64
+"foóbar\n"
+"Zm/Ds2Jhcgo="
+
+@base64d
+"Zm/Ds2Jhcgo="
+"foóbar\n"
+
+# optional trailing equals padding (With padding, this is cWl4YmF6Cg==)
+@base64d
+"cWl4YmF6Cg"
+"qixbaz\n"
+
+# invalid base64 characters (whitespace)
+. | try @base64d catch .
+"Not base64 data"
+"string (\"Not base64...) is not valid base64 data"
+
+# invalid base64 (one trailing byte; "QUJD" alone decodes to "ABC")
+. | try @base64d catch .
+"QUJDa"
+"string (\"QUJDa\") trailing base64 byte found"
diff --git a/tests/base64test b/tests/base64test

new file mode 100755 (executable)

index 0000000..85fe64b
--- /dev/null
+++ b/tests/base64test
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+. "${0%/*}/setup" "$@"  # source the `setup` file that sits in this script's own directory, forwarding the script's arguments
+
+$VALGRIND $Q $JQ -L "$mods" --run-tests $JQTESTDIR/base64.test  # run jq in --run-tests mode on base64.test; VALGRIND, Q, JQ, mods and JQTESTDIR are presumably set by the sourced setup -- confirm
diff --git a/tests/jq.test b/tests/jq.test

index fa02b6d722ac66eb08cf202132b3134c16c9add4..f510a917e593ab107aea5f0530637490bb174f65 100644 (file)
--- a/tests/jq.test
+++ b/tests/jq.test
@@ -61,7 +61,7 @@ null
  null
  "interpolation"
  
-@text,@json,([1,.] | (@csv, @tsv)),@html,@uri,@sh,@base64
+@text,@json,([1,.] | (@csv, @tsv)),@html,@uri,@sh,@base64,(@base64 | @base64d)
  "<>&'\"\t"
  "<>&'\"\t"
  "\"<>&'\\\"\\t\""
@@ -71,12 +71,17 @@ null
  "%3C%3E%26'%22%09"
  "'<>&'\\''\"\t'"
  "PD4mJyIJ"
+"<>&'\"\t"
  
  # regression test for #436
  @base64
  "foóbar\n"
  "Zm/Ds2Jhcgo="
  
+@base64d
+"Zm/Ds2Jhcgo="
+"foóbar\n"
+
  @uri
  "\u03bc"
  "%CE%BC"
author	Shaun Guth <sguth@practicefusion.com>
	Sun, 24 Jan 2016 23:30:28 +0000 (23:30 +0000)
committer	William Langford <wlangfor@gmail.com>
	Sun, 12 Feb 2017 21:03:59 +0000 (16:03 -0500)
AUTHORS		patch \| blob \| history
Makefile.am		patch \| blob \| history
appveyor.yml		patch \| blob \| history
docs/content/3.manual/manual.yml		patch \| blob \| history
src/builtin.c		patch \| blob \| history
tests/base64.test	[new file with mode: 0644]	patch \| blob
tests/base64test	[new file with mode: 0755]	patch \| blob
tests/jq.test		patch \| blob \| history