patch 8.2.4695: JSON encoding could be faster

author LemonBoy <thatlemon@gmail.com>

Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)

committer Bram Moolenaar <Bram@vim.org>

Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)
author LemonBoy <thatlemon@gmail.com>
Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)
committer Bram Moolenaar <Bram@vim.org>
Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)
diff --git a/src/json.c b/src/json.c

index 5be8f7fe66ddab704dd3d013101e1b1caf5c3b23..47bf9904a312a589938a358e382d0c24b4a7a231 100644 (file)
--- a/src/json.c
+++ b/src/json.c
@@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val)
  }
  #endif
  
+/*
+ * Lookup table to quickly know if the given ASCII character must be escaped.
+ */
+static const char ascii_needs_escape[128] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0.
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1.
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+};
+
+/*
+ * Encode the utf-8 encoded string "str" into "gap".
+ */
      static void
  write_string(garray_T *gap, char_u *str)
  {
      char_u     *res = str;
      char_u     numbuf[NUMBUFLEN];
+    char_u     *from;
+#if defined(USE_ICONV)
+    vimconv_T   conv;
+    char_u     *converted = NULL;
+#endif
+    int                c;
  
      if (res == NULL)
-       ga_concat(gap, (char_u *)"\"\"");
-    else
      {
-#if defined(USE_ICONV)
-       vimconv_T   conv;
-       char_u      *converted = NULL;
+       ga_concat(gap, (char_u *)"\"\"");
+       return;
+    }
  
-       if (!enc_utf8)
-       {
-           // Convert the text from 'encoding' to utf-8, the JSON string is
-           // always utf-8.
-           conv.vc_type = CONV_NONE;
-           convert_setup(&conv, p_enc, (char_u*)"utf-8");
-           if (conv.vc_type != CONV_NONE)
-               converted = res = string_convert(&conv, res, NULL);
-           convert_setup(&conv, NULL, NULL);
-       }
+#if defined(USE_ICONV)
+    if (!enc_utf8)
+    {
+       // Convert the text from 'encoding' to utf-8, because a JSON string is
+       // always utf-8.
+       conv.vc_type = CONV_NONE;
+       convert_setup(&conv, p_enc, (char_u*)"utf-8");
+       if (conv.vc_type != CONV_NONE)
+           converted = res = string_convert(&conv, res, NULL);
+       convert_setup(&conv, NULL, NULL);
+    }
  #endif
-       ga_append(gap, '"');
-       while (*res != NUL)
+    ga_append(gap, '"');
+    // `from` is the beginning of a sequence of bytes we can directly copy from
+    // the input string, avoiding the overhead associated with decoding and
+    // re-encoding them.
+    from = res;
+    while ((c = *res) != NUL)
+    {
+       // always use utf-8 encoding, ignore 'encoding'
+       if (c < 0x80)
         {
-           int c;
-           // always use utf-8 encoding, ignore 'encoding'
-           c = utf_ptr2char(res);
+           if (!ascii_needs_escape[c])
+           {
+               res += 1;
+               continue;
+           }
+
+           if (res != from)
+               ga_concat_len(gap, from, res - from);
+           from = res + 1;
  
             switch (c)
             {
@@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str)
                     ga_append(gap, c);
                     break;
                 default:
-                   if (c >= 0x20)
-                   {
-                       numbuf[utf_char2bytes(c, numbuf)] = NUL;
-                       ga_concat(gap, numbuf);
-                   }
-                   else
-                   {
-                       vim_snprintf((char *)numbuf, NUMBUFLEN,
-                                                        "\\u%04lx", (long)c);
-                       ga_concat(gap, numbuf);
-                   }
+                   vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
+                                                                     (long)c);
+                   ga_concat(gap, numbuf);
+           }
+
+           res += 1;
+       }
+       else
+       {
+           int l = utf_ptr2len(res);
+
+           if (l > 1)
+           {
+               res += l;
+               continue;
             }
-           res += utf_ptr2len(res);
+
+           // Invalid utf-8 sequence, replace it with the Unicode replacement
+           // character U+FFFD.
+           if (res != from)
+               ga_concat_len(gap, from, res - from);
+           from = res + 1;
+
+           numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
+           ga_concat(gap, numbuf);
+
+           res += l;
         }
-       ga_append(gap, '"');
+    }
+
+    if (res != from)
+       ga_concat_len(gap, from, res - from);
+
+    ga_append(gap, '"');
  #if defined(USE_ICONV)
-       vim_free(converted);
+    vim_free(converted);
  #endif
-    }
  }
  
  /*
diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim

index 0248aa9ecced194289effa1fdc4cc7f0583a25d0..3ee783703336d89e0bbd9d03ddb10c2aee0c367b 100644 (file)
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -107,6 +107,9 @@ func Test_json_encode()
    call assert_equal('"café"', json_encode("caf\xe9"))
    let &encoding = save_encoding
  
+  " Invalid utf-8 sequences are replaced with U+FFFD (replacement character)
+  call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB"))
+
    call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func')
    call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func')
  
diff --git a/src/version.c b/src/version.c

index 3e220f581f04924f7e29eb3655bc00e1161f785b..f4d300b39e0d0ea2161247700c95bfbda8bf5688 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -746,6 +746,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    4695,
  /**/
      4694,
  /**/
author	LemonBoy <thatlemon@gmail.com>
	Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)
committer	Bram Moolenaar <Bram@vim.org>
	Tue, 5 Apr 2022 14:07:32 +0000 (15:07 +0100)
src/json.c		patch | blob | history
src/testdir/test_json.vim		patch | blob | history
src/version.c		patch | blob | history