From beb0ef1ab2dbd9760345e3e03647b93914591d56 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Tue, 5 Apr 2022 15:07:32 +0100 Subject: [PATCH] patch 8.2.4695: JSON encoding could be faster Problem: JSON encoding could be faster. Solution: Optimize encoding JSON strings. (closes #10086) --- src/json.c | 123 +++++++++++++++++++++++++++----------- src/testdir/test_json.vim | 3 + src/version.c | 2 + 3 files changed, 93 insertions(+), 35 deletions(-) diff --git a/src/json.c b/src/json.c index 5be8f7fe6..47bf9904a 100644 --- a/src/json.c +++ b/src/json.c @@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val) } #endif +/* + * Lookup table to quickly know if the given ASCII character must be escaped. + */ +static const char ascii_needs_escape[128] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. +}; + +/* + * Encode the utf-8 encoded string "str" into "gap". + */ static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; + char_u *from; +#if defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; +#endif + int c; if (res == NULL) - ga_concat(gap, (char_u *)"\"\""); - else { -#if defined(USE_ICONV) - vimconv_T conv; - char_u *converted = NULL; + ga_concat(gap, (char_u *)"\"\""); + return; + } - if (!enc_utf8) - { - // Convert the text from 'encoding' to utf-8, the JSON string is - // always utf-8. - conv.vc_type = CONV_NONE; - convert_setup(&conv, p_enc, (char_u*)"utf-8"); - if (conv.vc_type != CONV_NONE) - converted = res = string_convert(&conv, res, NULL); - convert_setup(&conv, NULL, NULL); - } +#if defined(USE_ICONV) + if (!enc_utf8) + { + // Convert the text from 'encoding' to utf-8, because a JSON string is + // always utf-8. + conv.vc_type = CONV_NONE; + convert_setup(&conv, p_enc, (char_u*)"utf-8"); + if (conv.vc_type != CONV_NONE) + converted = res = string_convert(&conv, res, NULL); + convert_setup(&conv, NULL, NULL); + } #endif - ga_append(gap, '"'); - while (*res != NUL) + ga_append(gap, '"'); + // `from` is the beginning of a sequence of bytes we can directly copy from + // the input string, avoiding the overhead associated to decoding/encoding + // them. + from = res; + while ((c = *res) != NUL) + { + // always use utf-8 encoding, ignore 'encoding' + if (c < 0x80) { - int c; - // always use utf-8 encoding, ignore 'encoding' - c = utf_ptr2char(res); + if (!ascii_needs_escape[c]) + { + res += 1; + continue; + } + + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; switch (c) { @@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str) ga_append(gap, c); break; default: - if (c >= 0x20) - { - numbuf[utf_char2bytes(c, numbuf)] = NUL; - ga_concat(gap, numbuf); - } - else - { - vim_snprintf((char *)numbuf, NUMBUFLEN, - "\\u%04lx", (long)c); - ga_concat(gap, numbuf); - } + vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", + (long)c); + ga_concat(gap, numbuf); + } + + res += 1; + } + else + { + int l = utf_ptr2len(res); + + if (l > 1) + { + res += l; + continue; } - res += utf_ptr2len(res); + + // Invalid utf-8 sequence, replace it with the Unicode replacement + // character U+FFFD. + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; + + numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL; + ga_concat(gap, numbuf); + + res += l; } - ga_append(gap, '"'); + } + + if (res != from) + ga_concat_len(gap, from, res - from); + + ga_append(gap, '"'); #if defined(USE_ICONV) - vim_free(converted); + vim_free(converted); #endif - } } /* diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim index 0248aa9ec..3ee783703 100644 --- a/src/testdir/test_json.vim +++ b/src/testdir/test_json.vim @@ -107,6 +107,9 @@ func Test_json_encode() call assert_equal('"café"', json_encode("caf\xe9")) let &encoding = save_encoding + " Invalid utf-8 sequences are replaced with U+FFFD (replacement character) + call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB")) + call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func') call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func') diff --git a/src/version.c b/src/version.c index 3e220f581..f4d300b39 100644 --- a/src/version.c +++ b/src/version.c @@ -746,6 +746,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 4695, /**/ 4694, /**/ -- 2.40.0