granicus.if.org Git - vim/commitdiff
patch 7.4.1434 v7.4.1434
author Bram Moolenaar <Bram@vim.org>
Sat, 27 Feb 2016 17:41:27 +0000 (18:41 +0100)
committer Bram Moolenaar <Bram@vim.org>
Sat, 27 Feb 2016 17:41:27 +0000 (18:41 +0100)
Problem:    JSON encoding doesn't handle surrogate pairs.
Solution:   Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)

src/json.c
src/testdir/test_json.vim
src/version.c

index a42faec0dba5793a762dc23590845cc961cff53c..da585e306db90dbcec7d6501fd44dc17862bbeb6 100644 (file)
@@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str)
        ga_concat(gap, (char_u *)"null");
     else
     {
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+       vimconv_T   conv;
+       char_u      *converted = NULL;
+
+       convert_setup(&conv, p_enc, (char_u*)"utf-8");
+       if (conv.vc_type != CONV_NONE)
+           converted = res = string_convert(&conv, res, NULL);
+       convert_setup(&conv, NULL, NULL);
+#endif
+
        ga_append(gap, '"');
        while (*res != NUL)
        {
-           int c = PTR2CHAR(res);
+           int c;
+#ifdef FEAT_MBYTE
+           /* always use utf-8 encoding, ignore 'encoding' */
+           c = utf_ptr2char(res);
+#else
+           c = (int)*(p);
+#endif
 
            switch (c)
            {
@@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str)
                    if (c >= 0x20)
                    {
 #ifdef FEAT_MBYTE
-                       numbuf[mb_char2bytes(c, numbuf)] = NUL;
+                       numbuf[utf_char2bytes(c, numbuf)] = NUL;
 #else
                        numbuf[0] = c;
                        numbuf[1] = NUL;
@@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str)
                        ga_concat(gap, numbuf);
                    }
            }
-           mb_cptr_adv(res);
+#ifdef FEAT_MBYTE
+           res += utf_ptr2len(res);
+#else
+           ++p;
+#endif
        }
        ga_append(gap, '"');
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+       vim_free(converted);
+#endif
     }
 }
 
@@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, typval_T *res)
     int                c;
     long       nr;
     char_u     buf[NUMBUFLEN];
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vimconv_T   conv;
+    char_u     *converted = NULL;
+#endif
 
     if (res != NULL)
        ga_init2(&ga, 1, 200);
 
     p = reader->js_buf + reader->js_used + 1; /* skip over " */
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    convert_setup(&conv, (char_u*)"utf-8", p_enc);
+    if (conv.vc_type != CONV_NONE)
+       converted = p = string_convert(&conv, p, NULL);
+    convert_setup(&conv, NULL, NULL);
+#endif
     while (*p != '"')
     {
        if (*p == NUL || p[1] == NUL
@@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, typval_T *res)
                                                     + STRLEN(reader->js_buf);
                        }
                    }
+                   nr = 0;
+                   len = 0;
                    vim_str2nr(p + 2, NULL, &len,
                                     STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
                    p += len + 2;
+                   if (0xd800 <= nr && nr <= 0xdfff
+                           && (int)(reader->js_end - p) >= 6
+                           && *p == '\\' && *(p+1) == 'u')
+                   {
+                       long    nr2 = 0;
+
+                       /* decode surrogate pair: \ud812\u3456 */
+                       len = 0;
+                       vim_str2nr(p + 2, NULL, &len,
+                                    STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
+                       if (0xdc00 <= nr2 && nr2 <= 0xdfff)
+                       {
+                           p += len + 2;
+                           nr = (((nr - 0xd800) << 10) |
+                               ((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
+                       }
+                   }
                    if (res != NULL)
                    {
 #ifdef FEAT_MBYTE
-                       buf[(*mb_char2bytes)((int)nr, buf)] = NUL;
+                       buf[utf_char2bytes((int)nr, buf)] = NUL;
                        ga_concat(&ga, buf);
 #else
                        ga_append(&ga, nr);
@@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, typval_T *res)
        }
        else
        {
-           len = MB_PTR2LEN(p);
+#ifdef FEAT_MBYTE
+           len = utf_ptr2len(p);
+#else
+           len = 1;
+#endif
            if (res != NULL)
            {
                if (ga_grow(&ga, len) == FAIL)
                {
                    ga_clear(&ga);
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+                   vim_free(converted);
+#endif
                    return FAIL;
                }
                mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
@@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, typval_T *res)
            p += len;
        }
     }
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vim_free(converted);
+#endif
 
     reader->js_used = (int)(p - reader->js_buf);
     if (*p == '"')
index 6c19b30fd2828c1fc792f31c5d1eef33ab605db5..369bdde2fa41dd43fd27d1bf7fabe9c5bf642923 100644 (file)
@@ -12,6 +12,12 @@ let s:var4 = "\x10\x11\x12\x13\x14\x15\x16\x17"
 let s:json5 = '"\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"'
 let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
 
+" surrogate pair
+let s:jsonsp1 = '"\ud83c\udf63"'
+let s:varsp1 = "\xf0\x9f\x8d\xa3"
+let s:jsonsp2 = '"\ud83c\u00a0"'
+let s:varsp2 = "\ud83c\u00a0"
+
 let s:jsonmb = '"s¢cĴgё"'
 let s:varmb = "s¢cĴgё"
 let s:jsonnr = '1234'
@@ -69,6 +75,8 @@ func Test_json_encode()
 
   if has('multi_byte')
     call assert_equal(s:jsonmb, json_encode(s:varmb))
+    call assert_equal(s:varsp1, json_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, json_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:jsonnr, json_encode(s:varnr))
@@ -105,6 +113,8 @@ func Test_json_decode()
 
   if has('multi_byte')
     call assert_equal(s:varmb, json_decode(s:jsonmb))
+    call assert_equal(s:varsp1, js_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, js_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:varnr, json_decode(s:jsonnr))
index 480fd8a7ab689c18f90881cd8ce2af73de1aae02..afa926f4248d87faddf7d935bf1a92c212bbc4f0 100644 (file)
@@ -743,6 +743,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1434,
 /**/
     1433,
 /**/