From: Gwynne Raskind Date: Mon, 29 Aug 2011 14:56:19 +0000 (+0000) Subject: Add unescaped Unicode encoding to json_encode(). Closes bug #53946. Patch by Irker... X-Git-Tag: php-5.5.0alpha1~1473 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a1501cf142381d6e1e9a285efd3cccdde5749357;p=php Add unescaped Unicode encoding to json_encode(). Closes bug #53946. Patch by Irker and Gwynne. --- diff --git a/ext/json/json.c b/ext/json/json.c index 39e3d4d7c9..3c20498aa8 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -95,6 +95,7 @@ static PHP_MINIT_FUNCTION(json) REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT); @@ -346,7 +347,7 @@ static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC) static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */ { - int pos = 0; + int pos = 0, ulen = 0; unsigned short us; unsigned short *utf16; @@ -378,15 +379,14 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR } } - - utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); - - len = utf8_to_utf16(utf16, s, len); - if (len <= 0) { + + utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? 
NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0); + ulen = utf8_to_utf16(utf16, s, len); + if (ulen <= 0) { if (utf16) { efree(utf16); } - if (len < 0) { + if (ulen < 0) { JSON_G(error_code) = PHP_JSON_ERROR_UTF8; if (!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument"); @@ -397,12 +397,15 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR } return; } + if (!(options & PHP_JSON_UNESCAPED_UNICODE)) { + len = ulen; + } smart_str_appendc(buf, '"'); while (pos < len) { - us = utf16[pos++]; + us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++]; switch (us) { @@ -479,7 +482,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR break; default: - if (us >= ' ' && (us & 127) == us) { + if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { smart_str_appendc(buf, (unsigned char) us); } else { smart_str_appendl(buf, "\\u", 2); @@ -498,7 +501,9 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR } smart_str_appendc(buf, '"'); - efree(utf16); + if (utf16) { + efree(utf16); + } } /* }}} */ diff --git a/ext/json/php_json.h b/ext/json/php_json.h index 6feffd668d..002bbe1f55 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -62,6 +62,7 @@ extern zend_class_entry *php_json_serializable_ce; #define PHP_JSON_NUMERIC_CHECK (1<<5) #define PHP_JSON_UNESCAPED_SLASHES (1<<6) #define PHP_JSON_PRETTY_PRINT (1<<7) +#define PHP_JSON_UNESCAPED_UNICODE (1<<8) /* Internal flags */ #define PHP_JSON_OUTPUT_ARRAY 0 diff --git a/ext/json/tests/bug53946.phpt b/ext/json/tests/bug53946.phpt new file mode 100644 index 0000000000..abbb81238b --- /dev/null +++ b/ext/json/tests/bug53946.phpt @@ -0,0 +1,16 @@ +--TEST-- +bug #53946 (json_encode() with JSON_UNESCAPED_UNICODE) +--SKIPIF-- +<?php if (!extension_loaded("json")) print "skip"; ?> +--FILE-- +<?php +var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 \x08 \n U+1D11E >𝄞<")); +var_dump(json_encode("latin 1234 -/ russian мама мыла раму specialchars \x02 
\x08 \n U+1D11E >𝄞<", JSON_UNESCAPED_UNICODE)); +var_dump(json_encode("ab\xE0")); +var_dump(json_encode("ab\xE0", JSON_UNESCAPED_UNICODE)); +?> +--EXPECT-- +string(156) ""latin 1234 -\/ russian \u043c\u0430\u043c\u0430 \u043c\u044b\u043b\u0430 \u0440\u0430\u043c\u0443 specialchars \u0002 \b \n U+1D11E >\ud834\udd1e<"" +string(100) ""latin 1234 -\/ russian мама мыла раму specialchars \u0002 \b \n U+1D11E >𝄞<"" +string(4) "null" +string(4) "null" diff --git a/ext/json/utf8_to_utf16.c b/ext/json/utf8_to_utf16.c index 599f0e13b4..508bc9368a 100644 --- a/ext/json/utf8_to_utf16.c +++ b/ext/json/utf8_to_utf16.c @@ -30,7 +30,7 @@ SOFTWARE. #include "utf8_decode.h" int -utf8_to_utf16(unsigned short w[], char p[], int length) +utf8_to_utf16(unsigned short *w, char p[], int length) { int c; int the_index = 0; @@ -43,14 +43,17 @@ utf8_to_utf16(unsigned short w[], char p[], int length) return (c == UTF8_END) ? the_index : UTF8_ERROR; } if (c < 0x10000) { - w[the_index] = (unsigned short)c; + if (w) { + w[the_index] = (unsigned short)c; + } the_index += 1; } else { c -= 0x10000; - w[the_index] = (unsigned short)(0xD800 | (c >> 10)); - the_index += 1; - w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF)); - the_index += 1; + if (w) { + w[the_index] = (unsigned short)(0xD800 | (c >> 10)); + w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF)); + } + the_index += 2; } } } diff --git a/ext/json/utf8_to_utf16.h b/ext/json/utf8_to_utf16.h index 5aff0268bf..5c9685a991 100644 --- a/ext/json/utf8_to_utf16.h +++ b/ext/json/utf8_to_utf16.h @@ -1,3 +1,3 @@ /* utf8_to_utf16.h */ -extern int utf8_to_utf16(unsigned short w[], char p[], int length); +extern int utf8_to_utf16(unsigned short *w, char p[], int length);