granicus.if.org Git - php/commitdiff
Add unescaped Unicode encoding to json_encode(). Closes bug #53946. Patch by Irker...
author Gwynne Raskind <gwynne@php.net>
Mon, 29 Aug 2011 14:56:19 +0000 (14:56 +0000)
committer Gwynne Raskind <gwynne@php.net>
Mon, 29 Aug 2011 14:56:19 +0000 (14:56 +0000)
ext/json/json.c
ext/json/php_json.h
ext/json/utf8_to_utf16.c
ext/json/utf8_to_utf16.h

index 39e3d4d7c98f302f6a26dbf1385aa12563f9bf29..3c20498aa8392eb9881a5455d4d5b4bad78efb60 100644 (file)
@@ -95,6 +95,7 @@ static PHP_MINIT_FUNCTION(json)
        REGISTER_LONG_CONSTANT("JSON_NUMERIC_CHECK", PHP_JSON_NUMERIC_CHECK, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("JSON_UNESCAPED_SLASHES", PHP_JSON_UNESCAPED_SLASHES, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("JSON_PRETTY_PRINT", PHP_JSON_PRETTY_PRINT, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE, CONST_CS | CONST_PERSISTENT);
 
        REGISTER_LONG_CONSTANT("JSON_ERROR_NONE", PHP_JSON_ERROR_NONE, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("JSON_ERROR_DEPTH", PHP_JSON_ERROR_DEPTH, CONST_CS | CONST_PERSISTENT);
@@ -346,7 +347,7 @@ static void json_encode_array(smart_str *buf, zval **val, int options TSRMLS_DC)
 
 static void json_escape_string(smart_str *buf, char *s, int len, int options TSRMLS_DC) /* {{{ */
 {
-       int pos = 0;
+       int pos = 0, ulen = 0;
        unsigned short us;
        unsigned short *utf16;
 
@@ -378,15 +379,14 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
                }
                
        }
-
-       utf16 = (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
-
-       len = utf8_to_utf16(utf16, s, len);
-       if (len <= 0) {
+       
+       utf16 = (options & PHP_JSON_UNESCAPED_UNICODE) ? NULL : (unsigned short *) safe_emalloc(len, sizeof(unsigned short), 0);
+       ulen = utf8_to_utf16(utf16, s, len);
+       if (ulen <= 0) {
                if (utf16) {
                        efree(utf16);
                }
-               if (len < 0) {
+               if (ulen < 0) {
                        JSON_G(error_code) = PHP_JSON_ERROR_UTF8;
                        if (!PG(display_errors)) {
                                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid UTF-8 sequence in argument");
@@ -397,12 +397,15 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
                }
                return;
        }
+       if (!(options & PHP_JSON_UNESCAPED_UNICODE)) {
+               len = ulen;
+       }
 
        smart_str_appendc(buf, '"');
 
        while (pos < len)
        {
-               us = utf16[pos++];
+               us = (options & PHP_JSON_UNESCAPED_UNICODE) ? s[pos++] : utf16[pos++];
 
                switch (us)
                {
@@ -479,7 +482,7 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
                                break;
 
                        default:
-                               if (us >= ' ' && (us & 127) == us) {
+                               if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) {
                                        smart_str_appendc(buf, (unsigned char) us);
                                } else {
                                        smart_str_appendl(buf, "\\u", 2);
@@ -498,7 +501,9 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
        }
 
        smart_str_appendc(buf, '"');
-       efree(utf16);
+       if (utf16) {
+               efree(utf16);
+       }
 }
 /* }}} */
 
index 6feffd668d2346554a8add4ba74acd3d23753c20..002bbe1f552c2e4d5c5116bd91a3793db6d9f63f 100644 (file)
@@ -62,6 +62,7 @@ extern zend_class_entry *php_json_serializable_ce;
 #define PHP_JSON_NUMERIC_CHECK (1<<5)
 #define PHP_JSON_UNESCAPED_SLASHES     (1<<6)
 #define PHP_JSON_PRETTY_PRINT  (1<<7)
+#define PHP_JSON_UNESCAPED_UNICODE     (1<<8)
 
 /* Internal flags */
 #define PHP_JSON_OUTPUT_ARRAY  0
index 599f0e13b48b26e57fcd755ab270375a5d413da1..508bc9368aebba6e43a74f00916e3403dea73658 100644 (file)
@@ -30,7 +30,7 @@ SOFTWARE.
 #include "utf8_decode.h"
 
 int 
-utf8_to_utf16(unsigned short w[], char p[], int length) 
+utf8_to_utf16(unsigned short *w, char p[], int length) 
 {
     int c;
     int the_index = 0;
@@ -43,14 +43,17 @@ utf8_to_utf16(unsigned short w[], char p[], int length)
             return (c == UTF8_END) ? the_index : UTF8_ERROR;
         }
         if (c < 0x10000) {
-            w[the_index] = (unsigned short)c;
+            if (w) {
+                w[the_index] = (unsigned short)c;
+            }
             the_index += 1;
         } else {
             c -= 0x10000;
-            w[the_index] = (unsigned short)(0xD800 | (c >> 10));
-            the_index += 1;
-            w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));
-            the_index += 1;
+            if (w) {
+                w[the_index] = (unsigned short)(0xD800 | (c >> 10));
+                w[the_index + 1] = (unsigned short)(0xDC00 | (c & 0x3FF));
+            }
+            the_index += 2;
         }
     }
 }
index 5aff0268bfcf0477a02b69b7f501494d528e390b..5c9685a99128f2d3933d58a855384833ff5a565b 100644 (file)
@@ -1,3 +1,3 @@
 /* utf8_to_utf16.h */
 
-extern int utf8_to_utf16(unsigned short w[], char p[], int length);
+extern int utf8_to_utf16(unsigned short *w, char p[], int length);