granicus.if.org Git - php/commitdiff
php_json_escape_string() optimization
author Dmitry Stogov <dmitry@zend.com>
Wed, 13 Jun 2018 15:26:48 +0000 (18:26 +0300)
committer Dmitry Stogov <dmitry@zend.com>
Wed, 13 Jun 2018 15:26:48 +0000 (18:26 +0300)
Zend/zend_smart_str.h
ext/json/json_encoder.c

index 44db62cc00611e5df4aabf7435ffa7831dcd5273..aed87d773b02c690980c21cc9107b87b9973e92b 100644 (file)
@@ -27,6 +27,8 @@
        smart_str_appendl_ex((dest), (src), strlen(src), (what))
 #define smart_str_appends(dest, src) \
        smart_str_appendl((dest), (src), strlen(src))
+#define smart_str_extend(dest, len) \
+       smart_str_extend_ex((dest), (len), 0)
 #define smart_str_appendc(dest, c) \
        smart_str_appendc_ex((dest), (c), 0)
 #define smart_str_appendl(dest, src, len) \
@@ -71,6 +73,13 @@ do_smart_str_realloc:
        return len;
 }
 
+static zend_always_inline char* smart_str_extend_ex(smart_str *dest, size_t len, zend_bool persistent) {
+       size_t new_len = smart_str_alloc(dest, len, persistent);
+       char *ret = ZSTR_VAL(dest->s) + ZSTR_LEN(dest->s);
+       ZSTR_LEN(dest->s) = new_len;
+       return ret;
+}
+
 static zend_always_inline void smart_str_free_ex(smart_str *str, zend_bool persistent) {
        if (str->s) {
                zend_string_release_ex(str->s, persistent);
index b3d0b57a7e8a9062d3cad3830e1d9db00e92cd17..c79e694f260b052de93b6f5de89147451868ebd6 100644 (file)
@@ -33,7 +33,7 @@
 static const char digits[] = "0123456789abcdef";
 
 static int php_json_escape_string(
-               smart_str *buf, char *s, size_t len,
+               smart_str *buf, const char *s, size_t len,
                int options, php_json_encoder *encoder);
 
 static int php_json_determine_array_type(zval *val) /* {{{ */
@@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
 /* }}} */
 
 static int php_json_escape_string(
-               smart_str *buf, char *s, size_t len,
+               smart_str *buf, const char *s, size_t len,
                int options, php_json_encoder *encoder) /* {{{ */
 {
        int status;
        unsigned int us;
        size_t pos, checkpoint;
+       char *dst;
 
        if (len == 0) {
                smart_str_appendl(buf, "\"\"", 2);
@@ -287,72 +288,89 @@ static int php_json_escape_string(
 
        do {
                us = (unsigned char)s[pos];
-               if (us >= 0x80) {
-                       int utf8_sub = 0;
-                       size_t prev_pos = pos;
-
+               if (UNEXPECTED(us >= 0x80)) {
+                       if (pos) {
+                               smart_str_appendl(buf, s, pos);
+                               s += pos;
+                               pos = 0;
+                       }
                        us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
+                       len -= pos;
 
                        /* check whether UTF8 character is correct */
-                       if (status != SUCCESS) {
+                       if (UNEXPECTED(status != SUCCESS)) {
+                               s += pos;
+                               pos = 0;
                                if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
                                        /* ignore invalid UTF8 character */
                                        continue;
                                } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
                                        /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
-                                       us = 0xfffd;
-                                       utf8_sub = 1;
-                               } else {
-                                       if (buf->s) {
-                                               ZSTR_LEN(buf->s) = checkpoint;
+                                       if (options & PHP_JSON_UNESCAPED_UNICODE) {
+                                               smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+                                       } else {
+                                               smart_str_appendl(buf, "\\ufffd", 6);
                                        }
+                                       continue;
+                               } else {
+                                       ZSTR_LEN(buf->s) = checkpoint;
                                        encoder->error_code = PHP_JSON_ERROR_UTF8;
                                        if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
                                                smart_str_appendl(buf, "null", 4);
                                        }
                                        return FAILURE;
                                }
-                       }
 
                        /* Escape U+2028/U+2029 line terminators, UNLESS both
                           JSON_UNESCAPED_UNICODE and
                           JSON_UNESCAPED_LINE_TERMINATORS were provided */
-                       if ((options & PHP_JSON_UNESCAPED_UNICODE)
+                       } else if ((options & PHP_JSON_UNESCAPED_UNICODE)
                            && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
                                        || us < 0x2028 || us > 0x2029)) {
-                               if (utf8_sub) {
-                                       smart_str_appendl(buf, "\xef\xbf\xbd", 3);
-                               } else {
-                                       smart_str_appendl(buf, s + prev_pos, pos - prev_pos);
-                               }
+                               smart_str_appendl(buf, s, pos);
+                               s += pos;
+                               pos = 0;
                                continue;
                        }
                        /* From http://en.wikipedia.org/wiki/UTF16 */
                        if (us >= 0x10000) {
                                unsigned int next_us;
+
                                us -= 0x10000;
                                next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
                                us = (unsigned short)((us >> 10) | 0xd800);
-                               smart_str_appendl(buf, "\\u", 2);
-                               smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
-                               smart_str_appendc(buf, digits[(us & 0xf00)  >> 8]);
-                               smart_str_appendc(buf, digits[(us & 0xf0)   >> 4]);
-                               smart_str_appendc(buf, digits[(us & 0xf)]);
+                               dst = smart_str_extend(buf, 6);
+                               dst[0] = '\\';
+                               dst[1] = 'u';
+                               dst[2] = digits[(us >> 12) & 0xf];
+                               dst[3] = digits[(us >> 8) & 0xf];
+                               dst[4] = digits[(us >> 4) & 0xf];
+                               dst[5] = digits[us & 0xf];
                                us = next_us;
                        }
-                       smart_str_appendl(buf, "\\u", 2);
-                       smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
-                       smart_str_appendc(buf, digits[(us & 0xf00)  >> 8]);
-                       smart_str_appendc(buf, digits[(us & 0xf0)   >> 4]);
-                       smart_str_appendc(buf, digits[(us & 0xf)]);
+                       dst = smart_str_extend(buf, 6);
+                       dst[0] = '\\';
+                       dst[1] = 'u';
+                       dst[2] = digits[(us >> 12) & 0xf];
+                       dst[3] = digits[(us >> 8) & 0xf];
+                       dst[4] = digits[(us >> 4) & 0xf];
+                       dst[5] = digits[us & 0xf];
+                       s += pos;
+                       pos = 0;
                } else {
                        static const uint32_t charmap[4] = {
                                0xffffffff, 0x500080c4, 0x10000000, 0x00000000};
 
-                       pos++;
+                       len--;
                        if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
-                               smart_str_appendc(buf, (unsigned char) us);
+                               pos++;
                        } else {
+                               if (pos) {
+                                       smart_str_appendl(buf, s, pos);
+                                       s += pos;
+                                       pos = 0;
+                               }
+                               s++;
                                switch (us) {
                                        case '"':
                                                if (options & PHP_JSON_HEX_QUOT) {
@@ -428,15 +446,22 @@ static int php_json_escape_string(
 
                                        default:
                                                ZEND_ASSERT(us < ' ');
-                                               smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1);
-                                               smart_str_appendc(buf, digits[(us & 0xf0)   >> 4]);
-                                               smart_str_appendc(buf, digits[(us & 0xf)]);
+                                               dst = smart_str_extend(buf, 6);
+                                               dst[0] = '\\';
+                                               dst[1] = 'u';
+                                               dst[2] = '0';
+                                               dst[3] = '0';
+                                               dst[4] = digits[(us >> 4) & 0xf];
+                                               dst[5] = digits[us & 0xf];
                                                break;
                                }
                        }
                }
-       } while (pos < len);
+       } while (len);
 
+       if (EXPECTED(pos)) {
+               smart_str_appendl(buf, s, pos);
+       }
        smart_str_appendc(buf, '"');
 
        return SUCCESS;