From: Dmitry Stogov Date: Wed, 13 Jun 2018 15:26:48 +0000 (+0300) Subject: php_json_escape_string() optimization X-Git-Tag: php-7.3.0alpha2~34 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c1ce43d1d2e500aeb0143c7c0f051e40f09ce4db;p=php php_json_escape_string() optimization --- diff --git a/Zend/zend_smart_str.h b/Zend/zend_smart_str.h index 44db62cc00..aed87d773b 100644 --- a/Zend/zend_smart_str.h +++ b/Zend/zend_smart_str.h @@ -27,6 +27,8 @@ smart_str_appendl_ex((dest), (src), strlen(src), (what)) #define smart_str_appends(dest, src) \ smart_str_appendl((dest), (src), strlen(src)) +#define smart_str_extend(dest, len) \ + smart_str_extend_ex((dest), (len), 0) #define smart_str_appendc(dest, c) \ smart_str_appendc_ex((dest), (c), 0) #define smart_str_appendl(dest, src, len) \ @@ -71,6 +73,13 @@ do_smart_str_realloc: return len; } +static zend_always_inline char* smart_str_extend_ex(smart_str *dest, size_t len, zend_bool persistent) { + size_t new_len = smart_str_alloc(dest, len, persistent); + char *ret = ZSTR_VAL(dest->s) + ZSTR_LEN(dest->s); + ZSTR_LEN(dest->s) = new_len; + return ret; +} + static zend_always_inline void smart_str_free_ex(smart_str *str, zend_bool persistent) { if (str->s) { zend_string_release_ex(str->s, persistent); diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index b3d0b57a7e..c79e694f26 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -33,7 +33,7 @@ static const char digits[] = "0123456789abcdef"; static int php_json_escape_string( - smart_str *buf, char *s, size_t len, + smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); static int php_json_determine_array_type(zval *val) /* {{{ */ @@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso /* }}} */ static int php_json_escape_string( - smart_str *buf, char *s, size_t len, + smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ { int status; unsigned int us; size_t pos, checkpoint; + char *dst; if (len == 0) { smart_str_appendl(buf, "\"\"", 2); @@ -287,72 +288,89 @@ static int php_json_escape_string( do { us = (unsigned char)s[pos]; - if (us >= 0x80) { - int utf8_sub = 0; - size_t prev_pos = pos; - + if (UNEXPECTED(us >= 0x80)) { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); + len -= pos; /* check whether UTF8 character is correct */ - if (status != SUCCESS) { + if (UNEXPECTED(status != SUCCESS)) { + s += pos; + pos = 0; if (options & PHP_JSON_INVALID_UTF8_IGNORE) { /* ignore invalid UTF8 character */ continue; } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */ - us = 0xfffd; - utf8_sub = 1; - } else { - if (buf->s) { - ZSTR_LEN(buf->s) = checkpoint; + if (options & PHP_JSON_UNESCAPED_UNICODE) { + smart_str_appendl(buf, "\xef\xbf\xbd", 3); + } else { + smart_str_appendl(buf, "\\ufffd", 6); } + continue; + } else { + ZSTR_LEN(buf->s) = checkpoint; encoder->error_code = PHP_JSON_ERROR_UTF8; if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { smart_str_appendl(buf, "null", 4); } return FAILURE; } - } /* Escape U+2028/U+2029 line terminators, UNLESS both JSON_UNESCAPED_UNICODE and JSON_UNESCAPED_LINE_TERMINATORS were provided */ - if ((options & PHP_JSON_UNESCAPED_UNICODE) + } else if ((options & PHP_JSON_UNESCAPED_UNICODE) && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) || us < 0x2028 || us > 0x2029)) { - if (utf8_sub) { - smart_str_appendl(buf, "\xef\xbf\xbd", 3); - } else { - smart_str_appendl(buf, s + prev_pos, pos - prev_pos); - } + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; continue; } /* From http://en.wikipedia.org/wiki/UTF16 */ if (us >= 0x10000) { unsigned int next_us; + us -= 0x10000; next_us = (unsigned short)((us & 0x3ff) | 0xdc00); us = (unsigned short)((us >> 10) | 0xd800); - smart_str_appendl(buf, "\\u", 2); - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; us = next_us; } - smart_str_appendl(buf, "\\u", 2); - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; + s += pos; + pos = 0; } else { static const uint32_t charmap[4] = { 0xffffffff, 0x500080c4, 0x10000000, 0x00000000}; - pos++; + len--; if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { - smart_str_appendc(buf, (unsigned char) us); + pos++; } else { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } + s++; switch (us) { case '"': if (options & PHP_JSON_HEX_QUOT) { @@ -428,15 +446,22 @@ static int php_json_escape_string( default: ZEND_ASSERT(us < ' '); - smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = '0'; + dst[3] = '0'; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; break; } } } - } while (pos < len); + } while (len); + if (EXPECTED(pos)) { + smart_str_appendl(buf, s, pos); + } smart_str_appendc(buf, '"'); return SUCCESS;