From: Christoph M. Becker Date: Tue, 23 Jun 2015 17:41:02 +0000 (+0200) Subject: Merge branch 'PHP-5.6' X-Git-Tag: php-7.0.0beta1~12^2~49^2~20 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ca049e0ae97d5a176214ef3c97962093de1a7f49;p=php Merge branch 'PHP-5.6' * PHP-5.6: updated NEWS Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string) --- ca049e0ae97d5a176214ef3c97962093de1a7f49 diff --cc ext/pcre/php_pcre.c index 418859f5d2,f7afc47458..59a0aa569f --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@@ -233,9 -225,28 +233,28 @@@ static char **make_subpats_table(int nu } /* }}} */ + /* {{{ static calculate_unit_length */ + /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */ + static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start) + { + int unit_len; + + if (pce->compile_options & PCRE_UTF8) { + char *end = start; + + /* skip continuation bytes */ + while ((*++end & 0xC0) == 0x80); + unit_len = end - start; + } else { + unit_len = 1; + } + return unit_len; + } + /* }}} */ + /* {{{ pcre_get_compiled_regex_cache */ -PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) { pcre *re = NULL; pcre_extra *extra; @@@ -854,8 -799,10 +873,10 @@@ PHPAPI void php_pcre_match_impl(pcre_ca the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { + int unit_len = calculate_unit_length(pce, subject + start_offset); + - offsets[0] = start_offset; - offsets[1] = start_offset + unit_len; + offsets[0] = (int)start_offset; - offsets[1] = (int)(start_offset + 1); ++ offsets[1] = (int)(start_offset + unit_len); } else break; } else { @@@ -1247,29 -1261,25 +1268,31 @@@ PHPAPI zend_string *php_pcre_replace_im the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (g_notempty != 0 && start_offset < subject_len) { + int unit_len = calculate_unit_length(pce, piece); + offsets[0] = start_offset; - offsets[1] = start_offset + 1; - memcpy(&result->val[result_len], piece, 1); - result_len++; + offsets[1] = start_offset + unit_len; - memcpy(&result[*result_len], piece, unit_len); - *result_len += unit_len; ++ memcpy(&result->val[result_len], piece, unit_len); ++ result_len += unit_len; } else { - new_len = *result_len + subject_len - start_offset; - if (new_len + 1 > alloc_len) { - alloc_len = new_len + 1; /* now we know exactly how long it is */ - new_buf = safe_emalloc(alloc_len, sizeof(char), 0); - memcpy(new_buf, result, *result_len); - efree(result); - result = new_buf; + if (!result && subject_str) { + result = zend_string_copy(subject_str); + break; + } + new_len = result_len + subject_len - start_offset; + if (new_len > alloc_len) { + alloc_len = new_len; /* now we know exactly how long it is */ + if (NULL != result) { + result = zend_string_realloc(result, alloc_len, 0); + } else { + result = zend_string_alloc(alloc_len, 0); + } } /* stick that last bit of string on our output */ - memcpy(&result[*result_len], piece, subject_len - start_offset); - *result_len += subject_len - start_offset; - result[*result_len] = '\0'; + memcpy(&result->val[result_len], piece, subject_len - start_offset); + result_len += subject_len - start_offset; + result->val[result_len] = '\0'; + result->len = result_len; break; } } else {