From: Rui Hirokawa Date: Fri, 2 Mar 2012 14:19:05 +0000 (+0000) Subject: MFH mb_ereg_replace_callback() for security enhancements. X-Git-Tag: PHP-5.4.1-RC1~95 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5d64c94f35f0880cce4aa532893940b883db3e8a;p=php MFH mb_ereg_replace_callback() for security enhancements. --- diff --git a/NEWS b/NEWS index 2dfb9351ca..b0b9d2b8a4 100644 --- a/NEWS +++ b/NEWS @@ -46,6 +46,9 @@ PHP NEWS - Zlib: . Fixed bug #61139 (gzopen leaks when specifying invalid mode). (Nikita Popov) +- mbstring: + . MFH mb_ereg_replace_callback() for security enhancements. (Rui) + 01 Mar 2012, PHP 5.4.0 - Installation: diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 058502be4f..0d2b53a7ca 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -467,6 +467,13 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3) ZEND_ARG_INFO(0, string) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3) + ZEND_ARG_INFO(0, pattern) + ZEND_ARG_INFO(0, callback) + ZEND_ARG_INFO(0, string) + ZEND_ARG_INFO(0, option) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2) ZEND_ARG_INFO(0, pattern) ZEND_ARG_INFO(0, string) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index e3adb780a4..863eebead3 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -784,7 +784,7 @@ PHP_FUNCTION(mb_eregi) /* }}} */ /* {{{ _php_mb_regex_ereg_replace_exec */ -static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options) +static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable) { zval **arg_pattern_zval; @@ -794,6 +794,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp char *replace; int replace_len; + zend_fcall_info arg_replace_fci; + zend_fcall_info_cache arg_replace_fci_cache; + char *string; int string_len; @@ -826,12 +829,22 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp char *option_str = NULL; int option_str_len = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s", - &arg_pattern_zval, - &replace, &replace_len, - &string, &string_len, - &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; + if (!is_callable) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s", + &arg_pattern_zval, + &replace, &replace_len, + &string, &string_len, + &option_str, &option_str_len) == FAILURE) { + RETURN_FALSE; + } + } else { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s", + &arg_pattern_zval, + &arg_replace_fci, &arg_replace_fci_cache, + &string, &string_len, + &option_str, &option_str_len) == FAILURE) { + RETURN_FALSE; + } } if (option_str != NULL) { @@ -859,7 +872,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp RETURN_FALSE; } - if (eval) { + if (eval || is_callable) { pbuf = &eval_buf; description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); } else { @@ -867,6 +880,13 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp description = NULL; } + if (is_callable) { + if (eval) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback"); + RETURN_FALSE; + } + } + /* do the actual work */ err = 0; pos = (OnigUChar *)string; @@ -889,28 +909,32 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp #endif /* copy the part of the string before the match */ smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos)); - /* copy replacement and backrefs */ - i = 0; - p = replace; - while (i < replace_len) { - int fwd = (int) php_mb_mbchar_bytes_ex(p, enc); - n = -1; - if ((replace_len - i) >= 2 && fwd == 1 && + + if (!is_callable) { + /* copy replacement and backrefs */ + i = 0; + p = replace; + while (i < replace_len) { + int fwd = (int) php_mb_mbchar_bytes_ex(p, enc); + n = -1; + if ((replace_len - i) >= 2 && fwd == 1 && p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { - n = p[1] - '0'; - } - if (n >= 0 && n < regs->num_regs) { - if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) { - smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]); + n = p[1] - '0'; + } + if (n >= 0 && n < regs->num_regs) { + if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) { + smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]); + } + p += 2; + i += 2; + } else { + smart_str_appendl(pbuf, p, fwd); + p += fwd; + i += fwd; } - p += 2; - i += 2; - } else { - smart_str_appendl(pbuf, p, fwd); - p += fwd; - i += fwd; } } + if (eval) { zval v; /* null terminate buffer */ @@ -928,7 +952,40 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp /* Clean up */ eval_buf.len = 0; zval_dtor(&v); + } else if (is_callable) { + zval *retval_ptr; + zval **args[1]; + zval *subpats; + int i; + + MAKE_STD_ZVAL(subpats); + array_init(subpats); + + for (i = 0; i < regs->num_regs; i++) { + add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1); + } + + args[0] = &subpats; + /* null terminate buffer */ + smart_str_0(&eval_buf); + + arg_replace_fci.param_count = 1; + arg_replace_fci.params = args; + arg_replace_fci.retval_ptr_ptr = &retval_ptr; + if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) { + convert_to_string_ex(&retval_ptr); + smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); + eval_buf.len = 0; + zval_ptr_dtor(&retval_ptr); + } else { + efree(description); + if (!EG(exception)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function"); + } + } + zval_ptr_dtor(&subpats); } + n = regs->end[0]; if ((pos - (OnigUChar *)string) < n) { pos = (OnigUChar *)string + n; @@ -969,7 +1026,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp Replace regular expression for multibyte string */ PHP_FUNCTION(mb_ereg_replace) { - _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); + _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); } /* }}} */ @@ -977,7 +1034,15 @@ PHP_FUNCTION(mb_ereg_replace) Case insensitive replace regular expression for multibyte string */ PHP_FUNCTION(mb_eregi_replace) { - _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE); + _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0); +} +/* }}} */ + +/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option]) + regular expression for multibyte string using replacement callback */ +PHP_FUNCTION(mb_ereg_replace_callback) +{ + _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); } /* }}} */ diff --git a/ext/mbstring/php_mbregex.h b/ext/mbstring/php_mbregex.h index 37b65091dd..2464c1b1c8 100644 --- a/ext/mbstring/php_mbregex.h +++ b/ext/mbstring/php_mbregex.h @@ -34,6 +34,7 @@ PHP_FE(mb_eregi, arginfo_mb_eregi) \ PHP_FE(mb_ereg_replace, arginfo_mb_ereg_replace) \ PHP_FE(mb_eregi_replace, arginfo_mb_eregi_replace) \ + PHP_FE(mb_ereg_replace_callback, arginfo_mb_ereg_replace_callback) \ PHP_FE(mb_split, arginfo_mb_split) \ PHP_FE(mb_ereg_match, arginfo_mb_ereg_match) \ PHP_FE(mb_ereg_search, arginfo_mb_ereg_search) \ @@ -81,6 +82,7 @@ PHP_FUNCTION(mb_ereg); PHP_FUNCTION(mb_eregi); PHP_FUNCTION(mb_ereg_replace); PHP_FUNCTION(mb_eregi_replace); +PHP_FUNCTION(mb_ereg_replace_callback); PHP_FUNCTION(mb_split); PHP_FUNCTION(mb_ereg_match); PHP_FUNCTION(mb_ereg_search); diff --git a/ext/mbstring/tests/mb_ereg_replace_callback.phpt b/ext/mbstring/tests/mb_ereg_replace_callback.phpt new file mode 100644 index 0000000000..98a3809574 --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_replace_callback.phpt @@ -0,0 +1,15 @@ +--TEST-- +mb_ereg_replace_callback() +--SKIPIF-- + +--FILE-- + +--EXPECT-- +abc(3) 123(3) #",;(4) $foo(4) +