From 40fe50daf6069f73d0be43d8fcd7d5953053c10d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 11 Feb 2019 12:10:40 +0100 Subject: [PATCH] Validate pattern against mbregex encoding Oniguruma does not consistently perform this validation itself (at least on older versions), so make sure we check pattern encoding validity on the PHP side. --- ext/mbstring/php_mbregex.c | 10 +++++++--- ext/mbstring/tests/bug72994.phpt | 4 +++- ext/mbstring/tests/bug77370.phpt | 8 +++----- ext/mbstring/tests/bug77371.phpt | 5 +++-- ext/mbstring/tests/bug77381.phpt | 11 +++++++++-- ext/mbstring/tests/mb_ereg_replace_variation1.phpt | 8 ++++++-- 6 files changed, 31 insertions(+), 15 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 5b40661b09..ab4fe38df2 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -451,13 +451,18 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; + if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) { + php_error_docref(NULL, E_WARNING, + "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc)); + return NULL; + } + rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen); if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) { if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { onig_error_code_to_str(err_str, err_code, &err_info); php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str); - retval = NULL; - goto out; + return NULL; } if (rc == MBREX(search_re)) { /* reuse the new rc? see bug #72399 */ @@ -467,7 +472,6 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl } else { retval = rc; } -out: return retval; } /* }}} */ diff --git a/ext/mbstring/tests/bug72994.phpt b/ext/mbstring/tests/bug72994.phpt index 1d37bae7be..d001fac679 100644 --- a/ext/mbstring/tests/bug72994.phpt +++ b/ext/mbstring/tests/bug72994.phpt @@ -13,5 +13,7 @@ var_dump($var1); ===DONE=== --EXPECTF-- Notice: Undefined variable: var in %s on line %d -string(0) "" + +Warning: mbereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) ===DONE=== diff --git a/ext/mbstring/tests/bug77370.phpt b/ext/mbstring/tests/bug77370.phpt index c4d25582fe..d16fcd724d 100644 --- a/ext/mbstring/tests/bug77370.phpt +++ b/ext/mbstring/tests/bug77370.phpt @@ -6,8 +6,6 @@ Bug #77370 (Buffer overflow on mb regex functions - fetch_token) ---EXPECT-- -array(1) { - [0]=> - string(0) "" -} +--EXPECTF-- +Warning: mb_split(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/bug77371.phpt b/ext/mbstring/tests/bug77371.phpt index 33e5fc115c..25f5ac9aca 100644 --- a/ext/mbstring/tests/bug77371.phpt +++ b/ext/mbstring/tests/bug77371.phpt @@ -6,5 +6,6 @@ Bug #77371 (heap buffer overflow in mb regex functions - compile_string_node) ---EXPECT-- -bool(false) \ No newline at end of file +--EXPECTF-- +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/bug77381.phpt b/ext/mbstring/tests/bug77381.phpt index cb83759fc0..9768cd34de 100644 --- a/ext/mbstring/tests/bug77381.phpt +++ b/ext/mbstring/tests/bug77381.phpt @@ -9,8 +9,15 @@ var_dump(mb_ereg("(?i)000000000000000000000\xf0","")); var_dump(mb_ereg("0000\\"."\xf5","0")); var_dump(mb_ereg("(?i)FFF00000000000000000\xfd","")); ?> ---EXPECT-- -int(1) +--EXPECTF-- +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) + +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) + +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) + +Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d bool(false) diff --git a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt index 059d2c9dd8..626e9a20e6 100644 --- a/ext/mbstring/tests/mb_ereg_replace_variation1.phpt +++ b/ext/mbstring/tests/mb_ereg_replace_variation1.phpt @@ -109,13 +109,17 @@ string(10) "string_val" string(10) "string_val" -- Iteration 4 -- -string(10) "string_val" + +Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) -- Iteration 5 -- string(10) "string_val" -- Iteration 6 -- -string(10) "string_val" + +Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d +bool(false) -- Iteration 7 -- string(10) "string_val" -- 2.40.0