From 6311581ac64372164de7ba24f086eb3b0b91eabb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 19 Mar 2019 12:11:05 +0100 Subject: [PATCH] Fix bug #73948 If PREG_UNMATCHED_AS_NULL is used, make sure that unmatched capturing groups at the end are also set to null, rather than just those in the middle. --- NEWS | 4 +- UPGRADING | 5 ++ ext/pcre/php_pcre.c | 38 ++++++++++-- ext/pcre/tests/bug61780_1.phpt | 30 ++++++++++ ext/pcre/tests/bug61780_2.phpt | 60 +++++++++++++++++++ .../tests/preg_replace_callback_flags.phpt | 8 ++- 6 files changed, 136 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index b4e5db0a24..b2eb41cbd9 100644 --- a/NEWS +++ b/NEWS @@ -67,9 +67,11 @@ PHP NEWS (Sammy Kaye Powers) - PCRE: + . Implemented FR #77094 (Support flags in preg_replace_callback). (Nikita) . Fixed bug #72685 (Repeated UTF-8 validation of same string in UTF-8 mode). (Nikita) - . Implemented FR #77094 (Support flags in preg_replace_callback). (Nikita) + . Fixed bug #73948 (Preg_match_all should return NULLs on trailing optional + capture groups). - PDO_OCI: . Support Oracle Database tracing attributes ACTION, MODULE, diff --git a/UPGRADING b/UPGRADING index d8dfe2b8a5..fc8fa459ce 100644 --- a/UPGRADING +++ b/UPGRADING @@ -50,6 +50,11 @@ PHP 7.4 UPGRADE NOTES function does not throw, so explicitly checking it is not necessary. RFC: http://php.net/manual/de/function.openssl-random-pseudo-bytes.php +- PCRE: + . When PREG_UNMATCHED_AS_NULL mode is used, trailing unmatched capturing + groups will now also be set to null (or [null, -1] if offset capture is + enabled). This means that the size of the $matches will always be the same. + - PEAR: . Installation of PEAR (including PECL) is no longer enabled by default. It can be explicitly enabled using --with-pear. This option is deprecated and diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index bdc299806c..701c7f00d2 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1028,7 +1028,7 @@ static inline void populate_match_value( static void populate_subpat_array( zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, - int count, const PCRE2_SPTR mark, zend_long flags) { + uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0; zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0; zval val; @@ -1040,6 +1040,11 @@ static void populate_subpat_array( offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null); } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, subpat_names[i], 1); + } + } } else { for (i = 0; i < count; i++) { populate_match_value( @@ -1050,6 +1055,15 @@ static void populate_subpat_array( } zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + ZVAL_NULL(&val); + if (subpat_names[i]) { + zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val); + } + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + } + } } } else { if (offset_capture) { @@ -1058,12 +1072,22 @@ static void populate_subpat_array( offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, NULL, 1); + } + } } else { for (i = 0; i < count; i++) { populate_match_value( &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_next_index_null(subpats); + } + } } } /* Add MARK, if available */ @@ -1306,7 +1330,8 @@ matched: array_init_size(&result_set, count + (mark ? 1 : 0)); mark = pcre2_get_mark(match_data); populate_subpat_array( - &result_set, subject, offsets, subpat_names, count, mark, flags); + &result_set, subject, offsets, subpat_names, + num_subpats, count, mark, flags); /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set); } @@ -1314,7 +1339,7 @@ matched: /* For each subpattern, insert it into the subpatterns array. */ mark = pcre2_get_mark(match_data); populate_subpat_array( - subpats, subject, offsets, subpat_names, count, mark, flags); + subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags); break; } } @@ -1473,14 +1498,14 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, int count, const PCRE2_SPTR mark, zend_long flags) +static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { zend_string *result_str; zval retval; /* Function return value */ zval arg; /* Argument to pass to function */ array_init_size(&arg, count + (mark ? 1 : 0)); - populate_subpat_array(&arg, subject, offsets, subpat_names, count, mark, flags); + populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags); fci->retval = &retval; fci->param_count = 1; @@ -1878,7 +1903,8 @@ matched: new_len = result_len + offsets[0] - start_offset; /* part before the match */ /* Use custom function to get replacement string and its length. */ - eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, + eval_result = preg_do_repl_func( + fci, fcc, subject, offsets, subpat_names, num_subpats, count, pcre2_get_mark(match_data), flags); ZEND_ASSERT(eval_result); diff --git a/ext/pcre/tests/bug61780_1.phpt b/ext/pcre/tests/bug61780_1.phpt index 932f43ffd6..17add891a8 100644 --- a/ext/pcre/tests/bug61780_1.phpt +++ b/ext/pcre/tests/bug61780_1.phpt @@ -145,6 +145,8 @@ array ( 0 => array ( 0 => '1', + 1 => NULL, + 2 => NULL, ), 1 => array ( @@ -156,10 +158,13 @@ array ( array ( 0 => '45', 1 => '4', + 2 => NULL, ), 3 => array ( 0 => '6', + 1 => NULL, + 2 => NULL, ), ) @@ -171,6 +176,16 @@ array ( 0 => '1', 1 => 0, ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), 1 => array ( @@ -202,6 +217,11 @@ array ( 0 => '4', 1 => 3, ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), 3 => array ( @@ -210,5 +230,15 @@ array ( 0 => '6', 1 => 5, ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), ) diff --git a/ext/pcre/tests/bug61780_2.phpt b/ext/pcre/tests/bug61780_2.phpt index acc3e96e89..01d58128c5 100644 --- a/ext/pcre/tests/bug61780_2.phpt +++ b/ext/pcre/tests/bug61780_2.phpt @@ -217,6 +217,10 @@ array ( 0 => array ( 0 => '1', + 'a' => NULL, + 1 => NULL, + 'b' => NULL, + 2 => NULL, ), 1 => array ( @@ -231,10 +235,16 @@ array ( 0 => '45', 'a' => '4', 1 => '4', + 'b' => NULL, + 2 => NULL, ), 3 => array ( 0 => '6', + 'a' => NULL, + 1 => NULL, + 'b' => NULL, + 2 => NULL, ), ) @@ -246,6 +256,26 @@ array ( 0 => '1', 1 => 0, ), + 'a' => + array ( + 0 => NULL, + 1 => -1, + ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 'b' => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), 1 => array ( @@ -292,6 +322,16 @@ array ( 0 => '4', 1 => 3, ), + 'b' => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), 3 => array ( @@ -300,5 +340,25 @@ array ( 0 => '6', 1 => 5, ), + 'a' => + array ( + 0 => NULL, + 1 => -1, + ), + 1 => + array ( + 0 => NULL, + 1 => -1, + ), + 'b' => + array ( + 0 => NULL, + 1 => -1, + ), + 2 => + array ( + 0 => NULL, + 1 => -1, + ), ), ) diff --git a/ext/pcre/tests/preg_replace_callback_flags.phpt b/ext/pcre/tests/preg_replace_callback_flags.phpt index f85f9a5313..7e9a130ba7 100644 --- a/ext/pcre/tests/preg_replace_callback_flags.phpt +++ b/ext/pcre/tests/preg_replace_callback_flags.phpt @@ -93,11 +93,13 @@ array(1) { } string(3) "abc" -array(2) { +array(3) { [0]=> string(1) "a" [1]=> string(1) "a" + [2]=> + NULL } array(3) { [0]=> @@ -109,11 +111,13 @@ array(3) { } string(3) "abc" -array(2) { +array(3) { [0]=> string(1) "a" [1]=> string(1) "a" + [2]=> + NULL } array(3) { [0]=> -- 2.40.0