From: Nikita Popov Date: Fri, 28 Feb 2014 16:14:26 +0000 (+0100) Subject: Add support for PCRE marks X-Git-Tag: PRE_PHPNG_MERGE~473^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=42562ee913e039ccb24ff11bc5352f0b0e02ee28;p=php Add support for PCRE marks If a MARK is set then it will be returned in the $matches array under the key "MARK". If no MARK is used or passed, the key will not be set. --- diff --git a/NEWS b/NEWS index d5d1ddb859..708b31cc2a 100644 --- a/NEWS +++ b/NEWS @@ -64,6 +64,9 @@ PHP NEWS via the new "SNI_server_certs" SSL context option. (Daniel Lowrey) . Fixed bug #66833 (Default disgest algo is still MD5, switch to SHA1). (Remi) +- PCRE: + . Added support for (*MARK) backtracking verbs. (Nikita) + - PDO_pgsql: . Cleaned up code by increasing the requirements to libpq versions providing PQexecParams, PQprepare, PQescapeStringConn, PQescapeByteaConn. According diff --git a/UPGRADING b/UPGRADING index ac31e80044..eaced08984 100755 --- a/UPGRADING +++ b/UPGRADING @@ -279,6 +279,11 @@ PHP 5.6 UPGRADE NOTES - The "SNI_enabled" SSL stream context option is now set to TRUE by default if supported by the underlying openssl library. +- PCRE: + - The information collected by the (*MARK) backtracking control verb is now + collected into the "MARK" index of the $matches array for preg_match(), + preg_match_all() and preg_replace_callback(). 
+ - Pgsql: - pg_insert()/pg_select()/pg_update()/pg_delete()/pg_meta_data()/pg_convert() are no longer EXPERIMENTAL diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 84ad12311d..52d43ced30 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -577,6 +577,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int i, rc; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ + unsigned char *mark = NULL; /* Target for MARK name */ + zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */ /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { @@ -619,6 +621,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. 
*/ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); @@ -695,6 +701,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + if (!marks) { + MAKE_STD_ZVAL(marks); + array_init(marks); + } + add_index_string(marks, matched - 1, (char *) mark, 1); + } /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result @@ -725,6 +739,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(result_set, "MARK", (char *) mark, 1); + } /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); } @@ -744,6 +762,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } } pcre_free((void *) stringlist); @@ -784,6 +806,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); } efree(match_sets); + + if (marks) { + add_assoc_zval(subpats, "MARK", marks); + } } efree(offsets); @@ -855,7 +881,7 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -871,6 +897,9 @@ static int preg_do_repl_func(zval 
*function, char *subject, int *offsets, char * } add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); } + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -1032,6 +1061,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ int rc; + unsigned char *mark = NULL; /* Target for MARK name */ if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; @@ -1039,6 +1069,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif eval = pce->preg_options & PREG_REPLACE_EVAL; if (is_callable_replace) { @@ -1118,7 +1152,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub new_len += eval_result_len; } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. 
*/ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); + eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1517,6 +1551,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif /* Initialize return value */ array_init(return_value); @@ -1785,6 +1822,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt new file mode 100644 index 0000000000..8838a00500 --- /dev/null +++ b/ext/pcre/tests/marks.phpt @@ -0,0 +1,202 @@ +--TEST-- +Test support for PCRE marks +--SKIPIF-- += 8.1 is required for MARK support'); +} +?> +--FILE-- + +--EXPECTF-- +int(1) +array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" +} +int(4) +array(6) { + [0]=> + array(4) { + [0]=> + string(3) "_a_" + [1]=> + string(3) "_b_" + [2]=> + string(3) "_c_" + [3]=> + string(3) "_d_" + } + [1]=> + array(4) { + [0]=> + string(1) "a" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + } + [2]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(1) "b" + [2]=> + string(0) "" + [3]=> + string(0) "" + } + [3]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + 
string(1) "c" + [3]=> + string(0) "" + } + [4]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "d" + } + ["MARK"]=> + array(2) { + [0]=> + string(6) "A_MARK" + [2]=> + string(6) "C_MARK" + } +} +int(4) +array(4) { + [0]=> + array(3) { + [0]=> + string(3) "_a_" + [1]=> + string(1) "a" + ["MARK"]=> + string(6) "A_MARK" + } + [1]=> + array(3) { + [0]=> + string(3) "_b_" + [1]=> + string(0) "" + [2]=> + string(1) "b" + } + [2]=> + array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" + } + [3]=> + array(5) { + [0]=> + string(3) "_d_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(1) "d" + } +} +array(3) { + [0]=> + string(3) "_a_" + [1]=> + string(1) "a" + ["MARK"]=> + string(6) "A_MARK" +} +array(3) { + [0]=> + string(3) "_b_" + [1]=> + string(0) "" + [2]=> + string(1) "b" +} +array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" +} +array(5) { + [0]=> + string(3) "_d_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(1) "d" +} +string(12) "_a__b__c__d_"