granicus.if.org Git - php/commitdiff
Add support for PCRE marks
author Nikita Popov <nikic@php.net>
Fri, 28 Feb 2014 16:14:26 +0000 (17:14 +0100)
committer Nikita Popov <nikic@php.net>
Mon, 17 Mar 2014 21:32:40 +0000 (22:32 +0100)
If a MARK is set then it will be returned in the $matches array
under the key "MARK". If no MARK is used or passed, the key will not
be set.

NEWS
UPGRADING
ext/pcre/php_pcre.c
ext/pcre/tests/marks.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index d5d1ddb8598007e0e5a6d84f09eb6b07ae6a818b..708b31cc2a9d6ff774ef21d2d7216cf90617bfac 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -64,6 +64,9 @@ PHP                                                                        NEWS
     via the new  "SNI_server_certs" SSL context option. (Daniel Lowrey)
   . Fixed bug #66833 (Default disgest algo is still MD5, switch to SHA1). (Remi)
 
+- PCRE:
+  . Added support for (*MARK) backtracking verbs. (Nikita)
+
 - PDO_pgsql:
   . Cleaned up code by increasing the requirements to libpq versions providing
     PQexecParams, PQprepare, PQescapeStringConn, PQescapeByteaConn. According
diff --git a/UPGRADING b/UPGRADING
index ac31e800440695b94aec6681d619bdb4bd84ad8e..eaced08984694c57c97feb3949e7af35256256fa 100755 (executable)
--- a/UPGRADING
+++ b/UPGRADING
@@ -279,6 +279,11 @@ PHP 5.6 UPGRADE NOTES
   - The "SNI_enabled" SSL stream context option is now set to TRUE by default
     if supported by the underlying openssl library.
 
+- PCRE:
+  - The information collected by the (*MARK) backtracking control verb is now
+    collected into the "MARK" index of the $matches array for preg_match(),
+    preg_match_all() and preg_replace_callback().
+
 - Pgsql:
   - pg_insert()/pg_select()/pg_update()/pg_delete()/pg_meta_data()/pg_convert()
     are no longer EXPERIMENTAL
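diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 84ad12311dced69e6798705bc852b94f5b85a962..52d43ced3023f34217cf2dbd123eda7a5cc34357 100644 (file)
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c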
@@ -577,6 +577,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
        int                              i, rc;
        int                              subpats_order;         /* Order of subpattern matches */
        int                              offset_capture;    /* Capture match offsets: yes/no */
+       unsigned char   *mark = NULL;       /* Target for MARK name */
+       zval            *marks = NULL;      /* Array of marks for PREG_PATTERN_ORDER */
 
        /* Overwrite the passed-in value for subpatterns with an empty array. */
        if (subpats != NULL) {
@@ -619,6 +621,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
        }
        extra->match_limit = PCRE_G(backtrack_limit);
        extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+       extra->mark = &mark;
+       extra->flags |= PCRE_EXTRA_MARK;
+#endif
 
        /* Calculate the size of the offsets array, and allocate memory for it. */
        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
@@ -695,6 +701,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                                                                           offsets[(i<<1)+1] - offsets[i<<1], 1);
                                                        }
                                                }
+                                               /* Add MARK, if available */
+                                               if (mark) {
+                                                       if (!marks) {
+                                                               MAKE_STD_ZVAL(marks);
+                                                               array_init(marks);
+                                                       }
+                                                       add_index_string(marks, matched - 1, (char *) mark, 1);
+                                               }
                                                /*
                                                 * If the number of captured subpatterns on this run is
                                                 * less than the total possible number, pad the result
@@ -725,6 +739,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                                                                           offsets[(i<<1)+1] - offsets[i<<1], 1);
                                                        }
                                                }
+                                               /* Add MARK, if available */
+                                               if (mark) {
+                                                       add_assoc_string(result_set, "MARK", (char *) mark, 1);
+                                               }
                                                /* And add it to the output array */
                                                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
                                        }
@@ -744,6 +762,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                                                                   offsets[(i<<1)+1] - offsets[i<<1], 1);
                                                }
                                        }
+                                       /* Add MARK, if available */
+                                       if (mark) {
+                                               add_assoc_string(subpats, "MARK", (char *) mark, 1);
+                                       }
                                }
 
                                pcre_free((void *) stringlist);
@@ -784,6 +806,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
                }
                efree(match_sets);
+
+               if (marks) {
+                       add_assoc_zval(subpats, "MARK", marks);
+               }
        }
        
        efree(offsets);
@@ -855,7 +881,7 @@ static int preg_get_backref(char **str, int *backref)
 
 /* {{{ preg_do_repl_func
  */
-static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
+static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC)
 {
        zval            *retval_ptr;            /* Function return value */
        zval       **args[1];                   /* Argument to pass to function */
@@ -871,6 +897,9 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, char *
                }
                add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
        }
+       if (mark) {
+               add_assoc_string(subpats, "MARK", (char *) mark, 1);
+       }
        args[0] = &subpats;
 
        if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@@ -1032,6 +1061,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
                                        *eval_result,           /* Result of eval or custom function */
                                         walk_last;                     /* Last walked character */
        int                              rc;
+       unsigned char   *mark = NULL;       /* Target for MARK name */
 
        if (extra == NULL) {
                extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
@@ -1039,6 +1069,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
        }
        extra->match_limit = PCRE_G(backtrack_limit);
        extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+       extra->mark = &mark;
+       extra->flags |= PCRE_EXTRA_MARK;
+#endif
 
        eval = pce->preg_options & PREG_REPLACE_EVAL;
        if (is_callable_replace) {
@@ -1118,7 +1152,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
                                new_len += eval_result_len;
                        } else if (is_callable_replace) {
                                /* Use custom function to get replacement string and its length. */
-                               eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
+                               eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC);
                                new_len += eval_result_len;
                        } else { /* do regular substitution */
                                walk = replace;
@@ -1517,6 +1551,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
        }
        extra->match_limit = PCRE_G(backtrack_limit);
        extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+       extra->flags &= ~PCRE_EXTRA_MARK;
+#endif
        
        /* Initialize return value */
        array_init(return_value);
@@ -1785,6 +1822,9 @@ PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
        }
        extra->match_limit = PCRE_G(backtrack_limit);
        extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+       extra->flags &= ~PCRE_EXTRA_MARK;
+#endif
 
        /* Calculate the size of the offsets array, and allocate memory for it. */
        rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt
new file mode 100644 (file)
index 0000000..8838a00
--- /dev/null
+++ b/ext/pcre/tests/marks.phpt
@@ -0,0 +1,202 @@
+--TEST--
+Test support for PCRE marks
+--SKIPIF--
+<?php
+if (version_compare(PCRE_VERSION, '8.1', '<')) {
+    die('skip PCRE_VERSION >= 8.1 is required for MARK support');
+}
+?>
+--FILE--
+<?php
+
+$regex = <<<'REGEX'
+/ 
+    _ (a) (*MARK:A_MARK) _
+  | _ (b) _
+  | _ (c) (*MARK:C_MARK) _
+  | _ (d) _
+/x
+REGEX;
+
+var_dump(preg_match($regex, '_c_', $matches));
+var_dump($matches);
+
+var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_PATTERN_ORDER));
+var_dump($matches);
+
+var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_SET_ORDER));
+var_dump($matches);
+
+var_dump(preg_replace_callback($regex, function($matches) {
+    var_dump($matches);
+    return $matches[0];
+}, '_a__b__c__d_'));
+
+?>
+--EXPECTF--
+int(1)
+array(5) {
+  [0]=>
+  string(3) "_c_"
+  [1]=>
+  string(0) ""
+  [2]=>
+  string(0) ""
+  [3]=>
+  string(1) "c"
+  ["MARK"]=>
+  string(6) "C_MARK"
+}
+int(4)
+array(6) {
+  [0]=>
+  array(4) {
+    [0]=>
+    string(3) "_a_"
+    [1]=>
+    string(3) "_b_"
+    [2]=>
+    string(3) "_c_"
+    [3]=>
+    string(3) "_d_"
+  }
+  [1]=>
+  array(4) {
+    [0]=>
+    string(1) "a"
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(0) ""
+    [3]=>
+    string(0) ""
+  }
+  [2]=>
+  array(4) {
+    [0]=>
+    string(0) ""
+    [1]=>
+    string(1) "b"
+    [2]=>
+    string(0) ""
+    [3]=>
+    string(0) ""
+  }
+  [3]=>
+  array(4) {
+    [0]=>
+    string(0) ""
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(1) "c"
+    [3]=>
+    string(0) ""
+  }
+  [4]=>
+  array(4) {
+    [0]=>
+    string(0) ""
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(0) ""
+    [3]=>
+    string(1) "d"
+  }
+  ["MARK"]=>
+  array(2) {
+    [0]=>
+    string(6) "A_MARK"
+    [2]=>
+    string(6) "C_MARK"
+  }
+}
+int(4)
+array(4) {
+  [0]=>
+  array(3) {
+    [0]=>
+    string(3) "_a_"
+    [1]=>
+    string(1) "a"
+    ["MARK"]=>
+    string(6) "A_MARK"
+  }
+  [1]=>
+  array(3) {
+    [0]=>
+    string(3) "_b_"
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(1) "b"
+  }
+  [2]=>
+  array(5) {
+    [0]=>
+    string(3) "_c_"
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(0) ""
+    [3]=>
+    string(1) "c"
+    ["MARK"]=>
+    string(6) "C_MARK"
+  }
+  [3]=>
+  array(5) {
+    [0]=>
+    string(3) "_d_"
+    [1]=>
+    string(0) ""
+    [2]=>
+    string(0) ""
+    [3]=>
+    string(0) ""
+    [4]=>
+    string(1) "d"
+  }
+}
+array(3) {
+  [0]=>
+  string(3) "_a_"
+  [1]=>
+  string(1) "a"
+  ["MARK"]=>
+  string(6) "A_MARK"
+}
+array(3) {
+  [0]=>
+  string(3) "_b_"
+  [1]=>
+  string(0) ""
+  [2]=>
+  string(1) "b"
+}
+array(5) {
+  [0]=>
+  string(3) "_c_"
+  [1]=>
+  string(0) ""
+  [2]=>
+  string(0) ""
+  [3]=>
+  string(1) "c"
+  ["MARK"]=>
+  string(6) "C_MARK"
+}
+array(5) {
+  [0]=>
+  string(3) "_d_"
+  [1]=>
+  string(0) ""
+  [2]=>
+  string(0) ""
+  [3]=>
+  string(0) ""
+  [4]=>
+  string(1) "d"
+}
+string(12) "_a__b__c__d_"