]> granicus.if.org Git - php/commitdiff
MFB: fix #37911
author Nuno Lopes <nlopess@php.net>
Sun, 7 Oct 2007 12:09:02 +0000 (12:09 +0000)
committer Nuno Lopes <nlopess@php.net>
Sun, 7 Oct 2007 12:09:02 +0000 (12:09 +0000)
ext/pcre/php_pcre.c
ext/pcre/tests/bug37911.phpt [new file with mode: 0644]

index 5fcf7e64d5381a08dec7f85831fdda65f62d5450..0dc52d5cc0fc0615a515f114d435d8c09abe266f 100644 (file)
@@ -186,6 +186,48 @@ static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
 }
 /* }}} */
 
+/* {{{ static make_subpats_table
+ *
+ * Builds the table that maps subpattern numbers to subpattern names for the
+ * compiled regex held in `pce`.
+ *
+ * num_subpats  Number of slots to allocate (callers pass capture count + 1).
+ * rc           Ignored on entry; it is overwritten by the first
+ *              pcre_fullinfo() call and only kept for caller compatibility.
+ * pce          Cache entry providing the compiled pattern and its study data.
+ *
+ * Returns a zero-initialised array of `num_subpats` pointers in which slot N
+ * holds the name of subpattern N, or NULL if that subpattern is unnamed. The
+ * name pointers reference PCRE's own name table and are not copied, so only
+ * the array itself has to be released with efree() by the caller.
+ *
+ * Returns NULL (after raising an E_WARNING and freeing the array) when
+ * pcre_fullinfo() fails or when a subpattern has a numeric name.
+ */
+static char **make_subpats_table(int num_subpats, int rc, pcre_cache_entry *pce)
+{
+       pcre_extra *extra = pce->extra;
+       int name_cnt = 0, name_size, ni = 0;
+       char *name_table;
+       unsigned short name_idx;
+       char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
+       /* This function has no TSRMLS_DC parameter, so fetch the thread context
+        * here; otherwise the TSRMLS_CC uses below do not compile in ZTS builds. */
+       TSRMLS_FETCH();
+
+       rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
+       if (rc < 0) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+               efree(subpat_names);
+               return NULL;
+       }
+       if (name_cnt > 0) {
+               int rc1, rc2;
+               rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
+               rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
+               /* Report whichever of the two lookups failed. */
+               rc = rc2 ? rc2 : rc1;
+               if (rc < 0) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+                       efree(subpat_names);
+                       return NULL;
+               }
+
+               while (ni++ < name_cnt) {
+                       /*
+                        * Each entry starts with the subpattern number as two bytes,
+                        * most significant first, followed by the zero-terminated
+                        * name. Read the bytes as unsigned and scale the high byte by
+                        * 0x100 so numbers above 127 are decoded correctly.
+                        */
+                       name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
+                       subpat_names[name_idx] = name_table + 2;
+                       if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
+                               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
+                               efree(subpat_names);
+                               return NULL;
+                       }
+                       /* Entries are fixed-size; step to the next one. */
+                       name_table += name_size;
+               }
+       }
+
+       return subpat_names;
+}
+/* }}} */
+
 /* {{{ pcre_get_compiled_regex_cache
  */
 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_uchar utype, char *regex, int regex_len TSRMLS_DC)
@@ -554,7 +596,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_uchar utype, char *s
        int                              g_notempty = 0;        /* If the match should not be empty */
        const char         **stringlist;                /* Holds list of subpatterns */
        char                    *match;                         /* The current match */
-       char               **subpat_names = NULL;/* Array for named subpatterns */
+       char               **subpat_names;              /* Array for named subpatterns */
        int                              i, rc;
        int                              subpats_order;         /* Order of subpattern matches */
        int                              offset_capture;    /* Capture match offsets: yes/no */
@@ -624,55 +666,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_uchar utype, char *s
        }
        num_subpats++;
        size_offsets = num_subpats * 3;
-       offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
 
        /*
         * Build a mapping from subpattern numbers to their names. We will always
         * allocate the table, even though there may be no named subpatterns. This
         * avoids somewhat more complicated logic in the inner loops.
         */
-       subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
-       memset(subpat_names, 0, sizeof(char *) * num_subpats);
-       {
-               int name_cnt = 0, name_size, ni = 0;
-               char *name_table;
-               unsigned short name_idx;
-
-               rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
-               if (rc < 0) {
-                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
-                       efree(offsets);
-                       efree(subpat_names);
-                       RETURN_FALSE;
-               }
-               if (name_cnt > 0) {
-                       int rc1, rc2;
-                       long dummy_l;
-                       double dummy_d;
-                       rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
-                       rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
-                       rc = rc2 ? rc2 : rc1;
-                       if (rc < 0) {
-                               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
-                               efree(offsets);
-                               efree(subpat_names);
-                               RETURN_FALSE;
-                       }
-
-                       while (ni++ < name_cnt) {
-                               name_idx = 0xff * name_table[0] + name_table[1];
-                               subpat_names[name_idx] = name_table + 2;
-                               if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), &dummy_l, &dummy_d, 0) > 0) {
-                                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
-                                       efree(offsets);
-                                       efree(subpat_names);
-                                       RETURN_FALSE;
-                               }
-                               name_table += name_size;
-                       }
-               }
+       subpat_names = make_subpats_table(num_subpats, rc, pce);
+       if (!subpat_names) {
+               RETURN_FALSE;
        }
 
+       offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
+
        /* Allocate match sets array and initialize the values. */
        if (global && subpats_order == PREG_PATTERN_ORDER) {
                match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
@@ -923,7 +929,7 @@ static int preg_get_backref(char **str, int *backref)
 
 /* {{{ preg_do_repl_func
  */
-static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
+static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
 {
        zval            *retval_ptr;            /* Function return value */
        zval       **args[1];                   /* Argument to pass to function */
@@ -933,8 +939,12 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, int co
 
        MAKE_STD_ZVAL(subpats);
        array_init(subpats);
-       for (i = 0; i < count; i++)
+       for (i = 0; i < count; i++) {
+               if (subpat_names[i]) {
+                       add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+               }
                add_next_index_utf8_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+       }
        args[0] = &subpats;
 
        if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@@ -1079,6 +1089,8 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char
        int                              exoptions = 0;         /* Execution options */
        int                              count = 0;                     /* Count of matched subpatterns */
        int                             *offsets;                       /* Array of subpattern offsets */
+       char                    **subpat_names;         /* Array for named subpatterns */
+       int                              num_subpats;           /* Number of captured subpatterns */
        int                              size_offsets;          /* Size of the offsets array */
        int                              new_len;                       /* Length of needed storage */
        int                              alloc_len;                     /* Actual allocated length */
@@ -1122,12 +1134,24 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char
        }
 
        /* Calculate the size of the offsets array, and allocate memory for it. */
-       rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+       rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
        if (rc < 0) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
                return NULL;
        }
-       size_offsets = (size_offsets + 1) * 3;
+       num_subpats++;
+       size_offsets = num_subpats * 3;
+
+       /*
+        * Build a mapping from subpattern numbers to their names. We will always
+        * allocate the table, even though there may be no named subpatterns. This
+        * avoids somewhat more complicated logic in the inner loops.
+        */
+       subpat_names = make_subpats_table(num_subpats, rc, pce);
+       if (!subpat_names) {
+               return NULL;
+       }
+
        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
        
        alloc_len = 2 * subject_len + 1;
@@ -1172,8 +1196,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char
                                new_len += eval_result_len;
                        } else if (is_callable_replace) {
                                /* Use custom function to get replacement string and its length. */
-                               eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
-                                                                                                       count, &eval_result TSRMLS_CC);
+                               eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
                                new_len += eval_result_len;
                        } else { /* do regular substitution */
                                walk = replace;
@@ -1293,8 +1316,9 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype, char
                /* Advance to the next piece. */
                start_offset = offsets[1];
        }
-       
+
        efree(offsets);
+       efree(subpat_names);
 
        return result;
 }
diff --git a/ext/pcre/tests/bug37911.phpt b/ext/pcre/tests/bug37911.phpt
new file mode 100644 (file)
index 0000000..d55bd87
--- /dev/null
@@ -0,0 +1,63 @@
+--TEST--
+Bug #37911 (preg_replace_callback ignores named groups)
+--FILE--
+<?php
+
+function callback($match) // replacement callback handed to preg_replace_callback(); $match is the match array
+{
+       var_dump($match); // dumped so the expected output can check that the "name" key is present
+       return $match[1].'/'.strlen($match['name']); // reads the same group by number and by name
+}
+
+var_dump(preg_replace_callback('|(?P<name>blub)|', 'callback', 'bla blub blah')); // named group must be visible inside the callback
+
+var_dump(preg_match('|(?P<name>blub)|', 'bla blub blah', $m)); // same pattern through preg_match() for comparison
+var_dump($m); // expected to have the same keys as the array dumped by callback()
+
+var_dump(preg_replace_callback('|(?P<1>blub)|', 'callback', 'bla blub blah')); // numeric group name: expected warning + NULL; must stay on script line 14, which the expected output cites
+
+?>
+--EXPECTF--
+array(3) {
+  [0]=>
+  string(4) "blub"
+  ["name"]=>
+  string(4) "blub"
+  [1]=>
+  string(4) "blub"
+}
+string(15) "bla blub/4 blah"
+int(1)
+array(3) {
+  [0]=>
+  string(4) "blub"
+  ["name"]=>
+  string(4) "blub"
+  [1]=>
+  string(4) "blub"
+}
+
+Warning: preg_replace_callback(): Numeric named subpatterns are not allowed in %sbug37911.php on line 14
+NULL
+--UEXPECTF--
+array(3) {
+  [0]=>
+  unicode(4) "blub"
+  [u"name"]=>
+  unicode(4) "blub"
+  [1]=>
+  unicode(4) "blub"
+}
+unicode(15) "bla blub/4 blah"
+int(1)
+array(3) {
+  [0]=>
+  unicode(4) "blub"
+  [u"name"]=>
+  unicode(4) "blub"
+  [1]=>
+  unicode(4) "blub"
+}
+
+Warning: preg_replace_callback(): Numeric named subpatterns are not allowed in %sbug37911.php on line 14
+NULL