]> granicus.if.org Git - php/commitdiff
This submission introduces two features.
authorAndrei Zmievski <andrei@php.net>
Thu, 23 Nov 2000 17:25:13 +0000 (17:25 +0000)
committerAndrei Zmievski <andrei@php.net>
Thu, 23 Nov 2000 17:25:13 +0000 (17:25 +0000)
The first one is support for Perl-style matching regexp delimiters, i.e.
using <[{( and )}]> to delimit the regular expressions.

The second one is a new 'F' modifier that allows you to specify a function name
in the replacement argument to preg_replace(). This function will be called
when the replacement needs to be made. It is passed an array of full matched
pattern and captured subpatterns and it is expected to return a string that
will be used for replacement. 'e' and 'F' modifiers cannot be used together.

@- Implemented support for Perl-style matching regexp delimiters in PCRE.
@  You can use <{[( and )]}> to delimit your expressions now. (Andrei)

@- Introduced new 'F' modifier in PCRE that lets you specify a function
@  name in the replacement argument to preg_replace() that will be called
@  at run-time to provide the replacement string. It is passed an array of
@  matched pattern and subpatterns. (Andrei)

ext/pcre/php_pcre.c

index d4a022bbdcb7c0c4f79c2bae5c5eaa0a8762f376..53cf7da06d0ca2e7c5c2da14fe85e05a85fc846f 100644 (file)
@@ -41,6 +41,7 @@
 #define        PREG_SPLIT_NO_EMPTY     (1<<0)
 
 #define PREG_REPLACE_EVAL      (1<<0)
+#define PREG_REPLACE_FUNC      (1<<1)
 
 #ifdef ZTS
 int pcre_globals_id;
@@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
        const char                      *error;
        int                                      erroffset;
        char                             delimiter;
+       char                             start_delimiter;
+       char                             end_delimiter;
        char                            *p, *pp;
        char                            *pattern;
        int                                      regex_len;
@@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
                zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
                return NULL;
        }
-       
-       /* We need to iterate through the pattern, searching for the ending delimiter,
-          but skipping the backslashed delimiters.  If the ending delimiter is not
-          found, display a warning. */
-       pp = p;
-       while (*pp != 0) {
-               if (*pp == '\\' && pp[1] != 0) pp++;
-               else if (*pp == delimiter)
-                       break;
-               pp++;
-       }
-       if (*pp == 0) {
-               zend_error(E_WARNING, "No ending delimiter found");
-               return NULL;
+
+       start_delimiter = delimiter;
+       if ((pp = strchr("([{< )]}> )]}>", delimiter)))
+               delimiter = pp[5];
+       end_delimiter = delimiter;
+
+       if (start_delimiter == end_delimiter) {
+               /* We need to iterate through the pattern, searching for the ending delimiter,
+                  but skipping the backslashed delimiters.  If the ending delimiter is not
+                  found, display a warning. */
+               pp = p;
+               while (*pp != 0) {
+                       if (*pp == '\\' && pp[1] != 0) pp++;
+                       else if (*pp == delimiter)
+                               break;
+                       pp++;
+               }
+               if (*pp == 0) {
+                       zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter);
+                       return NULL;
+               }
+       } else {
+               /* We iterate through the pattern, searching for the matching ending
+                * delimiter. For each matching starting delimiter, we increment nesting
+                * level, and decrement it for each matching ending delimiter. If we
+                * reach the end of the pattern without matching, display a warning.
+                */
+               int brackets = 1;       /* brackets nesting level */
+               pp = p;
+               while (*pp != 0) {
+                       if (*pp == '\\' && pp[1] != 0) pp++;
+                       else if (*pp == end_delimiter && --brackets <= 0)
+                               break;
+                       else if (*pp == start_delimiter)
+                               brackets++;
+                       pp++;
+               }
+               if (*pp == 0) {
+                       zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
+                       return NULL;
+               }
        }
        
        /* Make a copy of the actual pattern. */
@@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
                        case 'X':       coptions |= PCRE_EXTRA;                 break;
 
                        /* Custom preg options */
-                       case 'e':       poptions |= PREG_REPLACE_EVAL; break;
+                       case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
+                       case 'F':       poptions |= PREG_REPLACE_FUNC;  break;
                        
                        case ' ':
                        case '\n':
@@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
                                return NULL;
                }
        }
+
+       if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) {
+               zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together");
+               efree(pattern);
+               return NULL;
+       }
        
 #if HAVE_SETLOCALE
        if (strcmp(locale, "C"))
@@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref)
        return 1;       
 }
 
+static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result)
+{
+       zval            *retval_ptr;            /* Function return value */
+       zval             function;                      /* Function to call */
+       zval            *function_ptr = &function;              /* Pointer to function to call */
+       zval       **args[0];                   /* Argument to pass to function */
+       zval            *subpats;                       /* Captured subpatterns */ 
+       int                      result_len;            /* Return value length */
+       int                      i;
+       CLS_FETCH();
+
+       ZVAL_STRING(function_ptr, function_name, 0);
+
+       MAKE_STD_ZVAL(subpats);
+       array_init(subpats);
+       for (i = 0; i < count; i++)
+               add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+       args[0] = &subpats;
+
+       if (call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) {
+               convert_to_string_ex(&retval_ptr);
+               *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
+               result_len = Z_STRLEN_P(retval_ptr);
+               zval_ptr_dtor(&retval_ptr);
+       } else {
+               php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name);
+               *result = empty_string;
+               result_len = 0;
+       }
+       zval_dtor(subpats);
+       FREE_ZVAL(subpats);
+
+       return result_len;
+}
 
 static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
                                                int *offsets, int count, char **result)
@@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex,   int regex_len,
        int                              size_offsets;          /* Size of the offsets array */
        int                              new_len;                       /* Length of needed storage */
        int                              alloc_len;                     /* Actual allocated length */
-       int                              eval_result_len=0;     /* Length of the eval'ed string */
+       int                              eval_result_len=0;     /* Length of the eval'ed or
+                                                                                  function-returned string */
        int                              match_len;                     /* Length of the current match */
        int                              backref;                       /* Backreference number */
        int                              eval;                          /* If the replacement string should be eval'ed */
+       int                              use_func;                      /* If the matches should be run through
+                                                                                  a function to get the replacement string */
        int                              start_offset;          /* Where the new search starts */
        int                              g_notempty = 0;        /* If the match should not be empty */
        char                    *result,                        /* Result of replacement */
@@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex,   int regex_len,
                                        *match,                         /* The current match */
                                        *piece,                         /* The current piece of subject */
                                        *replace_end,           /* End of replacement string */
-                                       *eval_result,           /* Result of eval */
+                                       *eval_result,           /* Result of eval or custom function */
                                         walk_last;                     /* Last walked character */
 
        /* Compile regex or get it from cache. */
@@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex,   int regex_len,
        start_offset = 0;
        replace_end = replace + replace_len;
        eval = preg_options & PREG_REPLACE_EVAL;
+       use_func = preg_options & PREG_REPLACE_FUNC;
        
        while (1) {
                /* Execute the regular expression. */
@@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex,   int regex_len,
                                eval_result_len = preg_do_eval(replace, replace_len, subject,
                                                                                           offsets, count, &eval_result);
                                new_len += eval_result_len;
+                       } else if (use_func) {
+                               /* Use custom function to get replacement string and its length. */
+                               eval_result_len = preg_do_repl_func(replace, subject, offsets,
+                                                                                                       count, &eval_result);
+                               new_len += eval_result_len;
                        } else { /* do regular substitution */
                                walk = replace;
                                walk_last = 0;
@@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex,   int regex_len,
                        /* copy replacement and backrefs */
                        walkbuf = result + *result_len;
                        
-                       /* If evaluating, copy result to the buffer and clean up */
-                       if (eval) {
+                       /* If evaluating or using custom function, copy result to the buffer
+                        * and clean up. */
+                       if (eval || use_func) {
                                memcpy(walkbuf, eval_result, eval_result_len);
                                *result_len += eval_result_len;
-                               efree(eval_result);
+                               STR_FREE(eval_result);
                        } else { /* do regular backreference copying */
                                walk = replace;
                                walk_last = 0;