From 0062ae621834c38b7cb065e37dc8d8bbce8448f3 Mon Sep 17 00:00:00 2001 From: Andrey Hristov Date: Wed, 26 May 1999 15:22:02 +0000 Subject: [PATCH] A few changes here. First of all, as per extensive discussion on the list, the functions are now prefixed with "preg" instead of "pcre". Secondly, global matching is now possible using preg_match_all. Please, give suggestions on a better name if this one doesn't sit well with you. Possible names are preg_global_match and preg_gmatch. preg_match_all takes 4 arguments: a regex pattern, a subject string, the array for capturing subpatterns, and a parameter that tells how the results in the subpatterns array are arranged. Basically, preg_match_all will go through the subject string and try to capture all the matches that it finds, not just the first one like preg_match. 4th parameter can be PREG_PATTERN_ORDER (default) or PREG_SET_ORDER. Example: preg_match_all("|</?([^>]+)>|", "
<div align=left>a test</div>
", $out, PREG_PATTERN_ORDER); This returns results so that $out[0] is an array of full pattern matches, $out[1] is an array of first captured subpattern matches, and so on. $out[0] -> ("<div align=left>", "</div>") $out[1] -> ("div align=left", "div") Example: preg_match_all("|</?([^>]+)>|", "
<div align=left>a test</div>
", $out, PREG_SET_ORDER); This returns results so that $out[0] is an array of first full pattern match and subpatterns, $out[1] is an array of second full pattern match and subpatterns. $out[0] -> ("<div align=left>", "div align=left") $out[1] -> ("</div>
", "div") If anyone has a better name for these PREG_ constants and also which one should be the default, I'd like to hear it. --- ext/pcre/pcre.c | 210 +++++++++++++++++++++++++++++++++----------- ext/pcre/php_pcre.h | 5 +- 2 files changed, 161 insertions(+), 54 deletions(-) diff --git a/ext/pcre/pcre.c b/ext/pcre/pcre.c index 1831f7bc16..461e5cd890 100644 --- a/ext/pcre/pcre.c +++ b/ext/pcre/pcre.c @@ -37,13 +37,17 @@ #include "php_pcre.h" +#define PREG_PATTERN_ORDER 0 +#define PREG_SET_ORDER 1 + /* {{{ module definition structures */ unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE }; function_entry pcre_functions[] = { - PHP_FE(pcre_match, third_arg_force_ref) - PHP_FE(pcre_replace, NULL) + PHP_FE(preg_match, third_arg_force_ref) + PHP_FE(preg_match_all, third_arg_force_ref) + PHP_FE(preg_replace, NULL) {NULL, NULL, NULL} }; @@ -55,6 +59,7 @@ php3_module_entry pcre_module_entry = { /* }}} */ + #ifdef ZTS int pcre_globals_id; #else @@ -118,6 +123,9 @@ int php_minit_pcre(INIT_FUNC_ARGS) #else zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1); #endif + + REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); return SUCCESS; } /* }}} */ @@ -268,23 +276,31 @@ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) { /* }}} */ -/* {{{ proto pcre_match(string pattern, string subject [, array subpatterns ]) - Perform a Perl-style regular expression match */ -PHP_FUNCTION(pcre_match) +/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */ +void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) { zval *regex, /* Regular expression */ *subject, /* String to match against */ - *subpats = NULL; /* Array for subpatterns */ + *subpats = NULL, /* Array for subpatterns */ + *subpats_order, /* Order of the results in the subpatterns + array for global match */ + 
*result_set, /* Holds a set of subpatterns after + a global match */ + **match_sets; /* An array of sets of matches for each + subpattern after a global match */ pcre *re = NULL; /* Compiled regular expression */ pcre_extra *extra = NULL; /* Holds results of studying */ int exoptions = 0; /* Execution options */ - int count; /* Count of matched subpatterns */ + int count = 0; /* Count of matched subpatterns */ int *offsets; /* Array of subpattern offsets */ + int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ int matched; /* Has anything matched */ int i; + int subpats_order_val; /* Integer value of subpats_order */ const char **stringlist; /* Used to hold list of subpatterns */ - + int subject_offset; /* Current position in the subject string */ + /* Get function parameters and do error-checking. */ switch(ARG_COUNT(ht)) { case 2: @@ -297,10 +313,30 @@ PHP_FUNCTION(pcre_match) if (getParameters(ht, 3, ®ex, &subject, &subpats) == FAILURE) { WRONG_PARAM_COUNT; } + if (global) + subpats_order_val = PREG_PATTERN_ORDER; + if (!ParameterPassedByReference(ht, 3)) { + zend_error(E_WARNING, "Array to be filled with matches must be passed by reference."); + RETURN_FALSE; + } + break; + + case 4: + if (getParameters(ht, 4, ®ex, &subject, &subpats, &subpats_order) == FAILURE) { + WRONG_PARAM_COUNT; + } if (!ParameterPassedByReference(ht, 3)) { zend_error(E_WARNING, "Array to be filled with matches must be passed by reference."); RETURN_FALSE; } + + /* Make sure subpats_order is a number */ + convert_to_long(subpats_order); + subpats_order_val = subpats_order->value.lval; + if (subpats_order_val < PREG_PATTERN_ORDER || + subpats_order_val > PREG_SET_ORDER) { + zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()"); + } break; default: @@ -311,68 +347,138 @@ PHP_FUNCTION(pcre_match) convert_to_string(regex); convert_to_string(subject); + /* Make sure to clean up the passed array and initialize it. 
*/ + if (subpats != NULL) { + zval_dtor(subpats); + array_init(subpats); + } + /* Compile regex or get it from cache. */ if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL) return; - + /* Calculate the size of the offsets array, and allocate memory for it. */ - size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3; + num_subpats = pcre_info(re, NULL, NULL) + 1; + size_offsets = num_subpats * 3; offsets = (int *)emalloc(size_offsets * sizeof(int)); - - /* Execute the regular expression. */ - count = pcre_exec(re, extra, subject->value.str.val, subject->value.str.len, - exoptions, offsets, size_offsets); - - /* Check for too many substrings condition. */ - if (count == 0) { - zend_error(E_NOTICE, "Matched, but too many substrings\n"); - count = size_offsets/3; + + /* Allocate match sets array and initialize the values */ + if (global && subpats_order_val == PREG_PATTERN_ORDER) { + match_sets = (zval **)emalloc(num_subpats * sizeof(zval *)); + for (i=0; iis_ref = 0; + match_sets[i]->refcount = 1; + } } + + /* Start from the beginning of the string */ + subject_offset = 0; - /* If something has matched */ - if (count >= 0) { - matched = 1; - - /* If subpatters array has been passed, fill it in with values. */ - if (subpats != NULL) { - /* Try to get the list of substrings and display a warning if failed. */ - if (pcre_get_substring_list(subject->value.str.val, offsets, count, &stringlist) < 0) { - efree(offsets); - efree(re); - zend_error(E_WARNING, "Get subpatterns list failed"); - return; - } + do { + /* Execute the regular expression. */ + count = pcre_exec(re, extra, &subject->value.str.val[subject_offset], + subject->value.str.len-subject_offset, + (subject_offset ? exoptions|PCRE_NOTBOL : exoptions), + offsets, size_offsets); - /* Make sure to clean up the passed array and initialize it. */ - zval_dtor(subpats); - array_init(subpats); + /* Check for too many substrings condition. 
*/ + if (count == 0) { + zend_error(E_NOTICE, "Matched, but too many substrings\n"); + count = size_offsets/3; + } + + /* If something has matched */ + if (count >= 0) { + matched = 1; + + /* If subpatters array has been passed, fill it in with values. */ + if (subpats != NULL) { + /* Try to get the list of substrings and display a warning if failed. */ + if (pcre_get_substring_list(&subject->value.str.val[subject_offset], + offsets, count, &stringlist) < 0) { + efree(offsets); + efree(re); + zend_error(E_WARNING, "Get subpatterns list failed"); + return; + } - /* For each subpattern, insert it into the subpatterns array. */ - for (i=0; iis_ref = 0; + result_set->refcount = 1; + + /* Add all the subpatterns to it */ + for (i=0; ivalue.ht, &result_set, + sizeof(zval *), NULL); + } + } + else { /* single pattern matching */ + /* For each subpattern, insert it into the subpatterns array. */ + for (i=0; i= 0); - /* Make sure to clean up the passed array and initialize it - to empty since we don't want to leave previous values in it. 
*/ - if (subpats != NULL) { - zval_dtor(subpats); - array_init(subpats); + /* Add the match sets to the output array and clean up */ + if (global && subpats_order_val == PREG_PATTERN_ORDER) { + for (i=0; ivalue.ht, &match_sets[i], sizeof(zval *), NULL); } + efree(match_sets); } - + efree(offsets); RETVAL_LONG(matched); } /* }}} */ + +/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ]) + Perform a Perl-style regular expression match */ +PHP_FUNCTION(preg_match) +{ + _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + + +/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns, integer order) + Perform a Perl-style global regular expression match */ +PHP_FUNCTION(preg_match_all) +{ + _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + /* {{{ int _pcre_get_backref(const char *walk, int *backref) */ static int _pcre_get_backref(const char *walk, int *backref) { @@ -602,9 +708,9 @@ static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject) } -/* {{{ proto pcre_replace(string|array regex, string|array replace, string|array subject) +/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject) Perform Perl-style regular expression replacement */ -PHP_FUNCTION(pcre_replace) +PHP_FUNCTION(preg_replace) { zval *regex, *replace, diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h index 551f35b5a4..8bf2955b54 100644 --- a/ext/pcre/php_pcre.h +++ b/ext/pcre/php_pcre.h @@ -41,8 +41,9 @@ extern int php_minit_pcre(INIT_FUNC_ARGS); extern int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS); extern int php_rinit_pcre(INIT_FUNC_ARGS); -PHP_FUNCTION(pcre_match); -PHP_FUNCTION(pcre_replace); +PHP_FUNCTION(preg_match); +PHP_FUNCTION(preg_match_all); +PHP_FUNCTION(preg_replace); extern zend_module_entry pcre_module_entry; #define pcre_module_ptr &pcre_module_entry -- 2.40.0