]> granicus.if.org Git - php/commitdiff
A few changes here.
authorAndrey Hristov <andrey@php.net>
Wed, 26 May 1999 15:22:02 +0000 (15:22 +0000)
committerAndrey Hristov <andrey@php.net>
Wed, 26 May 1999 15:22:02 +0000 (15:22 +0000)
First of all, as per extensive discussion on the list, the functions are now
prefixed with "preg" instead of "pcre".

Secondly, global matching is now possible using preg_match_all.  Please, give
suggestions on a better name if this one doesn't sit well with you.  Possible
names are preg_global_match and preg_gmatch.

preg_match_all takes 4 arguments: a regex pattern, a subject string,
the array for capturing subpatterns, and a parameter that tells how the results
in the subpatterns array are arranged.

Basically, preg_match_all will go through the subject string and try to capture
all the matches that it finds, not just the first one like preg_match.

4th parameter can be PREG_PATTERN_ORDER (default) or PREG_SET_ORDER.

Example:
preg_match_all("|</?([^>]+)>|", "<div align=left>a test</div>", $out, PREG_PATTERN_ORDER);

This returns results so that $out[0] is an array of full pattern matches, $out[1] is an array
of first captured subpattern matches, and so on.

$out[0] -> ("<div align=left>", "</div>")
$out[1] -> ("div align=left", "div")

Example:
preg_match_all("|</?([^>]+)>|", "<div align=left>a test</div>", $out, PREG_SET_ORDER);

This returns results so that $out[0] is an array of first full pattern match and subpatterns,
$out[1] is an array of second full pattern match and subpatterns.

$out[0] -> ("<div align=left>", "div align=left")
$out[1] -> ("</div>", "div")

If anyone has a better name for these PREG_ constants and also which one should be the default,
I'd like to hear it.

ext/pcre/pcre.c
ext/pcre/php_pcre.h

index 1831f7bc16ce77fc618b36608a91b444d0c8aa21..461e5cd8908bddecccb11d10c22cca82ef817f81 100644 (file)
 
 #include "php_pcre.h"
 
+#define PREG_PATTERN_ORDER     0
+#define PREG_SET_ORDER         1
+
 /* {{{ module definition structures */
 
 unsigned char third_arg_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE };
 
 function_entry pcre_functions[] = {
-       PHP_FE(pcre_match,              third_arg_force_ref)
-       PHP_FE(pcre_replace,    NULL)
+       PHP_FE(preg_match,              third_arg_force_ref)
+       PHP_FE(preg_match_all,  third_arg_force_ref)
+       PHP_FE(preg_replace,    NULL)
        {NULL,          NULL,           NULL}
 };
 
@@ -55,6 +59,7 @@ php3_module_entry pcre_module_entry = {
 
 /* }}} */
 
+
 #ifdef ZTS
 int pcre_globals_id;
 #else
@@ -118,6 +123,9 @@ int php_minit_pcre(INIT_FUNC_ARGS)
 #else
        zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, _php_free_pcre_cache, 1);
 #endif
+       
+       REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
        return SUCCESS;
 }
 /* }}} */
@@ -268,23 +276,31 @@ static pcre* _pcre_get_compiled_regex(char *regex, pcre_extra *extra) {
 /* }}} */
 
 
-/* {{{ proto pcre_match(string pattern, string subject [, array subpatterns ])
-   Perform a Perl-style regular expression match */
-PHP_FUNCTION(pcre_match)
+/* {{{ void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) */
+void _pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
 {
        zval                    *regex,                         /* Regular expression */
                                        *subject,                       /* String to match against */
-                                       *subpats = NULL;        /* Array for subpatterns */
+                                       *subpats = NULL,        /* Array for subpatterns */
+                                       *subpats_order,         /* Order of the results in the subpatterns
+                                                                                  array for global match */
+                                       *result_set,            /* Holds a set of subpatterns after
+                                                                                  a global match */
+                                  **match_sets;                /* An array of sets of matches for each
+                                                                                  subpattern after a global match */
        pcre                    *re = NULL;                     /* Compiled regular expression */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
        int                              exoptions = 0;         /* Execution options */
-       int                              count;                         /* Count of matched subpatterns */
+       int                              count = 0;                     /* Count of matched subpatterns */
        int                             *offsets;                       /* Array of subpattern offsets */
+       int                              num_subpats;           /* Number of captured subpatterns */
        int                              size_offsets;          /* Size of the offsets array */
        int                              matched;                       /* Has anything matched */
        int                              i;
+       int                              subpats_order_val;     /* Integer value of subpats_order */
        const char         **stringlist;                /* Used to hold list of subpatterns */
-
+       int                              subject_offset;        /* Current position in the subject string */
+       
        /* Get function parameters and do error-checking. */
        switch(ARG_COUNT(ht)) {
                case 2:
@@ -297,10 +313,30 @@ PHP_FUNCTION(pcre_match)
                        if (getParameters(ht, 3, &regex, &subject, &subpats) == FAILURE) {
                                WRONG_PARAM_COUNT;
                        }
+                       if (global)
+                               subpats_order_val = PREG_PATTERN_ORDER;
+                       if (!ParameterPassedByReference(ht, 3)) {
+                               zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
+                               RETURN_FALSE;
+                       }
+                       break;
+
+               case 4:
+                       if (getParameters(ht, 4, &regex, &subject, &subpats, &subpats_order) == FAILURE) {
+                               WRONG_PARAM_COUNT;
+                       }
                        if (!ParameterPassedByReference(ht, 3)) {
                                zend_error(E_WARNING, "Array to be filled with matches must be passed by reference.");
                                RETURN_FALSE;
                        }
+       
+                       /* Make sure subpats_order is a number */
+                       convert_to_long(subpats_order);
+                       subpats_order_val = subpats_order->value.lval;
+                       if (subpats_order_val < PREG_PATTERN_ORDER ||
+                               subpats_order_val > PREG_SET_ORDER) {
+                               zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()");
+                       }
                        break;
                        
                default:
@@ -311,68 +347,138 @@ PHP_FUNCTION(pcre_match)
        convert_to_string(regex);
        convert_to_string(subject);
 
+       /* Make sure to clean up the passed array and initialize it. */
+       if (subpats != NULL) {
+               zval_dtor(subpats);
+               array_init(subpats);
+       }
+
        /* Compile regex or get it from cache. */
        if ((re = _pcre_get_compiled_regex(regex->value.str.val, extra)) == NULL)
                return;
-       
+
        /* Calculate the size of the offsets array, and allocate memory for it. */
-       size_offsets = (pcre_info(re, NULL, NULL) + 1) * 3;
+       num_subpats = pcre_info(re, NULL, NULL) + 1;
+       size_offsets = num_subpats * 3;
        offsets = (int *)emalloc(size_offsets * sizeof(int));
-       
-       /* Execute the regular expression. */
-       count = pcre_exec(re, extra, subject->value.str.val, subject->value.str.len,
-                                         exoptions, offsets, size_offsets);
-
-       /* Check for too many substrings condition. */  
-       if (count == 0) {
-               zend_error(E_NOTICE, "Matched, but too many substrings\n");
-               count = size_offsets/3;
+
+       /* Allocate match sets array and initialize the values */
+       if (global && subpats_order_val == PREG_PATTERN_ORDER) {
+               match_sets = (zval **)emalloc(num_subpats * sizeof(zval *));
+               for (i=0; i<num_subpats; i++) {
+                       match_sets[i] = (zval *)emalloc(sizeof(zval));
+                       array_init(match_sets[i]);
+                       match_sets[i]->is_ref = 0;
+                       match_sets[i]->refcount = 1;
+               }
        }
+
+       /* Start from the beginning of the string */
+       subject_offset = 0;
        
-       /* If something has matched */
-       if (count >= 0) {
-               matched = 1;
-
-               /* If subpatters array has been passed, fill it in with values. */
-               if (subpats != NULL) {
-                       /* Try to get the list of substrings and display a warning if failed. */
-                       if (pcre_get_substring_list(subject->value.str.val, offsets, count, &stringlist) < 0) {
-                               efree(offsets);
-                               efree(re);
-                               zend_error(E_WARNING, "Get subpatterns list failed");
-                               return;
-                       }
+       do {
+               /* Execute the regular expression. */
+               count = pcre_exec(re, extra, &subject->value.str.val[subject_offset],
+                                                 subject->value.str.len-subject_offset,
+                                                 (subject_offset ? exoptions|PCRE_NOTBOL : exoptions),
+                                                 offsets, size_offsets);
 
-                       /* Make sure to clean up the passed array and initialize it. */
-                       zval_dtor(subpats);
-                       array_init(subpats);
+               /* Check for too many substrings condition. */  
+               if (count == 0) {
+                       zend_error(E_NOTICE, "Matched, but too many substrings\n");
+                       count = size_offsets/3;
+               }
+
+               /* If something has matched */
+               if (count >= 0) {
+                       matched = 1;
+
+                       /* If subpatters array has been passed, fill it in with values. */
+                       if (subpats != NULL) {
+                               /* Try to get the list of substrings and display a warning if failed. */
+                               if (pcre_get_substring_list(&subject->value.str.val[subject_offset],
+                                               offsets, count, &stringlist) < 0) {
+                                       efree(offsets);
+                                       efree(re);
+                                       zend_error(E_WARNING, "Get subpatterns list failed");
+                                       return;
+                               }
 
-                       /* For each subpattern, insert it into the subpatterns array. */
-                       for (i=0; i<count; i++) {
-                               add_next_index_string(subpats, (char *)stringlist[i], 1);
+                               if (global) {   /* global pattern matching */
+                                       if (subpats_order_val == PREG_PATTERN_ORDER) {
+                                               /* For each subpattern, insert it into the appropriate array */
+                                               for (i=0; i<count; i++) {
+                                                       add_next_index_string(match_sets[i], (char *)stringlist[i], 1);
+                                               }
+                                       }
+                                       else {
+                                               /* Allocate the result set array */
+                                               result_set = emalloc(sizeof(zval));
+                                               array_init(result_set);
+                                               result_set->is_ref = 0;
+                                               result_set->refcount = 1;
+                                               
+                                               /* Add all the subpatterns to it */
+                                               for (i=0; i<count; i++) {
+                                                       add_next_index_string(result_set, (char *)stringlist[i], 1);
+                                               }
+                                               /* And add it to the output array */
+                                               zend_hash_next_index_insert(subpats->value.ht, &result_set,
+                                                               sizeof(zval *), NULL);
+                                       }
+                               }
+                               else {                  /* single pattern matching */
+                                       /* For each subpattern, insert it into the subpatterns array. */
+                                       for (i=0; i<count; i++) {
+                                               add_next_index_string(subpats, (char *)stringlist[i], 1);
+                                       }
+                               }
+
+                               php_pcre_free(stringlist);
+                               
+                               /* Advance to the position right after the last full match */
+                               subject_offset += offsets[1];
                        }
-                       
-                       php_pcre_free(stringlist);
                }
-       }
-       /* If nothing matched */
-       else {
-               matched = 0;
+               /* If nothing matched */
+               else {
+                       matched = 0;
+               }
+       } while (global && count >= 0);
 
-               /* Make sure to clean up the passed array and initialize it
-                  to empty since we don't want to leave previous values in it. */
-               if (subpats != NULL) {
-                       zval_dtor(subpats);
-                       array_init(subpats);
+       /* Add the match sets to the output array and clean up */
+       if (global && subpats_order_val == PREG_PATTERN_ORDER) {
+               for (i=0; i<num_subpats; i++) {
+                       zend_hash_next_index_insert(subpats->value.ht, &match_sets[i], sizeof(zval *), NULL);
                }
+               efree(match_sets);
        }
-
+       
        efree(offsets);
 
        RETVAL_LONG(matched);
 }
 /* }}} */
 
+
+/* {{{ proto preg_match(string pattern, string subject [, array subpatterns ])
+   Perform a Perl-style regular expression match */
+PHP_FUNCTION(preg_match)
+{
+       _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+
+/* {{{ proto preg_match_all(string pattern, string subject, array subpatterns, integer order)
+   Perform a Perl-style global regular expression match */
+PHP_FUNCTION(preg_match_all)
+{
+       _pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
+
 /* {{{ int _pcre_get_backref(const char *walk, int *backref) */
 static int _pcre_get_backref(const char *walk, int *backref)
 {
@@ -602,9 +708,9 @@ static char *_php_replace_in_subject(zval *regex, zval *replace, zval *subject)
 }
 
 
-/* {{{ proto pcre_replace(string|array regex, string|array replace, string|array subject)
+/* {{{ proto preg_replace(string|array regex, string|array replace, string|array subject)
     Perform Perl-style regular expression replacement */
-PHP_FUNCTION(pcre_replace)
+PHP_FUNCTION(preg_replace)
 {
        zval                    *regex,
                                        *replace,
index 551f35b5a46fa31f44ebb649b1c88bad24aca5a0..8bf2955b54769ec33abb89f9f92c1db94edd5770 100644 (file)
@@ -41,8 +41,9 @@ extern int php_minit_pcre(INIT_FUNC_ARGS);
 extern int php_mshutdown_pcre(SHUTDOWN_FUNC_ARGS);
 extern int php_rinit_pcre(INIT_FUNC_ARGS);
 
-PHP_FUNCTION(pcre_match);
-PHP_FUNCTION(pcre_replace);
+PHP_FUNCTION(preg_match);
+PHP_FUNCTION(preg_match_all);
+PHP_FUNCTION(preg_replace);
 
 extern zend_module_entry pcre_module_entry;
 #define pcre_module_ptr &pcre_module_entry