]> granicus.if.org Git - php/commitdiff
MFH
authorAndrei Zmievski <andrei@php.net>
Tue, 9 May 2006 18:29:26 +0000 (18:29 +0000)
committerAndrei Zmievski <andrei@php.net>
Tue, 9 May 2006 18:29:26 +0000 (18:29 +0000)
ext/pcre/php_pcre.c
ext/pcre/php_pcre.h

index 9998be2cee121074a5eb7635d2d74876623f7298..8de4bc779703ae41663712cae1959a3e37d71dbe 100644 (file)
@@ -23,6 +23,7 @@
 #endif
 
 #include "php.h"
+#include "php_ini.h"
 #include "php_globals.h"
 #include "php_pcre.h"
 #include "ext/standard/info.h"
 
 #define PREG_GREP_INVERT                       (1<<0)
 
+#define PCRE_CACHE_SIZE 4096
+
+enum {
+       PHP_PCRE_NO_ERROR = 0,
+       PHP_PCRE_INTERNAL_ERROR,
+       PHP_PCRE_BACKTRACK_LIMIT_ERROR,
+       PHP_PCRE_RECURSION_LIMIT_ERROR,
+       PHP_PCRE_BAD_UTF8_ERROR,
+};
+
+
+ZEND_DECLARE_MODULE_GLOBALS(pcre);
+
+
+static void pcre_handle_exec_error(int pcre_code TSRMLS_DC)
+{
+       int preg_code = 0;
+
+       switch (pcre_code) {
+               case PCRE_ERROR_MATCHLIMIT:
+                       preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
+                       break;
+
+               case PCRE_ERROR_RECURSIONLIMIT:
+                       preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
+                       break;
+
+               case PCRE_ERROR_BADUTF8:
+                       preg_code = PHP_PCRE_BAD_UTF8_ERROR;
+                       break;
+
+               default:
+                       preg_code = PHP_PCRE_INTERNAL_ERROR;
+                       break;
+       }
+
+       PCRE_G(error_code) = preg_code;
+}
 
-ZEND_DECLARE_MODULE_GLOBALS(pcre)
 
 static void php_free_pcre_cache(void *data)
 {
@@ -62,6 +100,9 @@ static void php_free_pcre_cache(void *data)
 static void php_pcre_init_globals(zend_pcre_globals *pcre_globals TSRMLS_DC)
 {
        zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
+       pcre_globals->backtrack_limit = 0;
+       pcre_globals->recursion_limit = 0;
+       pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
 }
 
 static void php_pcre_shutdown_globals(zend_pcre_globals *pcre_globals TSRMLS_DC)
@@ -69,6 +110,11 @@ static void php_pcre_shutdown_globals(zend_pcre_globals *pcre_globals TSRMLS_DC)
        zend_hash_destroy(&pcre_globals->pcre_cache);
 }
 
+PHP_INI_BEGIN()
+       STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
+       STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
+PHP_INI_END()
+
 
 /* {{{ PHP_MINFO_FUNCTION(pcre) */
 static PHP_MINFO_FUNCTION(pcre)
@@ -84,6 +130,8 @@ static PHP_MINFO_FUNCTION(pcre)
 static PHP_MINIT_FUNCTION(pcre)
 {
        ZEND_INIT_MODULE_GLOBALS(pcre, php_pcre_init_globals, php_pcre_shutdown_globals);
+
+       REGISTER_INI_ENTRIES();
        
        REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
@@ -93,6 +141,12 @@ static PHP_MINIT_FUNCTION(pcre)
        REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
 
+       REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
+
        return SUCCESS;
 }
 /* }}} */
@@ -106,12 +160,12 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
        php_pcre_shutdown_globals(&pcre_globals TSRMLS_CC);
 #endif
 
+       UNREGISTER_INI_ENTRIES();
+
        return SUCCESS;
 }
 /* }}} */
 
-#define PCRE_CACHE_SIZE 4096
-
 /* {{{ static pcre_clean_cache */
 static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
 {
@@ -306,6 +360,9 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *pr
           store the result in extra for passing to pcre_exec. */
        if (do_study) {
                *extra = pcre_study(re, soptions, &error);
+               if (*extra) {
+                       (*extra)->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+               }
                if (error != NULL) {
                        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Error while studying pattern");
                }
@@ -373,21 +430,22 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
        int                          regex_len;
        int                              subject_len;
        zval                    *subpats = NULL;        /* Array for subpatterns */
-       long                             flags;                         /* Match control flags */
+       long                     flags;                         /* Match control flags */
 
        zval                    *result_set,            /* Holds a set of subpatterns after
                                                                                   a global match */
                                   **match_sets = NULL; /* An array of sets of matches for each
                                                                                   subpattern after a global match */
        pcre                    *re = NULL;                     /* Compiled regular expression */
-       pcre_extra              *extra = NULL;          /* Holds results of studying */
+       pcre_extra              *extra = NULL;          /* Holds results of studying pattern */
+       pcre_extra               extra_data;            /* Used locally for exec options */
        int                              exoptions = 0;         /* Execution options */
        int                              preg_options = 0;      /* Custom preg options */
        int                              count = 0;                     /* Count of matched subpatterns */
        int                             *offsets;                       /* Array of subpattern offsets */
        int                              num_subpats;           /* Number of captured subpatterns */
        int                              size_offsets;          /* Size of the offsets array */
-       long                             start_offset = 0;      /* Where the new search starts */
+       long                     start_offset = 0;      /* Where the new search starts */
        int                              matched;                       /* Has anything matched */
        int                              subpats_order = 0; /* Order of subpattern matches */
        int                              offset_capture = 0;/* Capture match offsets: yes/no */
@@ -440,6 +498,13 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
                RETURN_FALSE;
        }
 
+       if (extra == NULL) {
+               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+               extra = &extra_data;
+       }
+       extra->match_limit = PCRE_G(backtrack_limit);
+       extra->match_limit_recursion = PCRE_G(recursion_limit);
+
        /* Calculate the size of the offsets array, and allocate memory for it. */
        rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
        if (rc < 0) {
@@ -512,6 +577,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
 
        match = NULL;
        matched = 0;
+       PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
        
        do {
                /* Execute the regular expression. */
@@ -525,7 +591,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
                }
 
                /* If something has matched */
-               if (count >= 0) {
+               if (count > 0) {
                        matched++;
                        match = subject + offsets[0];
 
@@ -537,7 +603,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
                                        efree(offsets);
                                        efree(re);
                                        zend_error(E_WARNING, "Get subpatterns list failed");
-                                       return;
+                                       RETURN_FALSE;
                                }
 
                                if (global) {   /* global pattern matching */
@@ -605,8 +671,7 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
 
                                pcre_free((void *) stringlist);
                        }
-               }
-               else { /* Failed to match */
+               } else if (count == PCRE_ERROR_NOMATCH) {
                        /* If we previously set PCRE_NOTEMPTY after a null match,
                           this is not necessarily the end. We need to advance
                           the start offset, and continue. Fudge the offset values
@@ -616,6 +681,9 @@ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
                                offsets[1] = start_offset + 1;
                        } else
                                break;
+               } else {
+                       pcre_handle_exec_error(count TSRMLS_CC);
+                       break;
                }
                
                /* If we have matched an empty string, mimic what Perl's /g options does.
@@ -834,6 +902,7 @@ PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
 {
        pcre                    *re = NULL;                     /* Compiled regular expression */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
+       pcre_extra               extra_data;            /* Used locally for exec options */
        int                              exoptions = 0;         /* Execution options */
        int                              preg_options = 0;      /* Custom preg options */
        int                              count = 0;                     /* Count of matched subpatterns */
@@ -866,6 +935,13 @@ PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
                return NULL;
        }
 
+       if (extra == NULL) {
+               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+               extra = &extra_data;
+       }
+       extra->match_limit = PCRE_G(backtrack_limit);
+       extra->match_limit_recursion = PCRE_G(recursion_limit);
+
        eval = preg_options & PREG_REPLACE_EVAL;
        if (is_callable_replace) {
                if (eval) {
@@ -895,6 +971,7 @@ PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
        match = NULL;
        *result_len = 0;
        start_offset = 0;
+       PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
        
        while (1) {
                /* Execute the regular expression. */
@@ -1000,7 +1077,7 @@ PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
                        if (limit != -1)
                                limit--;
 
-               } else { /* Failed to match */
+               } else if (count == PCRE_ERROR_NOMATCH) {
                        /* If we previously set PCRE_NOTEMPTY after a null match,
                           this is not necessarily the end. We need to advance
                           the start offset, and continue. Fudge the offset values
@@ -1025,6 +1102,9 @@ PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
                                result[*result_len] = '\0';
                                break;
                        }
+               } else {
+                       pcre_handle_exec_error(count TSRMLS_CC);
+                       break;
                }
                        
                /* If we have matched an empty string, mimic what Perl's /g options does.
@@ -1251,6 +1331,7 @@ PHP_FUNCTION(preg_split)
        pcre                    *re_bump = NULL;        /* Regex instance for empty matches */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
        pcre_extra              *extra_bump = NULL;     /* Almost dummy */
+       pcre_extra               extra_data;            /* Used locally for exec options */
        int                             *offsets;                       /* Array of subpattern offsets */
        int                              size_offsets;          /* Size of the offsets array */
        int                              exoptions = 0;         /* Execution options */
@@ -1297,6 +1378,13 @@ PHP_FUNCTION(preg_split)
        if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra, &preg_options, &coptions TSRMLS_CC)) == NULL) {
                RETURN_FALSE;
        }
+
+       if (extra == NULL) {
+               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+               extra = &extra_data;
+       }
+       extra->match_limit = PCRE_G(backtrack_limit);
+       extra->match_limit_recursion = PCRE_G(recursion_limit);
        
        /* Initialize return value */
        array_init(return_value);
@@ -1316,6 +1404,7 @@ PHP_FUNCTION(preg_split)
        next_offset = 0;
        last_match = Z_STRVAL_PP(subject);
        match = NULL;
+       PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
        
        /* Get next piece if no limit or limit not yet reached and something matched*/
        while ((limit_val == -1 || limit_val > 1)) {
@@ -1368,7 +1457,7 @@ PHP_FUNCTION(preg_split)
                                        }
                                }
                        }
-               } else { /* Failed to match */
+               } else if (count == PCRE_ERROR_NOMATCH) {
                        /* If we previously set PCRE_NOTEMPTY after a null match,
                           this is not necessarily the end. We need to advance
                           the start offset, and continue. Fudge the offset values
@@ -1396,6 +1485,9 @@ PHP_FUNCTION(preg_split)
                                }
                        } else
                                break;
+               } else {
+                       pcre_handle_exec_error(count TSRMLS_CC);
+                       break;
                }
 
                /* If we have matched an empty string, mimic what Perl's /g options does.
@@ -1527,6 +1619,7 @@ PHP_FUNCTION(preg_grep)
                                   **entry;                             /* An entry in the input array */
        pcre                    *re = NULL;                     /* Compiled regular expression */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
+       pcre_extra               extra_data;            /* Used locally for exec options */
        int                              preg_options = 0;      /* Custom preg options */
        int                             *offsets;                       /* Array of subpattern offsets */
        int                              size_offsets;          /* Size of the offsets array */
@@ -1564,6 +1657,13 @@ PHP_FUNCTION(preg_grep)
                RETURN_FALSE;
        }
 
+       if (extra == NULL) {
+               extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+               extra = &extra_data;
+       }
+       extra->match_limit = PCRE_G(backtrack_limit);
+       extra->match_limit_recursion = PCRE_G(recursion_limit);
+
        /* Calculate the size of the offsets array, and allocate memory for it. */
        rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
        if (rc < 0) {
@@ -1577,6 +1677,8 @@ PHP_FUNCTION(preg_grep)
        /* Initialize return array */
        array_init(return_value);
 
+       PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
+
        /* Go through the input array */
        zend_hash_internal_pointer_reset(Z_ARRVAL_PP(input));
        while(zend_hash_get_current_data(Z_ARRVAL_PP(input), (void **)&entry) == SUCCESS) {
@@ -1592,15 +1694,18 @@ PHP_FUNCTION(preg_grep)
                if (count == 0) {
                        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
                        count = size_offsets/3;
+               } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
+                       pcre_handle_exec_error(count TSRMLS_CC);
+                       break;
                }
 
                /* If the entry fits our requirements */
                if ((count > 0 && !invert) ||
-                       (count < 0 && invert)) {
+                       (count == PCRE_ERROR_NOMATCH && invert)) {
                        (*entry)->refcount++;
 
                        /* Add to return array */
-                       switch(zend_hash_get_current_key(Z_ARRVAL_PP(input), &string_key, &num_key, 0))
+                       switch (zend_hash_get_current_key(Z_ARRVAL_PP(input), &string_key, &num_key, 0))
                        {
                                case HASH_KEY_IS_STRING:
                                        zend_hash_update(Z_ARRVAL_P(return_value), string_key,
@@ -1622,6 +1727,18 @@ PHP_FUNCTION(preg_grep)
 }
 /* }}} */
 
+/* {{{ proto int preg_last_error()
+   Returns the error code of the last regexp execution. */
+PHP_FUNCTION(preg_last_error)
+{
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
+               return;
+       }
+
+       RETURN_LONG(PCRE_G(error_code));
+}
+/* }}} */
+
 /* {{{ module definition structures */
 
 zend_function_entry pcre_functions[] = {
@@ -1632,6 +1749,7 @@ zend_function_entry pcre_functions[] = {
        PHP_FE(preg_split,                              NULL)
        PHP_FE(preg_quote,                              NULL)
        PHP_FE(preg_grep,                               NULL)
+       PHP_FE(preg_last_error,                 NULL)
        {NULL,          NULL,                           NULL}
 };
 
index ab91d13e21ce3dece17cd04b227e5bd405d85b16..d6068cfb74d2c2ecbd5ee937b446d0c3043bd358 100644 (file)
@@ -61,6 +61,9 @@ typedef struct {
 
 ZEND_BEGIN_MODULE_GLOBALS(pcre)
        HashTable pcre_cache;
+       long backtrack_limit;
+       long recursion_limit;
+       int  error_code;
 ZEND_END_MODULE_GLOBALS(pcre)
 
 #ifdef ZTS