granicus.if.org Git - php/commitdiff
add PREG_UNMATCHED_AS_NULL flag to allow distinguish between unmatched subpatterns...
author: Nicolas Grekas <nicolas.grekas@gmail.com>
Tue, 16 May 2017 10:46:32 +0000 (12:46 +0200)
committer: Remi Collet <remi@php.net>
Mon, 29 May 2017 09:40:29 +0000 (11:40 +0200)
UPGRADING
ext/pcre/php_pcre.c
ext/pcre/tests/001.phpt
ext/pcre/tests/003.phpt
ext/pcre/tests/004.phpt
ext/pcre/tests/bug61780.phpt
ext/pcre/tests/bug61780_1.phpt
ext/pcre/tests/bug61780_2.phpt
ext/pcre/tests/marks.phpt

index 934dca8b49e2451b3cba832a50f77cf95b1036f5..9faa926f91147dfde5caef17fe45ddaa26b40932 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -56,11 +56,6 @@ PHP 7.2 UPGRADE NOTES
     parameter (assoc) is null. Previously JSON_OBJECT_AS_ARRAY was always
     ignored.
 
-- PCRE:
-  . preg_match() and other PCRE functions now distinguish between unmatched
-    subpatterns and empty matches by reporting NULL and "" (empty string),
-    respectively. Formerly, either was reported as empty string.
-
 - Session:
   . Removed register_globals related code and "!" can be used as $_SESSION key name.
   . Session is made to manage session status correctly and prevents invalid operations.
@@ -109,6 +104,9 @@ PHP 7.2 UPGRADE NOTES
 
 - PCRE:
   . Added `J` modifier for setting PCRE_DUPNAMES.
+  . Added `PREG_UNMATCHED_AS_NULL` flag to allow distinguishing between
+    unmatched subpatterns and empty matches by reporting NULL and ""
+    (empty string), respectively.
 
 - Standard:
   . Simplified password hashing API updated to support Argon2i hashes when PHP is compiled with libargon2
@@ -278,6 +276,9 @@ See also: https://wiki.php.net/rfc/deprecations_php_7_2
   . IMG_EFFECT_MULTIPLY
   . IMG_BMP
 
+- PCRE:
+  . PREG_UNMATCHED_AS_NULL
+
 - Standard:
   . PASSWORD_ARGON2_DEFAULT_MEMORY_COST
   . PASSWORD_ARGON2_DEFAULT_TIME_COST
index 324a4acbfb77e798c8418e3c7ac8274aa09322d6..c7d6507d77c2512cf43038dd161b50dc33a25f7d 100644 (file)
@@ -33,6 +33,7 @@
 #define PREG_PATTERN_ORDER                     1
 #define PREG_SET_ORDER                         2
 #define PREG_OFFSET_CAPTURE                    (1<<8)
+#define PREG_UNMATCHED_AS_NULL         (1<<9)
 
 #define        PREG_SPLIT_NO_EMPTY                     (1<<0)
 #define PREG_SPLIT_DELIM_CAPTURE       (1<<1)
@@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre)
        REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
        REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
@@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra,
 /* }}} */
 
 /* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
+static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
 {
        zval match_pair, tmp;
 
        array_init_size(&match_pair, 2);
 
        /* Add (match, offset) to the return value */
-       if (offset < 0) { /* unset substring */
+       if (unmatched_as_null && offset < 0) {
                ZVAL_NULL(&tmp);
        } else {
                ZVAL_STRINGL(&tmp, str, len);
@@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
 {
        zval                     result_set,            /* Holds a set of subpatterns after
                                                                                   a global match */
-                                   *match_sets = NULL; /* An array of sets of matches for each
+                                       *match_sets = NULL;     /* An array of sets of matches for each
                                                                                   subpattern after a global match */
        pcre_extra              *extra = pce->extra;/* Holds results of studying */
        pcre_extra               extra_data;            /* Used locally for exec options */
@@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
        char               **subpat_names;              /* Array for named subpatterns */
        int                              i;
        int                              subpats_order;         /* Order of subpattern matches */
-       int                              offset_capture;    /* Capture match offsets: yes/no */
-       unsigned char   *mark = NULL;       /* Target for MARK name */
-       zval            marks;                  /* Array of marks for PREG_PATTERN_ORDER */
+       int                              offset_capture;        /* Capture match offsets: yes/no */
+       int                              unmatched_as_null;     /* Null non-matches: yes/no */
+       unsigned char   *mark = NULL;           /* Target for MARK name */
+       zval                     marks;                         /* Array of marks for PREG_PATTERN_ORDER */
        ALLOCA_FLAG(use_heap);
 
        ZVAL_UNDEF(&marks);
@@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
 
        if (use_flags) {
                offset_capture = flags & PREG_OFFSET_CAPTURE;
+               unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
 
                /*
                 * subpats_order is pre-set to pattern mode so we change it only if
@@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                }
        } else {
                offset_capture = 0;
+               unmatched_as_null = 0;
        }
 
        /* Negative offset counts from the end of the string. */
@@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                if (offset_capture) {
                                                        for (i = 0; i < count; i++) {
                                                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
-                                                                                               offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+                                                                                               offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
                                                        }
                                                } else {
                                                        for (i = 0; i < count; i++) {
-                                                               if (offsets[i<<1] < 0) { /* unset substring */
+                                                               if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                        add_next_index_null(&match_sets[i]);
                                                                } else {
                                                                        add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
@@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                /*
                                                 * If the number of captured subpatterns on this run is
                                                 * less than the total possible number, pad the result
-                                                * arrays with NULLs.
+                                                * arrays with NULLs or empty strings.
                                                 */
                                                if (count < num_subpats) {
                                                        for (; i < num_subpats; i++) {
-                                                               add_next_index_null(&match_sets[i]);
+                                                               if (unmatched_as_null) {
+                                                                       add_next_index_null(&match_sets[i]);
+                                                               } else {
+                                                                       add_next_index_string(&match_sets[i], "");
+                                                               }
                                                        }
                                                }
                                        } else {
@@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                        if (offset_capture) {
                                                                for (i = 0; i < count; i++) {
                                                                        add_offset_pair(&result_set, (char *)stringlist[i],
-                                                                                                       offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
+                                                                                                       offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
                                                                }
                                                        } else {
                                                                for (i = 0; i < count; i++) {
                                                                        if (subpat_names[i]) {
-                                                                       if (offsets[i<<1] < 0) { /* unset substring */
-                                                                               add_assoc_null(&result_set, subpat_names[i]);
-                                                                       } else {
+                                                                               if (unmatched_as_null && offsets[i<<1] < 0) {
+                                                                                       add_assoc_null(&result_set, subpat_names[i]);
+                                                                               } else {
                                                                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
-                                                                                                                                  offsets[(i<<1)+1] - offsets[i<<1]);
-                                                                       }
+                                                                                                                         offsets[(i<<1)+1] - offsets[i<<1]);
+                                                                               }
                                                                        }
-                                                                       if (offsets[i<<1] < 0) { /* unset substring */
+                                                                       if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                                add_next_index_null(&result_set);
                                                                        } else {
                                                                                add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                        if (offset_capture) {
                                                                for (i = 0; i < count; i++) {
                                                                        add_offset_pair(&result_set, (char *)stringlist[i],
-                                                                                                       offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+                                                                                                       offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
                                                                }
                                                        } else {
                                                                for (i = 0; i < count; i++) {
-                                                                       if (offsets[i<<1] < 0) { /* unset substring */
+                                                                       if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                                add_next_index_null(&result_set);
                                                                        } else {
                                                                                add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                        for (i = 0; i < count; i++) {
                                                                add_offset_pair(subpats, (char *)stringlist[i],
                                                                                                offsets[(i<<1)+1] - offsets[i<<1],
-                                                                                               offsets[i<<1], subpat_names[i]);
+                                                                                               offsets[i<<1], subpat_names[i], unmatched_as_null);
                                                        }
                                                } else {
                                                        for (i = 0; i < count; i++) {
                                                                if (subpat_names[i]) {
-                                                                       if (offsets[i<<1] < 0) { /* unset substring */
+                                                                       if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                                add_assoc_null(subpats, subpat_names[i]);
                                                                        } else {
                                                                                add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
                                                                                                                  offsets[(i<<1)+1] - offsets[i<<1]);
                                                                        }
                                                                }
-                                                               if (offsets[i<<1] < 0) { /* unset substring */
+                                                               if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                        add_next_index_null(subpats);
                                                                } else {
                                                                        add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
                                                        for (i = 0; i < count; i++) {
                                                                add_offset_pair(subpats, (char *)stringlist[i],
                                                                                                offsets[(i<<1)+1] - offsets[i<<1],
-                                                                                               offsets[i<<1], NULL);
+                                                                                               offsets[i<<1], NULL, unmatched_as_null);
                                                        }
                                                } else {
                                                        for (i = 0; i < count; i++) {
-                                                               if (offsets[i<<1] < 0) { /* unset substring */
+                                                               if (unmatched_as_null && offsets[i<<1] < 0) {
                                                                        add_next_index_null(subpats);
                                                                } else {
                                                                        add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
 
                                if (offset_capture) {
                                        /* Add (match, offset) pair to the return value */
-                                       add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
+                                       add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
                                } else {
                                        /* Add the piece to the return value */
                                        ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
@@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
                                        /* If we have matched a delimiter */
                                        if (!no_empty || match_len > 0) {
                                                if (offset_capture) {
-                                                       add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
+                                                       add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
                                                } else {
                                                        ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
                                                        zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
@@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
 
        start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
 
-       if (!no_empty || start_offset < subject_len)
-       {
+       if (!no_empty || start_offset < subject_len) {
                if (offset_capture) {
                        /* Add the last (match, offset) pair to the return value */
-                       add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
+                       add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
                } else {
                        /* Add the last piece to the return value */
                        ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
index 7aeebf3cf6a60ece8c915b6bc55a502b36840768..313f7fdc679fb8dfb872b795511af03c5a315a45 100644 (file)
@@ -52,7 +52,7 @@ array(10) {
   [2]=>
   string(2) "06"
   [3]=>
-  NULL
+  string(0) ""
   ["month"]=>
   string(2) "12"
   [4]=>
@@ -75,7 +75,7 @@ array(10) {
   [2]=>
   string(2) "12"
   [3]=>
-  NULL
+  string(0) ""
   ["month"]=>
   string(3) "Aug"
   [4]=>
index 2144032d9d0a25570b29d2d1fb55698d855039d4..e697c375c6d5fe96e42c33578ef774c04e247f01 100644 (file)
@@ -58,7 +58,7 @@ array(10) {
     [0]=>
     string(2) "20"
     [1]=>
-    NULL
+    string(0) ""
   }
   ["month"]=>
   array(2) {
@@ -127,7 +127,7 @@ array(2) {
     [2]=>
     string(2) "12"
     [3]=>
-    NULL
+    string(0) ""
     ["month"]=>
     string(3) "Aug"
     [4]=>
index 29f8204351e786bdc75f1bfeb6edee6b5107443c..11361d1b32d14aed3ed047a099a44ab8f7ebad7f 100644 (file)
@@ -24,7 +24,7 @@ array(2) {
     [1]=>
     string(12) "unsigned int"
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
     string(0) ""
     [4]=>
@@ -41,13 +41,13 @@ array(2) {
     [1]=>
     string(5) "short"
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
     string(0) ""
     [4]=>
     string(1) "a"
     [5]=>
-    NULL
+    string(0) ""
     [6]=>
     string(3) ", b"
   }
index 25b1e13126890ba61def3c0f84cd15aa95c3b25c..fdf58f569287e2ba03c629c5b5277c585a67b235 100644 (file)
@@ -2,7 +2,7 @@
 Bug #61780 (Inconsistent PCRE captures in match results): basics
 --FILE--
 <?php
-preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches);
+preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches, PREG_UNMATCHED_AS_NULL);
 var_dump($matches);
 ?>
 --EXPECT--
index d8e35c5c21f2669624df55303245b129f56661d0..dc5806cb30e40e7f7a25c66e407d55018c00a82b 100644 (file)
@@ -2,22 +2,22 @@
 Bug #61780 (Inconsistent PCRE captures in match results): numeric subpatterns
 --FILE--
 <?php
-preg_match('/(4)?(2)?\d/', '23456', $matches);
+preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
+preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(4)?(2)?\d/', '123456', $matches);
+preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
+preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER);
+preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 ?>
 --EXPECT--
index 375c02f5d24bb1edd7127c9a1a9f6c7c93492584..faf44d368bef7688d8995e4efca9ac2058190092 100644 (file)
@@ -2,22 +2,22 @@
 Bug #61780 (Inconsistent PCRE captures in match results): named subpatterns
 --FILE--
 <?php
-preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches);
+preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
+preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches);
+preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
+preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER);
+preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 echo "\n\n";
-preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
 var_export($matches);
 ?>
 --EXPECT--
index c065caab186eba7fc4a48880a2f014cfb3ee291a..8838a00500880e65d1a97916c6b46e8ccfe7a2be 100644 (file)
@@ -39,9 +39,9 @@ array(5) {
   [0]=>
   string(3) "_c_"
   [1]=>
-  NULL
+  string(0) ""
   [2]=>
-  NULL
+  string(0) ""
   [3]=>
   string(1) "c"
   ["MARK"]=>
@@ -65,42 +65,42 @@ array(6) {
     [0]=>
     string(1) "a"
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
-    NULL
+    string(0) ""
   }
   [2]=>
   array(4) {
     [0]=>
-    NULL
+    string(0) ""
     [1]=>
     string(1) "b"
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
-    NULL
+    string(0) ""
   }
   [3]=>
   array(4) {
     [0]=>
-    NULL
+    string(0) ""
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
     string(1) "c"
     [3]=>
-    NULL
+    string(0) ""
   }
   [4]=>
   array(4) {
     [0]=>
-    NULL
+    string(0) ""
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
     string(1) "d"
   }
@@ -128,7 +128,7 @@ array(4) {
     [0]=>
     string(3) "_b_"
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
     string(1) "b"
   }
@@ -137,9 +137,9 @@ array(4) {
     [0]=>
     string(3) "_c_"
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
     string(1) "c"
     ["MARK"]=>
@@ -150,11 +150,11 @@ array(4) {
     [0]=>
     string(3) "_d_"
     [1]=>
-    NULL
+    string(0) ""
     [2]=>
-    NULL
+    string(0) ""
     [3]=>
-    NULL
+    string(0) ""
     [4]=>
     string(1) "d"
   }