]> granicus.if.org Git - php/commitdiff
- Fix bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into octets).
authorMoriyoshi Koizumi <moriyoshi@php.net>
Sat, 31 Jan 2004 22:36:33 +0000 (22:36 +0000)
committerMoriyoshi Koizumi <moriyoshi@php.net>
Sat, 31 Jan 2004 22:36:33 +0000 (22:36 +0000)
NEWS
ext/pcre/php_pcre.c
ext/pcre/php_pcre.h

diff --git a/NEWS b/NEWS
index d8b2a8c339d0693dea94fd4fa724c76138190b23..b5ed1420d6812acd54ef6087ec91f0826c8205de 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,8 @@ PHP                                                                        NEWS
   (Derick)
 - Fixed problems with longlong values in mysqli. (Georg)
 - Fixed class name case preserving of user defined classes. (Marcus)
+- Fixed bug #27103 (preg_split('//u') incorrectly splits UTF-8 strings into
+  octets). (Moriyoshi)
 - Fixed bug #27042 (SPL: SeekableIterator seek() broken). (Marcus)
 - Fixed bug #27008 (Every class method can be called as static). (Marcus)
 - Fixed bug #26938 (exec() has problems reading long lines).
index c7702da11d9805000394e4e28f99aef7dcb4c61e..28babfca06978bb31e8414190e2e438d173a4cdf 100644 (file)
@@ -132,7 +132,17 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
 
 /* {{{ pcre_get_compiled_regex
  */
-PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) {
+PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
+{
+       int compile_options;
+       return pcre_get_compiled_regex_ex(regex, extra, preg_options, &compile_options);
+}
+/* }}} */
+
+/* {{{ pcre_get_compiled_regex_ex
+ */
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
+{
        pcre                            *re = NULL;
        int                                      coptions = 0;
        int                                      soptions = 0;
@@ -162,6 +172,7 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_
 #endif
                        *extra = pce->extra;
                        *preg_options = pce->preg_options;
+                       *compile_options = pce->compile_options;
                        return pce->re;
 #if HAVE_SETLOCALE
                }
@@ -236,7 +247,7 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_
 
        /* Clear out preg options */
        *preg_options = 0;
-       
+
        /* Parse through the options, setting appropriate flags.  Display
           a warning if we encounter an unknown modifier. */    
        while (*pp != 0) {
@@ -297,13 +308,15 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_
        }
 
        *preg_options = poptions;
-       
+       *compile_options = coptions;
+
        efree(pattern);
 
        /* Store the compiled pattern and extra info in the cache. */
        new_entry.re = re;
        new_entry.extra = *extra;
        new_entry.preg_options = poptions;
+       new_entry.compile_options = coptions;
 #if HAVE_SETLOCALE
        new_entry.locale = pestrdup(locale, 1);
        new_entry.tables = tables;
@@ -1168,11 +1181,14 @@ PHP_FUNCTION(preg_split)
                                   **limit,                             /* Number of pieces to return */
                                   **flags;
        pcre                    *re = NULL;                     /* Compiled regular expression */
+       pcre                    *re_bump = NULL;        /* Regex instance for empty matches */
        pcre_extra              *extra = NULL;          /* Holds results of studying */
+       pcre_extra              *extra_bump = NULL;     /* Almost dummy */
        int                             *offsets;                       /* Array of subpattern offsets */
        int                              size_offsets;          /* Size of the offsets array */
        int                              exoptions = 0;         /* Execution options */
        int                              preg_options = 0;      /* Custom preg options */
+       int                      coptions = 0;          /* Custom preg options */
        int                              argc;                          /* Argument count */
        int                              limit_val = -1;        /* Integer value of limit */
        int                              no_empty = 0;          /* If NO_EMPTY flag is set */
@@ -1210,7 +1226,7 @@ PHP_FUNCTION(preg_split)
        convert_to_string_ex(subject);
        
        /* Compile regex or get it from cache. */
-       if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra, &preg_options TSRMLS_CC)) == NULL) {
+       if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra, &preg_options, &coptions TSRMLS_CC)) == NULL) {
                RETURN_FALSE;
        }
        
@@ -1284,8 +1300,26 @@ PHP_FUNCTION(preg_split)
                           the start offset, and continue. Fudge the offset values
                           to achieve this, unless we're already at the end of the string. */
                        if (g_notempty != 0 && start_offset < Z_STRLEN_PP(subject)) {
-                               offsets[0] = start_offset;
-                               offsets[1] = start_offset + 1;
+                               if (coptions & PCRE_UTF8) {
+                                       if (re_bump == NULL) {
+                                               int dummy;
+
+                                               if ((re_bump = pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
+                                                       RETURN_FALSE;
+                                               }
+                                       }
+                                       count = pcre_exec(re_bump, extra_bump, Z_STRVAL_PP(subject),
+                                                         Z_STRLEN_PP(subject), start_offset,
+                                                         exoptions, offsets, size_offsets);
+                                       if (count < 1) {
+                                               php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
+                                               offsets[0] = start_offset;
+                                               offsets[1] = start_offset + 1;
+                                       }
+                               } else {
+                                       offsets[0] = start_offset;
+                                       offsets[1] = start_offset + 1;
+                               }
                        } else
                                break;
                }
index 25fc6abafeb4bab8b2b2919b894f228326c566cf..2f05a27f6f80af3869fb944a213f7a1c3f8fac74 100644 (file)
@@ -43,6 +43,7 @@ PHP_FUNCTION(preg_grep);
 
 PHPAPI char *php_pcre_replace(char *regex,   int regex_len, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit TSRMLS_DC);
 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options TSRMLS_DC);
+PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *coptions TSRMLS_DC);
 
 extern zend_module_entry pcre_module_entry;
 #define pcre_module_ptr &pcre_module_entry
@@ -51,6 +52,7 @@ typedef struct {
        pcre *re;
        pcre_extra *extra;
        int preg_options;
+       int compile_options;
 #if HAVE_SETLOCALE
        char *locale;
        unsigned const char *tables;