granicus.if.org Git - php/commitdiff
Add MBFL_SUBSTR_UNTIL_END mode to mbfl_substr
author: Nikita Popov <nikita.ppv@gmail.com>
Sun, 23 Jul 2017 21:10:53 +0000 (23:10 +0200)
committer: Nikita Popov <nikita.ppv@gmail.com>
Sun, 23 Jul 2017 21:17:12 +0000 (23:17 +0200)
When MBFL_SUBSTR_UNTIL_END is passed as the length, mbfl_substr takes the
substring from the offset to the end of the string. This avoids pointless
searching for the end position and also saves us a length calculation in
the strstr family of functions.

ext/mbstring/libmbfl/mbfl/mbfilter.c
ext/mbstring/libmbfl/mbfl/mbfilter.h
ext/mbstring/mbstring.c

index 73461028d0c0cea309497c275fae1bb0a91732c9..55c5c1d27d5f48f95c9ec3ea4d20ed7ebefca9a6 100644 (file)
@@ -1159,46 +1159,55 @@ mbfl_substr(
        if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
           encoding->mblen_table != NULL) {
                len = string->len;
-               start = from;
-               end = from + length;
-               if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
-                       start *= 2;
-                       end = start + length*2;
+               if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+                       start = from;
+               } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+                       start = from*2;
                } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
-                       start *= 4;
-                       end = start + length*4;
-               } else if (encoding->mblen_table != NULL) {
+                       start = from*4;
+               } else {
                        const unsigned char *mbtab = encoding->mblen_table;
                        start = 0;
-                       end = 0;
                        n = 0;
                        k = 0;
                        p = string->val;
-                       if (p != NULL) {
-                               /* search start position */
-                               while (k <= from) {
-                                       start = n;
-                                       if (n >= len) {
-                                               break;
-                                       }
-                                       m = mbtab[*p];
-                                       n += m;
-                                       p += m;
-                                       k++;
+                       /* search start position */
+                       while (k <= from) {
+                               start = n;
+                               if (n >= len) {
+                                       break;
                                }
-                               /* detect end position */
-                               k = 0;
-                               end = start;
-                               while (k < length) {
-                                       end = n;
-                                       if (n >= len) {
-                                               break;
-                                       }
-                                       m = mbtab[*p];
-                                       n += m;
-                                       p += m;
-                                       k++;
+                               m = mbtab[*p];
+                               n += m;
+                               p += m;
+                               k++;
+                       }
+               }
+
+               if (length == MBFL_SUBSTR_UNTIL_END) {
+                       end = len;
+               } else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+                       end = start + length;
+               } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+                       end = start + length*2;
+               } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
+                       end = start + length*4;
+               } else {
+                       const unsigned char *mbtab = encoding->mblen_table;
+                       end = start;
+                       n = start;
+                       k = 0;
+                       p = string->val + start;
+                       /* detect end position */
+                       while (k <= length) {
+                               end = n;
+                               if (n >= len) {
+                                       break;
                                }
+                               m = mbtab[*p];
+                               n += m;
+                               p += m;
+                               k++;
                        }
                }
 
@@ -1215,21 +1224,11 @@ mbfl_substr(
                /* allocate memory and copy */
                n = end - start;
                result->len = 0;
-               result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
+               result->val = w = (unsigned char*)mbfl_malloc(n + 1);
                if (w != NULL) {
-                       p = string->val;
-                       if (p != NULL) {
-                               p += start;
-                               result->len = n;
-                               while (n > 0) {
-                                       *w++ = *p++;
-                                       n--;
-                               }
-                       }
-                       *w++ = '\0';
-                       *w++ = '\0';
-                       *w++ = '\0';
-                       *w = '\0';
+                       result->len = n;
+                       memcpy(w, string->val + start, n);
+                       w[n] = '\0';
                } else {
                        result = NULL;
                }
@@ -1239,6 +1238,10 @@ mbfl_substr(
                mbfl_convert_filter *decoder;
                mbfl_convert_filter *encoder;
 
+               if (length == MBFL_SUBSTR_UNTIL_END) {
+                       length = mbfl_strlen(string) - from;
+               }
+
                mbfl_memory_device_init(&device, length + 1, 0);
                mbfl_string_init(result);
                result->no_language = string->no_language;
index 0a27d3a26567c969d7428120fc74f5ff835ef914..54858bd919c174655af8536a3155fac27df4db54 100644 (file)
@@ -202,13 +202,17 @@ mbfl_oddlen(mbfl_string *string);
 MBFLAPI extern size_t
 mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, ssize_t offset, int reverse);
 
-
 /*
  * substr_count
  */
 MBFLAPI extern size_t
 mbfl_substr_count(mbfl_string *haystack, mbfl_string *needle);
 
+/*
+ * If specified as length, the substr until the end of the string is taken.
+ */
+#define MBFL_SUBSTR_UNTIL_END ((size_t) -1)
+
 /*
  * substr
  */
index 58556410d135434a83e44b297fe7dea02ff90231..4f3b7dc08e08cd9a464da05652cded092a447379 100644 (file)
@@ -2595,7 +2595,6 @@ PHP_FUNCTION(mb_strstr)
 
        n = mbfl_strpos(&haystack, &needle, 0, 0);
        if (!mbfl_is_error(n)) {
-               size_t mblen = mbfl_strlen(&haystack);
                if (part) {
                        ret = mbfl_substr(&haystack, &result, 0, n);
                        if (ret != NULL) {
@@ -2606,8 +2605,7 @@ PHP_FUNCTION(mb_strstr)
                                RETVAL_FALSE;
                        }
                } else {
-                       size_t len = (mblen - n);
-                       ret = mbfl_substr(&haystack, &result, n, len);
+                       ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
                        if (ret != NULL) {
                                // TODO: avoid reallocation ???
                                RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2654,7 +2652,6 @@ PHP_FUNCTION(mb_strrchr)
 
        n = mbfl_strpos(&haystack, &needle, 0, 1);
        if (!mbfl_is_error(n)) {
-               size_t mblen = mbfl_strlen(&haystack);
                if (part) {
                        ret = mbfl_substr(&haystack, &result, 0, n);
                        if (ret != NULL) {
@@ -2665,8 +2662,7 @@ PHP_FUNCTION(mb_strrchr)
                                RETVAL_FALSE;
                        }
                } else {
-                       size_t len = (mblen - n);
-                       ret = mbfl_substr(&haystack, &result, n, len);
+                       ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
                        if (ret != NULL) {
                                // TODO: avoid reallocation ???
                                RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2686,7 +2682,7 @@ PHP_FUNCTION(mb_strrchr)
 PHP_FUNCTION(mb_stristr)
 {
        zend_bool part = 0;
-       size_t from_encoding_len, n, len, mblen;
+       size_t from_encoding_len, n;
        mbfl_string haystack, needle, result, *ret = NULL;
        const char *from_encoding = NULL;
        mbfl_string_init(&haystack);
@@ -2712,8 +2708,6 @@ PHP_FUNCTION(mb_stristr)
                RETURN_FALSE;
        }
 
-       mblen = mbfl_strlen(&haystack);
-
        if (part) {
                ret = mbfl_substr(&haystack, &result, 0, n);
                if (ret != NULL) {
@@ -2724,8 +2718,7 @@ PHP_FUNCTION(mb_stristr)
                        RETVAL_FALSE;
                }
        } else {
-               len = (mblen - n);
-               ret = mbfl_substr(&haystack, &result, n, len);
+               ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
                if (ret != NULL) {
                        // TODO: avoid reallocaton ???
                        RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2742,7 +2735,7 @@ PHP_FUNCTION(mb_stristr)
 PHP_FUNCTION(mb_strrichr)
 {
        zend_bool part = 0;
-       size_t n, len, mblen;
+       size_t n;
        size_t from_encoding_len;
        mbfl_string haystack, needle, result, *ret = NULL;
        const char *from_encoding = NULL;
@@ -2764,8 +2757,6 @@ PHP_FUNCTION(mb_strrichr)
                RETURN_FALSE;
        }
 
-       mblen = mbfl_strlen(&haystack);
-
        if (part) {
                ret = mbfl_substr(&haystack, &result, 0, n);
                if (ret != NULL) {
@@ -2776,8 +2767,7 @@ PHP_FUNCTION(mb_strrichr)
                        RETVAL_FALSE;
                }
        } else {
-               len = (mblen - n);
-               ret = mbfl_substr(&haystack, &result, n, len);
+               ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
                if (ret != NULL) {
                        // TODO: avoid reallocation ???
                        RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2831,7 +2821,7 @@ PHP_FUNCTION(mb_substr)
 {
        char *str, *encoding = NULL;
        zend_long from, len;
-       size_t mblen;
+       size_t mblen, real_from, real_len;
        size_t str_len, encoding_len;
        zend_bool len_is_null = 1;
        mbfl_string string, result, *ret;
@@ -2850,42 +2840,42 @@ PHP_FUNCTION(mb_substr)
        string.val = (unsigned char *)str;
        string.len = str_len;
 
-       if (len_is_null) {
-               len = str_len;
-       }
-
        /* measures length */
        mblen = 0;
-       if (from < 0 || len < 0) {
+       if (from < 0 || (!len_is_null && len < 0)) {
                mblen = mbfl_strlen(&string);
        }
 
        /* if "from" position is negative, count start position from the end
         * of the string
         */
-       if (from < 0) {
-               from = mblen + from;
-               if (from < 0) {
-                       from = 0;
-               }
+       if (from >= 0) {
+               real_from = (size_t) from;
+       } else if (-from < mblen) {
+               real_from = mblen + from;
+       } else {
+               real_from = 0;
        }
 
        /* if "length" position is negative, set it to the length
         * needed to stop that many chars from the end of the string
         */
-       if (len < 0) {
-               len = (mblen - from) + len;
-               if (len < 0) {
-                       len = 0;
-               }
+       if (len_is_null) {
+               real_len = MBFL_SUBSTR_UNTIL_END;
+       } else if (len >= 0) {
+               real_len = (size_t) len;
+       } else if (real_from < mblen && -len < mblen - real_from) {
+               real_len = (mblen - real_from) + len;
+       } else {
+               real_len = 0;
        }
 
        if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
-               && (from >= mbfl_strlen(&string))) {
+               && (real_from >= mbfl_strlen(&string))) {
                RETURN_FALSE;
        }
 
-       ret = mbfl_substr(&string, &result, from, len);
+       ret = mbfl_substr(&string, &result, real_from, real_len);
        if (NULL == ret) {
                RETURN_FALSE;
        }