granicus.if.org Git - php/commitdiff
- Forgot to include these files.
author Moriyoshi Koizumi <moriyoshi@php.net>
Sun, 15 Feb 2009 07:11:04 +0000 (07:11 +0000)
committer Moriyoshi Koizumi <moriyoshi@php.net>
Sun, 15 Feb 2009 07:11:04 +0000 (07:11 +0000)
ext/mbstring/libmbfl/mbfl/mbfilter.c
ext/mbstring/mbstring.c

index 646912ad16cbcc1366e324dc91c1d4f4f24e0b9d..1aeb38cc9b7ddcec3d3961a762ec1819e1d26dc0 100644 (file)
@@ -778,7 +778,7 @@ retry:
                        for (;;) {
                                pc->found_pos++;
                                p = h;
-                               m = pc->needle.buffer;
+                               m = (int *)pc->needle.buffer;
                                n = pc->needle_pos - 1;
                                while (n > 0 && *p == *m) {
                                        n--;
@@ -857,87 +857,203 @@ mbfl_strpos(
     int offset,
     int reverse)
 {
-       int n, result, negative_offset = 0;
-       unsigned char *p;
-       mbfl_convert_filter *filter;
-       struct collector_strpos_data pc;
+       int result;
+       mbfl_string _haystack_u8, _needle_u8;
+       const mbfl_string *haystack_u8, *needle_u8;
+       const unsigned char *u8_tbl;
 
-       if (haystack == NULL || needle == NULL) {
+       if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
                return -8;
        }
-       /* needle is converted into wchar */
-       mbfl_wchar_device_init(&pc.needle);
-       filter = mbfl_convert_filter_new(
-         needle->no_encoding,
-         mbfl_no_encoding_wchar,
-         mbfl_wchar_device_output, 0, &pc.needle);
-       if (filter == NULL) {
-               return -4;
-       }
-       p = needle->val;
-       n = needle->len;
-       if (p != NULL) {
-               while (n > 0) {
-                       if ((*filter->filter_function)(*p++, filter) < 0) {
-                               break;
-                       }
-                       n--;
+
+       {
+               const mbfl_encoding *u8_enc;
+               u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
+               if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
+                       return -8;
                }
+               u8_tbl = u8_enc->mblen_table;
        }
-       mbfl_convert_filter_flush(filter);
-       mbfl_convert_filter_delete(filter);
-       pc.needle_len = pc.needle.pos;
-       if (pc.needle.buffer == NULL) {
-               return -4;
-       }
-       if (pc.needle_len <= 0) {
-               mbfl_wchar_device_clear(&pc.needle);
-               return -2;
-       }
-       /* initialize filter and collector data */
-       filter = mbfl_convert_filter_new(
-         haystack->no_encoding,
-         mbfl_no_encoding_wchar,
-         collector_strpos, 0, &pc);
-       if (filter == NULL) {
-               mbfl_wchar_device_clear(&pc.needle);
-               return -4;
+
+       if (haystack->no_encoding != mbfl_no_encoding_utf8) {
+               mbfl_string_init(&_haystack_u8);
+               haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
+               if (haystack_u8 == NULL) {
+                       result = -4;
+                       goto out;
+               }
+       } else {
+               haystack_u8 = haystack;
        }
 
-       if (offset < 0) {
-               negative_offset = -offset - pc.needle_len;
-               if (negative_offset < 0) {
-                       negative_offset = 0;
+       if (needle->no_encoding != mbfl_no_encoding_utf8) {
+               mbfl_string_init(&_needle_u8);
+               needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
+               if (needle_u8 == NULL) {
+                       result = -4;
+                       goto out;
                }
-               offset = 0;
+       } else {
+               needle_u8 = needle;
        }
 
-       pc.start = offset;
-       pc.output = 0;
-       pc.needle_pos = 0;
-       pc.found_pos = 0;
-       pc.matched_pos = -1;
+       if (needle_u8->len < 1) {
+               result = -8;
+               goto out;
+       }
 
-       /* feed data */
-       p = haystack->val;
-       n = haystack->len - negative_offset;
-       if (p != NULL) {
-               while (n > 0) {
-                       if ((*filter->filter_function)(*p++, filter) < 0) {
-                               pc.matched_pos = -4;
-                               break;
+       result = -1;
+       if (haystack_u8->len < needle_u8->len) {
+               goto out;
+       }
+
+       if (!reverse) {
+               unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
+               unsigned int needle_u8_len = needle_u8->len;
+               unsigned int i;
+               const unsigned char *p, *q, *e;
+               const unsigned char *haystack_u8_val = haystack_u8->val,
+                                   *needle_u8_val = needle_u8->val;
+               for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
+                       jtbl[i] = needle_u8_len + 1;
+               }
+               for (i = 0; i < needle_u8_len - 1; ++i) {
+                       jtbl[needle_u8_val[i]] = needle_u8_len - i;
+               }
+               e = haystack_u8_val + haystack_u8->len;
+               p = haystack_u8_val;
+               while (--offset >= 0) {
+                       if (p >= e) {
+                               result = -16;
+                               goto out;
                        }
-                       if (pc.matched_pos >= 0 && !reverse) {
-                               break;
+                       p += u8_tbl[*p];
+               }
+               p += needle_u8_len;
+               if (p > e) {
+                       goto out;
+               }
+               while (p <= e) {
+                       const unsigned char *pv = p;
+                       q = needle_u8_val + needle_u8_len;
+                       for (;;) {
+                               if (q == needle_u8_val) {
+                                       result = 0;
+                                       while (p > haystack_u8_val) {
+                                               unsigned char c = *--p;
+                                               if (c < 0x80) {
+                                                       ++result;
+                                               } else if ((c & 0xc0) != 0x80) {
+                                                       ++result;
+                                               }       
+                                       }
+                                       goto out;
+                               }
+                               if (*--q != *--p) {
+                                       break;
+                               }
+                       }
+                       p += jtbl[*p];
+                       if (p <= pv) {
+                               p = pv + 1;
+                       }
+               }
+       } else {
+               unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
+               unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
+               unsigned int i;
+               const unsigned char *p, *e, *q, *qe;
+               const unsigned char *haystack_u8_val = haystack_u8->val,
+                                   *needle_u8_val = needle_u8->val;
+               for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
+                       jtbl[i] = needle_u8_len;
+               }
+               for (i = needle_u8_len - 1; i > 0; --i) {
+                       unsigned char c = needle_u8_val[i];
+                       jtbl[c] = i;
+                       if (c < 0x80) {
+                               ++needle_len;
+                       } else if ((c & 0xc0) != 0x80) {
+                               ++needle_len;
+                       }
+               }
+               {
+                       unsigned char c = needle_u8_val[0];
+                       if (c < 0x80) {
+                               ++needle_len;
+                       } else if ((c & 0xc0) != 0x80) {
+                               ++needle_len;
+                       }
+               }
+               e = haystack_u8_val;
+               p = e + haystack_u8->len;
+               qe = needle_u8_val + needle_u8_len;
+               if (offset < 0) {
+                       if (-offset > needle_len) {
+                               offset += needle_len; 
+                               while (offset < 0) {
+                                       unsigned char c;
+                                       if (p <= e) {
+                                               result = -16;
+                                               goto out;
+                                       }
+                                       c = *(--p);
+                                       if (c < 0x80) {
+                                               ++offset;
+                                       } else if ((c & 0xc0) != 0x80) {
+                                               ++offset;
+                                       }
+                               }
+                       }
+               } else {
+                       const unsigned char *ee = haystack_u8_val + haystack_u8->len;
+                       while (--offset >= 0) {
+                               if (e >= ee) {
+                                       result = -16;
+                                       goto out;
+                               }
+                               e += u8_tbl[*e];
+                       }
+               }
+               if (p < e + needle_u8_len) {
+                       goto out;
+               }
+               p -= needle_u8_len;
+               while (p >= e) {
+                       const unsigned char *pv = p;
+                       q = needle_u8_val;
+                       for (;;) {
+                               if (q == qe) {
+                                       result = 0;
+                                       p -= needle_u8_len;
+                                       while (p > haystack_u8_val) {
+                                               unsigned char c = *--p;
+                                               if (c < 0x80) {
+                                                       ++result;
+                                               } else if ((c & 0xc0) != 0x80) {
+                                                       ++result;
+                                               }       
+                                       }
+                                       goto out;
+                               }
+                               if (*q != *p) {
+                                       break;
+                               }
+                               ++p, ++q;
+                       }
+                       p -= jtbl[*p];
+                       if (p >= pv) {
+                               p = pv - 1;
                        }
-                       n--;
                }
        }
-       mbfl_convert_filter_flush(filter);
-       result = pc.matched_pos;
-       mbfl_convert_filter_delete(filter);
-       mbfl_wchar_device_clear(&pc.needle);
-
+out:
+       if (haystack_u8 == &_haystack_u8) {
+               mbfl_string_clear(&_haystack_u8);
+       }
+       if (needle_u8 == &_needle_u8) {
+               mbfl_string_clear(&_needle_u8);
+       }
        return result;
 }
 
index a564ee5e64c44f35d2ea2dfb976e977fed25c2e6..631c3d0e57fbf1e79b91c55ac300386c0233b3f8 100644 (file)
@@ -2385,7 +2385,7 @@ PHP_FUNCTION(mb_stripos)
                RETURN_FALSE;
        }
        if (needle.len == 0) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter.");
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
                RETURN_FALSE;
        }
        n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
@@ -2454,7 +2454,7 @@ PHP_FUNCTION(mb_strstr)
        }
 
        if (needle.len <= 0) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter.");
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
                RETURN_FALSE;
        }
        n = mbfl_strpos(&haystack, &needle, 0, 0);
@@ -2564,7 +2564,7 @@ PHP_FUNCTION(mb_stristr)
        }
 
        if (!needle.len) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty delimiter.");
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
                RETURN_FALSE;
        }
 
@@ -2683,7 +2683,7 @@ PHP_FUNCTION(mb_substr_count)
        }
 
        if (needle.len <= 0) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty substring.");
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
                RETURN_FALSE;
        }