From: Moriyoshi Koizumi
Date: Sat, 9 Nov 2002 17:05:47 +0000 (+0000)
Subject: Added mb_substr_count() as per the request #15097
X-Git-Tag: php-4.3.0RC1~164
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0e6445b8b17ead8170085e47a726a9b7fb1cdd08;p=php

Added mb_substr_count() as per the request #15097
---

Reviewer revision of the mbfl_substr_count() hunk in ext/mbstring/mbfilter.c:
pc.needle_pos is reset after every hit, and a failure while feeding the
haystack is now returned as -4 instead of a partial count.  The hunk line
count was adjusted to match; the "index" blob ids below still name the
originally committed file contents.

diff --git a/ext/mbstring/mbfilter.c b/ext/mbstring/mbfilter.c
index d6937eef64..7e40f4c9d6 100644
--- a/ext/mbstring/mbfilter.c
+++ b/ext/mbstring/mbfilter.c
@@ -1,6 +1,7 @@
-/* charset=UTF-8
- * vim: encoding=utf-8:
- * */
+/*
+ * charset=UTF-8
+ * vim600: encoding=utf-8
+ */
 
 /*
  * "streamable kanji code filter and converter"
@@ -7831,6 +7832,107 @@ mbfl_strpos(
 	return result;
 }
 
+/*
+ * substr_count
+ *
+ * Counts how many times needle occurs in haystack.  Both strings are fed
+ * through a conversion filter to wchar, and the haystack filter hands its
+ * output to collector_strpos, which is expected to flag a hit by setting
+ * pc.matched_pos.
+ * Returns the number of hits (>= 0), or a negative value on failure:
+ *   -8  haystack or needle is NULL
+ *   -4  a conversion filter could not be created, the converted needle
+ *       has no buffer, or feeding the haystack to the filter failed
+ *   -2  the needle is empty after conversion
+ *
+ * NOTE(review): collector_strpos is outside this patch.  Clearing
+ * pc.needle_pos after a hit assumes that field is its partial-match
+ * index, so each new search starts at the first needle character - confirm.
+ */
+
+int
+mbfl_substr_count(
+    mbfl_string *haystack,
+    mbfl_string *needle
+    TSRMLS_DC)
+{
+	int n, result = 0;
+	unsigned char *p;
+	mbfl_convert_filter *filter;
+	struct collector_strpos_data pc;
+
+	if (haystack == NULL || needle == NULL) {
+		return -8;
+	}
+	/* needle is converted into wchar */
+	mbfl_wchar_device_init(&pc.needle TSRMLS_CC);
+	filter = mbfl_convert_filter_new(
+	  needle->no_encoding,
+	  mbfl_no_encoding_wchar,
+	  mbfl_wchar_device_output, 0, &pc.needle TSRMLS_CC);
+	if (filter == NULL) {
+		return -4;
+	}
+	p = needle->val;
+	n = needle->len;
+	if (p != NULL) {
+		while (n > 0) {
+			if ((*filter->filter_function)(*p++, filter TSRMLS_CC) < 0) {
+				break;
+			}
+			n--;
+		}
+	}
+	mbfl_convert_filter_flush(filter TSRMLS_CC);
+	mbfl_convert_filter_delete(filter TSRMLS_CC);
+	pc.needle_len = pc.needle.pos;
+	if (pc.needle.buffer == NULL) {
+		return -4;
+	}
+	if (pc.needle_len <= 0) {
+		mbfl_wchar_device_clear(&pc.needle TSRMLS_CC);
+		return -2;
+	}
+	/* initialize filter and collector data */
+	filter = mbfl_convert_filter_new(
+	  haystack->no_encoding,
+	  mbfl_no_encoding_wchar,
+	  collector_strpos, 0, &pc TSRMLS_CC);
+	if (filter == NULL) {
+		mbfl_wchar_device_clear(&pc.needle TSRMLS_CC);
+		return -4;
+	}
+	pc.start = 0;
+	pc.output = 0;
+	pc.needle_pos = 0;
+	pc.found_pos = 0;
+	pc.matched_pos = -1;
+
+	/* feed data */
+	p = haystack->val;
+	n = haystack->len;
+	if (p != NULL) {
+		while (n > 0) {
+			if ((*filter->filter_function)(*p++, filter TSRMLS_CC) < 0) {
+				/* report the failure instead of a partial count */
+				result = -4;
+				break;
+			}
+			if (pc.matched_pos >= 0) {
+				++result;
+				/* re-arm the collector for the next occurrence */
+				pc.matched_pos = -1;
+				pc.needle_pos = 0;
+			}
+			n--;
+		}
+	}
+	mbfl_convert_filter_flush(filter TSRMLS_CC);
+	mbfl_convert_filter_delete(filter TSRMLS_CC);
+	mbfl_wchar_device_clear(&pc.needle TSRMLS_CC);
+
+	return result;
+}
 
 /*
  * substr
diff --git a/ext/mbstring/mbfilter.h b/ext/mbstring/mbfilter.h
index 563a11ea89..b2d87baeb3 100644
--- a/ext/mbstring/mbfilter.h
+++ b/ext/mbstring/mbfilter.h
@@ -487,6 +487,13 @@ mbfl_oddlen(mbfl_string *string);
 int
 mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, int offset, int reverse TSRMLS_DC);
 
+
+/*
+ * substr_count
+ */
+int
+mbfl_substr_count(mbfl_string *haystack, mbfl_string *needle TSRMLS_DC);
+
 /*
  * substr
  */
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 4202737ded..588ae84e86 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -173,6 +173,7 @@ static const struct mb_overload_def mb_ovld[] = {
 	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
 	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
 	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
+	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
 #if HAVE_MBREGEX
 	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
 	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
@@ -216,6 +217,7 @@ function_entry mbstring_functions[] = {
 	PHP_FE(mb_strlen, NULL)
 	PHP_FE(mb_strpos, NULL)
 	PHP_FE(mb_strrpos, NULL)
+	PHP_FE(mb_substr_count, NULL)
 	PHP_FE(mb_substr, NULL)
 	PHP_FE(mb_strcut, NULL)
 	PHP_FE(mb_strwidth, NULL)
@@ -2121,6 +2123,61 @@ PHP_FUNCTION(mb_strrpos)
 }
 /* }}} */
 
+/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
+   Count the number of substring occurrences; yields FALSE on an empty needle, an unknown encoding or a failed count */
+PHP_FUNCTION(mb_substr_count)
+{
+	pval **arg1, **arg2, **arg3;
+	int n;
+	mbfl_string haystack, needle;
+
+	mbfl_string_init(&haystack);
+	mbfl_string_init(&needle);
+	haystack.no_language = MBSTRG(current_language);
+	haystack.no_encoding = MBSTRG(current_internal_encoding);
+	needle.no_language = MBSTRG(current_language);
+	needle.no_encoding = MBSTRG(current_internal_encoding);
+	switch (ZEND_NUM_ARGS()) {
+	case 2:
+		if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
+			WRONG_PARAM_COUNT;
+		}
+		break;
+	case 3:
+		if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
+			WRONG_PARAM_COUNT;
+		}
+		convert_to_string_ex(arg3);
+		haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg3));
+		if (haystack.no_encoding == mbfl_no_encoding_invalid) {
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg3));
+			RETURN_FALSE;
+		}
+		break;
+	default:
+		WRONG_PARAM_COUNT;
+	}
+
+	convert_to_string_ex(arg1);
+	convert_to_string_ex(arg2);
+
+	if (Z_STRLEN_PP(arg2) <= 0) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
+		RETURN_FALSE;
+	}
+	haystack.val = (unsigned char *)Z_STRVAL_PP(arg1);
+	haystack.len = Z_STRLEN_PP(arg1);
+	needle.val = (unsigned char *)Z_STRVAL_PP(arg2);
+	needle.len = Z_STRLEN_PP(arg2);
+	n = mbfl_substr_count(&haystack, &needle TSRMLS_CC);
+	if (n >= 0) {
+		RETVAL_LONG(n);
+	} else {
+		RETVAL_FALSE;
+	}
+}
+/* }}} */
+
 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
    Returns part of a string */
 PHP_FUNCTION(mb_substr)
diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h
index 37e4acdb69..6668e2ebf3 100644
--- a/ext/mbstring/mbstring.h
+++ b/ext/mbstring/mbstring.h
@@ -91,6 +91,7 @@ PHP_FUNCTION(mb_output_handler);
 PHP_FUNCTION(mb_strlen);
 PHP_FUNCTION(mb_strpos);
 PHP_FUNCTION(mb_strrpos);
+PHP_FUNCTION(mb_substr_count);
 PHP_FUNCTION(mb_substr);
 PHP_FUNCTION(mb_strcut);
 PHP_FUNCTION(mb_strwidth);