From b03fd376778941b12a76aa63d3dfe3d10d4d639f Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 11 Jul 2020 11:43:57 +0200 Subject: [PATCH] Code cleanup in mbstring.c --- ext/mbstring/mbstring.c | 397 ++++++++++++++++------------------------ 1 file changed, 162 insertions(+), 235 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index c400a2f5ee..1fba2db34c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1,21 +1,21 @@ /* - +----------------------------------------------------------------------+ - | Copyright (c) The PHP Group | - +----------------------------------------------------------------------+ - | This source file is subject to version 3.01 of the PHP license, | - | that is bundled with this package in the file LICENSE, and is | - | available through the world-wide-web at the following url: | - | http://www.php.net/license/3_01.txt | - | If you did not receive a copy of the PHP license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@php.net so we can mail you a copy immediately. | - +----------------------------------------------------------------------+ - | Author: Tsukada Takuya | - | Rui Hirokawa | - | Hironori Sato | - | Shigeru Kanemoto | - +----------------------------------------------------------------------+ - */ + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Tsukada Takuya | + | Rui Hirokawa | + | Hironori Sato | + | Shigeru Kanemoto | + +----------------------------------------------------------------------+ +*/ /* {{{ includes */ #include "libmbfl/config.h" @@ -38,6 +38,7 @@ #include "libmbfl/filters/mbfilter_qprint.h" #include "libmbfl/filters/mbfilter_ucs4.h" #include "libmbfl/filters/mbfilter_utf8.h" +#include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h" #include "php_variables.h" #include "php_globals.h" @@ -861,10 +862,9 @@ static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, siz if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) { /* falls back to UTF-8 if an unknown encoding name is given */ if (new_value) { - php_error_docref("ref.mbstring", E_WARNING, - "Unknown encoding \"%s\" in ini setting", new_value); + php_error_docref("ref.mbstring", E_WARNING, "Unknown encoding \"%s\" in ini setting", new_value); } - encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); + encoding = &mbfl_encoding_utf8; } MBSTRG(internal_encoding) = encoding; MBSTRG(current_internal_encoding) = encoding; @@ -1316,95 +1316,70 @@ PHP_FUNCTION(mb_internal_encoding) /* {{{ Returns the input encoding */ PHP_FUNCTION(mb_http_input) { - char *typ = NULL; - size_t typ_len = 0; - int retname = 1; - char *list, *temp; - const mbfl_encoding *result = NULL; + char *type = NULL; + size_t type_len = 0, n; + const mbfl_encoding **entry; ZEND_PARSE_PARAMETERS_START(0, 1) Z_PARAM_OPTIONAL - Z_PARAM_STRING(typ, typ_len) + Z_PARAM_STRING(type, type_len) ZEND_PARSE_PARAMETERS_END(); - if (typ == NULL) { - result = MBSTRG(http_input_identify); + if (type == NULL) { + RETVAL_STRING(MBSTRG(http_input_identify)->name); } else { - switch (*typ) { + switch (*type) { case 'G': case 'g': - result = MBSTRG(http_input_identify_get); + RETVAL_STRING(MBSTRG(http_input_identify_get)->name); break; case 'P': case 'p': - result = MBSTRG(http_input_identify_post); + RETVAL_STRING(MBSTRG(http_input_identify_post)->name); break; case 'C': case 'c': - result = MBSTRG(http_input_identify_cookie); + RETVAL_STRING(MBSTRG(http_input_identify_cookie)->name); break; case 'S': case 's': - result = MBSTRG(http_input_identify_string); + RETVAL_STRING(MBSTRG(http_input_identify_string)->name); break; case 'I': case 'i': - { - const mbfl_encoding **entry = MBSTRG(http_input_list); - const size_t n = MBSTRG(http_input_list_size); - size_t i; - array_init(return_value); - for (i = 0; i < n; i++) { - add_next_index_string(return_value, (*entry)->name); - entry++; - } - retname = 0; + entry = MBSTRG(http_input_list); + n = MBSTRG(http_input_list_size); + array_init(return_value); + for (int i = 0; i < n; i++, entry++) { + add_next_index_string(return_value, (*entry)->name); } break; case 'L': case 'l': - { - const mbfl_encoding **entry = MBSTRG(http_input_list); - const size_t n = MBSTRG(http_input_list_size); - size_t i; - list = NULL; - for (i = 0; i < n; i++) { - if (list) { - temp = list; - spprintf(&list, 0, "%s,%s", temp, (*entry)->name); - efree(temp); - if (!list) { - break; - } - } else { - list = estrdup((*entry)->name); - } - entry++; - } - } - if (!list) { + entry = MBSTRG(http_input_list); + n = MBSTRG(http_input_list_size); + if (n == 0) { // TODO should return empty string? RETURN_FALSE; } - RETVAL_STRING(list); - efree(list); - retname = 0; + mbfl_string result; + mbfl_memory_device device; + mbfl_memory_device_init(&device, n * 12, 0); + for (int i = 0; i < n; i++, entry++) { + mbfl_memory_device_strcat(&device, (*entry)->name); + mbfl_memory_device_output(',', &device); + } + mbfl_memory_device_unput(&device); /* Remove trailing comma */ + mbfl_memory_device_result(&device, &result); + RETVAL_STRINGL((const char*)result.val, result.len); + mbfl_string_clear(&result); break; default: // TODO ValueError - result = MBSTRG(http_input_identify); + RETVAL_STRING(MBSTRG(http_input_identify)->name); break; } } - - // FIXME this bloc seems useless except for default switch case - if (retname) { - if (result) { - RETVAL_STRING(result->name); - } else { - RETVAL_FALSE; - } - } } /* }}} */ @@ -1413,7 +1388,6 @@ PHP_FUNCTION(mb_http_output) { char *name = NULL; size_t name_len; - const mbfl_encoding *encoding; ZEND_PARSE_PARAMETERS_START(0, 1) Z_PARAM_OPTIONAL @@ -1424,7 +1398,7 @@ PHP_FUNCTION(mb_http_output) ZEND_ASSERT(MBSTRG(current_http_output_encoding)); RETURN_STRING(MBSTRG(current_http_output_encoding)->name); } else { - encoding = php_mb_get_encoding_or_pass(name); + const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(name); if (!encoding) { zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name); RETURN_THROWS(); @@ -1450,11 +1424,10 @@ PHP_FUNCTION(mb_detect_order) ZEND_PARSE_PARAMETERS_END(); if (!order_str && !order_ht) { - size_t i; size_t n = MBSTRG(current_detect_order_list_size); const mbfl_encoding **entry = MBSTRG(current_detect_order_list); array_init(return_value); - for (i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { add_next_index_string(return_value, (*entry)->name); entry++; } @@ -1500,7 +1473,7 @@ static inline int php_mb_check_code_point(zend_long cp) return 0; } - /* As the we do not know the target encoding of the conversion operation that is going to + /* As we do not know the target encoding of the conversion operation that is going to * use the substitution character, we cannot check whether the codepoint is actually mapped * in the given encoding at this point. Thus we have to accept everything. */ return 1; @@ -1659,7 +1632,8 @@ PHP_FUNCTION(mb_output_handler) MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); mbfl_buffer_converter_delete(MBSTRG(outconv)); MBSTRG(outconv) = NULL; - } + } + if (encoding == &mbfl_encoding_pass) { RETURN_STRINGL(arg_string, arg_string_len); } @@ -1670,7 +1644,7 @@ PHP_FUNCTION(mb_output_handler) MBSTRG(http_output_conv_mimetypes), SG(sapi_headers).mimetype, strlen(SG(sapi_headers).mimetype))) { - if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){ + if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) { mimetype = estrdup(SG(sapi_headers).mimetype); } else { mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype); @@ -1695,7 +1669,7 @@ PHP_FUNCTION(mb_output_handler) efree(mimetype); } } - } + } /* just return if the converter is not activated. */ if (MBSTRG(outconv) == NULL) { @@ -1715,12 +1689,14 @@ PHP_FUNCTION(mb_output_handler) */ string.val = (unsigned char *)arg_string; string.len = arg_string_len; + mbfl_buffer_converter_feed(MBSTRG(outconv), &string); if (last_feed) { mbfl_buffer_converter_flush(MBSTRG(outconv)); } /* get the converter output, and return it */ mbfl_buffer_converter_result(MBSTRG(outconv), &result); + // TODO: avoid reallocation ??? RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */ efree(result.val); @@ -1739,30 +1715,31 @@ PHP_FUNCTION(mb_output_handler) /* structure to pass split params to the callback */ struct mbfl_split_params { - zval *return_value; /* php function return value structure pointer */ - mbfl_string *result_string; /* string to store result chunk */ - size_t mb_chunk_length; /* actual chunk length in chars */ - size_t split_length; /* split length in chars */ - mbfl_convert_filter *next_filter; /* widechar to encoding converter */ + zval *return_value; /* php function return value structure pointer */ + mbfl_string *result_string; /* string to store result chunk */ + size_t mb_chunk_length; /* actual chunk length in chars */ + size_t split_length; /* split length in chars */ + mbfl_convert_filter *next_filter; /* widechar to encoding converter */ }; /* callback function to fill split array */ static int mbfl_split_output(int c, void *data) { - struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */ - - (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */ - - if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */ - mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */ - mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */ - mbfl_string *chunk = params->result_string; - mbfl_memory_device_result(device, chunk); /* make chunk */ - add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */ - efree(chunk->val); - params->mb_chunk_length = 0; /* reset mb_chunk size */ - } - return 0; + struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */ + + (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */ + + if (params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */ + mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */ + mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */ + mbfl_string *chunk = params->result_string; + mbfl_memory_device_result(device, chunk); /* make chunk */ + add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */ + efree(chunk->val); + params->mb_chunk_length = 0; /* reset mb_chunk size */ + } + + return 0; } /* TODO Document this function on php.net */ @@ -1874,7 +1851,7 @@ PHP_FUNCTION(mb_str_split) (*filter->filter_function)(*p++, filter); } params.mb_chunk_length = split_length - 1; /* force to finish current chunk */ - (*filter->filter_function)(*p++, filter); /*process last char */ + (*filter->filter_function)(*p++, filter); /* process last char */ mbfl_convert_filter_delete(decoder); mbfl_convert_filter_delete(filter); @@ -1899,26 +1876,23 @@ PHP_FUNCTION(mb_str_split) /* {{{ Get character numbers of a string */ PHP_FUNCTION(mb_strlen) { - size_t n; mbfl_string string; char *str; - size_t str_len; zend_string *enc_name = NULL; ZEND_PARSE_PARAMETERS_START(1, 2) - Z_PARAM_STRING(str, str_len) + Z_PARAM_STRING(str, string.len) Z_PARAM_OPTIONAL Z_PARAM_STR_OR_NULL(enc_name) ZEND_PARSE_PARAMETERS_END(); - string.val = (unsigned char *) str; - string.len = str_len; + string.val = (unsigned char*)str; string.encoding = php_mb_get_encoding(enc_name, 2); if (!string.encoding) { RETURN_THROWS(); } - n = mbfl_strlen(&string); + size_t n = mbfl_strlen(&string); /* Only way this can fail is if the conversion creation fails * this would imply some sort of memory allocation failure which is a bug */ ZEND_ASSERT(!mbfl_is_error(n)); @@ -1950,7 +1924,6 @@ PHP_FUNCTION(mb_strpos) char *haystack_val, *needle_val; mbfl_string haystack, needle; zend_string *enc_name = NULL; - size_t n; ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack_val, haystack.len) @@ -1968,7 +1941,7 @@ PHP_FUNCTION(mb_strpos) RETURN_THROWS(); } - n = mbfl_strpos(&haystack, &needle, offset, reverse); + size_t n = mbfl_strpos(&haystack, &needle, offset, reverse); if (!mbfl_is_error(n)) { RETVAL_LONG(n); } else { @@ -1984,7 +1957,7 @@ PHP_FUNCTION(mb_strrpos) mbfl_string haystack, needle; char *haystack_val, *needle_val; zend_string *enc_name = NULL; - zend_long offset = 0, n; + zend_long offset = 0; ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack_val, haystack.len) @@ -2002,7 +1975,7 @@ PHP_FUNCTION(mb_strrpos) RETURN_THROWS(); } - n = mbfl_strpos(&haystack, &needle, offset, 1); + size_t n = mbfl_strpos(&haystack, &needle, offset, 1); if (!mbfl_is_error(n)) { RETVAL_LONG(n); } else { @@ -2015,12 +1988,10 @@ PHP_FUNCTION(mb_strrpos) /* {{{ Finds position of first occurrence of a string within another, case insensitive */ PHP_FUNCTION(mb_stripos) { - size_t n = (size_t) -1; zend_long offset = 0; mbfl_string haystack, needle; char *haystack_val, *needle_val; zend_string *from_encoding = NULL; - const mbfl_encoding *enc; ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack_val, haystack.len) @@ -2033,12 +2004,12 @@ PHP_FUNCTION(mb_stripos) haystack.val = (unsigned char*)haystack_val; needle.val = (unsigned char*)needle_val; - enc = php_mb_get_encoding(from_encoding, 4); + const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4); if (!enc) { RETURN_THROWS(); } - n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); + size_t n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); if (!mbfl_is_error(n)) { RETVAL_LONG(n); @@ -2052,12 +2023,10 @@ PHP_FUNCTION(mb_stripos) /* {{{ Finds position of last occurrence of a string within another, case insensitive */ PHP_FUNCTION(mb_strripos) { - size_t n = (size_t) -1; zend_long offset = 0; mbfl_string haystack, needle; char *haystack_val, *needle_val; zend_string *from_encoding = NULL; - const mbfl_encoding *enc; ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack_val, haystack.len) @@ -2070,12 +2039,12 @@ PHP_FUNCTION(mb_strripos) haystack.val = (unsigned char*)haystack_val; needle.val = (unsigned char*)needle_val; - enc = php_mb_get_encoding(from_encoding, 4); + const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4); if (!enc) { RETURN_THROWS(); } - n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); + size_t n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); if (!mbfl_is_error(n)) { RETVAL_LONG(n); @@ -2180,7 +2149,6 @@ PHP_FUNCTION(mb_strrichr) /* {{{ Count the number of substring occurrences */ PHP_FUNCTION(mb_substr_count) { - size_t n; mbfl_string haystack, needle; char *haystack_val, *needle_val; zend_string *enc_name = NULL; @@ -2205,7 +2173,7 @@ PHP_FUNCTION(mb_substr_count) RETURN_THROWS(); } - n = mbfl_substr_count(&haystack, &needle); + size_t n = mbfl_substr_count(&haystack, &needle); /* An error can only occur if needle is empty, * an encoding error happens (which should not happen at this stage and is a bug) * or the haystack is more than sizeof(size_t) bytes @@ -2221,7 +2189,7 @@ PHP_FUNCTION(mb_substr) char *str; zend_string *encoding = NULL; zend_long from, len; - size_t mblen, real_from, real_len; + size_t real_from, real_len; size_t str_len; zend_bool len_is_null = 1; mbfl_string string, result, *ret; @@ -2243,7 +2211,7 @@ PHP_FUNCTION(mb_substr) string.len = str_len; /* measures length */ - mblen = 0; + size_t mblen = 0; if (from < 0 || (!len_is_null && len < 0)) { mblen = mbfl_strlen(&string); } @@ -2345,7 +2313,6 @@ PHP_FUNCTION(mb_strcut) /* {{{ Gets terminal width of a string */ PHP_FUNCTION(mb_strwidth) { - size_t n; char *string_val; mbfl_string string; zend_string *enc_name = NULL; @@ -2362,7 +2329,7 @@ PHP_FUNCTION(mb_strwidth) RETURN_THROWS(); } - n = mbfl_strwidth(&string); + size_t n = mbfl_strwidth(&string); ZEND_ASSERT(n != (size_t) -1); RETVAL_LONG(n); } @@ -2594,14 +2561,12 @@ MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, cons } /* }}} */ - /* {{{ Returns converted string in desired encoding */ PHP_FUNCTION(mb_convert_encoding) { zend_string *to_encoding_name; zend_string *input_str, *from_encodings_str = NULL; HashTable *input_ht, *from_encodings_ht = NULL; - const mbfl_encoding *to_encoding; const mbfl_encoding **from_encodings; size_t num_from_encodings; zend_bool free_from_encodings; @@ -2613,7 +2578,7 @@ PHP_FUNCTION(mb_convert_encoding) Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(from_encodings_str, from_encodings_ht) ZEND_PARSE_PARAMETERS_END(); - to_encoding = php_mb_get_encoding(to_encoding_name, 2); + const mbfl_encoding *to_encoding = php_mb_get_encoding(to_encoding_name, 2); if (!to_encoding) { RETURN_THROWS(); } @@ -2645,8 +2610,7 @@ PHP_FUNCTION(mb_convert_encoding) if (input_str) { /* new encoding */ size_t size; - char *ret = php_mb_convert_encoding( - ZSTR_VAL(input_str), ZSTR_LEN(input_str), + char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), to_encoding, from_encodings, num_from_encodings, &size); if (ret != NULL) { // TODO: avoid reallocation ??? @@ -2681,11 +2645,8 @@ PHP_FUNCTION(mb_convert_case) { zend_string *from_encoding = NULL; char *str; - size_t str_len; + size_t str_len, ret_len; zend_long case_mode = 0; - char *newstr; - size_t ret_len; - const mbfl_encoding *enc; ZEND_PARSE_PARAMETERS_START(2, 3) Z_PARAM_STRING(str, str_len) @@ -2694,7 +2655,7 @@ PHP_FUNCTION(mb_convert_case) Z_PARAM_STR(from_encoding) ZEND_PARSE_PARAMETERS_END(); - enc = php_mb_get_encoding(from_encoding, 3); + const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 3); if (!enc) { RETURN_THROWS(); } @@ -2705,7 +2666,7 @@ PHP_FUNCTION(mb_convert_case) RETURN_THROWS(); } - newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc); + char *newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc); /* If newstr is NULL something went wrong in mbfl and this is a bug */ ZEND_ASSERT(newstr != NULL); @@ -2720,10 +2681,7 @@ PHP_FUNCTION(mb_strtoupper) { zend_string *from_encoding = NULL; char *str; - size_t str_len; - char *newstr; - size_t ret_len; - const mbfl_encoding *enc; + size_t str_len, ret_len; ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_STRING(str, str_len) @@ -2731,12 +2689,12 @@ PHP_FUNCTION(mb_strtoupper) Z_PARAM_STR(from_encoding) ZEND_PARSE_PARAMETERS_END(); - enc = php_mb_get_encoding(from_encoding, 2); + const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2); if (!enc) { RETURN_THROWS(); } - newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); + char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); /* If newstr is NULL something went wrong in mbfl and this is a bug */ ZEND_ASSERT(newstr != NULL); @@ -2846,16 +2804,11 @@ PHP_FUNCTION(mb_detect_encoding) /* {{{ Returns an array of all supported entity encodings */ PHP_FUNCTION(mb_list_encodings) { - const mbfl_encoding **encodings; - const mbfl_encoding *encoding; - int i = 0; - ZEND_PARSE_PARAMETERS_NONE(); array_init(return_value); - encodings = mbfl_get_supported_encodings(); - while ((encoding = encodings[i++]) != NULL) { - add_next_index_string(return_value, (char *) encoding->name); + for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) { + add_next_index_string(return_value, (*encodings)->name); } } /* }}} */ @@ -2982,69 +2935,70 @@ PHP_FUNCTION(mb_convert_kana) string.val = (unsigned char*)string_val; - /* option */ + /* "Zen" is 全, or "full"; "Han" is 半, or "half" + * This refers to "fullwidth" or "halfwidth" variants of characters used for writing Japanese */ if (optstr != NULL) { - char *p = optstr; - size_t i = 0, n = optstr_len; + char *p = optstr, *e = p + optstr_len; opt = 0; - while (i < n) { - i++; + while (p < e) { switch (*p++) { case 'A': - opt |= 0x1; + opt |= MBFL_FILT_TL_HAN2ZEN_ALL; break; case 'a': - opt |= 0x10; + opt |= MBFL_FILT_TL_ZEN2HAN_ALL; break; case 'R': - opt |= 0x2; + opt |= MBFL_FILT_TL_HAN2ZEN_ALPHA; break; case 'r': - opt |= 0x20; + opt |= MBFL_FILT_TL_ZEN2HAN_ALPHA; break; case 'N': - opt |= 0x4; + opt |= MBFL_FILT_TL_HAN2ZEN_NUMERIC; break; case 'n': - opt |= 0x40; + opt |= MBFL_FILT_TL_ZEN2HAN_NUMERIC; break; case 'S': - opt |= 0x8; + opt |= MBFL_FILT_TL_HAN2ZEN_SPACE; break; case 's': - opt |= 0x80; + opt |= MBFL_FILT_TL_ZEN2HAN_SPACE; break; case 'K': - opt |= 0x100; + opt |= MBFL_FILT_TL_HAN2ZEN_KATAKANA; break; case 'k': - opt |= 0x1000; + opt |= MBFL_FILT_TL_ZEN2HAN_KATAKANA; break; case 'H': - opt |= 0x200; + opt |= MBFL_FILT_TL_HAN2ZEN_HIRAGANA; break; case 'h': - opt |= 0x2000; + opt |= MBFL_FILT_TL_ZEN2HAN_HIRAGANA; break; case 'V': - opt |= 0x800; + opt |= MBFL_FILT_TL_HAN2ZEN_GLUE; break; case 'C': - opt |= 0x10000; + opt |= MBFL_FILT_TL_ZEN2HAN_HIRA2KANA; break; case 'c': - opt |= 0x20000; + opt |= MBFL_FILT_TL_ZEN2HAN_KANA2HIRA; break; case 'M': - opt |= 0x100000; + /* TODO: figure out what 'M' and 'm' are for, and rename the constant + * to something meaningful */ + opt |= MBFL_FILT_TL_HAN2ZEN_COMPAT1; break; case 'm': - opt |= 0x200000; + opt |= MBFL_FILT_TL_ZEN2HAN_COMPAT1; break; } } } else { - opt = 0x900; + opt = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE; } /* encoding */ @@ -3184,10 +3138,9 @@ PHP_FUNCTION(mb_convert_variables) } /* initialize string */ - mbfl_string_init(&string); - mbfl_string_init(&result); from_encoding = MBSTRG(current_internal_encoding); - string.encoding = from_encoding; + mbfl_string_init_set(&string, from_encoding); + mbfl_string_init(&result); /* pre-conversion encoding */ if (from_enc_ht) { @@ -3273,7 +3226,7 @@ PHP_FUNCTION(mb_convert_variables) /* HTML numeric entities */ /* Convert PHP array to data structure required by mbfl_html_numeric_entity */ -static int* make_conversion_map(HashTable *target_hash, int *convmap_size) +static int *make_conversion_map(HashTable *target_hash, int *convmap_size) { zval *hash_entry; @@ -3285,14 +3238,12 @@ static int* make_conversion_map(HashTable *target_hash, int *convmap_size) int *convmap = (int *)safe_emalloc(n_elems, sizeof(int), 0); int *mapelm = convmap; - int mapsize = 0; ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { *mapelm++ = zval_get_long(hash_entry); - mapsize++; } ZEND_HASH_FOREACH_END(); - *convmap_size = mapsize / 4; + *convmap_size = n_elems / 4; return convmap; } @@ -3330,7 +3281,7 @@ PHP_FUNCTION(mb_encode_numericentity) // TODO: avoid reallocation ??? RETVAL_STRINGL((char *)ret->val, ret->len); efree(ret->val); - efree((void *)convmap); + efree(convmap); } /* }}} */ @@ -4009,18 +3960,16 @@ static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *enco static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { - mbfl_string string, result, *ret = NULL; - size_t illegalchars = 0; + mbfl_string string, result; - /* initialize string */ mbfl_string_init_set(&string, encoding); mbfl_string_init(&result); string.val = (unsigned char *) input; string.len = length; - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - illegalchars = mbfl_buffer_illegalchars(convd); + mbfl_string *ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + size_t illegalchars = mbfl_buffer_illegalchars(convd); if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { @@ -4032,21 +3981,15 @@ static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const return 0; } -MBSTRING_API int php_mb_check_encoding( - const char *input, size_t length, const mbfl_encoding *encoding) +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding) { - mbfl_buffer_converter *convd; - - convd = php_mb_init_convd(encoding); + mbfl_buffer_converter *convd = php_mb_init_convd(encoding); /* If this assertion fails this means some memory allocation failure which is a bug */ ZEND_ASSERT(convd != NULL); - if (php_mb_check_encoding_impl(convd, input, length, encoding)) { - mbfl_buffer_converter_delete(convd); - return 1; - } + int result = php_mb_check_encoding_impl(convd, input, length, encoding); mbfl_buffer_converter_delete(convd); - return 0; + return result; } static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding) @@ -4127,21 +4070,12 @@ PHP_FUNCTION(mb_check_encoding) } if (input_ht) { - if (!php_mb_check_encoding_recursive(input_ht, encoding)) { - RETURN_FALSE; - } - RETURN_TRUE; + RETURN_BOOL(php_mb_check_encoding_recursive(input_ht, encoding)); } else if (input_str) { - if (!php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding)) { - RETURN_FALSE; - } - RETURN_TRUE; + RETURN_BOOL(php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding)); } else { /* FIXME: Actually check all inputs, except $_FILES file content. */ - if (MBSTRG(illegalchars) == 0) { - RETURN_TRUE; - } - RETURN_FALSE; + RETURN_BOOL(MBSTRG(illegalchars) == 0); } } /* }}} */ @@ -4287,24 +4221,22 @@ static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint3 buf[3] = cp & 0xff; buf[4] = 0; - { - char *ret_str; - size_t ret_len; - long orig_illegalchars = MBSTRG(illegalchars); - MBSTRG(illegalchars) = 0; - ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len); - if (MBSTRG(illegalchars) != 0) { - efree(buf); - efree(ret_str); - MBSTRG(illegalchars) = orig_illegalchars; - return NULL; - } - - ret = zend_string_init(ret_str, ret_len, 0); + char *ret_str; + size_t ret_len; + long orig_illegalchars = MBSTRG(illegalchars); + MBSTRG(illegalchars) = 0; + ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len); + if (MBSTRG(illegalchars) != 0) { + efree(buf); efree(ret_str); MBSTRG(illegalchars) = orig_illegalchars; + return NULL; } + ret = zend_string_init(ret_str, ret_len, 0); + efree(ret_str); + MBSTRG(illegalchars) = orig_illegalchars; + efree(buf); return ret; } @@ -4315,7 +4247,6 @@ PHP_FUNCTION(mb_chr) { zend_long cp; zend_string *enc = NULL; - zend_string* ret; ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_LONG(cp) @@ -4323,7 +4254,7 @@ PHP_FUNCTION(mb_chr) Z_PARAM_STR_OR_NULL(enc) ZEND_PARSE_PARAMETERS_END(); - ret = php_mb_chr(cp, enc, 2); + zend_string* ret = php_mb_chr(cp, enc, 2); if (ret == NULL) { RETURN_FALSE; } @@ -4335,12 +4266,9 @@ PHP_FUNCTION(mb_chr) /* {{{ */ PHP_FUNCTION(mb_scrub) { - const mbfl_encoding *enc; char* str; size_t str_len; zend_string *enc_name = NULL; - char *ret; - size_t ret_len; ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_STRING(str, str_len) @@ -4348,12 +4276,13 @@ PHP_FUNCTION(mb_scrub) Z_PARAM_STR_OR_NULL(enc_name) ZEND_PARSE_PARAMETERS_END(); - enc = php_mb_get_encoding(enc_name, 2); + const mbfl_encoding *enc = php_mb_get_encoding(enc_name, 2); if (!enc) { RETURN_THROWS(); } - ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len); + size_t ret_len; + char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len); RETVAL_STRINGL(ret, ret_len); efree(ret); @@ -4475,10 +4404,8 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol size_t n = (size_t) -1; mbfl_string haystack, needle; - mbfl_string_init(&haystack); - mbfl_string_init(&needle); - haystack.encoding = enc; - needle.encoding = enc; + mbfl_string_init_set(&haystack, enc); + mbfl_string_init_set(&needle, enc); do { /* We're using simple case-folding here, because we'd have to deal with remapping of -- 2.40.0