From b651b967383082de0f9c15ad6fcb36dda4082b0d Mon Sep 17 00:00:00 2001 From: Yasuo Ohgaki Date: Fri, 2 Sep 2016 14:15:47 +0900 Subject: [PATCH] Allow array input for mb_check_encoding() --- UPGRADING | 6 +- ext/mbstring/mbstring.c | 175 ++++++++++++++++++---- ext/mbstring/tests/mb_check_encoding.phpt | 24 +++ 3 files changed, 174 insertions(+), 31 deletions(-) create mode 100644 ext/mbstring/tests/mb_check_encoding.phpt diff --git a/UPGRADING b/UPGRADING index c89f46d5cf..211022d0aa 100644 --- a/UPGRADING +++ b/UPGRADING @@ -1,4 +1,4 @@ -PHP 7.1 UPGRADE NOTES +PHP 7.2 UPGRADE NOTES 1. Backward Incompatible Changes 2. New Features @@ -65,6 +65,10 @@ PHP 7.1 UPGRADE NOTES . Added extended exif tag support for the following formats: Samsung, DJI, Panasonic, Sony, Pentax, Minolta & Sigma/Foveon. +- Mbstring + . mb_check_encoding() accepts array parameter. Both key and value + encodings are checked recursively. + ======================================== 10. New Global Constants ======================================== diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index b92a0abede..4f26d5e79f 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4743,13 +4743,51 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) + +static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding) { - const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; + + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); + if (convd == NULL) { + return NULL; + } + mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); + mbfl_buffer_converter_illegal_substchar(convd, 0); + return convd; +} + + +static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { mbfl_string string, result, *ret = NULL; long illegalchars = 0; 
+ /* initialize string */ + mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); + mbfl_string_init(&result); + + string.val = (unsigned char *) input; + string.len = length; + + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + illegalchars = mbfl_buffer_illegalchars(convd); + + if (ret != NULL) { + if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { + mbfl_string_clear(&result); + return 1; + } + mbfl_string_clear(&result); + } + return 0; +} + + +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + if (input == NULL) { return MBSTRG(illegalchars) == 0; } @@ -4762,57 +4800,134 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c } } - convd = mbfl_buffer_converter_new2(encoding, encoding, 0); - + convd = php_mb_init_convd(encoding); if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); return 0; } - mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); - mbfl_buffer_converter_illegal_substchar(convd, 0); + if (php_mb_check_encoding_impl(convd, input, length, encoding)) { + mbfl_buffer_converter_delete(convd); + return 1; + } + mbfl_buffer_converter_delete(convd); + return 0; +} - /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); - mbfl_string_init(&result); - string.val = (unsigned char *) input; - string.len = length; +MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + zend_long idx; + zend_string *key; + zval *entry; - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - illegalchars = mbfl_buffer_illegalchars(convd); - 
mbfl_buffer_converter_delete(convd); + (void)(idx); - if (ret != NULL) { - if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - mbfl_string_clear(&result); - return 1; + if (enc != NULL) { + encoding = mbfl_name2encoding(ZSTR_VAL(enc)); + if (!encoding || encoding == &mbfl_encoding_pass) { + php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc)); + return 0; } + } - mbfl_string_clear(&result); + convd = php_mb_init_convd(encoding); + if (convd == NULL) { + php_error_docref(NULL, E_WARNING, "Unable to create converter"); + return 0; } - return 0; + ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) { + ZVAL_DEREF(entry); + if (key) { + if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { + mbfl_buffer_converter_delete(convd); + return 0; + } + } + switch (Z_TYPE_P(entry)) { + case IS_STRING: + if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { + mbfl_buffer_converter_delete(convd); + return 0; + } + break; + case IS_ARRAY: + if (ZEND_HASH_APPLY_PROTECTION(vars) && vars->u.v.nApplyCount++ > 0) { + vars->u.v.nApplyCount--; + php_error_docref(NULL, E_WARNING, "Cannot not handle circular references"); + return 0; + } + if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) { + mbfl_buffer_converter_delete(convd); + return 0; + } + break; + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + break; + default: + /* Other types are error. 
*/ + mbfl_buffer_converter_delete(convd); + return 0; + } + } ZEND_HASH_FOREACH_END(); + if (ZEND_HASH_APPLY_PROTECTION(vars)) { + vars->u.v.nApplyCount--; + } + + mbfl_buffer_converter_delete(convd); + return 1; } -/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) + +/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]]) Check if the string is valid for the specified encoding */ PHP_FUNCTION(mb_check_encoding) { - char *var = NULL; - size_t var_len; - char *enc = NULL; - size_t enc_len; + zval *input = NULL; + zend_string *enc = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) { return; - } + } - RETVAL_FALSE; + /* FIXME: Actually check all inputs, except $_FILES file content. */ + if (input == NULL) { + if (MBSTRG(illegalchars) == 0) { + RETURN_TRUE; + } + RETURN_FALSE; + } - if (php_mb_check_encoding(var, var_len, enc)) { - RETVAL_TRUE; + switch(Z_TYPE_P(input)) { + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + RETURN_TRUE; + break; + case IS_STRING: + if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) { + RETURN_FALSE; + } + break; + case IS_ARRAY: + if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) { + RETURN_FALSE; + } + break; + default: + php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array"); + RETURN_FALSE; } + RETURN_TRUE; } /* }}} */ diff --git a/ext/mbstring/tests/mb_check_encoding.phpt b/ext/mbstring/tests/mb_check_encoding.phpt new file mode 100644 index 0000000000..dded51a3f6 --- /dev/null +++ b/ext/mbstring/tests/mb_check_encoding.phpt @@ -0,0 +1,24 @@ +--TEST-- +mb_check_encoding() +--SKIPIF-- + +--FILE-- +$str, $str=>'val']; +var_dump(mb_check_encoding($str), mb_check_encoding($arr)); + +// Invalid +$str = "Japanese UTF-8 text. 
日本語\xFE\x01\x02のUTF-8テキスト"; +$arr1 = [1234, 12.34, TRUE, FALSE, NULL, 'key'=>$str, $str=>'val']; +$arr2 = [1234, 12.34, TRUE, FALSE, NULL, $str=>'val']; +var_dump(mb_check_encoding($str), mb_check_encoding($arr1), mb_check_encoding($arr2)); +?> +--EXPECT-- +bool(true) +bool(true) +bool(false) +bool(false) +bool(false) -- 2.50.1