]> granicus.if.org Git - php/commitdiff
Allow array input for mb_check_encoding()
author Yasuo Ohgaki <yohgaki@php.net>
Fri, 2 Sep 2016 05:15:47 +0000 (14:15 +0900)
committer Yasuo Ohgaki <yohgaki@php.net>
Fri, 2 Sep 2016 05:18:34 +0000 (14:18 +0900)
UPGRADING
ext/mbstring/mbstring.c
ext/mbstring/tests/mb_check_encoding.phpt [new file with mode: 0644]

index c89f46d5cf9784d5a5b8041310c26e7ba3e77b38..211022d0aa3bf30777040b67119eb0f74f1ca7ad 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -1,4 +1,4 @@
-PHP 7.1 UPGRADE NOTES
+PHP 7.2 UPGRADE NOTES
 
 1. Backward Incompatible Changes
 2. New Features
@@ -65,6 +65,10 @@ PHP 7.1 UPGRADE NOTES
    . Added extended exif tag support for the following formats: 
      Samsung, DJI, Panasonic, Sony, Pentax, Minolta & Sigma/Foveon.
 
+- Mbstring
+  . mb_check_encoding() now accepts an array parameter. The encodings of
+    both keys and values are checked recursively.
+
 ========================================
 10. New Global Constants
 ========================================
index b92a0abede8f483df635a7b31a573e5f47111ac4..4f26d5e79f2396dce0b1237e20a622dd11dd342f 100644 (file)
@@ -4743,13 +4743,51 @@ PHP_FUNCTION(mb_get_info)
 }
 /* }}} */
 
-MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+
+static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
 {
-       const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
        mbfl_buffer_converter *convd;
+
+       convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
+       if (convd == NULL) {
+               return NULL;
+       }
+       mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
+       mbfl_buffer_converter_illegal_substchar(convd, 0);
+       return convd;
+}
+
+
+static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
        mbfl_string string, result, *ret = NULL;
        long illegalchars = 0;
 
+       /* initialize string */
+       mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
+       mbfl_string_init(&result);
+
+       string.val = (unsigned char *) input;
+       string.len = length;
+
+       ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+       illegalchars = mbfl_buffer_illegalchars(convd);
+
+       if (ret != NULL) {
+               if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
+                       mbfl_string_clear(&result);
+                       return 1;
+               }
+               mbfl_string_clear(&result);
+       }
+       return 0;
+}
+
+
+MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+{
+       const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+       mbfl_buffer_converter *convd;
+
        if (input == NULL) {
                return MBSTRG(illegalchars) == 0;
        }
@@ -4762,57 +4800,134 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c
                }
        }
 
-       convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
-
+       convd = php_mb_init_convd(encoding);
        if (convd == NULL) {
                php_error_docref(NULL, E_WARNING, "Unable to create converter");
                return 0;
        }
 
-       mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
-       mbfl_buffer_converter_illegal_substchar(convd, 0);
+       if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
+               mbfl_buffer_converter_delete(convd);
+               return 1;
+       }
+       mbfl_buffer_converter_delete(convd);
+       return 0;
+}
 
-       /* initialize string */
-       mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
-       mbfl_string_init(&result);
 
-       string.val = (unsigned char *) input;
-       string.len = length;
+MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
+{
+       const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+       mbfl_buffer_converter *convd;
+       zend_long idx;
+       zend_string *key;
+       zval *entry;
 
-       ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
-       illegalchars = mbfl_buffer_illegalchars(convd);
-       mbfl_buffer_converter_delete(convd);
+       (void)(idx);
 
-       if (ret != NULL) {
-               if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
-                       mbfl_string_clear(&result);
-                       return 1;
+       if (enc != NULL) {
+               encoding = mbfl_name2encoding(ZSTR_VAL(enc));
+               if (!encoding || encoding == &mbfl_encoding_pass) {
+                       php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
+                       return 0;
                }
+       }
 
-               mbfl_string_clear(&result);
+       convd = php_mb_init_convd(encoding);
+       if (convd == NULL) {
+               php_error_docref(NULL, E_WARNING, "Unable to create converter");
+               return 0;
        }
 
-       return 0;
+       ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
+               ZVAL_DEREF(entry);
+               if (key) {
+                       if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
+                               mbfl_buffer_converter_delete(convd);
+                               return 0;
+                       }
+               }
+               switch (Z_TYPE_P(entry)) {
+                       case IS_STRING:
+                               if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
+                                       mbfl_buffer_converter_delete(convd);
+                                       return 0;
+                               }
+                               break;
+                       case IS_ARRAY:
+                               if (ZEND_HASH_APPLY_PROTECTION(vars) && vars->u.v.nApplyCount++ > 0) {
+                                       vars->u.v.nApplyCount--;
+                                       php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
+                                       return 0;
+                               }
+                               if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
+                                       mbfl_buffer_converter_delete(convd);
+                                       return 0;
+                               }
+                               break;
+                       case IS_LONG:
+                       case IS_DOUBLE:
+                       case IS_NULL:
+                       case IS_TRUE:
+                       case IS_FALSE:
+                               break;
+                       default:
+                               /* Other types are error. */
+                               mbfl_buffer_converter_delete(convd);
+                               return 0;
+               }
+       } ZEND_HASH_FOREACH_END();
+       if (ZEND_HASH_APPLY_PROTECTION(vars)) {
+               vars->u.v.nApplyCount--;
+       }
+
+       mbfl_buffer_converter_delete(convd);
+       return 1;
 }
 
-/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
+
+/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
    Check if the string is valid for the specified encoding */
 PHP_FUNCTION(mb_check_encoding)
 {
-       char *var = NULL;
-       size_t var_len;
-       char *enc = NULL;
-       size_t enc_len;
+       zval *input = NULL;
+       zend_string *enc = NULL;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
                return;
-    }
+       }
 
-       RETVAL_FALSE;
+       /* FIXME: Actually check all inputs, except $_FILES file content. */
+       if (input == NULL) {
+               if (MBSTRG(illegalchars) == 0) {
+                       RETURN_TRUE;
+               }
+               RETURN_FALSE;
+       }
 
-       if (php_mb_check_encoding(var, var_len, enc)) {
-               RETVAL_TRUE;
+       switch(Z_TYPE_P(input)) {
+               case IS_LONG:
+               case IS_DOUBLE:
+               case IS_NULL:
+               case IS_TRUE:
+               case IS_FALSE:
+                       RETURN_TRUE;
+                       break;
+               case IS_STRING:
+                       if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
+                               RETURN_FALSE;
+                       }
+                       break;
+               case IS_ARRAY:
+                       if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
+                               RETURN_FALSE;
+                       }
+                       break;
+               default:
+                       php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array");
+                       RETURN_FALSE;
        }
+       RETURN_TRUE;
 }
 /* }}} */
 
diff --git a/ext/mbstring/tests/mb_check_encoding.phpt b/ext/mbstring/tests/mb_check_encoding.phpt
new file mode 100644 (file)
index 0000000..dded51a
--- /dev/null
@@ -0,0 +1,24 @@
+--TEST--
+mb_check_encoding()
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+ini_set('default_charset', 'UTF-8');
+// Valid
+$str = "Japanese UTF-8 text. 日本語のUTF-8テキスト";
+$arr = [1234, 12.34, TRUE, FALSE, NULL, $str, 'key'=>$str, $str=>'val'];
+var_dump(mb_check_encoding($str), mb_check_encoding($arr));
+
+// Invalid
+$str = "Japanese UTF-8 text. 日本語\xFE\x01\x02のUTF-8テキスト";
+$arr1 = [1234, 12.34, TRUE, FALSE, NULL, 'key'=>$str, $str=>'val'];
+$arr2 = [1234, 12.34, TRUE, FALSE, NULL, $str=>'val'];
+var_dump(mb_check_encoding($str), mb_check_encoding($arr1),  mb_check_encoding($arr2));
+?>
+--EXPECT--
+bool(true)
+bool(true)
+bool(false)
+bool(false)
+bool(false)