granicus.if.org Git - php/commitdiff
mb_check_encoding()/mb_convert_encoding() - Improve and add recursion detection.
author: Yasuo Ohgaki <yohgaki@php.net>
Sat, 15 Oct 2016 04:16:45 +0000 (13:16 +0900)
committer: Yasuo Ohgaki <yohgaki@php.net>
Sat, 15 Oct 2016 07:52:17 +0000 (16:52 +0900)
ext/mbstring/mbstring.c
ext/mbstring/tests/mb_check_encoding_array.phpt [new file with mode: 0644]
ext/mbstring/tests/mb_convert_encoding_array2.phpt [new file with mode: 0644]

diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 2a7bba036848933049496fce58426113ab31ee14..20950e28b8aab2cc14766775b6ca0473425f499a 100644 (file)
@@ -3301,6 +3301,11 @@ MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, cons
                return NULL;
        }
 
+       if (input->u.v.nApplyCount++ > 1) {
+               input->u.v.nApplyCount--;
+               php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
+               return NULL;
+       }
        output = (HashTable *)emalloc(sizeof(HashTable));
        zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
        ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
@@ -3326,12 +3331,21 @@ MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, cons
                                break;
                        case IS_ARRAY:
                                chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
-                               array_init(&entry_tmp);
-                               Z_ARRVAL(entry_tmp) = chash;
+                               if (chash) {
+                                       Z_ARRVAL(entry_tmp) = chash;
+                                       Z_TYPE_INFO(entry_tmp) = IS_ARRAY_EX;
+                               } else {
+                                       HashTable *tmp;
+                                       tmp = (HashTable *)emalloc(sizeof(HashTable));
+                                       zend_hash_init(tmp, 0, NULL, ZVAL_PTR_DTOR, 0);
+                                       Z_ARRVAL(entry_tmp) = tmp;
+                               }
                                break;
                        case IS_OBJECT:
                        default:
-                               zval_dtor(&entry_tmp);
+                               if (key) {
+                                       efree(key_tmp);
+                               }
                                php_error_docref(NULL, E_WARNING, "Object is not supported");
                                continue;
                }
@@ -3341,6 +3355,7 @@ MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, cons
                        zend_hash_index_add(output, idx, &entry_tmp);
                }
        } ZEND_HASH_FOREACH_END();
+       input->u.v.nApplyCount--;
 
        return output;
 }
@@ -4946,6 +4961,7 @@ MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_str
        zend_long idx;
        zend_string *key;
        zval *entry;
+       int valid = 1;
 
        (void)(idx);
 
@@ -4963,30 +4979,31 @@ MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_str
                return 0;
        }
 
+       if (vars->u.v.nApplyCount++ > 1) {
+               vars->u.v.nApplyCount--;
+               mbfl_buffer_converter_delete(convd);
+               php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
+               return 0;
+       }
        ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
                ZVAL_DEREF(entry);
                if (key) {
                        if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
-                               mbfl_buffer_converter_delete(convd);
-                               return 0;
+                               valid = 0;
+                               break;
                        }
                }
                switch (Z_TYPE_P(entry)) {
                        case IS_STRING:
                                if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
-                                       mbfl_buffer_converter_delete(convd);
-                                       return 0;
+                                       valid = 0;
+                                       break;
                                }
                                break;
                        case IS_ARRAY:
-                               if (ZEND_HASH_APPLY_PROTECTION(vars) && vars->u.v.nApplyCount++ > 0) {
-                                       vars->u.v.nApplyCount--;
-                                       php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
-                                       return 0;
-                               }
                                if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
-                                       mbfl_buffer_converter_delete(convd);
-                                       return 0;
+                                       valid = 0;
+                                       break;
                                }
                                break;
                        case IS_LONG:
@@ -4997,16 +5014,13 @@ MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_str
                                break;
                        default:
                                /* Other types are error. */
-                               mbfl_buffer_converter_delete(convd);
-                               return 0;
+                               valid = 0;
+                               break;
                }
        } ZEND_HASH_FOREACH_END();
-       if (ZEND_HASH_APPLY_PROTECTION(vars)) {
-               vars->u.v.nApplyCount--;
-       }
-
+       vars->u.v.nApplyCount--;
        mbfl_buffer_converter_delete(convd);
-       return 1;
+       return valid;
 }
 
 
diff --git a/ext/mbstring/tests/mb_check_encoding_array.phpt b/ext/mbstring/tests/mb_check_encoding_array.phpt
new file mode 100644 (file)
index 0000000..91ee93a
--- /dev/null
@@ -0,0 +1,31 @@
+--TEST--
+mb_check_encoding() - Circular references
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+ini_set('default_charset', 'UTF-8');
+// Valid - Detects recursion
+$str = "Japanese UTF-8 text. 日本語のUTF-8テキスト";
+$arr = [1234, 12.34, TRUE, FALSE, NULL, $str, 'key'=>$str, $str=>'val'];
+$tmp = &$arr;
+$arr[] = $tmp;
+var_dump(mb_check_encoding($str), mb_check_encoding($arr));
+
+// Invalid - Return false due to short circuit check
+$str = "Japanese UTF-8 text. 日本語\xFE\x01\x02のUTF-8テキスト";
+$arr1 = [1234, 12.34, TRUE, FALSE, NULL, 'key'=>$str, $str=>'val'];
+$tmp = &$arr1;
+$arr1[] = $tmp;
+$arr2 = [1234, 12.34, TRUE, FALSE, NULL, $str=>'val'];
+$tmp = &$arr2;
+$arr2[] = $tmp;
+var_dump(mb_check_encoding($str), mb_check_encoding($arr1),  mb_check_encoding($arr2));
+?>
+--EXPECTF--
+Warning: mb_check_encoding(): Cannot not handle circular references in %s on line %d
+bool(true)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
diff --git a/ext/mbstring/tests/mb_convert_encoding_array2.phpt b/ext/mbstring/tests/mb_convert_encoding_array2.phpt
new file mode 100644 (file)
index 0000000..3947c79
--- /dev/null
@@ -0,0 +1,223 @@
+--TEST--
+Test mb_convert_encoding() function : Circular references
+--SKIPIF--
+<?php
+extension_loaded('mbstring') or die('skip');
+function_exists('mb_convert_encoding') or die("skip mb_convert_encoding() is not available in this build");
+?>
+--FILE--
+<?php
+/* Prototype  : string mb_convert_encoding(string $str, string $to_encoding [, mixed $from_encoding])
+ * Description: Returns converted string in desired encoding
+ * Source code: ext/mbstring/mbstring.c
+ */
+
+/*
+ * Test basic functionality of mb_convert_encoding()
+ */
+
+echo "*** Testing mb_convert_encoding() : Circular references ***\n";
+
+//All strings are the same when displayed in their respective encodings
+$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
+$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
+$tmp = &$sjis_string;
+$sjis_string[] = $tmp;
+
+$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
+$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
+$tmp = &$jis_string;
+$jis_string[] = $tmp;
+
+$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
+$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
+$tmp = &$euc_jp_string;
+$euc_jp_string[] = $tmp;
+
+$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
+$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
+$tmp = &$utf8_string;
+$utf8_string[] = $tmp;
+
+
+function base64_encode_array($input) {
+       foreach ($input as $var) {
+               if (is_scalar($var))
+                       $ret[] = base64_encode($var);
+       }
+       return $ret;
+}
+
+echo "\n-- Convert to JIS --\n";
+echo "JIS encoded string in base64:\n";
+var_dump(base64_encode_array($jis_string));
+echo "Converted Strings:\n";
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'JIS', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'JIS', 'EUC-JP')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'JIS', 'UTF-8')));
+
+echo "\n-- Convert to EUC-JP --\n";
+echo "EUC-JP encoded string in base64:\n";
+var_dump(base64_encode_array($euc_jp_string));
+echo "Converted Strings:\n";
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'EUC-JP', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'EUC-JP', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'EUC-JP', 'UTF-8')));
+
+echo "\n-- Convert to SJIS --\n";
+echo "SJIS encoded string in base64:\n";
+var_dump(base64_encode_array($sjis_string));
+echo "Converted Strings:\n";
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'SJIS', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'SJIS', 'EUC-JP')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'SJIS', 'UTF-8')));
+
+echo "\n-- Convert to UTF-8 --\n";
+echo "UTF-8 encoded string in base64:\n";
+var_dump(base64_encode_array($utf8_string));
+echo "Converted Strings:\n";
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'UTF-8', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'UTF-8', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'UTF-8', 'EUC-JP')));
+
+echo "Done";
+?>
+--EXPECT--
+*** Testing mb_convert_encoding() : Circular references ***
+
+-- Convert to JIS --
+JIS encoded string in base64:
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+Converted Strings:
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 47
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 48
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 49
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+
+-- Convert to EUC-JP --
+EUC-JP encoded string in base64:
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+Converted Strings:
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 55
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 56
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 57
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+
+-- Convert to SJIS --
+SJIS encoded string in base64:
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+Converted Strings:
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 63
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 64
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 65
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+
+-- Convert to UTF-8 --
+UTF-8 encoded string in base64:
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+Converted Strings:
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 71
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 72
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+
+Warning: mb_convert_encoding(): Cannot convert recursively referenced values in /home/yohgaki/workspace/ext/git/oss/php.net/PHP-master/ext/mbstring/tests/mb_convert_encoding_array2.php on line 73
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+Done