]> granicus.if.org Git - php/commitdiff
Added array parameter support to mb_convert_encoding()
authorYasuo Ohgaki <yohgaki@php.net>
Tue, 6 Sep 2016 09:20:24 +0000 (18:20 +0900)
committerYasuo Ohgaki <yohgaki@php.net>
Tue, 6 Sep 2016 09:20:24 +0000 (18:20 +0900)
NEWS
UPGRADING
ext/mbstring/mbstring.c
ext/mbstring/tests/mb_convert_encoding_array.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index a91137ea8d469bd0a59acb7837ccd5c498d2bf06..2f3609298dbfcd41f334b3c8f039a5930cefe4d6 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -24,10 +24,12 @@ PHP                                                                        NEWS
   . Fixed bug #70896 (gmp_fact() silently ignores non-integer input). (Sara)
 
 - Mbstring:
-  . Implemented request #66024 (mb_chr() and mb_ord()) (Masakielastic, Yasuo)
-  . Implemented request #65081 (mb_scrub()) (Masakielastic, Yasuo)
+  . Implemented request #66024 (mb_chr() and mb_ord()). (Masakielastic, Yasuo)
+  . Implemented request #65081 (mb_scrub()). (Masakielastic, Yasuo)
   . Implemented request #69086 (enhancement for mb_convert_encoding() that
-    handles multibyte replacement char nicely) (Masakielastic, Yasuo)
+    handles multibyte replacement char nicely). (Masakielastic, Yasuo)
+  . Added array input support to mb_convert_encoding(). (Yasuo)
+  . Added array input support to mb_check_encoding(). (Yasuo)
 
 <<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>
 
index 73564a78b027f2a1277fe1cd9c90ca1ac3b80dbc..d61280a94f26c4849bad0f2365b9f2086778d662 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -75,6 +75,8 @@ PHP 7.2 UPGRADE NOTES
 - Mbstring
   . mb_check_encoding() accepts array parameter. Both key and value
     encodings are checked recursively.
+  . mb_convert_encoding() accepts array parameter. Both key and value
+    encodings are converted recursively.
 
 ========================================
 10. New Global Constants
index 4f26d5e79f2396dce0b1237e20a622dd11dd342f..a9d9f0b4bdcaece5d5f8ecfdc5a6b41b4b4b2b8b 100644 (file)
@@ -3181,7 +3181,7 @@ static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
 
 
 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
-MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
+MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
 {
        mbfl_string string, result, *ret;
        const mbfl_encoding *from_encoding, *to_encoding;
@@ -3288,12 +3288,104 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
 }
 /* }}} */
 
+/* {{{ MBSTRING_API HashTable *php_mb_convert_encoding_recursive()
+ *
+ * Builds a converted copy of `input` in a freshly emalloc()'ed HashTable and
+ * returns it; returns NULL when `input` is NULL.
+ *  - String keys and string values go through php_mb_convert_encoding() with
+ *    the given _to_encoding / _from_encodings.
+ *  - Integer keys and NULL/bool/integer/float values are copied unchanged.
+ *  - Nested arrays are converted by a recursive call.
+ *  - Every other value type is skipped with an E_WARNING.
+ * If converting a key fails, the entry keeps its original key; if converting a
+ * string value fails, the entry is set to FALSE (what mb_convert_encoding()
+ * returns for a failed string conversion).
+ */
+MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
+{
+       HashTable *output, *chash;
+       zend_long idx;
+       zend_string *key, *key_tmp;
+       zval *entry, entry_tmp;
+       size_t ckey_len, cval_len;
+       char *ckey, *cval;
+
+       if (!input) {
+               return NULL;
+       }
+
+       output = (HashTable *)emalloc(sizeof(HashTable));
+       zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
+       ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
+               /* convert key */
+               key_tmp = NULL;
+               if (key) {
+                       ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
+                       if (ckey) {
+                               key_tmp = zend_string_init(ckey, ckey_len, 0);
+                               /* the bytes were copied above, so the conversion buffer must be freed here */
+                               efree(ckey);
+                       } else {
+                               key_tmp = zend_string_copy(key);
+                       }
+               }
+               /* convert value */
+               ZEND_ASSERT(entry);
+               switch(Z_TYPE_P(entry)) {
+                       case IS_STRING:
+                               cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
+                               if (cval) {
+                                       ZVAL_STRINGL(&entry_tmp, cval, cval_len);
+                                       efree(cval);
+                               } else {
+                                       ZVAL_FALSE(&entry_tmp);
+                               }
+                               break;
+                       case IS_NULL:
+                       case IS_TRUE:
+                       case IS_FALSE:
+                       case IS_LONG:
+                       case IS_DOUBLE:
+                               ZVAL_COPY(&entry_tmp, entry);
+                               break;
+                       case IS_ARRAY:
+                               chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
+                               /* hand the new table straight to the zval; array_init() here would allocate a second array that is never freed */
+                               ZVAL_ARR(&entry_tmp, chash);
+                               break;
+                       case IS_OBJECT:
+                       default:
+                               /* entry_tmp owns nothing here (it is unset or was already handed to output), so only the key is released */
+                               if (key_tmp) {
+                                       zend_string_release(key_tmp);
+                               }
+                               php_error_docref(NULL, E_WARNING, "Object is not supported");
+                               continue;
+               }
+               if (key_tmp) {
+                       /* zend_hash_add() refuses a key that already exists (two keys may convert to the same bytes); free the unused value */
+                       if (!zend_hash_add(output, key_tmp, &entry_tmp)) {
+                               zval_ptr_dtor(&entry_tmp);
+                       }
+                       /* the table holds its own reference to the key, so drop ours */
+                       zend_string_release(key_tmp);
+               } else {
+                       zend_hash_index_add(output, idx, &entry_tmp);
+               }
+       } ZEND_HASH_FOREACH_END();
+
+       return output;
+}
+/* }}} */
+
+
 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
    Returns converted string in desired encoding */
 PHP_FUNCTION(mb_convert_encoding)
 {
-       char *arg_str, *arg_new;
-       size_t str_len, new_len;
+       zval *input;
+       char *arg_new;
+       size_t new_len;
        zval *arg_old = NULL;
        size_t size, l, n;
        char *_from_encodings = NULL, *ret, *s_free = NULL;
@@ -3301,10 +3360,14 @@ PHP_FUNCTION(mb_convert_encoding)
        zval *hash_entry;
        HashTable *target_hash;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
                return;
        }
 
+       if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
+               convert_to_string(input);
+       }
+
        if (arg_old) {
                switch (Z_TYPE_P(arg_old)) {
                        case IS_ARRAY:
@@ -3339,19 +3402,26 @@ PHP_FUNCTION(mb_convert_encoding)
                        }
        }
 
-       /* new encoding */
-       ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
-       if (ret != NULL) {
-               // TODO: avoid reallocation ???
-               RETVAL_STRINGL(ret, size);              /* the string is already strdup()'ed */
-               efree(ret);
+       if (Z_TYPE_P(input) == IS_STRING) {
+               /* new encoding */
+               ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
+               if (ret != NULL) {
+                       // TODO: avoid reallocation ???
+                       RETVAL_STRINGL(ret, size);              /* the string is already strdup()'ed */
+                       efree(ret);
+               } else {
+                       RETVAL_FALSE;
+               }
+               if (s_free) {
+                       efree(s_free);
+               }
        } else {
-               RETVAL_FALSE;
+               HashTable *tmp;
+               tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
+               RETURN_ARR(tmp);
        }
 
-       if ( s_free) {
-               efree(s_free);
-       }
+       return;
 }
 /* }}} */
 
diff --git a/ext/mbstring/tests/mb_convert_encoding_array.phpt b/ext/mbstring/tests/mb_convert_encoding_array.phpt
new file mode 100644 (file)
index 0000000..5bb78df
--- /dev/null
@@ -0,0 +1,187 @@
+--TEST--
+Test mb_convert_encoding() function : array functionality
+--SKIPIF--
+<?php
+extension_loaded('mbstring') or die('skip');
+function_exists('mb_convert_encoding') or die("skip mb_convert_encoding() is not available in this build");
+?>
+--FILE--
+<?php
+/* Prototype  : string|array mb_convert_encoding(string|array $str, string $to_encoding [, mixed $from_encoding])
+ * Description: Returns the string, or the array with its strings converted, in the desired encoding
+ * Source code: ext/mbstring/mbstring.c
+ */
+
+/*
+ * Test array input handling of mb_convert_encoding()
+ */
+
+echo "*** Testing mb_convert_encoding() : array functionality ***\n";
+
+// Fixtures: each array holds two copies of one string; all four strings read the same when displayed in their respective encodings
+$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
+$sjis_string[] = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
+$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
+$jis_string[] = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
+$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
+$euc_jp_string[] = base64_decode('xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow==');
+$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
+$utf8_string[] = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');
+
+
+/* Base64-encodes every element of $input and returns them as a re-indexed list,
+ * so the binary multibyte strings can be compared as printable text. */
+function base64_encode_array($input) {
+       // array_map() yields an empty array for empty input, so nothing is left undefined
+       return array_values(array_map('base64_encode', $input));
+}
+
+echo "\n-- Convert to JIS --\n";
+echo "JIS encoded string in base64:\n"; // reference: the fixture that is already JIS
+var_dump(base64_encode_array($jis_string));
+echo "Converted Strings:\n"; // each dump below is expected to equal the reference
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'JIS', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'JIS', 'EUC-JP')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'JIS', 'UTF-8')));
+
+echo "\n-- Convert to EUC-JP --\n";
+echo "EUC-JP encoded string in base64:\n"; // reference: the fixture that is already EUC-JP
+var_dump(base64_encode_array($euc_jp_string));
+echo "Converted Strings:\n"; // each dump below is expected to equal the reference
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'EUC-JP', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'EUC-JP', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'EUC-JP', 'UTF-8')));
+
+echo "\n-- Convert to SJIS --\n";
+echo "SJIS encoded string in base64:\n"; // reference: the fixture that is already SJIS
+var_dump(base64_encode_array($sjis_string));
+echo "Converted Strings:\n"; // each dump below is expected to equal the reference
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'SJIS', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'SJIS', 'EUC-JP')));
+var_dump(base64_encode_array(mb_convert_encoding($utf8_string, 'SJIS', 'UTF-8')));
+
+echo "\n-- Convert to UTF-8 --\n";
+echo "UTF-8 encoded string in base64:\n"; // reference: the fixture that is already UTF-8
+var_dump(base64_encode_array($utf8_string));
+echo "Converted Strings:\n"; // each dump below is expected to equal the reference
+var_dump(base64_encode_array(mb_convert_encoding($sjis_string, 'UTF-8', 'SJIS')));
+var_dump(base64_encode_array(mb_convert_encoding($jis_string, 'UTF-8', 'JIS')));
+var_dump(base64_encode_array(mb_convert_encoding($euc_jp_string, 'UTF-8', 'EUC-JP')));
+
+echo "Done";
+?>
+--EXPECTF--
+*** Testing mb_convert_encoding() : array functionality ***
+
+-- Convert to JIS --
+JIS encoded string in base64:
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+Converted Strings:
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+array(2) {
+  [0]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+  [1]=>
+  string(68) "GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg=="
+}
+
+-- Convert to EUC-JP --
+EUC-JP encoded string in base64:
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+Converted Strings:
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+array(2) {
+  [0]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+  [1]=>
+  string(52) "xvzL3LjspcalraW5pcikx6S5oaMwMTIzNKO1o7ajt6O4o7mhow=="
+}
+
+-- Convert to SJIS --
+SJIS encoded string in base64:
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+Converted Strings:
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+array(2) {
+  [0]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+  [1]=>
+  string(52) "k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg=="
+}
+
+-- Convert to UTF-8 --
+UTF-8 encoded string in base64:
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+Converted Strings:
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+array(2) {
+  [0]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+  [1]=>
+  string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
+}
+Done