granicus.if.org Git - php/commitdiff
- Fixed CHARSET_UNICODE_COMPAT (ISO-8859-1 is compatible in the relevant sense).
author Gustavo André dos Santos Lopes <cataphract@php.net>
Tue, 25 Jan 2011 10:57:07 +0000 (10:57 +0000)
committer Gustavo André dos Santos Lopes <cataphract@php.net>
Tue, 25 Jan 2011 10:57:07 +0000 (10:57 +0000)
- Fixed usage of zend_multibyte_get_internal_encoding (its return cannot be
  cast to char*).
- Change tests to reflect that charset detection now relies on
  internal_encoding, not on current_internal_encoding.
  NOTE: This fixes the changes in rev 306077, but the fact remains that that
  change introduced a BC break. I assumed it was intentional.

UPGRADING
ext/standard/html.c
ext/standard/html_tables.h
ext/standard/html_tables/html_table_gen.php
ext/standard/tests/strings/htmlentities05.phpt
ext/standard/tests/strings/htmlentities06.phpt
ext/standard/tests/strings/htmlentities07.phpt
ext/standard/tests/strings/htmlentities08.phpt
ext/standard/tests/strings/htmlentities09.phpt
ext/standard/tests/strings/htmlentities16.phpt

index 9c5d18affba39a7698b8ced7cf6ebb96d856e464..53a7c8a489845da55d20f20d33d8d75d21dca121 100755 (executable)
--- a/UPGRADING
+++ b/UPGRADING
@@ -148,6 +148,9 @@ UPGRADE NOTES - PHP X.Y
   behavior follows the recommendations of Unicode Technical Report #36.
 - htmlspecialchars_decode/html_entity_decode now decode &apos; if the document
   type is ENT_XML1, ENT_XHTML, or ENT_HTML5.
+- Charset detection with $charset == '' no longer turns to mbstring's
+  internal encoding defined through mb_internal_encoding(). Only the encoding
+  defined through the ini setting mbstring.internal_encoding is considered.
 - number_format() no longer truncates multibyte decimal points and thousand
   separators to the first byte.
 - The third parameter ($matches) to preg_match_all() is now optional. If
index 510d2f565e56edb2e42e096128a29122717d8c6a..1d989f818e7f11802014c5a73b453ed215809f0b 100644 (file)
@@ -367,6 +367,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
        int i;
        enum entity_charset charset = cs_utf_8;
        int len = 0;
+       const zend_encoding *zenc;
 
        /* Default is now UTF-8 */
        if (charset_hint == NULL)
@@ -376,9 +377,20 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
                goto det_charset;
        }
 
-       charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
-       if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
-               goto det_charset;
+       zenc = zend_multibyte_get_internal_encoding(TSRMLS_C);
+       if (zenc != NULL) {
+               charset_hint = zend_multibyte_get_encoding_name(zenc);
+               if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
+                       if ((len == 4) /* sizeof (none|auto|pass) */ &&
+                                       (!memcmp("pass", charset_hint, 4) ||
+                                        !memcmp("auto", charset_hint, 4) ||
+                                        !memcmp("auto", charset_hint, 4))) {
+                               charset_hint = NULL;
+                               len = 0;
+                       } else {
+                               goto det_charset;
+                       }
+               }
        }
 
        charset_hint = SG(default_charset);
index c6a75bef8d777ba85439bb375467ea80e2801081..278b6db428b19434004fd2bc1578cc04c30b1845 100644 (file)
@@ -33,7 +33,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
                                          cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
                                          cs_numelems /* used to count the number of charsets */
                                        };
-#define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_utf_8)
+#define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_8859_1)
 #define CHARSET_SINGLE_BYTE(cs)                ((cs) > cs_utf_8 && (cs) < cs_big5)
 #define CHARSET_PARTIAL_SUPPORT(cs)    ((cs) >= cs_big5)
 
index dcd21b370b9bf97a75f067da74690c03ad79b388..05997f03754b314c57307852691e23ae3afcc086 100644 (file)
@@ -56,7 +56,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
                                          cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
                                          cs_numelems /* used to count the number of charsets */
                                        };
-#define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_utf_8)
+#define CHARSET_UNICODE_COMPAT(cs)     ((cs) <= cs_8859_1)
 #define CHARSET_SINGLE_BYTE(cs)                ((cs) > cs_utf_8 && (cs) < cs_big5)
 #define CHARSET_PARTIAL_SUPPORT(cs)    ((cs) >= cs_big5)
 
index 779cf289b0b1d02b6f1e6227a3cec170a4405500..7f8adb6b99ef6f2d48a0e0b4dcc604abdfc414fa 100644 (file)
@@ -2,19 +2,12 @@
 htmlentities() test 5 (mbstring / cp1252)
 --INI--
 output_handler=
+mbstring.internal_encoding=cp1252
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       mb_internal_encoding('cp1252');
-       $php_errormsg = NULL;
-       @htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, '');
-       if ($php_errormsg) {
-               die("skip cp1252 chracter set is not supported on this platform.\n");
-       }
-?>
 --FILE--
 <?php
-       mb_internal_encoding('cp1252');
        print mb_internal_encoding()."\n";
        var_dump(htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, ''));
        var_dump(htmlentities("\x80\xa2\xa3\xa4\xa5", ENT_QUOTES, ''));
index 44d1466da9b680be8f76a46a1e4c8e9a77cdd2a6..4cc68d233f518a9c18a8fb141ddbe40319c71849 100644 (file)
@@ -2,15 +2,10 @@
 htmlentities() test 6 (mbstring / ISO-8859-15)
 --INI--
 output_handler=
+mbstring.internal_encoding=ISO-8859-15
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       @mb_internal_encoding('ISO-8859-15');
-       @htmlentities("\xbc\xbd\xbe", ENT_QUOTES, '');
-       if (@$php_errormsg) {
-               die("skip ISO-8859-15 chracter set is not supported on this platform.\n");
-       }
-?>
 --FILE--
 <?php
        mb_internal_encoding('ISO-8859-15');
index efd06f08ad0f020832dd701ccb09a29448297451..144734c12e6cc1ea6fcf350e80796e2c147f7eea 100644 (file)
@@ -2,16 +2,10 @@
 htmlentities() test 7 (mbstring / ISO-8859-1)
 --INI--
 output_handler=
+mbstring.internal_encoding=ISO-8859-1
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       mb_internal_encoding('ISO-8859-1');
-       $php_errormsg = NULL;
-       @htmlentities("\xe4\xf6\xfc", ENT_QUOTES, '');
-       if ($php_errormsg) {
-               die("skip ISO-8859-1 chracter set is not supported on this platform.\n");
-       }
-?>
 --FILE--
 <?php
        mb_internal_encoding('ISO-8859-1');
index 0f8f912f272f97226329497187ab92138e023fce..1f6dc90fcb5a4517b0cb8b09d745b52b06c06f82 100644 (file)
@@ -2,16 +2,11 @@
 htmlentities() test 8 (mbstring / EUC-JP)
 --INI--
 output_handler=
+error_reporting=~E_STRICT
+mbstring.internal_encoding=EUC-JP
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       mb_internal_encoding('EUC-JP');
-       $php_errormsg = NULL;
-       @htmlentities("\xa1\xa2\xa1\xa3\xa1\xa4", ENT_QUOTES, '');
-       if ($php_errormsg) {
-               die("skip EUC-JP chracter set is not supported on this platform.\n");   
-       }
-?>
 --FILE--
 <?php
        mb_internal_encoding('EUC-JP');
index 9127a71d06ea84efdd088cf6a3c32ae378b7b2b3..c80a77d40f39ddae07ff274f2722a3e9006e09fd 100644 (file)
@@ -2,16 +2,11 @@
 htmlentities() test 9 (mbstring / Shift_JIS)
 --INI--
 output_handler=
+error_reporting=~E_STRICT
+mbstring.internal_encoding=Shift_JIS
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       mb_internal_encoding('Shift_JIS');
-       $php_errormsg = NULL;
-       @htmlentities("\x81\x41\x81\x42\x81\x43", ENT_QUOTES, '');
-       if ($php_errormsg) {
-               die("skip Shift_JIS chracter set is not supported on this platform.\n");
-       }
-?>
 --FILE--
 <?php
        mb_internal_encoding('Shift_JIS');
index c49584e90fdddacc255d5adb3597acc1cab8ca1f..ed54670520361a6b18422957755206bd48887ea6 100644 (file)
@@ -2,17 +2,13 @@
 htmlentities() test 16 (mbstring / cp1251)
 --INI--
 output_handler=
+mbstring.internal_encoding=cp1251
 --SKIPIF--
 <?php
        extension_loaded("mbstring") or die("skip mbstring not available\n");
-       if (!@mb_internal_encoding('cp1251') ||
-               @htmlentities("\x88\xa9\xd2\xcf\xd3\xcb\xcf\xdb\xce\xd9\xca", ENT_QUOTES, '') == '') {
-               die("skip cp1251 character set is not available in this build.\n");
-       }
 ?>
 --FILE--
 <?php
-mb_internal_encoding('cp1251');
 $str = "\x88\xa9\xf0\xee\xf1\xea\xee\xf8\xed\xfb\xe9";
 var_dump(bin2hex($str), bin2hex(htmlentities($str, ENT_QUOTES, '')));
 var_dump(htmlentities($str, ENT_QUOTES | ENT_HTML5, ''));