From: Gustavo André dos Santos Lopes Date: Tue, 25 Jan 2011 10:57:07 +0000 (+0000) Subject: - Fixed CHARSET_UNICODE_COMPAT (ISO-8859-1 is compatible in the relevant sense). X-Git-Tag: php-5.4.0alpha1~191^2~316 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4a946a91e5d0e431126e38bf53efce75d7b66324;p=php - Fixed CHARSET_UNICODE_COMPAT (ISO-8859-1 is compatible in the relevant sense). - Fixed usage of zend_multibyte_get_internal_encoding (its return cannot be cast to char*). - Change tests to reflect that charset detection now relies on internal_encoding, not on current_internal_encoding. NOTE: This fixes the changes in rev 306077, but it remains that that change introduced a BC break. I assumed it was intentional --- diff --git a/UPGRADING b/UPGRADING index 9c5d18affb..53a7c8a489 100755 --- a/UPGRADING +++ b/UPGRADING @@ -148,6 +148,9 @@ UPGRADE NOTES - PHP X.Y behavior follows the recommendations of Unicode Technical Report #36. - htmlspecialchars_decode/html_entity_decode now decode &apos; if the document type is ENT_XML1, ENT_XHTML, or ENT_HTML5. +- Charset detection with $charset == '' no longer turns to mbstring's + internal encoding defined through mb_internal_encoding(). Only the encoding + defined through the ini setting mbstring.internal_encoding is considered. - number_format() no longer truncates multibyte decimal points and thousand separators to the first byte. - The third parameter ($matches) to preg_match_all() is now optional. 
If diff --git a/ext/standard/html.c b/ext/standard/html.c index 510d2f565e..1d989f818e 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -367,6 +367,7 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC) int i; enum entity_charset charset = cs_utf_8; int len = 0; + const zend_encoding *zenc; /* Default is now UTF-8 */ if (charset_hint == NULL) @@ -376,9 +377,20 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC) goto det_charset; } - charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C); - if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { - goto det_charset; + zenc = zend_multibyte_get_internal_encoding(TSRMLS_C); + if (zenc != NULL) { + charset_hint = zend_multibyte_get_encoding_name(zenc); + if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { + if ((len == 4) /* sizeof (none|auto|pass) */ && + (!memcmp("pass", charset_hint, 4) || + !memcmp("auto", charset_hint, 4) || + !memcmp("auto", charset_hint, 4))) { + charset_hint = NULL; + len = 0; + } else { + goto det_charset; + } + } } charset_hint = SG(default_charset); diff --git a/ext/standard/html_tables.h b/ext/standard/html_tables.h index c6a75bef8d..278b6db428 100644 --- a/ext/standard/html_tables.h +++ b/ext/standard/html_tables.h @@ -33,7 +33,7 @@ enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; -#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) +#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1) #define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5) #define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5) diff --git a/ext/standard/html_tables/html_table_gen.php b/ext/standard/html_tables/html_table_gen.php index dcd21b370b..05997f0375 100644 --- a/ext/standard/html_tables/html_table_gen.php +++ b/ext/standard/html_tables/html_table_gen.php @@ -56,7 +56,7 @@ enum 
entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; -#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) +#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1) #define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5) #define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5) diff --git a/ext/standard/tests/strings/htmlentities05.phpt b/ext/standard/tests/strings/htmlentities05.phpt index 779cf289b0..7f8adb6b99 100644 --- a/ext/standard/tests/strings/htmlentities05.phpt +++ b/ext/standard/tests/strings/htmlentities05.phpt @@ -2,19 +2,12 @@ htmlentities() test 5 (mbstring / cp1252) --INI-- output_handler= +mbstring.internal_encoding=cp1252 --SKIPIF-- --FILE-- --FILE-- --FILE-- --FILE-- --FILE-- --FILE--