From: Yasuo Ohgaki
Date: Thu, 20 Mar 2014 01:49:23 +0000 (+0900)
Subject: Add default_charset handling
X-Git-Tag: PRE_PHPNG_MERGE~457^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e1fe76f28ae4d93306c53477968311924f83bb79;p=php

Add default_charset handling
---

diff --git a/ext/standard/html.c b/ext/standard/html.c
index 075a4d2ff9..ad2f38f2d4 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -84,6 +84,25 @@
 #define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD)
 #define sjis_trail(c) ((c) >= 0x40 && (c) != 0x7F && (c) < 0xFD)
 
+/* {{{ get_charset
+ * Store an estrndup() copy of the configured charset name in *charset and
+ * its length in *charset_len. A non-empty internal_encoding is preferred
+ * over a non-empty default_charset; if neither is set, an empty string is
+ * stored, so the caller can always efree() *charset.
+ */
+static void get_charset(char **charset, int *charset_len TSRMLS_DC) {
+	const char *name = "";
+
+	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
+		name = PG(internal_encoding);
+	} else if (SG(default_charset) && SG(default_charset)[0] ) {
+		name = SG(default_charset);
+	}
+	*charset_len = strlen(name);
+	*charset = estrndup(name, *charset_len);
+}
+/* }}} */
+
 /* {{{ get_next_char
  */
 static inline unsigned int get_next_char(
@@ -1432,8 +1451,8 @@ encode_amp:
  */
 static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
 {
-	char *str, *hint_charset = PHP_DEFAULT_CHARSET;
-	int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+	char *str, *hint_charset = "";
+	int str_len, hint_charset_len = 0;
 	size_t new_len;
 	long flags = ENT_COMPAT;
 	char *replaced;
@@ -1443,7 +1462,14 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
 		return;
 	}
 
-	replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+	if (hint_charset_len) {
+		replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+	} else {
+		get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+		replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+		efree(hint_charset);
+	}
+
 	RETVAL_STRINGL(replaced, (int)new_len, 0);
 }
 /* }}} */
@@ -1504,8 +1530,8 @@ PHP_FUNCTION(htmlspecialchars_decode)
    Convert all HTML entities to their applicable characters */
 PHP_FUNCTION(html_entity_decode)
 {
-	char *str, *hint_charset = PHP_DEFAULT_CHARSET;
-	int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+	char *str, *hint_charset = "";
+	int str_len, hint_charset_len = 0;
 	size_t new_len = 0;
 	long quote_style = ENT_COMPAT;
 	char *replaced;
@@ -1515,7 +1541,14 @@ PHP_FUNCTION(html_entity_decode)
 		return;
 	}
 
-	replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+	if (hint_charset_len) {
+		replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+	} else {
+		get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+		replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+		efree(hint_charset);
+	}
+
 	if (replaced) {
 		RETURN_STRINGL(replaced, (int)new_len, 0);
 	}
diff --git a/ext/standard/tests/strings/default_charset.phpt b/ext/standard/tests/strings/default_charset.phpt
new file mode 100644
index 0000000000..6ac545bf13
--- /dev/null
+++ b/ext/standard/tests/strings/default_charset.phpt
@@ -0,0 +1,23 @@
+--TEST--
+default_charset and htmlentities/htmlspecialchars/html_entity_decode
+--INI--
+default_charset=UTF-8
+internal_encoding=
+--FILE--
+
+--EXPECT--
+string(6) "cp1252"
+string(0) ""
+string(7) "&pound;"
+string(7) "&pound;"
+string(2) "a3"
+string(2) "a3"