#define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD)
#define sjis_trail(c) ((c) >= 0x40 && (c) != 0x7F && (c) < 0xFD)
+/* {{{ get_charset
+ * Picks the charset name to use when the caller supplied no explicit hint.
+ *
+ * Preference order: a non-empty PG(internal_encoding), then a non-empty
+ * SG(default_charset), otherwise the empty string.
+ *
+ * On return *charset ALWAYS points at a fresh estrndup() copy that the caller
+ * owns and must efree(), and *charset_len holds its length. Previously the
+ * outputs were left untouched when neither setting was configured, so callers
+ * that unconditionally efree() the result would free whatever pointer they had
+ * passed in (e.g. a string literal).
+ */
+static void get_charset(char **charset, int *charset_len TSRMLS_DC) {
+	const char *name = "";
+
+	if (PG(internal_encoding) && PG(internal_encoding)[0]) {
+		name = PG(internal_encoding);
+	} else if (SG(default_charset) && SG(default_charset)[0]) {
+		name = SG(default_charset);
+	}
+
+	/* Always allocate, even for the empty fallback, so ownership is uniform. */
+	*charset_len = (int)strlen(name);
+	*charset = estrndup(name, *charset_len);
+}
+/* }}} */
+
/* {{{ get_next_char
*/
static inline unsigned int get_next_char(
*/
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
- char *str, *hint_charset = PHP_DEFAULT_CHARSET;
- int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+ char *str, *hint_charset = "";
+ int str_len, hint_charset_len = 0;
size_t new_len;
long flags = ENT_COMPAT;
char *replaced;
return;
}
- replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+ if (hint_charset_len) {
+ replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+ } else {
+ get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+ replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+ efree(hint_charset);
+ }
+
RETVAL_STRINGL(replaced, (int)new_len, 0);
}
/* }}} */
Convert all HTML entities to their applicable characters */
PHP_FUNCTION(html_entity_decode)
{
- char *str, *hint_charset = PHP_DEFAULT_CHARSET;
- int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+ char *str, *hint_charset = "";
+ int str_len, hint_charset_len = 0;
size_t new_len = 0;
long quote_style = ENT_COMPAT;
char *replaced;
return;
}
- replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+ if (hint_charset_len) {
+ replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+ } else {
+ get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+ replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+ efree(hint_charset);
+ }
+
if (replaced) {
RETURN_STRINGL(replaced, (int)new_len, 0);
}
--- /dev/null
+--TEST--
+default_charset and htmlentities/htmlspecialchars/html_entity_decode
+--INI--
+default_charset=UTF-8
+internal_encoding=
+--FILE--
+<?php
+// Switch the runtime default away from the INI value so the functions below
+// must pick up the *current* default_charset when no encoding is passed.
+ini_set('default_charset', 'cp1252');
+var_dump(ini_get('default_charset'), ini_get('internal_encoding'));
+
+// 0xA3 is the pound sign in cp1252; both calls should yield the named entity.
+var_dump(htmlentities("\xA3", ENT_HTML5));
+var_dump(htmlentities("\xA3", ENT_HTML5, 'cp1252'));
+
+// Decoding the named entity should give back the single cp1252 byte 0xA3.
+var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5)));
+var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5, 'cp1252')));
+?>
+--EXPECT--
+string(6) "cp1252"
+string(0) ""
+string(7) "&pound;"
+string(7) "&pound;"
+string(2) "a3"
+string(2) "a3"