]> granicus.if.org Git - php/commitdiff
Add default_charset handling
author Yasuo Ohgaki <yohgaki@php.net>
Thu, 20 Mar 2014 01:49:23 +0000 (10:49 +0900)
committer Yasuo Ohgaki <yohgaki@php.net>
Thu, 20 Mar 2014 02:03:02 +0000 (11:03 +0900)
ext/standard/html.c
ext/standard/tests/strings/default_charset.phpt [new file with mode: 0644]

index 075a4d2ff99d223d7b41d2f0a756486ba4242617..ad2f38f2d4a047e553502163a08506bcf019fec7 100644 (file)
 #define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD)
 #define sjis_trail(c) ((c) >= 0x40  && (c) != 0x7F && (c) < 0xFD)
 
+/* {{{ get_charset
+ * Resolve the charset to use when the caller supplied no explicit hint.
+ *
+ * Preference order: PG(internal_encoding) when non-empty, otherwise
+ * SG(default_charset) when non-empty, otherwise the empty string.
+ *
+ * *charset always receives an estrndup()'d copy and *charset_len its
+ * length, so the caller can (and must) efree() *charset unconditionally.
+ * Without the empty-string fallback the out-parameters would be left
+ * untouched when both settings are empty, and a caller that initialized
+ * its pointer to a "" literal would end up passing that literal to efree().
+ */
+static void get_charset(char **charset, int *charset_len TSRMLS_DC) {
+       const char *source = "";
+
+       if (PG(internal_encoding) && PG(internal_encoding)[0]) {
+               source = PG(internal_encoding);
+       } else if (SG(default_charset) && SG(default_charset)[0]) {
+               source = SG(default_charset);
+       }
+
+       *charset_len = (int) strlen(source);
+       *charset = estrndup(source, *charset_len);
+}
+/* }}} */
+
 /* {{{ get_next_char
  */
 static inline unsigned int get_next_char(
@@ -1432,8 +1445,8 @@ encode_amp:
  */
 static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
 {
-       char *str, *hint_charset = PHP_DEFAULT_CHARSET;
-       int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+       char *str, *hint_charset = "";
+       int str_len, hint_charset_len = 0;
        size_t new_len;
        long flags = ENT_COMPAT;
        char *replaced;
@@ -1443,7 +1456,14 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
                return;
        }
 
-       replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+       if (hint_charset_len) {
+               replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+       } else {
+               get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+               replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC);
+               efree(hint_charset);
+       }
+
        RETVAL_STRINGL(replaced, (int)new_len, 0);
 }
 /* }}} */
@@ -1504,8 +1524,8 @@ PHP_FUNCTION(htmlspecialchars_decode)
    Convert all HTML entities to their applicable characters */
 PHP_FUNCTION(html_entity_decode)
 {
-       char *str, *hint_charset = PHP_DEFAULT_CHARSET;
-       int str_len, hint_charset_len = sizeof(PHP_DEFAULT_CHARSET)-1;
+       char *str, *hint_charset = "";
+       int str_len, hint_charset_len = 0;
        size_t new_len = 0;
        long quote_style = ENT_COMPAT;
        char *replaced;
@@ -1515,7 +1535,14 @@ PHP_FUNCTION(html_entity_decode)
                return;
        }
 
-       replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+       if (hint_charset_len) {
+               replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+       } else {
+               get_charset(&hint_charset, &hint_charset_len TSRMLS_CC);
+               replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC);
+               efree(hint_charset);
+       }
+
        if (replaced) {
                RETURN_STRINGL(replaced, (int)new_len, 0);
        }
diff --git a/ext/standard/tests/strings/default_charset.phpt b/ext/standard/tests/strings/default_charset.phpt
new file mode 100644 (file)
index 0000000..6ac545b
--- /dev/null
@@ -0,0 +1,23 @@
+--TEST--
+default_charset and htmlentities/htmlspecialchars/html_entity_decode
+--INI--
+default_charset=UTF-8
+internal_encoding=
+--FILE--
+<?php
+ini_set('default_charset', 'cp1252');
+var_dump(ini_get('default_charset'), ini_get('internal_encoding'));
+
+var_dump(htmlentities("\xA3", ENT_HTML5));
+var_dump(htmlentities("\xA3", ENT_HTML5, 'cp1252'));
+
+var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5)));
+var_dump(bin2hex(html_entity_decode("&pound;", ENT_HTML5, 'cp1252')));
+?>
+--EXPECT--
+string(6) "cp1252"
+string(0) ""
+string(7) "&pound;"
+string(7) "&pound;"
+string(2) "a3"
+string(2) "a3"