granicus.if.org Git - php/commitdiff
Improved determine_charset() to use mbstring.internal_encoding when the last
author: Moriyoshi Koizumi <moriyoshi@php.net>
Tue, 22 Oct 2002 20:11:48 +0000 (20:11 +0000)
committer: Moriyoshi Koizumi <moriyoshi@php.net>
Tue, 22 Oct 2002 20:11:48 +0000 (20:11 +0000)
parameter is an empty string. (If the parameter is omitted, ISO-8859-1 is
used as the default charset for backwards compatibility.)

ext/standard/html.c

index a939084ba1eff87b49c79b390c76875db6ac4f0a..3d329f3e3a8952fc14e147b1174f81803b07ec92 100644 (file)
 #include <langinfo.h>
 #endif
 
+#if HAVE_MBSTRING
+# include "ext/mbstring/mbstring.h"
+ZEND_EXTERN_MODULE_GLOBALS(mbstring)
+#endif
+
 enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
                                          cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, 
                                          cs_big5hkscs, cs_sjis, cs_eucjp};
@@ -525,6 +530,36 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
                return cs_8859_1;
 
        if (strlen(charset_hint) == 0)  {
+#if HAVE_MBSTRING
+       /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
+               switch (MBSTRG(internal_encoding)) {
+                       case mbfl_no_encoding_utf8:
+                               return cs_utf_8;
+
+                       case mbfl_no_encoding_euc_jp:
+                       case mbfl_no_encoding_eucjp_win:
+                               return cs_eucjp;
+
+                       case mbfl_no_encoding_sjis:
+                       case mbfl_no_encoding_sjis_win:
+                       case mbfl_no_encoding_sjis_mac:
+                               return cs_sjis;
+
+                       case mbfl_no_encoding_cp1252:
+                               return cs_cp1252;
+
+                       case mbfl_no_encoding_8859_15:
+                               return cs_8859_15;
+
+                       case mbfl_no_encoding_big5:
+                               return cs_big5;
+
+                       case mbfl_no_encoding_euc_cn:
+                       case mbfl_no_encoding_hz:
+                       case mbfl_no_encoding_cp936:
+                               return cs_gb2312;
+               }
+#endif
                /* try to detect the charset for the locale */
 #if HAVE_NL_LANGINFO && HAVE_LOCALE_H && defined(CODESET)
                charset_hint = nl_langinfo(CODESET);