granicus.if.org Git - php/commitdiff
Added conversion support from script character encoding to internal character encodin...
author Rui Hirokawa <hirokawa@php.net>
Wed, 8 May 2002 12:33:44 +0000 (12:33 +0000)
committer Rui Hirokawa <hirokawa@php.net>
Wed, 8 May 2002 12:33:44 +0000 (12:33 +0000)
ext/mbstring/mbfilter.c
ext/mbstring/mbfilter.h
ext/mbstring/mbstring.c
ext/mbstring/mbstring.h
main/main.c
sapi/apache/mod_php4.c

index 3e065fa4538654e267a2fec657f1cbc3f96488d1..4e335f12fb47da1b99a34f93bfc60c07e292d666 100644 (file)
@@ -685,12 +685,12 @@ static mbfl_encoding mbfl_encoding_2022jp = {
 
 
 #if defined(HAVE_MBSTR_CN)
-static const char *mbfl_encoding_euc_cn_aliases[] = {"EUC_CN", "eucCN", "x-euc-cn", NULL};
+static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", NULL};
 
 static mbfl_encoding mbfl_encoding_euc_cn = {
        mbfl_no_encoding_euc_cn,
        "EUC-CN",
-       "EUC-CN",
+       "CN-GB",
        (const char *(*)[])&mbfl_encoding_euc_cn_aliases,
        mblen_table_euccn,
        MBFL_ENCTYPE_MBCS
@@ -721,12 +721,12 @@ static mbfl_encoding mbfl_encoding_euc_tw = {
        MBFL_ENCTYPE_MBCS
 };
 
-static const char *mbfl_encoding_big5_aliases[] = {"big5", "CP950", NULL};
+static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};
 
 static mbfl_encoding mbfl_encoding_big5 = {
        mbfl_no_encoding_big5,
        "BIG-5",
-       "BIG-5",
+       "CN-BIG5",
        (const char *(*)[])&mbfl_encoding_big5_aliases,
        mblen_table_big5,
        MBFL_ENCTYPE_MBCS
@@ -6995,7 +6995,53 @@ mbfl_strlen(mbfl_string *string TSRMLS_DC)
        return len;
 }
 
+#ifdef ZEND_MULTIBYTE
+/*
+ *     oddlen
+ *
+ *     Reports how many more bytes are needed past the end of `string` to
+ *     complete its last, possibly truncated, character.
+ *
+ *     Returns -1 when `string` is NULL or its encoding number is unknown,
+ *     0 when the buffer ends on a character boundary (or when the boundary
+ *     cannot be determined for this kind of encoding), otherwise the number
+ *     of missing bytes.
+ */
+int
+mbfl_oddlen(mbfl_string *string)
+{
+       int n, m, k;
+       unsigned char *p;
+       const unsigned char *mbtab;
+       mbfl_encoding *encoding;
+
+       /* check the argument before it is dereferenced for the lookup */
+       if (string == NULL) {
+               return -1;
+       }
+       encoding = mbfl_no2encoding(string->no_encoding);
+       if (encoding == NULL) {
+               return -1;
+       }
 
+       k = string->len;
+       if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+               /* one byte per character: a buffer can never end mid-character */
+               return 0;
+       } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+               /* fixed 2-byte units: an odd length is short by exactly one byte */
+               return k % 2;
+       } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
+               /* fixed 4-byte units: bytes missing to reach the next multiple of 4 */
+               return (4 - k % 4) % 4;
+       } else if (encoding->mblen_table != NULL) {
+               mbtab = encoding->mblen_table;
+               n = 0;
+               p = string->val;
+               /*
+                * Step character by character using the lead-byte length table;
+                * n ends at the first character boundary at or past the end of
+                * the buffer, so n-k is the overshoot of the last character.
+                * NOTE(review): a NULL val with a non-zero len still yields -len,
+                * as it did before -- confirm whether callers rely on that.
+                */
+               if (p != NULL) {
+                       while (n < k) {
+                               m = mbtab[*p];
+                               n += m;
+                               p += m;
+                       }
+               }
+               return n-k;
+       } else {
+               /* neither fixed-width nor table-driven: no way to locate the boundary */
+               return 0;
+       }
+       /* NOT REACHED */
+}
+#endif /* ZEND_MULTIBYTE */
 /*
  *  strpos
  */
index 65ee94b5732a43c50a6c3362552c231d2d0918e8..a5077bd57877488583faaa24c8d405fb457c52e6 100644 (file)
@@ -461,6 +461,14 @@ mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int
 int
 mbfl_strlen(mbfl_string *string TSRMLS_DC);
 
+#ifdef ZEND_MULTIBYTE
+/*
+ * oddlen
+ *
+ * For encodings that have a byte-length table, returns how many bytes the
+ * last character of `string` extends past the end of the buffer (0 when the
+ * buffer ends on a character boundary). Returns -1 when the encoding of
+ * `string` is not known. See the definition in mbfilter.c for how the other
+ * encoding classes are handled.
+ */
+int
+mbfl_oddlen(mbfl_string *string);
+#endif /* ZEND_MULTIBYTE */
+
 /*
  * strpos
  */
index 40d16522d2dfa63b9e4713e935aba6035b770633..5b40ff3f890df3cd834edb1777a554c7b3b15844 100644 (file)
 #include "php_content_types.h"
 #include "SAPI.h"
 
+#ifdef ZEND_MULTIBYTE
+#include "zend_multibyte.h"
+#endif /* ZEND_MULTIBYTE */
+
 #if HAVE_MBSTRING
 
 #if HAVE_MBREGEX
@@ -524,6 +528,25 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
        return SUCCESS;
 }
 
+#ifdef ZEND_MULTIBYTE
+/*
+ * INI handler for "mbstring.script_encoding": parses the new value into an
+ * encoding list and installs it in the module globals. Returns FAILURE and
+ * keeps the previous list when the value cannot be parsed.
+ */
+static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
+{
+       int *parsed;
+       int count;
+
+       if (!php_mbstring_parse_encoding_list(new_value, new_value_length, &parsed, &count, 1)) {
+               return FAILURE;
+       }
+
+       /* drop the list installed by an earlier update before replacing it */
+       if (MBSTRG(script_encoding_list) != NULL) {
+               free(MBSTRG(script_encoding_list));
+       }
+       MBSTRG(script_encoding_list_size) = count;
+       MBSTRG(script_encoding_list) = parsed;
+
+       return SUCCESS;
+}
+#endif /* ZEND_MULTIBYTE */
+
 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
 {
        if (new_value != NULL) {
@@ -546,6 +569,9 @@ PHP_INI_BEGIN()
         PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
         PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
         PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
+#ifdef ZEND_MULTIBYTE
+        PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
+#endif /* ZEND_MULTIBYTE */
         PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
         STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateInt, func_overload, zend_mbstring_globals, mbstring_globals)
 PHP_INI_END()
@@ -579,6 +605,10 @@ php_mbstring_init_globals(zend_mbstring_globals *pglobals TSRMLS_DC)
        MBSTRG(internal_encoding) = mbfl_no_encoding_euc_jp;
        MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_jp;
 #endif
+#ifdef ZEND_MULTIBYTE
+       MBSTRG(script_encoding_list) = NULL;
+       MBSTRG(script_encoding_list_size) = 0;
+#endif /* ZEND_MULTIBYTE */
        MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
        MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
        MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
@@ -640,6 +670,11 @@ PHP_MSHUTDOWN_FUNCTION(mbstring)
        if (MBSTRG(http_input_list)) {
                free(MBSTRG(http_input_list));
        }
+#ifdef ZEND_MULTIBYTE
+       if (MBSTRG(script_encoding_list)) {
+               free(MBSTRG(script_encoding_list));
+       }
+#endif /* ZEND_MULTIBYTE */
        if (MBSTRG(detect_order_list)) {
                free(MBSTRG(detect_order_list));
        }
@@ -858,6 +893,9 @@ PHP_FUNCTION(mb_internal_encoding)
                        RETURN_FALSE;
                } else {
                        MBSTRG(current_internal_encoding) = no_encoding;
+#ifdef ZEND_MULTIBYTE
+                       zend_multibyte_set_internal_encoding(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1) TSRMLS_CC);
+#endif /* ZEND_MULTIBYTE */
                        RETURN_TRUE;
                }
        } else {
@@ -3174,6 +3212,175 @@ PHP_FUNCTION(mb_get_info)
 }
 /* }}} */
 
+
+#ifdef ZEND_MULTIBYTE
+/*
+ * Hands the mbstring configuration over to the Zend Engine:
+ *  - the configured script encodings, as one comma separated list of names;
+ *  - the detector / converter / oddlen callbacks;
+ *  - with MBSTR_ENC_TRANS, also the current internal encoding.
+ *
+ * Returns 0 on success, -1 if the name list could not be allocated.
+ */
+PHPAPI int php_mbstring_set_zend_encoding(TSRMLS_D)
+{
+       /* TODO: consider building the list with mbfl_memory_device instead */
+       char *name, *grown, *list = NULL;
+       int n, *entry, name_len, list_len = 0;
+       zend_encoding_detector encoding_detector;
+       zend_encoding_converter encoding_converter;
+       zend_multibyte_oddlen multibyte_oddlen;
+
+       /* notify script encoding to Zend Engine */
+       entry = MBSTRG(script_encoding_list);
+       n = MBSTRG(script_encoding_list_size);
+       for (; n > 0; entry++, n--) {
+               name = (char *)mbfl_no_encoding2name(*entry);
+               if (name == NULL) {
+                       /* encodings without a name are left out of the list */
+                       continue;
+               }
+               name_len = (int)strlen(name);
+               if (list == NULL) {
+                       list = (char *)emalloc(name_len + 1);
+                       if (list == NULL) {
+                               return -1;
+                       }
+               } else {
+                       /* old text + ',' + new name + terminating NUL */
+                       grown = (char *)erealloc(list, list_len + 1 + name_len + 1);
+                       if (grown == NULL) {
+                               /* do not lose the block that is still owned here */
+                               efree(list);
+                               return -1;
+                       }
+                       list = grown;
+                       list[list_len++] = ',';
+               }
+               /* append at the known end instead of rescanning with strcat() */
+               memcpy(list + list_len, name, name_len + 1);
+               list_len += name_len;
+       }
+       zend_multibyte_set_script_encoding(list, list_len TSRMLS_CC);
+       if (list != NULL) {
+               efree(list);
+       }
+
+       encoding_detector = php_mbstring_encoding_detector;
+       encoding_converter = NULL;
+       multibyte_oddlen = php_mbstring_oddlen;
+
+#if defined(MBSTR_ENC_TRANS)
+       /* notify internal encoding to Zend Engine */
+       name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+       if (name != NULL) {
+               /* the lookup can fail, as handled in the loop above; never strlen(NULL) */
+               zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
+       }
+
+       encoding_converter = php_mbstring_encoding_converter;
+#endif /* defined(MBSTR_ENC_TRANS) */
+
+       zend_multibyte_set_functions(encoding_detector, encoding_converter,
+                       multibyte_oddlen TSRMLS_CC);
+
+       return 0;
+}
+
+/*
+ *     mb_detect_encoding (interface for Zend Engine)
+ *
+ *     Identifies the encoding of arg_string (arg_length bytes) among the
+ *     candidates named in arg_list.
+ *
+ *     Returns an estrdup()ed encoding name, or NULL when arg_list is NULL,
+ *     yields no usable candidate, or no candidate matches.
+ */
+char* php_mbstring_encoding_detector(char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
+{
+       mbfl_string string;
+       const char *ret;
+       enum mbfl_no_encoding *elist;
+       int size, *list;
+
+       /* strlen() below must never be handed a NULL candidate list */
+       if (arg_list == NULL) {
+               return NULL;
+       }
+
+       /* make encoding list */
+       list = NULL;
+       size = 0;
+       php_mbstring_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+       if (size <= 0) {
+               /* no candidates: release anything the parser may have handed back */
+               if (list != NULL) {
+                       efree((void *)list);
+               }
+               return NULL;
+       }
+
+       if (list != NULL) {
+               elist = list;
+       } else {
+               /* defensive: a count without a list falls back to the detect order */
+               elist = MBSTRG(current_detect_order_list);
+               size = MBSTRG(current_detect_order_list_size);
+       }
+
+       mbfl_string_init(&string);
+       string.no_language = MBSTRG(current_language);
+       string.val = arg_string;
+       string.len = arg_length;
+       ret = mbfl_identify_encoding_name(&string, elist, size);
+       if (list != NULL) {
+               efree((void *)list);
+       }
+
+       /* the caller gets its own copy of the name */
+       return (ret != NULL) ? estrdup(ret) : NULL;
+}
+
+
+/*
+ *     mb_convert_encoding (interface for Zend Engine)
+ *
+ *     Converts `from` (from_length bytes, in encoding_from) to encoding_to.
+ *     On success the converted buffer and its length are stored in *to and
+ *     *to_length and 0 is returned. Returns -1, leaving *to and *to_length
+ *     untouched, when either encoding name is not recognised, the converter
+ *     cannot be created, or the conversion yields no result.
+ */
+int php_mbstring_encoding_converter(char **to, int *to_length, char *from,
+               int from_length, const char *encoding_to, const char *encoding_from 
+               TSRMLS_DC)
+{
+       enum mbfl_no_encoding dst_enc, src_enc;
+       mbfl_string input, output, *converted;
+       mbfl_buffer_converter *conv;
+       int status = -1;
+
+       /* resolve the target encoding first, then the source encoding */
+       dst_enc = mbfl_name2no_encoding(encoding_to);
+       if (dst_enc == mbfl_no_encoding_invalid) {
+               return status;
+       }
+       src_enc = mbfl_name2no_encoding(encoding_from);
+       if (src_enc == mbfl_no_encoding_invalid) {
+               return status;
+       }
+
+       /* describe the input buffer; the output descriptor starts out empty */
+       mbfl_string_init(&input);
+       mbfl_string_init(&output);
+       input.no_encoding = src_enc;
+       input.no_language = MBSTRG(current_language);
+       input.val = from;
+       input.len = from_length;
+
+       /* set up a converter that follows the current illegal-character policy */
+       conv = mbfl_buffer_converter_new(src_enc, dst_enc, input.len);
+       if (conv == NULL) {
+               return status;
+       }
+       mbfl_buffer_converter_illegal_mode(conv, MBSTRG(current_filter_illegal_mode));
+       mbfl_buffer_converter_illegal_substchar(conv, MBSTRG(current_filter_illegal_substchar));
+
+       /* run the conversion and publish the result only when there is one */
+       converted = mbfl_buffer_converter_feed_result(conv, &input, &output);
+       if (converted != NULL) {
+               *to = converted->val;
+               *to_length = converted->len;
+               status = 0;
+       }
+       mbfl_buffer_converter_delete(conv);
+
+       return status;
+}
+
+
+/*
+ *     Zend Engine callback: wraps the raw buffer in an mbfl_string and asks
+ *     mbfl_oddlen() about a character cut off at the end of `string`
+ *     (e.g. only the first byte of a multibyte character is present).
+ *     Returns 0 when `encoding` is not a known encoding name.
+ */
+int php_mbstring_oddlen(char *string, int length, const char *encoding TSRMLS_DC)
+{
+       mbfl_string subject;
+       enum mbfl_no_encoding enc;
+
+       mbfl_string_init(&subject);
+
+       enc = mbfl_name2no_encoding(encoding);
+       if (enc == mbfl_no_encoding_invalid) {
+               return 0;
+       }
+
+       subject.no_language = MBSTRG(current_language);
+       subject.no_encoding = enc;
+       subject.val = string;
+       subject.len = length;
+
+       return mbfl_oddlen(&subject);
+}
+
+#endif /* ZEND_MULTIBYTE */
+
 #endif /* HAVE_MBSTRING */
 
 /*
index 22ff290c5db629b4f0e16d2e07a23c8282360f43..98623e77b49878c33366d78d55857db783f0d39b 100644 (file)
@@ -129,6 +129,10 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring)
        int current_language;
        int internal_encoding;
        int current_internal_encoding;
+#ifdef ZEND_MULTIBYTE
+       int *script_encoding_list;
+       int script_encoding_list_size;
+#endif /* ZEND_MULTIBYTE */
        int http_output_encoding;
        int current_http_output_encoding;
        int http_input_identify;
@@ -177,6 +181,16 @@ struct mb_overload_def {
 #define MBSTRG(v) (mbstring_globals.v)
 #endif
 
+#ifdef ZEND_MULTIBYTE
+/* Passes the script encoding list and the mbstring callbacks declared below
+ * to the Zend Engine; returns 0 on success, -1 on allocation failure. */
+PHPAPI int php_mbstring_set_zend_encoding(TSRMLS_D);
+/* Detects the encoding of `string` among the names in `list`; returns an
+ * estrdup()ed encoding name, or NULL when nothing could be identified. */
+char* php_mbstring_encoding_detector(char *string, int length, char *list
+               TSRMLS_DC);
+/* Converts `from` from encoding_from to encoding_to; on success stores the
+ * result in *to / *to_length and returns 0, otherwise returns -1. */
+int php_mbstring_encoding_converter(char **to, int *to_length, char *from,
+               int from_length, const char *encoding_to, const char *encoding_from
+               TSRMLS_DC);
+/* Returns mbfl_oddlen() for the buffer interpreted in `encoding`, or 0 when
+ * the encoding name is not recognised. */
+int php_mbstring_oddlen(char *string, int length, const char *encoding TSRMLS_DC);
+#endif /* ZEND_MULTIBYTE */
+
 #else  /* HAVE_MBSTRING */
 
 #define mbstring_module_ptr NULL
index fcd73d82392d526f92fdd29519aff0a9ef621701..16b47b6ceae43f94274b1e778b89b40e2ee596d7 100644 (file)
 #include "php_logos.h"
 #include "php_streams.h"
 
+#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
+#include "ext/mbstring/mbstring.h"
+#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
+
 #include "SAPI.h"
 /* }}} */
 
@@ -1402,6 +1406,9 @@ PHPAPI int php_execute_script(zend_file_handle *primary_file TSRMLS_DC)
                } else {
                        append_file_p = NULL;
                }
+#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
+               php_mbstring_set_zend_encoding(TSRMLS_C);
+#endif /* ZEND_MULTIBYTE && HAVE_MBSTRING */
                retval = (zend_execute_scripts(ZEND_REQUIRE TSRMLS_CC, NULL, 3, prepend_file_p, primary_file, append_file_p) == SUCCESS);
        } zend_end_try();
 
index d3ed4ef0bdb322002826703c6818d6a649734931..9c23523178af97d3623bd3c00ad6beb93203bafc 100644 (file)
 
 #include "php_apache_http.h"
 
+#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
+#include "ext/mbstring/mbstring.h"
+#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
+
 #undef shutdown
 
 /* {{{ Prototypes
@@ -459,6 +463,11 @@ static int send_php(request_rec *r, int display_source_mode, char *filename)
                fh.opened_path = NULL;
                fh.free_filename = 0;
                fh.type = ZEND_HANDLE_FILENAME;
+
+#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
+               php_mbstring_set_zend_encoding(TSRMLS_C);
+#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
+
                zend_execute_scripts(ZEND_INCLUDE TSRMLS_CC, NULL, 1, &fh);
                return OK;
        }