]> granicus.if.org Git - php/commitdiff
Fix some incompatibilities with the pre-libmbfl behaviour regarding encoding detection.
authorMoriyoshi Koizumi <moriyoshi@php.net>
Fri, 26 Sep 2003 14:42:14 +0000 (14:42 +0000)
committerMoriyoshi Koizumi <moriyoshi@php.net>
Fri, 26 Sep 2003 14:42:14 +0000 (14:42 +0000)
ext/mbstring/mbstring.c

index 618ec90e0e926cf9096c44da40cf712e8a5ee1bf..a7ae46c41b2394eab3acd873d0ac9cc2e8eb4e7e 100644 (file)
@@ -84,62 +84,65 @@ static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC);
 static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
 /* }}} */
 
-/* {{{ php_mb_default_identify_list[] */
-#if defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+/* {{{ php_mb_default_identify_list */
+typedef struct _php_mb_nls_ident_list {
+       enum mbfl_no_language lang;
+       enum mbfl_no_encoding* list;
+       int list_size;
+} php_mb_nls_ident_list;
+
+static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_jis,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_jp,
        mbfl_no_encoding_sjis
 };
-#endif
 
-#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_cn,
        mbfl_no_encoding_cp936
 };
-#endif
 
-#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_tw,
        mbfl_no_encoding_big5
 };
-#endif
 
-#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_kr,
        mbfl_no_encoding_uhc
 };
-#endif
 
-#if defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_koi8r,
        mbfl_no_encoding_cp1251,
        mbfl_no_encoding_cp866
 };
-#endif
 
-#if !defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8
 };
-#endif
 
-static const int php_mb_default_identify_list_size = sizeof(php_mb_default_identify_list)/sizeof(enum mbfl_no_encoding);
+
+php_mb_nls_ident_list php_mb_default_identify_list[] = {
+       { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
+       { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
+       { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
+       { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
+       { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
+       { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
+};
+
 /* }}} */
 
 static
@@ -285,11 +288,12 @@ static mbfl_allocators _php_mb_allocators = {
  *  of parsed encodings.
  */
 static int
-php_mb_parse_encoding_list(const char *value, int value_length, int **return_list, int *return_size, int persistent)
+php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
 {
-       int n, l, size, bauto, *src, *list, *entry, ret = 1;
+       int n, l, size, bauto, ret = 1;
        char *p, *p1, *p2, *endp, *tmpstr;
        enum mbfl_no_encoding no_encoding;
+       enum mbfl_no_encoding *src, *entry, *list;
 
        list = NULL;
        if (value == NULL || value_length <= 0) {
@@ -301,6 +305,12 @@ php_mb_parse_encoding_list(const char *value, int value_length, int **return_lis
                }
                return 0;
        } else {
+               enum mbfl_no_encoding *identify_list;
+               int identify_list_size;
+
+               identify_list = MBSTRG(default_detect_order_list);
+               identify_list_size = MBSTRG(default_detect_order_list_size);
+
                /* copy the value string for work */
                if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
                        tmpstr = (char *)estrndup(value+1, value_length-2);
@@ -319,7 +329,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, int **return_lis
                        p1 = p2 + 1;
                        n++;
                }
-               size = n + php_mb_default_identify_list_size;
+               size = n + identify_list_size;
                /* make list */
                list = (int *)pecalloc(size, sizeof(int), persistent);
                if (list != NULL) {
@@ -343,23 +353,25 @@ php_mb_parse_encoding_list(const char *value, int value_length, int **return_lis
                                        p--;
                                }
                                /* convert to the encoding number and check encoding */
-                               no_encoding = mbfl_name2no_encoding(p1);
-                               if (no_encoding == mbfl_no_encoding_auto) {
+                               if (strcasecmp(p1, "auto") == 0) {
                                        if (!bauto) {
                                                bauto = 1;
-                                               l = php_mb_default_identify_list_size;
-                                               src = (int*)php_mb_default_identify_list;
+                                               l = identify_list_size;
+                                               src = identify_list;
                                                while (l > 0) {
                                                        *entry++ = *src++;
                                                        l--;
                                                        n++;
                                                }
                                        }
-                               } else if (no_encoding != mbfl_no_encoding_invalid) {
-                                       *entry++ = no_encoding;
-                                       n++;
                                } else {
-                                       ret = 0;
+                                       no_encoding = mbfl_name2no_encoding(p1);
+                                       if (no_encoding != mbfl_no_encoding_invalid) {
+                                               *entry++ = no_encoding;
+                                               n++;
+                                       } else {
+                                               ret = 0;
+                                       }
                                }
                                p1 = p2 + 1;
                        } while (n < size && p2 != NULL);
@@ -397,7 +409,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, int **return_lis
 
 /* {{{ MBSTRING_API php_mb_check_encoding_list */
 MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
-       return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0); 
+       return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);       
 }
 /* }}} */
 
@@ -407,19 +419,26 @@ MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC)
  *  of parsed encodings.
  */
 static int
-php_mb_parse_encoding_array(zval *array, int **return_list, int *return_size, int persistent)
+php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
 {
        zval **hash_entry;
        HashTable *target_hash;
-       int i, n, l, size, bauto, *list, *entry, *src, ret = 1;
+       int i, n, l, size, bauto,ret = 1;
        enum mbfl_no_encoding no_encoding;
+       enum mbfl_no_encoding *src, *list, *entry;
 
        list = NULL;
        if (Z_TYPE_P(array) == IS_ARRAY) {
+               enum mbfl_no_encoding *identify_list;
+               int identify_list_size;
+
+               identify_list = MBSTRG(default_detect_order_list);
+               identify_list_size = MBSTRG(default_detect_order_list_size);
+
                target_hash = Z_ARRVAL_P(array);
                zend_hash_internal_pointer_reset(target_hash);
                i = zend_hash_num_elements(target_hash);
-               size = i + php_mb_default_identify_list_size;
+               size = i + identify_list_size;
                list = (int *)pecalloc(size, sizeof(int), persistent);
                if (list != NULL) {
                        entry = list;
@@ -430,23 +449,25 @@ php_mb_parse_encoding_array(zval *array, int **return_list, int *return_size, in
                                        break;
                                }
                                convert_to_string_ex(hash_entry);
-                               no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
-                               if (no_encoding == mbfl_no_encoding_auto) {
+                               if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
                                        if (!bauto) {
                                                bauto = 1;
-                                               l = php_mb_default_identify_list_size;
-                                               src = (int*)php_mb_default_identify_list;
+                                               l = identify_list_size; 
+                                               src = identify_list;
                                                while (l > 0) {
                                                        *entry++ = *src++;
                                                        l--;
                                                        n++;
                                                }
                                        }
-                               } else if (no_encoding != mbfl_no_encoding_invalid) {
-                                       *entry++ = no_encoding;
-                                       n++;
                                } else {
-                                       ret = 0;;
+                                       no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
+                                       if (no_encoding != mbfl_no_encoding_invalid) {
+                                               *entry++ = no_encoding;
+                                               n++;
+                                       } else {
+                                               ret = 0;
+                                       }
                                }
                                zend_hash_move_forward(target_hash);
                                i--;
@@ -482,6 +503,25 @@ php_mb_parse_encoding_array(zval *array, int **return_list, int *return_size, in
 }
 /* }}} */
 
+/* {{{ php_mb_nls_get_default_detect_order_list */
+static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
+{
+       size_t i;
+
+       *plist = php_mb_default_identify_list_neut;
+       *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
+
+       for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
+               if (php_mb_default_identify_list[i].lang == lang) {
+                       *plist = php_mb_default_identify_list[i].list;
+                       *plist_size = php_mb_default_identify_list[i].list_size;
+                       return 1;
+               }
+       }
+       return 0;
+}
+/* }}} */
+
 /* {{{ php.ini directive handler */
 static PHP_INI_MH(OnUpdate_mbstring_language)
 {
@@ -492,6 +532,7 @@ static PHP_INI_MH(OnUpdate_mbstring_language)
                return FAILURE;
        }
        MBSTRG(language) = no_language;
+       php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
        return SUCCESS;
 }
 /* }}} */
@@ -501,7 +542,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
                if (MBSTRG(detect_order_list) != NULL) {
                        free(MBSTRG(detect_order_list));
                }
@@ -520,7 +561,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
                if (MBSTRG(http_input_list) != NULL) {
                        free(MBSTRG(http_input_list));
                }
@@ -594,7 +635,7 @@ static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
                if (MBSTRG(script_encoding_list) != NULL) {
                        free(MBSTRG(script_encoding_list));
                }
@@ -690,6 +731,8 @@ static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
        MBSTRG(detect_order_list_size) = 0;
        MBSTRG(current_detect_order_list) = NULL;
        MBSTRG(current_detect_order_list_size) = 0;
+       MBSTRG(default_detect_order_list) = php_mb_default_identify_list_neut;
+       MBSTRG(default_detect_order_list_size) = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
        MBSTRG(filter_illegal_substchar) = 0x3f;        /* '?' */
        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
@@ -838,8 +881,8 @@ PHP_RINIT_FUNCTION(mbstring)
                n = MBSTRG(detect_order_list_size);
        }
        if (n <= 0) {
-               list = (int*)php_mb_default_identify_list;
-               n = php_mb_default_identify_list_size;
+               list = MBSTRG(default_detect_order_list);
+               n = MBSTRG(default_detect_order_list_size);
        }
        entry = (int *)safe_emalloc(n, sizeof(int), 0);
        MBSTRG(current_detect_order_list) = entry;
@@ -990,6 +1033,7 @@ PHP_FUNCTION(mb_language)
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
                        RETURN_FALSE;
                } else {
+                       php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
                        MBSTRG(current_language) = no_language;
                        RETURN_TRUE;
                }
@@ -1185,7 +1229,7 @@ PHP_FUNCTION(mb_detect_order)
                size = 0;
                switch (Z_TYPE_PP(arg1)) {
                case IS_ARRAY:
-                       if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                }
@@ -1194,7 +1238,7 @@ PHP_FUNCTION(mb_detect_order)
                        break;
                default:
                        convert_to_string_ex(arg1);
-                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                }
@@ -1929,7 +1973,7 @@ MBSTRING_API char * php_mb_convert_encoding(char *input, size_t length, char *_t
        if (_from_encodings) {
                list = NULL;
                size = 0;
-           php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
+           php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
                if (size == 1) {
                        from_encoding = *list;
                        string.no_encoding = from_encoding;
@@ -2151,7 +2195,7 @@ PHP_FUNCTION(mb_detect_encoding)
        if (ZEND_NUM_ARGS() >= 2 &&  Z_STRVAL_PP(arg_list)) {
                switch (Z_TYPE_PP(arg_list)) {
                case IS_ARRAY:
-                       if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                        size = 0;
@@ -2160,7 +2204,7 @@ PHP_FUNCTION(mb_detect_encoding)
                        break;
                default:
                        convert_to_string_ex(arg_list);
-                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                        size = 0;
@@ -2432,11 +2476,11 @@ PHP_FUNCTION(mb_convert_variables)
        elistsz = 0;
        switch (Z_TYPE_PP(args[1])) {
        case IS_ARRAY:
-               php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0);
+               php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
                break;
        default:
                convert_to_string_ex(args[1]);
-               php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0);
+               php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0 TSRMLS_CC);
                break;
        }
        if (elistsz <= 0) {
@@ -3366,7 +3410,7 @@ MBSTRING_API int php_mb_gpc_encoding_detector(const char *arg_string, int arg_le
                /* make encoding list */
                list = NULL;
                size = 0;
-               php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+               php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
                
                if (size > 0 && list != NULL) {
                        elist = list;
@@ -3374,16 +3418,16 @@ MBSTRING_API int php_mb_gpc_encoding_detector(const char *arg_string, int arg_le
                        elist = MBSTRG(current_detect_order_list);
                        size = MBSTRG(current_detect_order_list_size);
                        if (size <= 0){
-                               elist = (int*)php_mb_default_identify_list;
-                               size = php_mb_default_identify_list_size;
+                               elist = MBSTRG(default_detect_order_list);
+                               size = MBSTRG(default_detect_order_list_size);
                        }
                }
        } else {
                elist = MBSTRG(current_detect_order_list);
                size = MBSTRG(current_detect_order_list_size);
                if (size <= 0){
-                       elist = (int*)php_mb_default_identify_list;
-                       size = php_mb_default_identify_list_size;
+                       elist = MBSTRG(default_detect_order_list);
+                       size = MBSTRG(default_detect_order_list_size);
                }
        }
 
@@ -3465,7 +3509,7 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
        /* make encoding list */
        list = NULL;
        size = 0;
-       php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+       php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
        if (size <= 0) {
                return NULL;
        }