From: Rui Hirokawa Date: Sat, 28 Jun 2003 23:37:18 +0000 (+0000) Subject: Fixed corruption of multibyte character including 0x5c as second X-Git-Tag: BEFORE_ARG_INFO~567 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2b44c63d5084c134c7c80b5f411b25f741d7354d;p=php Fixed corruption of multibyte character including 0x5c as second byte in multipart/form-data. --- diff --git a/NEWS b/NEWS index 6769db0851..fdd58044c4 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,9 @@ PHP NEWS ? ? ??? 2003, PHP 5 Beta 1 +- Fixed corruption of multibyte character including 0x5c as second + byte in multipart/form-data. (Rui) + - Switch to using Zend Engine 2, which includes numerous engine level improvements. A full list is available at http://www.php.net/zend-engine-2.php. diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index bd1ed508c5..23fa8ad664 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3222,6 +3222,124 @@ MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC) } /* }}} */ +/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */ +MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) +{ + + if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){ + return php_mb_mbchar_bytes_ex(s, + mbfl_no2encoding(MBSTRG(http_input_identify))); + } else { + return php_mb_mbchar_bytes_ex(s, + mbfl_no2encoding(MBSTRG(internal_encoding))); + } +} +/* }}} */ + +/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */ +MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, const char *encoding_to, const char *encoding_from + TSRMLS_DC) +{ + mbfl_string string, result, *ret; + enum mbfl_no_encoding from_encoding, to_encoding; + mbfl_buffer_converter *convd; + + if (encoding_to) { + /* new encoding */ + to_encoding = mbfl_name2no_encoding(encoding_to); + if (to_encoding == mbfl_no_encoding_invalid) { + return -1; + } + } else { + to_encoding = MBSTRG(current_internal_encoding); + } + if (encoding_from) { + /* old encoding */ + from_encoding = mbfl_name2no_encoding(encoding_from); + if (from_encoding == mbfl_no_encoding_invalid) { + return -1; + } + } else { + from_encoding = MBSTRG(http_input_identify); + } + + /* initialize string */ + mbfl_string_init(&string); + mbfl_string_init(&result); + string.no_encoding = from_encoding; + string.no_language = MBSTRG(current_language); + string.val = (char*)(*str); + string.len = *len; + + /* initialize converter */ + convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len TSRMLS_CC); + if (convd == NULL) { + return -1; + } + mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode) TSRMLS_CC); + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar) TSRMLS_CC); + + /* do it */ + ret = mbfl_buffer_converter_feed_result(convd, &string, &result TSRMLS_CC); + if (ret != NULL) { + efree(*str); + *str = ret->val; + *len = ret->len; + } + mbfl_buffer_converter_delete(convd TSRMLS_CC); + + return ret ? 0 : -1; +} + +/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector() + */ +MBSTRING_API int php_mb_gpc_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC) +{ + mbfl_string string; + enum mbfl_no_encoding *elist; + enum mbfl_no_encoding encoding; + + int size, *list; + + if (arg_list && strlen(arg_list)>0) { + /* make encoding list */ + list = NULL; + size = 0; + php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0); + + if (size > 0 && list != NULL) { + elist = list; + } else { + elist = MBSTRG(current_detect_order_list); + size = MBSTRG(current_detect_order_list_size); + if (size <= 0){ + elist = (int*)php_mb_default_identify_list; + size = php_mb_default_identify_list_size; + } + } + } else { + elist = MBSTRG(current_detect_order_list); + size = MBSTRG(current_detect_order_list_size); + if (size <= 0){ + elist = (int*)php_mb_default_identify_list; + size = php_mb_default_identify_list_size; + } + } + + mbfl_string_init(&string); + string.no_language = MBSTRG(current_language); + string.val = (char*)arg_string; + string.len = arg_length; + encoding = mbfl_identify_encoding_no(&string, elist, size TSRMLS_CC); + if (encoding != mbfl_no_encoding_invalid) { + MBSTRG(http_input_identify) = encoding; + return SUCCESS; + } else { + return FAILURE; + } +} +/* }}} */ + #ifdef ZEND_MULTIBYTE /* {{{ MBSTRING_API int php_mb_set_zend_encoding() */ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D) diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index fdecde7a9f..8e4bd2b911 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -136,6 +136,13 @@ MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc); MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC); +MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC); + +MBSTRING_API int php_mb_encoding_detector_ex(const char *arg_string, int arg_length, + char *arg_list TSRMLS_DC); + +MBSTRING_API int php_mb_encoding_converter_ex(char **str, int *len, const char *encoding_to, + const char *encoding_from TSRMLS_DC); ZEND_BEGIN_MODULE_GLOBALS(mbstring) enum mbfl_no_language language; diff --git a/main/rfc1867.c b/main/rfc1867.c index 9ef0710493..b8b4f533ac 100644 --- a/main/rfc1867.c +++ b/main/rfc1867.c @@ -525,7 +525,7 @@ static char *substring_conf(char *start, int len, char quote TSRMLS_DC) } else { #if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) if (php_mb_encoding_translation(TSRMLS_C)) { - size_t j = php_mb_mbchar_bytes(start+i TSRMLS_CC); + size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC); while (j-- > 0 && i < len) { *resp++ = start[i++]; } @@ -548,6 +548,12 @@ static char *php_ap_getword_conf(char **line TSRMLS_DC) { char *str = *line, *strend, *res, quote; +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) + if (php_mb_encoding_translation(TSRMLS_C)) { + php_mb_gpc_encoding_detector(str, strlen(str), NULL TSRMLS_CC); + } +#endif + while (*str && isspace(*str)) { ++str; } @@ -690,7 +696,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) { char *boundary, *s=NULL, *boundary_end = NULL, *start_arr=NULL, *array_index=NULL; char *temp_filename=NULL, *lbuf=NULL, *abuf=NULL; - int boundary_len=0, total_bytes=0, cancel_upload=0, is_arr_upload=0, array_len=0, max_file_size=0, skip_upload=0, anonindex=0, is_anonymous; + int boundary_len=0, total_bytes=0, cancel_upload=0, is_arr_upload=0, array_len=0, max_file_size=0, skip_upload=0, str_len=0, anonindex=0, is_anonymous; zval *http_post_files=NULL; zend_bool magic_quotes_gpc; multipart_buffer *mbuff; @@ -806,6 +812,14 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) } sapi_module.input_filter(PARSE_POST, param, &value, strlen(value) TSRMLS_CC); +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) + if (php_mb_encoding_translation(TSRMLS_C)) { + if (php_mb_gpc_encoding_detector(value, strlen(value), NULL TSRMLS_CC) == SUCCESS) { + str_len = strlen(value); + php_mb_gpc_encoding_converter(&value , &str_len, NULL, NULL TSRMLS_CC); + } + } +#endif safe_php_register_variable(param, value, array_ptr, 0 TSRMLS_CC); if (!strcasecmp(param, "MAX_FILE_SIZE")) { max_file_size = atol(value); @@ -931,6 +945,10 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) #if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) if (php_mb_encoding_translation(TSRMLS_C)) { + if(php_mb_gpc_encoding_detector(filename, strlen(filename), NULL TSRMLS_CC) == SUCCESS) { + str_len = strlen(filename); + php_mb_gpc_encoding_converter(&filename, &str_len, NULL, NULL TSRMLS_CC); + } s = php_mb_strrchr(filename, '\\' TSRMLS_CC); } else { s = strrchr(filename, '\\');