From: Andrei Zmievski Date: Sun, 26 Mar 2006 01:48:33 +0000 (+0000) Subject: * Remove unicode.from_error_mode and unicode.from_subst_char from INI X-Git-Tag: RELEASE_1_3~260 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=930bde5897dba3857ae8d3f90df0b5e307c1af97;p=php * Remove unicode.from_error_mode and unicode.from_subst_char from INI settings. * Add unicode_set_error_mode() and unicode_set_subst_char() functions to manipulate these global settings. --- diff --git a/Zend/zend.c b/Zend/zend.c index 0ae309e086..a16729f4aa 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -110,8 +110,8 @@ static ZEND_INI_MH(OnUpdateEncoding) *converter = NULL; } if (*converter) { - zend_set_converter_error_mode(*converter, UG(from_u_error_mode)); - zend_set_converter_subst_char(*converter, UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(*converter, UG(from_error_mode)); + zend_set_converter_subst_char(*converter, UG(from_subst_char)); } return SUCCESS; @@ -150,67 +150,20 @@ static ZEND_INI_MH(OnUpdateErrorMode) } #endif -static void zend_update_converters_error_behavior(TSRMLS_D) +void zend_update_converters_error_behavior(TSRMLS_D) { if (UG(fallback_encoding_conv)) { - zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_u_error_mode)); - zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_error_mode)); + zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(from_subst_char)); } if (UG(runtime_encoding_conv)) { - zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_u_error_mode)); - zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_error_mode)); + zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(from_subst_char)); } if (UG(output_encoding_conv)) { - zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode)); - zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode)); + zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char)); } - if (UG(http_input_encoding_conv)) { - zend_set_converter_error_mode(UG(http_input_encoding_conv), UG(from_u_error_mode)); - } -} - - -static ZEND_INI_MH(OnUpdateConversionErrorMode) -{ - if (!new_value) { - UG(from_u_error_mode) = ZEND_CONV_ERROR_SUBST; - } else { - uint16_t mode = atoi(new_value); - - if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) { - zend_error(E_WARNING, "Illegal value for conversion error mode"); - return FAILURE; - } - UG(from_u_error_mode) = mode; - } - zend_update_converters_error_behavior(TSRMLS_C); - return SUCCESS; -} - - -static ZEND_INI_MH(OnUpdateConversionSubstChar) -{ - uint8_t i = 0; - UChar32 c = 0x3f; /*'?'*/ - char *end_ptr; - - if (new_value) { - c = (int32_t)strtol(new_value, &end_ptr, 16); - if (end_ptr < new_value + strlen(new_value)) { - zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value"); - return FAILURE; - } - if (c < 0 || c >= UCHAR_MAX_VALUE) { - zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c); - return FAILURE; - } - } - U16_APPEND_UNSAFE(UG(subst_char), i, c); - UG(subst_char)[i] = 0; - UG(subst_char_len) = i; - zend_update_converters_error_behavior(TSRMLS_C); - - return SUCCESS; } @@ -223,8 +176,6 @@ ZEND_INI_BEGIN() STD_ZEND_INI_ENTRY("unicode.runtime_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, runtime_encoding_conv, zend_unicode_globals, unicode_globals) STD_ZEND_INI_ENTRY("unicode.script_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, script_encoding_conv, zend_unicode_globals, unicode_globals) STD_ZEND_INI_ENTRY("unicode.http_input_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, http_input_encoding_conv, zend_unicode_globals, unicode_globals) - ZEND_INI_ENTRY("unicode.from_error_mode", "2", ZEND_INI_ALL, OnUpdateConversionErrorMode) - ZEND_INI_ENTRY("unicode.from_error_subst_char", "3f", ZEND_INI_ALL, OnUpdateConversionSubstChar) ZEND_INI_END() @@ -956,9 +907,10 @@ static void unicode_globals_ctor(zend_unicode_globals *unicode_globals TSRMLS_DC unicode_globals->output_encoding_conv = NULL; unicode_globals->script_encoding_conv = NULL; unicode_globals->http_input_encoding_conv = NULL; - unicode_globals->subst_char_len = 0; zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8"); - unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST; + unicode_globals->from_error_mode = ZEND_CONV_ERROR_SUBST; + memset(unicode_globals->from_subst_char, 0, 3 * sizeof(UChar)); + zend_codepoint_to_uchar(0x3f, unicode_globals->from_subst_char); zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0); } diff --git a/Zend/zend_constants.c b/Zend/zend_constants.c index 699b1abcbd..be8f0f083c 100644 --- a/Zend/zend_constants.c +++ b/Zend/zend_constants.c @@ -126,6 +126,9 @@ void zend_register_standard_constants(TSRMLS_D) REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, CONST_PERSISTENT | CONST_CS); + /* true/false constants */ { zend_constant c; diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 4fb31b09f4..90bee6e899 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -301,9 +301,10 @@ struct _zend_unicode_globals { UConverter *http_input_encoding_conv;/* http input encoding converter */ UConverter *utf8_conv; /* all-purpose UTF-8 converter */ - uint16_t from_u_error_mode; - UChar subst_char[3]; - uint8_t subst_char_len; + uint16_t from_error_mode; + UChar from_subst_char[3]; + uint16_t to_error_mode; + UChar to_subst_char[3]; char *default_locale; UCollator *default_collator; diff --git a/Zend/zend_unicode.c b/Zend/zend_unicode.c index 44c31c8e92..366890431a 100644 --- a/Zend/zend_unicode.c +++ b/Zend/zend_unicode.c @@ -78,7 +78,7 @@ void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode) /* }}} */ /* {{{ zend_set_converter_subst_char */ -void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len) +void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char) { char dest[8]; int8_t dest_len = 8; @@ -86,6 +86,7 @@ void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t s UErrorCode temp = U_ZERO_ERROR; const void *old_context; UConverterFromUCallback old_cb; + int32_t subst_char_len = u_strlen(subst_char); if (!subst_char_len) return; @@ -377,10 +378,12 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC) if (U_FAILURE(status)) { int32_t offset = u_countChar32(u, num_conv)-1; + /* XXX needs to be fixed, but a leak is better than invalid memory if (s) { efree(s); } - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + */ + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); retval = FAILURE; } diff --git a/Zend/zend_unicode.h b/Zend/zend_unicode.h index e413958e77..3a760dfd43 100644 --- a/Zend/zend_unicode.h +++ b/Zend/zend_unicode.h @@ -42,6 +42,11 @@ enum { ZEND_CONV_ERROR_EXCEPTION = 0x100 }; +typedef enum { + ZEND_FROM_UNICODE, + ZEND_TO_UNICODE, +} zend_conv_direction; + extern ZEND_API zend_class_entry *unicodeConversionException; @@ -49,9 +54,10 @@ extern ZEND_API zend_class_entry *unicodeConversionException; /* internal functions */ int zend_set_converter_encoding(UConverter **converter, const char *encoding); -void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len); +void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char); void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode); void zend_register_unicode_exceptions(TSRMLS_D); +void zend_update_converters_error_behavior(TSRMLS_D); /* API functions */ @@ -84,7 +90,9 @@ static inline UChar32 zend_get_codepoint_at(UChar *str, int length, int n) int32_t offset = 0; UChar32 c = 0; - U16_FWD_N(str, offset, length, n); + if (n > 0) { + U16_FWD_N(str, offset, length, n); + } U16_GET(str, 0, offset, length, c); return c; diff --git a/ext/unicode/unicode.c b/ext/unicode/unicode.c index e18f78a499..aab935b8e7 100644 --- a/ext/unicode/unicode.c +++ b/ext/unicode/unicode.c @@ -112,6 +112,74 @@ static PHP_FUNCTION(unicode_encode) } /* }}} */ +PHP_FUNCTION(unicode_set_error_mode) +{ + zend_conv_direction direction; + long tmp, mode; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ll", &tmp, &mode) == FAILURE) { + return; + } + direction = (zend_conv_direction) tmp; + + if (direction != ZEND_FROM_UNICODE && direction != ZEND_TO_UNICODE) { + php_error(E_WARNING, "Invalid conversion direction value"); + RETURN_FALSE; + } + + if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) { + php_error(E_WARNING, "Illegal value for conversion error mode"); + RETURN_FALSE; + } + + if (direction == ZEND_FROM_UNICODE) { + UG(from_error_mode) = mode; + } + + zend_update_converters_error_behavior(TSRMLS_C); + RETURN_TRUE; +} + +PHP_FUNCTION(unicode_set_subst_char) +{ + zend_conv_direction direction; + UChar *subst_char; + UChar32 cp; + int subst_char_len; + long tmp; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "lu", &tmp, &subst_char, &subst_char_len) == FAILURE) { + return; + } + direction = (zend_conv_direction) tmp; + + if (direction != ZEND_FROM_UNICODE && direction != ZEND_TO_UNICODE) { + php_error(E_WARNING, "Invalid conversion direction value"); + RETURN_FALSE; + } + + if (subst_char_len < 1 ) { + php_error(E_WARNING, "Empty substitution character"); + RETURN_FALSE; + } + + cp = zend_get_codepoint_at(subst_char, subst_char_len, 0); + + if (cp < 0 || cp >= UCHAR_MAX_VALUE) { + zend_error(E_WARNING, "Substitution character value U+%06x is out of range (0 - 0x10FFFF)", cp); + RETURN_FALSE; + } + + if (direction == ZEND_FROM_UNICODE) { + int len; + len = zend_codepoint_to_uchar(cp, UG(from_subst_char)); + UG(from_subst_char)[len] = 0; + } + + zend_update_converters_error_behavior(TSRMLS_C); + RETURN_TRUE; +} + /* {{{ unicode_functions[] */ zend_function_entry unicode_functions[] = { PHP_FE(i18n_loc_get_default, NULL) @@ -119,6 +187,8 @@ zend_function_entry unicode_functions[] = { PHP_FE(unicode_decode, NULL) PHP_FE(unicode_semantics, NULL) PHP_FE(unicode_encode, NULL) + PHP_FE(unicode_set_error_mode, NULL) + PHP_FE(unicode_set_subst_char, NULL) { NULL, NULL, NULL } }; /* }}} */ @@ -144,7 +214,6 @@ zend_module_entry unicode_module_entry = { ZEND_GET_MODULE(unicode) #endif - /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(unicode) { @@ -158,7 +227,6 @@ PHP_MINIT_FUNCTION(unicode) } /* }}} */ - /* {{{ PHP_MSHUTDOWN_FUNCTION */ PHP_MSHUTDOWN_FUNCTION(unicode) { @@ -172,7 +240,6 @@ PHP_MSHUTDOWN_FUNCTION(unicode) } /* }}} */ - /* {{{ PHP_RINIT_FUNCTION */ PHP_RINIT_FUNCTION(unicode) { @@ -180,7 +247,6 @@ PHP_RINIT_FUNCTION(unicode) } /* }}} */ - /* {{{ PHP_RSHUTDOWN_FUNCTION */ PHP_RSHUTDOWN_FUNCTION(unicode) { @@ -188,7 +254,6 @@ PHP_RSHUTDOWN_FUNCTION(unicode) } /* }}} */ - /* {{{ PHP_MINFO_FUNCTION */ PHP_MINFO_FUNCTION(unicode) { diff --git a/main/main.c b/main/main.c index f58ea61075..609d0516cb 100644 --- a/main/main.c +++ b/main/main.c @@ -220,8 +220,8 @@ static ZEND_INI_MH(OnUpdateOutputEncoding) UG(output_encoding_conv) = NULL; } if (UG(output_encoding_conv)) { - zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode)); - zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode)); + zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char)); if (stage == ZEND_INI_STAGE_RUNTIME) { sapi_update_default_charset(TSRMLS_C); } diff --git a/main/streams/filter.c b/main/streams/filter.c index 1519f635bc..2b2009ca94 100644 --- a/main/streams/filter.c +++ b/main/streams/filter.c @@ -712,7 +712,7 @@ PHPAPI int _php_stream_bucket_convert(php_stream_bucket *bucket, unsigned char t if (U_FAILURE(status)) { int32_t offset = u_countChar32(bucket->buf.u, num_conv)-1; - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); } if (bucket->own_buf) { diff --git a/main/streams/streams.c b/main/streams/streams.c index d1315da90d..87814e275b 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -1263,7 +1263,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu if (U_FAILURE(status)) { int32_t offset = u_countChar32(buf.u, num_conv)-1; - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); } freeme = buf.s = dest; buflen = destlen; @@ -2293,8 +2293,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio } } else { /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ - zend_set_converter_error_mode(stream->output_encoding, UG(from_u_error_mode)); - zend_set_converter_subst_char(stream->output_encoding, UG(subst_char), UG(subst_char_len)); + zend_set_converter_error_mode(stream->output_encoding, UG(from_error_mode)); + zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char)); } } if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {