From 1c09ef8e37079d2d9e2e562fb1af0fbe29660b81 Mon Sep 17 00:00:00 2001 From: Andrei Zmievski Date: Thu, 23 Mar 2006 22:00:42 +0000 Subject: [PATCH] Update conversion error behavior and add some new modes. # The various escape modes are what ICU calls them. We may want to come # up with different names for UNICODE/ICU/JAVA ones.. --- Zend/zend.c | 14 ++++++--- Zend/zend_constants.c | 13 +++++--- Zend/zend_globals.h | 2 +- Zend/zend_unicode.c | 71 +++++++++++++------------------------------ Zend/zend_unicode.h | 18 ++++++++--- 5 files changed, 55 insertions(+), 63 deletions(-) diff --git a/Zend/zend.c b/Zend/zend.c index e777650ac7..0ae309e086 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -173,9 +173,15 @@ static void zend_update_converters_error_behavior(TSRMLS_D) static ZEND_INI_MH(OnUpdateConversionErrorMode) { if (!new_value) { - UG(from_u_error_mode) = ZEND_FROM_U_ERROR_SUBST; + UG(from_u_error_mode) = ZEND_CONV_ERROR_SUBST; } else { - UG(from_u_error_mode) = atoi(new_value); + uint16_t mode = atoi(new_value); + + if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) { + zend_error(E_WARNING, "Illegal value for conversion error mode"); + return FAILURE; + } + UG(from_u_error_mode) = mode; } zend_update_converters_error_behavior(TSRMLS_C); return SUCCESS; @@ -194,7 +200,7 @@ static ZEND_INI_MH(OnUpdateConversionSubstChar) zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value"); return FAILURE; } - if (c < 0 || c >= 0x10FFFF) { + if (c < 0 || c >= UCHAR_MAX_VALUE) { zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c); return FAILURE; } @@ -952,7 +958,7 @@ static void unicode_globals_ctor(zend_unicode_globals *unicode_globals TSRMLS_DC unicode_globals->http_input_encoding_conv = NULL; unicode_globals->subst_char_len = 0; zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8"); - unicode_globals->from_u_error_mode = ZEND_FROM_U_ERROR_SUBST; + unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST; zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0); } diff --git a/Zend/zend_constants.c b/Zend/zend_constants.c index 3ed7883be0..699b1abcbd 100644 --- a/Zend/zend_constants.c +++ b/Zend/zend_constants.c @@ -116,10 +116,15 @@ void zend_register_standard_constants(TSRMLS_D) REGISTER_MAIN_LONG_CONSTANT("E_ALL", E_ALL, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_INVALID_STOP", ZEND_FROM_U_ERROR_STOP, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SKIP", ZEND_FROM_U_ERROR_SKIP, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SUBSTITUTE", ZEND_FROM_U_ERROR_SUBST, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_INVALID_ESCAPE", ZEND_FROM_U_ERROR_ESCAPE, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_STOP", ZEND_CONV_ERROR_STOP, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SKIP", ZEND_CONV_ERROR_SKIP, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SUBST", ZEND_CONV_ERROR_SUBST, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_UNICODE", ZEND_CONV_ERROR_ESCAPE_UNICODE, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_ICU", ZEND_CONV_ERROR_ESCAPE_ICU, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS); + REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS); /* true/false constants */ { diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 8c8f67720c..4fb31b09f4 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -301,7 +301,7 @@ struct _zend_unicode_globals { UConverter *http_input_encoding_conv;/* http input encoding converter */ UConverter *utf8_conv; /* all-purpose UTF-8 converter */ - uint8_t from_u_error_mode; + uint16_t from_u_error_mode; UChar subst_char[3]; uint8_t subst_char_len; diff --git a/Zend/zend_unicode.c b/Zend/zend_unicode.c index 77f2583a56..f67b8b0262 100644 --- a/Zend/zend_unicode.c +++ b/Zend/zend_unicode.c @@ -35,21 +35,36 @@ void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode) UErrorCode status = U_ZERO_ERROR; switch (error_mode) { - case ZEND_FROM_U_ERROR_STOP: + case ZEND_CONV_ERROR_STOP: ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); break; - case ZEND_FROM_U_ERROR_SKIP: + case ZEND_CONV_ERROR_SKIP: ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status); break; - case ZEND_FROM_U_ERROR_ESCAPE: - /* UTODO replace with custom callback for various substitution patterns */ + case ZEND_CONV_ERROR_SUBST: + ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &status); + break; + + case ZEND_CONV_ERROR_ESCAPE_UNICODE: ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status); break; - case ZEND_FROM_U_ERROR_SUBST: - ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status); + case ZEND_CONV_ERROR_ESCAPE_ICU: + ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status); + break; + + case ZEND_CONV_ERROR_ESCAPE_JAVA: + ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, NULL, NULL, &status); + break; + + case ZEND_CONV_ERROR_ESCAPE_XML_DEC: + ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status); + break; + + case ZEND_CONV_ERROR_ESCAPE_XML_HEX: + ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status); break; default: @@ -317,50 +332,6 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC) char *s = NULL; int s_len; -#if 0 - /* UTODO Putting it here for now, until we figure out the framework */ - switch (UG(from_u_error_mode)) { - case ZEND_FROM_U_ERROR_STOP: - ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); - break; - - case ZEND_FROM_U_ERROR_SKIP: - ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status); - break; - - case ZEND_FROM_U_ERROR_ESCAPE: - ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status); - break; - - case ZEND_FROM_U_ERROR_SUBST: - ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status); - break; - - default: - assert(0); - break; - } - - if (UG(subst_chars)) { - char subchar[16]; - int8_t char_len = 16; - status = U_ZERO_ERROR; - ucnv_getSubstChars(UG(runtime_encoding_conv), subchar, &char_len, &status); - if (U_FAILURE(status)) { - zend_error(E_WARNING, "Could not get substitution characters"); - return FAILURE; - } - status = U_ZERO_ERROR; - ucnv_setSubstChars(UG(runtime_encoding_conv), UG(subst_chars), MIN(char_len, UG(subst_chars_len)), &status); - if (U_FAILURE(status)) { - zend_error(E_WARNING, "Could not set substitution characters"); - return FAILURE; - } - } - - status = U_ZERO_ERROR; -#endif - UChar *u = Z_USTRVAL_P(string); int u_len = Z_USTRLEN_P(string); diff --git a/Zend/zend_unicode.h b/Zend/zend_unicode.h index c99891cfb8..13c3d13a80 100644 --- a/Zend/zend_unicode.h +++ b/Zend/zend_unicode.h @@ -28,10 +28,20 @@ #include #include -#define ZEND_FROM_U_ERROR_STOP 0 -#define ZEND_FROM_U_ERROR_SKIP 1 -#define ZEND_FROM_U_ERROR_SUBST 2 -#define ZEND_FROM_U_ERROR_ESCAPE 3 +enum { + ZEND_CONV_ERROR_STOP, + ZEND_CONV_ERROR_SKIP, + ZEND_CONV_ERROR_SUBST, + ZEND_CONV_ERROR_ESCAPE_UNICODE, + ZEND_CONV_ERROR_ESCAPE_ICU, + ZEND_CONV_ERROR_ESCAPE_JAVA, + ZEND_CONV_ERROR_ESCAPE_XML_DEC, + ZEND_CONV_ERROR_ESCAPE_XML_HEX, + ZEND_CONV_ERROR_LAST_ENUM, + + ZEND_CONV_ERROR_EXCEPTION = 0x100 +}; + /* internal functions */ -- 2.50.1