settings.
* Add unicode_set_error_mode() and unicode_set_subst_char() functions to
manipulate these global settings.
*converter = NULL;
}
if (*converter) {
- zend_set_converter_error_mode(*converter, UG(from_u_error_mode));
- zend_set_converter_subst_char(*converter, UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(*converter, UG(from_error_mode));
+ zend_set_converter_subst_char(*converter, UG(from_subst_char));
}
return SUCCESS;
}
#endif
-static void zend_update_converters_error_behavior(TSRMLS_D)
+void zend_update_converters_error_behavior(TSRMLS_D) /* re-applies UG(from_error_mode) and UG(from_subst_char) to the fallback, runtime and output encoding converters that are currently set */
{
if (UG(fallback_encoding_conv)) {
- zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_u_error_mode));
- zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_error_mode));
+ zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(from_subst_char));
}
if (UG(runtime_encoding_conv)) {
- zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_u_error_mode));
- zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_error_mode));
+ zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(from_subst_char));
}
if (UG(output_encoding_conv)) {
- zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode));
- zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode));
+ zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char));
}
- if (UG(http_input_encoding_conv)) {
- zend_set_converter_error_mode(UG(http_input_encoding_conv), UG(from_u_error_mode));
- }
-}
-
-
-static ZEND_INI_MH(OnUpdateConversionErrorMode)
-{
- if (!new_value) {
- UG(from_u_error_mode) = ZEND_CONV_ERROR_SUBST;
- } else {
- uint16_t mode = atoi(new_value);
-
- if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
- zend_error(E_WARNING, "Illegal value for conversion error mode");
- return FAILURE;
- }
- UG(from_u_error_mode) = mode;
- }
- zend_update_converters_error_behavior(TSRMLS_C);
- return SUCCESS;
-}
-
-
-static ZEND_INI_MH(OnUpdateConversionSubstChar)
-{
- uint8_t i = 0;
- UChar32 c = 0x3f; /*'?'*/
- char *end_ptr;
-
- if (new_value) {
- c = (int32_t)strtol(new_value, &end_ptr, 16);
- if (end_ptr < new_value + strlen(new_value)) {
- zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value");
- return FAILURE;
- }
- if (c < 0 || c >= UCHAR_MAX_VALUE) {
- zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c);
- return FAILURE;
- }
- }
- U16_APPEND_UNSAFE(UG(subst_char), i, c);
- UG(subst_char)[i] = 0;
- UG(subst_char_len) = i;
- zend_update_converters_error_behavior(TSRMLS_C);
-
- return SUCCESS;
}
STD_ZEND_INI_ENTRY("unicode.runtime_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, runtime_encoding_conv, zend_unicode_globals, unicode_globals)
STD_ZEND_INI_ENTRY("unicode.script_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, script_encoding_conv, zend_unicode_globals, unicode_globals)
STD_ZEND_INI_ENTRY("unicode.http_input_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, http_input_encoding_conv, zend_unicode_globals, unicode_globals)
- ZEND_INI_ENTRY("unicode.from_error_mode", "2", ZEND_INI_ALL, OnUpdateConversionErrorMode)
- ZEND_INI_ENTRY("unicode.from_error_subst_char", "3f", ZEND_INI_ALL, OnUpdateConversionSubstChar)
ZEND_INI_END()
unicode_globals->output_encoding_conv = NULL;
unicode_globals->script_encoding_conv = NULL;
unicode_globals->http_input_encoding_conv = NULL;
- unicode_globals->subst_char_len = 0;
zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8");
- unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST;
+ unicode_globals->from_error_mode = ZEND_CONV_ERROR_SUBST;
+ memset(unicode_globals->from_subst_char, 0, 3 * sizeof(UChar));
+ zend_codepoint_to_uchar(0x3f, unicode_globals->from_subst_char);
zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0);
}
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS);
+ REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, CONST_PERSISTENT | CONST_CS);
+ REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, CONST_PERSISTENT | CONST_CS);
+
/* true/false constants */
{
zend_constant c;
UConverter *http_input_encoding_conv;/* http input encoding converter */
UConverter *utf8_conv; /* all-purpose UTF-8 converter */
- uint16_t from_u_error_mode;
- UChar subst_char[3];
- uint8_t subst_char_len;
+ uint16_t from_error_mode;
+ UChar from_subst_char[3];
+ uint16_t to_error_mode;
+ UChar to_subst_char[3];
char *default_locale;
UCollator *default_collator;
/* }}} */
/* {{{ zend_set_converter_subst_char */
-void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len)
+void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char)
{
char dest[8];
int8_t dest_len = 8;
UErrorCode temp = U_ZERO_ERROR;
const void *old_context;
UConverterFromUCallback old_cb;
+ int32_t subst_char_len = u_strlen(subst_char);
if (!subst_char_len)
return;
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(u, num_conv)-1;
+ /* XXX needs to be fixed, but a leak is better than invalid memory
if (s) {
efree(s);
}
- zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+ */
+ zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
retval = FAILURE;
}
ZEND_CONV_ERROR_EXCEPTION = 0x100
};
+/*
+ * Direction of a Unicode conversion. The enumerators are also registered
+ * as the userland constants FROM_UNICODE and TO_UNICODE, so their values
+ * are pinned explicitly instead of being left to declaration order.
+ */
+typedef enum {
+	ZEND_FROM_UNICODE = 0,
+	ZEND_TO_UNICODE   = 1	/* last entry: a trailing comma is not valid C89 */
+} zend_conv_direction;
+
extern ZEND_API zend_class_entry *unicodeConversionException;
/* internal functions */
int zend_set_converter_encoding(UConverter **converter, const char *encoding);
-void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len);
+void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char);
void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode);
void zend_register_unicode_exceptions(TSRMLS_D);
+void zend_update_converters_error_behavior(TSRMLS_D);
/* API functions */
int32_t offset = 0;
UChar32 c = 0;
- U16_FWD_N(str, offset, length, n);
+ if (n > 0) {
+ U16_FWD_N(str, offset, length, n);
+ }
U16_GET(str, 0, offset, length, c);
return c;
}
/* }}} */
+/* {{{ proto bool unicode_set_error_mode(int direction, int mode)
+   Sets the global conversion error mode for the given direction
+   (FROM_UNICODE or TO_UNICODE). Emits a warning and returns false when the
+   direction or the mode is not valid; returns true otherwise. */
+PHP_FUNCTION(unicode_set_error_mode)
+{
+	long direction_arg, mode;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ll", &direction_arg, &mode) == FAILURE) {
+		return;
+	}
+
+	/* Validate the raw long: casting to the enum first could truncate an
+	   out-of-range value into one that looks valid. */
+	if (direction_arg != ZEND_FROM_UNICODE && direction_arg != ZEND_TO_UNICODE) {
+		php_error(E_WARNING, "Invalid conversion direction value");
+		RETURN_FALSE;
+	}
+
+	/* The low byte selects the mode; higher bits carry flags such as
+	   ZEND_CONV_ERROR_EXCEPTION. */
+	if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+		php_error(E_WARNING, "Illegal value for conversion error mode");
+		RETURN_FALSE;
+	}
+
+	if ((zend_conv_direction) direction_arg == ZEND_FROM_UNICODE) {
+		UG(from_error_mode) = mode;
+	} else {
+		/* TO_UNICODE used to be accepted and then silently discarded. */
+		UG(to_error_mode) = mode;
+	}
+
+	zend_update_converters_error_behavior(TSRMLS_C);
+	RETURN_TRUE;
+}
+/* }}} */
+
+
+/* {{{ proto bool unicode_set_subst_char(int direction, unicode character)
+   Sets the global substitution character for the given direction
+   (FROM_UNICODE or TO_UNICODE) from the first code point of the supplied
+   string. Emits a warning and returns false on an invalid direction, an
+   empty string or an out-of-range code point; returns true otherwise. */
+PHP_FUNCTION(unicode_set_subst_char)
+{
+	UChar *subst_char;
+	UChar *target;
+	UChar32 cp;
+	int subst_char_len;
+	int len;
+	long direction_arg;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "lu", &direction_arg, &subst_char, &subst_char_len) == FAILURE) {
+		return;
+	}
+
+	/* Validate the raw long: casting to the enum first could truncate an
+	   out-of-range value into one that looks valid. */
+	if (direction_arg != ZEND_FROM_UNICODE && direction_arg != ZEND_TO_UNICODE) {
+		php_error(E_WARNING, "Invalid conversion direction value");
+		RETURN_FALSE;
+	}
+
+	if (subst_char_len < 1 ) {
+		php_error(E_WARNING, "Empty substitution character");
+		RETURN_FALSE;
+	}
+
+	/* Only the first code point of the string is used. */
+	cp = zend_get_codepoint_at(subst_char, subst_char_len, 0);
+
+	/* UCHAR_MAX_VALUE is itself the last valid code point, so the upper
+	   bound is inclusive, as the message below states. */
+	if (cp < 0 || cp > UCHAR_MAX_VALUE) {
+		php_error(E_WARNING, "Substitution character value U+%06x is out of range (0 - 0x10FFFF)", cp);
+		RETURN_FALSE;
+	}
+
+	/* TO_UNICODE used to be accepted and then silently discarded; store it
+	   in its own buffer. Both buffers hold 3 UChars: at most two UTF-16
+	   code units plus the terminating zero. */
+	if ((zend_conv_direction) direction_arg == ZEND_FROM_UNICODE) {
+		target = UG(from_subst_char);
+	} else {
+		target = UG(to_subst_char);
+	}
+	len = zend_codepoint_to_uchar(cp, target);
+	target[len] = 0;
+
+	zend_update_converters_error_behavior(TSRMLS_C);
+	RETURN_TRUE;
+}
+/* }}} */
+
+
/* {{{ unicode_functions[] */
zend_function_entry unicode_functions[] = {
PHP_FE(i18n_loc_get_default, NULL)
PHP_FE(unicode_decode, NULL)
PHP_FE(unicode_semantics, NULL)
PHP_FE(unicode_encode, NULL)
+ PHP_FE(unicode_set_error_mode, NULL) /* sets the global conversion error mode */
+ PHP_FE(unicode_set_subst_char, NULL) /* sets the global conversion substitution character */
{ NULL, NULL, NULL }
};
/* }}} */
ZEND_GET_MODULE(unicode)
#endif
-
/* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(unicode)
{
}
/* }}} */
-
/* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(unicode)
{
}
/* }}} */
-
/* {{{ PHP_RINIT_FUNCTION */
PHP_RINIT_FUNCTION(unicode)
{
}
/* }}} */
-
/* {{{ PHP_RSHUTDOWN_FUNCTION */
PHP_RSHUTDOWN_FUNCTION(unicode)
{
}
/* }}} */
-
/* {{{ PHP_MINFO_FUNCTION */
PHP_MINFO_FUNCTION(unicode)
{
UG(output_encoding_conv) = NULL;
}
if (UG(output_encoding_conv)) {
- zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode));
- zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode));
+ zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char));
if (stage == ZEND_INI_STAGE_RUNTIME) {
sapi_update_default_charset(TSRMLS_C);
}
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(bucket->buf.u, num_conv)-1;
- zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+ zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
}
if (bucket->own_buf) {
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(buf.u, num_conv)-1;
- zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+ zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
}
freeme = buf.s = dest;
buflen = destlen;
}
} else {
/* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
- zend_set_converter_error_mode(stream->output_encoding, UG(from_u_error_mode));
- zend_set_converter_subst_char(stream->output_encoding, UG(subst_char), UG(subst_char_len));
+ zend_set_converter_error_mode(stream->output_encoding, UG(from_error_mode));
+ zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char));
}
}
if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {