From: Antony Dovgal Date: Tue, 8 Aug 2006 16:58:06 +0000 (+0000) Subject: implement zend_unicode_to_string() and zend_string_to_unicode() X-Git-Tag: RELEASE_1_0_0RC1~2036 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f5715ee86f241510cf51c9ed17aa456eb19d1ac2;p=php implement zend_unicode_to_string() and zend_string_to_unicode() part #1 (of 2) --- diff --git a/Zend/zend.c b/Zend/zend.c index 08c84c7329..1ff693184c 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -611,7 +611,7 @@ static int zend_path_encode_wrapper(char **encpath, int *encpath_len, const UCha { UErrorCode status = U_ZERO_ERROR; - zend_convert_from_unicode(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), encpath, encpath_len, path, path_len, &status); + zend_unicode_to_string_ex(ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), encpath, encpath_len, path, path_len, &status); if (U_FAILURE(status)) { efree(*encpath); diff --git a/Zend/zend_API.h b/Zend/zend_API.h index 6d8353902a..145d451aeb 100644 --- a/Zend/zend_API.h +++ b/Zend/zend_API.h @@ -382,7 +382,7 @@ ZEND_API int add_assoc_zval_ex(zval *arg, char *key, uint key_len, zval *value); UChar *u_str; \ int u_len; \ int length = strlen(str); \ - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(str); \ } \ @@ -396,7 +396,7 @@ ZEND_API int add_assoc_zval_ex(zval *arg, char *key, uint key_len, zval *value); UErrorCode status = U_ZERO_ERROR; \ UChar *u_str; \ int u_len; \ - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(str); \ } \ @@ -587,7 +587,7 @@ ZEND_API int add_next_index_zval(zval *arg, zval *value); UChar 
*u_str; \ int u_len; \ int length = strlen(str); \ - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(str); \ } \ @@ -601,7 +601,7 @@ ZEND_API int add_next_index_zval(zval *arg, zval *value); UErrorCode status = U_ZERO_ERROR; \ UChar *u_str; \ int u_len; \ - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, str, length, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(str); \ } \ @@ -806,7 +806,7 @@ END_EXTERN_C() UChar *u_str; \ int u_len; \ uint length = strlen(s); \ - zend_convert_to_unicode(conv, &u_str, &u_len, s, length, &status); \ + zend_string_to_unicode_ex(conv, &u_str, &u_len, s, length, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(s); \ } \ @@ -823,7 +823,7 @@ END_EXTERN_C() UErrorCode status = U_ZERO_ERROR; \ UChar *u_str; \ int u_len; \ - zend_convert_to_unicode(conv, &u_str, &u_len, s, l, &status); \ + zend_string_to_unicode_ex(conv, &u_str, &u_len, s, l, &status); \ if ((flags) & ZSTR_AUTOFREE) { \ efree(s); \ } \ diff --git a/Zend/zend_exceptions.c b/Zend/zend_exceptions.c index 502ba6047f..7d9d1739b6 100644 --- a/Zend/zend_exceptions.c +++ b/Zend/zend_exceptions.c @@ -145,7 +145,7 @@ ZEND_METHOD(exception, __construct) UChar *u_str; int u_len; - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, message, message_len, &status); + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, message, message_len, &status); zend_update_property_unicodel(default_exception_ce, object, "message", sizeof("message")-1, u_str, u_len TSRMLS_CC); efree(u_str); } else { @@ -184,7 +184,7 @@ ZEND_METHOD(error_exception, __construct) UChar 
*u_str; int u_len; - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, message, message_len, &status); + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u_str, &u_len, message, message_len, &status); zend_update_property_unicodel(default_exception_ce, object, "message", sizeof("message")-1, u_str, u_len TSRMLS_CC); efree(u_str); } else { diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 0e2f18d109..80ab3d5298 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -53,7 +53,7 @@ UChar *u = NULL; \ int u_len; \ TSRMLS_FETCH(); \ - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u, &u_len, arKey.s, nKeyLength-1, &status); \ + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u, &u_len, arKey.s, nKeyLength-1, &status); \ if (U_FAILURE(status)) { \ /* UTODO: */ \ } \ @@ -1747,7 +1747,7 @@ ZEND_API void zend_hash_to_unicode(HashTable *ht, apply_func_t apply_func TSRMLS int u_len; Bucket *q; - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u, &u_len, (*p)->key.arKey.s, (*p)->nKeyLength-1, &status); + zend_string_to_unicode_ex(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u, &u_len, (*p)->key.arKey.s, (*p)->nKeyLength-1, &status); q = (Bucket *) pemalloc(sizeof(Bucket)-sizeof(q->key.arKey)+((u_len+1)*2), ht->persistent); memcpy(q, *p, sizeof(Bucket)-sizeof(q->key.arKey)); diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 036b5a07ef..b88317b9b3 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -631,7 +631,7 @@ ZEND_API int zend_convert_scanner_output(UConverter *conv, UChar **target, int * /* reset the error and perform conversion */ *status = U_ZERO_ERROR; - zend_convert_to_unicode(conv, target, target_len, source, source_len, status); + zend_string_to_unicode_ex(conv, target, target_len, source, source_len, status); /* figure out how many source bytes were consumed */ if 
(U_SUCCESS(*status)) { diff --git a/Zend/zend_unicode.c b/Zend/zend_unicode.c index d4a389ffdf..7a67e3785a 100644 --- a/Zend/zend_unicode.c +++ b/Zend/zend_unicode.c @@ -209,8 +209,8 @@ int zend_copy_converter(UConverter **target, UConverter *source) } /* }}} */ -/* {{{ zend_convert_to_unicode */ -ZEND_API int zend_convert_to_unicode(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status) +/* {{{ zend_string_to_unicode_ex */ +ZEND_API int zend_string_to_unicode_ex(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status) { UChar *buffer = NULL; UChar *output; @@ -281,8 +281,8 @@ ZEND_API int zend_convert_to_unicode(UConverter *conv, UChar **target, int *targ } /* }}} */ -/* {{{ zend_convert_from_unicode */ -ZEND_API int zend_convert_from_unicode(UConverter *conv, char **target, int *target_len, const UChar *source, int source_len, UErrorCode *status) +/* {{{ zend_unicode_to_string_ex */ +ZEND_API int zend_unicode_to_string_ex(UConverter *conv, char **target, int *target_len, const UChar *source, int source_len, UErrorCode *status) { char *buffer = NULL; char *output; @@ -380,12 +380,11 @@ ZEND_API char* zend_unicode_to_ascii(const UChar *us, int us_len TSRMLS_DC) int cs_len; UErrorCode status = U_ZERO_ERROR; - zend_convert_from_unicode(UG(ascii_conv), &cs, &cs_len, us, us_len, &status); + zend_unicode_to_string_ex(UG(ascii_conv), &cs, &cs_len, us, us_len, &status); if (U_FAILURE(status)) { efree(cs); return NULL; } - return cs; } /* }}} */ @@ -537,31 +536,48 @@ ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, ze } /* }}} */ -/* {{{ zval_unicode_to_string_ex */ -ZEND_API int zval_unicode_to_string_ex(zval *string, UConverter *conv TSRMLS_DC) +/* {{{ zend_unicode_to_string */ +ZEND_API int zend_unicode_to_string(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len TSRMLS_DC) { UErrorCode status = U_ZERO_ERROR; - char *s = NULL; 
- int s_len; int num_conv; - UChar *u = Z_USTRVAL_P(string); - int u_len = Z_USTRLEN_P(string); + if (conv == NULL) { + conv = UG(runtime_encoding_conv); + } - num_conv = zend_convert_from_unicode(conv, &s, &s_len, u, u_len, &status); + num_conv = zend_unicode_to_string_ex(conv, s, s_len, u, u_len, &status); if (U_FAILURE(status)) { int32_t offset = u_countChar32(u, num_conv); zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); - if (s) { - efree(s); + if (*s) { + efree(*s); } - ZVAL_EMPTY_STRING(string); + *s = NULL; + *s_len = 0; + return FAILURE; + } + return SUCCESS; +} +/* }}} */ + +/* {{{ zval_unicode_to_string_ex */ +ZEND_API int zval_unicode_to_string_ex(zval *string, UConverter *conv TSRMLS_DC) +{ + char *s = NULL; + int s_len; + + UChar *u = Z_USTRVAL_P(string); + int u_len = Z_USTRLEN_P(string); + + if (zend_unicode_to_string(conv, &s, &s_len, u, u_len TSRMLS_CC) == SUCCESS) { + ZVAL_STRINGL(string, s, s_len, 0); efree((UChar*)u); - return FAILURE; + return SUCCESS; /* conversion succeeded */ } else { - ZVAL_STRINGL(string, s, s_len, 0); + ZVAL_EMPTY_STRING(string); efree((UChar*)u); - return SUCCESS; + return FAILURE; /* conversion failed; zval was reset to an empty string */ } @@ -575,30 +591,48 @@ ZEND_API int zval_unicode_to_string(zval *string TSRMLS_DC) } /* }}} */ -/* {{{ zval_string_to_unicode_ex */ -ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC) +/* {{{ zend_string_to_unicode */ +ZEND_API int zend_string_to_unicode(UConverter *conv, UChar **u, int *u_len, char *s, int s_len TSRMLS_DC) { UErrorCode status = U_ZERO_ERROR; - UChar *u = NULL; - int u_len, num_conv; + int num_conv; - char *s = Z_STRVAL_P(string); - int s_len = Z_STRLEN_P(string); + if (conv == NULL) { + conv = UG(runtime_encoding_conv); + } - num_conv = zend_convert_to_unicode(conv, &u, &u_len, s, s_len, &status); + num_conv = zend_string_to_unicode_ex(conv, u, u_len, s, s_len, &status); if (U_FAILURE(status)) { zend_raise_conversion_error_ex("Could not convert binary string to Unicode string",
conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC); - if (u) { - efree(u); + if (*u) { + efree(*u); } - ZVAL_EMPTY_UNICODE(string); - efree(s); + *u = NULL; + *u_len = 0; return FAILURE; - } else { + } + return SUCCESS; +} +/* }}} */ + +/* {{{ zval_string_to_unicode_ex */ +ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC) +{ + UChar *u = NULL; + int u_len; + + char *s = Z_STRVAL_P(string); + int s_len = Z_STRLEN_P(string); + + if (zend_string_to_unicode(conv, &u, &u_len, s, s_len TSRMLS_CC) == SUCCESS) { ZVAL_UNICODEL(string, u, u_len, 0); efree(s); return SUCCESS; + } else { + ZVAL_EMPTY_UNICODE(string); + efree(s); + return FAILURE; } } /* }}} */ @@ -613,16 +647,13 @@ ZEND_API int zval_string_to_unicode(zval *string TSRMLS_DC) /* {{{ zend_cmp_unicode_and_string */ ZEND_API int zend_cmp_unicode_and_string(UChar *ustr, char* str, uint len) { - UErrorCode status = U_ZERO_ERROR; UChar *u = NULL; int u_len; int retval = TRUE; TSRMLS_FETCH(); - zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &u, &u_len, str, len, &status); - if (U_FAILURE(status)) { - efree(u); - return FALSE; + if (zend_string_to_unicode(NULL, &u, &u_len, str, len TSRMLS_CC) == FAILURE) { + return FAILURE; } retval = u_memcmp(ustr, u, u_len); efree(u); diff --git a/Zend/zend_unicode.h b/Zend/zend_unicode.h index 9b26bedeb9..65bc8499f2 100644 --- a/Zend/zend_unicode.h +++ b/Zend/zend_unicode.h @@ -71,11 +71,14 @@ void zend_collator_destroy(zend_collator *zcoll); /* API functions */ -ZEND_API int zend_convert_to_unicode(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status); -ZEND_API int zend_convert_from_unicode(UConverter *conv, char **target, int *target_len, const UChar *source, int source_len, UErrorCode *status); ZEND_API void zend_convert_encodings(UConverter *target_conv, UConverter *source_conv, char **target, int *target_len, const char *source, int source_len, UErrorCode *status); ZEND_API char* 
zend_unicode_to_ascii(const UChar *us, int us_len TSRMLS_DC); +ZEND_API int zend_string_to_unicode_ex(UConverter *conv, UChar **target, int *target_len, const char *source, int source_len, UErrorCode *status); +ZEND_API int zend_string_to_unicode(UConverter *conv, UChar **u, int *u_len, char *s, int s_len TSRMLS_DC); +ZEND_API int zend_unicode_to_string_ex(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len, UErrorCode *status); +ZEND_API int zend_unicode_to_string(UConverter *conv, char **s, int *s_len, const UChar *u, int u_len TSRMLS_DC); + ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC); ZEND_API int zval_string_to_unicode(zval *string TSRMLS_DC); ZEND_API int zval_unicode_to_string_ex(zval *string, UConverter *conv TSRMLS_DC);