void php_register_unicode_iterators(TSRMLS_D);
-/* {{{ proto unicode unicode_decode(string input, string encoding) U
- Takes a string in the source encoding and converts it to a UTF-16 unicode string, returning the result */
+/* {{{ proto unicode unicode_decode(binary input, string encoding [, int flags]) U
+ Takes a binary string and converts it to a Unicode string using the specified encoding */
static PHP_FUNCTION(unicode_decode)
{
- union {
- void *vptr;
- char *bin;
- } input;
- zend_uchar type;
- int len;
- char *encoding;
- int enclen;
+ char *str, *enc;
+ int str_len, enc_len;
+ long flags;
+ UChar *dest;
+ int dest_len;
UErrorCode status;
UConverter *conv = NULL;
- UChar *target;
- int targetlen;
+ int num_conv;
- if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", &input.vptr, &len, &type, &encoding, &enclen)) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str, &str_len, &enc, &enc_len, &flags)) {
return;
}
- if (type == IS_UNICODE) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is already unicode");
- RETURN_FALSE;
+ if (ZEND_NUM_ARGS() > 2) {
+ if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
+ RETURN_FALSE;
+ }
+ } else {
+ flags = UG(to_error_mode);
}
status = U_ZERO_ERROR;
- conv = ucnv_open(encoding, &status);
- if (!conv) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
+ conv = ucnv_open(enc, &status);
+ if (U_FAILURE(status)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc);
RETURN_FALSE;
}
+ zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);
+
status = U_ZERO_ERROR;
- zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, &status);
+ num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status);
if (U_FAILURE(status)) {
- /* TODO: error handling semantics ? */
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
+ zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+ efree(dest);
+ ucnv_close(conv);
+ RETURN_FALSE;
}
- RETVAL_UNICODEL(target, targetlen, 0);
-
ucnv_close(conv);
-}
-/* }}} */
-/* {{{ proto bool unicode_semantics() U
- Check whether unicode semantics are enabled */
-static PHP_FUNCTION(unicode_semantics)
-{
- RETURN_BOOL(UG(unicode));
+ RETVAL_UNICODEL(dest, dest_len, 0);
}
/* }}} */
-/* {{{ proto string unicode_encode(unicode input, string encoding) U
- Takes a unicode string and converts it to a string in the specified encoding */
+/* {{{ proto binary unicode_encode(unicode input, string encoding [, int flags]) U
+ Takes a Unicode string and converts it to a binary string using the specified encoding */
static PHP_FUNCTION(unicode_encode)
{
UChar *uni;
- int len;
- char *encoding;
- int enclen;
+ char *enc;
+ int uni_len, enc_len;
+ long flags;
+ char *dest;
+ int dest_len;
UErrorCode status;
UConverter *conv = NULL;
- char *target;
- int targetlen;
+ int num_conv;
- if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", &uni, &len, &encoding, &enclen)) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &uni, &uni_len, &enc, &enc_len, &flags) == FAILURE) {
return;
}
+ if (ZEND_NUM_ARGS() > 2) {
+ if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
+ RETURN_FALSE;
+ }
+ } else {
+ flags = UG(from_error_mode);
+ }
+
status = U_ZERO_ERROR;
- conv = ucnv_open(encoding, &status);
- if (!conv) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
+ conv = ucnv_open(enc, &status);
+ if (U_FAILURE(status)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc);
RETURN_FALSE;
}
+ zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags);
+ zend_set_converter_subst_char(conv, UG(from_subst_char));
+
status = U_ZERO_ERROR;
- zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status);
+ num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, uni_len, &status);
if (U_FAILURE(status)) {
- /* TODO: error handling semantics ? */
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
+ int32_t offset = u_countChar32(uni, num_conv);
+ zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+ efree(dest);
+ ucnv_close(conv);
+ RETURN_FALSE;
}
- RETVAL_STRINGL(target, targetlen, 0);
-
ucnv_close(conv);
+
+ RETVAL_STRINGL(dest, dest_len, 0);
+}
+/* }}} */
+
+/* {{{ proto bool unicode_semantics() U
+ Check whether Unicode semantics are enabled */
+static PHP_FUNCTION(unicode_semantics)
+{
+ RETURN_BOOL(UG(unicode));
}
/* }}} */