#include "libmbfl/mbfl/mbfl_allocators.h"
#include "libmbfl/mbfl/mbfilter_pass.h"
+#include "libmbfl/filters/mbfilter_ucs4.h"
#include "php_variables.h"
#include "php_globals.h"
return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
}
-
-/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
-MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
+MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
{
mbfl_string string, result, *ret;
- const mbfl_encoding *from_encoding, *to_encoding;
mbfl_buffer_converter *convd;
- size_t size;
- const mbfl_encoding **list;
- char *output=NULL;
+ char *output = NULL;
if (output_len) {
*output_len = 0;
}
- if (!input) {
- return NULL;
- }
- /* new encoding */
- if (_to_encoding && strlen(_to_encoding)) {
- to_encoding = mbfl_name2encoding(_to_encoding);
- if (!to_encoding) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
- return NULL;
- }
- } else {
- to_encoding = MBSTRG(current_internal_encoding);
- }
/* initialize string */
mbfl_string_init(&string);
mbfl_string_init(&result);
- from_encoding = MBSTRG(current_internal_encoding);
string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
string.val = (unsigned char *)input;
string.len = length;
- /* pre-conversion encoding */
- if (_from_encodings) {
- list = NULL;
- size = 0;
- php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
- if (size == 1) {
- from_encoding = *list;
- string.no_encoding = from_encoding->no_encoding;
- } else if (size > 1) {
- /* auto detect */
- from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
- if (from_encoding) {
- string.no_encoding = from_encoding->no_encoding;
- } else {
- php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
- from_encoding = &mbfl_encoding_pass;
- to_encoding = from_encoding;
- string.no_encoding = from_encoding->no_encoding;
- }
- } else {
- php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
- }
- if (list != NULL) {
- efree((void *)list);
- }
- }
-
/* initialize converter */
convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
if (convd == NULL) {
php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
return NULL;
}
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+ mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) {
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
- } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+ } else if (php_mb_is_no_encoding_unicode(string.no_encoding)
+ && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
if (php_mb_is_no_encoding_utf8(string.no_encoding)) {
-
if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff &&
0xe000 > MBSTRG(current_filter_illegal_substchar)
) {
mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
} else {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ mbfl_buffer_converter_illegal_substchar(convd,
+ MBSTRG(current_filter_illegal_substchar));
}
-
} else {
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ mbfl_buffer_converter_illegal_substchar(convd,
+ MBSTRG(current_filter_illegal_substchar));
}
-
} else {
mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
}
}
/* }}} */
+/* {{{ MBSTRING_API char *php_mb_convert_encoding()
+ * Name-based front end for php_mb_convert_encoding_ex(): resolves the target
+ * and source encodings from their string names, then delegates the actual
+ * conversion.
+ *
+ * input            bytes to convert; NULL makes the function return NULL
+ * length           number of bytes in input
+ * _to_encoding     target encoding name; NULL or "" selects
+ *                  MBSTRG(current_internal_encoding)
+ * _from_encodings  source encoding list, parsed with
+ *                  php_mb_parse_encoding_list(); NULL selects
+ *                  MBSTRG(current_internal_encoding)
+ * output_len       optional; reset to 0 on entry and handed on to
+ *                  php_mb_convert_encoding_ex()
+ *
+ * Returns the result of php_mb_convert_encoding_ex(), or NULL when input is
+ * NULL or the target encoding name is unknown (an E_WARNING is raised for
+ * the latter). */
+MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
+{
+ const mbfl_encoding *from_encoding, *to_encoding;
+
+ if (output_len) { /* clear the out-parameter before any early return */
+ *output_len = 0;
+ }
+ if (!input) {
+ return NULL;
+ }
+ /* new encoding: a non-empty name is looked up and rejected if unknown;
+  * NULL or an empty name selects the current internal encoding */
+ if (_to_encoding && strlen(_to_encoding)) {
+ to_encoding = mbfl_name2encoding(_to_encoding);
+ if (!to_encoding) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
+ return NULL;
+ }
+ } else {
+ to_encoding = MBSTRG(current_internal_encoding);
+ }
+
+ /* pre-conversion encoding: starts as the current internal encoding and is
+  * replaced only if _from_encodings yields a usable candidate */
+ from_encoding = MBSTRG(current_internal_encoding);
+ if (_from_encodings) {
+ const mbfl_encoding **list = NULL;
+ size_t size = 0;
+ php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
+ if (size == 1) {
+ from_encoding = *list; /* exactly one candidate: use it directly */
+ } else if (size > 1) {
+ /* auto detect: several candidates, so ask mbfl_identify_encoding2()
+  * to choose among them for this input */
+ mbfl_string string;
+ mbfl_string_init(&string);
+ string.val = (unsigned char *)input;
+ string.len = length;
+ from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
+ if (!from_encoding) { /* no candidate was identified: warn and use the pass encoding as source.
+  * NOTE(review): only the source side becomes the pass encoding here;
+  * to_encoding stays as resolved above -- confirm that is the intended
+  * conversion when detection fails */
+ php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
+ from_encoding = &mbfl_encoding_pass;
+ }
+ } else { /* size == 0: warn and keep the default from_encoding set above */
+ php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
+ }
+ if (list != NULL) { /* release the parsed list (if any) before delegating */
+ efree((void *)list);
+ }
+ }
+
+ return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
+}
+/* }}} */
+
MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
{
HashTable *output, *chash;
/* }}} */
-static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
+static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc_name)
{
+ const mbfl_encoding *enc;
enum mbfl_no_encoding no_enc;
char* ret;
size_t ret_len;
- const mbfl_encoding *encoding;
unsigned char char_len;
zend_long cp;
- if (enc == NULL) {
- no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ if (enc_name == NULL) {
+ enc = MBSTRG(current_internal_encoding);
} else {
- no_enc = mbfl_name2no_encoding(enc);
-
- if (no_enc == mbfl_no_encoding_invalid) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ enc = mbfl_name2encoding(enc_name);
+ if (!enc) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
return -1;
}
}
+ no_enc = enc->no_encoding;
if (php_mb_is_no_encoding_unicode(no_enc)) {
-
- ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
+ ret = php_mb_convert_encoding_ex(str, str_len, &mbfl_encoding_ucs4be, enc, &ret_len);
if (ret == NULL) {
return -1;
return cp;
} else if (php_mb_is_unsupported_no_encoding(no_enc)) {
- php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
return -1;
}
- ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+ ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
if (ret == NULL) {
return -1;
}
- encoding = mbfl_no2encoding(no_enc);
- char_len = php_mb_mbchar_bytes_ex(ret, encoding);
+ char_len = php_mb_mbchar_bytes_ex(ret, enc);
if (char_len == 1) {
cp = (unsigned char) ret[0];
/* }}} */
-static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
+static inline char* php_mb_chr(zend_long cp, const char* enc_name, size_t *output_len)
{
+ const mbfl_encoding *enc;
enum mbfl_no_encoding no_enc;
char* buf;
size_t buf_len;
char* ret;
size_t ret_len;
- if (enc == NULL) {
- no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ if (enc_name == NULL) {
+ enc = MBSTRG(current_internal_encoding);
} else {
- no_enc = mbfl_name2no_encoding(enc);
- if (no_enc == mbfl_no_encoding_invalid) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ enc = mbfl_name2encoding(enc_name);
+ if (!enc) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
return NULL;
}
}
+ no_enc = enc->no_encoding;
+
if (php_mb_is_no_encoding_utf8(no_enc)) {
if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
buf[3] = cp & 0xff;
buf[4] = 0;
- ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
+ ret = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len);
efree(buf);
if (output_len) {
return ret;
} else if (php_mb_is_unsupported_no_encoding(no_enc)) {
- php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc_name);
return NULL;
}
buf[4] = 0;
}
- ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len);
+ ret = php_mb_convert_encoding_ex(buf, buf_len, enc, enc, &ret_len);
efree(buf);
if (output_len) {
/* }}} */
-static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc)
+static inline char* php_mb_scrub(const char* str, size_t str_len, const mbfl_encoding *enc)
{
size_t ret_len;
- return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+ return php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len); /* enc is passed as both
+  * the target and the source encoding, so str is run through the converter
+  * without changing encoding; the length written to ret_len is not reported
+  * back to the caller, only the returned buffer is */
}
/* {{{ proto bool mb_scrub([string str[, string encoding]]) */
PHP_FUNCTION(mb_scrub)
{
+ const mbfl_encoding *enc;
char* str;
size_t str_len;
- char *enc = NULL;
- size_t enc_len;
+ char *enc_name = NULL;
+ size_t enc_name_len;
char *ret;
ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_STRING(str, str_len)
Z_PARAM_OPTIONAL
- Z_PARAM_STRING(enc, enc_len)
+ Z_PARAM_STRING(enc_name, enc_name_len)
ZEND_PARSE_PARAMETERS_END();
- if (enc == NULL) {
- enc = (char *) MBSTRG(current_internal_encoding)->name;
- } else if (!mbfl_is_support_encoding(enc)) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
- RETURN_FALSE;
+ if (enc_name == NULL) {
+ enc = MBSTRG(current_internal_encoding);
+ } else {
+ enc = mbfl_name2encoding(enc_name);
+ if (!enc) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
+ RETURN_FALSE;
+ }
}
ret = php_mb_scrub(str, str_len, enc);
#include "mbstring.h"
#include "php_unicode.h"
#include "unicode_data.h"
+#include "libmbfl/filters/mbfilter_ucs4.h"
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
}
MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
- const char *src_encoding)
+ const char *src_encoding_name)
{
char *unicode, *newstr;
size_t unicode_len;
unsigned char *unicode_ptr;
size_t i;
- enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
+ enum mbfl_no_encoding src_no_encoding;
- if (_src_encoding == mbfl_no_encoding_invalid) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
+ const mbfl_encoding *src_encoding = mbfl_name2encoding(src_encoding_name);
+ if (!src_encoding) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding_name);
return NULL;
}
- unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len);
+ src_no_encoding = src_encoding->no_encoding;
+
+ unicode = php_mb_convert_encoding_ex(srcstr, srclen, &mbfl_encoding_ucs4be, src_encoding, &unicode_len);
if (unicode == NULL)
return NULL;
case PHP_UNICODE_CASE_UPPER:
for (i = 0; i < unicode_len; i+=4) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
break;
case PHP_UNICODE_CASE_LOWER:
for (i = 0; i < unicode_len; i+=4) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
break;
if (mode) {
if (res) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
} else {
mode = 0;
}
if (res) {
mode = 1;
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
}
}
}
- newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len);
+ newstr = php_mb_convert_encoding_ex(
+ unicode, unicode_len, src_encoding, &mbfl_encoding_ucs4be, ret_len);
efree(unicode);
return newstr;