}
break;
case 2:
- if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+ if (c == 0x78) { /* 'x' */
+ pc->status = 4;
+ } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
pc->cache = c - 0x30;
pc->status = 3;
pc->digit = 1;
(*pc->decoder->filter_function)(c, pc->decoder);
}
break;
+ case 4:
+ if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+ pc->cache = c - 0x30;
+ pc->status = 5;
+ pc->digit = 1;
+ } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
+ pc->cache = c - 0x41 + 10;
+ pc->status = 5;
+ pc->digit = 1;
+ } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */
+ pc->cache = c - 0x61 + 10;
+ pc->status = 5;
+ pc->digit = 1;
+ } else {
+ pc->status = 0;
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
+ case 5:
+ s = 0;
+ f = 0;
+ if ((c >= 0x30 && c <= 0x39) ||
+ (c >= 0x41 && c <= 0x46) ||
+ (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */
+ if (pc->digit > 9) {
+ pc->status = 0;
+ s = pc->cache;
+ f = 1;
+ } else {
+ if (c >= 0x30 && c <= 0x39) {
+ s = pc->cache*16 + (c - 0x30);
+ } else if (c >= 0x41 && c <= 0x46) {
+ s = pc->cache*16 + (c - 0x41 + 10);
+ } else {
+ s = pc->cache*16 + (c - 0x61 + 10);
+ }
+ pc->cache = s;
+ pc->digit++;
+ }
+ } else {
+ pc->status = 0;
+ s = pc->cache;
+ f = 1;
+ n = 0;
+ size = pc->mapsize;
+ while (n < size) {
+ mapelm = &(pc->convmap[n*4]);
+ d = s - mapelm[2];
+ if (d >= mapelm[0] && d <= mapelm[1]) {
+ f = 0;
+ (*pc->decoder->filter_function)(d, pc->decoder);
+ if (c != 0x3b) { /* ';' */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
+ }
+ n++;
+ }
+ }
+ if (f) {
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ r = 1;
+ n = pc->digit;
+ while (n > 0) {
+ r *= 16;
+ n--;
+ }
+ s %= r;
+ r /= 16;
+ while (r > 0) {
+ d = s/r;
+ s %= r;
+ r /= 16;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
+ }
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+ break;
default:
if (c == 0x26) { /* '&' */
pc->status = 1;
return c;
}
+static int
+collector_encode_hex_htmlnumericentity(int c, void *data)
+{ /*
+ * Writes c to pc->decoder as a hexadecimal numeric entity ("&#x", hex
+ * digits, ";") when a conversion-map entry covers it; otherwise forwards
+ * c unchanged.
+ *
+ * c:    the value to encode.
+ * data: cast to struct collector_htmlnumericentity_data * (supplies
+ *       convmap, mapsize and decoder).
+ *
+ * Always returns c. The return values of the downstream filter_function
+ * calls are ignored.
+ */
+ struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
+ int f, n, s, r, d, size, *mapelm; /* f: set once a digit has been written; also marks "an entry produced output" */
+
+ size = pc->mapsize;
+ f = 0;
+ n = 0;
+ while (n < size) {
+ mapelm = &(pc->convmap[n*4]); /* each map entry occupies four ints */
+ if (c >= mapelm[0] && c <= mapelm[1]) { /* entry covers c (inclusive bounds) */
+ s = (c + mapelm[2]) & mapelm[3]; /* add entry[2], then AND with entry[3] */
+ if (s >= 0) { /* a negative result emits nothing; the scan moves on to the next entry */
+ (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */
+ (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */
+ (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */
+ r = 0x1000000;
+ s %= r; /* keep s below 0x1000000, i.e. at most six hex digits */
+ while (r > 0) { /* r drops by a factor of 16 per pass; the first pass always yields d == 0 because s < r */
+ d = s/r;
+ if (d || f) { /* skip leading zeros until the first non-zero digit */
+ f = 1;
+ s %= r;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); /* table is defined outside this view; presumably maps 0..15 to hex digit characters */
+ }
+ r /= 16;
+ }
+ if (!f) { /* s was 0, so no digit has been written yet: emit table entry 0 */
+ f = 1;
+ (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
+ }
+ (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */
+ }
+ }
+ if (f) { /* the first entry that produced output wins; stop scanning */
+ break;
+ }
+ n++;
+ }
+ if (!f) { /* no entry produced output: pass c through as-is */
+ (*pc->decoder->filter_function)(c, pc->decoder);
+ }
+
+ return c;
+}
+
mbfl_string *
mbfl_html_numeric_entity(
mbfl_string *string,
string->no_encoding,
mbfl_memory_device_output, 0, &device);
/* wchar filter */
- if (type == 0) {
+ if (type == 0) { /* decimal output */
encoder = mbfl_convert_filter_new(
string->no_encoding,
mbfl_no_encoding_wchar,
collector_encode_htmlnumericentity, 0, &pc);
- } else {
+ } else if (type == 2) { /* hex output */
+ encoder = mbfl_convert_filter_new(
+ string->no_encoding,
+ mbfl_no_encoding_wchar,
+ collector_encode_hex_htmlnumericentity, 0, &pc);
+ } else { /* type == 1: decimal/hex input */
encoder = mbfl_convert_filter_new(
string->no_encoding,
mbfl_no_encoding_wchar,
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, convmap)
ZEND_ARG_INFO(0, encoding)
+ ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
HashTable *target_hash;
size_t argc = ZEND_NUM_ARGS();
int i, *convmap, *mapelm, mapsize=0;
+ zend_bool is_hex = 0;
mbfl_string string, result, *ret;
enum mbfl_no_encoding no_encoding;
- if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
return;
}
string.len = str_len;
/* encoding */
- if (argc == 3) {
+ if ((argc == 3 || argc == 4) && encoding_len > 0) {
no_encoding = mbfl_name2no_encoding(encoding);
if (no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
}
}
+ if (argc == 4) {
+ if (type == 0 && is_hex) {
+ type = 2; /* output in hex format */
+ }
+ }
+
/* conversion map */
convmap = NULL;
if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
}
/* }}} */
-/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
+/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{