From: Rui Hirokawa Date: Mon, 18 Jul 2011 08:36:17 +0000 (+0000) Subject: added numeric entities encode/decode in hex format. X-Git-Tag: php-5.5.0alpha1~1771 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b150a6db240b1a955d617b7bca3316ca84bd3453;p=php added numeric entities encode/decode in hex format. --- diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 9eec3b4afc..9f929cc3b2 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -2746,7 +2746,9 @@ collector_decode_htmlnumericentity(int c, void *data) } break; case 2: - if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ + if (c == 0x78) { /* 'x' */ + pc->status = 4; + } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ pc->cache = c - 0x30; pc->status = 3; pc->digit = 1; @@ -2810,6 +2812,89 @@ collector_decode_htmlnumericentity(int c, void *data) (*pc->decoder->filter_function)(c, pc->decoder); } break; + case 4: + if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */ + pc->cache = c - 0x30; + pc->status = 5; + pc->digit = 1; + } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */ + pc->cache = c - 0x41 + 10; + pc->status = 5; + pc->digit = 1; + } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f' */ + pc->cache = c - 0x61 + 10; + pc->status = 5; + pc->digit = 1; + } else { + pc->status = 0; + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + case 5: + s = 0; + f = 0; + if ((c >= 0x30 && c <= 0x39) || + (c >= 0x41 && c <= 0x46) || + (c >= 0x61 && c <= 0x66)) { /* '0' - '9' or 'a' - 'f' */ + if (pc->digit > 9) { + pc->status = 0; + s = pc->cache; + f = 1; + } else { + if (c >= 0x30 && c <= 0x39) { + s = pc->cache*16 + (c - 0x30); + } else if (c >= 0x41 && c <= 0x46) { + s = pc->cache*16 + (c - 0x41 + 10); + } else { + s = pc->cache*16 + (c - 0x61 + 10); + } + pc->cache = s; + pc->digit++; + } + } else { + pc->status = 0; + s = pc->cache; + f = 1; + n = 0; + size = pc->mapsize; + while (n < size) { + mapelm = &(pc->convmap[n*4]); + d = s - mapelm[2]; + if (d >= mapelm[0] && d <= mapelm[1]) { + f = 0; + (*pc->decoder->filter_function)(d, pc->decoder); + if (c != 0x3b) { /* ';' */ + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; + } + n++; + } + } + if (f) { + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + r = 1; + n = pc->digit; + while (n > 0) { + r *= 16; + n--; + } + s %= r; + r /= 16; + while (r > 0) { + d = s/r; + s %= r; + r /= 16; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + (*pc->decoder->filter_function)(c, pc->decoder); + } + break; default: if (c == 0x26) { /* '&' */ pc->status = 1; @@ -2822,6 +2907,53 @@ collector_decode_htmlnumericentity(int c, void *data) return c; } +static int +collector_encode_hex_htmlnumericentity(int c, void *data) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data; + int f, n, s, r, d, size, *mapelm; + + size = pc->mapsize; + f = 0; + n = 0; + while (n < size) { + mapelm = &(pc->convmap[n*4]); + if (c >= mapelm[0] && c <= mapelm[1]) { + s = (c + mapelm[2]) & mapelm[3]; + if (s >= 0) { + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + r = 0x1000000; + s %= r; + while (r > 0) { + d = s/r; + if (d || f) { + f = 1; + s %= r; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + r /= 16; + } + if (!f) { + f = 1; + (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder); + } + (*pc->decoder->filter_function)(0x3b, pc->decoder); /* ';' */ + } + } + if (f) { + break; + } + n++; + } + if (!f) { + (*pc->decoder->filter_function)(c, pc->decoder); + } + + return c; +} + mbfl_string * mbfl_html_numeric_entity( mbfl_string *string, @@ -2850,12 +2982,17 @@ mbfl_html_numeric_entity( string->no_encoding, mbfl_memory_device_output, 0, &device); /* wchar filter */ - if (type == 0) { + if (type == 0) { /* decimal output */ encoder = mbfl_convert_filter_new( string->no_encoding, mbfl_no_encoding_wchar, collector_encode_htmlnumericentity, 0, &pc); - } else { + } else if (type == 2) { /* hex output */ + encoder = mbfl_convert_filter_new( + string->no_encoding, + mbfl_no_encoding_wchar, + collector_encode_hex_htmlnumericentity, 0, &pc); + } else { /* type == 1: decimal/hex input */ encoder = mbfl_convert_filter_new( string->no_encoding, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 34644a553b..f87b51abf7 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -412,6 +412,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2) ZEND_ARG_INFO(0, string) ZEND_ARG_INFO(0, convmap) ZEND_ARG_INFO(0, encoding) + ZEND_ARG_INFO(0, is_hex) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2) @@ -3682,10 +3683,11 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) HashTable *target_hash; size_t argc = ZEND_NUM_ARGS(); int i, *convmap, *mapelm, mapsize=0; + zend_bool is_hex = 0; mbfl_string string, result, *ret; enum mbfl_no_encoding no_encoding; - if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) { + if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { return; } @@ -3696,7 +3698,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) string.len = str_len; /* encoding */ - if (argc == 3) { + if ((argc == 3 || argc == 4) && encoding_len > 0) { no_encoding = mbfl_name2no_encoding(encoding); if (no_encoding == mbfl_no_encoding_invalid) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); @@ -3706,6 +3708,12 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) } } + if (argc == 4) { + if (type == 0 && is_hex) { + type = 2; /* output in hex format */ + } + } + /* conversion map */ convmap = NULL; if (Z_TYPE_P(zconvmap) == IS_ARRAY) { @@ -3743,7 +3751,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) } /* }}} */ -/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding]) +/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]]) Converts specified characters to HTML numeric entities */ PHP_FUNCTION(mb_encode_numericentity) {