From: Rui Hirokawa Date: Sat, 24 Sep 2011 02:12:17 +0000 (+0000) Subject: MFH: fixed #40685: removed '&' in mb_decode_numericentity(). X-Git-Tag: php-5.4.0beta2~76 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a6a3bd010aefe9a543d450105ea7310196c8235e;p=php MFH: fixed #40685: removed '&' in mb_decode_numericentity(). --- diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 9f929cc3b2..b3759f940d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -2954,6 +2954,80 @@ collector_encode_hex_htmlnumericentity(int c, void *data) return c; } +int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter) +{ + struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter; + int n, s, r, d; + + if (pc->status) { + switch (pc->status) { + case 1: /* '&' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + break; + case 2: /* '#' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + break; + case 3: /* '0'-'9' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + + s = pc->cache; + r = 1; + n = pc->digit; + while (n > 0) { + r *= 10; + n--; + } + s %= r; + r /= 10; + while (r > 0) { + d = s/r; + s %= r; + r /= 10; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + + break; + case 4: /* 'x' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + break; + case 5: /* '0'-'9','a'-'f' */ + (*pc->decoder->filter_function)(0x26, pc->decoder); /* '&' */ + (*pc->decoder->filter_function)(0x23, pc->decoder); /* '#' */ + (*pc->decoder->filter_function)(0x78, pc->decoder); /* 'x' */ + + s = pc->cache; + r = 1; + n = pc->digit; + while (n > 0) { + r *= 16; + n--; + } + s %= r; + r /= 16; + while (r > 0) { + d = s/r; + s %= r; + r /= 16; + (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder); + } + break; + default: + break; + } + } + + pc->status = 0; + pc->cache = 0; + pc->digit = 0; + + return 0; +} + + mbfl_string * mbfl_html_numeric_entity( mbfl_string *string, @@ -2996,7 +3070,8 @@ mbfl_html_numeric_entity( encoder = mbfl_convert_filter_new( string->no_encoding, mbfl_no_encoding_wchar, - collector_decode_htmlnumericentity, 0, &pc); + collector_decode_htmlnumericentity, + (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc); } if (pc.decoder == NULL || encoder == NULL) { mbfl_convert_filter_delete(encoder); diff --git a/ext/mbstring/tests/bug40685.phpt b/ext/mbstring/tests/bug40685.phpt new file mode 100644 index 0000000000..1a673e5d15 --- /dev/null +++ b/ext/mbstring/tests/bug40685.phpt @@ -0,0 +1,25 @@ +--TEST-- +Bug #40685 (mb_decode_numericentity() removes '&' in the string) +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +string(1) "&" +string(3) "&&&" +string(2) "&#" +string(3) "&#x" +string(4) "=" +string(5) "=" +string(1) "=" +string(1) "="