From: Moriyoshi Koizumi Date: Wed, 29 Jul 2009 04:44:08 +0000 (+0000) Subject: * Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) X-Git-Tag: php-5.2.11RC1~75 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0754517cb214ea61fae59896fa38c3591ffb05db;p=php * Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) --- diff --git a/NEWS b/NEWS index e1cd49c432..c226500a9c 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? 2009, PHP 5.2.11 +- Fixed bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities). (Moriyoshi) - Fixed regression in cURL extension that prevented flush of data to output defined as a file handle. (Ilia) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 6c6654a1b9..0163520f5b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -186,18 +186,58 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter) } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* numeric entity */ - for (pos=2; pos < filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* numeric entity */ + for (pos=3; pos < filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; + } + } else { + ent = -1; + } + } else { + /* numeric entity */ + if (filter->status > 2) { + for (pos=2; pos < filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; + } + } else { 
ent = -1; + } + } + if (ent >= 0 && ent < 0x110000) { + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); } - CK((*filter->output_function)(ent, filter->data)); filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; diff --git a/ext/mbstring/tests/bug48645.phpt b/ext/mbstring/tests/bug48645.phpt new file mode 100644 index 0000000000..6185442cc1 --- /dev/null +++ b/ext/mbstring/tests/bug48645.phpt @@ -0,0 +1,162 @@ +--TEST-- +Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) +--SKIPIF-- + +--FILE-- + +--EXPECT-- +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623782f3b" +string(10) "2623783a3b" +string(10) "262378403b" +string(10) "262378603b" +string(10) "262378473b" +string(10) "262378673b" +string(8) "2623783b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623582f3b" +string(10) "2623583a3b" +string(10) "262358403b" 
+string(10) "262358603b" +string(10) "262358473b" +string(10) "262358673b" +string(8) "2623583b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(8) "26232f3b" +string(8) "26233a3b" +string(6) "26233b" +string(8) "f48fbfbf" +string(20) "2623783131303030303b" +string(8) "f48fbfbf" +string(20) "2623583131303030303b" +string(8) "f48fbfbf" +string(20) "2623313131343131323b"