From: Rob Richards
Date: Mon, 19 Dec 2005 14:17:30 +0000 (+0000)
Subject: MFH: Fixed bug #35447 (xml_parse_into_struct() chokes on the UTF-8 BOM)
X-Git-Tag: php-5.1.2RC1~45
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fc4ebee3e322fe06e13c2583f8c3df4e376449b9;p=php

MFH: Fixed bug #35447 (xml_parse_into_struct() chokes on the UTF-8 BOM)
add test
---

diff --git a/ext/xml/compat.c b/ext/xml/compat.c
index 43e116b864..7be85cab22 100644
--- a/ext/xml/compat.c
+++ b/ext/xml/compat.c
@@ -405,15 +405,12 @@ XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *m
 		efree(parser);
 		return NULL;
 	}
-	if (encoding != NULL) {
-		parser->parser->encoding = xmlStrdup(encoding);
 #if LIBXML_VERSION <= 20617
 	/* for older versions of libxml2, allow correct detection of
 	 * charset in documents with a BOM: */
-	} else {
-		parser->parser->charset = XML_CHAR_ENCODING_NONE;
+	parser->parser->charset = XML_CHAR_ENCODING_NONE;
 #endif
-	}
+
 	parser->parser->replaceEntities = 1;
 	parser->parser->wellFormed = 0;
 	if (sep != NULL) {
diff --git a/ext/xml/tests/bug35447.phpt b/ext/xml/tests/bug35447.phpt
new file mode 100644
index 0000000000..ba8b81e46c
--- /dev/null
+++ b/ext/xml/tests/bug35447.phpt
@@ -0,0 +1,48 @@
+--TEST--
+Bug #35447 (xml_parse_into_struct() chokes on the UTF-8 BOM)
+--SKIPIF--
+
+--FILE--
+
+
+]>
+A bient&244;t
+END_OF_XML;
+
+$parser = xml_parser_create_ns('UTF-8');
+xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);
+$result = xml_parse_into_struct($parser, $data, $vals, $index);
+xml_parser_free($parser);
+var_dump($vals);
+?>
+--EXPECT--
+array(1) {
+  [0]=>
+  array(5) {
+    ["tag"]=>
+    string(8) "resource"
+    ["type"]=>
+    string(8) "complete"
+    ["level"]=>
+    int(1)
+    ["attributes"]=>
+    array(2) {
+      ["key"]=>
+      string(7) "rSeeYou"
+      ["type"]=>
+      string(7) "literal"
+    }
+    ["value"]=>
+    string(13) "A bient&244;t"
+  }
+}