From: Fred L. Drake, Jr. Date: Fri, 19 Apr 2002 18:25:07 +0000 (+0000) Subject: basic_setup(): Remove bogus assumption that all tests would use US-ASCII X-Git-Tag: R_1_95_3~98 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=38b6ee29b28b1ed27bf3a9c8a0f3f5ea7b0d4397;p=libexpat basic_setup(): Remove bogus assumption that all tests would use US-ASCII data; let Expat determine the proper encoding from the XML source text. Added several tests relating to supposed mis-interpretation of Latin-1; these show that Expat (at least in the CVS version) is properly decoding the Latin-1 text and generating the proper UTF-8 output. This closes SF bug #491986, #514281. --- diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index d70ee559..1b46aed8 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "expat.h" @@ -12,7 +13,7 @@ static XML_Parser parser; static void basic_setup(void) { - parser = XML_ParserCreate("us-ascii"); + parser = XML_ParserCreate(NULL); if (parser == NULL) fail("Parser not created."); } @@ -29,16 +30,19 @@ basic_teardown(void) * expecting. */ static void -xml_failure(void) +_xml_failure(const char *file, int line) { - char buffer[256]; - sprintf(buffer, "%s (line %d, offset %d)", + char buffer[1024]; + sprintf(buffer, "%s (line %d, offset %d)\n reported from %s, line %d", XML_ErrorString(XML_GetErrorCode(parser)), XML_GetCurrentLineNumber(parser), - XML_GetCurrentColumnNumber(parser)); + XML_GetCurrentColumnNumber(parser), + file, line); fail(buffer); } +#define xml_failure() _xml_failure(__FILE__, __LINE__) + START_TEST(test_nul_byte) { char text[] = "\0"; @@ -111,6 +115,104 @@ START_TEST(test_bom_utf16_le) END_TEST +typedef struct +{ + int count; + XML_Char data[1024]; +} CharData; + +static void +accumulate_characters(void *userData, const XML_Char *s, int len) +{ + CharData *storage = (CharData *)userData; + if (len + storage->count < sizeof(storage->data)) { + memcpy(storage->data + storage->count, s, len); + storage->count += len; + } +} + +static void +check_characters(CharData *storage, XML_Char *expected) +{ + char buffer[1024]; + int len = strlen(expected); + if (len != storage->count) { + sprintf(buffer, "wrong number of data characters: got %d, expected %d", + storage->count, len); + fail(buffer); + return; + } + if (memcmp(expected, storage->data, len) != 0) + fail("got bad data bytes"); +} + +static void +run_character_check(XML_Char *text, XML_Char *expected) +{ + CharData storage; + storage.count = 0; + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + if (!XML_Parse(parser, text, strlen(text), 1)) + xml_failure(); + check_characters(&storage, expected); +} + +/* Regression test for SF bug #491986. */ +START_TEST(test_danish_latin1) +{ + char *text = + "\n" + "Jørgen æøåÆØÅ"; + run_character_check(text, + "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); +} +END_TEST +/* End regression test for SF bug #491986. */ + + +/* Regression test for SF bug #514281. */ +START_TEST(test_french_charref_hexidecimal) +{ + char *text = + "\n" + "éèàçêÈ"; + run_character_check(text, + "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); +} +END_TEST + +START_TEST(test_french_charref_decimal) +{ + char *text = + "\n" + "éèàçêÈ"; + run_character_check(text, + "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); +} +END_TEST + +START_TEST(test_french_latin1) +{ + char *text = + "\n" + "\xE9\xE8\xE0\xE7\xEa\xC8"; + run_character_check(text, + "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); +} +END_TEST + +START_TEST(test_french_utf8) +{ + char *text = + "\n" + "\xC3\xA9"; + run_character_check(text, "\xC3\xA9"); +} +END_TEST +/* End regression test for SF bug #514281. */ + + /* Helpers used by the following test; this checks any "attr" and "refs" * attributes to make sure whitespace has been normalized. */ @@ -235,6 +337,13 @@ make_basic_suite(void) tcase_add_test(tc_chars, test_bom_utf8); tcase_add_test(tc_chars, test_bom_utf16_be); tcase_add_test(tc_chars, test_bom_utf16_le); + /* Regression test for SF bug #491986. */ + tcase_add_test(tc_chars, test_danish_latin1); + /* Regression test for SF bug #514281. */ + tcase_add_test(tc_attrs, test_french_charref_hexidecimal); + tcase_add_test(tc_attrs, test_french_charref_decimal); + tcase_add_test(tc_attrs, test_french_latin1); + tcase_add_test(tc_attrs, test_french_utf8); suite_add_tcase(s, tc_attrs); tcase_add_checked_fixture(tc_attrs, basic_setup, basic_teardown);