From: Fred L. Drake, Jr.
Date: Fri, 19 Apr 2002 19:18:35 +0000 (+0000)
Subject: Add a test that checks that illegal UTF-8 sequences are not allowed to pass
X-Git-Tag: R_1_95_3~95
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1258481fbcbd896fa61b95440f9de98199841592;p=libexpat

Add a test that checks that illegal UTF-8 sequences are not allowed to pass
when we are using UTF-8 encoding. This closes SF bug #477667.
---

diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c
index bab1b4f4..69a5cc47 100644
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -212,6 +212,30 @@ START_TEST(test_french_utf8)
 END_TEST
 /* End regression test for SF bug #514281. */
 
+/* Regression test for SF bug #477667.
+ * For every byte value 128..255, parse that byte followed by the
+ * 7-bit characters "cd" and require XML_ERROR_INVALID_TOKEN, so the
+ * pair is never mistakenly accepted as a valid UTF-8 sequence.
+ */
+START_TEST(test_illegal_utf8)
+{
+    char text[100]; /* input document; reused for the failure message */
+    int i;
+
+    for (i = 128; i <= 255; ++i) { /* every byte with the high bit set */
+        sprintf(text, "%ccd", i); /* byte i followed by ASCII "cd" */
+        if (XML_Parse(parser, text, strlen(text), 1)) { /* no error seen */
+            sprintf(text,
+                    "expected token error for '%c'(ordinal %d) in UTF-8 text",
+                    i, i);
+            fail(text);
+        }
+        else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
+            xml_failure(); /* parse failed, but with a different error */
+    }
+}
+END_TEST
+
 
 /* Helpers used by the following test; this checks any "attr" and "refs"
  * attributes to make sure whitespace has been normalized.
@@ -337,6 +361,7 @@ make_basic_suite(void)
     tcase_add_test(tc_chars, test_bom_utf8);
     tcase_add_test(tc_chars, test_bom_utf16_be);
     tcase_add_test(tc_chars, test_bom_utf16_le);
+    tcase_add_test(tc_chars, test_illegal_utf8);
     /* Regression test for SF bug #491986. */
     tcase_add_test(tc_chars, test_danish_latin1);
     /* Regression test for SF bug #514281. */