Test UTF-16 BE BOM vs explicit Latin-1 encoding parsing a full buffer

author Rhodri James <rhodri@kynesim.co.uk>

Mon, 12 Jun 2017 14:57:01 +0000 (15:57 +0100)

committer Sebastian Pipping <sebastian@pipping.org>

Sat, 22 Jul 2017 20:49:18 +0000 (22:49 +0200)
author Rhodri James <rhodri@kynesim.co.uk>
Mon, 12 Jun 2017 14:57:01 +0000 (15:57 +0100)
committer Sebastian Pipping <sebastian@pipping.org>
Sat, 22 Jul 2017 20:49:18 +0000 (22:49 +0200)
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c

index 08ba2848e7623a27116333abeddb5bafba7f8b8a..16266ad095453e9fef947a31b494b1c612f47867 100644 (file)
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -5671,8 +5671,8 @@ END_TEST
  
  
  /* Parsing the full buffer rather than a byte at a time makes a
- * difference to the encoding scanning code, so repeat the above test
- * without breaking it down by byte.
+ * difference to the encoding scanning code, so repeat the above tests
+ * without breaking them down by byte.
   */
  START_TEST(test_ext_entity_latin1_utf16le_bom2)
  {
@@ -5708,6 +5708,40 @@ START_TEST(test_ext_entity_latin1_utf16le_bom2)
  }
  END_TEST
  
+START_TEST(test_ext_entity_latin1_utf16be_bom2)
+{
+    const char *text =
+        "<!DOCTYPE doc [\n"
+        "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
+        "]>\n"
+        "<doc>&en;</doc>";
+    ExtTest2 test_data = {
+        /* If UTF-16, 0xfeff is the BOM and 0x204c is BLACK LEFTWARDS BULLET */
+        /* If Latin-1, 0xfe = lowercase thorn, 0xff = lowercase y-diaeresis,
+         *   0x20 is a space and 0x4c = L (listed in the order of the bytes)
+         */
+        "\xfe\xff\x20\x4c",
+        4,
+        "iso-8859-1",
+        NULL,
+        EE_PARSE_FULL_BUFFER
+    };
+    /* Expected UTF-8: lowercase thorn is 0xc3 0xbe, y-diaeresis is 0xc3 0xbf */
+    const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
+    CharData storage;
+
+    /* BOM says UTF-16 BE, but the explicit iso-8859-1 encoding must win */
+    CharData_Init(&storage);
+    test_data.storage = &storage;
+    XML_SetExternalEntityRefHandler(parser, external_entity_loader2);
+    XML_SetUserData(parser, &test_data);
+    XML_SetCharacterDataHandler(parser, ext2_accumulate_characters);
+    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+        xml_failure(parser);
+    CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
  
  
  /*
@@ -11008,6 +11042,7 @@ make_suite(void)
      tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom);
      tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom);
      tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom2);
+    tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom2);
  
      suite_add_tcase(s, tc_namespace);
      tcase_add_checked_fixture(tc_namespace,
author	Rhodri James <rhodri@kynesim.co.uk>
	Mon, 12 Jun 2017 14:57:01 +0000 (15:57 +0100)
committer	Sebastian Pipping <sebastian@pipping.org>
	Sat, 22 Jul 2017 20:49:18 +0000 (22:49 +0200)