Added a test to make sure that whitespace in ENTITIES, IDREFS, and NMTOKENS attribute values is normalized.

author Fred L. Drake, Jr. <fdrake@users.sourceforge.net>

Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)

committer Fred L. Drake, Jr. <fdrake@users.sourceforge.net>

Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)
author Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)
committer Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)
diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c

index f25ea27572aeed47cef1bccf664da9f144b1b836..cd41f5cb00c2458c3cdf46450f6bfa92faaa566e 100644 (file)
--- a/expat/tests/runtests.c
+++ b/expat/tests/runtests.c
@@ -1,5 +1,7 @@
+#include <assert.h>
  #include <check.h>
  #include <stdlib.h>
+#include <stdio.h>
  
  #include "expat.h"
  
@@ -18,21 +20,34 @@ basic_setup(void)
  static void
  basic_teardown(void)
  {
-    if (parser != NULL) {
+    if (parser != NULL)
          XML_ParserFree(parser);
-    }
  }
  
+/* Generate a failure using the parser state to create an error message;
+ * this should be used when the parser reports an error we weren't
+ * expecting.
+ */
+static void
+xml_failure(void)
+{
+    char buffer[256];
+    sprintf(buffer, "%s (line %d, offset %d)",
+            XML_ErrorString(XML_GetErrorCode(parser)),
+            XML_GetCurrentLineNumber(parser),
+            XML_GetCurrentColumnNumber(parser));
+    fail(buffer);
+}
  
  START_TEST(test_nul_byte)
  {
-    char *text = "<doc>\0</doc>";
+    char text[] = "<doc>\0</doc>";
  
      /* test that a NUL byte (in US-ASCII data) is an error */
-    if (XML_Parse(parser, text, 12, 1))
+    if (XML_Parse(parser, text, sizeof(text) - 1, 1))
          fail("Parser did not report error on NUL-byte.");
-    fail_unless(XML_GetErrorCode(parser) == XML_ERROR_INVALID_TOKEN,
-                "Got wrong error code for NUL-byte in US-ASCII encoding.");
+    if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
+        xml_failure();
  }
  END_TEST
  
@@ -44,8 +59,8 @@ START_TEST(test_u0000_char)
      /* test that a character reference to U+0000 (&#0;) is an error */
      if (XML_Parse(parser, text, strlen(text), 1))
          fail("Parser did not report error on NUL-byte.");
-    fail_unless(XML_GetErrorCode(parser) == XML_ERROR_BAD_CHAR_REF,
-                "Got wrong error code for &#0;.");
+    if (XML_GetErrorCode(parser) != XML_ERROR_BAD_CHAR_REF)
+        xml_failure();
  }
  END_TEST
  
@@ -58,8 +73,8 @@ START_TEST(test_xmldecl_misplaced)
          "<a>&eee;</a>";
  
      if (!XML_Parse(parser, text, strlen(text), 1)) {
-        fail_unless(XML_GetErrorCode(parser) == XML_ERROR_MISPLACED_XML_PI,
-                    "wrong error when XML declaration is misplaced");
+        if (XML_GetErrorCode(parser) != XML_ERROR_MISPLACED_XML_PI)
+            xml_failure();
      }
      else {
          fail("expected XML_ERROR_MISPLACED_XML_PI with misplaced XML decl");
@@ -73,7 +88,7 @@ START_TEST(test_bom_utf8)
      char *text = "\357\273\277<e/>";
  
      if (!XML_Parse(parser, text, strlen(text), 1))
-        fail("false error reported for UTF-8 BOM");
+        xml_failure();
  }
  END_TEST
  
@@ -82,7 +97,7 @@ START_TEST(test_bom_utf16_be)
      char text[] = "\376\377\0<\0e\0/\0>";
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
-        fail("false error reported for UTF-16-BE BOM");
+        xml_failure();
  }
  END_TEST
  
@@ -91,15 +106,126 @@ START_TEST(test_bom_utf16_le)
      char text[] = "\377\376<\0e\0/\0>\0";
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
-        fail("false error reported for UTF-16-LE BOM");
+        xml_failure();
  }
  END_TEST
  
+
+/* Helpers used by the following test; this checks any "attr", "ents",
+ * and "refs" attributes to make sure whitespace has been normalized.
+ */
+
+/* Return true if whitespace has been normalized in a string, using
+ * the rules for attribute value normalization.  The 'is_cdata' flag
+ * is needed since only non-CDATA attributes must also have leading
+ * and trailing spaces removed and runs of spaces collapsed to a
+ * single space.  (Section 3.3.3 of the recommendation.)
+ */
+static int
+is_whitespace_normalized(const XML_Char *s, int is_cdata)
+{
+    int blanks = 0;
+    int at_start = 1;
+    while (*s) {
+        if (*s == ' ')
+            ++blanks;
+        else if (*s == '\t' || *s == '\n' || *s == '\r')
+            return 0;
+        else {
+            if (at_start) {
+                at_start = 0;
+                if (blanks && !is_cdata)
+                    /* illegal leading blanks */
+                    return 0;
+            }
+            else if (blanks > 1 && !is_cdata)
+                return 0;
+            blanks = 0;
+        }
+        ++s;
+    }
+    if (blanks && !is_cdata)
+        return 0;
+    return 1;
+}
+
+/* Check the attribute whitespace checker: */
+static void
+testhelper_is_whitespace_normalized(void)
+{
+    assert(is_whitespace_normalized("abc", 0));
+    assert(is_whitespace_normalized("abc", 1));
+    assert(is_whitespace_normalized("abc def ghi", 0));
+    assert(is_whitespace_normalized("abc def ghi", 1));
+    assert(!is_whitespace_normalized(" abc def ghi", 0));
+    assert(is_whitespace_normalized(" abc def ghi", 1));
+    assert(!is_whitespace_normalized("abc  def ghi", 0));
+    assert(is_whitespace_normalized("abc  def ghi", 1));
+    assert(!is_whitespace_normalized("abc def ghi ", 0));
+    assert(is_whitespace_normalized("abc def ghi ", 1));
+    assert(!is_whitespace_normalized(" ", 0));
+    assert(is_whitespace_normalized(" ", 1));
+    assert(!is_whitespace_normalized("\t", 0));
+    assert(!is_whitespace_normalized("\t", 1));
+    assert(!is_whitespace_normalized("\n", 0));
+    assert(!is_whitespace_normalized("\n", 1));
+    assert(!is_whitespace_normalized("\r", 0));
+    assert(!is_whitespace_normalized("\r", 1));
+    assert(!is_whitespace_normalized("abc\t def", 1));
+}
+
+static void
+check_attr_contains_normalized_whitespace(void *userdata,
+                                          const XML_Char *name,
+                                          const XML_Char **atts)
+{
+    int i;
+    for (i = 0; atts[i] != NULL; i += 2) {
+        const XML_Char *attrname = atts[i];
+        const XML_Char *value = atts[i + 1];
+        if (strcmp("attr", attrname) == 0
+            || strcmp("ents", attrname) == 0
+            || strcmp("refs", attrname) == 0) {
+            if (!is_whitespace_normalized(value, 0)) {
+                char buffer[256];
+                sprintf(buffer, "attribute value not normalized: %s='%s'",
+                        attrname, value);
+                fail(buffer);
+            }
+        }
+    }
+}
+
+START_TEST(test_attr_whitespace_normalization)
+{
+    char *text =
+        "<!DOCTYPE doc [\n"
+        "  <!ATTLIST doc\n"
+        "            attr NMTOKENS #REQUIRED\n"
+        "            ents ENTITIES #REQUIRED\n"
+        "            refs IDREFS   #REQUIRED>\n"
+        "]>\n"
+        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
+        "     ents=' ent-1   \t\r\n"
+        "            ent-2  ' >\n"
+        "  <e id='id-1'/>\n"
+        "  <e id='id-2'/>\n"
+        "</doc>";
+
+    XML_SetStartElementHandler(parser,
+                               check_attr_contains_normalized_whitespace);
+    if (!XML_Parse(parser, text, strlen(text), 1))
+        xml_failure();
+}
+END_TEST
+
+
  static Suite *
  make_basic_suite(void)
  {
      Suite *s = suite_create("basic");
      TCase *tc_chars = tcase_create("character tests");
+    TCase *tc_attrs = tcase_create("attributes");
      TCase *tc_xmldecl = tcase_create("XML declaration");
  
      suite_add_tcase(s, tc_chars);
@@ -110,6 +236,10 @@ make_basic_suite(void)
      tcase_add_test(tc_chars, test_bom_utf16_be);
      tcase_add_test(tc_chars, test_bom_utf16_le);
  
+    suite_add_tcase(s, tc_attrs);
+    tcase_add_checked_fixture(tc_attrs, basic_setup, basic_teardown);
+    tcase_add_test(tc_attrs, test_attr_whitespace_normalization);
+
      suite_add_tcase(s, tc_xmldecl);
      tcase_add_checked_fixture(tc_xmldecl, basic_setup, basic_teardown);
      tcase_add_test(tc_xmldecl, test_xmldecl_misplaced);
@@ -127,6 +257,9 @@ main(int argc, char *argv[])
      Suite *s = make_basic_suite();
      SRunner *sr = srunner_create(s);
  
+    /* run the tests for internal helper functions */
+    testhelper_is_whitespace_normalized();
+
      for (i = 1; i < argc; ++i) {
          char *opt = argv[i];
          if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)
author	Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
	Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)
committer	Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
	Fri, 16 Nov 2001 20:19:39 +0000 (20:19 +0000)