]> granicus.if.org Git - python/commitdiff
Added several new tests to check the behavior with respect to doctype
authorFred Drake <fdrake@acm.org>
Mon, 24 Sep 2001 20:22:09 +0000 (20:22 +0000)
committerFred Drake <fdrake@acm.org>
Mon, 24 Sep 2001 20:22:09 +0000 (20:22 +0000)
declarations and weird markup that we used to accept & ignore that recent
versions raised an exception for; the original behavior has been restored
and augmented (the user can decide what to do if they care; the default is
to ignore it as done in early versions).

Lib/test/test_sgmllib.py

index a37696d4458f1ff29b77990c5de336240c4f575a..ff0af9e323578fa74dd0bc6120c32a0da0729e80 100644 (file)
@@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser):
     def handle_pi(self, data):
         self.append(("pi", data))
 
+    def unknown_decl(self, decl):
+        self.append(("unknown decl", decl))
+
 
 class CDATAEventCollector(EventCollector):
     def start_cdata(self, attrs):
@@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase):
 
     collector = EventCollector
 
-    def check_events(self, source, expected_events):
+    def get_events(self, source):
         parser = self.collector()
-        for s in source:
-            parser.feed(s)
-        parser.close()
-        events = parser.get_events()
+        try:
+            for s in source:
+                parser.feed(s)
+            parser.close()
+        except:
+            #self.events = parser.events
+            raise
+        return parser.get_events()
+
+    def check_events(self, source, expected_events):
+        try:
+            events = self.get_events(source)
+        except:
+            import sys
+            #print >>sys.stderr, pprint.pformat(self.events)
+            raise
         if events != expected_events:
             self.fail("received events did not match expected events\n"
                       "Expected:\n" + pprint.pformat(expected_events) +
@@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase):
             self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                       % (source, pprint.pformat(parser.get_events())))
 
+    def test_doctype_decl_internal(self):
+        inside = """\
+DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
+             SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
+  <!ELEMENT html - O EMPTY>
+  <!ATTLIST html
+      version CDATA #IMPLIED
+      profile CDATA 'DublinCore'>
+  <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
+  <!ENTITY myEntity 'internal parsed entity'>
+  <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
+  <!ENTITY % paramEntity 'name|name|name'>
+  %paramEntity;
+  <!-- comment -->
+]"""
+        self.check_events(["<!%s>" % inside], [
+            ("decl", inside),
+            ])
+
+    def test_doctype_decl_external(self):
+        inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
+        self.check_events("<!%s>" % inside, [
+            ("decl", inside),
+            ])
+
     def test_underscore_in_attrname(self):
         # SF bug #436621
         """Make sure attribute names with underscores are accepted"""
@@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase):
             ("endtag", "b"),
             ])
 
+    def test_bare_ampersands(self):
+        self.check_events("this text & contains & ampersands &", [
+            ("data", "this text & contains & ampersands &"),
+            ])
+
+    def test_bare_pointy_brackets(self):
+        self.check_events("this < text > contains < bare>pointy< brackets", [
+            ("data", "this < text > contains < bare>pointy< brackets"),
+            ])
+
     def test_attr_syntax(self):
         output = [
           ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
@@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase):
             ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
             ])
 
+    def test_illegal_declarations(self):
+        s = 'abc<!spacer type="block" height="25">def'
+        self.check_events(s, [
+            ("data", "abc"),
+            ("unknown decl", 'spacer type="block" height="25"'),
+            ("data", "def"),
+            ])
+
     def test_weird_starttags(self):
         self.check_events("<a<a>", [
             ("starttag", "a", []),
@@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase):
             ("endtag", "cdata"),
             ])
 
+    def test_illegal_declarations(self):
+        s = 'abc<!spacer type="block" height="25">def'
+        self.check_events(s, [
+            ("data", "abc"),
+            ("unknown decl", 'spacer type="block" height="25"'),
+            ("data", "def"),
+            ])
+
     # XXX These tests have been disabled by prefixing their names with
     # an underscore.  The first two exercise outstanding bugs in the
     # sgmllib module, and the third exhibits questionable behavior
@@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase):
         self.check_parse_error("<a foo=>")
 
 
-test_support.run_unittest(SGMLParserTestCase)
+def test_main():
+    test_support.run_unittest(SGMLParserTestCase)
+
+
+if __name__ == "__main__":
+    test_main()