From: Fred Drake Date: Mon, 24 Sep 2001 20:22:09 +0000 (+0000) Subject: Added several new tests to check the behavior with respect to doctype X-Git-Tag: v2.2.1c1~1644 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=30c484916988862608e4efdfa8f8aa911e4cc0c3;p=python Added several new tests to check the behavior with respect to doctype declarations and weird markup that we used to accept & ignore that recent versions raised an exception for; the original behavior has been restored and augmented (the user can decide what to do if they care; the default is to ignore it as done in early versions). --- diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py index a37696d445..ff0af9e323 100644 --- a/Lib/test/test_sgmllib.py +++ b/Lib/test/test_sgmllib.py @@ -54,6 +54,9 @@ class EventCollector(sgmllib.SGMLParser): def handle_pi(self, data): self.append(("pi", data)) + def unknown_decl(self, decl): + self.append(("unknown decl", decl)) + class CDATAEventCollector(EventCollector): def start_cdata(self, attrs): @@ -65,12 +68,24 @@ class SGMLParserTestCase(unittest.TestCase): collector = EventCollector - def check_events(self, source, expected_events): + def get_events(self, source): parser = self.collector() - for s in source: - parser.feed(s) - parser.close() - events = parser.get_events() + try: + for s in source: + parser.feed(s) + parser.close() + except: + #self.events = parser.events + raise + return parser.get_events() + + def check_events(self, source, expected_events): + try: + events = self.get_events(source) + except: + import sys + #print >>sys.stderr, pprint.pformat(self.events) + raise if events != expected_events: self.fail("received events did not match expected events\n" "Expected:\n" + pprint.pformat(expected_events) + @@ -87,6 +102,31 @@ class SGMLParserTestCase(unittest.TestCase): self.fail("expected SGMLParseError for %r\nReceived:\n%s" % (source, pprint.pformat(parser.get_events()))) + def test_doctype_decl_internal(self): + 
inside = """\ +DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN' + SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [ + + + + + + + %paramEntity; + +]""" + self.check_events(["<!%s>" % inside], [ + ("decl", inside), + ]) + + def test_doctype_decl_external(self): + inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'" + self.check_events("<!%s>" % inside, [ + ("decl", inside), + ]) + def test_underscore_in_attrname(self): # SF bug #436621 """Make sure attribute names with underscores are accepted""" @@ -132,6 +172,16 @@ class SGMLParserTestCase(unittest.TestCase): ("endtag", "b"), ]) + def test_bare_ampersands(self): + self.check_events("this text & contains & ampersands &", [ + ("data", "this text & contains & ampersands &"), + ]) + + def test_bare_pointy_brackets(self): + self.check_events("this < text > contains < bare>pointy< brackets", [ + ("data", "this < text > contains < bare>pointy< brackets"), + ]) + def test_attr_syntax(self): output = [ ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")]) @@ -156,6 +206,14 @@ class SGMLParserTestCase(unittest.TestCase): ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), ]) + def test_illegal_declarations(self): + s = 'abc<!spacer type="block" height="25">def' + self.check_events(s, [ + ("data", "abc"), + ("unknown decl", 'spacer type="block" height="25"'), + ("data", "def"), + ]) + def test_weird_starttags(self): self.check_events("", [ ("starttag", "a", []), @@ -196,6 +254,14 @@ class SGMLParserTestCase(unittest.TestCase): ("endtag", "cdata"), ]) + def test_illegal_declarations(self): + s = 'abc<!spacer type="block" height="25">def' + self.check_events(s, [ + ("data", "abc"), + ("unknown decl", 'spacer type="block" height="25"'), + ("data", "def"), + ]) + # XXX These tests have been disabled by prefixing their names with # an underscore.
The first two exercise outstanding bugs in the # sgmllib module, and the third exhibits questionable behavior @@ -240,4 +306,9 @@ class SGMLParserTestCase(unittest.TestCase): self.check_parse_error("") -test_support.run_unittest(SGMLParserTestCase) +def test_main(): + test_support.run_unittest(SGMLParserTestCase) + + +if __name__ == "__main__": + test_main()