granicus.if.org Git - python/commitdiff
When bad HTML is encountered, ignore the page rather than failing with
author Mark Hammond <mhammond@skippinet.com.au>
Thu, 27 Feb 2003 06:59:10 +0000 (06:59 +0000)
committer Mark Hammond <mhammond@skippinet.com.au>
Thu, 27 Feb 2003 06:59:10 +0000 (06:59 +0000)
a traceback.

Tools/webchecker/webchecker.py

index e8d0ed746fe0932c3e7b4a65fe8718d2a936c77f..e89529e5cf3a19982fc4ce834df650041aef705f 100755 (executable)
@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                          self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page