projects
/
python
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
| inline |
side by side
(parent:
05595e9
)
When bad HTML is encountered, ignore the page rather than failing with a traceback.
author
Mark Hammond
<mhammond@skippinet.com.au>
Thu, 27 Feb 2003 06:59:10 +0000
(06:59 +0000)
committer
Mark Hammond
<mhammond@skippinet.com.au>
Thu, 27 Feb 2003 06:59:10 +0000
(06:59 +0000)
a traceback.
Tools/webchecker/webchecker.py
patch
|
blob
|
history
diff --git
a/Tools/webchecker/webchecker.py
b/Tools/webchecker/webchecker.py
index e8d0ed746fe0932c3e7b4a65fe8718d2a936c77f..e89529e5cf3a19982fc4ce834df650041aef705f 100755
(executable)
--- a/
Tools/webchecker/webchecker.py
+++ b/
Tools/webchecker/webchecker.py
@@
-400,7
+400,15
@@
class Checker:
if local_fragment and self.nonames:
self.markdone(url_pair)
return
- page = self.getpage(url_pair)
+ try:
+ page = self.getpage(url_pair)
+ except sgmllib.SGMLParseError, msg:
+ msg = self.sanitize(msg)
+ self.note(0, "Error parsing %s: %s",
+ self.format_url(url_pair), msg)
+ # Dont actually mark the URL as bad - it exists, just
+ # we can't parse it!
+ page = None
if page:
# Store the page which corresponds to this URL.
self.name_table[url] = page