granicus.if.org Git - python/commitdiff
Some refinements of the external-link checking code: insert the errors
author: Guido van Rossum <guido@python.org>
Thu, 30 Jan 1997 04:26:57 +0000 (04:26 +0000)
committer: Guido van Rossum <guido@python.org>
Thu, 30 Jan 1997 04:26:57 +0000 (04:26 +0000)
in the 'bad' dictionary (sanitize them so they are picklable; the
sanitizing code is now a subroutine); don't check mailto: URLs; omit
the colon in the "Error" message.

Tools/webchecker/webchecker.py

index d6c81cce64af9afebc1911c6eaf5d7f9106d07f0..7eb9a250d5523e7df491350edadf6fcaad049856 100755 (executable)
@@ -305,13 +305,18 @@ class Checker:
            show("HREF ", url, " from", self.ext[url])
            if not checkext:
                continue
+           if url[:7] == 'mailto:':
+               if verbose > 2: print "Not checking", url
+               continue
            if verbose > 2: print "Checking", url, "..."
            try:
                f = self.urlopener.open(url)
                f.close()
                if verbose > 3: print "OK"
            except IOError, msg:
-               print "Error:", msg
+               msg = sanitize(msg)
+               print "Error", msg
+               self.bad[url] = msg
 
     def report_errors(self):
        if not self.bad:
@@ -327,7 +332,10 @@ class Checker:
            try:
                origins = self.done[url]
            except KeyError:
-               origins = self.todo[url]
+               try:
+                   origins = self.todo[url]
+               except KeyError:
+                   origins = self.ext[url]
            for source, rawlink in origins:
                triple = url, rawlink, self.bad[url]
                try:
@@ -406,13 +414,7 @@ class Checker:
        try:
            f = self.urlopener.open(url)
        except IOError, msg:
-           if (type(msg) == TupleType and
-               len(msg) >= 4 and
-               msg[0] == 'http error' and
-               type(msg[3]) == InstanceType):
-               # Remove the Message instance -- it may contain
-               # a file object which prevents pickling.
-               msg = msg[:3] + msg[4:]
+           msg = sanitize(msg)
            if verbose > 0:
                print "Error ", msg
            if verbose > 0:
@@ -549,5 +551,16 @@ def show(p1, link, p2, origins):
        print
 
 
+def sanitize(msg):
+    if (type(msg) == TupleType and
+       len(msg) >= 4 and
+       msg[0] == 'http error' and
+       type(msg[3]) == InstanceType):
+       # Remove the Message instance -- it may contain
+       # a file object which prevents pickling.
+       msg = msg[:3] + msg[4:]
+    return msg
+
+
 if __name__ == '__main__':
     main()