granicus.if.org Git - python/commitdiff
Give in to tabnanny
author: Guido van Rossum <guido@python.org>
Mon, 6 Apr 1998 14:29:28 +0000 (14:29 +0000)
committer: Guido van Rossum <guido@python.org>
Mon, 6 Apr 1998 14:29:28 +0000 (14:29 +0000)
Lib/robotparser.py
Tools/webchecker/mimetypes.py [deleted file]
Tools/webchecker/robotparser.py
Tools/webchecker/tktools.py
Tools/webchecker/wcgui.py
Tools/webchecker/webchecker.py
Tools/webchecker/websucker.py

index 634c3fe2993dc4150c1420858525f6695c1d148d..6f85afacd9d21a605d94f92d6c10cf33d959c2ed 100644 (file)
@@ -9,79 +9,79 @@ fetchability of other URLs.
 class RobotFileParser:
 
     def __init__(self):
-       self.rules = {}
-       self.debug = 0
-       self.url = ''
-       self.last_checked = 0
+        self.rules = {}
+        self.debug = 0
+        self.url = ''
+        self.last_checked = 0
 
     def mtime(self):
-       return self.last_checked
+        return self.last_checked
 
     def modified(self):
-       import time
-       self.last_checked = time.time()
+        import time
+        self.last_checked = time.time()
 
     def set_url(self, url):
-       self.url = url
-##     import urlmisc
-##     self.url = urlmisc.canonical_url(url)
+        self.url = url
+##      import urlmisc
+##      self.url = urlmisc.canonical_url(url)
 
     def read(self):
-       import urllib
-       self.parse(urllib.urlopen(self.url).readlines())
+        import urllib
+        self.parse(urllib.urlopen(self.url).readlines())
 
     def parse(self, lines):
-       import regsub, string, regex
-       active = []
-       for line in lines:
-           if self.debug: print '>', line,
-           # blank line terminates current record
-           if not line[:-1]:
-               active = []
-               continue
-           # remove optional comment and strip line
-           line = string.strip(line[:string.find(line, '#')])
-           if not line:
-               continue
-           line = regsub.split(line, ' *: *')
-           if len(line) == 2:
-               line[0] = string.lower(line[0])
-               if line[0] == 'user-agent':
-                   # this record applies to this user agent
-                   if self.debug: print '>> user-agent:', line[1]
-                   active.append(line[1])
-                   if not self.rules.has_key(line[1]):
-                       self.rules[line[1]] = []
-               elif line[0] == 'disallow':
-                   if line[1]:
-                       if self.debug: print '>> disallow:', line[1]
-                       for agent in active:
-                           self.rules[agent].append(regex.compile(line[1]))
-                   else:
-                       pass
-                       for agent in active:
-                           if self.debug: print '>> allow', agent
-                           self.rules[agent] = []
-               else:
-                   if self.debug: print '>> unknown:', line
+        import regsub, string, regex
+        active = []
+        for line in lines:
+            if self.debug: print '>', line,
+            # blank line terminates current record
+            if not line[:-1]:
+                active = []
+                continue
+            # remove optional comment and strip line
+            line = string.strip(line[:string.find(line, '#')])
+            if not line:
+                continue
+            line = regsub.split(line, ' *: *')
+            if len(line) == 2:
+                line[0] = string.lower(line[0])
+                if line[0] == 'user-agent':
+                    # this record applies to this user agent
+                    if self.debug: print '>> user-agent:', line[1]
+                    active.append(line[1])
+                    if not self.rules.has_key(line[1]):
+                        self.rules[line[1]] = []
+                elif line[0] == 'disallow':
+                    if line[1]:
+                        if self.debug: print '>> disallow:', line[1]
+                        for agent in active:
+                            self.rules[agent].append(regex.compile(line[1]))
+                    else:
+                        pass
+                        for agent in active:
+                            if self.debug: print '>> allow', agent
+                            self.rules[agent] = []
+                else:
+                    if self.debug: print '>> unknown:', line
 
-       self.modified()
+        self.modified()
 
     # returns true if agent is allowed to fetch url
     def can_fetch(self, agent, url):
-       import urlparse
-       ag = agent
-       if not self.rules.has_key(ag): ag = '*'
-       if not self.rules.has_key(ag):
-           if self.debug: print '>> allowing', url, 'fetch by', agent
-           return 1
-       path = urlparse.urlparse(url)[2]
-       for rule in self.rules[ag]:
-           if rule.match(path) != -1:
-               if self.debug: print '>> disallowing', url, 'fetch by', agent
-               return 0
-       if self.debug: print '>> allowing', url, 'fetch by', agent
-       return 1
+        import urlparse
+        ag = agent
+        if not self.rules.has_key(ag): ag = '*'
+        if not self.rules.has_key(ag):
+            if self.debug: print '>> allowing', url, 'fetch by', agent
+            return 1
+        path = urlparse.urlparse(url)[2]
+        for rule in self.rules[ag]:
+            if rule.match(path) != -1:
+                if self.debug: print '>> disallowing', url, 'fetch by', agent
+                return 0
+        if self.debug: print '>> allowing', url, 'fetch by', agent
+        return 1
 
 def test():
     rp = RobotFileParser()
@@ -91,7 +91,7 @@ def test():
     print rp.rules
     print rp.can_fetch('*', 'http://www.calendar.com/concerts/')
     print rp.can_fetch('Musi-Cal-Robot',
-                      'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones')
+                       'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones')
 
     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/')
     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/vanagon-list-001')
diff --git a/Tools/webchecker/mimetypes.py b/Tools/webchecker/mimetypes.py
deleted file mode 100644 (file)
index 0b1748e..0000000
+++ /dev/null
@@ -1,191 +0,0 @@
-"""Guess the MIME type of a file.
-
-This module defines one useful function:
-
-guess_type(url) -- guess the MIME type and encoding of a URL.
-
-It also contains the following, for tuning the behavior:
-
-Data:
-
-knownfiles -- list of files to parse
-inited -- flag set when init() has been called
-suffixes_map -- dictionary mapping suffixes to suffixes
-encodings_map -- dictionary mapping suffixes to encodings
-types_map -- dictionary mapping suffixes to types
-
-Functions:
-
-init([files]) -- parse a list of files, default knownfiles
-read_mime_types(file) -- parse one file, return a dictionary or None
-
-"""
-
-import string
-import posixpath
-
-knownfiles = [
-    "/usr/local/etc/httpd/conf/mime.types",
-    "/usr/local/lib/netscape/mime.types",
-    ]
-
-inited = 0
-
-def guess_type(url):
-    """Guess the type of a file based on its URL.
-
-    Return value is a tuple (type, encoding) where type is None if the
-    type can't be guessed (no or unknown suffix) or a string of the
-    form type/subtype, usable for a MIME Content-type header; and
-    encoding is None for no encoding or the name of the program used
-    to encode (e.g. compress or gzip).  The mappings are table
-    driven.  Encoding suffixes are case sensitive; type suffixes are
-    first tried case sensitive, then case insensitive.
-
-    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
-    to ".tar.gz".  (This is table-driven too, using the dictionary
-    suffixes_map).
-
-    """
-    if not inited:
-       init()
-    base, ext = posixpath.splitext(url)
-    while suffix_map.has_key(ext):
-       base, ext = posixpath.splitext(base + suffix_map[ext])
-    if encodings_map.has_key(ext):
-       encoding = encodings_map[ext]
-       base, ext = posixpath.splitext(base)
-    else:
-       encoding = None
-    if types_map.has_key(ext):
-       return types_map[ext], encoding
-    elif types_map.has_key(string.lower(ext)):
-       return types_map[string.lower(ext)], encoding
-    else:
-       return None, encoding
-
-def init(files=None):
-    global inited
-    for file in files or knownfiles:
-       s = read_mime_types(file)
-       if s:
-           for key, value in s.items():
-               types_map[key] = value
-    inited = 1
-
-def read_mime_types(file):
-    try:
-       f = open(file)
-    except IOError:
-       return None
-    map = {}
-    while 1:
-       line = f.readline()
-       if not line: break
-       words = string.split(line)
-       for i in range(len(words)):
-           if words[i][0] == '#':
-               del words[i:]
-               break
-       if not words: continue
-       type, suffixes = words[0], words[1:]
-       for suff in suffixes:
-           map['.'+suff] = type
-    f.close()
-    return map
-
-suffix_map = {
-    '.tgz': '.tar.gz',
-    '.taz': '.tar.gz',
-    '.tz': '.tar.gz',
-}
-
-encodings_map = {
-    '.gz': 'gzip',
-    '.Z': 'compress',
-    }
-
-types_map = {
-    '.a': 'application/octet-stream',
-    '.ai': 'application/postscript',
-    '.aif': 'audio/x-aiff',
-    '.aifc': 'audio/x-aiff',
-    '.aiff': 'audio/x-aiff',
-    '.au': 'audio/basic',
-    '.avi': 'video/x-msvideo',
-    '.bcpio': 'application/x-bcpio',
-    '.bin': 'application/octet-stream',
-    '.cdf': 'application/x-netcdf',
-    '.cpio': 'application/x-cpio',
-    '.csh': 'application/x-csh',
-    '.dll': 'application/octet-stream',
-    '.dvi': 'application/x-dvi',
-    '.exe': 'application/octet-stream',
-    '.eps': 'application/postscript',
-    '.etx': 'text/x-setext',
-    '.gif': 'image/gif',
-    '.gtar': 'application/x-gtar',
-    '.hdf': 'application/x-hdf',
-    '.htm': 'text/html',
-    '.html': 'text/html',
-    '.shtml': 'text/html',
-    '.ief': 'image/ief',
-    '.jpe': 'image/jpeg',
-    '.jpeg': 'image/jpeg',
-    '.jpg': 'image/jpeg',
-    '.latex': 'application/x-latex',
-    '.man': 'application/x-troff-man',
-    '.me': 'application/x-troff-me',
-    '.mif': 'application/x-mif',
-    '.mov': 'video/quicktime',
-    '.movie': 'video/x-sgi-movie',
-    '.mpe': 'video/mpeg',
-    '.mpeg': 'video/mpeg',
-    '.mpg': 'video/mpeg',
-    '.ms': 'application/x-troff-ms',
-    '.nc': 'application/x-netcdf',
-    '.o': 'application/octet-stream',
-    '.obj': 'application/octet-stream',
-    '.oda': 'application/oda',
-    '.pbm': 'image/x-portable-bitmap',
-    '.pdf': 'application/pdf',
-    '.pgm': 'image/x-portable-graymap',
-    '.pnm': 'image/x-portable-anymap',
-    '.png': 'image/png',
-    '.ppm': 'image/x-portable-pixmap',
-    '.py': 'text/x-python',
-    '.pyc': 'application/x-python-code',
-    '.ps': 'application/postscript',
-    '.qt': 'video/quicktime',
-    '.ras': 'image/x-cmu-raster',
-    '.rgb': 'image/x-rgb',
-    '.roff': 'application/x-troff',
-    '.rtf': 'application/rtf',
-    '.rtx': 'text/richtext',
-    '.sgm': 'text/x-sgml',
-    '.sgml': 'text/x-sgml',
-    '.sh': 'application/x-sh',
-    '.shar': 'application/x-shar',
-    '.snd': 'audio/basic',
-    '.so': 'application/octet-stream',
-    '.src': 'application/x-wais-source',
-    '.sv4cpio': 'application/x-sv4cpio',
-    '.sv4crc': 'application/x-sv4crc',
-    '.t': 'application/x-troff',
-    '.tar': 'application/x-tar',
-    '.tcl': 'application/x-tcl',
-    '.tex': 'application/x-tex',
-    '.texi': 'application/x-texinfo',
-    '.texinfo': 'application/x-texinfo',
-    '.tif': 'image/tiff',
-    '.tiff': 'image/tiff',
-    '.tr': 'application/x-troff',
-    '.tsv': 'text/tab-separated-values',
-    '.txt': 'text/plain',
-    '.ustar': 'application/x-ustar',
-    '.wav': 'audio/x-wav',
-    '.xbm': 'image/x-xbitmap',
-    '.xpm': 'image/x-xpixmap',
-    '.xwd': 'image/x-xwindowdump',
-    '.zip': 'application/zip',
-    }
index 634c3fe2993dc4150c1420858525f6695c1d148d..6f85afacd9d21a605d94f92d6c10cf33d959c2ed 100644 (file)
@@ -9,79 +9,79 @@ fetchability of other URLs.
 class RobotFileParser:
 
     def __init__(self):
-       self.rules = {}
-       self.debug = 0
-       self.url = ''
-       self.last_checked = 0
+        self.rules = {}
+        self.debug = 0
+        self.url = ''
+        self.last_checked = 0
 
     def mtime(self):
-       return self.last_checked
+        return self.last_checked
 
     def modified(self):
-       import time
-       self.last_checked = time.time()
+        import time
+        self.last_checked = time.time()
 
     def set_url(self, url):
-       self.url = url
-##     import urlmisc
-##     self.url = urlmisc.canonical_url(url)
+        self.url = url
+##      import urlmisc
+##      self.url = urlmisc.canonical_url(url)
 
     def read(self):
-       import urllib
-       self.parse(urllib.urlopen(self.url).readlines())
+        import urllib
+        self.parse(urllib.urlopen(self.url).readlines())
 
     def parse(self, lines):
-       import regsub, string, regex
-       active = []
-       for line in lines:
-           if self.debug: print '>', line,
-           # blank line terminates current record
-           if not line[:-1]:
-               active = []
-               continue
-           # remove optional comment and strip line
-           line = string.strip(line[:string.find(line, '#')])
-           if not line:
-               continue
-           line = regsub.split(line, ' *: *')
-           if len(line) == 2:
-               line[0] = string.lower(line[0])
-               if line[0] == 'user-agent':
-                   # this record applies to this user agent
-                   if self.debug: print '>> user-agent:', line[1]
-                   active.append(line[1])
-                   if not self.rules.has_key(line[1]):
-                       self.rules[line[1]] = []
-               elif line[0] == 'disallow':
-                   if line[1]:
-                       if self.debug: print '>> disallow:', line[1]
-                       for agent in active:
-                           self.rules[agent].append(regex.compile(line[1]))
-                   else:
-                       pass
-                       for agent in active:
-                           if self.debug: print '>> allow', agent
-                           self.rules[agent] = []
-               else:
-                   if self.debug: print '>> unknown:', line
+        import regsub, string, regex
+        active = []
+        for line in lines:
+            if self.debug: print '>', line,
+            # blank line terminates current record
+            if not line[:-1]:
+                active = []
+                continue
+            # remove optional comment and strip line
+            line = string.strip(line[:string.find(line, '#')])
+            if not line:
+                continue
+            line = regsub.split(line, ' *: *')
+            if len(line) == 2:
+                line[0] = string.lower(line[0])
+                if line[0] == 'user-agent':
+                    # this record applies to this user agent
+                    if self.debug: print '>> user-agent:', line[1]
+                    active.append(line[1])
+                    if not self.rules.has_key(line[1]):
+                        self.rules[line[1]] = []
+                elif line[0] == 'disallow':
+                    if line[1]:
+                        if self.debug: print '>> disallow:', line[1]
+                        for agent in active:
+                            self.rules[agent].append(regex.compile(line[1]))
+                    else:
+                        pass
+                        for agent in active:
+                            if self.debug: print '>> allow', agent
+                            self.rules[agent] = []
+                else:
+                    if self.debug: print '>> unknown:', line
 
-       self.modified()
+        self.modified()
 
     # returns true if agent is allowed to fetch url
     def can_fetch(self, agent, url):
-       import urlparse
-       ag = agent
-       if not self.rules.has_key(ag): ag = '*'
-       if not self.rules.has_key(ag):
-           if self.debug: print '>> allowing', url, 'fetch by', agent
-           return 1
-       path = urlparse.urlparse(url)[2]
-       for rule in self.rules[ag]:
-           if rule.match(path) != -1:
-               if self.debug: print '>> disallowing', url, 'fetch by', agent
-               return 0
-       if self.debug: print '>> allowing', url, 'fetch by', agent
-       return 1
+        import urlparse
+        ag = agent
+        if not self.rules.has_key(ag): ag = '*'
+        if not self.rules.has_key(ag):
+            if self.debug: print '>> allowing', url, 'fetch by', agent
+            return 1
+        path = urlparse.urlparse(url)[2]
+        for rule in self.rules[ag]:
+            if rule.match(path) != -1:
+                if self.debug: print '>> disallowing', url, 'fetch by', agent
+                return 0
+        if self.debug: print '>> allowing', url, 'fetch by', agent
+        return 1
 
 def test():
     rp = RobotFileParser()
@@ -91,7 +91,7 @@ def test():
     print rp.rules
     print rp.can_fetch('*', 'http://www.calendar.com/concerts/')
     print rp.can_fetch('Musi-Cal-Robot',
-                      'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones')
+                       'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones')
 
     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/')
     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/vanagon-list-001')
index 6734530a02b84864841465f8f89a318a6e87f9ea..0db4d490880e35a5b0e6caa4dde0e47d61b3e876 100644 (file)
@@ -7,8 +7,8 @@ from Tkinter import *
 
 def _clear_entry_widget(event):
     try:
-       widget = event.widget
-       widget.delete(0, INSERT)
+        widget = event.widget
+        widget.delete(0, INSERT)
     except: pass
 def install_keybindings(root):
     root.bind_class('Entry', '<Control-u>', _clear_entry_widget)
@@ -23,12 +23,12 @@ def make_toplevel(master, title=None, class_=None):
     """
 
     if class_:
-       widget = Toplevel(master, class_=class_)
+        widget = Toplevel(master, class_=class_)
     else:
-       widget = Toplevel(master)
+        widget = Toplevel(master)
     if title:
-       widget.title(title)
-       widget.iconname(title)
+        widget.title(title)
+        widget.iconname(title)
     return widget
 
 def set_transient(widget, master, relx=0.5, rely=0.3, expose=1):
@@ -43,26 +43,26 @@ def set_transient(widget, master, relx=0.5, rely=0.3, expose=1):
     widget.transient(master)
     widget.update_idletasks() # Actualize geometry information
     if master.winfo_ismapped():
-       m_width = master.winfo_width()
-       m_height = master.winfo_height()
-       m_x = master.winfo_rootx()
-       m_y = master.winfo_rooty()
+        m_width = master.winfo_width()
+        m_height = master.winfo_height()
+        m_x = master.winfo_rootx()
+        m_y = master.winfo_rooty()
     else:
-       m_width = master.winfo_screenwidth()
-       m_height = master.winfo_screenheight()
-       m_x = m_y = 0
+        m_width = master.winfo_screenwidth()
+        m_height = master.winfo_screenheight()
+        m_x = m_y = 0
     w_width = widget.winfo_reqwidth()
     w_height = widget.winfo_reqheight()
     x = m_x + (m_width - w_width) * relx
     y = m_y + (m_height - w_height) * rely
     widget.geometry("+%d+%d" % (x, y))
     if expose:
-       widget.deiconify()      # Become visible at the desired location
+        widget.deiconify()      # Become visible at the desired location
     return widget
 
 
 def make_scrollbars(parent, hbar, vbar, pack=1, class_=None, name=None,
-                   takefocus=0):
+                    takefocus=0):
 
     """Subroutine to create a frame with scrollbars.
 
@@ -76,38 +76,38 @@ def make_scrollbars(parent, hbar, vbar, pack=1, class_=None, name=None,
 
     """
     if class_:
-       if name: frame = Frame(parent, class_=class_, name=name)
-       else: frame = Frame(parent, class_=class_)
+        if name: frame = Frame(parent, class_=class_, name=name)
+        else: frame = Frame(parent, class_=class_)
     else:
-       if name: frame = Frame(parent, name=name)
-       else: frame = Frame(parent)
+        if name: frame = Frame(parent, name=name)
+        else: frame = Frame(parent)
 
     if pack:
-       frame.pack(fill=BOTH, expand=1)
+        frame.pack(fill=BOTH, expand=1)
 
     corner = None
     if vbar:
-       if not hbar:
-           vbar = Scrollbar(frame, takefocus=takefocus)
-           vbar.pack(fill=Y, side=RIGHT)
-       else:
-           vbarframe = Frame(frame, borderwidth=0)
-           vbarframe.pack(fill=Y, side=RIGHT)
-           vbar = Scrollbar(frame, name="vbar", takefocus=takefocus)
-           vbar.pack(in_=vbarframe, expand=1, fill=Y, side=TOP)
-           sbwidth = vbar.winfo_reqwidth()
-           corner = Frame(vbarframe, width=sbwidth, height=sbwidth)
-           corner.propagate(0)
-           corner.pack(side=BOTTOM)
+        if not hbar:
+            vbar = Scrollbar(frame, takefocus=takefocus)
+            vbar.pack(fill=Y, side=RIGHT)
+        else:
+            vbarframe = Frame(frame, borderwidth=0)
+            vbarframe.pack(fill=Y, side=RIGHT)
+            vbar = Scrollbar(frame, name="vbar", takefocus=takefocus)
+            vbar.pack(in_=vbarframe, expand=1, fill=Y, side=TOP)
+            sbwidth = vbar.winfo_reqwidth()
+            corner = Frame(vbarframe, width=sbwidth, height=sbwidth)
+            corner.propagate(0)
+            corner.pack(side=BOTTOM)
     else:
-       vbar = None
+        vbar = None
 
     if hbar:
-       hbar = Scrollbar(frame, orient=HORIZONTAL, name="hbar",
-                        takefocus=takefocus)
-       hbar.pack(fill=X, side=BOTTOM)
+        hbar = Scrollbar(frame, orient=HORIZONTAL, name="hbar",
+                         takefocus=takefocus)
+        hbar.pack(fill=X, side=BOTTOM)
     else:
-       hbar = None
+        hbar = None
 
     return hbar, vbar, frame
 
@@ -121,20 +121,20 @@ def set_scroll_commands(widget, hbar, vbar):
     """
 
     if vbar:
-       widget['yscrollcommand'] = (vbar, 'set')
-       vbar['command'] = (widget, 'yview')
+        widget['yscrollcommand'] = (vbar, 'set')
+        vbar['command'] = (widget, 'yview')
 
     if hbar:
-       widget['xscrollcommand'] = (hbar, 'set')
-       hbar['command'] = (widget, 'xview')
+        widget['xscrollcommand'] = (hbar, 'set')
+        hbar['command'] = (widget, 'xview')
 
     widget.vbar = vbar
     widget.hbar = hbar
 
 
 def make_text_box(parent, width=0, height=0, hbar=0, vbar=1,
-                 fill=BOTH, expand=1, wrap=WORD, pack=1,
-                 class_=None, name=None, takefocus=None):
+                  fill=BOTH, expand=1, wrap=WORD, pack=1,
+                  class_=None, name=None, takefocus=None):
 
     """Subroutine to create a text box.
 
@@ -148,8 +148,8 @@ def make_text_box(parent, width=0, height=0, hbar=0, vbar=1,
 
     """
     hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack,
-                                       class_=class_, name=name,
-                                       takefocus=takefocus)
+                                        class_=class_, name=name,
+                                        takefocus=takefocus)
 
     widget = Text(frame, wrap=wrap, name="text")
     if width: widget.config(width=width)
@@ -162,16 +162,16 @@ def make_text_box(parent, width=0, height=0, hbar=0, vbar=1,
 
 
 def make_list_box(parent, width=0, height=0, hbar=0, vbar=1,
-                 fill=BOTH, expand=1, pack=1, class_=None, name=None,
-                 takefocus=None):
+                  fill=BOTH, expand=1, pack=1, class_=None, name=None,
+                  takefocus=None):
 
     """Subroutine to create a list box.
 
     Like make_text_box().
     """
     hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack,
-                                       class_=class_, name=name,
-                                       takefocus=takefocus)
+                                        class_=class_, name=name,
+                                        takefocus=takefocus)
 
     widget = Listbox(frame, name="listbox")
     if width: widget.config(width=width)
@@ -184,8 +184,8 @@ def make_list_box(parent, width=0, height=0, hbar=0, vbar=1,
 
 
 def make_canvas(parent, width=0, height=0, hbar=1, vbar=1,
-               fill=BOTH, expand=1, pack=1, class_=None, name=None,
-               takefocus=None):
+                fill=BOTH, expand=1, pack=1, class_=None, name=None,
+                takefocus=None):
 
     """Subroutine to create a canvas.
 
@@ -194,8 +194,8 @@ def make_canvas(parent, width=0, height=0, hbar=1, vbar=1,
     """
 
     hbar, vbar, frame = make_scrollbars(parent, hbar, vbar, pack,
-                                       class_=class_, name=name,
-                                       takefocus=takefocus)
+                                        class_=class_, name=name,
+                                        takefocus=takefocus)
 
     widget = Canvas(frame, scrollregion=(0, 0, width, height), name="canvas")
     if width: widget.config(width=width)
@@ -228,9 +228,9 @@ def make_form_entry(parent, label, borderwidth=None):
     label.pack(side=LEFT)
 
     if borderwidth is None:
-       entry = Entry(frame, relief=SUNKEN)
+        entry = Entry(frame, relief=SUNKEN)
     else:
-       entry = Entry(frame, relief=SUNKEN, borderwidth=borderwidth)
+        entry = Entry(frame, relief=SUNKEN, borderwidth=borderwidth)
     entry.pack(side=LEFT, fill=X, expand=1)
 
     return entry, frame
@@ -243,8 +243,8 @@ def make_form_entry(parent, label, borderwidth=None):
 # expandable while still aligning the colons.  This doesn't work yet.
 #
 def make_labeled_form_entry(parent, label, entrywidth=20, entryheight=1,
-                           labelwidth=0, borderwidth=None,
-                           takefocus=None):
+                            labelwidth=0, borderwidth=None,
+                            takefocus=None):
     """Subroutine to create a form entry.
 
     Create:
@@ -261,32 +261,32 @@ def make_labeled_form_entry(parent, label, entrywidth=20, entryheight=1,
     label = Label(frame, text=label, width=labelwidth, anchor=E)
     label.pack(side=LEFT)
     if entryheight == 1:
-       if borderwidth is None:
-           entry = Entry(frame, relief=SUNKEN, width=entrywidth)
-       else:
-           entry = Entry(frame, relief=SUNKEN, width=entrywidth,
-                         borderwidth=borderwidth)
-       entry.pack(side=RIGHT, expand=1, fill=X)
-       frame.pack(fill=X)
+        if borderwidth is None:
+            entry = Entry(frame, relief=SUNKEN, width=entrywidth)
+        else:
+            entry = Entry(frame, relief=SUNKEN, width=entrywidth,
+                          borderwidth=borderwidth)
+        entry.pack(side=RIGHT, expand=1, fill=X)
+        frame.pack(fill=X)
     else:
-       entry = make_text_box(frame, entrywidth, entryheight, 1, 1,
-                             takefocus=takefocus)
-       frame.pack(fill=BOTH, expand=1)
+        entry = make_text_box(frame, entrywidth, entryheight, 1, 1,
+                              takefocus=takefocus)
+        frame.pack(fill=BOTH, expand=1)
 
     return entry, frame, label
 
 
 def make_double_frame(master=None, class_=None, name=None, relief=RAISED,
-                     borderwidth=1):
+                      borderwidth=1):
     """Create a pair of frames suitable for 'hosting' a dialog."""
     if name:
-       if class_: frame = Frame(master, class_=class_, name=name)
-       else: frame = Frame(master, name=name)
+        if class_: frame = Frame(master, class_=class_, name=name)
+        else: frame = Frame(master, name=name)
     else:
-       if class_: frame = Frame(master, class_=class_)
-       else: frame = Frame(master)
+        if class_: frame = Frame(master, class_=class_)
+        else: frame = Frame(master)
     top = Frame(frame, name="topframe", relief=relief,
-               borderwidth=borderwidth)
+                borderwidth=borderwidth)
     bottom = Frame(frame, name="bottomframe")
     bottom.pack(fill=X, padx='1m', pady='1m', side=BOTTOM)
     top.pack(expand=1, fill=BOTH, padx='1m', pady='1m')
@@ -298,7 +298,7 @@ def make_double_frame(master=None, class_=None, name=None, relief=RAISED,
 
 
 def make_group_frame(master, name=None, label=None, fill=Y,
-                    side=None, expand=None, font=None):
+                     side=None, expand=None, font=None):
     """Create nested frames with a border and optional label.
 
     The outer frame is only used to provide the decorative border, to
@@ -311,7 +311,7 @@ def make_group_frame(master, name=None, label=None, fill=Y,
     outer = Frame(master, borderwidth=2, relief=GROOVE)
     outer.pack(expand=expand, fill=fill, side=side)
     if label:
-       Label(outer, text=label, font=font, anchor=W).pack(fill=X)
+        Label(outer, text=label, font=font, anchor=W).pack(fill=X)
     inner = Frame(master, borderwidth='1m', name=name)
     inner.pack(expand=1, fill=BOTH, in_=outer)
     inner.forget = outer.forget
@@ -326,20 +326,20 @@ def unify_button_widths(*buttons):
     """
     wid = 0
     for btn in buttons:
-       wid = max(wid, len(btn["text"]))
+        wid = max(wid, len(btn["text"]))
     for btn in buttons:
-       btn["width"] = wid
+        btn["width"] = wid
 
 
 def flatten(msg):
     """Turn a list or tuple into a single string -- recursively."""
     t = type(msg)
     if t in (ListType, TupleType):
-       msg = string.join(map(flatten, msg))
+        msg = string.join(map(flatten, msg))
     elif t is ClassType:
-       msg = msg.__name__
+        msg = msg.__name__
     else:
-       msg = str(msg)
+        msg = str(msg)
     return msg
 
 
@@ -356,8 +356,8 @@ def test():
     entry, eframe = make_form_entry(root, 'Boolean:')
     text, tframe = make_text_box(root)
     def enter(event, entry=entry, text=text):
-       s = boolean(entry.get()) and '\nyes' or '\nno'
-       text.insert('end', s)
+        s = boolean(entry.get()) and '\nyes' or '\nno'
+        text.insert('end', s)
     entry.bind('<Return>', enter)
     entry.insert(END, flatten(sys.argv))
     root.mainloop()
index 027718fb5bf2c266f3308bf18891a53d647efee3..600082978a3b73834060ce17d203d375a18ab079 100755 (executable)
@@ -72,365 +72,365 @@ if sys.platform == 'mac':
 
 def main():
     try:
-       opts, args = getopt.getopt(sys.argv[1:], 'm:qv')
+        opts, args = getopt.getopt(sys.argv[1:], 'm:qv')
     except getopt.error, msg:
-       sys.stdout = sys.stderr
-       print msg
-       print __doc__%vars(webchecker)
-       sys.exit(2)
+        sys.stdout = sys.stderr
+        print msg
+        print __doc__%vars(webchecker)
+        sys.exit(2)
     for o, a in opts:
-       if o == '-m':
-           webchecker.maxpage = string.atoi(a)
-       if o == '-q':
-           webchecker.verbose = 0
-       if o == '-v':
-           webchecker.verbose = webchecker.verbose + 1
+        if o == '-m':
+            webchecker.maxpage = string.atoi(a)
+        if o == '-q':
+            webchecker.verbose = 0
+        if o == '-v':
+            webchecker.verbose = webchecker.verbose + 1
     root = Tk(className='Webchecker')
     root.protocol("WM_DELETE_WINDOW", root.quit)
     c = CheckerWindow(root)
     if args:
-       for arg in args[:-1]:
-           c.addroot(arg)
-       c.suggestroot(args[-1])
+        for arg in args[:-1]:
+            c.addroot(arg)
+        c.suggestroot(args[-1])
     root.mainloop()
 
 
 class CheckerWindow(webchecker.Checker):
 
     def __init__(self, parent, root=webchecker.DEFROOT):
-       self.__parent = parent
-
-       self.__topcontrols = Frame(parent)
-       self.__topcontrols.pack(side=TOP, fill=X)
-       self.__label = Label(self.__topcontrols, text="Root URL:")
-       self.__label.pack(side=LEFT)
-       self.__rootentry = Entry(self.__topcontrols, width=60)
-       self.__rootentry.pack(side=LEFT)
-       self.__rootentry.bind('<Return>', self.enterroot)
-       self.__rootentry.focus_set()
-
-       self.__controls = Frame(parent)
-       self.__controls.pack(side=TOP, fill=X)
-       self.__running = 0
-       self.__start = Button(self.__controls, text="Run", command=self.start)
-       self.__start.pack(side=LEFT)
-       self.__stop = Button(self.__controls, text="Stop", command=self.stop,
-                            state=DISABLED)
-       self.__stop.pack(side=LEFT)
-       self.__step = Button(self.__controls, text="Check one",
-                            command=self.step)
-       self.__step.pack(side=LEFT)
-       self.__cv = BooleanVar(parent)
-       self.__cv.set(self.checkext)
-       self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
-                                     command=self.update_checkext,
-                                     text="Check nonlocal links",)
-       self.__checkext.pack(side=LEFT)
-       self.__reset = Button(self.__controls, text="Start over", command=self.reset)
-       self.__reset.pack(side=LEFT)
-       if __name__ == '__main__': # No Quit button under Grail!
-           self.__quit = Button(self.__controls, text="Quit",
-                                command=self.__parent.quit)
-           self.__quit.pack(side=RIGHT)
-
-       self.__status = Label(parent, text="Status: initial", anchor=W)
-       self.__status.pack(side=TOP, fill=X)
-       self.__checking = Label(parent, text="Idle", anchor=W)
-       self.__checking.pack(side=TOP, fill=X)
-       self.__mp = mp = MultiPanel(parent)
-       sys.stdout = self.__log = LogPanel(mp, "Log")
-       self.__todo = ListPanel(mp, "To check", self.showinfo)
-       self.__done = ListPanel(mp, "Checked", self.showinfo)
-       self.__bad = ListPanel(mp, "Bad links", self.showinfo)
-       self.__errors = ListPanel(mp, "Pages w/ bad links", self.showinfo)
-       self.__details = LogPanel(mp, "Details")
-       webchecker.Checker.__init__(self)
-       if root:
-           root = string.strip(str(root))
-           if root:
-               self.suggestroot(root)
-       self.newstatus()
+        self.__parent = parent
+
+        self.__topcontrols = Frame(parent)
+        self.__topcontrols.pack(side=TOP, fill=X)
+        self.__label = Label(self.__topcontrols, text="Root URL:")
+        self.__label.pack(side=LEFT)
+        self.__rootentry = Entry(self.__topcontrols, width=60)
+        self.__rootentry.pack(side=LEFT)
+        self.__rootentry.bind('<Return>', self.enterroot)
+        self.__rootentry.focus_set()
+
+        self.__controls = Frame(parent)
+        self.__controls.pack(side=TOP, fill=X)
+        self.__running = 0
+        self.__start = Button(self.__controls, text="Run", command=self.start)
+        self.__start.pack(side=LEFT)
+        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
+                             state=DISABLED)
+        self.__stop.pack(side=LEFT)
+        self.__step = Button(self.__controls, text="Check one",
+                             command=self.step)
+        self.__step.pack(side=LEFT)
+        self.__cv = BooleanVar(parent)
+        self.__cv.set(self.checkext)
+        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
+                                      command=self.update_checkext,
+                                      text="Check nonlocal links",)
+        self.__checkext.pack(side=LEFT)
+        self.__reset = Button(self.__controls, text="Start over", command=self.reset)
+        self.__reset.pack(side=LEFT)
+        if __name__ == '__main__': # No Quit button under Grail!
+            self.__quit = Button(self.__controls, text="Quit",
+                                 command=self.__parent.quit)
+            self.__quit.pack(side=RIGHT)
+
+        self.__status = Label(parent, text="Status: initial", anchor=W)
+        self.__status.pack(side=TOP, fill=X)
+        self.__checking = Label(parent, text="Idle", anchor=W)
+        self.__checking.pack(side=TOP, fill=X)
+        self.__mp = mp = MultiPanel(parent)
+        sys.stdout = self.__log = LogPanel(mp, "Log")
+        self.__todo = ListPanel(mp, "To check", self.showinfo)
+        self.__done = ListPanel(mp, "Checked", self.showinfo)
+        self.__bad = ListPanel(mp, "Bad links", self.showinfo)
+        self.__errors = ListPanel(mp, "Pages w/ bad links", self.showinfo)
+        self.__details = LogPanel(mp, "Details")
+        webchecker.Checker.__init__(self)
+        if root:
+            root = string.strip(str(root))
+            if root:
+                self.suggestroot(root)
+        self.newstatus()
 
     def reset(self):
-       webchecker.Checker.reset(self)
-       for p in self.__todo, self.__done, self.__bad, self.__errors:
-           p.clear()
+        webchecker.Checker.reset(self)
+        for p in self.__todo, self.__done, self.__bad, self.__errors:
+            p.clear()
 
     def suggestroot(self, root):
-       self.__rootentry.delete(0, END)
-       self.__rootentry.insert(END, root)
-       self.__rootentry.select_range(0, END)
+        self.__rootentry.delete(0, END)
+        self.__rootentry.insert(END, root)
+        self.__rootentry.select_range(0, END)
 
     def enterroot(self, event=None):
-       root = self.__rootentry.get()
-       root = string.strip(root)
-       if root:
-           self.__checking.config(text="Adding root "+root)
-           self.__checking.update_idletasks()
-           self.addroot(root)
-           self.__checking.config(text="Idle")
-           try:
-               i = self.__todo.items.index(root)
-           except (ValueError, IndexError):
-               pass
-           else:
-               self.__todo.list.select_clear(0, END)
-               self.__todo.list.select_set(i)
-               self.__todo.list.yview(i)
-       self.__rootentry.delete(0, END)
+        root = self.__rootentry.get()
+        root = string.strip(root)
+        if root:
+            self.__checking.config(text="Adding root "+root)
+            self.__checking.update_idletasks()
+            self.addroot(root)
+            self.__checking.config(text="Idle")
+            try:
+                i = self.__todo.items.index(root)
+            except (ValueError, IndexError):
+                pass
+            else:
+                self.__todo.list.select_clear(0, END)
+                self.__todo.list.select_set(i)
+                self.__todo.list.yview(i)
+        self.__rootentry.delete(0, END)
 
     def start(self):
-       self.__start.config(state=DISABLED, relief=SUNKEN)
-       self.__stop.config(state=NORMAL)
-       self.__step.config(state=DISABLED)
-       self.enterroot()
-       self.__running = 1
-       self.go()
+        self.__start.config(state=DISABLED, relief=SUNKEN)
+        self.__stop.config(state=NORMAL)
+        self.__step.config(state=DISABLED)
+        self.enterroot()
+        self.__running = 1
+        self.go()
 
     def stop(self):
-       self.__stop.config(state=DISABLED, relief=SUNKEN)
-       self.__running = 0
+        self.__stop.config(state=DISABLED, relief=SUNKEN)
+        self.__running = 0
 
     def step(self):
-       self.__start.config(state=DISABLED)
-       self.__step.config(state=DISABLED, relief=SUNKEN)
-       self.enterroot()
-       self.__running = 0
-       self.dosomething()
+        self.__start.config(state=DISABLED)
+        self.__step.config(state=DISABLED, relief=SUNKEN)
+        self.enterroot()
+        self.__running = 0
+        self.dosomething()
 
     def go(self):
-       if self.__running:
-           self.__parent.after_idle(self.dosomething)
-       else:
-           self.__checking.config(text="Idle")
-           self.__start.config(state=NORMAL, relief=RAISED)
-           self.__stop.config(state=DISABLED, relief=RAISED)
-           self.__step.config(state=NORMAL, relief=RAISED)
+        if self.__running:
+            self.__parent.after_idle(self.dosomething)
+        else:
+            self.__checking.config(text="Idle")
+            self.__start.config(state=NORMAL, relief=RAISED)
+            self.__stop.config(state=DISABLED, relief=RAISED)
+            self.__step.config(state=NORMAL, relief=RAISED)
 
     __busy = 0
 
     def dosomething(self):
-       if self.__busy: return
-       self.__busy = 1
-       if self.todo:
-           l = self.__todo.selectedindices()
-           if l:
-               i = l[0]
-           else:
-               i = 0
-               self.__todo.list.select_set(i)
-           self.__todo.list.yview(i)
-           url = self.__todo.items[i]
-           self.__checking.config(text="Checking "+url)
-           self.__parent.update()
-           self.dopage(url)
-       else:
-           self.stop()
-       self.__busy = 0
-       self.go()
+        if self.__busy: return
+        self.__busy = 1
+        if self.todo:
+            l = self.__todo.selectedindices()
+            if l:
+                i = l[0]
+            else:
+                i = 0
+                self.__todo.list.select_set(i)
+            self.__todo.list.yview(i)
+            url = self.__todo.items[i]
+            self.__checking.config(text="Checking "+url)
+            self.__parent.update()
+            self.dopage(url)
+        else:
+            self.stop()
+        self.__busy = 0
+        self.go()
 
     def showinfo(self, url):
-       d = self.__details
-       d.clear()
-       d.put("URL:    %s\n" % url)
-       if self.bad.has_key(url):
-           d.put("Error:  %s\n" % str(self.bad[url]))
-       if url in self.roots:
-           d.put("Note:   This is a root URL\n")
-       if self.done.has_key(url):
-           d.put("Status: checked\n")
-           o = self.done[url]
-       elif self.todo.has_key(url):
-           d.put("Status: to check\n")
-           o = self.todo[url]
-       else:
-           d.put("Status: unknown (!)\n")
-           o = []
-       if self.errors.has_key(url):
-           d.put("Bad links from this page:\n")
-           for triple in self.errors[url]:
-               link, rawlink, msg = triple
-               d.put("  HREF  %s" % link)
-               if link != rawlink: d.put(" (%s)" %rawlink)
-               d.put("\n")
-               d.put("  error %s\n" % str(msg))
-       self.__mp.showpanel("Details")
-       for source, rawlink in o:
-           d.put("Origin: %s" % source)
-           if rawlink != url:
-               d.put(" (%s)" % rawlink)
-           d.put("\n")
-       d.text.yview("1.0")
+        d = self.__details
+        d.clear()
+        d.put("URL:    %s\n" % url)
+        if self.bad.has_key(url):
+            d.put("Error:  %s\n" % str(self.bad[url]))
+        if url in self.roots:
+            d.put("Note:   This is a root URL\n")
+        if self.done.has_key(url):
+            d.put("Status: checked\n")
+            o = self.done[url]
+        elif self.todo.has_key(url):
+            d.put("Status: to check\n")
+            o = self.todo[url]
+        else:
+            d.put("Status: unknown (!)\n")
+            o = []
+        if self.errors.has_key(url):
+            d.put("Bad links from this page:\n")
+            for triple in self.errors[url]:
+                link, rawlink, msg = triple
+                d.put("  HREF  %s" % link)
+                if link != rawlink: d.put(" (%s)" %rawlink)
+                d.put("\n")
+                d.put("  error %s\n" % str(msg))
+        self.__mp.showpanel("Details")
+        for source, rawlink in o:
+            d.put("Origin: %s" % source)
+            if rawlink != url:
+                d.put(" (%s)" % rawlink)
+            d.put("\n")
+        d.text.yview("1.0")
 
     def setbad(self, url, msg):
-       webchecker.Checker.setbad(self, url, msg)
-       self.__bad.insert(url)
-       self.newstatus()
+        webchecker.Checker.setbad(self, url, msg)
+        self.__bad.insert(url)
+        self.newstatus()
 
     def setgood(self, url):
-       webchecker.Checker.setgood(self, url)
-       self.__bad.remove(url)
-       self.newstatus()
+        webchecker.Checker.setgood(self, url)
+        self.__bad.remove(url)
+        self.newstatus()
 
     def newlink(self, url, origin):
-       webchecker.Checker.newlink(self, url, origin)
-       if self.done.has_key(url):
-           self.__done.insert(url)
-       elif self.todo.has_key(url):
-           self.__todo.insert(url)
-       self.newstatus()
+        webchecker.Checker.newlink(self, url, origin)
+        if self.done.has_key(url):
+            self.__done.insert(url)
+        elif self.todo.has_key(url):
+            self.__todo.insert(url)
+        self.newstatus()
 
     def markdone(self, url):
-       webchecker.Checker.markdone(self, url)
-       self.__done.insert(url)
-       self.__todo.remove(url)
-       self.newstatus()
+        webchecker.Checker.markdone(self, url)
+        self.__done.insert(url)
+        self.__todo.remove(url)
+        self.newstatus()
 
     def seterror(self, url, triple):
-       webchecker.Checker.seterror(self, url, triple)
-       self.__errors.insert(url)
-       self.newstatus()
+        webchecker.Checker.seterror(self, url, triple)
+        self.__errors.insert(url)
+        self.newstatus()
 
     def newstatus(self):
-       self.__status.config(text="Status: "+self.status())
-       self.__parent.update()
+        self.__status.config(text="Status: "+self.status())
+        self.__parent.update()
 
     def update_checkext(self):
-       self.checkext = self.__cv.get()
+        self.checkext = self.__cv.get()
 
 
 class ListPanel:
 
     def __init__(self, mp, name, showinfo=None):
-       self.mp = mp
-       self.name = name
-       self.showinfo = showinfo
-       self.panel = mp.addpanel(name)
-       self.list, self.frame = tktools.make_list_box(
-           self.panel, width=60, height=5)
-       self.list.config(exportselection=0)
-       if showinfo:
-           self.list.bind('<Double-Button-1>', self.doubleclick)
-       self.items = []
+        self.mp = mp
+        self.name = name
+        self.showinfo = showinfo
+        self.panel = mp.addpanel(name)
+        self.list, self.frame = tktools.make_list_box(
+            self.panel, width=60, height=5)
+        self.list.config(exportselection=0)
+        if showinfo:
+            self.list.bind('<Double-Button-1>', self.doubleclick)
+        self.items = []
 
     def clear(self):
-       self.items = []
-       self.list.delete(0, END)
-       self.mp.hidepanel(self.name)
+        self.items = []
+        self.list.delete(0, END)
+        self.mp.hidepanel(self.name)
 
     def doubleclick(self, event):
-       l = self.selectedindices()
-       if l:
-           self.showinfo(self.list.get(l[0]))
+        l = self.selectedindices()
+        if l:
+            self.showinfo(self.list.get(l[0]))
 
     def selectedindices(self):
-       l = self.list.curselection()
-       if not l: return []
-       return map(string.atoi, l)
+        l = self.list.curselection()
+        if not l: return []
+        return map(string.atoi, l)
 
     def insert(self, url):
-       if url not in self.items:
-           if not self.items:
-               self.mp.showpanel(self.name)
-           # (I tried sorting alphabetically, but the display is too jumpy)
-           i = len(self.items)
-           self.list.insert(i, url)
-           self.list.yview(i)
-           self.items.insert(i, url)
+        if url not in self.items:
+            if not self.items:
+                self.mp.showpanel(self.name)
+            # (I tried sorting alphabetically, but the display is too jumpy)
+            i = len(self.items)
+            self.list.insert(i, url)
+            self.list.yview(i)
+            self.items.insert(i, url)
 
     def remove(self, url):
-       try:
-           i = self.items.index(url)
-       except (ValueError, IndexError):
-           pass
-       else:
-           was_selected = i in self.selectedindices()
-           self.list.delete(i)
-           del self.items[i]
-           if not self.items:
-               self.mp.hidepanel(self.name)
-           elif was_selected:
-               if i >= len(self.items):
-                   i = len(self.items) - 1
-               self.list.select_set(i)
+        try:
+            i = self.items.index(url)
+        except (ValueError, IndexError):
+            pass
+        else:
+            was_selected = i in self.selectedindices()
+            self.list.delete(i)
+            del self.items[i]
+            if not self.items:
+                self.mp.hidepanel(self.name)
+            elif was_selected:
+                if i >= len(self.items):
+                    i = len(self.items) - 1
+                self.list.select_set(i)
 
 
 class LogPanel:
 
     def __init__(self, mp, name):
-       self.mp = mp
-       self.name = name
-       self.panel = mp.addpanel(name)
-       self.text, self.frame = tktools.make_text_box(self.panel, height=10)
-       self.text.config(wrap=NONE)
+        self.mp = mp
+        self.name = name
+        self.panel = mp.addpanel(name)
+        self.text, self.frame = tktools.make_text_box(self.panel, height=10)
+        self.text.config(wrap=NONE)
 
     def clear(self):
-       self.text.delete("1.0", END)
-       self.text.yview("1.0")
+        self.text.delete("1.0", END)
+        self.text.yview("1.0")
 
     def put(self, s):
-       self.text.insert(END, s)
-       if '\n' in s:
-           self.text.yview(END)
+        self.text.insert(END, s)
+        if '\n' in s:
+            self.text.yview(END)
 
     def write(self, s):
-       self.text.insert(END, s)
-       if '\n' in s:
-           self.text.yview(END)
-           self.panel.update()
+        self.text.insert(END, s)
+        if '\n' in s:
+            self.text.yview(END)
+            self.panel.update()
 
 
 class MultiPanel:
 
     def __init__(self, parent):
-       self.parent = parent
-       self.frame = Frame(self.parent)
-       self.frame.pack(expand=1, fill=BOTH)
-       self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
-       self.topframe.pack(fill=X)
-       self.botframe = Frame(self.frame)
-       self.botframe.pack(expand=1, fill=BOTH)
-       self.panelnames = []
-       self.panels = {}
+        self.parent = parent
+        self.frame = Frame(self.parent)
+        self.frame.pack(expand=1, fill=BOTH)
+        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
+        self.topframe.pack(fill=X)
+        self.botframe = Frame(self.frame)
+        self.botframe.pack(expand=1, fill=BOTH)
+        self.panelnames = []
+        self.panels = {}
 
     def addpanel(self, name, on=0):
-       v = StringVar(self.parent)
-       if on:
-           v.set(name)
-       else:
-           v.set("")
-       check = Checkbutton(self.topframe, text=name,
-                           offvalue="", onvalue=name, variable=v,
-                           command=self.checkpanel)
-       check.pack(side=LEFT)
-       panel = Frame(self.botframe)
-       label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
-       label.pack(side=TOP, fill=X)
-       t = v, check, panel
-       self.panelnames.append(name)
-       self.panels[name] = t
-       if on:
-           panel.pack(expand=1, fill=BOTH)
-       return panel
+        v = StringVar(self.parent)
+        if on:
+            v.set(name)
+        else:
+            v.set("")
+        check = Checkbutton(self.topframe, text=name,
+                            offvalue="", onvalue=name, variable=v,
+                            command=self.checkpanel)
+        check.pack(side=LEFT)
+        panel = Frame(self.botframe)
+        label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
+        label.pack(side=TOP, fill=X)
+        t = v, check, panel
+        self.panelnames.append(name)
+        self.panels[name] = t
+        if on:
+            panel.pack(expand=1, fill=BOTH)
+        return panel
 
     def showpanel(self, name):
-       v, check, panel = self.panels[name]
-       v.set(name)
-       panel.pack(expand=1, fill=BOTH)
+        v, check, panel = self.panels[name]
+        v.set(name)
+        panel.pack(expand=1, fill=BOTH)
 
     def hidepanel(self, name):
-       v, check, panel = self.panels[name]
-       v.set("")
-       panel.pack_forget()
+        v, check, panel = self.panels[name]
+        v.set("")
+        panel.pack_forget()
 
     def checkpanel(self):
-       for name in self.panelnames:
-           v, check, panel = self.panels[name]
-           panel.pack_forget()
-       for name in self.panelnames:
-           v, check, panel = self.panels[name]
-           if v.get():
-               panel.pack(expand=1, fill=BOTH)
+        for name in self.panelnames:
+            v, check, panel = self.panels[name]
+            panel.pack_forget()
+        for name in self.panelnames:
+            v, check, panel = self.panels[name]
+            if v.get():
+                panel.pack(expand=1, fill=BOTH)
 
 
 if __name__ == '__main__':
index 23dcf80df7850470d87256512a415b3afcd80885..5459e9772b9a9901aa48c852b52c35f3e322e329 100755 (executable)
@@ -116,17 +116,17 @@ import robotparser
 if __version__[0] == '$':
     _v = string.split(__version__)
     if len(_v) == 3:
-       __version__ = _v[1]
+        __version__ = _v[1]
 
 
 # Tunable parameters
-DEFROOT = "file:/usr/local/etc/httpd/htdocs/"  # Default root URL
-CHECKEXT = 1                           # Check external references (1 deep)
-VERBOSE = 1                            # Verbosity level (0-3)
-MAXPAGE = 150000                       # Ignore files bigger than this
-ROUNDSIZE = 50                         # Number of links processed per round
-DUMPFILE = "@webchecker.pickle"                # Pickled checkpoint
-AGENTNAME = "webchecker"               # Agent name for robots.txt parser
+DEFROOT = "file:/usr/local/etc/httpd/htdocs/"   # Default root URL
+CHECKEXT = 1                            # Check external references (1 deep)
+VERBOSE = 1                             # Verbosity level (0-3)
+MAXPAGE = 150000                        # Ignore files bigger than this
+ROUNDSIZE = 50                          # Number of links processed per round
+DUMPFILE = "@webchecker.pickle"         # Pickled checkpoint
+AGENTNAME = "webchecker"                # Agent name for robots.txt parser
 
 
 # Global variables
@@ -142,76 +142,76 @@ def main():
     norun = 0
 
     try:
-       opts, args = getopt.getopt(sys.argv[1:], 'Rd:m:nqr:vx')
+        opts, args = getopt.getopt(sys.argv[1:], 'Rd:m:nqr:vx')
     except getopt.error, msg:
-       sys.stdout = sys.stderr
-       print msg
-       print __doc__%globals()
-       sys.exit(2)
+        sys.stdout = sys.stderr
+        print msg
+        print __doc__%globals()
+        sys.exit(2)
     for o, a in opts:
-       if o == '-R':
-           restart = 1
-       if o == '-d':
-           dumpfile = a
-       if o == '-m':
-           maxpage = string.atoi(a)
-       if o == '-n':
-           norun = 1
-       if o == '-q':
-           verbose = 0
-       if o == '-r':
-           roundsize = string.atoi(a)
-       if o == '-v':
-           verbose = verbose + 1
-       if o == '-x':
-           checkext = not checkext
+        if o == '-R':
+            restart = 1
+        if o == '-d':
+            dumpfile = a
+        if o == '-m':
+            maxpage = string.atoi(a)
+        if o == '-n':
+            norun = 1
+        if o == '-q':
+            verbose = 0
+        if o == '-r':
+            roundsize = string.atoi(a)
+        if o == '-v':
+            verbose = verbose + 1
+        if o == '-x':
+            checkext = not checkext
 
     if verbose > 0:
-       print AGENTNAME, "version", __version__
+        print AGENTNAME, "version", __version__
 
     if restart:
-       c = load_pickle(dumpfile=dumpfile, verbose=verbose)
+        c = load_pickle(dumpfile=dumpfile, verbose=verbose)
     else:
-       c = Checker()
+        c = Checker()
 
     c.setflags(checkext=checkext, verbose=verbose,
-              maxpage=maxpage, roundsize=roundsize)
+               maxpage=maxpage, roundsize=roundsize)
 
     if not restart and not args:
-       args.append(DEFROOT)
+        args.append(DEFROOT)
 
     for arg in args:
-       c.addroot(arg)
+        c.addroot(arg)
 
     if not norun:
-       try:
-           c.run()
-       except KeyboardInterrupt:
-           if verbose > 0:
-               print "[run interrupted]"
+        try:
+            c.run()
+        except KeyboardInterrupt:
+            if verbose > 0:
+                print "[run interrupted]"
 
     try:
-       c.report()
+        c.report()
     except KeyboardInterrupt:
-       if verbose > 0:
-           print "[report interrupted]"
+        if verbose > 0:
+            print "[report interrupted]"
 
     if c.save_pickle(dumpfile):
-       if dumpfile == DUMPFILE:
-           print "Use ``%s -R'' to restart." % sys.argv[0]
-       else:
-           print "Use ``%s -R -d %s'' to restart." % (sys.argv[0], dumpfile)
+        if dumpfile == DUMPFILE:
+            print "Use ``%s -R'' to restart." % sys.argv[0]
+        else:
+            print "Use ``%s -R -d %s'' to restart." % (sys.argv[0], dumpfile)
 
 
 def load_pickle(dumpfile=DUMPFILE, verbose=VERBOSE):
     if verbose > 0:
-       print "Loading checkpoint from %s ..." % dumpfile
+        print "Loading checkpoint from %s ..." % dumpfile
     f = open(dumpfile, "rb")
     c = pickle.load(f)
     f.close()
     if verbose > 0:
-       print "Done."
-       print "Root:", string.join(c.roots, "\n      ")
+        print "Done."
+        print "Root:", string.join(c.roots, "\n      ")
     return c
 
 
@@ -225,364 +225,364 @@ class Checker:
     validflags = tuple(dir())
 
     def __init__(self):
-       self.reset()
+        self.reset()
 
     def setflags(self, **kw):
-       for key in kw.keys():
-           if key not in self.validflags:
-               raise NameError, "invalid keyword argument: %s" % str(key)
-       for key, value in kw.items():
-           setattr(self, key, value)
+        for key in kw.keys():
+            if key not in self.validflags:
+                raise NameError, "invalid keyword argument: %s" % str(key)
+        for key, value in kw.items():
+            setattr(self, key, value)
 
     def reset(self):
-       self.roots = []
-       self.todo = {}
-       self.done = {}
-       self.bad = {}
-       self.round = 0
-       # The following are not pickled:
-       self.robots = {}
-       self.errors = {}
-       self.urlopener = MyURLopener()
-       self.changed = 0
+        self.roots = []
+        self.todo = {}
+        self.done = {}
+        self.bad = {}
+        self.round = 0
+        # The following are not pickled:
+        self.robots = {}
+        self.errors = {}
+        self.urlopener = MyURLopener()
+        self.changed = 0
 
     def __getstate__(self):
-       return (self.roots, self.todo, self.done, self.bad, self.round)
+        return (self.roots, self.todo, self.done, self.bad, self.round)
 
     def __setstate__(self, state):
-       self.reset()
-       (self.roots, self.todo, self.done, self.bad, self.round) = state
-       for root in self.roots:
-           self.addrobot(root)
-       for url in self.bad.keys():
-           self.markerror(url)
+        self.reset()
+        (self.roots, self.todo, self.done, self.bad, self.round) = state
+        for root in self.roots:
+            self.addrobot(root)
+        for url in self.bad.keys():
+            self.markerror(url)
 
     def addroot(self, root):
-       if root not in self.roots:
-           troot = root
-           scheme, netloc, path, params, query, fragment = \
-                   urlparse.urlparse(root)
-           i = string.rfind(path, "/") + 1
-           if 0 < i < len(path):
-               path = path[:i]
-               troot = urlparse.urlunparse((scheme, netloc, path,
-                                            params, query, fragment))
-           self.roots.append(troot)
-           self.addrobot(root)
-           self.newlink(root, ("<root>", root))
+        if root not in self.roots:
+            troot = root
+            scheme, netloc, path, params, query, fragment = \
+                    urlparse.urlparse(root)
+            i = string.rfind(path, "/") + 1
+            if 0 < i < len(path):
+                path = path[:i]
+                troot = urlparse.urlunparse((scheme, netloc, path,
+                                             params, query, fragment))
+            self.roots.append(troot)
+            self.addrobot(root)
+            self.newlink(root, ("<root>", root))
 
     def addrobot(self, root):
-       root = urlparse.urljoin(root, "/")
-       if self.robots.has_key(root): return
-       url = urlparse.urljoin(root, "/robots.txt")
-       self.robots[root] = rp = robotparser.RobotFileParser()
-       if self.verbose > 2:
-           print "Parsing", url
-           rp.debug = self.verbose > 3
-       rp.set_url(url)
-       try:
-           rp.read()
-       except IOError, msg:
-           if self.verbose > 1:
-               print "I/O error parsing", url, ":", msg
+        root = urlparse.urljoin(root, "/")
+        if self.robots.has_key(root): return
+        url = urlparse.urljoin(root, "/robots.txt")
+        self.robots[root] = rp = robotparser.RobotFileParser()
+        if self.verbose > 2:
+            print "Parsing", url
+            rp.debug = self.verbose > 3
+        rp.set_url(url)
+        try:
+            rp.read()
+        except IOError, msg:
+            if self.verbose > 1:
+                print "I/O error parsing", url, ":", msg
 
     def run(self):
-       while self.todo:
-           self.round = self.round + 1
-           if self.verbose > 0:
-               print
-               print "Round %d (%s)" % (self.round, self.status())
-               print 
-           urls = self.todo.keys()[:self.roundsize]
-           for url in urls:
-               self.dopage(url)
+        while self.todo:
+            self.round = self.round + 1
+            if self.verbose > 0:
+                print
+                print "Round %d (%s)" % (self.round, self.status())
+                print 
+            urls = self.todo.keys()[:self.roundsize]
+            for url in urls:
+                self.dopage(url)
 
     def status(self):
-       return "%d total, %d to do, %d done, %d bad" % (
-           len(self.todo)+len(self.done),
-           len(self.todo), len(self.done),
-           len(self.bad))
+        return "%d total, %d to do, %d done, %d bad" % (
+            len(self.todo)+len(self.done),
+            len(self.todo), len(self.done),
+            len(self.bad))
 
     def report(self):
-       print
-       if not self.todo: print "Final",
-       else: print "Interim",
-       print "Report (%s)" % self.status()
-       self.report_errors()
+        print
+        if not self.todo: print "Final",
+        else: print "Interim",
+        print "Report (%s)" % self.status()
+        self.report_errors()
 
     def report_errors(self):
-       if not self.bad:
-           print
-           print "No errors"
-           return
-       print
-       print "Error Report:"
-       sources = self.errors.keys()
-       sources.sort()
-       for source in sources:
-           triples = self.errors[source]
-           print
-           if len(triples) > 1:
-               print len(triples), "Errors in", source
-           else:
-               print "Error in", source
-           for url, rawlink, msg in triples:
-               print "  HREF", url,
-               if rawlink != url: print "(%s)" % rawlink,
-               print
-               print "   msg", msg
+        if not self.bad:
+            print
+            print "No errors"
+            return
+        print
+        print "Error Report:"
+        sources = self.errors.keys()
+        sources.sort()
+        for source in sources:
+            triples = self.errors[source]
+            print
+            if len(triples) > 1:
+                print len(triples), "Errors in", source
+            else:
+                print "Error in", source
+            for url, rawlink, msg in triples:
+                print "  HREF", url,
+                if rawlink != url: print "(%s)" % rawlink,
+                print
+                print "   msg", msg
 
     def dopage(self, url):
-       if self.verbose > 1:
-           if self.verbose > 2:
-               self.show("Check ", url, "  from", self.todo[url])
-           else:
-               print "Check ", url
-       page = self.getpage(url)
-       if page:
-           for info in page.getlinkinfos():
-               link, rawlink = info
-               origin = url, rawlink
-               self.newlink(link, origin)
-       self.markdone(url)
+        if self.verbose > 1:
+            if self.verbose > 2:
+                self.show("Check ", url, "  from", self.todo[url])
+            else:
+                print "Check ", url
+        page = self.getpage(url)
+        if page:
+            for info in page.getlinkinfos():
+                link, rawlink = info
+                origin = url, rawlink
+                self.newlink(link, origin)
+        self.markdone(url)
 
     def newlink(self, url, origin):
-       if self.done.has_key(url):
-           self.newdonelink(url, origin)
-       else:
-           self.newtodolink(url, origin)
+        if self.done.has_key(url):
+            self.newdonelink(url, origin)
+        else:
+            self.newtodolink(url, origin)
 
     def newdonelink(self, url, origin):
-       self.done[url].append(origin)
-       if self.verbose > 3:
-           print "  Done link", url
+        self.done[url].append(origin)
+        if self.verbose > 3:
+            print "  Done link", url
 
     def newtodolink(self, url, origin):
-       if self.todo.has_key(url):
-           self.todo[url].append(origin)
-           if self.verbose > 3:
-               print "  Seen todo link", url
-       else:
-           self.todo[url] = [origin]
-           if self.verbose > 3:
-               print "  New todo link", url
+        if self.todo.has_key(url):
+            self.todo[url].append(origin)
+            if self.verbose > 3:
+                print "  Seen todo link", url
+        else:
+            self.todo[url] = [origin]
+            if self.verbose > 3:
+                print "  New todo link", url
 
     def markdone(self, url):
-       self.done[url] = self.todo[url]
-       del self.todo[url]
-       self.changed = 1
+        self.done[url] = self.todo[url]
+        del self.todo[url]
+        self.changed = 1
 
     def inroots(self, url):
-       for root in self.roots:
-           if url[:len(root)] == root:
-               root = urlparse.urljoin(root, "/")
-               return self.robots[root].can_fetch(AGENTNAME, url)
-       return 0
+        for root in self.roots:
+            if url[:len(root)] == root:
+                root = urlparse.urljoin(root, "/")
+                return self.robots[root].can_fetch(AGENTNAME, url)
+        return 0
 
     def getpage(self, url):
-       if url[:7] == 'mailto:' or url[:5] == 'news:':
-           if self.verbose > 1: print " Not checking mailto/news URL"
-           return None
-       isint = self.inroots(url)
-       if not isint:
-           if not self.checkext:
-               if self.verbose > 1: print " Not checking ext link"
-               return None
-           f = self.openpage(url)
-           if f:
-               self.safeclose(f)
-           return None
-       text, nurl = self.readhtml(url)
-       if nurl != url:
-           if self.verbose > 1:
-               print " Redirected to", nurl
-           url = nurl
-       if text:
-           return Page(text, url, verbose=self.verbose, maxpage=self.maxpage)
+        if url[:7] == 'mailto:' or url[:5] == 'news:':
+            if self.verbose > 1: print " Not checking mailto/news URL"
+            return None
+        isint = self.inroots(url)
+        if not isint:
+            if not self.checkext:
+                if self.verbose > 1: print " Not checking ext link"
+                return None
+            f = self.openpage(url)
+            if f:
+                self.safeclose(f)
+            return None
+        text, nurl = self.readhtml(url)
+        if nurl != url:
+            if self.verbose > 1:
+                print " Redirected to", nurl
+            url = nurl
+        if text:
+            return Page(text, url, verbose=self.verbose, maxpage=self.maxpage)
 
     def readhtml(self, url):
-       text = None
-       f, url = self.openhtml(url)
-       if f:
-           text = f.read()
-           f.close()
-       return text, url
+        text = None
+        f, url = self.openhtml(url)
+        if f:
+            text = f.read()
+            f.close()
+        return text, url
 
     def openhtml(self, url):
-       f = self.openpage(url)
-       if f:
-           url = f.geturl()
-           info = f.info()
-           if not self.checkforhtml(info, url):
-               self.safeclose(f)
-               f = None
-       return f, url
+        f = self.openpage(url)
+        if f:
+            url = f.geturl()
+            info = f.info()
+            if not self.checkforhtml(info, url):
+                self.safeclose(f)
+                f = None
+        return f, url
 
     def openpage(self, url):
-       try:
-           return self.urlopener.open(url)
-       except IOError, msg:
-           msg = self.sanitize(msg)
-           if self.verbose > 0:
-               print "Error ", msg
-           if self.verbose > 0:
-               self.show(" HREF ", url, "  from", self.todo[url])
-           self.setbad(url, msg)
-           return None
+        try:
+            return self.urlopener.open(url)
+        except IOError, msg:
+            msg = self.sanitize(msg)
+            if self.verbose > 0:
+                print "Error ", msg
+            if self.verbose > 0:
+                self.show(" HREF ", url, "  from", self.todo[url])
+            self.setbad(url, msg)
+            return None
 
     def checkforhtml(self, info, url):
-       if info.has_key('content-type'):
-           ctype = string.lower(info['content-type'])
-       else:
-           if url[-1:] == "/":
-               return 1
-           ctype, encoding = mimetypes.guess_type(url)
-       if ctype == 'text/html':
-           return 1
-       else:
-           if self.verbose > 1:
-               print " Not HTML, mime type", ctype
-           return 0
+        if info.has_key('content-type'):
+            ctype = string.lower(info['content-type'])
+        else:
+            if url[-1:] == "/":
+                return 1
+            ctype, encoding = mimetypes.guess_type(url)
+        if ctype == 'text/html':
+            return 1
+        else:
+            if self.verbose > 1:
+                print " Not HTML, mime type", ctype
+            return 0
 
     def setgood(self, url):
-       if self.bad.has_key(url):
-           del self.bad[url]
-           self.changed = 1
-           if self.verbose > 0:
-               print "(Clear previously seen error)"
+        if self.bad.has_key(url):
+            del self.bad[url]
+            self.changed = 1
+            if self.verbose > 0:
+                print "(Clear previously seen error)"
 
     def setbad(self, url, msg):
-       if self.bad.has_key(url) and self.bad[url] == msg:
-           if self.verbose > 0:
-               print "(Seen this error before)"
-           return
-       self.bad[url] = msg
-       self.changed = 1
-       self.markerror(url)
-       
+        if self.bad.has_key(url) and self.bad[url] == msg:
+            if self.verbose > 0:
+                print "(Seen this error before)"
+            return
+        self.bad[url] = msg
+        self.changed = 1
+        self.markerror(url)
+        
     def markerror(self, url):
-       try:
-           origins = self.todo[url]
-       except KeyError:
-           origins = self.done[url]
-       for source, rawlink in origins:
-           triple = url, rawlink, self.bad[url]
-           self.seterror(source, triple)
+        try:
+            origins = self.todo[url]
+        except KeyError:
+            origins = self.done[url]
+        for source, rawlink in origins:
+            triple = url, rawlink, self.bad[url]
+            self.seterror(source, triple)
 
     def seterror(self, url, triple):
-       try:
-           self.errors[url].append(triple)
-       except KeyError:
-           self.errors[url] = [triple]
+        try:
+            self.errors[url].append(triple)
+        except KeyError:
+            self.errors[url] = [triple]
 
     # The following used to be toplevel functions; they have been
     # changed into methods so they can be overridden in subclasses.
 
     def show(self, p1, link, p2, origins):
-       print p1, link
-       i = 0
-       for source, rawlink in origins:
-           i = i+1
-           if i == 2:
-               p2 = ' '*len(p2)
-           print p2, source,
-           if rawlink != link: print "(%s)" % rawlink,
-           print
+        print p1, link
+        i = 0
+        for source, rawlink in origins:
+            i = i+1
+            if i == 2:
+                p2 = ' '*len(p2)
+            print p2, source,
+            if rawlink != link: print "(%s)" % rawlink,
+            print
 
     def sanitize(self, msg):
-       if isinstance(IOError, ClassType) and isinstance(msg, IOError):
-           # Do the other branch recursively
-           msg.args = self.sanitize(msg.args)
-       elif isinstance(msg, TupleType):
-           if len(msg) >= 4 and msg[0] == 'http error' and \
-              isinstance(msg[3], InstanceType):
-               # Remove the Message instance -- it may contain
-               # a file object which prevents pickling.
-               msg = msg[:3] + msg[4:]
-       return msg
+        if isinstance(IOError, ClassType) and isinstance(msg, IOError):
+            # Do the other branch recursively
+            msg.args = self.sanitize(msg.args)
+        elif isinstance(msg, TupleType):
+            if len(msg) >= 4 and msg[0] == 'http error' and \
+               isinstance(msg[3], InstanceType):
+                # Remove the Message instance -- it may contain
+                # a file object which prevents pickling.
+                msg = msg[:3] + msg[4:]
+        return msg
 
     def safeclose(self, f):
-       try:
-           url = f.geturl()
-       except AttributeError:
-           pass
-       else:
-           if url[:4] == 'ftp:' or url[:7] == 'file://':
-               # Apparently ftp connections don't like to be closed
-               # prematurely...
-               text = f.read()
-       f.close()
+        try:
+            url = f.geturl()
+        except AttributeError:
+            pass
+        else:
+            if url[:4] == 'ftp:' or url[:7] == 'file://':
+                # Apparently ftp connections don't like to be closed
+                # prematurely...
+                text = f.read()
+        f.close()
 
     def save_pickle(self, dumpfile=DUMPFILE):
-       if not self.changed:
-           if self.verbose > 0:
-               print
-               print "No need to save checkpoint"
-       elif not dumpfile:
-           if self.verbose > 0:
-               print "No dumpfile, won't save checkpoint"
-       else:
-           if self.verbose > 0:
-               print
-               print "Saving checkpoint to %s ..." % dumpfile
-           newfile = dumpfile + ".new"
-           f = open(newfile, "wb")
-           pickle.dump(self, f)
-           f.close()
-           try:
-               os.unlink(dumpfile)
-           except os.error:
-               pass
-           os.rename(newfile, dumpfile)
-           if self.verbose > 0:
-               print "Done."
-           return 1
+        if not self.changed:
+            if self.verbose > 0:
+                print
+                print "No need to save checkpoint"
+        elif not dumpfile:
+            if self.verbose > 0:
+                print "No dumpfile, won't save checkpoint"
+        else:
+            if self.verbose > 0:
+                print
+                print "Saving checkpoint to %s ..." % dumpfile
+            newfile = dumpfile + ".new"
+            f = open(newfile, "wb")
+            pickle.dump(self, f)
+            f.close()
+            try:
+                os.unlink(dumpfile)
+            except os.error:
+                pass
+            os.rename(newfile, dumpfile)
+            if self.verbose > 0:
+                print "Done."
+            return 1
 
 
 class Page:
 
     def __init__(self, text, url, verbose=VERBOSE, maxpage=MAXPAGE):
-       self.text = text
-       self.url = url
-       self.verbose = verbose
-       self.maxpage = maxpage
+        self.text = text
+        self.url = url
+        self.verbose = verbose
+        self.maxpage = maxpage
 
     def getlinkinfos(self):
-       size = len(self.text)
-       if size > self.maxpage:
-           if self.verbose > 0:
-               print "Skip huge file", self.url
-               print "  (%.0f Kbytes)" % (size*0.001)
-           return []
-       if self.verbose > 2:
-           print "  Parsing", self.url, "(%d bytes)" % size
-       parser = MyHTMLParser(verbose=self.verbose)
-       parser.feed(self.text)
-       parser.close()
-       rawlinks = parser.getlinks()
-       base = urlparse.urljoin(self.url, parser.getbase() or "")
-       infos = []
-       for rawlink in rawlinks:
-           t = urlparse.urlparse(rawlink)
-           t = t[:-1] + ('',)
-           rawlink = urlparse.urlunparse(t)
-           link = urlparse.urljoin(base, rawlink)
-           infos.append((link, rawlink))
-       return infos
+        size = len(self.text)
+        if size > self.maxpage:
+            if self.verbose > 0:
+                print "Skip huge file", self.url
+                print "  (%.0f Kbytes)" % (size*0.001)
+            return []
+        if self.verbose > 2:
+            print "  Parsing", self.url, "(%d bytes)" % size
+        parser = MyHTMLParser(verbose=self.verbose)
+        parser.feed(self.text)
+        parser.close()
+        rawlinks = parser.getlinks()
+        base = urlparse.urljoin(self.url, parser.getbase() or "")
+        infos = []
+        for rawlink in rawlinks:
+            t = urlparse.urlparse(rawlink)
+            t = t[:-1] + ('',)
+            rawlink = urlparse.urlunparse(t)
+            link = urlparse.urljoin(base, rawlink)
+            infos.append((link, rawlink))
+        return infos
 
 
 class MyStringIO(StringIO.StringIO):
 
     def __init__(self, url, info):
-       self.__url = url
-       self.__info = info
-       StringIO.StringIO.__init__(self)
+        self.__url = url
+        self.__info = info
+        StringIO.StringIO.__init__(self)
 
     def info(self):
-       return self.__info
+        return self.__info
 
     def geturl(self):
-       return self.__url
+        return self.__url
 
 
 class MyURLopener(urllib.FancyURLopener):
@@ -590,81 +590,81 @@ class MyURLopener(urllib.FancyURLopener):
     http_error_default = urllib.URLopener.http_error_default
 
     def __init__(*args):
-       self = args[0]
-       apply(urllib.FancyURLopener.__init__, args)
-       self.addheaders = [
-           ('User-agent', 'Python-webchecker/%s' % __version__),
-           ]
+        self = args[0]
+        apply(urllib.FancyURLopener.__init__, args)
+        self.addheaders = [
+            ('User-agent', 'Python-webchecker/%s' % __version__),
+            ]
 
     def http_error_401(self, url, fp, errcode, errmsg, headers):
         return None
 
     def open_file(self, url):
-       path = urllib.url2pathname(urllib.unquote(url))
-       if path[-1] != os.sep:
-           url = url + '/'
-       if os.path.isdir(path):
-           indexpath = os.path.join(path, "index.html")
-           if os.path.exists(indexpath):
-               return self.open_file(url + "index.html")
-           try:
-               names = os.listdir(path)
-           except os.error, msg:
-               raise IOError, msg, sys.exc_traceback
-           names.sort()
-           s = MyStringIO("file:"+url, {'content-type': 'text/html'})
-           s.write('<BASE HREF="file:%s">\n' %
-                   urllib.quote(os.path.join(path, "")))
-           for name in names:
-               q = urllib.quote(name)
-               s.write('<A HREF="%s">%s</A>\n' % (q, q))
-           s.seek(0)
-           return s
-       return urllib.FancyURLopener.open_file(self, path)
+        path = urllib.url2pathname(urllib.unquote(url))
+        if path[-1] != os.sep:
+            url = url + '/'
+        if os.path.isdir(path):
+            indexpath = os.path.join(path, "index.html")
+            if os.path.exists(indexpath):
+                return self.open_file(url + "index.html")
+            try:
+                names = os.listdir(path)
+            except os.error, msg:
+                raise IOError, msg, sys.exc_traceback
+            names.sort()
+            s = MyStringIO("file:"+url, {'content-type': 'text/html'})
+            s.write('<BASE HREF="file:%s">\n' %
+                    urllib.quote(os.path.join(path, "")))
+            for name in names:
+                q = urllib.quote(name)
+                s.write('<A HREF="%s">%s</A>\n' % (q, q))
+            s.seek(0)
+            return s
+        return urllib.FancyURLopener.open_file(self, path)
 
 
 class MyHTMLParser(sgmllib.SGMLParser):
 
     def __init__(self, verbose=VERBOSE):
-       self.base = None
-       self.links = {}
-       self.myverbose = verbose
-       sgmllib.SGMLParser.__init__(self)
+        self.base = None
+        self.links = {}
+        self.myverbose = verbose
+        sgmllib.SGMLParser.__init__(self)
 
     def start_a(self, attributes):
-       self.link_attr(attributes, 'href')
+        self.link_attr(attributes, 'href')
 
     def end_a(self): pass
 
     def do_area(self, attributes):
-       self.link_attr(attributes, 'href')
+        self.link_attr(attributes, 'href')
 
     def do_img(self, attributes):
-       self.link_attr(attributes, 'src', 'lowsrc')
+        self.link_attr(attributes, 'src', 'lowsrc')
 
     def do_frame(self, attributes):
-       self.link_attr(attributes, 'src')
+        self.link_attr(attributes, 'src')
 
     def link_attr(self, attributes, *args):
-       for name, value in attributes:
-           if name in args:
-               if value: value = string.strip(value)
-               if value: self.links[value] = None
+        for name, value in attributes:
+            if name in args:
+                if value: value = string.strip(value)
+                if value: self.links[value] = None
 
     def do_base(self, attributes):
-       for name, value in attributes:
-           if name == 'href':
-               if value: value = string.strip(value)
-               if value:
-                   if self.myverbose > 1:
-                       print "  Base", value
-                   self.base = value
+        for name, value in attributes:
+            if name == 'href':
+                if value: value = string.strip(value)
+                if value:
+                    if self.myverbose > 1:
+                        print "  Base", value
+                    self.base = value
 
     def getlinks(self):
-       return self.links.keys()
+        return self.links.keys()
 
     def getbase(self):
-       return self.base
+        return self.base
 
 
 if __name__ == '__main__':
index 6169446580c32208174b63c796ebebd1bdffc9b8..852df07dc41b76e51652c6d73b77b101a126dd94 100755 (executable)
@@ -16,29 +16,29 @@ import webchecker
 if __version__[0] == '$':
     _v = string.split(__version__)
     if len(_v) == 3:
-       __version__ = _v[1]
+        __version__ = _v[1]
 
 def main():
     verbose = webchecker.VERBOSE
     try:
-       opts, args = getopt.getopt(sys.argv[1:], "qv")
+        opts, args = getopt.getopt(sys.argv[1:], "qv")
     except getopt.error, msg:
-       print msg
-       print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..."
-       return 2
+        print msg
+        print "usage:", sys.argv[0], "[-qv] ... [rooturl] ..."
+        return 2
     for o, a in opts:
-       if o == "-q":
-           verbose = 0
-       if o == "-v":
-           verbose = verbose + 1
+        if o == "-q":
+            verbose = 0
+        if o == "-v":
+            verbose = verbose + 1
     c = Sucker()
     c.setflags(verbose=verbose)
     c.urlopener.addheaders = [
-           ('User-agent', 'websucker/%s' % __version__),
-       ]
+            ('User-agent', 'websucker/%s' % __version__),
+        ]
     for arg in args:
-       print "Adding root", arg
-       c.addroot(arg)
+        print "Adding root", arg
+        c.addroot(arg)
     print "Run..."
     c.run()
 
@@ -47,57 +47,57 @@ class Sucker(webchecker.Checker):
     checkext = 0
 
     def readhtml(self, url):
-       text = None
-       path = self.savefilename(url)
-       try:
-           f = open(path, "rb")
-       except IOError:
-           f = self.openpage(url)
-           if f:
-               info = f.info()
-               nurl = f.geturl()
-               if nurl != url:
-                   url = nurl
-                   path = self.savefilename(url)
-               text = f.read()
-               f.close()
-               self.savefile(text, path)
-               if not self.checkforhtml(info, url):
-                   text = None
-       else:
-           if self.checkforhtml({}, url):
-               text = f.read()
-           f.close()
-       return text, url
+        text = None
+        path = self.savefilename(url)
+        try:
+            f = open(path, "rb")
+        except IOError:
+            f = self.openpage(url)
+            if f:
+                info = f.info()
+                nurl = f.geturl()
+                if nurl != url:
+                    url = nurl
+                    path = self.savefilename(url)
+                text = f.read()
+                f.close()
+                self.savefile(text, path)
+                if not self.checkforhtml(info, url):
+                    text = None
+        else:
+            if self.checkforhtml({}, url):
+                text = f.read()
+            f.close()
+        return text, url
 
     def savefile(self, text, path):
-       dir, base = os.path.split(path)
-       makedirs(dir)
-       f = open(path, "wb")
-       f.write(text)
-       f.close()
-       print "saved", path
+        dir, base = os.path.split(path)
+        makedirs(dir)
+        f = open(path, "wb")
+        f.write(text)
+        f.close()
+        print "saved", path
 
     def savefilename(self, url):
-       type, rest = urllib.splittype(url)
-       host, path = urllib.splithost(rest)
-       while path[:1] == "/": path = path[1:]
-       user, host = urllib.splituser(host)
-       host, port = urllib.splitnport(host)
-       host = string.lower(host)
-       path = os.path.join(host, path)
-       if path[-1] == "/": path = path + "index.html"
-       if os.sep != "/":
-           path = string.join(string.split(path, "/"), os.sep)
-       return path
+        type, rest = urllib.splittype(url)
+        host, path = urllib.splithost(rest)
+        while path[:1] == "/": path = path[1:]
+        user, host = urllib.splituser(host)
+        host, port = urllib.splitnport(host)
+        host = string.lower(host)
+        path = os.path.join(host, path)
+        if path[-1] == "/": path = path + "index.html"
+        if os.sep != "/":
+            path = string.join(string.split(path, "/"), os.sep)
+        return path
 
 def makedirs(dir):
     if not dir or os.path.exists(dir):
-       return
+        return
     head, tail = os.path.split(dir)
     if not tail:
-       print "Huh?  Don't know how to make dir", dir
-       return
+        print "Huh?  Don't know how to make dir", dir
+        return
     makedirs(head)
     os.mkdir(dir, 0777)