From: Raymond Hettinger Date: Sat, 13 Mar 2004 20:27:23 +0000 (+0000) Subject: SF patch #911431: robot.txt must be robots.txt X-Git-Tag: v2.4a1~709 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d95f1ad570791a5dffb16addd74b35b46f77fdc;p=python SF patch #911431: robot.txt must be robots.txt (Contributed by George Yoshida.) --- diff --git a/Lib/robotparser.py b/Lib/robotparser.py index e2af545848..6b23188f19 100644 --- a/Lib/robotparser.py +++ b/Lib/robotparser.py @@ -83,7 +83,7 @@ class RobotFileParser: self.entries.append(entry) def parse(self, lines): - """parse the input lines from a robot.txt file. + """parse the input lines from a robots.txt file. We allow that a user-agent: line is not preceded by one or more blank lines.""" state = 0 @@ -148,7 +148,7 @@ class RobotFileParser: def can_fetch(self, useragent, url): """using the parsed robots.txt decide if useragent can fetch url""" - _debug("Checking robot.txt allowance for:\n user agent: %s\n url: %s" % + _debug("Checking robots.txt allowance for:\n user agent: %s\n url: %s" % (useragent, url)) if self.disallow_all: return False diff --git a/Misc/cheatsheet b/Misc/cheatsheet index 0c16ddb334..487949a55e 100644 --- a/Misc/cheatsheet +++ b/Misc/cheatsheet @@ -1962,7 +1962,7 @@ repr Redo repr() but with limits on most sizes. rexec Restricted execution facilities ("safe" exec, eval, etc). rfc822 RFC-822 message manipulation class. rlcompleter Word completion for GNU readline 2.0. -robotparser Parse robot.txt files, useful for web spiders. +robotparser Parse robots.txt files, useful for web spiders. sched A generally useful event scheduler class. sets Module for a set datatype. sgmllib A parser for SGML.