From: Berker Peksag Date: Thu, 23 Nov 2017 23:40:26 +0000 (+0300) Subject: bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529) X-Git-Tag: v3.7.0a3~109 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3df02dbc8e197053105f9dffeae40b04ec66766e;p=python bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529) --- diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst index 7d31932f96..e3b90e673c 100644 --- a/Doc/library/urllib.robotparser.rst +++ b/Doc/library/urllib.robotparser.rst @@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html. .. method:: request_rate(useragent) Returns the contents of the ``Request-rate`` parameter from - ``robots.txt`` in the form of a :func:`~collections.namedtuple` - ``(requests, seconds)``. If there is no such parameter or it doesn't - apply to the *useragent* specified or the ``robots.txt`` entry for this - parameter has invalid syntax, return ``None``. + ``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``. + If there is no such parameter or it doesn't apply to the *useragent* + specified or the ``robots.txt`` entry for this parameter has invalid + syntax, return ``None``. .. 
versionadded:: 3.6 diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 5c1a571f1b..75198b70ad 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -3,7 +3,6 @@ import os import threading import unittest import urllib.robotparser -from collections import namedtuple from test import support from http.server import BaseHTTPRequestHandler, HTTPServer @@ -87,6 +86,10 @@ class BaseRequestRateTest(BaseRobotTest): self.parser.crawl_delay(agent), self.crawl_delay ) if self.request_rate: + self.assertIsInstance( + self.parser.request_rate(agent), + urllib.robotparser.RequestRate + ) self.assertEqual( self.parser.request_rate(agent).requests, self.request_rate.requests @@ -108,7 +111,7 @@ Disallow: /a%2fb.html Disallow: /%7ejoe/index.html """ agent = 'figtree' - request_rate = namedtuple('req_rate', 'requests seconds')(9, 30) + request_rate = urllib.robotparser.RequestRate(9, 30) crawl_delay = 3 good = [('figtree', '/foo.html')] bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html', @@ -237,7 +240,7 @@ Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ """ - request_rate = namedtuple('req_rate', 'requests seconds')(3, 15) + request_rate = urllib.robotparser.RequestRate(3, 15) crawl_delay = 1 good = ['/', '/test.html'] bad = ['/cyberworld/map/index.html'] diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py index 9dab4c1c3a..daac29c68d 100644 --- a/Lib/urllib/robotparser.py +++ b/Lib/urllib/robotparser.py @@ -16,6 +16,9 @@ import urllib.request __all__ = ["RobotFileParser"] +RequestRate = collections.namedtuple("RequestRate", "requests seconds") + + class RobotFileParser: """ This class provides a set of methods to read, parse and answer questions about a single robots.txt file. 
@@ -136,11 +139,7 @@ class RobotFileParser: # check if all values are sane if (len(numbers) == 2 and numbers[0].strip().isdigit() and numbers[1].strip().isdigit()): - req_rate = collections.namedtuple('req_rate', - 'requests seconds') - entry.req_rate = req_rate - entry.req_rate.requests = int(numbers[0]) - entry.req_rate.seconds = int(numbers[1]) + entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1])) state = 2 if state == 2: self._add_entry(entry) diff --git a/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst new file mode 100644 index 0000000000..89a193c9ef --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst @@ -0,0 +1,5 @@ +Fix wrong usage of :func:`collections.namedtuple` in +the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>` +method. + +Initial patch by Robin Wellner.