granicus.if.org Git - python/commitdiff
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Thu, 23 Nov 2017 23:57:58 +0000 (15:57 -0800)
committer: Raymond Hettinger <rhettinger@users.noreply.github.com>
Thu, 23 Nov 2017 23:57:58 +0000 (15:57 -0800)
(cherry picked from commit 3df02dbc8e197053105f9dffeae40b04ec66766e)

Doc/library/urllib.robotparser.rst
Lib/test/test_robotparser.py
Lib/urllib/robotparser.py
Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst [new file with mode: 0644]

index 7d31932f9656e49c3863bb8e63cec7673756d946..e3b90e673caaf0c67ac5ec93f53622268bb04fe3 100644 (file)
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
    .. method:: request_rate(useragent)
 
       Returns the contents of the ``Request-rate`` parameter from
-      ``robots.txt`` in the form of a :func:`~collections.namedtuple`
-      ``(requests, seconds)``.  If there is no such parameter or it doesn't
-      apply to the *useragent* specified or the ``robots.txt`` entry for this
-      parameter has invalid syntax, return ``None``.
+      ``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
+      If there is no such parameter or it doesn't apply to the *useragent*
+      specified or the ``robots.txt`` entry for this parameter has invalid
+      syntax, return ``None``.
 
       .. versionadded:: 3.6
 
index 0f64ba8b060defbbc129423f063f18d4aa2ac498..e47344c11953144b39a8925609e7526565b10fbf 100644 (file)
@@ -2,7 +2,6 @@ import io
 import os
 import unittest
 import urllib.robotparser
-from collections import namedtuple
 from test import support
 from http.server import BaseHTTPRequestHandler, HTTPServer
 try:
@@ -90,6 +89,10 @@ class BaseRequestRateTest(BaseRobotTest):
                         self.parser.crawl_delay(agent), self.crawl_delay
                     )
                 if self.request_rate:
+                    self.assertIsInstance(
+                        self.parser.request_rate(agent),
+                        urllib.robotparser.RequestRate
+                    )
                     self.assertEqual(
                         self.parser.request_rate(agent).requests,
                         self.request_rate.requests
@@ -111,7 +114,7 @@ Disallow: /a%2fb.html
 Disallow: /%7ejoe/index.html
     """
     agent = 'figtree'
-    request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+    request_rate = urllib.robotparser.RequestRate(9, 30)
     crawl_delay = 3
     good = [('figtree', '/foo.html')]
     bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -240,7 +243,7 @@ Crawl-delay: 1
 Request-rate: 3/15
 Disallow: /cyberworld/map/
     """
-    request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+    request_rate = urllib.robotparser.RequestRate(3, 15)
     crawl_delay = 1
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html']
index 9dab4c1c3a8880c5d306030610d9accad528bf4e..daac29c68dc36d2c771ae2a8f13b07aa36a4b2c9 100644 (file)
@@ -16,6 +16,9 @@ import urllib.request
 
 __all__ = ["RobotFileParser"]
 
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -136,11 +139,7 @@ class RobotFileParser:
                         # check if all values are sane
                         if (len(numbers) == 2 and numbers[0].strip().isdigit()
                             and numbers[1].strip().isdigit()):
-                            req_rate = collections.namedtuple('req_rate',
-                                                              'requests seconds')
-                            entry.req_rate = req_rate
-                            entry.req_rate.requests = int(numbers[0])
-                            entry.req_rate.seconds = int(numbers[1])
+                            entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
                         state = 2
         if state == 2:
             self._add_entry(entry)
diff --git a/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
new file mode 100644 (file)
index 0000000..89a193c
--- /dev/null
@@ -0,0 +1,5 @@
+Fix wrong usage of :func:`collections.namedtuple` in
+the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
+method.
+
+Initial patch by Robin Wellner.