granicus.if.org Git - python/commitdiff
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529)
author Berker Peksag <berker.peksag@gmail.com>
Thu, 23 Nov 2017 23:40:26 +0000 (02:40 +0300)
committer Raymond Hettinger <rhettinger@users.noreply.github.com>
Thu, 23 Nov 2017 23:40:26 +0000 (15:40 -0800)
Doc/library/urllib.robotparser.rst
Lib/test/test_robotparser.py
Lib/urllib/robotparser.py
Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst [new file with mode: 0644]

index 7d31932f9656e49c3863bb8e63cec7673756d946..e3b90e673caaf0c67ac5ec93f53622268bb04fe3 100644 (file)
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
    .. method:: request_rate(useragent)
 
       Returns the contents of the ``Request-rate`` parameter from
-      ``robots.txt`` in the form of a :func:`~collections.namedtuple`
-      ``(requests, seconds)``.  If there is no such parameter or it doesn't
-      apply to the *useragent* specified or the ``robots.txt`` entry for this
-      parameter has invalid syntax, return ``None``.
+      ``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
+      If there is no such parameter or it doesn't apply to the *useragent*
+      specified or the ``robots.txt`` entry for this parameter has invalid
+      syntax, return ``None``.
 
       .. versionadded:: 3.6
 
index 5c1a571f1b6d70291b777899a734ae89eb9fb6b5..75198b70ad4ff5fc04eaf2d56860cb3abb6ca258 100644 (file)
@@ -3,7 +3,6 @@ import os
 import threading
 import unittest
 import urllib.robotparser
-from collections import namedtuple
 from test import support
 from http.server import BaseHTTPRequestHandler, HTTPServer
 
@@ -87,6 +86,10 @@ class BaseRequestRateTest(BaseRobotTest):
                         self.parser.crawl_delay(agent), self.crawl_delay
                     )
                 if self.request_rate:
+                    self.assertIsInstance(
+                        self.parser.request_rate(agent),
+                        urllib.robotparser.RequestRate
+                    )
                     self.assertEqual(
                         self.parser.request_rate(agent).requests,
                         self.request_rate.requests
@@ -108,7 +111,7 @@ Disallow: /a%2fb.html
 Disallow: /%7ejoe/index.html
     """
     agent = 'figtree'
-    request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+    request_rate = urllib.robotparser.RequestRate(9, 30)
     crawl_delay = 3
     good = [('figtree', '/foo.html')]
     bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -237,7 +240,7 @@ Crawl-delay: 1
 Request-rate: 3/15
 Disallow: /cyberworld/map/
     """
-    request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+    request_rate = urllib.robotparser.RequestRate(3, 15)
     crawl_delay = 1
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html']
index 9dab4c1c3a8880c5d306030610d9accad528bf4e..daac29c68dc36d2c771ae2a8f13b07aa36a4b2c9 100644 (file)
@@ -16,6 +16,9 @@ import urllib.request
 
 __all__ = ["RobotFileParser"]
 
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -136,11 +139,7 @@ class RobotFileParser:
                         # check if all values are sane
                         if (len(numbers) == 2 and numbers[0].strip().isdigit()
                             and numbers[1].strip().isdigit()):
-                            req_rate = collections.namedtuple('req_rate',
-                                                              'requests seconds')
-                            entry.req_rate = req_rate
-                            entry.req_rate.requests = int(numbers[0])
-                            entry.req_rate.seconds = int(numbers[1])
+                            entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
                         state = 2
         if state == 2:
             self._add_entry(entry)
diff --git a/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
new file mode 100644 (file)
index 0000000..89a193c
--- /dev/null
@@ -0,0 +1,5 @@
+Fix wrong usage of :func:`collections.namedtuple` in
+the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
+method.
+
+Initial patch by Robin Wellner.