]> granicus.if.org Git - python/commitdiff
Issue #28151: Use pythontest.net in test_robotparser
authorBerker Peksag <berker.peksag@gmail.com>
Sun, 18 Sep 2016 08:21:57 +0000 (11:21 +0300)
committerBerker Peksag <berker.peksag@gmail.com>
Sun, 18 Sep 2016 08:21:57 +0000 (11:21 +0300)
Lib/test/test_robotparser.py

index d4bf45376acca31093a094423c48ef8731d800e0..51b48ce53c91481e7ef5d5922f3f218b2b9eaaf4 100644 (file)
@@ -1,4 +1,5 @@
 import io
+import os
 import unittest
 import urllib.robotparser
 from collections import namedtuple
@@ -272,14 +273,42 @@ class PasswordProtectedSiteTestCase(unittest.TestCase):
 
 class NetworkTestCase(unittest.TestCase):
 
-    def testPythonOrg(self):
+    base_url = 'http://www.pythontest.net/'
+    robots_txt = '{}elsewhere/robots.txt'.format(base_url)
+
+    @classmethod
+    def setUpClass(cls):
         support.requires('network')
-        with support.transient_internet('www.python.org'):
-            parser = urllib.robotparser.RobotFileParser(
-                "http://www.python.org/robots.txt")
-            parser.read()
-            self.assertTrue(
-                parser.can_fetch("*", "http://www.python.org/robots.txt"))
+        with support.transient_internet(cls.base_url):
+            cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
+            cls.parser.read()
+
+    def url(self, path):
+        return '{}{}{}'.format(
+            self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
+        )
+
+    def test_basic(self):
+        self.assertFalse(self.parser.disallow_all)
+        self.assertFalse(self.parser.allow_all)
+        self.assertGreater(self.parser.mtime(), 0)
+        self.assertFalse(self.parser.crawl_delay('*'))
+        self.assertFalse(self.parser.request_rate('*'))
+
+    def test_can_fetch(self):
+        self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+        self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
+        self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
+        self.assertTrue(self.parser.can_fetch('*', self.base_url))
+
+    def test_read_404(self):
+        parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
+        parser.read()
+        self.assertTrue(parser.allow_all)
+        self.assertFalse(parser.disallow_all)
+        self.assertEqual(parser.mtime(), 0)
 
 if __name__=='__main__':
     unittest.main()