Fix urllib2.urlopen() handling of chunked transfer encoding.

author Jeremy Hylton <jeremy@alum.mit.edu>

Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)

committer Jeremy Hylton <jeremy@alum.mit.edu>

Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)
author Jeremy Hylton <jeremy@alum.mit.edu>
Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)
committer Jeremy Hylton <jeremy@alum.mit.edu>
Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py

index 6e9901f5699f8991f43fc98a87bd001837cd0c44..c68d244a6c8693baf5e77cd3ad11c342e5c1e067 100644 (file)
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -423,6 +423,8 @@ class HandlerTests(unittest.TestCase):
                  self.msg = msg
                  self.status = status
                  self.reason = reason
+            def read(self):
+                return ''
          class MockHTTPClass:
              def __init__(self):
                  self.req_headers = []
diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py

new file mode 100644 (file)

index 0000000..3c23246
--- /dev/null
+++ b/Lib/test/test_urllib2net.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+import unittest
+from test import test_support
+
+import socket
+import urllib2
+import sys
+import os
+import mimetools
+
+class URLTimeoutTest(unittest.TestCase):
+
+    TIMEOUT = 10.0
+
+    def setUp(self):
+        socket.setdefaulttimeout(self.TIMEOUT)
+
+    def tearDown(self):
+        socket.setdefaulttimeout(None)
+
+    def testURLread(self):
+        f = urllib2.urlopen("http://www.python.org/")
+        x = f.read()
+
+class urlopenNetworkTests(unittest.TestCase):
+    """Tests urllib2.urlopen using the network.
+
+    These tests are not exhaustive.  They assume that testing using files
+    already does a good job of covering the basic interface features.  There
+    are no tests exercising the optional 'data' and 'proxies' arguments.  No
+    tests for transparent redirection have been written.
+
+    setUp is not used for always constructing a connection to
+    http://www.python.org/ since there are a few tests that don't use that
+    address and making a connection is expensive enough to warrant minimizing
+    unneeded connections.
+
+    """
+
+    def test_basic(self):
+        # Simple test expected to pass.
+        open_url = urllib2.urlopen("http://www.python.org/")
+        for attr in ("read", "close", "info", "geturl"):
+            self.assert_(hasattr(open_url, attr), "object returned from "
+                            "urlopen lacks the %s attribute" % attr)
+        try:
+            self.assert_(open_url.read(), "calling 'read' failed")
+        finally:
+            open_url.close()
+
+    def test_info(self):
+        # Test 'info'.
+        open_url = urllib2.urlopen("http://www.python.org/")
+        try:
+            info_obj = open_url.info()
+        finally:
+            open_url.close()
+            self.assert_(isinstance(info_obj, mimetools.Message),
+                         "object returned by 'info' is not an instance of "
+                         "mimetools.Message")
+            self.assertEqual(info_obj.getsubtype(), "html")
+
+    def test_geturl(self):
+        # Make sure same URL as opened is returned by geturl.
+        URL = "http://www.python.org/"
+        open_url = urllib2.urlopen(URL)
+        try:
+            gotten_url = open_url.geturl()
+        finally:
+            open_url.close()
+        self.assertEqual(gotten_url, URL)
+
+    def test_bad_address(self):
+        # Make sure proper exception is raised when connecting to a bogus
+        # address.
+        self.assertRaises(IOError,
+                          # SF patch 809915:  In Sep 2003, VeriSign started
+                          # hijacking invalid .com and .net addresses to
+                          # boost traffic to their own site.  This test
+                          # started failing then.  One hopes the .invalid
+                          # domain will be spared to serve its defined
+                          # purpose.
+                          # urllib2.urlopen, "http://www.sadflkjsasadf.com/")
+                          urllib2.urlopen, "http://www.python.invalid/")
+
+def test_main():
+    test_support.requires("network")
+    test_support.run_unittest(URLTimeoutTest, urlopenNetworkTests)
+
+if __name__ == "__main__":
+    test_main()
diff --git a/Lib/urllib2.py b/Lib/urllib2.py

index c525f8ca2324b7ac5add8ecccacdc05fa9315b39..9ec8b9b4967e5e5793f1c72e8d7f1bb9f07d369d 100644 (file)
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -997,8 +997,20 @@ class AbstractHTTPHandler(BaseHandler):
              raise URLError(err)
  
          # Pick apart the HTTPResponse object to get the addinfourl
-        # object initialized properly
-        resp = addinfourl(r.fp, r.msg, req.get_full_url())
+        # object initialized properly.
+
+        # Wrap the HTTPResponse object in socket's file object adapter
+        # for Windows.  That adapter calls recv(), so delegate recv()
+        # to read().  This weird wrapping allows the returned object to
+        # have readline() and readlines() methods.
+        
+        # XXX It might be better to extract the read buffering code
+        # out of socket._fileobject() and into a base class.
+        
+        r.recv = r.read
+        fp = socket._fileobject(r)
+        
+        resp = addinfourl(fp, r.msg, req.get_full_url())
          resp.code = r.status
          resp.msg = r.reason
          return resp
author	Jeremy Hylton <jeremy@alum.mit.edu>
	Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)
committer	Jeremy Hylton <jeremy@alum.mit.edu>
	Sat, 7 Aug 2004 17:40:50 +0000 (17:40 +0000)
Lib/test/test_urllib2.py		patch \| blob \| history
Lib/test/test_urllib2net.py	[new file with mode: 0644]	patch \| blob
Lib/urllib2.py		patch \| blob \| history