]> granicus.if.org Git - python/commitdiff
Change urllib to use HTTPConnection rather than old HTTP class.
authorJeremy Hylton <jeremy@alum.mit.edu>
Sat, 4 Aug 2007 19:22:00 +0000 (19:22 +0000)
committerJeremy Hylton <jeremy@alum.mit.edu>
Sat, 4 Aug 2007 19:22:00 +0000 (19:22 +0000)
The HTTP class is a backwards compatibility layer for the Python 1.5
API.  (The only remaining use in the std library is xmlrpclib.)

The current change makes urllib issue HTTP/1.0 requests with
HTTPConnection, because is accesses HTTPResponse.fp directly instead
of using the read() method.  Using fp directly interacts poorly with
persistent connections.  There are probably better solutions than the
current one, but this is a start.

Lib/httplib.py
Lib/urllib.py

index 4d7e5c5dc3c7f5ac59bbd3a95e1b6058bc0a7ffc..89af296af82d672686e3485518752757a2c6d18d 100644 (file)
@@ -322,6 +322,13 @@ class HTTPResponse:
     # accepts iso-8859-1.
 
     def __init__(self, sock, debuglevel=0, strict=0, method=None):
+        # XXX If the response includes a content-length header, we
+        # need to make sure that the client doesn't read more than the
+        # specified number of bytes.  If it does, it will block until
+        # the server times out and closes the connection.  (The only
+        # applies to HTTP/1.1 connections.)  Since some clients access
+        # self.fp directly rather than calling read(), this is a little
+        # tricky.
         self.fp = sock.makefile("rb", 0)
         self.debuglevel = debuglevel
         self.strict = strict
index 2037a9da3248a63ac4f86c90a591b5f3435aebe5..d486bcc5dd53b1ee93646767eccdb9e55fec04ee 100644 (file)
@@ -81,11 +81,13 @@ def urlopen(url, data=None, proxies=None):
         return opener.open(url)
     else:
         return opener.open(url, data)
+
 def urlretrieve(url, filename=None, reporthook=None, data=None):
     global _urlopener
     if not _urlopener:
         _urlopener = FancyURLopener()
     return _urlopener.retrieve(url, filename, reporthook, data)
+
 def urlcleanup():
     if _urlopener:
         _urlopener.cleanup()
@@ -310,37 +312,44 @@ class URLopener:
             auth = base64.b64encode(user_passwd).strip()
         else:
             auth = None
-        h = httplib.HTTP(host)
+        http_conn = httplib.HTTPConnection(host)
+        # XXX We should fix urllib so that it works with HTTP/1.1.
+        http_conn._http_vsn = 10
+        http_conn._http_vsn_str = "HTTP/1.0"
+
+        headers = {}
+        if proxy_auth:
+            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
+        if auth:
+            headers["Authorization"] =  "Basic %s" % auth
+        if realhost:
+            headers["Host"] = realhost
+        for header, value in self.addheaders:
+            headers[header] = value
+
         if data is not None:
-            h.putrequest('POST', selector)
-            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
-            h.putheader('Content-Length', '%d' % len(data))
+            headers["Content-Type"] = "application/x-www-form-urlencoded"
+            http_conn.request("POST", selector, data, headers)
         else:
-            h.putrequest('GET', selector)
-        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
-        if auth: h.putheader('Authorization', 'Basic %s' % auth)
-        if realhost: h.putheader('Host', realhost)
-        for args in self.addheaders: h.putheader(*args)
-        h.endheaders()
-        if data is not None:
-            h.send(data)
-        errcode, errmsg, headers = h.getreply()
-        fp = h.getfile()
-        if errcode == -1:
-            if fp: fp.close()
+            http_conn.request("GET", selector, headers=headers)
+
+        try:
+            response = http_conn.getresponse()
+        except httplib.BadStatusLine:
             # something went wrong with the HTTP status line
-            raise IOError, ('http protocol error', 0,
-                            'got a bad status line', None)
-        if errcode == 200:
-            return addinfourl(fp, headers, "http:" + url)
+            raise IOError('http protocol error', 0,
+                          'got a bad status line', None)
+
+        if response.status == 200:
+            return addinfourl(response.fp, response.msg, "http:" + url)
         else:
-            if data is None:
-                return self.http_error(url, fp, errcode, errmsg, headers)
-            else:
-                return self.http_error(url, fp, errcode, errmsg, headers, data)
+            return self.http_error(
+                url, response.fp,
+                response.status, response.reason, response.msg, data)
 
     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
         """Handle http errors.
+
         Derived class can override this, or provide specific handlers
         named http_error_DDD where DDD is the 3-digit error code."""
         # First check if there's a specific handler for this error
@@ -872,6 +881,8 @@ class ftpwrapper:
 class addbase:
     """Base class for addinfo and addclosehook."""
 
+    # XXX Add a method to expose the timeout on the underlying socket?
+
     def __init__(self, fp):
         self.fp = fp
         self.read = self.fp.read