granicus.if.org Git - python/commitdiff
SF bug report #405939: wrong Host header with proxy
author Jeremy Hylton <jeremy@alum.mit.edu>
Fri, 8 Mar 2002 19:35:51 +0000 (19:35 +0000)
committer Jeremy Hylton <jeremy@alum.mit.edu>
Fri, 8 Mar 2002 19:35:51 +0000 (19:35 +0000)
In August, Greg said this looked good, so I'm going ahead with it.

The fix is different from the one in the bug report.  Instead of using
a regular expression to extract the host from the url, I use
urlparse.urlsplit.

Martin commented that the patch doesn't address URLs that have basic
authentication username and password in the header.  I don't see any
code anywhere in httplib that supports this feature, so I'm not going
to address it for this fix.

Bug fix candidate.

Lib/httplib.py

index 04cfca6dc15602e6bbb50ab91385319db9ea1191..f299873583e06d309666b3eac746390b6e798bde 100644 (file)
@@ -69,6 +69,7 @@ Req-sent-unread-response       _CS_REQ_SENT       <response_class>
 import errno
 import mimetools
 import socket
+from urlparse import urlsplit
 
 try:
     from cStringIO import StringIO
@@ -467,9 +468,15 @@ class HTTPConnection:
             # themselves. we should NOT issue it twice; some web servers (such
             # as Apache) barf when they see two Host: headers
 
-            # if we need a non-standard port,include it in the header
-            if self.port == HTTP_PORT:
-                self.putheader('Host', self.host)
+            # If we need a non-standard port, include it in the header.
+            # If the request is going through a proxy, put the host of
+            # the actual URL, not the host of the proxy.  (netloc is
+            # bound only in the absolute-URL branch; others use self.host.)
+            if url.startswith('http:'):
+                nil, netloc, nil, nil, nil = urlsplit(url)
+                self.putheader('Host', netloc)
+            elif self.port == HTTP_PORT:
+                self.putheader('Host', self.host)
             else:
                 self.putheader('Host', "%s:%s" % (self.host, self.port))
 
@@ -856,6 +863,17 @@ def test():
     print
     print h.getfile().read()
 
+    # minimal test that code to extract host from url works
+    class HTTP11(HTTP):
+        _http_vsn = 11
+        _http_vsn_str = 'HTTP/1.1'
+
+    h = HTTP11('www.python.org')
+    h.putrequest('GET', 'http://www.python.org/~jeremy/')
+    h.endheaders()
+    h.getreply()
+    h.close()
+
     if hasattr(socket, 'ssl'):
         host = 'sourceforge.net'
         selector = '/projects/python'