MAXAMOUNT = 1048576
class HTTPMessage(email.message.Message):
+ # XXX The only usage of this method is in
+ # http.server.CGIHTTPRequestHandler. Maybe move the code there so
+ # that it doesn't need to be part of the public API. The API has
+ # never been defined so this could cause backwards compatibility
+ # issues.
+
def getallmatchingheaders(self, name):
"""Find all header lines matching a given header name.
# text following RFC 2047. The basic status line parsing only
# accepts iso-8859-1.
- def __init__(self, sock, debuglevel=0, strict=0, method=None):
- # If the response includes a content-length header, we
- # need to make sure that the client doesn't read more than the
+ def __init__(self, sock, debuglevel=0, strict=0, method=None, url=None):
+ # If the response includes a content-length header, we need to
+ # make sure that the client doesn't read more than the
# specified number of bytes. If it does, it will block until
- # the server times out and closes the connection. (The only
- # applies to HTTP/1.1 connections.) This will happen if a self.fp.read()
- # is done (without a size) whether self.fp is buffered or not.
- # So, no self.fp.read() by clients unless they know what they are doing.
+ # the server times out and closes the connection. This will
+ # happen if a self.fp.read() is done (without a size) whether
+ # self.fp is buffered or not. So, no self.fp.read() by
+ # clients unless they know what they are doing.
self.fp = sock.makefile("rb")
self.debuglevel = debuglevel
self.strict = strict
self._method = method
- self.msg = None
+ # The HTTPResponse object is returned via urllib. The clients
+ # of http and urllib expect different attributes for the
+ # headers. headers is used here and supports urllib. msg is
+ # provided as a backwards compatibility layer for http
+ # clients.
+
+ self.headers = self.msg = None
# from the Status-Line of the response
self.version = _UNKNOWN # HTTP-Version
return version, status, reason
def begin(self):
- if self.msg is not None:
+ if self.headers is not None:
# we've already started reading the response
return
if self.debuglevel > 0:
print("header:", skip)
- self.status = status
+ self.code = self.status = status
self.reason = reason.strip()
if version == "HTTP/1.0":
self.version = 10
self.length = None
self.chunked = False
self.will_close = True
- self.msg = email.message_from_string('')
+ self.headers = self.msg = email.message_from_string('')
return
- self.msg = parse_headers(self.fp)
+ self.headers = self.msg = parse_headers(self.fp)
if self.debuglevel > 0:
- for hdr in self.msg:
+ for hdr in self.headers:
print("header:", hdr, end=" ")
# are we using the chunked-style of transfer encoding?
- tr_enc = self.msg.get("transfer-encoding")
+ tr_enc = self.headers.get("transfer-encoding")
if tr_enc and tr_enc.lower() == "chunked":
self.chunked = True
self.chunk_left = None
# do we have a Content-Length?
# NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
self.length = None
- length = self.msg.get("content-length")
+ length = self.headers.get("content-length")
# are we using the chunked-style of transfer encoding?
- tr_enc = self.msg.get("transfer-encoding")
+ tr_enc = self.headers.get("transfer-encoding")
if length and not self.chunked:
try:
self.length = int(length)
self.will_close = True
def _check_close(self):
- conn = self.msg.get("connection")
+ conn = self.headers.get("connection")
if self.version == 11:
# An HTTP/1.1 proxy is assumed to stay open unless
# explicitly closed.
- conn = self.msg.get("connection")
+ conn = self.headers.get("connection")
if conn and "close" in conn.lower():
return True
return False
# connections, using rules different than HTTP/1.1.
# For older HTTP, Keep-Alive indicates persistent connection.
- if self.msg.get("keep-alive"):
+ if self.headers.get("keep-alive"):
return False
# At least Akamai returns a "Connection: Keep-Alive" header,
return False
# Proxy-Connection is a netscape hack.
- pconn = self.msg.get("proxy-connection")
+ pconn = self.headers.get("proxy-connection")
if pconn and "keep-alive" in pconn.lower():
return False
return self.fp.fileno()
def getheader(self, name, default=None):
- if self.msg is None:
+ if self.headers is None:
raise ResponseNotReady()
- return ', '.join(self.msg.get_all(name, default))
+ return ', '.join(self.headers.get_all(name, default))
def getheaders(self):
"""Return list of (header, value) tuples."""
- if self.msg is None:
+ if self.headers is None:
raise ResponseNotReady()
- return list(self.msg.items())
+ return list(self.headers.items())
# We override IOBase.__iter__ so that it doesn't check for closed-ness
def __iter__(self):
return self
+ # For compatibility with old-style urllib responses.
+
+ def info(self):
+ return self.headers
+
+ def geturl(self):
+ return self.url
+
+ def getcode(self):
+ return self.status
class HTTPConnection:
if self.__state == _CS_IDLE:
self.__state = _CS_REQ_STARTED
else:
- raise CannotSendRequest()
+ raise CannotSendRequest(self.__state)
# Save the method we use, we need it later in the response phase
self._method = method
self.endheaders(body)
def getresponse(self):
- """Get the response from the server."""
+ """Get the response from the server.
+
+ If the HTTPConnection is in the correct state, returns an
+ instance of HTTPResponse or of whatever object is returned by
+ the response_class variable.
+
+ If a request has not been sent or if a previous response has
+ not been handled, ResponseNotReady is raised. If the HTTP
+ response indicates that the connection should be closed, then
+ it will be closed before the response is returned. When the
+ connection is closed, the underlying socket is closed.
+ """
# if a prior response has been completed, then forget about it.
if self.__response and self.__response.isclosed():
self.__response = None
- #
# if a prior response exists, then it must be completed (otherwise, we
# cannot read this response's header to determine the connection-close
# behavior)
# isclosed() status to become true.
#
if self.__state != _CS_REQ_SENT or self.__response:
- raise ResponseNotReady()
+ raise ResponseNotReady(self.__state)
if self.debuglevel > 0:
response = self.response_class(self.sock, self.debuglevel,
-# Issues in merging urllib and urllib2:
-# 1. They both define a function named urlopen()
-
"""An extensible library for opening URLs using a variety of protocols
The simplest way to use this module is to call the urlopen function,
# abstract factory for opener
import base64
+import bisect
import email
import hashlib
import http.client
import socket
import sys
import time
-import bisect
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
comparison.
"""
- url = request.get_full_url()
+ url = request.full_url
host = urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
- self.__original = unwrap(url)
- self.type = None
- # self.__r_type is what's left after doing the splittype
- self.host = None
- self.port = None
+ self.full_url = unwrap(url)
self.data = data
self.headers = {}
for key, value in headers.items():
origin_req_host = request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
+ self._parse()
- def __getattr__(self, attr):
- # XXX this is a fallback mechanism to guard against these
- # methods getting called in a non-standard order. this may be
- # too complicated and/or unnecessary.
- # XXX should the __r_XXX attributes be public?
- if attr[:12] == '_Request__r_':
- name = attr[12:]
- if hasattr(Request, 'get_' + name):
- getattr(self, 'get_' + name)()
- return getattr(self, attr)
- raise AttributeError(attr)
+ def _parse(self):
+ self.type, rest = splittype(self.full_url)
+ if self.type is None:
+ raise ValueError("unknown url type: %s" % self.full_url)
+ self.host, self.selector = splithost(rest)
+ if self.host:
+ self.host = unquote(self.host)
def get_method(self):
- if self.has_data():
+ if self.data is not None:
return "POST"
else:
return "GET"
- # XXX these helper methods are lame
+ # Begin deprecated methods
def add_data(self, data):
self.data = data
return self.data
def get_full_url(self):
- return self.__original
+ return self.full_url
def get_type(self):
- if self.type is None:
- self.type, self.__r_type = splittype(self.__original)
- if self.type is None:
- raise ValueError("unknown url type: %s" % self.__original)
return self.type
def get_host(self):
- if self.host is None:
- self.host, self.__r_host = splithost(self.__r_type)
- if self.host:
- self.host = unquote(self.host)
return self.host
def get_selector(self):
- return self.__r_host
+ return self.selector
- def set_proxy(self, host, type):
- self.host, self.type = host, type
- self.__r_host = self.__original
-
- def has_proxy(self):
- return self.__r_host == self.__original
+ def is_unverifiable(self):
+ return self.unverifiable
def get_origin_req_host(self):
return self.origin_req_host
- def is_unverifiable(self):
- return self.unverifiable
+ # End deprecated methods
+
+ def set_proxy(self, host, type):
+ self.host, self.type = host, type
+ self.selector = self.full_url
+
+ def has_proxy(self):
+ return self.selector == self.full_url
def add_header(self, key, val):
# useful for something like authentication
else:
req = fullurl
if data is not None:
- req.add_data(data)
+ req.data = data
req.timeout = timeout
- protocol = req.get_type()
+ protocol = req.type
# pre-process request
meth_name = protocol+"_request"
if result:
return result
- protocol = req.get_type()
+ protocol = req.type
result = self._call_chain(self.handle_open, protocol, protocol +
'_open', req)
if result:
class HTTPDefaultErrorHandler(BaseHandler):
def http_error_default(self, req, fp, code, msg, hdrs):
- raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+ raise HTTPError(req.full_url, code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler):
# maximum number of redirections to any single URL
m = req.get_method()
if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
- raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+ raise HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
if k.lower() not in CONTENT_HEADERS)
return Request(newurl,
headers=newheaders,
- origin_req_host=req.get_origin_req_host(),
+ origin_req_host=req.origin_req_host,
unverifiable=True)
# Implementation note: To avoid the server sending us into an
urlparts[2] = "/"
newurl = urlunparse(urlparts)
- newurl = urljoin(req.get_full_url(), newurl)
+ newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current
# request, although that might interact poorly with other
visited = new.redirect_dict = req.redirect_dict
if (visited.get(newurl, 0) >= self.max_repeats or
len(visited) >= self.max_redirections):
- raise HTTPError(req.get_full_url(), code,
+ raise HTTPError(req.full_url, code,
self.inf_msg + msg, headers, fp)
else:
visited = new.redirect_dict = req.redirect_dict = {}
meth(r, proxy, type))
def proxy_open(self, req, proxy, type):
- orig_type = req.get_type()
+ orig_type = req.type
proxy_type, user, password, hostport = _parse_proxy(proxy)
if proxy_type is None:
proxy_type = orig_type
auth_header = 'Authorization'
def http_error_401(self, req, fp, code, msg, headers):
- url = req.get_full_url()
+ url = req.full_url
return self.http_error_auth_reqed('www-authenticate',
url, req, headers)
# authority. Assume there isn't one, since urllib.request does not (and
# should not, RFC 3986 s. 3.2.1) support requests for URLs containing
# userinfo.
- authority = req.get_host()
+ authority = req.host
return self.http_error_auth_reqed('proxy-authenticate',
authority, req, headers)
# prompting for the information. Crap. This isn't great
# but it's better than the current 'repeat until recursion
# depth exceeded' approach <wink>
- raise HTTPError(req.get_full_url(), 401, "digest auth failed",
+ raise HTTPError(req.full_url, 401, "digest auth failed",
headers, None)
else:
self.retried += 1
if H is None:
return None
- user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+ user, pw = self.passwd.find_user_password(realm, req.full_url)
if user is None:
return None
# XXX not implemented yet
- if req.has_data():
- entdig = self.get_entity_digest(req.get_data(), chal)
+ if req.data is not None:
+ entdig = self.get_entity_digest(req.data, chal)
else:
entdig = None
A1 = "%s:%s:%s" % (user, realm, pw)
A2 = "%s:%s" % (req.get_method(),
# XXX selector: what about proxies and full urls
- req.get_selector())
+ req.selector)
if qop == 'auth':
self.nonce_count += 1
ncvalue = '%08x' % self.nonce_count
# XXX should the partial digests be encoded too?
base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
- 'response="%s"' % (user, realm, nonce, req.get_selector(),
+ 'response="%s"' % (user, realm, nonce, req.selector,
respdig)
if opaque:
base += ', opaque="%s"' % opaque
handler_order = 490 # before Basic auth
def http_error_401(self, req, fp, code, msg, headers):
- host = urlparse(req.get_full_url())[1]
+ host = urlparse(req.full_url)[1]
retry = self.http_error_auth_reqed('www-authenticate',
host, req, headers)
self.reset_retry_count()
handler_order = 490 # before Basic auth
def http_error_407(self, req, fp, code, msg, headers):
- host = req.get_host()
+ host = req.host
retry = self.http_error_auth_reqed('proxy-authenticate',
host, req, headers)
self.reset_retry_count()
self._debuglevel = level
def do_request_(self, request):
- host = request.get_host()
+ host = request.host
if not host:
raise URLError('no host given')
- if request.has_data(): # POST
- data = request.get_data()
+ if request.data is not None: # POST
+ data = request.data
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
sel_host = host
if request.has_proxy():
- scheme, sel = splittype(request.get_selector())
+ scheme, sel = splittype(request.selector)
sel_host, sel_path = splithost(sel)
if not request.has_header('Host'):
request.add_unredirected_header('Host', sel_host)
return request
def do_open(self, http_class, req):
- """Return an addinfourl object for the request, using http_class.
+ """Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
- The addinfourl return value is a file-like object. It also
- has methods and attributes including:
- - info(): return a email Message object for the headers
- - geturl(): return the original request URL
- - code: HTTP status code
"""
- host = req.get_host()
+ host = req.host
if not host:
raise URLError('no host given')
# So make sure the connection gets closed after the (only)
# request.
headers["Connection"] = "close"
- headers = dict(
- (name.title(), val) for name, val in headers.items())
+ headers = dict((name.title(), val) for name, val in headers.items())
try:
- h.request(req.get_method(), req.get_selector(), req.data, headers)
- r = h.getresponse()
- except socket.error as err: # XXX what error?
+ h.request(req.get_method(), req.selector, req.data, headers)
+ r = h.getresponse() # an HTTPResponse instance
+ except socket.error as err:
raise URLError(err)
-## resp = addinfourl(r.fp, r.msg, req.get_full_url())
- resp = addinfourl(r, r.msg, req.get_full_url())
- resp.code = r.status
- resp.msg = r.reason
- return resp
+ r.url = req.full_url
+ # This line overwrites the .msg attribute of the HTTPResponse
+ # with the reason phrase (the headers stay available as
+ # .headers), because urllib clients expect the response to
+ # have the reason in .msg. It would be good to mark this
+ # attribute as deprecated and get them to use info() or
+ # .headers.
+ r.msg = r.reason
+ return r
class HTTPHandler(AbstractHTTPHandler):
class UnknownHandler(BaseHandler):
def unknown_open(self, req):
- type = req.get_type()
+ type = req.type
raise URLError('unknown url type: %s' % type)
def parse_keqv_list(l):
class FileHandler(BaseHandler):
# Use local file or FTP depending on form of URL
def file_open(self, req):
- url = req.get_selector()
+ url = req.selector
if url[:2] == '//' and url[2:3] != '/':
req.type = 'ftp'
return self.parent.open(req)
def open_local_file(self, req):
import email.utils
import mimetypes
- host = req.get_host()
- file = req.get_selector()
+ host = req.host
+ file = req.selector
localfile = url2pathname(file)
try:
stats = os.stat(localfile)
def ftp_open(self, req):
import ftplib
import mimetypes
- host = req.get_host()
+ host = req.host
if not host:
raise URLError('ftp error: no host given')
host, port = splitport(host)
host = socket.gethostbyname(host)
except socket.error as msg:
raise URLError(msg)
- path, attrs = splitattr(req.get_selector())
+ path, attrs = splitattr(req.selector)
dirs = path.split('/')
dirs = list(map(unquote, dirs))
dirs, file = dirs[:-1], dirs[-1]
type = value.upper()
fp, retrlen = fw.retrfile(file, type)
headers = ""
- mtype = mimetypes.guess_type(req.get_full_url())[0]
+ mtype = mimetypes.guess_type(req.full_url)[0]
if mtype:
headers += "Content-type: %s\n" % mtype
if retrlen is not None and retrlen >= 0:
headers += "Content-length: %d\n" % retrlen
headers = email.message_from_string(headers)
- return addinfourl(fp, headers, req.get_full_url())
+ return addinfourl(fp, headers, req.full_url)
except ftplib.all_errors as msg:
exc = URLError('ftp error: %s' % msg)
raise exc.with_traceback(sys.exc_info()[2])
else:
auth = None
http_conn = connection_factory(host)
- # XXX We should fix urllib so that it works with HTTP/1.1.
- http_conn._http_vsn = 10
- http_conn._http_vsn_str = "HTTP/1.0"
+## # XXX We should fix urllib so that it works with HTTP/1.1.
+## http_conn._http_vsn = 10
+## http_conn._http_vsn_str = "HTTP/1.0"
headers = {}
if proxy_auth: