It consists of code from urllib, urllib2, urlparse, and robotparser.
The old modules have all been removed. The new package has five
submodules: urllib.parse, urllib.request, urllib.response,
urllib.error, and urllib.robotparser. The urllib.request.urlopen()
function uses the url opener from urllib2.
Note that the unittests have not been renamed for the
beta, but they will be renamed in the future.
Joint work with Senthil Kumaran.
from io import StringIO
import sys
import os
-import urllib
+import urllib.parse
import email.parser
__all__ = ["MiniFieldStorage", "FieldStorage",
else:
continue
if len(nv[1]) or keep_blank_values:
- name = urllib.unquote(nv[0].replace('+', ' '))
- value = urllib.unquote(nv[1].replace('+', ' '))
+ name = urllib.parse.unquote(nv[0].replace('+', ' '))
+ value = urllib.parse.unquote(nv[1].replace('+', ' '))
r.append((name, value))
return r
__revision__ = "$Id$"
-import os, string, urllib2, getpass, urlparse
+import os, string, getpass
import io
+import urllib.parse, urllib.request
from distutils.core import PyPIRCCommand
from distutils.errors import *
def classifiers(self):
''' Fetch the list of classifiers from the server.
'''
- response = urllib2.urlopen(self.repository+'?:action=list_classifiers')
+ url = self.repository+'?:action=list_classifiers'
+ response = urllib.request.urlopen(url)
print(response.read())
def verify_metadata(self):
password = getpass.getpass('Password: ')
# set up the authentication
- auth = urllib2.HTTPPasswordMgr()
- host = urlparse.urlparse(self.repository)[1]
+ auth = urllib.request.HTTPPasswordMgr()
+ host = urllib.parse.urlparse(self.repository)[1]
auth.add_password(self.realm, host, username, password)
# send the info to the server and report the result
code, result = self.post_to_server(self.build_post_data('submit'),
'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary,
'Content-length': str(len(body))
}
- req = urllib2.Request(self.repository, body, headers)
+ req = urllib.request.Request(self.repository, body, headers)
# handle HTTP and include the Basic Auth handler
- opener = urllib2.build_opener(
- urllib2.HTTPBasicAuthHandler(password_mgr=auth)
+ opener = urllib.request.build_opener(
+ urllib.request.HTTPBasicAuthHandler(password_mgr=auth)
)
data = ''
try:
result = opener.open(req)
- except urllib2.HTTPError as e:
+ except urllib.error.HTTPError as e:
if self.show_response:
data = e.fp.read()
result = e.code, e.msg
- except urllib2.URLError as e:
+ except urllib.error.URLError as e:
result = 500, str(e)
else:
if self.show_response:
import configparser
import http.client
import base64
-import urlparse
+import urllib.parse
class upload(PyPIRCCommand):
self.announce("Submitting %s to %s" % (filename, self.repository), log.INFO)
# build the Request
- # We can't use urllib2 since we need to send the Basic
+ # We can't use urllib since we need to send the Basic
# auth right with the first request
+ # TODO(jhylton): Can we fix urllib?
schema, netloc, url, params, query, fragments = \
- urlparse.urlparse(self.repository)
+ urllib.parse.urlparse(self.repository)
assert not params and not query and not fragments
if schema == 'http':
http = http.client.HTTPConnection(netloc)
import base64
import random
import socket
+import urllib.parse
import warnings
from io import StringIO
charset is given but not language, the string is encoded using the empty
string for language.
"""
- import urllib
- s = urllib.quote(s, safe='')
+ s = urllib.parse.quote(s, safe='')
if charset is None and language is None:
return s
if language is None:
params is a sequence of 2-tuples containing (param name, string value).
"""
- import urllib
# Copy params so we don't mess with the original
params = params[:]
new_params = []
# language specifiers at the beginning of the string.
for num, s, encoded in continuations:
if encoded:
- s = urllib.unquote(s)
+ s = urllib.parse.unquote(s)
extended = True
value.append(s)
value = quote(EMPTYSTRING.join(value))
import socket
import email.parser
import email.message
-from urlparse import urlsplit
+from urllib.parse import urlsplit
import warnings
__all__ = ["HTTPResponse", "HTTPConnection",
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
-import re, urlparse, copy, time, urllib
+import copy
+import re
+import time
+import urllib.parse, urllib.request
try:
import threading as _threading
except ImportError:
"""
url = request.get_full_url()
- host = urlparse.urlparse(url)[1]
+ host = urllib.parse.urlparse(url)[1]
if host == "":
host = request.get_header("Host", "")
def request_path(request):
"""request-URI, as defined by RFC 2965."""
url = request.get_full_url()
- #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
- #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
- path, parameters, query, frag = urlparse.urlparse(url)[2:]
+ path, parameters, query, frag = urllib.parse.urlparse(url)[2:]
if parameters:
path = "%s;%s" % (path, parameters)
path = escape_path(path)
- req_path = urlparse.urlunparse(("", "", path, "", query, frag))
+ req_path = urllib.parse.urlunparse(("", "", path, "", query, frag))
if not req_path.startswith("/"):
# fix bad RFC 2396 absoluteURI
req_path = "/"+req_path
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
- path = urllib.quote(path, HTTP_PATH_SAFE)
+ path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
return path
"""Collection of HTTP cookies.
You may not need to know about this class: try
- urllib2.build_opener(HTTPCookieProcessor).open(url).
-
+ urllib.request.build_opener(HTTPCookieProcessor).open(url).
"""
non_word_re = re.compile(r"\W")
import time
import socket # For gethostbyaddr()
import shutil
-import urllib
+import urllib.parse
import select
import mimetypes
import posixpath
return None
list.sort(key=lambda a: a.lower())
r = []
- displaypath = cgi.escape(urllib.unquote(self.path))
+ displaypath = cgi.escape(urllib.parse.unquote(self.path))
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a>\n'
- % (urllib.quote(linkname), cgi.escape(displayname)))
+ % (urllib.parse.quote(linkname), cgi.escape(displayname)))
r.append("</ul>\n<hr>\n</body>\n</html>\n")
enc = sys.getfilesystemencoding()
encoded = ''.join(r).encode(enc)
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
- path = posixpath.normpath(urllib.unquote(path))
+ path = posixpath.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
env['SERVER_PROTOCOL'] = self.protocol_version
env['SERVER_PORT'] = str(self.server.server_port)
env['REQUEST_METHOD'] = self.command
- uqrest = urllib.unquote(rest)
+ uqrest = urllib.parse.unquote(rest)
env['PATH_INFO'] = uqrest
env['PATH_TRANSLATED'] = self.translate_path(uqrest)
env['SCRIPT_NAME'] = scriptname
Do not import directly; use urllib instead."""
-import urllib
+import urllib.parse
import os
__all__ = ["url2pathname","pathname2url"]
#
# XXXX The .. handling should be fixed...
#
- tp = urllib.splittype(pathname)[0]
+ tp = urllib.parsesplittype(pathname)[0]
if tp and tp != 'file':
raise RuntimeError('Cannot convert non-local URL to pathname')
# Turn starting /// into /, an empty hostname means current host
i = i + 1
rv = ':' + ':'.join(components)
# and finally unquote slashes and other funny characters
- return urllib.unquote(rv)
+ return urllib.parseunquote(rv)
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
return '/'.join(components)
def _pncomp2url(component):
- component = urllib.quote(component[:31], safe='') # We want to quote slashes
- return component
+ # We want to quote slashes
+ return urllib.parsequote(component[:31], safe='')
def test():
for url in ["index.html",
import os
import posixpath
-import urllib
+import urllib.parse
__all__ = [
"guess_type","guess_extension","guess_all_extensions",
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
- scheme, url = urllib.splittype(url)
+ scheme, url = urllib.parse.splittype(url)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
def dash_R_cleanup(fs, ps, pic, abcs):
import gc, copyreg
import _strptime, linecache
- import urlparse, urllib, urllib2, mimetypes, doctest
+ import urllib.parse, urllib.request, mimetypes, doctest
import struct, filecmp, _abcoll
from distutils.dir_util import _path_created
from weakref import WeakSet
_path_created.clear()
re.purge()
_strptime._regex_cache.clear()
- urlparse.clear_cache()
- urllib.urlcleanup()
- urllib2.install_opener(None)
+ urllib.parse.clear_cache()
+ urllib.request.urlcleanup()
linecache.clearcache()
mimetypes._default_mime_types()
filecmp._cache.clear()
testcase.fail('Missing SyntaxError: "%s"' % statement)
def open_urlresource(url, *args, **kw):
- import urllib, urlparse
+ import urllib.request, urllib.parse
requires('urlfetch')
- filename = urlparse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
+ filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
for path in [os.path.curdir, os.path.pardir]:
fn = os.path.join(path, filename)
return open(fn, *args, **kw)
print('\tfetching %s ...' % url, file=get_original_stdout())
- fn, _ = urllib.urlretrieve(url, filename)
+ fn, _ = urllib.request.urlretrieve(url, filename)
return open(fn, *args, **kw)
self.check_all("re")
self.check_all("reprlib")
self.check_all("rlcompleter")
- self.check_all("robotparser")
+ self.check_all("urllib.robotparser")
self.check_all("sched")
self.check_all("shelve")
self.check_all("shlex")
self.check_all("traceback")
self.check_all("tty")
self.check_all("unittest")
- self.check_all("urllib")
- self.check_all("urlparse")
self.check_all("uu")
self.check_all("warnings")
self.check_all("wave")
"""Tests for http/cookiejar.py."""
-import re, os, time, urllib2
+import re, os, time, urllib.request
from unittest import TestCase
from test import support
def _interact(cookiejar, url, set_cookie_hdrs, hdr_name):
"""Perform a single request / response cycle, returning Cookie: header."""
- req = urllib2.Request(url)
+ req = urllib.request.Request(url)
cookiejar.add_cookie_header(req)
cookie_hdr = req.get_header("Cookie", "")
headers = []
("http://foo/", "foo.local", True),
("http://foo/", ".local", True),
]:
- request = urllib2.Request(url)
+ request = urllib.request.Request(url)
r = pol.domain_return_ok(domain, request)
if ok: self.assert_(r)
else: self.assert_(not r)
def test_request_path(self):
# with parameters
- req = urllib2.Request("http://www.example.com/rheum/rhaponicum;"
- "foo=bar;sing=song?apples=pears&spam=eggs#ni")
+ req = urllib.request.Request(
+ "http://www.example.com/rheum/rhaponicum;"
+ "foo=bar;sing=song?apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum;"
"foo=bar;sing=song?apples=pears&spam=eggs#ni")
# without parameters
- req = urllib2.Request("http://www.example.com/rheum/rhaponicum?"
- "apples=pears&spam=eggs#ni")
+ req = urllib.request.Request(
+ "http://www.example.com/rheum/rhaponicum?"
+ "apples=pears&spam=eggs#ni")
self.assertEquals(request_path(req), "/rheum/rhaponicum?"
"apples=pears&spam=eggs#ni")
# missing final slash
- req = urllib2.Request("http://www.example.com")
+ req = urllib.request.Request("http://www.example.com")
self.assertEquals(request_path(req), "/")
def test_request_port(self):
- req = urllib2.Request("http://www.acme.com:1234/",
- headers={"Host": "www.acme.com:4321"})
+ req = urllib.request.Request("http://www.acme.com:1234/",
+ headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), "1234")
- req = urllib2.Request("http://www.acme.com/",
- headers={"Host": "www.acme.com:4321"})
+ req = urllib.request.Request("http://www.acme.com/",
+ headers={"Host": "www.acme.com:4321"})
self.assertEquals(request_port(req), DEFAULT_HTTP_PORT)
def test_request_host(self):
# this request is illegal (RFC2616, 14.2.3)
- req = urllib2.Request("http://1.1.1.1/",
- headers={"Host": "www.acme.com:80"})
+ req = urllib.request.Request("http://1.1.1.1/",
+ headers={"Host": "www.acme.com:80"})
# libwww-perl wants this response, but that seems wrong (RFC 2616,
# section 5.2, point 1., and RFC 2965 section 1, paragraph 3)
#self.assertEquals(request_host(req), "www.acme.com")
self.assertEquals(request_host(req), "1.1.1.1")
- req = urllib2.Request("http://www.acme.com/",
- headers={"Host": "irrelevant.com"})
+ req = urllib.request.Request("http://www.acme.com/",
+ headers={"Host": "irrelevant.com"})
self.assertEquals(request_host(req), "www.acme.com")
# not actually sure this one is valid Request object, so maybe should
# remove test for no host in url in request_host function?
- req = urllib2.Request("/resource.html",
- headers={"Host": "www.acme.com"})
+ req = urllib.request.Request("/resource.html",
+ headers={"Host": "www.acme.com"})
self.assertEquals(request_host(req), "www.acme.com")
# port shouldn't be in request-host
- req = urllib2.Request("http://www.acme.com:2345/resource.html",
- headers={"Host": "www.acme.com:5432"})
+ req = urllib.request.Request("http://www.acme.com:2345/resource.html",
+ headers={"Host": "www.acme.com:5432"})
self.assertEquals(request_host(req), "www.acme.com")
def test_is_HDN(self):
blocked_domains=["acme.com"],
allowed_domains=["www.acme.com"]))
- req = urllib2.Request("http://acme.com/")
+ req = urllib.request.Request("http://acme.com/")
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
res = FakeResponse(headers, "http://acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 0)
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
- req = urllib2.Request("http://www.coyote.com/")
+ req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
# set a cookie with non-allowed domain...
- req = urllib2.Request("http://www.coyote.com/")
+ req = urllib.request.Request("http://www.coyote.com/")
res = FakeResponse(headers, "http://www.coyote.com/")
cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0])
c = CookieJar(policy=pol)
headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"]
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 0)
self.assertEquals(len(c), 1)
c.clear()
- req = urllib2.Request("http://www.roadrunner.net/")
+ req = urllib.request.Request("http://www.roadrunner.net/")
res = FakeResponse(headers, "http://www.roadrunner.net/")
c.extract_cookies(res, req)
self.assertEquals(len(c), 1)
- req = urllib2.Request("http://www.roadrunner.net/")
+ req = urllib.request.Request("http://www.roadrunner.net/")
c.add_cookie_header(req)
self.assert_((req.has_header("Cookie") and
req.has_header("Cookie2")))
self.assertEquals(len(c), 1)
# set a cookie with blocked domain...
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
res = FakeResponse(headers, "http://www.acme.com/")
cookies = c.make_cookies(res, req)
c.set_cookie(cookies[0])
url = "http://www.acme.com"
c = CookieJar(DefaultCookiePolicy(rfc2965=True))
interact_2965(c, url, "foo=bar; Version=1")
- req = urllib2.Request(url)
+ req = urllib.request.Request(url)
self.assertEquals(len(c), 1)
c.add_cookie_header(req)
self.assert_(req.has_header("Cookie"))
def cookiejar_from_cookie_headers(headers):
c = CookieJar()
- req = urllib2.Request("http://www.example.com/")
+ req = urllib.request.Request("http://www.example.com/")
r = FakeResponse(headers, "http://www.example.com/")
c.extract_cookies(r, req)
return c
c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
- #req = urllib2.Request("http://1.1.1.1/",
+ #req = urllib.request.Request("http://1.1.1.1/",
# headers={"Host": "www.acme.com:80"})
- req = urllib2.Request("http://www.acme.com:80/",
+ req = urllib.request.Request("http://www.acme.com:80/",
headers={"Host": "www.acme.com:80"})
headers.append(
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.acme.com/foo/bar")
+ req = urllib.request.Request("http://www.acme.com/foo/bar")
c.add_cookie_header(req)
h = req.get_header("Cookie")
res = FakeResponse(headers, "http://www.acme.com")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
h = req.get_header("Cookie")
"CUSTOMER=WILE_E_COYOTE" in h and
"SHIPPING=FEDEX" not in h)
- req = urllib2.Request("http://www.acme.com/foo/")
+ req = urllib.request.Request("http://www.acme.com/foo/")
c.add_cookie_header(req)
h = req.get_header("Cookie")
c = CookieJar()
headers = []
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/")
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.acme.com/")
+ req = urllib.request.Request("http://www.acme.com/")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
res = FakeResponse(headers, "http://www.acme.com/")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.acme.com/ammo")
+ req = urllib.request.Request("http://www.acme.com/ammo")
c.add_cookie_header(req)
self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*"
# Some additional Netscape cookies tests.
c = CookieJar()
headers = []
- req = urllib2.Request("http://foo.bar.acme.com/foo")
+ req = urllib.request.Request("http://foo.bar.acme.com/foo")
# Netscape allows a host part that contains dots
headers.append("Set-Cookie: Customer=WILE_E_COYOTE; domain=.acme.com")
res = FakeResponse(headers, "http://www.acme.com/foo")
c.extract_cookies(res, req)
- req = urllib2.Request("http://foo.bar.acme.com/foo")
+ req = urllib.request.Request("http://foo.bar.acme.com/foo")
c.add_cookie_header(req)
self.assert_(
"PART_NUMBER=3,4" in req.get_header("Cookie") and
c = CookieJar(DefaultCookiePolicy(rfc2965 = True))
headers = []
- req = urllib2.Request("http://www.ants.com/")
+ req = urllib.request.Request("http://www.ants.com/")
headers.append("Set-Cookie: JSESSIONID=ABCDERANDOM123; Path=")
res = FakeResponse(headers, "http://www.ants.com/")
c.extract_cookies(res, req)
- req = urllib2.Request("http://www.ants.com/")
+ req = urllib.request.Request("http://www.ants.com/")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
self.assertEquals(req.get_header("Cookie2"), '$Version="1"')
# missing path in the request URI
- req = urllib2.Request("http://www.ants.com:8080")
+ req = urllib.request.Request("http://www.ants.com:8080")
c.add_cookie_header(req)
self.assertEquals(req.get_header("Cookie"),
# Check session cookies are deleted properly by
# CookieJar.clear_session_cookies method
- req = urllib2.Request('http://www.perlmeister.com/scripts')
+ req = urllib.request.Request('http://www.perlmeister.com/scripts')
headers = []
headers.append("Set-Cookie: s1=session;Path=/scripts")
headers.append("Set-Cookie: p1=perm; Domain=.perlmeister.com;"
import sys
import base64
import shutil
-import urllib
+import urllib.parse
import http.client
import tempfile
import threading
(res.read(), res.getheader('Content-type'), res.status))
def test_post(self):
- params = urllib.urlencode({'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
+ params = urllib.parse.urlencode(
+ {'spam' : 1, 'eggs' : 'python', 'bacon' : 123456})
headers = {'Content-type' : 'application/x-www-form-urlencoded'}
res = self.request('/cgi-bin/file2.py', 'POST', params, headers)
i = ImpWrapper()
sys.meta_path.append(i)
sys.path_hooks.append(ImpWrapper)
- mnames = ("colorsys", "urlparse", "distutils.core")
+ mnames = ("colorsys", "urllib.parse", "distutils.core")
for mname in mnames:
parent = mname.split(".")[0]
- for n in list(sys.modules.keys()):
+ for n in list(sys.modules):
if n.startswith(parent):
del sys.modules[n]
for mname in mnames:
m = __import__(mname, globals(), locals(), ["__dummy__"])
m.__loader__ # to make sure we actually handled the import
- # Delete urllib from modules because urlparse was imported above.
- # Without this hack, test_socket_ssl fails if run in this order:
- # regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
- try:
- del sys.modules['urllib']
- except KeyError:
- pass
+## # Delete urllib from modules because urlparse was imported above.
+## # Without this hack, test_socket_ssl fails if run in this order:
+## # regrtest.py test_codecmaps_tw test_importhooks test_socket_ssl
+## try:
+## del sys.modules['urllib']
+## except KeyError:
+## pass
def test_main():
support.run_unittest(ImportHooksTestCase)
# These were once about the 10 longest modules
cm('random', ignore=('Random',)) # from _random import Random as CoreGenerator
cm('cgi', ignore=('log',)) # set with = in module
- cm('urllib', ignore=('_CFNumberToInt32',
- '_CStringFromCFString',
- '_CFSetup',
- 'getproxies_registry',
- 'proxy_bypass_registry',
- 'proxy_bypass_macosx_sysconf',
- 'open_https',
- '_https_connection',
- 'getproxies_macosx_sysconf',
- 'getproxies_internetconfig',)) # not on all platforms
cm('pickle')
cm('aifc', ignore=('openfp',)) # set with = in module
cm('sre_parse', ignore=('dump',)) # from sre_constants import *
-import unittest, robotparser
import io
+import unittest
+import urllib.robotparser
from test import support
class RobotTestCase(unittest.TestCase):
agent="test_robotparser"):
lines = io.StringIO(robots_txt).readlines()
- parser = robotparser.RobotFileParser()
+ parser = urllib.robotparser.RobotFileParser()
parser.parse(lines)
for url in good_urls:
tests.addTest(RobotTestCase(index, parser, url, 1, agent))
support.requires('network')
# whole site is password-protected.
url = 'http://mueblesmoraleda.com'
- parser = robotparser.RobotFileParser()
+ parser = urllib.robotparser.RobotFileParser()
parser.set_url(url)
parser.read()
self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
import time
import os
import pprint
-import urllib, urlparse
+import urllib.parse, urllib.request
import shutil
import traceback
import asyncore
"""
# abandon query parameters
- path = urlparse.urlparse(path)[2]
- path = os.path.normpath(urllib.unquote(path))
+ path = urllib.parse.urlparse(path)[2]
+ path = os.path.normpath(urllib.parse.unquote(path))
words = path.split('/')
words = filter(None, words)
path = self.root
# now fetch the same data from the HTTPS server
url = 'https://%s:%d/%s' % (
HOST, server.port, os.path.split(CERTFILE)[1])
- f = urllib.urlopen(url)
+ f = urllib.request.urlopen(url)
dlen = f.info().get("content-length")
if dlen and (int(dlen) > 0):
d2 = f.read(int(dlen))
"""Regresssion tests for urllib"""
-import urllib
+import urllib.parse
+import urllib.request
import http.client
import email.message
import io
hex_repr = "0%s" % hex_repr
return "%" + hex_repr
+# Shortcut for testing FancyURLopener
+_urlopener = None
+def urlopen(url, data=None, proxies=None):
+ """urlopen(url [, data]) -> open file-like object"""
+ global _urlopener
+ if proxies is not None:
+ opener = urllib.request.FancyURLopener(proxies=proxies)
+ elif not _urlopener:
+ opener = urllib.request.FancyURLopener()
+ _urlopener = opener
+ else:
+ opener = _urlopener
+ if data is None:
+ return opener.open(url)
+ else:
+ return opener.open(url, data)
+
class urlopen_FileTests(unittest.TestCase):
"""Test urlopen() opening a temporary file.
"""
def setUp(self):
- """Setup of a temp file to use for testing"""
- self.text = bytes("test_urllib: %s\n" % self.__class__.__name__, "ascii")
- FILE = open(support.TESTFN, 'wb')
+ # Create a temp file to use for testing
+ self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
+ "ascii")
+ f = open(support.TESTFN, 'wb')
try:
- FILE.write(self.text)
+ f.write(self.text)
finally:
- FILE.close()
+ f.close()
self.pathname = support.TESTFN
- self.returned_obj = urllib.urlopen("file:%s" % self.pathname)
+ self.returned_obj = urlopen("file:%s" % self.pathname)
def tearDown(self):
"""Shut down the open object"""
def test_read(self):
self.fakehttp(b"Hello!")
try:
- fp = urllib.urlopen("http://python.org/")
+ fp = urlopen("http://python.org/")
self.assertEqual(fp.readline(), b"Hello!")
self.assertEqual(fp.readline(), b"")
self.assertEqual(fp.geturl(), 'http://python.org/')
Content-Type: text/html; charset=iso-8859-1
''')
try:
- self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
+ self.assertRaises(IOError, urlopen, "http://python.org/")
finally:
self.unfakehttp()
# data. (#1680230)
self.fakehttp(b'')
try:
- self.assertRaises(IOError, urllib.urlopen, "http://something")
+ self.assertRaises(IOError, urlopen, "http://something")
finally:
self.unfakehttp()
except: pass
def constructLocalFileUrl(self, filePath):
- return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))
+ return "file://%s" % urllib.request.pathname2url(
+ os.path.abspath(filePath))
def createNewTempFile(self, data=b""):
"""Creates a new temporary file containing the specified data,
def test_basic(self):
# Make sure that a local file just gets its own location returned and
# a headers value is returned.
- result = urllib.urlretrieve("file:%s" % support.TESTFN)
+ result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
self.assertEqual(result[0], support.TESTFN)
self.assert_(isinstance(result[1], email.message.Message),
"did not get a email.message.Message instance as second "
# Test that setting the filename argument works.
second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp)
- result = urllib.urlretrieve(self.constructLocalFileUrl(
+ result = urllib.request.urlretrieve(self.constructLocalFileUrl(
support.TESTFN), second_temp)
self.assertEqual(second_temp, result[0])
self.assert_(os.path.exists(second_temp), "copy of the file was not "
count_holder[0] = count_holder[0] + 1
second_temp = "%s.2" % support.TESTFN
self.registerFileForCleanUp(second_temp)
- urllib.urlretrieve(self.constructLocalFileUrl(support.TESTFN),
+ urllib.request.urlretrieve(
+ self.constructLocalFileUrl(support.TESTFN),
second_temp, hooktester)
def test_reporthook_0_bytes(self):
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile()
- urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
+ urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 1)
self.assertEqual(report[0][2], 0)
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 5)
- urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
+ urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 2)
self.assertEqual(report[0][1], 8192)
def hooktester(count, block_size, total_size, _report=report):
_report.append((count, block_size, total_size))
srcFileName = self.createNewTempFile(b"x" * 8193)
- urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
+ urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
support.TESTFN, hooktester)
self.assertEqual(len(report), 3)
self.assertEqual(report[0][1], 8192)
class QuotingTests(unittest.TestCase):
"""Tests for urllib.quote() and urllib.quote_plus()
- According to RFC 2396 ("Uniform Resource Identifiers), to escape a
- character you write it as '%' + <2 character US-ASCII hex value>. The Python
- code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
- Case does not matter on the hex letters.
+ According to RFC 2396 (Uniform Resource Identifiers), to escape a
+ character you write it as '%' + <2 character US-ASCII hex value>.
+ The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
+ character properly. Case does not matter on the hex letters.
The various character sets specified are:
"abcdefghijklmnopqrstuvwxyz",
"0123456789",
"_.-"])
- result = urllib.quote(do_not_quote)
+ result = urllib.parse.quote(do_not_quote)
self.assertEqual(do_not_quote, result,
"using quote(): %s != %s" % (do_not_quote, result))
- result = urllib.quote_plus(do_not_quote)
+ result = urllib.parse.quote_plus(do_not_quote)
self.assertEqual(do_not_quote, result,
"using quote_plus(): %s != %s" % (do_not_quote, result))
def test_default_safe(self):
# Test '/' is default value for 'safe' parameter
- self.assertEqual(urllib.quote.__defaults__[0], '/')
+ self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
def test_safe(self):
# Test setting 'safe' parameter does what it should do
quote_by_default = "<>"
- result = urllib.quote(quote_by_default, safe=quote_by_default)
+ result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result,
"using quote(): %s != %s" % (quote_by_default, result))
- result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
+ result = urllib.parse.quote_plus(quote_by_default, safe=quote_by_default)
self.assertEqual(quote_by_default, result,
"using quote_plus(): %s != %s" %
(quote_by_default, result))
should_quote.append(chr(127)) # For 0x7F
should_quote = ''.join(should_quote)
for char in should_quote:
- result = urllib.quote(char)
+ result = urllib.parse.quote(char)
self.assertEqual(hexescape(char), result,
"using quote(): %s should be escaped to %s, not %s" %
(char, hexescape(char), result))
- result = urllib.quote_plus(char)
+ result = urllib.parse.quote_plus(char)
self.assertEqual(hexescape(char), result,
"using quote_plus(): "
"%s should be escapes to %s, not %s" %
del should_quote
partial_quote = "ab[]cd"
expected = "ab%5B%5Dcd"
- result = urllib.quote(partial_quote)
+ result = urllib.parse.quote(partial_quote)
self.assertEqual(expected, result,
"using quote(): %s != %s" % (expected, result))
self.assertEqual(expected, result,
def test_quoting_space(self):
# Make sure quote() and quote_plus() handle spaces as specified in
# their unique way
- result = urllib.quote(' ')
+ result = urllib.parse.quote(' ')
self.assertEqual(result, hexescape(' '),
"using quote(): %s != %s" % (result, hexescape(' ')))
- result = urllib.quote_plus(' ')
+ result = urllib.parse.quote_plus(' ')
self.assertEqual(result, '+',
"using quote_plus(): %s != +" % result)
given = "a b cd e f"
expect = given.replace(' ', hexescape(' '))
- result = urllib.quote(given)
+ result = urllib.parse.quote(given)
self.assertEqual(expect, result,
"using quote(): %s != %s" % (expect, result))
expect = given.replace(' ', '+')
- result = urllib.quote_plus(given)
+ result = urllib.parse.quote_plus(given)
self.assertEqual(expect, result,
"using quote_plus(): %s != %s" % (expect, result))
def test_quoting_plus(self):
- self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
+ self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
'alpha%2Bbeta+gamma')
- self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
+ self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
'alpha+beta+gamma')
class UnquotingTests(unittest.TestCase):
for num in range(128):
given = hexescape(chr(num))
expect = chr(num)
- result = urllib.unquote(given)
+ result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result))
- result = urllib.unquote_plus(given)
+ result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" %
(expect, result))
escape_list.append(given)
escape_string = ''.join(escape_list)
del escape_list
- result = urllib.unquote(escape_string)
+ result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1,
"using quote(): not all characters escaped; %s" %
result)
- result = urllib.unquote(escape_string)
+ result = urllib.parse.unquote(escape_string)
self.assertEqual(result.count('%'), 1,
"using unquote(): not all characters escaped: "
"%s" % result)
# interspersed
given = 'ab%sd' % hexescape('c')
expect = "abcd"
- result = urllib.unquote(given)
+ result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using quote(): %s != %s" % (expect, result))
- result = urllib.unquote_plus(given)
+ result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result))
# Test difference between unquote() and unquote_plus()
given = "are+there+spaces..."
expect = given
- result = urllib.unquote(given)
+ result = urllib.parse.unquote(given)
self.assertEqual(expect, result,
"using unquote(): %s != %s" % (expect, result))
expect = given.replace('+', ' ')
- result = urllib.unquote_plus(given)
+ result = urllib.parse.unquote_plus(given)
self.assertEqual(expect, result,
"using unquote_plus(): %s != %s" % (expect, result))
def test_unquote_with_unicode(self):
- r = urllib.unquote('br%C3%BCckner_sapporo_20050930.doc')
+ r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
class urlencode_Tests(unittest.TestCase):
"""
expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
- result = urllib.urlencode(given)
+ result = urllib.parse.urlencode(given)
for expected in expect_somewhere:
self.assert_(expected in result,
"testing %s: %s not found in %s" %
# Make sure keys and values are quoted using quote_plus()
given = {"&":"="}
expect = "%s=%s" % (hexescape('&'), hexescape('='))
- result = urllib.urlencode(given)
+ result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
given = {"key name":"A bunch of pluses"}
expect = "key+name=A+bunch+of+pluses"
- result = urllib.urlencode(given)
+ result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
def test_doseq(self):
# Test that passing True for 'doseq' parameter works correctly
given = {'sequence':['1', '2', '3']}
- expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
- result = urllib.urlencode(given)
+ expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
+ result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
- result = urllib.urlencode(given, True)
+ result = urllib.parse.urlencode(given, True)
for value in given["sequence"]:
expect = "sequence=%s" % value
self.assert_(expect in result,
# Make sure simple tests pass
expected_path = os.path.join("parts", "of", "a", "path")
expected_url = "parts/of/a/path"
- result = urllib.pathname2url(expected_path)
+ result = urllib.request.pathname2url(expected_path)
self.assertEqual(expected_url, result,
"pathname2url() failed; %s != %s" %
(result, expected_url))
- result = urllib.url2pathname(expected_url)
+ result = urllib.request.url2pathname(expected_url)
self.assertEqual(expected_path, result,
"url2pathame() failed; %s != %s" %
(result, expected_path))
# Test automatic quoting and unquoting works for pathnam2url() and
# url2pathname() respectively
given = os.path.join("needs", "quot=ing", "here")
- expect = "needs/%s/here" % urllib.quote("quot=ing")
- result = urllib.pathname2url(given)
+ expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
+ result = urllib.request.pathname2url(given)
self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" %
(expect, result))
expect = given
- result = urllib.url2pathname(result)
+ result = urllib.request.url2pathname(result)
self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" %
(expect, result))
given = os.path.join("make sure", "using_quote")
- expect = "%s/using_quote" % urllib.quote("make sure")
- result = urllib.pathname2url(given)
+ expect = "%s/using_quote" % urllib.parse.quote("make sure")
+ result = urllib.request.pathname2url(given)
self.assertEqual(expect, result,
"pathname2url() failed; %s != %s" %
(expect, result))
given = "make+sure/using_unquote"
expect = os.path.join("make+sure", "using_unquote")
- result = urllib.url2pathname(given)
+ result = urllib.request.url2pathname(given)
self.assertEqual(expect, result,
"url2pathname() failed; %s != %s" %
(expect, result))
import io
import socket
-import urllib2
-from urllib2 import Request, OpenerDirector
+import urllib.request
+from urllib.request import Request, OpenerDirector
# XXX
# Request
def test_trivial(self):
# A couple trivial tests
- self.assertRaises(ValueError, urllib2.urlopen, 'bogus url')
+ self.assertRaises(ValueError, urllib.request.urlopen, 'bogus url')
# XXX Name hacking to get this to work on Windows.
- fname = os.path.abspath(urllib2.__file__).replace('\\', '/')
+ fname = os.path.abspath(urllib.request.__file__).replace('\\', '/')
if fname[1:2] == ":":
fname = fname[2:]
# And more hacking to get it to work on MacOS. This assumes
fname = '/' + fname.replace(':', '/')
file_url = "file://%s" % fname
- f = urllib2.urlopen(file_url)
+ f = urllib.request.urlopen(file_url)
buf = f.read()
f.close()
def test_parse_http_list(self):
- tests = [('a,b,c', ['a', 'b', 'c']),
- ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
- ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
- ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
+ tests = [
+ ('a,b,c', ['a', 'b', 'c']),
+ ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
+ ('a, b, "c", "d", "e,f", g, h',
+ ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
+ ('a="b\\"c", d="e\\,f", g="h\\\\i"',
+ ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
for string, list in tests:
- self.assertEquals(urllib2.parse_http_list(string), list)
+ self.assertEquals(urllib.request.parse_http_list(string), list)
def test_request_headers_dict():
def test_password_manager(self):
"""
- >>> mgr = urllib2.HTTPPasswordMgr()
+ >>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
>>> add("Some Realm", "http://example.com/", "joe", "password")
>>> add("Some Realm", "http://example.com/ni", "ni", "ni")
def test_password_manager_default_port(self):
"""
- >>> mgr = urllib2.HTTPPasswordMgr()
+ >>> mgr = urllib.request.HTTPPasswordMgr()
>>> add = mgr.add_password
The point to note here is that we can't guess the default port if there's
res = MockResponse(200, "OK", {}, "")
return self.parent.error("http", args[0], res, code, "", {})
elif action == "raise":
- raise urllib2.URLError("blah")
+ raise urllib.error.URLError("blah")
assert False
def close(self): pass
def add_parent(self, parent):
opener.add_handler(h)
return opener
-class MockHTTPHandler(urllib2.BaseHandler):
+class MockHTTPHandler(urllib.request.BaseHandler):
# useful for testing redirections and auth
# sends supplied headers and code as first response
# sends 200 OK as second response
# TypeError in real code; here, returning self from these mock
# methods would either cause no exception, or AttributeError.
- from urllib2 import URLError
+ from urllib.error import URLError
o = OpenerDirector()
meth_spec = [
[("redirect_request", "return self")],
]
handlers = add_ordered_mock_handlers(o, meth_spec)
- o.add_handler(urllib2.UnknownHandler())
+ o.add_handler(urllib.request.UnknownHandler())
for scheme in "do", "proxy", "redirect":
self.assertRaises(URLError, o.open, scheme+"://example.com/")
handlers = add_ordered_mock_handlers(o, meth_spec)
req = Request("http://example.com/")
- self.assertRaises(urllib2.URLError, o.open, req)
+ self.assertRaises(urllib.error.URLError, o.open, req)
self.assertEqual(o.calls, [(handlers[0], "http_open", (req,), {})])
## def test_error(self):
def sanepathname2url(path):
- import urllib
- urlpath = urllib.pathname2url(path)
+ urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
# XXX don't ask me about the mac...
self.filename, self.filetype = filename, filetype
return io.StringIO(self.data), len(self.data)
- class NullFTPHandler(urllib2.FTPHandler):
+ class NullFTPHandler(urllib.request.FTPHandler):
def __init__(self, data): self.data = data
def connect_ftp(self, user, passwd, host, port, dirs,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
def test_file(self):
import email.utils, socket
- h = urllib2.FileHandler()
+ h = urllib.request.FileHandler()
o = h.parent = MockOpener()
TESTFN = support.TESTFN
finally:
f.close()
- self.assertRaises(urllib2.URLError,
+ self.assertRaises(urllib.error.URLError,
h.file_open, Request(url))
finally:
os.remove(TESTFN)
- h = urllib2.FileHandler()
+ h = urllib.request.FileHandler()
o = h.parent = MockOpener()
# XXXX why does // mean ftp (and /// mean not ftp!), and where
# is file: scheme specified? I think this is really a bug, and
try:
h.file_open(req)
# XXXX remove OSError when bug fixed
- except (urllib2.URLError, OSError):
+ except (urllib.error.URLError, OSError):
self.assert_(not ftp)
else:
self.assert_(o.req is req)
return ''
class MockHTTPClass:
def __init__(self):
+ self.level = 0
self.req_headers = []
self.data = None
self.raise_on_endheaders = False
def getresponse(self):
return MockHTTPResponse(MockFile(), {}, 200, "OK")
- h = urllib2.AbstractHTTPHandler()
+ h = urllib.request.AbstractHTTPHandler()
o = h.parent = MockOpener()
url = "http://example.com/"
# check socket.error converted to URLError
http.raise_on_endheaders = True
- self.assertRaises(urllib2.URLError, h.do_open, http, req)
+ self.assertRaises(urllib.error.URLError, h.do_open, http, req)
# check adding of standard headers
o.addheaders = [("Spam", "eggs")]
self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
def test_errors(self):
- h = urllib2.HTTPErrorProcessor()
+ h = urllib.request.HTTPErrorProcessor()
o = h.parent = MockOpener()
url = "http://example.com/"
def test_cookies(self):
cj = MockCookieJar()
- h = urllib2.HTTPCookieProcessor(cj)
+ h = urllib.request.HTTPCookieProcessor(cj)
o = h.parent = MockOpener()
req = Request("http://example.com/")
def test_redirect(self):
from_url = "http://example.com/a.html"
to_url = "http://example.com/b.html"
- h = urllib2.HTTPRedirectHandler()
+ h = urllib.request.HTTPRedirectHandler()
o = h.parent = MockOpener()
# ordinary redirect behaviour
try:
method(req, MockFile(), code, "Blah",
MockHeaders({"location": to_url}))
- except urllib2.HTTPError:
+ except urllib.error.HTTPError:
# 307 in response to POST requires user OK
self.assert_(code == 307 and data is not None)
self.assertEqual(o.req.get_full_url(), to_url)
while 1:
redirect(h, req, "http://example.com/")
count = count + 1
- except urllib2.HTTPError:
+ except urllib.error.HTTPError:
# don't stop until max_repeats, because cookies may introduce state
- self.assertEqual(count, urllib2.HTTPRedirectHandler.max_repeats)
+ self.assertEqual(count, urllib.request.HTTPRedirectHandler.max_repeats)
# detect endless non-repeating chain of redirects
req = Request(from_url, origin_req_host="example.com")
while 1:
redirect(h, req, "http://example.com/%d" % count)
count = count + 1
- except urllib2.HTTPError:
+ except urllib.error.HTTPError:
self.assertEqual(count,
- urllib2.HTTPRedirectHandler.max_redirections)
+ urllib.request.HTTPRedirectHandler.max_redirections)
def test_cookie_redirect(self):
# cookies shouldn't leak into redirected requests
cj = CookieJar()
interact_netscape(cj, "http://www.example.com/", "spam=eggs")
hh = MockHTTPHandler(302, "Location: http://www.cracker.com/\r\n\r\n")
- hdeh = urllib2.HTTPDefaultErrorHandler()
- hrh = urllib2.HTTPRedirectHandler()
- cp = urllib2.HTTPCookieProcessor(cj)
+ hdeh = urllib.request.HTTPDefaultErrorHandler()
+ hrh = urllib.request.HTTPRedirectHandler()
+ cp = urllib.request.HTTPCookieProcessor(cj)
o = build_test_opener(hh, hdeh, hrh, cp)
o.open("http://www.example.com/")
self.assert_(not hh.req.has_header("Cookie"))
def test_proxy(self):
o = OpenerDirector()
- ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
+ ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
o.add_handler(ph)
meth_spec = [
[("http_open", "return response")]
def test_basic_auth(self, quote_char='"'):
opener = OpenerDirector()
password_manager = MockPasswordManager()
- auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
+ auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
realm = "ACME Widget Store"
http_handler = MockHTTPHandler(
401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
def test_proxy_basic_auth(self):
opener = OpenerDirector()
- ph = urllib2.ProxyHandler(dict(http="proxy.example.com:3128"))
+ ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
opener.add_handler(ph)
password_manager = MockPasswordManager()
- auth_handler = urllib2.ProxyBasicAuthHandler(password_manager)
+ auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
realm = "ACME Networks"
http_handler = MockHTTPHandler(
407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
self.recorded = []
def record(self, info):
self.recorded.append(info)
- class TestDigestAuthHandler(urllib2.HTTPDigestAuthHandler):
+ class TestDigestAuthHandler(urllib.request.HTTPDigestAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("digest")
- urllib2.HTTPDigestAuthHandler.http_error_401(self,
+ urllib.request.HTTPDigestAuthHandler.http_error_401(self,
*args, **kwds)
- class TestBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
+ class TestBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
def http_error_401(self, *args, **kwds):
self.parent.record("basic")
- urllib2.HTTPBasicAuthHandler.http_error_401(self,
+ urllib.request.HTTPBasicAuthHandler.http_error_401(self,
*args, **kwds)
opener = RecordingOpenerDirector()
class MiscTests(unittest.TestCase):
def test_build_opener(self):
- class MyHTTPHandler(urllib2.HTTPHandler): pass
- class FooHandler(urllib2.BaseHandler):
+ class MyHTTPHandler(urllib.request.HTTPHandler): pass
+ class FooHandler(urllib.request.BaseHandler):
def foo_open(self): pass
- class BarHandler(urllib2.BaseHandler):
+ class BarHandler(urllib.request.BaseHandler):
def bar_open(self): pass
- build_opener = urllib2.build_opener
+ build_opener = urllib.request.build_opener
o = build_opener(FooHandler, BarHandler)
self.opener_has_handler(o, FooHandler)
# a particular case of overriding: default handlers can be passed
# in explicitly
o = build_opener()
- self.opener_has_handler(o, urllib2.HTTPHandler)
- o = build_opener(urllib2.HTTPHandler)
- self.opener_has_handler(o, urllib2.HTTPHandler)
- o = build_opener(urllib2.HTTPHandler())
- self.opener_has_handler(o, urllib2.HTTPHandler)
+ self.opener_has_handler(o, urllib.request.HTTPHandler)
+ o = build_opener(urllib.request.HTTPHandler)
+ self.opener_has_handler(o, urllib.request.HTTPHandler)
+ o = build_opener(urllib.request.HTTPHandler())
+ self.opener_has_handler(o, urllib.request.HTTPHandler)
# Issue2670: multiple handlers sharing the same base class
- class MyOtherHTTPHandler(urllib2.HTTPHandler): pass
+ class MyOtherHTTPHandler(urllib.request.HTTPHandler): pass
o = build_opener(MyHTTPHandler, MyOtherHTTPHandler)
self.opener_has_handler(o, MyHTTPHandler)
self.opener_has_handler(o, MyOtherHTTPHandler)
def test_main(verbose=None):
from test import test_urllib2
support.run_doctest(test_urllib2, verbose)
- support.run_doctest(urllib2, verbose)
+ support.run_doctest(urllib.request, verbose)
tests = (TrivialTests,
OpenerDirectorTests,
HandlerTests,
import email
import threading
-import urlparse
-import urllib2
+import urllib.parse
+import urllib.request
import http.server
import unittest
import hashlib
self._stop_server = False
self.ready = threading.Event()
request_handler.protocol_version = "HTTP/1.0"
- self.httpd = LoopbackHttpServer(('127.0.0.1', 0),
+ self.httpd = LoopbackHttpServer(("127.0.0.1", 0),
request_handler)
#print "Serving HTTP on %s port %s" % (self.httpd.server_name,
# self.httpd.server_port)
if len(self._users) == 0:
return True
- if 'Proxy-Authorization' not in request_handler.headers:
+ if "Proxy-Authorization" not in request_handler.headers:
return self._return_auth_challenge(request_handler)
else:
auth_dict = self._create_auth_dict(
- request_handler.headers['Proxy-Authorization']
+ request_handler.headers["Proxy-Authorization"]
)
if auth_dict["username"] in self._users:
password = self._users[ auth_dict["username"] ]
def log_message(self, format, *args):
# Uncomment the next line for debugging.
- #sys.stderr.write(format % args)
+ # sys.stderr.write(format % args)
pass
def do_GET(self):
- (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
- self.path, 'http')
+ (scm, netloc, path, params, query, fragment) = urllib.parse.urlparse(
+ self.path, "http")
self.short_path = path
if self.digest_auth_handler.handle_request(self):
self.send_response(200, "OK")
self.server.start()
self.server.ready.wait()
proxy_url = "http://127.0.0.1:%d" % self.server.port
- handler = urllib2.ProxyHandler({"http" : proxy_url})
- self._digest_auth_handler = urllib2.ProxyDigestAuthHandler()
- self.opener = urllib2.build_opener(handler, self._digest_auth_handler)
+ handler = urllib.request.ProxyHandler({"http" : proxy_url})
+ self._digest_auth_handler = urllib.request.ProxyDigestAuthHandler()
+ self.opener = urllib.request.build_opener(
+ handler, self._digest_auth_handler)
def tearDown(self):
self.server.stop()
self._digest_auth_handler.add_password(self.REALM, self.URL,
self.USER, self.PASSWD+"bad")
FakeProxyHandler.digest_auth_handler.set_qop("auth")
- self.assertRaises(urllib2.HTTPError,
+ self.assertRaises(urllib.error.HTTPError,
self.opener.open,
self.URL)
def test_proxy_with_no_password_raises_httperror(self):
FakeProxyHandler.digest_auth_handler.set_qop("auth")
- self.assertRaises(urllib2.HTTPError,
+ self.assertRaises(urllib.error.HTTPError,
self.opener.open,
self.URL)
FakeProxyHandler.digest_auth_handler.set_qop("auth-int")
try:
result = self.opener.open(self.URL)
- except urllib2.URLError:
+ except urllib.error.URLError:
# It's okay if we don't support auth-int, but we certainly
# shouldn't receive any kind of exception here other than
# a URLError.
self.wfile.write(body)
def do_POST(self):
- content_length = self.headers['Content-Length']
+ content_length = self.headers["Content-Length"]
post_data = self.rfile.read(int(content_length))
self.do_GET()
self.requests.append(post_data)
for (header, value) in headers:
self.send_header(header, value % self.port)
if body:
- self.send_header('Content-type', 'text/plain')
+ self.send_header("Content-type", "text/plain")
self.end_headers()
return body
self.end_headers()
for transparent redirection have been written.
"""
- def start_server(self, responses):
+ def setUp(self):
+ self.server = None
+
+ def tearDown(self):
+ if self.server is not None:
+ self.server.stop()
+
+ def urlopen(self, url, data=None):
+ f = urllib.request.urlopen(url, data)
+ result = f.read()
+ f.close()
+ return result
+
+ def start_server(self, responses=None):
+ if responses is None:
+ responses = [(200, [], b"we don't care")]
handler = GetRequestHandler(responses)
self.server = LoopbackHttpServerThread(handler)
handler.port = port
return handler
-
def test_redirection(self):
- expected_response = b'We got here...'
+ expected_response = b"We got here..."
responses = [
- (302, [('Location', 'http://localhost:%s/somewhere_else')], ''),
+ (302, [("Location", "http://localhost:%s/somewhere_else")], ""),
(200, [], expected_response)
]
handler = self.start_server(responses)
-
- try:
- f = urllib2.urlopen('http://localhost:%s/' % handler.port)
- data = f.read()
- f.close()
-
- self.assertEquals(data, expected_response)
- self.assertEquals(handler.requests, ['/', '/somewhere_else'])
- finally:
- self.server.stop()
-
+ data = self.urlopen("http://localhost:%s/" % handler.port)
+ self.assertEquals(data, expected_response)
+ self.assertEquals(handler.requests, ["/", "/somewhere_else"])
def test_404(self):
- expected_response = b'Bad bad bad...'
+ expected_response = b"Bad bad bad..."
handler = self.start_server([(404, [], expected_response)])
try:
- try:
- urllib2.urlopen('http://localhost:%s/weeble' % handler.port)
- except urllib2.URLError as f:
- data = f.read()
- f.close()
- else:
- self.fail('404 should raise URLError')
-
- self.assertEquals(data, expected_response)
- self.assertEquals(handler.requests, ['/weeble'])
- finally:
- self.server.stop()
+ self.urlopen("http://localhost:%s/weeble" % handler.port)
+ except urllib.error.URLError as f:
+ data = f.read()
+ f.close()
+ else:
+ self.fail("404 should raise URLError")
+ self.assertEquals(data, expected_response)
+ self.assertEquals(handler.requests, ["/weeble"])
def test_200(self):
- expected_response = b'pycon 2008...'
+ expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)])
-
- try:
- f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port)
- data = f.read()
- f.close()
-
- self.assertEquals(data, expected_response)
- self.assertEquals(handler.requests, ['/bizarre'])
- finally:
- self.server.stop()
+ data = self.urlopen("http://localhost:%s/bizarre" % handler.port)
+ self.assertEquals(data, expected_response)
+ self.assertEquals(handler.requests, ["/bizarre"])
def test_200_with_parameters(self):
- expected_response = b'pycon 2008...'
+ expected_response = b"pycon 2008..."
handler = self.start_server([(200, [], expected_response)])
-
- try:
- f = urllib2.urlopen('http://localhost:%s/bizarre' % handler.port, b'get=with_feeling')
- data = f.read()
- f.close()
-
- self.assertEquals(data, expected_response)
- self.assertEquals(handler.requests, ['/bizarre', b'get=with_feeling'])
- finally:
- self.server.stop()
-
+ data = self.urlopen("http://localhost:%s/bizarre" % handler.port,
+ b"get=with_feeling")
+ self.assertEquals(data, expected_response)
+ self.assertEquals(handler.requests, ["/bizarre", b"get=with_feeling"])
def test_sending_headers(self):
- handler = self.start_server([(200, [], b"we don't care")])
-
- try:
- req = urllib2.Request("http://localhost:%s/" % handler.port,
- headers={'Range': 'bytes=20-39'})
- urllib2.urlopen(req)
- self.assertEqual(handler.headers_received['Range'], 'bytes=20-39')
- finally:
- self.server.stop()
+ handler = self.start_server()
+ req = urllib.request.Request("http://localhost:%s/" % handler.port,
+ headers={"Range": "bytes=20-39"})
+ urllib.request.urlopen(req)
+ self.assertEqual(handler.headers_received["Range"], "bytes=20-39")
def test_basic(self):
- handler = self.start_server([(200, [], b"we don't care")])
-
+ handler = self.start_server()
+ open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
+ for attr in ("read", "close", "info", "geturl"):
+ self.assert_(hasattr(open_url, attr), "object returned from "
+ "urlopen lacks the %s attribute" % attr)
try:
- open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
- for attr in ("read", "close", "info", "geturl"):
- self.assert_(hasattr(open_url, attr), "object returned from "
- "urlopen lacks the %s attribute" % attr)
- try:
- self.assert_(open_url.read(), "calling 'read' failed")
- finally:
- open_url.close()
+ self.assert_(open_url.read(), "calling 'read' failed")
finally:
- self.server.stop()
+ open_url.close()
def test_info(self):
- handler = self.start_server([(200, [], b"we don't care")])
-
+ handler = self.start_server()
try:
- open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
+ open_url = urllib.request.urlopen(
+ "http://localhost:%s" % handler.port)
info_obj = open_url.info()
self.assert_(isinstance(info_obj, email.message.Message),
"object returned by 'info' is not an instance of "
def test_geturl(self):
# Make sure same URL as opened is returned by geturl.
- handler = self.start_server([(200, [], b"we don't care")])
-
- try:
- open_url = urllib2.urlopen("http://localhost:%s" % handler.port)
- url = open_url.geturl()
- self.assertEqual(url, "http://localhost:%s" % handler.port)
- finally:
- self.server.stop()
-
+ handler = self.start_server()
+ open_url = urllib.request.urlopen("http://localhost:%s" % handler.port)
+ url = open_url.geturl()
+ self.assertEqual(url, "http://localhost:%s" % handler.port)
def test_bad_address(self):
# Make sure proper exception is raised when connecting to a bogus
# started failing then. One hopes the .invalid
# domain will be spared to serve its defined
# purpose.
- # urllib2.urlopen, "http://www.sadflkjsasadf.com/")
- urllib2.urlopen, "http://www.python.invalid./")
-
+ urllib.request.urlopen,
+ "http://www.python.invalid./")
def test_main():
- # We will NOT depend on the network resource flag
- # (Lib/test/regrtest.py -u network) since all tests here are only
- # localhost. However, if this is a bad rationale, then uncomment
- # the next line.
- #support.requires("network")
-
support.run_unittest(ProxyAuthTests)
support.run_unittest(TestUrlopen)
from test import support
from test.test_urllib2 import sanepathname2url
+import os
import socket
-import urllib2
import sys
-import os
+import urllib.error
+import urllib.request
def _retry_thrice(func, exc, *args, **kwargs):
# Connecting to remote hosts is flaky. Make it more robust by retrying
# the connection several times.
-_urlopen_with_retry = _wrap_with_retry_thrice(urllib2.urlopen, urllib2.URLError)
+_urlopen_with_retry = _wrap_with_retry_thrice(urllib.request.urlopen,
+ urllib.error.URLError)
class AuthTests(unittest.TestCase):
# calling .close() on urllib2's response objects should close the
# underlying socket
- # delve deep into response to fetch socket._socketobject
response = _urlopen_with_retry("http://www.python.org/")
- abused_fileobject = response.fp
- httpresponse = abused_fileobject.raw
- self.assert_(httpresponse.__class__ is http.client.HTTPResponse)
- fileobject = httpresponse.fp
-
- self.assert_(not fileobject.closed)
+ sock = response.fp
+ self.assert_(not sock.closed)
response.close()
- self.assert_(fileobject.closed)
+ self.assert_(sock.closed)
class OtherNetworkTests(unittest.TestCase):
def setUp(self):
f.write('hi there\n')
f.close()
urls = [
- 'file:'+sanepathname2url(os.path.abspath(TESTFN)),
- ('file:///nonsensename/etc/passwd', None, urllib2.URLError),
+ 'file:' + sanepathname2url(os.path.abspath(TESTFN)),
+ ('file:///nonsensename/etc/passwd', None,
+ urllib.error.URLError),
]
self._test_urls(urls, self._extra_handlers(), retry=True)
finally:
import logging
debug = logging.getLogger("test_urllib2").debug
- urlopen = urllib2.build_opener(*handlers).open
+ urlopen = urllib.request.build_opener(*handlers).open
if retry:
- urlopen = _wrap_with_retry_thrice(urlopen, urllib2.URLError)
+ urlopen = _wrap_with_retry_thrice(urlopen, urllib.error.URLError)
for url in urls:
if isinstance(url, tuple):
def _extra_handlers(self):
handlers = []
- cfh = urllib2.CacheFTPHandler()
+ cfh = urllib.request.CacheFTPHandler()
cfh.setTimeout(1)
handlers.append(cfh)
def test_http_basic(self):
self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org")
- self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
+ self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_default_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org")
finally:
socket.setdefaulttimeout(None)
- self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 60)
+ self.assertEqual(u.fp._sock.gettimeout(), 60)
def test_http_no_timeout(self):
self.assertTrue(socket.getdefaulttimeout() is None)
u = _urlopen_with_retry("http://www.python.org", timeout=None)
finally:
socket.setdefaulttimeout(None)
- self.assertTrue(u.fp.raw.fp._sock.gettimeout() is None)
+ self.assertTrue(u.fp._sock.gettimeout() is None)
def test_http_timeout(self):
u = _urlopen_with_retry("http://www.python.org", timeout=120)
- self.assertEqual(u.fp.raw.fp._sock.gettimeout(), 120)
+ self.assertEqual(u.fp._sock.gettimeout(), 120)
FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/"
from test import support
import socket
-import urllib
+import urllib.request
import sys
import os
import email.message
socket.setdefaulttimeout(None)
def testURLread(self):
- f = _open_with_retry(urllib.urlopen, "http://www.python.org/")
+ f = _open_with_retry(urllib.request.urlopen, "http://www.python.org/")
x = f.read()
class urlopenNetworkTests(unittest.TestCase):
- """Tests urllib.urlopen using the network.
+ """Tests urllib.reqest.urlopen using the network.
These tests are not exhaustive. Assuming that testing using files does a
good job overall of some of the basic interface features. There are no
"""
def urlopen(self, *args):
- return _open_with_retry(urllib.urlopen, *args)
+ return _open_with_retry(urllib.request.urlopen, *args)
def test_basic(self):
# Simple test expected to pass.
def test_getcode(self):
# test getcode() with the fancy opener to get 404 error codes
URL = "http://www.python.org/XXXinvalidXXX"
- open_url = urllib.FancyURLopener().open(URL)
+ open_url = urllib.request.FancyURLopener().open(URL)
try:
code = open_url.getcode()
finally:
def test_fileno(self):
if (sys.platform in ('win32',) or
- not hasattr(os, 'fdopen')):
+ not hasattr(os, 'fdopen')):
# On Windows, socket handles are not file descriptors; this
# test can't pass on Windows.
return
# domain will be spared to serve its defined
# purpose.
# urllib.urlopen, "http://www.sadflkjsasadf.com/")
- urllib.urlopen, "http://www.python.invalid./")
+ urllib.request.urlopen,
+ "http://www.python.invalid./")
class urlretrieveNetworkTests(unittest.TestCase):
- """Tests urllib.urlretrieve using the network."""
+ """Tests urllib.request.urlretrieve using the network."""
def urlretrieve(self, *args):
- return _open_with_retry(urllib.urlretrieve, *args)
+ return _open_with_retry(urllib.request.urlretrieve, *args)
def test_basic(self):
# Test basic functionality.
from test import support
import unittest
-import urlparse
+import urllib.parse
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
class UrlParseTestCase(unittest.TestCase):
def checkRoundtrips(self, url, parsed, split):
- result = urlparse.urlparse(url)
+ result = urllib.parse.urlparse(url)
self.assertEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
result.params, result.query, result.fragment)
self.assertEqual(t, parsed)
# put it back together and it should be the same
- result2 = urlparse.urlunparse(result)
+ result2 = urllib.parse.urlunparse(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it
# again to get the same result:
- result3 = urlparse.urlparse(result.geturl())
+ result3 = urllib.parse.urlparse(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
self.assertEqual(result3.port, result.port)
# check the roundtrip using urlsplit() as well
- result = urlparse.urlsplit(url)
+ result = urllib.parse.urlsplit(url)
self.assertEqual(result, split)
t = (result.scheme, result.netloc, result.path,
result.query, result.fragment)
self.assertEqual(t, split)
- result2 = urlparse.urlunsplit(result)
+ result2 = urllib.parse.urlunsplit(result)
self.assertEqual(result2, url)
self.assertEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl()
- result3 = urlparse.urlsplit(result.geturl())
+ result3 = urllib.parse.urlsplit(result.geturl())
self.assertEqual(result3.geturl(), result.geturl())
self.assertEqual(result3, result)
self.assertEqual(result3.scheme, result.scheme)
self.checkRoundtrips(url, parsed, split)
def test_http_roundtrips(self):
- # urlparse.urlsplit treats 'http:' as an optimized special case,
+ # urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
# Three cheers for white box knowledge!
testcases = [
self.checkRoundtrips(url, parsed, split)
def checkJoin(self, base, relurl, expected):
- self.assertEqual(urlparse.urljoin(base, relurl), expected,
+ self.assertEqual(urllib.parse.urljoin(base, relurl), expected,
(base, relurl, expected))
def test_unparse_parse(self):
for u in ['Python', './Python']:
- self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
- self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
+ self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
+ self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
def test_RFC1808(self):
# "normal" cases from RFC 1808:
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
]:
- self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
+ self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag))
def test_urlsplit_attributes(self):
url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
- p = urlparse.urlsplit(url)
+ p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
self.assertEqual(p.path, "/doc/")
#self.assertEqual(p.geturl(), url)
url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
- p = urlparse.urlsplit(url)
+ p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
# and request email addresses as usernames.
url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
- p = urlparse.urlsplit(url)
+ p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
self.assertEqual(p.path, "/doc/")
def test_attributes_bad_port(self):
"""Check handling of non-integer ports."""
- p = urlparse.urlsplit("http://www.example.net:foo")
+ p = urllib.parse.urlsplit("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
- p = urlparse.urlparse("http://www.example.net:foo")
+ p = urllib.parse.urlparse("http://www.example.net:foo")
self.assertEqual(p.netloc, "www.example.net:foo")
self.assertRaises(ValueError, lambda: p.port)
# scheme://netloc syntax, the netloc and related attributes
# should be left empty.
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
- p = urlparse.urlsplit(uri)
+ p = urllib.parse.urlsplit(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), uri)
- p = urlparse.urlparse(uri)
+ p = urllib.parse.urlparse(uri)
self.assertEqual(p.netloc, "")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
def test_noslash(self):
# Issue 1637: http://foo.com?query is legal
- self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
+ self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', ''))
def test_main():
(int(2**34),))
xmlrpclib.dumps((xmlrpclib.MAXINT, xmlrpclib.MININT))
- self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MAXINT+1,))
- self.assertRaises(OverflowError, xmlrpclib.dumps, (xmlrpclib.MININT-1,))
+ self.assertRaises(OverflowError, xmlrpclib.dumps,
+ (xmlrpclib.MAXINT+1,))
+ self.assertRaises(OverflowError, xmlrpclib.dumps,
+ (xmlrpclib.MININT-1,))
def dummy_write(s):
pass
m = xmlrpclib.Marshaller()
m.dump_int(xmlrpclib.MAXINT, dummy_write)
m.dump_int(xmlrpclib.MININT, dummy_write)
- self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MAXINT+1, dummy_write)
- self.assertRaises(OverflowError, m.dump_int, xmlrpclib.MININT-1, dummy_write)
-
+ self.assertRaises(OverflowError, m.dump_int,
+ xmlrpclib.MAXINT+1, dummy_write)
+ self.assertRaises(OverflowError, m.dump_int,
+ xmlrpclib.MININT-1, dummy_write)
def test_dump_none(self):
value = alist + [None]
xmlrpclib.loads(strg)[0][0])
self.assertRaises(TypeError, xmlrpclib.dumps, (arg1,))
-
class HelperTestCase(unittest.TestCase):
def test_escape(self):
self.assertEqual(xmlrpclib.escape("a&b"), "a&b")
# private methods
self.assertRaises(AttributeError,
xmlrpc.server.resolve_dotted_attribute, str, '__add')
-
self.assert_(xmlrpc.server.resolve_dotted_attribute(str, 'title'))
class DateTimeTestCase(unittest.TestCase):
def test_time(self):
d = 1181399930.036952
t = xmlrpclib.DateTime(d)
- self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
+ self.assertEqual(str(t),
+ time.strftime("%Y%m%dT%H:%M:%S", time.localtime(d)))
def test_time_tuple(self):
d = (2007,6,9,10,38,50,5,160,0)
def test_time_struct(self):
d = time.localtime(1181399930.036952)
t = xmlrpclib.DateTime(d)
- self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
+ self.assertEqual(str(t), time.strftime("%Y%m%dT%H:%M:%S", d))
def test_datetime_datetime(self):
d = datetime.datetime(2007,1,2,3,4,5)
self.assertEqual(response.reason, 'Not Found')
def test_introspection1(self):
+ expected_methods = set(['pow', 'div', 'my_function', 'add',
+ 'system.listMethods', 'system.methodHelp',
+ 'system.methodSignature', 'system.multicall'])
try:
p = xmlrpclib.ServerProxy('http://localhost:%d' % PORT)
meth = p.system.listMethods()
- expected_methods = set(['pow', 'div', 'my_function', 'add',
- 'system.listMethods', 'system.methodHelp',
- 'system.methodSignature', 'system.multicall'])
self.assertEqual(set(meth), expected_methods)
except (xmlrpclib.ProtocolError, socket.error) as e:
# ignore failures due to non-blocking socket 'unavailable' errors
# will respond exception, if so, our goal is achieved ;)
handle = open(support.TESTFN, "r").read()
- # start with 44th char so as not to get http header, we just need only xml
+ # start with 44th char so as not to get http header, we just
+ # need only xml
self.assertRaises(xmlrpclib.Fault, xmlrpclib.loads, handle[44:])
os.remove("xmldata.txt")
+++ /dev/null
-"""Open an arbitrary URL.
-
-See the following document for more info on URLs:
-"Names and Addresses, URIs, URLs, URNs, URCs", at
-http://www.w3.org/pub/WWW/Addressing/Overview.html
-
-See also the HTTP spec (from which the error codes are derived):
-"HTTP - Hypertext Transfer Protocol", at
-http://www.w3.org/pub/WWW/Protocols/
-
-Related standards and specs:
-- RFC1808: the "relative URL" spec. (authoritative status)
-- RFC1738 - the "URL standard". (authoritative status)
-- RFC1630 - the "URI spec". (informational status)
-
-The object returned by URLopener().open(file) will differ per
-protocol. All you know is that is has methods read(), readline(),
-readlines(), fileno(), close() and info(). The read*(), fileno()
-and close() methods work like those of open files.
-The info() method returns a email.message.Message object which can be
-used to query various info about the object, if available.
-(email.message.Message objects provide a dict-like interface.)
-"""
-
-import http.client
-import email.message
-import email
-import os
-import socket
-import sys
-import time
-from urlparse import urljoin as basejoin
-
-__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
- "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
- "urlencode", "url2pathname", "pathname2url", "splittag",
- "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
- "splittype", "splithost", "splituser", "splitpasswd", "splitport",
- "splitnport", "splitquery", "splitattr", "splitvalue",
- "getproxies"]
-
-__version__ = '1.17' # XXX This version is not always updated :-(
-
-MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
-
-# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
- from nturl2path import url2pathname, pathname2url
-else:
- def url2pathname(pathname):
- """OS-specific conversion from a relative URL of the 'file' scheme
- to a file system path; not recommended for general use."""
- return unquote(pathname)
-
- def pathname2url(pathname):
- """OS-specific conversion from a file system path to a relative URL
- of the 'file' scheme; not recommended for general use."""
- return quote(pathname)
-
-# This really consists of two pieces:
-# (1) a class which handles opening of all sorts of URLs
-# (plus assorted utilities etc.)
-# (2) a set of functions for parsing URLs
-# XXX Should these be separated out into different modules?
-
-
-# Shortcut for basic usage
-_urlopener = None
-def urlopen(url, data=None, proxies=None):
- """urlopen(url [, data]) -> open file-like object"""
- global _urlopener
- if proxies is not None:
- opener = FancyURLopener(proxies=proxies)
- elif not _urlopener:
- opener = FancyURLopener()
- _urlopener = opener
- else:
- opener = _urlopener
- if data is None:
- return opener.open(url)
- else:
- return opener.open(url, data)
-
-def urlretrieve(url, filename=None, reporthook=None, data=None):
- global _urlopener
- if not _urlopener:
- _urlopener = FancyURLopener()
- return _urlopener.retrieve(url, filename, reporthook, data)
-
-def urlcleanup():
- if _urlopener:
- _urlopener.cleanup()
-
-# check for SSL
-try:
- import ssl
-except:
- _have_ssl = False
-else:
- _have_ssl = True
-
-# exception raised when downloaded size does not match content-length
-class ContentTooShortError(IOError):
- def __init__(self, message, content):
- IOError.__init__(self, message)
- self.content = content
-
-ftpcache = {}
-class URLopener:
- """Class to open URLs.
- This is a class rather than just a subroutine because we may need
- more than one set of global protocol-specific options.
- Note -- this is a base class for those who don't want the
- automatic handling of errors type 302 (relocated) and 401
- (authorization needed)."""
-
- __tempfiles = None
-
- version = "Python-urllib/%s" % __version__
-
- # Constructor
- def __init__(self, proxies=None, **x509):
- if proxies is None:
- proxies = getproxies()
- assert hasattr(proxies, 'keys'), "proxies must be a mapping"
- self.proxies = proxies
- self.key_file = x509.get('key_file')
- self.cert_file = x509.get('cert_file')
- self.addheaders = [('User-Agent', self.version)]
- self.__tempfiles = []
- self.__unlink = os.unlink # See cleanup()
- self.tempcache = None
- # Undocumented feature: if you assign {} to tempcache,
- # it is used to cache files retrieved with
- # self.retrieve(). This is not enabled by default
- # since it does not work for changing documents (and I
- # haven't got the logic to check expiration headers
- # yet).
- self.ftpcache = ftpcache
- # Undocumented feature: you can use a different
- # ftp cache by assigning to the .ftpcache member;
- # in case you want logically independent URL openers
- # XXX This is not threadsafe. Bah.
-
- def __del__(self):
- self.close()
-
- def close(self):
- self.cleanup()
-
- def cleanup(self):
- # This code sometimes runs when the rest of this module
- # has already been deleted, so it can't use any globals
- # or import anything.
- if self.__tempfiles:
- for file in self.__tempfiles:
- try:
- self.__unlink(file)
- except OSError:
- pass
- del self.__tempfiles[:]
- if self.tempcache:
- self.tempcache.clear()
-
- def addheader(self, *args):
- """Add a header to be used by the HTTP interface only
- e.g. u.addheader('Accept', 'sound/basic')"""
- self.addheaders.append(args)
-
- # External interface
- def open(self, fullurl, data=None):
- """Use URLopener().open(file) instead of open(file, 'r')."""
- fullurl = unwrap(toBytes(fullurl))
- if self.tempcache and fullurl in self.tempcache:
- filename, headers = self.tempcache[fullurl]
- fp = open(filename, 'rb')
- return addinfourl(fp, headers, fullurl)
- urltype, url = splittype(fullurl)
- if not urltype:
- urltype = 'file'
- if urltype in self.proxies:
- proxy = self.proxies[urltype]
- urltype, proxyhost = splittype(proxy)
- host, selector = splithost(proxyhost)
- url = (host, fullurl) # Signal special case to open_*()
- else:
- proxy = None
- name = 'open_' + urltype
- self.type = urltype
- name = name.replace('-', '_')
- if not hasattr(self, name):
- if proxy:
- return self.open_unknown_proxy(proxy, fullurl, data)
- else:
- return self.open_unknown(fullurl, data)
- try:
- if data is None:
- return getattr(self, name)(url)
- else:
- return getattr(self, name)(url, data)
- except socket.error as msg:
- raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
-
- def open_unknown(self, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = splittype(fullurl)
- raise IOError('url error', 'unknown url type', type)
-
- def open_unknown_proxy(self, proxy, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = splittype(fullurl)
- raise IOError('url error', 'invalid proxy for %s' % type, proxy)
-
- # External interface
- def retrieve(self, url, filename=None, reporthook=None, data=None):
- """retrieve(url) returns (filename, headers) for a local object
- or (tempfilename, headers) for a remote object."""
- url = unwrap(toBytes(url))
- if self.tempcache and url in self.tempcache:
- return self.tempcache[url]
- type, url1 = splittype(url)
- if filename is None and (not type or type == 'file'):
- try:
- fp = self.open_local_file(url1)
- hdrs = fp.info()
- del fp
- return url2pathname(splithost(url1)[1]), hdrs
- except IOError as msg:
- pass
- fp = self.open(url, data)
- headers = fp.info()
- if filename:
- tfp = open(filename, 'wb')
- else:
- import tempfile
- garbage, path = splittype(url)
- garbage, path = splithost(path or "")
- path, garbage = splitquery(path or "")
- path, garbage = splitattr(path or "")
- suffix = os.path.splitext(path)[1]
- (fd, filename) = tempfile.mkstemp(suffix)
- self.__tempfiles.append(filename)
- tfp = os.fdopen(fd, 'wb')
- result = filename, headers
- if self.tempcache is not None:
- self.tempcache[url] = result
- bs = 1024*8
- size = -1
- read = 0
- blocknum = 0
- if reporthook:
- if "content-length" in headers:
- size = int(headers["Content-Length"])
- reporthook(blocknum, bs, size)
- while 1:
- block = fp.read(bs)
- if not block:
- break
- read += len(block)
- tfp.write(block)
- blocknum += 1
- if reporthook:
- reporthook(blocknum, bs, size)
- fp.close()
- tfp.close()
- del fp
- del tfp
-
- # raise exception if actual size does not match content-length header
- if size >= 0 and read < size:
- raise ContentTooShortError("retrieval incomplete: got only %i out "
- "of %i bytes" % (read, size), result)
-
- return result
-
- # Each method named open_<type> knows how to open that type of URL
-
- def _open_generic_http(self, connection_factory, url, data):
- """Make an HTTP connection using connection_class.
-
- This is an internal method that should be called from
- open_http() or open_https().
-
- Arguments:
- - connection_factory should take a host name and return an
- HTTPConnection instance.
- - url is the url to retrieval or a host, relative-path pair.
- - data is payload for a POST request or None.
- """
-
- user_passwd = None
- proxy_passwd= None
- if isinstance(url, str):
- host, selector = splithost(url)
- if host:
- user_passwd, host = splituser(host)
- host = unquote(host)
- realhost = host
- else:
- host, selector = url
- # check whether the proxy contains authorization information
- proxy_passwd, host = splituser(host)
- # now we proceed with the url we want to obtain
- urltype, rest = splittype(selector)
- url = rest
- user_passwd = None
- if urltype.lower() != 'http':
- realhost = None
- else:
- realhost, rest = splithost(rest)
- if realhost:
- user_passwd, realhost = splituser(realhost)
- if user_passwd:
- selector = "%s://%s%s" % (urltype, realhost, rest)
- if proxy_bypass(realhost):
- host = realhost
-
- #print "proxy via http:", host, selector
- if not host: raise IOError('http error', 'no host given')
-
- if proxy_passwd:
- import base64
- proxy_auth = base64.b64encode(proxy_passwd).strip()
- else:
- proxy_auth = None
-
- if user_passwd:
- import base64
- auth = base64.b64encode(user_passwd).strip()
- else:
- auth = None
- http_conn = connection_factory(host)
- # XXX We should fix urllib so that it works with HTTP/1.1.
- http_conn._http_vsn = 10
- http_conn._http_vsn_str = "HTTP/1.0"
-
- headers = {}
- if proxy_auth:
- headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
- if auth:
- headers["Authorization"] = "Basic %s" % auth
- if realhost:
- headers["Host"] = realhost
- for header, value in self.addheaders:
- headers[header] = value
-
- if data is not None:
- headers["Content-Type"] = "application/x-www-form-urlencoded"
- http_conn.request("POST", selector, data, headers)
- else:
- http_conn.request("GET", selector, headers=headers)
-
- try:
- response = http_conn.getresponse()
- except http.client.BadStatusLine:
- # something went wrong with the HTTP status line
- raise IOError('http protocol error', 0,
- 'got a bad status line', None)
-
- # According to RFC 2616, "2xx" code indicates that the client's
- # request was successfully received, understood, and accepted.
- if (200 <= response.status < 300):
- return addinfourl(response.fp, response.msg, "http:" + url,
- response.status)
- else:
- return self.http_error(
- url, response.fp,
- response.status, response.reason, response.msg, data)
-
- def open_http(self, url, data=None):
- """Use HTTP protocol."""
- return self._open_generic_http(http.client.HTTPConnection, url, data)
-
- def http_error(self, url, fp, errcode, errmsg, headers, data=None):
- """Handle http errors.
-
- Derived class can override this, or provide specific handlers
- named http_error_DDD where DDD is the 3-digit error code."""
- # First check if there's a specific handler for this error
- name = 'http_error_%d' % errcode
- if hasattr(self, name):
- method = getattr(self, name)
- if data is None:
- result = method(url, fp, errcode, errmsg, headers)
- else:
- result = method(url, fp, errcode, errmsg, headers, data)
- if result: return result
- return self.http_error_default(url, fp, errcode, errmsg, headers)
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handler: close the connection and raise IOError."""
- void = fp.read()
- fp.close()
- raise IOError('http error', errcode, errmsg, headers)
-
- if _have_ssl:
- def _https_connection(self, host):
- return http.client.HTTPSConnection(host,
- key_file=self.key_file,
- cert_file=self.cert_file)
-
- def open_https(self, url, data=None):
- """Use HTTPS protocol."""
- return self._open_generic_http(self._https_connection, url, data)
-
- def open_file(self, url):
- """Use local file or FTP depending on form of URL."""
- if not isinstance(url, str):
- raise IOError('file error', 'proxy support for file protocol currently not implemented')
- if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
- return self.open_ftp(url)
- else:
- return self.open_local_file(url)
-
- def open_local_file(self, url):
- """Use local file."""
- import mimetypes, email.utils
- host, file = splithost(url)
- localname = url2pathname(file)
- try:
- stats = os.stat(localname)
- except OSError as e:
- raise IOError(e.errno, e.strerror, e.filename)
- size = stats.st_size
- modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
- mtype = mimetypes.guess_type(url)[0]
- headers = email.message_from_string(
- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified))
- if not host:
- urlfile = file
- if file[:1] == '/':
- urlfile = 'file://' + file
- return addinfourl(open(localname, 'rb'),
- headers, urlfile)
- host, port = splitport(host)
- if not port \
- and socket.gethostbyname(host) in (localhost(), thishost()):
- urlfile = file
- if file[:1] == '/':
- urlfile = 'file://' + file
- return addinfourl(open(localname, 'rb'),
- headers, urlfile)
- raise IOError('local file error', 'not on local host')
-
- def open_ftp(self, url):
- """Use FTP protocol."""
- if not isinstance(url, str):
- raise IOError('ftp error', 'proxy support for ftp protocol currently not implemented')
- import mimetypes
- host, path = splithost(url)
- if not host: raise IOError('ftp error', 'no host given')
- host, port = splitport(host)
- user, host = splituser(host)
- if user: user, passwd = splitpasswd(user)
- else: passwd = None
- host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
- host = socket.gethostbyname(host)
- if not port:
- import ftplib
- port = ftplib.FTP_PORT
- else:
- port = int(port)
- path, attrs = splitattr(path)
- path = unquote(path)
- dirs = path.split('/')
- dirs, file = dirs[:-1], dirs[-1]
- if dirs and not dirs[0]: dirs = dirs[1:]
- if dirs and not dirs[0]: dirs[0] = '/'
- key = user, host, port, '/'.join(dirs)
- # XXX thread unsafe!
- if len(self.ftpcache) > MAXFTPCACHE:
- # Prune the cache, rather arbitrarily
- for k in self.ftpcache.keys():
- if k != key:
- v = self.ftpcache[k]
- del self.ftpcache[k]
- v.close()
- try:
- if not key in self.ftpcache:
- self.ftpcache[key] = \
- ftpwrapper(user, passwd, host, port, dirs)
- if not file: type = 'D'
- else: type = 'I'
- for attr in attrs:
- attr, value = splitvalue(attr)
- if attr.lower() == 'type' and \
- value in ('a', 'A', 'i', 'I', 'd', 'D'):
- type = value.upper()
- (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
- mtype = mimetypes.guess_type("ftp:" + url)[0]
- headers = ""
- if mtype:
- headers += "Content-Type: %s\n" % mtype
- if retrlen is not None and retrlen >= 0:
- headers += "Content-Length: %d\n" % retrlen
- headers = email.message_from_string(headers)
- return addinfourl(fp, headers, "ftp:" + url)
- except ftperrors() as msg:
- raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])
-
- def open_data(self, url, data=None):
- """Use "data" URL."""
- if not isinstance(url, str):
- raise IOError('data error', 'proxy support for data protocol currently not implemented')
- # ignore POSTed data
- #
- # syntax of data URLs:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- from io import StringIO
- try:
- [type, data] = url.split(',', 1)
- except ValueError:
- raise IOError('data error', 'bad data URL')
- if not type:
- type = 'text/plain;charset=US-ASCII'
- semi = type.rfind(';')
- if semi >= 0 and '=' not in type[semi:]:
- encoding = type[semi+1:]
- type = type[:semi]
- else:
- encoding = ''
- msg = []
- msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
- time.gmtime(time.time())))
- msg.append('Content-type: %s' % type)
- if encoding == 'base64':
- import base64
- data = base64.decodestring(data)
- else:
- data = unquote(data)
- msg.append('Content-Length: %d' % len(data))
- msg.append('')
- msg.append(data)
- msg = '\n'.join(msg)
- headers = email.message_from_string(msg)
- f = StringIO(msg)
- #f.fileno = None # needed for addinfourl
- return addinfourl(f, headers, url)
-
-
-class FancyURLopener(URLopener):
- """Derived class with handlers for errors we can handle (perhaps)."""
-
- def __init__(self, *args, **kwargs):
- URLopener.__init__(self, *args, **kwargs)
- self.auth_cache = {}
- self.tries = 0
- self.maxtries = 10
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handling -- don't raise an exception."""
- return addinfourl(fp, headers, "http:" + url, errcode)
-
- def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 302 -- relocated (temporarily)."""
- self.tries += 1
- if self.maxtries and self.tries >= self.maxtries:
- if hasattr(self, "http_error_500"):
- meth = self.http_error_500
- else:
- meth = self.http_error_default
- self.tries = 0
- return meth(url, fp, 500,
- "Internal Server Error: Redirect Recursion", headers)
- result = self.redirect_internal(url, fp, errcode, errmsg, headers,
- data)
- self.tries = 0
- return result
-
- def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
- if 'location' in headers:
- newurl = headers['location']
- elif 'uri' in headers:
- newurl = headers['uri']
- else:
- return
- void = fp.read()
- fp.close()
- # In case the server sent a relative URL, join with original:
- newurl = basejoin(self.type + ":" + url, newurl)
- return self.open(newurl)
-
- def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 301 -- also relocated (permanently)."""
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
- def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 303 -- also relocated (essentially identical to 302)."""
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
- def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 307 -- relocated, but turn POST into error."""
- if data is None:
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
- else:
- return self.http_error_default(url, fp, errcode, errmsg, headers)
-
- def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 401 -- authentication required.
- This function supports Basic authentication only."""
- if not 'www-authenticate' in headers:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- stuff = headers['www-authenticate']
- import re
- match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
- if not match:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- scheme, realm = match.groups()
- if scheme.lower() != 'basic':
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- name = 'retry_' + self.type + '_basic_auth'
- if data is None:
- return getattr(self,name)(url, realm)
- else:
- return getattr(self,name)(url, realm, data)
-
- def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 407 -- proxy authentication required.
- This function supports Basic authentication only."""
- if not 'proxy-authenticate' in headers:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- stuff = headers['proxy-authenticate']
- import re
- match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
- if not match:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- scheme, realm = match.groups()
- if scheme.lower() != 'basic':
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- name = 'retry_proxy_' + self.type + '_basic_auth'
- if data is None:
- return getattr(self,name)(url, realm)
- else:
- return getattr(self,name)(url, realm, data)
-
- def retry_proxy_http_basic_auth(self, url, realm, data=None):
- host, selector = splithost(url)
- newurl = 'http://' + host + selector
- proxy = self.proxies['http']
- urltype, proxyhost = splittype(proxy)
- proxyhost, proxyselector = splithost(proxyhost)
- i = proxyhost.find('@') + 1
- proxyhost = proxyhost[i:]
- user, passwd = self.get_user_passwd(proxyhost, realm, i)
- if not (user or passwd): return None
- proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
- self.proxies['http'] = 'http://' + proxyhost + proxyselector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_proxy_https_basic_auth(self, url, realm, data=None):
- host, selector = splithost(url)
- newurl = 'https://' + host + selector
- proxy = self.proxies['https']
- urltype, proxyhost = splittype(proxy)
- proxyhost, proxyselector = splithost(proxyhost)
- i = proxyhost.find('@') + 1
- proxyhost = proxyhost[i:]
- user, passwd = self.get_user_passwd(proxyhost, realm, i)
- if not (user or passwd): return None
- proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
- self.proxies['https'] = 'https://' + proxyhost + proxyselector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_http_basic_auth(self, url, realm, data=None):
- host, selector = splithost(url)
- i = host.find('@') + 1
- host = host[i:]
- user, passwd = self.get_user_passwd(host, realm, i)
- if not (user or passwd): return None
- host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
- newurl = 'http://' + host + selector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_https_basic_auth(self, url, realm, data=None):
- host, selector = splithost(url)
- i = host.find('@') + 1
- host = host[i:]
- user, passwd = self.get_user_passwd(host, realm, i)
- if not (user or passwd): return None
- host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
- newurl = 'https://' + host + selector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def get_user_passwd(self, host, realm, clear_cache = 0):
- key = realm + '@' + host.lower()
- if key in self.auth_cache:
- if clear_cache:
- del self.auth_cache[key]
- else:
- return self.auth_cache[key]
- user, passwd = self.prompt_user_passwd(host, realm)
- if user or passwd: self.auth_cache[key] = (user, passwd)
- return user, passwd
-
- def prompt_user_passwd(self, host, realm):
- """Override this in a GUI environment!"""
- import getpass
- try:
- user = input("Enter username for %s at %s: " % (realm, host))
- passwd = getpass.getpass("Enter password for %s in %s at %s: " %
- (user, realm, host))
- return user, passwd
- except KeyboardInterrupt:
- print()
- return None, None
-
-
-# Utility functions
-
-_localhost = None
-def localhost():
- """Return the IP address of the magic hostname 'localhost'."""
- global _localhost
- if _localhost is None:
- _localhost = socket.gethostbyname('localhost')
- return _localhost
-
-_thishost = None
-def thishost():
- """Return the IP address of the current host."""
- global _thishost
- if _thishost is None:
- _thishost = socket.gethostbyname(socket.gethostname())
- return _thishost
-
-_ftperrors = None
-def ftperrors():
- """Return the set of errors raised by the FTP class."""
- global _ftperrors
- if _ftperrors is None:
- import ftplib
- _ftperrors = ftplib.all_errors
- return _ftperrors
-
-_noheaders = None
-def noheaders():
- """Return an empty email.message.Message object."""
- global _noheaders
- if _noheaders is None:
- _noheaders = email.message.Message()
- return _noheaders
-
-
-# Utility classes
-
-class ftpwrapper:
- """Class used by open_ftp() for cache of open FTP connections."""
-
- def __init__(self, user, passwd, host, port, dirs,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- self.user = user
- self.passwd = passwd
- self.host = host
- self.port = port
- self.dirs = dirs
- self.timeout = timeout
- self.init()
-
- def init(self):
- import ftplib
- self.busy = 0
- self.ftp = ftplib.FTP()
- self.ftp.connect(self.host, self.port, self.timeout)
- self.ftp.login(self.user, self.passwd)
- for dir in self.dirs:
- self.ftp.cwd(dir)
-
- def retrfile(self, file, type):
- import ftplib
- self.endtransfer()
- if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
- else: cmd = 'TYPE ' + type; isdir = 0
- try:
- self.ftp.voidcmd(cmd)
- except ftplib.all_errors:
- self.init()
- self.ftp.voidcmd(cmd)
- conn = None
- if file and not isdir:
- # Try to retrieve as a file
- try:
- cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd)
- except ftplib.error_perm as reason:
- if str(reason)[:3] != '550':
- raise IOError('ftp error', reason).with_traceback(sys.exc_info()[2])
- if not conn:
- # Set transfer mode to ASCII!
- self.ftp.voidcmd('TYPE A')
- # Try a directory listing. Verify that directory exists.
- if file:
- pwd = self.ftp.pwd()
- try:
- try:
- self.ftp.cwd(file)
- except ftplib.error_perm as reason:
- raise IOError('ftp error', reason) from reason
- finally:
- self.ftp.cwd(pwd)
- cmd = 'LIST ' + file
- else:
- cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
- self.busy = 1
- # Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'),
- self.endtransfer), conn[1])
- def endtransfer(self):
- if not self.busy:
- return
- self.busy = 0
- try:
- self.ftp.voidresp()
- except ftperrors():
- pass
-
- def close(self):
- self.endtransfer()
- try:
- self.ftp.close()
- except ftperrors():
- pass
-
-class addbase:
- """Base class for addinfo and addclosehook."""
-
- # XXX Add a method to expose the timeout on the underlying socket?
-
- def __init__(self, fp):
- self.fp = fp
- self.read = self.fp.read
- self.readline = self.fp.readline
- if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
- if hasattr(self.fp, "fileno"):
- self.fileno = self.fp.fileno
- else:
- self.fileno = lambda: None
- if hasattr(self.fp, "__iter__"):
- self.__iter__ = self.fp.__iter__
- if hasattr(self.fp, "__next__"):
- self.__next__ = self.fp.__next__
-
- def __repr__(self):
- return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
- id(self), self.fp)
-
- def close(self):
- self.read = None
- self.readline = None
- self.readlines = None
- self.fileno = None
- if self.fp: self.fp.close()
- self.fp = None
-
-class addclosehook(addbase):
- """Class to add a close hook to an open file."""
-
- def __init__(self, fp, closehook, *hookargs):
- addbase.__init__(self, fp)
- self.closehook = closehook
- self.hookargs = hookargs
-
- def close(self):
- addbase.close(self)
- if self.closehook:
- self.closehook(*self.hookargs)
- self.closehook = None
- self.hookargs = None
-
-class addinfo(addbase):
- """class to add an info() method to an open file."""
-
- def __init__(self, fp, headers):
- addbase.__init__(self, fp)
- self.headers = headers
-
- def info(self):
- return self.headers
-
-class addinfourl(addbase):
- """class to add info() and geturl() methods to an open file."""
-
- def __init__(self, fp, headers, url, code=None):
- addbase.__init__(self, fp)
- self.headers = headers
- self.url = url
- self.code = code
-
- def info(self):
- return self.headers
-
- def getcode(self):
- return self.code
-
- def geturl(self):
- return self.url
-
-
-# Utilities to parse URLs (most of these return None for missing parts):
-# unwrap('<URL:type://host/path>') --> 'type://host/path'
-# splittype('type:opaquestring') --> 'type', 'opaquestring'
-# splithost('//host[:port]/path') --> 'host[:port]', '/path'
-# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
-# splitpasswd('user:passwd') -> 'user', 'passwd'
-# splitport('host:port') --> 'host', 'port'
-# splitquery('/path?query') --> '/path', 'query'
-# splittag('/path#tag') --> '/path', 'tag'
-# splitattr('/path;attr1=value1;attr2=value2;...') ->
-# '/path', ['attr1=value1', 'attr2=value2', ...]
-# splitvalue('attr=value') --> 'attr', 'value'
-# unquote('abc%20def') -> 'abc def'
-# quote('abc def') -> 'abc%20def')
-
-def toBytes(url):
- """toBytes(u"URL") --> 'URL'."""
- # Most URL schemes require ASCII. If that changes, the conversion
- # can be relaxed.
- # XXX get rid of toBytes()
- if isinstance(url, str):
- try:
- url = url.encode("ASCII").decode()
- except UnicodeError:
- raise UnicodeError("URL " + repr(url) +
- " contains non-ASCII characters")
- return url
-
-def unwrap(url):
- """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
- url = str(url).strip()
- if url[:1] == '<' and url[-1:] == '>':
- url = url[1:-1].strip()
- if url[:4] == 'URL:': url = url[4:].strip()
- return url
-
-_typeprog = None
-def splittype(url):
- """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
- global _typeprog
- if _typeprog is None:
- import re
- _typeprog = re.compile('^([^/:]+):')
-
- match = _typeprog.match(url)
- if match:
- scheme = match.group(1)
- return scheme.lower(), url[len(scheme) + 1:]
- return None, url
-
-_hostprog = None
-def splithost(url):
- """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
- global _hostprog
- if _hostprog is None:
- import re
- _hostprog = re.compile('^//([^/?]*)(.*)$')
-
- match = _hostprog.match(url)
- if match: return match.group(1, 2)
- return None, url
-
-_userprog = None
-def splituser(host):
- """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
- global _userprog
- if _userprog is None:
- import re
- _userprog = re.compile('^(.*)@(.*)$')
-
- match = _userprog.match(host)
- if match: return map(unquote, match.group(1, 2))
- return None, host
-
-_passwdprog = None
-def splitpasswd(user):
- """splitpasswd('user:passwd') -> 'user', 'passwd'."""
- global _passwdprog
- if _passwdprog is None:
- import re
- _passwdprog = re.compile('^([^:]*):(.*)$')
-
- match = _passwdprog.match(user)
- if match: return match.group(1, 2)
- return user, None
-
-# splittag('/path#tag') --> '/path', 'tag'
-_portprog = None
-def splitport(host):
- """splitport('host:port') --> 'host', 'port'."""
- global _portprog
- if _portprog is None:
- import re
- _portprog = re.compile('^(.*):([0-9]+)$')
-
- match = _portprog.match(host)
- if match: return match.group(1, 2)
- return host, None
-
-_nportprog = None
-def splitnport(host, defport=-1):
- """Split host and port, returning numeric port.
- Return given default port if no ':' found; defaults to -1.
- Return numerical port if a valid number are found after ':'.
- Return None if ':' but not a valid number."""
- global _nportprog
- if _nportprog is None:
- import re
- _nportprog = re.compile('^(.*):(.*)$')
-
- match = _nportprog.match(host)
- if match:
- host, port = match.group(1, 2)
- try:
- if not port: raise ValueError("no digits")
- nport = int(port)
- except ValueError:
- nport = None
- return host, nport
- return host, defport
-
-_queryprog = None
-def splitquery(url):
- """splitquery('/path?query') --> '/path', 'query'."""
- global _queryprog
- if _queryprog is None:
- import re
- _queryprog = re.compile('^(.*)\?([^?]*)$')
-
- match = _queryprog.match(url)
- if match: return match.group(1, 2)
- return url, None
-
-_tagprog = None
-def splittag(url):
- """splittag('/path#tag') --> '/path', 'tag'."""
- global _tagprog
- if _tagprog is None:
- import re
- _tagprog = re.compile('^(.*)#([^#]*)$')
-
- match = _tagprog.match(url)
- if match: return match.group(1, 2)
- return url, None
-
-def splitattr(url):
- """splitattr('/path;attr1=value1;attr2=value2;...') ->
- '/path', ['attr1=value1', 'attr2=value2', ...]."""
- words = url.split(';')
- return words[0], words[1:]
-
-_valueprog = None
-def splitvalue(attr):
- """splitvalue('attr=value') --> 'attr', 'value'."""
- global _valueprog
- if _valueprog is None:
- import re
- _valueprog = re.compile('^([^=]*)=(.*)$')
-
- match = _valueprog.match(attr)
- if match: return match.group(1, 2)
- return attr, None
-
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
-
-def unquote(s):
- """unquote('abc%20def') -> 'abc def'."""
- res = s.split('%')
- for i in range(1, len(res)):
- item = res[i]
- try:
- res[i] = _hextochr[item[:2]] + item[2:]
- except KeyError:
- res[i] = '%' + item
- except UnicodeDecodeError:
- res[i] = chr(int(item[:2], 16)) + item[2:]
- return "".join(res)
-
-def unquote_plus(s):
- """unquote('%7e/abc+def') -> '~/abc def'"""
- s = s.replace('+', ' ')
- return unquote(s)
-
-always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- 'abcdefghijklmnopqrstuvwxyz'
- '0123456789' '_.-')
-_safe_quoters= {}
-
-class Quoter:
- def __init__(self, safe):
- self.cache = {}
- self.safe = safe + always_safe
-
- def __call__(self, c):
- try:
- return self.cache[c]
- except KeyError:
- if ord(c) < 256:
- res = (c in self.safe) and c or ('%%%02X' % ord(c))
- self.cache[c] = res
- return res
- else:
- return "".join(['%%%02X' % i for i in c.encode("utf-8")])
-
-def quote(s, safe = '/'):
- """quote('abc def') -> 'abc%20def'
-
- Each part of a URL, e.g. the path info, the query, etc., has a
- different set of reserved characters that must be quoted.
-
- RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
- the following reserved characters.
-
- reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
- "$" | ","
-
- Each of these characters is reserved in some component of a URL,
- but not necessarily in all of them.
-
- By default, the quote function is intended for quoting the path
- section of a URL. Thus, it will not encode '/'. This character
- is reserved, but in typical usage the quote function is being
- called on a path where the existing slash characters are used as
- reserved characters.
- """
- cachekey = (safe, always_safe)
- try:
- quoter = _safe_quoters[cachekey]
- except KeyError:
- quoter = Quoter(safe)
- _safe_quoters[cachekey] = quoter
- res = map(quoter, s)
- return ''.join(res)
-
-def quote_plus(s, safe = ''):
- """Quote the query fragment of a URL; replacing ' ' with '+'"""
- if ' ' in s:
- s = quote(s, safe + ' ')
- return s.replace(' ', '+')
- return quote(s, safe)
-
-def urlencode(query,doseq=0):
- """Encode a sequence of two-element tuples or dictionary into a URL query string.
-
- If any values in the query arg are sequences and doseq is true, each
- sequence element is converted to a separate parameter.
-
- If the query arg is a sequence of two-element tuples, the order of the
- parameters in the output will match the order of parameters in the
- input.
- """
-
- if hasattr(query,"items"):
- # mapping objects
- query = query.items()
- else:
- # it's a bother at times that strings and string-like objects are
- # sequences...
- try:
- # non-sequence items should not work with len()
- # non-empty strings will fail this
- if len(query) and not isinstance(query[0], tuple):
- raise TypeError
- # zero-length sequences of all types will get here and succeed,
- # but that's a minor nit - since the original implementation
- # allowed empty dicts that type of behavior probably should be
- # preserved for consistency
- except TypeError:
- ty,va,tb = sys.exc_info()
- raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)
-
- l = []
- if not doseq:
- # preserve old behavior
- for k, v in query:
- k = quote_plus(str(k))
- v = quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- for k, v in query:
- k = quote_plus(str(k))
- if isinstance(v, str):
- v = quote_plus(v)
- l.append(k + '=' + v)
- elif isinstance(v, str):
- # is there a reasonable way to convert to ASCII?
- # encode generates a string, but "replace" or "ignore"
- # lose information and "strict" can raise UnicodeError
- v = quote_plus(v.encode("ASCII","replace"))
- l.append(k + '=' + v)
- else:
- try:
- # is this a sufficient test for sequence-ness?
- x = len(v)
- except TypeError:
- # not a sequence
- v = quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- # loop over the sequence
- for elt in v:
- l.append(k + '=' + quote_plus(str(elt)))
- return '&'.join(l)
-
-# Proxy handling
-def getproxies_environment():
- """Return a dictionary of scheme -> proxy server URL mappings.
-
- Scan the environment for variables named <scheme>_proxy;
- this seems to be the standard convention. If you need a
- different way, you can pass a proxies dictionary to the
- [Fancy]URLopener constructor.
-
- """
- proxies = {}
- for name, value in os.environ.items():
- name = name.lower()
- if name == 'no_proxy':
- # handled in proxy_bypass_environment
- continue
- if value and name[-6:] == '_proxy':
- proxies[name[:-6]] = value
- return proxies
-
-def proxy_bypass_environment(host):
- """Test if proxies should not be used for a particular host.
-
- Checks the environment for a variable named no_proxy, which should
- be a list of DNS suffixes separated by commas, or '*' for all hosts.
- """
- no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
- # '*' is special case for always bypass
- if no_proxy == '*':
- return 1
- # strip port off host
- hostonly, port = splitport(host)
- # check if the host ends with any of the DNS suffixes
- for name in no_proxy.split(','):
- if name and (hostonly.endswith(name) or host.endswith(name)):
- return 1
- # otherwise, don't bypass
- return 0
-
-
-if sys.platform == 'darwin':
-
- def _CFSetup(sc):
- from ctypes import c_int32, c_void_p, c_char_p, c_int
- sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
- sc.CFStringCreateWithCString.restype = c_void_p
- sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
- sc.SCDynamicStoreCopyProxies.restype = c_void_p
- sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
- sc.CFDictionaryGetValue.restype = c_void_p
- sc.CFStringGetLength.argtypes = [ c_void_p ]
- sc.CFStringGetLength.restype = c_int32
- sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
- sc.CFStringGetCString.restype = c_int32
- sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
- sc.CFNumberGetValue.restype = c_int32
- sc.CFRelease.argtypes = [ c_void_p ]
- sc.CFRelease.restype = None
-
- def _CStringFromCFString(sc, value):
- from ctypes import create_string_buffer
- length = sc.CFStringGetLength(value) + 1
- buff = create_string_buffer(length)
- sc.CFStringGetCString(value, buff, length, 0)
- return buff.value
-
- def _CFNumberToInt32(sc, cfnum):
- from ctypes import byref, c_int
- val = c_int()
- kCFNumberSInt32Type = 3
- sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
- return val.value
-
-
- def proxy_bypass_macosx_sysconf(host):
- """
- Return True iff this host shouldn't be accessed using a proxy
-
- This function uses the MacOSX framework SystemConfiguration
- to fetch the proxy information.
- """
- from ctypes import cdll
- from ctypes.util import find_library
- import re
- import socket
- from fnmatch import fnmatch
-
- def ip2num(ipAddr):
- parts = ipAddr.split('.')
- parts = map(int, parts)
- if len(parts) != 4:
- parts = (parts + [0, 0, 0, 0])[:4]
- return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
-
- sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
- _CFSetup(sc)
-
- hostIP = None
-
- if not sc:
- return False
-
- kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
- kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
- "ExcludeSimpleHostnames", 0)
-
-
- proxyDict = sc.SCDynamicStoreCopyProxies(None)
- if proxyDict is None:
- return False
-
- try:
- # Check for simple host names:
- if '.' not in host:
- exclude_simple = sc.CFDictionaryGetValue(proxyDict,
- kSCPropNetProxiesExcludeSimpleHostnames)
- if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
- return True
-
-
- # Check the exceptions list:
- exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
- if exceptions:
- # Items in the list are strings like these: *.local, 169.254/16
- for index in xrange(sc.CFArrayGetCount(exceptions)):
- value = sc.CFArrayGetValueAtIndex(exceptions, index)
- if not value: continue
- value = _CStringFromCFString(sc, value)
-
- m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
- if m is not None:
- if hostIP is None:
- hostIP = socket.gethostbyname(host)
- hostIP = ip2num(hostIP)
-
- base = ip2num(m.group(1))
- mask = int(m.group(2)[1:])
- mask = 32 - mask
-
- if (hostIP >> mask) == (base >> mask):
- return True
-
- elif fnmatch(host, value):
- return True
-
- return False
-
- finally:
- sc.CFRelease(kSCPropNetProxiesExceptionsList)
- sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
-
-
-
- def getproxies_macosx_sysconf():
- """Return a dictionary of scheme -> proxy server URL mappings.
-
- This function uses the MacOSX framework SystemConfiguration
- to fetch the proxy information.
- """
- from ctypes import cdll
- from ctypes.util import find_library
-
- sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
- _CFSetup(sc)
-
- if not sc:
- return {}
-
- kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, b"HTTPEnable", 0)
- kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, b"HTTPProxy", 0)
- kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, b"HTTPPort", 0)
-
- kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, b"HTTPSEnable", 0)
- kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, b"HTTPSProxy", 0)
- kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, b"HTTPSPort", 0)
-
- kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, b"FTPEnable", 0)
- kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, b"FTPPassive", 0)
- kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, b"FTPPort", 0)
- kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, b"FTPProxy", 0)
-
- kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, b"GopherEnable", 0)
- kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, b"GopherPort", 0)
- kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, b"GopherProxy", 0)
-
- proxies = {}
- proxyDict = sc.SCDynamicStoreCopyProxies(None)
-
- try:
- # HTTP:
- enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
- if enabled and _CFNumberToInt32(sc, enabled):
- proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
- port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)
-
- if proxy:
- proxy = _CStringFromCFString(sc, proxy)
- if port:
- port = _CFNumberToInt32(sc, port)
- proxies["http"] = "http://%s:%i" % (proxy, port)
- else:
- proxies["http"] = "http://%s" % (proxy, )
-
- # HTTPS:
- enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
- if enabled and _CFNumberToInt32(sc, enabled):
- proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
- port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)
-
- if proxy:
- proxy = _CStringFromCFString(sc, proxy)
- if port:
- port = _CFNumberToInt32(sc, port)
- proxies["https"] = "http://%s:%i" % (proxy, port)
- else:
- proxies["https"] = "http://%s" % (proxy, )
-
- # FTP:
- enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
- if enabled and _CFNumberToInt32(sc, enabled):
- proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
- port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)
-
- if proxy:
- proxy = _CStringFromCFString(sc, proxy)
- if port:
- port = _CFNumberToInt32(sc, port)
- proxies["ftp"] = "http://%s:%i" % (proxy, port)
- else:
- proxies["ftp"] = "http://%s" % (proxy, )
-
- # Gopher:
- enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
- if enabled and _CFNumberToInt32(sc, enabled):
- proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
- port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)
-
- if proxy:
- proxy = _CStringFromCFString(sc, proxy)
- if port:
- port = _CFNumberToInt32(sc, port)
- proxies["gopher"] = "http://%s:%i" % (proxy, port)
- else:
- proxies["gopher"] = "http://%s" % (proxy, )
- finally:
- sc.CFRelease(proxyDict)
-
- sc.CFRelease(kSCPropNetProxiesHTTPEnable)
- sc.CFRelease(kSCPropNetProxiesHTTPProxy)
- sc.CFRelease(kSCPropNetProxiesHTTPPort)
- sc.CFRelease(kSCPropNetProxiesFTPEnable)
- sc.CFRelease(kSCPropNetProxiesFTPPassive)
- sc.CFRelease(kSCPropNetProxiesFTPPort)
- sc.CFRelease(kSCPropNetProxiesFTPProxy)
- sc.CFRelease(kSCPropNetProxiesGopherEnable)
- sc.CFRelease(kSCPropNetProxiesGopherPort)
- sc.CFRelease(kSCPropNetProxiesGopherProxy)
-
- return proxies
-
-
-
- def proxy_bypass(host):
- if getproxies_environment():
- return proxy_bypass_environment(host)
- else:
- return proxy_bypass_macosx_sysconf(host)
-
- def getproxies():
- return getproxies_environment() or getproxies_macosx_sysconf()
-
-elif os.name == 'nt':
- def getproxies_registry():
- """Return a dictionary of scheme -> proxy server URL mappings.
-
- Win32 uses the registry to store proxies.
-
- """
- proxies = {}
- try:
- import winreg
- except ImportError:
- # Std module, so should be around - but you never know!
- return proxies
- try:
- internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
- r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
- proxyEnable = winreg.QueryValueEx(internetSettings,
- 'ProxyEnable')[0]
- if proxyEnable:
- # Returned as Unicode but problems if not converted to ASCII
- proxyServer = str(winreg.QueryValueEx(internetSettings,
- 'ProxyServer')[0])
- if '=' in proxyServer:
- # Per-protocol settings
- for p in proxyServer.split(';'):
- protocol, address = p.split('=', 1)
- # See if address has a type:// prefix
- import re
- if not re.match('^([^/:]+)://', address):
- address = '%s://%s' % (protocol, address)
- proxies[protocol] = address
- else:
- # Use one setting for all protocols
- if proxyServer[:5] == 'http:':
- proxies['http'] = proxyServer
- else:
- proxies['http'] = 'http://%s' % proxyServer
- proxies['ftp'] = 'ftp://%s' % proxyServer
- internetSettings.Close()
- except (WindowsError, ValueError, TypeError):
- # Either registry key not found etc, or the value in an
- # unexpected format.
- # proxies already set up to be empty so nothing to do
- pass
- return proxies
-
- def getproxies():
- """Return a dictionary of scheme -> proxy server URL mappings.
-
- Returns settings gathered from the environment, if specified,
- or the registry.
-
- """
- return getproxies_environment() or getproxies_registry()
-
- def proxy_bypass_registry(host):
- try:
- import winreg
- import re
- except ImportError:
- # Std modules, so should be around - but you never know!
- return 0
- try:
- internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
- r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
- proxyEnable = winreg.QueryValueEx(internetSettings,
- 'ProxyEnable')[0]
- proxyOverride = str(winreg.QueryValueEx(internetSettings,
- 'ProxyOverride')[0])
- # ^^^^ Returned as Unicode but problems if not converted to ASCII
- except WindowsError:
- return 0
- if not proxyEnable or not proxyOverride:
- return 0
- # try to make a host list from name and IP address.
- rawHost, port = splitport(host)
- host = [rawHost]
- try:
- addr = socket.gethostbyname(rawHost)
- if addr != rawHost:
- host.append(addr)
- except socket.error:
- pass
- try:
- fqdn = socket.getfqdn(rawHost)
- if fqdn != rawHost:
- host.append(fqdn)
- except socket.error:
- pass
- # make a check value list from the registry entry: replace the
- # '<local>' string by the localhost entry and the corresponding
- # canonical entry.
- proxyOverride = proxyOverride.split(';')
- i = 0
- while i < len(proxyOverride):
- if proxyOverride[i] == '<local>':
- proxyOverride[i:i+1] = ['localhost',
- '127.0.0.1',
- socket.gethostname(),
- socket.gethostbyname(
- socket.gethostname())]
- i += 1
- # print proxyOverride
- # now check if we match one of the registry values.
- for test in proxyOverride:
- test = test.replace(".", r"\.") # mask dots
- test = test.replace("*", r".*") # change glob sequence
- test = test.replace("?", r".") # change glob char
- for val in host:
- # print "%s <--> %s" %( test, val )
- if re.match(test, val, re.I):
- return 1
- return 0
-
- def proxy_bypass(host):
- """Return a dictionary of scheme -> proxy server URL mappings.
-
- Returns settings gathered from the environment, if specified,
- or the registry.
-
- """
- if getproxies_environment():
- return proxy_bypass_environment(host)
- else:
- return proxy_bypass_registry(host)
-
-else:
- # By default use environment variables
- getproxies = getproxies_environment
- proxy_bypass = proxy_bypass_environment
-
-# Test and time quote() and unquote()
-def test1():
- s = ''
- for i in range(256): s = s + chr(i)
- s = s*4
- t0 = time.time()
- qs = quote(s)
- uqs = unquote(qs)
- t1 = time.time()
- if uqs != s:
- print('Wrong!')
- print(repr(s))
- print(repr(qs))
- print(repr(uqs))
- print(round(t1 - t0, 3), 'sec')
-
-
-def reporthook(blocknum, blocksize, totalsize):
- # Report during remote transfers
- print("Block number: %d, Block size: %d, Total size: %d" % (
- blocknum, blocksize, totalsize))
-
-# Test program
-def test(args=[]):
- if not args:
- args = [
- '/etc/passwd',
- 'file:/etc/passwd',
- 'file://localhost/etc/passwd',
- 'ftp://ftp.gnu.org/pub/README',
- 'http://www.python.org/index.html',
- ]
- if hasattr(URLopener, "open_https"):
- args.append('https://synergy.as.cmu.edu/~geek/')
- try:
- for url in args:
- print('-'*10, url, '-'*10)
- fn, h = urlretrieve(url, None, reporthook)
- print(fn)
- if h:
- print('======')
- for k in h.keys(): print(k + ':', h[k])
- print('======')
- fp = open(fn, 'rb')
- data = fp.read()
- del fp
- data = data.replace("\r", "")
- print(data)
- fn, h = None, None
- print('-'*40)
- finally:
- urlcleanup()
-
-def main():
- import getopt, sys
- try:
- opts, args = getopt.getopt(sys.argv[1:], "th")
- except getopt.error as msg:
- print(msg)
- print("Use -h for help")
- return
- t = 0
- for o, a in opts:
- if o == '-t':
- t = t + 1
- if o == '-h':
- print("Usage: python urllib.py [-t] [url ...]")
- print("-t runs self-test;", end=' ')
- print("otherwise, contents of urls are printed")
- return
- if t:
- if t > 1:
- test1()
- test(args)
- else:
- if not args:
- print("Use -h for help")
- for url in args:
- print(urlopen(url).read(), end=' ')
-
-# Run test program when run as a script
-if __name__ == '__main__':
- main()
--- /dev/null
+"""Exception classes raised by urllib.
+
+The base exception class is URLError, which inherits from IOError. It
+doesn't define any behavior of its own, but is the base class for all
+exceptions defined in this package.
+
+HTTPError is an exception class that is also a valid HTTP response
+instance. It behaves this way because HTTP protocol errors are valid
+responses, with a status code, headers, and a body. In some contexts,
+an application may want to handle an exception like a regular
+response.
+"""
+
+import urllib.response
+
+# do these error classes make sense?
+# make sure all of the IOError stuff is overridden. we just want to be
+# subtypes.
+
+class URLError(IOError):
+ # URLError is a sub-type of IOError, but it doesn't share any of
+ # the implementation. need to override __init__ and __str__.
+ # It sets self.args for compatibility with other EnvironmentError
+ # subclasses, but args doesn't have the typical format with errno in
+ # slot 0 and strerror in slot 1. This may be better than nothing.
+ def __init__(self, reason, filename=None):
+ self.args = reason,
+ self.reason = reason
+ if filename is not None:
+ self.filename = filename
+
+ def __str__(self):
+ return '<urlopen error %s>' % self.reason
+
+class HTTPError(URLError, urllib.response.addinfourl):
+ """Raised when HTTP error occurs, but also acts like non-error return"""
+ __super_init = urllib.response.addinfourl.__init__
+
+ def __init__(self, url, code, msg, hdrs, fp):
+ self.code = code
+ self.msg = msg
+ self.hdrs = hdrs
+ self.fp = fp
+ self.filename = url
+ # The addinfourl classes depend on fp being a valid file
+ # object. In some cases, the HTTPError may not have a valid
+ # file object. If this happens, the simplest workaround is to
+ # not initialize the base classes.
+ if fp is not None:
+ self.__super_init(fp, hdrs, url, code)
+
+ def __str__(self):
+ return 'HTTP Error %s: %s' % (self.code, self.msg)
+
+# exception raised when downloaded size does not match content-length
+class ContentTooShortError(URLError):
+ def __init__(self, message, content):
+ URLError.__init__(self, message)
+ self.content = content
return url, ''
+_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+def unquote(s):
+ """unquote('abc%20def') -> 'abc def'."""
+ res = s.split('%')
+ for i in range(1, len(res)):
+ item = res[i]
+ try:
+ res[i] = _hextochr[item[:2]] + item[2:]
+ except KeyError:
+ res[i] = '%' + item
+ except UnicodeDecodeError:
+ res[i] = chr(int(item[:2], 16)) + item[2:]
+ return "".join(res)
+
+def unquote_plus(s):
+ """unquote('%7e/abc+def') -> '~/abc def'"""
+ s = s.replace('+', ' ')
+ return unquote(s)
+
+always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ 'abcdefghijklmnopqrstuvwxyz'
+ '0123456789' '_.-')
+_safe_quoters= {}
+
+class Quoter:
+ def __init__(self, safe):
+ self.cache = {}
+ self.safe = safe + always_safe
+
+ def __call__(self, c):
+ try:
+ return self.cache[c]
+ except KeyError:
+ if ord(c) < 256:
+ res = (c in self.safe) and c or ('%%%02X' % ord(c))
+ self.cache[c] = res
+ return res
+ else:
+ return "".join(['%%%02X' % i for i in c.encode("utf-8")])
+
+def quote(s, safe = '/'):
+ """quote('abc def') -> 'abc%20def'
+
+ Each part of a URL, e.g. the path info, the query, etc., has a
+ different set of reserved characters that must be quoted.
+
+ RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
+ the following reserved characters.
+
+ reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+ "$" | ","
+
+ Each of these characters is reserved in some component of a URL,
+ but not necessarily in all of them.
+
+ By default, the quote function is intended for quoting the path
+ section of a URL. Thus, it will not encode '/'. This character
+ is reserved, but in typical usage the quote function is being
+ called on a path where the existing slash characters are used as
+ reserved characters.
+ """
+ cachekey = (safe, always_safe)
+ try:
+ quoter = _safe_quoters[cachekey]
+ except KeyError:
+ quoter = Quoter(safe)
+ _safe_quoters[cachekey] = quoter
+ res = map(quoter, s)
+ return ''.join(res)
+
+def quote_plus(s, safe = ''):
+ """Quote the query fragment of a URL; replacing ' ' with '+'"""
+ if ' ' in s:
+ s = quote(s, safe + ' ')
+ return s.replace(' ', '+')
+ return quote(s, safe)
+
+def urlencode(query,doseq=0):
+ """Encode a sequence of two-element tuples or dictionary into a URL query string.
+
+ If any values in the query arg are sequences and doseq is true, each
+ sequence element is converted to a separate parameter.
+
+ If the query arg is a sequence of two-element tuples, the order of the
+ parameters in the output will match the order of parameters in the
+ input.
+ """
+
+ if hasattr(query,"items"):
+ # mapping objects
+ query = query.items()
+ else:
+ # it's a bother at times that strings and string-like objects are
+ # sequences...
+ try:
+ # non-sequence items should not work with len()
+ # non-empty strings will fail this
+ if len(query) and not isinstance(query[0], tuple):
+ raise TypeError
+ # zero-length sequences of all types will get here and succeed,
+ # but that's a minor nit - since the original implementation
+ # allowed empty dicts that type of behavior probably should be
+ # preserved for consistency
+ except TypeError:
+ ty,va,tb = sys.exc_info()
+ raise TypeError("not a valid non-string sequence or mapping object").with_traceback(tb)
+
+ l = []
+ if not doseq:
+ # preserve old behavior
+ for k, v in query:
+ k = quote_plus(str(k))
+ v = quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ for k, v in query:
+ k = quote_plus(str(k))
+ if isinstance(v, str):
+ v = quote_plus(v)
+ l.append(k + '=' + v)
+ elif isinstance(v, str):
+ # is there a reasonable way to convert to ASCII?
+ # encode generates a string, but "replace" or "ignore"
+ # lose information and "strict" can raise UnicodeError
+ v = quote_plus(v.encode("ASCII","replace"))
+ l.append(k + '=' + v)
+ else:
+ try:
+ # is this a sufficient test for sequence-ness?
+ x = len(v)
+ except TypeError:
+ # not a sequence
+ v = quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ # loop over the sequence
+ for elt in v:
+ l.append(k + '=' + quote_plus(str(elt)))
+ return '&'.join(l)
+
+# Utilities to parse URLs (most of these return None for missing parts):
+# unwrap('<URL:type://host/path>') --> 'type://host/path'
+# splittype('type:opaquestring') --> 'type', 'opaquestring'
+# splithost('//host[:port]/path') --> 'host[:port]', '/path'
+# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
+# splitpasswd('user:passwd') -> 'user', 'passwd'
+# splitport('host:port') --> 'host', 'port'
+# splitquery('/path?query') --> '/path', 'query'
+# splittag('/path#tag') --> '/path', 'tag'
+# splitattr('/path;attr1=value1;attr2=value2;...') ->
+# '/path', ['attr1=value1', 'attr2=value2', ...]
+# splitvalue('attr=value') --> 'attr', 'value'
+# urllib.parse.unquote('abc%20def') -> 'abc def'
+# quote('abc def') -> 'abc%20def')
+
+def toBytes(url):
+ """toBytes(u"URL") --> 'URL'."""
+ # Most URL schemes require ASCII. If that changes, the conversion
+ # can be relaxed.
+ # XXX get rid of toBytes()
+ if isinstance(url, str):
+ try:
+ url = url.encode("ASCII").decode()
+ except UnicodeError:
+ raise UnicodeError("URL " + repr(url) +
+ " contains non-ASCII characters")
+ return url
+
+def unwrap(url):
+ """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
+ url = str(url).strip()
+ if url[:1] == '<' and url[-1:] == '>':
+ url = url[1:-1].strip()
+ if url[:4] == 'URL:': url = url[4:].strip()
+ return url
+
+_typeprog = None
+def splittype(url):
+ """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
+ global _typeprog
+ if _typeprog is None:
+ import re
+ _typeprog = re.compile('^([^/:]+):')
+
+ match = _typeprog.match(url)
+ if match:
+ scheme = match.group(1)
+ return scheme.lower(), url[len(scheme) + 1:]
+ return None, url
+
+_hostprog = None
+def splithost(url):
+ """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
+ global _hostprog
+ if _hostprog is None:
+ import re
+ _hostprog = re.compile('^//([^/?]*)(.*)$')
+
+ match = _hostprog.match(url)
+ if match: return match.group(1, 2)
+ return None, url
+
+_userprog = None
+def splituser(host):
+ """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
+ global _userprog
+ if _userprog is None:
+ import re
+ _userprog = re.compile('^(.*)@(.*)$')
+
+ match = _userprog.match(host)
+ if match: return map(unquote, match.group(1, 2))
+ return None, host
+
+_passwdprog = None
+def splitpasswd(user):
+ """splitpasswd('user:passwd') -> 'user', 'passwd'."""
+ global _passwdprog
+ if _passwdprog is None:
+ import re
+ _passwdprog = re.compile('^([^:]*):(.*)$')
+
+ match = _passwdprog.match(user)
+ if match: return match.group(1, 2)
+ return user, None
+
+# splittag('/path#tag') --> '/path', 'tag'
+_portprog = None
+def splitport(host):
+ """splitport('host:port') --> 'host', 'port'."""
+ global _portprog
+ if _portprog is None:
+ import re
+ _portprog = re.compile('^(.*):([0-9]+)$')
+
+ match = _portprog.match(host)
+ if match: return match.group(1, 2)
+ return host, None
+
+_nportprog = None
+def splitnport(host, defport=-1):
+ """Split host and port, returning numeric port.
+ Return given default port if no ':' found; defaults to -1.
+ Return numerical port if a valid number are found after ':'.
+ Return None if ':' but not a valid number."""
+ global _nportprog
+ if _nportprog is None:
+ import re
+ _nportprog = re.compile('^(.*):(.*)$')
+
+ match = _nportprog.match(host)
+ if match:
+ host, port = match.group(1, 2)
+ try:
+ if not port: raise ValueError("no digits")
+ nport = int(port)
+ except ValueError:
+ nport = None
+ return host, nport
+ return host, defport
+
+_queryprog = None
+def splitquery(url):
+ """splitquery('/path?query') --> '/path', 'query'."""
+ global _queryprog
+ if _queryprog is None:
+ import re
+ _queryprog = re.compile('^(.*)\?([^?]*)$')
+
+ match = _queryprog.match(url)
+ if match: return match.group(1, 2)
+ return url, None
+
+_tagprog = None
+def splittag(url):
+ """splittag('/path#tag') --> '/path', 'tag'."""
+ global _tagprog
+ if _tagprog is None:
+ import re
+ _tagprog = re.compile('^(.*)#([^#]*)$')
+
+ match = _tagprog.match(url)
+ if match: return match.group(1, 2)
+ return url, None
+
+def splitattr(url):
+ """splitattr('/path;attr1=value1;attr2=value2;...') ->
+ '/path', ['attr1=value1', 'attr2=value2', ...]."""
+ words = url.split(';')
+ return words[0], words[1:]
+
+_valueprog = None
+def splitvalue(attr):
+ """splitvalue('attr=value') --> 'attr', 'value'."""
+ global _valueprog
+ if _valueprog is None:
+ import re
+ _valueprog = re.compile('^([^=]*)=(.*)$')
+
+ match = _valueprog.match(attr)
+ if match: return match.group(1, 2)
+ return attr, None
+
test_input = """
http://a/b/c/d
--- /dev/null
+# Issues in merging urllib and urllib2:
+# 1. They both define a function named urlopen()
+
+"""An extensible library for opening URLs using a variety of protocols
+
+The simplest way to use this module is to call the urlopen function,
+which accepts a string containing a URL or a Request object (described
+below). It opens the URL and returns the results as file-like
+object; the returned object has some extra methods described below.
+
+The OpenerDirector manages a collection of Handler objects that do
+all the actual work. Each Handler implements a particular protocol or
+option. The OpenerDirector is a composite object that invokes the
+Handlers needed to open the requested URL. For example, the
+HTTPHandler performs HTTP GET and POST requests and deals with
+non-error returns. The HTTPRedirectHandler automatically deals with
+HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
+deals with digest authentication.
+
+urlopen(url, data=None) -- Basic usage is the same as original
+urllib. pass the url and optionally data to post to an HTTP URL, and
+get a file-like object back. One difference is that you can also pass
+a Request instance instead of URL. Raises a URLError (subclass of
+IOError); for HTTP errors, raises an HTTPError, which can also be
+treated as a valid response.
+
+build_opener -- Function that creates a new OpenerDirector instance.
+Will install the default handlers. Accepts one or more Handlers as
+arguments, either instances or Handler classes that it will
+instantiate. If one of the argument is a subclass of the default
+handler, the argument will be installed instead of the default.
+
+install_opener -- Installs a new opener as the default opener.
+
+objects of interest:
+OpenerDirector --
+
+Request -- An object that encapsulates the state of a request. The
+state can be as simple as the URL. It can also include extra HTTP
+headers, e.g. a User-Agent.
+
+BaseHandler --
+
+internals:
+BaseHandler and parent
+_call_chain conventions
+
+Example usage:
+
+import urllib2
+
+# set up authentication info
+authinfo = urllib2.HTTPBasicAuthHandler()
+authinfo.add_password(realm='PDQ Application',
+ uri='https://mahler:8092/site-updates.py',
+ user='klem',
+ passwd='geheim$parole')
+
+proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
+
+# build a new opener that adds authentication and caching FTP handlers
+opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
+
+# install it
+urllib2.install_opener(opener)
+
+f = urllib2.urlopen('http://www.python.org/')
+"""
+
+# XXX issues:
+# If an authentication error handler that tries to perform
+# authentication for some reason but fails, how should the error be
+# signalled? The client needs to know the HTTP error code. But if
+# the handler knows that the problem was, e.g., that it didn't know
+# that hash algo that requested in the challenge, it would be good to
+# pass that information along to the client, too.
+# ftp errors aren't handled cleanly
+# check digest against correct (i.e. non-apache) implementation
+
+# Possible extensions:
+# complex proxies XXX not sure what exactly was meant by this
+# abstract factory for opener
+
+import base64
+import email
+import hashlib
+import http.client
+import io
+import os
+import posixpath
+import random
+import re
+import socket
+import sys
+import time
+import urllib.parse, urllib.error, urllib.response
+import bisect
+
+from io import StringIO
+
+# check for SSL
+try:
+ import ssl
+except:
+ _have_ssl = False
+else:
+ _have_ssl = True
+assert _have_ssl
+
+# used in User-Agent header sent
+__version__ = sys.version[:3]
+
+_opener = None
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ global _opener
+ if _opener is None:
+ _opener = build_opener()
+ return _opener.open(url, data, timeout)
+
+def install_opener(opener):
+ global _opener
+ _opener = opener
+
+# TODO(jhylton): Make this work with the same global opener.
+_urlopener = None
+def urlretrieve(url, filename=None, reporthook=None, data=None):
+ global _urlopener
+ if not _urlopener:
+ _urlopener = FancyURLopener()
+ return _urlopener.retrieve(url, filename, reporthook, data)
+
+def urlcleanup():
+ if _urlopener:
+ _urlopener.cleanup()
+ global _opener
+ if _opener:
+ _opener = None
+
+# copied from cookielib.py
+_cut_port_re = re.compile(r":\d+$")
+def request_host(request):
+ """Return request-host, as defined by RFC 2965.
+
+ Variation from RFC: returned value is lowercased, for convenient
+ comparison.
+
+ """
+ url = request.get_full_url()
+ host = urllib.parse.urlparse(url)[1]
+ if host == "":
+ host = request.get_header("Host", "")
+
+ # remove port, if present
+ host = _cut_port_re.sub("", host, 1)
+ return host.lower()
+
+class Request:
+
+ def __init__(self, url, data=None, headers={},
+ origin_req_host=None, unverifiable=False):
+ # unwrap('<URL:type://host/path>') --> 'type://host/path'
+ self.__original = urllib.parse.unwrap(url)
+ self.type = None
+ # self.__r_type is what's left after doing the splittype
+ self.host = None
+ self.port = None
+ self.data = data
+ self.headers = {}
+ for key, value in headers.items():
+ self.add_header(key, value)
+ self.unredirected_hdrs = {}
+ if origin_req_host is None:
+ origin_req_host = request_host(self)
+ self.origin_req_host = origin_req_host
+ self.unverifiable = unverifiable
+
+ def __getattr__(self, attr):
+ # XXX this is a fallback mechanism to guard against these
+ # methods getting called in a non-standard order. this may be
+ # too complicated and/or unnecessary.
+ # XXX should the __r_XXX attributes be public?
+ if attr[:12] == '_Request__r_':
+ name = attr[12:]
+ if hasattr(Request, 'get_' + name):
+ getattr(self, 'get_' + name)()
+ return getattr(self, attr)
+ raise AttributeError(attr)
+
+ def get_method(self):
+ if self.has_data():
+ return "POST"
+ else:
+ return "GET"
+
+ # XXX these helper methods are lame
+
+ def add_data(self, data):
+ self.data = data
+
+ def has_data(self):
+ return self.data is not None
+
+ def get_data(self):
+ return self.data
+
+ def get_full_url(self):
+ return self.__original
+
+ def get_type(self):
+ if self.type is None:
+ self.type, self.__r_type = urllib.parse.splittype(self.__original)
+ if self.type is None:
+ raise ValueError("unknown url type: %s" % self.__original)
+ return self.type
+
+ def get_host(self):
+ if self.host is None:
+ self.host, self.__r_host = urllib.parse.splithost(self.__r_type)
+ if self.host:
+ self.host = urllib.parse.unquote(self.host)
+ return self.host
+
+ def get_selector(self):
+ return self.__r_host
+
+ def set_proxy(self, host, type):
+ self.host, self.type = host, type
+ self.__r_host = self.__original
+
+ def get_origin_req_host(self):
+ return self.origin_req_host
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
+ def add_header(self, key, val):
+ # useful for something like authentication
+ self.headers[key.capitalize()] = val
+
+ def add_unredirected_header(self, key, val):
+ # will not be added to a redirected request
+ self.unredirected_hdrs[key.capitalize()] = val
+
+ def has_header(self, header_name):
+ return (header_name in self.headers or
+ header_name in self.unredirected_hdrs)
+
+ def get_header(self, header_name, default=None):
+ return self.headers.get(
+ header_name,
+ self.unredirected_hdrs.get(header_name, default))
+
+ def header_items(self):
+ hdrs = self.unredirected_hdrs.copy()
+ hdrs.update(self.headers)
+ return list(hdrs.items())
+
+class OpenerDirector:
+ def __init__(self):
+ client_version = "Python-urllib/%s" % __version__
+ self.addheaders = [('User-agent', client_version)]
+ # manage the individual handlers
+ self.handlers = []
+ self.handle_open = {}
+ self.handle_error = {}
+ self.process_response = {}
+ self.process_request = {}
+
+ def add_handler(self, handler):
+ if not hasattr(handler, "add_parent"):
+ raise TypeError("expected BaseHandler instance, got %r" %
+ type(handler))
+
+ added = False
+ for meth in dir(handler):
+ if meth in ["redirect_request", "do_open", "proxy_open"]:
+ # oops, coincidental match
+ continue
+
+ i = meth.find("_")
+ protocol = meth[:i]
+ condition = meth[i+1:]
+
+ if condition.startswith("error"):
+ j = condition.find("_") + i + 1
+ kind = meth[j+1:]
+ try:
+ kind = int(kind)
+ except ValueError:
+ pass
+ lookup = self.handle_error.get(protocol, {})
+ self.handle_error[protocol] = lookup
+ elif condition == "open":
+ kind = protocol
+ lookup = self.handle_open
+ elif condition == "response":
+ kind = protocol
+ lookup = self.process_response
+ elif condition == "request":
+ kind = protocol
+ lookup = self.process_request
+ else:
+ continue
+
+ handlers = lookup.setdefault(kind, [])
+ if handlers:
+ bisect.insort(handlers, handler)
+ else:
+ handlers.append(handler)
+ added = True
+
+ if added:
+ # the handlers must work in an specific order, the order
+ # is specified in a Handler attribute
+ bisect.insort(self.handlers, handler)
+ handler.add_parent(self)
+
+ def close(self):
+ # Only exists for backwards compatibility.
+ pass
+
+ def _call_chain(self, chain, kind, meth_name, *args):
+ # Handlers raise an exception if no one else should try to handle
+ # the request, or return None if they can't but another handler
+ # could. Otherwise, they return the response.
+ handlers = chain.get(kind, ())
+ for handler in handlers:
+ func = getattr(handler, meth_name)
+
+ result = func(*args)
+ if result is not None:
+ return result
+
+ def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ # accept a URL or a Request object
+ if isinstance(fullurl, str):
+ req = Request(fullurl, data)
+ else:
+ req = fullurl
+ if data is not None:
+ req.add_data(data)
+
+ req.timeout = timeout
+ protocol = req.get_type()
+
+ # pre-process request
+ meth_name = protocol+"_request"
+ for processor in self.process_request.get(protocol, []):
+ meth = getattr(processor, meth_name)
+ req = meth(req)
+
+ response = self._open(req, data)
+
+ # post-process response
+ meth_name = protocol+"_response"
+ for processor in self.process_response.get(protocol, []):
+ meth = getattr(processor, meth_name)
+ response = meth(req, response)
+
+ return response
+
+ def _open(self, req, data=None):
+ result = self._call_chain(self.handle_open, 'default',
+ 'default_open', req)
+ if result:
+ return result
+
+ protocol = req.get_type()
+ result = self._call_chain(self.handle_open, protocol, protocol +
+ '_open', req)
+ if result:
+ return result
+
+ return self._call_chain(self.handle_open, 'unknown',
+ 'unknown_open', req)
+
+ def error(self, proto, *args):
+ if proto in ('http', 'https'):
+ # XXX http[s] protocols are special-cased
+ dict = self.handle_error['http'] # https is not different than http
+ proto = args[2] # YUCK!
+ meth_name = 'http_error_%s' % proto
+ http_err = 1
+ orig_args = args
+ else:
+ dict = self.handle_error
+ meth_name = proto + '_error'
+ http_err = 0
+ args = (dict, proto, meth_name) + args
+ result = self._call_chain(*args)
+ if result:
+ return result
+
+ if http_err:
+ args = (dict, 'default', 'http_error_default') + orig_args
+ return self._call_chain(*args)
+
+# XXX probably also want an abstract factory that knows when it makes
+# sense to skip a superclass in favor of a subclass and when it might
+# make sense to include both
+
+def build_opener(*handlers):
+ """Create an opener object from a list of handlers.
+
+ The opener will use several default handlers, including support
+ for HTTP and FTP.
+
+ If any of the handlers passed as arguments are subclasses of the
+ default handlers, the default handlers will not be used.
+ """
+ def isclass(obj):
+ return isinstance(obj, type) or hasattr(obj, "__bases__")
+
+ opener = OpenerDirector()
+ default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
+ HTTPDefaultErrorHandler, HTTPRedirectHandler,
+ FTPHandler, FileHandler, HTTPErrorProcessor]
+ if hasattr(http.client, "HTTPSConnection"):
+ default_classes.append(HTTPSHandler)
+ else:
+ import pdb; pdb.set_trace()
+ skip = set()
+ for klass in default_classes:
+ for check in handlers:
+ if isclass(check):
+ if issubclass(check, klass):
+ skip.add(klass)
+ elif isinstance(check, klass):
+ skip.add(klass)
+ for klass in skip:
+ default_classes.remove(klass)
+
+ for klass in default_classes:
+ opener.add_handler(klass())
+
+ for h in handlers:
+ if isclass(h):
+ h = h()
+ opener.add_handler(h)
+ return opener
+
+class BaseHandler:
+ handler_order = 500
+
+ def add_parent(self, parent):
+ self.parent = parent
+
+ def close(self):
+ # Only exists for backwards compatibility
+ pass
+
+ def __lt__(self, other):
+ if not hasattr(other, "handler_order"):
+ # Try to preserve the old behavior of having custom classes
+ # inserted after default ones (works only for custom user
+ # classes which are not aware of handler_order).
+ return True
+ return self.handler_order < other.handler_order
+
+
+class HTTPErrorProcessor(BaseHandler):
+ """Process HTTP error responses."""
+ handler_order = 1000 # after all other processing
+
+ def http_response(self, request, response):
+ code, msg, hdrs = response.code, response.msg, response.info()
+
+ # According to RFC 2616, "2xx" code indicates that the client's
+ # request was successfully received, understood, and accepted.
+ if not (200 <= code < 300):
+ response = self.parent.error(
+ 'http', request, response, code, msg, hdrs)
+
+ return response
+
+ https_response = http_response
+
+class HTTPDefaultErrorHandler(BaseHandler):
+ def http_error_default(self, req, fp, code, msg, hdrs):
+ raise urllib.error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)
+
+class HTTPRedirectHandler(BaseHandler):
+ # maximum number of redirections to any single URL
+ # this is needed because of the state that cookies introduce
+ max_repeats = 4
+ # maximum total number of redirections (regardless of URL) before
+ # assuming we're in a loop
+ max_redirections = 10
+
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a
+ redirection response is received. If a redirection should
+ take place, return a new Request to allow http_error_30x to
+ perform the redirect. Otherwise, raise HTTPError if no-one
+ else should try to handle this url. Return None if you can't
+ but another Handler might.
+ """
+ m = req.get_method()
+ if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+ or code in (301, 302, 303) and m == "POST")):
+ raise urllib.error.HTTPError(req.get_full_url(),
+ code, msg, headers, fp)
+
+ # Strictly (according to RFC 2616), 301 or 302 in response to
+ # a POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib2, in this case). In practice,
+ # essentially all clients do redirect in this case, so we do
+ # the same.
+ # be conciliant with URIs containing a space
+ newurl = newurl.replace(' ', '%20')
+ CONTENT_HEADERS = ("content-length", "content-type")
+ newheaders = dict((k, v) for k, v in req.headers.items()
+ if k.lower() not in CONTENT_HEADERS)
+ return Request(newurl,
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+
+ # Implementation note: To avoid the server sending us into an
+ # infinite loop, the request object needs to track what URLs we
+ # have already seen. Do this by adding a handler-specific
+ # attribute to the Request object.
+ def http_error_302(self, req, fp, code, msg, headers):
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably same goes for URI). Use first header.
+ if "location" in headers:
+ newurl = headers["location"]
+ elif "uri" in headers:
+ newurl = headers["uri"]
+ else:
+ return
+ newurl = urllib.parse.urljoin(req.get_full_url(), newurl)
+
+ # XXX Probably want to forget about the state of the current
+ # request, although that might interact poorly with other
+ # handlers that also use handler-specific request attributes
+ new = self.redirect_request(req, fp, code, msg, headers, newurl)
+ if new is None:
+ return
+
+ # loop detection
+ # .redirect_dict has a key url if url was previously visited.
+ if hasattr(req, 'redirect_dict'):
+ visited = new.redirect_dict = req.redirect_dict
+ if (visited.get(newurl, 0) >= self.max_repeats or
+ len(visited) >= self.max_redirections):
+ raise urllib.error.HTTPError(req.get_full_url(), code,
+ self.inf_msg + msg, headers, fp)
+ else:
+ visited = new.redirect_dict = req.redirect_dict = {}
+ visited[newurl] = visited.get(newurl, 0) + 1
+
+ # Don't close the fp until we are sure that we won't use it
+ # with HTTPError.
+ fp.read()
+ fp.close()
+
+ return self.parent.open(new)
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+ inf_msg = "The HTTP server returned a redirect error that would " \
+ "lead to an infinite loop.\n" \
+ "The last 30x error message was:\n"
+
+
+def _parse_proxy(proxy):
+ """Return (scheme, user, password, host/port) given a URL or an authority.
+
+ If a URL is supplied, it must have an authority (host:port) component.
+ According to RFC 3986, having an authority component means the URL must
+ have two slashes after the scheme:
+
+ >>> _parse_proxy('file:/ftp.example.com/')
+ Traceback (most recent call last):
+ ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
+
+ The first three items of the returned tuple may be None.
+
+ Examples of authority parsing:
+
+ >>> _parse_proxy('proxy.example.com')
+ (None, None, None, 'proxy.example.com')
+ >>> _parse_proxy('proxy.example.com:3128')
+ (None, None, None, 'proxy.example.com:3128')
+
+ The authority component may optionally include userinfo (assumed to be
+ username:password):
+
+ >>> _parse_proxy('joe:password@proxy.example.com')
+ (None, 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('joe:password@proxy.example.com:3128')
+ (None, 'joe', 'password', 'proxy.example.com:3128')
+
+ Same examples, but with URLs instead:
+
+ >>> _parse_proxy('http://proxy.example.com/')
+ ('http', None, None, 'proxy.example.com')
+ >>> _parse_proxy('http://proxy.example.com:3128/')
+ ('http', None, None, 'proxy.example.com:3128')
+ >>> _parse_proxy('http://joe:password@proxy.example.com/')
+ ('http', 'joe', 'password', 'proxy.example.com')
+ >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
+ ('http', 'joe', 'password', 'proxy.example.com:3128')
+
+ Everything after the authority is ignored:
+
+ >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
+ ('ftp', 'joe', 'password', 'proxy.example.com')
+
+ Test for no trailing '/' case:
+
+ >>> _parse_proxy('http://joe:password@proxy.example.com')
+ ('http', 'joe', 'password', 'proxy.example.com')
+
+ """
+ scheme, r_scheme = urllib.parse.splittype(proxy)
+ if not r_scheme.startswith("/"):
+ # authority
+ scheme = None
+ authority = proxy
+ else:
+ # URL
+ if not r_scheme.startswith("//"):
+ raise ValueError("proxy URL with no authority: %r" % proxy)
+ # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
+ # and 3.3.), path is empty or starts with '/'
+ end = r_scheme.find("/", 2)
+ if end == -1:
+ end = None
+ authority = r_scheme[2:end]
+ userinfo, hostport = urllib.parse.splituser(authority)
+ if userinfo is not None:
+ user, password = urllib.parse.splitpasswd(userinfo)
+ else:
+ user = password = None
+ return scheme, user, password, hostport
+
+class ProxyHandler(BaseHandler):
+ # Proxies must be in front
+ handler_order = 100
+
+ def __init__(self, proxies=None):
+ if proxies is None:
+ proxies = getproxies()
+ assert hasattr(proxies, 'keys'), "proxies must be a mapping"
+ self.proxies = proxies
+ for type, url in proxies.items():
+ setattr(self, '%s_open' % type,
+ lambda r, proxy=url, type=type, meth=self.proxy_open: \
+ meth(r, proxy, type))
+
+ def proxy_open(self, req, proxy, type):
+ orig_type = req.get_type()
+ proxy_type, user, password, hostport = _parse_proxy(proxy)
+ if proxy_type is None:
+ proxy_type = orig_type
+ if user and password:
+ user_pass = '%s:%s' % (unquote(user),
+ urllib.parse.unquote(password))
+ creds = base64.b64encode(user_pass.encode()).decode("ascii")
+ req.add_header('Proxy-authorization', 'Basic ' + creds)
+ hostport = urllib.parse.unquote(hostport)
+ req.set_proxy(hostport, proxy_type)
+ if orig_type == proxy_type:
+ # let other handlers take care of it
+ return None
+ else:
+ # need to start over, because the other handlers don't
+ # grok the proxy's URL type
+ # e.g. if we have a constructor arg proxies like so:
+ # {'http': 'ftp://proxy.example.com'}, we may end up turning
+ # a request for http://acme.example.com/a into one for
+ # ftp://proxy.example.com/a
+ return self.parent.open(req)
+
+class HTTPPasswordMgr:
+
+ def __init__(self):
+ self.passwd = {}
+
+ def add_password(self, realm, uri, user, passwd):
+ # uri could be a single URI or a sequence
+ if isinstance(uri, str):
+ uri = [uri]
+ if not realm in self.passwd:
+ self.passwd[realm] = {}
+ for default_port in True, False:
+ reduced_uri = tuple(
+ [self.reduce_uri(u, default_port) for u in uri])
+ self.passwd[realm][reduced_uri] = (user, passwd)
+
+ def find_user_password(self, realm, authuri):
+ domains = self.passwd.get(realm, {})
+ for default_port in True, False:
+ reduced_authuri = self.reduce_uri(authuri, default_port)
+ for uris, authinfo in domains.items():
+ for uri in uris:
+ if self.is_suburi(uri, reduced_authuri):
+ return authinfo
+ return None, None
+
+ def reduce_uri(self, uri, default_port=True):
+ """Accept authority or URI and extract only the authority and path."""
+ # note HTTP URLs do not have a userinfo component
+ parts = urllib.parse.urlsplit(uri)
+ if parts[1]:
+ # URI
+ scheme = parts[0]
+ authority = parts[1]
+ path = parts[2] or '/'
+ else:
+ # host or host:port
+ scheme = None
+ authority = uri
+ path = '/'
+ host, port = urllib.parse.splitport(authority)
+ if default_port and port is None and scheme is not None:
+ dport = {"http": 80,
+ "https": 443,
+ }.get(scheme)
+ if dport is not None:
+ authority = "%s:%d" % (host, dport)
+ return authority, path
+
+ def is_suburi(self, base, test):
+ """Check if test is below base in a URI tree
+
+ Both args must be URIs in reduced form.
+ """
+ if base == test:
+ return True
+ if base[0] != test[0]:
+ return False
+ common = posixpath.commonprefix((base[1], test[1]))
+ if len(common) == len(base[1]):
+ return True
+ return False
+
+
+class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
+
+ def find_user_password(self, realm, authuri):
+ user, password = HTTPPasswordMgr.find_user_password(self, realm,
+ authuri)
+ if user is not None:
+ return user, password
+ return HTTPPasswordMgr.find_user_password(self, None, authuri)
+
+
+class AbstractBasicAuthHandler:
+
+ # XXX this allows for multiple auth-schemes, but will stupidly pick
+ # the last one with a realm specified.
+
+ # allow for double- and single-quoted realm values
+ # (single quotes are a violation of the RFC, but appear in the wild)
+ rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
+ 'realm=(["\'])(.*?)\\2', re.I)
+
+ # XXX could pre-emptively send auth info already accepted (RFC 2617,
+ # end of section 2, and section 1.2 immediately after "credentials"
+ # production).
+
+ def __init__(self, password_mgr=None):
+ if password_mgr is None:
+ password_mgr = HTTPPasswordMgr()
+ self.passwd = password_mgr
+ self.add_password = self.passwd.add_password
+
+ def http_error_auth_reqed(self, authreq, host, req, headers):
+ # host may be an authority (without userinfo) or a URL with an
+ # authority
+ # XXX could be multiple headers
+ authreq = headers.get(authreq, None)
+ if authreq:
+ mo = AbstractBasicAuthHandler.rx.search(authreq)
+ if mo:
+ scheme, quote, realm = mo.groups()
+ if scheme.lower() == 'basic':
+ return self.retry_http_basic_auth(host, req, realm)
+
+ def retry_http_basic_auth(self, host, req, realm):
+ user, pw = self.passwd.find_user_password(realm, host)
+ if pw is not None:
+ raw = "%s:%s" % (user, pw)
+ auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
+ if req.headers.get(self.auth_header, None) == auth:
+ return None
+ req.add_header(self.auth_header, auth)
+ return self.parent.open(req)
+ else:
+ return None
+
+
+class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Authorization'
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ url = req.get_full_url()
+ return self.http_error_auth_reqed('www-authenticate',
+ url, req, headers)
+
+
+class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
+
+ auth_header = 'Proxy-authorization'
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ # http_error_auth_reqed requires that there is no userinfo component in
+ # authority. Assume there isn't one, since urllib2 does not (and
+ # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
+ # userinfo.
+ authority = req.get_host()
+ return self.http_error_auth_reqed('proxy-authenticate',
+ authority, req, headers)
+
+
+def randombytes(n):
+ """Return n random bytes."""
+ return os.urandom(n)
+
+class AbstractDigestAuthHandler:
+ # Digest authentication is specified in RFC 2617.
+
+ # XXX The client does not inspect the Authentication-Info header
+ # in a successful response.
+
+ # XXX It should be possible to test this implementation against
+ # a mock server that just generates a static set of challenges.
+
+ # XXX qop="auth-int" supports is shaky
+
+ def __init__(self, passwd=None):
+ if passwd is None:
+ passwd = HTTPPasswordMgr()
+ self.passwd = passwd
+ self.add_password = self.passwd.add_password
+ self.retried = 0
+ self.nonce_count = 0
+
+ def reset_retry_count(self):
+ self.retried = 0
+
+ def http_error_auth_reqed(self, auth_header, host, req, headers):
+ authreq = headers.get(auth_header, None)
+ if self.retried > 5:
+ # Don't fail endlessly - if we failed once, we'll probably
+ # fail a second time. Hm. Unless the Password Manager is
+ # prompting for the information. Crap. This isn't great
+ # but it's better than the current 'repeat until recursion
+ # depth exceeded' approach <wink>
+ raise urllib.error.HTTPError(req.get_full_url(), 401,
+ "digest auth failed",
+ headers, None)
+ else:
+ self.retried += 1
+ if authreq:
+ scheme = authreq.split()[0]
+ if scheme.lower() == 'digest':
+ return self.retry_http_digest_auth(req, authreq)
+
+ def retry_http_digest_auth(self, req, auth):
+ token, challenge = auth.split(' ', 1)
+ chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
+ auth = self.get_authorization(req, chal)
+ if auth:
+ auth_val = 'Digest %s' % auth
+ if req.headers.get(self.auth_header, None) == auth_val:
+ return None
+ req.add_unredirected_header(self.auth_header, auth_val)
+ resp = self.parent.open(req)
+ return resp
+
+ def get_cnonce(self, nonce):
+ # The cnonce-value is an opaque
+ # quoted string value provided by the client and used by both client
+ # and server to avoid chosen plaintext attacks, to provide mutual
+ # authentication, and to provide some message integrity protection.
+ # This isn't a fabulous effort, but it's probably Good Enough.
+ s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
+ b = s.encode("ascii") + randombytes(8)
+ dig = hashlib.sha1(b).hexdigest()
+ return dig[:16]
+
+ def get_authorization(self, req, chal):
+ try:
+ realm = chal['realm']
+ nonce = chal['nonce']
+ qop = chal.get('qop')
+ algorithm = chal.get('algorithm', 'MD5')
+ # mod_digest doesn't send an opaque, even though it isn't
+ # supposed to be optional
+ opaque = chal.get('opaque', None)
+ except KeyError:
+ return None
+
+ H, KD = self.get_algorithm_impls(algorithm)
+ if H is None:
+ return None
+
+ user, pw = self.passwd.find_user_password(realm, req.get_full_url())
+ if user is None:
+ return None
+
+ # XXX not implemented yet
+ if req.has_data():
+ entdig = self.get_entity_digest(req.get_data(), chal)
+ else:
+ entdig = None
+
+ A1 = "%s:%s:%s" % (user, realm, pw)
+ A2 = "%s:%s" % (req.get_method(),
+ # XXX selector: what about proxies and full urls
+ req.get_selector())
+ if qop == 'auth':
+ self.nonce_count += 1
+ ncvalue = '%08x' % self.nonce_count
+ cnonce = self.get_cnonce(nonce)
+ noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
+ respdig = KD(H(A1), noncebit)
+ elif qop is None:
+ respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
+ else:
+ # XXX handle auth-int.
+ raise urllib.error.URLError("qop '%s' is not supported." % qop)
+
+ # XXX should the partial digests be encoded too?
+
+ base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
+ 'response="%s"' % (user, realm, nonce, req.get_selector(),
+ respdig)
+ if opaque:
+ base += ', opaque="%s"' % opaque
+ if entdig:
+ base += ', digest="%s"' % entdig
+ base += ', algorithm="%s"' % algorithm
+ if qop:
+ base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
+ return base
+
+ def get_algorithm_impls(self, algorithm):
+ # lambdas assume digest modules are imported at the top level
+ if algorithm == 'MD5':
+ H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
+ elif algorithm == 'SHA':
+ H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
+ # XXX MD5-sess
+ KD = lambda s, d: H("%s:%s" % (s, d))
+ return H, KD
+
+ def get_entity_digest(self, data, chal):
+ # XXX not implemented yet
+ return None
+
+
+class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+ """An authentication protocol defined by RFC 2069
+
+ Digest authentication improves on basic authentication because it
+ does not transmit passwords in the clear.
+ """
+
+ auth_header = 'Authorization'
+ handler_order = 490 # before Basic auth
+
+ def http_error_401(self, req, fp, code, msg, headers):
+ host = urllib.parse.urlparse(req.get_full_url())[1]
+ retry = self.http_error_auth_reqed('www-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+
+class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
+
+ auth_header = 'Proxy-Authorization'
+ handler_order = 490 # before Basic auth
+
+ def http_error_407(self, req, fp, code, msg, headers):
+ host = req.get_host()
+ retry = self.http_error_auth_reqed('proxy-authenticate',
+ host, req, headers)
+ self.reset_retry_count()
+ return retry
+
+class AbstractHTTPHandler(BaseHandler):
+
+ def __init__(self, debuglevel=0):
+ self._debuglevel = debuglevel
+
+ def set_http_debuglevel(self, level):
+ self._debuglevel = level
+
+ def do_request_(self, request):
+ host = request.get_host()
+ if not host:
+ raise urllib.error.URLError('no host given')
+
+ if request.has_data(): # POST
+ data = request.get_data()
+ if not request.has_header('Content-type'):
+ request.add_unredirected_header(
+ 'Content-type',
+ 'application/x-www-form-urlencoded')
+ if not request.has_header('Content-length'):
+ request.add_unredirected_header(
+ 'Content-length', '%d' % len(data))
+
+ scheme, sel = urllib.parse.splittype(request.get_selector())
+ sel_host, sel_path = urllib.parse.splithost(sel)
+ if not request.has_header('Host'):
+ request.add_unredirected_header('Host', sel_host or host)
+ for name, value in self.parent.addheaders:
+ name = name.capitalize()
+ if not request.has_header(name):
+ request.add_unredirected_header(name, value)
+
+ return request
+
+ def do_open(self, http_class, req):
+ """Return an addinfourl object for the request, using http_class.
+
+ http_class must implement the HTTPConnection API from http.client.
+ The addinfourl return value is a file-like object. It also
+ has methods and attributes including:
+ - info(): return a mimetools.Message object for the headers
+ - geturl(): return the original request URL
+ - code: HTTP status code
+ """
+ host = req.get_host()
+ if not host:
+ raise urllib.error.URLError('no host given')
+
+ h = http_class(host, timeout=req.timeout) # will parse host:port
+ headers = dict(req.headers)
+ headers.update(req.unredirected_hdrs)
+
+ # TODO(jhylton): Should this be redesigned to handle
+ # persistent connections?
+
+ # We want to make an HTTP/1.1 request, but the addinfourl
+ # class isn't prepared to deal with a persistent connection.
+ # It will try to read all remaining data from the socket,
+ # which will block while the server waits for the next request.
+ # So make sure the connection gets closed after the (only)
+ # request.
+ headers["Connection"] = "close"
+ headers = dict(
+ (name.title(), val) for name, val in headers.items())
+ try:
+ h.request(req.get_method(), req.get_selector(), req.data, headers)
+ r = h.getresponse()
+ except socket.error as err: # XXX what error?
+ raise urllib.error.URLError(err)
+
+ resp = urllib.response.addinfourl(r.fp, r.msg, req.get_full_url())
+ resp.code = r.status
+ resp.msg = r.reason
+ return resp
+
+
+class HTTPHandler(AbstractHTTPHandler):
+
+ def http_open(self, req):
+ return self.do_open(http.client.HTTPConnection, req)
+
+ http_request = AbstractHTTPHandler.do_request_
+
+if hasattr(http.client, 'HTTPSConnection'):
+ class HTTPSHandler(AbstractHTTPHandler):
+
+ def https_open(self, req):
+ return self.do_open(http.client.HTTPSConnection, req)
+
+ https_request = AbstractHTTPHandler.do_request_
+
+class HTTPCookieProcessor(BaseHandler):
+ def __init__(self, cookiejar=None):
+ import http.cookiejar
+ if cookiejar is None:
+ cookiejar = http.cookiejar.CookieJar()
+ self.cookiejar = cookiejar
+
+ def http_request(self, request):
+ self.cookiejar.add_cookie_header(request)
+ return request
+
+ def http_response(self, request, response):
+ self.cookiejar.extract_cookies(response, request)
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
+class UnknownHandler(BaseHandler):
+ def unknown_open(self, req):
+ type = req.get_type()
+ raise urllib.error.URLError('unknown url type: %s' % type)
+
+def parse_keqv_list(l):
+ """Parse list of key=value strings where keys are not duplicated."""
+ parsed = {}
+ for elt in l:
+ k, v = elt.split('=', 1)
+ if v[0] == '"' and v[-1] == '"':
+ v = v[1:-1]
+ parsed[k] = v
+ return parsed
+
+def parse_http_list(s):
+ """Parse lists as described by RFC 2068 Section 2.
+
+ In particular, parse comma-separated lists where the elements of
+ the list may include quoted-strings. A quoted-string could
+ contain a comma. A non-quoted string could have quotes in the
+ middle. Neither commas nor quotes count if they are escaped.
+ Only double-quotes count, not single-quotes.
+ """
+ res = []
+ part = ''
+
+ escape = quote = False
+ for cur in s:
+ if escape:
+ part += cur
+ escape = False
+ continue
+ if quote:
+ if cur == '\\':
+ escape = True
+ continue
+ elif cur == '"':
+ quote = False
+ part += cur
+ continue
+
+ if cur == ',':
+ res.append(part)
+ part = ''
+ continue
+
+ if cur == '"':
+ quote = True
+
+ part += cur
+
+ # append last part
+ if part:
+ res.append(part)
+
+ return [part.strip() for part in res]
+
+class FileHandler(BaseHandler):
+ # Use local file or FTP depending on form of URL
+ def file_open(self, req):
+ url = req.get_selector()
+ if url[:2] == '//' and url[2:3] != '/':
+ req.type = 'ftp'
+ return self.parent.open(req)
+ else:
+ return self.open_local_file(req)
+
+ # names for the localhost
+ names = None
+ def get_names(self):
+ if FileHandler.names is None:
+ try:
+ FileHandler.names = (socket.gethostbyname('localhost'),
+ socket.gethostbyname(socket.gethostname()))
+ except socket.gaierror:
+ FileHandler.names = (socket.gethostbyname('localhost'),)
+ return FileHandler.names
+
+ # not entirely sure what the rules are here
+ def open_local_file(self, req):
+ import email.utils
+ import mimetypes
+ host = req.get_host()
+ file = req.get_selector()
+ localfile = url2pathname(file)
+ try:
+ stats = os.stat(localfile)
+ size = stats.st_size
+ modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+ mtype = mimetypes.guess_type(file)[0]
+ headers = email.message_from_string(
+ 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
+ (mtype or 'text/plain', size, modified))
+ if host:
+ host, port = urllib.parse.splitport(host)
+ if not host or \
+ (not port and _safe_gethostbyname(host) in self.get_names()):
+ return urllib.response.addinfourl(open(localfile, 'rb'),
+ headers, 'file:'+file)
+ except OSError as msg:
+ # urllib2 users shouldn't expect OSErrors coming from urlopen()
+ raise urllib.error.URLError(msg)
+ raise urllib.error.URLError('file not on local host')
+
+def _safe_gethostbyname(host):
+ try:
+ return socket.gethostbyname(host)
+ except socket.gaierror:
+ return None
+
+class FTPHandler(BaseHandler):
+ def ftp_open(self, req):
+ import ftplib
+ import mimetypes
+ host = req.get_host()
+ if not host:
+ raise urllib.error.URLError('ftp error: no host given')
+ host, port = urllib.parse.splitport(host)
+ if port is None:
+ port = ftplib.FTP_PORT
+ else:
+ port = int(port)
+
+ # username/password handling
+ user, host = urllib.parse.splituser(host)
+ if user:
+ user, passwd = urllib.parse.splitpasswd(user)
+ else:
+ passwd = None
+ host = urllib.parse.unquote(host)
+ user = urllib.parse.unquote(user or '')
+ passwd = urllib.parse.unquote(passwd or '')
+
+ try:
+ host = socket.gethostbyname(host)
+ except socket.error as msg:
+ raise urllib.error.URLError(msg)
+ path, attrs = urllib.parse.splitattr(req.get_selector())
+ dirs = path.split('/')
+ dirs = list(map(urllib.parse.unquote, dirs))
+ dirs, file = dirs[:-1], dirs[-1]
+ if dirs and not dirs[0]:
+ dirs = dirs[1:]
+ try:
+ fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
+ type = file and 'I' or 'D'
+ for attr in attrs:
+ attr, value = urllib.parse.splitvalue(attr)
+ if attr.lower() == 'type' and \
+ value in ('a', 'A', 'i', 'I', 'd', 'D'):
+ type = value.upper()
+ fp, retrlen = fw.retrfile(file, type)
+ headers = ""
+ mtype = mimetypes.guess_type(req.get_full_url())[0]
+ if mtype:
+ headers += "Content-type: %s\n" % mtype
+ if retrlen is not None and retrlen >= 0:
+ headers += "Content-length: %d\n" % retrlen
+ headers = email.message_from_string(headers)
+ return urllib.response.addinfourl(fp, headers, req.get_full_url())
+ except ftplib.all_errors as msg:
+ exc = urllib.error.URLError('ftp error: %s' % msg)
+ raise exc.with_traceback(sys.exc_info()[2])
+
+ def connect_ftp(self, user, passwd, host, port, dirs, timeout):
+ fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
+ return fw
+
+class CacheFTPHandler(FTPHandler):
+ # XXX would be nice to have pluggable cache strategies
+ # XXX this stuff is definitely not thread safe
+ def __init__(self):
+ self.cache = {}
+ self.timeout = {}
+ self.soonest = 0
+ self.delay = 60
+ self.max_conns = 16
+
+ def setTimeout(self, t):
+ self.delay = t
+
+ def setMaxConns(self, m):
+ self.max_conns = m
+
+ def connect_ftp(self, user, passwd, host, port, dirs, timeout):
+ key = user, host, port, '/'.join(dirs), timeout
+ if key in self.cache:
+ self.timeout[key] = time.time() + self.delay
+ else:
+ self.cache[key] = ftpwrapper(user, passwd, host, port,
+ dirs, timeout)
+ self.timeout[key] = time.time() + self.delay
+ self.check_cache()
+ return self.cache[key]
+
+ def check_cache(self):
+ # first check for old ones
+ t = time.time()
+ if self.soonest <= t:
+ for k, v in list(self.timeout.items()):
+ if v < t:
+ self.cache[k].close()
+ del self.cache[k]
+ del self.timeout[k]
+ self.soonest = min(list(self.timeout.values()))
+
+ # then check the size
+ if len(self.cache) == self.max_conns:
+ for k, v in list(self.timeout.items()):
+ if v == self.soonest:
+ del self.cache[k]
+ del self.timeout[k]
+ break
+ self.soonest = min(list(self.timeout.values()))
+
+# Code move from the old urllib module
+
+MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
+
+# Helper for non-unix systems
+if os.name == 'mac':
+ from macurl2path import url2pathname, pathname2url
+elif os.name == 'nt':
+ from nturl2path import url2pathname, pathname2url
+else:
+ def url2pathname(pathname):
+ """OS-specific conversion from a relative URL of the 'file' scheme
+ to a file system path; not recommended for general use."""
+ return urllib.parse.unquote(pathname)
+
+ def pathname2url(pathname):
+ """OS-specific conversion from a file system path to a relative URL
+ of the 'file' scheme; not recommended for general use."""
+ return urllib.parse.quote(pathname)
+
+# This really consists of two pieces:
+# (1) a class which handles opening of all sorts of URLs
+# (plus assorted utilities etc.)
+# (2) a set of functions for parsing URLs
+# XXX Should these be separated out into different modules?
+
+
+ftpcache = {}
+class URLopener:
+ """Class to open URLs.
+ This is a class rather than just a subroutine because we may need
+ more than one set of global protocol-specific options.
+ Note -- this is a base class for those who don't want the
+ automatic handling of errors type 302 (relocated) and 401
+ (authorization needed)."""
+
+ __tempfiles = None
+
+ version = "Python-urllib/%s" % __version__
+
+ # Constructor
+ def __init__(self, proxies=None, **x509):
+ if proxies is None:
+ proxies = getproxies()
+ assert hasattr(proxies, 'keys'), "proxies must be a mapping"
+ self.proxies = proxies
+ self.key_file = x509.get('key_file')
+ self.cert_file = x509.get('cert_file')
+ self.addheaders = [('User-Agent', self.version)]
+ self.__tempfiles = []
+ self.__unlink = os.unlink # See cleanup()
+ self.tempcache = None
+ # Undocumented feature: if you assign {} to tempcache,
+ # it is used to cache files retrieved with
+ # self.retrieve(). This is not enabled by default
+ # since it does not work for changing documents (and I
+ # haven't got the logic to check expiration headers
+ # yet).
+ self.ftpcache = ftpcache
+ # Undocumented feature: you can use a different
+ # ftp cache by assigning to the .ftpcache member;
+ # in case you want logically independent URL openers
+ # XXX This is not threadsafe. Bah.
+
+ def __del__(self):
+ self.close()
+
+ def close(self):
+ self.cleanup()
+
+ def cleanup(self):
+ # This code sometimes runs when the rest of this module
+ # has already been deleted, so it can't use any globals
+ # or import anything.
+ if self.__tempfiles:
+ for file in self.__tempfiles:
+ try:
+ self.__unlink(file)
+ except OSError:
+ pass
+ del self.__tempfiles[:]
+ if self.tempcache:
+ self.tempcache.clear()
+
+ def addheader(self, *args):
+ """Add a header to be used by the HTTP interface only
+ e.g. u.addheader('Accept', 'sound/basic')"""
+ self.addheaders.append(args)
+
+ # External interface
+ def open(self, fullurl, data=None):
+ """Use URLopener().open(file) instead of open(file, 'r')."""
+ fullurl = urllib.parse.unwrap(urllib.parse.toBytes(fullurl))
+ if self.tempcache and fullurl in self.tempcache:
+ filename, headers = self.tempcache[fullurl]
+ fp = open(filename, 'rb')
+ return urllib.response.addinfourl(fp, headers, fullurl)
+ urltype, url = urllib.parse.splittype(fullurl)
+ if not urltype:
+ urltype = 'file'
+ if urltype in self.proxies:
+ proxy = self.proxies[urltype]
+ urltype, proxyhost = urllib.parse.splittype(proxy)
+ host, selector = urllib.parse.splithost(proxyhost)
+ url = (host, fullurl) # Signal special case to open_*()
+ else:
+ proxy = None
+ name = 'open_' + urltype
+ self.type = urltype
+ name = name.replace('-', '_')
+ if not hasattr(self, name):
+ if proxy:
+ return self.open_unknown_proxy(proxy, fullurl, data)
+ else:
+ return self.open_unknown(fullurl, data)
+ try:
+ if data is None:
+ return getattr(self, name)(url)
+ else:
+ return getattr(self, name)(url, data)
+ except socket.error as msg:
+ raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
+
+ def open_unknown(self, fullurl, data=None):
+ """Overridable interface to open unknown URL type."""
+ type, url = urllib.parse.splittype(fullurl)
+ raise IOError('url error', 'unknown url type', type)
+
+ def open_unknown_proxy(self, proxy, fullurl, data=None):
+ """Overridable interface to open unknown URL type."""
+ type, url = urllib.parse.splittype(fullurl)
+ raise IOError('url error', 'invalid proxy for %s' % type, proxy)
+
+ # External interface
+ def retrieve(self, url, filename=None, reporthook=None, data=None):
+ """retrieve(url) returns (filename, headers) for a local object
+ or (tempfilename, headers) for a remote object."""
+ url = urllib.parse.unwrap(urllib.parse.toBytes(url))
+ if self.tempcache and url in self.tempcache:
+ return self.tempcache[url]
+ type, url1 = urllib.parse.splittype(url)
+ if filename is None and (not type or type == 'file'):
+ try:
+ fp = self.open_local_file(url1)
+ hdrs = fp.info()
+ del fp
+ return url2pathname(urllib.parse.splithost(url1)[1]), hdrs
+ except IOError as msg:
+ pass
+ fp = self.open(url, data)
+ headers = fp.info()
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ import tempfile
+ garbage, path = urllib.parse.splittype(url)
+ garbage, path = urllib.parse.splithost(path or "")
+ path, garbage = urllib.parse.splitquery(path or "")
+ path, garbage = urllib.parse.splitattr(path or "")
+ suffix = os.path.splitext(path)[1]
+ (fd, filename) = tempfile.mkstemp(suffix)
+ self.__tempfiles.append(filename)
+ tfp = os.fdopen(fd, 'wb')
+ result = filename, headers
+ if self.tempcache is not None:
+ self.tempcache[url] = result
+ bs = 1024*8
+ size = -1
+ read = 0
+ blocknum = 0
+ if reporthook:
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
+ reporthook(blocknum, bs, size)
+ while 1:
+ block = fp.read(bs)
+ if not block:
+ break
+ read += len(block)
+ tfp.write(block)
+ blocknum += 1
+ if reporthook:
+ reporthook(blocknum, bs, size)
+ fp.close()
+ tfp.close()
+ del fp
+ del tfp
+
+ # raise exception if actual size does not match content-length header
+ if size >= 0 and read < size:
+ raise urllib.error.ContentTooShortError(
+ "retrieval incomplete: got only %i out of %i bytes"
+ % (read, size), result)
+
+ return result
+
+ # Each method named open_<type> knows how to open that type of URL
+
+ def _open_generic_http(self, connection_factory, url, data):
+ """Make an HTTP connection using connection_class.
+
+ This is an internal method that should be called from
+ open_http() or open_https().
+
+ Arguments:
+ - connection_factory should take a host name and return an
+ HTTPConnection instance.
+ - url is the url to retrieval or a host, relative-path pair.
+ - data is payload for a POST request or None.
+ """
+
+ user_passwd = None
+ proxy_passwd= None
+ if isinstance(url, str):
+ host, selector = urllib.parse.splithost(url)
+ if host:
+ user_passwd, host = urllib.parse.splituser(host)
+ host = urllib.parse.unquote(host)
+ realhost = host
+ else:
+ host, selector = url
+ # check whether the proxy contains authorization information
+ proxy_passwd, host = urllib.parse.splituser(host)
+ # now we proceed with the url we want to obtain
+ urltype, rest = urllib.parse.splittype(selector)
+ url = rest
+ user_passwd = None
+ if urltype.lower() != 'http':
+ realhost = None
+ else:
+ realhost, rest = urllib.parse.splithost(rest)
+ if realhost:
+ user_passwd, realhost = urllib.parse.splituser(realhost)
+ if user_passwd:
+ selector = "%s://%s%s" % (urltype, realhost, rest)
+ if proxy_bypass(realhost):
+ host = realhost
+
+ #print "proxy via http:", host, selector
+ if not host: raise IOError('http error', 'no host given')
+
+ if proxy_passwd:
+ import base64
+ proxy_auth = base64.b64encode(proxy_passwd).strip()
+ else:
+ proxy_auth = None
+
+ if user_passwd:
+ import base64
+ auth = base64.b64encode(user_passwd).strip()
+ else:
+ auth = None
+ http_conn = connection_factory(host)
+ # XXX We should fix urllib so that it works with HTTP/1.1.
+ http_conn._http_vsn = 10
+ http_conn._http_vsn_str = "HTTP/1.0"
+
+ headers = {}
+ if proxy_auth:
+ headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
+ if auth:
+ headers["Authorization"] = "Basic %s" % auth
+ if realhost:
+ headers["Host"] = realhost
+ for header, value in self.addheaders:
+ headers[header] = value
+
+ if data is not None:
+ headers["Content-Type"] = "application/x-www-form-urlencoded"
+ http_conn.request("POST", selector, data, headers)
+ else:
+ http_conn.request("GET", selector, headers=headers)
+
+ try:
+ response = http_conn.getresponse()
+ except http.client.BadStatusLine:
+ # something went wrong with the HTTP status line
+ raise urllib.error.URLError("http protocol error: bad status line")
+
+ # According to RFC 2616, "2xx" code indicates that the client's
+ # request was successfully received, understood, and accepted.
+ if 200 <= response.status < 300:
+ return urllib.response.addinfourl(response.fp, response.msg,
+ "http:" + url,
+ response.status)
+ else:
+ return self.http_error(
+ url, response.fp,
+ response.status, response.reason, response.msg, data)
+
+ def open_http(self, url, data=None):
+ """Use HTTP protocol."""
+ return self._open_generic_http(http.client.HTTPConnection, url, data)
+
+ def http_error(self, url, fp, errcode, errmsg, headers, data=None):
+ """Handle http errors.
+
+ Derived class can override this, or provide specific handlers
+ named http_error_DDD where DDD is the 3-digit error code."""
+ # First check if there's a specific handler for this error
+ name = 'http_error_%d' % errcode
+ if hasattr(self, name):
+ method = getattr(self, name)
+ if data is None:
+ result = method(url, fp, errcode, errmsg, headers)
+ else:
+ result = method(url, fp, errcode, errmsg, headers, data)
+ if result: return result
+ return self.http_error_default(url, fp, errcode, errmsg, headers)
+
+ def http_error_default(self, url, fp, errcode, errmsg, headers):
+ """Default error handler: close the connection and raise IOError."""
+ void = fp.read()
+ fp.close()
+ raise urllib.error.HTTPError(url, errcode, errmsg, headers, None)
+
+ if _have_ssl:
+ def _https_connection(self, host):
+ return http.client.HTTPSConnection(host,
+ key_file=self.key_file,
+ cert_file=self.cert_file)
+
+ def open_https(self, url, data=None):
+ """Use HTTPS protocol."""
+ return self._open_generic_http(self._https_connection, url, data)
+
+ def open_file(self, url):
+ """Use local file or FTP depending on form of URL."""
+ if not isinstance(url, str):
+ raise URLError('file error', 'proxy support for file protocol currently not implemented')
+ if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
+ return self.open_ftp(url)
+ else:
+ return self.open_local_file(url)
+
+ def open_local_file(self, url):
+ """Use local file."""
+ import mimetypes, email.utils
+ from io import StringIO
+ host, file = urllib.parse.splithost(url)
+ localname = url2pathname(file)
+ try:
+ stats = os.stat(localname)
+ except OSError as e:
+ raise URLError(e.errno, e.strerror, e.filename)
+ size = stats.st_size
+ modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+ mtype = mimetypes.guess_type(url)[0]
+ headers = email.message_from_string(
+ 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
+ (mtype or 'text/plain', size, modified))
+ if not host:
+ urlfile = file
+ if file[:1] == '/':
+ urlfile = 'file://' + file
+ return urllib.response.addinfourl(open(localname, 'rb'),
+ headers, urlfile)
+ host, port = urllib.parse.splitport(host)
+ if (not port
+ and socket.gethostbyname(host) in (localhost(), thishost())):
+ urlfile = file
+ if file[:1] == '/':
+ urlfile = 'file://' + file
+ return urllib.response.addinfourl(open(localname, 'rb'),
+ headers, urlfile)
+ raise URLError('local file error', 'not on local host')
+
+ def open_ftp(self, url):
+ """Use FTP protocol."""
+ if not isinstance(url, str):
+ raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
+ import mimetypes
+ from io import StringIO
+ host, path = urllib.parse.splithost(url)
+ if not host: raise URLError('ftp error', 'no host given')
+ host, port = urllib.parse.splitport(host)
+ user, host = urllib.parse.splituser(host)
+ if user: user, passwd = urllib.parse.splitpasswd(user)
+ else: passwd = None
+ host = urllib.parse.unquote(host)
+ user = urllib.parse.unquote(user or '')
+ passwd = urllib.parse.unquote(passwd or '')
+ host = socket.gethostbyname(host)
+ if not port:
+ import ftplib
+ port = ftplib.FTP_PORT
+ else:
+ port = int(port)
+ path, attrs = urllib.parse.splitattr(path)
+ path = urllib.parse.unquote(path)
+ dirs = path.split('/')
+ dirs, file = dirs[:-1], dirs[-1]
+ if dirs and not dirs[0]: dirs = dirs[1:]
+ if dirs and not dirs[0]: dirs[0] = '/'
+ key = user, host, port, '/'.join(dirs)
+ # XXX thread unsafe!
+ if len(self.ftpcache) > MAXFTPCACHE:
+ # Prune the cache, rather arbitrarily
+ for k in self.ftpcache.keys():
+ if k != key:
+ v = self.ftpcache[k]
+ del self.ftpcache[k]
+ v.close()
+ try:
+ if not key in self.ftpcache:
+ self.ftpcache[key] = \
+ ftpwrapper(user, passwd, host, port, dirs)
+ if not file: type = 'D'
+ else: type = 'I'
+ for attr in attrs:
+ attr, value = urllib.parse.splitvalue(attr)
+ if attr.lower() == 'type' and \
+ value in ('a', 'A', 'i', 'I', 'd', 'D'):
+ type = value.upper()
+ (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
+ mtype = mimetypes.guess_type("ftp:" + url)[0]
+ headers = ""
+ if mtype:
+ headers += "Content-Type: %s\n" % mtype
+ if retrlen is not None and retrlen >= 0:
+ headers += "Content-Length: %d\n" % retrlen
+ headers = email.message_from_string(headers)
+ return urllib.response.addinfourl(fp, headers, "ftp:" + url)
+ except ftperrors() as msg:
+ raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
+
+ def open_data(self, url, data=None):
+ """Use "data" URL."""
+ if not isinstance(url, str):
+ raise URLError('data error', 'proxy support for data protocol currently not implemented')
+ # ignore POSTed data
+ #
+ # syntax of data URLs:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ try:
+ [type, data] = url.split(',', 1)
+ except ValueError:
+ raise IOError('data error', 'bad data URL')
+ if not type:
+ type = 'text/plain;charset=US-ASCII'
+ semi = type.rfind(';')
+ if semi >= 0 and '=' not in type[semi:]:
+ encoding = type[semi+1:]
+ type = type[:semi]
+ else:
+ encoding = ''
+ msg = []
+ msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
+ time.gmtime(time.time())))
+ msg.append('Content-type: %s' % type)
+ if encoding == 'base64':
+ import base64
+ data = base64.decodestring(data)
+ else:
+ data = urllib.parse.unquote(data)
+ msg.append('Content-Length: %d' % len(data))
+ msg.append('')
+ msg.append(data)
+ msg = '\n'.join(msg)
+ headers = mimetools.message_from_string(msg)
+ #f.fileno = None # needed for addinfourl
+ return urllib.response.addinfourl(f, headers, url)
+
+
+class FancyURLopener(URLopener):
+ """Derived class with handlers for errors we can handle (perhaps)."""
+
+ def __init__(self, *args, **kwargs):
+ URLopener.__init__(self, *args, **kwargs)
+ self.auth_cache = {}
+ self.tries = 0
+ self.maxtries = 10
+
+ def http_error_default(self, url, fp, errcode, errmsg, headers):
+ """Default error handling -- don't raise an exception."""
+ return urllib.response.addinfourl(fp, headers, "http:" + url, errcode)
+
+ def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 302 -- relocated (temporarily)."""
+ self.tries += 1
+ if self.maxtries and self.tries >= self.maxtries:
+ if hasattr(self, "http_error_500"):
+ meth = self.http_error_500
+ else:
+ meth = self.http_error_default
+ self.tries = 0
+ return meth(url, fp, 500,
+ "Internal Server Error: Redirect Recursion", headers)
+ result = self.redirect_internal(url, fp, errcode, errmsg, headers,
+ data)
+ self.tries = 0
+ return result
+
+ def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
+ if 'location' in headers:
+ newurl = headers['location']
+ elif 'uri' in headers:
+ newurl = headers['uri']
+ else:
+ return
+ void = fp.read()
+ fp.close()
+ # In case the server sent a relative URL, join with original:
+ newurl = basejoin(self.type + ":" + url, newurl)
+ return self.open(newurl)
+
+ def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 301 -- also relocated (permanently)."""
+ return self.http_error_302(url, fp, errcode, errmsg, headers, data)
+
+ def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 303 -- also relocated (essentially identical to 302)."""
+ return self.http_error_302(url, fp, errcode, errmsg, headers, data)
+
+ def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 307 -- relocated, but turn POST into error."""
+ if data is None:
+ return self.http_error_302(url, fp, errcode, errmsg, headers, data)
+ else:
+ return self.http_error_default(url, fp, errcode, errmsg, headers)
+
+ def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 401 -- authentication required.
+ This function supports Basic authentication only."""
+ if not 'www-authenticate' in headers:
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ stuff = headers['www-authenticate']
+ import re
+ match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
+ if not match:
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ scheme, realm = match.groups()
+ if scheme.lower() != 'basic':
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ name = 'retry_' + self.type + '_basic_auth'
+ if data is None:
+ return getattr(self,name)(url, realm)
+ else:
+ return getattr(self,name)(url, realm, data)
+
+ def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
+ """Error 407 -- proxy authentication required.
+ This function supports Basic authentication only."""
+ if not 'proxy-authenticate' in headers:
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ stuff = headers['proxy-authenticate']
+ import re
+ match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
+ if not match:
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ scheme, realm = match.groups()
+ if scheme.lower() != 'basic':
+ URLopener.http_error_default(self, url, fp,
+ errcode, errmsg, headers)
+ name = 'retry_proxy_' + self.type + '_basic_auth'
+ if data is None:
+ return getattr(self,name)(url, realm)
+ else:
+ return getattr(self,name)(url, realm, data)
+
+ def retry_proxy_http_basic_auth(self, url, realm, data=None):
+ host, selector = urllib.parse.splithost(url)
+ newurl = 'http://' + host + selector
+ proxy = self.proxies['http']
+ urltype, proxyhost = urllib.parse.splittype(proxy)
+ proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
+ i = proxyhost.find('@') + 1
+ proxyhost = proxyhost[i:]
+ user, passwd = self.get_user_passwd(proxyhost, realm, i)
+ if not (user or passwd): return None
+ proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
+ quote(passwd, safe=''), proxyhost)
+ self.proxies['http'] = 'http://' + proxyhost + proxyselector
+ if data is None:
+ return self.open(newurl)
+ else:
+ return self.open(newurl, data)
+
+ def retry_proxy_https_basic_auth(self, url, realm, data=None):
+ host, selector = urllib.parse.splithost(url)
+ newurl = 'https://' + host + selector
+ proxy = self.proxies['https']
+ urltype, proxyhost = urllib.parse.splittype(proxy)
+ proxyhost, proxyselector = urllib.parse.splithost(proxyhost)
+ i = proxyhost.find('@') + 1
+ proxyhost = proxyhost[i:]
+ user, passwd = self.get_user_passwd(proxyhost, realm, i)
+ if not (user or passwd): return None
+ proxyhost = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
+ quote(passwd, safe=''), proxyhost)
+ self.proxies['https'] = 'https://' + proxyhost + proxyselector
+ if data is None:
+ return self.open(newurl)
+ else:
+ return self.open(newurl, data)
+
+ def retry_http_basic_auth(self, url, realm, data=None):
+ host, selector = urllib.parse.splithost(url)
+ i = host.find('@') + 1
+ host = host[i:]
+ user, passwd = self.get_user_passwd(host, realm, i)
+ if not (user or passwd): return None
+ host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
+ quote(passwd, safe=''), host)
+ newurl = 'http://' + host + selector
+ if data is None:
+ return self.open(newurl)
+ else:
+ return self.open(newurl, data)
+
+ def retry_https_basic_auth(self, url, realm, data=None):
+ host, selector = urllib.parse.splithost(url)
+ i = host.find('@') + 1
+ host = host[i:]
+ user, passwd = self.get_user_passwd(host, realm, i)
+ if not (user or passwd): return None
+ host = "%s:%s@%s" % (urllib.parse.quote(user, safe=''),
+ quote(passwd, safe=''), host)
+ newurl = 'https://' + host + selector
+ if data is None:
+ return self.open(newurl)
+ else:
+ return self.open(newurl, data)
+
+ def get_user_passwd(self, host, realm, clear_cache = 0):
+ key = realm + '@' + host.lower()
+ if key in self.auth_cache:
+ if clear_cache:
+ del self.auth_cache[key]
+ else:
+ return self.auth_cache[key]
+ user, passwd = self.prompt_user_passwd(host, realm)
+ if user or passwd: self.auth_cache[key] = (user, passwd)
+ return user, passwd
+
+ def prompt_user_passwd(self, host, realm):
+ """Override this in a GUI environment!"""
+ import getpass
+ try:
+ user = input("Enter username for %s at %s: " % (realm, host))
+ passwd = getpass.getpass("Enter password for %s in %s at %s: " %
+ (user, realm, host))
+ return user, passwd
+ except KeyboardInterrupt:
+ print()
+ return None, None
+
+
+# Utility functions
+
+_localhost = None
+def localhost():
+ """Return the IP address of the magic hostname 'localhost'."""
+ global _localhost
+ if _localhost is None:
+ _localhost = socket.gethostbyname('localhost')
+ return _localhost
+
+_thishost = None
+def thishost():
+ """Return the IP address of the current host."""
+ global _thishost
+ if _thishost is None:
+ _thishost = socket.gethostbyname(socket.gethostname())
+ return _thishost
+
+_ftperrors = None
+def ftperrors():
+ """Return the set of errors raised by the FTP class."""
+ global _ftperrors
+ if _ftperrors is None:
+ import ftplib
+ _ftperrors = ftplib.all_errors
+ return _ftperrors
+
+_noheaders = None
+def noheaders():
+ """Return an empty mimetools.Message object."""
+ global _noheaders
+ if _noheaders is None:
+ _noheaders = mimetools.message_from_string("")
+ return _noheaders
+
+
+# Utility classes
+
+class ftpwrapper:
+ """Class used by open_ftp() for cache of open FTP connections."""
+
+ def __init__(self, user, passwd, host, port, dirs, timeout=None):
+ self.user = user
+ self.passwd = passwd
+ self.host = host
+ self.port = port
+ self.dirs = dirs
+ self.timeout = timeout
+ self.init()
+
+ def init(self):
+ import ftplib
+ self.busy = 0
+ self.ftp = ftplib.FTP()
+ self.ftp.connect(self.host, self.port, self.timeout)
+ self.ftp.login(self.user, self.passwd)
+ for dir in self.dirs:
+ self.ftp.cwd(dir)
+
+ def retrfile(self, file, type):
+ import ftplib
+ self.endtransfer()
+ if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
+ else: cmd = 'TYPE ' + type; isdir = 0
+ try:
+ self.ftp.voidcmd(cmd)
+ except ftplib.all_errors:
+ self.init()
+ self.ftp.voidcmd(cmd)
+ conn = None
+ if file and not isdir:
+ # Try to retrieve as a file
+ try:
+ cmd = 'RETR ' + file
+ conn = self.ftp.ntransfercmd(cmd)
+ except ftplib.error_perm as reason:
+ if str(reason)[:3] != '550':
+ raise urllib.error.URLError('ftp error', reason).with_traceback(sys.exc_info()[2])
+ if not conn:
+ # Set transfer mode to ASCII!
+ self.ftp.voidcmd('TYPE A')
+ # Try a directory listing. Verify that directory exists.
+ if file:
+ pwd = self.ftp.pwd()
+ try:
+ try:
+ self.ftp.cwd(file)
+ except ftplib.error_perm as reason:
+ raise urllib.error.URLError('ftp error', reason) from reason
+ finally:
+ self.ftp.cwd(pwd)
+ cmd = 'LIST ' + file
+ else:
+ cmd = 'LIST'
+ conn = self.ftp.ntransfercmd(cmd)
+ self.busy = 1
+ # Pass back both a suitably decorated object and a retrieval length
+ return (urllib.response.addclosehook(conn[0].makefile('rb'),
+ self.endtransfer), conn[1])
+ def endtransfer(self):
+ if not self.busy:
+ return
+ self.busy = 0
+ try:
+ self.ftp.voidresp()
+ except ftperrors():
+ pass
+
+ def close(self):
+ self.endtransfer()
+ try:
+ self.ftp.close()
+ except ftperrors():
+ pass
+
+# Proxy handling
+def getproxies_environment():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Scan the environment for variables named <scheme>_proxy;
+ this seems to be the standard convention. If you need a
+ different way, you can pass a proxies dictionary to the
+ [Fancy]URLopener constructor.
+
+ """
+ proxies = {}
+ for name, value in os.environ.items():
+ name = name.lower()
+ if name == 'no_proxy':
+ # handled in proxy_bypass_environment
+ continue
+ if value and name[-6:] == '_proxy':
+ proxies[name[:-6]] = value
+ return proxies
+
+def proxy_bypass_environment(host):
+ """Test if proxies should not be used for a particular host.
+
+ Checks the environment for a variable named no_proxy, which should
+ be a list of DNS suffixes separated by commas, or '*' for all hosts.
+ """
+ no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
+ # '*' is special case for always bypass
+ if no_proxy == '*':
+ return 1
+ # strip port off host
+ hostonly, port = urllib.parse.splitport(host)
+ # check if the host ends with any of the DNS suffixes
+ for name in no_proxy.split(','):
+ if name and (hostonly.endswith(name) or host.endswith(name)):
+ return 1
+ # otherwise, don't bypass
+ return 0
+
+
+if sys.platform == 'darwin':
+ def getproxies_internetconfig():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ By convention the mac uses Internet Config to store
+ proxies. An HTTP proxy, for instance, is stored under
+ the HttpProxy key.
+
+ """
+ try:
+ import ic
+ except ImportError:
+ return {}
+
+ try:
+ config = ic.IC()
+ except ic.error:
+ return {}
+ proxies = {}
+ # HTTP:
+ if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
+ try:
+ value = config['HTTPProxyHost']
+ except ic.error:
+ pass
+ else:
+ proxies['http'] = 'http://%s' % value
+ # FTP: XXX To be done.
+ # Gopher: XXX To be done.
+ return proxies
+
+ def proxy_bypass(host):
+ if getproxies_environment():
+ return proxy_bypass_environment(host)
+ else:
+ return 0
+
+ def getproxies():
+ return getproxies_environment() or getproxies_internetconfig()
+
+elif os.name == 'nt':
+ def getproxies_registry():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Win32 uses the registry to store proxies.
+
+ """
+ proxies = {}
+ try:
+ import _winreg
+ except ImportError:
+ # Std module, so should be around - but you never know!
+ return proxies
+ try:
+ internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
+ r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
+ proxyEnable = _winreg.QueryValueEx(internetSettings,
+ 'ProxyEnable')[0]
+ if proxyEnable:
+ # Returned as Unicode but problems if not converted to ASCII
+ proxyServer = str(_winreg.QueryValueEx(internetSettings,
+ 'ProxyServer')[0])
+ if '=' in proxyServer:
+ # Per-protocol settings
+ for p in proxyServer.split(';'):
+ protocol, address = p.split('=', 1)
+ # See if address has a type:// prefix
+ import re
+ if not re.match('^([^/:]+)://', address):
+ address = '%s://%s' % (protocol, address)
+ proxies[protocol] = address
+ else:
+ # Use one setting for all protocols
+ if proxyServer[:5] == 'http:':
+ proxies['http'] = proxyServer
+ else:
+ proxies['http'] = 'http://%s' % proxyServer
+ proxies['ftp'] = 'ftp://%s' % proxyServer
+ internetSettings.Close()
+ except (WindowsError, ValueError, TypeError):
+ # Either registry key not found etc, or the value in an
+ # unexpected format.
+ # proxies already set up to be empty so nothing to do
+ pass
+ return proxies
+
+ def getproxies():
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Returns settings gathered from the environment, if specified,
+ or the registry.
+
+ """
+ return getproxies_environment() or getproxies_registry()
+
+ def proxy_bypass_registry(host):
+ try:
+ import _winreg
+ import re
+ except ImportError:
+ # Std modules, so should be around - but you never know!
+ return 0
+ try:
+ internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
+ r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
+ proxyEnable = _winreg.QueryValueEx(internetSettings,
+ 'ProxyEnable')[0]
+ proxyOverride = str(_winreg.QueryValueEx(internetSettings,
+ 'ProxyOverride')[0])
+ # ^^^^ Returned as Unicode but problems if not converted to ASCII
+ except WindowsError:
+ return 0
+ if not proxyEnable or not proxyOverride:
+ return 0
+ # try to make a host list from name and IP address.
+ rawHost, port = urllib.parse.splitport(host)
+ host = [rawHost]
+ try:
+ addr = socket.gethostbyname(rawHost)
+ if addr != rawHost:
+ host.append(addr)
+ except socket.error:
+ pass
+ try:
+ fqdn = socket.getfqdn(rawHost)
+ if fqdn != rawHost:
+ host.append(fqdn)
+ except socket.error:
+ pass
+ # make a check value list from the registry entry: replace the
+ # '<local>' string by the localhost entry and the corresponding
+ # canonical entry.
+ proxyOverride = proxyOverride.split(';')
+ i = 0
+ while i < len(proxyOverride):
+ if proxyOverride[i] == '<local>':
+ proxyOverride[i:i+1] = ['localhost',
+ '127.0.0.1',
+ socket.gethostname(),
+ socket.gethostbyname(
+ socket.gethostname())]
+ i += 1
+ # print proxyOverride
+ # now check if we match one of the registry values.
+ for test in proxyOverride:
+ test = test.replace(".", r"\.") # mask dots
+ test = test.replace("*", r".*") # change glob sequence
+ test = test.replace("?", r".") # change glob char
+ for val in host:
+ # print "%s <--> %s" %( test, val )
+ if re.match(test, val, re.I):
+ return 1
+ return 0
+
+ def proxy_bypass(host):
+ """Return a dictionary of scheme -> proxy server URL mappings.
+
+ Returns settings gathered from the environment, if specified,
+ or the registry.
+
+ """
+ if getproxies_environment():
+ return proxy_bypass_environment(host)
+ else:
+ return proxy_bypass_registry(host)
+
+else:
+ # By default use environment variables
+ getproxies = getproxies_environment
+ proxy_bypass = proxy_bypass_environment
--- /dev/null
+"""Response classes used by urllib.
+
+The base class, addbase, defines a minimal file-like interface,
+including read() and readline(). The typical response object is an
+addinfourl instance, which defines an info() method that returns
+headers and a geturl() method that returns the url.
+"""
+
+class addbase(object):
+ """Base class for addinfo and addclosehook."""
+
+ # XXX Add a method to expose the timeout on the underlying socket?
+
+ def __init__(self, fp):
+ # TODO(jhylton): Is there a better way to delegate using io?
+ self.fp = fp
+ self.read = self.fp.read
+ self.readline = self.fp.readline
+ # TODO(jhylton): Make sure an object with readlines() is also iterable
+ if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
+ if hasattr(self.fp, "fileno"):
+ self.fileno = self.fp.fileno
+ else:
+ self.fileno = lambda: None
+ if hasattr(self.fp, "__iter__"):
+ self.__iter__ = self.fp.__iter__
+ if hasattr(self.fp, "__next__"):
+ self.__next__ = self.fp.__next__
+
+ def __repr__(self):
+ return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
+ id(self), self.fp)
+
+ def close(self):
+ self.read = None
+ self.readline = None
+ self.readlines = None
+ self.fileno = None
+ if self.fp: self.fp.close()
+ self.fp = None
+
+class addclosehook(addbase):
+ """Class to add a close hook to an open file."""
+
+ def __init__(self, fp, closehook, *hookargs):
+ addbase.__init__(self, fp)
+ self.closehook = closehook
+ self.hookargs = hookargs
+
+ def close(self):
+ addbase.close(self)
+ if self.closehook:
+ self.closehook(*self.hookargs)
+ self.closehook = None
+ self.hookargs = None
+
+class addinfo(addbase):
+ """class to add an info() method to an open file."""
+
+ def __init__(self, fp, headers):
+ addbase.__init__(self, fp)
+ self.headers = headers
+
+ def info(self):
+ return self.headers
+
+class addinfourl(addbase):
+ """class to add info() and geturl() methods to an open file."""
+
+ def __init__(self, fp, headers, url, code=None):
+ addbase.__init__(self, fp)
+ self.headers = headers
+ self.url = url
+ self.code = code
+
+ def info(self):
+ return self.headers
+
+ def getcode(self):
+ return self.code
+
+ def geturl(self):
+ return self.url
The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
-import urlparse
-import urllib
+
+import urllib.parse, urllib.request
__all__ = ["RobotFileParser"]
def set_url(self, url):
"""Sets the URL referring to a robots.txt file."""
self.url = url
- self.host, self.path = urlparse.urlparse(url)[1:3]
+ self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
- opener = URLopener()
- f = opener.open(self.url)
- lines = []
- line = f.readline()
- while line:
- lines.append(line.strip())
- line = f.readline()
- self.errcode = opener.errcode
- if self.errcode in (401, 403):
- self.disallow_all = True
- elif self.errcode >= 400:
- self.allow_all = True
- elif self.errcode == 200 and lines:
- self.parse(lines)
+ try:
+ f = urllib.request.urlopen(self.url)
+ except urllib.error.HTTPError as err:
+ if err.code in (401, 403):
+ self.disallow_all = True
+ elif err.code >= 400:
+ self.allow_all = True
+ else:
+ self.parse(f.read().splitlines())
def _add_entry(self, entry):
if "*" in entry.useragents:
self.entries.append(entry)
def parse(self, lines):
- """parse the input lines from a robots.txt file.
- We allow that a user-agent: line is not preceded by
- one or more blank lines."""
+ """Parse the input lines from a robots.txt file.
+
+ We allow that a user-agent: line is not preceded by
+ one or more blank lines.
+ """
state = 0
- linenumber = 0
entry = Entry()
for line in lines:
- linenumber = linenumber + 1
if not line:
if state == 1:
entry = Entry()
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
- line[1] = urllib.unquote(line[1].strip())
+ line[1] = urllib.parse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
self._add_entry(entry)
return True
# search for given user agent matches
# the first match counts
- url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
+ url = urllib.parse.quote(urllib.parse.urlparse(urllib.parse.unquote(url))[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
# agent not found ==> access granted
return True
-
def __str__(self):
return ''.join([str(entry) + "\n" for entry in self.entries])
if path == '' and not allowance:
# an empty value means allow all
allowance = True
- self.path = urllib.quote(path)
+ self.path = urllib.parse.quote(path)
self.allowance = allowance
def applies_to(self, filename):
if line.applies_to(filename):
return line.allowance
return True
-
-class URLopener(urllib.FancyURLopener):
- def __init__(self, *args):
- urllib.FancyURLopener.__init__(self, *args)
- self.errcode = 200
-
- def prompt_user_passwd(self, host, realm):
- ## If robots.txt file is accessible only with a password,
- ## we act as if the file wasn't there.
- return None, None
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- self.errcode = errcode
- return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
- errmsg, headers)
+++ /dev/null
-"""An extensible library for opening URLs using a variety of protocols
-
-The simplest way to use this module is to call the urlopen function,
-which accepts a string containing a URL or a Request object (described
-below). It opens the URL and returns the results as file-like
-object; the returned object has some extra methods described below.
-
-The OpenerDirector manages a collection of Handler objects that do
-all the actual work. Each Handler implements a particular protocol or
-option. The OpenerDirector is a composite object that invokes the
-Handlers needed to open the requested URL. For example, the
-HTTPHandler performs HTTP GET and POST requests and deals with
-non-error returns. The HTTPRedirectHandler automatically deals with
-HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
-deals with digest authentication.
-
-urlopen(url, data=None) -- Basic usage is the same as original
-urllib. pass the url and optionally data to post to an HTTP URL, and
-get a file-like object back. One difference is that you can also pass
-a Request instance instead of URL. Raises a URLError (subclass of
-IOError); for HTTP errors, raises an HTTPError, which can also be
-treated as a valid response.
-
-build_opener -- Function that creates a new OpenerDirector instance.
-Will install the default handlers. Accepts one or more Handlers as
-arguments, either instances or Handler classes that it will
-instantiate. If one of the argument is a subclass of the default
-handler, the argument will be installed instead of the default.
-
-install_opener -- Installs a new opener as the default opener.
-
-objects of interest:
-OpenerDirector --
-
-Request -- An object that encapsulates the state of a request. The
-state can be as simple as the URL. It can also include extra HTTP
-headers, e.g. a User-Agent.
-
-BaseHandler --
-
-exceptions:
-URLError -- A subclass of IOError, individual protocols have their own
-specific subclass.
-
-HTTPError -- Also a valid HTTP response, so you can treat an HTTP error
-as an exceptional event or valid response.
-
-internals:
-BaseHandler and parent
-_call_chain conventions
-
-Example usage:
-
-import urllib2
-
-# set up authentication info
-authinfo = urllib2.HTTPBasicAuthHandler()
-authinfo.add_password(realm='PDQ Application',
- uri='https://mahler:8092/site-updates.py',
- user='klem',
- passwd='geheim$parole')
-
-proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
-
-# build a new opener that adds authentication and caching FTP handlers
-opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
-
-# install it
-urllib2.install_opener(opener)
-
-f = urllib2.urlopen('http://www.python.org/')
-
-
-"""
-
-# XXX issues:
-# If an authentication error handler that tries to perform
-# authentication for some reason but fails, how should the error be
-# signalled? The client needs to know the HTTP error code. But if
-# the handler knows that the problem was, e.g., that it didn't know
-# that hash algo that requested in the challenge, it would be good to
-# pass that information along to the client, too.
-# ftp errors aren't handled cleanly
-# check digest against correct (i.e. non-apache) implementation
-
-# Possible extensions:
-# complex proxies XXX not sure what exactly was meant by this
-# abstract factory for opener
-
-import base64
-import hashlib
-import http.client
-import io
-import email
-import os
-import posixpath
-import random
-import re
-import socket
-import sys
-import time
-import urlparse
-import bisect
-
-from io import StringIO
-
-from urllib import (unwrap, unquote, splittype, splithost, quote,
- addinfourl, splitport, splitquery,
- splitattr, ftpwrapper, noheaders, splituser, splitpasswd, splitvalue)
-
-# support for FileHandler, proxies via environment variables
-from urllib import localhost, url2pathname, getproxies
-
-# used in User-Agent header sent
-__version__ = sys.version[:3]
-
-_opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- global _opener
- if _opener is None:
- _opener = build_opener()
- return _opener.open(url, data, timeout)
-
-def install_opener(opener):
- global _opener
- _opener = opener
-
-# do these error classes make sense?
-# make sure all of the IOError stuff is overridden. we just want to be
-# subtypes.
-
-class URLError(IOError):
- # URLError is a sub-type of IOError, but it doesn't share any of
- # the implementation. need to override __init__ and __str__.
- # It sets self.args for compatibility with other EnvironmentError
- # subclasses, but args doesn't have the typical format with errno in
- # slot 0 and strerror in slot 1. This may be better than nothing.
- def __init__(self, reason):
- self.args = reason,
- self.reason = reason
-
- def __str__(self):
- return '<urlopen error %s>' % self.reason
-
-class HTTPError(URLError, addinfourl):
- """Raised when HTTP error occurs, but also acts like non-error return"""
- __super_init = addinfourl.__init__
-
- def __init__(self, url, code, msg, hdrs, fp):
- self.code = code
- self.msg = msg
- self.hdrs = hdrs
- self.fp = fp
- self.filename = url
- # The addinfourl classes depend on fp being a valid file
- # object. In some cases, the HTTPError may not have a valid
- # file object. If this happens, the simplest workaround is to
- # not initialize the base classes.
- if fp is not None:
- self.__super_init(fp, hdrs, url, code)
-
- def __str__(self):
- return 'HTTP Error %s: %s' % (self.code, self.msg)
-
-# copied from cookielib.py
-_cut_port_re = re.compile(r":\d+$")
-def request_host(request):
- """Return request-host, as defined by RFC 2965.
-
- Variation from RFC: returned value is lowercased, for convenient
- comparison.
-
- """
- url = request.get_full_url()
- host = urlparse.urlparse(url)[1]
- if host == "":
- host = request.get_header("Host", "")
-
- # remove port, if present
- host = _cut_port_re.sub("", host, 1)
- return host.lower()
-
-class Request:
-
- def __init__(self, url, data=None, headers={},
- origin_req_host=None, unverifiable=False):
- # unwrap('<URL:type://host/path>') --> 'type://host/path'
- self.__original = unwrap(url)
- self.type = None
- # self.__r_type is what's left after doing the splittype
- self.host = None
- self.port = None
- self.data = data
- self.headers = {}
- for key, value in headers.items():
- self.add_header(key, value)
- self.unredirected_hdrs = {}
- if origin_req_host is None:
- origin_req_host = request_host(self)
- self.origin_req_host = origin_req_host
- self.unverifiable = unverifiable
-
- def __getattr__(self, attr):
- # XXX this is a fallback mechanism to guard against these
- # methods getting called in a non-standard order. this may be
- # too complicated and/or unnecessary.
- # XXX should the __r_XXX attributes be public?
- if attr[:12] == '_Request__r_':
- name = attr[12:]
- if hasattr(Request, 'get_' + name):
- getattr(self, 'get_' + name)()
- return getattr(self, attr)
- raise AttributeError(attr)
-
- def get_method(self):
- if self.has_data():
- return "POST"
- else:
- return "GET"
-
- # XXX these helper methods are lame
-
- def add_data(self, data):
- self.data = data
-
- def has_data(self):
- return self.data is not None
-
- def get_data(self):
- return self.data
-
- def get_full_url(self):
- return self.__original
-
- def get_type(self):
- if self.type is None:
- self.type, self.__r_type = splittype(self.__original)
- if self.type is None:
- raise ValueError("unknown url type: %s" % self.__original)
- return self.type
-
- def get_host(self):
- if self.host is None:
- self.host, self.__r_host = splithost(self.__r_type)
- if self.host:
- self.host = unquote(self.host)
- return self.host
-
- def get_selector(self):
- return self.__r_host
-
- def set_proxy(self, host, type):
- self.host, self.type = host, type
- self.__r_host = self.__original
-
- def get_origin_req_host(self):
- return self.origin_req_host
-
- def is_unverifiable(self):
- return self.unverifiable
-
- def add_header(self, key, val):
- # useful for something like authentication
- self.headers[key.capitalize()] = val
-
- def add_unredirected_header(self, key, val):
- # will not be added to a redirected request
- self.unredirected_hdrs[key.capitalize()] = val
-
- def has_header(self, header_name):
- return (header_name in self.headers or
- header_name in self.unredirected_hdrs)
-
- def get_header(self, header_name, default=None):
- return self.headers.get(
- header_name,
- self.unredirected_hdrs.get(header_name, default))
-
- def header_items(self):
- hdrs = self.unredirected_hdrs.copy()
- hdrs.update(self.headers)
- return list(hdrs.items())
-
-class OpenerDirector:
- def __init__(self):
- client_version = "Python-urllib/%s" % __version__
- self.addheaders = [('User-agent', client_version)]
- # manage the individual handlers
- self.handlers = []
- self.handle_open = {}
- self.handle_error = {}
- self.process_response = {}
- self.process_request = {}
-
- def add_handler(self, handler):
- if not hasattr(handler, "add_parent"):
- raise TypeError("expected BaseHandler instance, got %r" %
- type(handler))
-
- added = False
- for meth in dir(handler):
- if meth in ["redirect_request", "do_open", "proxy_open"]:
- # oops, coincidental match
- continue
-
- i = meth.find("_")
- protocol = meth[:i]
- condition = meth[i+1:]
-
- if condition.startswith("error"):
- j = condition.find("_") + i + 1
- kind = meth[j+1:]
- try:
- kind = int(kind)
- except ValueError:
- pass
- lookup = self.handle_error.get(protocol, {})
- self.handle_error[protocol] = lookup
- elif condition == "open":
- kind = protocol
- lookup = self.handle_open
- elif condition == "response":
- kind = protocol
- lookup = self.process_response
- elif condition == "request":
- kind = protocol
- lookup = self.process_request
- else:
- continue
-
- handlers = lookup.setdefault(kind, [])
- if handlers:
- bisect.insort(handlers, handler)
- else:
- handlers.append(handler)
- added = True
-
- if added:
- # the handlers must work in an specific order, the order
- # is specified in a Handler attribute
- bisect.insort(self.handlers, handler)
- handler.add_parent(self)
-
- def close(self):
- # Only exists for backwards compatibility.
- pass
-
- def _call_chain(self, chain, kind, meth_name, *args):
- # Handlers raise an exception if no one else should try to handle
- # the request, or return None if they can't but another handler
- # could. Otherwise, they return the response.
- handlers = chain.get(kind, ())
- for handler in handlers:
- func = getattr(handler, meth_name)
-
- result = func(*args)
- if result is not None:
- return result
-
- def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
- # accept a URL or a Request object
- if isinstance(fullurl, str):
- req = Request(fullurl, data)
- else:
- req = fullurl
- if data is not None:
- req.add_data(data)
-
- req.timeout = timeout
- protocol = req.get_type()
-
- # pre-process request
- meth_name = protocol+"_request"
- for processor in self.process_request.get(protocol, []):
- meth = getattr(processor, meth_name)
- req = meth(req)
-
- response = self._open(req, data)
-
- # post-process response
- meth_name = protocol+"_response"
- for processor in self.process_response.get(protocol, []):
- meth = getattr(processor, meth_name)
- response = meth(req, response)
-
- return response
-
- def _open(self, req, data=None):
- result = self._call_chain(self.handle_open, 'default',
- 'default_open', req)
- if result:
- return result
-
- protocol = req.get_type()
- result = self._call_chain(self.handle_open, protocol, protocol +
- '_open', req)
- if result:
- return result
-
- return self._call_chain(self.handle_open, 'unknown',
- 'unknown_open', req)
-
- def error(self, proto, *args):
- if proto in ('http', 'https'):
- # XXX http[s] protocols are special-cased
- dict = self.handle_error['http'] # https is not different than http
- proto = args[2] # YUCK!
- meth_name = 'http_error_%s' % proto
- http_err = 1
- orig_args = args
- else:
- dict = self.handle_error
- meth_name = proto + '_error'
- http_err = 0
- args = (dict, proto, meth_name) + args
- result = self._call_chain(*args)
- if result:
- return result
-
- if http_err:
- args = (dict, 'default', 'http_error_default') + orig_args
- return self._call_chain(*args)
-
-# XXX probably also want an abstract factory that knows when it makes
-# sense to skip a superclass in favor of a subclass and when it might
-# make sense to include both
-
-def build_opener(*handlers):
- """Create an opener object from a list of handlers.
-
- The opener will use several default handlers, including support
- for HTTP and FTP.
-
- If any of the handlers passed as arguments are subclasses of the
- default handlers, the default handlers will not be used.
- """
- def isclass(obj):
- return isinstance(obj, type) or hasattr(obj, "__bases__")
-
- opener = OpenerDirector()
- default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
- HTTPDefaultErrorHandler, HTTPRedirectHandler,
- FTPHandler, FileHandler, HTTPErrorProcessor]
- if hasattr(http.client, 'HTTPS'):
- default_classes.append(HTTPSHandler)
- skip = set()
- for klass in default_classes:
- for check in handlers:
- if isclass(check):
- if issubclass(check, klass):
- skip.add(klass)
- elif isinstance(check, klass):
- skip.add(klass)
- for klass in skip:
- default_classes.remove(klass)
-
- for klass in default_classes:
- opener.add_handler(klass())
-
- for h in handlers:
- if isclass(h):
- h = h()
- opener.add_handler(h)
- return opener
-
-class BaseHandler:
- handler_order = 500
-
- def add_parent(self, parent):
- self.parent = parent
-
- def close(self):
- # Only exists for backwards compatibility
- pass
-
- def __lt__(self, other):
- if not hasattr(other, "handler_order"):
- # Try to preserve the old behavior of having custom classes
- # inserted after default ones (works only for custom user
- # classes which are not aware of handler_order).
- return True
- return self.handler_order < other.handler_order
-
-
-class HTTPErrorProcessor(BaseHandler):
- """Process HTTP error responses."""
- handler_order = 1000 # after all other processing
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- # According to RFC 2616, "2xx" code indicates that the client's
- # request was successfully received, understood, and accepted.
- if not (200 <= code < 300):
- response = self.parent.error(
- 'http', request, response, code, msg, hdrs)
-
- return response
-
- https_response = http_response
-
-class HTTPDefaultErrorHandler(BaseHandler):
- def http_error_default(self, req, fp, code, msg, hdrs):
- raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
-
-class HTTPRedirectHandler(BaseHandler):
- # maximum number of redirections to any single URL
- # this is needed because of the state that cookies introduce
- max_repeats = 4
- # maximum total number of redirections (regardless of URL) before
- # assuming we're in a loop
- max_redirections = 10
-
- def redirect_request(self, req, fp, code, msg, headers, newurl):
- """Return a Request or None in response to a redirect.
-
- This is called by the http_error_30x methods when a
- redirection response is received. If a redirection should
- take place, return a new Request to allow http_error_30x to
- perform the redirect. Otherwise, raise HTTPError if no-one
- else should try to handle this url. Return None if you can't
- but another Handler might.
- """
- m = req.get_method()
- if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
- or code in (301, 302, 303) and m == "POST"):
- # Strictly (according to RFC 2616), 301 or 302 in response
- # to a POST MUST NOT cause a redirection without confirmation
- # from the user (of urllib2, in this case). In practice,
- # essentially all clients do redirect in this case, so we
- # do the same.
- # be conciliant with URIs containing a space
- newurl = newurl.replace(' ', '%20')
- newheaders = dict((k,v) for k,v in req.headers.items()
- if k.lower() not in ("content-length", "content-type")
- )
- return Request(newurl,
- headers=newheaders,
- origin_req_host=req.get_origin_req_host(),
- unverifiable=True)
- else:
- raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
- # Implementation note: To avoid the server sending us into an
- # infinite loop, the request object needs to track what URLs we
- # have already seen. Do this by adding a handler-specific
- # attribute to the Request object.
- def http_error_302(self, req, fp, code, msg, headers):
- # Some servers (incorrectly) return multiple Location headers
- # (so probably same goes for URI). Use first header.
- if 'location' in headers:
- newurl = headers['location']
- elif 'uri' in headers:
- newurl = headers['uri']
- else:
- return
- newurl = urlparse.urljoin(req.get_full_url(), newurl)
-
- # XXX Probably want to forget about the state of the current
- # request, although that might interact poorly with other
- # handlers that also use handler-specific request attributes
- new = self.redirect_request(req, fp, code, msg, headers, newurl)
- if new is None:
- return
-
- # loop detection
- # .redirect_dict has a key url if url was previously visited.
- if hasattr(req, 'redirect_dict'):
- visited = new.redirect_dict = req.redirect_dict
- if (visited.get(newurl, 0) >= self.max_repeats or
- len(visited) >= self.max_redirections):
- raise HTTPError(req.get_full_url(), code,
- self.inf_msg + msg, headers, fp)
- else:
- visited = new.redirect_dict = req.redirect_dict = {}
- visited[newurl] = visited.get(newurl, 0) + 1
-
- # Don't close the fp until we are sure that we won't use it
- # with HTTPError.
- fp.read()
- fp.close()
-
- return self.parent.open(new)
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
- inf_msg = "The HTTP server returned a redirect error that would " \
- "lead to an infinite loop.\n" \
- "The last 30x error message was:\n"
-
-
-def _parse_proxy(proxy):
- """Return (scheme, user, password, host/port) given a URL or an authority.
-
- If a URL is supplied, it must have an authority (host:port) component.
- According to RFC 3986, having an authority component means the URL must
- have two slashes after the scheme:
-
- >>> _parse_proxy('file:/ftp.example.com/')
- Traceback (most recent call last):
- ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
-
- The first three items of the returned tuple may be None.
-
- Examples of authority parsing:
-
- >>> _parse_proxy('proxy.example.com')
- (None, None, None, 'proxy.example.com')
- >>> _parse_proxy('proxy.example.com:3128')
- (None, None, None, 'proxy.example.com:3128')
-
- The authority component may optionally include userinfo (assumed to be
- username:password):
-
- >>> _parse_proxy('joe:password@proxy.example.com')
- (None, 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('joe:password@proxy.example.com:3128')
- (None, 'joe', 'password', 'proxy.example.com:3128')
-
- Same examples, but with URLs instead:
-
- >>> _parse_proxy('http://proxy.example.com/')
- ('http', None, None, 'proxy.example.com')
- >>> _parse_proxy('http://proxy.example.com:3128/')
- ('http', None, None, 'proxy.example.com:3128')
- >>> _parse_proxy('http://joe:password@proxy.example.com/')
- ('http', 'joe', 'password', 'proxy.example.com')
- >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
- ('http', 'joe', 'password', 'proxy.example.com:3128')
-
- Everything after the authority is ignored:
-
- >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
- ('ftp', 'joe', 'password', 'proxy.example.com')
-
- Test for no trailing '/' case:
-
- >>> _parse_proxy('http://joe:password@proxy.example.com')
- ('http', 'joe', 'password', 'proxy.example.com')
-
- """
- scheme, r_scheme = splittype(proxy)
- if not r_scheme.startswith("/"):
- # authority
- scheme = None
- authority = proxy
- else:
- # URL
- if not r_scheme.startswith("//"):
- raise ValueError("proxy URL with no authority: %r" % proxy)
- # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
- # and 3.3.), path is empty or starts with '/'
- end = r_scheme.find("/", 2)
- if end == -1:
- end = None
- authority = r_scheme[2:end]
- userinfo, hostport = splituser(authority)
- if userinfo is not None:
- user, password = splitpasswd(userinfo)
- else:
- user = password = None
- return scheme, user, password, hostport
-
-class ProxyHandler(BaseHandler):
- # Proxies must be in front
- handler_order = 100
-
- def __init__(self, proxies=None):
- if proxies is None:
- proxies = getproxies()
- assert hasattr(proxies, 'keys'), "proxies must be a mapping"
- self.proxies = proxies
- for type, url in proxies.items():
- setattr(self, '%s_open' % type,
- lambda r, proxy=url, type=type, meth=self.proxy_open: \
- meth(r, proxy, type))
-
- def proxy_open(self, req, proxy, type):
- orig_type = req.get_type()
- proxy_type, user, password, hostport = _parse_proxy(proxy)
- if proxy_type is None:
- proxy_type = orig_type
- if user and password:
- user_pass = '%s:%s' % (unquote(user), unquote(password))
- creds = base64.b64encode(user_pass.encode()).decode("ascii")
- req.add_header('Proxy-authorization', 'Basic ' + creds)
- hostport = unquote(hostport)
- req.set_proxy(hostport, proxy_type)
- if orig_type == proxy_type:
- # let other handlers take care of it
- return None
- else:
- # need to start over, because the other handlers don't
- # grok the proxy's URL type
- # e.g. if we have a constructor arg proxies like so:
- # {'http': 'ftp://proxy.example.com'}, we may end up turning
- # a request for http://acme.example.com/a into one for
- # ftp://proxy.example.com/a
- return self.parent.open(req)
-
-class HTTPPasswordMgr:
-
- def __init__(self):
- self.passwd = {}
-
- def add_password(self, realm, uri, user, passwd):
- # uri could be a single URI or a sequence
- if isinstance(uri, str):
- uri = [uri]
- if not realm in self.passwd:
- self.passwd[realm] = {}
- for default_port in True, False:
- reduced_uri = tuple(
- [self.reduce_uri(u, default_port) for u in uri])
- self.passwd[realm][reduced_uri] = (user, passwd)
-
- def find_user_password(self, realm, authuri):
- domains = self.passwd.get(realm, {})
- for default_port in True, False:
- reduced_authuri = self.reduce_uri(authuri, default_port)
- for uris, authinfo in domains.items():
- for uri in uris:
- if self.is_suburi(uri, reduced_authuri):
- return authinfo
- return None, None
-
- def reduce_uri(self, uri, default_port=True):
- """Accept authority or URI and extract only the authority and path."""
- # note HTTP URLs do not have a userinfo component
- parts = urlparse.urlsplit(uri)
- if parts[1]:
- # URI
- scheme = parts[0]
- authority = parts[1]
- path = parts[2] or '/'
- else:
- # host or host:port
- scheme = None
- authority = uri
- path = '/'
- host, port = splitport(authority)
- if default_port and port is None and scheme is not None:
- dport = {"http": 80,
- "https": 443,
- }.get(scheme)
- if dport is not None:
- authority = "%s:%d" % (host, dport)
- return authority, path
-
- def is_suburi(self, base, test):
- """Check if test is below base in a URI tree
-
- Both args must be URIs in reduced form.
- """
- if base == test:
- return True
- if base[0] != test[0]:
- return False
- common = posixpath.commonprefix((base[1], test[1]))
- if len(common) == len(base[1]):
- return True
- return False
-
-
-class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
-
- def find_user_password(self, realm, authuri):
- user, password = HTTPPasswordMgr.find_user_password(self, realm,
- authuri)
- if user is not None:
- return user, password
- return HTTPPasswordMgr.find_user_password(self, None, authuri)
-
-
-class AbstractBasicAuthHandler:
-
- # XXX this allows for multiple auth-schemes, but will stupidly pick
- # the last one with a realm specified.
-
- # allow for double- and single-quoted realm values
- # (single quotes are a violation of the RFC, but appear in the wild)
- rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
- 'realm=(["\'])(.*?)\\2', re.I)
-
- # XXX could pre-emptively send auth info already accepted (RFC 2617,
- # end of section 2, and section 1.2 immediately after "credentials"
- # production).
-
- def __init__(self, password_mgr=None):
- if password_mgr is None:
- password_mgr = HTTPPasswordMgr()
- self.passwd = password_mgr
- self.add_password = self.passwd.add_password
-
- def http_error_auth_reqed(self, authreq, host, req, headers):
- # host may be an authority (without userinfo) or a URL with an
- # authority
- # XXX could be multiple headers
- authreq = headers.get(authreq, None)
- if authreq:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, quote, realm = mo.groups()
- if scheme.lower() == 'basic':
- return self.retry_http_basic_auth(host, req, realm)
-
- def retry_http_basic_auth(self, host, req, realm):
- user, pw = self.passwd.find_user_password(realm, host)
- if pw is not None:
- raw = "%s:%s" % (user, pw)
- auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
- if req.headers.get(self.auth_header, None) == auth:
- return None
- req.add_header(self.auth_header, auth)
- return self.parent.open(req)
- else:
- return None
-
-
-class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
-
- auth_header = 'Authorization'
-
- def http_error_401(self, req, fp, code, msg, headers):
- url = req.get_full_url()
- return self.http_error_auth_reqed('www-authenticate',
- url, req, headers)
-
-
-class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
-
- auth_header = 'Proxy-authorization'
-
- def http_error_407(self, req, fp, code, msg, headers):
- # http_error_auth_reqed requires that there is no userinfo component in
- # authority. Assume there isn't one, since urllib2 does not (and
- # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
- # userinfo.
- authority = req.get_host()
- return self.http_error_auth_reqed('proxy-authenticate',
- authority, req, headers)
-
-
-def randombytes(n):
- """Return n random bytes."""
- return os.urandom(n)
-
-class AbstractDigestAuthHandler:
- # Digest authentication is specified in RFC 2617.
-
- # XXX The client does not inspect the Authentication-Info header
- # in a successful response.
-
- # XXX It should be possible to test this implementation against
- # a mock server that just generates a static set of challenges.
-
- # XXX qop="auth-int" supports is shaky
-
- def __init__(self, passwd=None):
- if passwd is None:
- passwd = HTTPPasswordMgr()
- self.passwd = passwd
- self.add_password = self.passwd.add_password
- self.retried = 0
- self.nonce_count = 0
-
- def reset_retry_count(self):
- self.retried = 0
-
- def http_error_auth_reqed(self, auth_header, host, req, headers):
- authreq = headers.get(auth_header, None)
- if self.retried > 5:
- # Don't fail endlessly - if we failed once, we'll probably
- # fail a second time. Hm. Unless the Password Manager is
- # prompting for the information. Crap. This isn't great
- # but it's better than the current 'repeat until recursion
- # depth exceeded' approach <wink>
- raise HTTPError(req.get_full_url(), 401, "digest auth failed",
- headers, None)
- else:
- self.retried += 1
- if authreq:
- scheme = authreq.split()[0]
- if scheme.lower() == 'digest':
- return self.retry_http_digest_auth(req, authreq)
-
- def retry_http_digest_auth(self, req, auth):
- token, challenge = auth.split(' ', 1)
- chal = parse_keqv_list(parse_http_list(challenge))
- auth = self.get_authorization(req, chal)
- if auth:
- auth_val = 'Digest %s' % auth
- if req.headers.get(self.auth_header, None) == auth_val:
- return None
- req.add_unredirected_header(self.auth_header, auth_val)
- resp = self.parent.open(req)
- return resp
-
- def get_cnonce(self, nonce):
- # The cnonce-value is an opaque
- # quoted string value provided by the client and used by both client
- # and server to avoid chosen plaintext attacks, to provide mutual
- # authentication, and to provide some message integrity protection.
- # This isn't a fabulous effort, but it's probably Good Enough.
- s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
- b = s.encode("ascii") + randombytes(8)
- dig = hashlib.sha1(b).hexdigest()
- return dig[:16]
-
- def get_authorization(self, req, chal):
- try:
- realm = chal['realm']
- nonce = chal['nonce']
- qop = chal.get('qop')
- algorithm = chal.get('algorithm', 'MD5')
- # mod_digest doesn't send an opaque, even though it isn't
- # supposed to be optional
- opaque = chal.get('opaque', None)
- except KeyError:
- return None
-
- H, KD = self.get_algorithm_impls(algorithm)
- if H is None:
- return None
-
- user, pw = self.passwd.find_user_password(realm, req.get_full_url())
- if user is None:
- return None
-
- # XXX not implemented yet
- if req.has_data():
- entdig = self.get_entity_digest(req.get_data(), chal)
- else:
- entdig = None
-
- A1 = "%s:%s:%s" % (user, realm, pw)
- A2 = "%s:%s" % (req.get_method(),
- # XXX selector: what about proxies and full urls
- req.get_selector())
- if qop == 'auth':
- self.nonce_count += 1
- ncvalue = '%08x' % self.nonce_count
- cnonce = self.get_cnonce(nonce)
- noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
- respdig = KD(H(A1), noncebit)
- elif qop is None:
- respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
- else:
- # XXX handle auth-int.
- raise URLError("qop '%s' is not supported." % qop)
-
- # XXX should the partial digests be encoded too?
-
- base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
- 'response="%s"' % (user, realm, nonce, req.get_selector(),
- respdig)
- if opaque:
- base += ', opaque="%s"' % opaque
- if entdig:
- base += ', digest="%s"' % entdig
- base += ', algorithm="%s"' % algorithm
- if qop:
- base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
- return base
-
- def get_algorithm_impls(self, algorithm):
- # algorithm should be case-insensitive according to RFC2617
- algorithm = algorithm.upper()
- # lambdas assume digest modules are imported at the top level
- if algorithm == 'MD5':
- H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
- elif algorithm == 'SHA':
- H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
- # XXX MD5-sess
- KD = lambda s, d: H("%s:%s" % (s, d))
- return H, KD
-
- def get_entity_digest(self, data, chal):
- # XXX not implemented yet
- return None
-
-
-class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
- """An authentication protocol defined by RFC 2069
-
- Digest authentication improves on basic authentication because it
- does not transmit passwords in the clear.
- """
-
- auth_header = 'Authorization'
- handler_order = 490 # before Basic auth
-
- def http_error_401(self, req, fp, code, msg, headers):
- host = urlparse.urlparse(req.get_full_url())[1]
- retry = self.http_error_auth_reqed('www-authenticate',
- host, req, headers)
- self.reset_retry_count()
- return retry
-
-
-class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
-
- auth_header = 'Proxy-Authorization'
- handler_order = 490 # before Basic auth
-
- def http_error_407(self, req, fp, code, msg, headers):
- host = req.get_host()
- retry = self.http_error_auth_reqed('proxy-authenticate',
- host, req, headers)
- self.reset_retry_count()
- return retry
-
-class AbstractHTTPHandler(BaseHandler):
-
- def __init__(self, debuglevel=0):
- self._debuglevel = debuglevel
-
- def set_http_debuglevel(self, level):
- self._debuglevel = level
-
- def do_request_(self, request):
- host = request.get_host()
- if not host:
- raise URLError('no host given')
-
- if request.has_data(): # POST
- data = request.get_data()
- if not request.has_header('Content-type'):
- request.add_unredirected_header(
- 'Content-type',
- 'application/x-www-form-urlencoded')
- if not request.has_header('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
-
- scheme, sel = splittype(request.get_selector())
- sel_host, sel_path = splithost(sel)
- if not request.has_header('Host'):
- request.add_unredirected_header('Host', sel_host or host)
- for name, value in self.parent.addheaders:
- name = name.capitalize()
- if not request.has_header(name):
- request.add_unredirected_header(name, value)
-
- return request
-
- def do_open(self, http_class, req):
- """Return an addinfourl object for the request, using http_class.
-
- http_class must implement the HTTPConnection API from http.client.
- The addinfourl return value is a file-like object. It also
- has methods and attributes including:
- - info(): return a email.message.Message object for the headers
- - geturl(): return the original request URL
- - code: HTTP status code
- """
- host = req.get_host()
- if not host:
- raise URLError('no host given')
-
- h = http_class(host, timeout=req.timeout) # will parse host:port
- h.set_debuglevel(self._debuglevel)
-
- headers = dict(req.headers)
- headers.update(req.unredirected_hdrs)
- # We want to make an HTTP/1.1 request, but the addinfourl
- # class isn't prepared to deal with a persistent connection.
- # It will try to read all remaining data from the socket,
- # which will block while the server waits for the next request.
- # So make sure the connection gets closed after the (only)
- # request.
- headers["Connection"] = "close"
- headers = dict(
- (name.title(), val) for name, val in headers.items())
- try:
- h.request(req.get_method(), req.get_selector(), req.data, headers)
- r = h.getresponse()
- except socket.error as err: # XXX what error?
- raise URLError(err)
-
- # Pick apart the HTTPResponse object to get the addinfourl
- # object initialized properly.
-
- # XXX Should an HTTPResponse object really be passed to
- # BufferedReader? If so, we should change http.client to support
- # this use directly.
-
- # Add some fake methods to the reader to satisfy BufferedReader.
- r.readable = lambda: True
- r.writable = r.seekable = lambda: False
- r._checkReadable = lambda: True
- r._checkWritable = lambda: False
- fp = io.BufferedReader(r)
-
- resp = addinfourl(fp, r.msg, req.get_full_url())
- resp.code = r.status
- resp.msg = r.reason
- return resp
-
-
-class HTTPHandler(AbstractHTTPHandler):
-
- def http_open(self, req):
- return self.do_open(http.client.HTTPConnection, req)
-
- http_request = AbstractHTTPHandler.do_request_
-
-if hasattr(http.client, 'HTTPS'):
- class HTTPSHandler(AbstractHTTPHandler):
-
- def https_open(self, req):
- return self.do_open(http.client.HTTPSConnection, req)
-
- https_request = AbstractHTTPHandler.do_request_
-
-class HTTPCookieProcessor(BaseHandler):
- def __init__(self, cookiejar=None):
- import http.cookiejar
- if cookiejar is None:
- cookiejar = http.cookiejar.CookieJar()
- self.cookiejar = cookiejar
-
- def http_request(self, request):
- self.cookiejar.add_cookie_header(request)
- return request
-
- def http_response(self, request, response):
- self.cookiejar.extract_cookies(response, request)
- return response
-
- https_request = http_request
- https_response = http_response
-
-class UnknownHandler(BaseHandler):
- def unknown_open(self, req):
- type = req.get_type()
- raise URLError('unknown url type: %s' % type)
-
-def parse_keqv_list(l):
- """Parse list of key=value strings where keys are not duplicated."""
- parsed = {}
- for elt in l:
- # Because of a trailing comma in the auth string, elt could be the
- # empty string.
- if not elt:
- continue
- k, v = elt.split('=', 1)
- if v[0] == '"' and v[-1] == '"':
- v = v[1:-1]
- parsed[k] = v
- return parsed
-
-def parse_http_list(s):
- """Parse lists as described by RFC 2068 Section 2.
-
- In particular, parse comma-separated lists where the elements of
- the list may include quoted-strings. A quoted-string could
- contain a comma. A non-quoted string could have quotes in the
- middle. Neither commas nor quotes count if they are escaped.
- Only double-quotes count, not single-quotes.
- """
- res = []
- part = ''
-
- escape = quote = False
- for cur in s:
- if escape:
- part += cur
- escape = False
- continue
- if quote:
- if cur == '\\':
- escape = True
- continue
- elif cur == '"':
- quote = False
- part += cur
- continue
-
- if cur == ',':
- res.append(part)
- part = ''
- continue
-
- if cur == '"':
- quote = True
-
- part += cur
-
- # append last part
- if part:
- res.append(part)
-
- return [part.strip() for part in res]
-
-class FileHandler(BaseHandler):
- # Use local file or FTP depending on form of URL
- def file_open(self, req):
- url = req.get_selector()
- if url[:2] == '//' and url[2:3] != '/':
- req.type = 'ftp'
- return self.parent.open(req)
- else:
- return self.open_local_file(req)
-
- # names for the localhost
- names = None
- def get_names(self):
- if FileHandler.names is None:
- try:
- FileHandler.names = (socket.gethostbyname('localhost'),
- socket.gethostbyname(socket.gethostname()))
- except socket.gaierror:
- FileHandler.names = (socket.gethostbyname('localhost'),)
- return FileHandler.names
-
- # not entirely sure what the rules are here
- def open_local_file(self, req):
- import email.utils
- import mimetypes
- host = req.get_host()
- file = req.get_selector()
- localfile = url2pathname(file)
- try:
- stats = os.stat(localfile)
- size = stats.st_size
- modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
- mtype = mimetypes.guess_type(file)[0]
- headers = email.message_from_string(
- 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified))
- if host:
- host, port = splitport(host)
- if not host or \
- (not port and _safe_gethostbyname(host) in self.get_names()):
- return addinfourl(open(localfile, 'rb'),
- headers, 'file:'+file)
- except OSError as msg:
- # urllib2 users shouldn't expect OSErrors coming from urlopen()
- raise URLError(msg)
- raise URLError('file not on local host')
-
-def _safe_gethostbyname(host):
- try:
- return socket.gethostbyname(host)
- except socket.gaierror:
- return None
-
-class FTPHandler(BaseHandler):
- def ftp_open(self, req):
- import ftplib
- import mimetypes
- host = req.get_host()
- if not host:
- raise URLError('ftp error: no host given')
- host, port = splitport(host)
- if port is None:
- port = ftplib.FTP_PORT
- else:
- port = int(port)
-
- # username/password handling
- user, host = splituser(host)
- if user:
- user, passwd = splitpasswd(user)
- else:
- passwd = None
- host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
-
- try:
- host = socket.gethostbyname(host)
- except socket.error as msg:
- raise URLError(msg)
- path, attrs = splitattr(req.get_selector())
- dirs = path.split('/')
- dirs = list(map(unquote, dirs))
- dirs, file = dirs[:-1], dirs[-1]
- if dirs and not dirs[0]:
- dirs = dirs[1:]
- try:
- fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
- type = file and 'I' or 'D'
- for attr in attrs:
- attr, value = splitvalue(attr)
- if attr.lower() == 'type' and \
- value in ('a', 'A', 'i', 'I', 'd', 'D'):
- type = value.upper()
- fp, retrlen = fw.retrfile(file, type)
- headers = ""
- mtype = mimetypes.guess_type(req.get_full_url())[0]
- if mtype:
- headers += "Content-type: %s\n" % mtype
- if retrlen is not None and retrlen >= 0:
- headers += "Content-length: %d\n" % retrlen
- headers = email.message_from_string(headers)
- sf = StringIO(str(headers))
- return addinfourl(fp, headers, req.get_full_url())
- except ftplib.all_errors as msg:
- raise URLError('ftp error: %s' % msg).with_traceback(sys.exc_info()[2])
-
- def connect_ftp(self, user, passwd, host, port, dirs, timeout):
- fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
- return fw
-
-class CacheFTPHandler(FTPHandler):
- # XXX would be nice to have pluggable cache strategies
- # XXX this stuff is definitely not thread safe
- def __init__(self):
- self.cache = {}
- self.timeout = {}
- self.soonest = 0
- self.delay = 60
- self.max_conns = 16
-
- def setTimeout(self, t):
- self.delay = t
-
- def setMaxConns(self, m):
- self.max_conns = m
-
- def connect_ftp(self, user, passwd, host, port, dirs, timeout):
- key = user, host, port, '/'.join(dirs), timeout
- if key in self.cache:
- self.timeout[key] = time.time() + self.delay
- else:
- self.cache[key] = ftpwrapper(user, passwd, host, port, dirs, timeout)
- self.timeout[key] = time.time() + self.delay
- self.check_cache()
- return self.cache[key]
-
- def check_cache(self):
- # first check for old ones
- t = time.time()
- if self.soonest <= t:
- for k, v in list(self.timeout.items()):
- if v < t:
- self.cache[k].close()
- del self.cache[k]
- del self.timeout[k]
- self.soonest = min(list(self.timeout.values()))
-
- # then check the size
- if len(self.cache) == self.max_conns:
- for k, v in list(self.timeout.items()):
- if v == self.soonest:
- del self.cache[k]
- del self.timeout[k]
- break
- self.soonest = min(list(self.timeout.values()))
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
-import urllib, sys
+import sys
+import urllib.parse
from wsgiref.handlers import SimpleHandler
__version__ = "0.1"
else:
path,query = self.path,''
- env['PATH_INFO'] = urllib.unquote(path)
+ env['PATH_INFO'] = urllib.parse.unquote(path)
env['QUERY_STRING'] = query
host = self.address_string()
def application_uri(environ):
"""Return the application's base URI (no PATH_INFO or QUERY_STRING)"""
url = environ['wsgi.url_scheme']+'://'
- from urllib import quote
+ from urllib.parse import quote
if environ.get('HTTP_HOST'):
url += environ['HTTP_HOST']
def request_uri(environ, include_query=1):
"""Return the full request URI, optionally including the query string"""
url = application_uri(environ)
- from urllib import quote
+ from urllib.parse import quote
path_info = quote(environ.get('PATH_INFO',''))
if not environ.get('SCRIPT_NAME'):
url += path_info[1:]
options.errorHandler = self.errorHandler
fp = input.byteStream
if fp is None and options.systemId:
- import urllib2
- fp = urllib2.urlopen(input.systemId)
+ import urllib.request
+ fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
def parseWithContext(self, input, cnode, action):
source.encoding = self._guess_media_encoding(source)
# determine the base URI is we can
- import posixpath, urlparse
- parts = urlparse.urlparse(systemId)
+ import posixpath, urllib.parse
+ parts = urllib.parse.urlparse(systemId)
scheme, netloc, path, params, query, fragment = parts
# XXX should we check the scheme here as well?
if path and not path.endswith("/"):
path = posixpath.dirname(path) + "/"
parts = scheme, netloc, path, params, query, fragment
- source.baseURI = urlparse.urlunparse(parts)
+ source.baseURI = urllib.parse.urlunparse(parts)
return source
return self._opener
def _create_opener(self):
- import urllib2
- return urllib2.build_opener()
+ import urllib.request
+ return urllib.request.build_opener()
def _guess_media_encoding(self, source):
info = source.byteStream.info()
convenience of application and driver writers.
"""
-import os, urlparse, urllib
+import os, urllib.parse, urllib.request
from . import handler
from . import xmlreader
source.setSystemId(sysidfilename)
f = open(sysidfilename, "rb")
else:
- source.setSystemId(urlparse.urljoin(base, sysid))
- f = urllib.urlopen(source.getSystemId())
+ source.setSystemId(urllib.parse.urljoin(base, sysid))
+ f = urllib.request.urlopen(source.getSystemId())
source.setByteStream(f)
if isinstance(host, tuple):
host, x509 = host
- import urllib
- auth, host = urllib.splituser(host)
+ import urllib.parse
+ auth, host = urllib.parse.splituser(host)
if auth:
import base64
- auth = base64.encodestring(urllib.unquote(auth))
+ auth = base64.encodestring(urllib.parse.unquote(auth))
auth = "".join(auth.split()) # get rid of whitespace
extra_headers = [
("Authorization", "Basic " + auth)
# establish a "logical" server connection
# get the url
- import urllib
- type, uri = urllib.splittype(uri)
+ import urllib.parse
+ type, uri = urllib.parse.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported XML-RPC protocol")
- self.__host, self.__handler = urllib.splithost(uri)
+ self.__host, self.__handler = urllib.parse.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
email email/mime email/test email/test/data \
html json json/tests http dbm xmlrpc \
sqlite3 sqlite3/test \
- logging bsddb bsddb/test csv wsgiref \
+ logging bsddb bsddb/test csv wsgiref urllib \
lib2to3 lib2to3/fixes lib2to3/pgen2 lib2to3/tests \
ctypes ctypes/test ctypes/macholib idlelib idlelib/Icons \
distutils distutils/command distutils/tests $(XMLLIBSUBDIRS) \
Library
-------
+- a new ``urllib`` package was created. It consists of code from
+ ``urllib``, ``urllib2``, ``urlparse``, and ``robotparser``. The old
+ modules have all been removed. The new package has five submodules:
+ ``urllib.parse``, ``urllib.request``, ``urllib.response``,
+ ``urllib.error``, and ``urllib.robotparser``. The
+ ``urllib.request.urlopen()`` function uses the url opener from
+ ``urllib2``. (Note that the unittests have not been renamed for the
+ beta, but they will be renamed in the future.)
+
- rfc822 has been removed in favor of the email package.
- mimetools has been removed in favor of the email package.