self.assertEqual(splithost('/foo/bar/baz.html'),
(None, '/foo/bar/baz.html'))
+ # bpo-30500: # starts a fragment.
+ self.assertEqual(splithost('//127.0.0.1#@host.com'),
+ ('127.0.0.1', '/#@host.com'))
+ self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
+ ('127.0.0.1', '/#@host.com:80'))
+ self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
+ ('127.0.0.1:80', '/#@host.com'))
+
+ # Empty host is returned as empty string.
+ self.assertEqual(splithost("///file"),
+ ('', '/file'))
+
+ # Trailing semicolon, question mark and hash symbol are kept.
+ self.assertEqual(splithost("//example.net/file;"),
+ ('example.net', '/file;'))
+ self.assertEqual(splithost("//example.net/file?"),
+ ('example.net', '/file?'))
+ self.assertEqual(splithost("//example.net/file#"),
+ ('example.net', '/file#'))
+
def test_splituser(self):
splituser = urllib.splituser
self.assertEqual(splituser('User:Pass@www.python.org:080'),
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- import re
- _hostprog = re.compile('^//([^/?]*)(.*)$')
+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match:
Library
-------
+- [Security] bpo-30500: Fix urllib.splithost() to correctly parse
+ fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
+ correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``
+ as the host in an authentification (``login@host``).
+
- [Security] bpo-29591: Update expat copy from 2.1.1 to 2.2.0 to get fixes
of CVE-2016-0718 and CVE-2016-4472. See
https://sourceforge.net/p/expat/bugs/537/ for more information.