Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.

author Serhiy Storchaka <storchaka@gmail.com>
Sun, 17 Aug 2014 05:22:11 +0000 (08:22 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 17 Aug 2014 05:22:11 +0000 (08:22 +0300)
diff --git a/Lib/http/server.py b/Lib/http/server.py

index 2a9502870580f3247064ff55864b505b862ad73f..f916fdd95cbb71011be1b4f7b717295b5649821b 100644 (file)
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -747,7 +747,12 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
              return None
          list.sort(key=lambda a: a.lower())
          r = []
-        displaypath = html.escape(urllib.parse.unquote(self.path))
+        try:
+            displaypath = urllib.parse.unquote(self.path,
+                                               errors='surrogatepass')
+        except UnicodeDecodeError:
+            displaypath = urllib.parse.unquote(path)
+        displaypath = html.escape(displaypath)
          enc = sys.getfilesystemencoding()
          title = 'Directory listing for %s' % displaypath
          r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
@@ -769,9 +774,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
                  displayname = name + "@"
                  # Note: a link to a directory displays with @ and links with /
              r.append('<li><a href="%s">%s</a></li>'
-                    % (urllib.parse.quote(linkname), html.escape(displayname)))
+                    % (urllib.parse.quote(linkname,
+                                          errors='surrogatepass'),
+                       html.escape(displayname)))
          r.append('</ul>\n<hr>\n</body>\n</html>\n')
-        encoded = '\n'.join(r).encode(enc)
+        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
          f = io.BytesIO()
          f.write(encoded)
          f.seek(0)
@@ -794,7 +801,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
          path = path.split('#',1)[0]
          # Don't forget explicit trailing slash when normalizing. Issue17324
          trailing_slash = path.rstrip().endswith('/')
-        path = posixpath.normpath(urllib.parse.unquote(path))
+        try:
+            path = urllib.parse.unquote(path, errors='surrogatepass')
+        except UnicodeDecodeError:
+            path = urllib.parse.unquote(path)
+        path = posixpath.normpath(path)
          words = path.split('/')
          words = filter(None, words)
          path = os.getcwd()
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py

index 0f4b9ba7b785b5af013e51bc7880e055af4b419a..8c22651949f12ca2ae5e6f36f1a2380d248af637 100644 (file)
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -14,6 +14,7 @@ import re
  import base64
  import shutil
  import urllib.parse
+import html
  import http.client
  import tempfile
  from io import BytesIO
@@ -266,6 +267,24 @@ class SimpleHTTPServerTestCase(BaseTestCase):
          self.assertIsNotNone(response.reason)
          if data:
              self.assertEqual(data, body)
+        return body
+
+    @unittest.skipUnless(support.TESTFN_UNDECODABLE,
+                         'need support.TESTFN_UNDECODABLE')
+    def test_undecodable_filename(self):
+        filename = os.fsdecode(support.TESTFN_UNDECODABLE) + '.txt'
+        with open(os.path.join(self.tempdir, filename), 'wb') as f:
+            f.write(support.TESTFN_UNDECODABLE)
+        response = self.request(self.tempdir_name + '/')
+        body = self.check_status_and_reason(response, 200)
+        quotedname = urllib.parse.quote(filename, errors='surrogatepass')
+        self.assertIn(('href="%s"' % quotedname)
+                      .encode('utf-8', 'surrogateescape'), body)
+        self.assertIn(('>%s<' % html.escape(filename))
+                      .encode('utf-8', 'surrogateescape'), body)
+        response = self.request(self.tempdir_name + '/' + quotedname)
+        self.check_status_and_reason(response, 200,
+                                     data=support.TESTFN_UNDECODABLE)
  
      def test_get(self):
          #constructs the path relative to the root directory of the HTTPServer
diff --git a/Misc/NEWS b/Misc/NEWS

index 18e5a6d1054adf0f8a1708709ea985c902adec61..71304b3fcc9799440b846b95bfe09e8abeae66d6 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -27,6 +27,8 @@ Core and Builtins
  Library
  -------
  
+- Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.
+
  - Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems()
    in the mailbox module.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 17 Aug 2014 05:22:11 +0000 (08:22 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 17 Aug 2014 05:22:11 +0000 (08:22 +0300)
Lib/http/server.py		patch | blob | history
Lib/test/test_httpservers.py		patch | blob | history
Misc/NEWS		patch | blob | history