From 9fc720e5e4f772598013ea48a3f0d22b2b6b04fa Mon Sep 17 00:00:00 2001 From: "David K. Hess" Date: Mon, 24 Jun 2019 18:46:59 -0500 Subject: [PATCH] bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062) --- Doc/library/mimetypes.rst | 4 + Lib/mimetypes.py | 251 +++++++++--------- Lib/test/test_mimetypes.py | 51 ++++ .../2017-08-15-11-24-41.bpo-4963.LRYres.rst | 2 + 4 files changed, 188 insertions(+), 120 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst index 5728407cb3..f610032acb 100644 --- a/Doc/library/mimetypes.rst +++ b/Doc/library/mimetypes.rst @@ -93,6 +93,10 @@ behavior of the module. Specifying an empty list for *files* will prevent the system defaults from being applied: only the well-known values will be present from a built-in list. + If *files* is ``None`` the internal data structure is completely rebuilt to its + initial default value. This is a stable operation and will produce the same results + when called multiple times. + .. versionchanged:: 3.2 Previously, Windows registry settings were ignored. 
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 8861b75362..01a16fdf9a 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -66,13 +66,13 @@ class MimeTypes: def __init__(self, filenames=(), strict=True): if not inited: init() - self.encodings_map = encodings_map.copy() - self.suffix_map = suffix_map.copy() + self.encodings_map = _encodings_map_default.copy() + self.suffix_map = _suffix_map_default.copy() self.types_map = ({}, {}) # dict for (non-strict, strict) self.types_map_inv = ({}, {}) - for (ext, type) in types_map.items(): + for (ext, type) in _types_map_default.items(): self.add_type(type, ext, True) - for (ext, type) in common_types.items(): + for (ext, type) in _common_types_default.items(): self.add_type(type, ext, False) for name in filenames: self.read(name, strict) @@ -346,11 +346,19 @@ def init(files=None): global suffix_map, types_map, encodings_map, common_types global inited, _db inited = True # so that MimeTypes.__init__() doesn't call us again - db = MimeTypes() - if files is None: + + if files is None or _db is None: + db = MimeTypes() if _winreg: db.read_windows_registry() - files = knownfiles + + if files is None: + files = knownfiles + else: + files = knownfiles + list(files) + else: + db = _db + for file in files: if os.path.isfile(file): db.read(file) @@ -374,12 +382,12 @@ def read_mime_types(file): def _default_mime_types(): - global suffix_map - global encodings_map - global types_map - global common_types + global suffix_map, _suffix_map_default + global encodings_map, _encodings_map_default + global types_map, _types_map_default + global common_types, _common_types_default - suffix_map = { + suffix_map = _suffix_map_default = { '.svgz': '.svg.gz', '.tgz': '.tar.gz', '.taz': '.tar.gz', @@ -388,7 +396,7 @@ def _default_mime_types(): '.txz': '.tar.xz', } - encodings_map = { + encodings_map = _encodings_map_default = { '.gz': 'gzip', '.Z': 'compress', '.bz2': 'bzip2', @@ -399,152 +407,155 @@ def _default_mime_types(): # at 
http://www.iana.org/assignments/media-types # or extensions, i.e. using the x- prefix - # If you add to these, please keep them sorted! - types_map = { + # If you add to these, please keep them sorted by mime type. + # Make sure the entry with the preferred file extension for a particular mime type + # appears before any others of the same mimetype. + types_map = _types_map_default = { + '.js' : 'application/javascript', + '.mjs' : 'application/javascript', + '.json' : 'application/json', + '.doc' : 'application/msword', + '.dot' : 'application/msword', + '.wiz' : 'application/msword', + '.bin' : 'application/octet-stream', '.a' : 'application/octet-stream', + '.dll' : 'application/octet-stream', + '.exe' : 'application/octet-stream', + '.o' : 'application/octet-stream', + '.obj' : 'application/octet-stream', + '.so' : 'application/octet-stream', + '.oda' : 'application/oda', + '.pdf' : 'application/pdf', + '.p7c' : 'application/pkcs7-mime', + '.ps' : 'application/postscript', '.ai' : 'application/postscript', - '.aif' : 'audio/x-aiff', - '.aifc' : 'audio/x-aiff', - '.aiff' : 'audio/x-aiff', - '.au' : 'audio/basic', - '.avi' : 'video/x-msvideo', - '.bat' : 'text/plain', + '.eps' : 'application/postscript', + '.m3u' : 'application/vnd.apple.mpegurl', + '.m3u8' : 'application/vnd.apple.mpegurl', + '.xls' : 'application/vnd.ms-excel', + '.xlb' : 'application/vnd.ms-excel', + '.ppt' : 'application/vnd.ms-powerpoint', + '.pot' : 'application/vnd.ms-powerpoint', + '.ppa' : 'application/vnd.ms-powerpoint', + '.pps' : 'application/vnd.ms-powerpoint', + '.pwz' : 'application/vnd.ms-powerpoint', + '.wasm' : 'application/wasm', '.bcpio' : 'application/x-bcpio', - '.bin' : 'application/octet-stream', - '.bmp' : 'image/bmp', - '.c' : 'text/plain', - '.cdf' : 'application/x-netcdf', '.cpio' : 'application/x-cpio', '.csh' : 'application/x-csh', - '.css' : 'text/css', - '.csv' : 'text/csv', - '.dll' : 'application/octet-stream', - '.doc' : 'application/msword', - '.dot' : 
'application/msword', '.dvi' : 'application/x-dvi', - '.eml' : 'message/rfc822', - '.eps' : 'application/postscript', - '.etx' : 'text/x-setext', - '.exe' : 'application/octet-stream', - '.gif' : 'image/gif', '.gtar' : 'application/x-gtar', - '.h' : 'text/plain', '.hdf' : 'application/x-hdf', - '.htm' : 'text/html', - '.html' : 'text/html', - '.ico' : 'image/vnd.microsoft.icon', - '.ief' : 'image/ief', - '.jpe' : 'image/jpeg', - '.jpeg' : 'image/jpeg', - '.jpg' : 'image/jpeg', - '.js' : 'application/javascript', - '.json' : 'application/json', - '.ksh' : 'text/plain', '.latex' : 'application/x-latex', - '.m1v' : 'video/mpeg', - '.m3u' : 'application/vnd.apple.mpegurl', - '.m3u8' : 'application/vnd.apple.mpegurl', - '.man' : 'application/x-troff-man', - '.me' : 'application/x-troff-me', - '.mht' : 'message/rfc822', - '.mhtml' : 'message/rfc822', '.mif' : 'application/x-mif', - '.mjs' : 'application/javascript', - '.mov' : 'video/quicktime', - '.movie' : 'video/x-sgi-movie', - '.mp2' : 'audio/mpeg', - '.mp3' : 'audio/mpeg', - '.mp4' : 'video/mp4', - '.mpa' : 'video/mpeg', - '.mpe' : 'video/mpeg', - '.mpeg' : 'video/mpeg', - '.mpg' : 'video/mpeg', - '.ms' : 'application/x-troff-ms', + '.cdf' : 'application/x-netcdf', '.nc' : 'application/x-netcdf', - '.nws' : 'message/rfc822', - '.o' : 'application/octet-stream', - '.obj' : 'application/octet-stream', - '.oda' : 'application/oda', '.p12' : 'application/x-pkcs12', - '.p7c' : 'application/pkcs7-mime', - '.pbm' : 'image/x-portable-bitmap', - '.pdf' : 'application/pdf', '.pfx' : 'application/x-pkcs12', - '.pgm' : 'image/x-portable-graymap', - '.pl' : 'text/plain', - '.png' : 'image/png', - '.pnm' : 'image/x-portable-anymap', - '.pot' : 'application/vnd.ms-powerpoint', - '.ppa' : 'application/vnd.ms-powerpoint', - '.ppm' : 'image/x-portable-pixmap', - '.pps' : 'application/vnd.ms-powerpoint', - '.ppt' : 'application/vnd.ms-powerpoint', - '.ps' : 'application/postscript', - '.pwz' : 'application/vnd.ms-powerpoint', - '.py' 
: 'text/x-python', + '.ram' : 'application/x-pn-realaudio', '.pyc' : 'application/x-python-code', '.pyo' : 'application/x-python-code', - '.qt' : 'video/quicktime', - '.ra' : 'audio/x-pn-realaudio', - '.ram' : 'application/x-pn-realaudio', - '.ras' : 'image/x-cmu-raster', - '.rdf' : 'application/xml', - '.rgb' : 'image/x-rgb', - '.roff' : 'application/x-troff', - '.rtx' : 'text/richtext', - '.sgm' : 'text/x-sgml', - '.sgml' : 'text/x-sgml', '.sh' : 'application/x-sh', '.shar' : 'application/x-shar', - '.snd' : 'audio/basic', - '.so' : 'application/octet-stream', - '.src' : 'application/x-wais-source', + '.swf' : 'application/x-shockwave-flash', '.sv4cpio': 'application/x-sv4cpio', '.sv4crc' : 'application/x-sv4crc', - '.svg' : 'image/svg+xml', - '.swf' : 'application/x-shockwave-flash', - '.t' : 'application/x-troff', '.tar' : 'application/x-tar', '.tcl' : 'application/x-tcl', '.tex' : 'application/x-tex', '.texi' : 'application/x-texinfo', '.texinfo': 'application/x-texinfo', - '.tif' : 'image/tiff', - '.tiff' : 'image/tiff', + '.roff' : 'application/x-troff', + '.t' : 'application/x-troff', '.tr' : 'application/x-troff', - '.tsv' : 'text/tab-separated-values', - '.txt' : 'text/plain', + '.man' : 'application/x-troff-man', + '.me' : 'application/x-troff-me', + '.ms' : 'application/x-troff-ms', '.ustar' : 'application/x-ustar', - '.vcf' : 'text/x-vcard', - '.wasm' : 'application/wasm', - '.wav' : 'audio/x-wav', - '.webm' : 'video/webm', - '.wiz' : 'application/msword', + '.src' : 'application/x-wais-source', + '.xsl' : 'application/xml', + '.rdf' : 'application/xml', '.wsdl' : 'application/xml', - '.xbm' : 'image/x-xbitmap', - '.xlb' : 'application/vnd.ms-excel', - '.xls' : 'application/vnd.ms-excel', - '.xml' : 'text/xml', '.xpdl' : 'application/xml', + '.zip' : 'application/zip', + '.au' : 'audio/basic', + '.snd' : 'audio/basic', + '.mp3' : 'audio/mpeg', + '.mp2' : 'audio/mpeg', + '.aif' : 'audio/x-aiff', + '.aifc' : 'audio/x-aiff', + '.aiff' : 'audio/x-aiff', + 
'.ra' : 'audio/x-pn-realaudio', + '.wav' : 'audio/x-wav', + '.bmp' : 'image/bmp', + '.gif' : 'image/gif', + '.ief' : 'image/ief', + '.jpg' : 'image/jpeg', + '.jpe' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.png' : 'image/png', + '.svg' : 'image/svg+xml', + '.tiff' : 'image/tiff', + '.tif' : 'image/tiff', + '.ico' : 'image/vnd.microsoft.icon', + '.ras' : 'image/x-cmu-raster', + '.bmp' : 'image/x-ms-bmp', + '.pnm' : 'image/x-portable-anymap', + '.pbm' : 'image/x-portable-bitmap', + '.pgm' : 'image/x-portable-graymap', + '.ppm' : 'image/x-portable-pixmap', + '.rgb' : 'image/x-rgb', + '.xbm' : 'image/x-xbitmap', '.xpm' : 'image/x-xpixmap', - '.xsl' : 'application/xml', '.xwd' : 'image/x-xwindowdump', - '.zip' : 'application/zip', + '.eml' : 'message/rfc822', + '.mht' : 'message/rfc822', + '.mhtml' : 'message/rfc822', + '.nws' : 'message/rfc822', + '.css' : 'text/css', + '.csv' : 'text/csv', + '.html' : 'text/html', + '.htm' : 'text/html', + '.txt' : 'text/plain', + '.bat' : 'text/plain', + '.c' : 'text/plain', + '.h' : 'text/plain', + '.ksh' : 'text/plain', + '.pl' : 'text/plain', + '.rtx' : 'text/richtext', + '.tsv' : 'text/tab-separated-values', + '.py' : 'text/x-python', + '.etx' : 'text/x-setext', + '.sgm' : 'text/x-sgml', + '.sgml' : 'text/x-sgml', + '.vcf' : 'text/x-vcard', + '.xml' : 'text/xml', + '.mp4' : 'video/mp4', + '.mpeg' : 'video/mpeg', + '.m1v' : 'video/mpeg', + '.mpa' : 'video/mpeg', + '.mpe' : 'video/mpeg', + '.mpg' : 'video/mpeg', + '.mov' : 'video/quicktime', + '.qt' : 'video/quicktime', + '.webm' : 'video/webm', + '.avi' : 'video/x-msvideo', + '.movie' : 'video/x-sgi-movie', } # These are non-standard types, commonly found in the wild. They will # only match if strict=0 flag is given to the API methods. 
# Please sort these too - common_types = { - '.jpg' : 'image/jpg', - '.mid' : 'audio/midi', + common_types = _common_types_default = { + '.rtf' : 'application/rtf', '.midi': 'audio/midi', + '.mid' : 'audio/midi', + '.jpg' : 'image/jpg', + '.pict': 'image/pict', '.pct' : 'image/pict', '.pic' : 'image/pict', - '.pict': 'image/pict', - '.rtf' : 'application/rtf', - '.xul' : 'text/xul' + '.xul' : 'text/xul', } diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index c4b2fe2047..bfd5eeedaa 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -79,6 +79,57 @@ class MimeTypesTestCase(unittest.TestCase): strict=True) self.assertEqual(exts, ['.g3', '.g\xb3']) + def test_init_reinitializes(self): + # Issue 4963: make sure an init starts clean + # First, put some poison into the types table + mimetypes.add_type('foo/bar', '.foobar') + self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar') + # Reinitialize + mimetypes.init() + # Poison should be gone. 
+ self.assertEqual(mimetypes.guess_extension('foo/bar'), None) + + def test_preferred_extension(self): + def check_extensions(): + self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin') + self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps') + self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u') + self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls') + self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt') + self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi') + self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff') + self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl') + self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3') + self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg') + self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff') + self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml') + self.assertEqual(mimetypes.guess_extension('text/html'), '.html') + self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt') + self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg') + self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov') + + check_extensions() + mimetypes.init() + check_extensions() + + def test_init_stability(self): + mimetypes.init() + + suffix_map = mimetypes.suffix_map + encodings_map = mimetypes.encodings_map + types_map = mimetypes.types_map + common_types = mimetypes.common_types + + mimetypes.init() + self.assertIsNot(suffix_map, mimetypes.suffix_map) + self.assertIsNot(encodings_map, mimetypes.encodings_map) + self.assertIsNot(types_map, mimetypes.types_map) + self.assertIsNot(common_types, mimetypes.common_types) + self.assertEqual(suffix_map, mimetypes.suffix_map) + self.assertEqual(encodings_map, mimetypes.encodings_map) + self.assertEqual(types_map, 
mimetypes.types_map) + self.assertEqual(common_types, mimetypes.common_types) + def test_path_like_ob(self): filename = "LICENSE.txt" filepath = pathlib.Path(filename) diff --git a/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst new file mode 100644 index 0000000000..3b060052fd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst @@ -0,0 +1,2 @@ +Fixed non-deterministic behavior related to mimetypes extension mapping and +module reinitialization. -- 2.40.0