granicus.if.org Git - python/commitdiff
bpo-18378: Recognize "UTF-8" as a valid name in locale._parse_localename (GH-14736)
author Ronald Oussoren <ronaldoussoren@mac.com>
Thu, 29 Aug 2019 04:33:52 +0000 (06:33 +0200)
committer Ned Deily <nad@python.org>
Thu, 29 Aug 2019 04:33:52 +0000 (00:33 -0400)
Lib/locale.py
Lib/test/test_locale.py
Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst [new file with mode: 0644]

index f3d3973d038c5188973e9a7cb7c9e5b295a6367f..dd8a08524a018e8ff34f10a8d96b6cbbc171b063 100644 (file)
@@ -492,6 +492,10 @@ def _parse_localename(localename):
         return tuple(code.split('.')[:2])
     elif code == 'C':
         return None, None
+    elif code == 'UTF-8':
+        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
+        # for getting UTF-8 handling for text.
+        return None, 'UTF-8'
     raise ValueError('unknown locale: %s' % localename)
 
 def _build_localename(localetuple):
index 792a15c50f9219f7c5ee10fe70d8276d4c043096..c5d8e269d631834d67e54189269ee2d1c2e7af2e 100644 (file)
@@ -493,6 +493,42 @@ class NormalizeTest(unittest.TestCase):
 
 
 class TestMiscellaneous(unittest.TestCase):
+    def test_defaults_UTF8(self):
+        # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is
+        # valid. Furthermore, LC_CTYPE=UTF-8 is used by the UTF-8 locale
+        # coercion during interpreter startup (on macOS).
+        import _locale
+        import os
+
+        self.assertEqual(locale._parse_localename('UTF-8'), (None, 'UTF-8'))
+
+        if hasattr(_locale, '_getdefaultlocale'):
+            orig_getlocale = _locale._getdefaultlocale
+            del _locale._getdefaultlocale
+        else:
+            orig_getlocale = None
+
+        orig_env = {}
+        try:
+            for key in ('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'):
+                if key in os.environ:
+                    orig_env[key] = os.environ[key]
+                    del os.environ[key]
+
+            os.environ['LC_CTYPE'] = 'UTF-8'
+
+            self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8'))
+
+        finally:
+            for k in orig_env:
+                os.environ[k] = orig_env[k]
+
+            if 'LC_CTYPE' not in orig_env:
+                del os.environ['LC_CTYPE']
+
+            if orig_getlocale is not None:
+                _locale._getdefaultlocale = orig_getlocale
+
     def test_getpreferredencoding(self):
         # Invoke getpreferredencoding to make sure it does not cause exceptions.
         enc = locale.getpreferredencoding()
diff --git a/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst
new file mode 100644 (file)
index 0000000..6dda8ab
--- /dev/null
@@ -0,0 +1 @@
+Recognize "UTF-8" as a valid value for LC_CTYPE in locale._parse_localename.