From: Barry Warsaw
Date: Sat, 9 Oct 2004 21:08:30 +0000 (+0000)
Subject: __init__(): Coerce the input_charset to unicode (with ascii encoding) before
X-Git-Tag: v2.4b1~65
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ea7c7af10b72ec4f3c5ad2bb6beb1d3667ff978e;p=python

__init__(): Coerce the input_charset to unicode (with ascii encoding) before
calling .lower() on it. This fixes the problem described in SF patch # 866982
where in the tr_TR.ISO-8859-9 locale, 'I'.lower() isn't 'i'. unicodes are
locale insensitive.
---

diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py
index 6a3e3ca10f..df860c5e5b 100644
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
@@ -185,8 +185,9 @@ class Charset:
                   this attribute will have the same value as the input_codec.
     """
     def __init__(self, input_charset=DEFAULT_CHARSET):
-        # RFC 2046, $4.1.2 says charsets are not case sensitive
-        input_charset = input_charset.lower()
+        # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
+        # unicode because its .lower() is locale insensitive.
+        input_charset = unicode(input_charset, 'ascii').lower()
         # Set the input charset after filtering through the aliases
         self.input_charset = ALIASES.get(input_charset, input_charset)
         # We can try to guess which encoding and conversion to use by the