From 3e902b9994fe4eefd3890f2c392862de0e31ffe3 Mon Sep 17 00:00:00 2001 From: Rocco Rutte Date: Sat, 11 Jul 2009 14:52:11 +0200 Subject: [PATCH] Recognize charset extensions, see #3150. With utf-8//TRANSLIT, we internally didn't recognize it as utf-8. This leads to badly broken behaviour if --without-wc-funcs is used for some reason. In that case, if we have utf-8 as charset, we implement our own wide char functions; for all other charsets, we use the system single-byte locale functions. And using these with utf-8 is broken. --- charset.c | 53 ++++++++++++++++++++++++++++++++++++++--------------- mbyte.c | 2 +- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/charset.c b/charset.c index b07f494a5..6d33970aa 100644 --- a/charset.c +++ b/charset.c @@ -239,36 +239,42 @@ void mutt_set_langinfo_charset (void) #endif +/* this first ties off any charset extension such as //TRANSLIT, + canonicalizes the charset and re-adds the extension */ void mutt_canonical_charset (char *dest, size_t dlen, const char *name) { size_t i; - char *p; - char scratch[LONG_STRING]; + char *p, *ext; + char in[LONG_STRING], scratch[LONG_STRING]; + + strfcpy (in, name, sizeof (in)); + if ((ext = strchr (in, '/'))) + *ext++ = 0; - if (!ascii_strcasecmp (name, "utf-8") || !ascii_strcasecmp (name, "utf8")) + if (!ascii_strcasecmp (in, "utf-8") || !ascii_strcasecmp (in, "utf8")) { strfcpy (dest, "utf-8", dlen); - return; + goto out; } /* catch some common iso-8859-something misspellings */ - if (!ascii_strncasecmp (name, "8859", 4) && name[4] != '-') - snprintf (scratch, sizeof (scratch), "iso-8859-%s", name +4); - else if (!ascii_strncasecmp (name, "8859-", 5)) - snprintf (scratch, sizeof (scratch), "iso-8859-%s", name + 5); - else if (!ascii_strncasecmp (name, "iso8859", 7) && name[7] != '-') - snprintf (scratch, sizeof (scratch), "iso_8859-%s", name + 7); - else if (!ascii_strncasecmp (name, "iso8859-", 8)) - snprintf (scratch, sizeof (scratch), "iso_8859-%s", name + 8); + if (!ascii_strncasecmp (in, "8859", 4) && in[4] != '-') + snprintf (scratch, sizeof (scratch), "iso-8859-%s", in +4); + else if (!ascii_strncasecmp (in, "8859-", 5)) + snprintf (scratch, sizeof (scratch), "iso-8859-%s", in + 5); + else if (!ascii_strncasecmp (in, "iso8859", 7) && in[7] != '-') + snprintf (scratch, sizeof (scratch), "iso_8859-%s", in + 7); + else if (!ascii_strncasecmp (in, "iso8859-", 8)) + snprintf (scratch, sizeof (scratch), "iso_8859-%s", in + 8); else - strfcpy (scratch, NONULL(name), sizeof (scratch)); + strfcpy (scratch, NONULL(in), sizeof (scratch)); for (i = 0; PreferredMIMENames[i].key; i++) if (!ascii_strcasecmp (scratch, PreferredMIMENames[i].key) || !mutt_strcasecmp (scratch, PreferredMIMENames[i].key)) { strfcpy (dest, PreferredMIMENames[i].pref, dlen); - return; + goto out; } strfcpy (dest, scratch, dlen); @@ -276,16 +282,33 @@ void mutt_canonical_charset (char *dest, size_t dlen, const char *name) /* for cosmetics' sake, transform to lowercase. */ for (p = dest; *p; p++) *p = ascii_tolower (*p); + +out: + if (ext && *ext) + { + safe_strcat (dest, dlen, "/"); + safe_strcat (dest, dlen, ext); + } } int mutt_chscmp (const char *s, const char *chs) { char buffer[STRING]; + int a, b; if (!s) return 0; + /* charsets may have extensions mutt_canonical_charset() + leaves intact; we expect `chs' to originate from mutt + code, not user input (i.e. `chs' does _not_ have any + extension) + we simply check if the shorter string is a prefix for + the longer */ mutt_canonical_charset (buffer, sizeof (buffer), s); - return !ascii_strcasecmp (buffer, chs); + a = mutt_strlen (buffer); + b = mutt_strlen (chs); + return !ascii_strncasecmp (a > b ? buffer : chs, + a > b ? chs : buffer, MIN(a,b)); } char *mutt_get_default_charset () diff --git a/mbyte.c b/mbyte.c index d89794afd..6e4502464 100644 --- a/mbyte.c +++ b/mbyte.c @@ -64,7 +64,7 @@ void mutt_set_charset (char *charset) } #endif - if (!strcmp(buffer, "utf-8")) + if (mutt_is_utf8 (buffer)) Charset_is_utf8 = 1; #ifndef HAVE_WC_FUNCS else if (!ascii_strcasecmp(buffer, "euc-jp") || !ascii_strcasecmp(buffer, "shift_jis") -- 2.40.0