From b0f4ac704664b45d050fa8ec20faebc9663a2f9c Mon Sep 17 00:00:00 2001
From: TAKIZAWA Takashi <taki@luna.email.ne.jp>
Date: Tue, 27 Feb 2007 17:44:08 +0000
Subject: [PATCH] Updated $assumed_charset patch (closes: #2218). Thanks to
 TAKAHASHI Tamotsu for the fixes and for handling patch conflicts.

---
 UPDATING  |   2 +-
 charset.c |  96 +++++++--------------------------------------------
 charset.h |   3 +-
 globals.h |   2 +-
 handler.c |  22 +++++-------
 init.h    |  50 +++++++++++++--------------
 mutt.h    |   4 +--
 parse.c   |   8 ++---
 rfc2047.c | 101 +++++++++++++++++++++++++++++++++++-------------------
 rfc2047.h |   1 +
 rfc2231.c |   7 ++--
 sendlib.c |  10 +++---
 12 files changed, 127 insertions(+), 179 deletions(-)

diff --git a/UPDATING b/UPDATING
index 44a20281..fa20cff2 100644
--- a/UPDATING
+++ b/UPDATING
@@ -4,8 +4,8 @@ mutt. Please read this file carefully when upgrading your installation.
 The keys used are:
   !: modified feature, -: deleted feature, +: new feature
 
+  + $assumed_charset, $attach_charset, $ignore_linear_white_space
   + $save_history, $history_file (save history across sessions)
-  + $assumed_charset, $file_charset, $strict_mime
   + $smtp_url (ESMTP relay support)
   + $crypt_use_pka (use GPGME PKA signature verification)
 
diff --git a/charset.c b/charset.c
index 85a2ea6d..b52911ab 100644
--- a/charset.c
+++ b/charset.c
@@ -282,6 +282,19 @@ int mutt_chscmp (const char *s, const char *chs)
   return !ascii_strcasecmp (buffer, chs);
 }
 
+/* First entry of $assumed_charset ("us-ascii" if unset), in a static buffer. */
+char *mutt_get_default_charset (void)
+{
+  static char fcharset[SHORT_STRING];
+  const char *c = AssumedCharset, *c1;
+  size_t n = sizeof (fcharset);
+  if (!c || !*c)
+    return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */
+  if ((c1 = strchr (c, ':')) && (size_t) (c1 - c) < n)
+    n = c1 - c + 1;	/* first name only; size never exceeds the buffer */
+  strfcpy (fcharset, c, n);
+  return fcharset;
+}
 
 #ifndef HAVE_ICONV
 
@@ -591,86 +604,3 @@ void fgetconv_close (FGETCONV **_fc)
     iconv_close (fc->cd);
   FREE (_fc);		/* __FREE_CHECKED__ */
 }
-
-const char *mutt_get_first_charset (const char *charset)
-{
-  static char fcharset[SHORT_STRING];
-  const char *c, *c1;
-
-  c = charset;
-  if (!mutt_strlen(c))
-    return "us-ascii";
-  if (!(c1 = strchr (c, ':')))
-    return charset;
-  strfcpy (fcharset, c, c1 - c + 1);
-  return fcharset;
-}
-
-static size_t convert_string (ICONV_CONST char *f, size_t flen,
-                             const char *from, const char *to,
-                             char **t, size_t *tlen)
-{
-  iconv_t cd;
-  char *buf, *ob;
-  size_t obl, n;
-  int e;
-
-  cd = mutt_iconv_open (to, from, 0);
-  if (cd == (iconv_t)(-1))
-    return (size_t)(-1);
-  obl = 4 * flen + 1;
-  ob = buf = safe_malloc (obl);
-  n = iconv (cd, &f, &flen, &ob, &obl);
-  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
-  {
-    e = errno;
-    FREE (&buf);
-    iconv_close (cd);
-    errno = e;
-    return (size_t)(-1);
-  }
-  *ob = '\0';
-
-  *tlen = ob - buf;
-
-  safe_realloc ((void **) &buf, ob - buf + 1);
-  *t = buf;
-  iconv_close (cd);
-
-  return n;
-}
-
-int mutt_convert_nonmime_string (char **ps)
-{
-  const char *c, *c1;
-
-  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
-  {
-    char *u = *ps;
-    char *s;
-    char *fromcode;
-    size_t m, n;
-    size_t ulen = mutt_strlen (*ps);
-    size_t slen;
-
-    if (!u || !*u)
-      return 0;
-
-    c1 = strchr (c, ':');
-    n = c1 ? c1 - c : mutt_strlen (c);
-    if (!n)
-      continue;
-    fromcode = safe_malloc (n + 1);
-    strfcpy (fromcode, c, n + 1);
-    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
-    FREE (&fromcode);
-    if (m != (size_t)(-1))
-    {
-      FREE (ps); /* __FREE_CHECKED__ */
-      *ps = s;
-      return 0;
-    }
-  }
-  return -1;
-}
-
diff --git a/charset.h b/charset.h
index edb24157..3fba1cce 100644
--- a/charset.h
+++ b/charset.h
@@ -35,8 +35,6 @@ int iconv_close (iconv_t);
 #endif
 
 int mutt_convert_string (char **, const char *, const char *, int);
-const char *mutt_get_first_charset (const char *);
-int mutt_convert_nonmime_string (char **);
 
 iconv_t mutt_iconv_open (const char *, const char *, int);
 size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
@@ -49,6 +47,7 @@ char * fgetconvs (char *, size_t, FGETCONV *);
 void fgetconv_close (FGETCONV **);
 
 void mutt_set_langinfo_charset (void);
+char *mutt_get_default_charset ();
 
 #define M_ICONV_HOOK_FROM 1
 #define M_ICONV_HOOK_TO   2
diff --git a/globals.h b/globals.h
index 0b87488f..85ae034b 100644
--- a/globals.h
+++ b/globals.h
@@ -36,6 +36,7 @@ WHERE char *AliasFmt;
 WHERE char *AssumedCharset;
 WHERE char *AttachSep;
 WHERE char *Attribution;
+WHERE char *AttachCharset;
 WHERE char *AttachFormat;
 WHERE char *Charset;
 WHERE char *ComposeFormat;
@@ -48,7 +49,6 @@ WHERE char *DsnNotify;
 WHERE char *DsnReturn;
 WHERE char *Editor;
 WHERE char *EscChar;
-WHERE char *FileCharset;
 WHERE char *FolderFormat;
 WHERE char *ForwFmt;
 WHERE char *Fqdn;
diff --git a/handler.c b/handler.c
index 46163116..deea6531 100644
--- a/handler.c
+++ b/handler.c
@@ -1743,22 +1743,16 @@ void mutt_decode_attachment (BODY *b, STATE *s)
   int istext = mutt_is_text_part (b);
   iconv_t cd = (iconv_t)(-1);
 
-  if (istext)
+  if (istext && s->flags & M_CHARCONV)
   {
-    if(s->flags & M_CHARCONV)
-    {
-      char *charset = mutt_get_parameter ("charset", b->parameter);
-      if (!option (OPTSTRICTMIME) && !charset)
-        charset = mutt_get_first_charset (AssumedCharset);
-      if (charset && Charset)
-        cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
-    }
-    else
-    {
-      if (b->file_charset)
-        cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
-    }
+    char *charset = mutt_get_parameter ("charset", b->parameter);
+    if (!charset && AssumedCharset && *AssumedCharset)
+      charset = mutt_get_default_charset ();
+    if (charset && Charset)
+      cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
   }
+  else if (istext && b->charset)
+    cd = mutt_iconv_open (Charset, b->charset, M_ICONV_HOOK_FROM);
 
   fseeko (s->fpin, b->offset, 0);
   switch (b->encoding)
diff --git a/init.h b/init.h
index f5e2057f..1cc04726 100644
--- a/init.h
+++ b/init.h
@@ -218,7 +218,7 @@ struct option_t MuttVars[] = {
   ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
   ** editing the body of an outgoing message.
   */  
-  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
+  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0},
   /*
   ** .pp
   ** This variable is a colon-separated list of character encoding
@@ -233,7 +233,20 @@ struct option_t MuttVars[] = {
   **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
   ** .pp
   ** However, only the first content is valid for the message body.
-  ** This variable is valid only if $$strict_mime is unset.
+  */
+  { "attach_charset",    DT_STR,  R_NONE, UL &AttachCharset, UL 0 },
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for text file attachments.
+  ** If unset, $$charset value will be used instead.
+  ** For example, the following configuration would work for Japanese
+  ** text handling:
+  ** .pp
+  **   set attach_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** Note: "iso-2022-*" must be put at the head of the value as shown above
+  ** if included.
   */
   { "attach_format",	DT_STR,  R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
   /*
@@ -614,19 +627,8 @@ struct option_t MuttVars[] = {
   ** signed.
   ** (PGP only)
   */
-  { "file_charset",    DT_STR,  R_NONE, UL &FileCharset, UL 0 },
+  { "file_charset",	DT_SYN, R_NONE, UL "attach_charset", 0 },
   /*
-  ** .pp
-  ** This variable is a colon-separated list of character encoding
-  ** schemes for text file attatchments.
-  ** If unset, $$charset value will be used instead.
-  ** For example, the following configuration would work for Japanese
-  ** text handling:
-  ** .pp
-  **   set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
-  ** .pp
-  ** Note: "iso-2022-*" must be put at the head of the value as shown above
-  ** if included.
   */
   { "folder",		DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
   /*
@@ -846,6 +848,13 @@ struct option_t MuttVars[] = {
   ** addresses.  This overrides the compile time definition obtained from
   ** /etc/resolv.conf.
   */
+  { "ignore_linear_white_space",    DT_BOOL, R_NONE, OPTIGNORELWS, 0 },
+  /*
+  ** .pp
+  ** This option replaces linear-white-space between encoded-word
+  ** and *text with a single space to prevent the display of a MIME-encoded
+  ** ``Subject'' field from being divided into multiple lines.
+  */
   { "ignore_list_reply_to", DT_BOOL, R_NONE, OPTIGNORELISTREPLYTO, 0 },
   /*
   ** .pp
@@ -2808,19 +2817,6 @@ struct option_t MuttVars[] = {
   ** Setting this variable causes the ``status bar'' to be displayed on
   ** the first line of the screen rather than near the bottom.
   */
-  { "strict_mime",    DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
-  /*
-  ** .pp
-  ** When unset, non MIME-compliant messages that doesn't have any
-  ** charset indication in ``Content-Type'' field can be displayed
-  ** (non MIME-compliant messages are often generated by old mailers
-  ** or buggy mailers like MS Outlook Express).
-  ** See also $$assumed_charset.
-  ** .pp
-  ** This option also replaces linear-white-space between encoded-word
-  ** and *text to a single space to prevent the display of MIME-encoded
-  ** ``Subject'' field from being devided into multiple lines.
-  */
   { "strict_threads",	DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
   /*
   ** .pp
diff --git a/mutt.h b/mutt.h
index 0aaed1d0..48878d2d 100644
--- a/mutt.h
+++ b/mutt.h
@@ -370,6 +370,7 @@ enum
   OPTHIDETHREADSUBJECT,
   OPTHIDETOPLIMITED,
   OPTHIDETOPMISSING,
+  OPTIGNORELWS,
   OPTIGNORELISTREPLYTO,
 #ifdef USE_IMAP
   OPTIMAPCHECKSUBSCRIBED,
@@ -428,7 +429,6 @@ enum
   OPTSORTRE,
   OPTSPAMSEP,
   OPTSTATUSONTOP,
-  OPTSTRICTMIME,
   OPTSTRICTTHREADS,
   OPTSUSPEND,
   OPTTEXTFLOWED,
@@ -643,7 +643,7 @@ typedef struct body
 				 * If NULL, filename is used 
 				 * instead.
 				 */
-  char *file_charset;           /* charset of attached file */
+  char *charset;                /* charset of attached file */
   CONTENT *content;             /* structure used to store detailed info about
 				 * the content of the attachment.  this is used
 				 * to determine what content-transfer-encoding
diff --git a/parse.c b/parse.c
index 9f3999ae..8cf54530 100644
--- a/parse.c
+++ b/parse.c
@@ -217,7 +217,7 @@ static PARAMETER *parse_parameters (const char *s)
 	s++;
 	for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
 	{
-	  if (!option (OPTSTRICTMIME)) {
+	  if (AssumedCharset && *AssumedCharset) {
             /* As iso-2022-* has a characer of '"' with non-ascii state,
 	     * ignore it. */
             if (*s == 0x1b && i < sizeof (buffer) - 2)
@@ -402,9 +402,9 @@ void mutt_parse_content_type (char *s, BODY *ct)
   if (ct->type == TYPETEXT)
   {
     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
-                         (const char *) mutt_get_first_charset (AssumedCharset),
-                         &ct->parameter);
+      mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
+                         (const char *) mutt_get_default_charset ()
+                         : "us-ascii", &ct->parameter);
   }
 
 }
diff --git a/rfc2047.c b/rfc2047.c
index caac526e..98f3e3bf 100644
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -87,6 +87,43 @@ static size_t convert_string (ICONV_CONST char *f, size_t flen,
   return n;
 }
 
+/* Convert *ps to Charset using the first $assumed_charset entry that
+ * converts without error (returns 0, also for empty input or an empty
+ * entry); else convert with the default charset and return -1. */
+int convert_nonmime_string (char **ps)
+{
+  const char *c, *c1;
+
+  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
+  {
+    char *u = *ps;
+    char *s, *fromcode;
+    size_t m, n, slen;
+    size_t ulen = mutt_strlen (*ps);
+
+    if (!u || !*u)
+      return 0;
+
+    c1 = strchr (c, ':');
+    n = c1 ? c1 - c : mutt_strlen (c);
+    if (!n)
+      return 0;
+    fromcode = safe_malloc (n + 1);
+    strfcpy (fromcode, c, n + 1);
+    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
+    FREE (&fromcode);
+    if (m != (size_t)(-1))
+    {
+      FREE (ps); /* __FREE_CHECKED__ */
+      *ps = s;
+      return 0;
+    }
+  }
+  mutt_convert_string (ps, (const char *)mutt_get_default_charset (),
+      Charset, M_ICONV_HOOK_FROM);
+  return -1;
+}
+
 char *mutt_choose_charset (const char *fromcode, const char *charsets,
 		      char *u, size_t ulen, char **d, size_t *dlen)
 {
@@ -711,7 +748,7 @@ static const char *find_encoded_word (const char *s, const char **x)
   return 0;
 }
 
-/* return length of linear white space */
+/* return length of linear-white-space */
 static size_t lwslen (const char *s, size_t n)
 {
   const char *p = s;
@@ -731,7 +768,7 @@ static size_t lwslen (const char *s, size_t n)
   return len;
 }
 
-/* return length of linear white space : reverse */
+/* return length of linear-white-space : reverse */
 static size_t lwsrlen (const char *s, size_t n)
 {
   const char *p = s + n - 1;
@@ -775,37 +812,31 @@ void rfc2047_decode (char **pd)
     if (!(p = find_encoded_word (s, &q)))
     {
       /* no encoded words */
-      if (!option (OPTSTRICTMIME))
+      if (option (OPTIGNORELWS))
       {
         n = mutt_strlen (s);
         if (found_encoded && (m = lwslen (s, n)) != 0)
         {
           if (m != n)
             *d = ' ', d++, dlen--;
-          n -= m, s += m;
-        }
-        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
-        {
-          char *t;
-          size_t tlen;
-
-          t = safe_malloc (n + 1);
-          strfcpy (t, s, n + 1);
-          if (mutt_convert_nonmime_string (&t) == 0)
-          {
-            tlen = mutt_strlen (t);
-            strncpy (d, t, tlen);
-            d += tlen;
-          }
-          else
-          {
-            strncpy (d, s, n);
-            d += n;
-          }
-          FREE (&t);
-          break;
+          s += m;
         }
       }
+      if (AssumedCharset && *AssumedCharset)
+      {
+	char *t;
+	size_t tlen;
+
+	n = mutt_strlen (s);
+	t = safe_malloc (n + 1);
+	strfcpy (t, s, n + 1);
+	convert_nonmime_string (&t);
+	tlen = mutt_strlen (t);
+	strncpy (d, t, tlen);
+	d += tlen;
+	FREE (&t);
+	break;
+      }
       strncpy (d, s, dlen);
       d += dlen;
       break;
@@ -814,9 +845,9 @@ void rfc2047_decode (char **pd)
     if (p != s)
     {
       n = (size_t) (p - s);
-      /* ignore spaces between encoded words
-       * and linear white spaces between encoded word and *text */
-      if (!option (OPTSTRICTMIME))
+      /* ignore spaces between encoded words
+       * and linear-white-space between an encoded word and *text */
+      if (option (OPTIGNORELWS))
       {
         if (found_encoded && (m = lwslen (s, n)) != 0)
         {
@@ -838,13 +869,12 @@ void rfc2047_decode (char **pd)
       }
       else if (!found_encoded || strspn (s, " \t\r\n") != n)
       {
-        if (n > dlen)
-          n = dlen;
-        memcpy (d, s, n);
-        d += n;
-        dlen -= n;
+	if (n > dlen)
+	  n = dlen;
+	memcpy (d, s, n);
+	d += n;
+	dlen -= n;
       }
-
     }
 
     rfc2047_decode_word (d, p, dlen);
@@ -865,7 +895,8 @@ void rfc2047_decode_adrlist (ADDRESS *a)
 {
   while (a)
   {
-    if (a->personal)
+    if (a->personal && ((strstr (a->personal, "=?") != NULL) || 
+			(AssumedCharset && *AssumedCharset)))
       rfc2047_decode (&a->personal);
 #ifdef EXACT_ADDRESS
     if (a->val && strstr (a->val, "=?") != NULL)
diff --git a/rfc2047.h b/rfc2047.h
index 735b3565..9e15d2f2 100644
--- a/rfc2047.h
+++ b/rfc2047.h
@@ -18,6 +18,7 @@
 
 char *mutt_choose_charset (const char *fromcode, const char *charsets,
 		      char *u, size_t ulen, char **d, size_t *dlen);
+int convert_nonmime_string (char **);
 
 void _rfc2047_encode_string (char **, int, int);
 void rfc2047_encode_adrlist (ADDRESS *, const char *);
diff --git a/rfc2231.c b/rfc2231.c
index 854cdfd3..445aaa59 100644
--- a/rfc2231.c
+++ b/rfc2231.c
@@ -117,11 +117,8 @@ void rfc2231_decode_parameters (PARAMETER **headp)
 
       if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
 	rfc2047_decode (&p->value);
-      else if (!option (OPTSTRICTMIME))
-      {
-        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
-          mutt_convert_nonmime_string (&p->value);
-      }
+      else if (AssumedCharset && *AssumedCharset)
+        convert_nonmime_string (&p->value);
 
       *last = p;
       last = &p->next;
diff --git a/sendlib.c b/sendlib.c
index 4e912eb5..5c8430db 100644
--- a/sendlib.c
+++ b/sendlib.c
@@ -442,7 +442,7 @@ int mutt_write_mime_body (BODY *a, FILE *f)
   }
 
   if (a->type == TYPETEXT && (!a->noconv))
-    fc = fgetconv_open (fpin, a->file_charset, 
+    fc = fgetconv_open (fpin, a->charset, 
 			mutt_get_body_charset (send_charset, sizeof (send_charset), a),
 			0);
   else
@@ -842,7 +842,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
   CONTENT *info;
   CONTENT_STATE state;
   FILE *fp = NULL;
-  char *fromcode = NULL;
+  char *fromcode;
   char *tocode;
   char buffer[100];
   char chsbuf[STRING];
@@ -877,8 +877,8 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
   if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
   {
     char *chs = mutt_get_parameter ("charset", b->parameter);
-    char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
-                                FileCharset : Charset) : Charset;
+    char *fchs = b->use_disp ? ((AttachCharset && *AttachCharset) ?
+                                AttachCharset : Charset) : Charset;
     if (Charset && (chs || SendCharset) &&
         convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
                               &fromcode, &tocode, info) != (size_t)(-1))
@@ -888,7 +888,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY *b)
 	mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
 	mutt_set_parameter ("charset", chsbuf, &b->parameter);
       }
-      b->file_charset = fromcode;
+      b->charset = fromcode;
       FREE (&tocode);
       safe_fclose (&fp);
       return info;
-- 
2.40.0