From: Alain Bench <veronatif@free.fr>
Date: Wed, 7 Mar 2007 02:13:14 +0000 (-0800)
Subject: Remove buggy usage of M_ICONV_HOOK_TO flag in mutt_idna.c:mutt_idna_to_local().
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dbd0c91cf3c215d0ce3995048c4e388e66ef3079;p=neomutt

Remove buggy usage of M_ICONV_HOOK_TO flag in mutt_idna.c:mutt_idna_to_local().
Wipe unwanted code depending on M_ICONV_HOOK_TO in charset.c:mutt_iconv_open().
Totally wipe M_ICONV_HOOK_TO symbol.
Remove misuses of the M_ICONV_HOOK_FROM flag in:
 - crypt-gpgme.c:print_utf8().
 - mutt_idna.c:mutt_idna_to_local() and mutt_local_to_idna().
 - pgp.c:pgp_traditional_encryptsign().
Document usage policy of M_ICONV_HOOK_FROM flag.
Cosmetic downcasing of some constant charset names (utf-8, euc-jp) for consistency.
Correction of a typo in the "iso8859-5" charset name.
---

diff --git a/ChangeLog b/ChangeLog
index fa556d18c..73ea4d887 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2007-03-06 17:59 -0800  Rocco Rutte  <pdmef@gmx.net>  (5e47385893fe)
+
+	* UPDATING, doc/manual.xml.head: Documentation updates for
+	format=flowed
+
+2007-03-06 17:45 -0800  Masayuki  <Masayuki Moriyama>  (78b424bf7c5a)
+
+	* ChangeLog, charset.c, hook.c, mbyte.c: Allow iconv-hook to use
+	virtual charsets as targets. (closes: #1269)
+
 2007-03-04 19:26 +0100  Alain Bench  <veronatif@free.fr>  (4dc1d87f4c25)
 
 	* doc/manual.xml.head, init.h: Doc fixes and updates concerning hooks
diff --git a/charset.c b/charset.c
index 570654ef8..25d4fa4db 100644
--- a/charset.c
+++ b/charset.c
@@ -129,7 +129,7 @@ PreferredMIMENames[] =
   { "iso-ir-144",	"iso-8859-5"	},
   { "ISO_8859-5",	"iso-8859-5"	},
   { "cyrillic",		"iso-8859-5"	},
-  { "csISOLatinCyrillic", "iso8859-5"	},
+  { "csISOLatinCyrillic", "iso-8859-5"	},
 
   { "ISO_8859-9:1989",	"iso-8859-9"	},
   { "iso-ir-148",	"iso-8859-9"	},
@@ -150,9 +150,9 @@ PreferredMIMENames[] =
   { "csShiftJis",	"Shift_JIS"	},
   
   { "Extended_UNIX_Code_Packed_Format_for_Japanese",
-      			"EUC-JP"	},
+      			"euc-jp"	},
   { "csEUCPkdFmtJapanese", 
-      			"EUC-JP"	},
+      			"euc-jp"	},
   
   { "csGB2312",		"gb2312"	},
   { "csbig5",		"big5"		},
@@ -321,7 +321,14 @@ int iconv_close (iconv_t cd)
  * Like iconv_open, but canonicalises the charsets, applies
  * charset-hooks, recanonicalises, and finally applies iconv-hooks.
  * Parameter flags=0 skips charset-hooks, while M_ICONV_HOOK_FROM
- * applies them to fromcode.
+ * applies them to fromcode. Callers should use flags=0 when fromcode
+ * can safely be trusted, being either a constant or a value provided
+ * by the user; M_ICONV_HOOK_FROM should be used only when fromcode is
+ * uncertain, e.g. taken from a possibly wrong incoming MIME label.
+ * Misusing M_ICONV_HOOK_FROM leads to unwanted interactions in some
+ * setups. Note: By design, charset-hooks should never be, and are
+ * never, applied to tocode. Important: despite its name,
+ * M_ICONV_HOOK_FROM acts on charset-hooks, not at all on iconv-hooks.
  */
 
 iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags)
@@ -335,13 +342,6 @@ iconv_t mutt_iconv_open (const char *tocode, const char *fromcode, int flags)
 
   /* transform to MIME preferred charset names */
   mutt_canonical_charset (tocode1, sizeof (tocode1), tocode);
-
-#ifdef M_ICONV_HOOK_TO
-  /* Not used. */
-  if ((flags & M_ICONV_HOOK_TO) && (tmp = mutt_charset_hook (tocode1)))
-    mutt_canonical_charset (tocode1, sizeof (tocode1), tmp);
-#endif
-
   mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode);
 
   /* maybe apply charset-hooks and recanonicalise fromcode,
@@ -438,7 +438,9 @@ size_t mutt_iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t *inbytesleft,
 
 /*
  * Convert a string
- * Used in rfc2047.c and rfc2231.c
+ * Used in rfc2047.c, rfc2231.c, crypt-gpgme.c, mutt_idna.c, and more.
+ * Parameter flags is given as-is to mutt_iconv_open(). See there
+ * for its meaning and usage policy.
  */
 
 int mutt_convert_string (char **ps, const char *from, const char *to, int flags)
@@ -511,6 +513,10 @@ struct fgetconv_not
   iconv_t cd;
 };
 
+/*
+ * Parameter flags is given as-is to mutt_iconv_open(). See there
+ * for its meaning and usage policy.
+ */
 FGETCONV *fgetconv_open (FILE *file, const char *from, const char *to, int flags)
 {
   struct fgetconv_s *fc;
diff --git a/charset.h b/charset.h
index 3fba1ccef..ab5f062bf 100644
--- a/charset.h
+++ b/charset.h
@@ -49,7 +49,11 @@ void fgetconv_close (FGETCONV **);
 void mutt_set_langinfo_charset (void);
 char *mutt_get_default_charset ();
 
-#define M_ICONV_HOOK_FROM 1
-#define M_ICONV_HOOK_TO   2
+/* flags for charset.c:mutt_convert_string(), fgetconv_open(), and
+ * mutt_iconv_open(). Note that applying charset-hooks to tocode is
+ * never needed, and sometimes hurts: Hence there is no M_ICONV_HOOK_TO
+ * flag.
+ */
+#define M_ICONV_HOOK_FROM 1	/* apply charset-hooks to fromcode */
 
 #endif /* _CHARSET_H */
diff --git a/crypt-gpgme.c b/crypt-gpgme.c
index bf1bd3821..dde507a05 100644
--- a/crypt-gpgme.c
+++ b/crypt-gpgme.c
@@ -146,7 +146,11 @@ print_utf8 (FILE *fp, const char *buf, size_t len)
   tstr = safe_malloc (len+1);
   memcpy (tstr, buf, len);
   tstr[len] = 0;
-  mutt_convert_string (&tstr, "utf-8", Charset, M_ICONV_HOOK_FROM);
+
+  /* fromcode "utf-8" is sure, so we don't want
+   * charset-hook corrections: flags must be 0.
+   */
+  mutt_convert_string (&tstr, "utf-8", Charset, 0);
   fputs (tstr, fp);
   FREE (&tstr);
 }
@@ -1921,7 +1925,11 @@ static void copy_clearsigned (gpgme_data_t data, STATE *s, char *charset)
     return;
   unlink (fname);
   FREE (&fname);
-  
+
+  /* fromcode comes from the MIME Content-Type charset label. It might
+   * be a wrong label, so we want the ability to do corrections via
+   * charset-hooks. Therefore we set flags to M_ICONV_HOOK_FROM.
+   */
   fc = fgetconv_open (fp, charset, Charset, M_ICONV_HOOK_FROM);
   
   for (complete = 1, armor_header = 1;
diff --git a/imap/utf7.c b/imap/utf7.c
index 97212ad5b..4ad1db777 100644
--- a/imap/utf7.c
+++ b/imap/utf7.c
@@ -257,7 +257,7 @@ void imap_utf7_encode (char **s)
   if (Charset)
   {
     char *t = safe_strdup (*s);
-    if (!mutt_convert_string (&t, Charset, "UTF-8", 0))
+    if (!mutt_convert_string (&t, Charset, "utf-8", 0))
     {
       char *u7 = utf8_to_utf7 (t, strlen (t), NULL, 0);
       FREE (s);		/* __FREE_CHECKED__ */
@@ -272,7 +272,7 @@ void imap_utf7_decode (char **s)
   if (Charset)
   {
     char *t = utf7_to_utf8 (*s, strlen (*s), 0, 0);
-    if (t && !mutt_convert_string (&t, "UTF-8", Charset, 0))
+    if (t && !mutt_convert_string (&t, "utf-8", Charset, 0))
     {
       FREE (s);		/* __FREE_CHECKED__ */
       *s = t;
diff --git a/mbyte.c b/mbyte.c
index cd60dbed9..f00c892bc 100644
--- a/mbyte.c
+++ b/mbyte.c
@@ -73,13 +73,13 @@ void mutt_set_charset (char *charset)
     charset_is_ja = 1;
 
     /* Note flags=0 to skip charset-hooks: User masters the $charset
-     * name, and we are sure of our "UTF-8" constant. So there is no
+     * name, and we are sure of our "utf-8" constant. So there is no
      * possibility of wrong name that we would want to try to correct
      * with a charset-hook. Or rather: If $charset was wrong, we would
      * want to try to correct... $charset directly.
      */
-    charset_to_utf8 = mutt_iconv_open ("UTF-8", charset, 0);
-    charset_from_utf8 = mutt_iconv_open (charset, "UTF-8", 0);
+    charset_to_utf8 = mutt_iconv_open ("utf-8", charset, 0);
+    charset_from_utf8 = mutt_iconv_open (charset, "utf-8", 0);
   }
 #endif
 
diff --git a/mutt_idna.c b/mutt_idna.c
index 52138b3ff..721bf4f08 100644
--- a/mutt_idna.c
+++ b/mutt_idna.c
@@ -55,7 +55,9 @@ int mutt_idna_to_local (const char *in, char **out, int flags)
   /* Is this the right function?  Interesting effects with some bad identifiers! */
   if (idna_to_unicode_8z8z (in, out, 1) != IDNA_SUCCESS)
     goto notrans;
-  if (mutt_convert_string (out, "utf-8", Charset, M_ICONV_HOOK_TO) == -1)
+
+  /* we don't want charset-hook effects, so we set flags to 0 */
+  if (mutt_convert_string (out, "utf-8", Charset, 0) == -1)
     goto notrans;
 
   /* 
@@ -68,7 +70,9 @@ int mutt_idna_to_local (const char *in, char **out, int flags)
     int irrev = 0;
     char *t2 = NULL;
     char *tmp = safe_strdup (*out);
-    if (mutt_convert_string (&tmp, Charset, "utf-8", M_ICONV_HOOK_FROM) == -1)
+
+    /* we don't want charset-hook effects, so we set flags to 0 */
+    if (mutt_convert_string (&tmp, Charset, "utf-8", 0) == -1)
       irrev = 1;
     if (!irrev && idna_to_ascii_8z (tmp, &t2, 1) != IDNA_SUCCESS)
       irrev = 1;
@@ -106,7 +110,8 @@ int mutt_local_to_idna (const char *in, char **out)
     return -1;
   }
   
-  if (mutt_convert_string (&tmp, Charset, "utf-8", M_ICONV_HOOK_FROM) == -1)
+  /* we don't want charset-hook effects, so we set flags to 0 */
+  if (mutt_convert_string (&tmp, Charset, "utf-8", 0) == -1)
     rv = -1;
   if (!rv && idna_to_ascii_8z (tmp, out, 1) != IDNA_SUCCESS)
     rv = -2;
diff --git a/pgp.c b/pgp.c
index 4c58c723c..fe142de4d 100644
--- a/pgp.c
+++ b/pgp.c
@@ -201,7 +201,11 @@ static void pgp_copy_clearsigned (FILE *fpin, STATE *s, char *charset)
   FGETCONV *fc;
   
   rewind (fpin);
-  
+
+  /* fromcode comes from the MIME Content-Type charset label. It might
+   * be a wrong label, so we want the ability to do corrections via
+   * charset-hooks. Therefore we set flags to M_ICONV_HOOK_FROM.
+   */
   fc = fgetconv_open (fpin, charset, Charset, M_ICONV_HOOK_FROM);
   
   for (complete = 1, armor_header = 1;
@@ -1422,8 +1426,9 @@ BODY *pgp_traditional_encryptsign (BODY *a, int flags, char *keylist)
       send_charset = "us-ascii";
     else
       send_charset = "utf-8";
-    
-    fc = fgetconv_open (fp, from_charset, "utf-8", M_ICONV_HOOK_FROM);
+
+    /* fromcode is assumed to be correct: we set flags to 0 */
+    fc = fgetconv_open (fp, from_charset, "utf-8", 0);
     while ((c = fgetconv (fc)) != EOF)
       fputc (c, pgpin);
     
diff --git a/rfc2047.c b/rfc2047.c
index 98f3e3bf1..f61784e03 100644
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -375,7 +375,7 @@ static size_t choose_block (char *d, size_t dlen, int col,
 			    encoder_t *encoder, size_t *wlen)
 {
   size_t n, nn;
-  int utf8 = fromcode && !ascii_strcasecmp (fromcode, "UTF-8");
+  int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
 
   n = dlen;
   for (;;)
@@ -416,7 +416,7 @@ static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
   encoder_t encoder;
   char *tocode1 = 0;
   const char *tocode;
-  char *icode = "UTF-8";
+  char *icode = "utf-8";
 
   /* Try to convert to UTF-8. */
   if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
@@ -595,7 +595,7 @@ void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 
   charsets = SendCharset;
   if (!charsets || !*charsets)
-    charsets = "UTF-8";
+    charsets = "utf-8";
 
   rfc2047_encode (*pd, strlen (*pd), col,
 		  Charset, charsets, &e, &elen,
diff --git a/sendlib.c b/sendlib.c
index 5c8430db0..d9cfacb01 100644
--- a/sendlib.c
+++ b/sendlib.c
@@ -632,7 +632,7 @@ static size_t convert_file_to (FILE *file, const char *fromcode,
   CONTENT_STATE *states;
   size_t *score;
 
-  cd1 = mutt_iconv_open ("UTF-8", fromcode, 0);
+  cd1 = mutt_iconv_open ("utf-8", fromcode, 0);
   if (cd1 == (iconv_t)(-1))
     return -1;
 
@@ -642,8 +642,8 @@ static size_t convert_file_to (FILE *file, const char *fromcode,
   infos  = safe_calloc (ncodes, sizeof (CONTENT));
 
   for (i = 0; i < ncodes; i++)
-    if (ascii_strcasecmp (tocodes[i], "UTF-8"))
-      cd[i] = mutt_iconv_open (tocodes[i], "UTF-8", 0);
+    if (ascii_strcasecmp (tocodes[i], "utf-8"))
+      cd[i] = mutt_iconv_open (tocodes[i], "utf-8", 0);
     else
       /* Special case for conversion to UTF-8 */
       cd[i] = (iconv_t)(-1), score[i] = (size_t)(-1);