/*
* Copyright (C) 1996-2000 Michael R. Elkins <me@cs.hmc.edu>
+ * Copyright (C) 2000 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "rfc2047.h"
#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-typedef void encode_t (char *, size_t, const unsigned char *, const char *);
+/* If you are debugging this file, comment out the following line. */
+/*#define NDEBUG*/
+#ifdef NDEBUG
+#define assert(x)
+#else
+#include <assert.h>
+#endif
-static void q_encode_string (char *d,
- size_t dlen,
- const unsigned char *s,
- const char *send_charset)
-{
- char charset[SHORT_STRING];
- size_t cslen, wordlen;
- char *wptr = d;
- const unsigned char *t;
- int hibit = 0;
+#define ENCWORD_LEN_MAX 75 /* longest encoded word permitted by RFC 2047 */
+#define ENCWORD_OVERHEAD 7 /* strlen ("=??X??=") */
- for(t = s; *t && !hibit; t++)
- if(*t & 0x80) hibit = 1;
+#define HSPACE(x) ((x) == ' ' || (x) == '\t') /* true for a space or a horizontal tab */
- if(hibit || !mutt_strcasecmp (send_charset, "iso-2022-jp"))
+typedef size_t (*encoder_t) (char *, const char *, size_t,
+ const char *); /* (output, data, data length, charset name) -> bytes written; see b_encoder/q_encoder */
+
+/*
+ * Convert the flen bytes at f from charset `from' to charset `to'.
+ * On success, store a newly allocated buffer with the converted bytes
+ * in *t and its length in *tlen (the buffer is not NUL-terminated and
+ * belongs to the caller), and return iconv's count of non-reversible
+ * conversions.  On failure, return (size_t)(-1) with errno as set by
+ * the failing iconv_open/iconv call; *t and *tlen are not written.
+ */
+static size_t convert_string (const char *f, size_t flen,
+ const char *from, const char *to,
+ char **t, size_t *tlen)
+{
+ iconv_t cd;
+ char *buf, *ob, *x;
+ size_t obl, n;
+ int e;
+
+ cd = iconv_open (to, from);
+ if (cd == (iconv_t)-1)
+ return (size_t)(-1);
+ obl = MB_LEN_MAX * flen; /* assumes no input byte expands to more than MB_LEN_MAX output bytes */
+ ob = buf = safe_malloc (obl);
+ n = iconv (cd, &f, &flen, &ob, &obl);
+ /* The second call, with no input, flushes any pending shift sequence. */
+ if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
{
- snprintf (charset, sizeof (charset), "=?%s?Q?",
- mutt_strcasecmp ("us-ascii", send_charset) == 0 ? "unknown-8bit" : NONULL(send_charset));
+ e = errno; /* free and iconv_close may overwrite errno */
+ free (buf);
+ iconv_close (cd);
+ errno = e;
+ return (size_t)(-1);
}
- else
- strfcpy(charset, "=?us-ascii?Q?", sizeof(charset));
+ *tlen = ob - buf;
+ /*
+ * Shrink the buffer to fit.  Never call realloc with size 0: it may
+ * free buf and return NULL, which would leave *t pointing at freed
+ * memory.  If shrinking fails, the original buffer is still valid.
+ */
+ x = *tlen ? realloc (buf, *tlen) : 0;
+ *t = x ? x : buf;
+ iconv_close (cd);
+ return n;
+}
+
+/*
+ * Pick a charset from the colon-separated list `charsets' for the UTF-8
+ * data (u, ulen).  The candidate for which convert_string reports the
+ * fewest non-reversible conversions wins (the earliest one on a tie),
+ * and the search stops at the first candidate that reports none.
+ * Candidates that are empty, too long to fit in an encoded word, or
+ * rejected by convert_string are skipped.
+ * Return the chosen name as a newly allocated string owned by the
+ * caller, or 0 if no candidate was usable.
+ */
+static char *choose_charset (const char *charsets, char *u, size_t ulen)
+{
+ char *tocode = 0;
+ size_t bestn = 0;
+ const char *p, *q;
- cslen = mutt_strlen (charset);
+ for (p = charsets; p; p = q ? q + 1 : 0)
+ {
+ char *s, *t;
+ size_t slen, n;
- strcpy (wptr, charset);
- wptr += cslen;
- wordlen = cslen;
- dlen -= cslen;
+ q = strchr (p, ':');
- dlen -= 3; /* save room for the word terminator */
+ n = q ? q - p : strlen (p);
- while (*s && dlen > 0)
- {
- if (wordlen >= 72)
+ /* The name must leave room for the base64 form of MB_LEN_MAX bytes. */
+ if (!n ||
+ n > (ENCWORD_LEN_MAX - ENCWORD_OVERHEAD - ((MB_LEN_MAX + 2) / 3) * 4))
+ continue;
+
+ t = safe_malloc (n + 1);
+ memcpy (t, p, n), t[n] = '\0';
+ n = convert_string (u, ulen, "UTF-8", t, &s, &slen);
+ if (n == (size_t)(-1))
+ {
+ free (t); /* conversion failed: drop the copied name instead of leaking it */
+ continue;
+ }
+ free (s); /* only the conversion count matters here, not the converted text */
+ if (!tocode || n < bestn)
{
- if (dlen < 4 + cslen)
+ free (tocode), tocode = t, bestn = n;
+ if (!bestn)
break;
-
- strcpy (wptr, "?=\n ");
- wptr += 4;
- dlen -= 4;
- strcpy (wptr, charset);
- wptr += cslen;
- wordlen = cslen;
- dlen -= cslen;
}
+ else
+ free (t);
+ }
+ return tocode;
+}
- if (*s == ' ')
+static size_t b_encoder (char *s, const char *d, size_t dlen,
+ const char *tocode) /* write (d, dlen) at s as one "=?tocode?B?...?=" word; returns bytes written; no bounds check, no NUL appended */
+{
+ char *s0 = s; /* start of the output, used for the returned length */
+
+ memcpy (s, "=?", 2), s += 2;
+ memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
+ memcpy (s, "?B?", 3), s += 3;
+ for (;;) /* base64: every group of up to 3 input bytes becomes 4 output characters */
+ {
+ if (!dlen)
+ break;
+ else if (dlen == 1)
{
- *wptr++ = '_';
- wordlen++;
- dlen--;
+ *s++ = B64Chars[(*d >> 2) & 0x3f];
+ *s++ = B64Chars[(*d & 0x03) << 4];
+ *s++ = '='; /* one byte left over: pad with two '=' */
+ *s++ = '=';
+ break;
}
- else if ((*s & 0x80) || *s == '\t' || *s == '_' || strchr (MimeSpecials, *s))
+ else if (dlen == 2)
{
- if (wordlen >= 70)
- {
- if (dlen < 4 + cslen)
- break;
-
- strcpy (wptr, "?=\n ");
- wptr += 4;
- dlen -= 4;
-
- strcpy (wptr, charset);
- wptr += cslen;
- wordlen = cslen;
- dlen -= cslen;
- }
-
- if (dlen < 3)
- break;
- sprintf (wptr, "=%02X", *s);
- wptr += 3;
- wordlen += 3;
- dlen -= 3;
+ *s++ = B64Chars[(*d >> 2) & 0x3f];
+ *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
+ *s++ = B64Chars[(d[1] & 0x0f) << 2];
+ *s++ = '='; /* two bytes left over: pad with one '=' */
+ break;
}
else
{
- *wptr++ = *s;
- wordlen++;
- dlen--;
+ *s++ = B64Chars[(*d >> 2) & 0x3f]; /* the masks also discard sign-extension bits when char is signed */
+ *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
+ *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
+ *s++ = B64Chars[d[2] & 0x3f];
+ d += 3, dlen -= 3;
}
- s++;
}
+ memcpy (s, "?=", 2), s += 2;
+ return s - s0;
+}
- strcpy (wptr, "?=");
+static size_t q_encoder (char *s, const char *d, size_t dlen,
+ const char *tocode) /* write (d, dlen) at s as one "=?tocode?Q?...?=" word; returns bytes written; no bounds check, no NUL appended */
+{
+ char hex[] = "0123456789ABCDEF"; /* digits for the =XX escapes */
+ char *s0 = s; /* start of the output, used for the returned length */
+
+ memcpy (s, "=?", 2), s += 2;
+ memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
+ memcpy (s, "?Q?", 3), s += 3;
+ while (dlen--)
+ {
+ unsigned char c = *d++;
+ if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c)) /* bytes that must be written as =XX */
+ {
+ *s++ = '=';
+ *s++ = hex[(c & 0xf0) >> 4];
+ *s++ = hex[c & 0x0f];
+ }
+ else if (c == ' ') /* reached only if ' ' is not in MimeSpecials */
+ *s++ = '_';
+ else
+ *s++ = c;
+ }
+ memcpy (s, "?=", 2), s += 2;
+ return s - s0;
}
-static void b_encode_string (char *d, size_t dlen,
- const unsigned char *s,
- const char *send_charset)
+/*
+ * Test whether the UTF-8 data (d, dlen), converted to tocode, fits in
+ * a single encoded word.
+ * If it does, set *encoder to the encoder giving the shorter word
+ * (Q on a tie) and *wlen to that word's length, and return 0.
+ * Otherwise return a non-zero bound: the amount of data that can be
+ * converted is strictly less than the returned value.  *encoder and
+ * *wlen are not set in that case.
+ */
+static size_t try_block (const char *d, size_t dlen, const char *tocode,
+ encoder_t *encoder, size_t *wlen)
{
- char charset[SHORT_STRING];
- char *wptr = d;
- int cslen;
- int wordlen;
+ char buf1[ENCWORD_LEN_MAX - ENCWORD_OVERHEAD];
+ iconv_t cd;
+ const char *ib;
+ char *ob, *p;
+ size_t ibl, obl;
+ int count, len, len_b, len_q;
+
+ cd = iconv_open (tocode, "UTF-8");
+ assert (cd != (iconv_t)(-1));
+ ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); /* output budget: what remains of the word after the overhead and the charset name */
+ if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
+ iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
+ {
+ assert (errno == E2BIG); /* running out of output space is the only failure expected here */
+ iconv_close (cd);
+ assert (ib > d);
+ return (ib - d == dlen) ? dlen : ib - d + 1; /* ib - d bytes were consumed before the budget ran out */
+ }
+ iconv_close (cd);
- snprintf (charset, sizeof (charset), "=?%s?B?", NONULL(send_charset));
- cslen = mutt_strlen (charset);
- strcpy (wptr, charset);
- wptr += cslen;
- wordlen = cslen;
- dlen -= cslen;
+ count = 0; /* number of converted bytes that Q encoding would write as =XX */
+ for (p = buf1; p < ob; p++)
+ {
+ unsigned char c = *p;
+ assert (strchr (MimeSpecials, '?')); /* sanity check: '?' must be listed in MimeSpecials */
+ if (c >= 0x7f || c < 0x20 || *p == '_' || strchr (MimeSpecials, *p)) /* same test as q_encoder */
+ ++count;
+ }
- dlen -= 3; /* save room for the word terminator */
+ len = strlen (tocode) + ENCWORD_OVERHEAD;
+ len_b = len + (((ob - buf1) + 2) / 3) * 4; /* base64: 4 characters per started group of 3 bytes */
+ len_q = len + (ob - buf1) + 2 * count; /* Q: one character per byte plus two extra per escaped byte */
- while (*s && dlen >= 4)
+ /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
+ if (!strcasecmp (tocode, "ISO-2022-JP"))
+ len_q = ENCWORD_LEN_MAX + 1; /* make Q look too long so that it is never chosen */
+
+ if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
{
- if (wordlen >= 71)
- {
- if (dlen < 4 + cslen)
- break;
+ *encoder = b_encoder;
+ *wlen = len_b;
+ return 0;
+ }
+ else if (len_q <= ENCWORD_LEN_MAX)
+ {
+ *encoder = q_encoder;
+ *wlen = len_q;
+ return 0;
+ }
+ else
+ return dlen; /* converted, but neither encoding fits: the caller must try less data */
+}
+
+/*
+ * Convert the UTF-8 data (d, dlen) to tocode and write it into s as
+ * one encoded word using the given encoder.
+ * Return the length of the encoded word.
+ * The conversion is only checked with assert.  The output space is
+ * limited in the same way as in try_block.
+ */
+static size_t encode_block (char *s, char *d, size_t dlen,
+ const char *tocode, encoder_t encoder)
+{
+ char buf1[ENCWORD_LEN_MAX - ENCWORD_OVERHEAD];
+ iconv_t cd;
+ const char *ib;
+ char *ob;
+ size_t ibl, obl, n1, n2;
+
+ cd = iconv_open (tocode, "UTF-8");
+ assert (cd != (iconv_t)(-1));
+ ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
+ n1 = iconv (cd, &ib, &ibl, &ob, &obl);
+ n2 = iconv (cd, 0, 0, &ob, &obl); /* no input: flush any pending shift sequence */
+ assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
+ iconv_close (cd);
+ return (*encoder) (s, buf1, ob - buf1, tocode);
+}
- strcpy (wptr, "?=\n ");
- wptr += 4;
- dlen -= 4;
+/*
+ * Discover how much of the UTF-8 data (d, dlen) can be converted into
+ * a single encoded word. Return how much data can be converted,
+ * and set the length *wlen of the encoded word and *encoder.
+ * The length is reduced, always to a UTF-8 character boundary, until
+ * try_block accepts it.
+ */
+static size_t choose_block (char *d, size_t dlen, const char *tocode,
+ size_t *wlen, encoder_t *encoder)
+{
+ size_t n, nn;
- strcpy (wptr, charset);
- wptr += cslen;
- wordlen = cslen;
- dlen -= cslen;
- }
+ n = dlen;
+ for (;;)
+ {
+ assert (n);
+ nn = try_block (d, n, tocode, encoder, wlen);
+ if (!nn) /* 0 means the first n bytes fit in one encoded word */
+ break;
+ for (n = nn - 1; (d[n] & 0xc0) == 0x80; n--) /* back up over UTF-8 continuation bytes (10xxxxxx) */
+ assert (n);
+ }
+ return n;
+}
- *wptr++ = B64Chars[ (*s >> 2) & 0x3f ];
- *wptr++ = B64Chars[ ((*s & 0x3) << 4) | ((*(s+1) >> 4) & 0xf) ];
- s++;
- if (*s)
- {
- *wptr++ = B64Chars[ ((*s & 0xf) << 2) | ((*(s+1) >> 6) & 0x3) ];
- s++;
- if (*s)
- {
- *wptr++ = B64Chars[ *s & 0x3f ];
- s++;
- }
- else
- *wptr++ = '=';
- }
+/*
+ * Place the result of RFC 2047 encoding (d, dlen) into the dynamically
+ * allocated buffer (e, elen). The input data is in charset fromcode
+ * and is converted into a charset chosen from charsets.
+ * Return 1 if the input data cannot be converted cleanly to UTF-8,
+ * 2 if no conversion is possible, otherwise 0 on success.
+ * Nothing is stored in *e or *elen unless 0 is returned.  The result
+ * is not NUL-terminated; its length is *elen.
+ */
+static int rfc2047_encode (const char *d, size_t dlen,
+ const char *fromcode, const char *charsets,
+ char **e, size_t *elen)
+{
+ char *buf = 0;
+ size_t bufpos = 0, buflen = 0;
+ char *u0, *u;
+ size_t ulen;
+ char *tocode;
+ int prev; /* non-zero once an encoded word has been written */
+
+ /* Convert to UTF-8. */
+ if (convert_string (d, dlen, fromcode, "UTF-8", &u, &ulen)) /* non-zero covers both failure and non-reversible conversions */
+ return 1;
+ u0 = u; /* u advances below; keep the start for free */
+
+ /* Choose target charset. */
+ tocode = choose_charset (charsets, u, ulen);
+ if (!tocode)
+ {
+ free (u);
+ return 2;
+ }
+
+ for (prev = 0; ulen; prev = 1) {
+ char *t; /* where the next encoded word starts */
+ size_t n, wlen, r;
+ encoder_t encoder;
+
+ /* Decide where to start encoding. */
+ if (prev && ulen && !HSPACE (*u)) /* the previous word was cut mid-text: continue right here */
+ t = u;
else
{
- *wptr++ = '=';
- *wptr++ = '=';
+ /* Look for a non-us-ascii character or "=?". */
+ for (t = u; t < u + ulen - 1; t++)
+ if ((*t & 0x80) || (*t == '=' && t[1] == '?'))
+ break;
+ if (t == u + ulen - 1 && !(*t & 0x80)) /* nothing left that needs encoding */
+ break;
+
+ /* Find start of that word. */
+ while (t > u && !HSPACE(*(t-1)))
+ --t;
+ if (prev) {
+ /* Include preceding characters if they are all spaces. */
+ const char *x;
+ for (x = u; x < t && HSPACE(*x); x++)
+ ;
+ if (x >= t)
+ t = u;
+ }
}
- wordlen += 4;
- dlen -= 4;
+ /* Convert some data and add encoded word to buffer. */
+ n = choose_block (t, (u + ulen) - t, tocode, &wlen, &encoder);
+ buflen = bufpos + (t == u && prev) + (t - u) + wlen; /* optional separator + literal text + encoded word */
+ safe_realloc ((void **) &buf, buflen);
+ if (t == u && prev) /* this word directly follows the previous encoded word: separate them with a space */
+ buf[bufpos++] = ' ';
+ memcpy (buf + bufpos, u, t - u); /* copy the unencoded text before the word */
+ bufpos += t - u;
+ r = encode_block (buf + bufpos, t, n, tocode, encoder);
+ assert (r == wlen);
+ bufpos += wlen;
+ n += t - u;
+ u += n;
+ ulen -= n;
}
- strcpy (wptr, "?=");
+ /* Add remaining us-ascii characters to buffer. */
+ buflen = bufpos + ulen;
+ safe_realloc ((void **) &buf, buflen);
+ memcpy (buf + bufpos, u, ulen);
+
+ free (tocode);
+ free (u0);
+ *e = buf;
+ *elen = buflen;
+ return 0;
}
-void rfc2047_encode_string (char *d, size_t dlen, const unsigned char *s)
+#define MAX (ENCWORD_LEN_MAX + 1) /* line width used when folding */
+
+static char *rfc2047_fold_line (char *e, size_t elen) /* copy (e, elen) into a new NUL-terminated string, inserting '\n' before some whitespace; the caller owns the result */
{
- int count = 0;
- int len;
- const unsigned char *p = s;
- encode_t *encoder;
- char send_charset[SHORT_STRING];
- char *scratch;
-
- mutt_get_send_charset(send_charset, sizeof(send_charset), NULL, 0);
-
- /* First check to see if there are any 8-bit characters */
- for (; *p; p++)
+ char *line, *p, *f; /* p: write position in line; f: where the current output line ends in e */
+ int col = MAX; /* starts at MAX, so the first break point is treated as if the line were already full */
+
+ p = line = safe_malloc (elen * 2); /* more than enough */ /* NOTE(review): if elen is 0 this requests 0 bytes, yet a NUL is stored below; confirm callers or safe_malloc cover that case */
+
+ while (elen)
{
- if (*p & 0x80)
- count++;
- else if (*p == '=' && *(p+1) == '?')
+ if (elen > 2 && e[1] == '=' && e[2] == '?' && HSPACE(*e)) /* whitespace directly before "=?": a candidate break point */
{
- count += 2;
- p++;
+ again:
+ if (col + elen > MAX) /* the rest of the input does not fit on the current line */
+ {
+ if (col >= MAX)
+ f = e;
+ else
+ for (f = e + MAX - col; !HSPACE(*f); f--) /* last whitespace at or before the width limit; stops at e, which is whitespace */
+ ;
+ if (e == f)
+ {
+ if (col)
+ {
+ *p++ = '\n', col = 0; /* start a new line and retry with the full width */
+ goto again;
+ }
+ for (f = e + MAX; f < e + elen && !HSPACE(*f); f++) /* no break fits even on an empty line: run on to the next whitespace */
+ ;
+ }
+ memcpy (p, e, f - e), p += f - e;
+ elen -= f - e, e = f;
+ if (elen)
+ *p++ = '\n', col = 0;
+ continue;
+ }
}
+ *p++ = *e++, elen--, col++;
}
- if (!count)
- {
- strfcpy (d, (const char *)s, dlen);
- return;
- }
+ *p++ = '\0';
+ safe_realloc ((void **) &line, p - line); /* resize to the length actually used */
+ return line;
+}
- if (mutt_strcasecmp("us-ascii", send_charset) == 0 ||
- mutt_strncasecmp("iso-8859", send_charset, 8) == 0)
- encoder = q_encode_string;
- else
- {
- /* figure out which encoding generates the most compact representation */
- len = mutt_strlen ((char *) s);
- if ((count * 2) + len <= (4 * len) / 3)
- encoder = q_encode_string;
- else
- encoder = b_encode_string;
- }
+void rfc2047_encode_string (char **pd) /* replace *pd with its RFC 2047 encoded and folded form; *pd is left unchanged if encoding fails */
+{
+ char *e;
+ size_t elen;
+ char *charsets;
- /* Hack to pull the Re: and Fwd: out of the encoded word for better
- handling by agents which do not support RFC2047. */
- if (!mutt_strncasecmp ("re: ", (char *) s, 4))
- {
- strncpy (d, (char *) s, 4);
- d += 4;
- dlen -= 4;
- s += 4;
- }
- else if (!mutt_strncasecmp ("fwd: ", (char *) s, 5))
+ charsets = SendCharset; /* candidate list: SendCharset, else Charset, else "UTF-8" */
+ if (!charsets || !*charsets)
+ charsets = Charset;
+ if (!charsets || !*charsets)
+ charsets = "UTF-8";
+
+ if (!rfc2047_encode (*pd, strlen (*pd), Charset, charsets, &e, &elen)) /* the input is taken to be in Charset */
{
- strncpy (d, (char *) s, 5);
- d += 5;
- dlen -= 5;
- s += 5;
+ free (*pd);
+ *pd = rfc2047_fold_line (e, elen); /* returns a new NUL-terminated string */
+ free (e);
}
-
- scratch = safe_strdup ((const char *) s);
- if (*send_charset && mutt_strcasecmp("us-ascii", send_charset))
- mutt_convert_string (&scratch, Charset, send_charset);
-
- (*encoder) (d, dlen, (unsigned char *) scratch, send_charset);
- safe_free ((void **) &scratch);
}
void rfc2047_encode_adrlist (ADDRESS *addr)
{
ADDRESS *ptr = addr;
- char buffer[STRING];
while (ptr)
{
if (ptr->personal)
- {
- rfc2047_encode_string (buffer, sizeof (buffer), (const unsigned char *)ptr->personal);
- mutt_str_replace (&ptr->personal, buffer);
- }
+ rfc2047_encode_string (&ptr->personal);
#ifdef EXACT_ADDRESS
if (ptr->val)
- {
- rfc2047_encode_string (buffer, sizeof (buffer), (const unsigned char *)ptr->val);
- mutt_str_replace (&ptr->val, buffer);
- }
+ rfc2047_encode_string (&ptr->val);
#endif
ptr = ptr->next;
}
{
const char *pp = s, *pp1;
char *pd, *d0;
- char *t, *t1;
+ const char *t, *t1;
int enc = 0, count = 0, c1, c2, c3, c4;
char *charset = NULL;