granicus.if.org Git - mutt/blob - rfc2047.c

   1 /*
   2  * Copyright (C) 1996-2000,2010 Michael R. Elkins <me@mutt.org>
   3  * Copyright (C) 2000-2002 Edmund Grimley Evans <edmundo@rano.org>
   4  *
   5  *     This program is free software; you can redistribute it and/or modify
   6  *     it under the terms of the GNU General Public License as published by
   7  *     the Free Software Foundation; either version 2 of the License, or
   8  *     (at your option) any later version.
   9  *
  10  *     This program is distributed in the hope that it will be useful,
  11  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *     GNU General Public License for more details.
  14  *
  15  *     You should have received a copy of the GNU General Public License
  16  *     along with this program; if not, write to the Free Software
  17  *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  18  */
  19
  20 #if HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include "mutt.h"
  25 #include "mime.h"
  26 #include "charset.h"
  27 #include "rfc2047.h"
  28
  29 #include <ctype.h>
  30 #include <errno.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34
  35 /* If you are debugging this file, comment out the following line. */
  36 /*#define NDEBUG*/
  37
  38 #ifdef NDEBUG
  39 #define assert(x)
  40 #else
  41 #include <assert.h>
  42 #endif
  43
  44 #define ENCWORD_LEN_MAX 75
  45 #define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */
  46
  47 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
  48
  49 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
  50
  51 extern char RFC822Specials[];
  52
  53 typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
  54                              const char *);
  55
  56 static size_t convert_string (ICONV_CONST char *f, size_t flen,
  57                               const char *from, const char *to,
  58                               char **t, size_t *tlen)
  59 {
  60   iconv_t cd;
  61   char *buf, *ob;
  62   size_t obl, n;
  63   int e;
  64
  65   cd = mutt_iconv_open (to, from, 0);
  66   if (cd == (iconv_t)(-1))
  67     return (size_t)(-1);
  68   obl = 4 * flen + 1;
  69   ob = buf = safe_malloc (obl);
  70   n = iconv (cd, &f, &flen, &ob, &obl);
  71   if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
  72   {
  73     e = errno;
  74     FREE (&buf);
  75     iconv_close (cd);
  76     errno = e;
  77     return (size_t)(-1);
  78   }
  79   *ob = '\0';
  80
  81   *tlen = ob - buf;
  82
  83   safe_realloc (&buf, ob - buf + 1);
  84   *t = buf;
  85   iconv_close (cd);
  86
  87   return n;
  88 }
  89
  90 int convert_nonmime_string (char **ps)
  91 {
  92   const char *c, *c1;
  93
  94   for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
  95   {
  96     char *u = *ps;
  97     char *s;
  98     char *fromcode;
  99     size_t m, n;
 100     size_t ulen = mutt_strlen (*ps);
 101     size_t slen;
 102
 103     if (!u || !*u)
 104       return 0;
 105
 106     c1 = strchr (c, ':');
 107     n = c1 ? c1 - c : mutt_strlen (c);
 108     if (!n)
 109       return 0;
 110     fromcode = safe_malloc (n + 1);
 111     strfcpy (fromcode, c, n + 1);
 112     m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
 113     FREE (&fromcode);
 114     if (m != (size_t)(-1))
 115     {
 116       FREE (ps); /* __FREE_CHECKED__ */
 117       *ps = s;
 118       return 0;
 119     }
 120   }
 121   mutt_convert_string (ps,
 122                        (const char *)mutt_get_default_charset (),
 123                        Charset, MUTT_ICONV_HOOK_FROM);
 124   return -1;
 125 }
 126
 127 char *mutt_choose_charset (const char *fromcode, const char *charsets,
 128                            char *u, size_t ulen, char **d, size_t *dlen)
 129 {
 130   char canonical_buff[LONG_STRING];
 131   char *e = 0, *tocode = 0;
 132   size_t elen = 0, bestn = 0;
 133   const char *p, *q;
 134
 135   for (p = charsets; p; p = q ? q + 1 : 0)
 136   {
 137     char *s, *t;
 138     size_t slen, n;
 139
 140     q = strchr (p, ':');
 141
 142     n = q ? q - p : strlen (p);
 143     if (!n)
 144       continue;
 145
 146     t = safe_malloc (n + 1);
 147     memcpy (t, p, n);
 148     t[n] = '\0';
 149
 150     n = convert_string (u, ulen, fromcode, t, &s, &slen);
 151     if (n == (size_t)(-1))
 152     {
 153       FREE (&t);
 154       continue;
 155     }
 156
 157     if (!tocode || n < bestn)
 158     {
 159       bestn = n;
 160       FREE (&tocode);
 161       tocode = t;
 162       if (d)
 163       {
 164         FREE (&e);
 165         e = s;
 166       }
 167       else
 168         FREE (&s);
 169       elen = slen;
 170       if (!bestn)
 171         break;
 172     }
 173     else
 174     {
 175       FREE (&t);
 176       FREE (&s);
 177     }
 178   }
 179   if (tocode)
 180   {
 181     if (d)
 182       *d = e;
 183     if (dlen)
 184       *dlen = elen;
 185
 186     mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
 187     mutt_str_replace (&tocode, canonical_buff);
 188   }
 189   return tocode;
 190 }
 191
 192 static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 193                          const char *tocode)
 194 {
 195   char *s0 = s;
 196
 197   memcpy (s, "=?", 2), s += 2;
 198   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 199   memcpy (s, "?B?", 3), s += 3;
 200   for (;;)
 201   {
 202     if (!dlen)
 203       break;
 204     else if (dlen == 1)
 205     {
 206       *s++ = B64Chars[(*d >> 2) & 0x3f];
 207       *s++ = B64Chars[(*d & 0x03) << 4];
 208       *s++ = '=';
 209       *s++ = '=';
 210       break;
 211     }
 212     else if (dlen == 2)
 213     {
 214       *s++ = B64Chars[(*d >> 2) & 0x3f];
 215       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 216       *s++ = B64Chars[(d[1] & 0x0f) << 2];
 217       *s++ = '=';
 218       break;
 219     }
 220     else
 221     {
 222       *s++ = B64Chars[(*d >> 2) & 0x3f];
 223       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 224       *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 225       *s++ = B64Chars[d[2] & 0x3f];
 226       d += 3, dlen -= 3;
 227     }
 228   }
 229   memcpy (s, "?=", 2), s += 2;
 230   return s - s0;
 231 }
 232
 233 static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 234                          const char *tocode)
 235 {
 236   static const char hex[] = "0123456789ABCDEF";
 237   char *s0 = s;
 238
 239   memcpy (s, "=?", 2), s += 2;
 240   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 241   memcpy (s, "?Q?", 3), s += 3;
 242   while (dlen--)
 243   {
 244     unsigned char c = *d++;
 245     if (c == ' ')
 246       *s++ = '_';
 247     else if (c >= 0x7f || c < 0x20 || c == '_' ||  strchr (MimeSpecials, c))
 248     {
 249       *s++ = '=';
 250       *s++ = hex[(c & 0xf0) >> 4];
 251       *s++ = hex[c & 0x0f];
 252     }
 253     else
 254       *s++ = c;
 255   }
 256   memcpy (s, "?=", 2), s += 2;
 257   return s - s0;
 258 }
 259
 260 /*
 261  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 262  * be converted to an encoded word of length *wlen using *encoder.
 263  * Otherwise return an upper bound on the maximum length of the data
 264  * which could be converted.
 265  * The data is converted from fromcode (which must be stateless) to
 266  * tocode, unless fromcode is 0, in which case the data is assumed to
 267  * be already in tocode, which should be 8-bit and stateless.
 268  */
 269 static size_t try_block (ICONV_CONST char *d, size_t dlen,
 270                          const char *fromcode, const char *tocode,
 271                          encoder_t *encoder, size_t *wlen)
 272 {
 273   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 274   iconv_t cd;
 275   ICONV_CONST char *ib;
 276   char *ob, *p;
 277   size_t ibl, obl;
 278   int count, len, len_b, len_q;
 279
 280   if (fromcode)
 281   {
 282     cd = mutt_iconv_open (tocode, fromcode, 0);
 283     assert (cd != (iconv_t)(-1));
 284     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 285     if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
 286         iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
 287     {
 288       assert (errno == E2BIG);
 289       iconv_close (cd);
 290       assert (ib > d);
 291       return (ib - d == dlen) ? dlen : ib - d + 1;
 292     }
 293     iconv_close (cd);
 294   }
 295   else
 296   {
 297     if (dlen > sizeof (buf1) - strlen (tocode))
 298       return sizeof (buf1) - strlen (tocode) + 1;
 299     memcpy (buf1, d, dlen);
 300     ob = buf1 + dlen;
 301   }
 302
 303   count = 0;
 304   for (p = buf1; p < ob; p++)
 305   {
 306     unsigned char c = *p;
 307     assert (strchr (MimeSpecials, '?'));
 308     if (c >= 0x7f || c < 0x20 || *p == '_' ||
 309         (c != ' ' && strchr (MimeSpecials, *p)))
 310       ++count;
 311   }
 312
 313   len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
 314   len_b = len + (((ob - buf1) + 2) / 3) * 4;
 315   len_q = len + (ob - buf1) + 2 * count;
 316
 317   /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 318   if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
 319     len_q = ENCWORD_LEN_MAX + 1;
 320
 321   if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
 322   {
 323     *encoder = b_encoder;
 324     *wlen = len_b;
 325     return 0;
 326   }
 327   else if (len_q <= ENCWORD_LEN_MAX)
 328   {
 329     *encoder = q_encoder;
 330     *wlen = len_q;
 331     return 0;
 332   }
 333   else
 334     return dlen;
 335 }
 336
 337 /*
 338  * Encode the data (d, dlen) into s using the encoder.
 339  * Return the length of the encoded word.
 340  */
 341 static size_t encode_block (char *s, char *d, size_t dlen,
 342                             const char *fromcode, const char *tocode,
 343                             encoder_t encoder)
 344 {
 345   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 346   iconv_t cd;
 347   ICONV_CONST char *ib;
 348   char *ob;
 349   size_t ibl, obl, n1, n2;
 350
 351   if (fromcode)
 352   {
 353     cd = mutt_iconv_open (tocode, fromcode, 0);
 354     assert (cd != (iconv_t)(-1));
 355     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 356     n1 = iconv (cd, &ib, &ibl, &ob, &obl);
 357     n2 = iconv (cd, 0, 0, &ob, &obl);
 358     assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
 359     iconv_close (cd);
 360     return (*encoder) (s, buf1, ob - buf1, tocode);
 361   }
 362   else
 363     return (*encoder) (s, d, dlen, tocode);
 364 }
 365
 366 /*
 367  * Discover how much of the data (d, dlen) can be converted into
 368  * a single encoded word. Return how much data can be converted,
 369  * and set the length *wlen of the encoded word and *encoder.
 370  * We start in column col, which limits the length of the word.
 371  */
 372 static size_t choose_block (char *d, size_t dlen, int col,
 373                             const char *fromcode, const char *tocode,
 374                             encoder_t *encoder, size_t *wlen)
 375 {
 376   size_t n, nn;
 377   int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
 378
 379   n = dlen;
 380   for (;;)
 381   {
 382     assert (d + n > d);
 383     nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 384     if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 385       break;
 386     n = (nn ? nn : n) - 1;
 387     assert (n > 0);
 388     if (utf8)
 389       while (n > 1 && CONTINUATION_BYTE(d[n]))
 390         --n;
 391   }
 392   return n;
 393 }
 394
 395 /*
 396  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 397  * allocated buffer (e, elen). The input data is in charset fromcode
 398  * and is converted into a charset chosen from charsets.
 399  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 400  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 401  * compatible with us-ascii and the original data is used.
 402  * The input data is assumed to be a single line starting at column col;
 403  * if col is non-zero, the preceding character was a space.
 404  */
 405 static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
 406                            const char *fromcode, const char *charsets,
 407                            char **e, size_t *elen, char *specials)
 408 {
 409   int ret = 0;
 410   char *buf;
 411   size_t bufpos, buflen;
 412   char *u = NULL, *t0, *t1, *t;
 413   char *s0, *s1;
 414   size_t ulen, r, n, wlen;
 415   encoder_t encoder;
 416   char *tocode1 = 0;
 417   const char *tocode;
 418   char *icode = "utf-8";
 419
 420   /* Try to convert to UTF-8. */
 421   if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
 422   {
 423     ret = 1;
 424     icode = 0;
 425     safe_realloc (&u, (ulen = dlen) + 1);
 426     memcpy (u, d, dlen);
 427     u[ulen] = 0;
 428   }
 429
 430   /* Find earliest and latest things we must encode. */
 431   s0 = s1 = t0 = t1 = 0;
 432   for (t = u; t < u + ulen; t++)
 433   {
 434     if ((*t & 0x80) ||
 435         (*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1)))))
 436     {
 437       if (!t0) t0 = t;
 438       t1 = t;
 439     }
 440     else if (specials && *t && strchr (specials, *t))
 441     {
 442       if (!s0) s0 = t;
 443       s1 = t;
 444     }
 445   }
 446
 447   /* If we have something to encode, include RFC822 specials */
 448   if (t0 && s0 && s0 < t0)
 449     t0 = s0;
 450   if (t1 && s1 && s1 > t1)
 451     t1 = s1;
 452
 453   if (!t0)
 454   {
 455     /* No encoding is required. */
 456     *e = u;
 457     *elen = ulen;
 458     return ret;
 459   }
 460
 461   /* Choose target charset. */
 462   tocode = fromcode;
 463   if (icode)
 464   {
 465     if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
 466       tocode = tocode1;
 467     else
 468       ret = 2, icode = 0;
 469   }
 470
 471   /* Hack to avoid labelling 8-bit data as us-ascii. */
 472   if (!icode && mutt_is_us_ascii (tocode))
 473     tocode = "unknown-8bit";
 474
 475   /* Adjust t0 for maximum length of line. */
 476   t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 477   if (t < u)  t = u;
 478   if (t < t0) t0 = t;
 479
 480
 481   /* Adjust t0 until we can encode a character after a space. */
 482   for (; t0 > u; t0--)
 483   {
 484     if (!HSPACE(*(t0-1)))
 485       continue;
 486     t = t0 + 1;
 487     if (icode)
 488       while (t < u + ulen && CONTINUATION_BYTE(*t))
 489         ++t;
 490     if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 491         col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 492       break;
 493   }
 494
 495   /* Adjust t1 until we can encode a character before a space. */
 496   for (; t1 < u + ulen; t1++)
 497   {
 498     if (!HSPACE(*t1))
 499       continue;
 500     t = t1 - 1;
 501     if (icode)
 502       while (CONTINUATION_BYTE(*t))
 503         --t;
 504     if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 505         1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 506       break;
 507   }
 508
 509   /* We shall encode the region [t0,t1). */
 510
 511   /* Initialise the output buffer with the us-ascii prefix. */
 512   buflen = 2 * ulen;
 513   buf = safe_malloc (buflen);
 514   bufpos = t0 - u;
 515   memcpy (buf, u, t0 - u);
 516
 517   col += t0 - u;
 518
 519   t = t0;
 520   for (;;)
 521   {
 522     /* Find how much we can encode. */
 523     n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 524     if (n == t1 - t)
 525     {
 526       /* See if we can fit the us-ascii suffix, too. */
 527       if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 528         break;
 529       n = t1 - t - 1;
 530       if (icode)
 531         while (CONTINUATION_BYTE(t[n]))
 532           --n;
 533       assert (t + n >= t);
 534       if (!n)
 535       {
 536         /* This should only happen in the really stupid case where the
 537            only word that needs encoding is one character long, but
 538            there is too much us-ascii stuff after it to use a single
 539            encoded word. We add the next word to the encoded region
 540            and try again. */
 541         assert (t1 < u + ulen);
 542         for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++)
 543           ;
 544         continue;
 545       }
 546       n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 547     }
 548
 549     /* Add to output buffer. */
 550 #define LINEBREAK "\n\t"
 551     if (bufpos + wlen + strlen (LINEBREAK) > buflen)
 552     {
 553       buflen = bufpos + wlen + strlen (LINEBREAK);
 554       safe_realloc (&buf, buflen);
 555     }
 556     r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 557     assert (r == wlen);
 558     bufpos += wlen;
 559     memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
 560     bufpos += strlen (LINEBREAK);
 561 #undef LINEBREAK
 562
 563     col = 1;
 564
 565     t += n;
 566   }
 567
 568   /* Add last encoded word and us-ascii suffix to buffer. */
 569   buflen = bufpos + wlen + (u + ulen - t1);
 570   safe_realloc (&buf, buflen + 1);
 571   r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 572   assert (r == wlen);
 573   bufpos += wlen;
 574   memcpy (buf + bufpos, t1, u + ulen - t1);
 575
 576   FREE (&tocode1);
 577   FREE (&u);
 578
 579   buf[buflen] = '\0';
 580
 581   *e = buf;
 582   *elen = buflen + 1;
 583   return ret;
 584 }
 585
 586 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 587 {
 588   char *e;
 589   size_t elen;
 590   char *charsets;
 591
 592   if (!Charset || !*pd)
 593     return;
 594
 595   charsets = SendCharset;
 596   if (!charsets)
 597     charsets = "utf-8";
 598
 599   rfc2047_encode (*pd, strlen (*pd), col,
 600                   Charset, charsets, &e, &elen,
 601                   encode_specials ? RFC822Specials : NULL);
 602
 603   FREE (pd);            /* __FREE_CHECKED__ */
 604   *pd = e;
 605 }
 606
 607 void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag)
 608 {
 609   ADDRESS *ptr = addr;
 610   int col = tag ? strlen (tag) + 2 : 32;
 611
 612   while (ptr)
 613   {
 614     if (ptr->personal)
 615       _rfc2047_encode_string (&ptr->personal, 1, col);
 616     else if (ptr->group && ptr->mailbox)
 617       _rfc2047_encode_string (&ptr->mailbox, 1, col);
 618 #ifdef EXACT_ADDRESS
 619     if (ptr->val)
 620       _rfc2047_encode_string (&ptr->val, 1, col);
 621 #endif
 622     ptr = ptr->next;
 623   }
 624 }
 625
 626 void rfc2047_encode_envelope (ENVELOPE *e)
 627 {
 628   rfc2047_encode_adrlist (e->from, "From");
 629   rfc2047_encode_adrlist (e->to, "To");
 630   rfc2047_encode_adrlist (e->cc, "Cc");
 631   rfc2047_encode_adrlist (e->bcc, "Bcc");
 632   rfc2047_encode_adrlist (e->reply_to, "Reply-To");
 633   rfc2047_encode_adrlist (e->mail_followup_to, "Mail-Followup-To");
 634   rfc2047_encode_adrlist (e->sender, "Sender");
 635   rfc2047_encode_string (&e->x_label);
 636   rfc2047_encode_string (&e->subject);
 637 }
 638
 639 static int rfc2047_decode_word (BUFFER *d, const char *s, char **charset)
 640 {
 641   const char *pp, *pp1;
 642   char *pd, *d0;
 643   const char *t, *t1;
 644   int enc = 0, count = 0;
 645   int rv = -1;
 646
 647   pd = d0 = safe_malloc (strlen (s));
 648
 649   for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
 650   {
 651     count++;
 652
 653     /* hack for non-compliant MUAs that allow unquoted question marks in encoded-text */
 654     if (count == 4)
 655     {
 656       while (pp1 && *(pp1 + 1) != '=')
 657         pp1 = strchr(pp1 + 1, '?');
 658       if (!pp1)
 659         goto error_out_0;
 660     }
 661
 662     switch (count)
 663     {
 664       case 2:
 665         /* ignore language specification a la RFC 2231 */
 666         t = pp1;
 667         if ((t1 = memchr (pp, '*', t - pp)))
 668           t = t1;
 669         *charset = mutt_substrdup (pp, t);
 670         break;
 671       case 3:
 672         if (toupper ((unsigned char) *pp) == 'Q')
 673           enc = ENCQUOTEDPRINTABLE;
 674         else if (toupper ((unsigned char) *pp) == 'B')
 675           enc = ENCBASE64;
 676         else
 677           goto error_out_0;
 678         break;
 679       case 4:
 680         if (enc == ENCQUOTEDPRINTABLE)
 681         {
 682           for (; pp < pp1; pp++)
 683           {
 684             if (*pp == '_')
 685               *pd++ = ' ';
 686             else if (*pp == '=' &&
 687                      (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
 688                      (!(pp[2] & ~127) && hexval(pp[2]) != -1))
 689             {
 690               *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
 691               pp += 2;
 692             }
 693             else
 694               *pd++ = *pp;
 695           }
 696           *pd = 0;
 697         }
 698         else if (enc == ENCBASE64)
 699         {
 700           int c, b = 0, k = 0;
 701
 702           for (; pp < pp1; pp++)
 703           {
 704             if (*pp == '=')
 705               break;
 706             if ((*pp & ~127) || (c = base64val(*pp)) == -1)
 707               continue;
 708             if (k + 6 >= 8)
 709             {
 710               k -= 2;
 711               *pd++ = b | (c >> k);
 712               b = c << (8 - k);
 713             }
 714             else
 715             {
 716               b |= c << (k + 2);
 717               k += 6;
 718             }
 719           }
 720           *pd = 0;
 721         }
 722         break;
 723     }
 724   }
 725
 726   mutt_buffer_addstr (d, d0);
 727   rv = 0;
 728 error_out_0:
 729   FREE (&d0);
 730   return rv;
 731 }
 732
 733 /*
 734  * Find the start and end of the first encoded word in the string.
 735  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 736  * must be B or Q. Also, we don't require the encoded word to be
 737  * separated by linear-white-space (section 5(1)).
 738  */
 739 static const char *find_encoded_word (const char *s, const char **x)
 740 {
 741   const char *p, *q;
 742
 743   q = s;
 744   while ((p = strstr (q, "=?")))
 745   {
 746     for (q = p + 2;
 747          0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q);
 748          q++)
 749       ;
 750     if (q[0] != '?' || q[1] == '\0' || !strchr ("BbQq", q[1]) || q[2] != '?')
 751       continue;
 752     /* non-strict check since many MUAs will not encode spaces and question marks */
 753     for (q = q + 3; 0x20 <= *q && *q < 0x7f && (*q != '?' || q[1] != '='); q++)
 754       ;
 755     if (q[0] != '?' || q[1] != '=')
 756     {
 757       --q;
 758       continue;
 759     }
 760
 761     *x = q + 2;
 762     return p;
 763   }
 764
 765   return 0;
 766 }
 767
 768 /* return length of linear-white-space */
 769 static size_t lwslen (const char *s, size_t n)
 770 {
 771   const char *p = s;
 772   size_t len = n;
 773
 774   if (n <= 0)
 775     return 0;
 776
 777   for (; p < s + n; p++)
 778     if (!strchr (" \t\r\n", *p))
 779     {
 780       len = (size_t)(p - s);
 781       break;
 782     }
 783   if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */
 784     len = (size_t)0;
 785   return len;
 786 }
 787
 788 /* return length of linear-white-space : reverse */
 789 static size_t lwsrlen (const char *s, size_t n)
 790 {
 791   const char *p = s + n - 1;
 792   size_t len = n;
 793
 794   if (n <= 0)
 795     return 0;
 796
 797   if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
 798     return (size_t)0;
 799
 800   for (; p >= s; p--)
 801     if (!strchr (" \t\r\n", *p))
 802     {
 803       len = (size_t)(s + n - 1 - p);
 804       break;
 805     }
 806   return len;
 807 }
 808
 809 static void convert_and_add_text (BUFFER *d, const char *text, size_t len)
 810 {
 811   char *t;
 812
 813   if (AssumedCharset)
 814   {
 815     t = safe_malloc (len + 1);
 816     strfcpy (t, text, len + 1);
 817     convert_nonmime_string (&t);
 818     mutt_buffer_addstr (d, t);
 819     FREE (&t);
 820   }
 821   else
 822     mutt_buffer_addstr_n (d, text, len);
 823 }
 824
 825 static void convert_and_add_word (BUFFER *d, BUFFER *word, char **charset)
 826 {
 827   char *t;
 828
 829   t = safe_strdup (mutt_b2s (word));
 830   if (!t)
 831     goto out;
 832
 833   if (*charset)
 834     mutt_convert_string (&t, *charset, Charset, MUTT_ICONV_HOOK_FROM);
 835
 836   mutt_filter_unprintable (&t);
 837   mutt_buffer_addstr (d, t);
 838   FREE (&t);
 839
 840 out:
 841   mutt_buffer_clear (word);
 842   FREE (charset);  /* __FREE_CHECKED__ */
 843 }
 844
 845 /* try to decode anything that looks like a valid RFC2047 encoded
 846  * header field, ignoring RFC822 parsing rules
 847  */
 848 void rfc2047_decode (char **pd)
 849 {
 850   const char *s = *pd;
 851   const char *word_begin, *word_end;
 852   char *word_charset = NULL, *accumulated_charset = NULL;
 853   size_t m, n;
 854   int found_encoded = 0, rc;
 855   BUFFER *d, *word, *accumulated_word;
 856
 857   if (!s || !*s)
 858     return;
 859
 860   d = mutt_buffer_pool_get ();
 861   word = mutt_buffer_pool_get ();
 862   accumulated_word = mutt_buffer_pool_get ();
 863
 864   while ((word_begin = find_encoded_word (s, &word_end)) != NULL)
 865   {
 866     /* If there is text before the encoded word */
 867     if (word_begin != s)
 868     {
 869       n = (size_t) (word_begin - s);
 870
 871       if (!found_encoded || ((strspn (s, " \t\r\n") != n)))
 872       {
 873         convert_and_add_word (d, accumulated_word, &accumulated_charset);
 874
 875         if (option (OPTIGNORELWS))
 876         {
 877           if (found_encoded && (m = lwslen (s, n)) != 0)
 878           {
 879             if (m != n)
 880               mutt_buffer_addch (d, ' ');
 881             n -= m, s += m;
 882           }
 883
 884           if ((m = n - lwsrlen (s, n)) != 0)
 885           {
 886             convert_and_add_text (d, s, m);
 887             if (m != n)
 888               mutt_buffer_addch (d, ' ');
 889           }
 890         }
 891         else
 892           convert_and_add_text (d, s, n);
 893       }
 894     }
 895
 896     rc = rfc2047_decode_word (word, word_begin, &word_charset);
 897
 898     /* If the decode failed, or it's a different charset, write out
 899      * the accumulated part. */
 900     if ((rc != 0) ||
 901         (ascii_strcasecmp (accumulated_charset, word_charset) != 0))
 902     {
 903       convert_and_add_word (d, accumulated_word, &accumulated_charset);
 904     }
 905
 906     /* If the decode failed, write out the raw string. */
 907     if (rc != 0)
 908     {
 909       mutt_buffer_addstr_n (d, word_begin, word_end - word_begin);
 910     }
 911     /* Otherwise save it to be compared to the next word's charset */
 912     else
 913     {
 914       mutt_buffer_addstr (accumulated_word, mutt_b2s (word));
 915       mutt_str_replace (&accumulated_charset, word_charset);
 916     }
 917
 918     mutt_buffer_clear (word);
 919     FREE (&word_charset);
 920     found_encoded = 1;
 921     s = word_end;
 922   }
 923
 924   convert_and_add_word (d, accumulated_word, &accumulated_charset);
 925
 926   if (*s)
 927   {
 928     if (found_encoded && option (OPTIGNORELWS))
 929     {
 930       n = mutt_strlen (s);
 931       if ((m = lwslen (s, n)) != 0)
 932       {
 933         if (m != n)
 934           mutt_buffer_addch (d, ' ');
 935         s += m;
 936       }
 937     }
 938     convert_and_add_text (d, s, mutt_strlen (s));
 939   }
 940
 941   mutt_str_replace (pd, mutt_b2s (d));
 942
 943   mutt_buffer_pool_release (&d);
 944   mutt_buffer_pool_release (&word);
 945   mutt_buffer_pool_release (&accumulated_word);
 946 }
 947
 948 void rfc2047_decode_adrlist (ADDRESS *a)
 949 {
 950   while (a)
 951   {
 952     if (a->personal && ((strstr (a->personal, "=?") != NULL) ||
 953                         AssumedCharset))
 954       rfc2047_decode (&a->personal);
 955     else if (a->group && a->mailbox && (strstr (a->mailbox, "=?") != NULL))
 956       rfc2047_decode (&a->mailbox);
 957 #ifdef EXACT_ADDRESS
 958     if (a->val && strstr (a->val, "=?") != NULL)
 959       rfc2047_decode (&a->val);
 960 #endif
 961     a = a->next;
 962   }
 963 }
 964
 965 void rfc2047_decode_envelope (ENVELOPE *e)
 966 {
 967   rfc2047_decode_adrlist (e->from);
 968   rfc2047_decode_adrlist (e->to);
 969   rfc2047_decode_adrlist (e->cc);
 970   rfc2047_decode_adrlist (e->bcc);
 971   rfc2047_decode_adrlist (e->reply_to);
 972   rfc2047_decode_adrlist (e->mail_followup_to);
 973   rfc2047_decode_adrlist (e->return_path);
 974   rfc2047_decode_adrlist (e->sender);
 975   rfc2047_decode (&e->x_label);
 976   rfc2047_decode (&e->subject);
 977 }