From ebf9d198ba7a2f83b451939eabfff2c388d1c5b5 Mon Sep 17 00:00:00 2001 From: Thomas Roessler Date: Mon, 3 Jul 2000 10:11:20 +0000 Subject: [PATCH] RFC 2047 patch from EGE. --- rfc2047.c | 129 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 50 deletions(-) diff --git a/rfc2047.c b/rfc2047.c index f243d308..a7fc7e0d 100644 --- a/rfc2047.c +++ b/rfc2047.c @@ -76,7 +76,8 @@ static size_t convert_string (const char *f, size_t flen, return n; } -static char *choose_charset (const char *charsets, char *u, size_t ulen) +static char *choose_charset (const char *fromcode, const char *charsets, + char *u, size_t ulen) { char *tocode = 0; size_t bestn = 0; @@ -99,7 +100,7 @@ static char *choose_charset (const char *charsets, char *u, size_t ulen) t = safe_malloc (n + 1); memcpy (t, p, n), t[n] = '\0'; - n = convert_string (u, ulen, "UTF-8", t, &s, &slen); + n = convert_string (u, ulen, fromcode, t, &s, &slen); if (n == (size_t)(-1)) continue; free (s); @@ -188,8 +189,12 @@ static size_t q_encoder (char *s, const char *d, size_t dlen, * be converted to an encoded word of length *wlen using *encoder. * Otherwise return an upper bound on the maximum length of the data * which could be converted. + * The data is converted from fromcode (which must be stateless) to + * tocode, unless fromcode is 0, in which case the data is assumed to + * be already in tocode, which should be 8-bit and stateless. */ -static size_t try_block (const char *d, size_t dlen, const char *tocode, +static size_t try_block (const char *d, size_t dlen, + const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen) { char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; @@ -199,18 +204,28 @@ static size_t try_block (const char *d, size_t dlen, const char *tocode, size_t ibl, obl; int count, len, len_b, len_q; - cd = mutt_iconv_open (tocode, "UTF-8"); - assert (cd != (iconv_t)(-1)); - ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); - if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) || - iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) + if (fromcode) { - assert (errno == E2BIG); + cd = mutt_iconv_open (tocode, fromcode); + assert (cd != (iconv_t)(-1)); + ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); + if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) || + iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) + { + assert (errno == E2BIG); + iconv_close (cd); + assert (ib > d); + return (ib - d == dlen) ? dlen : ib - d + 1; + } iconv_close (cd); - assert (ib > d); - return (ib - d == dlen) ? dlen : ib - d + 1; } - iconv_close (cd); + else + { + if (dlen > sizeof (buf1) - strlen (tocode)) + return sizeof (buf1) - strlen (tocode) + 1; + memcpy (buf1, d, dlen); + ob = buf1 + dlen; + } count = 0; for (p = buf1; p < ob; p++) @@ -251,7 +266,8 @@ static size_t try_block (const char *d, size_t dlen, const char *tocode, * Return the length of the encoded word. */ static size_t encode_block (char *s, char *d, size_t dlen, - const char *tocode, encoder_t encoder) + const char *fromcode, const char *tocode, + encoder_t encoder) { char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1]; iconv_t cd; @@ -259,14 +275,19 @@ static size_t encode_block (char *s, char *d, size_t dlen, char *ob; size_t ibl, obl, n1, n2; - cd = mutt_iconv_open (tocode, "UTF-8"); - assert (cd != (iconv_t)(-1)); - ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); - n1 = iconv (cd, &ib, &ibl, &ob, &obl); - n2 = iconv (cd, 0, 0, &ob, &obl); - assert (n1 != (size_t)(-1) && n2 != (size_t)(-1)); - iconv_close (cd); - return (*encoder) (s, buf1, ob - buf1, tocode); + if (fromcode) + { + cd = mutt_iconv_open (tocode, fromcode); + assert (cd != (iconv_t)(-1)); + ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode); + n1 = iconv (cd, &ib, &ibl, &ob, &obl); + n2 = iconv (cd, 0, 0, &ob, &obl); + assert (n1 != (size_t)(-1) && n2 != (size_t)(-1)); + iconv_close (cd); + return (*encoder) (s, buf1, ob - buf1, tocode); + } + else + return (*encoder) (s, d, dlen, tocode); } /* @@ -276,7 +297,7 @@ static size_t encode_block (char *s, char *d, size_t dlen, * We start in column col, which limits the length of the word. */ static size_t choose_block (char *d, size_t dlen, int col, - const char *tocode, + const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen) { size_t n, nn; @@ -285,7 +306,7 @@ static size_t choose_block (char *d, size_t dlen, int col, for (;;) { assert (d + n > d); - nn = try_block (d, n, tocode, encoder, wlen); + nn = try_block (d, n, fromcode, tocode, encoder, wlen); if (!nn && col + *wlen <= ENCWORD_LEN_MAX + 1) break; nn = (nn ? nn : n) - 1; @@ -303,8 +324,9 @@ static size_t choose_block (char *d, size_t dlen, int col, * Place the result of RFC-2047-encoding (d, dlen) into the dynamically * allocated buffer (e, elen). The input data is in charset fromcode * and is converted into a charset chosen from charsets. - * Return 1 if the input data is invalid, 2 if no conversion is possible, - * otherwise 0 on success. + * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8 + * failed, otherwise 0. If conversion failed, fromcode is assumed to be + * compatible with us-ascii and the original data is used. * The input data is assumed to be a single line starting at column col; * if col is non-zero, the preceding character was a space. */ @@ -312,16 +334,22 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, const char *fromcode, const char *charsets, char **e, size_t *elen) { + int ret = 0; char *buf; size_t bufpos, buflen; char *u, *t0, *t1, *t; size_t ulen, r, n, wlen; encoder_t encoder; - char *tocode; + char *tocode1 = 0; + const char *tocode; + char *icode = "UTF-8"; - /* Convert to UTF-8. */ - if (convert_string (d, dlen, fromcode, "UTF-8", &u, &ulen)) - return 1; + /* Try to convert to UTF-8. */ + if (convert_string (d, dlen, fromcode, icode, &u, &ulen)) + { + ret = 1, icode = 0; + u = safe_malloc (ulen = dlen), memcpy (u, d, dlen); + } /* Find earliest and latest things we must encode. */ t0 = t1 = 0; @@ -336,15 +364,17 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, { /* No encoding is required. */ *e = u, *elen = ulen; - return 0; + return ret; } /* Choose target charset. */ - tocode = choose_charset (charsets, u, ulen); - if (!tocode) + tocode = fromcode; + if (icode) { - free (u); - return 2; + if ((tocode1 = choose_charset (icode, charsets, u, ulen))) + tocode = tocode1; + else + ret = 2, icode = 0; } /* Adjust t0 for maximum length of line. */ @@ -359,7 +389,7 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, continue; for (t = t0 + 1; t < u + ulen && (*t & 0xc0) == 0x80; t++) ; - if (!try_block (t0, t - t0, tocode, &encoder, &wlen) && + if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) && col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1) break; } @@ -371,7 +401,7 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, continue; for (t = t1 - 1; (*t & 0xc0) == 0x80; t--) ; - if (!try_block (t, t1 - t, tocode, &encoder, &wlen) && + if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) && 1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1) break; } @@ -390,7 +420,7 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, for (;;) { /* Find how much we can encode. */ - n = choose_block (t, t1 - t, col, tocode, &encoder, &wlen); + n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen); if (n == t1 - t) { /* See if we can fit the us-ascii suffix, too. */ @@ -411,7 +441,7 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, ; continue; } - n = choose_block (t, n, col, tocode, &encoder, &wlen); + n = choose_block (t, n, col, icode, tocode, &encoder, &wlen); } /* Add to output buffer. */ @@ -421,7 +451,7 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, buflen = bufpos + wlen + strlen (LINEBREAK); safe_realloc ((void **) &buf, buflen); } - r = encode_block (buf + bufpos, t, n, tocode, encoder); + r = encode_block (buf + bufpos, t, n, icode, tocode, encoder); assert (r == wlen); bufpos += wlen; memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK)); @@ -436,16 +466,16 @@ static int rfc2047_encode (const char *d, size_t dlen, int col, /* Add last encoded word and us-ascii suffix to buffer. */ buflen = bufpos + wlen + (u + ulen - t1); safe_realloc ((void **) &buf, buflen); - r = encode_block (buf + bufpos, t, t1 - t, tocode, encoder); + r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder); assert (r == wlen); bufpos += wlen; memcpy (buf + bufpos, t1, u + ulen - t1); - free (tocode); + free (tocode1); free (u); *e = buf; *elen = buflen; - return 0; + return ret; } void rfc2047_encode_string (char **pd) @@ -463,14 +493,13 @@ void rfc2047_encode_string (char **pd) /* Pretend that we are starting in column 32, thus allowing for a field-name with up to 30 characters. */ - if (!rfc2047_encode (*pd, strlen (*pd), 32, - Charset, charsets, &e, &elen)) - { - safe_realloc ((void **) &e, elen + 1); - e[elen] = '\0'; - free (*pd); - *pd = e; - } + rfc2047_encode (*pd, strlen (*pd), 32, + Charset, charsets, &e, &elen); + + safe_realloc ((void **) &e, elen + 1); + e[elen] = '\0'; + free (*pd); + *pd = e; } void rfc2047_encode_adrlist (ADDRESS *addr) -- 2.40.0