From: Thomas Roessler Date: Tue, 9 May 2000 15:19:26 +0000 (+0000) Subject: Edmund Grimley Evans' UTF-8 patch. X-Git-Tag: mutt-1-3-rel~5 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3de049372c8ca458e096b8c8ece2d4f95da0fe82;p=mutt Edmund Grimley Evans' UTF-8 patch. --- diff --git a/Makefile.am b/Makefile.am index e751ced4..a58f7a40 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,7 +13,12 @@ IMAP_SUBDIR = imap IMAP_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/imap endif -SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR) +if BUILD_ICONV +ICONV_SUBDIR = iconv +ICONV_INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/iconv +endif + +SUBDIRS = m4 po intl doc contrib $(CHARMAP_SUBDIR) $(IMAP_SUBDIR) $(ICONV_SUBDIR) if NEEDS_PGPEWRAP bin_SCRIPTS = pgpewrap muttbug @@ -34,12 +39,13 @@ mutt_SOURCES = $(BUILT_SOURCES) \ rfc822.c rfc1524.c rfc2047.c rfc2231.c \ score.c send.c sendlib.c signal.c sort.c \ status.c system.c thread.c charset.c history.c lib.c \ - muttlib.c editmsg.c + muttlib.c editmsg.c utf8.c mbyte.c wcwidth.c gettext.c mutt_LDADD = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAP) $(MUTTLIBS) \ - $(INTLLIBS) + $(INTLLIBS) $(LIBICONV) -mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) $(INTLDEPS) +mutt_DEPENDENCIES = @MUTT_LIB_OBJECTS@ @LIBOBJS@ $(LIBIMAPDEPS) \ + $(INTLDEPS) $(LIBICONVDEPS) makedoc_SOURCES = makedoc.c @@ -49,7 +55,8 @@ DEFS=-DSHAREDIR=\"$(sharedir)\" -DSYSCONFDIR=\"$(sysconfdir)\" \ -DBINDIR=\"$(bindir)\" -DHAVE_CONFIG_H=1 # top_srcdir is for building outside of the source tree -INCLUDES=-I$(top_srcdir) -I. $(IMAP_INCLUDES) -Iintl -I$(includedir) +INCLUDES=-I$(top_srcdir) -I. 
$(IMAP_INCLUDES) $(ICONV_INCLUDES) \ + -Iintl -I$(includedir) non_us_sources = pgp.c pgpinvoke.c pgpkey.c pgplib.c sha1dgst.c \ gnupgparse.c sha.h sha_locl.h \ diff --git a/VERSION b/VERSION index e9bc1499..f0bb29e7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.14 +1.3.0 diff --git a/charset.c b/charset.c index a531519c..30296275 100644 --- a/charset.c +++ b/charset.c @@ -19,13 +19,8 @@ * Boston, MA 02111, USA. */ -/* - * This module deals with POSIX.2 character set definition files. - */ - #include -#include #include #include @@ -34,49 +29,17 @@ #include #include +#include + #include "mutt.h" #include "charset.h" -/* Define this if you want any dprint () statements in this code */ - -#undef CHARSET_DEBUG - -#ifndef CHARSET_DEBUG -# undef dprint -# define dprint(a, b) (void) a +#ifndef EILSEQ +#define EILSEQ EINVAL #endif -/* Module-global variables */ - -static HASH *Translations = NULL; -static HASH *Charsets = NULL; -static HASH *CharsetAliases = NULL; - -/* Function Prototypes */ - -static CHARDESC *chardesc_new (void); -static CHARDESC *repr2descr (int repr, CHARSET * cs); - -static CHARMAP *charmap_new (void); -static CHARMAP *parse_charmap_header (FILE * fp); -static CHARSET *charset_new (size_t hash_size); - -static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to); - -static char translate_character (CHARSET * to, const char *symbol); - -static int load_charset (const char *filename, CHARSET ** csp, short multbyte); -static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp); -static int _cd_compar (const void *a, const void *b); - -static void canonical_charset (char *dest, size_t dlen, const char *name); -static void chardesc_free (CHARDESC ** cdp); -static void charmap_free (CHARMAP ** cp); -static void charset_free (CHARSET ** csp); -static void fix_symbol (char *symbol, CHARMAP * m); - -static void canonical_charset (char *dest, size_t dlen, const char *name) +void mutt_canonical_charset (char *dest, size_t dlen, 
const char *name) { size_t i; @@ -87,785 +50,113 @@ static void canonical_charset (char *dest, size_t dlen, const char *name) { if (strchr ("_/. ", name[i])) dest[i] = '-'; + else if ('A' <= name[i] && name[i] <= 'Z') + dest[i] = name[i] - 'A' + 'a'; else - dest[i] = tolower (name[i]); + dest[i] = name[i]; } dest[i] = '\0'; } -static CHARSET *charset_new (size_t hash_size) +int mutt_is_utf8 (const char *s) { - CHARSET *cp = safe_malloc (sizeof (CHARSET)); - size_t i; - - cp->n_symb = 256; - cp->u_symb = 0; - cp->multbyte = 1; - cp->symb_to_repr = hash_create (hash_size); - cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *)); - - for (i = 0; i < cp->n_symb; i++) - cp->description[i] = NULL; - - return cp; -} - -static void charset_free (CHARSET ** csp) -{ - CHARSET *cs = *csp; - size_t i; - - for (i = 0; i < cs->n_symb; i++) - chardesc_free (&cs->description[i]); - - safe_free ((void **) &cs->description); - - hash_destroy (&cs->symb_to_repr, NULL); - safe_free ((void **) csp); -} + char buffer[8]; -static CHARMAP *charmap_new (void) -{ - CHARMAP *m = safe_malloc (sizeof (CHARMAP)); - - m->charset = NULL; - m->escape_char = '\\'; - m->comment_char = '#'; - m->multbyte = 1; - m->aliases = NULL; - - return m; -} - -static void charmap_free (CHARMAP ** cp) -{ - if (!cp || !*cp) - return; - - mutt_free_list (&(*cp)->aliases); - safe_free ((void **) &(*cp)->charset); - safe_free ((void **) cp); - - return; -} - -static CHARDESC *chardesc_new (void) -{ - CHARDESC *p = safe_malloc (sizeof (CHARDESC)); - - p->symbol = NULL; - p->repr = -1; - - return p; -} - -static void chardesc_free (CHARDESC ** cdp) -{ - if (!cdp || !*cdp) - return; - - - safe_free ((void **) &(*cdp)->symbol); - safe_free ((void **) cdp); + if (!s) + return 0; - return; + mutt_canonical_charset (buffer, sizeof (buffer), s); + return !mutt_strcmp (buffer, "utf-8"); } -static CHARMAP *parse_charmap_header (FILE * fp) -{ - char buffer[1024]; - char *t, *u; - CHARMAP *m = charmap_new (); - - 
while (fgets (buffer, sizeof (buffer), fp)) - { - if ((t = strchr (buffer, '\n'))) - *t = '\0'; - else - { - charmap_free (&m); - return NULL; - } - - if (!strncmp (buffer, "CHARMAP", 7)) - break; - - if (*buffer == m->comment_char) - { - if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias")) - { - char _tmp[SHORT_STRING]; - while ((t = strtok(NULL, "\t, "))) - { - canonical_charset (_tmp, sizeof (_tmp), t); - m->aliases = mutt_add_list (m->aliases, _tmp); - } - } - continue; - } - - if (!(t = strtok (buffer, "\t "))) - continue; - - if (!(u = strtok (NULL, "\t "))) - { - charmap_free (&m); - return NULL; - } - if (!strcmp (t, "")) - { - safe_free ((void **) &m->charset); - canonical_charset (u, strlen (u) + 1, u); - m->charset = safe_strdup (u); - } - else if (!strcmp (t, "")) - { - m->comment_char = *u; - } - else if (!strcmp (t, "")) - { - m->escape_char = *u; - } - else if (!strcmp (t, "")) - { - m->multbyte = strtol (u, NULL, 0); - } - } - - return m; -} - -/* Properly handle escape characters within a symbol. 
*/ +/* + * Like iconv_open, but canonicalises the charsets + */ -static void fix_symbol (char *symbol, CHARMAP * m) +iconv_t mutt_iconv_open (const char *tocode, const char *fromcode) { - char *s, *d; + char tocode1[SHORT_STRING]; + char fromcode1[SHORT_STRING]; - for (s = symbol, d = symbol; *s; *d++ = *s++) - { - if (*s == m->escape_char && !*++s) - break; - } - - *d = *s; + mutt_canonical_charset (tocode1, sizeof (tocode1), tocode); + mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode); + return iconv_open (tocode1, fromcode1); } -enum -{ - CL_DESCR, - CL_END, - CL_COMMENT, - CL_ERROR -}; - -static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp) -{ - char *t, *u; - short n; - CHARDESC *descr; - - if (*line == m->comment_char) - return CL_COMMENT; - - descr = *descrp = chardesc_new (); - - if (!strncmp (line, "END CHARMAP", 11)) - { - chardesc_free (descrp); - return CL_END; - } - - for (t = line; *t && isspace ((unsigned char) *t); t++) - ; - - if (*t++ != '<') - { - chardesc_free (descrp); - return CL_ERROR; - } - - for (u = t; *u && *u != '>'; u++) - { - if (*u == m->escape_char && u[1]) - u++; - } - - if (*u != '>') - { - chardesc_free (descrp); - return CL_ERROR; - } - - *u++ = '\0'; - descr->symbol = safe_strdup (t); - fix_symbol (descr->symbol, m); - - for (t = u; *t && isspace ((unsigned char) *t); t++) - ; - - for (u = t; *u && !isspace ((unsigned char) *u); u++) - ; - - *u++ = 0; - descr->repr = 0; - - for (n = 0; *t == m->escape_char && n < m->multbyte; n++) - { - switch (*++t) - { - case 'x': - descr->repr = descr->repr * 256 + strtol (++t, &t, 16); - break; - case 'd': - descr->repr = descr->repr * 256 + strtol (++t, &t, 10); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - descr->repr = descr->repr * 256 + strtol (t, &t, 8); - break; - default: - chardesc_free (descrp); - return CL_ERROR; - } - } - - if (!n) - { - chardesc_free (descrp); - return CL_ERROR; - 
} - - return CL_DESCR; -} - -static int _cd_compar (const void *a, const void *b) -{ - const CHARDESC *ap, *bp; - int i; - - ap = * (CHARDESC **) a; - bp = * (CHARDESC **) b; - - i = ap->repr - bp->repr; - - dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n", - ap->repr, ap->symbol, bp->repr, bp->symbol, i)); - - return i; -} /* - * Load a character set description into memory. - * - * The multibyte parameter tells us whether we are going - * to accept multibyte character sets. + * Like iconv, but keeps going even when the input is invalid + * If you're supplying inrepls, the source charset should be stateless; + * if you're supplying an outrepl, the target charset should be. */ -static int load_charset (const char *filename, CHARSET ** csp, short multbyte) +size_t mutt_iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + const char **inrepls, const char *outrepl) { - CHARDESC *cd = NULL; - CHARSET *cs = NULL; - CHARMAP *m = NULL; - FILE *fp; - char buffer[1024]; - int i; - int rv = -1; - - cs = *csp = charset_new (multbyte ? 1031 : 257); + size_t ret = 0, ret1; + const char *ib = *inbuf; + size_t ibl = *inbytesleft; + char *ob = *outbuf; + size_t obl = *outbytesleft; - dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename)); - - if ((fp = fopen (filename, "r")) == NULL) + for (;;) { - char _filename[_POSIX_PATH_MAX]; - - snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename); - dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename)); - - if ((fp = fopen (_filename, "r")) == NULL) + ret1 = iconv (cd, &ib, &ibl, &ob, &obl); + if (ret1 != (size_t)-1) + ret += ret1; + if (ibl && obl && errno == EILSEQ) { - dprint (2, (debugfile, "load_charset: Failed.\n")); - goto bail; - } - } - - if ((m = parse_charmap_header (fp)) == NULL) - goto bail; - - /* Don't handle multibyte character sets unless explicitly requested - * to do so. 
- */ - - if (m->multbyte > 1 && !multbyte) - { - dprint (2, (debugfile, "load_charset: m->multbyte == %d\n", - (int) m->multbyte)); - goto bail; - } - - cs->multbyte = m->multbyte; - - while (fgets (buffer, sizeof (buffer), fp) != NULL) - { - i = parse_charmap_line (buffer, m, &cd); - - if (i == CL_END) - break; - else if (i == CL_DESCR) - { - dprint (5, (debugfile, "load_charset: Got character description: <%s> -> %x\n", - cd->symbol, cd->repr)); - - if (!multbyte) + if (inrepls) { - if (0 <= cd->repr && cd->repr < 256) + /* Try replacing the input */ + const char **t; + for (t = inrepls; *t; t++) { - hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); - hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); - - /* note: we intentionally leak some memory here. */ - if (!cs->description[cd->repr]) - cs->u_symb++; - - cs->description[cd->repr] = cd; - cd = NULL; - } - } - else - { - if (cs->u_symb == cs->n_symb) - { - size_t new_size = cs->n_symb + 256; - size_t i; - - safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *)); - for (i = cs->u_symb; i < new_size; i++) - cs->description[i] = NULL; - cs->n_symb = new_size; + const char *ib1 = *t; + size_t ibl1 = strlen (*t); + char *ob1 = ob; + size_t obl1 = obl; + iconv (cd, &ib1, &ibl1, &ob1, &obl1); + if (!ibl1) + { + ++ib, --ibl; + ob = ob1, obl = obl1; + ++ret; + break; + } } - - hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); - hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); - - cs->description[cs->u_symb++] = cd; - cd = NULL; + if (*t) + continue; } - } - - if (cd) - { - dprint (5, (debugfile, "load_charset: character description still present: <%s>->%x\n", - cd->symbol, cd->repr)); - } - chardesc_free (&cd); - } - - if (multbyte) - qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar); - - rv = 0; - -bail: - charmap_free (&m); - if (fp) - fclose (fp); - if (rv) - charset_free (csp); - - return rv; -} - -static CHARDESC *repr2descr (int repr, CHARSET * cs) -{ - CHARDESC 
*key; - CHARDESC **r; - - if (!cs || repr < 0) - return NULL; - - if (cs->multbyte == 1) - { - if (repr < 256) - return cs->description[repr]; - else - return NULL; - } - - key = safe_malloc (sizeof(CHARDESC)); - key->repr = repr; - key->symbol = ""; /* otherwise, the - * debug code may - * segfault. ouch. - */ - - r = bsearch (&key, cs->description, cs->u_symb, - sizeof (CHARDESC *), _cd_compar); - - safe_free ((void **) &key); - - if (r) return *r; - - return NULL; -} - -/* Build a translation table. If a character cannot be - * translated correctly, we try to find an approximation - * from the portable charcter set. - * - * Note that this implies the assumption that the portable - * character set can be used without any conversion. - * - * Should be safe on POSIX systems. - */ - -static char translate_character (CHARSET * to, const char *symbol) -{ - CHARDESC *cdt; - - if ((cdt = hash_find (to->symb_to_repr, symbol))) - return (char) cdt->repr; - else - return *symbol; -} - -static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to) -{ - int i; - CHARSET_MAP *map; - CHARDESC *cd; - - /* This is for 8-bit character sets. */ - - if (!from || !to || from->multbyte > 1 || to->multbyte > 1) - return NULL; - - map = safe_malloc (sizeof (CHARSET_MAP)); - for (i = 0; i < 256; i++) - { - if (!(cd = repr2descr (i, from))) - (*map)[i] = '?'; - else - (*map)[i] = translate_character (to, cd->symbol); - } - - return map; -} - -/* Currently, just scan the various charset definition files. - * On the long run, we should cache this stuff in a file. 
- */ - -static HASH *load_charset_aliases (void) -{ - HASH *charset_aliases; - CHARMAP *m; - DIR *dp; - FILE *fp; - struct dirent *de; - - if ((dp = opendir (CHARMAPS_DIR)) == NULL) - return NULL; - - charset_aliases = hash_create(127); - - while ((de = readdir (dp))) - { - char fnbuff[_POSIX_PATH_MAX]; - - if (*de->d_name == '.') - continue; - - snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name); - dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff)); - if ((fp = fopen (fnbuff, "r")) == NULL) - continue; - - if ((m = parse_charmap_header (fp)) != NULL) - { - LIST *lp; - char buffer[LONG_STRING]; - - canonical_charset (buffer, sizeof (buffer), de->d_name); - m->aliases = mutt_add_list (m->aliases, buffer); - - if (m->charset) - m->aliases = mutt_add_list (m->aliases, m->charset); - - for (lp = m->aliases; lp; lp = lp->next) + if (outrepl) { - if (lp->data) + /* Try replacing the output */ + int n = strlen (outrepl); + if (n <= obl) { - dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n", - lp->data, de->d_name)); - if (hash_find (charset_aliases, lp->data)) - { - dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n", - lp->data)); - } - else - hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0); + memcpy (ob, outrepl, n); + ++ib, --ibl; + ob += n, obl -= n; + ++ret; + continue; } } - - charmap_free (&m); - } - - fclose (fp); - } - - closedir (dp); - return charset_aliases; -} - -static void init_charsets () -{ - if (Charsets) return; - - Charsets = hash_create (127); - Translations = hash_create (127); - CharsetAliases = load_charset_aliases (); -} - -CHARSET *mutt_get_charset (const char *name) -{ - CHARSET *charset; - char buffer[SHORT_STRING]; - char *real_charset; - char *hooked; - - if (!name || !*name) - return (NULL); - - init_charsets(); - canonical_charset (buffer, sizeof(buffer), name); - - /* needs to be documented */ - - if ((hooked = mutt_charset_hook (buffer))) - 
canonical_charset (buffer, sizeof (buffer), hooked); - - dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer)); - - if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer))) - real_charset = buffer; - - dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset)); - - if(!(charset = hash_find (Charsets, real_charset))) - { - dprint (2, (debugfile, "mutt_get_charset: Need to load.\n")); - if (load_charset(real_charset, &charset, 0) == 0) - hash_insert(Charsets, safe_strdup(real_charset), charset, 1); - else - charset = NULL; - } - return charset; -} - -CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to) -{ - char from_canon[SHORT_STRING]; - char to_canon[SHORT_STRING]; - char key[SHORT_STRING]; - char *from, *to; - CHARSET *from_cs, *to_cs; - CHARSET_MAP *map; - - if(!_from || !_to) - return NULL; - - canonical_charset(from_canon, sizeof(from_canon), _from); - canonical_charset(to_canon, sizeof(to_canon), _to); - - /* quick check for some trivial cases. Doing this before - * we actually call the initialization routine delays character - * set loading until it's _really_ needed. 
- */ - - if(!strcmp(from_canon, to_canon) - || (!strcmp (from_canon, "us-ascii") && !strncmp (to_canon, "iso-8859", 8))) - return NULL; - - init_charsets(); - - if(!CharsetAliases || !(from = hash_find(CharsetAliases, from_canon))) - from = from_canon; - if(!CharsetAliases || !(to = hash_find(CharsetAliases, to_canon))) - to = to_canon; - - /* quick check for the identity mapping */ - if((from == to) || !mutt_strcmp(from, to)) - return NULL; - - snprintf(key, sizeof(key), "%s %s", from, to); - if((map = hash_find(Translations, key)) == NULL) - { - from_cs = mutt_get_charset(from); - to_cs = mutt_get_charset(to); - - if((map = build_translation(from_cs, to_cs))) - hash_insert(Translations, safe_strdup(key), map, 1); - } - return map; -} - -unsigned char mutt_display_char(unsigned char ch, CHARSET_MAP *map) -{ - if (!map || !ch) - return ch; - - return (unsigned char) (*map)[ch]; -} - -int mutt_display_string(char *str, CHARSET_MAP *map) -{ - if(!map) - return -1; - - while ((*str = mutt_display_char((unsigned char)*str, map))) - str++; - - return 0; -} - -/*************************************************************/ -/* UTF-8 support */ - -int mutt_is_utf8(const char *s) -{ - char buffer[SHORT_STRING]; - - if(!s) - return 0; - - canonical_charset(buffer, sizeof(buffer), s); - return !mutt_strcmp(buffer, "utf-8"); -} - -/* macros for the various bit maps we need */ - -#define IOOOOOOO 0x80 -#define IIOOOOOO 0xc0 -#define IIIOOOOO 0xe0 -#define IIIIOOOO 0xf0 -#define IIIIIOOO 0xf8 -#define IIIIIIOO 0xfc -#define IIIIIIIO 0xfe -#define IIIIIIII 0xff - -static struct unicode_mask -{ - int mask; - int value; - short len; -} -unicode_masks[] = -{ - { IOOOOOOO, 0, 1 }, - { IIIOOOOO, IIOOOOOO, 2 }, - { IIIIOOOO, IIIOOOOO, 3 }, - { IIIIIOOO, IIIIOOOO, 4 }, - { IIIIIIOO, IIIIIOOO, 5 }, - { IIIIIIIO, IIIIIIOO, 6 }, - { 0, 0, 0 } -}; - - -static char *utf_to_unicode(int *out, char *in) -{ - struct unicode_mask *um = NULL; - short i; - - for(i = 0; unicode_masks[i].mask; i++) 
- { - if((*in & unicode_masks[i].mask) == unicode_masks[i].value) - { - um = &unicode_masks[i]; - break; - } - } - - if(!um) - { - *out = (int) '?'; - return in + 1; - } - - for(i = 1; i < um->len; i++) - { - if((in[i] & IIOOOOOO) != IOOOOOOO) - { - *out = (int) '?'; - return in + i; } + *inbuf = ib, *inbytesleft = ibl; + *outbuf = ob, *outbytesleft = obl; + return ret; } - - *out = ((int)in[0]) & ~um->mask & 0xff; - for(i = 1; i < um->len; i++) - *out = (*out << 6) | (((int)in[i]) & ~IIOOOOOO & 0xff); - - if(!*out) - *out = '?'; - - return in + um->len; -} - -static CHARSET *Unicode = NULL; - -static int unicode_init (void) -{ - if (!Unicode) - { - if (load_charset ("ISO_10646", &Unicode, 1) == -1) - Unicode = NULL; - } - - return (Unicode == NULL ? -1 : 0); -} - -void mutt_decode_utf8_string(char *str, CHARSET *chs) -{ - char *s, *t; - CHARDESC *cd; - int ch; - - (void) unicode_init (); - - for (s = t = str; *t; s++) - { - t = utf_to_unicode(&ch, t); - - /* handle us-ascii characters directly */ - if (0 <= ch && ch < 128) - *s = ch; - else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (chs, cd->symbol)) != -1) - *s = ch; - else - *s = '?'; - - if(!*s) *s = '?'; - } - - *s = '\0'; } - - /************************************************************* * General decoder framework + * Used in handler.c for converting to mutt's Charset */ - - #define MIN(a,b) (((a) <= (b)) ? 
(a): (b)) DECODER *mutt_open_decoder (const char *src, const char *dest) @@ -875,34 +166,16 @@ DECODER *mutt_open_decoder (const char *src, const char *dest) d->in.size = DECODER_BUFFSIZE; d->out.size = DECODER_BUFFSIZE; - d->_in = &d->in; - - if (!src || !dest || mutt_is_utf8 (dest)) + if (dest && src && (d->cd = mutt_iconv_open (dest, src)) != (iconv_t)-1) { - d->just_take_id = 1; - d->_in = &d->out; - return d; - } - - if (mutt_is_utf8 (src)) - { - if (!(d->chs = mutt_get_charset (dest)) || unicode_init () == -1) - { - d->just_take_id = 1; - d->_in = &d->out; - return d; - } - - d->src_is_utf8 = 1; - return d; + d->_in = &d->in; + d->outrepl = mutt_is_utf8 (dest) ? "\357\277\275" : "?"; } - - if (!(d->chm = mutt_get_translation (src, dest))) + else { d->just_take_id = 1; d->_in = &d->out; } - return d; } @@ -911,7 +184,23 @@ void mutt_free_decoder (DECODER **dpp) safe_free ((void **) dpp); } -static void _process_data (DECODER *, short); +static void _process_data (DECODER *d, short force) +{ + if (force) d->forced = 1; + + if (!d->just_take_id) + { + const char *ib = d->in.buff; + size_t ibl = d->in.used; + char *ob = d->out.buff + d->out.used; + size_t obl = d->out.size - d->out.used; + + mutt_iconv (d->cd, &ib, &ibl, &ob, &obl, 0, d->outrepl); + memmove (d->in.buff, ib, ibl); + d->in.used = ibl; + d->out.used = d->out.size - obl; + } +} void mutt_decoder_push (DECODER *d, void *_buff, size_t blen, size_t *taken) { @@ -977,75 +266,6 @@ void mutt_decoder_pop_to_state (DECODER *d, STATE *s) } } -/* this is where things actually happen */ - -static void _process_data_8bit (DECODER *d) -{ - size_t i; - - for (i = 0; i < d->in.used && d->out.used < d->out.size; i++) - d->out.buff[d->out.used++] = mutt_display_char (d->in.buff[i], d->chm); - - memmove (d->in.buff, d->in.buff + i, d->in.used - i); - d->in.used -= i; -} - -static void _process_data_utf8 (DECODER *d) -{ - size_t i, j; - CHARDESC *cd; - - for (i = 0, j = 0; i < d->in.used && d->out.used < d->out.size;) 
- { - while (((d->in.buff[j] & 0x80) == 0) && (j < d->in.used) && (d->out.used < d->out.size)) - d->out.buff[d->out.used++] = d->in.buff[j++]; - i = j; - - while ((d->in.buff[j] & 0x80) && j < d->in.used && - (d->forced || j + 6 < d->in.used) && d->out.used < d->out.size) - { - int ch; - char *c = utf_to_unicode (&ch, &d->in.buff[j]); - - j = c - d->in.buff; - - if (0 <= ch && ch < 128) - d->out.buff[d->out.used] = ch; - else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (d->chs, cd->symbol)) != -1) - d->out.buff[d->out.used] = ch; - else - d->out.buff[d->out.used] = '?'; - - if(!d->out.buff[d->out.used]) - d->out.buff[d->out.used] = '?'; - - d->out.used++; - } - - i = j; - - if (d->in.buff[j] & 0x80) - break; - } - - memmove (d->in.buff, d->in.buff + i, d->in.used - i); - d->in.used -= i; -} - -static void _process_data (DECODER *d, short force) -{ - if (force) d->forced = 1; - - if (!d->just_take_id) - { - if (d->src_is_utf8) - _process_data_utf8 (d); - else - _process_data_8bit (d); - } -} - -/* This one is currently lacking utf-8 support */ int mutt_recode_file (const char *fname, const char *src, const char *dest) { @@ -1061,12 +281,6 @@ int mutt_recode_file (const char *fname, const char *src, const char *dest) char *t; DECODER *dec; - if (mutt_is_utf8 (dest) && !mutt_is_utf8 (src)) - { - mutt_error (_("We can't currently handle utf-8 at this point.")); - return -1; - } - if ((fp = fopen (fname, "r+")) == NULL) { mutt_error (_("Can't open %s: %s."), fname, strerror (errno)); @@ -1161,3 +375,155 @@ bail: if (tmpfp) fclose (tmpfp); return rv; } + + +/* + * Convert a string in place + * Used in rfc2047.c and rfc2231.c + */ + +int mutt_convert_string (char *s, size_t len, const char *from, const char *to) +{ + iconv_t cd; + const char *repls[] = { "\357\277\275", "?", 0 }; + + if (to && from && (cd = mutt_iconv_open (to, from)) != (iconv_t)-1) + { + int n; + const char *ib; + char *c, *ob; + size_t ibl, obl; + const char **inrepls = 0; + char 
*outrepl = 0; + + if (mutt_is_utf8 (to)) + outrepl = "\357\277\275"; + else if (mutt_is_utf8 (from)) + inrepls = repls; + else + outrepl = "?"; + + n = strlen (s); + c = safe_malloc (n); + memcpy (c, s, n); + ib = c, ibl = n, ob = s, obl = len ? len-1 : n; + mutt_iconv (cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); + free (c); + iconv_close (cd); + *ob = '\0'; + return 0; + } + else + return -1; +} + + +/* + * FGETCONV stuff for converting a file while reading it + * Used in sendlib.c for converting from mutt's Charset + */ + +struct fgetconv_s +{ + FILE *file; + iconv_t cd; + char bufi[512]; + char bufo[512]; + char *p; + char *ob; + char *ib; + size_t ibl; + const char **inrepls; +}; + +struct fgetconv_not +{ + FILE *file; + iconv_t cd; +}; + +FGETCONV *fgetconv_open (FILE *file, const char *from, const char *to) +{ + struct fgetconv_s *fc; + iconv_t cd = (iconv_t)-1; + const char *repls[] = { "\357\277\275", "?", 0 }; + + if (from && to) + cd = mutt_iconv_open (to, from); + + if (cd != (iconv_t)-1) + { + fc = safe_malloc (sizeof (struct fgetconv_s)); + fc->p = fc->ob = fc->bufo; + fc->ib = fc->bufi; + fc->ibl = 0; + fc->inrepls = mutt_is_utf8 (to) ? repls : repls + 1; + } + else + fc = safe_malloc (sizeof (struct fgetconv_not)); + fc->file = file; + fc->cd = cd; + return (FGETCONV *)fc; +} + +int fgetconv (FGETCONV *_fc) +{ + struct fgetconv_s *fc = (struct fgetconv_s *)_fc; + + if (!fc) + return EOF; + if (fc->cd == (iconv_t)-1) + return fgetc (fc->file); + if (!fc->p) + return EOF; + if (fc->p < fc->ob) + return (unsigned char)*(fc->p)++; + + /* Try to convert some more */ + fc->p = fc->ob = fc->bufo; + if (fc->ibl) + { + size_t obl = sizeof (fc->bufo); + iconv (fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl); + if (fc->p < fc->ob) + return (unsigned char)*(fc->p)++; + } + + /* If we trusted iconv a bit more, we would at this point + * ask why it had stopped converting ... 
*/ + + /* Try to read some more */ + if (fc->ibl == sizeof (fc->bufi) || + (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof (fc->bufi))) + { + fc->p = 0; + return EOF; + } + if (fc->ibl) + memcpy (fc->bufi, fc->ib, fc->ibl); + fc->ib = fc->bufi; + fc->ibl += fread (fc->ib + fc->ibl, 1, sizeof (fc->bufi) - fc->ibl, fc->file); + + /* Try harder this time to convert some */ + if (fc->ibl) + { + size_t obl = sizeof (fc->bufo); + mutt_iconv (fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl, + fc->inrepls, 0); + if (fc->p < fc->ob) + return (unsigned char)*(fc->p)++; + } + + /* Either the file has finished or one of the buffers is too small */ + fc->p = 0; + return EOF; +} + +void fgetconv_close (FGETCONV *_fc) +{ + struct fgetconv_s *fc = (struct fgetconv_s *)_fc; + + if (fc->cd != (iconv_t)-1) + iconv_close (fc->cd); + free (fc); +} diff --git a/charset.h b/charset.h index 4943da14..d76da234 100644 --- a/charset.h +++ b/charset.h @@ -19,35 +19,7 @@ #ifndef _CHARSET_H #define _CHARSET_H -typedef int CHARSET_MAP[256]; - -typedef struct descr -{ - char *symbol; - int repr; -} -CHARDESC; - -typedef struct -{ - char *charset; - char escape_char; - char comment_char; - short multbyte; - LIST *aliases; -} -CHARMAP; - -typedef struct -{ - size_t n_symb; - size_t u_symb; - - short multbyte; - HASH *symb_to_repr; - CHARDESC **description; -} -CHARSET; +#include #define DECODER_BUFFSIZE 4096 @@ -59,16 +31,14 @@ struct decoder_buff typedef struct decoder { - short src_is_utf8; + /*short src_is_utf8;*/ short just_take_id; short forced; - - /* used for utf-8 decoding */ - CHARSET *chs; + char *outrepl; + + /* conversion descriptor */ + iconv_t cd; - /* used for 8-bit to 8-bit recoding */ - CHARSET_MAP *chm; - /* the buffers */ struct decoder_buff in; struct decoder_buff out; @@ -83,12 +53,16 @@ void mutt_decoder_pop_to_state (DECODER *, STATE *); void mutt_free_decoder (DECODER **); int mutt_decoder_push_one (DECODER *, char); -CHARSET *mutt_get_charset(const char *); 
-CHARSET_MAP *mutt_get_translation(const char *, const char *); -int mutt_display_string(char *, CHARSET_MAP *); -int mutt_is_utf8(const char *); int mutt_recode_file (const char *, const char *, const char *); -unsigned char mutt_display_char(unsigned char, CHARSET_MAP *); -void mutt_decode_utf8_string(char *, CHARSET *); + +int mutt_convert_string (char *, size_t, const char *, const char *); + +size_t mutt_iconv (iconv_t, const char **, size_t *, char **, size_t *, const char **, const char *); + +typedef void * FGETCONV; + +FGETCONV *fgetconv_open (FILE *, const char *, const char *); +int fgetconv (FGETCONV *); +void fgetconv_close (FGETCONV *); #endif /* _CHARSET_H */ diff --git a/commands.c b/commands.c index 1927690b..43b0d2fb 100644 --- a/commands.c +++ b/commands.c @@ -217,9 +217,17 @@ void ci_bounce_message (HEADER *h, int *redraw) buf[0] = 0; rfc822_write_address (buf, sizeof (buf), adr); - snprintf (prompt, (COLS > sizeof(prompt) ? sizeof(prompt) : COLS) - 13, +#define extra_space (15 + 7 + 2) + /* + * This is the printing width of "...? ([y=yes]/n=no): ?" plus 2 + * for good measure. This is not ideal. FIXME. + */ + snprintf (prompt, sizeof (prompt), (h ? 
_("Bounce message to %s") : _("Bounce messages to %s")), buf); - strcat(prompt, "...?"); + mutt_format_string (prompt, sizeof (prompt), + 0, COLS-extra_space, 0, 0, + prompt, sizeof (prompt)); + strcat (prompt, "...?"); if (mutt_yesorno (prompt, 1) != 1) { rfc822_free_address (&adr); diff --git a/compose.c b/compose.c index 8b7f097c..8e0aacbe 100644 --- a/compose.c +++ b/compose.c @@ -26,6 +26,7 @@ #include "mailbox.h" #include "sort.h" #include "charset.h" +#include "iconv.h" #ifdef MIXMASTER #include "remailer.h" @@ -67,7 +68,7 @@ enum }; #define HDR_XOFFSET 10 -#define TITLE_FMT "%10s" +#define TITLE_FMT "%10s" /* Used for Prompts, which are ASCII */ #define W (COLS - HDR_XOFFSET) static char *Prompts[] = @@ -277,7 +278,8 @@ static void draw_envelope_addr (int line, ADDRESS *addr) buf[0] = 0; rfc822_write_address (buf, sizeof (buf), addr); - mvprintw (line, 0, TITLE_FMT "%-*.*s", Prompts[line - 1], W, W, buf); + mvprintw (line, 0, TITLE_FMT, Prompts[line - 1]); + mutt_paddstr (W, buf); } static void draw_envelope (HEADER *msg, char *fcc) @@ -286,10 +288,11 @@ static void draw_envelope (HEADER *msg, char *fcc) draw_envelope_addr (HDR_TO, msg->env->to); draw_envelope_addr (HDR_CC, msg->env->cc); draw_envelope_addr (HDR_BCC, msg->env->bcc); - mvprintw (HDR_SUBJECT, 0, TITLE_FMT "%-*.*s", Prompts[HDR_SUBJECT - 1], W, W, - NONULL(msg->env->subject)); + mvprintw (HDR_SUBJECT, 0, TITLE_FMT, Prompts[HDR_SUBJECT - 1]); + mutt_paddstr (W, NONULL (msg->env->subject)); draw_envelope_addr (HDR_REPLYTO, msg->env->reply_to); - mvprintw (HDR_FCC, 0, TITLE_FMT "%-*.*s", Prompts[HDR_FCC - 1], W, W, fcc); + mvprintw (HDR_FCC, 0, TITLE_FMT, Prompts[HDR_FCC - 1]); + mutt_paddstr (W, fcc); @@ -331,7 +334,8 @@ static int edit_address_list (int line, ADDRESS **addr) /* redraw the expanded list so the user can see the result */ buf[0] = 0; rfc822_write_address (buf, sizeof (buf), *addr); - mvprintw (line, HDR_XOFFSET, "%-*.*s", W, W, buf); + move (line, HDR_XOFFSET); + mutt_paddstr (W, 
buf); return 0; } @@ -385,6 +389,7 @@ static void update_idx (MUTTMENU *menu, ATTACHPTR **idx, short idxlen) static int change_attachment_charset (BODY *b) { char buff[SHORT_STRING]; + iconv_t cd; if (!mutt_is_text_type (b->type, b->subtype)) { @@ -396,20 +401,14 @@ static int change_attachment_charset (BODY *b) if (mutt_get_field (_("Enter character set: "), buff, sizeof(buff), 0) == -1) return 0; - - if (mutt_is_utf8(buff)) - { - if (!b->noconv) - { - mutt_error (_("UTF-8 encoding attachments has not yet been implemented.")); - return 0; - } - } - else if (mutt_get_charset (buff) == NULL) + + if ((cd = iconv_open (buff, "us-ascii")) == (iconv_t)-1) { mutt_error (_("Character set %s is unknown."), buff); return 0; } + else + iconv_close (cd); mutt_set_body_charset (b, buff); return REDRAW_CURRENT; @@ -488,7 +487,7 @@ int mutt_compose_menu (HEADER *msg, /* structure for new message */ move (HDR_SUBJECT, HDR_XOFFSET); clrtoeol (); if (msg->env->subject) - printw ("%-*.*s", W, W, msg->env->subject); + mutt_paddstr (W, msg->env->subject); } break; case OP_COMPOSE_EDIT_REPLY_TO: @@ -500,7 +499,8 @@ int mutt_compose_menu (HEADER *msg, /* structure for new message */ { strfcpy (fcc, buf, _POSIX_PATH_MAX); mutt_pretty_mailbox (fcc); - mvprintw (HDR_FCC, HDR_XOFFSET, "%-*.*s", W, W, fcc); + move (HDR_FCC, HDR_XOFFSET); + mutt_paddstr (W, fcc); fccSet = 1; } MAYBE_REDRAW (menu->redraw); @@ -765,11 +765,6 @@ int mutt_compose_menu (HEADER *msg, /* structure for new message */ mutt_error (_("Recoding only affects text attachments.")); break; } - if (mutt_is_utf8 (mutt_get_parameter ("charset", CURRENT->parameter))) - { - mutt_error (_("We currently can't encode to utf-8.")); - break; - } CURRENT->noconv = !CURRENT->noconv; if (CURRENT->noconv) mutt_message (_("The current attachment won't be converted.")); diff --git a/configure.in b/configure.in index 71dbe52c..5ee21973 100644 --- a/configure.in +++ b/configure.in @@ -382,6 +382,44 @@ AC_ARG_WITH(sharedir, [ 
--with-sharedir=PATH Specify where to put arch in sharedir=$mutt_cv_sharedir AC_SUBST(sharedir) +AC_ARG_WITH(iconv, [ --with-iconv Use system's iconv], [ + AC_CACHE_CHECK(for iconv, mutt_cv_func_iconv, + [ mutt_cv_func_iconv=no + mutt_cv_lib_iconv=no + AC_TRY_LINK([#include +#include ], + [iconv_t cd = iconv_open("",""); iconv(cd,NULL,NULL,NULL,NULL); iconv_close(cd);], + mutt_cv_func_iconv=yes) + if test "$mutt_cv_func_iconv" = no; then + mutt_save_LIBS="$LIBS" + LIBS="$LIBS -liconv" + AC_TRY_LINK([#include +#include ], + [iconv_t cd = iconv_open("",""); iconv(cd,NULL,NULL,NULL,NULL); iconv_close(cd);], + mutt_cv_lib_iconv=yes + mutt_cv_func_iconv=yes) + LIBS="$mutt_save_LIBS" + fi + ]) + if test "$mutt_cv_func_iconv" = no; then + AC_MSG_ERROR(No iconv) + fi + LIBICONV= + if test "$mutt_cv_lib_iconv" = yes; then + LIBICONV="-liconv" + fi + need_iconv=no +], +[ LIBICONV="-Liconv -liconv" + LIBICONVDEPS="\$(top_srcdir)/iconv/iconv.h iconv/libiconv.a" + need_iconv=yes +]) +AM_CONDITIONAL(BUILD_ICONV, test "$need_iconv" = yes) +AC_SUBST(LIBICONV) +AC_SUBST(LIBICONVDEPS) + +if test "$need_iconv" = yes ; then + mutt_cv_charmaps=/usr/share/i18n/charmaps AC_ARG_WITH(charmaps, [ --with-charmaps=PATH Where to find character set definitions], [if test x$withval != x -a $withval != yes ; then @@ -410,6 +448,8 @@ charmaps=$mutt_cv_charmaps AC_SUBST(charmaps) AM_CONDITIONAL(BUILD_CHARMAPS, test x$need_charmaps = xyes) +fi # /* need_iconv */ + AC_ARG_WITH(docdir, [ --with-docdir=PATH Specify where to put the documentation], [mutt_cv_docdir=$withval], [ AC_CACHE_CHECK(where to put the documentation, @@ -614,5 +654,5 @@ AC_DEFINE_UNQUOTED(MUTTLOCALEDIR, "$MUTTLOCALEDIR") AC_OUTPUT(Makefile intl/Makefile m4/Makefile dnl po/Makefile.in doc/Makefile contrib/Makefile dnl muttbug.sh dnl - charmaps/Makefile imap/Makefile dnl + charmaps/Makefile imap/Makefile iconv/Makefile dnl Muttrc.head) diff --git a/curs_lib.c b/curs_lib.c index fc8e0315..104b929f 100644 --- a/curs_lib.c +++ 
b/curs_lib.c @@ -20,6 +20,7 @@ #include "mutt_menu.h" #include "mutt_curses.h" #include "pager.h" +#include "mbyte.h" #include #include @@ -93,14 +94,15 @@ event_t mutt_getch (void) int _mutt_get_field (/* const */ char *field, char *buf, size_t buflen, int complete, int multiple, char ***files, int *numfiles) { int ret; - int len = mutt_strlen (field); /* in case field==buffer */ + int x, y; do { CLEARLINE (LINES-1); addstr (field); mutt_refresh (); - ret = _mutt_enter_string ((unsigned char *) buf, buflen, LINES-1, len, complete, multiple, files, numfiles); + getyx (stdscr, y, x); + ret = _mutt_enter_string ((unsigned char *) buf, buflen, y, x, complete, multiple, files, numfiles); } while (ret == 1); CLEARLINE (LINES-1); @@ -142,10 +144,23 @@ int mutt_yesorno (const char *msg, int def) event_t ch; unsigned char *yes = (unsigned char *) _("yes"); unsigned char *no = (unsigned char *) _("no"); - + char yes1 = 'y'; + char no1 = 'n'; + + /* + * The keys are not localised, because none of the other + * keys in mutt are localised. Also, non-ASCII characters + * are unlikely to work at present ... + */ + CLEARLINE(LINES-1); - printw("%s ([%c]/%c): ", msg, def ? *yes : *no, - def ? *no : *yes); + if (*yes == yes1 && *no == no1) /* English, or not localised */ + printw ("%s ([%c]/%c): ", msg, def ? yes1 : no1, + def ? no1 : yes1); + else + printw ("%s ([%c=%s]/%c=%s): ", msg, + def ? yes1 : no1, def ? yes : no, + def ? no1 : yes1, def ? 
no : yes); FOREVER { mutt_refresh (); @@ -153,12 +168,12 @@ int mutt_yesorno (const char *msg, int def) if (ch.ch == -1) return(-1); if (CI_is_return (ch.ch)) break; - else if (tolower(ch.ch) == tolower(*yes)) + else if (tolower (ch.ch) == tolower (yes1)) { def = 1; break; } - else if (tolower(ch.ch) == tolower(*no)) + else if (tolower (ch.ch) == tolower (no1)) { def = 0; break; @@ -190,16 +205,6 @@ void mutt_query_exit (void) SigInt = 0; } -static void clean_error_buf(void) -{ - char *s; - for(s = Errorbuf; *s; s++) - { - if(!IsPrint(*s)) - *s = '.'; - } -} - void mutt_curses_error (const char *fmt, ...) { va_list ap; @@ -209,8 +214,8 @@ void mutt_curses_error (const char *fmt, ...) va_end (ap); dprint (1, (debugfile, "%s\n", Errorbuf)); - Errorbuf[ (COLS < sizeof (Errorbuf) ? COLS : sizeof (Errorbuf)) - 2 ] = 0; - clean_error_buf(); + mutt_format_string (Errorbuf, sizeof (Errorbuf), + 0, COLS-2, 0, 0, Errorbuf, sizeof (Errorbuf)); if (!option (OPTKEEPQUIET)) { @@ -233,8 +238,8 @@ void mutt_message (const char *fmt, ...) vsnprintf (Errorbuf, sizeof (Errorbuf), fmt, ap); va_end (ap); - Errorbuf[ (COLS < sizeof (Errorbuf) ? COLS : sizeof (Errorbuf)) - 2 ] = 0; - clean_error_buf(); + mutt_format_string (Errorbuf, sizeof (Errorbuf), + 0, COLS-2, 0, 0, Errorbuf, sizeof (Errorbuf)); if (!option (OPTKEEPQUIET)) { @@ -455,3 +460,105 @@ int mutt_multi_choice (char *prompt, char *letters) mutt_refresh (); return choice; } + +/* + * addwch would be provided by an up-to-date curses library + */ + +int mutt_addwch (wchar_t wc) +{ + char buf[6]; /* FIXME */ + int n; + + n = mutt_wctomb (buf, wc); + if (n == -1) + return n; + else + return addnstr (buf, n); +} + +/* + * This formats a string, a bit like + * snprintf (dest, destlen, "%-*.*s", min_width, max_width, s), + * except that the widths refer to the number of character cells + * when printed. 
+ */ + +void mutt_format_string (char *dest, size_t destlen, + int min_width, int max_width, + int right_justify, char pad_char, + const char *s, size_t n) +{ + char *p; + wchar_t wc; + int w, k; + + --destlen; + p = dest; + while ((k = mbtowc (&wc, s, n))) + { + if (k == -1 && n > 0) + { + k = 1; + wc = replacement_char (); + } + s += k, n -= k; + w = wc < M_TREE_MAX ? 1 : wcwidth (wc); /* hack */ + if (w >= 0) + { + if (w > max_width || wctomb (0, wc) > destlen) + break; + min_width -= w; + max_width -= w; + p += (k = wctomb (p, wc)); + destlen -= k; + } + } + k = (int)destlen < min_width ? destlen : min_width; + if (k <= 0) + *p = '\0'; + else if (right_justify) + { + p[k] = '\0'; + while (--p >= dest) + p[k] = *p; + while (--k >= 0) + dest[k] = pad_char; + } + else + { + while (--k >= 0) + *p++ = pad_char; + *p = '\0'; + } +} + +/* + * mutt_paddstr (n, s) is equivalent to + * mutt_format_string (bigbuf, big, n, n, 0, ' ', s, big), addstr (bigbuf) + */ + +void mutt_paddstr (int n, const char *s) +{ + wchar_t wc; + int k, w; + + while ((k = mbtowc (&wc, s, -1))) + { + if (k == -1) + { + ++s; /* skip ill-formed character */ + continue; + } + if ((w = wcwidth (wc)) >= 0) + { + if (w > n) + break; + addnstr ((char *)s, k); + n -= w; + } + s += k; + } + while (n-- > 0) + addch (' '); +} diff --git a/curs_main.c b/curs_main.c index 7af10d02..a794f9b4 100644 --- a/curs_main.c +++ b/curs_main.c @@ -521,7 +521,7 @@ int mutt_index_menu (void) menu_status_line (buf, sizeof (buf), menu, NONULL (Status)); CLEARLINE (option (OPTSTATUSONTOP) ? 
0 : LINES-2); SETCOLOR (MT_COLOR_STATUS); - printw ("%-*.*s", COLS, COLS, buf); + mutt_paddstr (COLS, buf); SETCOLOR (MT_COLOR_NORMAL); menu->redraw &= ~REDRAW_STATUS; } diff --git a/enter.c b/enter.c index 716d0ef7..46786199 100644 --- a/enter.c +++ b/enter.c @@ -124,8 +124,11 @@ int _mutt_enter_string (unsigned char *buf, size_t buflen, int y, int x, j = begin; } move (y, x + j - begin); - for (; j < lastchar && j < begin + width; j++) - ADDCH (buf[j]); + { + int n = (lastchar < begin + width) ? lastchar : begin + width; + n = (n > j) ? n - j : 0; + addnstr ((char *)&buf[j], n); + } clrtoeol (); if (redraw != M_REDRAW_INIT) move (y, x + curpos - begin); diff --git a/gettext.c b/gettext.c new file mode 100644 index 00000000..338e7c5d --- /dev/null +++ b/gettext.c @@ -0,0 +1,167 @@ + +#include + +#include "mutt.h" +#include "iconv.h" +#include "lib.h" +#include "charset.h" + +/* + * One day, gettext will return strings in the appropriate + * encoding. In the meantime, we use this code to handle + * the conversion. 
+ */ + +struct gt_hash_elem +{ + const char *key; + char *data; + struct gt_hash_elem *next; +}; + +#define gt_hash_size 127 + +static char *get_charset (const char *header) +{ + /* FIXME: the comparison should at least be case-insensitive */ + const char f[] = "\nContent-Type: text/plain; charset="; + char *charset, *i, *j; + + charset = 0; + i = strstr (header, f); + if (i) + { + i += sizeof (f)-1; + for (j = i; *j >= 32; j++) + ; + charset = safe_malloc (j-i+1); + memcpy (charset, i, j-i); + charset[j-i] = '\0'; + } + return charset; +} + +char *mutt_gettext (const char *message) +{ + static struct gt_hash_elem **messages = 0; + static char *po_header = 0; + static char *po_charset = 0; + static char *message_charset = 0; + static char *outrepl = "?"; + static iconv_t cd = (iconv_t)-1; + int change_cd = 0; + char *t, *orig; + char *header_msgid = ""; + + /* gettext ("") doesn't work due to __builtin_constant_p optimisation */ + if ((t = gettext (header_msgid)) != po_header) + { + po_header = t; + t = get_charset (po_header); + if (t != po_charset && + (!t || !po_charset || strcmp (t, po_charset))) + { + free (po_charset); + po_charset = t; + change_cd = 1; + } + else + free (t); + } + + if (message_charset != Charset && + (!message_charset || !Charset || strcmp (message_charset, Charset))) + { + free (message_charset); + if (Charset) + { + int n = strlen (Charset); + message_charset = safe_malloc (n+1); + memcpy (message_charset, Charset, n+1); + } + else + message_charset = 0; + outrepl = mutt_is_utf8 (message_charset) ? "\357\277\275" : "?"; + change_cd = 1; + } + + if (change_cd) + { + if (cd != (iconv_t)-1) + iconv_close (cd); + if (message_charset) + cd = iconv_open (message_charset, po_charset ? 
po_charset : "UTF-8"); + else + cd = (iconv_t)-1; + + if (messages) + { + int i; + struct gt_hash_elem *p, *pn; + + for (i = 0; i < gt_hash_size; i++) + for (p = messages[i]; p; p = pn) + { + pn = p->next; + free (p); + } + free (messages); + messages = 0; + } + } + + orig = gettext (message); + + if (cd == (iconv_t)-1) + return orig; + else + { + struct gt_hash_elem *p; + int hash; + char *s, *t; + int n, nn; + const char *ib; + char *ob; + size_t ibl, obl; + + if (!messages) + { + messages = safe_malloc (gt_hash_size * sizeof (*messages)); + memset (messages, 0, gt_hash_size * sizeof (*messages)); + } + hash = (long int)orig % gt_hash_size; /* not very clever */ + for (p = messages[hash]; p && p->key != orig; p = p->next) + ; + if (p) + return p->data; + + n = strlen (orig); + nn = n + 1; + t = safe_malloc (nn); + + ib = orig, ibl = n; + ob = t, obl = n; + for (;;) + { + mutt_iconv (cd, &ib, &ibl, &ob, &obl, 0, outrepl); + if (!ibl || obl > 256) + break; + s = t; + safe_realloc ((void **)&t, nn += n); + ob += t - s; + obl += n; + } + *ob = '\0'; + n = strlen (t); + s = safe_malloc (n+1); + memcpy (s, t, n+1); + free (t); + + p = safe_malloc (sizeof (struct gt_hash_elem)); + p->key = orig; + p->data = s; + p->next = messages[hash]; + messages[hash] = p; + return s; + } +} diff --git a/gnupgparse.c b/gnupgparse.c index f04a18df..ae54dfdd 100644 --- a/gnupgparse.c +++ b/gnupgparse.c @@ -44,6 +44,7 @@ #include "mutt.h" #include "pgp.h" #include "charset.h" +#include "iconv.h" /* for hexval */ #include "mime.h" @@ -67,12 +68,13 @@ /* decode the backslash-escaped user ids. 
*/ -static CHARSET *_chs; +static char *_chs = 0; static void fix_uid (char *uid) { char *s, *d; - + iconv_t cd; + for (s = d = uid; *s;) { if (*s == '\\' && *(s+1) == 'x' && isxdigit (*(s+2)) && isxdigit (*(s+3))) @@ -84,8 +86,31 @@ static void fix_uid (char *uid) *d++ = *s++; } *d = '\0'; - - mutt_decode_utf8_string (uid, _chs); + + if (_chs && (cd = iconv_open (_chs, "utf-8")) != (iconv_t)-1) + { + int n = s - uid + 1; /* chars available in original buffer */ + char *buf; + const char *ib; + char *ob; + size_t ibl, obl; + + buf = safe_malloc (n+1); + ib = uid, ibl = d - uid + 1, ob = buf, obl = n; + iconv (cd, &ib, &ibl, &ob, &obl); + if (!ibl) + { + if (ob-buf < n) + { + memcpy (uid, buf, ob-buf); + uid[ob-buf] = '\0'; + } + else if (ob-buf == n && (buf[n] = 0, strlen (buf) < n)) + memcpy (uid, buf, n); + } + free (buf); + iconv_close (cd); + } } static pgp_key_t *parse_pub_line (char *buf, int *is_subkey, pgp_key_t *k) @@ -261,8 +286,9 @@ pgp_key_t *pgp_get_candidates (pgp_ring_t keyring, LIST * hints) if ((devnull = open ("/dev/null", O_RDWR)) == -1) return NULL; - _chs = mutt_get_charset (Charset); - + free (_chs); + _chs = safe_strdup (Charset); + thepid = pgp_invoke_list_keys (NULL, &fp, NULL, -1, -1, devnull, keyring, hints); if (thepid == -1) diff --git a/hdrline.c b/hdrline.c index 7e63b5cb..9ee54bef 100644 --- a/hdrline.c +++ b/hdrline.c @@ -19,6 +19,7 @@ #include "mutt.h" #include "mutt_curses.h" #include "sort.h" +#include "charset.h" #ifdef HAVE_PGP @@ -224,6 +225,36 @@ struct hdr_format_info HEADER *hdr; }; +static void hdr_format_s (char *dest, + size_t destlen, + const char *prefix, + char *s) +{ + int right_justify = 1; + char *p; + int min_width; + int max_width = INT_MAX; + + if (*prefix == '-') + ++prefix, right_justify = 0; + min_width = strtol (prefix, &p, 10); + if (p > prefix) + { + if (*p == '.') + { + prefix = p + 1; + max_width = strtol (prefix, &p, 10); + if (p <= prefix) + max_width = INT_MAX; + } + } + else + min_width = 0; + +
mutt_format_string (dest, destlen, min_width, max_width, + right_justify, ' ', s, -1); +} + static const char * hdr_format_str (char *dest, size_t destlen, @@ -256,8 +287,7 @@ hdr_format_str (char *dest, case 'a': if(hdr->env->from && hdr->env->from->mailbox) { - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, hdr->env->from->mailbox); + hdr_format_s (dest, destlen, prefix, hdr->env->from->mailbox); } else dest[0] = '\0'; @@ -270,8 +300,7 @@ hdr_format_str (char *dest, if (dest[0]) { strfcpy (buf2, dest, sizeof(buf2)); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; } /* fall through if 'B' returns nothing */ @@ -286,15 +315,13 @@ hdr_format_str (char *dest, } else strfcpy(dest, "(null)", destlen); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); strfcpy (buf2, dest, sizeof(buf2)); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'c': mutt_pretty_size (buf2, sizeof (buf2), (long) hdr->content->length); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'C': @@ -397,8 +424,7 @@ hdr_format_str (char *dest, if (do_locales) setlocale (LC_TIME, "C"); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); if (len > 0 && op != 'd') src = cp + 1; } @@ -407,24 +433,21 @@ hdr_format_str (char *dest, case 'f': buf2[0] = 0; rfc822_write_address (buf2, sizeof (buf2), hdr->env->from); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'F': if (!optional) { - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); make_from (hdr->env, buf2, sizeof (buf2), 0); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); } else if
(mutt_addr_is_user (hdr->env->from)) optional = 0; break; case 'i': - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, hdr->env->message_id ? hdr->env->message_id : ""); + hdr_format_s (dest, destlen, prefix, hdr->env->message_id ? hdr->env->message_id : ""); break; case 'l': @@ -444,8 +467,7 @@ hdr_format_str (char *dest, if (!optional) { make_from (hdr->env, buf2, sizeof (buf2), 1); - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); } else if (!check_for_mailing_list (hdr->env->to, NULL, NULL, 0) && !check_for_mailing_list (hdr->env->cc, NULL, NULL, 0)) @@ -465,8 +487,7 @@ hdr_format_str (char *dest, break; case 'n': - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, mutt_get_name (hdr->env->from)); + hdr_format_s (dest, destlen, prefix, mutt_get_name (hdr->env->from)); break; case 'N': @@ -488,8 +509,7 @@ hdr_format_str (char *dest, make_from_addr (hdr->env, buf2, sizeof (buf2), 1); if (!option (OPTSAVEADDRESS) && (p = strpbrk (buf2, "%@"))) *p = 0; - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); } else if (!check_for_mailing_list_addr (hdr->env->to, NULL, 0) && !check_for_mailing_list_addr (hdr->env->cc, NULL, 0)) @@ -500,13 +520,12 @@ hdr_format_str (char *dest, case 'M': snprintf (fmt, sizeof (fmt), "%%%sd", prefix); - snprintf (buf2, sizeof (buf2), "%%%ss", prefix); if (!optional) { if (threads && is_index && hdr->collapsed && hdr->num_hidden > 1) snprintf (dest, destlen, fmt, hdr->num_hidden); else if (is_index && threads) - snprintf (dest, destlen, buf2, " "); + hdr_format_s (dest, destlen, prefix, " "); else *dest = '\0'; } @@ -519,19 +538,18 @@ hdr_format_str (char *dest, case 's': - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); if (flags & M_FORMAT_TREE) { if (flags & M_FORMAT_FORCESUBJ) { snprintf (buf2, sizeof (buf2), "%s%s", 
hdr->tree, NONULL (hdr->env->subject)); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); } else - snprintf (dest, destlen, fmt, hdr->tree); + hdr_format_s (dest, destlen, prefix, hdr->tree); } else - snprintf (dest, destlen, fmt, NONULL (hdr->env->subject)); + hdr_format_s (dest, destlen, prefix, NONULL (hdr->env->subject)); break; case 'S': @@ -568,8 +586,7 @@ hdr_format_str (char *dest, else if (hdr->env->cc) snprintf (buf2, sizeof (buf2), "Cc %s", mutt_get_name (hdr->env->cc)); } - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'T': @@ -587,26 +604,24 @@ hdr_format_str (char *dest, } else buf2[0] = 0; - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'v': - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); if (mutt_addr_is_user (hdr->env->from)) { if (hdr->env->to) - snprintf (buf2, sizeof (buf2), fmt, mutt_get_name (hdr->env->to)); + hdr_format_s (buf2, sizeof (buf2), prefix, mutt_get_name (hdr->env->to)); else if (hdr->env->cc) - snprintf (buf2, sizeof (buf2), fmt, mutt_get_name (hdr->env->cc)); + hdr_format_s (buf2, sizeof (buf2), prefix, mutt_get_name (hdr->env->cc)); else *buf2 = 0; } else - snprintf (buf2, sizeof (buf2), fmt, mutt_get_name (hdr->env->from)); + hdr_format_s (buf2, sizeof (buf2), prefix, mutt_get_name (hdr->env->from)); if ((p = strpbrk (buf2, " %@"))) *p = 0; - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; case 'Z': @@ -624,7 +639,6 @@ hdr_format_str (char *dest, ch = 'K'; #endif - snprintf (fmt, sizeof (fmt), "%%%ss", prefix); snprintf (buf2, sizeof (buf2), "%c%c%c", (THREAD_NEW ? 'n' : (THREAD_OLD ? 'o' : ((hdr->read && (ctx && ctx->msgnotreadyet != hdr->msgno)) @@ -633,7 +647,7 @@ hdr_format_str (char *dest, hdr->tagged ? '*' : (hdr->flagged ? '!' 
: (Tochars && ((i = mutt_user_is_recipient (hdr)) < mutt_strlen (Tochars)) ? Tochars[i] : ' '))); - snprintf (dest, destlen, fmt, buf2); + hdr_format_s (dest, destlen, prefix, buf2); break; default: diff --git a/help.c b/help.c index dd00bfcd..52ee3cb3 100644 --- a/help.c +++ b/help.c @@ -84,59 +84,54 @@ mutt_compile_help (char *buf, size_t buflen, int menu, struct mapping_t *items) return buf; } -static int print_macro (FILE *f, int maxchar, const char **macro) +static int print_macro (FILE *f, int maxwidth, const char **macro) { - int c = (unsigned char) **macro; - int n = maxchar; + int n = maxwidth; + wchar_t wc; + int k, w; - while (c) + for (;;) { - if (!IsPrint(c)) + if ((k = mutt_mbtowc (&wc, *macro, -1)) <= 0) + break; + if ((w = mutt_wcwidth (wc)) >= 0) { - if (c >= ' ' && c != 127) - c = '?'; - else if (n < 2) - c = 0; - else + if (w > n) + break; + n -= w; { - --n; - switch (c) - { - case '\033': - fputc ('\\', f); - c = 'e'; - break; - case '\n': - fputc ('\\', f); - c = 'n'; - break; - case '\r': - fputc ('\\', f); - c = 'r'; - break; - case '\t': - fputc ('\\', f); - c = 't'; - break; - default: - fputc ('^', f); - c = (c + '@') & 127; - break; - } + char tb[7]; + int m = mutt_wctomb (tb, wc); + if (0 < m && m < 7) + tb[m] = '\0', fprintf (f, "%s", tb); } } - - if (c && n > 0) + else if (wc < 0x20 || wc == 0x7f) { - --n; - fputc(c, f); - c = (unsigned char) *++*macro; + if (2 > n) + break; + n -= 2; + if (wc == '\033') + fprintf (f, "\\e"); + else if (wc == '\n') + fprintf (f, "\\n"); + else if (wc == '\r') + fprintf (f, "\\r"); + else if (wc == '\t') + fprintf (f, "\\t"); + else + fprintf (f, "^%c", (char)((wc + '@') & 0x7f)); } else - c = 0; + { + if (1 > n) + break; + n -= 1; + fprintf (f, "?"); + } + *macro += k; } - - return (maxchar - n); + return (maxwidth - n); } static int pad (FILE *f, int col, int i) @@ -214,6 +209,7 @@ static void format_line (FILE *f, int ismacro, { SKIPWS(t3); + /* FIXME: this is completely wrong */ if ((n = mutt_strlen 
(t3)) > COLS - col) { n = COLS - col; diff --git a/iconv/Makefile.am b/iconv/Makefile.am new file mode 100644 index 00000000..b38f959c --- /dev/null +++ b/iconv/Makefile.am @@ -0,0 +1,10 @@ +## Process this file with automake to produce Makefile.in + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir) + +noinst_LIBRARIES = libiconv.a +noinst_HEADERS = iconv.h + +libiconv_a_SOURCES = iconv.c diff --git a/iconv/iconv.c b/iconv/iconv.c new file mode 100644 index 00000000..af391c3d --- /dev/null +++ b/iconv/iconv.c @@ -0,0 +1,899 @@ +/* + * Copyright (C) 1999 Thomas Roessler + * + * This program is free software; you can redistribute it + * and/or modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA + * 02139, USA. + */ + +/* + * This module deals with POSIX.2 character set definition files. 
+ */ + + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mutt.h" +#include "iconv.h" + +#ifndef EILSEQ +#define EILSEQ EINVAL +#endif + +/* Define this if you want any dprint () statements in this code */ + +#undef CHARSET_DEBUG + +#ifndef CHARSET_DEBUG +# undef dprint +# define dprint(a, b) (void) a +#endif + + +/* Type definitions */ + +typedef int CHARSET_MAP[256]; + +typedef struct descr +{ + char *symbol; + int repr; +} +CHARDESC; + +typedef struct +{ + char *charset; + char escape_char; + char comment_char; + short multbyte; + LIST *aliases; +} +CHARMAP; + +typedef struct +{ + size_t n_symb; + size_t u_symb; + + short multbyte; + HASH *symb_to_repr; + CHARDESC **description; +} +CHARSET; + +/* Module-global variables */ + +static HASH *Translations = NULL; +static HASH *Charsets = NULL; +static HASH *CharsetAliases = NULL; + +/* Function Prototypes */ + +static CHARDESC *chardesc_new (void); +static CHARDESC *repr2descr (int repr, CHARSET * cs); + +static CHARMAP *charmap_new (void); +static CHARMAP *parse_charmap_header (FILE * fp); +static CHARSET *charset_new (size_t hash_size); + +static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to); + +static char translate_character (CHARSET * to, const char *symbol); + +static int load_charset (const char *filename, CHARSET ** csp, short multbyte); +static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp); +static int _cd_compar (const void *a, const void *b); + +static void chardesc_free (CHARDESC ** cdp); +static void charmap_free (CHARMAP ** cp); +static void charset_free (CHARSET ** csp); +static void fix_symbol (char *symbol, CHARMAP * m); + +static CHARSET *charset_new (size_t hash_size) +{ + CHARSET *cp = safe_malloc (sizeof (CHARSET)); + size_t i; + + cp->n_symb = 256; + cp->u_symb = 0; + cp->multbyte = 1; + cp->symb_to_repr = hash_create (hash_size); + cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *)); + + for (i = 
0; i < cp->n_symb; i++) + cp->description[i] = NULL; + + return cp; +} + +static void charset_free (CHARSET ** csp) +{ + CHARSET *cs = *csp; + size_t i; + + for (i = 0; i < cs->n_symb; i++) + chardesc_free (&cs->description[i]); + + safe_free ((void **) &cs->description); + + hash_destroy (&cs->symb_to_repr, NULL); + safe_free ((void **) csp); +} + +static CHARMAP *charmap_new (void) +{ + CHARMAP *m = safe_malloc (sizeof (CHARMAP)); + + m->charset = NULL; + m->escape_char = '\\'; + m->comment_char = '#'; + m->multbyte = 1; + m->aliases = NULL; + + return m; +} + +static void charmap_free (CHARMAP ** cp) +{ + if (!cp || !*cp) + return; + + mutt_free_list (&(*cp)->aliases); + safe_free ((void **) &(*cp)->charset); + safe_free ((void **) cp); + + return; +} + +static CHARDESC *chardesc_new (void) +{ + CHARDESC *p = safe_malloc (sizeof (CHARDESC)); + + p->symbol = NULL; + p->repr = -1; + + return p; +} + +static void chardesc_free (CHARDESC ** cdp) +{ + if (!cdp || !*cdp) + return; + + + safe_free ((void **) &(*cdp)->symbol); + safe_free ((void **) cdp); + + return; +} + +static CHARMAP *parse_charmap_header (FILE * fp) +{ + char buffer[1024]; + char *t, *u; + CHARMAP *m = charmap_new (); + + while (fgets (buffer, sizeof (buffer), fp)) + { + if ((t = strchr (buffer, '\n'))) + *t = '\0'; + else + { + charmap_free (&m); + return NULL; + } + + if (!strncmp (buffer, "CHARMAP", 7)) + break; + + if (*buffer == m->comment_char) + { + if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias")) + { + char _tmp[SHORT_STRING]; + while ((t = strtok(NULL, "\t, "))) + { + mutt_canonical_charset (_tmp, sizeof (_tmp), t); + m->aliases = mutt_add_list (m->aliases, _tmp); + } + } + continue; + } + + if (!(t = strtok (buffer, "\t "))) + continue; + + if (!(u = strtok (NULL, "\t "))) + { + charmap_free (&m); + return NULL; + } + + if (!strcmp (t, "<code_set_name>")) + { + safe_free ((void **) &m->charset); + mutt_canonical_charset (u, strlen (u) + 1, u); + m->charset = safe_strdup (u); + } + else
if (!strcmp (t, "<comment_char>")) + { + m->comment_char = *u; + } + else if (!strcmp (t, "<escape_char>")) + { + m->escape_char = *u; + } + else if (!strcmp (t, "<mb_cur_max>")) + { + m->multbyte = strtol (u, NULL, 0); + } + } + + return m; +} + +/* Properly handle escape characters within a symbol. */ + +static void fix_symbol (char *symbol, CHARMAP * m) +{ + char *s, *d; + + for (s = symbol, d = symbol; *s; *d++ = *s++) + { + if (*s == m->escape_char && !*++s) + break; + } + + *d = *s; +} + +enum +{ + CL_DESCR, + CL_END, + CL_COMMENT, + CL_ERROR +}; + +static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp) +{ + char *t, *u; + short n; + CHARDESC *descr; + + if (*line == m->comment_char) + return CL_COMMENT; + + descr = *descrp = chardesc_new (); + + if (!strncmp (line, "END CHARMAP", 11)) + { + chardesc_free (descrp); + return CL_END; + } + + for (t = line; *t && isspace ((unsigned char) *t); t++) + ; + + if (*t++ != '<') + { + chardesc_free (descrp); + return CL_ERROR; + } + + for (u = t; *u && *u != '>'; u++) + { + if (*u == m->escape_char && u[1]) + u++; + } + + if (*u != '>') + { + chardesc_free (descrp); + return CL_ERROR; + } + + *u++ = '\0'; + descr->symbol = safe_strdup (t); + fix_symbol (descr->symbol, m); + + for (t = u; *t && isspace ((unsigned char) *t); t++) + ; + + for (u = t; *u && !isspace ((unsigned char) *u); u++) + ; + + *u++ = 0; + descr->repr = 0; + + for (n = 0; *t == m->escape_char && n < m->multbyte; n++) + { + switch (*++t) + { + case 'x': + descr->repr = descr->repr * 256 + strtol (++t, &t, 16); + break; + case 'd': + descr->repr = descr->repr * 256 + strtol (++t, &t, 10); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + descr->repr = descr->repr * 256 + strtol (t, &t, 8); + break; + default: + chardesc_free (descrp); + return CL_ERROR; + } + } + + if (!n) + { + chardesc_free (descrp); + return CL_ERROR; + } + + return CL_DESCR; +} + +static int _cd_compar (const void *a, const void *b) +{ + const
CHARDESC *ap, *bp; + int i; + + ap = * (CHARDESC **) a; + bp = * (CHARDESC **) b; + + i = ap->repr - bp->repr; + + dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n", + ap->repr, ap->symbol, bp->repr, bp->symbol, i)); + + return i; +} + +/* + * Load a character set description into memory. + * + * The multibyte parameter tells us whether we are going + * to accept multibyte character sets. + */ + +static int load_charset (const char *filename, CHARSET ** csp, short multbyte) +{ + CHARDESC *cd = NULL; + CHARSET *cs = NULL; + CHARMAP *m = NULL; + FILE *fp; + char buffer[1024]; + int i; + int rv = -1; + + cs = *csp = charset_new (multbyte ? 1031 : 257); + + dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename)); + + if ((fp = fopen (filename, "r")) == NULL) + { + char _filename[_POSIX_PATH_MAX]; + + snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename); + dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename)); + + if ((fp = fopen (_filename, "r")) == NULL) + { + dprint (2, (debugfile, "load_charset: Failed.\n")); + goto bail; + } + } + + if ((m = parse_charmap_header (fp)) == NULL) + goto bail; + + /* Don't handle multibyte character sets unless explicitly requested + * to do so. + */ + + if (m->multbyte > 1 && !multbyte) + { + dprint (2, (debugfile, "load_charset: m->multbyte == %d\n", + (int) m->multbyte)); + goto bail; + } + + cs->multbyte = m->multbyte; + + while (fgets (buffer, sizeof (buffer), fp) != NULL) + { + i = parse_charmap_line (buffer, m, &cd); + + if (i == CL_END) + break; + else if (i == CL_DESCR) + { + dprint (5, (debugfile, "load_charset: Got character description: <%s> -> %x\n", + cd->symbol, cd->repr)); + + if (!multbyte) + { + if (0 <= cd->repr && cd->repr < 256) + { + hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); + hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); + + /* note: we intentionally leak some memory here. 
*/ + if (!cs->description[cd->repr]) + cs->u_symb++; + + cs->description[cd->repr] = cd; + cd = NULL; + } + } + else + { + if (cs->u_symb == cs->n_symb) + { + size_t new_size = cs->n_symb + 256; + size_t i; + + safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *)); + for (i = cs->u_symb; i < new_size; i++) + cs->description[i] = NULL; + cs->n_symb = new_size; + } + + hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); + hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); + + cs->description[cs->u_symb++] = cd; + cd = NULL; + } + } + + if (cd) + { + dprint (5, (debugfile, "load_charset: character description still present: <%s>->%x\n", + cd->symbol, cd->repr)); + } + chardesc_free (&cd); + } + + if (multbyte) + qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar); + + rv = 0; + +bail: + charmap_free (&m); + if (fp) + fclose (fp); + if (rv) + charset_free (csp); + + return rv; +} + +static CHARDESC *repr2descr (int repr, CHARSET * cs) +{ + CHARDESC *key; + CHARDESC **r; + + if (!cs || repr < 0) + return NULL; + + if (cs->multbyte == 1) + { + if (repr < 256) + return cs->description[repr]; + else + return NULL; + } + + key = safe_malloc (sizeof(CHARDESC)); + key->repr = repr; + key->symbol = ""; /* otherwise, the + * debug code may + * segfault. ouch. + */ + + r = bsearch (&key, cs->description, cs->u_symb, + sizeof (CHARDESC *), _cd_compar); + + safe_free ((void **) &key); + + if (r) return *r; + + return NULL; +} + +/* Build a translation table. If a character cannot be + * translated correctly, we try to find an approximation + * from the portable charcter set. + * + * Note that this implies the assumption that the portable + * character set can be used without any conversion. + * + * Should be safe on POSIX systems. 
+ */ + +static char translate_character (CHARSET * to, const char *symbol) +{ + CHARDESC *cdt; + + if ((cdt = hash_find (to->symb_to_repr, symbol))) + return (char) cdt->repr; + else + return *symbol; +} + +static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to) +{ + int i; + CHARSET_MAP *map; + CHARDESC *cd; + + /* This is for 8-bit character sets. */ + + if (!from || !to || from->multbyte > 1 || to->multbyte > 1) + return NULL; + + map = safe_malloc (sizeof (CHARSET_MAP)); + for (i = 0; i < 256; i++) + { + if (!(cd = repr2descr (i, from))) + (*map)[i] = '?'; + else + (*map)[i] = translate_character (to, cd->symbol); + } + + return map; +} + +/* Currently, just scan the various charset definition files. + * On the long run, we should cache this stuff in a file. + */ + +static HASH *load_charset_aliases (void) +{ + HASH *charset_aliases; + CHARMAP *m; + DIR *dp; + FILE *fp; + struct dirent *de; + + if ((dp = opendir (CHARMAPS_DIR)) == NULL) + return NULL; + + charset_aliases = hash_create(127); + + while ((de = readdir (dp))) + { + char fnbuff[_POSIX_PATH_MAX]; + + if (*de->d_name == '.') + continue; + + snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name); + dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff)); + if ((fp = fopen (fnbuff, "r")) == NULL) + continue; + + if ((m = parse_charmap_header (fp)) != NULL) + { + LIST *lp; + char buffer[LONG_STRING]; + + mutt_canonical_charset (buffer, sizeof (buffer), de->d_name); + m->aliases = mutt_add_list (m->aliases, buffer); + + if (m->charset) + m->aliases = mutt_add_list (m->aliases, m->charset); + + for (lp = m->aliases; lp; lp = lp->next) + { + if (lp->data) + { + dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n", + lp->data, de->d_name)); + if (hash_find (charset_aliases, lp->data)) + { + dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n", + lp->data)); + } + else + hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup 
(de->d_name), 0); + } + } + + charmap_free (&m); + } + + fclose (fp); + } + + closedir (dp); + return charset_aliases; +} + +static void init_charsets () +{ + if (Charsets) return; + + Charsets = hash_create (127); + Translations = hash_create (127); + CharsetAliases = load_charset_aliases (); +} + +static CHARSET *mutt_get_charset (const char *name) +{ + CHARSET *charset; + char buffer[SHORT_STRING]; + char *real_charset; + char *hooked; + + if (!name || !*name) + return (NULL); + + init_charsets(); + mutt_canonical_charset (buffer, sizeof(buffer), name); + + /* needs to be documented */ + + if ((hooked = mutt_charset_hook (buffer))) + mutt_canonical_charset (buffer, sizeof (buffer), hooked); + + dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer)); + + if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer))) + real_charset = buffer; + + dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset)); + + if(!(charset = hash_find (Charsets, real_charset))) + { + dprint (2, (debugfile, "mutt_get_charset: Need to load.\n")); + if (load_charset(real_charset, &charset, 0) == 0) + hash_insert(Charsets, safe_strdup(real_charset), charset, 1); + else + charset = NULL; + } + return charset; +} + +static CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to) +{ + char from_canon[SHORT_STRING]; + char to_canon[SHORT_STRING]; + char key[SHORT_STRING]; + char *from, *to; + CHARSET *from_cs, *to_cs; + CHARSET_MAP *map; + + if(!_from || !_to) + return NULL; + + mutt_canonical_charset(from_canon, sizeof(from_canon), _from); + mutt_canonical_charset(to_canon, sizeof(to_canon), _to); + + /* quick check for some trivial cases. Doing this before + * we actually call the initialization routine delays character + * set loading until it's _really_ needed. 
+ */ + + if(!strcmp(from_canon, to_canon) + || (!strcmp (from_canon, "us-ascii") && !strncmp (to_canon, "iso-8859", 8))) + return NULL; + + init_charsets(); + + if(!CharsetAliases || !(from = hash_find(CharsetAliases, from_canon))) + from = from_canon; + if(!CharsetAliases || !(to = hash_find(CharsetAliases, to_canon))) + to = to_canon; + + /* quick check for the identity mapping */ + if((from == to) || !mutt_strcmp(from, to)) + return NULL; + + snprintf(key, sizeof(key), "%s %s", from, to); + if((map = hash_find(Translations, key)) == NULL) + { + from_cs = mutt_get_charset(from); + to_cs = mutt_get_charset(to); + + if((map = build_translation(from_cs, to_cs))) + hash_insert(Translations, safe_strdup(key), map, 1); + } + return map; +} + +static unsigned char mutt_display_char (unsigned char ch, CHARSET_MAP *map) +{ + if (!map || !ch) + return ch; + + return (unsigned char) (*map)[ch]; +} + +static CHARSET *Unicode = NULL; + +static int unicode_init (void) +{ + if (!Unicode) + { + if (load_charset ("ISO_10646", &Unicode, 1) == -1) + Unicode = NULL; + } + + return (Unicode == NULL ? 
-1 : 0); +} + +/*************************************************************/ +/* iconv functions */ + +struct iconv +{ + CHARSET_MAP *chm; /* used for 8-bit to 8-bit recoding */ + CHARSET *chs_from; /* used for translation from UTF-8 */ + CHARSET *chs_to; /* used for translation to UTF-8 */ +}; + +iconv_t iconv_open (const char *tocode, const char *fromcode) +{ + struct iconv *cd = 0; + CHARSET_MAP *chm; + CHARSET *chs; + + if (fromcode && tocode) + { + if (mutt_is_utf8 (fromcode)) + { + if (!mutt_is_utf8 (tocode)) + { + if ((chs = mutt_get_charset (tocode))) + { + cd = safe_calloc (sizeof (struct iconv), 1); + cd->chs_to = chs; + } + } + } + else if (mutt_is_utf8 (tocode)) + { + if ((chs = mutt_get_charset (fromcode))) + { + cd = safe_calloc (sizeof (struct iconv), 1); + cd->chs_from = chs; + } + } + else + { + if ((chm = mutt_get_translation (fromcode, tocode))) + { + cd = safe_calloc (sizeof (struct iconv), 1); + cd->chm = mutt_get_translation (fromcode, tocode); + } + } + } + return cd; +} + +size_t iconv (iconv_t _cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + struct iconv *cd = (struct iconv *)_cd; + const char *ib = *inbuf; + size_t ibl = *inbytesleft; + char *ob = *outbuf; + size_t obl = *outbytesleft; + int n; + + if (cd && cd->chm) + { + int i; + n = ibl < obl ? 
ibl : obl; + for (i = 0; i < n; i++) + ob[i] = mutt_display_char (ib[i], cd->chm); + ib += n, ibl -= n, ob += n, obl -= n; + } + else if (cd && cd->chs_from && !unicode_init ()) + { + n = 0; + while (ibl && obl) + { + CHARDESC *d; + unsigned long c = 0xfffd; /* replacement char */ + int k; + if ((d = repr2descr ((unsigned char)*ib, cd->chs_from)) && + (d = hash_find (Unicode->symb_to_repr, d->symbol))) + c = d->repr; + if (obl < 6 && obl < mutt_wctoutf8 (0, c)) + break; + k = mutt_wctoutf8 (ob, c); + ++ib, --ibl; + ob += k, obl -=k; + ++n; + } + } + else if (cd && !cd->chs_from && cd->chs_to && !unicode_init ()) + { + mbstate_t mbstate = 0; + unsigned int wc; + int k; + char c; + CHARDESC *d; + + n = 0; + while (ibl && obl) + { + k = utf8rtowc (&wc, ib, ibl, &mbstate); + if (k == -1 || k == -2) + { + *inbuf = ib, *inbytesleft = ibl; + *outbuf = ob, *outbytesleft = obl; + errno = (k == -1) ? EILSEQ : EINVAL; + return -1; + } + k = k ? k : 1; + c = '?'; /* replacement char */ + if ((d = repr2descr (wc, Unicode))) + { + c = d->symbol[0]; /* approximation */ + if ((d = hash_find (cd->chs_to->symb_to_repr, d->symbol))) + c = d->repr; + } + ib += k, ibl -= k; + *ob++ = c, --obl; + ++n; + } + } + else + { + /* no translation defined - just copy */ + n = ibl < obl ? 
ibl : obl; + memcpy (ob, ib, n); + ib += n, ibl -= n, ob += n, obl -= n; + } + + *inbuf = ib, *inbytesleft = ibl; + *outbuf = ob, *outbytesleft = obl; + if (ibl) + { + errno = E2BIG; + return (size_t)-1; + } + else + return n; +} + +int iconv_close (iconv_t cd) +{ + free (cd); + return 0; +} diff --git a/iconv/iconv.h b/iconv/iconv.h new file mode 100644 index 00000000..596a1748 --- /dev/null +++ b/iconv/iconv.h @@ -0,0 +1,12 @@ +#ifndef _ICONV_H +#define _ICONV_H + +#include + +typedef void * iconv_t; + +iconv_t iconv_open (const char *, const char *); +size_t iconv (iconv_t, const char **, size_t *, char **, size_t *); +int iconv_close (iconv_t); + +#endif /* _ICONV_H */ diff --git a/init.c b/init.c index be4e3458..a7ef2a3b 100644 --- a/init.c +++ b/init.c @@ -22,6 +22,7 @@ #include "mutt_regex.h" #include "history.h" #include "keymap.h" +#include "mbyte.h" #ifdef HAVE_PGP @@ -882,6 +883,7 @@ static int parse_set (BUFFER *tmp, BUFFER *s, unsigned long data, BUFFER *err) else if (DTYPE (MuttVars[idx].type) == DT_STR) { *((char **) MuttVars[idx].data) = safe_strdup (tmp->data); + mutt_set_charset (Charset); } else { diff --git a/lib.h b/lib.h index 7d86324c..90b72290 100644 --- a/lib.h +++ b/lib.h @@ -45,7 +45,8 @@ # ifdef ENABLE_NLS # include -# define _(a) (gettext (a)) +char *mutt_gettext (const char *); /* gettext.c */ +# define _(a) (mutt_gettext (a)) # ifdef gettext_noop # define N_(a) gettext_noop (a) # else diff --git a/mbyte.c b/mbyte.c new file mode 100644 index 00000000..4ce59a94 --- /dev/null +++ b/mbyte.c @@ -0,0 +1,167 @@ + +/* + * This file provides functions that are just like the C library ones, + * except that they behave according to mutt's Charset instead of + * according to the locale. 
/*
 * Restartable UTF-8 decoder with the calling convention of mbrtowc ().
 *
 * pwc  receives the decoded character; may be NULL.
 * s    input bytes; if NULL, the state is reset and 0 is returned.
 * n    number of bytes available at s.
 * ps   conversion state; if NULL, a private static state is used.
 *
 * Returns
 *   0            a NUL character was decoded,
 *   > 0          the number of bytes consumed by THIS call to finish
 *                one character,
 *   (size_t)-2   n is 0, or the input ended inside a sequence; the
 *                partial result has been saved in *ps,
 *   (size_t)-1   invalid input, errno is set (EILSEQ, or EINVAL for
 *                a state whose count field is out of range).
 *
 * State layout: *ps holds the partially assembled character.  Its
 * low three bits hold `count', the number of continuation bytes that
 * are still expected after the next one.  A state of 0 means "at the
 * start of a character".
 */
size_t utf8rtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  static mbstate_t mbstate = 0;   /* used when the caller passes ps == NULL */
  size_t k = 1;                   /* bytes consumed so far, including the current one */
  unsigned char c;
  wchar_t wc;
  int count;

  if (!ps)
    ps = &mbstate;

  if (!s)
  {
    *ps = 0;
    return 0;
  }
  if (!n)
    return (size_t)-2;

  if (!*ps)
  {
    /* Start of a new character: classify the lead byte. */
    c = (unsigned char)*s;
    if (c < 0x80)
    {
      /* Plain ASCII; returns 0 for NUL and 1 otherwise. */
      if (pwc)
        *pwc = c;
      return (c != 0);
    }
    else if (c < 0xc2)
    {
      /* 0x80..0xbf is a stray continuation byte; 0xc0 and 0xc1
       * could only start an overlong two-byte form. */
      errno = EILSEQ;
      return (size_t)-1;
    }
    /* Each branch places the payload bits of the lead byte at their
     * final position and stores count in the (still empty) low bits. */
    else if (c < 0xe0)
      wc = ((c & 0x1f) << 6) + (count = 0);
    else if (c < 0xf0)
      wc = ((c & 0x0f) << 12) + (count = 1);
    else if (c < 0xf8)
      wc = ((c & 0x07) << 18) + (count = 2);
    else if (c < 0xfc)
      wc = ((c & 0x03) << 24) + (count = 3);
    else if (c < 0xfe)
      wc = ((c & 0x01) << 30) + (count = 4);
    else
    {
      /* 0xfe and 0xff are not accepted as lead bytes. */
      errno = EILSEQ;
      return (size_t)-1;
    }
    ++s, --n, ++k;
  }
  else
  {
    /* Resume a sequence that was interrupted by the end of input. */
    wc = *ps & 0x7fffffff;
    count = wc & 7; /* if count > 4 it will be caught below */
  }

  for (; n; ++s, --n, ++k)
  {
    c = (unsigned char)*s;
    if (0x80 <= c && c < 0xc0)
    {
      /* For count >= 1 the six payload bits land at bit 6 or above,
       * so they do not disturb the count kept in the low bits. */
      wc |= (c & 0x3f) << (6 * count);
      if (!count)
      {
        /* That was the last continuation byte. */
        if (pwc)
          *pwc = wc;
        *ps = 0;
        return wc ? k : 0;
      }
      /* count lives in the low bits of wc, so both are decremented
       * together to keep them in step. */
      --count, --wc;
      /* Fail unless a bit at position 11 + 5 * count or above is set.
       * On the first continuation byte this is the minimum value for
       * the sequence length (0x800, 0x10000, 0x200000, 0x4000000),
       * which rejects overlong forms.  count >= 4 at this point can
       * only come from a bad saved state and yields EINVAL. */
      if (!(wc >> (11+count*5)))
      {
        errno = count < 4 ? EILSEQ : EINVAL;
        return (size_t)-1;
      }
    }
    else
    {
      /* Anything but a continuation byte inside a sequence is invalid. */
      errno = EILSEQ;
      return (size_t)-1;
    }
  }
  /* Input exhausted in the middle of a sequence: save progress. */
  *ps = wc;
  return (size_t)-2;
}
k; + while (*s) { if (*s < M_TREE_MAX) @@ -40,19 +46,44 @@ static void print_enriched_string (int attr, unsigned char *s, int do_color) switch (*s) { case M_TREE_LLCORNER: - addch (option (OPTASCIICHARS) ? '`' : ACS_LLCORNER); + if (option (OPTASCIICHARS)) + addch ('`'); + else if (Charset_is_utf8) + addstr ("\342\224\224"); /* WACS_LLCORNER */ + else + addch (ACS_LLCORNER); break; case M_TREE_ULCORNER: - addch (option (OPTASCIICHARS) ? ',' : ACS_ULCORNER); + if (option (OPTASCIICHARS)) + addch (','); + else if (Charset_is_utf8) + addstr ("\342\224\214"); /* WACS_ULCORNER */ + else + addch (ACS_ULCORNER); break; case M_TREE_LTEE: - addch (option (OPTASCIICHARS) ? '|' : ACS_LTEE); + if (option (OPTASCIICHARS)) + addch ('|'); + else if (Charset_is_utf8) + addstr ("\342\224\234"); /* WACS_LTEE */ + else + addch (ACS_LTEE); break; case M_TREE_HLINE: - addch (option (OPTASCIICHARS) ? '-' : ACS_HLINE); + if (option (OPTASCIICHARS)) + addch ('-'); + else if (Charset_is_utf8) + addstr ("\342\224\200"); /* WACS_HLINE */ + else + addch (ACS_HLINE); break; case M_TREE_VLINE: - addch (option (OPTASCIICHARS) ? '|' : ACS_VLINE); + if (option (OPTASCIICHARS)) + addch ('|'); + else if (Charset_is_utf8) + addstr ("\342\224\202"); /* WACS_VLINE */ + else + addch (ACS_VLINE); break; case M_TREE_SPACE: addch (' '); @@ -71,11 +102,13 @@ static void print_enriched_string (int attr, unsigned char *s, int do_color) } if (do_color) attrset(attr); } - else + else if ((k = mbtowc (&wc, (char *)s, -1)) > 0) { - addch (*s); - s++; + addnstr ((char *)s, k); + s += k; } + else + break; } } @@ -100,6 +133,11 @@ void menu_pad_string (char *s, size_t l) l--; /* save room for the terminal \0 */ if (l > COLS - shift) l = COLS - shift; + + /* Let's just pad the string anyway ... 
*/ + mutt_format_string (s, INT_MAX, l, l, 0, ' ', s, INT_MAX); + return; + #if !defined (HAVE_BKGDSET) && !defined (USE_SLANG_CURSES) /* we have to pad the string with blanks to the end of line */ if (n < l) @@ -123,7 +161,8 @@ void menu_redraw_full (MUTTMENU *menu) if (option (OPTHELP)) { SETCOLOR (MT_COLOR_STATUS); - mvprintw (option (OPTSTATUSONTOP) ? LINES-2 : 0, 0, "%-*.*s", COLS, COLS, menu->help); + move (option (OPTSTATUSONTOP) ? LINES-2 : 0, 0); + mutt_paddstr (COLS, menu->help); SETCOLOR (MT_COLOR_NORMAL); menu->offset = 1; menu->pagelen = LINES - 3; @@ -145,7 +184,8 @@ void menu_redraw_status (MUTTMENU *menu) snprintf (buf, sizeof (buf), M_MODEFMT, menu->title); SETCOLOR (MT_COLOR_STATUS); - mvprintw (option (OPTSTATUSONTOP) ? 0 : LINES - 2, 0, "%-*.*s", COLS, COLS, buf); + move (option (OPTSTATUSONTOP) ? 0 : LINES - 2, 0); + mutt_paddstr (COLS, buf); SETCOLOR (MT_COLOR_NORMAL); menu->redraw &= ~REDRAW_STATUS; } diff --git a/muttlib.c b/muttlib.c index 7c3b8ae7..2cfdc4d9 100644 --- a/muttlib.c +++ b/muttlib.c @@ -906,6 +906,7 @@ void mutt_FormatString (char *dest, /* output buffer */ } *wptr = 0; +#if 0 if (flags & M_FORMAT_MAKEPRINT) { /* Make sure that the string is printable by changing all non-printable @@ -915,6 +916,7 @@ void mutt_FormatString (char *dest, /* output buffer */ !((flags & M_FORMAT_TREE) && (*cp <= M_TREE_MAX))) *cp = isspace ((unsigned char) *cp) ? 
' ' : '.'; } +#endif } /* This function allows the user to specify a command to read stdout from in diff --git a/pager.c b/pager.c index 4ea1770a..219546ac 100644 --- a/pager.c +++ b/pager.c @@ -25,6 +25,7 @@ #include "sort.h" #include "pager.h" #include "attach.h" +#include "mbyte.h" #ifdef USE_IMAP #include "imap.h" @@ -969,6 +970,137 @@ static int grok_ansi(unsigned char *buf, int pos, ansi_attr *a) return pos; } +static int format_line (struct line_t **lineInfo, int n, unsigned char *buf, + int flags, ansi_attr *pa, int cnt, + int *pspace, int *pvch, int *pcol, int *pspecial) +{ + int space = -1; /* index of the last space or TAB */ + int col = option (OPTMARKERS) ? (*lineInfo)[n].continuation : 0; + int ch, vch, k, special = 0, t; + wchar_t wc; + mbstate_t mbstate = 0; /* FIXME: this should come from lineInfo */ + + for (ch = 0, vch = 0; ch < cnt; ch += k, vch += k) + { + /* Handle ANSI sequences */ + while (cnt-ch >= 2 && buf[ch] == '\033' && buf[ch+1] == '[' && + is_ansi (buf+ch+2)) + ch = grok_ansi (buf, ch+2, pa) + 1; + + k = mbrtowc (&wc, (char *)buf+ch, cnt-ch, &mbstate); + if (k == -2) + break; + if (k == -1) + { + if (col + 4 > COLS) + break; + col += 4; + if (pa) + printw ("\\%03o", buf[ch]); + k = 1; + continue; + } + if (k == 0) + k = wctomb (0, wc); + + /* Handle backspace */ + special = 0; + if (iswprint (wc)) + { + wchar_t wc1; + mbstate_t mbstate1; + int k1, k2; + + while ((wc1 = 0, mbstate1 = mbstate, + k1 = k + mbrtowc (&wc1, (char *)buf+ch+k, cnt-ch-k, &mbstate1), + wc1 == '\b') && + (wc1 = 0, + k2 = mbrtowc (&wc1, (char *)buf+ch+k1, cnt-ch-k1, &mbstate1), + iswprint (wc1))) + { + if (wc == wc1) + { + special = (wc == '_' && special == A_UNDERLINE) + ? A_UNDERLINE : A_BOLD; + } + else if (wc == '_' || wc1 == '_') + { + special = A_UNDERLINE; + wc = (wc1 == '_') ? wc : wc1; + } + else + { + special = 0; /* overstrike: nothing to do! 
*/ + wc = wc1; + } + ch += k1; + k = k2; + mbstate = mbstate1; + } + } + + if (pa && + ((flags & (M_SHOWCOLOR | M_SEARCH | M_PAGER_MARKER)) || special + || pa->attr)) + resolve_color (*lineInfo, n, vch, flags, special, pa); + + if (iswprint (wc)) + { + if (wc == ' ') + space = ch; + t = wcwidth (wc); + if (col + t > COLS) + break; + col += t; + if (pa) + mutt_addwch (wc); + } + else if (wc == '\n') + break; + else if (wc == '\t') + { + space = ch; + t = (col & ~7) + 8; + if (t > COLS) + break; + if (pa) + for (; col < t; col++) + addch (' '); + else + col = t; + } + else if (wc < 0x20 || wc == 0x7f) + { + if (col + 2 > COLS) + break; + col += 2; + if (pa) + printw ("^%c", ('@' + wc) & 0x7f); + } + else if (wc < 0x100) + { + if (col + 4 > COLS) + break; + col += 4; + if (pa) + printw ("\\%03o", wc); + } + else + { + if (col + 1 > COLS) + break; + ++col; + if (pa) + addch (replacement_char ()); + } + } + *pspace = space; + *pcol = col; + *pvch = vch; + *pspecial = special; + return ch; +} + /* * Args: * flags M_SHOWFLAT, show characters (used for displaying help) @@ -992,10 +1124,10 @@ display_line (FILE *f, long *last_pos, struct line_t **lineInfo, int n, int *q_level, int *force_redraw, regex_t *SearchRE) { unsigned char buf[LONG_STRING], fmt[LONG_STRING]; - unsigned char *buf_ptr = buf, c; - int ch, vch, t, col, cnt, b_read; + unsigned char *buf_ptr = buf; + int ch, vch, col, cnt, b_read; int buf_ready = 0, change_last = 0; - int special = 0, last_special = 0; + int special; int offset; int def_color; int m; @@ -1121,73 +1253,13 @@ display_line (FILE *f, long *last_pos, struct line_t **lineInfo, int n, } /* now chose a good place to break the line */ - - ch = -1; /* index of the last space or TAB */ - cnt = 0; - col = option (OPTMARKERS) ? 
(*lineInfo)[n].continuation : 0; - while (col < COLS && cnt < b_read) - { - c = *buf_ptr++; - if (c == '\n') - break; - - while (*buf_ptr == '\010' && cnt + 2 < b_read) - { - cnt += 2; - buf_ptr += 2; - c = buf[cnt]; - } - - if (*buf_ptr == '\033' && *(buf_ptr + 1) && *(buf_ptr + 1) == '[' && is_ansi (buf_ptr+2)) - { - cnt = grok_ansi(buf, cnt+3, NULL); - cnt++; - buf_ptr = buf + cnt; - continue; - } - - if (c == '\t') - { - ch = cnt; - /* expand TABs */ - if ((t = (col & ~7) + 8) < COLS) - { - col = t; - cnt++; - } - else - break; - } - else if (IsPrint (c)) - { - if (c == ' ') - ch = cnt; - col++; - cnt++; - } - else if (iscntrl (c) && c < '@') - { - if (c == '\r' && *buf_ptr == '\n') - cnt++; - else if (col < COLS - 1) - { - col += 2; - cnt++; - } - else - break; - } - else - { - col++; - cnt++; - } - } + cnt = format_line (lineInfo, n, buf, flags, 0, b_read, &ch, &vch, &col, &special); + buf_ptr = buf + cnt; /* move the break point only if smart_wrap is set */ if (option (OPTWRAP)) { - if (col == COLS) + if (cnt < b_read) { if (ch != -1 && buf[cnt] != ' ' && buf[cnt] != '\t' && buf[cnt] != '\n' && buf[cnt] != '\r') { @@ -1237,107 +1309,9 @@ display_line (FILE *f, long *last_pos, struct line_t **lineInfo, int n, } else clrtoeol (); - - /* display the line */ - col = option (OPTMARKERS) ? (*lineInfo)[n].continuation : 0; - for (ch = 0, vch = 0; ch < cnt; ch++, vch++) - { - special = 0; - c = buf[ch]; - while (buf[ch+1] == '\010' && ch+2 < b_read) - { - if (buf[ch+2] == c) - { - special = (c == '_' && last_special == A_UNDERLINE) - ? A_UNDERLINE : A_BOLD; - ch += 2; - } - else if (buf[ch] == '_' || buf[ch+2] == '_') - { - special = A_UNDERLINE; - ch += 2; - c = (buf[ch] == '_') ? buf[ch-2] : buf[ch]; - } - else - { - special = 0; /* overstrike: nothing to do! 
*/ - ch += 2; - c = buf[ch]; - } - last_special = special; - } - - /* Handle ANSI sequences */ - if (c == '\033' && buf[ch+1] == '[' && is_ansi (buf+ch+2)) - { - ch = grok_ansi(buf, ch+2, &a); - c = buf[ch]; - continue; - } - - if (c == '\t') - { - if ((flags & (M_SHOWCOLOR | M_SEARCH | M_PAGER_MARKER)) || last_special - || a.attr) - { - resolve_color (*lineInfo, n, vch, flags, special, &a); - if (!special) - last_special = 0; - } - - t = (col & ~7) + 8; - while (col < t) - { - addch (' '); - col++; - } - } - else if (IsPrint (c)) - { - if ((flags & (M_SHOWCOLOR | M_SEARCH | M_PAGER_MARKER)) || special - || last_special || a.attr) - resolve_color (*lineInfo, n, vch, flags, special, &a); - if (!special) - last_special = 0; - - addch (c); - col++; - } - else if (iscntrl (c) && (c < '@' || c == 127)) - { - if ((c != '\r' && c !='\n') || (buf[ch+1] != '\n' && buf[ch+1] != '\0')) - { - if ((flags & (M_SHOWCOLOR | M_SEARCH | M_PAGER_MARKER)) || last_special - || a.attr) - { - resolve_color (*lineInfo, n, vch, flags, special, &a); - if (!special) - last_special = 0; - } - - addch ('^'); - addch ((c + '@') & 127); - col += 2; - } - } - else - { - if ((flags & (M_SHOWCOLOR | M_SEARCH | M_PAGER_MARKER)) || last_special - || a.attr) - { - resolve_color (*lineInfo, n, vch, flags, special, &a); - if (!special) - last_special = 0; - } - - if (ISSPACE (c)) - addch (c); /* unbreakable space */ - else - addch ('?'); - col++; - } - } + /* display the line */ + format_line (lineInfo, n, buf, flags, &a, cnt, &ch, &vch, &col, &special); /* avoid a bug in ncurses... 
*/ #ifndef USE_SLANG_CURSES @@ -1349,7 +1323,7 @@ display_line (FILE *f, long *last_pos, struct line_t **lineInfo, int n, #endif /* end the last color pattern (needed by S-Lang) */ - if (last_special || (col != COLS && (flags & (M_SHOWCOLOR | M_SEARCH)))) + if (special || (col != COLS && (flags & (M_SHOWCOLOR | M_SEARCH)))) resolve_color (*lineInfo, n, vch, flags, 0, &a); /* ncurses always wraps lines when you get to the right side of the @@ -1530,7 +1504,8 @@ mutt_pager (const char *banner, const char *fname, int flags, pager_t *extra) if (option (OPTHELP)) { SETCOLOR (MT_COLOR_STATUS); - mvprintw (helpoffset, 0, "%-*.*s", COLS, COLS, helpstr); + move (helpoffset, 0); + mutt_paddstr (COLS, helpstr); SETCOLOR (MT_COLOR_NORMAL); } @@ -1654,8 +1629,9 @@ mutt_pager (const char *banner, const char *fname, int flags, pager_t *extra) COLS - 9 < sizeof (buffer) ? COLS - 9: sizeof (buffer), NONULL (PagerFmt), Context, extra->bdy->hdr, M_FORMAT_MAKEPRINT); } - printw ("%-*.*s -- (", COLS-10, COLS-10, - IsHeader (extra) || IsMsgAttach (extra) ? buffer : banner); + mutt_paddstr (COLS-10, IsHeader (extra) || IsMsgAttach (extra) ? + buffer : banner); + addstr (" -- ("); if (last_pos < sb.st_size - 1) printw ("%d%%)", (int) (100 * last_offset / sb.st_size)); else @@ -1675,7 +1651,7 @@ mutt_pager (const char *banner, const char *fname, int flags, pager_t *extra) move (indexoffset + (option (OPTSTATUSONTOP) ? 0 : (indexlen - 1)), 0); SETCOLOR (MT_COLOR_STATUS); - printw ("%-*.*s", COLS, COLS, buffer); + mutt_paddstr (COLS, buffer); SETCOLOR (MT_COLOR_NORMAL); } diff --git a/pgppubring.c b/pgppubring.c index 7538ad38..c48ce05b 100644 --- a/pgppubring.c +++ b/pgppubring.c @@ -1041,4 +1041,13 @@ static void pgpring_dump_keyblock (pgp_key_t *p) } } } - + +/* + * The mutt_gettext () defined in gettext.c requires iconv, + * so we do without charset conversion here. 
/*
 * Stand-in for the mutt_gettext () in gettext.c, which requires
 * iconv: no character set conversion is done here.
 *
 * Returns `message' itself.  The return type is a plain char * only
 * to match the declaration used by the _() macro; the caller must
 * not modify the string.
 */

char *mutt_gettext (const char *message)
{
  /* Explicit cast: the const qualifier is dropped on purpose. */
  return (char *) message;
}
mutt_prepare_template(FILE*, CONTEXT *, HEADER *, HEADER *, short); int mutt_resend_message (FILE *, CONTEXT *, HEADER *); @@ -307,6 +314,10 @@ ADDRESS *alias_reverse_lookup (ADDRESS *); void mutt_to_base64 (unsigned char*, const unsigned char*, int); int mutt_from_base64 (char*, const char*); +/* utf8.c */ +int mutt_wctoutf8 (char *s, unsigned int c); +int mutt_utf8towc (unsigned int *pwc, const char *s, size_t n); + #ifdef LOCALES_HACK #define IsPrint(c) (isprint((unsigned char)(c)) || \ ((unsigned char)(c) >= 0xa0)) diff --git a/rfc2047.c b/rfc2047.c index 786803aa..df5bd33f 100644 --- a/rfc2047.c +++ b/rfc2047.c @@ -244,7 +244,7 @@ void rfc2047_encode_string (char *d, size_t dlen, const unsigned char *s) strfcpy((char *)scratch, (const char *) s, sizeof(scratch)); if (*send_charset && mutt_strcasecmp("us-ascii", send_charset)) - mutt_display_string((char *)scratch, mutt_get_translation(Charset, send_charset)); + mutt_convert_string ((char *)scratch, LONG_STRING, Charset, send_charset); (*encoder) (d, dlen, scratch, send_charset); } @@ -280,6 +280,7 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len) char *t; int enc = 0, filter = 0, count = 0, c1, c2, c3, c4; char *charset = NULL; + size_t olen = len; while ((pp = strtok (pp, "?")) != NULL) { @@ -364,17 +365,7 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len) } if (filter) - { - if(mutt_is_utf8(charset)) - { - CHARSET *chs = mutt_get_charset(Charset); - mutt_decode_utf8_string(d, chs); - } - else - mutt_display_string(d, mutt_get_translation(charset, Charset)); - - mutt_display_sanitize (d); - } + mutt_convert_string (d, olen, charset, Charset); safe_free ((void **) &p); return (0); } @@ -387,7 +378,8 @@ void rfc2047_decode (char *d, const char *s, size_t dlen) const char *p, *q; size_t n; int found_encoded = 0; - + int in_place = (d == s); + dlen--; /* save room for the terminal nul */ while (*s && dlen > 0) @@ -418,7 +410,7 @@ void rfc2047_decode (char *d, const char 
*s, size_t dlen) } } - rfc2047_decode_word (d, p, dlen); + rfc2047_decode_word (d, p, in_place ? q + 2 - p : dlen); found_encoded = 1; s = q + 2; n = mutt_strlen (d); diff --git a/rfc2231.c b/rfc2231.c index 0fb1556d..4441a738 100644 --- a/rfc2231.c +++ b/rfc2231.c @@ -219,14 +219,7 @@ static void rfc2231_decode_one (char *dest, char *src, char *chs) *d = '\0'; if (chs && strcmp (chs, "us-ascii") && strcmp (chs, Charset)) - { - if (mutt_is_utf8 (chs)) - mutt_decode_utf8_string (dest, mutt_get_charset (Charset)); - else - mutt_display_string (dest, mutt_get_translation (chs, Charset)); - - mutt_display_sanitize (dest); - } + mutt_convert_string (dest, 0, chs, Charset); } /* insert parameter into an ordered list. @@ -339,7 +332,7 @@ int rfc2231_encode (char *dest, size_t l, unsigned char *src) *t = '\0'; if (Charset && SendCharset && mutt_strcasecmp (Charset, SendCharset)) - mutt_display_string (buff, mutt_get_translation (Charset, SendCharset)); + mutt_convert_string (buff, LONG_STRING, Charset, SendCharset); snprintf (dest, l, "%s''%s", SendCharset ? SendCharset : (Charset ? Charset : "unknown-8bit"), buff); diff --git a/sendlib.c b/sendlib.c index 1181ad1e..42e44fe0 100644 --- a/sendlib.c +++ b/sendlib.c @@ -126,16 +126,13 @@ static char MsgIdPfx = 'A'; static void transform_to_7bit (BODY *a, FILE *fpin); -static void encode_quoted (FILE * fin, FILE *fout, int istext, CHARSET_MAP *map) +static void encode_quoted (FGETCONV * fc, FILE *fout, int istext) { int c, linelen = 0; char line[77], savechar; - while ((c = fgetc (fin)) != EOF) + while ((c = fgetconv (fc)) != EOF) { - if(istext && map) - c = mutt_display_char(c, map); - /* Escape lines that begin with "the message separator". 
*/ if (linelen == 5 && !mutt_strncmp ("From ", line, 5)) { @@ -310,18 +307,15 @@ static void b64_putc(char c, FILE *fout) } -static void encode_base64 (FILE * fin, FILE *fout, int istext, CHARSET_MAP *map) +static void encode_base64 (FGETCONV * fc, FILE *fout, int istext) { int ch, ch1 = EOF; b64_num = b64_linelen = 0; - while((ch = fgetc(fin)) != EOF) + while ((ch = fgetconv (fc)) != EOF) { - if(istext && map) - ch = mutt_display_char(ch, map); - - if(istext && ch == '\n' && ch1 != '\r') + if (istext && ch == '\n' && ch1 != '\r') b64_putc('\r', fout); b64_putc(ch, fout); ch1 = ch; @@ -330,20 +324,12 @@ static void encode_base64 (FILE * fin, FILE *fout, int istext, CHARSET_MAP *map) fputc('\n', fout); } -static void encode_8bit(FILE *fin, FILE *fout, int istext, CHARSET_MAP *map) +static void encode_8bit (FGETCONV *fc, FILE *fout, int istext) { int ch; - - if(!istext || !map) - { - mutt_copy_stream(fin, fout); - return; - } - while((ch = fgetc(fin)) != EOF) - { - fputc(mutt_display_char(ch, map), fout); - } + while ((ch = fgetconv (fc)) != EOF) + fputc (ch, fout); } @@ -444,7 +430,7 @@ int mutt_write_mime_body (BODY *a, FILE *f) char send_charset[SHORT_STRING]; FILE *fpin; BODY *t; - CHARSET_MAP *map = NULL; + FGETCONV *fc; if (a->type == TYPEMULTIPART) { @@ -490,19 +476,21 @@ int mutt_write_mime_body (BODY *a, FILE *f) return -1; } - if (a->type == TYPETEXT) - map = mutt_get_translation (Charset, mutt_get_send_charset (send_charset, sizeof(send_charset), a, 1)); + if (a->type == TYPETEXT && (!a->noconv)) + fc = fgetconv_open (fpin, Charset, mutt_get_send_charset (send_charset, sizeof (send_charset), a, 1)); + else + fc = fgetconv_open (fpin, 0, 0); if (a->encoding == ENCQUOTEDPRINTABLE) - encode_quoted (fpin, f, mutt_is_text_type (a->type, a->subtype), - a->type == TYPETEXT && (!a->noconv) ? 
map : NULL);
+    encode_quoted (fc, f, mutt_is_text_type (a->type, a->subtype));
   else if (a->encoding == ENCBASE64)
-    encode_base64 (fpin, f, mutt_is_text_type (a->type, a->subtype),
-                   a->type == TYPETEXT && (!a->noconv) ? map : NULL);
+    encode_base64 (fc, f, mutt_is_text_type (a->type, a->subtype));
+  else if (a->type == TYPETEXT && (!a->noconv))
+    encode_8bit (fc, f, mutt_is_text_type (a->type, a->subtype));
   else
-    encode_8bit (fpin, f, mutt_is_text_type (a->type, a->subtype),
-                 a->type == TYPETEXT && (!a->noconv) ? map : NULL);
+    mutt_copy_stream (fpin, f);
 
+  fgetconv_close (fc);
   fclose (fpin);
 
   return (ferror (f) ? -1 : 0);
diff --git a/utf8.c b/utf8.c
new file mode 100644
index 00000000..6a17c39f
--- /dev/null
+++ b/utf8.c
@@ -0,0 +1,82 @@
+/*
+ * Encode the UCS code point c as UTF-8.
+ *
+ * s  destination buffer, or NULL to only compute the encoded length.
+ *    When non-NULL, the caller must provide room for the returned
+ *    number of bytes (at most 6); no terminating NUL is written.
+ * c  code point to encode.
+ *
+ * Returns the number of bytes in the encoding (1 to 6), or 0 when c
+ * is 0x80000000 or larger and so has no encoding.
+ */
+int mutt_wctoutf8 (char *s, unsigned int c)
+{
+  /* Store through unsigned char so that lead and continuation bytes
+   * (all >= 0x80) do not depend on the signedness of plain char. */
+  unsigned char *p = (unsigned char *) s;
+
+  /* The limits use 1U: 1 << 31 overflows a 32-bit signed int, which
+   * is undefined behaviour. */
+  if (c < (1U << 7))
+  {
+    if (p)
+      *p++ = c;
+    return 1;
+  }
+  else if (c < (1U << 11))
+  {
+    if (p)
+    {
+      *p++ = 0xc0 | (c >> 6);
+      *p++ = 0x80 | (c & 0x3f);
+    }
+    return 2;
+  }
+  else if (c < (1U << 16))
+  {
+    if (p)
+    {
+      *p++ = 0xe0 | (c >> 12);
+      *p++ = 0x80 | ((c >> 6) & 0x3f);
+      *p++ = 0x80 | (c & 0x3f);
+    }
+    return 3;
+  }
+  else if (c < (1U << 21))
+  {
+    if (p)
+    {
+      *p++ = 0xf0 | (c >> 18);
+      *p++ = 0x80 | ((c >> 12) & 0x3f);
+      *p++ = 0x80 | ((c >> 6) & 0x3f);
+      *p++ = 0x80 | (c & 0x3f);
+    }
+    return 4;
+  }
+  else if (c < (1U << 26))
+  {
+    if (p)
+    {
+      *p++ = 0xf8 | (c >> 24);
+      *p++ = 0x80 | ((c >> 18) & 0x3f);
+      *p++ = 0x80 | ((c >> 12) & 0x3f);
+      *p++ = 0x80 | ((c >> 6) & 0x3f);
+      *p++ = 0x80 | (c & 0x3f);
+    }
+    return 5;
+  }
+  else if (c < (1U << 31))
+  {
+    if (p)
+    {
+      *p++ = 0xfc | (c >> 30);
+      *p++ = 0x80 | ((c >> 24) & 0x3f);
+      *p++ = 0x80 | ((c >> 18) & 0x3f);
+      *p++ = 0x80 | ((c >> 12) & 0x3f);
+      *p++ = 0x80 | ((c >> 6) & 0x3f);
+      *p++ = 0x80 | (c & 0x3f);
+    }
+    return 6;
+  }
+  return 0;
+}
diff --git a/wcwidth.c b/wcwidth.c
new file mode 100644
index 00000000..15817e8a
--- /dev/null
+++ b/wcwidth.c
@@ -0,0 +1,134 @@
+/*
+ * This is an implementation of wcwidth()
and wcswidth() as defined in
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
+ * <http://www.UNIX-systems.org/online.html>
+ *
+ * Markus Kuhn -- 2000-02-08 -- public domain
+ */
+
+#if 0 /* original */
+#include <wchar.h>
+#else /* Mutt */
+#include "mbyte.h"
+#endif
+
+/* These functions define the column width of an ISO 10646 character
+ * as follows:
+ *
+ *    - The null character (U+0000) has a column width of 0.
+ *
+ *    - Other C0/C1 control characters and DEL will lead to a return
+ *      value of -1.
+ *
+ *    - Non-spacing and enclosing combining characters (general
+ *      category code Mn or Me in the Unicode database) have a
+ *      column width of 0.
+ *
+ *    - Spacing characters in the East Asian Wide (W) or East Asian
+ *      FullWidth (F) category as defined in Unicode Technical
+ *      Report #11 have a column width of 2.
+ *
+ *    - All remaining characters (including all printable
+ *      ISO 8859-1 and WGL4 characters, Unicode control characters,
+ *      etc.) have a column width of 1.
+ *
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+
+int wcwidth(wchar_t ucs) /* returns 0, 1 or 2 columns, or -1 for control characters */
+{
+  /* sorted, non-overlapping intervals of non-spacing characters (binary-searched below) */
+  static const struct interval {
+    unsigned short first; /* lowest code point of the interval (inclusive) */
+    unsigned short last;  /* highest code point of the interval (inclusive) */
+  } combining[] = {
+    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
+    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
+    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
+    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
+    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
+    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
+    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
+    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
+    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
+    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
+    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
+    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
+    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
+    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
+    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
+    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
+    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
+    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
+    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
+    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
+    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
+    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
+    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
+    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
+    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
+    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
+    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
+    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
+    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
+    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
+    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
+  };
+  int min = 0; /* [min, max] is the index range of the table still to be searched */
+  int max = sizeof(combining) / sizeof(struct interval) - 1; /* index of the last table entry */
+  int mid;
+
+  /* NUL has width 0; C0 controls, DEL and C1 controls (0x7f..0x9f) give -1 */
+  if (ucs == 0)
+    return 0;
+  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) /* also true for negative ucs if wchar_t is signed */
+    return -1;
+
+  /* quick exit: anything below the first table entry (0x0300) has width 1 */
+  if (ucs < combining[0].first)
+    return 1;
+
+  /* binary search in table of non-spacing characters; a hit means width 0 */
+  while (max >= min) {
+    mid = (min + max) / 2;
+    if (combining[mid].last < ucs)
+      min = mid + 1;
+    else if (combining[mid].first > ucs)
+      max = mid - 1;
+    else if (combining[mid].first <= ucs && combining[mid].last >= ucs) /* always true once both tests above failed */
+      return 0;
+  }
+
+  /* if we arrive here, ucs is not a combining or C0/C1 control character, so the width is 1 or 2 */
+
+  /* fast test for majority of non-wide scripts: no wide range below starts before 0x1100 */
+  if (ucs < 0x1100)
+    return 1;
+
+  return 1 + /* the parenthesised test below yields 1 for wide characters, otherwise 0 */
+    ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
+     (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
+      ucs != 0x303f) ||                  /* CJK ... Yi, minus 0x300a, 0x300b, 0x301a, 0x301b, 0x303f */
+     (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
+     (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
+     (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
+     (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
+     (ucs >= 0xffe0 && ucs <= 0xffe6));
+}
+
+
+#if 0 /* original: wcswidth() is compiled out by this #if 0 */
+int wcswidth(const wchar_t *pwcs, size_t n)
+{
+  int w, width = 0;
+
+  for (;*pwcs && n-- > 0; pwcs++) /* stop at the terminating null or after n characters */
+    if ((w = wcwidth(*pwcs)) < 0)
+      return -1; /* one character of width -1 makes the whole result -1 */
+    else
+      width += w;
+
+  return width;
+}
+#endif