From: Richard Russon Date: Sat, 30 Dec 2017 16:05:25 +0000 (+0000) Subject: move charset functions to muttlib X-Git-Tag: neomutt-20180223~56^2~3 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=29671adb049b279dc3f743eb830fea0285a6cbec;p=neomutt move charset functions to muttlib --- diff --git a/Makefile.autosetup b/Makefile.autosetup index 89b229559..6089f957b 100644 --- a/Makefile.autosetup +++ b/Makefile.autosetup @@ -56,7 +56,7 @@ ALL_FILES!= (cd $(SRCDIR) && git ls-tree -r --name-only HEAD 2>/dev/null) \ # neomutt NEOMUTT= neomutt$(EXEEXT) NEOMUTTOBJS= mutt_account.o addrbook.o alias.o attach.o bcache.o body.o \ - browser.o buffy.o mutt_charset.o color.o commands.o complete.o \ + browser.o buffy.o color.o commands.o complete.o \ compose.o compress.o conststrings.o copy.o curs_lib.o \ curs_main.o edit.o editmsg.o enter.o envelope.o filter.o \ flags.o from.o group.o handler.o hdrline.o \ diff --git a/alias.c b/alias.c index f6ee769e0..7f98a290b 100644 --- a/alias.c +++ b/alias.c @@ -35,7 +35,6 @@ #include "address.h" #include "envelope.h" #include "globals.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mutt_idna.h" #include "options.h" @@ -230,7 +229,7 @@ static void recode_buf(char *buf, size_t buflen) s = mutt_str_strdup(buf); if (!s) return; - if (mutt_convert_string(&s, Charset, ConfigCharset, 0) == 0) + if (mutt_cs_convert_string(&s, Charset, ConfigCharset, 0) == 0) mutt_str_strfcpy(buf, s, buflen); FREE(&s); } diff --git a/browser.c b/browser.c index 6cea7ccc1..9d1de391c 100644 --- a/browser.c +++ b/browser.c @@ -50,7 +50,6 @@ #include "mailbox.h" #include "mbyte.h" #include "mutt_account.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mutt_menu.h" #include "mx.h" @@ -577,7 +576,7 @@ static const char *group_index_format_str(char *buf, size_t buflen, size_t col, { char *desc = mutt_str_strdup(folder->ff->nd->desc); if (NewsgroupsCharset && *NewsgroupsCharset) - mutt_convert_string(&desc, 
NewsgroupsCharset, Charset, MUTT_ICONV_HOOK_FROM); + mutt_cs_convert_string(&desc, NewsgroupsCharset, Charset, MUTT_ICONV_HOOK_FROM); mutt_filter_unprintable(&desc); snprintf(fmt, sizeof(fmt), "%%%ss", prec); diff --git a/handler.c b/handler.c index 8b93f31cf..b20fd7098 100644 --- a/handler.c +++ b/handler.c @@ -42,7 +42,6 @@ #include "globals.h" #include "keymap.h" #include "mime.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "ncrypt/ncrypt.h" #include "opcodes.h" @@ -1591,10 +1590,10 @@ void mutt_decode_attachment(struct Body *b, struct State *s) if (!charset && AssumedCharset && *AssumedCharset) charset = mutt_cs_get_default_charset(); if (charset && Charset) - cd = mutt_iconv_open(Charset, charset, MUTT_ICONV_HOOK_FROM); + cd = mutt_cs_iconv_open(Charset, charset, MUTT_ICONV_HOOK_FROM); } else if (istext && b->charset) - cd = mutt_iconv_open(Charset, b->charset, MUTT_ICONV_HOOK_FROM); + cd = mutt_cs_iconv_open(Charset, b->charset, MUTT_ICONV_HOOK_FROM); fseeko(s->fpin, b->offset, SEEK_SET); switch (b->encoding) diff --git a/hcache/hcache.c b/hcache/hcache.c index db8aa97c5..02c348d24 100644 --- a/hcache/hcache.c +++ b/hcache/hcache.c @@ -57,7 +57,6 @@ #include "hcache/hcversion.h" #include "header.h" #include "mbyte.h" -#include "mutt_charset.h" #include "parameter.h" #include "protos.h" #include "tags.h" @@ -181,7 +180,7 @@ static unsigned char *dump_char_size(char *c, unsigned char *d, int *off, if (convert && !mutt_str_is_ascii(c, size)) { p = mutt_str_substr_dup(c, c + size); - if (mutt_convert_string(&p, Charset, "utf-8", 0) == 0) + if (mutt_cs_convert_string(&p, Charset, "utf-8", 0) == 0) { c = p; size = mutt_str_strlen(c) + 1; @@ -220,7 +219,7 @@ static void restore_char(char **c, const unsigned char *d, int *off, bool conver if (convert && !mutt_str_is_ascii(*c, size)) { char *tmp = mutt_str_strdup(*c); - if (mutt_convert_string(&tmp, "utf-8", Charset, 0) == 0) + if (mutt_cs_convert_string(&tmp, "utf-8", Charset, 0) == 0) { 
mutt_str_replace(c, tmp); } diff --git a/hcache/hcache.h b/hcache/hcache.h index 3b4388c38..3a577c686 100644 --- a/hcache/hcache.h +++ b/hcache/hcache.h @@ -41,7 +41,7 @@ * * SpamList * * NoSpamList * - Neomutt functions - * * mutt_convert_string() + * * mutt_cs_convert_string() * * mutt_encode_path() * * mutt_new_body() * * mutt_env_new() diff --git a/history.c b/history.c index eda49e944..44927407a 100644 --- a/history.c +++ b/history.c @@ -29,7 +29,6 @@ #include "mutt/mutt.h" #include "history.h" #include "globals.h" -#include "mutt_charset.h" #include "protos.h" /* This history ring grows from 0..History, with last marking the @@ -140,7 +139,7 @@ void mutt_read_histfile(void) p = mutt_str_strdup(linebuf + read); if (p) { - mutt_convert_string(&p, "utf-8", Charset, 0); + mutt_cs_convert_string(&p, "utf-8", Charset, 0); mutt_history_add(hclass, p, false); FREE(&p); } @@ -310,7 +309,7 @@ static void save_history(enum HistoryClass hclass, const char *s) } tmp = mutt_str_strdup(s); - mutt_convert_string(&tmp, Charset, "utf-8", 0); + mutt_cs_convert_string(&tmp, Charset, "utf-8", 0); /* Format of a history item (1 line): ":|". We add a '|' in order to avoid lines ending with '\'. 
*/ diff --git a/hook.c b/hook.c index b6445c4aa..57ef1c1fc 100644 --- a/hook.c +++ b/hook.c @@ -36,7 +36,6 @@ #include "globals.h" #include "header.h" #include "mailbox.h" -#include "mutt_charset.h" #include "ncrypt/ncrypt.h" #include "options.h" #include "pattern.h" diff --git a/imap/utf7.c b/imap/utf7.c index 315e61f4c..76dfdc8d1 100644 --- a/imap/utf7.c +++ b/imap/utf7.c @@ -36,7 +36,6 @@ #include "imap_private.h" #include "mutt/mutt.h" #include "globals.h" -#include "mutt_charset.h" // clang-format off /** @@ -322,7 +321,7 @@ void imap_utf_encode(struct ImapData *idata, char **s) if (Charset) { char *t = mutt_str_strdup(*s); - if (t && !mutt_convert_string(&t, Charset, "utf-8", 0)) + if (t && !mutt_cs_convert_string(&t, Charset, "utf-8", 0)) { FREE(s); if (idata->unicode) @@ -350,7 +349,7 @@ void imap_utf_decode(struct ImapData *idata, char **s) else t = utf7_to_utf8(*s, strlen(*s), 0, 0); - if (t && !mutt_convert_string(&t, "utf-8", Charset, 0)) + if (t && !mutt_cs_convert_string(&t, "utf-8", Charset, 0)) { FREE(s); *s = t; diff --git a/init.c b/init.c index 1b7d5194b..68562eddc 100644 --- a/init.c +++ b/init.c @@ -52,7 +52,6 @@ #include "mailbox.h" #include "mbtable.h" #include "mbyte.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mutt_idna.h" #include "mutt_menu.h" @@ -2129,7 +2128,7 @@ static int check_charset(struct Option *opt, const char *val) { if (!*p) continue; - if (!mutt_check_charset(p, strict)) + if (!mutt_cs_check_charset(p, strict)) { rc = -1; break; @@ -3031,7 +3030,7 @@ static int source_rc(const char *rcfile_path, struct Buffer *err) currentline = mutt_str_strdup(linebuf); if (!currentline) continue; - mutt_convert_string(&currentline, ConfigCharset, Charset, 0); + mutt_cs_convert_string(&currentline, ConfigCharset, Charset, 0); } else currentline = linebuf; diff --git a/mbyte.c b/mbyte.c index 09849b77b..a79bf5745 100644 --- a/mbyte.c +++ b/mbyte.c @@ -29,7 +29,6 @@ #include #include "mutt/mutt.h" #include "mbyte.h" -#include
"mutt_charset.h" #include "options.h" #include "protos.h" diff --git a/mutt/charset.c b/mutt/charset.c index ff459965d..a4a27ce3a 100644 --- a/mutt/charset.c +++ b/mutt/charset.c @@ -32,12 +32,20 @@ * | Function | Description * | :----------------------------- | :--------------------------------------------------------- * | mutt_cs_canonical_charset() | Canonicalise the charset of a string + * | mutt_cs_charset_lookup() | Look for a replacement character set + * | mutt_cs_check_charset() | Does iconv understand a character set? * | mutt_cs_chscmp() | Are the names of two character sets equivalent? + * | mutt_cs_convert_string() | Convert a string between encodings * | mutt_cs_fgetconv() | Convert a file's character set * | mutt_cs_fgetconvs() | Convert a file's charset into a string buffer * | mutt_cs_fgetconv_close() | Close an fgetconv handle + * | mutt_cs_fgetconv_open() | Prepare a file for charset conversion * | mutt_cs_get_default_charset() | Get the default character set * | mutt_cs_iconv() | Change the encoding of a string + * | mutt_cs_iconv_lookup() | Look for a replacement character set + * | mutt_cs_iconv_open() | Set up iconv for conversions + * | mutt_cs_lookup_add() | Add a new character set lookup + * | mutt_cs_lookup_remove() | Remove all the character set lookups * | mutt_cs_set_langinfo_charset() | Set the user's choice of character set */ @@ -46,11 +54,16 @@ #include #include #include +#include +#include #include #include #include #include "charset.h" +#include "buffer.h" #include "memory.h" +#include "queue.h" +#include "regex3.h" #include "string2.h" #ifndef EILSEQ @@ -60,6 +73,20 @@ char *AssumedCharset; /**< Encoding schemes for messages without indication */ char *Charset; /**< User's choice of character set */ +/** + * struct Lookup - Regex to String lookup table + * + * This is used by 'charset-hook' and 'iconv-hook'. 
+ */ +struct Lookup +{ + enum LookupType type; /**< Lookup type */ + struct Regex regex; /**< Regular expression */ + char *replacement; /**< Alternative charset to use */ + TAILQ_ENTRY(Lookup) entries; +}; +static TAILQ_HEAD(LookupHead, Lookup) Lookups = TAILQ_HEAD_INITIALIZER(Lookups); + // clang-format off /** * PreferredMIMENames - Lookup table of preferred charsets @@ -172,10 +199,8 @@ const struct MimeNames PreferredMIMENames[] = { "csGB2312", "gb2312" }, { "csbig5", "big5" }, - /* - * End of official brain damage. What follows has been taken from glibc's - * localedata files. - */ + /* End of official brain damage. + * What follows has been taken from glibc's localedata files. */ { "iso_8859-13", "iso-8859-13" }, { "iso-ir-179", "iso-8859-13" }, @@ -197,9 +222,7 @@ const struct MimeNames PreferredMIMENames[] = { "646", "us-ascii" }, - /* - * http://www.sun.com/software/white-papers/wp-unicode/ - */ + /* http://www.sun.com/software/white-papers/wp-unicode/ */ { "eucJP", "euc-jp" }, { "PCK", "Shift_JIS" }, @@ -211,131 +234,49 @@ const struct MimeNames PreferredMIMENames[] = { "sjis", "Shift_JIS" }, { "euc-jp-ms", "eucJP-ms" }, - /* - * If you happen to encounter system-specific brain-damage with respect to + /* If you happen to encounter system-specific brain-damage with respect to * character set naming, please add it above this comment, and submit a patch - * to . - */ - - /* End of aliases. Please keep this line last. */ + * to */ { NULL, NULL }, }; // clang-format on /** - * mutt_cs_fgetconv_close - Close an fgetconv handle - * @param handle fgetconv handle - */ -void mutt_cs_fgetconv_close(struct FgetConv **fc) -{ - if ((*fc)->cd != (iconv_t) -1) - iconv_close((*fc)->cd); - FREE(fc); -} - -/** - * mutt_cs_fgetconv - Convert a file's character set - * @param fc FgetConv handle - * @retval num Next character in the converted file - * @retval EOF Error + * lookup_charset - Look for a preferred character set name + * @param type Type, e.g. 
#MUTT_LOOKUP_CHARSET + * @param cs Character set * - * A file is read into a buffer and its character set is converted. - * Each call to this function will return one converted character. - * The buffer is refilled automatically when empty. + * If the character set matches one of the regexes, + * then return the replacement name. */ -int mutt_cs_fgetconv(struct FgetConv *fc) +static const char *lookup_charset(enum LookupType type, const char *cs) { - if (!fc) - return EOF; - if (fc->cd == (iconv_t) -1) - return fgetc(fc->file); - if (!fc->p) - return EOF; - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - - /* Try to convert some more */ - fc->p = fc->ob = fc->bufo; - if (fc->ibl) - { - size_t obl = sizeof(fc->bufo); - iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - } - - /* If we trusted iconv a bit more, we would at this point - * ask why it had stopped converting ... */ + if (!cs) + return NULL; - /* Try to read some more */ - if (fc->ibl == sizeof(fc->bufi) || - (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) - { - fc->p = 0; - return EOF; - } - if (fc->ibl) - memcpy(fc->bufi, fc->ib, fc->ibl); - fc->ib = fc->bufi; - fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file); + struct Lookup *l = NULL; - /* Try harder this time to convert some */ - if (fc->ibl) + TAILQ_FOREACH(l, &Lookups, entries) { - size_t obl = sizeof(fc->bufo); - mutt_cs_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; + if (l->type != type) + continue; + if (regexec(l->regex.regex, cs, 0, NULL, 0) == 0) + return l->replacement; } - - /* Either the file has finished or one of the buffers is too small */ - fc->p = 0; - return EOF; -} - -/** - * mutt_cs_fgetconvs - Convert a file's charset into a string buffer - * @param buf Buffer for result - * @param l Length of buffer - * 
@param fc FgetConv handle - * @retval ptr Result buffer on success - * @retval NULL Error - * - * Read a file into a buffer, converting the character set as it goes. - */ -char *mutt_cs_fgetconvs(char *buf, size_t l, struct FgetConv *fc) -{ - int c; - size_t r; - - for (r = 0; r + 1 < l;) - { - c = mutt_cs_fgetconv(fc); - if (c == EOF) - break; - buf[r++] = (char) c; - if (c == '\n') - break; - } - buf[r] = '\0'; - - if (r) - return buf; - else - return NULL; + return NULL; } /** * mutt_cs_canonical_charset - Canonicalise the charset of a string - * @param dest Buffer for canonical character set name - * @param dlen Length of buffer + * @param buf Buffer for canonical character set name + * @param buflen Length of buffer * @param name Name to be canonicalised * * This first ties off any charset extension such as "//TRANSLIT", * canonicalizes the charset and re-adds the extension */ -void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name) +void mutt_cs_canonical_charset(char *buf, size_t buflen, const char *name) { char *p = NULL, *ext = NULL; char in[LONG_STRING], scratch[LONG_STRING]; @@ -348,7 +289,7 @@ void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name) if ((mutt_str_strcasecmp(in, "utf-8") == 0) || (mutt_str_strcasecmp(in, "utf8") == 0)) { - mutt_str_strfcpy(dest, "utf-8", dlen); + mutt_str_strfcpy(buf, "utf-8", buflen); goto out; } @@ -369,47 +310,50 @@ void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name) if ((mutt_str_strcasecmp(scratch, PreferredMIMENames[i].key) == 0) || (mutt_str_strcasecmp(scratch, PreferredMIMENames[i].key) == 0)) { - mutt_str_strfcpy(dest, PreferredMIMENames[i].pref, dlen); + mutt_str_strfcpy(buf, PreferredMIMENames[i].pref, buflen); goto out; } } - mutt_str_strfcpy(dest, scratch, dlen); + mutt_str_strfcpy(buf, scratch, buflen); /* for cosmetics' sake, transform to lowercase. 
*/ - for (p = dest; *p; p++) + for (p = buf; *p; p++) *p = tolower(*p); out: if (ext && *ext) { - mutt_str_strcat(dest, dlen, "/"); - mutt_str_strcat(dest, dlen, ext); + mutt_str_strcat(buf, buflen, "/"); + mutt_str_strcat(buf, buflen, ext); } } /** * mutt_cs_chscmp - Are the names of two character sets equivalent? - * @param s First character set - * @param chs Second character set + * @param cs1 First character set + * @param cs2 Second character set * @retval num true if the names are equivalent * * Charsets may have extensions that mutt_cs_canonical_charset() leaves intact; - * we expect 'chs' to originate from neomutt code, not user input (i.e. 'chs' + * we expect 'cs2' to originate from neomutt code, not user input (i.e. 'cs2' * does _not_ have any extension) we simply check if the shorter string is a * prefix for the longer. */ -int mutt_cs_chscmp(const char *s, const char *chs) +int mutt_cs_chscmp(const char *cs1, const char *cs2) { - if (!s || !chs) + if (!cs1 || !cs2) return 0; char buffer[STRING]; - mutt_cs_canonical_charset(buffer, sizeof(buffer), s); - int a = mutt_str_strlen(buffer); - int b = mutt_str_strlen(chs); - return (mutt_str_strncasecmp(a > b ? buffer : chs, a > b ? chs : buffer, MIN(a, b)) == 0); + mutt_cs_canonical_charset(buffer, sizeof(buffer), cs1); + + int len1 = mutt_str_strlen(buffer); + int len2 = mutt_str_strlen(cs2); + + return (mutt_str_strncasecmp((len1 > len2) ? buffer : cs2, + (len1 > len2) ? cs2 : buffer, MIN(len1, len2)) == 0); } /** @@ -433,6 +377,154 @@ char *mutt_cs_get_default_charset(void) return strcpy(fcharset, "us-ascii"); } +/** + * mutt_cs_set_langinfo_charset - Set the user's choice of character set + * + * Lookup the character map used by the user's locale and store it in Charset. 
+ */ +void mutt_cs_set_langinfo_charset(void) +{ + char buf[LONG_STRING]; + char buf2[LONG_STRING]; + + mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf)); + mutt_cs_canonical_charset(buf2, sizeof(buf2), buf); + + /* finally, set $charset */ + Charset = mutt_str_strdup(buf2); + if (!Charset) + Charset = mutt_str_strdup("iso-8859-1"); +} + +/** + * mutt_cs_lookup_add - Add a new character set lookup + * @param type Type of character set, e.g. MUTT_LOOKUP_CHARSET + * @param pat Pattern to match + * @param replace Replacement string + * @param err Buffer for error message + * @retval true, lookup added to list + * @retval false, Regex string was invalid + * + * Add a regex for a character set and a replacement name. + */ +bool mutt_cs_lookup_add(enum LookupType type, const char *pat, + const char *replace, struct Buffer *err) +{ + if (!pat || !replace) + return false; + + regex_t *rx = mutt_mem_malloc(sizeof(regex_t)); + int rc = REGCOMP(rx, pat, REG_ICASE); + if (rc != 0) + { + regerror(rc, rx, err->data, err->dsize); + FREE(&rx); + return false; + } + + struct Lookup *l = mutt_mem_calloc(1, sizeof(struct Lookup)); + l->type = type; + l->replacement = mutt_str_strdup(replace); + l->regex.pattern = mutt_str_strdup(pat); + l->regex.regex = rx; + l->regex.not = false; + + TAILQ_INSERT_TAIL(&Lookups, l, entries); + + return true; +} + +/** + * mutt_cs_lookup_remove - Remove all the character set lookups + * + * Empty the list of replacement character set names. 
+ */ +void mutt_cs_lookup_remove(void) +{ + struct Lookup *l = NULL; + struct Lookup *tmp = NULL; + + TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp) + { + TAILQ_REMOVE(&Lookups, l, entries); + FREE(&l->replacement); + FREE(&l->regex.pattern); + if (l->regex.regex) + regfree(l->regex.regex); + FREE(&l->regex); + FREE(&l); + } +} + +/** + * mutt_cs_charset_lookup - Look for a replacement character set + * @param chs Character set to lookup + * @retval ptr Replacement character set (if a 'charset-hook' matches) + * @retval NULL No matching hook + * + * Look through all the 'charset-hook's. + * If one matches return the replacement character set. + */ +const char *mutt_cs_charset_lookup(const char *chs) +{ + return lookup_charset(MUTT_LOOKUP_CHARSET, chs); +} + +/** + * mutt_cs_iconv_open - Set up iconv for conversions + * @param tocode Target character set + * @param fromcode Current character set + * @param flags Flags, e.g. #MUTT_ICONV_HOOK_FROM + * @retval ptr iconv handle for the conversion + * + * Like iconv_open, but canonicalises the charsets, applies charset-hooks, + * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips + * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers + * should use flags=0 when fromcode can safely be considered true, either some + * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be + * used only when fromcode is unsure, taken from a possibly wrong incoming MIME + * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions + * in some setups. Note: By design charset-hooks should never be, and are never, + * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM + * acts on charset-hooks, not at all on iconv-hooks.
+ */ +iconv_t mutt_cs_iconv_open(const char *tocode, const char *fromcode, int flags) +{ + char tocode1[SHORT_STRING]; + char fromcode1[SHORT_STRING]; + const char *tocode2 = NULL, *fromcode2 = NULL; + const char *tmp = NULL; + + iconv_t cd; + + /* transform to MIME preferred charset names */ + mutt_cs_canonical_charset(tocode1, sizeof(tocode1), tocode); + mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), fromcode); + + /* maybe apply charset-hooks and recanonicalise fromcode, + * but only when caller asked us to sanitize a potentially wrong + * charset name incoming from the wild exterior. */ + if (flags & MUTT_ICONV_HOOK_FROM) + { + tmp = mutt_cs_charset_lookup(fromcode1); + if (tmp) + mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), tmp); + } + + /* always apply iconv-hooks to suit system's iconv tastes */ + tocode2 = mutt_cs_iconv_lookup(tocode1); + tocode2 = (tocode2) ? tocode2 : tocode1; + fromcode2 = mutt_cs_iconv_lookup(fromcode1); + fromcode2 = (fromcode2) ? fromcode2 : fromcode1; + + /* call system iconv with names it appreciates */ + cd = iconv_open(tocode2, fromcode2); + if (cd != (iconv_t) -1) + return cd; + + return (iconv_t) -1; +} + /** * mutt_cs_iconv - Change the encoding of a string * @param[in] cd Iconv conversion descriptor @@ -519,20 +611,250 @@ size_t mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char * } /** - * mutt_cs_set_langinfo_charset - Set the user's choice of character set + * mutt_cs_iconv_lookup - Look for a replacement character set + * @param chs Character set to lookup + * @retval ptr Replacement character set (if a 'iconv-hook' matches) + * @retval NULL No matching hook * - * Lookup the character map used by the user's locale and store it in Charset. + * Look through all the 'iconv-hook's. + * If one matches return the replacement character set. 
*/ -void mutt_cs_set_langinfo_charset(void) +const char *mutt_cs_iconv_lookup(const char *chs) { - char buf[LONG_STRING]; - char buf2[LONG_STRING]; + return lookup_charset(MUTT_LOOKUP_ICONV, chs); +} - mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf)); - mutt_cs_canonical_charset(buf2, sizeof(buf2), buf); +/** + * mutt_cs_convert_string - Convert a string between encodings + * @param[in,out] ps String to convert + * @param[in] from Current character set + * @param[in] to Target character set + * @param[in] flags Flags, e.g. + * @retval 0 Success + * @retval -1 Error + * + * Parameter flags is given as-is to mutt_cs_iconv_open(). + * See there for its meaning and usage policy. + */ +int mutt_cs_convert_string(char **ps, const char *from, const char *to, int flags) +{ + iconv_t cd; + const char *repls[] = { "\357\277\275", "?", 0 }; + char *s = *ps; - /* finally, set $charset */ - Charset = mutt_str_strdup(buf2); - if (!Charset) - Charset = mutt_str_strdup("iso-8859-1"); + if (!s || !*s) + return 0; + + if (to && from && (cd = mutt_cs_iconv_open(to, from, flags)) != (iconv_t) -1) + { + size_t len; + const char *ib = NULL; + char *buf = NULL, *ob = NULL; + size_t ibl, obl; + const char **inrepls = NULL; + char *outrepl = NULL; + + if (mutt_cs_is_utf8(to)) + outrepl = "\357\277\275"; + else if (mutt_cs_is_utf8(from)) + inrepls = repls; + else + outrepl = "?"; + + len = strlen(s); + ib = s; + ibl = len + 1; + obl = MB_LEN_MAX * ibl; + ob = buf = mutt_mem_malloc(obl + 1); + + mutt_cs_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); + iconv_close(cd); + + *ob = '\0'; + + FREE(ps); + *ps = buf; + + mutt_str_adjust(ps); + return 0; + } + else + return -1; +} + +/** + * mutt_cs_check_charset - Does iconv understand a character set? 
+ * @param cs Character set to check + * @param strict Check strictly by using iconv + * @retval bool true if character set is valid + * + * If `strict` is false, then finding a matching character set in + * #PreferredMIMENames will be enough. + * If `strict` is true, or the charset is not in #PreferredMIMENames, then + * iconv() will be run. + */ +bool mutt_cs_check_charset(const char *cs, bool strict) +{ + iconv_t cd; + + if (mutt_cs_is_utf8(cs)) + return true; + + if (!strict) + { + for (int i = 0; PreferredMIMENames[i].key; i++) + { + if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, cs) == 0) || + (mutt_str_strcasecmp(PreferredMIMENames[i].pref, cs) == 0)) + { + return true; + } + } + } + + cd = mutt_cs_iconv_open(cs, cs, 0); + if (cd != (iconv_t)(-1)) + { + iconv_close(cd); + return true; + } + + return false; +} + +/** + * mutt_cs_fgetconv_open - Prepare a file for charset conversion + * @param file FILE ptr to prepare + * @param from Current character set + * @param to Destination character set + * @param flags Flags, e.g. MUTT_ICONV_HOOK_FROM + * @retval ptr fgetconv handle + * + * Parameter flags is given as-is to mutt_cs_iconv_open(). + */ +struct FgetConv *mutt_cs_fgetconv_open(FILE *file, const char *from, const char *to, int flags) +{ + struct FgetConv *fc = NULL; + iconv_t cd = (iconv_t) -1; + static const char *repls[] = { "\357\277\275", "?", 0 }; + + if (from && to) + cd = mutt_cs_iconv_open(to, from, flags); + + if (cd != (iconv_t) -1) + { + fc = mutt_mem_malloc(sizeof(struct FgetConv)); + fc->p = fc->ob = fc->bufo; + fc->ib = fc->bufi; + fc->ibl = 0; + fc->inrepls = mutt_cs_is_utf8(to) ? 
repls : repls + 1; + } + else + fc = mutt_mem_malloc(sizeof(struct FgetConvNot)); + fc->file = file; + fc->cd = cd; + return fc; +} + +/** + * mutt_cs_fgetconv_close - Close an fgetconv handle + * @param fc fgetconv handle + */ +void mutt_cs_fgetconv_close(struct FgetConv **fc) +{ + if ((*fc)->cd != (iconv_t) -1) + iconv_close((*fc)->cd); + FREE(fc); +} + +/** + * mutt_cs_fgetconv - Convert a file's character set + * @param fc FgetConv handle + * @retval num Next character in the converted file + * @retval EOF Error + * + * A file is read into a buffer and its character set is converted. + * Each call to this function will return one converted character. + * The buffer is refilled automatically when empty. + */ +int mutt_cs_fgetconv(struct FgetConv *fc) +{ + if (!fc) + return EOF; + if (fc->cd == (iconv_t) -1) + return fgetc(fc->file); + if (!fc->p) + return EOF; + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + + /* Try to convert some more */ + fc->p = fc->ob = fc->bufo; + if (fc->ibl) + { + size_t obl = sizeof(fc->bufo); + iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl); + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + } + + /* If we trusted iconv a bit more, we would at this point + * ask why it had stopped converting ... 
*/ + + /* Try to read some more */ + if (fc->ibl == sizeof(fc->bufi) || + (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) + { + fc->p = 0; + return EOF; + } + if (fc->ibl) + memcpy(fc->bufi, fc->ib, fc->ibl); + fc->ib = fc->bufi; + fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file); + + /* Try harder this time to convert some */ + if (fc->ibl) + { + size_t obl = sizeof(fc->bufo); + mutt_cs_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0); + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + } + + /* Either the file has finished or one of the buffers is too small */ + fc->p = 0; + return EOF; +} + +/** + * mutt_cs_fgetconvs - Convert a file's charset into a string buffer + * @param buf Buffer for result + * @param buflen Length of buffer + * @param fc FgetConv handle + * @retval ptr Result buffer on success + * @retval NULL Error + * + * Read a file into a buffer, converting the character set as it goes. + */ +char *mutt_cs_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc) +{ + int c; + size_t r; + + for (r = 0; (r + 1) < buflen;) + { + c = mutt_cs_fgetconv(fc); + if (c == EOF) + break; + buf[r++] = (char) c; + if (c == '\n') + break; + } + buf[r] = '\0'; + + if (r > 0) + return buf; + + return NULL; } diff --git a/mutt/charset.h b/mutt/charset.h index ac17bf19f..835be1d04 100644 --- a/mutt/charset.h +++ b/mutt/charset.h @@ -24,8 +24,11 @@ #define _MUTT_CHARSET_H #include +#include #include +struct Buffer; + extern char *AssumedCharset; extern char *Charset; @@ -63,18 +66,40 @@ struct MimeNames const char *pref; }; +/** + * enum LookupType - Types of character set lookups + */ +enum LookupType +{ + MUTT_LOOKUP_CHARSET, + MUTT_LOOKUP_ICONV +}; + +#define MUTT_ICONV_HOOK_FROM 1 /**< apply charset-hooks to fromcode */ + extern const struct MimeNames PreferredMIMENames[]; -void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name); -int mutt_cs_chscmp(const char *s, const 
char *chs); -void mutt_cs_fgetconv_close(struct FgetConv **fc); -int mutt_cs_fgetconv(struct FgetConv *fc); -char * mutt_cs_fgetconvs(char *buf, size_t l, struct FgetConv *fc); -char * mutt_cs_get_default_charset(void); -size_t mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl); -void mutt_cs_set_langinfo_charset(void); +void mutt_cs_canonical_charset(char *buf, size_t buflen, const char *name); +int mutt_cs_chscmp(const char *cs1, const char *cs2); +char * mutt_cs_get_default_charset(void); +void mutt_cs_set_langinfo_charset(void); + +bool mutt_cs_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err); +void mutt_cs_lookup_remove(void); +const char * mutt_cs_charset_lookup(const char *chs); + +iconv_t mutt_cs_iconv_open(const char *tocode, const char *fromcode, int flags); +size_t mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl); +const char * mutt_cs_iconv_lookup(const char *chs); +int mutt_cs_convert_string(char **ps, const char *from, const char *to, int flags); +bool mutt_cs_check_charset(const char *cs, bool strict); + +struct FgetConv *mutt_cs_fgetconv_open(FILE *file, const char *from, const char *to, int flags); +void mutt_cs_fgetconv_close(struct FgetConv **fc); +int mutt_cs_fgetconv(struct FgetConv *fc); +char * mutt_cs_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc); #define mutt_cs_is_utf8(a) mutt_cs_chscmp(a, "utf-8") #define mutt_cs_is_us_ascii(a) mutt_cs_chscmp(a, "us-ascii") -#endif +#endif diff --git a/mutt_charset.c b/mutt_charset.c deleted file mode 100644 index a030cd2ff..000000000 --- a/mutt_charset.c +++ /dev/null @@ -1,325 +0,0 @@ -/** - * @file - * Conversion between different character encodings - * - * @authors - * Copyright (C) 1999-2002,2007 Thomas Roessler - * - * @copyright - * This program is free 
software: you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free Software - * Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include "config.h" -#include -#include -#include -#include -#include -#include -#include "mutt/mutt.h" -#include "mutt.h" -#include "mutt_charset.h" -#include "globals.h" -#include "protos.h" - -/** - * struct Lookup - Regex to String lookup table - * - * This is used by 'charset-hook' and 'iconv-hook'. - */ -struct Lookup -{ - enum LookupType type; /**< Lookup type */ - struct Regex regex; /**< Regular expression */ - char *replacement; /**< Alternative charset to use */ - TAILQ_ENTRY(Lookup) entries; -}; -static TAILQ_HEAD(LookupHead, Lookup) Lookups = TAILQ_HEAD_INITIALIZER(Lookups); - -/** - * mutt_iconv_open - Set up iconv for conversions - * - * Like iconv_open, but canonicalises the charsets, applies charset-hooks, - * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips - * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers - * should use flags=0 when fromcode can safely be considered true, either some - * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be - * used only when fromcode is unsure, taken from a possibly wrong incoming MIME - * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions - * in some setups. Note: By design charset-hooks should never be, and are never, - * applied to tocode. 
Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM - * acts on charset-hooks, not at all on iconv-hooks. - */ -iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) -{ - char tocode1[SHORT_STRING]; - char fromcode1[SHORT_STRING]; - const char *tocode2 = NULL, *fromcode2 = NULL; - const char *tmp = NULL; - - iconv_t cd; - - /* transform to MIME preferred charset names */ - mutt_cs_canonical_charset(tocode1, sizeof(tocode1), tocode); - mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), fromcode); - - /* maybe apply charset-hooks and recanonicalise fromcode, - * but only when caller asked us to sanitize a potentially wrong - * charset name incoming from the wild exterior. */ - if (flags & MUTT_ICONV_HOOK_FROM) - { - tmp = mutt_cs_charset_lookup(fromcode1); - if (tmp) - mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), tmp); - } - - /* always apply iconv-hooks to suit system's iconv tastes */ - tocode2 = mutt_cs_iconv_lookup(tocode1); - tocode2 = (tocode2) ? tocode2 : tocode1; - fromcode2 = mutt_cs_iconv_lookup(fromcode1); - fromcode2 = (fromcode2) ? fromcode2 : fromcode1; - - /* call system iconv with names it appreciates */ - cd = iconv_open(tocode2, fromcode2); - if (cd != (iconv_t) -1) - return cd; - - return (iconv_t) -1; -} - -/** - * mutt_convert_string - Convert a string between encodings - * - * Parameter flags is given as-is to mutt_iconv_open(). - * See there for its meaning and usage policy. 
- */ -int mutt_convert_string(char **ps, const char *from, const char *to, int flags) -{ - iconv_t cd; - const char *repls[] = { "\357\277\275", "?", 0 }; - char *s = *ps; - - if (!s || !*s) - return 0; - - if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1) - { - size_t len; - const char *ib = NULL; - char *buf = NULL, *ob = NULL; - size_t ibl, obl; - const char **inrepls = NULL; - char *outrepl = NULL; - - if (mutt_cs_is_utf8(to)) - outrepl = "\357\277\275"; - else if (mutt_cs_is_utf8(from)) - inrepls = repls; - else - outrepl = "?"; - - len = strlen(s); - ib = s; - ibl = len + 1; - obl = MB_LEN_MAX * ibl; - ob = buf = mutt_mem_malloc(obl + 1); - - mutt_cs_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); - iconv_close(cd); - - *ob = '\0'; - - FREE(ps); - *ps = buf; - - mutt_str_adjust(ps); - return 0; - } - else - return -1; -} - -/** - * fgetconv_open - Prepare a file for charset conversion - * @param file FILE ptr to prepare - * @param from Current character set - * @param to Destination character set - * @param flags Flags, e.g. MUTT_ICONV_HOOK_FROM - * @retval ptr fgetconv handle - * - * Parameter flags is given as-is to mutt_iconv_open(). - */ -struct FgetConv *fgetconv_open(FILE *file, const char *from, const char *to, int flags) -{ - struct FgetConv *fc = NULL; - iconv_t cd = (iconv_t) -1; - static const char *repls[] = { "\357\277\275", "?", 0 }; - - if (from && to) - cd = mutt_iconv_open(to, from, flags); - - if (cd != (iconv_t) -1) - { - fc = mutt_mem_malloc(sizeof(struct FgetConv)); - fc->p = fc->ob = fc->bufo; - fc->ib = fc->bufi; - fc->ibl = 0; - fc->inrepls = mutt_cs_is_utf8(to) ? 
repls : repls + 1; - } - else - fc = mutt_mem_malloc(sizeof(struct FgetConvNot)); - fc->file = file; - fc->cd = cd; - return fc; -} - -bool mutt_check_charset(const char *s, bool strict) -{ - iconv_t cd; - - if (mutt_cs_is_utf8(s)) - return true; - - if (!strict) - for (int i = 0; PreferredMIMENames[i].key; i++) - { - if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) || - (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0)) - { - return true; - } - } - - cd = mutt_iconv_open(s, s, 0); - if (cd != (iconv_t)(-1)) - { - iconv_close(cd); - return true; - } - - return false; -} - -/** - * lookup_charset - Look for a preferred character set name - * @param type Type, e.g. #MUTT_LOOKUP_CHARSET - * @param cs Character set - * - * If the character set matches one of the regexes, - * then return the replacement name. - */ -static const char *lookup_charset(enum LookupType type, const char *cs) -{ - if (!cs) - return NULL; - - struct Lookup *l = NULL; - - TAILQ_FOREACH(l, &Lookups, entries) - { - if (l->type != type) - continue; - if (regexec(l->regex.regex, cs, 0, NULL, 0) == 0) - return l->replacement; - } - return NULL; -} - -/** - * mutt_cs_lookup_add - Add a new character set lookup - * @param type Type of character set, e.g. MUTT_LOOKUP_CHARSET - * @param pat Pattern to match - * @param replace Replacement string - * @param err Buffer for error message - * @retval true, lookup added to list - * @retval false, Regex string was invalid - * - * Add a regex for a character set and a replacement name. 
- */ -bool mutt_cs_lookup_add(enum LookupType type, const char *pat, - const char *replace, struct Buffer *err) -{ - if (!pat || !replace) - return false; - - regex_t *rx = mutt_mem_malloc(sizeof(regex_t)); - int rc = REGCOMP(rx, pat, REG_ICASE); - if (rc != 0) - { - regerror(rc, rx, err->data, err->dsize); - FREE(&rx); - return false; - } - - struct Lookup *l = mutt_mem_calloc(1, sizeof(struct Lookup)); - l->type = type; - l->replacement = mutt_str_strdup(replace); - l->regex.pattern = mutt_str_strdup(pat); - l->regex.regex = rx; - l->regex.not = false; - - TAILQ_INSERT_TAIL(&Lookups, l, entries); - - return true; -} - -/** - * mutt_cs_lookup_remove - Remove all the character set lookups - * - * Empty the list of replacement character set names. - */ -void mutt_cs_lookup_remove(void) -{ - struct Lookup *l = NULL; - struct Lookup *tmp = NULL; - - TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp) - { - TAILQ_REMOVE(&Lookups, l, entries); - FREE(&l->replacement); - FREE(&l->regex.pattern); - if (l->regex.regex) - regfree(l->regex.regex); - FREE(&l->regex); - FREE(&l); - } -} - -/** - * mutt_cs_charset_lookup - Look for a replacement character set - * @param chs Character set to lookup - * @retval ptr Replacement character set (if a 'charset-hook' matches) - * @retval NULL No matching hook - * - * Look through all the 'charset-hook's. - * If one matches return the replacement character set. - */ -const char *mutt_cs_charset_lookup(const char *chs) -{ - return lookup_charset(MUTT_LOOKUP_CHARSET, chs); -} - -/** - * mutt_cs_iconv_lookup - Look for a replacement character set - * @param chs Character set to lookup - * @retval ptr Replacement character set (if a 'iconv-hook' matches) - * @retval NULL No matching hook - * - * Look through all the 'iconv-hook's. - * If one matches return the replacement character set. 
- */ -const char *mutt_cs_iconv_lookup(const char *chs) -{ - return lookup_charset(MUTT_LOOKUP_ICONV, chs); -} diff --git a/mutt_charset.h b/mutt_charset.h deleted file mode 100644 index a940539ee..000000000 --- a/mutt_charset.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * @file - * Conversion between different character encodings - * - * @authors - * Copyright (C) 1999-2003 Thomas Roessler - * - * @copyright - * This program is free software: you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free Software - * Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#ifndef _MUTT_CHARSET2_H -#define _MUTT_CHARSET2_H - -#include -#include -#include - -/** - * enum LookupType - Types of character set lookups - */ -enum LookupType -{ - MUTT_LOOKUP_CHARSET, - MUTT_LOOKUP_ICONV -}; - -bool mutt_cs_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err); -void mutt_cs_lookup_remove(void); -const char * mutt_cs_charset_lookup(const char *chs); -const char * mutt_cs_iconv_lookup(const char *chs); - -int mutt_convert_string(char **ps, const char *from, const char *to, int flags); - -iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags); -struct FgetConv *fgetconv_open(FILE *file, const char *from, const char *to, int flags); - -/* flags for charset.c:mutt_convert_string(), fgetconv_open(), and - * mutt_iconv_open(). Note that applying charset-hooks to tocode is - * never needed, and sometimes hurts: Hence there is no MUTT_ICONV_HOOK_TO - * flag. 
- */ -#define MUTT_ICONV_HOOK_FROM 1 /* apply charset-hooks to fromcode */ - -bool mutt_check_charset(const char *s, bool strict); - -#endif /* _MUTT_CHARSET2_H */ diff --git a/mutt_idna.c b/mutt_idna.c index 4a56bf6e5..7b48cc62c 100644 --- a/mutt_idna.c +++ b/mutt_idna.c @@ -29,7 +29,6 @@ #include "address.h" #include "envelope.h" #include "globals.h" -#include "mutt_charset.h" #include "options.h" #ifdef HAVE_IDNA_H #include @@ -109,10 +108,10 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags) #endif /* HAVE_LIBIDN */ /* we don't want charset-hook effects, so we set flags to 0 */ - if (mutt_convert_string(&local_user, "utf-8", Charset, 0) == -1) + if (mutt_cs_convert_string(&local_user, "utf-8", Charset, 0) == -1) goto cleanup; - if (mutt_convert_string(&local_domain, "utf-8", Charset, 0) == -1) + if (mutt_cs_convert_string(&local_domain, "utf-8", Charset, 0) == -1) goto cleanup; /* @@ -123,7 +122,7 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags) { reversed_user = mutt_str_strdup(local_user); - if (mutt_convert_string(&reversed_user, Charset, "utf-8", 0) == -1) + if (mutt_cs_convert_string(&reversed_user, Charset, "utf-8", 0) == -1) { mutt_debug( 1, "Not reversible. 
Charset conv to utf-8 failed for user = '%s'.\n", reversed_user); @@ -139,7 +138,7 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags) reversed_domain = mutt_str_strdup(local_domain); - if (mutt_convert_string(&reversed_domain, Charset, "utf-8", 0) == -1) + if (mutt_cs_convert_string(&reversed_domain, Charset, "utf-8", 0) == -1) { mutt_debug( 1, @@ -196,10 +195,10 @@ char *mutt_idna_local_to_intl(char *user, char *domain) intl_domain = mutt_str_strdup(domain); /* we don't want charset-hook effects, so we set flags to 0 */ - if (mutt_convert_string(&intl_user, Charset, "utf-8", 0) == -1) + if (mutt_cs_convert_string(&intl_user, Charset, "utf-8", 0) == -1) goto cleanup; - if (mutt_convert_string(&intl_domain, Charset, "utf-8", 0) == -1) + if (mutt_cs_convert_string(&intl_domain, Charset, "utf-8", 0) == -1) goto cleanup; #ifdef HAVE_LIBIDN diff --git a/muttlib.c b/muttlib.c index 0267cc6f8..46f6ffcc2 100644 --- a/muttlib.c +++ b/muttlib.c @@ -52,7 +52,6 @@ #include "header.h" #include "mailbox.h" #include "mime.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mx.h" #include "ncrypt/ncrypt.h" @@ -1426,7 +1425,7 @@ const char *mutt_make_version(void) void mutt_encode_path(char *dest, size_t dlen, const char *src) { char *p = mutt_str_strdup(src); - int rc = mutt_convert_string(&p, Charset, "utf-8", 0); + int rc = mutt_cs_convert_string(&p, Charset, "utf-8", 0); /* `src' may be NULL, such as when called from the pop3 driver. */ mutt_str_strfcpy(dest, (rc == 0) ? 
NONULL(p) : NONULL(src), dlen); FREE(&p); diff --git a/ncrypt/crypt_gpgme.c b/ncrypt/crypt_gpgme.c index d06d122b6..937b96c2b 100644 --- a/ncrypt/crypt_gpgme.c +++ b/ncrypt/crypt_gpgme.c @@ -54,7 +54,6 @@ #include "header.h" #include "keymap.h" #include "mime.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mutt_menu.h" #include "ncrypt.h" @@ -180,7 +179,7 @@ static void print_utf8(FILE *fp, const char *buf, size_t len) /* fromcode "utf-8" is sure, so we don't want * charset-hook corrections: flags must be 0. */ - mutt_convert_string(&tstr, "utf-8", Charset, 0); + mutt_cs_convert_string(&tstr, "utf-8", Charset, 0); fputs(tstr, fp); FREE(&tstr); } @@ -2453,7 +2452,7 @@ static void copy_clearsigned(gpgme_data_t data, struct State *s, char *charset) * be a wrong label, so we want the ability to do corrections via * charset-hooks. Therefore we set flags to MUTT_ICONV_HOOK_FROM. */ - fc = fgetconv_open(fp, charset, Charset, MUTT_ICONV_HOOK_FROM); + fc = mutt_cs_fgetconv_open(fp, charset, Charset, MUTT_ICONV_HOOK_FROM); for (complete = true, armor_header = true; mutt_cs_fgetconvs(buf, sizeof(buf), fc) != NULL; complete = (strchr(buf, '\n') != NULL)) @@ -2678,7 +2677,7 @@ int pgp_gpgme_application_handler(struct Body *m, struct State *s) struct FgetConv *fc = NULL; int c; rewind(pgpout); - fc = fgetconv_open(pgpout, "utf-8", Charset, 0); + fc = mutt_cs_fgetconv_open(pgpout, "utf-8", Charset, 0); while ((c = mutt_cs_fgetconv(fc)) != EOF) { state_putc(c, s); diff --git a/ncrypt/gnupgparse.c b/ncrypt/gnupgparse.c index 01ec7c892..58ded9477 100644 --- a/ncrypt/gnupgparse.c +++ b/ncrypt/gnupgparse.c @@ -43,7 +43,6 @@ #include "filter.h" #include "globals.h" #include "mime.h" -#include "mutt_charset.h" #include "ncrypt.h" #include "options.h" #include "pgpinvoke.h" @@ -90,7 +89,7 @@ static void fix_uid(char *uid) } *d = '\0'; - if (chs && (cd = mutt_iconv_open(chs, "utf-8", 0)) != (iconv_t) -1) + if (chs && (cd = mutt_cs_iconv_open(chs, "utf-8", 0)) != (iconv_t) 
-1) { int n = s - uid + 1; /* chars available in original buffer */ char *buf = NULL; diff --git a/ncrypt/pgp.c b/ncrypt/pgp.c index 1497e940f..ebd09208e 100644 --- a/ncrypt/pgp.c +++ b/ncrypt/pgp.c @@ -51,7 +51,6 @@ #include "globals.h" #include "header.h" #include "mime.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "ncrypt.h" #include "options.h" @@ -286,7 +285,7 @@ static void pgp_copy_clearsigned(FILE *fpin, struct State *s, char *charset) * be a wrong label, so we want the ability to do corrections via * charset-hooks. Therefore we set flags to MUTT_ICONV_HOOK_FROM. */ - fc = fgetconv_open(fpin, charset, Charset, MUTT_ICONV_HOOK_FROM); + fc = mutt_cs_fgetconv_open(fpin, charset, Charset, MUTT_ICONV_HOOK_FROM); for (complete = true, armor_header = true; mutt_cs_fgetconvs(buf, sizeof(buf), fc) != NULL; complete = (strchr(buf, '\n') != NULL)) @@ -422,7 +421,7 @@ int pgp_application_pgp_handler(struct Body *m, struct State *s) l = mutt_str_strlen(gpgcharset); if ((l > 0) && (gpgcharset[l - 1] == '\n')) gpgcharset[l - 1] = 0; - if (!mutt_check_charset(gpgcharset, 0)) + if (!mutt_cs_check_charset(gpgcharset, 0)) mutt_str_replace(&gpgcharset, "UTF-8"); } } @@ -549,7 +548,7 @@ int pgp_application_pgp_handler(struct Body *m, struct State *s) rewind(pgpout); state_set_prefix(s); - fc = fgetconv_open(pgpout, expected_charset, Charset, MUTT_ICONV_HOOK_FROM); + fc = mutt_cs_fgetconv_open(pgpout, expected_charset, Charset, MUTT_ICONV_HOOK_FROM); while ((ch = mutt_cs_fgetconv(fc)) != EOF) state_prefix_putc(ch, s); mutt_cs_fgetconv_close(&fc); @@ -1554,7 +1553,7 @@ struct Body *pgp_traditional_encryptsign(struct Body *a, int flags, char *keylis send_charset = "utf-8"; /* fromcode is assumed to be correct: we set flags to 0 */ - fc = fgetconv_open(fp, from_charset, "utf-8", 0); + fc = mutt_cs_fgetconv_open(fp, from_charset, "utf-8", 0); while ((c = mutt_cs_fgetconv(fc)) != EOF) fputc(c, pgpin); diff --git a/parse.c b/parse.c index 92399dbd6..bf1c7f857 100644 
--- a/parse.c +++ b/parse.c @@ -37,7 +37,6 @@ #include "header.h" #include "mailbox.h" #include "mime.h" -#include "mutt_charset.h" #include "ncrypt/ncrypt.h" #include "options.h" #include "parameter.h" diff --git a/rfc2047.c b/rfc2047.c index c880b8bd3..7a6756b99 100644 --- a/rfc2047.c +++ b/rfc2047.c @@ -32,7 +32,6 @@ #include "globals.h" #include "mbyte.h" #include "mime.h" -#include "mutt_charset.h" #include "options.h" #include "protos.h" @@ -61,7 +60,7 @@ static size_t convert_string(const char *f, size_t flen, const char *from, size_t obl, n; int e; - cd = mutt_iconv_open(to, from, 0); + cd = mutt_cs_iconv_open(to, from, 0); if (cd == (iconv_t)(-1)) return (size_t)(-1); obl = 4 * flen + 1; @@ -117,7 +116,7 @@ int convert_nonmime_string(char **ps) return 0; } } - mutt_convert_string(ps, (const char *) mutt_cs_get_default_charset(), Charset, + mutt_cs_convert_string(ps, (const char *) mutt_cs_get_default_charset(), Charset, MUTT_ICONV_HOOK_FROM); return -1; } @@ -277,7 +276,7 @@ static size_t try_block(const char *d, size_t dlen, const char *fromcode, if (fromcode) { - cd = mutt_iconv_open(tocode, fromcode, 0); + cd = mutt_cs_iconv_open(tocode, fromcode, 0); assert(cd != (iconv_t)(-1)); ib = d; ibl = dlen; @@ -357,7 +356,7 @@ static size_t encode_block(char *s, char *d, size_t dlen, const char *fromcode, if (fromcode) { - cd = mutt_iconv_open(tocode, fromcode, 0); + cd = mutt_cs_iconv_open(tocode, fromcode, 0); assert(cd != (iconv_t)(-1)); ib = d; ibl = dlen; @@ -738,7 +737,7 @@ static int rfc2047_decode_word(char *d, const char *s, size_t len) } if (charset) - mutt_convert_string(&d0, charset, Charset, MUTT_ICONV_HOOK_FROM); + mutt_cs_convert_string(&d0, charset, Charset, MUTT_ICONV_HOOK_FROM); mutt_filter_unprintable(&d0); mutt_str_strfcpy(d, d0, len); rc = 0; diff --git a/rfc2231.c b/rfc2231.c index 2aaf268fb..c5c20ac79 100644 --- a/rfc2231.c +++ b/rfc2231.c @@ -39,7 +39,6 @@ #include "globals.h" #include "mbyte.h" #include "mime.h" -#include 
"mutt_charset.h" #include "options.h" #include "parameter.h" #include "protos.h" @@ -203,7 +202,7 @@ static void rfc2231_join_continuations(struct Parameter **head, struct Rfc2231Pa } while (par && (strcmp(par->attribute, attribute) == 0)); if (encoded) - mutt_convert_string(&value, charset, Charset, MUTT_ICONV_HOOK_FROM); + mutt_cs_convert_string(&value, charset, Charset, MUTT_ICONV_HOOK_FROM); *head = mutt_param_new(); (*head)->attribute = mutt_str_strdup(attribute); (*head)->value = value; @@ -261,7 +260,7 @@ void rfc2231_decode_parameters(struct Parameter **headp) s = rfc2231_get_charset(p->value, charset, sizeof(charset)); rfc2231_decode_one(p->value, s); - mutt_convert_string(&p->value, charset, Charset, MUTT_ICONV_HOOK_FROM); + mutt_cs_convert_string(&p->value, charset, Charset, MUTT_ICONV_HOOK_FROM); mutt_filter_unprintable(&p->value); *last = p; diff --git a/sendlib.c b/sendlib.c index d9a9ebb96..bcca72fa6 100644 --- a/sendlib.c +++ b/sendlib.c @@ -53,7 +53,6 @@ #include "header.h" #include "mailbox.h" #include "mime.h" -#include "mutt_charset.h" #include "mutt_curses.h" #include "mutt_idna.h" #include "mx.h" @@ -475,10 +474,10 @@ int mutt_write_mime_body(struct Body *a, FILE *f) } if (a->type == TYPETEXT && (!a->noconv)) - fc = fgetconv_open(fpin, a->charset, + fc = mutt_cs_fgetconv_open(fpin, a->charset, mutt_get_body_charset(send_charset, sizeof(send_charset), a), 0); else - fc = fgetconv_open(fpin, 0, 0, 0); + fc = mutt_cs_fgetconv_open(fpin, 0, 0, 0); mutt_sig_allow_interrupt(1); if (a->encoding == ENCQUOTEDPRINTABLE) @@ -689,7 +688,7 @@ static size_t convert_file_to(FILE *file, const char *fromcode, int ncodes, struct ContentState *states = NULL; size_t *score = NULL; - cd1 = mutt_iconv_open("utf-8", fromcode, 0); + cd1 = mutt_cs_iconv_open("utf-8", fromcode, 0); if (cd1 == (iconv_t)(-1)) return -1; @@ -701,7 +700,7 @@ static size_t convert_file_to(FILE *file, const char *fromcode, int ncodes, for (int i = 0; i < ncodes; i++) { if 
(mutt_str_strcasecmp(tocodes[i], "utf-8") != 0) - cd[i] = mutt_iconv_open(tocodes[i], "utf-8", 0); + cd[i] = mutt_cs_iconv_open(tocodes[i], "utf-8", 0); else { /* Special case for conversion to UTF-8 */