From c0464fbb0add0b9c5fc28b203fdd29c5e60e0e91 Mon Sep 17 00:00:00 2001 From: Richard Russon Date: Wed, 22 Nov 2017 17:26:52 +0000 Subject: [PATCH] move charset functions to libmutt --- Makefile.am | 4 +- Makefile.autosetup | 4 +- alias.c | 2 +- browser.c | 2 +- globals.h | 2 - handler.c | 2 +- hcache/hcache.c | 2 +- history.c | 2 +- imap/utf7.c | 2 +- init.c | 2 +- mbyte.c | 2 +- mutt/Makefile.am | 4 +- charset.c => mutt/charset.c | 380 +++++++++--------------------------- mutt/charset.h | 79 ++++++++ mutt/mutt.h | 2 + mutt_charset.c | 190 ++++++++++++++++++ charset.h => mutt_charset.h | 21 +- mutt_idna.c | 2 +- muttlib.c | 2 +- ncrypt/crypt_gpgme.c | 2 +- ncrypt/gnupgparse.c | 2 +- ncrypt/pgp.c | 2 +- parse.c | 2 +- po/POTFILES.in | 3 +- protos.h | 4 - rfc2047.c | 2 +- rfc2231.c | 2 +- safe_asprintf.c | 1 + sendlib.c | 2 +- 29 files changed, 395 insertions(+), 333 deletions(-) rename charset.c => mutt/charset.c (73%) create mode 100644 mutt/charset.h create mode 100644 mutt_charset.c rename charset.h => mutt_charset.h (68%) diff --git a/Makefile.am b/Makefile.am index e0bdbb4fe..2055d5a1f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,7 +43,7 @@ BUILT_SOURCES = conststrings.c git_ver.h bin_PROGRAMS = neomutt neomutt_SOURCES = mutt_account.c addrbook.c address.h alias.c alias.h attach.c \ - bcache.c body.c body.h browser.c buffy.c charset.c color.c \ + bcache.c body.c body.h browser.c buffy.c mutt_charset.c color.c \ commands.c complete.c compose.c compress.c content.h context.h copy.c \ curs_lib.c curs_main.c edit.c editmsg.c enter.c enter_state.h \ envelope.c envelope.h filter.c flags.c format_flags.h from.c group.c \ @@ -84,7 +84,7 @@ EXTRA_neomutt_SOURCES = browser.h mbyte.h mutt_idna.c mutt_idna.h \ remailer.c remailer.h resize.c url.h EXTRA_DIST = mutt_account.h attach.h bcache.h browser.h buffy.h \ - ChangeLog.md charset.h CODE_OF_CONDUCT.md compress.h copy.h \ + ChangeLog.md mutt_charset.h CODE_OF_CONDUCT.md compress.h copy.h \ COPYRIGHT filter.h functions.h globals.h \ group.h history.h init.h keymap.h LICENSE.md mailbox.h \ mbyte.h mime.h mutt.h mutt_commands.h \ diff --git a/Makefile.autosetup b/Makefile.autosetup index 2d108c684..c81a91a9e 100644 --- a/Makefile.autosetup +++ b/Makefile.autosetup @@ -54,7 +54,7 @@ ALL_FILES!= (cd $(SRCDIR) && git ls-tree -r --name-only HEAD 2>/dev/null) \ # neomutt NEOMUTT= neomutt$(EXEEXT) NEOMUTTOBJS= mutt_account.o addrbook.o alias.o attach.o bcache.o body.o \ - browser.o buffy.o charset.o color.o commands.o complete.o \ + browser.o buffy.o mutt_charset.o color.o commands.o complete.o \ compose.o compress.o conststrings.o copy.o curs_lib.o \ curs_main.o edit.o editmsg.o enter.o envelope.o filter.o \ flags.o from.o group.o handler.o hdrline.o \ @@ -90,7 +90,7 @@ ALLOBJS+= $(NEOMUTTOBJS) ############################################################################### # libmutt LIBMUTT= libmutt.a -LIBMUTTOBJS= mutt/base64.o mutt/buffer.o mutt/date.o mutt/debug.o mutt/exit.o \ +LIBMUTTOBJS= mutt/base64.o mutt/buffer.o mutt/charset.o mutt/date.o mutt/debug.o mutt/exit.o \ mutt/file.o mutt/hash.o mutt/list.o mutt/mapping.o mutt/mbyte.o mutt/md5.o \ mutt/memory.o mutt/message.o mutt/sha1.o mutt/signal.o mutt/string.o CLEANFILES+= $(LIBMUTT) $(LIBMUTTOBJS) diff --git a/alias.c b/alias.c index cb333d4d7..729ee0e68 100644 --- a/alias.c +++ b/alias.c @@ -36,7 +36,7 @@ #include "mutt.h" #include "address.h" #include "alias.h" -#include "charset.h" +#include "mutt_charset.h" #include "envelope.h" #include "globals.h" #include "mutt_curses.h" diff --git a/browser.c b/browser.c index eb6f38486..45ad2c95f 100644 --- a/browser.c +++ b/browser.c @@ -46,7 +46,7 @@ #include "body.h" #include "browser.h" #include "buffy.h" -#include "charset.h" +#include "mutt_charset.h" #include "context.h" #include "format_flags.h" #include "globals.h" diff --git a/globals.h b/globals.h index 6506e6ae3..351472b92 100644 --- a/globals.h +++ b/globals.h @@ -46,14 +46,12 @@ WHERE struct Address *From; WHERE char *AliasFile; WHERE char *AliasFormat; -WHERE char *AssumedCharset; WHERE char *AttachSep; WHERE char *Attribution; WHERE char *AttributionLocale; WHERE char *AttachCharset; WHERE char *AttachFormat; WHERE struct Regex AttachKeyword; -WHERE char *Charset; WHERE char *ComposeFormat; WHERE char *ConfigCharset; WHERE char *ContentType; diff --git a/handler.c b/handler.c index be410a657..efda72699 100644 --- a/handler.c +++ b/handler.c @@ -40,7 +40,7 @@ #include "mutt/mutt.h" #include "mutt.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "copy.h" #include "filter.h" #include "globals.h" diff --git a/hcache/hcache.c b/hcache/hcache.c index aebbdae7d..c5bb942dc 100644 --- a/hcache/hcache.c +++ b/hcache/hcache.c @@ -51,7 +51,7 @@ #include "address.h" #include "backend.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "envelope.h" #include "globals.h" #include "hcache.h" diff --git a/history.c b/history.c index 2875b3877..0f5921ac9 100644 --- a/history.c +++ b/history.c @@ -28,7 +28,7 @@ #include #include "mutt/mutt.h" #include "history.h" -#include "charset.h" +#include "mutt_charset.h" #include "globals.h" #include "protos.h" diff --git a/imap/utf7.c b/imap/utf7.c index bb26cece8..abfcaf1d8 100644 --- a/imap/utf7.c +++ b/imap/utf7.c @@ -35,7 +35,7 @@ #include #include "imap_private.h" #include "mutt/mutt.h" -#include "charset.h" +#include "mutt_charset.h" #include "globals.h" // clang-format off diff --git a/init.c b/init.c index 532465214..e1a942226 100644 --- a/init.c +++ b/init.c @@ -41,7 +41,7 @@ #include "init.h" #include "address.h" #include "alias.h" -#include "charset.h" +#include "mutt_charset.h" #include "context.h" #include "envelope.h" #include "filter.h" diff --git a/mbyte.c b/mbyte.c index c02620ed3..145b16bd9 100644 --- a/mbyte.c +++ b/mbyte.c @@ -29,7 +29,7 @@ #include #include "mutt/mutt.h" #include "mbyte.h" -#include "charset.h" +#include "mutt_charset.h" #include "options.h" #include "protos.h" diff --git a/mutt/Makefile.am b/mutt/Makefile.am index b52068d37..49f10184f 100644 --- a/mutt/Makefile.am +++ b/mutt/Makefile.am @@ -3,11 +3,11 @@ include $(top_srcdir)/flymake.am AUTOMAKE_OPTIONS = 1.6 foreign -EXTRA_DIST = mutt.h base64.h buffer.h date.h debug.h exit.h file.h hash.h list.h mapping.h mbyte.h md5.h memory.h message.h queue.h sha1.h signal2.h string2.h +EXTRA_DIST = mutt.h base64.h buffer.h charset.h date.h debug.h exit.h file.h hash.h list.h mapping.h mbyte.h md5.h memory.h message.h queue.h sha1.h signal2.h string2.h AM_CPPFLAGS = -I$(top_srcdir) noinst_LIBRARIES = libmutt.a -libmutt_a_SOURCES = base64.c buffer.c date.c debug.c exit.c file.c hash.c list.c mapping.c mbyte.c md5.c memory.c message.c sha1.c signal.c string.c +libmutt_a_SOURCES = base64.c buffer.c charset.c date.c debug.c exit.c file.c hash.c list.c mapping.c mbyte.c md5.c memory.c message.c sha1.c signal.c string.c diff --git a/charset.c b/mutt/charset.c similarity index 73% rename from charset.c rename to mutt/charset.c index 01b152157..a1c27c2eb 100644 --- a/charset.c +++ b/mutt/charset.c @@ -23,20 +23,22 @@ #include "config.h" #include #include +#include #include -#include +#include #include #include -#include "mutt/mutt.h" -#include "mutt.h" #include "charset.h" -#include "globals.h" -#include "protos.h" +#include "memory.h" +#include "string2.h" #ifndef EILSEQ #define EILSEQ EINVAL #endif +char *AssumedCharset; +char *Charset; + /* * The following list has been created manually from the data under: * http://www.isi.edu/in-notes/iana/assignments/character-sets @@ -46,14 +48,8 @@ * MIME name is given. */ -static const struct -{ - const char *key; - const char *pref; -} - // clang-format off -PreferredMIMENames[] = +const struct MimeNames PreferredMIMENames[] = { { "ansi_x3.4-1968", "us-ascii" }, { "iso-ir-6", "us-ascii" }, @@ -205,18 +201,87 @@ PreferredMIMENames[] = }; // clang-format on -void mutt_set_langinfo_charset(void) +void fgetconv_close(FGETCONV **_fc) { - char buf[LONG_STRING]; - char buf2[LONG_STRING]; + struct FgetConv *fc = (struct FgetConv *) *_fc; - mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf)); - mutt_canonical_charset(buf2, sizeof(buf2), buf); + if (fc->cd != (iconv_t) -1) + iconv_close(fc->cd); + FREE(_fc); +} - /* finally, set $charset */ - Charset = mutt_str_strdup(buf2); - if (!Charset) - Charset = mutt_str_strdup("iso-8859-1"); +int fgetconv(FGETCONV *_fc) +{ + struct FgetConv *fc = (struct FgetConv *) _fc; + + if (!fc) + return EOF; + if (fc->cd == (iconv_t) -1) + return fgetc(fc->file); + if (!fc->p) + return EOF; + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + + /* Try to convert some more */ + fc->p = fc->ob = fc->bufo; + if (fc->ibl) + { + size_t obl = sizeof(fc->bufo); + iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl); + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + } + + /* If we trusted iconv a bit more, we would at this point + * ask why it had stopped converting ... */ + + /* Try to read some more */ + if (fc->ibl == sizeof(fc->bufi) || + (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) + { + fc->p = 0; + return EOF; + } + if (fc->ibl) + memcpy(fc->bufi, fc->ib, fc->ibl); + fc->ib = fc->bufi; + fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file); + + /* Try harder this time to convert some */ + if (fc->ibl) + { + size_t obl = sizeof(fc->bufo); + mutt_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0); + if (fc->p < fc->ob) + return (unsigned char) *(fc->p)++; + } + + /* Either the file has finished or one of the buffers is too small */ + fc->p = 0; + return EOF; +} + +char *fgetconvs(char *buf, size_t l, FGETCONV *_fc) +{ + int c; + size_t r; + + for (r = 0; r + 1 < l;) + { + c = fgetconv(_fc); + if (c == EOF) + break; + buf[r++] = (char) c; + if (c == '\n') + break; + } + buf[r] = '\0'; + + if (r) + return buf; + else + return NULL; } /** @@ -312,53 +377,6 @@ char *mutt_get_default_charset(void) return strcpy(fcharset, "us-ascii"); } -/** - * mutt_iconv_open - Set up iconv for conversions - * - * Like iconv_open, but canonicalises the charsets, applies charset-hooks, - * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips - * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers - * should use flags=0 when fromcode can safely be considered true, either some - * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be - * used only when fromcode is unsure, taken from a possibly wrong incoming MIME - * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions - * in some setups. Note: By design charset-hooks should never be, and are never, - * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM - * acts on charset-hooks, not at all on iconv-hooks. - */ -iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) -{ - char tocode1[SHORT_STRING]; - char fromcode1[SHORT_STRING]; - char *tocode2 = NULL, *fromcode2 = NULL; - char *tmp = NULL; - - iconv_t cd; - - /* transform to MIME preferred charset names */ - mutt_canonical_charset(tocode1, sizeof(tocode1), tocode); - mutt_canonical_charset(fromcode1, sizeof(fromcode1), fromcode); - - /* maybe apply charset-hooks and recanonicalise fromcode, - * but only when caller asked us to sanitize a potentially wrong - * charset name incoming from the wild exterior. */ - if ((flags & MUTT_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1))) - mutt_canonical_charset(fromcode1, sizeof(fromcode1), tmp); - - /* always apply iconv-hooks to suit system's iconv tastes */ - tocode2 = mutt_iconv_hook(tocode1); - tocode2 = (tocode2) ? tocode2 : tocode1; - fromcode2 = mutt_iconv_hook(fromcode1); - fromcode2 = (fromcode2) ? fromcode2 : fromcode1; - - /* call system iconv with names it appreciates */ - cd = iconv_open(tocode2, fromcode2); - if (cd != (iconv_t) -1) - return cd; - - return (iconv_t) -1; -} - /** * mutt_iconv - Change the encoding of a string * @@ -436,224 +454,16 @@ size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **ou } } -/** - * mutt_convert_string - Convert a string between encodings - * - * Parameter flags is given as-is to mutt_iconv_open(). - * See there for its meaning and usage policy. - */ -int mutt_convert_string(char **ps, const char *from, const char *to, int flags) -{ - iconv_t cd; - const char *repls[] = { "\357\277\275", "?", 0 }; - char *s = *ps; - - if (!s || !*s) - return 0; - - if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1) - { - int len; - const char *ib = NULL; - char *buf = NULL, *ob = NULL; - size_t ibl, obl; - const char **inrepls = NULL; - char *outrepl = NULL; - - if (mutt_is_utf8(to)) - outrepl = "\357\277\275"; - else if (mutt_is_utf8(from)) - inrepls = repls; - else - outrepl = "?"; - - len = strlen(s); - ib = s; - ibl = len + 1; - obl = MB_LEN_MAX * ibl; - ob = buf = mutt_mem_malloc(obl + 1); - - mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); - iconv_close(cd); - - *ob = '\0'; - - FREE(ps); - *ps = buf; - - mutt_str_adjust(ps); - return 0; - } - else - return -1; -} - -/* - * FGETCONV stuff for converting a file while reading it. - * Used in sendlib.c for converting from neomutt's Charset - */ - -/** - * struct FgetConv - Cursor for converting a file's encoding - */ -struct FgetConv -{ - FILE *file; - iconv_t cd; - char bufi[512]; - char bufo[512]; - char *p; - char *ob; - char *ib; - size_t ibl; - const char **inrepls; -}; - -/** - * struct FgetConvNot - A dummy converter - */ -struct FgetConvNot -{ - FILE *file; - iconv_t cd; -}; - -/** - * fgetconv_open - Open a file and convert its encoding - * - * Parameter flags is given as-is to mutt_iconv_open(). - * See there for its meaning and usage policy. - */ -FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags) -{ - struct FgetConv *fc = NULL; - iconv_t cd = (iconv_t) -1; - static const char *repls[] = { "\357\277\275", "?", 0 }; - - if (from && to) - cd = mutt_iconv_open(to, from, flags); - - if (cd != (iconv_t) -1) - { - fc = mutt_mem_malloc(sizeof(struct FgetConv)); - fc->p = fc->ob = fc->bufo; - fc->ib = fc->bufi; - fc->ibl = 0; - fc->inrepls = mutt_is_utf8(to) ? repls : repls + 1; - } - else - fc = mutt_mem_malloc(sizeof(struct FgetConvNot)); - fc->file = file; - fc->cd = cd; - return (FGETCONV *) fc; -} - -char *fgetconvs(char *buf, size_t l, FGETCONV *_fc) -{ - int c; - size_t r; - - for (r = 0; r + 1 < l;) - { - c = fgetconv(_fc); - if (c == EOF) - break; - buf[r++] = (char) c; - if (c == '\n') - break; - } - buf[r] = '\0'; - - if (r) - return buf; - else - return NULL; -} - -int fgetconv(FGETCONV *_fc) -{ - struct FgetConv *fc = (struct FgetConv *) _fc; - - if (!fc) - return EOF; - if (fc->cd == (iconv_t) -1) - return fgetc(fc->file); - if (!fc->p) - return EOF; - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - - /* Try to convert some more */ - fc->p = fc->ob = fc->bufo; - if (fc->ibl) - { - size_t obl = sizeof(fc->bufo); - iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - } - - /* If we trusted iconv a bit more, we would at this point - * ask why it had stopped converting ... */ - - /* Try to read some more */ - if (fc->ibl == sizeof(fc->bufi) || - (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) - { - fc->p = 0; - return EOF; - } - if (fc->ibl) - memcpy(fc->bufi, fc->ib, fc->ibl); - fc->ib = fc->bufi; - fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file); - - /* Try harder this time to convert some */ - if (fc->ibl) - { - size_t obl = sizeof(fc->bufo); - mutt_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0); - if (fc->p < fc->ob) - return (unsigned char) *(fc->p)++; - } - - /* Either the file has finished or one of the buffers is too small */ - fc->p = 0; - return EOF; -} - -void fgetconv_close(FGETCONV **_fc) -{ - struct FgetConv *fc = (struct FgetConv *) *_fc; - - if (fc->cd != (iconv_t) -1) - iconv_close(fc->cd); - FREE(_fc); -} - -bool mutt_check_charset(const char *s, bool strict) +void mutt_set_langinfo_charset(void) { - iconv_t cd; - - if (mutt_is_utf8(s)) - return true; - - if (!strict) - for (int i = 0; PreferredMIMENames[i].key; i++) - { - if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) || - (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0)) - { - return true; - } - } + char buf[LONG_STRING]; + char buf2[LONG_STRING]; - cd = mutt_iconv_open(s, s, 0); - if (cd != (iconv_t)(-1)) - { - iconv_close(cd); - return true; - } + mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf)); + mutt_canonical_charset(buf2, sizeof(buf2), buf); - return false; + /* finally, set $charset */ + Charset = mutt_str_strdup(buf2); + if (!Charset) + Charset = mutt_str_strdup("iso-8859-1"); } diff --git a/mutt/charset.h b/mutt/charset.h new file mode 100644 index 000000000..816258880 --- /dev/null +++ b/mutt/charset.h @@ -0,0 +1,79 @@ +/** + * @file + * Conversion between different character encodings + * + * @authors + * Copyright (C) 1999-2002,2007 Thomas Roessler + * + * @copyright + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _MUTT_CHARSET_H +#define _MUTT_CHARSET_H + +#include +#include + +extern char *AssumedCharset; +extern char *Charset; + +typedef void *FGETCONV; + +/** + * struct FgetConv - Cursor for converting a file's encoding + */ +struct FgetConv +{ + FILE *file; + iconv_t cd; + char bufi[512]; + char bufo[512]; + char *p; + char *ob; + char *ib; + size_t ibl; + const char **inrepls; +}; + +/** + * struct FgetConvNot - A dummy converter + */ +struct FgetConvNot +{ + FILE *file; + iconv_t cd; +}; + +struct MimeNames +{ + const char *key; + const char *pref; +}; + +extern const struct MimeNames PreferredMIMENames[]; + +char * fgetconvs(char *buf, size_t l, FGETCONV *_fc); +char * mutt_get_default_charset(void); +int fgetconv(FGETCONV *_fc); +int mutt_chscmp(const char *s, const char *chs); +size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl); +void fgetconv_close(FGETCONV **_fc); +void mutt_canonical_charset(char *dest, size_t dlen, const char *name); +void mutt_set_langinfo_charset(void); + +#define mutt_is_utf8(a) mutt_chscmp(a, "utf-8") +#define mutt_is_us_ascii(a) mutt_chscmp(a, "us-ascii") + +#endif diff --git a/mutt/mutt.h b/mutt/mutt.h index 5125f52d0..7da7f9cba 100644 --- a/mutt/mutt.h +++ b/mutt/mutt.h @@ -30,6 +30,7 @@ * * -# @subpage base64 * -# @subpage buffer + * -# @subpage charset * -# @subpage date * -# @subpage debug * -# @subpage exit @@ -51,6 +52,7 @@ #include "base64.h" #include "buffer.h" +#include "charset.h" #include "date.h" #include "debug.h" #include "exit.h" diff --git a/mutt_charset.c b/mutt_charset.c new file mode 100644 index 000000000..3af22d2f6 --- /dev/null +++ b/mutt_charset.c @@ -0,0 +1,190 @@ +/** + * @file + * Conversion between different character encodings + * + * @authors + * Copyright (C) 1999-2002,2007 Thomas Roessler + * + * @copyright + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "config.h" +#include +#include +#include +#include +#include +#include +#include "mutt/mutt.h" +#include "mutt.h" +#include "mutt_charset.h" +#include "globals.h" +#include "protos.h" + +/** + * mutt_iconv_open - Set up iconv for conversions + * + * Like iconv_open, but canonicalises the charsets, applies charset-hooks, + * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips + * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers + * should use flags=0 when fromcode can safely be considered true, either some + * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be + * used only when fromcode is unsure, taken from a possibly wrong incoming MIME + * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions + * in some setups. Note: By design charset-hooks should never be, and are never, + * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM + * acts on charset-hooks, not at all on iconv-hooks. + */ +iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags) +{ + char tocode1[SHORT_STRING]; + char fromcode1[SHORT_STRING]; + char *tocode2 = NULL, *fromcode2 = NULL; + char *tmp = NULL; + + iconv_t cd; + + /* transform to MIME preferred charset names */ + mutt_canonical_charset(tocode1, sizeof(tocode1), tocode); + mutt_canonical_charset(fromcode1, sizeof(fromcode1), fromcode); + + /* maybe apply charset-hooks and recanonicalise fromcode, + * but only when caller asked us to sanitize a potentially wrong + * charset name incoming from the wild exterior. */ + if ((flags & MUTT_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1))) + mutt_canonical_charset(fromcode1, sizeof(fromcode1), tmp); + + /* always apply iconv-hooks to suit system's iconv tastes */ + tocode2 = mutt_iconv_hook(tocode1); + tocode2 = (tocode2) ? tocode2 : tocode1; + fromcode2 = mutt_iconv_hook(fromcode1); + fromcode2 = (fromcode2) ? fromcode2 : fromcode1; + + /* call system iconv with names it appreciates */ + cd = iconv_open(tocode2, fromcode2); + if (cd != (iconv_t) -1) + return cd; + + return (iconv_t) -1; +} + +/** + * mutt_convert_string - Convert a string between encodings + * + * Parameter flags is given as-is to mutt_iconv_open(). + * See there for its meaning and usage policy. + */ +int mutt_convert_string(char **ps, const char *from, const char *to, int flags) +{ + iconv_t cd; + const char *repls[] = { "\357\277\275", "?", 0 }; + char *s = *ps; + + if (!s || !*s) + return 0; + + if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1) + { + int len; + const char *ib = NULL; + char *buf = NULL, *ob = NULL; + size_t ibl, obl; + const char **inrepls = NULL; + char *outrepl = NULL; + + if (mutt_is_utf8(to)) + outrepl = "\357\277\275"; + else if (mutt_is_utf8(from)) + inrepls = repls; + else + outrepl = "?"; + + len = strlen(s); + ib = s; + ibl = len + 1; + obl = MB_LEN_MAX * ibl; + ob = buf = mutt_mem_malloc(obl + 1); + + mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl); + iconv_close(cd); + + *ob = '\0'; + + FREE(ps); + *ps = buf; + + mutt_str_adjust(ps); + return 0; + } + else + return -1; +} + +/** + * fgetconv_open - Open a file and convert its encoding + * + * Parameter flags is given as-is to mutt_iconv_open(). + * See there for its meaning and usage policy. + */ +FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags) +{ + struct FgetConv *fc = NULL; + iconv_t cd = (iconv_t) -1; + static const char *repls[] = { "\357\277\275", "?", 0 }; + + if (from && to) + cd = mutt_iconv_open(to, from, flags); + + if (cd != (iconv_t) -1) + { + fc = mutt_mem_malloc(sizeof(struct FgetConv)); + fc->p = fc->ob = fc->bufo; + fc->ib = fc->bufi; + fc->ibl = 0; + fc->inrepls = mutt_is_utf8(to) ? repls : repls + 1; + } + else + fc = mutt_mem_malloc(sizeof(struct FgetConvNot)); + fc->file = file; + fc->cd = cd; + return (FGETCONV *) fc; +} + +bool mutt_check_charset(const char *s, bool strict) +{ + iconv_t cd; + + if (mutt_is_utf8(s)) + return true; + + if (!strict) + for (int i = 0; PreferredMIMENames[i].key; i++) + { + if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) || + (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0)) + { + return true; + } + } + + cd = mutt_iconv_open(s, s, 0); + if (cd != (iconv_t)(-1)) + { + iconv_close(cd); + return true; + } + + return false; +} diff --git a/charset.h b/mutt_charset.h similarity index 68% rename from charset.h rename to mutt_charset.h index 2576c1a47..72d1742d3 100644 --- a/charset.h +++ b/mutt_charset.h @@ -20,8 +20,8 @@ * this program. If not, see . */ -#ifndef _MUTT_CHARSET_H -#define _MUTT_CHARSET_H +#ifndef _MUTT_CHARSET2_H +#define _MUTT_CHARSET2_H #include #include @@ -30,18 +30,7 @@ int mutt_convert_string(char **ps, const char *from, const char *to, int flags); iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags); -size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, - size_t *outbytesleft, const char **inrepls, const char *outrepl); - -typedef void *FGETCONV; - FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags); -int fgetconv(FGETCONV *_fc); -char *fgetconvs(char *buf, size_t l, FGETCONV *_fc); -void fgetconv_close(FGETCONV **_fc); - -void mutt_set_langinfo_charset(void); -char *mutt_get_default_charset(void); /* flags for charset.c:mutt_convert_string(), fgetconv_open(), and * mutt_iconv_open(). Note that applying charset-hooks to tocode is @@ -50,10 +39,6 @@ char *mutt_get_default_charset(void); */ #define MUTT_ICONV_HOOK_FROM 1 /* apply charset-hooks to fromcode */ -/* Check if given character set is valid (either officially assigned or - * known to local iconv implementation). If strict is non-zero, check - * against iconv only. Returns 0 if known and negative otherwise. - */ bool mutt_check_charset(const char *s, bool strict); -#endif /* _MUTT_CHARSET_H */ +#endif /* _MUTT_CHARSET2_H */ diff --git a/mutt_idna.c b/mutt_idna.c index 50c9a649b..d9dbb6af8 100644 --- a/mutt_idna.c +++ b/mutt_idna.c @@ -27,7 +27,7 @@ #include "mutt/mutt.h" #include "mutt_idna.h" #include "address.h" -#include "charset.h" +#include "mutt_charset.h" #include "envelope.h" #include "globals.h" #include "options.h" diff --git a/muttlib.c b/muttlib.c index 8e4de056e..84292201a 100644 --- a/muttlib.c +++ b/muttlib.c @@ -48,7 +48,7 @@ #include "address.h" #include "alias.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "envelope.h" #include "filter.h" #include "format_flags.h" diff --git a/ncrypt/crypt_gpgme.c b/ncrypt/crypt_gpgme.c index 5a1cfa2f8..04ea35d42 100644 --- a/ncrypt/crypt_gpgme.c +++ b/ncrypt/crypt_gpgme.c @@ -50,7 +50,7 @@ #include "address.h" #include "alias.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "crypt.h" #include "envelope.h" #include "format_flags.h" diff --git a/ncrypt/gnupgparse.c b/ncrypt/gnupgparse.c index 7277d7224..ef7eba5bd 100644 --- a/ncrypt/gnupgparse.c +++ b/ncrypt/gnupgparse.c @@ -40,7 +40,7 @@ #include #include "mutt/mutt.h" #include "mutt.h" -#include "charset.h" +#include "mutt_charset.h" #include "filter.h" #include "globals.h" #include "mime.h" diff --git a/ncrypt/pgp.c b/ncrypt/pgp.c index f7a1ca83f..dd68f2b7e 100644 --- a/ncrypt/pgp.c +++ b/ncrypt/pgp.c @@ -47,7 +47,7 @@ #include "mutt.h" #include "address.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "crypt.h" #include "cryptglue.h" #include "filter.h" diff --git a/parse.c b/parse.c index 612105740..f7d747107 100644 --- a/parse.c +++ b/parse.c @@ -32,7 +32,7 @@ #include "mutt.h" #include "address.h" #include "body.h" -#include "charset.h" +#include "mutt_charset.h" #include "envelope.h" #include "globals.h" #include "header.h" diff --git a/po/POTFILES.in b/po/POTFILES.in index 84914a29b..b572eef0c 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -6,7 +6,6 @@ bcache.c body.c browser.c buffy.c -charset.c color.c commands.c complete.c @@ -67,6 +66,7 @@ menu.c mh.c mutt/base64.c mutt/buffer.c +mutt/charset.c mutt/date.c mutt/debug.c mutt/exit.c @@ -84,6 +84,7 @@ mutt/string.c muttlib.c mutt_account.c mutt_address.c +mutt_charset.c mutt_idna.c mutt_lua.c mutt_notmuch.c diff --git a/protos.h b/protos.h index ce86da802..b1a39cc8a 100644 --- a/protos.h +++ b/protos.h @@ -152,7 +152,6 @@ int mutt_body_handler(struct Body *b, struct State *s); int mutt_bounce_message(FILE *fp, struct Header *h, struct Address *to); void mutt_buffy(char *s, size_t slen); int mutt_buffy_list(void); -void mutt_canonical_charset(char *dest, size_t dlen, const char *name); int mutt_count_body_parts(struct Context *ctx, struct Header *hdr); void mutt_check_rescore(struct Context *ctx); void mutt_clear_error(void); @@ -268,9 +267,6 @@ int mutt_edit_attachment(struct Body *a); int mutt_edit_message(struct Context *ctx, struct Header *hdr); int mutt_view_message(struct Context *ctx, struct Header *hdr); int mutt_fetch_recips(struct Envelope *out, struct Envelope *in, int flags); -int mutt_chscmp(const char *s, const char *chs); -#define mutt_is_utf8(a) mutt_chscmp(a, "utf-8") -#define mutt_is_us_ascii(a) mutt_chscmp(a, "us-ascii") int mutt_prepare_template(FILE *fp, struct Context *ctx, struct Header *newhdr, struct Header *hdr, short resend); int mutt_resend_message(FILE *fp, struct Context *ctx, struct Header *cur); int mutt_compose_to_sender(struct Header *hdr); diff --git a/rfc2047.c b/rfc2047.c index afc9f9a74..5c4db41db 100644 --- a/rfc2047.c +++ b/rfc2047.c @@ -29,7 +29,7 @@ #include "mutt/mutt.h" #include "rfc2047.h" #include "address.h" -#include "charset.h" +#include "mutt_charset.h" #include "globals.h" #include "mbyte.h" #include "mime.h" diff --git a/rfc2231.c b/rfc2231.c index 6f8b445b7..378fa5489 100644 --- a/rfc2231.c +++ b/rfc2231.c @@ -36,7 +36,7 @@ #include #include "mutt/mutt.h" #include "rfc2231.h" -#include "charset.h" +#include "mutt_charset.h" #include "globals.h" #include "mbyte.h" #include "mime.h" diff --git a/safe_asprintf.c b/safe_asprintf.c index 580f40984..90a47418d 100644 --- a/safe_asprintf.c +++ b/safe_asprintf.c @@ -22,6 +22,7 @@ #include #include +#include #include "mutt/mutt.h" /* NOTE: Currently there is no check in configure.ac for vasprintf(3). the diff --git a/sendlib.c b/sendlib.c index 19df360cb..c13ac7db4 100644 --- a/sendlib.c +++ b/sendlib.c @@ -43,7 +43,7 @@ #include "address.h" #include "body.h" #include "buffy.h" -#include "charset.h" +#include "mutt_charset.h" #include "content.h" #include "context.h" #include "copy.h" -- 2.40.0