move charset functions to libmutt

author Richard Russon <rich@flatcap.org>

Wed, 22 Nov 2017 17:26:52 +0000 (17:26 +0000)

committer Richard Russon <rich@flatcap.org>

Sun, 3 Dec 2017 23:39:31 +0000 (23:39 +0000)
author Richard Russon <rich@flatcap.org>
Wed, 22 Nov 2017 17:26:52 +0000 (17:26 +0000)
committer Richard Russon <rich@flatcap.org>
Sun, 3 Dec 2017 23:39:31 +0000 (23:39 +0000)
diff --git a/Makefile.am b/Makefile.am

index e0bdbb4feb8a52d4a24877032982031161975510..2055d5a1f9184e003928d27123aaa7a526a46917 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -43,7 +43,7 @@ BUILT_SOURCES = conststrings.c git_ver.h
  bin_PROGRAMS = neomutt
  
  neomutt_SOURCES = mutt_account.c addrbook.c address.h alias.c alias.h attach.c \
-       bcache.c body.c body.h browser.c buffy.c charset.c color.c \
+       bcache.c body.c body.h browser.c buffy.c mutt_charset.c color.c \
         commands.c complete.c compose.c compress.c content.h context.h copy.c \
         curs_lib.c curs_main.c edit.c editmsg.c enter.c enter_state.h \
         envelope.c envelope.h filter.c flags.c format_flags.h from.c group.c \
@@ -84,7 +84,7 @@ EXTRA_neomutt_SOURCES = browser.h mbyte.h mutt_idna.c mutt_idna.h \
         remailer.c remailer.h resize.c url.h
  
  EXTRA_DIST = mutt_account.h attach.h bcache.h browser.h buffy.h \
-       ChangeLog.md charset.h CODE_OF_CONDUCT.md compress.h copy.h \
+       ChangeLog.md mutt_charset.h CODE_OF_CONDUCT.md compress.h copy.h \
         COPYRIGHT filter.h functions.h globals.h \
         group.h history.h init.h keymap.h LICENSE.md mailbox.h \
         mbyte.h mime.h mutt.h mutt_commands.h \
diff --git a/Makefile.autosetup b/Makefile.autosetup

index 2d108c684b53d019c9cc8878ff195b6729c22b4b..c81a91a9e44c3e1bee3284c9cd2b59ea0d1a6a20 100644 (file)
--- a/Makefile.autosetup
+++ b/Makefile.autosetup
@@ -54,7 +54,7 @@ ALL_FILES!=   (cd $(SRCDIR) && git ls-tree -r --name-only HEAD 2>/dev/null) \
  # neomutt
  NEOMUTT=       neomutt$(EXEEXT)
  NEOMUTTOBJS=   mutt_account.o addrbook.o alias.o attach.o bcache.o body.o \
-               browser.o buffy.o charset.o color.o commands.o complete.o \
+               browser.o buffy.o mutt_charset.o color.o commands.o complete.o \
                 compose.o compress.o conststrings.o copy.o curs_lib.o \
                 curs_main.o edit.o editmsg.o enter.o envelope.o filter.o \
                 flags.o from.o group.o handler.o hdrline.o \
@@ -90,7 +90,7 @@ ALLOBJS+=     $(NEOMUTTOBJS)
  ###############################################################################
  # libmutt
  LIBMUTT=       libmutt.a
-LIBMUTTOBJS=   mutt/base64.o mutt/buffer.o mutt/date.o mutt/debug.o mutt/exit.o \
+LIBMUTTOBJS=   mutt/base64.o mutt/buffer.o mutt/charset.o mutt/date.o mutt/debug.o mutt/exit.o \
                 mutt/file.o mutt/hash.o mutt/list.o mutt/mapping.o mutt/mbyte.o mutt/md5.o \
                 mutt/memory.o mutt/message.o mutt/sha1.o mutt/signal.o mutt/string.o
  CLEANFILES+=   $(LIBMUTT) $(LIBMUTTOBJS)
diff --git a/alias.c b/alias.c

index cb333d4d7e3d759a1477c65c88a17f099a10c225..729ee0e68c24d82964b50195b97988e37abd839b 100644 (file)
--- a/alias.c
+++ b/alias.c
@@ -36,7 +36,7 @@
  #include "mutt.h"
  #include "address.h"
  #include "alias.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "envelope.h"
  #include "globals.h"
  #include "mutt_curses.h"
diff --git a/browser.c b/browser.c

index eb6f38486595403c9fba0789885e143a2d7314a8..45ad2c95fc72215474daaaa287c5b0d51100c992 100644 (file)
--- a/browser.c
+++ b/browser.c
@@ -46,7 +46,7 @@
  #include "body.h"
  #include "browser.h"
  #include "buffy.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "context.h"
  #include "format_flags.h"
  #include "globals.h"
diff --git a/globals.h b/globals.h

index 6506e6ae39be73d89265f590cafe7b8719ef839d..351472b9232fbb47fb385937c77903a88c70fe91 100644 (file)
--- a/globals.h
+++ b/globals.h
@@ -46,14 +46,12 @@ WHERE struct Address *From;
  
  WHERE char *AliasFile;
  WHERE char *AliasFormat;
-WHERE char *AssumedCharset;
  WHERE char *AttachSep;
  WHERE char *Attribution;
  WHERE char *AttributionLocale;
  WHERE char *AttachCharset;
  WHERE char *AttachFormat;
  WHERE struct Regex AttachKeyword;
-WHERE char *Charset;
  WHERE char *ComposeFormat;
  WHERE char *ConfigCharset;
  WHERE char *ContentType;
diff --git a/handler.c b/handler.c

index be410a6573fc5077c83324d4d06bf76b1a120eec..efda72699b38ef63a34cba040331bc5895fb1118 100644 (file)
--- a/handler.c
+++ b/handler.c
@@ -40,7 +40,7 @@
  #include "mutt/mutt.h"
  #include "mutt.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "copy.h"
  #include "filter.h"
  #include "globals.h"
diff --git a/hcache/hcache.c b/hcache/hcache.c

index aebbdae7db804b62bf710884b657f0984f26d282..c5bb942dc5d334a9eef9e5ccf689d55c1a76fbc2 100644 (file)
--- a/hcache/hcache.c
+++ b/hcache/hcache.c
@@ -51,7 +51,7 @@
  #include "address.h"
  #include "backend.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "envelope.h"
  #include "globals.h"
  #include "hcache.h"
diff --git a/history.c b/history.c

index 2875b38777485fef53e69aa9aaf12d89ab62193e..0f5921ac93a43007a8a1c927f2094bcccea38373 100644 (file)
--- a/history.c
+++ b/history.c
@@ -28,7 +28,7 @@
  #include <unistd.h>
  #include "mutt/mutt.h"
  #include "history.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "globals.h"
  #include "protos.h"
  
diff --git a/imap/utf7.c b/imap/utf7.c

index bb26cece88f23483a95944ca0199cf44362a3004..abfcaf1d8b5107e43d8628278123397dc8947863 100644 (file)
--- a/imap/utf7.c
+++ b/imap/utf7.c
@@ -35,7 +35,7 @@
  #include <string.h>
  #include "imap_private.h"
  #include "mutt/mutt.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "globals.h"
  
  // clang-format off
diff --git a/init.c b/init.c

index 532465214106fa8222e4f58faba1cf788336cf5c..e1a942226ee70ae98e758396fa43374d5fb5c090 100644 (file)
--- a/init.c
+++ b/init.c
@@ -41,7 +41,7 @@
  #include "init.h"
  #include "address.h"
  #include "alias.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "context.h"
  #include "envelope.h"
  #include "filter.h"
diff --git a/mbyte.c b/mbyte.c

index c02620ed3b13cff80cb6bc2ce6a18e5ca9a1e425..145b16bd9b68a442a00b17b7d2ad5aa920452557 100644 (file)
--- a/mbyte.c
+++ b/mbyte.c
@@ -29,7 +29,7 @@
  #include <wchar.h>
  #include "mutt/mutt.h"
  #include "mbyte.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "options.h"
  #include "protos.h"
  
diff --git a/mutt/Makefile.am b/mutt/Makefile.am

index b52068d370c123c54ee2872c9bd22fe8f63973e5..49f10184fd30d403d1eae7427ae426092ddc237d 100644 (file)
--- a/mutt/Makefile.am
+++ b/mutt/Makefile.am
@@ -3,11 +3,11 @@ include $(top_srcdir)/flymake.am
  
  AUTOMAKE_OPTIONS = 1.6 foreign
  
-EXTRA_DIST = mutt.h base64.h buffer.h date.h debug.h exit.h file.h hash.h list.h mapping.h mbyte.h md5.h memory.h message.h queue.h sha1.h signal2.h string2.h
+EXTRA_DIST = mutt.h base64.h buffer.h charset.h date.h debug.h exit.h file.h hash.h list.h mapping.h mbyte.h md5.h memory.h message.h queue.h sha1.h signal2.h string2.h
  
  AM_CPPFLAGS = -I$(top_srcdir)
  
  noinst_LIBRARIES = libmutt.a
  
-libmutt_a_SOURCES = base64.c buffer.c date.c debug.c exit.c file.c hash.c list.c mapping.c mbyte.c md5.c memory.c message.c sha1.c signal.c string.c
+libmutt_a_SOURCES = base64.c buffer.c charset.c date.c debug.c exit.c file.c hash.c list.c mapping.c mbyte.c md5.c memory.c message.c sha1.c signal.c string.c
  
diff --git a/charset.c b/mutt/charset.c

similarity index 73%

rename from charset.c

rename to mutt/charset.c

index 01b152157c20cd032a1126b97742a988983dd711..a1c27c2eb16666fb8b5763403dd479c46cb45027 100644 (file)
--- a/charset.c
+++ b/mutt/charset.c
@@ -23,20 +23,22 @@
  #include "config.h"
  #include <ctype.h>
  #include <errno.h>
+#include <iconv.h>
  #include <langinfo.h>
-#include <limits.h>
+#include <stdbool.h>
  #include <stdio.h>
  #include <string.h>
-#include "mutt/mutt.h"
-#include "mutt.h"
  #include "charset.h"
-#include "globals.h"
-#include "protos.h"
+#include "memory.h"
+#include "string2.h"
  
  #ifndef EILSEQ
  #define EILSEQ EINVAL
  #endif
  
+char *AssumedCharset;
+char *Charset;
+
  /*
   * The following list has been created manually from the data under:
   * http://www.isi.edu/in-notes/iana/assignments/character-sets
@@ -46,14 +48,8 @@
   * MIME name is given.
   */
  
-static const struct
-{
-  const char *key;
-  const char *pref;
-}
-
  // clang-format off
-PreferredMIMENames[] =
+const struct MimeNames PreferredMIMENames[] =
  {
    { "ansi_x3.4-1968",        "us-ascii"      },
    { "iso-ir-6",              "us-ascii"      },
@@ -205,18 +201,87 @@ PreferredMIMENames[] =
  };
  // clang-format on
  
-void mutt_set_langinfo_charset(void)
+void fgetconv_close(FGETCONV **_fc)
  {
-  char buf[LONG_STRING];
-  char buf2[LONG_STRING];
+  struct FgetConv *fc = (struct FgetConv *) *_fc;
  
-  mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf));
-  mutt_canonical_charset(buf2, sizeof(buf2), buf);
+  if (fc->cd != (iconv_t) -1)
+    iconv_close(fc->cd);
+  FREE(_fc);
+}
  
-  /* finally, set $charset */
-  Charset = mutt_str_strdup(buf2);
-  if (!Charset)
-    Charset = mutt_str_strdup("iso-8859-1");
+int fgetconv(FGETCONV *_fc)
+{
+  struct FgetConv *fc = (struct FgetConv *) _fc;
+
+  if (!fc)
+    return EOF;
+  if (fc->cd == (iconv_t) -1)
+    return fgetc(fc->file);
+  if (!fc->p)
+    return EOF;
+  if (fc->p < fc->ob)
+    return (unsigned char) *(fc->p)++;
+
+  /* Try to convert some more */
+  fc->p = fc->ob = fc->bufo;
+  if (fc->ibl)
+  {
+    size_t obl = sizeof(fc->bufo);
+    iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
+    if (fc->p < fc->ob)
+      return (unsigned char) *(fc->p)++;
+  }
+
+  /* If we trusted iconv a bit more, we would at this point
+   * ask why it had stopped converting ... */
+
+  /* Try to read some more */
+  if (fc->ibl == sizeof(fc->bufi) ||
+      (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi)))
+  {
+    fc->p = 0;
+    return EOF;
+  }
+  if (fc->ibl)
+    memcpy(fc->bufi, fc->ib, fc->ibl);
+  fc->ib = fc->bufi;
+  fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file);
+
+  /* Try harder this time to convert some */
+  if (fc->ibl)
+  {
+    size_t obl = sizeof(fc->bufo);
+    mutt_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0);
+    if (fc->p < fc->ob)
+      return (unsigned char) *(fc->p)++;
+  }
+
+  /* Either the file has finished or one of the buffers is too small */
+  fc->p = 0;
+  return EOF;
+}
+
+char *fgetconvs(char *buf, size_t l, FGETCONV *_fc)
+{
+  int c;
+  size_t r;
+
+  for (r = 0; r + 1 < l;)
+  {
+    c = fgetconv(_fc);
+    if (c == EOF)
+      break;
+    buf[r++] = (char) c;
+    if (c == '\n')
+      break;
+  }
+  buf[r] = '\0';
+
+  if (r)
+    return buf;
+  else
+    return NULL;
  }
  
  /**
@@ -312,53 +377,6 @@ char *mutt_get_default_charset(void)
    return strcpy(fcharset, "us-ascii");
  }
  
-/**
- * mutt_iconv_open - Set up iconv for conversions
- *
- * Like iconv_open, but canonicalises the charsets, applies charset-hooks,
- * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips
- * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers
- * should use flags=0 when fromcode can safely be considered true, either some
- * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be
- * used only when fromcode is unsure, taken from a possibly wrong incoming MIME
- * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions
- * in some setups. Note: By design charset-hooks should never be, and are never,
- * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM
- * acts on charset-hooks, not at all on iconv-hooks.
- */
-iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags)
-{
-  char tocode1[SHORT_STRING];
-  char fromcode1[SHORT_STRING];
-  char *tocode2 = NULL, *fromcode2 = NULL;
-  char *tmp = NULL;
-
-  iconv_t cd;
-
-  /* transform to MIME preferred charset names */
-  mutt_canonical_charset(tocode1, sizeof(tocode1), tocode);
-  mutt_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
-
-  /* maybe apply charset-hooks and recanonicalise fromcode,
-   * but only when caller asked us to sanitize a potentially wrong
-   * charset name incoming from the wild exterior. */
-  if ((flags & MUTT_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1)))
-    mutt_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
-
-  /* always apply iconv-hooks to suit system's iconv tastes */
-  tocode2 = mutt_iconv_hook(tocode1);
-  tocode2 = (tocode2) ? tocode2 : tocode1;
-  fromcode2 = mutt_iconv_hook(fromcode1);
-  fromcode2 = (fromcode2) ? fromcode2 : fromcode1;
-
-  /* call system iconv with names it appreciates */
-  cd = iconv_open(tocode2, fromcode2);
-  if (cd != (iconv_t) -1)
-    return cd;
-
-  return (iconv_t) -1;
-}
-
  /**
   * mutt_iconv - Change the encoding of a string
   *
@@ -436,224 +454,16 @@ size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **ou
    }
  }
  
-/**
- * mutt_convert_string - Convert a string between encodings
- *
- * Parameter flags is given as-is to mutt_iconv_open().
- * See there for its meaning and usage policy.
- */
-int mutt_convert_string(char **ps, const char *from, const char *to, int flags)
-{
-  iconv_t cd;
-  const char *repls[] = { "\357\277\275", "?", 0 };
-  char *s = *ps;
-
-  if (!s || !*s)
-    return 0;
-
-  if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1)
-  {
-    int len;
-    const char *ib = NULL;
-    char *buf = NULL, *ob = NULL;
-    size_t ibl, obl;
-    const char **inrepls = NULL;
-    char *outrepl = NULL;
-
-    if (mutt_is_utf8(to))
-      outrepl = "\357\277\275";
-    else if (mutt_is_utf8(from))
-      inrepls = repls;
-    else
-      outrepl = "?";
-
-    len = strlen(s);
-    ib = s;
-    ibl = len + 1;
-    obl = MB_LEN_MAX * ibl;
-    ob = buf = mutt_mem_malloc(obl + 1);
-
-    mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
-    iconv_close(cd);
-
-    *ob = '\0';
-
-    FREE(ps);
-    *ps = buf;
-
-    mutt_str_adjust(ps);
-    return 0;
-  }
-  else
-    return -1;
-}
-
-/*
- * FGETCONV stuff for converting a file while reading it.
- * Used in sendlib.c for converting from neomutt's Charset
- */
-
-/**
- * struct FgetConv - Cursor for converting a file's encoding
- */
-struct FgetConv
-{
-  FILE *file;
-  iconv_t cd;
-  char bufi[512];
-  char bufo[512];
-  char *p;
-  char *ob;
-  char *ib;
-  size_t ibl;
-  const char **inrepls;
-};
-
-/**
- * struct FgetConvNot - A dummy converter
- */
-struct FgetConvNot
-{
-  FILE *file;
-  iconv_t cd;
-};
-
-/**
- * fgetconv_open - Open a file and convert its encoding
- *
- * Parameter flags is given as-is to mutt_iconv_open().
- * See there for its meaning and usage policy.
- */
-FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags)
-{
-  struct FgetConv *fc = NULL;
-  iconv_t cd = (iconv_t) -1;
-  static const char *repls[] = { "\357\277\275", "?", 0 };
-
-  if (from && to)
-    cd = mutt_iconv_open(to, from, flags);
-
-  if (cd != (iconv_t) -1)
-  {
-    fc = mutt_mem_malloc(sizeof(struct FgetConv));
-    fc->p = fc->ob = fc->bufo;
-    fc->ib = fc->bufi;
-    fc->ibl = 0;
-    fc->inrepls = mutt_is_utf8(to) ? repls : repls + 1;
-  }
-  else
-    fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
-  fc->file = file;
-  fc->cd = cd;
-  return (FGETCONV *) fc;
-}
-
-char *fgetconvs(char *buf, size_t l, FGETCONV *_fc)
-{
-  int c;
-  size_t r;
-
-  for (r = 0; r + 1 < l;)
-  {
-    c = fgetconv(_fc);
-    if (c == EOF)
-      break;
-    buf[r++] = (char) c;
-    if (c == '\n')
-      break;
-  }
-  buf[r] = '\0';
-
-  if (r)
-    return buf;
-  else
-    return NULL;
-}
-
-int fgetconv(FGETCONV *_fc)
-{
-  struct FgetConv *fc = (struct FgetConv *) _fc;
-
-  if (!fc)
-    return EOF;
-  if (fc->cd == (iconv_t) -1)
-    return fgetc(fc->file);
-  if (!fc->p)
-    return EOF;
-  if (fc->p < fc->ob)
-    return (unsigned char) *(fc->p)++;
-
-  /* Try to convert some more */
-  fc->p = fc->ob = fc->bufo;
-  if (fc->ibl)
-  {
-    size_t obl = sizeof(fc->bufo);
-    iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
-    if (fc->p < fc->ob)
-      return (unsigned char) *(fc->p)++;
-  }
-
-  /* If we trusted iconv a bit more, we would at this point
-   * ask why it had stopped converting ... */
-
-  /* Try to read some more */
-  if (fc->ibl == sizeof(fc->bufi) ||
-      (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi)))
-  {
-    fc->p = 0;
-    return EOF;
-  }
-  if (fc->ibl)
-    memcpy(fc->bufi, fc->ib, fc->ibl);
-  fc->ib = fc->bufi;
-  fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file);
-
-  /* Try harder this time to convert some */
-  if (fc->ibl)
-  {
-    size_t obl = sizeof(fc->bufo);
-    mutt_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0);
-    if (fc->p < fc->ob)
-      return (unsigned char) *(fc->p)++;
-  }
-
-  /* Either the file has finished or one of the buffers is too small */
-  fc->p = 0;
-  return EOF;
-}
-
-void fgetconv_close(FGETCONV **_fc)
-{
-  struct FgetConv *fc = (struct FgetConv *) *_fc;
-
-  if (fc->cd != (iconv_t) -1)
-    iconv_close(fc->cd);
-  FREE(_fc);
-}
-
-bool mutt_check_charset(const char *s, bool strict)
+void mutt_set_langinfo_charset(void)
  {
-  iconv_t cd;
-
-  if (mutt_is_utf8(s))
-    return true;
-
-  if (!strict)
-    for (int i = 0; PreferredMIMENames[i].key; i++)
-    {
-      if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) ||
-          (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0))
-      {
-        return true;
-      }
-    }
+  char buf[LONG_STRING];
+  char buf2[LONG_STRING];
  
-  cd = mutt_iconv_open(s, s, 0);
-  if (cd != (iconv_t)(-1))
-  {
-    iconv_close(cd);
-    return true;
-  }
+  mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf));
+  mutt_canonical_charset(buf2, sizeof(buf2), buf);
  
-  return false;
+  /* finally, set $charset */
+  Charset = mutt_str_strdup(buf2);
+  if (!Charset)
+    Charset = mutt_str_strdup("iso-8859-1");
  }
diff --git a/mutt/charset.h b/mutt/charset.h

new file mode 100644 (file)

index 0000000..8162588
--- /dev/null
+++ b/mutt/charset.h
@@ -0,0 +1,79 @@
+/**
+ * @file
+ * Conversion between different character encodings
+ *
+ * @authors
+ * Copyright (C) 1999-2002,2007 Thomas Roessler <roessler@does-not-exist.org>
+ *
+ * @copyright
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _MUTT_CHARSET_H
+#define _MUTT_CHARSET_H
+
+#include <iconv.h>
+#include <stdio.h>
+
+extern char *AssumedCharset;
+extern char *Charset;
+
+typedef void *FGETCONV;
+
+/**
+ * struct FgetConv - Cursor for converting a file's encoding
+ */
+struct FgetConv
+{
+  FILE *file;
+  iconv_t cd;
+  char bufi[512];
+  char bufo[512];
+  char *p;
+  char *ob;
+  char *ib;
+  size_t ibl;
+  const char **inrepls;
+};
+
+/**
+ * struct FgetConvNot - A dummy converter
+ */
+struct FgetConvNot
+{
+  FILE *file;
+  iconv_t cd;
+};
+
+struct MimeNames
+{
+  const char *key;
+  const char *pref;
+};
+
+extern const struct MimeNames PreferredMIMENames[];
+
+char * fgetconvs(char *buf, size_t l, FGETCONV *_fc);
+char * mutt_get_default_charset(void);
+int    fgetconv(FGETCONV *_fc);
+int    mutt_chscmp(const char *s, const char *chs);
+size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl);
+void   fgetconv_close(FGETCONV **_fc);
+void   mutt_canonical_charset(char *dest, size_t dlen, const char *name);
+void   mutt_set_langinfo_charset(void);
+
+#define mutt_is_utf8(a)     mutt_chscmp(a, "utf-8")
+#define mutt_is_us_ascii(a) mutt_chscmp(a, "us-ascii")
+
+#endif 
diff --git a/mutt/mutt.h b/mutt/mutt.h

index 5125f52d0e8f2be33bbd280f4754d85a6ca43827..7da7f9cba9d50419d755289654cf37bc3ff52437 100644 (file)
--- a/mutt/mutt.h
+++ b/mutt/mutt.h
@@ -30,6 +30,7 @@
   *
   * -# @subpage base64
   * -# @subpage buffer
+ * -# @subpage charset
   * -# @subpage date
   * -# @subpage debug
   * -# @subpage exit
@@ -51,6 +52,7 @@
  
  #include "base64.h"
  #include "buffer.h"
+#include "charset.h"
  #include "date.h"
  #include "debug.h"
  #include "exit.h"
diff --git a/mutt_charset.c b/mutt_charset.c

new file mode 100644 (file)

index 0000000..3af22d2
--- /dev/null
+++ b/mutt_charset.c
@@ -0,0 +1,190 @@
+/**
+ * @file
+ * Conversion between different character encodings
+ *
+ * @authors
+ * Copyright (C) 1999-2002,2007 Thomas Roessler <roessler@does-not-exist.org>
+ *
+ * @copyright
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+#include <ctype.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include "mutt/mutt.h"
+#include "mutt.h"
+#include "mutt_charset.h"
+#include "globals.h"
+#include "protos.h"
+
+/**
+ * mutt_iconv_open - Set up iconv for conversions
+ *
+ * Like iconv_open, but canonicalises the charsets, applies charset-hooks,
+ * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips
+ * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers
+ * should use flags=0 when fromcode can safely be considered true, either some
+ * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be
+ * used only when fromcode is unsure, taken from a possibly wrong incoming MIME
+ * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions
+ * in some setups. Note: By design charset-hooks should never be, and are never,
+ * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM
+ * acts on charset-hooks, not at all on iconv-hooks.
+ */
+iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags)
+{
+  char tocode1[SHORT_STRING];
+  char fromcode1[SHORT_STRING];
+  char *tocode2 = NULL, *fromcode2 = NULL;
+  char *tmp = NULL;
+
+  iconv_t cd;
+
+  /* transform to MIME preferred charset names */
+  mutt_canonical_charset(tocode1, sizeof(tocode1), tocode);
+  mutt_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
+
+  /* maybe apply charset-hooks and recanonicalise fromcode,
+   * but only when caller asked us to sanitize a potentially wrong
+   * charset name incoming from the wild exterior. */
+  if ((flags & MUTT_ICONV_HOOK_FROM) && (tmp = mutt_charset_hook(fromcode1)))
+    mutt_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
+
+  /* always apply iconv-hooks to suit system's iconv tastes */
+  tocode2 = mutt_iconv_hook(tocode1);
+  tocode2 = (tocode2) ? tocode2 : tocode1;
+  fromcode2 = mutt_iconv_hook(fromcode1);
+  fromcode2 = (fromcode2) ? fromcode2 : fromcode1;
+
+  /* call system iconv with names it appreciates */
+  cd = iconv_open(tocode2, fromcode2);
+  if (cd != (iconv_t) -1)
+    return cd;
+
+  return (iconv_t) -1;
+}
+
+/**
+ * mutt_convert_string - Convert a string between encodings
+ *
+ * Parameter flags is given as-is to mutt_iconv_open().
+ * See there for its meaning and usage policy.
+ */
+int mutt_convert_string(char **ps, const char *from, const char *to, int flags)
+{
+  iconv_t cd;
+  const char *repls[] = { "\357\277\275", "?", 0 };
+  char *s = *ps;
+
+  if (!s || !*s)
+    return 0;
+
+  if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1)
+  {
+    int len;
+    const char *ib = NULL;
+    char *buf = NULL, *ob = NULL;
+    size_t ibl, obl;
+    const char **inrepls = NULL;
+    char *outrepl = NULL;
+
+    if (mutt_is_utf8(to))
+      outrepl = "\357\277\275";
+    else if (mutt_is_utf8(from))
+      inrepls = repls;
+    else
+      outrepl = "?";
+
+    len = strlen(s);
+    ib = s;
+    ibl = len + 1;
+    obl = MB_LEN_MAX * ibl;
+    ob = buf = mutt_mem_malloc(obl + 1);
+
+    mutt_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
+    iconv_close(cd);
+
+    *ob = '\0';
+
+    FREE(ps);
+    *ps = buf;
+
+    mutt_str_adjust(ps);
+    return 0;
+  }
+  else
+    return -1;
+}
+
+/**
+ * fgetconv_open - Open a file and convert its encoding
+ *
+ * Parameter flags is given as-is to mutt_iconv_open().
+ * See there for its meaning and usage policy.
+ */
+FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags)
+{
+  struct FgetConv *fc = NULL;
+  iconv_t cd = (iconv_t) -1;
+  static const char *repls[] = { "\357\277\275", "?", 0 };
+
+  if (from && to)
+    cd = mutt_iconv_open(to, from, flags);
+
+  if (cd != (iconv_t) -1)
+  {
+    fc = mutt_mem_malloc(sizeof(struct FgetConv));
+    fc->p = fc->ob = fc->bufo;
+    fc->ib = fc->bufi;
+    fc->ibl = 0;
+    fc->inrepls = mutt_is_utf8(to) ? repls : repls + 1;
+  }
+  else
+    fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
+  fc->file = file;
+  fc->cd = cd;
+  return (FGETCONV *) fc;
+}
+
+bool mutt_check_charset(const char *s, bool strict)
+{
+  iconv_t cd;
+
+  if (mutt_is_utf8(s))
+    return true;
+
+  if (!strict)
+    for (int i = 0; PreferredMIMENames[i].key; i++)
+    {
+      if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) ||
+          (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0))
+      {
+        return true;
+      }
+    }
+
+  cd = mutt_iconv_open(s, s, 0);
+  if (cd != (iconv_t)(-1))
+  {
+    iconv_close(cd);
+    return true;
+  }
+
+  return false;
+}
diff --git a/charset.h b/mutt_charset.h

similarity index 68%

rename from charset.h

rename to mutt_charset.h

index 2576c1a479b035f9e7e6aa254eb4e1014ac8a0fb..72d1742d3d8678dcfacd229918973dbbabda1086 100644 (file)
--- a/charset.h
+++ b/mutt_charset.h
@@ -20,8 +20,8 @@
   * this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
-#ifndef _MUTT_CHARSET_H
-#define _MUTT_CHARSET_H
+#ifndef _MUTT_CHARSET2_H
+#define _MUTT_CHARSET2_H
  
  #include <iconv.h>
  #include <stdbool.h>
@@ -30,18 +30,7 @@
  int mutt_convert_string(char **ps, const char *from, const char *to, int flags);
  
  iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags);
-size_t mutt_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
-                  size_t *outbytesleft, const char **inrepls, const char *outrepl);
-
-typedef void *FGETCONV;
-
  FGETCONV *fgetconv_open(FILE *file, const char *from, const char *to, int flags);
-int fgetconv(FGETCONV *_fc);
-char *fgetconvs(char *buf, size_t l, FGETCONV *_fc);
-void fgetconv_close(FGETCONV **_fc);
-
-void mutt_set_langinfo_charset(void);
-char *mutt_get_default_charset(void);
  
  /* flags for charset.c:mutt_convert_string(), fgetconv_open(), and
   * mutt_iconv_open(). Note that applying charset-hooks to tocode is
@@ -50,10 +39,6 @@ char *mutt_get_default_charset(void);
   */
  #define MUTT_ICONV_HOOK_FROM 1 /* apply charset-hooks to fromcode */
  
-/* Check if given character set is valid (either officially assigned or
- * known to local iconv implementation). If strict is non-zero, check
- * against iconv only. Returns 0 if known and negative otherwise.
- */
  bool mutt_check_charset(const char *s, bool strict);
  
-#endif /* _MUTT_CHARSET_H */
+#endif /* _MUTT_CHARSET2_H */
diff --git a/mutt_idna.c b/mutt_idna.c

index 50c9a649b40e48c90a2f460a0414196729d74608..d9dbb6af87b72b1c927b235482ab4cfb1413e45c 100644 (file)
--- a/mutt_idna.c
+++ b/mutt_idna.c
@@ -27,7 +27,7 @@
  #include "mutt/mutt.h"
  #include "mutt_idna.h"
  #include "address.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "envelope.h"
  #include "globals.h"
  #include "options.h"
diff --git a/muttlib.c b/muttlib.c

index 8e4de056ec317fe1d49ec0125407c9347404981f..84292201a46410ba921979e4617c2f322e528e51 100644 (file)
--- a/muttlib.c
+++ b/muttlib.c
@@ -48,7 +48,7 @@
  #include "address.h"
  #include "alias.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "envelope.h"
  #include "filter.h"
  #include "format_flags.h"
diff --git a/ncrypt/crypt_gpgme.c b/ncrypt/crypt_gpgme.c

index 5a1cfa2f81c4be33480b0a0291424c59a653cd92..04ea35d421a5915fa483c93c6336dd169f12afe8 100644 (file)
--- a/ncrypt/crypt_gpgme.c
+++ b/ncrypt/crypt_gpgme.c
@@ -50,7 +50,7 @@
  #include "address.h"
  #include "alias.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "crypt.h"
  #include "envelope.h"
  #include "format_flags.h"
diff --git a/ncrypt/gnupgparse.c b/ncrypt/gnupgparse.c

index 7277d722439796be0fd750aa53c28bb3897e3859..ef7eba5bd43c97e8d15cc29dfcb61d9c123ac7f4 100644 (file)
--- a/ncrypt/gnupgparse.c
+++ b/ncrypt/gnupgparse.c
@@ -40,7 +40,7 @@
  #include <unistd.h>
  #include "mutt/mutt.h"
  #include "mutt.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "filter.h"
  #include "globals.h"
  #include "mime.h"
diff --git a/ncrypt/pgp.c b/ncrypt/pgp.c

index f7a1ca83fd34fc682dd319998117bc1479fb3ed9..dd68f2b7e2f1605944bb5205be4e79a01268155f 100644 (file)
--- a/ncrypt/pgp.c
+++ b/ncrypt/pgp.c
@@ -47,7 +47,7 @@
  #include "mutt.h"
  #include "address.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "crypt.h"
  #include "cryptglue.h"
  #include "filter.h"
diff --git a/parse.c b/parse.c

index 6121057408d587ac291708c3ab0c8d0be7c04006..f7d747107ea3978bb3d9139a535aee3d18656732 100644 (file)
--- a/parse.c
+++ b/parse.c
@@ -32,7 +32,7 @@
  #include "mutt.h"
  #include "address.h"
  #include "body.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "envelope.h"
  #include "globals.h"
  #include "header.h"
diff --git a/po/POTFILES.in b/po/POTFILES.in

index 84914a29b31c1a9e73ffcf06ca5dd71a10ae3647..b572eef0c5fbef0e8ed5341e7b053ac4d224e59f 100644 (file)
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -6,7 +6,6 @@ bcache.c
  body.c
  browser.c
  buffy.c
-charset.c
  color.c
  commands.c
  complete.c
@@ -67,6 +66,7 @@ menu.c
  mh.c
  mutt/base64.c
  mutt/buffer.c
+mutt/charset.c
  mutt/date.c
  mutt/debug.c
  mutt/exit.c
@@ -84,6 +84,7 @@ mutt/string.c
  muttlib.c
  mutt_account.c
  mutt_address.c
+mutt_charset.c
  mutt_idna.c
  mutt_lua.c
  mutt_notmuch.c
diff --git a/protos.h b/protos.h

index ce86da802ac18aa409272553415990e5c08a9439..b1a39cc8a1daf29d1dc885beb4797ad758c426d2 100644 (file)
--- a/protos.h
+++ b/protos.h
@@ -152,7 +152,6 @@ int mutt_body_handler(struct Body *b, struct State *s);
  int mutt_bounce_message(FILE *fp, struct Header *h, struct Address *to);
  void mutt_buffy(char *s, size_t slen);
  int mutt_buffy_list(void);
-void mutt_canonical_charset(char *dest, size_t dlen, const char *name);
  int mutt_count_body_parts(struct Context *ctx, struct Header *hdr);
  void mutt_check_rescore(struct Context *ctx);
  void mutt_clear_error(void);
@@ -268,9 +267,6 @@ int mutt_edit_attachment(struct Body *a);
  int mutt_edit_message(struct Context *ctx, struct Header *hdr);
  int mutt_view_message(struct Context *ctx, struct Header *hdr);
  int mutt_fetch_recips(struct Envelope *out, struct Envelope *in, int flags);
-int mutt_chscmp(const char *s, const char *chs);
-#define mutt_is_utf8(a) mutt_chscmp(a, "utf-8")
-#define mutt_is_us_ascii(a) mutt_chscmp(a, "us-ascii")
  int mutt_prepare_template(FILE *fp, struct Context *ctx, struct Header *newhdr, struct Header *hdr, short resend);
  int mutt_resend_message(FILE *fp, struct Context *ctx, struct Header *cur);
  int mutt_compose_to_sender(struct Header *hdr);
diff --git a/rfc2047.c b/rfc2047.c

index afc9f9a7450302c8a2fb8690607225df34aca7ec..5c4db41dbdb9321bebb7a03796490086703b2a32 100644 (file)
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -29,7 +29,7 @@
  #include "mutt/mutt.h"
  #include "rfc2047.h"
  #include "address.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "globals.h"
  #include "mbyte.h"
  #include "mime.h"
diff --git a/rfc2231.c b/rfc2231.c

index 6f8b445b76e07d1f13e285accc4d75281adcddca..378fa548926922a0bd502e5150ef92dacec80a05 100644 (file)
--- a/rfc2231.c
+++ b/rfc2231.c
@@ -36,7 +36,7 @@
  #include <string.h>
  #include "mutt/mutt.h"
  #include "rfc2231.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "globals.h"
  #include "mbyte.h"
  #include "mime.h"
diff --git a/safe_asprintf.c b/safe_asprintf.c

index 580f409849eddc3d4da037fe1ad0de8b0aeff9f5..90a47418d8fd94c53cc0f7e67af501494dbbccc8 100644 (file)
--- a/safe_asprintf.c
+++ b/safe_asprintf.c
@@ -22,6 +22,7 @@
  
  #include <stdarg.h>
  #include <stdio.h>
+#include <unistd.h>
  #include "mutt/mutt.h"
  
  /* NOTE: Currently there is no check in configure.ac for vasprintf(3).  the
diff --git a/sendlib.c b/sendlib.c

index 19df360cb741f16b73549808c3ff92f732c76a5a..c13ac7db4f47c91605475e0177aee262a4937c2d 100644 (file)
--- a/sendlib.c
+++ b/sendlib.c
@@ -43,7 +43,7 @@
  #include "address.h"
  #include "body.h"
  #include "buffy.h"
-#include "charset.h"
+#include "mutt_charset.h"
  #include "content.h"
  #include "context.h"
  #include "copy.h"
author	Richard Russon <rich@flatcap.org>
	Wed, 22 Nov 2017 17:26:52 +0000 (17:26 +0000)
committer	Richard Russon <rich@flatcap.org>
	Sun, 3 Dec 2017 23:39:31 +0000 (23:39 +0000)
Makefile.am		patch \| blob \| history
Makefile.autosetup		patch \| blob \| history
alias.c		patch \| blob \| history
browser.c		patch \| blob \| history
globals.h		patch \| blob \| history
handler.c		patch \| blob \| history
hcache/hcache.c		patch \| blob \| history
history.c		patch \| blob \| history
imap/utf7.c		patch \| blob \| history
init.c		patch \| blob \| history
mbyte.c		patch \| blob \| history
mutt/Makefile.am		patch \| blob \| history
mutt/charset.c	[moved from charset.c with 73% similarity]	patch \| blob \| history
mutt/charset.h	[new file with mode: 0644]	patch \| blob
mutt/mutt.h		patch \| blob \| history
mutt_charset.c	[new file with mode: 0644]	patch \| blob
mutt_charset.h	[moved from charset.h with 68% similarity]	patch \| blob \| history
mutt_idna.c		patch \| blob \| history
muttlib.c		patch \| blob \| history
ncrypt/crypt_gpgme.c		patch \| blob \| history
ncrypt/gnupgparse.c		patch \| blob \| history
ncrypt/pgp.c		patch \| blob \| history
parse.c		patch \| blob \| history
po/POTFILES.in		patch \| blob \| history
protos.h		patch \| blob \| history
rfc2047.c		patch \| blob \| history
rfc2231.c		patch \| blob \| history
safe_asprintf.c		patch \| blob \| history
sendlib.c		patch \| blob \| history