move charset functions to muttlib

author Richard Russon <rich@flatcap.org>

Sat, 30 Dec 2017 16:05:25 +0000 (16:05 +0000)

committer Richard Russon <rich@flatcap.org>

Sun, 31 Dec 2017 00:40:16 +0000 (00:40 +0000)
author Richard Russon <rich@flatcap.org>
Sat, 30 Dec 2017 16:05:25 +0000 (16:05 +0000)
committer Richard Russon <rich@flatcap.org>
Sun, 31 Dec 2017 00:40:16 +0000 (00:40 +0000)
diff --git a/Makefile.autosetup b/Makefile.autosetup

index 89b2295594ab7e77136341ef8c0ff151faaa8265..6089f957b824f8fd82b28134bfccfd47b1e08fd7 100644 (file)
--- a/Makefile.autosetup
+++ b/Makefile.autosetup
@@ -56,7 +56,7 @@ ALL_FILES!=   (cd $(SRCDIR) && git ls-tree -r --name-only HEAD 2>/dev/null) \
  # neomutt
  NEOMUTT=       neomutt$(EXEEXT)
  NEOMUTTOBJS=   mutt_account.o addrbook.o alias.o attach.o bcache.o body.o \
-               browser.o buffy.o mutt_charset.o color.o commands.o complete.o \
+               browser.o buffy.o color.o commands.o complete.o \
                 compose.o compress.o conststrings.o copy.o curs_lib.o \
                 curs_main.o edit.o editmsg.o enter.o envelope.o filter.o \
                 flags.o from.o group.o handler.o hdrline.o \
diff --git a/alias.c b/alias.c

index f6ee769e0d51d93ffbac92fba7e00db8f6896237..7f98a290b1c0fb39c40f09c4de179780ec036318 100644 (file)
--- a/alias.c
+++ b/alias.c
@@ -35,7 +35,6 @@
  #include "address.h"
  #include "envelope.h"
  #include "globals.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mutt_idna.h"
  #include "options.h"
@@ -230,7 +229,7 @@ static void recode_buf(char *buf, size_t buflen)
    s = mutt_str_strdup(buf);
    if (!s)
      return;
-  if (mutt_convert_string(&s, Charset, ConfigCharset, 0) == 0)
+  if (mutt_cs_convert_string(&s, Charset, ConfigCharset, 0) == 0)
      mutt_str_strfcpy(buf, s, buflen);
    FREE(&s);
  }
diff --git a/browser.c b/browser.c

index 6cea7ccc1fd4bbdd91a4f3741846585270b42ed9..9d1de391c046c4584a2bfda294edecdefc00a532 100644 (file)
--- a/browser.c
+++ b/browser.c
@@ -50,7 +50,6 @@
  #include "mailbox.h"
  #include "mbyte.h"
  #include "mutt_account.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mutt_menu.h"
  #include "mx.h"
@@ -577,7 +576,7 @@ static const char *group_index_format_str(char *buf, size_t buflen, size_t col,
        {
          char *desc = mutt_str_strdup(folder->ff->nd->desc);
          if (NewsgroupsCharset && *NewsgroupsCharset)
-          mutt_convert_string(&desc, NewsgroupsCharset, Charset, MUTT_ICONV_HOOK_FROM);
+          mutt_cs_convert_string(&desc, NewsgroupsCharset, Charset, MUTT_ICONV_HOOK_FROM);
          mutt_filter_unprintable(&desc);
  
          snprintf(fmt, sizeof(fmt), "%%%ss", prec);
diff --git a/handler.c b/handler.c

index 8b93f31cf0fd67b9c9dad4213c35bd6840059cd1..b20fd7098719413f51f2745a2e3508e2e0dd4fe9 100644 (file)
--- a/handler.c
+++ b/handler.c
@@ -42,7 +42,6 @@
  #include "globals.h"
  #include "keymap.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "ncrypt/ncrypt.h"
  #include "opcodes.h"
@@ -1591,10 +1590,10 @@ void mutt_decode_attachment(struct Body *b, struct State *s)
      if (!charset && AssumedCharset && *AssumedCharset)
        charset = mutt_cs_get_default_charset();
      if (charset && Charset)
-      cd = mutt_iconv_open(Charset, charset, MUTT_ICONV_HOOK_FROM);
+      cd = mutt_cs_iconv_open(Charset, charset, MUTT_ICONV_HOOK_FROM);
    }
    else if (istext && b->charset)
-    cd = mutt_iconv_open(Charset, b->charset, MUTT_ICONV_HOOK_FROM);
+    cd = mutt_cs_iconv_open(Charset, b->charset, MUTT_ICONV_HOOK_FROM);
  
    fseeko(s->fpin, b->offset, SEEK_SET);
    switch (b->encoding)
diff --git a/hcache/hcache.c b/hcache/hcache.c

index db8aa97c51e6a9afd02cd5a11e20b2f86227d102..02c348d24edb900b18f589281a17a9d65749ae95 100644 (file)
--- a/hcache/hcache.c
+++ b/hcache/hcache.c
@@ -57,7 +57,6 @@
  #include "hcache/hcversion.h"
  #include "header.h"
  #include "mbyte.h"
-#include "mutt_charset.h"
  #include "parameter.h"
  #include "protos.h"
  #include "tags.h"
@@ -181,7 +180,7 @@ static unsigned char *dump_char_size(char *c, unsigned char *d, int *off,
    if (convert && !mutt_str_is_ascii(c, size))
    {
      p = mutt_str_substr_dup(c, c + size);
-    if (mutt_convert_string(&p, Charset, "utf-8", 0) == 0)
+    if (mutt_cs_convert_string(&p, Charset, "utf-8", 0) == 0)
      {
        c = p;
        size = mutt_str_strlen(c) + 1;
@@ -220,7 +219,7 @@ static void restore_char(char **c, const unsigned char *d, int *off, bool conver
    if (convert && !mutt_str_is_ascii(*c, size))
    {
      char *tmp = mutt_str_strdup(*c);
-    if (mutt_convert_string(&tmp, "utf-8", Charset, 0) == 0)
+    if (mutt_cs_convert_string(&tmp, "utf-8", Charset, 0) == 0)
      {
        mutt_str_replace(c, tmp);
      }
diff --git a/hcache/hcache.h b/hcache/hcache.h

index 3b4388c38cd12db5d84e714c764cf7d8aef0315c..3a577c6866f74740718fd29b22800468bb098f0c 100644 (file)
--- a/hcache/hcache.h
+++ b/hcache/hcache.h
@@ -41,7 +41,7 @@
   *   * SpamList
   *   * NoSpamList
   * - Neomutt functions
- *   * mutt_convert_string()
+ *   * mutt_cs_convert_string()
   *   * mutt_encode_path()
   *   * mutt_new_body()
   *   * mutt_env_new()
diff --git a/history.c b/history.c

index eda49e944d44d20eea99ee94348c4e98df035d1a..44927407a24382de5ec72f7bc8d5bc4a481ae7cd 100644 (file)
--- a/history.c
+++ b/history.c
@@ -29,7 +29,6 @@
  #include "mutt/mutt.h"
  #include "history.h"
  #include "globals.h"
-#include "mutt_charset.h"
  #include "protos.h"
  
  /* This history ring grows from 0..History, with last marking the
@@ -140,7 +139,7 @@ void mutt_read_histfile(void)
      p = mutt_str_strdup(linebuf + read);
      if (p)
      {
-      mutt_convert_string(&p, "utf-8", Charset, 0);
+      mutt_cs_convert_string(&p, "utf-8", Charset, 0);
        mutt_history_add(hclass, p, false);
        FREE(&p);
      }
@@ -310,7 +309,7 @@ static void save_history(enum HistoryClass hclass, const char *s)
    }
  
    tmp = mutt_str_strdup(s);
-  mutt_convert_string(&tmp, Charset, "utf-8", 0);
+  mutt_cs_convert_string(&tmp, Charset, "utf-8", 0);
  
    /* Format of a history item (1 line): "<histclass>:<string>|".
       We add a '|' in order to avoid lines ending with '\'. */
diff --git a/hook.c b/hook.c

index b6445c4aab521218772b523db5274b7e27e03a20..57ef1c1fc69b667c406cb9970f292795c834d9a9 100644 (file)
--- a/hook.c
+++ b/hook.c
@@ -36,7 +36,6 @@
  #include "globals.h"
  #include "header.h"
  #include "mailbox.h"
-#include "mutt_charset.h"
  #include "ncrypt/ncrypt.h"
  #include "options.h"
  #include "pattern.h"
diff --git a/imap/utf7.c b/imap/utf7.c

index 315e61f4c89ea06e167e5c045125e3b944a7ce31..76dfdc8d1d1943900560eae6323ea8e1f1040044 100644 (file)
--- a/imap/utf7.c
+++ b/imap/utf7.c
@@ -36,7 +36,6 @@
  #include "imap_private.h"
  #include "mutt/mutt.h"
  #include "globals.h"
-#include "mutt_charset.h"
  
  // clang-format off
  /**
@@ -322,7 +321,7 @@ void imap_utf_encode(struct ImapData *idata, char **s)
    if (Charset)
    {
      char *t = mutt_str_strdup(*s);
-    if (t && !mutt_convert_string(&t, Charset, "utf-8", 0))
+    if (t && !mutt_cs_convert_string(&t, Charset, "utf-8", 0))
      {
        FREE(s);
        if (idata->unicode)
@@ -350,7 +349,7 @@ void imap_utf_decode(struct ImapData *idata, char **s)
      else
        t = utf7_to_utf8(*s, strlen(*s), 0, 0);
  
-    if (t && !mutt_convert_string(&t, "utf-8", Charset, 0))
+    if (t && !mutt_cs_convert_string(&t, "utf-8", Charset, 0))
      {
        FREE(s);
        *s = t;
diff --git a/init.c b/init.c

index 1b7d5194baa559bd8054f5197cdef72b4578a141..68562eddce1371a6074b46f7b74052f3c7709208 100644 (file)
--- a/init.c
+++ b/init.c
@@ -52,7 +52,6 @@
  #include "mailbox.h"
  #include "mbtable.h"
  #include "mbyte.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mutt_idna.h"
  #include "mutt_menu.h"
@@ -2129,7 +2128,7 @@ static int check_charset(struct Option *opt, const char *val)
    {
      if (!*p)
        continue;
-    if (!mutt_check_charset(p, strict))
+    if (!mutt_cs_check_charset(p, strict))
      {
        rc = -1;
        break;
@@ -3031,7 +3030,7 @@ static int source_rc(const char *rcfile_path, struct Buffer *err)
        currentline = mutt_str_strdup(linebuf);
        if (!currentline)
          continue;
-      mutt_convert_string(&currentline, ConfigCharset, Charset, 0);
+      mutt_cs_convert_string(&currentline, ConfigCharset, Charset, 0);
      }
      else
        currentline = linebuf;
diff --git a/mbyte.c b/mbyte.c

index 09849b77bae36693fa018519092ce991d09976f5..a79bf574580df3845a9e1f3eb3a82c9572e03d46 100644 (file)
--- a/mbyte.c
+++ b/mbyte.c
@@ -29,7 +29,6 @@
  #include <wchar.h>
  #include "mutt/mutt.h"
  #include "mbyte.h"
-#include "mutt_charset.h"
  #include "options.h"
  #include "protos.h"
  
diff --git a/mutt/charset.c b/mutt/charset.c

index ff459965db47ab58f0c3f4617830bdf1b8f75875..a4a27ce3ae6c8d2173e6959eaa80c900025c0fc3 100644 (file)
--- a/mutt/charset.c
+++ b/mutt/charset.c
@@ -32,12 +32,20 @@
   * | Function                       | Description
   * | :----------------------------- | :---------------------------------------------------------
   * | mutt_cs_canonical_charset()    | Canonicalise the charset of a string
+ * | mutt_cs_charset_lookup()       | Look for a replacement character set
+ * | mutt_cs_check_charset()        | Does iconv understand a character set?
   * | mutt_cs_chscmp()               | Are the names of two character sets equivalent?
+ * | mutt_cs_convert_string()       | Convert a string between encodings
   * | mutt_cs_fgetconv()             | Convert a file's character set
   * | mutt_cs_fgetconvs()            | Convert a file's charset into a string buffer
   * | mutt_cs_fgetconv_close()       | Close an fgetconv handle
+ * | mutt_cs_fgetconv_open()        | Prepare a file for charset conversion
   * | mutt_cs_get_default_charset()  | Get the default character set
   * | mutt_cs_iconv()                | Change the encoding of a string
+ * | mutt_cs_iconv_lookup()         | Look for a replacement character set
+ * | mutt_cs_iconv_open()           | Set up iconv for conversions
+ * | mutt_cs_lookup_add()           | Add a new character set lookup
+ * | mutt_cs_lookup_remove()        | Remove all the character set lookups
   * | mutt_cs_set_langinfo_charset() | Set the user's choice of character set
   */
  
@@ -46,11 +54,16 @@
  #include <errno.h>
  #include <iconv.h>
  #include <langinfo.h>
+#include <limits.h>
+#include <regex.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>
  #include "charset.h"
+#include "buffer.h"
  #include "memory.h"
+#include "queue.h"
+#include "regex3.h"
  #include "string2.h"
  
  #ifndef EILSEQ
@@ -60,6 +73,20 @@
  char *AssumedCharset; /**< Encoding schemes for messages without indication */
  char *Charset;        /**< User's choice of character set */
  
+/**
+ * struct Lookup - Regex to String lookup table
+ *
+ * This is used by 'charset-hook' and 'iconv-hook'.
+ */
+struct Lookup
+{
+  enum LookupType type; /**< Lookup type, e.g. #MUTT_LOOKUP_CHARSET */
+  struct Regex regex;   /**< Regular expression */
+  char *replacement;    /**< Alternative charset to use */
+  TAILQ_ENTRY(Lookup) entries; /**< Tail-queue linkage for the Lookups list */
+};
+static TAILQ_HEAD(LookupHead, Lookup) Lookups = TAILQ_HEAD_INITIALIZER(Lookups);
+
  // clang-format off
  /**
   * PreferredMIMENames - Lookup table of preferred charsets
@@ -172,10 +199,8 @@ const struct MimeNames PreferredMIMENames[] =
    { "csGB2312",              "gb2312"        },
    { "csbig5",                "big5"          },
  
-  /*
-   * End of official brain damage. What follows has been taken from glibc's
-   * localedata files.
-   */
+  /* End of official brain damage.
+   * What follows has been taken from glibc's localedata files.  */
  
    { "iso_8859-13",           "iso-8859-13"   },
    { "iso-ir-179",            "iso-8859-13"   },
@@ -197,9 +222,7 @@ const struct MimeNames PreferredMIMENames[] =
  
    { "646",                   "us-ascii"      },
  
-  /*
-   * http://www.sun.com/software/white-papers/wp-unicode/
-   */
+  /* http://www.sun.com/software/white-papers/wp-unicode/ */
  
    { "eucJP",                 "euc-jp"        },
    { "PCK",                   "Shift_JIS"     },
@@ -211,131 +234,49 @@ const struct MimeNames PreferredMIMENames[] =
    { "sjis",                  "Shift_JIS"     },
    { "euc-jp-ms",             "eucJP-ms"      },
  
-  /*
-   * If you happen to encounter system-specific brain-damage with respect to
+  /* If you happen to encounter system-specific brain-damage with respect to
     * character set naming, please add it above this comment, and submit a patch
-   * to <neomutt-devel@neomutt.org>.
-   */
-
-  /* End of aliases.  Please keep this line last. */
+   * to <neomutt-devel@neomutt.org> */
  
    { NULL,                     NULL           },
  };
  // clang-format on
  
  /**
- * mutt_cs_fgetconv_close - Close an fgetconv handle
- * @param handle fgetconv handle
- */
-void mutt_cs_fgetconv_close(struct FgetConv **fc)
-{
-  if ((*fc)->cd != (iconv_t) -1)
-    iconv_close((*fc)->cd);
-  FREE(fc);
-}
-
-/**
- * mutt_cs_fgetconv - Convert a file's character set
- * @param fc FgetConv handle
- * @retval num Next character in the converted file
- * @retval EOF Error
+ * lookup_charset - Look for a preferred character set name
+ * @param type Type, e.g. #MUTT_LOOKUP_CHARSET
+ * @param cs   Character set
   *
- * A file is read into a buffer and its character set is converted.
- * Each call to this function will return one converted character.
- * The buffer is refilled automatically when empty.
+ * If the character set matches one of the regexes,
+ * then return the replacement name.
   */
-int mutt_cs_fgetconv(struct FgetConv *fc)
+static const char *lookup_charset(enum LookupType type, const char *cs)
  {
-  if (!fc)
-    return EOF;
-  if (fc->cd == (iconv_t) -1)
-    return fgetc(fc->file);
-  if (!fc->p)
-    return EOF;
-  if (fc->p < fc->ob)
-    return (unsigned char) *(fc->p)++;
-
-  /* Try to convert some more */
-  fc->p = fc->ob = fc->bufo;
-  if (fc->ibl)
-  {
-    size_t obl = sizeof(fc->bufo);
-    iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
-    if (fc->p < fc->ob)
-      return (unsigned char) *(fc->p)++;
-  }
-
-  /* If we trusted iconv a bit more, we would at this point
-   * ask why it had stopped converting ... */
+  if (!cs)
+    return NULL;
  
-  /* Try to read some more */
-  if (fc->ibl == sizeof(fc->bufi) ||
-      (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi)))
-  {
-    fc->p = 0;
-    return EOF;
-  }
-  if (fc->ibl)
-    memcpy(fc->bufi, fc->ib, fc->ibl);
-  fc->ib = fc->bufi;
-  fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file);
+  struct Lookup *l = NULL;
  
-  /* Try harder this time to convert some */
-  if (fc->ibl)
+  TAILQ_FOREACH(l, &Lookups, entries)
    {
-    size_t obl = sizeof(fc->bufo);
-    mutt_cs_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0);
-    if (fc->p < fc->ob)
-      return (unsigned char) *(fc->p)++;
+    if (l->type != type)
+      continue;
+    if (regexec(l->regex.regex, cs, 0, NULL, 0) == 0)
+      return l->replacement;
    }
-
-  /* Either the file has finished or one of the buffers is too small */
-  fc->p = 0;
-  return EOF;
-}
-
-/**
- * mutt_cs_fgetconvs - Convert a file's charset into a string buffer
- * @param buf Buffer for result
- * @param l   Length of buffer
- * @param fc  FgetConv handle
- * @retval ptr  Result buffer on success
- * @retval NULL Error
- *
- * Read a file into a buffer, converting the character set as it goes.
- */
-char *mutt_cs_fgetconvs(char *buf, size_t l, struct FgetConv *fc)
-{
-  int c;
-  size_t r;
-
-  for (r = 0; r + 1 < l;)
-  {
-    c = mutt_cs_fgetconv(fc);
-    if (c == EOF)
-      break;
-    buf[r++] = (char) c;
-    if (c == '\n')
-      break;
-  }
-  buf[r] = '\0';
-
-  if (r)
-    return buf;
-  else
-    return NULL;
+  return NULL;
  }
  
  /**
   * mutt_cs_canonical_charset - Canonicalise the charset of a string
- * @param dest Buffer for canonical character set name
- * @param dlen Length of buffer
+ * @param buf Buffer for canonical character set name
+ * @param buflen Length of buffer
   * @param name Name to be canonicalised
   *
   * This first ties off any charset extension such as "//TRANSLIT",
   * canonicalizes the charset and re-adds the extension
   */
-void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name)
+void mutt_cs_canonical_charset(char *buf, size_t buflen, const char *name)
  {
    char *p = NULL, *ext = NULL;
    char in[LONG_STRING], scratch[LONG_STRING];
@@ -348,7 +289,7 @@ void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name)
    if ((mutt_str_strcasecmp(in, "utf-8") == 0) ||
        (mutt_str_strcasecmp(in, "utf8") == 0))
    {
-    mutt_str_strfcpy(dest, "utf-8", dlen);
+    mutt_str_strfcpy(buf, "utf-8", buflen);
      goto out;
    }
  
@@ -369,47 +310,50 @@ void mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name)
      if ((mutt_str_strcasecmp(scratch, PreferredMIMENames[i].key) == 0) ||
          (mutt_str_strcasecmp(scratch, PreferredMIMENames[i].key) == 0))
      {
-      mutt_str_strfcpy(dest, PreferredMIMENames[i].pref, dlen);
+      mutt_str_strfcpy(buf, PreferredMIMENames[i].pref, buflen);
        goto out;
      }
    }
  
-  mutt_str_strfcpy(dest, scratch, dlen);
+  mutt_str_strfcpy(buf, scratch, buflen);
  
    /* for cosmetics' sake, transform to lowercase. */
-  for (p = dest; *p; p++)
+  for (p = buf; *p; p++)
      *p = tolower(*p);
  
  out:
    if (ext && *ext)
    {
-    mutt_str_strcat(dest, dlen, "/");
-    mutt_str_strcat(dest, dlen, ext);
+    mutt_str_strcat(buf, buflen, "/");
+    mutt_str_strcat(buf, buflen, ext);
    }
  }
  
  /**
   * mutt_cs_chscmp - Are the names of two character sets equivalent?
- * @param s   First character set
- * @param chs Second character set
+ * @param cs1 First character set
+ * @param cs2 Second character set
   * @retval num true if the names are equivalent
   *
   * Charsets may have extensions that mutt_cs_canonical_charset() leaves intact;
- * we expect 'chs' to originate from neomutt code, not user input (i.e. 'chs'
+ * we expect 'cs2' to originate from neomutt code, not user input (i.e. 'cs2'
   * does _not_ have any extension) we simply check if the shorter string is a
   * prefix for the longer.
   */
-int mutt_cs_chscmp(const char *s, const char *chs)
+int mutt_cs_chscmp(const char *cs1, const char *cs2)
  {
-  if (!s || !chs)
+  if (!cs1 || !cs2)
      return 0;
  
    char buffer[STRING];
  
-  mutt_cs_canonical_charset(buffer, sizeof(buffer), s);
-  int a = mutt_str_strlen(buffer);
-  int b = mutt_str_strlen(chs);
-  return (mutt_str_strncasecmp(a > b ? buffer : chs, a > b ? chs : buffer, MIN(a, b)) == 0);
+  mutt_cs_canonical_charset(buffer, sizeof(buffer), cs1);
+
+  int len1 = mutt_str_strlen(buffer);
+  int len2 = mutt_str_strlen(cs2);
+
+  return (mutt_str_strncasecmp((len1 > len2) ? buffer : cs2,
+                               (len1 > len2) ? cs2 : buffer, MIN(len1, len2)) == 0);
  }
  
  /**
@@ -433,6 +377,154 @@ char *mutt_cs_get_default_charset(void)
    return strcpy(fcharset, "us-ascii");
  }
  
+/**
+ * mutt_cs_set_langinfo_charset - Set the user's choice of character set
+ *
+ * Lookup the character map used by the user's locale and store it in Charset.
+ */
+void mutt_cs_set_langinfo_charset(void)
+{
+  char buf[LONG_STRING];
+  char buf2[LONG_STRING];
+
+  mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf));
+  mutt_cs_canonical_charset(buf2, sizeof(buf2), buf);
+
+  /* finally, set $charset */
+  Charset = mutt_str_strdup(buf2);
+  if (!Charset)
+    Charset = mutt_str_strdup("iso-8859-1");
+}
+
+/**
+ * mutt_cs_lookup_add - Add a new character set lookup
+ * @param type    Type of lookup, e.g. #MUTT_LOOKUP_CHARSET
+ * @param pat     Pattern to match
+ * @param replace Replacement string
+ * @param err     Buffer for error message (dereferenced unchecked if the regex is invalid)
+ * @retval true  Lookup added to list
+ * @retval false Missing pattern/replacement, or the regex string was invalid
+ *
+ * Add a case-insensitive regex for a character set and a replacement name.
+ */
+bool mutt_cs_lookup_add(enum LookupType type, const char *pat,
+                        const char *replace, struct Buffer *err)
+{
+  if (!pat || !replace)
+    return false;
+
+  regex_t *rx = mutt_mem_malloc(sizeof(regex_t));
+  int rc = REGCOMP(rx, pat, REG_ICASE);
+  if (rc != 0)
+  {
+    regerror(rc, rx, err->data, err->dsize);
+    FREE(&rx);
+    return false;
+  }
+
+  struct Lookup *l = mutt_mem_calloc(1, sizeof(struct Lookup));
+  l->type = type;
+  l->replacement = mutt_str_strdup(replace);
+  l->regex.pattern = mutt_str_strdup(pat);
+  l->regex.regex = rx;
+  l->regex.not = false;
+
+  TAILQ_INSERT_TAIL(&Lookups, l, entries);
+
+  return true;
+}
+
+/**
+ * mutt_cs_lookup_remove - Remove all the character set lookups
+ *
+ * Empty the list, freeing each entry's strings and its compiled regex.
+ */
+void mutt_cs_lookup_remove(void)
+{
+  struct Lookup *l = NULL;
+  struct Lookup *tmp = NULL;
+
+  TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
+  {
+    TAILQ_REMOVE(&Lookups, l, entries);
+    FREE(&l->replacement);
+    FREE(&l->regex.pattern);
+    if (l->regex.regex)
+      regfree(l->regex.regex);
+    FREE(&l->regex.regex); /* the regex_t itself; 'regex' is embedded in Lookup */
+    FREE(&l);
+  }
+}
+
+/**
+ * mutt_cs_charset_lookup - Look for a replacement character set
+ * @param chs Character set to lookup
+ * @retval ptr  Replacement character set (if a 'charset-hook' matches)
+ * @retval NULL No matching hook
+ *
+ * Look through all the 'charset-hook's.
+ * If one matches return the replacement character set.
+ */
+const char *mutt_cs_charset_lookup(const char *chs)
+{
+  return lookup_charset(MUTT_LOOKUP_CHARSET, chs);
+}
+
+/**
+ * mutt_cs_iconv_open - Set up iconv for conversions
+ * @param tocode   Target character set
+ * @param fromcode Current character set
+ * @param flags    Flags, e.g. #MUTT_ICONV_HOOK_FROM
+ * @retval ptr iconv handle for the conversion, or (iconv_t) -1 on failure
+ *
+ * Like iconv_open, but canonicalises the charsets, applies charset-hooks,
+ * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips
+ * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers
+ * should use flags=0 when fromcode can safely be considered true, either some
+ * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be
+ * used only when fromcode is unsure, taken from a possibly wrong incoming MIME
+ * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions
+ * in some setups. Note: By design charset-hooks should never be, and are never,
+ * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM
+ * acts on charset-hooks, not at all on iconv-hooks.
+ */
+iconv_t mutt_cs_iconv_open(const char *tocode, const char *fromcode, int flags)
+{
+  char tocode1[SHORT_STRING];
+  char fromcode1[SHORT_STRING];
+  const char *tocode2 = NULL, *fromcode2 = NULL;
+  const char *tmp = NULL;
+
+  iconv_t cd;
+
+  /* transform to MIME preferred charset names */
+  mutt_cs_canonical_charset(tocode1, sizeof(tocode1), tocode);
+  mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
+
+  /* maybe apply charset-hooks and recanonicalise fromcode,
+   * but only when caller asked us to sanitize a potentially wrong
+   * charset name incoming from the wild exterior. */
+  if (flags & MUTT_ICONV_HOOK_FROM)
+  {
+    tmp = mutt_cs_charset_lookup(fromcode1);
+    if (tmp)
+      mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
+  }
+
+  /* always apply iconv-hooks to suit system's iconv tastes */
+  tocode2 = mutt_cs_iconv_lookup(tocode1);
+  tocode2 = (tocode2) ? tocode2 : tocode1;
+  fromcode2 = mutt_cs_iconv_lookup(fromcode1);
+  fromcode2 = (fromcode2) ? fromcode2 : fromcode1;
+
+  /* call system iconv with names it appreciates */
+  cd = iconv_open(tocode2, fromcode2);
+  if (cd != (iconv_t) -1)
+    return cd;
+
+  return (iconv_t) -1;
+}
+
  /**
   * mutt_cs_iconv - Change the encoding of a string
   * @param[in]     cd           Iconv conversion descriptor
@@ -519,20 +611,250 @@ size_t mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char *
  }
  
  /**
- * mutt_cs_set_langinfo_charset - Set the user's choice of character set
+ * mutt_cs_iconv_lookup - Look for a replacement character set
+ * @param chs Character set to lookup
+ * @retval ptr  Replacement character set (if a 'iconv-hook' matches)
+ * @retval NULL No matching hook
   *
- * Lookup the character map used by the user's locale and store it in Charset.
+ * Look through all the 'iconv-hook's.
+ * If one matches return the replacement character set.
   */
-void mutt_cs_set_langinfo_charset(void)
+const char *mutt_cs_iconv_lookup(const char *chs)
  {
-  char buf[LONG_STRING];
-  char buf2[LONG_STRING];
+  return lookup_charset(MUTT_LOOKUP_ICONV, chs);
+}
  
-  mutt_str_strfcpy(buf, nl_langinfo(CODESET), sizeof(buf));
-  mutt_cs_canonical_charset(buf2, sizeof(buf2), buf);
+/**
+ * mutt_cs_convert_string - Convert a string between encodings
+ * @param[in,out] ps    String to convert (freed and replaced by the converted copy)
+ * @param[in]     from  Current character set
+ * @param[in]     to    Target character set
+ * @param[in]     flags Flags, e.g. #MUTT_ICONV_HOOK_FROM
+ * @retval 0  Success (also for a NULL or empty string, which is left untouched)
+ * @retval -1 Error: a charset is missing or the iconv handle could not be opened
+ *
+ * Parameter flags is given as-is to mutt_cs_iconv_open().
+ * See there for its meaning and usage policy.
+ */
+int mutt_cs_convert_string(char **ps, const char *from, const char *to, int flags)
+{
+  iconv_t cd;
+  const char *repls[] = { "\357\277\275", "?", 0 };
+  char *s = *ps;
  
-  /* finally, set $charset */
-  Charset = mutt_str_strdup(buf2);
-  if (!Charset)
-    Charset = mutt_str_strdup("iso-8859-1");
+  if (!s || !*s)
+    return 0;
+
+  if (to && from && (cd = mutt_cs_iconv_open(to, from, flags)) != (iconv_t) -1)
+  {
+    size_t len;
+    const char *ib = NULL;
+    char *buf = NULL, *ob = NULL;
+    size_t ibl, obl;
+    const char **inrepls = NULL;
+    char *outrepl = NULL;
+
+    if (mutt_cs_is_utf8(to))
+      outrepl = "\357\277\275";
+    else if (mutt_cs_is_utf8(from))
+      inrepls = repls;
+    else
+      outrepl = "?";
+
+    len = strlen(s);
+    ib = s;
+    ibl = len + 1;
+    obl = MB_LEN_MAX * ibl;
+    ob = buf = mutt_mem_malloc(obl + 1);
+
+    mutt_cs_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
+    iconv_close(cd);
+
+    *ob = '\0';
+
+    FREE(ps);
+    *ps = buf;
+
+    mutt_str_adjust(ps);
+    return 0;
+  }
+  else
+    return -1;
+}
+
+/**
+ * mutt_cs_check_charset - Does iconv understand a character set?
+ * @param cs     Character set to check
+ * @param strict Check strictly by using iconv
+ * @retval true if character set is valid
+ *
+ * If `strict` is false, then finding a matching character set in
+ * #PreferredMIMENames will be enough.
+ * If `strict` is true, or the charset is not in #PreferredMIMENames, then
+ * mutt_cs_iconv_open() will be tried.
+ */
+bool mutt_cs_check_charset(const char *cs, bool strict)
+{
+  iconv_t cd;
+
+  if (mutt_cs_is_utf8(cs))
+    return true;
+
+  if (!strict)
+  {
+    for (int i = 0; PreferredMIMENames[i].key; i++)
+    {
+      if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, cs) == 0) ||
+          (mutt_str_strcasecmp(PreferredMIMENames[i].pref, cs) == 0))
+      {
+        return true;
+      }
+    }
+  }
+
+  cd = mutt_cs_iconv_open(cs, cs, 0);
+  if (cd != (iconv_t)(-1))
+  {
+    iconv_close(cd);
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * mutt_cs_fgetconv_open - Prepare a file for charset conversion
+ * @param file  FILE ptr to prepare
+ * @param from  Current character set
+ * @param to    Destination character set
+ * @param flags Flags, e.g. #MUTT_ICONV_HOOK_FROM
+ * @retval ptr fgetconv handle (with cd set to (iconv_t) -1 if no conversion could be set up)
+ *
+ * Parameter flags is given as-is to mutt_cs_iconv_open().
+ */
+struct FgetConv *mutt_cs_fgetconv_open(FILE *file, const char *from, const char *to, int flags)
+{
+  struct FgetConv *fc = NULL;
+  iconv_t cd = (iconv_t) -1;
+  static const char *repls[] = { "\357\277\275", "?", 0 };
+
+  if (from && to)
+    cd = mutt_cs_iconv_open(to, from, flags);
+
+  if (cd != (iconv_t) -1)
+  {
+    fc = mutt_mem_malloc(sizeof(struct FgetConv));
+    fc->p = fc->ob = fc->bufo;
+    fc->ib = fc->bufi;
+    fc->ibl = 0;
+    fc->inrepls = mutt_cs_is_utf8(to) ? repls : repls + 1;
+  }
+  else
+    fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
+  fc->file = file;
+  fc->cd = cd;
+  return fc;
+}
+
+/**
+ * mutt_cs_fgetconv_close - Close an fgetconv handle
+ * @param fc fgetconv handle; must point to a valid handle, it is dereferenced unchecked
+ */
+void mutt_cs_fgetconv_close(struct FgetConv **fc)
+{
+  if ((*fc)->cd != (iconv_t) -1)
+    iconv_close((*fc)->cd);
+  FREE(fc);
+}
+
+/**
+ * mutt_cs_fgetconv - Convert a file's character set
+ * @param fc FgetConv handle
+ * @retval num Next character in the converted file
+ * @retval EOF No handle, end of input, or no further bytes could be converted
+ *
+ * A file is read into a buffer and its character set is converted.
+ * Each call to this function will return one converted character.
+ * The buffer is refilled automatically when empty.
+ */
+int mutt_cs_fgetconv(struct FgetConv *fc)
+{
+  if (!fc)
+    return EOF;
+  if (fc->cd == (iconv_t) -1)
+    return fgetc(fc->file);
+  if (!fc->p)
+    return EOF;
+  if (fc->p < fc->ob)
+    return (unsigned char) *(fc->p)++;
+
+  /* Try to convert some more */
+  fc->p = fc->ob = fc->bufo;
+  if (fc->ibl)
+  {
+    size_t obl = sizeof(fc->bufo);
+    iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
+    if (fc->p < fc->ob)
+      return (unsigned char) *(fc->p)++;
+  }
+
+  /* If we trusted iconv a bit more, we would at this point
+   * ask why it had stopped converting ... */
+
+  /* Try to read some more */
+  if (fc->ibl == sizeof(fc->bufi) ||
+      (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi)))
+  {
+    fc->p = 0;
+    return EOF;
+  }
+  if (fc->ibl)
+    memcpy(fc->bufi, fc->ib, fc->ibl);
+  fc->ib = fc->bufi;
+  fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->file);
+
+  /* Try harder this time to convert some */
+  if (fc->ibl)
+  {
+    size_t obl = sizeof(fc->bufo);
+    mutt_cs_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl, fc->inrepls, 0);
+    if (fc->p < fc->ob)
+      return (unsigned char) *(fc->p)++;
+  }
+
+  /* Either the file has finished or one of the buffers is too small */
+  fc->p = 0;
+  return EOF;
+}
+
+/**
+ * mutt_cs_fgetconvs - Convert a file's charset into a string buffer
+ * @param buf    Buffer for result
+ * @param buflen Length of buffer
+ * @param fc     FgetConv handle
+ * @retval ptr  Result buffer on success
+ * @retval NULL Nothing could be read (mutt_cs_fgetconv() returned EOF at once)
+ *
+ * Read a file into a buffer, converting the character set as it goes.
+ */
+char *mutt_cs_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
+{
+  int c;
+  size_t r;
+
+  for (r = 0; (r + 1) < buflen;)
+  {
+    c = mutt_cs_fgetconv(fc);
+    if (c == EOF)
+      break;
+    buf[r++] = (char) c;
+    if (c == '\n')
+      break;
+  }
+  buf[r] = '\0';
+
+  if (r > 0)
+    return buf;
+
+  return NULL;
+}
diff --git a/mutt/charset.h b/mutt/charset.h

index ac17bf19f73c43c7a612a76c9a5814f3afb99736..835be1d04b52298883297677c4858eb70dd4f65a 100644 (file)
--- a/mutt/charset.h
+++ b/mutt/charset.h
@@ -24,8 +24,11 @@
  #define _MUTT_CHARSET_H
  
  #include <iconv.h>
+#include <stdbool.h>
  #include <stdio.h>
  
+struct Buffer;
+
  extern char *AssumedCharset;
  extern char *Charset;
  
@@ -63,18 +66,40 @@ struct MimeNames
    const char *pref;
  };
  
+/**
+ * enum LookupType - Types of character set lookups
+ */
+enum LookupType
+{
+  MUTT_LOOKUP_CHARSET,
+  MUTT_LOOKUP_ICONV
+};
+
+#define MUTT_ICONV_HOOK_FROM 1 /**< apply charset-hooks to fromcode */
+
  extern const struct MimeNames PreferredMIMENames[];
  
-void   mutt_cs_canonical_charset(char *dest, size_t dlen, const char *name);
-int    mutt_cs_chscmp(const char *s, const char *chs);
-void   mutt_cs_fgetconv_close(struct FgetConv **fc);
-int    mutt_cs_fgetconv(struct FgetConv *fc);
-char * mutt_cs_fgetconvs(char *buf, size_t l, struct FgetConv *fc);
-char * mutt_cs_get_default_charset(void);
-size_t mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl);
-void   mutt_cs_set_langinfo_charset(void);
+void             mutt_cs_canonical_charset(char *buf, size_t buflen, const char *name);
+int              mutt_cs_chscmp(const char *cs1, const char *cs2);
+char *           mutt_cs_get_default_charset(void);
+void             mutt_cs_set_langinfo_charset(void);
+
+bool             mutt_cs_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err);
+void             mutt_cs_lookup_remove(void);
+const char *     mutt_cs_charset_lookup(const char *chs);
+
+iconv_t          mutt_cs_iconv_open(const char *tocode, const char *fromcode, int flags);
+size_t           mutt_cs_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl);
+const char *     mutt_cs_iconv_lookup(const char *chs);
+int              mutt_cs_convert_string(char **ps, const char *from, const char *to, int flags);
+bool             mutt_cs_check_charset(const char *cs, bool strict);
+
+struct FgetConv *mutt_cs_fgetconv_open(FILE *file, const char *from, const char *to, int flags);
+void             mutt_cs_fgetconv_close(struct FgetConv **fc);
+int              mutt_cs_fgetconv(struct FgetConv *fc);
+char *           mutt_cs_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc);
  
  #define mutt_cs_is_utf8(a)     mutt_cs_chscmp(a, "utf-8")
  #define mutt_cs_is_us_ascii(a) mutt_cs_chscmp(a, "us-ascii")
  
-#endif 
+#endif
diff --git a/mutt_charset.c b/mutt_charset.c

deleted file mode 100644 (file)

index a030cd2..0000000
--- a/mutt_charset.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * @file
- * Conversion between different character encodings
- *
- * @authors
- * Copyright (C) 1999-2002,2007 Thomas Roessler <roessler@does-not-exist.org>
- *
- * @copyright
- * This program is free software: you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation, either version 2 of the License, or (at your option) any later
- * version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "config.h"
-#include <ctype.h>
-#include <errno.h>
-#include <langinfo.h>
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-#include "mutt/mutt.h"
-#include "mutt.h"
-#include "mutt_charset.h"
-#include "globals.h"
-#include "protos.h"
-
-/**
- * struct Lookup - Regex to String lookup table
- *
- * This is used by 'charset-hook' and 'iconv-hook'.
- */
-struct Lookup
-{
-  enum LookupType type; /**< Lookup type */
-  struct Regex regex;   /**< Regular expression */
-  char *replacement;    /**< Alternative charset to use */
-  TAILQ_ENTRY(Lookup) entries;
-};
-static TAILQ_HEAD(LookupHead, Lookup) Lookups = TAILQ_HEAD_INITIALIZER(Lookups);
-
-/**
- * mutt_iconv_open - Set up iconv for conversions
- *
- * Like iconv_open, but canonicalises the charsets, applies charset-hooks,
- * recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips
- * charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers
- * should use flags=0 when fromcode can safely be considered true, either some
- * constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be
- * used only when fromcode is unsure, taken from a possibly wrong incoming MIME
- * label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions
- * in some setups. Note: By design charset-hooks should never be, and are never,
- * applied to tocode. Highlight note: The top-well-named MUTT_ICONV_HOOK_FROM
- * acts on charset-hooks, not at all on iconv-hooks.
- */
-iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags)
-{
-  char tocode1[SHORT_STRING];
-  char fromcode1[SHORT_STRING];
-  const char *tocode2 = NULL, *fromcode2 = NULL;
-  const char *tmp = NULL;
-
-  iconv_t cd;
-
-  /* transform to MIME preferred charset names */
-  mutt_cs_canonical_charset(tocode1, sizeof(tocode1), tocode);
-  mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
-
-  /* maybe apply charset-hooks and recanonicalise fromcode,
-   * but only when caller asked us to sanitize a potentially wrong
-   * charset name incoming from the wild exterior. */
-  if (flags & MUTT_ICONV_HOOK_FROM)
-  {
-    tmp = mutt_cs_charset_lookup(fromcode1);
-    if (tmp)
-      mutt_cs_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
-  }
-
-  /* always apply iconv-hooks to suit system's iconv tastes */
-  tocode2 = mutt_cs_iconv_lookup(tocode1);
-  tocode2 = (tocode2) ? tocode2 : tocode1;
-  fromcode2 = mutt_cs_iconv_lookup(fromcode1);
-  fromcode2 = (fromcode2) ? fromcode2 : fromcode1;
-
-  /* call system iconv with names it appreciates */
-  cd = iconv_open(tocode2, fromcode2);
-  if (cd != (iconv_t) -1)
-    return cd;
-
-  return (iconv_t) -1;
-}
-
-/**
- * mutt_convert_string - Convert a string between encodings
- *
- * Parameter flags is given as-is to mutt_iconv_open().
- * See there for its meaning and usage policy.
- */
-int mutt_convert_string(char **ps, const char *from, const char *to, int flags)
-{
-  iconv_t cd;
-  const char *repls[] = { "\357\277\275", "?", 0 };
-  char *s = *ps;
-
-  if (!s || !*s)
-    return 0;
-
-  if (to && from && (cd = mutt_iconv_open(to, from, flags)) != (iconv_t) -1)
-  {
-    size_t len;
-    const char *ib = NULL;
-    char *buf = NULL, *ob = NULL;
-    size_t ibl, obl;
-    const char **inrepls = NULL;
-    char *outrepl = NULL;
-
-    if (mutt_cs_is_utf8(to))
-      outrepl = "\357\277\275";
-    else if (mutt_cs_is_utf8(from))
-      inrepls = repls;
-    else
-      outrepl = "?";
-
-    len = strlen(s);
-    ib = s;
-    ibl = len + 1;
-    obl = MB_LEN_MAX * ibl;
-    ob = buf = mutt_mem_malloc(obl + 1);
-
-    mutt_cs_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl);
-    iconv_close(cd);
-
-    *ob = '\0';
-
-    FREE(ps);
-    *ps = buf;
-
-    mutt_str_adjust(ps);
-    return 0;
-  }
-  else
-    return -1;
-}
-
-/**
- * fgetconv_open - Prepare a file for charset conversion
- * @param file  FILE ptr to prepare
- * @param from  Current character set
- * @param to    Destination character set
- * @param flags Flags, e.g. MUTT_ICONV_HOOK_FROM
- * @retval ptr fgetconv handle
- *
- * Parameter flags is given as-is to mutt_iconv_open().
- */
-struct FgetConv *fgetconv_open(FILE *file, const char *from, const char *to, int flags)
-{
-  struct FgetConv *fc = NULL;
-  iconv_t cd = (iconv_t) -1;
-  static const char *repls[] = { "\357\277\275", "?", 0 };
-
-  if (from && to)
-    cd = mutt_iconv_open(to, from, flags);
-
-  if (cd != (iconv_t) -1)
-  {
-    fc = mutt_mem_malloc(sizeof(struct FgetConv));
-    fc->p = fc->ob = fc->bufo;
-    fc->ib = fc->bufi;
-    fc->ibl = 0;
-    fc->inrepls = mutt_cs_is_utf8(to) ? repls : repls + 1;
-  }
-  else
-    fc = mutt_mem_malloc(sizeof(struct FgetConvNot));
-  fc->file = file;
-  fc->cd = cd;
-  return fc;
-}
-
-bool mutt_check_charset(const char *s, bool strict)
-{
-  iconv_t cd;
-
-  if (mutt_cs_is_utf8(s))
-    return true;
-
-  if (!strict)
-    for (int i = 0; PreferredMIMENames[i].key; i++)
-    {
-      if ((mutt_str_strcasecmp(PreferredMIMENames[i].key, s) == 0) ||
-          (mutt_str_strcasecmp(PreferredMIMENames[i].pref, s) == 0))
-      {
-        return true;
-      }
-    }
-
-  cd = mutt_iconv_open(s, s, 0);
-  if (cd != (iconv_t)(-1))
-  {
-    iconv_close(cd);
-    return true;
-  }
-
-  return false;
-}
-
-/**
- * lookup_charset - Look for a preferred character set name
- * @param type Type, e.g. #MUTT_LOOKUP_CHARSET
- * @param cs   Character set
- *
- * If the character set matches one of the regexes,
- * then return the replacement name.
- */
-static const char *lookup_charset(enum LookupType type, const char *cs)
-{
-  if (!cs)
-    return NULL;
-
-  struct Lookup *l = NULL;
-
-  TAILQ_FOREACH(l, &Lookups, entries)
-  {
-    if (l->type != type)
-      continue;
-    if (regexec(l->regex.regex, cs, 0, NULL, 0) == 0)
-      return l->replacement;
-  }
-  return NULL;
-}
-
-/**
- * mutt_cs_lookup_add - Add a new character set lookup
- * @param type    Type of character set, e.g. MUTT_LOOKUP_CHARSET 
- * @param pat     Pattern to match
- * @param replace Replacement string
- * @param err     Buffer for error message
- * @retval true, lookup added to list
- * @retval false, Regex string was invalid
- *
- * Add a regex for a character set and a replacement name.
- */
-bool mutt_cs_lookup_add(enum LookupType type, const char *pat,
-                        const char *replace, struct Buffer *err)
-{
-  if (!pat || !replace)
-    return false;
-
-  regex_t *rx = mutt_mem_malloc(sizeof(regex_t));
-  int rc = REGCOMP(rx, pat, REG_ICASE);
-  if (rc != 0)
-  {
-    regerror(rc, rx, err->data, err->dsize);
-    FREE(&rx);
-    return false;
-  }
-
-  struct Lookup *l = mutt_mem_calloc(1, sizeof(struct Lookup));
-  l->type = type;
-  l->replacement = mutt_str_strdup(replace);
-  l->regex.pattern = mutt_str_strdup(pat);
-  l->regex.regex = rx;
-  l->regex.not = false;
-
-  TAILQ_INSERT_TAIL(&Lookups, l, entries);
-
-  return true;
-}
-
-/**
- * mutt_cs_lookup_remove - Remove all the character set lookups
- *
- * Empty the list of replacement character set names.
- */
-void mutt_cs_lookup_remove(void)
-{
-  struct Lookup *l = NULL;
-  struct Lookup *tmp = NULL;
-
-  TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
-  {
-    TAILQ_REMOVE(&Lookups, l, entries);
-    FREE(&l->replacement);
-    FREE(&l->regex.pattern);
-    if (l->regex.regex)
-      regfree(l->regex.regex);
-    FREE(&l->regex);
-    FREE(&l);
-  }
-}
-
-/**
- * mutt_cs_charset_lookup - Look for a replacement character set
- * @param chs Character set to lookup
- * @retval ptr  Replacement character set (if a 'charset-hook' matches)
- * @retval NULL No matching hook
- *
- * Look through all the 'charset-hook's.
- * If one matches return the replacement character set.
- */
-const char *mutt_cs_charset_lookup(const char *chs)
-{
-  return lookup_charset(MUTT_LOOKUP_CHARSET, chs);
-}
-
-/**
- * mutt_cs_iconv_lookup - Look for a replacement character set
- * @param chs Character set to lookup
- * @retval ptr  Replacement character set (if a 'iconv-hook' matches)
- * @retval NULL No matching hook
- *
- * Look through all the 'iconv-hook's.
- * If one matches return the replacement character set.
- */
-const char *mutt_cs_iconv_lookup(const char *chs)
-{
-  return lookup_charset(MUTT_LOOKUP_ICONV, chs);
-}
diff --git a/mutt_charset.h b/mutt_charset.h

deleted file mode 100644 (file)

index a940539..0000000
--- a/mutt_charset.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * @file
- * Conversion between different character encodings
- *
- * @authors
- * Copyright (C) 1999-2003 Thomas Roessler <roessler@does-not-exist.org>
- *
- * @copyright
- * This program is free software: you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation, either version 2 of the License, or (at your option) any later
- * version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _MUTT_CHARSET2_H
-#define _MUTT_CHARSET2_H
-
-#include <iconv.h>
-#include <stdbool.h>
-#include <stdio.h>
-
-/**
- * enum LookupType - Types of character set lookups
- */
-enum LookupType
-{
-  MUTT_LOOKUP_CHARSET,
-  MUTT_LOOKUP_ICONV
-};
-
-bool             mutt_cs_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err);
-void             mutt_cs_lookup_remove(void);
-const char *     mutt_cs_charset_lookup(const char *chs);
-const char *     mutt_cs_iconv_lookup(const char *chs);
-
-int mutt_convert_string(char **ps, const char *from, const char *to, int flags);
-
-iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags);
-struct FgetConv *fgetconv_open(FILE *file, const char *from, const char *to, int flags);
-
-/* flags for charset.c:mutt_convert_string(), fgetconv_open(), and
- * mutt_iconv_open(). Note that applying charset-hooks to tocode is
- * never needed, and sometimes hurts: Hence there is no MUTT_ICONV_HOOK_TO
- * flag.
- */
-#define MUTT_ICONV_HOOK_FROM 1 /* apply charset-hooks to fromcode */
-
-bool mutt_check_charset(const char *s, bool strict);
-
-#endif /* _MUTT_CHARSET2_H */
diff --git a/mutt_idna.c b/mutt_idna.c

index 4a56bf6e54bc378ceec9f0f9bfa94c918a8c0480..7b48cc62c7752838ccc40475bc8a9fd43ce44b00 100644 (file)
--- a/mutt_idna.c
+++ b/mutt_idna.c
@@ -29,7 +29,6 @@
  #include "address.h"
  #include "envelope.h"
  #include "globals.h"
-#include "mutt_charset.h"
  #include "options.h"
  #ifdef HAVE_IDNA_H
  #include <idna.h>
@@ -109,10 +108,10 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags)
  #endif /* HAVE_LIBIDN */
  
    /* we don't want charset-hook effects, so we set flags to 0 */
-  if (mutt_convert_string(&local_user, "utf-8", Charset, 0) == -1)
+  if (mutt_cs_convert_string(&local_user, "utf-8", Charset, 0) == -1)
      goto cleanup;
  
-  if (mutt_convert_string(&local_domain, "utf-8", Charset, 0) == -1)
+  if (mutt_cs_convert_string(&local_domain, "utf-8", Charset, 0) == -1)
      goto cleanup;
  
    /*
@@ -123,7 +122,7 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags)
    {
      reversed_user = mutt_str_strdup(local_user);
  
-    if (mutt_convert_string(&reversed_user, Charset, "utf-8", 0) == -1)
+    if (mutt_cs_convert_string(&reversed_user, Charset, "utf-8", 0) == -1)
      {
        mutt_debug(
            1, "Not reversible. Charset conv to utf-8 failed for user = '%s'.\n", reversed_user);
@@ -139,7 +138,7 @@ char *mutt_idna_intl_to_local(char *orig_user, char *orig_domain, int flags)
  
      reversed_domain = mutt_str_strdup(local_domain);
  
-    if (mutt_convert_string(&reversed_domain, Charset, "utf-8", 0) == -1)
+    if (mutt_cs_convert_string(&reversed_domain, Charset, "utf-8", 0) == -1)
      {
        mutt_debug(
            1,
@@ -196,10 +195,10 @@ char *mutt_idna_local_to_intl(char *user, char *domain)
    intl_domain = mutt_str_strdup(domain);
  
    /* we don't want charset-hook effects, so we set flags to 0 */
-  if (mutt_convert_string(&intl_user, Charset, "utf-8", 0) == -1)
+  if (mutt_cs_convert_string(&intl_user, Charset, "utf-8", 0) == -1)
      goto cleanup;
  
-  if (mutt_convert_string(&intl_domain, Charset, "utf-8", 0) == -1)
+  if (mutt_cs_convert_string(&intl_domain, Charset, "utf-8", 0) == -1)
      goto cleanup;
  
  #ifdef HAVE_LIBIDN
diff --git a/muttlib.c b/muttlib.c

index 0267cc6f81da02990c2e623c99008e2f51f82d4d..46f6ffcc20eaf1157e6788d3bcc9e1586a7ab9c0 100644 (file)
--- a/muttlib.c
+++ b/muttlib.c
@@ -52,7 +52,6 @@
  #include "header.h"
  #include "mailbox.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mx.h"
  #include "ncrypt/ncrypt.h"
@@ -1426,7 +1425,7 @@ const char *mutt_make_version(void)
  void mutt_encode_path(char *dest, size_t dlen, const char *src)
  {
    char *p = mutt_str_strdup(src);
-  int rc = mutt_convert_string(&p, Charset, "utf-8", 0);
+  int rc = mutt_cs_convert_string(&p, Charset, "utf-8", 0);
    /* `src' may be NULL, such as when called from the pop3 driver. */
    mutt_str_strfcpy(dest, (rc == 0) ? NONULL(p) : NONULL(src), dlen);
    FREE(&p);
diff --git a/ncrypt/crypt_gpgme.c b/ncrypt/crypt_gpgme.c

index d06d122b6bd9c32b7c8881ec6cd81d9f259a5d25..937b96c2b7896da17836d4ea39f7f34f3c67d40f 100644 (file)
--- a/ncrypt/crypt_gpgme.c
+++ b/ncrypt/crypt_gpgme.c
@@ -54,7 +54,6 @@
  #include "header.h"
  #include "keymap.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mutt_menu.h"
  #include "ncrypt.h"
@@ -180,7 +179,7 @@ static void print_utf8(FILE *fp, const char *buf, size_t len)
    /* fromcode "utf-8" is sure, so we don't want
     * charset-hook corrections: flags must be 0.
     */
-  mutt_convert_string(&tstr, "utf-8", Charset, 0);
+  mutt_cs_convert_string(&tstr, "utf-8", Charset, 0);
    fputs(tstr, fp);
    FREE(&tstr);
  }
@@ -2453,7 +2452,7 @@ static void copy_clearsigned(gpgme_data_t data, struct State *s, char *charset)
     * be a wrong label, so we want the ability to do corrections via
     * charset-hooks. Therefore we set flags to MUTT_ICONV_HOOK_FROM.
     */
-  fc = fgetconv_open(fp, charset, Charset, MUTT_ICONV_HOOK_FROM);
+  fc = mutt_cs_fgetconv_open(fp, charset, Charset, MUTT_ICONV_HOOK_FROM);
  
    for (complete = true, armor_header = true; mutt_cs_fgetconvs(buf, sizeof(buf), fc) != NULL;
         complete = (strchr(buf, '\n') != NULL))
@@ -2678,7 +2677,7 @@ int pgp_gpgme_application_handler(struct Body *m, struct State *s)
          struct FgetConv *fc = NULL;
          int c;
          rewind(pgpout);
-        fc = fgetconv_open(pgpout, "utf-8", Charset, 0);
+        fc = mutt_cs_fgetconv_open(pgpout, "utf-8", Charset, 0);
          while ((c = mutt_cs_fgetconv(fc)) != EOF)
          {
            state_putc(c, s);
diff --git a/ncrypt/gnupgparse.c b/ncrypt/gnupgparse.c

index 01ec7c892ebeb8b2be6f66eb1ab425807d6f1494..58ded94778cdd22bb36666fd5525dcd7df9cbc15 100644 (file)
--- a/ncrypt/gnupgparse.c
+++ b/ncrypt/gnupgparse.c
@@ -43,7 +43,6 @@
  #include "filter.h"
  #include "globals.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "ncrypt.h"
  #include "options.h"
  #include "pgpinvoke.h"
@@ -90,7 +89,7 @@ static void fix_uid(char *uid)
    }
    *d = '\0';
  
-  if (chs && (cd = mutt_iconv_open(chs, "utf-8", 0)) != (iconv_t) -1)
+  if (chs && (cd = mutt_cs_iconv_open(chs, "utf-8", 0)) != (iconv_t) -1)
    {
      int n = s - uid + 1; /* chars available in original buffer */
      char *buf = NULL;
diff --git a/ncrypt/pgp.c b/ncrypt/pgp.c

index 1497e940f502b36d60c41b6fb8479b115530e6c2..ebd09208e8865a005d7b6aa216bf9adbc642e8b6 100644 (file)
--- a/ncrypt/pgp.c
+++ b/ncrypt/pgp.c
@@ -51,7 +51,6 @@
  #include "globals.h"
  #include "header.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "ncrypt.h"
  #include "options.h"
@@ -286,7 +285,7 @@ static void pgp_copy_clearsigned(FILE *fpin, struct State *s, char *charset)
     * be a wrong label, so we want the ability to do corrections via
     * charset-hooks. Therefore we set flags to MUTT_ICONV_HOOK_FROM.
     */
-  fc = fgetconv_open(fpin, charset, Charset, MUTT_ICONV_HOOK_FROM);
+  fc = mutt_cs_fgetconv_open(fpin, charset, Charset, MUTT_ICONV_HOOK_FROM);
  
    for (complete = true, armor_header = true; mutt_cs_fgetconvs(buf, sizeof(buf), fc) != NULL;
         complete = (strchr(buf, '\n') != NULL))
@@ -422,7 +421,7 @@ int pgp_application_pgp_handler(struct Body *m, struct State *s)
            l = mutt_str_strlen(gpgcharset);
            if ((l > 0) && (gpgcharset[l - 1] == '\n'))
              gpgcharset[l - 1] = 0;
-          if (!mutt_check_charset(gpgcharset, 0))
+          if (!mutt_cs_check_charset(gpgcharset, 0))
              mutt_str_replace(&gpgcharset, "UTF-8");
          }
        }
@@ -549,7 +548,7 @@ int pgp_application_pgp_handler(struct Body *m, struct State *s)
  
          rewind(pgpout);
          state_set_prefix(s);
-        fc = fgetconv_open(pgpout, expected_charset, Charset, MUTT_ICONV_HOOK_FROM);
+        fc = mutt_cs_fgetconv_open(pgpout, expected_charset, Charset, MUTT_ICONV_HOOK_FROM);
          while ((ch = mutt_cs_fgetconv(fc)) != EOF)
            state_prefix_putc(ch, s);
          mutt_cs_fgetconv_close(&fc);
@@ -1554,7 +1553,7 @@ struct Body *pgp_traditional_encryptsign(struct Body *a, int flags, char *keylis
        send_charset = "utf-8";
  
      /* fromcode is assumed to be correct: we set flags to 0 */
-    fc = fgetconv_open(fp, from_charset, "utf-8", 0);
+    fc = mutt_cs_fgetconv_open(fp, from_charset, "utf-8", 0);
      while ((c = mutt_cs_fgetconv(fc)) != EOF)
        fputc(c, pgpin);
  
diff --git a/parse.c b/parse.c

index 92399dbd60e6517007f9242bb12ee93566ae72cd..bf1c7f8576c8d0e78ba2d88d8bac740db2eebd51 100644 (file)
--- a/parse.c
+++ b/parse.c
@@ -37,7 +37,6 @@
  #include "header.h"
  #include "mailbox.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "ncrypt/ncrypt.h"
  #include "options.h"
  #include "parameter.h"
diff --git a/rfc2047.c b/rfc2047.c

index c880b8bd331d6bb8fa2c6c2bb49d2cfc182831c9..7a6756b99d22bb6b89765b9ddfe596d78b08a525 100644 (file)
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -32,7 +32,6 @@
  #include "globals.h"
  #include "mbyte.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "options.h"
  #include "protos.h"
  
@@ -61,7 +60,7 @@ static size_t convert_string(const char *f, size_t flen, const char *from,
    size_t obl, n;
    int e;
  
-  cd = mutt_iconv_open(to, from, 0);
+  cd = mutt_cs_iconv_open(to, from, 0);
    if (cd == (iconv_t)(-1))
      return (size_t)(-1);
    obl = 4 * flen + 1;
@@ -117,7 +116,7 @@ int convert_nonmime_string(char **ps)
        return 0;
      }
    }
-  mutt_convert_string(ps, (const char *) mutt_cs_get_default_charset(), Charset,
+  mutt_cs_convert_string(ps, (const char *) mutt_cs_get_default_charset(), Charset,
                        MUTT_ICONV_HOOK_FROM);
    return -1;
  }
@@ -277,7 +276,7 @@ static size_t try_block(const char *d, size_t dlen, const char *fromcode,
  
    if (fromcode)
    {
-    cd = mutt_iconv_open(tocode, fromcode, 0);
+    cd = mutt_cs_iconv_open(tocode, fromcode, 0);
      assert(cd != (iconv_t)(-1));
      ib = d;
      ibl = dlen;
@@ -357,7 +356,7 @@ static size_t encode_block(char *s, char *d, size_t dlen, const char *fromcode,
  
    if (fromcode)
    {
-    cd = mutt_iconv_open(tocode, fromcode, 0);
+    cd = mutt_cs_iconv_open(tocode, fromcode, 0);
      assert(cd != (iconv_t)(-1));
      ib = d;
      ibl = dlen;
@@ -738,7 +737,7 @@ static int rfc2047_decode_word(char *d, const char *s, size_t len)
    }
  
    if (charset)
-    mutt_convert_string(&d0, charset, Charset, MUTT_ICONV_HOOK_FROM);
+    mutt_cs_convert_string(&d0, charset, Charset, MUTT_ICONV_HOOK_FROM);
    mutt_filter_unprintable(&d0);
    mutt_str_strfcpy(d, d0, len);
    rc = 0;
diff --git a/rfc2231.c b/rfc2231.c

index 2aaf268fb898333c739a75b08b12486338d5f044..c5c20ac794b4d4b26b3991826be346956464dd1f 100644 (file)
--- a/rfc2231.c
+++ b/rfc2231.c
@@ -39,7 +39,6 @@
  #include "globals.h"
  #include "mbyte.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "options.h"
  #include "parameter.h"
  #include "protos.h"
@@ -203,7 +202,7 @@ static void rfc2231_join_continuations(struct Parameter **head, struct Rfc2231Pa
      } while (par && (strcmp(par->attribute, attribute) == 0));
  
      if (encoded)
-      mutt_convert_string(&value, charset, Charset, MUTT_ICONV_HOOK_FROM);
+      mutt_cs_convert_string(&value, charset, Charset, MUTT_ICONV_HOOK_FROM);
      *head = mutt_param_new();
      (*head)->attribute = mutt_str_strdup(attribute);
      (*head)->value = value;
@@ -261,7 +260,7 @@ void rfc2231_decode_parameters(struct Parameter **headp)
  
        s = rfc2231_get_charset(p->value, charset, sizeof(charset));
        rfc2231_decode_one(p->value, s);
-      mutt_convert_string(&p->value, charset, Charset, MUTT_ICONV_HOOK_FROM);
+      mutt_cs_convert_string(&p->value, charset, Charset, MUTT_ICONV_HOOK_FROM);
        mutt_filter_unprintable(&p->value);
  
        *last = p;
diff --git a/sendlib.c b/sendlib.c

index d9a9ebb96aefc3aded97547bc04a1ff5d9254868..bcca72fa6ef04b4bfb58fbf0cca182a585d6d95b 100644 (file)
--- a/sendlib.c
+++ b/sendlib.c
@@ -53,7 +53,6 @@
  #include "header.h"
  #include "mailbox.h"
  #include "mime.h"
-#include "mutt_charset.h"
  #include "mutt_curses.h"
  #include "mutt_idna.h"
  #include "mx.h"
@@ -475,10 +474,10 @@ int mutt_write_mime_body(struct Body *a, FILE *f)
    }
  
    if (a->type == TYPETEXT && (!a->noconv))
-    fc = fgetconv_open(fpin, a->charset,
+    fc = mutt_cs_fgetconv_open(fpin, a->charset,
                         mutt_get_body_charset(send_charset, sizeof(send_charset), a), 0);
    else
-    fc = fgetconv_open(fpin, 0, 0, 0);
+    fc = mutt_cs_fgetconv_open(fpin, 0, 0, 0);
  
    mutt_sig_allow_interrupt(1);
    if (a->encoding == ENCQUOTEDPRINTABLE)
@@ -689,7 +688,7 @@ static size_t convert_file_to(FILE *file, const char *fromcode, int ncodes,
    struct ContentState *states = NULL;
    size_t *score = NULL;
  
-  cd1 = mutt_iconv_open("utf-8", fromcode, 0);
+  cd1 = mutt_cs_iconv_open("utf-8", fromcode, 0);
    if (cd1 == (iconv_t)(-1))
      return -1;
  
@@ -701,7 +700,7 @@ static size_t convert_file_to(FILE *file, const char *fromcode, int ncodes,
    for (int i = 0; i < ncodes; i++)
    {
      if (mutt_str_strcasecmp(tocodes[i], "utf-8") != 0)
-      cd[i] = mutt_iconv_open(tocodes[i], "utf-8", 0);
+      cd[i] = mutt_cs_iconv_open(tocodes[i], "utf-8", 0);
      else
      {
        /* Special case for conversion to UTF-8 */
author	Richard Russon <rich@flatcap.org>
	Sat, 30 Dec 2017 16:05:25 +0000 (16:05 +0000)
committer	Richard Russon <rich@flatcap.org>
	Sun, 31 Dec 2017 00:40:16 +0000 (00:40 +0000)
Makefile.autosetup		patch \| blob \| history
alias.c		patch \| blob \| history
browser.c		patch \| blob \| history
handler.c		patch \| blob \| history
hcache/hcache.c		patch \| blob \| history
hcache/hcache.h		patch \| blob \| history
history.c		patch \| blob \| history
hook.c		patch \| blob \| history
imap/utf7.c		patch \| blob \| history
init.c		patch \| blob \| history
mbyte.c		patch \| blob \| history
mutt/charset.c		patch \| blob \| history
mutt/charset.h		patch \| blob \| history
mutt_charset.c	[deleted file]	patch \| blob \| history
mutt_charset.h	[deleted file]	patch \| blob \| history
mutt_idna.c		patch \| blob \| history
muttlib.c		patch \| blob \| history
ncrypt/crypt_gpgme.c		patch \| blob \| history
ncrypt/gnupgparse.c		patch \| blob \| history
ncrypt/pgp.c		patch \| blob \| history
parse.c		patch \| blob \| history
rfc2047.c		patch \| blob \| history
rfc2231.c		patch \| blob \| history
sendlib.c		patch \| blob \| history