[unstable] Adding experimental new character set conversion code.

author Thomas Roessler <roessler@does-not-exist.org>

Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)

committer Thomas Roessler <roessler@does-not-exist.org>

Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)
author Thomas Roessler <roessler@does-not-exist.org>
Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)
committer Thomas Roessler <roessler@does-not-exist.org>
Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)
diff --git a/Makefile.am b/Makefile.am

index 1a97b51d6d392fdfe925cbcacd4ea32ff7c5ef50..d6c18775460ba951f2cafd9ed5e359f546285bca 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -52,7 +52,7 @@ ACLOCAL_AMFLAGS = -I m4
  
  LDADD = @LIBOBJS@ @INTLLIBS@
  
-SUBDIRS = doc intl m4 po rx contrib charsets
+SUBDIRS = doc intl m4 po rx contrib
  
  OPS=$(srcdir)/OPS $(srcdir)/OPS.PGP
  
diff --git a/charset.c b/charset.c

index 40d12e5b665987dd4047051f1d7bb90eee83cb19..f6dbd09d403ad551cc6e330f864741ad48061116 100644 (file)
--- a/charset.c
+++ b/charset.c
@@ -1,148 +1,664 @@
  static const char rcsid[]="$Id$";
  /*
- * Copyright (C) 1998 Ruslan Ermilov <ru@ucb.crimea.ua>,
- *                    Thomas Roessler <roessler@guug.de>
+ * Copyright (C) 1999 Thomas Roessler <roessler@guug.de>
   *
- *     This program is free software; you can redistribute it and/or modify
- *     it under the terms of the GNU General Public License as published by
- *     the Free Software Foundation; either version 2 of the License, or
- *     (at your option) any later version.
+ *     This program is free software; you can redistribute it
+ *     and/or modify it under the terms of the GNU General Public
+ *     License as published by the Free Software Foundation; either
+ *     version 2 of the License, or (at your option) any later
+ *     version.
   *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
+ *     This program is distributed in the hope that it will be
+ *     useful, but WITHOUT ANY WARRANTY; without even the implied
+ *     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *     PURPOSE.  See the GNU General Public License for more
+ *     details.
   *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, write to the Free Software
- *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *     You should have received a copy of the GNU General Public
+ *     License along with this program; if not, write to the Free
+ *     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA
+ *     02139, USA.
+ */
+
+/*
+ * This module deals with POSIX.2 character set definition files.
   */
  
-#include "mutt.h"
-#include "charset.h"
  
  #include <string.h>
  #include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#include "mutt.h"
+#include "charset.h"
+
+/* Where are character set definition files located? */
+
+#define CHARMAPS_DIR "/usr/share/i18n/charmaps"
+
+
+/* Define this if you want any dprint () statements in this code */
+
+#undef CHARSET_DEBUG
+
+#ifndef CHARSET_DEBUG
+# undef dprint
+# define dprint(a, b) (void) a
+#endif
+
+
+/* Module-global variables */
  
  static HASH *Translations = NULL;
  static HASH *Charsets = NULL;
  static HASH *CharsetAliases = NULL;
  
-static CHARSET *mutt_new_charset(void)
+/* Function Prototypes */
+
+static CHARDESC *chardesc_new (void);
+static CHARDESC *repr2descr (int repr, CHARSET * cs);
+
+static CHARMAP *charmap_new (void);
+static CHARMAP *parse_charmap_header (FILE * fp);
+static CHARSET *charset_new (size_t hash_size);
+
+static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to);
+
+static char translate_character (CHARSET * to, const char *symbol);
+
+static int load_charset (const char *filename, CHARSET ** csp, short multbyte);
+static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp);
+static int _cd_compar (const void *a, const void *b);
+
+static void canonical_charset (char *dest, size_t dlen, const char *name);
+static void chardesc_free (CHARDESC ** cdp);
+static void charmap_free (CHARMAP ** cp);
+static void charset_free (CHARSET ** csp);
+static void fix_symbol (char *symbol, CHARMAP * m);
+
+static void canonical_charset (char *dest, size_t dlen, const char *name)
  {
-  CHARSET *chs;
-  
-  chs          = safe_malloc(sizeof(CHARSET));
-  chs->map     = NULL;
-  
-  return chs;
+  int i;
+
+  if (!strncasecmp (name, "x-", 2))
+    name = name + 2;
+
+  for (i = 0; name[i] && i < dlen - 1; i++)
+  {
+    if (strchr ("_/. ", name[i]))
+      dest[i] = '-';
+    else
+      dest[i] = tolower (name[i]);
+  }
+
+  dest[i] = '\0';
  }
  
-#if 0
+static CHARSET *charset_new (size_t hash_size)
+{
+  CHARSET *cp = safe_malloc (sizeof (CHARSET));
+  short i;
  
-static void mutt_free_charset(CHARSET **chsp)
+  cp->n_symb = 256;
+  cp->u_symb = 0;
+  cp->multbyte = 1;
+  cp->symb_to_repr = hash_create (hash_size);
+  cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *));
+
+  for (i = 0; i < cp->n_symb; i++)
+    cp->description[i] = NULL;
+
+  return cp;
+}
+
+static void charset_free (CHARSET ** csp)
+{
+  CHARSET *cs = *csp;
+  size_t i;
+
+  for (i = 0; i < cs->n_symb; i++)
+    chardesc_free (&cs->description[i]);
+
+  safe_free ((void **) &cs->description);
+
+  hash_destroy (&cs->symb_to_repr, NULL);
+  safe_free ((void **) csp);
+}
+
+static CHARMAP *charmap_new (void)
  {
-  CHARSET *chs = *chsp;
+  CHARMAP *m = safe_malloc (sizeof (CHARMAP));
+
+  m->charset = NULL;
+  m->escape_char = '\\';
+  m->comment_char = '#';
+  m->multbyte = 1;
+  m->aliases = NULL;
    
-  safe_free((void **) &chs->map);
-  safe_free((void **) chsp);
+  return m;
  }
  
-#endif
+static void charmap_free (CHARMAP ** cp)
+{
+  if (!cp || !*cp)
+    return;
+
+  mutt_free_list (&(*cp)->aliases);
+  safe_free ((void **) &(*cp)->charset);
+  safe_free ((void **) cp);
  
-static void canonical_charset(char *dest, size_t dlen, const char *name)
+  return;
+}
+
+static CHARDESC *chardesc_new (void)
  {
-  int i;
-  
-  if(!mutt_strncasecmp(name, "x-", 2))
-    name = name + 2;
-  
-  for(i = 0; name[i] && i < dlen - 1; i++)
+  CHARDESC *p = safe_malloc (sizeof (CHARDESC));
+
+  p->symbol = NULL;
+  p->repr = -1;
+
+  return p;
+}
+
+static void chardesc_free (CHARDESC ** cdp)
+{
+  if (!cdp || !*cdp)
+    return;
+
+
+  safe_free ((void **) &(*cdp)->symbol);
+  safe_free ((void **) cdp);
+
+  return;
+}
+
+static CHARMAP *parse_charmap_header (FILE * fp)
+{
+  char buffer[1024];
+  char *t, *u;
+  CHARMAP *m = charmap_new ();
+
+  while (fgets (buffer, sizeof (buffer), fp))
    {
-    if(strchr("_/. ", name[i]))
-      dest[i] = '-';
+    if ((t = strchr (buffer, '\n')))
+      *t = '\0';
      else
-      dest[i] = tolower(name[i]);
+    {
+      charmap_free (&m);
+      return NULL;
+    }
+
+    if (!strncmp (buffer, "CHARMAP", 7))
+      break;
+
+    if (*buffer == m->comment_char)
+    {
+      if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias"))
+      {
+       char _tmp[SHORT_STRING];
+       while ((t = strtok(NULL, "\t, ")))
+       {
+         canonical_charset (_tmp, sizeof (_tmp), t);
+         m->aliases = mutt_add_list (m->aliases, _tmp);
+       }
+      }
+      continue;
+    }
+
+    if (!(t = strtok (buffer, "\t ")))
+      continue;
+
+    if (!(u = strtok (NULL, "\t ")))
+    {
+      charmap_free (&m);
+      return NULL;
+    }
+
+    if (!strcmp (t, "<code_set_name>"))
+    {
+      safe_free ((void **) &m->charset);
+      canonical_charset (u, strlen (u) + 1, u);
+      m->charset = safe_strdup (u);
+    }
+    else if (!strcmp (t, "<comment_char>"))
+    {
+      m->comment_char = *u;
+    }
+    else if (!strcmp (t, "<escape_char>"))
+    {
+      m->escape_char = *u;
+    }
+    else if (!strcmp (t, "<mb_cur_max>"))
+    {
+      m->multbyte = strtol (u, NULL, 0);
+    }
+  }
+
+  return m;
+}
+
+/* Properly handle escape characters within a symbol. */
+
+static void fix_symbol (char *symbol, CHARMAP * m)
+{
+  char *s, *d;
+
+  for (s = symbol, d = symbol; *s; *d++ = *s++)
+  {
+    if (*s == m->escape_char)
+      s++;
+  }
+
+  *d = *s;
+}
+
+enum
+{
+  CL_DESCR,
+  CL_END,
+  CL_COMMENT,
+  CL_ERROR
+};
+
+static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp)
+{
+  char *t, *u;
+  short n;
+  CHARDESC *descr;
+
+  if (*line == m->comment_char)
+    return CL_COMMENT;
+
+  descr = *descrp = chardesc_new ();
+
+  if (!strncmp (line, "END CHARMAP", 11))
+  {
+    chardesc_free (descrp);
+    return CL_END;
+  }
+
+  for (t = line; *t && isspace (*t); t++)
+    ;
+
+  if (*t++ != '<')
+  {
+    chardesc_free (descrp);
+    return CL_ERROR;
+  }
+
+  for (u = t; *u && *u != '>'; u++)
+  {
+    if (*u == m->escape_char && u[1])
+      u++;
+  }
+
+  if (*u != '>')
+  {
+    chardesc_free (descrp);
+    return CL_ERROR;
+  }
+
+  *u++ = '\0';
+  descr->symbol = safe_strdup (t);
+  fix_symbol (descr->symbol, m);
+
+  for (t = u; *t && isspace (*t); t++)
+    ;
+
+  for (u = t; *u && !isspace (*u); u++)
+    ;
+
+  *u++ = 0;
+  descr->repr = 0;
+
+  for (n = 0; *t == m->escape_char && n < m->multbyte; n++)
+  {
+    switch (*++t)
+    {
+    case 'x':
+      descr->repr = descr->repr * 256 + strtol (++t, &t, 16);
+      break;
+    case 'd':
+      descr->repr = descr->repr * 256 + strtol (++t, &t, 10);
+      break;
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+      descr->repr = descr->repr * 256 + strtol (t, &t, 8);
+      break;
+    default:
+      chardesc_free (descrp);
+      return CL_ERROR;
+    }
    }
+
+  if (!n)
+  {
+    chardesc_free (descrp);
+    return CL_ERROR;
+  }
+
+  return CL_DESCR;
+}
+
+static int _cd_compar (const void *a, const void *b)
+{
+  const CHARDESC *ap, *bp;
+  int i;
+
+  ap = * (CHARDESC **) a;
+  bp = * (CHARDESC **) b;
+
+  i = ap->repr - bp->repr;
+
+  dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n",
+              ap->repr, ap->symbol, bp->repr, bp->symbol, i));
    
-  dest[i] = '\0';
+  return i;
  }
  
-static CHARSET *load_charset(const char *name)
+/*
+ * Load a character set description into memory.
+ * 
+ * The multibyte parameter tells us whether we are going
+ * to accept multibyte character sets.
+ */
+
+static int load_charset (const char *filename, CHARSET ** csp, short multbyte)
  {
-  char path[_POSIX_PATH_MAX];
-  char buffer[SHORT_STRING];
-  CHARSET *chs;
-  FILE *fp = NULL;
+  CHARDESC *cd = NULL;
+  CHARSET *cs = NULL;
+  CHARMAP *m = NULL;
+  FILE *fp;
+  char buffer[1024];
    int i;
+  int rv = -1;
  
-  chs = mutt_new_charset();
+  cs = *csp = charset_new (multbyte ? 1031 : 257);
  
-  snprintf(path, sizeof(path), "%s/charsets/%s", SHAREDIR, name);
-  if((fp = fopen(path, "r")) == NULL)
-    goto bail;
+  dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename));
    
-  if(fgets(buffer, sizeof(buffer), fp) == NULL)
+  if ((fp = fopen (filename, "r")) == NULL)
+  {
+    char _filename[_POSIX_PATH_MAX];
+
+    snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename);
+    dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename));
+    
+    if ((fp = fopen (_filename, "r")) == NULL)
+    {
+      dprint (2, (debugfile, "load_charset: Failed.\n"));
+      goto bail;
+    }
+  }
+      
+  if ((m = parse_charmap_header (fp)) == NULL)
      goto bail;
-  
-  if(mutt_strcmp(buffer, CHARSET_MAGIC) != 0)
+
+  /* Don't handle multibyte character sets unless explicitly requested
+   * to do so.
+   */
+
+  if (m->multbyte > 1 && !multbyte)
+  {
+    dprint (2, (debugfile, "load_charset: m->multbyte == %d\n",
+               (int) m->multbyte));
      goto bail;
+  }
  
-  chs->map  = safe_malloc(sizeof(CHARSET_MAP));
-  
-  for(i = 0; i < 256; i++)
+  cs->multbyte = m->multbyte;
+
+  while (fgets (buffer, sizeof (buffer), fp) != NULL)
+  {
+    i = parse_charmap_line (buffer, m, &cd);
+
+    if (i == CL_END)
+      break;
+    else if (i == CL_DESCR)
+    {
+      dprint (5, (debugfile, "load_charset: Got character description: < %s > -> %x\n",
+                 cd->symbol, cd->repr));
+      hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL);
+      hash_insert (cs->symb_to_repr, cd->symbol, cd, 0);
+
+      if (!multbyte)
+      {
+       if (0 <= cd->repr && cd->repr < 256)
+       {
+         if (cs->description[cd->repr])
+           chardesc_free (&cs->description[cd->repr]);
+         else
+           cs->u_symb++;
+
+         cs->description[cd->repr] = cd;
+         cd = NULL;
+       }
+      }
+      else
+      {
+       if (cs->u_symb == cs->n_symb)
+       {
+         size_t new_size = cs->n_symb + 256;
+         size_t i;
+
+         safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *));
+         for (i = cs->u_symb; i < new_size; i++)
+           cs->description[i] = NULL;
+         cs->n_symb = new_size;
+       }
+
+       cs->description[cs->u_symb++] = cd;
+       cd = NULL;
+      }
+    }
+
+    chardesc_free (&cd);
+  }
+
+  if (multbyte)
+    qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar);
+
+  rv = 0;
+
+bail:
+  charmap_free (&m);
+  if (fp)
+    fclose (fp);
+  if (rv)
+    charset_free (csp);
+
+  return rv;
+}
+
+static CHARDESC *repr2descr (int repr, CHARSET * cs)
+{
+  size_t a, b, c;
+  short found;
+
+  if (!cs || repr < 0)
+    return NULL;
+
+  if (cs->multbyte == 1)
+  {
+    if (repr < 256)
+      return cs->description[repr];
+    else
+      return NULL;
+  }
+
+  /* So we have a multibyte mapping, i.e., Unicode.  */
+
+  /* binary search for the proper description */
+  a = 0;
+  b = cs->u_symb - 1;
+  c = 0;  /* shut up the compiler. */
+  found = 0;
+
+  while (!found && b - a > 1)
    {
-    if(fscanf(fp, "%i", &(*chs->map)[i]) != 1)
+    c = (a + b) / 2;
+
+    if (cs->description[c]->repr == repr)
      {
-      safe_free((void **) &chs->map);
+      found = 1;
        break;
      }
+    else if (cs->description[c]->repr < repr)
+      a = c;
+    else if (cs->description[c]->repr > repr)
+      b = c;
    }
  
-  bail:
-  
-  if(fp) fclose(fp);
-  return chs;
+  if (!found)
+  {
+    if (cs->description[(c = a)]->repr == repr)
+      found = 1;
+    else if (cs->description[(c = b)]->repr == repr)
+      found = 1;
+  }
+    
+  if (found)
+  {
+    dprint (5, (debugfile, "repr2descr: %x -> { %x, %s }\n",
+               repr, cs->description[c]->repr, cs->description[c]->symbol));
+    return cs->description[c];
+  }
+  else
+    dprint (5, (debugfile, "Couldn't file a symbol for %x\n",
+               repr));
+
+  return NULL;
  }
  
-static HASH *load_charset_aliases(void)
+/* Build a translation table.  If a character cannot be
+ * translated correctly, we try to find an approximation
+ * from the portable charcter set.
+ *
+ * Note that this implies the assumption that the portable
+ * character set can be used without any conversion.
+ *
+ * Should be safe on POSIX systems.
+ */
+
+static char translate_character (CHARSET * to, const char *symbol)
+{
+  CHARDESC *cdt;
+
+  if ((cdt = hash_find (to->symb_to_repr, symbol)))
+    return (char) cdt->repr;
+  else
+    return *symbol;
+}
+
+static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to)
+{
+  int i;
+  CHARSET_MAP *map;
+  CHARDESC *cd;
+
+  /* This is for 8-bit character sets. */
+
+  if (!from || !to || from->multbyte > 1 || to->multbyte > 1)
+    return NULL;
+
+  map = safe_malloc (sizeof (CHARSET_MAP));
+  for (i = 0; i < 256; i++)
+  {
+    if (!(cd = repr2descr (i, from)))
+      (*map)[i] = '?';
+    else
+      (*map)[i] = translate_character (to, cd->symbol);
+  }
+
+  return map;
+}
+
+/* Currently, just scan the various charset definition files.
+ * On the long run, we should cache this stuff in a file.
+ */
+
+static HASH *load_charset_aliases (void)
  {
-  FILE *fp;
-  char buffer[LONG_STRING];
-  char *t;
    HASH *charset_aliases;
-  
-  sprintf(buffer, "%s/charsets/charsets.alias", SHAREDIR);
-  if(!(fp = fopen(buffer, "r")))
-     return NULL;
+  CHARMAP *m;
+  DIR *dp;
+  FILE *fp;
+  struct dirent *de;
+
+  if ((dp = opendir (CHARMAPS_DIR)) == NULL)
+    return NULL;
  
    charset_aliases = hash_create(127);
-  
-  while(fgets(buffer, sizeof(buffer), fp))
+
+  while ((de = readdir (dp)))
    {
-    if((t = strchr(buffer, '\n')))
-      *t = '\0';
+    char fnbuff[_POSIX_PATH_MAX];
  
-    if(!(t = strchr(buffer, ' ')))
+    if (*de->d_name == '.')
        continue;
+
+    snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name);
+    dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff));
+    if ((fp = fopen (fnbuff, "r")) == NULL)
+      continue;
+    
+    if ((m = parse_charmap_header (fp)) != NULL)
+    {
+      LIST *lp;
+      char buffer[LONG_STRING];
+      
+      canonical_charset (buffer, sizeof (buffer), de->d_name);
+      m->aliases = mutt_add_list (m->aliases, buffer);
+
+      if (m->charset)
+       m->aliases = mutt_add_list (m->aliases, m->charset);
+      
+      for (lp = m->aliases; lp; lp = lp->next)
+      {
+       if (lp->data)
+       {
+         dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n",
+                     lp->data, de->d_name));
+         if (hash_find (charset_aliases, lp->data))
+         {
+           dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n",
+                       lp->data));
+         }
+         else
+           hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0);
+       }
+      }
+
+      charmap_free (&m);
+    }
      
-    *t++ = '\0';
-    hash_insert(charset_aliases, safe_strdup(buffer), safe_strdup(t), 1);
+    fclose (fp);
    }
-  fclose(fp);
+    
+  closedir (dp);
    return charset_aliases;
  }
  
-static void init_charsets()
+static void init_charsets ()
  {
-  if(Charsets) return;
+  if (Charsets) return;
  
-  Charsets       = hash_create(127);
-  Translations   = hash_create(127);
-  CharsetAliases = load_charset_aliases();
+  Charsets       = hash_create (127);
+  Translations   = hash_create (127);
+  CharsetAliases = load_charset_aliases ();
  }
  
-CHARSET *mutt_get_charset(const char *name)
+CHARSET *mutt_get_charset (const char *name)
  {
    CHARSET *charset;
    char buffer[SHORT_STRING];
@@ -154,41 +670,24 @@ CHARSET *mutt_get_charset(const char *name)
    init_charsets();
    canonical_charset(buffer, sizeof(buffer), name);
  
+  dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer));
+  
    if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer)))
      real_charset = buffer;
  
-  if(!(charset = hash_find(Charsets, real_charset)))
+  dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset));
+  
+  if(!(charset = hash_find (Charsets, real_charset)))
    {
-    charset = load_charset(real_charset);
-    hash_insert(Charsets, safe_strdup(real_charset), charset, 1);
+    dprint (2, (debugfile, "mutt_get_charset: Need to load.\n"));
+    if (load_charset(real_charset, &charset, 0) == 0)
+      hash_insert(Charsets, safe_strdup(real_charset), charset, 1);
+    else
+      charset = NULL;
    }
    return charset;
  }
  
-static int translate_char(CHARSET_MAP *to, int ch)
-{
-  int i;
-
-  if (ch == -1) return '?';
-  for (i = 0; i < 256; i++)
-  {
-    if ((*to)[i] == ch) 
-      return i;
-  }
-  return '?';
-}
-
-CHARSET_MAP *build_translation(CHARSET_MAP *from, CHARSET_MAP *to)
-{
-  int i;
-  CHARSET_MAP *map = safe_malloc(sizeof(CHARSET_MAP));
-
-  for(i = 0; i < 256; i++)
-    (*map)[i] = translate_char(to, (*from)[i]);
-
-  return map;
-}
-
  CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to)
  {
    char from_canon[SHORT_STRING];
@@ -221,11 +720,8 @@ CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to)
      from_cs = mutt_get_charset(from);
      to_cs   = mutt_get_charset(to);
  
-    if(!from_cs->map || !to_cs->map)
-      return NULL;
-    
-    if((map = build_translation(from_cs->map, to_cs->map)))
-       hash_insert(Translations, safe_strdup(key), map, 1);
+    if((map = build_translation(from_cs, to_cs)))
+      hash_insert(Translations, safe_strdup(key), map, 1);
    }
    return map;
  }
@@ -331,35 +827,34 @@ static char *utf_to_unicode(int *out, char *in)
    return in + um->len;
  }
  
+static CHARSET *Unicode = NULL;
+
  void mutt_decode_utf8_string(char *str, CHARSET *chs)
  {
    char *s, *t;
-  int ch, i;
-  CHARSET_MAP *map = NULL;
+  CHARDESC *cd;
+  int ch;
+
+  /* Hack */
    
-  if(chs)
-    map = chs->map;
+  if (!Unicode)
+  {
+    if (load_charset ("ISO_10646", &Unicode, 1) == -1)
+      Unicode = NULL;
+  }
    
-  for( s = t = str; *t; s++)
+  for (s = t = str; *t; s++)
    {
      t = utf_to_unicode(&ch, t);
  
-    if(!map)
-    {
-      *s = (char) ch;
-    }
+    /* handle us-ascii characters directly */
+    if (0 <= ch && ch < 128)
+      *s = ch;
+    else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (chs, cd->symbol)) != -1)
+      *s = ch;
      else
-    {
-      for(i = 0, *s = '\0'; i < 256; i++)
-      {
-       if((*map)[i] == ch)
-       {
-         *s = i;
-         break;
-       }
-      }
-    }
-      
+      *s = '?';
+
      if(!*s) *s = '?';
    }
    
diff --git a/charset.h b/charset.h

index fd67cf326eb677059dfaa82de5c79a0d164f7891..5a254768e4ff5baae98f2653097ae8451f09a451 100644 (file)
--- a/charset.h
+++ b/charset.h
@@ -1,6 +1,6 @@
  /* $Id$ */
  /*
- * Copyright (C) 1998 Ruslan Ermilov <ru@ucb.crimea.ua>
+ * Copyright (C) 1999 Thomas Roessler <roessler@guug.de>
   *
   *     This program is free software; you can redistribute it and/or modify
   *     it under the terms of the GNU General Public License as published by
@@ -26,13 +26,34 @@
  
  typedef int CHARSET_MAP[256];
  
-typedef struct 
+typedef struct descr
  {
-  CHARSET_MAP *map;
-} 
+  char *symbol;
+  int repr;
+}
+CHARDESC;
+
+typedef struct
+{
+  char *charset;
+  char escape_char;
+  char comment_char;
+  short multbyte;
+  LIST *aliases;
+}
+CHARMAP;
+
+typedef struct
+{
+  size_t n_symb;
+  size_t u_symb;
+
+  short multbyte;
+  HASH *symb_to_repr;
+  CHARDESC **description;
+}
  CHARSET;
  
-#define mutt_unicode_char(cm,ch) (!cm ? -1 : (*cm)[ch])
  
  CHARSET *mutt_get_charset(const char *);
  CHARSET_MAP *mutt_get_translation(const char *, const char *);
diff --git a/configure.in b/configure.in

index df89518ea1f59acb5018abe7da88b03e2e7c3215..6d1662b9905b1c2e6ad35790f445e51cfcf47a7b 100644 (file)
--- a/configure.in
+++ b/configure.in
@@ -470,4 +470,4 @@ MUTTLOCALEDIR=$mutt_cv_prefix/$DATADIRNAME/locale
  AC_SUBST(MUTTLOCALEDIR)
  AC_DEFINE_UNQUOTED(MUTTLOCALEDIR, "$MUTTLOCALEDIR")
  
-AC_OUTPUT(Makefile intl/Makefile m4/Makefile po/Makefile.in rx/Makefile Muttrc doc/Makefile doc/manual.sgml doc/dotlock.man doc/mutt.man charsets/Makefile contrib/Makefile)
+AC_OUTPUT(Makefile intl/Makefile m4/Makefile po/Makefile.in rx/Makefile Muttrc doc/Makefile doc/manual.sgml doc/dotlock.man doc/mutt.man contrib/Makefile)
diff --git a/main.c b/main.c

index 7e7db55e69b639bd1b72c26af3ffe40c0adcce38..73220fb93edf5e4a78b6d9b3a0acf77aff3007dc 100644 (file)
--- a/main.c
+++ b/main.c
@@ -1,6 +1,7 @@
  static const char rcsid[]="$Id$";
  /*
   * Copyright (C) 1996-8 Michael R. Elkins <me@cs.hmc.edu>
+ * Copyright (C) 1999   Thomas Roessler <roessler@guug.de>
   * 
   *     This program is free software; you can redistribute it and/or modify
   *     it under the terms of the GNU General Public License as published by
@@ -41,16 +42,15 @@ const char ReachingUs[] = N_("\
  To contact the developers, please mail to <mutt-dev@mutt.org>.\n");
  
  const char Notice[] = N_("\
-Copyright (C) 1996-8 Michael R. Elkins and others.\n\
+Copyright (C) 1996-9 Michael R. Elkins and others.\n\
  Mutt comes with ABSOLUTELY NO WARRANTY; for details type `mutt -vv'.\n\
  Mutt is free software, and you are welcome to redistribute it\n\
  under certain conditions; type `mutt -vv' for details.\n");
  
  const char Copyright[] = N_("\
-Copyright (C) 1996-8 Michael R. Elkins <me@cs.hmc.edu>\n\
-Copyright (C) 1997-8 Thomas Roessler <roessler@guug.de>\n\
-Copyright (C) 1998   Werner Koch <wk@isil.d.shuttle.de>\n\
-Copyright (C) 1998   Ruslan Ermilov <ru@ucb.crimea.ua>\n\
+Copyright (C) 1996-9 Michael R. Elkins <me@cs.hmc.edu>\n\
+Copyright (C) 1997-9 Thomas Roessler <roessler@guug.de>\n\
+Copyright (C) 1998-9 Werner Koch <wk@isil.d.shuttle.de>\n\
  \n\
  Lots of others not mentioned here contributed lots of code,\n\
  fixes, and suggestions.\n\
diff --git a/mutt.h b/mutt.h

index b4271ac550e9962bb9e19bf2defdde0adf5f5354..a5c902a9826ba7201bdce3e8f6060fdefca72d80 100644 (file)
--- a/mutt.h
+++ b/mutt.h
@@ -464,7 +464,6 @@ typedef struct content
    unsigned int binary : 1; /* long lines, or CR not in CRLF pair */
    unsigned int from : 1;   /* has a line beginning with "From "? */
    unsigned int dot : 1;    /* has a line consisting of a single dot? */
-  unsigned int nonasc : 1; /* has unicode characters out of ASCII range */
  } CONTENT;
  
  typedef struct body
diff --git a/sendlib.c b/sendlib.c

index 3ceb65b4d4b1bac0c125f5eccddb77a52cb7ad2b..a44e6e6855f90533f1f2f8db5a9eaee1b57360a1 100644 (file)
--- a/sendlib.c
+++ b/sendlib.c
@@ -554,7 +554,6 @@ static CONTENT *mutt_get_content_info (const char *fname)
  {
    CONTENT *info;
    FILE *fp;
-  CHARSET_MAP *cm;
    int ch, from=0, whitespace=0, dot=0, linelen=0;
  
    if ((fp = fopen (fname, "r")) == NULL)
@@ -564,12 +563,6 @@ static CONTENT *mutt_get_content_info (const char *fname)
      return (NULL);
    }
  
-  {
-    CHARSET *cs;
-
-    cm = (cs = mutt_get_charset(Charset)) ? cs->map : 0;
-  }
-
    info = safe_calloc (1, sizeof (CONTENT));
    while ((ch = fgetc (fp)) != EOF)
    {
@@ -642,8 +635,7 @@ static CONTENT *mutt_get_content_info (const char *fname)
        if (ch == ' ') whitespace++;
        info->ascii++;
      }
-    if (cm && mutt_unicode_char (cm, ch) & -128)
-      info->nonasc = 1;
+
      if (linelen > 1) dot = 0;
      if (ch != ' ' && ch != '\t') whitespace = 0;
    }
@@ -748,15 +740,11 @@ static int lookup_mime_type (char *d, const char *s)
  
  static char *set_text_charset (CONTENT *info)
  {
-  CHARSET *cs;
-
-  /* if charset is unknown assume low bytes are ascii compatible */
-
    if ((Charset == NULL || mutt_strcasecmp (Charset, "us-ascii") == 0)
        && info->hibin)
      return ("unknown-8bit");
  
-  if (((cs = mutt_get_charset (Charset)) && cs->map) ? info->nonasc : info->hibin)
+  if (info->hibin)
      return (Charset);
  
    return ("us-ascii");
author	Thomas Roessler <roessler@does-not-exist.org>
	Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)
committer	Thomas Roessler <roessler@does-not-exist.org>
	Tue, 5 Jan 1999 14:35:11 +0000 (14:35 +0000)
Makefile.am		patch \| blob \| history
charset.c		patch \| blob \| history
charset.h		patch \| blob \| history
configure.in		patch \| blob \| history
main.c		patch \| blob \| history
mutt.h		patch \| blob \| history
sendlib.c		patch \| blob \| history