updated for version 7.0063
authorBram Moolenaar <Bram@vim.org>
Tue, 22 Mar 2005 22:54:12 +0000 (22:54 +0000)
committerBram Moolenaar <Bram@vim.org>
Tue, 22 Mar 2005 22:54:12 +0000 (22:54 +0000)
runtime/doc/Makefile
runtime/doc/vim.1
src/Make_bc5.mak
src/Make_cyg.mak
src/normal.c
src/proto/charset.pro
src/spell.c

index 6eca937cb93a1b0e800334bc503af88f5a93f106..1c2959de5f0cf5f04c8d55d3b6358c41bec8cdba 100644 (file)
@@ -381,31 +381,31 @@ os_win32.txt:
        touch os_win32.txt
 
 vim-it.UTF-8.1: vim-it.1
-       iconv -f latin1 -t utf-8 $< >$@
+       iconv -f latin1 -t utf-8 $> >$@
 
 evim-it.UTF-8.1: evim-it.1
-       iconv -f latin1 -t utf-8 $< >$@
+       iconv -f latin1 -t utf-8 $> >$@
 
 vimdiff-it.UTF-8.1: vimdiff-it.1
-       iconv -f latin1 -t utf-8 $< >$@
+       iconv -f latin1 -t utf-8 $> >$@
 
 vimtutor-it.UTF-8.1: vimtutor-it.1
-       iconv -f latin1 -t utf-8 $< >$@
+       iconv -f latin1 -t utf-8 $> >$@
 
 xxd-it.UTF-8.1: xxd-it.1
-       iconv -f latin1 -t utf-8 $< >$@
+       iconv -f latin1 -t utf-8 $> >$@
 
 vim-ru.UTF-8.1: vim-ru.1
-       iconv -f KOI8-R -t utf-8 $< >$@
+       iconv -f KOI8-R -t utf-8 $> >$@
 
 evim-ru.UTF-8.1: evim-ru.1
-       iconv -f KOI8-R -t utf-8 $< >$@
+       iconv -f KOI8-R -t utf-8 $> >$@
 
 vimdiff-ru.UTF-8.1: vimdiff-ru.1
-       iconv -f KOI8-R -t utf-8 $< >$@
+       iconv -f KOI8-R -t utf-8 $> >$@
 
 vimtutor-ru.UTF-8.1: vimtutor-ru.1
-       iconv -f KOI8-R -t utf-8 $< >$@
+       iconv -f KOI8-R -t utf-8 $> >$@
 
 xxd-ru.UTF-8.1: xxd-ru.1
-       iconv -f KOI8-R -t utf-8 $< >$@
+       iconv -f KOI8-R -t utf-8 $> >$@
index b98a96e2c826aa08b4f9145194e430e08ca01d6e..d78bf2efabe72f0089ad465a0d99a33ae8263288 100644 (file)
@@ -465,7 +465,7 @@ Type ":help" in
 to get started.
 Type ":help subject" to get help on a specific subject.
 For example: ":help ZZ" to get help for the "ZZ" command.
-Use <Tab> and CTRL-D to complete subjects (":help cmdline\-completion").
+Use <Tab> and CTRL\-D to complete subjects (":help cmdline\-completion").
 Tags are present to jump from one place to another (sort of hypertext links,
 see ":help").
 All documentation files can be viewed in this way, for example
index 25145d94dc78ac0a369034d4688832bcdea99711..195d79f9c185e86f3a562c02447edebd5a0b45fe 100644 (file)
@@ -564,6 +564,7 @@ vimobj =  \
        $(OBJDIR)\regexp.obj \
        $(OBJDIR)\screen.obj \
        $(OBJDIR)\search.obj \
+       $(OBJDIR)\spell.obj \
        $(OBJDIR)\syntax.obj \
        $(OBJDIR)\tag.obj \
        $(OBJDIR)\term.obj \
index 24a52ce8aa64732e0e82912a12b883dfb947002f..51e36189a8b3ad3238c745f6d8b31991faf01610 100644 (file)
@@ -1,6 +1,6 @@
 #
 # Makefile for VIM on Win32, using Cygnus gcc
-# Last updated by Dan Sharp.  Last Change: 2005 Jan 29
+# Last updated by Dan Sharp.  Last Change: 2005 Mar 21
 #
 # Also read INSTALLpc.txt!
 #
@@ -424,6 +424,7 @@ OBJ = \
        $(OUTDIR)/regexp.o \
        $(OUTDIR)/screen.o \
        $(OUTDIR)/search.o \
+       $(OUTDIR)/spell.o \
        $(OUTDIR)/syntax.o \
        $(OUTDIR)/tag.o \
        $(OUTDIR)/term.o \
index 458b8155eaa05cfbbb53cccff5dc82af9005d6f0..8780b954d6f7c7d8f928a9d21153918d2fd502a1 100644 (file)
@@ -3874,6 +3874,7 @@ check_scrollbind(topline_diff, leftcol_diff)
 nv_ignore(cap)
     cmdarg_T   *cap;
 {
+    cap->retval |= CA_COMMAND_BUSY;    /* don't call edit() now */
 }
 
 /*
@@ -8675,6 +8676,7 @@ nv_cursorhold(cap)
 {
     apply_autocmds(EVENT_CURSORHOLD, NULL, NULL, FALSE, curbuf);
     did_cursorhold = TRUE;
+    cap->retval |= CA_COMMAND_BUSY;    /* don't call edit() now */
 }
 #endif
 
index afc8562cec064fb7170f278db469a647fe8f8795..a3d4c145d0fdc0d3eefe1171068d9cfc8307f1d7 100644 (file)
@@ -20,6 +20,8 @@ int vim_isIDc __ARGS((int c));
 int vim_iswordc __ARGS((int c));
 int vim_iswordp __ARGS((char_u *p));
 int vim_iswordc_buf __ARGS((char_u *p, buf_T *buf));
+void init_spell_chartab __ARGS((void));
+int spell_iswordc __ARGS((char_u *p));
 int vim_isfilec __ARGS((int c));
 int vim_isprintc __ARGS((int c));
 int vim_isprintc_strict __ARGS((int c));
index a167ad1f12ff38ca6a532bb90b1ddcc6e94b0476..c92eb65bf162fd2a83bf2278b8fa4c5cebba4184 100644 (file)
@@ -9,6 +9,11 @@
 
 /*
  * spell.c: code for spell checking
+ *
+ * Terminology:
+ * "dword" is a dictionary word, made out of letters and digits.
+ * "nword" is a word with a character that's not a letter or digit.
+ * "word"  is either a "dword" or an "nword".
  */
 
 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
@@ -23,6 +28,8 @@
 # include <fcntl.h>
 #endif
 
+#define MAXWLEN 100            /* assume max. word len is this many bytes */
+
 /*
  * Structure that is used to store the text from the language file.  This
  * avoids the need to allocate each individual word and copying it.  It's
@@ -36,24 +43,67 @@ struct sblock_S
     char_u     sb_data[1];     /* data, actually longer */
 };
 
+/* Structure to store words and additions.  Used twice: once for case-folded
+ * and once for keep-case words. */
+typedef struct winfo_S
+{
+    hashtab_T  wi_ht;          /* hashtable with all words, both dword_T and
+                                  nword_T (check flags for DW_NWORD) */
+    garray_T   wi_add;         /* table with pointers to additions in a
+                                  dword_T */
+    int                wi_addlen;      /* longest addition length */
+} winfo_T;
+
 /*
  * Structure used to store words and other info for one language.
  */
 typedef struct slang_S slang_T;
-
 struct slang_S
 {
     slang_T    *sl_next;       /* next language */
     char_u     sl_name[2];     /* language name "en", "nl", etc. */
-    hashtab_T  sl_ht;          /* hashtable with all words */
-    garray_T   sl_match;       /* table with pointers to matches */
-    garray_T   sl_add;         /* table with pointers to additions */
-    char_u     sl_regions[13]; /* table with up to 6 region names */
+    winfo_T    sl_fwords;      /* case-folded words and additions */
+    winfo_T    sl_kwords;      /* keep-case words and additions */
+    char_u     sl_regions[17]; /* table with up to 8 region names plus NUL */
     sblock_T   *sl_block;      /* list with allocated memory blocks */
 };
 
 static slang_T *first_lang = NULL;
 
+/* Entry for dword in "wi_ht".  Also used for part of an nword, starting with
+ * the first non-word character.  And used for additions in wi_add. */
+typedef struct dword_S
+{
+    char_u     dw_region;      /* one bit per region where it's valid */
+    char_u     dw_flags;       /* WF_ flags */
+    char_u     dw_word[1];     /* actually longer, NUL terminated */
+} dword_T;
+
+#define REGION_ALL 0xff
+
+#define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2)
+
+/* Entry for an nword in "wi_ht".  Note that the last three items must be
+ * identical to dword_T, so that they can be in the same hashtable. */
+typedef struct nword_S
+{
+    garray_T   nw_ga;          /* table with pointers to dword_T for part
+                                  starting with non-word character */
+    int                nw_maxlen;      /* longest nword length (after the dword) */
+    char_u     nw_region;      /* one bit per region where it's valid */
+    char_u     nw_flags;       /* WF_ flags */
+    char_u     nw_word[1];     /* actually longer, NUL terminated */
+} nword_T;
+
+/* Get nword_T pointer from hashitem that uses nw_word */
+static nword_T dumnw;
+#define HI2NWORD(hi)   ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
+
+#define DW_CAP     0x01        /* word must start with capital */
+#define DW_RARE            0x02        /* rare word */
+#define DW_NWORD    0x04       /* this is an nword_T */
+#define DW_DWORD    0x08       /* (also) use as dword without nword */
+
 /*
  * Structure used in "b_langp", filled from 'spelllang'.
  */
@@ -64,25 +114,15 @@ typedef struct langp_S
 } langp_T;
 
 #define LANGP_ENTRY(ga, i)     (((langp_T *)(ga).ga_data) + (i))
-#define MATCH_ENTRY(gap, i)    *(((char_u **)(gap)->ga_data) + i)
-
-/*
- * The byte before a word in the hashtable indicates the type of word.
- * Also used for the byte just before a match.
- * The top two bits are used to indicate rare and case-sensitive words.
- * The lower bits are used to indicate the region in which the word is valid.
- * Words valid in all regions use REGION_ALL.
- */
-#define REGION_MASK    0x3f
-#define REGION_ALL     0x3f
-#define CASE_MASK      0x40
-#define RARE_MASK      0x80
+#define DWORD_ENTRY(gap, i)    *(((dword_T **)(gap)->ga_data) + i)
 
 #define SP_OK          0
 #define SP_BAD         1
 #define SP_RARE                2
 #define SP_LOCAL       3
 
+static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
+
 static slang_T *spell_load_lang __ARGS((char_u *lang));
 static void spell_load_file __ARGS((char_u *fname));
 static int find_region __ARGS((char_u *rp, char_u *region));
@@ -102,19 +142,34 @@ spell_check(wp, ptr, attrp)
     char_u     *ptr;
     int                *attrp;
 {
-    char_u     *e;
+    char_u     *e;             /* end of word */
+    char_u     *ne;            /* new end of word */
+    char_u     *me;            /* max. end of match */
     langp_T    *lp;
     int                result;
     int                len = 0;
-    hash_T     hash;
     hashitem_T *hi;
-    int                c;
-#define MAXWLEN 80     /* assume max. word len is 80 */
-    char_u     word[MAXWLEN + 1];
+    int                round;
+    char_u     kword[MAXWLEN + 1];     /* word copy */
+    char_u     fword[MAXWLEN + 1];     /* word with case folded */
+    char_u     match[MAXWLEN + 1];     /* fword with additional chars */
+    char_u     kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
+    char_u     fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
+    char_u     *clen;
+    int                cidx = 0;               /* char index in xwordclen[] */
+    hash_T     fhash;                  /* hash for fword */
+    hash_T     khash;                  /* hash for kword */
+    int                match_len = 0;          /* length of match[] */
+    int                fmatch_len = 0;         /* length of nword match in chars */
     garray_T   *gap;
-    int                l, h, t;
-    char_u     *p;
+    int                l, t;
+    char_u     *p, *tp;
     int                n;
+    dword_T    *dw;
+    dword_T    *tdw;
+    winfo_T    *wi;
+    nword_T    *nw;
+    int                w_isupper;
 
     /* Find the end of the word.  We already know that *ptr is a word char. */
     e = ptr;
@@ -122,119 +177,265 @@ spell_check(wp, ptr, attrp)
     {
        mb_ptr_adv(e);
        ++len;
-    } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));
+    } while (*e != NUL && spell_iswordc(e));
+
+    /* A word starting with a number is always OK. */
+    if (*ptr >= '0' && *ptr <= '9')
+       return (int)(e - ptr);
+
+#ifdef FEAT_MBYTE
+    w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
+#else
+    w_isupper = MB_ISUPPER(*ptr);
+#endif
+
+    /* Make a copy of the word so that it can be NUL terminated.
+     * Compute hash value. */
+    mch_memmove(kword, ptr, e - ptr);
+    kword[e - ptr] = NUL;
+    khash = hash_hash(kword);
+
+    /* Make case-folded copy of the word.  Compute its hash value. */
+    (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
+    fhash = hash_hash(fword);
+
+    /* Further case-folded characters to check for an nword match go in
+     * match[]. */
+    me = e;
+
+    /* "ne" is the end for the longest match */
+    ne = e;
 
     /* The word is bad unless we find it in the dictionary. */
     result = SP_BAD;
 
-    /* Words are always stored with folded case. */
-    (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
-    hash = hash_hash(word);
-
     /*
      * Loop over the languages specified in 'spelllang'.
-     * We check them all, because a match may find a longer word.
+     * We check them all, because a matching nword may be longer than an
+     * already found dword or nword.
      */
-    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
-                                                                    ++lp)
+    for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
     {
-       /* Check words when it wasn't recognized as a good word yet. */
-       if (result != SP_OK)
+       /*
+        * Check for a matching word in the hashtable.
+        * Check both the keep-case word and the fold-case word.
+        */
+       for (round = 0; round <= 1; ++round)
        {
-           /* Word lookup.  Using a hash table is fast. */
-           hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
+           if (round == 0)
+           {
+               wi = &lp->lp_slang->sl_kwords;
+               hi = hash_lookup(&wi->wi_ht, kword, khash);
+           }
+           else
+           {
+               wi = &lp->lp_slang->sl_fwords;
+               hi = hash_lookup(&wi->wi_ht, fword, fhash);
+           }
            if (!HASHITEM_EMPTY(hi))
            {
-               /* The character before the key indicates the type of word. */
-               c = hi->hi_key[-1];
-               if ((c & CASE_MASK) != 0)
+               /*
+                * If this is an nword entry, check for match with remainder.
+                */
+               dw = HI2DWORD(hi);
+               if (dw->dw_flags & DW_NWORD)
                {
-                   /* Need to check first letter is uppercase.  If it is,
-                    * check region.  If it isn't it may be a rare word. */
-                   if (
+                   /* If the word is not defined as a dword we must find an
+                    * nword. */
+                   if ((dw->dw_flags & DW_DWORD) == 0)
+                       dw = NULL;
+
+                   /* Fold more characters when needed for the nword.  Need
+                    * to do one extra to check for a non-word character after
+                    * the nword.  Also keep the byte-size of each character,
+                    * both before and after folding case. */
+                   nw = HI2NWORD(hi);
+                   while ((round == 0
+                               ? me - e <= nw->nw_maxlen
+                               : match_len <= nw->nw_maxlen)
+                           && *me != NUL)
+                   {
 #ifdef FEAT_MBYTE
-                           MB_ISUPPER(mb_ptr2char(ptr))
+                       l = mb_ptr2len_check(me);
 #else
-                           MB_ISUPPER(*ptr)
+                       l = 1;
 #endif
-                           )
+                       (void)str_foldcase(me, l, match + match_len,
+                                                    MAXWLEN - match_len + 1);
+                       me += l;
+                       kwordclen[cidx] = l;
+                       fwordclen[cidx] = STRLEN(match + match_len);
+                       match_len += fwordclen[cidx];
+                       ++cidx;
+                   }
+
+                   if (round == 0)
                    {
-                       if ((c & lp->lp_region) == 0)
-                           result = SP_LOCAL;
-                       else
-                           result = SP_OK;
+                       clen = kwordclen;
+                       tp = e;
                    }
-                   else if (c & RARE_MASK)
-                       result = SP_RARE;
-               }
-               else
-               {
-                   if ((c & lp->lp_region) == 0)
-                       result = SP_LOCAL;
-                   else if (c & RARE_MASK)
-                       result = SP_RARE;
                    else
-                       result = SP_OK;
+                   {
+                       clen = fwordclen;
+                       tp = match;
+                   }
+
+                   /* Match with each item.  The longest match wins:
+                    * "you've" is longer than "you". */
+                   gap = &nw->nw_ga;
+                   for (t = 0; t < gap->ga_len; ++t)
+                   {
+                       /* Skip entries with wrong case for first char.
+                        * Continue if it's a rare word without a capital. */
+                       tdw = DWORD_ENTRY(gap, t);
+                       if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
+                                                               && !w_isupper)
+                           continue;
+
+                       p = tdw->dw_word;
+                       l = 0;
+                       for (n = 0; p[n] != 0; n += clen[l++])
+                           if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+                               break;
+
+                       /* Use a match if it's longer than previous matches
+                        * and the next character is not a word character. */
+                       if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
+                                                  || !spell_iswordc(tp + n)))
+                       {
+                           dw = tdw;
+                           fmatch_len = l;
+                           if (round == 0)
+                               ne = tp + n;
+                           else
+                           {
+                               /* Need to use the length of the original
+                                * chars, not the fold-case ones. */
+                               ne = e;
+                               for (l = 0; l < fmatch_len; ++l)
+                                   ne += kwordclen[l];
+                           }
+                           if ((lp->lp_region & tdw->dw_region) == 0)
+                               result = SP_LOCAL;
+                           else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
+                               result = SP_RARE;
+                           else
+                               result = SP_OK;
+                       }
+                   }
+
                }
-           }
-       }
 
-       /* Match lookup.  Uses a binary search.  If there is a match adjust
-        * "e" to the end.  This is also done when a word matched, because
-        * "you've" is longer than "you". */
-       gap = &lp->lp_slang->sl_match;
-       l = 0;                  /* low index */
-       h = gap->ga_len - 1;    /* high index */
-       /* keep searching, the match must be between "l" and "h" (inclusive) */
-       while (h >= l)
-       {
-           t = (h + l) / 2;
-           p = MATCH_ENTRY(gap, t) + 1;
-           for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
-               ;
-           if (p[n] == 0)
-           {
-               if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer)))
+               if (dw != NULL)
                {
-                   /* match! */
-                   e = ptr + n;
-                   if (result != SP_OK)
+                   if (dw->dw_flags & DW_CAP)
                    {
-                       if ((lp->lp_region & p[-1]) == 0)
+                       /* Need to check first letter is uppercase.  If it is,
+                        * check region.  If it isn't it may be a rare word.
+                        * */
+                       if (w_isupper)
+                       {
+                           if ((dw->dw_region & lp->lp_region) == 0)
+                               result = SP_LOCAL;
+                           else
+                               result = SP_OK;
+                       }
+                       else if (dw->dw_flags & DW_RARE)
+                           result = SP_RARE;
+                   }
+                   else
+                   {
+                       if ((dw->dw_region & lp->lp_region) == 0)
                            result = SP_LOCAL;
+                       else if (dw->dw_flags & DW_RARE)
+                           result = SP_RARE;
                        else
                            result = SP_OK;
                    }
-                   break;
                }
-               /* match is too short, next item is new low index */
-               l = t + 1;
            }
-           else if (p[n] < ptr[n])
-               /* match is before word, next item is new low index */
-               l = t + 1;
-           else
-               /* match is after word, previous item is new high index */
-               h = t - 1;
        }
 
-       /* Addition lookup.  Uses a linear search, there should be very few.
-        * If there is a match adjust "e" to the end.  This doesn't change
-        * whether a word was good or bad, only the length. */
-       gap = &lp->lp_slang->sl_add;
-       for (t = 0; t < gap->ga_len; ++t)
-       {
-           p = MATCH_ENTRY(gap, t) + 1;
-           for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
-               ;
-           if (p[n] == 0
-                     && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer)))
+       /*
+        * Check for an addition.
+        * Only after a dword, not after an nword.
+        * Check both the keep-case word and the fold-case word.
+        */
+       if (fmatch_len == 0)
+           for (round = 0; round <= 1; ++round)
            {
-               /* match */
-               e += n;
-               break;
+               if (round == 0)
+                   wi = &lp->lp_slang->sl_kwords;
+               else
+                   wi = &lp->lp_slang->sl_fwords;
+               gap = &wi->wi_add;
+               if (gap->ga_len == 0)   /* no additions, skip quickly */
+                   continue;
+
+               /* Fold characters when needed for the addition.  Need to do one
+                * extra to check for a word character after the addition. */
+               while ((round == 0
+                           ? me - e <= wi->wi_addlen
+                           : match_len <= wi->wi_addlen)
+                       && *me != NUL)
+               {
+#ifdef FEAT_MBYTE
+                   l = mb_ptr2len_check(me);
+#else
+                   l = 1;
+#endif
+                   (void)str_foldcase(me, l, match + match_len,
+                                                        MAXWLEN - match_len + 1);
+                   me += l;
+                   kwordclen[cidx] = l;
+                   fwordclen[cidx] = STRLEN(match + match_len);
+                   match_len += fwordclen[cidx];
+                   ++cidx;
+               }
+
+               if (round == 0)
+               {
+                   clen = kwordclen;
+                   tp = e;
+               }
+               else
+               {
+                   clen = fwordclen;
+                   tp = match;
+               }
+
+               /* Addition lookup.  Uses a linear search, there should be
+                * very few.  If there is a match adjust "ne" to the end.
+                * This doesn't change whether a word was good or bad, only
+                * the length. */
+               for (t = 0; t < gap->ga_len; ++t)
+               {
+                   tdw = DWORD_ENTRY(gap, t);
+                   p = tdw->dw_word;
+                   l = 0;
+                   for (n = 0; p[n] != 0; n += clen[l++])
+                       if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+                           break;
+
+                   /* Use a match if it's longer than previous matches
+                    * and the next character is not a word character. */
+                   if (p[n] == 0 && l > fmatch_len
+                                   && (tp[n] == 0 || !spell_iswordc(tp + n)))
+                   {
+                       fmatch_len = l;
+                       if (round == 0)
+                           ne = tp + n;
+                       else
+                       {
+                           /* Need to use the length of the original
+                            * chars, not the fold-case ones. */
+                           ne = e;
+                           for (l = 0; l < fmatch_len; ++l)
+                               ne += kwordclen[l];
+                       }
+                   }
+               }
            }
-       }
     }
 
     if (result != SP_OK)
@@ -247,7 +448,7 @@ spell_check(wp, ptr, attrp)
            *attrp = highlight_attr[HLF_SPL];
     }
 
-    return (int)(e - ptr);
+    return (int)(ne - ptr);
 }
 
 static slang_T     *load_lp;   /* passed from spell_load_lang() to
@@ -264,15 +465,19 @@ spell_load_lang(lang)
     char_u     fname_enc[80];
     char_u     fname_ascii[20];
     char_u     *p;
+    int                r;
 
     lp = (slang_T *)alloc(sizeof(slang_T));
     if (lp != NULL)
     {
        lp->sl_name[0] = lang[0];
        lp->sl_name[1] = lang[1];
-       hash_init(&lp->sl_ht);
-       ga_init2(&lp->sl_match, sizeof(char_u *), 20);
-       ga_init2(&lp->sl_add, sizeof(char_u *), 4);
+       hash_init(&lp->sl_fwords.wi_ht);
+       ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4);
+       lp->sl_fwords.wi_addlen = 0;
+       hash_init(&lp->sl_kwords.wi_ht);
+       ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4);
+       lp->sl_kwords.wi_addlen = 0;
        lp->sl_regions[0] = NUL;
        lp->sl_block = NULL;
 
@@ -286,17 +491,20 @@ spell_load_lang(lang)
            p = (char_u *)"latin1";
        load_lp = lp;
        sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
-       if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)
+       r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
+       if (r == FAIL)
        {
            /* Try again to find an ASCII spell file. */
            sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
-           if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)
-           {
-               vim_free(lp);
-               lp = NULL;
-               smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
+           r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
+       }
+
+       if (r == FAIL)
+       {
+           vim_free(lp);
+           lp = NULL;
+           smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
                                                               fname_enc + 6);
-           }
        }
        else
        {
@@ -319,229 +527,398 @@ spell_load_file(fname)
     int                fd;
     size_t     len;
     size_t     l;
-    size_t     rest = 0;
     char_u     *p = NULL, *np;
-    sblock_T   *bl;
+    sblock_T   *bl = NULL;
+    int                bl_used = 0;
+    size_t     rest = 0;
+    char_u     *rbuf;          /* read buffer */
+    char_u     *rbuf_end;      /* past last valid char in "rbuf" */
     hash_T     hash;
     hashitem_T *hi;
     int                c;
+    int                cc;
     int                region = REGION_ALL;
-    char_u     word[MAXWLEN + 1];
-    int                n;
+    int                wlen;
+    winfo_T    *wi;
+    dword_T    *dw, *edw;
+    nword_T    *nw = NULL;
+    int                flags;
+    char_u     *save_sourcing_name = sourcing_name;
+    linenr_T   save_sourcing_lnum = sourcing_lnum;
+
+    rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1));
+    if (rbuf == NULL)
+       return;
 
     fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
     if (fd < 0)
     {
        EMSG2(_(e_notopen), fname);
-       return;
+       goto theend;
     }
 
+    sourcing_name = fname;
+    sourcing_lnum = 0;
+
     /* Get the length of the whole file. */
     len = lseek(fd, (off_t)0, SEEK_END);
     lseek(fd, (off_t)0, SEEK_SET);
 
-    /* Loop, reading the file one block at a time.
+    /*
+     * Read the file one block at a time.
      * "rest" is the length of an incomplete line at the previous block.
-     * "p" points to the remainder. */
+     * "p" points to the remainder.
+     */
     while (len > 0)
     {
-       /* Allocate a block of memory to store the info in.  This is not freed
-        * until spell_reload() is called. */
+       /* Read a block from the file.  Prepend the remainder of the previous
+        * block, if any. */
+       if (rest > 0)
+       {
+           if (rest > MAXWLEN)     /* truncate long line (should be comment) */
+               rest = MAXWLEN;
+           mch_memmove(rbuf, p, rest);
+           --sourcing_lnum;
+       }
        if (len > SBLOCKSIZE)
            l = SBLOCKSIZE;
        else
            l = len;
        len -= l;
-       bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
-       if (bl == NULL)
-           break;
-       bl->sb_next = load_lp->sl_block;
-       load_lp->sl_block = bl;
-
-       /* Read a block from the file.  Prepend the remainder of the previous
-        * block. */
-       if (rest > 0)
-           mch_memmove(bl->sb_data, p, rest);
-       if (read(fd, bl->sb_data + rest, l) != l)
+       if (read(fd, rbuf + rest, l) != l)
        {
            EMSG2(_(e_notread), fname);
            break;
        }
-       l += rest;
+       rbuf_end = rbuf + l + rest;
        rest = 0;
 
        /* Deal with each line that was read until we finish the block. */
-       for (p = bl->sb_data; l > 0; p = np)
+       for (p = rbuf; p < rbuf_end; p = np)
        {
-           /* "np" points to the char after the line (CR or NL). */
-           for (np = p; l > 0 && *np >= ' '; ++np)
-               --l;
-           if (l == 0)
+           ++sourcing_lnum;
+
+           /* "np" points to the first char after the line (CR, NL or white
+            * space). */
+           for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np))
+               ;
+           if (np >= rbuf_end)
            {
-               /* Incomplete line (or end of file). */
+               /* Incomplete line or end of file. */
                rest = np - p;
                if (len == 0)
-                   EMSG2(_("E751: Truncated spell file: %s"), fname);
+                   EMSG(_("E751: Truncated spell file"));
                break;
            }
            *np = NUL;      /* terminate the line with a NUL */
 
-           /* Skip comment and empty lines. */
-           c = *p;
-           if (c != '#' && np > p)
+           if (*p == '-')
            {
-               if (c == '=' || c == '+')
+               /*
+                * Region marker: ---, -xx, -xx-yy, etc.
+                */
+               ++p;
+               if (*p == '-')
                {
-                   garray_T *gap;
-
-                   /* Match or Add item. */
-                   if (c == '=')
-                       gap = &load_lp->sl_match;
-                   else
-                       gap = &load_lp->sl_add;
-
-                   if (ga_grow(gap, 1) == OK)
+                   if (p[1] != '-' || p[2] != NUL)
                    {
-                       for (n = 0; n < gap->ga_len; ++n)
-                           if ((c = STRCMP(p + 1,
-                                               MATCH_ENTRY(gap, n) + 1)) < 0)
-                               break;
-                       if (c == 0)
-                       {
-                           if (p_verbose > 0)
-                               smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
-                                                               p + 1, fname);
-                       }
-                       else
-                       {
-                           mch_memmove((char_u **)gap->ga_data + n + 1,
-                                   (char_u **)gap->ga_data + n,
-                                   (gap->ga_len - n) * sizeof(char_u *));
-                           *(((char_u **)gap->ga_data) + n) = p;
-                           *p = region;
-                           ++gap->ga_len;
-                       }
+                       EMSG2(_(e_invchar2), p - 1);
+                       len = 0;
+                       break;
                    }
+                   region = REGION_ALL;
                }
-               else if (c == '-')
+               else
                {
-                   /* region item */
-                   ++p;
-                   if (*p == '-')
-                       /* end of a region */
-                       region = REGION_ALL;
-                   else
-                   {
-                       char_u  *rp = load_lp->sl_regions;
-                       int     r;
+                   char_u      *rp = load_lp->sl_regions;
+                   int         r;
 
-                       /* The region may be repeated: "-ca-uk".  Fill
-                        * "region" with the bit mask for the ones we find. */
-                       region = 0;
-                       for (;;)
+                   /* Start of a region.  The region may be repeated:
+                    * "-ca-uk".  Fill "region" with the bit mask for the
+                    * ones we find. */
+                   region = 0;
+                   for (;;)
+                   {
+                       r = find_region(rp, p);
+                       if (r == REGION_ALL)
                        {
-                           /* start of a region */
-                           r = find_region(rp, p);
-                           if (r == REGION_ALL)
+                           /* new region, add it to sl_regions[] */
+                           r = STRLEN(rp);
+                           if (r >= 16)
                            {
-                               /* new region, add it */
-                               r = STRLEN(rp);
-                               if (r >= 12)
-                               {
-                                   EMSG2(_("E752: Too many regions in %s"),
-                                                                      fname);
-                                   r = REGION_ALL;
-                               }
-                               else
-                               {
-                                   rp[r] = p[0];
-                                   rp[r + 1] = p[1];
-                                   rp[r + 2] = NUL;
-                                   r = 1 << (r / 2);
-                               }
+                               EMSG2(_("E752: Too many regions: %s"), p);
+                               len = 0;
+                               break;
                            }
                            else
-                               r = 1 << r;
+                           {
+                               rp[r] = p[0];
+                               rp[r + 1] = p[1];
+                               rp[r + 2] = NUL;
+                               r = 1 << (r / 2);
+                           }
+                       }
+                       else
+                           r = 1 << r;
 
-                           region |= r;
-                           if (p[2] != '-')
+                       region |= r;
+                       if (p[2] != '-')
+                       {
+                           if (p[2] > ' ')
                            {
-                               if (p[2] != NUL)
-                                   EMSG2(_("E753: Invalid character in \"%s\""),
-                                                                      p - 1);
-                               break;
+                               EMSG2(_(e_invchar2), p - 1);
+                               len = 0;
                            }
-                           p += 3;
+                           break;
                        }
+                       p += 3;
                    }
                }
+           }
+           else if (*p != '#' && *p != NUL)
+           {
+               /*
+                * Not an empty line or comment.
+                */
+               if (*p == '!')
+               {
+                   wi = &load_lp->sl_kwords;       /* keep case */
+                   ++p;
+               }
                else
+                   wi = &load_lp->sl_fwords;       /* fold case */
+
+               flags = 0;
+               c = *p;
+               if (c == '>')           /* rare word */
                {
-                   /* add the word */
-                   if (c == '>')
-                       c = region | RARE_MASK;
-                   else
+                   flags = DW_RARE;
+                   ++p;
+               }
+               else if (*p == '+')     /* addition */
+                   ++p;
+
+               if (c != '+' && !spell_iswordc(p))
+               {
+                   EMSG2(_(e_invchar2), p);
+                   len = 0;
+                   break;
+               }
+
+               /* Make sure there is room for the word.  Folding case may
+                * double the size. */
+               wlen = np - p;
+               if (bl == NULL || bl_used + sizeof(dword_T) + wlen
+#ifdef FEAT_MBYTE
+                                           * (has_mbyte ? 2 : 1)
+#endif
+                                                           >= SBLOCKSIZE)
+               {
+                   /* Allocate a block of memory to store the dword_T in.
+                    * This is not freed until spell_reload() is called. */
+                   bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T)
+                                                          + SBLOCKSIZE));
+                   if (bl == NULL)
                    {
-                       if (c != ' ')
-                           EMSG2(_("E753: Invalid character in \"%s\""), p);
-                       c = region;
+                       len = 0;
+                       break;
                    }
+                   bl->sb_next = load_lp->sl_block;
+                   load_lp->sl_block = bl;
+                   bl_used = 0;
+               }
+               dw = (dword_T *)(bl->sb_data + bl_used);
+
+               /* For fold-case words fold the case and check whether the
+                * word starts with an uppercase letter. */
+               if (wi == &load_lp->sl_fwords)
+               {
 #ifdef FEAT_MBYTE
-                   if (MB_ISUPPER(mb_ptr2char(p + 1)))
+                   if (MB_ISUPPER(mb_ptr2char(p)))
 #else
-                   if (MB_ISUPPER(p[1]))
+                   if (MB_ISUPPER(*p))
 #endif
-                       c |= CASE_MASK;
-                   *p++ = c;
-                   (void)str_foldcase(p, np - p, word, MAXWLEN + 1);
-                   n = STRLEN(word);
-                   if (n > np - p)
-                   {
-                       sblock_T        *s;
+                       flags |= DW_CAP;
+
+                   /* Fold case. */
+                   (void)str_foldcase(p, np - p, dw->dw_word, wlen
+#ifdef FEAT_MBYTE
+                                                    * (has_mbyte ? 2 : 1)
+#endif
+                                                                    + 1);
+#ifdef FEAT_MBYTE
+                   /* case folding may change length of word */
+                   wlen = STRLEN(dw->dw_word);
+#endif
+               }
+               else
+               {
+                   /* Keep case: copy the word as-is. */
+                   mch_memmove(dw->dw_word, p, wlen + 1);
+               }
 
-                       /* Folding case made word longer!  We need to allocate
-                        * memory for it. */
-                       s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)
-                                                                    + n + 1);
-                       if (s != NULL)
+               if (c == '+')
+               {
+                   garray_T    *gap = &wi->wi_add;
+
+                   /* Addition.  TODO: search for matching entry? */
+                   if (wi->wi_addlen < wlen)
+                       wi->wi_addlen = wlen;
+                   if (ga_grow(gap, 1) == FAIL)
+                   {
+                       len = 0;
+                       break;
+                   }
+                   *(((dword_T **)gap->ga_data) + gap->ga_len) = dw;
+                   ++gap->ga_len;
+                   dw->dw_region = region;
+                   dw->dw_flags = flags;
+                   bl_used += sizeof(dword_T) + wlen;
+               }
+               else
+               {
+                   /*
+                    * Check for a non-word character.  If found it's
+                    * going to be an nword.
+                    * For an nword we split in two: the leading dword and
+                    * the remainder.  The dword goes in the hashtable
+                    * with an nword_T, the remainder is put in the
+                    * dword_T (starting with the first non-word
+                    * character).
+                    */
+                   cc = NUL;
+                   for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p))
+                       if (!spell_iswordc(p))
                        {
-                           s->sb_next = load_lp->sl_block;
-                           load_lp->sl_block = s;
-                           s->sb_data[0] = p[-1];
-                           p = s->sb_data + 1;
+                           cc = *p;
+                           *p = NUL;
+                           break;
                        }
-                   }
-                   mch_memmove(p, word, n + 1);
 
-                   hash = hash_hash(p);
-                   hi = hash_lookup(&load_lp->sl_ht, p, hash);
+                   /* check if we already have this dword */
+                   hash = hash_hash(dw->dw_word);
+                   hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash);
                    if (!HASHITEM_EMPTY(hi))
                    {
-                       c = hi->hi_key[-1];
-                       if ((c & (CASE_MASK | RARE_MASK))
-                                        == (p[-1] & (CASE_MASK | RARE_MASK)))
+                       /* Existing entry. */
+                       edw = HI2DWORD(hi);
+                       if ((edw->dw_flags & (DW_CAP | DW_RARE))
+                                  == (dw->dw_flags & (DW_CAP | DW_RARE)))
                        {
                            if (p_verbose > 0)
                                smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
-                                                                   p, fname);
+                                                     dw->dw_word, fname);
+                       }
+                   }
+
+                   if (cc != NUL) /* nword */
+                   {
+                       if (HASHITEM_EMPTY(hi)
+                                      || (edw->dw_flags & DW_NWORD) == 0)
+                       {
+                           sblock_T *sb;
+
+                           /* Need to allocate a new nword_T.  Put it in an
+                            * sblock_T, so that we can free it later. */
+                           sb = (sblock_T *)alloc(
+                                   (unsigned)(sizeof(sblock_T)
+                                              + sizeof(nword_T) + wlen));
+                           if (sb == NULL)
+                           {
+                               len = 0;
+                               break;
+                           }
+                           sb->sb_next = load_lp->sl_block;
+                           load_lp->sl_block = sb;
+                           nw = (nword_T *)sb->sb_data;
+
+                           ga_init2(&nw->nw_ga, sizeof(dword_T *), 4);
+                           nw->nw_maxlen = 0;
+                           STRCPY(nw->nw_word, dw->dw_word);
+                           if (!HASHITEM_EMPTY(hi))
+                           {
+                               /* Note: the nw_region and nw_flags are for
+                                * the dword that matches with the start
+                                * of this nword, not for the nword
+                                * itself! */
+                               nw->nw_region = edw->dw_region;
+                               nw->nw_flags = edw->dw_flags | DW_NWORD;
+
+                               /* Remove the dword item so that we can
+                                * add it as an nword. */
+                               hash_remove(&wi->wi_ht, hi);
+                               hi = hash_lookup(&wi->wi_ht,
+                                                      nw->nw_word, hash);
+                           }
+                           else
+                           {
+                               nw->nw_region = 0;
+                               nw->nw_flags = DW_NWORD;
+                           }
                        }
                        else
-                           hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK));
+                           nw = HI2NWORD(hi);
+                   }
+
+                   if (HASHITEM_EMPTY(hi))
+                   {
+                       /* Add new dword or nword entry. */
+                       hash_add_item(&wi->wi_ht, hi, cc == NUL
+                                      ? dw->dw_word : nw->nw_word, hash);
+                       if (cc == NUL)
+                       {
+                           /* New dword: init the values and count the
+                            * used space.  */
+                           dw->dw_flags = DW_DWORD | flags;
+                           dw->dw_region = region;
+                           bl_used += sizeof(dword_T) + wlen;
+                       }
+                   }
+                   else if (cc == NUL)
+                   {
+                       /* existing dword: add the region and flags */
+                       dw = edw;
+                       dw->dw_region |= region;
+                       dw->dw_flags |= DW_DWORD | flags;
+                   }
+
+                   if (cc != NUL)
+                   {
+                       /* Use the dword for the non-word character and
+                        * following characters. */
+                       dw->dw_region = region;
+                       dw->dw_flags = flags;
+                       STRCPY(dw->dw_word + 1, p + 1);
+                       dw->dw_word[0] = cc;
+                       l = wlen - (p - dw->dw_word);
+                       bl_used += sizeof(dword_T) + l;
+                       if (nw->nw_maxlen < l)
+                           nw->nw_maxlen = l;
+
+                       /* Add the dword to the growarray in the nword. */
+                       if (ga_grow(&nw->nw_ga, 1) == FAIL)
+                       {
+                           len = 0;
+                           break;
+                       }
+                       *((dword_T **)nw->nw_ga.ga_data + nw->nw_ga.ga_len)
+                                                                    = dw;
+                       ++nw->nw_ga.ga_len;
                    }
-                   else
-                       hash_add_item(&load_lp->sl_ht, hi, p, hash);
                }
            }
 
-           while (l > 0 && *np < ' ')
-           {
+           /* Skip over CR and NL characters and trailing white space. */
+           while (np < rbuf_end && *np <= ' ')
                ++np;
-               --l;
-           }
        }
     }
 
     close(fd);
+theend:
+    sourcing_name = save_sourcing_name;
+    sourcing_lnum = save_sourcing_lnum;
+    vim_free(rbuf);
 }
 
 /*
@@ -672,15 +1049,19 @@ spell_reload()
     slang_T    *lp;
     sblock_T   *sp;
 
+    /* Initialize the table for spell_iswordc(). */
+    init_spell_chartab();
+
     /* Unload all allocated memory. */
     while (first_lang != NULL)
     {
        lp = first_lang;
        first_lang = lp->sl_next;
 
-       hash_clear(&lp->sl_ht);
-       ga_clear(&lp->sl_match);
-       ga_clear(&lp->sl_add);
+       hash_clear(&lp->sl_fwords.wi_ht);
+       ga_clear(&lp->sl_fwords.wi_add);
+       hash_clear(&lp->sl_kwords.wi_ht);
+       ga_clear(&lp->sl_kwords.wi_add);
        while (lp->sl_block != NULL)
        {
            sp = lp->sl_block;