granicus.if.org Git - vim/commitdiff
updated for version 7.3.1137 v7.3.1137
author Bram Moolenaar <Bram@vim.org>
Fri, 7 Jun 2013 12:08:30 +0000 (14:08 +0200)
committer Bram Moolenaar <Bram@vim.org>
Fri, 7 Jun 2013 12:08:30 +0000 (14:08 +0200)
Problem:    New regexp engine: collections are slow.
Solution:   Handle all characters in one go.

src/regexp_nfa.c
src/version.c

index 42030ac0b4dbc0d0d67f148964c5ef3ed09ffc31..254b3d9318df0478bf1336b1472b1bc5f96e849c 100644 (file)
@@ -34,15 +34,23 @@ enum
     NFA_SPLIT = -1024,
     NFA_MATCH,
     NFA_SKIP_CHAR,                 /* matches a 0-length char */
-    NFA_END_NEG_RANGE,             /* Used when expanding [^ab] */
 
-    NFA_CONCAT,
+    NFA_START_COLL,                /* [abc] start */
+    NFA_END_COLL,                  /* [abc] end */
+    NFA_START_NEG_COLL,                    /* [^abc] start */
+    NFA_END_NEG_COLL,              /* [^abc] end (only used in postfix) */
+    NFA_RANGE,                     /* range of the two previous items (only
+                                    * used in postfix) */
+    NFA_RANGE_MIN,                 /* low end of a range  */
+    NFA_RANGE_MAX,                 /* high end of a range  */
+
+    NFA_CONCAT,                            /* concatenate two previous items (only
+                                    * used in postfix) */
     NFA_OR,
     NFA_STAR,                      /* greedy * */
     NFA_STAR_NONGREEDY,                    /* non-greedy * */
     NFA_QUEST,                     /* greedy \? */
     NFA_QUEST_NONGREEDY,           /* non-greedy \? */
-    NFA_NOT,                       /* used for [^ab] negated char ranges */
 
     NFA_BOL,                       /* ^    Begin line */
     NFA_EOL,                       /* $    End line */
@@ -260,7 +268,7 @@ static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
 static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
 static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
-static int nfa_emit_equi_class __ARGS((int c, int neg));
+static int nfa_emit_equi_class __ARGS((int c));
 static int nfa_regatom __ARGS((void));
 static int nfa_regpiece __ARGS((void));
 static int nfa_regconcat __ARGS((void));
@@ -664,21 +672,10 @@ nfa_recognize_char_class(start, end, extra_newl)
  * NOTE! When changing this function, also update reg_equi_class()
  */
     static int
-nfa_emit_equi_class(c, neg)
+nfa_emit_equi_class(c)
     int            c;
-    int            neg;
 {
-    int        first = TRUE;
-    int        glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
-#define EMIT2(c)               \
-       EMIT(c);                \
-       if (neg == TRUE) {      \
-           EMIT(NFA_NOT);      \
-       }                       \
-       if (first == FALSE)     \
-           EMIT(glue);         \
-       else                    \
-           first = FALSE;      \
+#define EMIT2(c)   EMIT(c); EMIT(NFA_CONCAT);
 
 #ifdef FEAT_MBYTE
     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
@@ -687,84 +684,84 @@ nfa_emit_equi_class(c, neg)
     {
        switch (c)
        {
-           case 'A': case '\300': case '\301': case '\302':
-           case '\303': case '\304': case '\305':
-                   EMIT2('A');     EMIT2('\300');  EMIT2('\301');
-                   EMIT2('\302');  EMIT2('\303');  EMIT2('\304');
-                   EMIT2('\305');
+           case 'A': case 0300: case 0301: case 0302:
+           case 0303: case 0304: case 0305:
+                   EMIT2('A');     EMIT2(0300);  EMIT2(0301);
+                   EMIT2(0302);  EMIT2(0303);  EMIT2(0304);
+                   EMIT2(0305);
                    return OK;
 
-           case 'C': case '\307':
-                   EMIT2('C');     EMIT2('\307');
+           case 'C': case 0307:
+                   EMIT2('C');     EMIT2(0307);
                    return OK;
 
-           case 'E': case '\310': case '\311': case '\312': case '\313':
-                   EMIT2('E');     EMIT2('\310');  EMIT2('\311');
-                   EMIT2('\312');  EMIT2('\313');
+           case 'E': case 0310: case 0311: case 0312: case 0313:
+                   EMIT2('E');     EMIT2(0310);  EMIT2(0311);
+                   EMIT2(0312);  EMIT2(0313);
                    return OK;
 
-           case 'I': case '\314': case '\315': case '\316': case '\317':
-                   EMIT2('I');     EMIT2('\314');  EMIT2('\315');
-                   EMIT2('\316');  EMIT2('\317');
+           case 'I': case 0314: case 0315: case 0316: case 0317:
+                   EMIT2('I');     EMIT2(0314);  EMIT2(0315);
+                   EMIT2(0316);  EMIT2(0317);
                    return OK;
 
-           case 'N': case '\321':
-                   EMIT2('N');     EMIT2('\321');
+           case 'N': case 0321:
+                   EMIT2('N');     EMIT2(0321);
                    return OK;
 
-           case 'O': case '\322': case '\323': case '\324': case '\325':
-           case '\326':
-                   EMIT2('O');     EMIT2('\322');  EMIT2('\323');
-                   EMIT2('\324');  EMIT2('\325');  EMIT2('\326');
+           case 'O': case 0322: case 0323: case 0324: case 0325:
+           case 0326:
+                   EMIT2('O');     EMIT2(0322);  EMIT2(0323);
+                   EMIT2(0324);  EMIT2(0325);  EMIT2(0326);
                    return OK;
 
-           case 'U': case '\331': case '\332': case '\333': case '\334':
-                   EMIT2('U');     EMIT2('\331');  EMIT2('\332');
-                   EMIT2('\333');  EMIT2('\334');
+           case 'U': case 0331: case 0332: case 0333: case 0334:
+                   EMIT2('U');     EMIT2(0331);  EMIT2(0332);
+                   EMIT2(0333);  EMIT2(0334);
                    return OK;
 
-           case 'Y': case '\335':
-                   EMIT2('Y');     EMIT2('\335');
+           case 'Y': case 0335:
+                   EMIT2('Y');     EMIT2(0335);
                    return OK;
 
-           case 'a': case '\340': case '\341': case '\342':
-           case '\343': case '\344': case '\345':
-                   EMIT2('a');     EMIT2('\340');  EMIT2('\341');
-                   EMIT2('\342');  EMIT2('\343');  EMIT2('\344');
-                   EMIT2('\345');
+           case 'a': case 0340: case 0341: case 0342:
+           case 0343: case 0344: case 0345:
+                   EMIT2('a');     EMIT2(0340);  EMIT2(0341);
+                   EMIT2(0342);  EMIT2(0343);  EMIT2(0344);
+                   EMIT2(0345);
                    return OK;
 
-           case 'c': case '\347':
-                   EMIT2('c');     EMIT2('\347');
+           case 'c': case 0347:
+                   EMIT2('c');     EMIT2(0347);
                    return OK;
 
-           case 'e': case '\350': case '\351': case '\352': case '\353':
-                   EMIT2('e');     EMIT2('\350');  EMIT2('\351');
-                   EMIT2('\352');  EMIT2('\353');
+           case 'e': case 0350: case 0351: case 0352: case 0353:
+                   EMIT2('e');     EMIT2(0350);  EMIT2(0351);
+                   EMIT2(0352);  EMIT2(0353);
                    return OK;
 
-           case 'i': case '\354': case '\355': case '\356': case '\357':
-                   EMIT2('i');     EMIT2('\354');  EMIT2('\355');
-                   EMIT2('\356');  EMIT2('\357');
+           case 'i': case 0354: case 0355: case 0356: case 0357:
+                   EMIT2('i');     EMIT2(0354);  EMIT2(0355);
+                   EMIT2(0356);  EMIT2(0357);
                    return OK;
 
-           case 'n': case '\361':
-                   EMIT2('n');     EMIT2('\361');
+           case 'n': case 0361:
+                   EMIT2('n');     EMIT2(0361);
                    return OK;
 
-           case 'o': case '\362': case '\363': case '\364': case '\365':
-           case '\366':
-                   EMIT2('o');     EMIT2('\362');  EMIT2('\363');
-                   EMIT2('\364');  EMIT2('\365');  EMIT2('\366');
+           case 'o': case 0362: case 0363: case 0364: case 0365:
+           case 0366:
+                   EMIT2('o');     EMIT2(0362);  EMIT2(0363);
+                   EMIT2(0364);  EMIT2(0365);  EMIT2(0366);
                    return OK;
 
-           case 'u': case '\371': case '\372': case '\373': case '\374':
-                   EMIT2('u');     EMIT2('\371');  EMIT2('\372');
-                   EMIT2('\373');  EMIT2('\374');
+           case 'u': case 0371: case 0372: case 0373: case 0374:
+                   EMIT2('u');     EMIT2(0371);  EMIT2(0372);
+                   EMIT2(0373);  EMIT2(0374);
                    return OK;
 
-           case 'y': case '\375': case '\377':
-                   EMIT2('y');     EMIT2('\375');  EMIT2('\377');
+           case 'y': case 0375: case 0377:
+                   EMIT2('y');     EMIT2(0375);  EMIT2(0377);
                    return OK;
 
            default:
@@ -811,14 +808,12 @@ nfa_regatom()
     char_u     *old_regparse = regparse;
 #endif
     int                extra = 0;
-    int                first;
     int                emit_range;
     int                negated;
     int                result;
     int                startc = -1;
     int                endc = -1;
     int                oldstartc = -1;
-    int                glue;           /* ID that will "glue" nodes together */
 
     c = getchr();
     switch (c)
@@ -927,8 +922,8 @@ nfa_regatom()
 
        case Magic('n'):
            if (reg_string)
-           /* In a string "\n" matches a newline character. */
-           EMIT(NL);
+               /* In a string "\n" matches a newline character. */
+               EMIT(NL);
            else
            {
                /* In buffer text "\n" matches the end of a line. */
@@ -1160,32 +1155,15 @@ nfa_regatom()
        case Magic('['):
 collection:
            /*
-            * Glue is emitted between several atoms from the [].
-            * It is either NFA_OR, or NFA_CONCAT.
-            *
-            * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
-            * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
-            *          NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
-            *          notation)
-            *
+            * [abc]  uses NFA_START_COLL - NFA_END_COLL
+            * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
+            * Each character is produced as a regular state, using
+            * NFA_CONCAT to bind them together.
+            * Besides normal characters there can be:
+            * - character classes  NFA_CLASS_*
+            * - ranges, two characters followed by NFA_RANGE.
             */
 
-
-/* Emit negation atoms, if needed.
- * The CONCAT below merges the NOT with the previous node. */
-#define TRY_NEG()                  \
-           if (negated == TRUE)    \
-           {                       \
-               EMIT(NFA_NOT);      \
-           }
-
-/* Emit glue between important nodes : CONCAT or OR. */
-#define EMIT_GLUE()                \
-           if (first == FALSE)     \
-               EMIT(glue);         \
-           else                    \
-               first = FALSE;
-
            p = regparse;
            endp = skip_anyof(p);
            if (*endp == ']')
@@ -1216,21 +1194,20 @@ collection:
                 * version that turns [abc] into 'a' OR 'b' OR 'c'
                 */
                startc = endc = oldstartc = -1;
-               first = TRUE;       /* Emitting first atom in this sequence? */
                negated = FALSE;
-               glue = NFA_OR;
                if (*regparse == '^')                   /* negated range */
                {
                    negated = TRUE;
-                   glue = NFA_CONCAT;
                    mb_ptr_adv(regparse);
+                   EMIT(NFA_START_NEG_COLL);
                }
+               else
+                   EMIT(NFA_START_COLL);
                if (*regparse == '-')
                {
                    startc = '-';
                    EMIT(startc);
-                   TRY_NEG();
-                   EMIT_GLUE();
+                   EMIT(NFA_CONCAT);
                    mb_ptr_adv(regparse);
                }
                /* Emit the OR branches for each character in the [] */
@@ -1306,20 +1283,18 @@ collection:
                                    EMIT(NFA_CLASS_ESCAPE);
                                    break;
                            }
-                           TRY_NEG();
-                           EMIT_GLUE();
+                           EMIT(NFA_CONCAT);
                            continue;
                        }
                        /* Try equivalence class [=a=] and the like */
                        if (equiclass != 0)
                        {
-                           result = nfa_emit_equi_class(equiclass, negated);
+                           result = nfa_emit_equi_class(equiclass);
                            if (result == FAIL)
                            {
                                /* should never happen */
                                EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
                            }
-                           EMIT_GLUE();
                            continue;
                        }
                        /* Try collating class like [. .]  */
@@ -1391,19 +1366,32 @@ collection:
                        startc = oldstartc;
                        if (startc > endc)
                            EMSG_RET_FAIL(_(e_invrange));
+
+                       if (endc > startc + 2)
+                       {
+                           /* Emit a range instead of the sequence of
+                            * individual characters. */
+                           if (startc == 0)
+                               /* \x00 is translated to \x0a, start at \x01. */
+                               EMIT(1);
+                           else
+                               --post_ptr; /* remove NFA_CONCAT */
+                           EMIT(endc);
+                           EMIT(NFA_RANGE);
+                           EMIT(NFA_CONCAT);
+                       }
+                       else
 #ifdef FEAT_MBYTE
-                       if (has_mbyte && ((*mb_char2len)(startc) > 1
+                            if (has_mbyte && ((*mb_char2len)(startc) > 1
                                    || (*mb_char2len)(endc) > 1))
                        {
-                           if (endc > startc + 256)
-                               EMSG_RET_FAIL(_(e_invrange));
-                           /* Emit the range. "startc" was already emitted, so
-                            * skip it. */
+                           /* Emit the characters in the range.
+                            * "startc" was already emitted, so skip it.
+                            * */
                            for (c = startc + 1; c <= endc; c++)
                            {
                                EMIT(c);
-                               TRY_NEG();
-                               EMIT_GLUE();
+                               EMIT(NFA_CONCAT);
                            }
                        }
                        else
@@ -1425,8 +1413,7 @@ collection:
 #endif
                                {
                                    EMIT(c);
-                                   TRY_NEG();
-                                   EMIT_GLUE();
+                                   EMIT(NFA_CONCAT);
                                }
                        }
                        emit_range = FALSE;
@@ -1434,23 +1421,29 @@ collection:
                    }
                    else
                    {
-                       /*
-                        * This char (startc) is not part of a range. Just
+                       /* This char (startc) is not part of a range. Just
                         * emit it.
-                        *
                         * Normally, simply emit startc. But if we get char
                         * code=0 from a collating char, then replace it with
                         * 0x0a.
-                        *
                         * This is needed to completely mimic the behaviour of
-                        * the backtracking engine.
-                        */
-                       if (got_coll_char == TRUE && startc == 0)
-                           EMIT(0x0a);
+                        * the backtracking engine. */
+                       if (startc == NFA_NEWL)
+                       {
+                           /* Line break can't be matched as part of the
+                            * collection, add an OR below. But not for negated
+                            * range. */
+                           if (!negated)
+                               extra = ADD_NL;
+                       }
                        else
-                           EMIT(startc);
-                       TRY_NEG();
-                       EMIT_GLUE();
+                       {
+                           if (got_coll_char == TRUE && startc == 0)
+                               EMIT(0x0a);
+                           else
+                               EMIT(startc);
+                           EMIT(NFA_CONCAT);
+                       }
                    }
 
                    mb_ptr_adv(regparse);
@@ -1460,20 +1453,19 @@ collection:
                if (*regparse == '-')       /* if last, '-' is just a char */
                {
                    EMIT('-');
-                   TRY_NEG();
-                   EMIT_GLUE();
+                   EMIT(NFA_CONCAT);
                }
                mb_ptr_adv(regparse);
 
                /* skip the trailing ] */
                regparse = endp;
                mb_ptr_adv(regparse);
+
+               /* Mark end of the collection. */
                if (negated == TRUE)
-               {
-                   /* Mark end of negated char range */
-                   EMIT(NFA_END_NEG_RANGE);
-                   EMIT(NFA_CONCAT);
-               }
+                   EMIT(NFA_END_NEG_COLL);
+               else
+                   EMIT(NFA_END_COLL);
 
                /* \_[] also matches \n but it's not negated */
                if (extra == ADD_NL)
@@ -1532,9 +1524,6 @@ nfa_do_multibyte:
            }
     }
 
-#undef TRY_NEG
-#undef EMIT_GLUE
-
     return OK;
 }
 
@@ -2091,10 +2080,17 @@ nfa_set_code(c)
        case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
        case NFA_QUEST:         STRCPY(code, "NFA_QUEST"); break;
        case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
-       case NFA_NOT:           STRCPY(code, "NFA_NOT "); break;
        case NFA_SKIP_CHAR:     STRCPY(code, "NFA_SKIP_CHAR"); break;
        case NFA_OR:            STRCPY(code, "NFA_OR"); break;
-       case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
+
+       case NFA_START_COLL:    STRCPY(code, "NFA_START_COLL"); break;
+       case NFA_END_COLL:      STRCPY(code, "NFA_END_COLL"); break;
+       case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
+       case NFA_END_NEG_COLL:  STRCPY(code, "NFA_END_NEG_COLL"); break;
+       case NFA_RANGE:         STRCPY(code, "NFA_RANGE"); break;
+       case NFA_RANGE_MIN:     STRCPY(code, "NFA_RANGE_MIN"); break;
+       case NFA_RANGE_MAX:     STRCPY(code, "NFA_RANGE_MAX"); break;
+
        case NFA_CLASS_ALNUM:   STRCPY(code, "NFA_CLASS_ALNUM"); break;
        case NFA_CLASS_ALPHA:   STRCPY(code, "NFA_CLASS_ALPHA"); break;
        case NFA_CLASS_BLANK:   STRCPY(code, "NFA_CLASS_BLANK"); break;
@@ -2231,8 +2227,12 @@ nfa_print_state2(debugf, state, indent)
        fprintf(debugf, " %s", p);
 
     nfa_set_code(state->c);
-    fprintf(debugf, "%s%s (%d) (id=%d)\n",
-                state->negated ? "NOT " : "", code, state->c, abs(state->id));
+    fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
+                state->negated ? "NOT " : "",
+                code,
+                state->c,
+                abs(state->id),
+                state->val);
     if (state->id < 0)
        return;
 
@@ -2325,6 +2325,7 @@ alloc_state(c, out, out1)
     s->c    = c;
     s->out  = out;
     s->out1 = out1;
+    s->val  = 0;
 
     s->id   = istate;
     s->lastlist[0] = 0;
@@ -2565,13 +2566,10 @@ post2nfa(postfix, end, nfa_calc_size)
        switch (*p)
        {
        case NFA_CONCAT:
-           /* Catenation.
-            * Pay attention: this operator does not exist
-            * in the r.e. itself (it is implicit, really).
-            * It is added when r.e. is translated to postfix
-            * form in re2post().
-            *
-            * No new state added here. */
+           /* Concatenation.
+            * Pay attention: this operator does not exist in the r.e. itself
+            * (it is implicit, really).  It is added when r.e. is translated
+            * to postfix form in re2post(). */
            if (nfa_calc_size == TRUE)
            {
                /* nstate += 0; */
@@ -2583,22 +2581,6 @@ post2nfa(postfix, end, nfa_calc_size)
            PUSH(frag(e1.start, e2.out));
            break;
 
-       case NFA_NOT:
-           /* Negation of a character */
-           if (nfa_calc_size == TRUE)
-           {
-               /* nstate += 0; */
-               break;
-           }
-           e1 = POP();
-           e1.start->negated = TRUE;
-#ifdef FEAT_MBYTE
-           if (e1.start->c == NFA_COMPOSING)
-               e1.start->out1->negated = TRUE;
-#endif
-           PUSH(e1);
-           break;
-
        case NFA_OR:
            /* Alternation */
            if (nfa_calc_size == TRUE)
@@ -2672,6 +2654,43 @@ post2nfa(postfix, end, nfa_calc_size)
            PUSH(frag(s, append(e.out, list1(&s->out))));
            break;
 
+       case NFA_END_COLL:
+       case NFA_END_NEG_COLL:
+           /* On the stack is the sequence starting with NFA_START_COLL or
+            * NFA_START_NEG_COLL and all possible characters. Patch it to
+            * add the output to the start. */
+           if (nfa_calc_size == TRUE)
+           {
+               nstate++;
+               break;
+           }
+           e = POP();
+           s = alloc_state(NFA_END_COLL, NULL, NULL);
+           if (s == NULL)
+               goto theend;
+           patch(e.out, s);
+           e.start->out1 = s;
+           PUSH(frag(e.start, list1(&s->out)));
+           break;
+
+       case NFA_RANGE:
+           /* Before this are two characters, the low and high end of a
+            * range.  Turn them into two states with MIN and MAX. */
+           if (nfa_calc_size == TRUE)
+           {
+               /* nstate += 0; */
+               break;
+           }
+           e2 = POP();
+           e1 = POP();
+           e2.start->val = e2.start->c;
+           e2.start->c = NFA_RANGE_MAX;
+           e1.start->val = e1.start->c;
+           e1.start->c = NFA_RANGE_MIN;
+           patch(e1.out, e2.start);
+           PUSH(frag(e1.start, e2.out));
+           break;
+
        case NFA_SKIP_CHAR:
            /* Symbol of 0-length, Used in a repetition
             * with max/min count of 0 */
@@ -2990,6 +3009,8 @@ post2nfa(postfix, end, nfa_calc_size)
     matchstate = &state_ptr[istate++]; /* the match state */
     matchstate->c = NFA_MATCH;
     matchstate->out = matchstate->out1 = NULL;
+    matchstate->negated = FALSE;
+    matchstate->id = 0;
 
     patch(e.out, matchstate);
     ret = e.start;
@@ -3308,7 +3329,6 @@ addstate(l, state, subs, off)
     switch (state->c)
     {
        case NFA_SPLIT:
-       case NFA_NOT:
        case NFA_NOPEN:
        case NFA_SKIP_CHAR:
        case NFA_NCLOSE:
@@ -3782,7 +3802,8 @@ check_char_class(class, c)
 
        default:
            /* should not be here :P */
-           EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
+           EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
+           return FAIL;
     }
     return FAIL;
 }
@@ -4320,8 +4341,8 @@ nfa_regmatch(prog, start, submatch, m)
     addstate(thislist, start, m, 0);
 
     /* There are two cases when the NFA advances: 1. input char matches the
-     * NFA node and 2. input char does not match the NFA node, but the next
-     * node is NFA_NOT. The following macro calls addstate() according to
+     * NFA node and 2. input char does not match the NFA node and the state
+     * has the negated flag. The following macro calls addstate() according to
      * these rules. It is used A LOT, so use the "listtbl" table for speed */
     listtbl[0][0] = NULL;
     listtbl[0][1] = neglist;
@@ -4845,16 +4866,79 @@ nfa_regmatch(prog, start, submatch, m)
                ADD_POS_NEG_STATE(t->state);
                break;
 
-           case NFA_END_NEG_RANGE:
-               /* This follows a series of negated nodes, like:
-                * NOT CHAR(x), NOT CHAR(y), etc. */
-               if (curc > 0)
+           case NFA_START_COLL:
+           case NFA_START_NEG_COLL:
+             {
+               /* What follows is a list of characters, until NFA_END_COLL.
+                * One of them must match or none of them must match. */
+               nfa_state_T     *state;
+               int             result_if_matched;
+               int             c1, c2;
+
+               /* Never match EOL. If it's part of the collection it is added
+                * as a separate state with an OR. */
+               if (curc == NUL)
+                   break;
+
+               state = t->state->out;
+               result_if_matched = (t->state->c == NFA_START_COLL);
+               for (;;)
                {
+                   if (state->c == NFA_END_COLL)
+                   {
+                       result = !result_if_matched;
+                       break;
+                   }
+                   if (state->c == NFA_RANGE_MIN)
+                   {
+                       c1 = state->val;
+                       state = state->out; /* advance to NFA_RANGE_MAX */
+                       c2 = state->val;
+#ifdef ENABLE_LOG
+                       fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
+                               curc, c1, c2);
+#endif
+                       if (curc >= c1 && curc <= c2)
+                       {
+                           result = result_if_matched;
+                           break;
+                       }
+                       if (ireg_ic)
+                       {
+                           int curc_low = MB_TOLOWER(curc);
+                           int done = FALSE;
+
+                           for ( ; c1 <= c2; ++c1)
+                               if (MB_TOLOWER(c1) == curc_low)
+                               {
+                                   result = result_if_matched;
+                                   done = TRUE;
+                                   break;
+                               }
+                           if (done)
+                               break;
+                       }
+                   }
+                   else if (state->c < 0 ? check_char_class(state->c, curc)
+                               : (curc == state->c
+                                  || (ireg_ic && MB_TOLOWER(curc)
+                                                   == MB_TOLOWER(state->c))))
+                   {
+                       result = result_if_matched;
+                       break;
+                   }
+                   state = state->out;
+               }
+               if (result)
+               {
+                   /* next state is in out of the NFA_END_COLL, out1 of
+                    * START points to the END state */
                    ll = nextlist;
-                   add_state = t->state->out;
+                   add_state = t->state->out1->out;
                    add_off = clen;
                }
                break;
+             }
 
            case NFA_ANY:
                /* Any char except '\0', (end of input) does not match. */
index 76bdd6cf8a676a71fe47d94722b981ccb8045636..51887f4ae9c7570cb0a8535009966d19805d7954 100644 (file)
@@ -728,6 +728,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1137,
 /**/
     1136,
 /**/