]> granicus.if.org Git - vim/commitdiff
updated for version 7.3.1088 v7.3.1088
authorBram Moolenaar <Bram@vim.org>
Sat, 1 Jun 2013 17:54:43 +0000 (19:54 +0200)
committerBram Moolenaar <Bram@vim.org>
Sat, 1 Jun 2013 17:54:43 +0000 (19:54 +0200)
Problem:    New regexp engine: \@<= and \@<! are not implemented.
Solution:   Implement look-behind matching.  Fix off-by-one error in old
            regexp engine.

src/regexp.c
src/regexp_nfa.c
src/testdir/test64.in
src/testdir/test64.ok
src/version.c

index cfeee4c27dfbe02fb022bc01c27a2da35a03a9e9..3eaf74dd93b2daac89dcc0d0caa30b2b36a9ded1 100644 (file)
@@ -5576,7 +5576,14 @@ regmatch(scan)
                limit = OPERAND_MIN(rp->rs_scan);
                if (REG_MULTI)
                {
-                   if (rp->rs_un.regsave.rs_u.pos.col == 0)
+                   if (limit > 0
+                           && ((rp->rs_un.regsave.rs_u.pos.lnum
+                                                   < behind_pos.rs_u.pos.lnum
+                                   ? (colnr_T)STRLEN(regline)
+                                   : behind_pos.rs_u.pos.col)
+                               - rp->rs_un.regsave.rs_u.pos.col >= limit))
+                       no = FAIL;
+                   else if (rp->rs_un.regsave.rs_u.pos.col == 0)
                    {
                        if (rp->rs_un.regsave.rs_u.pos.lnum
                                        < behind_pos.rs_u.pos.lnum
@@ -5601,13 +5608,6 @@ regmatch(scan)
                        else
 #endif
                            --rp->rs_un.regsave.rs_u.pos.col;
-                       if (limit > 0
-                               && ((rp->rs_un.regsave.rs_u.pos.lnum
-                                                   < behind_pos.rs_u.pos.lnum
-                                       ? (colnr_T)STRLEN(regline)
-                                       : behind_pos.rs_u.pos.col)
-                                   - rp->rs_un.regsave.rs_u.pos.col > limit))
-                           no = FAIL;
                    }
                }
                else
index fffe4a0cc297c51875422bdb68ff40693b7f2f44..17865d6d2cfe9a85a45d6974995f912a7c0a5a10 100644 (file)
@@ -56,6 +56,7 @@ enum
     NFA_NOPEN,                     /* Start of subexpression marked with \%( */
     NFA_NCLOSE,                            /* End of subexpr. marked with \%( ... \) */
     NFA_START_INVISIBLE,
+    NFA_START_INVISIBLE_BEFORE,
     NFA_END_INVISIBLE,
     NFA_COMPOSING,                 /* Next nodes in NFA are part of the
                                       composing multibyte char */
@@ -1369,34 +1370,43 @@ nfa_regpiece()
            break;
 
        case Magic('@'):
+           c2 = getdecchrs();
            op = no_Magic(getchr());
+           i = 0;
            switch(op)
            {
                case '=':
-                   EMIT(NFA_PREV_ATOM_NO_WIDTH);
+                   /* \@= */
+                   i = NFA_PREV_ATOM_NO_WIDTH;
                    break;
                case '!':
-                   EMIT(NFA_PREV_ATOM_NO_WIDTH_NEG);
+                   /* \@! */
+                   i = NFA_PREV_ATOM_NO_WIDTH_NEG;
                    break;
-               case '0':
-               case '1':
-               case '2':
-               case '3':
-               case '4':
-               case '5':
-               case '6':
-               case '7':
-               case '8':
-               case '9':
                case '<':
+                   op = no_Magic(getchr());
+                   if (op == '=')
+                       /* \@<= */
+                       i = NFA_PREV_ATOM_JUST_BEFORE;
+                   else if (op == '!')
+                       /* \@<! */
+                       i = NFA_PREV_ATOM_JUST_BEFORE_NEG;
+                   break;
                case '>':
-                   /* Not supported yet */
-                   return FAIL;
-               default:
-                   syntax_error = TRUE;
-                   EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op);
+                   /* \@> Not supported yet */
+                   /* i = NFA_PREV_ATOM_LIKE_PATTERN; */
                    return FAIL;
            }
+           if (i == 0)
+           {
+               syntax_error = TRUE;
+               EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op);
+               return FAIL;
+           }
+           EMIT(i);
+           if (i == NFA_PREV_ATOM_JUST_BEFORE
+                                       || i == NFA_PREV_ATOM_JUST_BEFORE_NEG)
+               EMIT(c2);
            break;
 
        case Magic('?'):
@@ -1734,9 +1744,15 @@ nfa_set_code(c)
                            STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
        case NFA_PREV_ATOM_NO_WIDTH_NEG:
                            STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break;
+       case NFA_PREV_ATOM_JUST_BEFORE:
+                           STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break;
+       case NFA_PREV_ATOM_JUST_BEFORE_NEG:
+                        STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break;
        case NFA_NOPEN:             STRCPY(code, "NFA_NOPEN"); break;
        case NFA_NCLOSE:            STRCPY(code, "NFA_NCLOSE"); break;
        case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
+       case NFA_START_INVISIBLE_BEFORE:
+                           STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
        case NFA_END_INVISIBLE:     STRCPY(code, "NFA_END_INVISIBLE"); break;
 
        case NFA_COMPOSING:         STRCPY(code, "NFA_COMPOSING"); break;
@@ -2237,7 +2253,7 @@ post2nfa(postfix, end, nfa_calc_size)
     if (nfa_calc_size == FALSE)
     {
        /* Allocate space for the stack. Max states on the stack : nstate */
-       stack = (Frag_T *) lalloc((nstate + 1) * sizeof(Frag_T), TRUE);
+       stack = (Frag_T *)lalloc((nstate + 1) * sizeof(Frag_T), TRUE);
        stackp = stack;
        stack_end = stack + (nstate + 1);
     }
@@ -2370,8 +2386,12 @@ post2nfa(postfix, end, nfa_calc_size)
 
        case NFA_PREV_ATOM_NO_WIDTH:
        case NFA_PREV_ATOM_NO_WIDTH_NEG:
+       case NFA_PREV_ATOM_JUST_BEFORE:
+       case NFA_PREV_ATOM_JUST_BEFORE_NEG:
            /* The \@= operator: match the preceding atom with zero width.
             * The \@! operator: no match for the preceding atom.
+            * The \@<= operator: match for the preceding atom.
+            * The \@<! operator: no match for the preceding atom.
             * Surrounds the preceding atom with START_INVISIBLE and
             * END_INVISIBLE, similarly to MOPEN. */
 
@@ -2389,11 +2409,18 @@ post2nfa(postfix, end, nfa_calc_size)
            s = new_state(NFA_START_INVISIBLE, e.start, s1);
            if (s == NULL)
                goto theend;
-           if (*p == NFA_PREV_ATOM_NO_WIDTH_NEG)
+           if (*p == NFA_PREV_ATOM_NO_WIDTH_NEG
+                                      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG)
            {
                s->negated = TRUE;
                s1->negated = TRUE;
            }
+           if (*p == NFA_PREV_ATOM_JUST_BEFORE
+                                      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG)
+           {
+               s->val = *++p; /* get the count */
+               ++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */
+           }
 
            PUSH(frag(s, list1(&s1->out)));
            break;
@@ -3307,21 +3334,24 @@ nfa_re_num_cmp(val, op, pos)
     return val == pos;
 }
 
-static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
+static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m, save_se_T *endp));
 
 /*
  * Main matching routine.
  *
  * Run NFA to determine whether it matches reginput.
  *
+ * When "endp" is not NULL it is a required end-of-match position.
+ *
  * Return TRUE if there is a match, FALSE otherwise.
  * Note: Caller must ensure that: start != NULL.
  */
     static int
-nfa_regmatch(start, submatch, m)
+nfa_regmatch(start, submatch, m, endp)
     nfa_state_T                *start;
     regsub_T           *submatch;
     regsub_T           *m;
+    save_se_T          *endp;
 {
     int                result;
     int                size = 0;
@@ -3532,16 +3562,42 @@ nfa_regmatch(start, submatch, m)
              }
 
            case NFA_END_INVISIBLE:
-               /* This is only encountered after a NFA_START_INVISIBLE node.
-                * They surround a zero-width group, used with "\@=" and "\&".
+               /* This is only encountered after a NFA_START_INVISIBLE or
+                * NFA_START_INVISIBLE_BEFORE node.
+                * They surround a zero-width group, used with "\@=", "\&",
+                * "\@!", "\@<=" and "\@<!".
                 * If we got here, it means that the current "invisible" group
                 * finished successfully, so return control to the parent
                 * nfa_regmatch().  Submatches are stored in *m, and used in
                 * the parent call. */
                if (start->c == NFA_MOPEN + 0)
+                   /* TODO: do we ever get here? */
                    addstate_here(thislist, t->state->out, &t->sub, &listidx);
                else
                {
+#ifdef ENABLE_LOG
+                   if (endp != NULL)
+                   {
+                       if (REG_MULTI)
+                           fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
+                                   (int)reglnum,
+                                   (int)endp->se_u.pos.lnum,
+                                   (int)(reginput - regline),
+                                   endp->se_u.pos.col);
+                       else
+                           fprintf(log_fd, "Current col: %d, endp col: %d\n",
+                                   (int)(reginput - regline),
+                                   (int)(endp->se_u.ptr - reginput));
+                   }
+#endif
+                   /* It's only a match if it ends at "endp" */
+                   if (endp != NULL && (REG_MULTI
+                           ? (reglnum != endp->se_u.pos.lnum
+                               || (int)(reginput - regline)
+                                                       != endp->se_u.pos.col)
+                           : reginput != endp->se_u.ptr))
+                       break;
+
                    /* do not set submatches for \@! */
                    if (!t->state->negated)
                        /* TODO: only copy positions in use. */
@@ -3551,11 +3607,70 @@ nfa_regmatch(start, submatch, m)
                break;
 
            case NFA_START_INVISIBLE:
+           case NFA_START_INVISIBLE_BEFORE:
              {
-               char_u  *save_reginput = reginput;
-               char_u  *save_regline = regline;
-               int     save_reglnum = reglnum;
-               int     save_nfa_match = nfa_match;
+               char_u      *save_reginput = reginput;
+               char_u      *save_regline = regline;
+               int         save_reglnum = reglnum;
+               int         save_nfa_match = nfa_match;
+               save_se_T   endpos;
+               save_se_T   *endposp = NULL;
+
+               if (t->state->c == NFA_START_INVISIBLE_BEFORE)
+               {
+                   /* The recursive match must end at the current position. */
+                   endposp = &endpos;
+                   if (REG_MULTI)
+                   {
+                       endpos.se_u.pos.col = (int)(reginput - regline);
+                       endpos.se_u.pos.lnum = reglnum;
+                   }
+                   else
+                       endpos.se_u.ptr = reginput;
+
+                   /* Go back the specified number of bytes, or as far as the
+                    * start of the previous line, to try matching "\@<=" or
+                    * not matching "\@<!". */
+                   if (t->state->val <= 0)
+                   {
+                       if (REG_MULTI)
+                       {
+                           regline = reg_getline(--reglnum);
+                           if (regline == NULL)
+                               /* can't go before the first line */
+                               regline = reg_getline(++reglnum);
+                       }
+                       reginput = regline;
+                   }
+                   else
+                   {
+                       if (REG_MULTI
+                               && (int)(reginput - regline) < t->state->val)
+                       {
+                           /* Not enough bytes in this line, go to end of
+                            * previous line. */
+                           regline = reg_getline(--reglnum);
+                           if (regline == NULL)
+                           {
+                               /* can't go before the first line */
+                               regline = reg_getline(++reglnum);
+                               reginput = regline;
+                           }
+                           else
+                               reginput = regline + STRLEN(regline);
+                       }
+                       if ((int)(reginput - regline) >= t->state->val)
+                       {
+                           reginput -= t->state->val;
+#ifdef FEAT_MBYTE
+                           if (has_mbyte)
+                               reginput -= mb_head_off(regline, reginput);
+#endif
+                       }
+                       else
+                           reginput = regline;
+                   }
+               }
 
                /* Call nfa_regmatch() to check if the current concat matches
                 * at this position. The concat ends with the node
@@ -3579,7 +3694,7 @@ nfa_regmatch(start, submatch, m)
                 * recursion. */
                nfa_save_listids(start, listids);
                nfa_set_null_listids(start);
-               result = nfa_regmatch(t->state->out, submatch, m);
+               result = nfa_regmatch(t->state->out, submatch, m, endposp);
                nfa_set_neg_listids(start);
                nfa_restore_listids(start, listids);
 
@@ -4120,11 +4235,21 @@ nfa_regmatch(start, submatch, m)
         * matters!
         * Do not add the start state in recursive calls of nfa_regmatch(),
         * because recursive calls should only start in the first position.
+        * Unless "endp" is not NULL, then we match the end position.
         * Also don't start a match past the first line. */
-       if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
-               && reglnum == 0 && clen != 0
-               && (ireg_maxcol == 0
-                             || (colnr_T)(reginput - regline) < ireg_maxcol))
+       if (nfa_match == FALSE
+               && ((start->c == NFA_MOPEN + 0
+                       && reglnum == 0
+                       && clen != 0
+                       && (ireg_maxcol == 0
+                           || (colnr_T)(reginput - regline) < ireg_maxcol))
+                   || (endp != NULL
+                       && (REG_MULTI
+                           ? (reglnum < endp->se_u.pos.lnum
+                              || (reglnum == endp->se_u.pos.lnum
+                                  && (int)(reginput - regline)
+                                                      < endp->se_u.pos.col))
+                           : reginput < endp->se_u.ptr))))
        {
 #ifdef ENABLE_LOG
            fprintf(log_fd, "(---) STARTSTATE\n");
@@ -4148,7 +4273,8 @@ nextchar:
         * finish. */
        if (clen != 0)
            reginput += clen;
-       else if (go_to_nextline)
+       else if (go_to_nextline || (endp != NULL && REG_MULTI
+                                           && reglnum < endp->se_u.pos.lnum))
            reg_nextline();
        else
            break;
@@ -4225,7 +4351,7 @@ nfa_regtry(start, col)
     sub.in_use = 0;
     m.in_use = 0;
 
-    if (nfa_regmatch(start, &sub, &m) == FALSE)
+    if (nfa_regmatch(start, &sub, &m, NULL) == FALSE)
        return 0;
 
     cleanup_subexpr();
index 962f60ebe37da9cbf9f5af8ebbec1f9fea0fb798..78909246f79f14148f38c0b1262b5e5ab74d97c8 100644 (file)
@@ -363,12 +363,13 @@ STARTTEST
 :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
 :"
 :"""" Look-behind with limit
-:call add(tl, [0, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
-:call add(tl, [0, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
-:call add(tl, [0, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
-:call add(tl, [0, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
-:call add(tl, [0, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
-:call add(tl, [0, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+:call add(tl, [2, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [2, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [2, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [2, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+:call add(tl, [2, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
+:call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+:call add(tl, [2, '\(foo\)\@<!bar.', 'xx foobar1 xbar2 xx', 'bar2'])
 :"
 :"""" "\_" prepended negated collection matches EOL
 :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
@@ -514,8 +515,8 @@ Behind:
 asdfasd<yyy
 xxstart1
 asdfasd<yy
-xxxxstart2
+xxxstart2
 asdfasd<yy
-xxxstart3
+xxstart3
 
 Results of test64:
index b014a16cb9506cb1dcdc594b2012f010a3283667..31baa4f21803dacb9ae10877b3beb7d9ddc4f614 100644 (file)
@@ -817,16 +817,25 @@ OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 OK 0 - <\@<=span.
 OK 1 - <\@<=span.
+OK 2 - <\@<=span.
 OK 0 - <\@1<=span.
 OK 1 - <\@1<=span.
+OK 2 - <\@1<=span.
 OK 0 - <\@2<=span.
 OK 1 - <\@2<=span.
+OK 2 - <\@2<=span.
 OK 0 - \(<<\)\@<=span.
 OK 1 - \(<<\)\@<=span.
+OK 2 - \(<<\)\@<=span.
 OK 0 - \(<<\)\@1<=span.
 OK 1 - \(<<\)\@1<=span.
+OK 2 - \(<<\)\@1<=span.
 OK 0 - \(<<\)\@2<=span.
 OK 1 - \(<<\)\@2<=span.
+OK 2 - \(<<\)\@2<=span.
+OK 0 - \(foo\)\@<!bar.
+OK 1 - \(foo\)\@<!bar.
+OK 2 - \(foo\)\@<!bar.
 OK 0 - \_[^8-9]\+
 OK 1 - \_[^8-9]\+
 OK 2 - \_[^8-9]\+
@@ -844,7 +853,7 @@ OK 2 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
 <T="7">Ac 7</Title>
 ghi
 
-xxxstart3
+xxstart3
 -0-
 ffo
 bob
index 641fb6793ade831fea4051925e0358e8d1e3dca1..0427db3f2fbf2c8d8761f6b3798b23760054f491 100644 (file)
@@ -728,6 +728,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1088,
 /**/
     1087,
 /**/