updated for version 7.4.001

author Bram Moolenaar <Bram@vim.org>

Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)

committer Bram Moolenaar <Bram@vim.org>

Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)
author Bram Moolenaar <Bram@vim.org>
Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)
committer Bram Moolenaar <Bram@vim.org>
Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c

index 35b42ef9cf1c239b4fcf4a817525dce8ee619325..0e172c8ca12634268593e30d527df596c14e1b96 100644 (file)
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -29,6 +29,9 @@
  # define NFA_REGEXP_DEBUG_LOG  "nfa_regexp_debug.log"
  #endif
  
+/* Added to the codes NFA_ANY through NFA_NUPPER_IC to also match a NL. */
+#define NFA_ADD_NL             31
+
  enum
  {
      NFA_SPLIT = -1024,
@@ -183,6 +186,13 @@ enum
      NFA_NLOWER,                /*      Match non-lowercase char */
      NFA_UPPER,         /*      Match uppercase char */
      NFA_NUPPER,                /*      Match non-uppercase char */
+    NFA_LOWER_IC,      /*      Match [a-z] */
+    NFA_NLOWER_IC,     /*      Match [^a-z] */
+    NFA_UPPER_IC,      /*      Match [A-Z] */
+    NFA_NUPPER_IC,     /*      Match [^A-Z] */
+
+    NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL,
+    NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL,
  
      NFA_CURSOR,                /*      Match cursor pos */
      NFA_LNUM,          /*      Match line number */
@@ -199,9 +209,6 @@ enum
      NFA_MARK_LT,       /*      Match < mark */
      NFA_VISUAL,                /*      Match Visual area */
  
-    NFA_FIRST_NL = NFA_ANY + ADD_NL,
-    NFA_LAST_NL = NFA_NUPPER + ADD_NL,
-
      /* Character classes [:alnum:] etc */
      NFA_CLASS_ALNUM,
      NFA_CLASS_ALPHA,
@@ -578,6 +585,8 @@ realloc_post_list()
   * On failure, return 0 (=FAIL)
   * Start points to the first char of the range, while end should point
   * to the closing brace.
+ * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
+ * need to be interpreted as [a-zA-Z].
   */
      static int
  nfa_recognize_char_class(start, end, extra_newl)
@@ -681,7 +690,7 @@ nfa_recognize_char_class(start, end, extra_newl)
         return FAIL;
  
      if (newl == TRUE)
-       extra_newl = ADD_NL;
+       extra_newl = NFA_ADD_NL;
  
      switch (config)
      {
@@ -710,13 +719,13 @@ nfa_recognize_char_class(start, end, extra_newl)
         case CLASS_not | CLASS_az | CLASS_AZ:
             return extra_newl + NFA_NALPHA;
         case CLASS_az:
-          return extra_newl + NFA_LOWER;
+          return extra_newl + NFA_LOWER_IC;
         case CLASS_not | CLASS_az:
-           return extra_newl + NFA_NLOWER;
+           return extra_newl + NFA_NLOWER_IC;
         case CLASS_AZ:
-           return extra_newl + NFA_UPPER;
+           return extra_newl + NFA_UPPER_IC;
         case CLASS_not | CLASS_AZ:
-           return extra_newl + NFA_NUPPER;
+           return extra_newl + NFA_NUPPER_IC;
      }
      return FAIL;
  }
@@ -914,7 +923,7 @@ nfa_regatom()
                 break;
             }
  
-           extra = ADD_NL;
+           extra = NFA_ADD_NL;
  
             /* "\_[" is collection plus newline */
             if (c == '[')
@@ -970,7 +979,7 @@ nfa_regatom()
             }
  #endif
             EMIT(nfa_classcodes[p - classchars]);
-           if (extra == ADD_NL)
+           if (extra == NFA_ADD_NL)
             {
                 EMIT(NFA_NEWL);
                 EMIT(NFA_OR);
@@ -1240,21 +1249,21 @@ collection:
             {
                 /*
                  * Try to reverse engineer character classes. For example,
-                * recognize that [0-9] stands for  \d and [A-Za-z_] with \h,
+                * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
                  * and perform the necessary substitutions in the NFA.
                  */
                 result = nfa_recognize_char_class(regparse, endp,
-                                                           extra == ADD_NL);
+                                                        extra == NFA_ADD_NL);
                 if (result != FAIL)
                 {
-                   if (result >= NFA_DIGIT && result <= NFA_NUPPER)
-                       EMIT(result);
-                   else        /* must be char class + newline */
+                   if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL)
                     {
-                       EMIT(result - ADD_NL);
+                       EMIT(result - NFA_ADD_NL);
                         EMIT(NFA_NEWL);
                         EMIT(NFA_OR);
                     }
+                   else
+                       EMIT(result);
                     regparse = endp;
                     mb_ptr_adv(regparse);
                     return OK;
@@ -1504,7 +1513,7 @@ collection:
                              * collection, add an OR below. But not for negated
                              * range. */
                             if (!negated)
-                               extra = ADD_NL;
+                               extra = NFA_ADD_NL;
                         }
                         else
                         {
@@ -1537,7 +1546,7 @@ collection:
                     EMIT(NFA_END_COLL);
  
                 /* \_[] also matches \n but it's not negated */
-               if (extra == ADD_NL)
+               if (extra == NFA_ADD_NL)
                 {
                     EMIT(reg_string ? NL : NFA_NEWL);
                     EMIT(NFA_OR);
@@ -2011,7 +2020,7 @@ nfa_set_code(c)
      if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
      {
         addnl = TRUE;
-       c -= ADD_NL;
+       c -= NFA_ADD_NL;
      }
  
      STRCPY(code, "");
@@ -2217,6 +2226,10 @@ nfa_set_code(c)
         case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
         case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break;
         case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
+       case NFA_LOWER_IC:  STRCPY(code, "NFA_LOWER_IC"); break;
+       case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break;
+       case NFA_UPPER_IC:  STRCPY(code, "NFA_UPPER_IC"); break;
+       case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break;
  
         default:
             STRCPY(code, "CHAR(x)");
@@ -2687,6 +2700,10 @@ nfa_max_width(startstate, depth)
             case NFA_NLOWER:
             case NFA_UPPER:
             case NFA_NUPPER:
+           case NFA_LOWER_IC:
+           case NFA_NLOWER_IC:
+           case NFA_UPPER_IC:
+           case NFA_NUPPER_IC:
                 /* possibly non-ascii */
  #ifdef FEAT_MBYTE
                 if (has_mbyte)
@@ -3841,6 +3858,10 @@ match_follows(startstate, depth)
             case NFA_NLOWER:
             case NFA_UPPER:
             case NFA_NUPPER:
+           case NFA_LOWER_IC:
+           case NFA_NLOWER_IC:
+           case NFA_UPPER_IC:
+           case NFA_NUPPER_IC:
             case NFA_START_COLL:
             case NFA_START_NEG_COLL:
             case NFA_NEWL:
@@ -5872,6 +5893,28 @@ nfa_regmatch(prog, start, submatch, m)
                 ADD_STATE_IF_MATCH(t->state);
                 break;
  
+           case NFA_LOWER_IC:  /* [a-z] */
+               result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
+               ADD_STATE_IF_MATCH(t->state);
+               break;
+
+           case NFA_NLOWER_IC: /* [^a-z] */
+               result = curc != NUL
+                         && !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
+               ADD_STATE_IF_MATCH(t->state);
+               break;
+
+           case NFA_UPPER_IC:  /* [A-Z] */
+               result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
+               ADD_STATE_IF_MATCH(t->state);
+               break;
+
+           case NFA_NUPPER_IC: /* [^A-Z] */
+               result = curc != NUL
+                         && !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
+               ADD_STATE_IF_MATCH(t->state);
+               break;
+
             case NFA_BACKREF1:
             case NFA_BACKREF2:
             case NFA_BACKREF3:
diff --git a/src/testdir/test64.in b/src/testdir/test64.in

index b0b1c911a344e8ac7c13a6a7dbf952f075f73a38..b29fe76f212c6247246c72f18ede08b5c7725d09 100644 (file)
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -289,15 +289,29 @@ STARTTEST
  :call add(tl, [2, '.a\%$', " a\n "])
  :call add(tl, [2, '.a\%$', " a\n_a", "_a"])
  :"
-:"""" Test recognition of some character classes
-:call add(tl, [2, '[0-9]', '8', '8'])
-:call add(tl, [2, '[^0-9]', '8'])
-:call add(tl, [2, '[0-9a-fA-F]*', '0a7', '0a7'])
-:call add(tl, [2, '[^0-9A-Fa-f]\+', '0a7'])
-:call add(tl, [2, '[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij'])
-:call add(tl, [2, '[a-z]', 'a', 'a'])
-:call add(tl, [2, '[a-zA-Z]', 'a', 'a'])
-:call add(tl, [2, '[A-Z]', 'a'])
+:"""" Test recognition of character classes
+:call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567'])
+:call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89'])
+:call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789'])
+:call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% '])
+:call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef'])
+:call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% '])
+:call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij'])
+:call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% '])
+:call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ'])
+:call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% '])
+:call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz'])
+:call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz'])
+:call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% '])
+:call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%'])
+:call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ'])
+:call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ '])
+:call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ'])
+:call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%'])
+:call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ'])
+:call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ'])
+:call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ '])
+:call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ '])
  :call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])
  :"
  :"""" Tests for \z features
diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok

index 2df568d9b0dff92acb6c9b760190e6cfc810bd2e..733ad870335781f0a97a902501d06ef098b5b49a 100644 (file)
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -650,30 +650,72 @@ OK 2 - .a\%$
  OK 0 - .a\%$
  OK 1 - .a\%$
  OK 2 - .a\%$
-OK 0 - [0-9]
-OK 1 - [0-9]
-OK 2 - [0-9]
-OK 0 - [^0-9]
-OK 1 - [^0-9]
-OK 2 - [^0-9]
-OK 0 - [0-9a-fA-F]*
-OK 1 - [0-9a-fA-F]*
-OK 2 - [0-9a-fA-F]*
+OK 0 - [0-7]\+
+OK 1 - [0-7]\+
+OK 2 - [0-7]\+
+OK 0 - [^0-7]\+
+OK 1 - [^0-7]\+
+OK 2 - [^0-7]\+
+OK 0 - [0-9]\+
+OK 1 - [0-9]\+
+OK 2 - [0-9]\+
+OK 0 - [^0-9]\+
+OK 1 - [^0-9]\+
+OK 2 - [^0-9]\+
+OK 0 - [0-9a-fA-F]\+
+OK 1 - [0-9a-fA-F]\+
+OK 2 - [0-9a-fA-F]\+
  OK 0 - [^0-9A-Fa-f]\+
  OK 1 - [^0-9A-Fa-f]\+
  OK 2 - [^0-9A-Fa-f]\+
  OK 0 - [a-z_A-Z0-9]\+
  OK 1 - [a-z_A-Z0-9]\+
  OK 2 - [a-z_A-Z0-9]\+
-OK 0 - [a-z]
-OK 1 - [a-z]
-OK 2 - [a-z]
-OK 0 - [a-zA-Z]
-OK 1 - [a-zA-Z]
-OK 2 - [a-zA-Z]
-OK 0 - [A-Z]
-OK 1 - [A-Z]
-OK 2 - [A-Z]
+OK 0 - [^a-z_A-Z0-9]\+
+OK 1 - [^a-z_A-Z0-9]\+
+OK 2 - [^a-z_A-Z0-9]\+
+OK 0 - [a-z_A-Z]\+
+OK 1 - [a-z_A-Z]\+
+OK 2 - [a-z_A-Z]\+
+OK 0 - [^a-z_A-Z]\+
+OK 1 - [^a-z_A-Z]\+
+OK 2 - [^a-z_A-Z]\+
+OK 0 - [a-z]\+
+OK 1 - [a-z]\+
+OK 2 - [a-z]\+
+OK 0 - [a-z]\+
+OK 1 - [a-z]\+
+OK 2 - [a-z]\+
+OK 0 - [^a-z]\+
+OK 1 - [^a-z]\+
+OK 2 - [^a-z]\+
+OK 0 - [^a-z]\+
+OK 1 - [^a-z]\+
+OK 2 - [^a-z]\+
+OK 0 - [a-zA-Z]\+
+OK 1 - [a-zA-Z]\+
+OK 2 - [a-zA-Z]\+
+OK 0 - [^a-zA-Z]\+
+OK 1 - [^a-zA-Z]\+
+OK 2 - [^a-zA-Z]\+
+OK 0 - [A-Z]\+
+OK 1 - [A-Z]\+
+OK 2 - [A-Z]\+
+OK 0 - [^A-Z]\+
+OK 1 - [^A-Z]\+
+OK 2 - [^A-Z]\+
+OK 0 - [a-z]\+\c
+OK 1 - [a-z]\+\c
+OK 2 - [a-z]\+\c
+OK 0 - [A-Z]\+\c
+OK 1 - [A-Z]\+\c
+OK 2 - [A-Z]\+\c
+OK 0 - \c[^a-z]\+
+OK 1 - \c[^a-z]\+
+OK 2 - \c[^a-z]\+
+OK 0 - \c[^A-Z]\+
+OK 1 - \c[^A-Z]\+
+OK 2 - \c[^A-Z]\+
  OK 0 - \C[^A-Z]\+
  OK 1 - \C[^A-Z]\+
  OK 2 - \C[^A-Z]\+
diff --git a/src/version.c b/src/version.c

index f063557653d41aa8fcb749441ab32ab597e8f0ad..8eccd3cf08d8b0aa55cd7dc1e4595a1c9087cc92 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -727,6 +727,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    1,
  /**/
      0
  };
author	Bram Moolenaar <Bram@vim.org>
	Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)
committer	Bram Moolenaar <Bram@vim.org>
	Wed, 14 Aug 2013 10:06:49 +0000 (12:06 +0200)
src/regexp_nfa.c		patch \| blob \| history
src/testdir/test64.in		patch \| blob \| history
src/testdir/test64.ok		patch \| blob \| history
src/version.c		patch \| blob \| history