# define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log"
#endif
+/* Added to NFA_ANY - NFA_NUPPER_IC to include a NL. */
+#define NFA_ADD_NL 31
+
enum
{
NFA_SPLIT = -1024,
NFA_NLOWER, /* Match non-lowercase char */
NFA_UPPER, /* Match uppercase char */
NFA_NUPPER, /* Match non-uppercase char */
+ NFA_LOWER_IC, /* Match [a-z] */
+ NFA_NLOWER_IC, /* Match [^a-z] */
+ NFA_UPPER_IC, /* Match [A-Z] */
+ NFA_NUPPER_IC, /* Match [^A-Z] */
+
+ NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL,
+ NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL,
NFA_CURSOR, /* Match cursor pos */
NFA_LNUM, /* Match line number */
NFA_MARK_LT, /* Match < mark */
NFA_VISUAL, /* Match Visual area */
- NFA_FIRST_NL = NFA_ANY + ADD_NL,
- NFA_LAST_NL = NFA_NUPPER + ADD_NL,
-
/* Character classes [:alnum:] etc */
NFA_CLASS_ALNUM,
NFA_CLASS_ALPHA,
* On failure, return 0 (=FAIL)
* Start points to the first char of the range, while end should point
* to the closing brace.
+ * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
+ * need to be interpreted as [a-zA-Z].
*/
static int
nfa_recognize_char_class(start, end, extra_newl)
return FAIL;
if (newl == TRUE)
- extra_newl = ADD_NL;
+ extra_newl = NFA_ADD_NL;
switch (config)
{
case CLASS_not | CLASS_az | CLASS_AZ:
return extra_newl + NFA_NALPHA;
case CLASS_az:
- return extra_newl + NFA_LOWER;
+ return extra_newl + NFA_LOWER_IC;
case CLASS_not | CLASS_az:
- return extra_newl + NFA_NLOWER;
+ return extra_newl + NFA_NLOWER_IC;
case CLASS_AZ:
- return extra_newl + NFA_UPPER;
+ return extra_newl + NFA_UPPER_IC;
case CLASS_not | CLASS_AZ:
- return extra_newl + NFA_NUPPER;
+ return extra_newl + NFA_NUPPER_IC;
}
return FAIL;
}
break;
}
- extra = ADD_NL;
+ extra = NFA_ADD_NL;
/* "\_[" is collection plus newline */
if (c == '[')
}
#endif
EMIT(nfa_classcodes[p - classchars]);
- if (extra == ADD_NL)
+ if (extra == NFA_ADD_NL)
{
EMIT(NFA_NEWL);
EMIT(NFA_OR);
{
/*
* Try to reverse engineer character classes. For example,
- * recognize that [0-9] stands for \d and [A-Za-z_] with \h,
+ * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
* and perform the necessary substitutions in the NFA.
*/
result = nfa_recognize_char_class(regparse, endp,
- extra == ADD_NL);
+ extra == NFA_ADD_NL);
if (result != FAIL)
{
- if (result >= NFA_DIGIT && result <= NFA_NUPPER)
- EMIT(result);
- else /* must be char class + newline */
+ if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL)
{
- EMIT(result - ADD_NL);
+ EMIT(result - NFA_ADD_NL);
EMIT(NFA_NEWL);
EMIT(NFA_OR);
}
+ else
+ EMIT(result);
regparse = endp;
mb_ptr_adv(regparse);
return OK;
* collection, add an OR below. But not for negated
* range. */
if (!negated)
- extra = ADD_NL;
+ extra = NFA_ADD_NL;
}
else
{
EMIT(NFA_END_COLL);
/* \_[] also matches \n but it's not negated */
- if (extra == ADD_NL)
+ if (extra == NFA_ADD_NL)
{
EMIT(reg_string ? NL : NFA_NEWL);
EMIT(NFA_OR);
if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
{
addnl = TRUE;
- c -= ADD_NL;
+ c -= NFA_ADD_NL;
}
STRCPY(code, "");
case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break;
case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
+ case NFA_LOWER_IC: STRCPY(code, "NFA_LOWER_IC"); break;
+ case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break;
+ case NFA_UPPER_IC: STRCPY(code, "NFA_UPPER_IC"); break;
+ case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break;
default:
STRCPY(code, "CHAR(x)");
case NFA_NLOWER:
case NFA_UPPER:
case NFA_NUPPER:
+ case NFA_LOWER_IC:
+ case NFA_NLOWER_IC:
+ case NFA_UPPER_IC:
+ case NFA_NUPPER_IC:
/* possibly non-ascii */
#ifdef FEAT_MBYTE
if (has_mbyte)
case NFA_NLOWER:
case NFA_UPPER:
case NFA_NUPPER:
+ case NFA_LOWER_IC:
+ case NFA_NLOWER_IC:
+ case NFA_UPPER_IC:
+ case NFA_NUPPER_IC:
case NFA_START_COLL:
case NFA_START_NEG_COLL:
case NFA_NEWL:
ADD_STATE_IF_MATCH(t->state);
break;
+ case NFA_LOWER_IC: /* [a-z] */
+ result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
+ ADD_STATE_IF_MATCH(t->state);
+ break;
+
+ case NFA_NLOWER_IC: /* [^a-z] */
+ result = curc != NUL
+ && !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
+ ADD_STATE_IF_MATCH(t->state);
+ break;
+
+ case NFA_UPPER_IC: /* [A-Z] */
+ result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
+ ADD_STATE_IF_MATCH(t->state);
+ break;
+
+ case NFA_NUPPER_IC: /* ^[A-Z] */
+ result = curc != NUL
+ && !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
+ ADD_STATE_IF_MATCH(t->state);
+ break;
+
case NFA_BACKREF1:
case NFA_BACKREF2:
case NFA_BACKREF3:
:call add(tl, [2, '.a\%$', " a\n "])
:call add(tl, [2, '.a\%$', " a\n_a", "_a"])
:"
-:"""" Test recognition of some character classes
-:call add(tl, [2, '[0-9]', '8', '8'])
-:call add(tl, [2, '[^0-9]', '8'])
-:call add(tl, [2, '[0-9a-fA-F]*', '0a7', '0a7'])
-:call add(tl, [2, '[^0-9A-Fa-f]\+', '0a7'])
-:call add(tl, [2, '[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij'])
-:call add(tl, [2, '[a-z]', 'a', 'a'])
-:call add(tl, [2, '[a-zA-Z]', 'a', 'a'])
-:call add(tl, [2, '[A-Z]', 'a'])
+:"""" Test recognition of character classes
+:call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567'])
+:call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89'])
+:call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789'])
+:call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% '])
+:call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef'])
+:call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% '])
+:call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij'])
+:call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% '])
+:call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ'])
+:call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% '])
+:call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz'])
+:call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz'])
+:call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% '])
+:call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%'])
+:call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ'])
+:call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ '])
+:call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ'])
+:call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%'])
+:call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ'])
+:call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ'])
+:call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ '])
+:call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ '])
:call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])
:"
:"""" Tests for \z features