src/ir/dfa/state.h \
src/ir/dfa/dfa.h \
src/ir/dfa/action.h \
+ src/ir/regexp/encoding/case.h \
src/ir/regexp/encoding/enc.h \
src/ir/regexp/encoding/range_suffix.h \
src/ir/regexp/encoding/utf8/utf8.h \
--- /dev/null
+#ifndef _RE2C_IR_REGEXP_ENCODING_CASE_
+#define _RE2C_IR_REGEXP_ENCODING_CASE_
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c {
+
+// TODO: support non-ASCII encodings (the helpers below only recognize 'a'..'z' and 'A'..'Z')
+bool is_alpha (uint32_t c); // true iff c lies in 'a'..'z' or 'A'..'Z'
+uint32_t to_lower_unsafe (uint32_t c); // returns c | 0x20; does not check that c is a letter
+uint32_t to_upper_unsafe (uint32_t c); // returns c & ~0x20; does not check that c is a letter
+
+// Tells whether code point c is an ASCII letter, i.e. lies in
+// 'a'..'z' or in 'A'..'Z'. Every other value yields false.
+inline bool is_alpha (uint32_t c)
+{
+	const bool is_lower = 'a' <= c && c <= 'z';
+	const bool is_upper = 'A' <= c && c <= 'Z';
+	return is_lower || is_upper;
+}
+
+// Sets bit 0x20 of c, which maps 'A'..'Z' onto 'a'..'z' and keeps
+// 'a'..'z' unchanged. "Unsafe": the bit is set unconditionally, so the
+// result is only a case conversion when c is a letter (see is_alpha).
+inline uint32_t to_lower_unsafe (uint32_t c)
+{
+	const uint32_t case_bit = 0x20u;
+	return c | case_bit;
+}
+
+// Clears bit 0x20 of c, which maps 'a'..'z' onto 'A'..'Z' and keeps
+// 'A'..'Z' unchanged. "Unsafe": the bit is cleared unconditionally, so
+// the result is only a case conversion when c is a letter (see is_alpha).
+inline uint32_t to_upper_unsafe (uint32_t c)
+{
+	const uint32_t case_bit = 0x20u;
+	return c & ~case_bit;
+}
+
+}
+
+#endif // _RE2C_IR_REGEXP_ENCODING_CASE_
+#include "src/ir/regexp/encoding/case.h"
#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
#include "src/ir/regexp/regexp.h"
RegExp * Scanner::strToCaseInsensitiveRE (SubStr & s) const
{
- if (s.len == 0)
- return new NullOp;
-
- uint32_t c = unescape(s);
-
- RegExp *re, *reL, *reU;
-
- if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
- {
- reL = matchSymbol(tolower(c));
- reU = matchSymbol(toupper(c));
- re = mkAlt(reL, reU);
- }
- else
- {
- re = matchSymbol(c);
- }
-
+ RegExp * r = NULL; // regexp built so far, one piece per symbol of s; NULL until the loop assigns it
while (s.len > 0)
{
- c = unescape(s);
-
- if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ const uint32_t c = unescape (s); // next symbol; presumably unescape shortens s (the loop depends on s.len reaching 0) - TODO confirm
+ if (is_alpha (c)) // ASCII letters must match in either case
{
- reL = matchSymbol(tolower(c));
- reU = matchSymbol(toupper(c));
- re = new CatOp(re, mkAlt(reL, reU));
+ RegExp * rl = matchSymbol (to_lower_unsafe (c)); // lower-case variant (c is a letter here, so the bit trick is valid)
+ RegExp * ru = matchSymbol (to_upper_unsafe (c)); // upper-case variant
+ r = doCat (r, mkAlt (rl, ru)); // append "rl or ru"; r is NULL on the first pass - presumably doCat accepts that, TODO confirm
}
else
{
- re = new CatOp(re, matchSymbol(c));
+ r = doCat (r, matchSymbol (c)); // non-letters are appended as-is
}
}
-
- return re;
+ return r ? r : new NullOp; // r is still NULL when the loop body never ran (s.len == 0 on entry): return NullOp
}
Range * Scanner::mkRange(SubStr &s) const