From 66babb646b50bfcfa98f2873c90398b44936a2ca Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Wed, 10 Jun 2015 17:30:15 +0100
Subject: [PATCH] Restructured sources layout, improved formatting.

---
 re2c/Makefile.am                              |   69 +-
 re2c/bootstrap/scanner_lex.cc                 |    5 +-
 re2c/src/codegen/emit.h                       |    3 +-
 re2c/src/codegen/emit_action.cc               |    4 +-
 re2c/src/codegen/emit_dfa.cc                  |    1 -
 re2c/src/codegen/go_construct.cc              |    2 +-
 re2c/src/codegen/go_destruct.cc               |    2 +-
 re2c/src/codegen/go_emit.cc                   |    2 +-
 re2c/src/codegen/go_used_labels.cc            |    2 +-
 re2c/src/codegen/output.h                     |    2 +-
 re2c/src/codegen/prepare_dfa.cc               |    3 +-
 re2c/src/codegen/scc.cc                       |    2 +-
 re2c/src/codegen/skeleton/path.h              |    2 +-
 re2c/src/codegen/skeleton/skeleton.cc         |    1 +
 re2c/src/codegen/skeleton/skeleton.h          |    2 +-
 re2c/src/dfa/actions.cc                       | 1073 -----------------
 re2c/src/dfa/encoding/utf16/utf16_regexp.h    |   16 -
 re2c/src/dfa/encoding/utf8/utf8_regexp.h      |   16 -
 re2c/src/dfa/ins.h                            |   55 -
 re2c/src/dfa/re.h                             |  310 -----
 re2c/src/globals.h                            |    2 +-
 re2c/src/ir/bytecode/bytecode.cc              |  111 ++
 re2c/src/ir/bytecode/bytecode.h               |   16 +
 re2c/src/ir/bytecode/calc_size.cc             |   72 ++
 re2c/src/ir/bytecode/charset.cc               |   32 +
 re2c/src/ir/bytecode/charset.h                |   37 +
 re2c/src/ir/bytecode/compile.cc               |  262 ++++
 re2c/src/ir/bytecode/ins.cc                   |   41 +
 re2c/src/ir/bytecode/ins.h                    |   52 +
 re2c/src/ir/bytecode/split.cc                 |   82 ++
 re2c/src/{ => ir}/dfa/action.h                |    6 +-
 re2c/src/{ => ir}/dfa/dfa.cc                  |    3 +-
 re2c/src/{ => ir}/dfa/dfa.h                   |   13 +-
 re2c/src/{ => ir}/dfa/state.h                 |   10 +-
 re2c/src/ir/regexp/display.cc                 |   57 +
 re2c/src/{dfa => ir/regexp}/encoding/enc.cc   |    2 +-
 re2c/src/{dfa => ir/regexp}/encoding/enc.h    |    6 +-
 .../regexp}/encoding/range_suffix.cc          |    5 +-
 .../regexp}/encoding/range_suffix.h           |    6 +-
 .../regexp}/encoding/utf16/utf16.cc           |    2 +-
 .../{dfa => ir/regexp}/encoding/utf16/utf16.h |    6 +-
 .../regexp}/encoding/utf16/utf16_range.cc     |    4 +-
 .../regexp}/encoding/utf16/utf16_range.h      |   10 +-
 .../regexp}/encoding/utf16/utf16_regexp.cc    |    8 +-
 .../ir/regexp/encoding/utf16/utf16_regexp.h   |   16 +
 .../{dfa => ir/regexp}/encoding/utf8/utf8.cc  |    2 +-
 .../{dfa => ir/regexp}/encoding/utf8/utf8.h   |    6 +-
 .../regexp}/encoding/utf8/utf8_range.cc       |    4 +-
 .../regexp}/encoding/utf8/utf8_range.h        |   10 +-
 .../regexp}/encoding/utf8/utf8_regexp.cc      |    8 +-
 .../src/ir/regexp/encoding/utf8/utf8_regexp.h |   16 +
 re2c/src/ir/regexp/fixed_length.cc            |   53 +
 re2c/src/ir/regexp/regexp.cc                  |  287 +++++
 re2c/src/ir/regexp/regexp.h                   |   76 ++
 re2c/src/ir/regexp/regexp_alt.h               |   32 +
 re2c/src/ir/regexp/regexp_cat.h               |   31 +
 re2c/src/ir/regexp/regexp_close.h             |   28 +
 re2c/src/ir/regexp/regexp_closev.h            |   34 +
 re2c/src/ir/regexp/regexp_match.h             |   30 +
 re2c/src/ir/regexp/regexp_null.h              |   22 +
 re2c/src/ir/regexp/regexp_rule.h              |   47 +
 re2c/src/{dfa => ir}/rule_rank.cc             |    2 +-
 re2c/src/{dfa => ir}/rule_rank.h              |    6 +-
 re2c/src/main.cc                              |    2 -
 re2c/src/parse/extop.h                        |   18 +
 re2c/src/parse/parser.h                       |    4 +-
 re2c/src/parse/parser.ypp                     |    7 +-
 re2c/src/parse/scanner.h                      |    2 +-
 re2c/src/parse/scanner_lex.re                 |    3 +-
 re2c/src/parse/unescape.cc                    |  220 ++++
 70 files changed, 1807 insertions(+), 1576 deletions(-)
 delete mode 100644 re2c/src/dfa/actions.cc
 delete mode 100644 re2c/src/dfa/encoding/utf16/utf16_regexp.h
 delete mode 100644 re2c/src/dfa/encoding/utf8/utf8_regexp.h
 delete mode 100644 re2c/src/dfa/ins.h
 delete mode 100644 re2c/src/dfa/re.h
 create mode 100644 re2c/src/ir/bytecode/bytecode.cc
 create mode 100644 re2c/src/ir/bytecode/bytecode.h
 create mode 100644 re2c/src/ir/bytecode/calc_size.cc
 create mode 100644 re2c/src/ir/bytecode/charset.cc
 create mode 100644 re2c/src/ir/bytecode/charset.h
 create mode 100644 re2c/src/ir/bytecode/compile.cc
 create mode 100644 re2c/src/ir/bytecode/ins.cc
 create mode 100644 re2c/src/ir/bytecode/ins.h
 create mode 100644 re2c/src/ir/bytecode/split.cc
 rename re2c/src/{ => ir}/dfa/action.h (93%)
 rename re2c/src/{ => ir}/dfa/dfa.cc (98%)
 rename re2c/src/{ => ir}/dfa/dfa.h (80%)
 rename re2c/src/{ => ir}/dfa/state.h (80%)
 create mode 100644 re2c/src/ir/regexp/display.cc
 rename re2c/src/{dfa => ir/regexp}/encoding/enc.cc (99%)
 rename re2c/src/{dfa => ir/regexp}/encoding/enc.h (97%)
 rename re2c/src/{dfa => ir/regexp}/encoding/range_suffix.cc (77%)
 rename re2c/src/{dfa => ir/regexp}/encoding/range_suffix.h (79%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf16/utf16.cc (82%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf16/utf16.h (82%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf16/utf16_range.cc (97%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf16/utf16_range.h (63%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf16/utf16_regexp.cc (77%)
 create mode 100644 re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h
 rename re2c/src/{dfa => ir/regexp}/encoding/utf8/utf8.cc (97%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf8/utf8.h (87%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf8/utf8_range.cc (97%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf8/utf8_range.h (55%)
 rename re2c/src/{dfa => ir/regexp}/encoding/utf8/utf8_regexp.cc (78%)
 create mode 100644 re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h
 create mode 100644 re2c/src/ir/regexp/fixed_length.cc
 create mode 100644 re2c/src/ir/regexp/regexp.cc
 create mode 100644 re2c/src/ir/regexp/regexp.h
 create mode 100644 re2c/src/ir/regexp/regexp_alt.h
 create mode 100644 re2c/src/ir/regexp/regexp_cat.h
 create mode 100644 re2c/src/ir/regexp/regexp_close.h
 create mode 100644 re2c/src/ir/regexp/regexp_closev.h
 create mode 100644 re2c/src/ir/regexp/regexp_match.h
 create mode 100644 re2c/src/ir/regexp/regexp_null.h
 create mode 100644 re2c/src/ir/regexp/regexp_rule.h
 rename re2c/src/{dfa => ir}/rule_rank.cc (94%)
 rename re2c/src/{dfa => ir}/rule_rank.h (89%)
 create mode 100644 re2c/src/parse/extop.h
 create mode 100644 re2c/src/parse/unescape.cc

diff --git a/re2c/Makefile.am b/re2c/Makefile.am
index 0f767835..59122534 100644
--- a/re2c/Makefile.am
+++ b/re2c/Makefile.am
@@ -22,22 +22,32 @@ SRC_HDR = \
 	src/codegen/scc.h \
 	src/codegen/skeleton/path.h \
 	src/codegen/skeleton/skeleton.h \
-	src/dfa/action.h \
-	src/dfa/encoding/enc.h \
-	src/dfa/encoding/range_suffix.h \
-	src/dfa/encoding/utf16/utf16.h \
-	src/dfa/encoding/utf16/utf16_range.h \
-	src/dfa/encoding/utf16/utf16_regexp.h \
-	src/dfa/encoding/utf8/utf8.h \
-	src/dfa/encoding/utf8/utf8_range.h \
-	src/dfa/encoding/utf8/utf8_regexp.h \
-	src/dfa/dfa.h \
-	src/dfa/ins.h \
-	src/dfa/re.h \
-	src/dfa/rule_rank.h \
-	src/dfa/state.h \
+	src/ir/bytecode/charset.h \
+	src/ir/bytecode/bytecode.h \
+	src/ir/bytecode/ins.h \
+	src/ir/dfa/state.h \
+	src/ir/dfa/dfa.h \
+	src/ir/dfa/action.h \
+	src/ir/regexp/regexp_alt.h \
+	src/ir/regexp/regexp_match.h \
+	src/ir/regexp/encoding/enc.h \
+	src/ir/regexp/encoding/range_suffix.h \
+	src/ir/regexp/encoding/utf8/utf8.h \
+	src/ir/regexp/encoding/utf8/utf8_regexp.h \
+	src/ir/regexp/encoding/utf8/utf8_range.h \
+	src/ir/regexp/encoding/utf16/utf16_range.h \
+	src/ir/regexp/encoding/utf16/utf16_regexp.h \
+	src/ir/regexp/encoding/utf16/utf16.h \
+	src/ir/regexp/regexp_rule.h \
+	src/ir/regexp/regexp_cat.h \
+	src/ir/regexp/regexp_closev.h \
+	src/ir/regexp/regexp_null.h \
+	src/ir/regexp/regexp.h \
+	src/ir/regexp/regexp_close.h \
+	src/ir/rule_rank.h \
 	src/globals.h \
 	src/mbo_getopt.h \
+	src/parse/extop.h \
 	src/parse/input.h \
 	src/parse/parser.h \
 	src/parse/scanner.h \
@@ -70,21 +80,30 @@ SRC = \
 	src/codegen/scc.cc \
 	src/codegen/skeleton/path.cc \
 	src/codegen/skeleton/skeleton.cc \
-	src/dfa/actions.cc \
-	src/dfa/encoding/enc.cc \
-	src/dfa/encoding/range_suffix.cc \
-	src/dfa/encoding/utf16/utf16.cc \
-	src/dfa/encoding/utf16/utf16_range.cc \
-	src/dfa/encoding/utf16/utf16_regexp.cc \
-	src/dfa/encoding/utf8/utf8.cc \
-	src/dfa/encoding/utf8/utf8_range.cc \
-	src/dfa/encoding/utf8/utf8_regexp.cc \
-	src/dfa/dfa.cc \
-	src/dfa/rule_rank.cc \
+	src/ir/bytecode/bytecode.cc \
+	src/ir/bytecode/ins.cc \
+	src/ir/bytecode/charset.cc \
+	src/ir/bytecode/split.cc \
+	src/ir/bytecode/compile.cc \
+	src/ir/bytecode/calc_size.cc \
+	src/ir/dfa/dfa.cc \
+	src/ir/regexp/display.cc \
+	src/ir/regexp/encoding/enc.cc \
+	src/ir/regexp/encoding/range_suffix.cc \
+	src/ir/regexp/encoding/utf8/utf8_regexp.cc \
+	src/ir/regexp/encoding/utf8/utf8_range.cc \
+	src/ir/regexp/encoding/utf8/utf8.cc \
+	src/ir/regexp/encoding/utf16/utf16_regexp.cc \
+	src/ir/regexp/encoding/utf16/utf16.cc \
+	src/ir/regexp/encoding/utf16/utf16_range.cc \
+	src/ir/regexp/fixed_length.cc \
+	src/ir/regexp/regexp.cc \
+	src/ir/rule_rank.cc \
 	src/main.cc \
 	src/mbo_getopt.cc \
 	src/parse/input.cc \
 	src/parse/scanner.cc \
+	src/parse/unescape.cc \
 	src/util/range.cc
 re2c_SOURCES = \
 	$(SRC_HDR) \
diff --git a/re2c/bootstrap/scanner_lex.cc b/re2c/bootstrap/scanner_lex.cc
index 469393ad..48160f72 100644
--- a/re2c/bootstrap/scanner_lex.cc
+++ b/re2c/bootstrap/scanner_lex.cc
@@ -1,11 +1,12 @@
-/* Generated by re2c 0.14.1.dev on Thu May 14 17:19:41 2015*/
+/* Generated by re2c 0.14.2 on Wed Jun 10 17:27:57 2015 */
 #include <stdlib.h>
 #include <string.h>
 #include <iostream>
 #include <sstream>
 
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 #include "src/globals.h"
+#include "src/parse/extop.h"
 #include "src/parse/parser.h"
 #include "src/parse/scanner.h"
 #include "y.tab.h"
diff --git a/re2c/src/codegen/emit.h b/re2c/src/codegen/emit.h
index c616175e..5c78ef18 100644
--- a/re2c/src/codegen/emit.h
+++ b/re2c/src/codegen/emit.h
@@ -2,11 +2,10 @@
 #define _RE2C_CODEGEN_EMIT_
 
 #include "src/codegen/output.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 
 namespace re2c {
 
-class DFA;
 typedef std::vector<std::string> RegExpIndices;
 
 void emit_action
diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc
index 87f33ad8..e5d75afd 100644
--- a/re2c/src/codegen/emit_action.cc
+++ b/re2c/src/codegen/emit_action.cc
@@ -1,8 +1,8 @@
 #include "src/codegen/emit.h"
 #include "src/codegen/indent.h"
 #include "src/codegen/input_api.h"
-#include "src/codegen/skeleton/skeleton.h"
-#include "src/dfa/action.h"
+#include "src/ir/dfa/action.h"
+#include "src/ir/regexp/regexp_rule.h"
 
 namespace re2c
 {
diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc
index 147ce6da..11653a34 100644
--- a/re2c/src/codegen/emit_dfa.cc
+++ b/re2c/src/codegen/emit_dfa.cc
@@ -6,7 +6,6 @@
 #include "src/codegen/indent.h"
 #include "src/codegen/input_api.h"
 #include "src/codegen/skeleton/skeleton.h"
-#include "src/dfa/dfa.h"
 
 namespace re2c
 {
diff --git a/re2c/src/codegen/go_construct.cc b/re2c/src/codegen/go_construct.cc
index 321f3344..398f4a80 100644
--- a/re2c/src/codegen/go_construct.cc
+++ b/re2c/src/codegen/go_construct.cc
@@ -1,6 +1,6 @@
 #include "src/codegen/bitmap.h"
 #include "src/codegen/go.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 #include "src/util/allocate.h"
 
 namespace re2c
diff --git a/re2c/src/codegen/go_destruct.cc b/re2c/src/codegen/go_destruct.cc
index e53cedfb..041945d8 100644
--- a/re2c/src/codegen/go_destruct.cc
+++ b/re2c/src/codegen/go_destruct.cc
@@ -1,5 +1,5 @@
 #include "src/codegen/go.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 
 namespace re2c
 {
diff --git a/re2c/src/codegen/go_emit.cc b/re2c/src/codegen/go_emit.cc
index 5da68ad6..9c5d33c6 100644
--- a/re2c/src/codegen/go_emit.cc
+++ b/re2c/src/codegen/go_emit.cc
@@ -2,7 +2,7 @@
 #include "src/codegen/go.h"
 #include "src/codegen/indent.h"
 #include "src/codegen/print.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 
 namespace re2c
 {
diff --git a/re2c/src/codegen/go_used_labels.cc b/re2c/src/codegen/go_used_labels.cc
index d17ab92e..850f270a 100644
--- a/re2c/src/codegen/go_used_labels.cc
+++ b/re2c/src/codegen/go_used_labels.cc
@@ -1,5 +1,5 @@
 #include "src/codegen/go.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 
 namespace re2c
 {
diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h
index 4daacf25..4890a7f4 100644
--- a/re2c/src/codegen/output.h
+++ b/re2c/src/codegen/output.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 #include "src/codegen/label.h"
-#include "src/dfa/rule_rank.h"
+#include "src/ir/rule_rank.h"
 #include "src/util/c99_stdint.h"
 #include "src/util/forbid_copy.h"
 
diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc
index c5ef6255..57640430 100644
--- a/re2c/src/codegen/prepare_dfa.cc
+++ b/re2c/src/codegen/prepare_dfa.cc
@@ -2,7 +2,8 @@
 
 #include "src/codegen/bitmap.h"
 #include "src/codegen/scc.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
+#include "src/ir/regexp/regexp_rule.h"
 #include "src/util/allocate.h"
 
 namespace re2c {
diff --git a/re2c/src/codegen/scc.cc b/re2c/src/codegen/scc.cc
index ba79dbb0..243497a8 100644
--- a/re2c/src/codegen/scc.cc
+++ b/re2c/src/codegen/scc.cc
@@ -1,5 +1,5 @@
 #include "src/codegen/scc.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 
 namespace re2c {
 
diff --git a/re2c/src/codegen/skeleton/path.h b/re2c/src/codegen/skeleton/path.h
index 097e2c9f..b24e20a0 100644
--- a/re2c/src/codegen/skeleton/path.h
+++ b/re2c/src/codegen/skeleton/path.h
@@ -3,7 +3,7 @@
 
 #include <vector>
 
-#include "src/dfa/rule_rank.h"
+#include "src/ir/rule_rank.h"
 #include "src/util/c99_stdint.h"
 
 namespace re2c
diff --git a/re2c/src/codegen/skeleton/skeleton.cc b/re2c/src/codegen/skeleton/skeleton.cc
index ca243c86..17275498 100644
--- a/re2c/src/codegen/skeleton/skeleton.cc
+++ b/re2c/src/codegen/skeleton/skeleton.cc
@@ -1,6 +1,7 @@
 #include "src/codegen/indent.h"
 #include "src/codegen/print.h"
 #include "src/codegen/skeleton/skeleton.h"
+#include "src/ir/regexp/regexp_rule.h"
 #include "src/util/allocate.h"
 
 namespace re2c
diff --git a/re2c/src/codegen/skeleton/skeleton.h b/re2c/src/codegen/skeleton/skeleton.h
index 41af35d3..aebaee10 100644
--- a/re2c/src/codegen/skeleton/skeleton.h
+++ b/re2c/src/codegen/skeleton/skeleton.h
@@ -4,7 +4,7 @@
 #include <map>
 
 #include "src/codegen/skeleton/path.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 #include "src/util/c99_stdint.h"
 #include "src/util/forbid_copy.h"
 #include "src/util/local_increment.h"
diff --git a/re2c/src/dfa/actions.cc b/re2c/src/dfa/actions.cc
deleted file mode 100644
index 01a7ee08..00000000
--- a/re2c/src/dfa/actions.cc
+++ /dev/null
@@ -1,1073 +0,0 @@
-#include <time.h>
-#include <string.h>
-#include <iostream>
-#include <iomanip>
-#include <cctype>
-
-#include "src/codegen/skeleton/skeleton.h"
-#include "src/dfa/dfa.h"
-#include "src/dfa/encoding/utf16/utf16_regexp.h"
-#include "src/dfa/encoding/utf8/utf8_regexp.h"
-#include "src/globals.h"
-#include "src/parse/scanner.h"
-#include "src/util/allocate.h"
-
-namespace re2c
-{
-
-const Ins* showIns(std::ostream &o, const Ins &i, const Ins &base)
-{
-	o.width(3);
-	o << &i - &base << ": ";
-
-	const Ins *ret = &(&i)[1];
-
-	switch (i.i.tag)
-	{
-
-		case CHAR:
-		{
-			o << "match ";
-
-			for (; ret < (Ins*) i.i.link; ++ret)
-			{
-				o << "\\x" << std::hex << ret->c.value;
-			}
-
-			break;
-		}
-
-		case GOTO:
-		o << "goto " << ((Ins*) i.i.link - &base);
-		break;
-
-		case FORK:
-		o << "fork " << ((Ins*) i.i.link - &base);
-		break;
-
-		case CTXT:
-		o << "ctxt";
-		break;
-
-		case TERM:
-		o << "term " << ((RuleOp*) i.i.link)->rank;
-		break;
-	}
-
-	o << "\n";
-	return ret;
-}
-
-uint32_t RegExp::fixedLength()
-{
-	return ~0u;
-}
-
-uint32_t compile_goto(Ins *ins, Ins *i)
-{
-	i->i.tag = GOTO;
-	i->i.link = ins;
-	return 1;
-}
-
-void NullOp::calcSize(Char*)
-{
-	size = 0;
-}
-
-uint32_t NullOp::fixedLength()
-{
-	return 0;
-}
-
-uint32_t NullOp::compile(Char*, Ins*)
-{
-	return 0;
-}
-
-void NullOp::decompile()
-{
-	;
-}
-
-void NullOp::split(CharSet&)
-{
-	;
-}
-
-MatchOp *merge(MatchOp *m1, MatchOp *m2)
-{
-	if (!m1)
-		return m2;
-
-	if (!m2)
-		return m1;
-
-	MatchOp* m = new MatchOp(doUnion(m1->match, m2->match));
-	if (m1->ins_access == RegExp::PRIVATE
-		|| m2->ins_access == RegExp::PRIVATE)
-		m->ins_access = RegExp::PRIVATE;
-
-	return m;
-}
-
-void MatchOp::display(std::ostream &o) const
-{
-	o << match;
-}
-
-void MatchOp::calcSize(Char *rep)
-{
-	size = 1;
-
-	for (Range *r = match; r; r = r->next)
-		for (uint32_t c = r->lb; c < r->ub; ++c)
-			if (rep[c] == c)
-				++size;
-}
-
-uint32_t MatchOp::fixedLength()
-{
-	return 1;
-}
-
-uint32_t MatchOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		i->i.tag = CHAR;
-		i->i.link = &i[size];
-		Ins *j = &i[1];
-		uint32_t bump = size;
-
-		for (Range *r = match; r; r = r->next)
-		{
-			for (uint32_t c = r->lb; c < r->ub; ++c)
-			{
-				if (rep[c] == c)
-				{
-					j->c.value = c;
-					j->c.bump = --bump;
-					j++;
-				}
-			}
-		}
-
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return size;
-	}
-}
-
-void MatchOp::decompile()
-{
-	ins_cache = NULL;
-}
-
-void MatchOp::split(CharSet &s)
-{
-	for (Range *r = match; r; r = r->next)
-	{
-		for (uint32_t c = r->lb; c < r->ub; ++c)
-		{
-			CharPtn *x = s.rep[c], *a = x->nxt;
-
-			if (!a)
-			{
-				if (x->card == 1)
-					continue;
-
-				x->nxt = a = s.freeHead;
-
-				if (!(s.freeHead = s.freeHead->nxt))
-					s.freeTail = &s.freeHead;
-
-				a->nxt = NULL;
-
-				x->fix = s.fix;
-
-				s.fix = x;
-			}
-
-			if (--(x->card) == 0)
-			{
-				*s.freeTail = x;
-				*(s.freeTail = &x->nxt) = NULL;
-			}
-
-			s.rep[c] = a;
-			++(a->card);
-		}
-	}
-
-	for (; s.fix; s.fix = s.fix->fix)
-		if (s.fix->card)
-			s.fix->nxt = NULL;
-}
-
-RegExp * mkDiff(RegExp *e1, RegExp *e2)
-{
-	MatchOp * m1 = dynamic_cast<MatchOp*>(e1);
-	MatchOp * m2 = dynamic_cast<MatchOp*>(e2);
-	if (m1 == NULL || m2 == NULL)
-		return NULL;
-
-	Range *r = doDiff(m1->match, m2->match);
-
-	return r ? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp;
-}
-
-RegExp *doAlt(RegExp *e1, RegExp *e2)
-{
-	if (!e1)
-		return e2;
-
-	if (!e2)
-		return e1;
-
-	return new AltOp(e1, e2);
-}
-
-RegExp *doCat(RegExp *e1, RegExp *e2)
-{
-	if (!e1)
-		return e2;
-
-	if (!e2)
-		return e1;
-
-	return new CatOp(e1, e2);
-}
-
-RegExp *mkAlt(RegExp *e1, RegExp *e2)
-{
-	AltOp *a;
-	MatchOp *m1, *m2;
-
-	a = dynamic_cast<AltOp*>(e1);
-	if (a != NULL)
-	{
-		m1 = dynamic_cast<MatchOp*>(a->exp1);
-		if (m1 != NULL)
-		{
-			m1->ins_access = e1->ins_access;
-			a->exp2->ins_access = e1->ins_access;
-			e1 = a->exp2;
-		}
-	}
-	else
-	{
-		m1 = dynamic_cast<MatchOp*>(e1);
-		if (m1 != NULL)
-			e1 = NULL;
-	}
-
-	a = dynamic_cast<AltOp*>(e2);
-	if (a != NULL)
-	{
-		m2 = dynamic_cast<MatchOp*>(a->exp1);
-		if (m2 != NULL)
-		{
-			m2->ins_access = e2->ins_access;
-			a->exp2->ins_access = e2->ins_access;
-			e2 = a->exp2;
-		}
-	}
-	else
-	{
-		m2 = dynamic_cast<MatchOp*>(e2);
-		if (m2 != NULL)
-			e2 = NULL;
-	}
-
-	return doAlt(merge(m1, m2), doAlt(e1, e2));
-}
-
-void AltOp::calcSize(Char *rep)
-{
-	exp1->calcSize(rep);
-	exp2->calcSize(rep);
-	size = exp1->size + exp2->size + 2;
-}
-
-uint32_t AltOp::fixedLength()
-{
-	uint32_t l1 = exp1->fixedLength();
-	uint32_t l2 = exp1->fixedLength();
-
-	if (l1 != l2 || l1 == ~0u)
-		return ~0u;
-
-	return l1;
-}
-
-uint32_t AltOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		i->i.tag = FORK;
-		const uint32_t sz1 = exp1->compile(rep, &i[1]);
-		Ins * const j = &i[sz1 + 1];
-		i->i.link = &j[1];
-		j->i.tag = GOTO;
-		const uint32_t sz2 = exp2->compile(rep, &j[1]);
-		j->i.link = &j[sz2 + 1];
-
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return sz1 + sz2 + 2;
-	}
-}
-
-void AltOp::decompile()
-{
-	if (ins_cache)
-	{
-		exp1->decompile();
-		exp2->decompile();
-		ins_cache = NULL;
-	}
-}
-
-void AltOp::split(CharSet &s)
-{
-	exp1->split(s);
-	exp2->split(s);
-}
-
-void CatOp::calcSize(Char *rep)
-{
-	exp1->calcSize(rep);
-	exp2->calcSize(rep);
-	size = exp1->size + exp2->size;
-}
-
-uint32_t CatOp::fixedLength()
-{
-	uint32_t l1, l2;
-
-	if ((l1 = exp1->fixedLength()) != ~0u )
-		if ((l2 = exp2->fixedLength()) != ~0u)
-			return l1 + l2;
-
-	return ~0u;
-}
-
-uint32_t CatOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		const uint32_t sz1 = exp1->compile(rep, &i[0]);
-		const uint32_t sz2 = exp2->compile(rep, &i[sz1]);
-
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return sz1 + sz2;
-	}
-}
-
-void CatOp::decompile()
-{
-	if (ins_cache)
-	{
-		exp1->decompile();
-		exp2->decompile();
-		ins_cache = NULL;
-	}
-}
-
-void CatOp::split(CharSet &s)
-{
-	exp1->split(s);
-	exp2->split(s);
-}
-
-void CloseOp::calcSize(Char *rep)
-{
-	exp->calcSize(rep);
-	size = exp->size + 1;
-}
-
-uint32_t CloseOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		i += exp->compile(rep, &i[0]);
-		i->i.tag = FORK;
-		i->i.link = ins_cache;
-		++i;
-
-		const uint32_t sz = i - ins_cache;
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return sz;
-	}
-}
-
-void CloseOp::decompile()
-{
-	if (ins_cache)
-	{
-		exp->decompile();
-		ins_cache = NULL;
-	}
-}
-
-void CloseOp::split(CharSet &s)
-{
-	exp->split(s);
-}
-
-void CloseVOp::calcSize(Char *rep)
-{
-	exp->calcSize(rep);
-
-	if (max >= 0)
-	{
-		size = (exp->size * min) + ((1 + exp->size) * (max - min));
-	}
-	else
-	{
-		size = (exp->size * min) + 1;
-	}
-}
-
-uint32_t CloseVOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		for (int st = min; st < max; st++)
-		{
-			const uint32_t sz = exp->compile(rep, &i[1]);
-			i->i.tag = FORK;
-			i->i.link = ins_cache + (1 + sz) * (max - min);
-			i += sz + 1;
-		}
-
-		for (int st = 0; st < min; st++)
-		{
-			const uint32_t sz = exp->compile(rep, &i[0]);
-			i += sz;
-			if (max < 0 && st == 0)
-			{
-				i->i.tag = FORK;
-				i->i.link = i - sz;
-				i++;
-			}
-		}
-
-		const uint32_t sz = i - ins_cache;
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return sz;
-	}
-}
-
-void CloseVOp::decompile()
-{
-	if (ins_cache)
-	{
-		exp->decompile();
-		ins_cache = NULL;
-	}
-}
-
-void CloseVOp::split(CharSet &s)
-{
-	exp->split(s);
-}
-
-RegExp *expr(Scanner &);
-
-uint32_t Scanner::unescape(SubStr &s) const
-{
-	static const char * hex = "0123456789abcdef";
-	static const char * oct = "01234567";
-
-	s.len--;
-	uint32_t c, ucb = 0;
-
-	if ((c = *s.str++) != '\\' || s.len == 0)
-	{
-		return c;
-	}
-
-	s.len--;
-
-	switch (c = *s.str++)
-	{
-		case 'n': return '\n';
-		case 't': return '\t';
-		case 'v': return '\v';
-		case 'b': return '\b';
-		case 'r': return '\r';
-		case 'f': return '\f';
-		case 'a': return '\a';
-
-		case 'x':
-		{
-			if (s.len < 2)
-			{
-				fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required");
-				return ~0u;
-			}
-			
-			const char *p1 = strchr(hex, tolower(s.str[0]));
-			const char *p2 = strchr(hex, tolower(s.str[1]));
-
-			if (!p1 || !p2)
-			{
-				fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code");
-				return ~0u;
-			}
-			else
-			{
-				s.len -= 2;
-				s.str += 2;
-				
-				uint32_t v = (uint32_t)((p1 - hex) << 4) 
-				       + (uint32_t)((p2 - hex));
-	
-				return v;
-			}
-		}
-
-		case 'U':
-		{
-			if (s.len < 8)
-			{
-				fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required");
-				return ~0u;
-			}
-
-			uint32_t l = 0;
-			if (s.str[0] == '0')
-			{
-				l++;
-				if (s.str[1] == '0')
-				{
-					l++;
-					if (s.str[2] == '0' || (s.str[2] == '1' && encoding.szCodePoint() == 4))
-					{
-						l++;
-						if (encoding.szCodePoint() == 4)
-						{
-							const char *u3 = strchr(hex, tolower(s.str[2]));
-							const char *u4 = strchr(hex, tolower(s.str[3]));
-							if (u3 && u4)
-							{
-								ucb = (uint32_t)((u3 - hex) << 20)
-							        + (uint32_t)((u4 - hex) << 16);
-								l++;
-							}
-						}
-						else if (s.str[3] == '0')
-						{
-							l++;
-						}
-					}
-				}
-			}
-
-			if (l != 4)
-			{
-				fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required");
-			}
-
-			s.len -= 4;
-			s.str += 4;
-			
-			// no break;
-		}
-		case 'X':
-		case 'u':
-		{
-			if (s.len < 4)
-			{
-				fatal(s.ofs()+s.len, 
-					c == 'X'
-					? "Illegal hexadecimal character code, four hexadecimal digits are required"
-					: "Illegal unicode character, four hexadecimal digits are required");
-				return ~0u;
-			}
-			
-			const char *p1 = strchr(hex, tolower(s.str[0]));
-			const char *p2 = strchr(hex, tolower(s.str[1]));
-			const char *p3 = strchr(hex, tolower(s.str[2]));
-			const char *p4 = strchr(hex, tolower(s.str[3]));
-
-			if (!p1 || !p2 || !p3 || !p4)
-			{
-				fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), 
-					c == 'X'
-					? "Illegal hexadecimal character code, non hexxdecimal digit found"
-					: "Illegal unicode character, non hexadecimal digit found");
-				return ~0u;
-			}
-			else
-			{
-				s.len -= 4;
-				s.str += 4;
-				
-				uint32_t v = (uint32_t)((p1 - hex) << 12) 
-				       + (uint32_t)((p2 - hex) <<  8)
-				       + (uint32_t)((p3 - hex) <<  4)
-				       + (uint32_t)((p4 - hex))
-				       + ucb;
-	
-				if (v >= encoding.nCodePoints())
-				{
-					fatal(s.ofs(),
-						c == 'X'
-						? "Illegal hexadecimal character code, out of range"
-						: "Illegal unicode character, out of range");
-				}
-	
-				return v;
-			}
-		}
-
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-		{
-			fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3");
-			return ~0u;
-		}
-
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		{
-			if (s.len < 2)
-			{
-				fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required");
-				return ~0u;
-			}
-
-			const char *p0 = strchr(oct, c);
-			const char *p1 = strchr(oct, s.str[0]);
-			const char *p2 = strchr(oct, s.str[1]);
-
-			if (!p0 || !p1 || !p2)
-			{
-				fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found");
-				return ~0u;
-			}
-			else
-			{
-				s.len -= 2;
-				s.str += 2;
-				
-				uint32_t v = (uint32_t)((p0 - oct) << 6) + (uint32_t)((p1 - oct) << 3) + (uint32_t)(p2 - oct);
-	
-				return v;
-			}
-		}
-
-		default:
-		return c;
-	}
-}
-
-std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const
-{
-	str_out.clear();
-
-	while(str_in.len)
-	{
-		uint32_t c = unescape(str_in);
-
-		if (c > 0xFF)
-		{
-			fatal(str_in.ofs(), "Illegal character");
-		}
-
-		str_out += static_cast<char>(c);
-	}
-
-	return str_out;
-}
-
-Range * Scanner::getRange(SubStr &s) const
-{
-	uint32_t lb = unescape(s), ub;
-
-	if (s.len < 2 || *s.str != '-')
-	{
-		ub = lb;
-	}
-	else
-	{
-		s.len--;
-		s.str++;
-		ub = unescape(s);
-		if (ub < lb)
-		{
-			uint32_t tmp = lb;
-			lb = ub;
-			ub = tmp;
-		}
-	}
-
-	Range * r = encoding.encodeRange(lb, ub);
-	if (r == NULL)
-		fatalf("Bad code point range: '0x%X - 0x%X'", lb, ub);
-	return r;
-}
-
-RegExp * Scanner::matchSymbol(uint32_t c) const
-{
-	if (!encoding.encode(c))
-		fatalf("Bad code point: '0x%X'", c);
-
-	if (encoding.is(Enc::UTF16))
-		return UTF16Symbol(c);
-	else if (encoding.is(Enc::UTF8))
-		return UTF8Symbol(c);
-	else
-		return new MatchOp(new Range(c, c + 1));
-}
-
-RegExp * Scanner::strToRE (SubStr & s) const
-{
-	if (s.len == 0)
-		return new NullOp;
-
-	RegExp *re = matchSymbol(unescape(s));
-
-	while (s.len > 0)
-		re = new CatOp(re, matchSymbol(unescape(s)));
-
-	return re;
-}
-
-RegExp * Scanner::strToCaseInsensitiveRE (SubStr & s) const
-{
-	if (s.len == 0)
-		return new NullOp;
-
-	uint32_t c = unescape(s);
-
-	RegExp *re, *reL, *reU;
-
-	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
-	{
-		reL = matchSymbol(tolower(c));
-		reU = matchSymbol(toupper(c));
-		re = mkAlt(reL, reU);
-	}
-	else
-	{
-		re = matchSymbol(c);
-	}
-
-	while (s.len > 0)
-	{
-		c = unescape(s);
-
-		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
-		{
-			reL = matchSymbol(tolower(c));
-			reU = matchSymbol(toupper(c));
-			re = new CatOp(re, mkAlt(reL, reU));
-		}
-		else
-		{
-			re = new CatOp(re, matchSymbol(c));
-		}
-	}
-
-	return re;
-}
-
-Range * Scanner::mkRange(SubStr &s) const
-{
-	Range *r = getRange(s);
-	while (s.len > 0)
-		r = doUnion(r, getRange(s));
-
-	return r;
-}
-
-RegExp * Scanner::matchSymbolRange(Range * r) const
-{
-	if (encoding.is(Enc::UTF16))
-		return UTF16Range(r);
-	else if (encoding.is(Enc::UTF8))
-		return UTF8Range(r);
-	else
-		return new MatchOp(r);
-}
-
-RegExp * Scanner::ranToRE (SubStr & s) const
-{
-	s.len -= 2;
-	s.str += 1;
-
-	if (s.len == 0)
-		return new NullOp;
-
-	return matchSymbolRange(mkRange(s));
-}
-
-RegExp * Scanner::invToRE (SubStr & s) const
-{
-	s.len -= 3;
-	s.str += 2;
-
-	Range * full = encoding.fullRange();
-
-	Range * r = s.len == 0
-		? full
-		: doDiff(full, mkRange (s));
-
-	return matchSymbolRange(r);
-}
-
-RegExp * Scanner::mkDot() const
-{
-	Range * full = encoding.fullRange();
-	uint32_t c = '\n';
-	if (!encoding.encode(c))
-		fatalf("Bad code point: '0x%X'", c);
-	Range * ran = new Range(c, c + 1);
-	Range * inv = doDiff(full, ran);
-
-	return matchSymbolRange(inv);
-}
-
-/*
- * Create a byte range that includes all possible input characters.
- * This may include characters, which do not map to any valid symbol
- * in current encoding. For encodings, which directly map symbols to
- * input characters (ASCII, EBCDIC, UTF-32), it equals [^]. For other
- * encodings (UTF-16, UTF-8), [^] and this range are different.
- *
- * Also note that default range doesn't respect encoding policy
- * (the way invalid code points are treated).
- */
-RegExp * Scanner::mkDefault() const
-{
-	Range * def = new Range(0, encoding.nCodeUnits());
-	return new MatchOp(def);
-}
-
-RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, rule_rank_t r, InsAccess access)
-	: exp(e)
-	, ctx(c)
-	, ins(NULL)
-	, rank(r)
-	, code(t)
-	, line(0)
-{
-	ins_access = access;
-}
-
-void RuleOp::calcSize(Char *rep)
-{
-	exp->calcSize(rep);
-	ctx->calcSize(rep);
-	size = exp->size + (ctx->size ? ctx->size + 2 : 1);
-}
-
-uint32_t RuleOp::compile(Char *rep, Ins *i)
-{
-	if (ins_cache)
-		return compile_goto(ins_cache, i);
-	else
-	{
-		ins_cache = i;
-
-		i += exp->compile(rep, &i[0]);
-		if (ctx->size)
-		{
-			i->i.tag = CTXT;
-			i->i.link = &i[1];
-			++i;
-			i += ctx->compile(rep, &i[0]);
-		}
-		i->i.tag = TERM;
-		i->i.link = this;
-		++i;
-
-		const uint32_t sz = i - ins_cache;
-		if (ins_access == PRIVATE)
-			decompile();
-
-		return sz;
-	}
-}
-
-void RuleOp::decompile()
-{
-	if (ins_cache)
-	{
-		exp->decompile();
-		ctx->decompile();
-		ins_cache = NULL;
-	}
-}
-
-void RuleOp::split(CharSet &s)
-{
-	exp->split(s);
-	ctx->split(s);
-}
-
-void optimize(Ins *i)
-{
-	while (!isMarked(i))
-	{
-		mark(i);
-
-		if (i->i.tag == CHAR)
-		{
-			i = (Ins*) i->i.link;
-		}
-		else if (i->i.tag == GOTO || i->i.tag == FORK)
-		{
-			Ins *target = (Ins*) i->i.link;
-			optimize(target);
-
-			if (target->i.tag == GOTO)
-				i->i.link = target->i.link == target ? i : target;
-
-			if (i->i.tag == FORK)
-			{
-				Ins *follow = (Ins*) & i[1];
-				optimize(follow);
-
-				if (follow->i.tag == GOTO && follow->i.link == follow)
-				{
-					i->i.tag = GOTO;
-				}
-				else if (i->i.link == i)
-				{
-					i->i.tag = GOTO;
-					i->i.link = follow;
-				}
-			}
-
-			return ;
-		}
-		else
-		{
-			++i;
-		}
-	}
-}
-
-CharSet::CharSet()
-	: fix(0)
-	, freeHead(0)
-	, freeTail(0)
-	, rep(allocate<CharPtr> (encoding.nCodeUnits()))
-	, ptn(allocate<CharPtn> (encoding.nCodeUnits()))
-{
-	for (uint32_t j = 0; j < encoding.nCodeUnits(); ++j)
-	{
-		rep[j] = &ptn[0];
-		ptn[j].nxt = &ptn[j + 1]; /* wrong for j=encoding.nCodeUnits() - 1 but will be corrected below */
-		ptn[j].card = 0;
-	}
-
-	freeHead = &ptn[1];
-	*(freeTail = &ptn[encoding.nCodeUnits() - 1].nxt) = NULL;
-	ptn[0].card = encoding.nCodeUnits();
-	ptn[0].nxt = NULL;
-}
-	
-CharSet::~CharSet()
-{
-	operator delete (rep);
-	operator delete (ptn);
-}
-
-smart_ptr<DFA> genCode(RegExp *re, Output & output, uint32_t ind)
-{
-	CharSet cs;
-	re->split(cs);
-
-	Char *rep = new Char[encoding.nCodeUnits()];
-
-	for (uint32_t j = 0; j < encoding.nCodeUnits(); ++j)
-	{
-		if (!cs.rep[j]->nxt)
-			cs.rep[j]->nxt = &cs.ptn[j];
-
-		rep[j] = cs.rep[j]->nxt - &cs.ptn[0];
-	}
-
-	re->calcSize(rep);
-	Ins *ins = new Ins[re->size + 1];
-	memset(ins, 0, (re->size + 1)*sizeof(Ins));
-	const uint32_t size = re->compile(rep, ins);
-	Ins *eoi = &ins[size];
-	eoi->i.tag = GOTO;
-	eoi->i.link = eoi;
-
-	optimize(ins);
-
-	/*
-	for (const Ins *inst = &ins[0]; inst < &ins[size]; )
-	{
-		inst = showIns(std::cout, *inst, ins[0]);
-	}
-	*/
-
-	for (uint32_t j = 0; j < size;)
-	{
-		unmark(&ins[j]);
-
-		if (ins[j].i.tag == CHAR)
-		{
-			j = (Ins*) ins[j].i.link - ins;
-		}
-		else
-		{
-			j++;
-		}
-	}
-
-	smart_ptr<DFA> dfa = make_smart_ptr(new DFA(ins, size, 0, encoding.nCodeUnits(), rep));
-	if (flag_skeleton)
-	{
-		skeleton::Skeleton skeleton (*dfa);
-		skeleton.emit_data (output.data);
-		skeleton::emit_prolog (output.source, ind, output.data.file_name.c_str ());
-	}
-	dfa->prepare (output.source, output.max_fill);
-
-	return dfa;
-}
-
-} // end namespace re2c
-
diff --git a/re2c/src/dfa/encoding/utf16/utf16_regexp.h b/re2c/src/dfa/encoding/utf16/utf16_regexp.h
deleted file mode 100644
index 14e4fde0..00000000
--- a/re2c/src/dfa/encoding/utf16/utf16_regexp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _RE2C_DFA_ENCODING_UTF16_REGEXP_
-#define _RE2C_DFA_ENCODING_UTF16_REGEXP_
-
-#include "src/dfa/encoding/utf16/utf16.h"
-#include "src/util/range.h"
-
-namespace re2c {
-
-class RegExp; // forward
-
-RegExp * UTF16Symbol(utf16::rune r);
-RegExp * UTF16Range(const Range * r);
-
-} // namespace re2c
-
-#endif // _RE2C_DFA_ENCODING_UTF16_REGEXP_
diff --git a/re2c/src/dfa/encoding/utf8/utf8_regexp.h b/re2c/src/dfa/encoding/utf8/utf8_regexp.h
deleted file mode 100644
index ccfaab0e..00000000
--- a/re2c/src/dfa/encoding/utf8/utf8_regexp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _RE2C_DFA_ENCODING_UTF8_REGEXP_
-#define _RE2C_DFA_ENCODING_UTF8_REGEXP_
-
-#include "src/dfa/encoding/utf8/utf8.h"
-#include "src/util/range.h"
-
-namespace re2c {
-
-class RegExp; // forward
-
-RegExp * UTF8Symbol(utf8::rune r);
-RegExp * UTF8Range(const Range * r);
-
-} // namespace re2c
-
-#endif // _RE2C_DFA_ENCODING_UTF8_REGEXP_
diff --git a/re2c/src/dfa/ins.h b/re2c/src/dfa/ins.h
deleted file mode 100644
index 1e1170c7..00000000
--- a/re2c/src/dfa/ins.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _RE2C_DFA_INS_
-#define _RE2C_DFA_INS_
-
-#include "src/util/c99_stdint.h"
-
-namespace re2c
-{
-
-typedef uint32_t Char;
-
-const uint32_t CHAR = 0;
-const uint32_t GOTO = 1;
-const uint32_t FORK = 2;
-const uint32_t TERM = 3;
-const uint32_t CTXT = 4;
-
-union Ins {
-
-	struct
-	{
-		uint8_t	tag;
-		uint8_t	marked;
-		void	*link;
-	}
-
-	i;
-
-	struct
-	{
-		uint32_t	value;
-		uint32_t	bump;
-		void		*link;
-	}
-
-	c;
-};
-
-inline bool isMarked(Ins *i)
-{
-	return i->i.marked != 0;
-}
-
-inline void mark(Ins *i)
-{
-	i->i.marked = true;
-}
-
-inline void unmark(Ins *i)
-{
-	i->i.marked = false;
-}
-
-} // end namespace re2c
-
-#endif // _RE2C_DFA_INS_
diff --git a/re2c/src/dfa/re.h b/re2c/src/dfa/re.h
deleted file mode 100644
index efb3c70f..00000000
--- a/re2c/src/dfa/re.h
+++ /dev/null
@@ -1,310 +0,0 @@
-#ifndef _RE2C_DFA_RE_
-#define _RE2C_DFA_RE_
-
-#include <iostream>
-#include <set>
-#include <map>
-#include <list>
-#include <vector>
-#include <string>
-
-#include "src/dfa/ins.h"
-#include "src/dfa/rule_rank.h"
-#include "src/globals.h"
-#include "src/parse/token.h"
-#include "src/util/range.h"
-#include "src/util/forbid_copy.h"
-#include "src/util/free_list.h"
-
-namespace re2c
-{
-
-typedef struct extop
-{
-	char op;
-	int	minsize;
-	int	maxsize;
-}
-
-ExtOp;
-
-struct CharPtn
-{
-	uint32_t	card;
-	CharPtn	*fix;
-	CharPtn	*nxt;
-
-	FORBID_COPY (CharPtn);
-};
-
-typedef CharPtn *CharPtr;
-
-struct CharSet
-{
-	CharSet();
-	~CharSet();
-
-	CharPtn	*fix;
-	CharPtn	*freeHead, **freeTail;
-	CharPtr	*rep;
-	CharPtn	*ptn;
-
-	FORBID_COPY (CharSet);
-};
-
-class RegExp
-{
-
-public:
-	uint32_t	size;
-
-	/*
-	 * There're several different cases when the same regexp
-	 * can be used multiple times:
-	 * 	1) named definitions, e.g. digit = [0-9];
-	 * 	2) counted repetition, e.g. "a"{3}, "a"{3,}, "a"{3,5}
-	 * 	3) multiple DFA's sharing the same regexp , e.g. <c1, c2> "abc" { }
-	 * 	4) common suffixes, e.g. suffix [\x80-\xBF] in UTF-8 ranges
-	 * In cases 1-3, regexp must be recompiled each time it's reused.
-	 * In case 4, regexp should be compiled only once, and instructions
-	 * should be shared in order to reduce space.
-	 *
-	 * Note: if regexp must always be recompiled, it doesn't imply that
-	 * parts of this regexp must always be recompiled. It only forces
-	 * the compilation function to traverse the regexp after compilation
-	 * and reset compilation cache for each sub-regexp. E.g., for a regexp
-	 * [^]{3} in UTF-8 mode, each of sub-regexps [^] will have common suffix
-	 * [\x80-\xBF] factored out, but they won't share instructions.
-	 */
-	Ins*	ins_cache; /* if non-NULL, points to compiled instructions */
-	enum InsAccess
-		{ SHARED
-		, PRIVATE
-		} ins_access;
-
-	static free_list<RegExp*> vFreeList;
-
-public:
-	RegExp()
-		: size(0)
-		, ins_cache(NULL)
-		, ins_access(SHARED)
-	{
-		vFreeList.insert(this);
-	}
-
-	virtual ~RegExp()
-	{
-		vFreeList.erase(this);
-	}
-
-	virtual void split(CharSet&) = 0;
-	virtual void calcSize(Char*) = 0;
-	virtual uint32_t fixedLength();
-	virtual uint32_t compile(Char*, Ins*) = 0;
-	virtual void decompile() = 0;
-	virtual void display(std::ostream&) const = 0;
-	friend std::ostream& operator<<(std::ostream&, const RegExp&);
-	friend std::ostream& operator<<(std::ostream&, const RegExp*);
-
-	FORBID_COPY (RegExp);
-};
-
-inline std::ostream& operator<<(std::ostream &o, const RegExp &re)
-{
-	re.display(o);
-	return o;
-}
-
-inline std::ostream& operator<<(std::ostream &o, const RegExp *re)
-{
-	return o << *re;
-}
-
-class NullOp: public RegExp
-{
-
-public:
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t fixedLength();
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << "_";
-	}
-};
-
-class MatchOp: public RegExp
-{
-
-public:
-	Range	*match;
-
-public:
-	MatchOp(Range *m) : match(m)
-	{
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t fixedLength();
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream&) const;
-
-	FORBID_COPY (MatchOp);
-};
-
-class RuleOp: public RegExp
-{
-private:
-	RegExp   *exp;
-
-public:
-	RegExp   *ctx;
-	Ins      *ins;
-	rule_rank_t rank;
-	Token    *code;
-	uint32_t line;
-
-public:
-	RuleOp(RegExp*, RegExp*, Token*, rule_rank_t, InsAccess);
-
-	~RuleOp()
-	{
-		delete code;
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << exp << "/" << ctx << ";";
-	}
-
-	FORBID_COPY (RuleOp);
-};
-
-RegExp *doAlt(RegExp*, RegExp*);
-RegExp *mkAlt(RegExp*, RegExp*);
-
-class AltOp: public RegExp
-{
-
-private:
-	RegExp	*exp1, *exp2;
-
-public:
-	AltOp(RegExp *e1, RegExp *e2)
-		: exp1(e1)
-		, exp2(e2)
-	{
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t fixedLength();
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << exp1 << "|" << exp2;
-	}
-
-	friend RegExp *mkAlt(RegExp*, RegExp*);
-
-	FORBID_COPY (AltOp);
-};
-
-RegExp *doCat(RegExp*, RegExp*);
-RegExp *mkCat(RegExp*, RegExp*);
-
-class CatOp: public RegExp
-{
-
-private:
-	RegExp	*exp1, *exp2;
-
-public:
-	CatOp(RegExp *e1, RegExp *e2)
-		: exp1(e1)
-		, exp2(e2)
-	{
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t fixedLength();
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << exp1 << exp2;
-	}
-
-	FORBID_COPY (CatOp);
-};
-
-class CloseOp: public RegExp
-{
-
-private:
-	RegExp	*exp;
-
-public:
-	CloseOp(RegExp *e)
-		: exp(e)
-	{
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << exp << "+";
-	}
-
-	FORBID_COPY (CloseOp);
-};
-
-class CloseVOp: public RegExp
-{
-
-private:
-	RegExp	*exp;
-	int	min;
-	int	max;
-
-public:
-	CloseVOp(RegExp *e, int lb, int ub)
-		: exp(e)
-		, min(lb)
-		, max(ub)
-	{
-		exp->ins_access = PRIVATE;
-	}
-
-	void split(CharSet&);
-	void calcSize(Char*);
-	uint32_t compile(Char*, Ins*);
-	void decompile();
-	void display(std::ostream &o) const
-	{
-		o << exp << "+";
-	}
-
-	FORBID_COPY (CloseVOp);
-};
-
-extern RegExp *mkDiff(RegExp*, RegExp*);
-extern RegExp *mkAlt(RegExp*, RegExp*);
-
-} // end namespace re2c
-
-#endif // _RE2C_DFA_RE_
diff --git a/re2c/src/globals.h b/re2c/src/globals.h
index e77bcf8d..9f2549ad 100644
--- a/re2c/src/globals.h
+++ b/re2c/src/globals.h
@@ -5,7 +5,7 @@
 
 #include "src/codegen/code_names.h"
 #include "src/codegen/input_api.h"
-#include "src/dfa/encoding/enc.h"
+#include "src/ir/regexp/encoding/enc.h"
 #include "src/util/c99_stdint.h"
 
 namespace re2c
diff --git a/re2c/src/ir/bytecode/bytecode.cc b/re2c/src/ir/bytecode/bytecode.cc
new file mode 100644
index 00000000..0fda50b2
--- /dev/null
+++ b/re2c/src/ir/bytecode/bytecode.cc
@@ -0,0 +1,111 @@
+#include <string.h>
+
+#include "src/codegen/skeleton/skeleton.h"
+#include "src/ir/bytecode/bytecode.h"
+#include "src/globals.h"
+
+namespace re2c {
+
+static void optimize (Ins * i);
+
+smart_ptr<DFA> genCode (RegExp *re, Output & output, uint32_t ind)
+{
+	CharSet cs;
+	re->split(cs);
+
+	Char *rep = new Char[encoding.nCodeUnits()];
+
+	for (uint32_t j = 0; j < encoding.nCodeUnits(); ++j)
+	{
+		if (!cs.rep[j]->nxt)
+			cs.rep[j]->nxt = &cs.ptn[j];
+
+		rep[j] = cs.rep[j]->nxt - &cs.ptn[0];
+	}
+
+	re->calcSize(rep);
+	Ins *ins = new Ins[re->size + 1];
+	memset(ins, 0, (re->size + 1)*sizeof(Ins));
+	const uint32_t size = re->compile(rep, ins);
+	Ins *eoi = &ins[size];
+	eoi->i.tag = GOTO;
+	eoi->i.link = eoi;
+
+	optimize(ins);
+
+	/*
+	for (const Ins *inst = &ins[0]; inst < &ins[size]; )
+	{
+		inst = showIns(std::cout, *inst, ins[0]);
+	}
+	*/
+
+	for (uint32_t j = 0; j < size;)
+	{
+		unmark(&ins[j]);
+
+		if (ins[j].i.tag == CHAR)
+		{
+			j = (Ins*) ins[j].i.link - ins;
+		}
+		else
+		{
+			j++;
+		}
+	}
+
+	smart_ptr<DFA> dfa = make_smart_ptr(new DFA(ins, size, 0, encoding.nCodeUnits(), rep));
+	if (flag_skeleton)
+	{
+		skeleton::Skeleton skeleton (*dfa);
+		skeleton.emit_data (output.data);
+		skeleton::emit_prolog (output.source, ind, output.data.file_name.c_str ());
+	}
+	dfa->prepare (output.source, output.max_fill);
+
+	return dfa;
+}
+
+void optimize (Ins * i)
+{
+	while (!isMarked (i))
+	{
+		mark (i);
+		if (i->i.tag == CHAR)
+		{
+			i = (Ins *) i->i.link;
+		}
+		else if (i->i.tag == GOTO || i->i.tag == FORK)
+		{
+			Ins * target = (Ins *) i->i.link;
+			optimize (target);
+			if (target->i.tag == GOTO)
+			{
+				i->i.link = target->i.link == target
+					? i
+					: target;
+			}
+			if (i->i.tag == FORK)
+			{
+				Ins * follow = (Ins *) & i[1];
+				optimize (follow);
+				if (follow->i.tag == GOTO && follow->i.link == follow)
+				{
+					i->i.tag = GOTO;
+				}
+				else if (i->i.link == i)
+				{
+					i->i.tag = GOTO;
+					i->i.link = follow;
+				}
+			}
+			return;
+		}
+		else
+		{
+			++i;
+		}
+	}
+}
+
+} // namespace re2c
diff --git a/re2c/src/ir/bytecode/bytecode.h b/re2c/src/ir/bytecode/bytecode.h
new file mode 100644
index 00000000..50757e12
--- /dev/null
+++ b/re2c/src/ir/bytecode/bytecode.h
@@ -0,0 +1,16 @@
+#ifndef _RE2C_IR_BYTECODE_BYTECODE_
+#define _RE2C_IR_BYTECODE_BYTECODE_
+
+#include "src/codegen/output.h"
+#include "src/ir/dfa/dfa.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/util/smart_ptr.h"
+
+namespace re2c
+{
+
+smart_ptr<DFA> genCode (RegExp * re, Output & output, uint32_t ind);
+
+} // namespace re2c
+
+#endif // _RE2C_IR_BYTECODE_BYTECODE_
diff --git a/re2c/src/ir/bytecode/calc_size.cc b/re2c/src/ir/bytecode/calc_size.cc
new file mode 100644
index 00000000..fc90e7e8
--- /dev/null
+++ b/re2c/src/ir/bytecode/calc_size.cc
@@ -0,0 +1,72 @@
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_closev.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/ir/regexp/regexp_rule.h"
+
+namespace re2c
+{
+
+void AltOp::calcSize (Char * rep)
+{
+	exp1->calcSize (rep);
+	exp2->calcSize (rep);
+	size = exp1->size + exp2->size + 2;
+}
+
+void CatOp::calcSize (Char * rep)
+{
+	exp1->calcSize (rep);
+	exp2->calcSize (rep);
+	size = exp1->size + exp2->size;
+}
+
+void CloseOp::calcSize (Char * rep)
+{
+	exp->calcSize (rep);
+	size = exp->size + 1;
+}
+
+void CloseVOp::calcSize (Char * rep)
+{
+	exp->calcSize (rep);
+	if (max >= 0)
+	{
+		size = (exp->size * min) + ((1 + exp->size) * (max - min));
+	}
+	else
+	{
+		size = (exp->size * min) + 1;
+	}
+}
+
+void MatchOp::calcSize (Char * rep)
+{
+	size = 1;
+	for (Range * r = match; r; r = r->next)
+	{
+		for (uint32_t c = r->lb; c < r->ub; ++c)
+		{
+			if (rep[c] == c)
+			{
+				++size;
+			}
+		}
+	}
+}
+
+void NullOp::calcSize (Char *)
+{
+	size = 0;
+}
+
+void RuleOp::calcSize (Char * rep)
+{
+	exp->calcSize (rep);
+	ctx->calcSize (rep);
+	size = exp->size + (ctx->size ? ctx->size + 2 : 1);
+}
+
+} // end namespace re2c
diff --git a/re2c/src/ir/bytecode/charset.cc b/re2c/src/ir/bytecode/charset.cc
new file mode 100644
index 00000000..1543abea
--- /dev/null
+++ b/re2c/src/ir/bytecode/charset.cc
@@ -0,0 +1,32 @@
+#include "src/ir/bytecode/charset.h"
+#include "src/globals.h"
+#include "src/util/allocate.h"
+
+namespace re2c {
+
+CharSet::CharSet ()
+	: fix (0)
+	, freeHead (0)
+	, freeTail (0)
+	, rep (allocate<CharPtr> (encoding.nCodeUnits ()))
+	, ptn (allocate<CharPtn> (encoding.nCodeUnits ()))
+{
+	for (uint32_t j = 0; j < encoding.nCodeUnits (); ++j)
+	{
+		rep[j] = &ptn[0];
+		ptn[j].nxt = &ptn[j + 1]; /* wrong for j=encoding.nCodeUnits() - 1 but will be corrected below */
+		ptn[j].card = 0;
+	}
+	freeHead = &ptn[1];
+	* (freeTail = &ptn[encoding.nCodeUnits() - 1].nxt) = NULL;
+	ptn[0].card = encoding.nCodeUnits ();
+	ptn[0].nxt = NULL;
+}
+
+CharSet::~CharSet ()
+{
+	operator delete (rep);
+	operator delete (ptn);
+}
+
+} // namespace re2c
diff --git a/re2c/src/ir/bytecode/charset.h b/re2c/src/ir/bytecode/charset.h
new file mode 100644
index 00000000..af36b4f5
--- /dev/null
+++ b/re2c/src/ir/bytecode/charset.h
@@ -0,0 +1,37 @@
+#ifndef _RE2C_IR_BYTECODE_CHARSET_
+#define _RE2C_IR_BYTECODE_CHARSET_
+
+#include "src/util/c99_stdint.h"
+#include "src/util/forbid_copy.h"
+
+namespace re2c
+{
+
+struct CharPtn
+{
+	uint32_t card;
+	CharPtn * fix;
+	CharPtn * nxt;
+
+	FORBID_COPY (CharPtn);
+};
+
+typedef CharPtn * CharPtr;
+
+struct CharSet
+{
+	CharPtn * fix;
+	CharPtn * freeHead;
+	CharPtn ** freeTail;
+	CharPtr * rep;
+	CharPtn * ptn;
+
+	CharSet ();
+	~CharSet ();
+
+	FORBID_COPY (CharSet);
+};
+
+} // namespace re2c
+
+#endif // _RE2C_IR_BYTECODE_CHARSET_
diff --git a/re2c/src/ir/bytecode/compile.cc b/re2c/src/ir/bytecode/compile.cc
new file mode 100644
index 00000000..2f4609bf
--- /dev/null
+++ b/re2c/src/ir/bytecode/compile.cc
@@ -0,0 +1,262 @@
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_closev.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/ir/regexp/regexp_rule.h"
+
+namespace re2c
+{
+
+static uint32_t compile_goto (Ins * ins, Ins * i);
+
+uint32_t AltOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		i->i.tag = FORK;
+		const uint32_t sz1 = exp1->compile (rep, &i[1]);
+		Ins * const j = &i[sz1 + 1];
+		i->i.link = &j[1];
+		j->i.tag = GOTO;
+		const uint32_t sz2 = exp2->compile (rep, &j[1]);
+		j->i.link = &j[sz2 + 1];
+
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return sz1 + sz2 + 2;
+	}
+}
+
+void AltOp::decompile ()
+{
+	if (ins_cache)
+	{
+		exp1->decompile ();
+		exp2->decompile ();
+		ins_cache = NULL;
+	}
+}
+
+uint32_t CatOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		const uint32_t sz1 = exp1->compile (rep, &i[0]);
+		const uint32_t sz2 = exp2->compile (rep, &i[sz1]);
+
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return sz1 + sz2;
+	}
+}
+
+void CatOp::decompile ()
+{
+	if (ins_cache)
+	{
+		exp1->decompile ();
+		exp2->decompile ();
+		ins_cache = NULL;
+	}
+}
+
+uint32_t CloseOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		i += exp->compile (rep, &i[0]);
+		i->i.tag = FORK;
+		i->i.link = ins_cache;
+		++i;
+
+		const uint32_t sz = i - ins_cache;
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return sz;
+	}
+}
+
+void CloseOp::decompile ()
+{
+	if (ins_cache)
+	{
+		exp->decompile ();
+		ins_cache = NULL;
+	}
+}
+
+uint32_t CloseVOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		for (int st = min; st < max; st++)
+		{
+			const uint32_t sz = exp->compile (rep, &i[1]);
+			i->i.tag = FORK;
+			i->i.link = ins_cache + (1 + sz) * (max - min);
+			i += sz + 1;
+		}
+		for (int st = 0; st < min; st++)
+		{
+			const uint32_t sz = exp->compile (rep, &i[0]);
+			i += sz;
+			if (max < 0 && st == 0)
+			{
+				i->i.tag = FORK;
+				i->i.link = i - sz;
+				i++;
+			}
+		}
+		const uint32_t sz = i - ins_cache;
+
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return sz;
+	}
+}
+
+void CloseVOp::decompile ()
+{
+	if (ins_cache)
+	{
+		exp->decompile ();
+		ins_cache = NULL;
+	}
+}
+
+uint32_t MatchOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		i->i.tag = CHAR;
+		i->i.link = &i[size];
+		Ins *j = &i[1];
+		uint32_t bump = size;
+		for (Range *r = match; r; r = r->next)
+		{
+			for (uint32_t c = r->lb; c < r->ub; ++c)
+			{
+				if (rep[c] == c)
+				{
+					j->c.value = c;
+					j->c.bump = --bump;
+					j++;
+				}
+			}
+		}
+
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return size;
+	}
+}
+
+void MatchOp::decompile ()
+{
+	ins_cache = NULL;
+}
+
+uint32_t NullOp::compile (Char *, Ins *)
+{
+	return 0;
+}
+
+void NullOp::decompile () {}
+
+uint32_t RuleOp::compile (Char * rep, Ins * i)
+{
+	if (ins_cache)
+	{
+		return compile_goto (ins_cache, i);
+	}
+	else
+	{
+		ins_cache = i;
+
+		i += exp->compile (rep, &i[0]);
+		if (ctx->size)
+		{
+			i->i.tag = CTXT;
+			i->i.link = &i[1];
+			++i;
+			i += ctx->compile (rep, &i[0]);
+		}
+		i->i.tag = TERM;
+		i->i.link = this;
+		++i;
+		const uint32_t sz = i - ins_cache;
+
+		if (ins_access == PRIVATE)
+		{
+			decompile ();
+		}
+
+		return sz;
+	}
+}
+
+void RuleOp::decompile ()
+{
+	if (ins_cache)
+	{
+		exp->decompile ();
+		ctx->decompile ();
+		ins_cache = NULL;
+	}
+}
+
+uint32_t compile_goto (Ins * ins, Ins * i)
+{
+	i->i.tag = GOTO;
+	i->i.link = ins;
+	return 1;
+}
+
+} // end namespace re2c
diff --git a/re2c/src/ir/bytecode/ins.cc b/re2c/src/ir/bytecode/ins.cc
new file mode 100644
index 00000000..f0041e51
--- /dev/null
+++ b/re2c/src/ir/bytecode/ins.cc
@@ -0,0 +1,41 @@
+#include <iostream>
+
+#include "src/ir/bytecode/ins.h"
+#include "src/ir/regexp/regexp_rule.h"
+
+namespace re2c {
+
+const Ins * showIns (std::ostream & o, const Ins & i, const Ins & base)
+{
+	o.width (3);
+	o << &i - &base << ": ";
+	const Ins * ret = &(&i)[1];
+	switch (i.i.tag)
+	{
+		case CHAR:
+		{
+			o << "match ";
+			for (; ret < (Ins *) i.i.link; ++ret)
+			{
+				o << "\\x" << std::hex << ret->c.value;
+			}
+			break;
+		}
+		case GOTO:
+			o << "goto " << ((Ins *) i.i.link - &base);
+			break;
+		case FORK:
+			o << "fork " << ((Ins *) i.i.link - &base);
+			break;
+		case CTXT:
+			o << "ctxt";
+			break;
+		case TERM:
+			o << "term " << ((RuleOp *) i.i.link)->rank;
+			break;
+	}
+	o << "\n";
+	return ret;
+}
+
+} // namespace re2c
diff --git a/re2c/src/ir/bytecode/ins.h b/re2c/src/ir/bytecode/ins.h
new file mode 100644
index 00000000..6cd14d54
--- /dev/null
+++ b/re2c/src/ir/bytecode/ins.h
@@ -0,0 +1,52 @@
+#ifndef _RE2C_IR_BYTECODE_INS_
+#define _RE2C_IR_BYTECODE_INS_
+
+#include <iosfwd>
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c
+{
+
+static const uint32_t CHAR = 0;
+static const uint32_t GOTO = 1;
+static const uint32_t FORK = 2;
+static const uint32_t TERM = 3;
+static const uint32_t CTXT = 4;
+
+union Ins
+{
+	struct
+	{
+		uint8_t tag;
+		uint8_t marked;
+		void * link;
+	} i;
+	struct
+	{
+		uint32_t value;
+		uint32_t bump;
+		void * link;
+	} c;
+};
+
+inline bool isMarked (Ins * i)
+{
+	return i->i.marked != 0;
+}
+
+inline void mark (Ins * i)
+{
+	i->i.marked = true;
+}
+
+inline void unmark (Ins * i)
+{
+	i->i.marked = false;
+}
+
+const Ins * showIns (std::ostream & o, const Ins & i, const Ins & base);
+
+} // namespace re2c
+
+#endif // _RE2C_IR_BYTECODE_INS_
diff --git a/re2c/src/ir/bytecode/split.cc b/re2c/src/ir/bytecode/split.cc
new file mode 100644
index 00000000..7e0bc6d1
--- /dev/null
+++ b/re2c/src/ir/bytecode/split.cc
@@ -0,0 +1,82 @@
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_closev.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/ir/regexp/regexp_rule.h"
+
+namespace re2c {
+
+void AltOp::split (CharSet & s)
+{
+	exp1->split (s);
+	exp2->split (s);
+}
+
+void CatOp::split (CharSet & s)
+{
+	exp1->split (s);
+	exp2->split (s);
+}
+
+void CloseOp::split (CharSet & s)
+{
+	exp->split (s);
+}
+
+void CloseVOp::split (CharSet & s)
+{
+	exp->split (s);
+}
+
+void MatchOp::split (CharSet & s)
+{
+	for (Range *r = match; r; r = r->next)
+	{
+		for (uint32_t c = r->lb; c < r->ub; ++c)
+		{
+			CharPtn * x = s.rep[c];
+			CharPtn * a = x->nxt;
+			if (!a)
+			{
+				if (x->card == 1)
+				{
+					continue;
+				}
+				x->nxt = a = s.freeHead;
+				if (!(s.freeHead = s.freeHead->nxt))
+				{
+					s.freeTail = &s.freeHead;
+				}
+				a->nxt = NULL;
+				x->fix = s.fix;
+				s.fix = x;
+			}
+			if (--(x->card) == 0)
+			{
+				*s.freeTail = x;
+				*(s.freeTail = &x->nxt) = NULL;
+			}
+			s.rep[c] = a;
+			++(a->card);
+		}
+	}
+	for (; s.fix; s.fix = s.fix->fix)
+	{
+		if (s.fix->card)
+		{
+			s.fix->nxt = NULL;
+		}
+	}
+}
+
+void NullOp::split (CharSet &) {}
+
+void RuleOp::split (CharSet & s)
+{
+	exp->split (s);
+	ctx->split (s);
+}
+
+} // namespace re2c
diff --git a/re2c/src/dfa/action.h b/re2c/src/ir/dfa/action.h
similarity index 93%
rename from re2c/src/dfa/action.h
rename to re2c/src/ir/dfa/action.h
index a188913d..582d1abf 100644
--- a/re2c/src/dfa/action.h
+++ b/re2c/src/ir/dfa/action.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_ACTION_
-#define _RE2C_DFA_ACTION_
+#ifndef _RE2C_IR_DFA_ACTION_
+#define _RE2C_IR_DFA_ACTION_
 
 #include <vector>
 
@@ -106,4 +106,4 @@ private:
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_ACTION_
+#endif // _RE2C_IR_DFA_ACTION_
diff --git a/re2c/src/dfa/dfa.cc b/re2c/src/ir/dfa/dfa.cc
similarity index 98%
rename from re2c/src/dfa/dfa.cc
rename to re2c/src/ir/dfa/dfa.cc
index 246e2121..2721ea67 100644
--- a/re2c/src/dfa/dfa.cc
+++ b/re2c/src/ir/dfa/dfa.cc
@@ -1,6 +1,7 @@
 #include <string.h>
 
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
+#include "src/ir/regexp/regexp_rule.h"
 #include "src/util/allocate.h"
 
 namespace re2c
diff --git a/re2c/src/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h
similarity index 80%
rename from re2c/src/dfa/dfa.h
rename to re2c/src/ir/dfa/dfa.h
index fd00cecf..98229f67 100644
--- a/re2c/src/dfa/dfa.h
+++ b/re2c/src/ir/dfa/dfa.h
@@ -1,10 +1,9 @@
-#ifndef _RE2C_DFA_DFA_
-#define _RE2C_DFA_DFA_
+#ifndef _RE2C_IR_DFA_DFA_
+#define _RE2C_IR_DFA_DFA_
 
-#include "src/dfa/action.h"
-#include "src/dfa/state.h"
+#include "src/ir/dfa/action.h"
+#include "src/ir/dfa/state.h"
 #include "src/util/forbid_copy.h"
-#include "src/util/smart_ptr.h"
 
 namespace re2c
 {
@@ -44,8 +43,6 @@ public:
 	FORBID_COPY (DFA);
 };
 
-smart_ptr<DFA> genCode (RegExp *, Output &, uint32_t);
-
 } // namespace re2c
 
-#endif // _RE2C_DFA_DFA_
+#endif // _RE2C_IR_DFA_DFA_
diff --git a/re2c/src/dfa/state.h b/re2c/src/ir/dfa/state.h
similarity index 80%
rename from re2c/src/dfa/state.h
rename to re2c/src/ir/dfa/state.h
index 72013501..41af7dfd 100644
--- a/re2c/src/dfa/state.h
+++ b/re2c/src/ir/dfa/state.h
@@ -1,9 +1,9 @@
-#ifndef _RE2C_DFA_STATE_
-#define _RE2C_DFA_STATE_
+#ifndef _RE2C_IR_DFA_STATE_
+#define _RE2C_IR_DFA_STATE_
 
 #include "src/codegen/go.h"
-#include "src/dfa/action.h"
-#include "src/dfa/re.h"
+#include "src/ir/dfa/action.h"
+#include "src/ir/regexp/regexp.h"
 #include "src/util/forbid_copy.h"
 
 namespace re2c
@@ -49,4 +49,4 @@ public:
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_STATE_
+#endif // _RE2C_IR_DFA_STATE_
diff --git a/re2c/src/ir/regexp/display.cc b/re2c/src/ir/regexp/display.cc
new file mode 100644
index 00000000..1e12d350
--- /dev/null
+++ b/re2c/src/ir/regexp/display.cc
@@ -0,0 +1,57 @@
+#include <iostream>
+
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_closev.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/ir/regexp/regexp_rule.h"
+
+namespace re2c
+{
+
+std::ostream & operator << (std::ostream & o, const RegExp & re)
+{
+	re.display (o);
+	return o;
+}
+
+void AltOp::display (std::ostream & o) const
+{
+	o << exp1 << "|" << exp2;
+}
+
+void CatOp::display (std::ostream & o) const
+{
+	o << exp1 << exp2;
+}
+
+void CloseOp::display (std::ostream & o) const
+{
+	o << exp << "+";
+}
+
+void CloseVOp::display (std::ostream & o) const
+{
+	o << exp << "+";
+}
+
+void MatchOp::display (std::ostream & o) const
+{
+	o << match;
+}
+
+void NullOp::display (std::ostream & o) const
+{
+	o << "_";
+}
+
+void RuleOp::display (std::ostream & o) const
+{
+	o << exp << "/" << ctx << ";";
+}
+
+} // end namespace re2c
+
diff --git a/re2c/src/dfa/encoding/enc.cc b/re2c/src/ir/regexp/encoding/enc.cc
similarity index 99%
rename from re2c/src/dfa/encoding/enc.cc
rename to re2c/src/ir/regexp/encoding/enc.cc
index 2d775972..9cd8e3b1 100644
--- a/re2c/src/dfa/encoding/enc.cc
+++ b/re2c/src/ir/regexp/encoding/enc.cc
@@ -1,4 +1,4 @@
-#include "src/dfa/encoding/enc.h"
+#include "src/ir/regexp/encoding/enc.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/enc.h b/re2c/src/ir/regexp/encoding/enc.h
similarity index 97%
rename from re2c/src/dfa/encoding/enc.h
rename to re2c/src/ir/regexp/encoding/enc.h
index f54997f3..ca5399b4 100644
--- a/re2c/src/dfa/encoding/enc.h
+++ b/re2c/src/ir/regexp/encoding/enc.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_ENCODING_ENC_
-#define _RE2C_DFA_ENCODING_ENC_
+#ifndef _RE2C_IR_REGEXP_ENCODING_ENC_
+#define _RE2C_IR_REGEXP_ENCODING_ENC_
 
 #include "src/util/c99_stdint.h"
 #include "src/util/range.h"
@@ -172,4 +172,4 @@ inline void Enc::setPolicy(policy_t t)
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_ENC_
+#endif // _RE2C_IR_REGEXP_ENCODING_ENC_
diff --git a/re2c/src/dfa/encoding/range_suffix.cc b/re2c/src/ir/regexp/encoding/range_suffix.cc
similarity index 77%
rename from re2c/src/dfa/encoding/range_suffix.cc
rename to re2c/src/ir/regexp/encoding/range_suffix.cc
index 3c8ccb50..96615927 100644
--- a/re2c/src/dfa/encoding/range_suffix.cc
+++ b/re2c/src/ir/regexp/encoding/range_suffix.cc
@@ -1,5 +1,6 @@
-#include "src/dfa/encoding/range_suffix.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/encoding/range_suffix.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_match.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/range_suffix.h b/re2c/src/ir/regexp/encoding/range_suffix.h
similarity index 79%
rename from re2c/src/dfa/encoding/range_suffix.h
rename to re2c/src/ir/regexp/encoding/range_suffix.h
index 770c0bc6..93008977 100644
--- a/re2c/src/dfa/encoding/range_suffix.h
+++ b/re2c/src/ir/regexp/encoding/range_suffix.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_ENCODING_RANGE_SUFFIX_
-#define _RE2C_DFA_ENCODING_RANGE_SUFFIX_
+#ifndef _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
+#define _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
 
 #include <stddef.h> // NULL
 
@@ -37,4 +37,4 @@ RegExp * emit(RangeSuffix * p, RegExp * re);
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_RANGE_SUFFIX_
+#endif // _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
diff --git a/re2c/src/dfa/encoding/utf16/utf16.cc b/re2c/src/ir/regexp/encoding/utf16/utf16.cc
similarity index 82%
rename from re2c/src/dfa/encoding/utf16/utf16.cc
rename to re2c/src/ir/regexp/encoding/utf16/utf16.cc
index 5b2d8448..47743edf 100644
--- a/re2c/src/dfa/encoding/utf16/utf16.cc
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16.cc
@@ -1,4 +1,4 @@
-#include "src/dfa/encoding/utf16/utf16.h"
+#include "src/ir/regexp/encoding/utf16/utf16.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/utf16/utf16.h b/re2c/src/ir/regexp/encoding/utf16/utf16.h
similarity index 82%
rename from re2c/src/dfa/encoding/utf16/utf16.h
rename to re2c/src/ir/regexp/encoding/utf16/utf16.h
index c6389d26..d64de349 100644
--- a/re2c/src/dfa/encoding/utf16/utf16.h
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_ENCODING_UTF16_UTF16_
-#define _RE2C_DFA_ENCODING_UTF16_UTF16_
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
+#define _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
 
 #include "src/util/c99_stdint.h"
 
@@ -34,4 +34,4 @@ inline uint16_t utf16::trail_surr(rune r)
 
 }  // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_UTF16_UTF16_
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
diff --git a/re2c/src/dfa/encoding/utf16/utf16_range.cc b/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc
similarity index 97%
rename from re2c/src/dfa/encoding/utf16/utf16_range.cc
rename to re2c/src/ir/regexp/encoding/utf16/utf16_range.cc
index 187f42ff..c4f27f6c 100644
--- a/re2c/src/dfa/encoding/utf16/utf16_range.cc
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc
@@ -1,5 +1,5 @@
-#include "src/dfa/encoding/utf16/utf16_range.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/encoding/utf16/utf16_range.h"
+#include "src/ir/regexp/regexp.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/utf16/utf16_range.h b/re2c/src/ir/regexp/encoding/utf16/utf16_range.h
similarity index 63%
rename from re2c/src/dfa/encoding/utf16/utf16_range.h
rename to re2c/src/ir/regexp/encoding/utf16/utf16_range.h
index 66cf3b91..4e75560d 100644
--- a/re2c/src/dfa/encoding/utf16/utf16_range.h
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16_range.h
@@ -1,8 +1,8 @@
-#ifndef _RE2C_DFA_ENCODING_UTF16_RANGE_
-#define _RE2C_DFA_ENCODING_UTF16_RANGE_
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
+#define _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
 
-#include "src/dfa/encoding/range_suffix.h"
-#include "src/dfa/encoding/utf16/utf16.h"
+#include "src/ir/regexp/encoding/range_suffix.h"
+#include "src/ir/regexp/encoding/utf16/utf16.h"
 
 namespace re2c {
 
@@ -13,4 +13,4 @@ void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h);
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_UTF16_RANGE_
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
diff --git a/re2c/src/dfa/encoding/utf16/utf16_regexp.cc b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
similarity index 77%
rename from re2c/src/dfa/encoding/utf16/utf16_regexp.cc
rename to re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
index ee26ce0e..2e7f69df 100644
--- a/re2c/src/dfa/encoding/utf16/utf16_regexp.cc
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
@@ -1,6 +1,8 @@
-#include "src/dfa/encoding/utf16/utf16_range.h"
-#include "src/dfa/encoding/utf16/utf16_regexp.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/encoding/utf16/utf16_range.h"
+#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_match.h"
 
 namespace re2c {
 
diff --git a/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h
new file mode 100644
index 00000000..af1b7e79
--- /dev/null
+++ b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h
@@ -0,0 +1,16 @@
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
+#define _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
+
+#include "src/ir/regexp/encoding/utf16/utf16.h"
+#include "src/util/range.h"
+
+namespace re2c {
+
+class RegExp; // forward
+
+RegExp * UTF16Symbol(utf16::rune r);
+RegExp * UTF16Range(const Range * r);
+
+} // namespace re2c
+
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
diff --git a/re2c/src/dfa/encoding/utf8/utf8.cc b/re2c/src/ir/regexp/encoding/utf8/utf8.cc
similarity index 97%
rename from re2c/src/dfa/encoding/utf8/utf8.cc
rename to re2c/src/ir/regexp/encoding/utf8/utf8.cc
index 70256933..8332e131 100644
--- a/re2c/src/dfa/encoding/utf8/utf8.cc
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8.cc
@@ -1,4 +1,4 @@
-#include "src/dfa/encoding/utf8/utf8.h"
+#include "src/ir/regexp/encoding/utf8/utf8.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/utf8/utf8.h b/re2c/src/ir/regexp/encoding/utf8/utf8.h
similarity index 87%
rename from re2c/src/dfa/encoding/utf8/utf8.h
rename to re2c/src/ir/regexp/encoding/utf8/utf8.h
index 9c049a50..72d14cca 100644
--- a/re2c/src/dfa/encoding/utf8/utf8.h
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_ENCODING_UTF8_UTF8_
-#define _RE2C_DFA_ENCODING_UTF8_UTF8_
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
+#define _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
 
 #include "src/util/c99_stdint.h"
 
@@ -43,4 +43,4 @@ public:
 
 }  // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_UTF8_UTF8_
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
diff --git a/re2c/src/dfa/encoding/utf8/utf8_range.cc b/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc
similarity index 97%
rename from re2c/src/dfa/encoding/utf8/utf8_range.cc
rename to re2c/src/ir/regexp/encoding/utf8/utf8_range.cc
index c8fd9cb9..466526af 100644
--- a/re2c/src/dfa/encoding/utf8/utf8_range.cc
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc
@@ -1,5 +1,5 @@
-#include "src/dfa/encoding/utf8/utf8_range.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/encoding/utf8/utf8_range.h"
+#include "src/ir/regexp/regexp.h"
 
 namespace re2c {
 
diff --git a/re2c/src/dfa/encoding/utf8/utf8_range.h b/re2c/src/ir/regexp/encoding/utf8/utf8_range.h
similarity index 55%
rename from re2c/src/dfa/encoding/utf8/utf8_range.h
rename to re2c/src/ir/regexp/encoding/utf8/utf8_range.h
index e4f91da3..490e76bc 100644
--- a/re2c/src/dfa/encoding/utf8/utf8_range.h
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8_range.h
@@ -1,8 +1,8 @@
-#ifndef _RE2C_DFA_ENCODING_UTF8_RANGE_
-#define _RE2C_DFA_ENCODING_UTF8_RANGE_
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
+#define _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
 
-#include "src/dfa/encoding/range_suffix.h"
-#include "src/dfa/encoding/utf8/utf8.h"
+#include "src/ir/regexp/encoding/range_suffix.h"
+#include "src/ir/regexp/encoding/utf8/utf8.h"
 
 namespace re2c {
 
@@ -12,4 +12,4 @@ void UTF8splitByRuneLength(RangeSuffix * & p, utf8::rune l, utf8::rune h);
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_ENCODING_UTF8_RANGE_
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
diff --git a/re2c/src/dfa/encoding/utf8/utf8_regexp.cc b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
similarity index 78%
rename from re2c/src/dfa/encoding/utf8/utf8_regexp.cc
rename to re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
index 1b9fb06c..0e6001d3 100644
--- a/re2c/src/dfa/encoding/utf8/utf8_regexp.cc
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
@@ -1,6 +1,8 @@
-#include "src/dfa/encoding/utf8/utf8_range.h"
-#include "src/dfa/encoding/utf8/utf8_regexp.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/encoding/utf8/utf8_range.h"
+#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_match.h"
 
 namespace re2c {
 
diff --git a/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h
new file mode 100644
index 00000000..434fc320
--- /dev/null
+++ b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h
@@ -0,0 +1,16 @@
+#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
+#define _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
+
+#include "src/ir/regexp/encoding/utf8/utf8.h"
+#include "src/util/range.h"
+
+namespace re2c {
+
+class RegExp; // forward
+
+RegExp * UTF8Symbol(utf8::rune r);
+RegExp * UTF8Range(const Range * r);
+
+} // namespace re2c
+
+#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
diff --git a/re2c/src/ir/regexp/fixed_length.cc b/re2c/src/ir/regexp/fixed_length.cc
new file mode 100644
index 00000000..7319be8d
--- /dev/null
+++ b/re2c/src/ir/regexp/fixed_length.cc
@@ -0,0 +1,53 @@
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+
+namespace re2c
+{
+
+uint32_t RegExp::fixedLength ()
+{
+	return ~0u;
+}
+
+uint32_t AltOp::fixedLength ()
+{
+	uint32_t l1 = exp1->fixedLength ();
+	uint32_t l2 = exp1->fixedLength ();
+
+	if (l1 != l2 || l1 == ~0u)
+	{
+		return ~0u;
+	}
+
+	return l1;
+}
+
+uint32_t CatOp::fixedLength ()
+{
+	const uint32_t l1 = exp1->fixedLength ();
+	if (l1 != ~0u)
+	{
+		const uint32_t l2 = exp2->fixedLength ();
+		if (l2 != ~0u)
+		{
+			return l1 + l2;
+		}
+	}
+	return ~0u;
+}
+
+uint32_t MatchOp::fixedLength ()
+{
+	return 1;
+}
+
+uint32_t NullOp::fixedLength ()
+{
+	return 0;
+}
+
+} // end namespace re2c
+
diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc
new file mode 100644
index 00000000..67515cb2
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp.cc
@@ -0,0 +1,287 @@
+#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
+#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_alt.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_match.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/parse/scanner.h"
+#include "src/util/range.h"
+
+namespace re2c
+{
+
+static MatchOp * merge (MatchOp * m1, MatchOp * m2);
+
+RegExp * doAlt (RegExp * e1, RegExp * e2)
+{
+	if (!e1)
+	{
+		return e2;
+	}
+	if (!e2)
+	{
+		return e1;
+	}
+	return new AltOp (e1, e2);
+}
+
+RegExp * mkAlt (RegExp * e1, RegExp * e2)
+{
+	AltOp * a;
+	MatchOp * m1;
+	MatchOp * m2;
+
+	a = dynamic_cast<AltOp*> (e1);
+	if (a != NULL)
+	{
+		m1 = dynamic_cast<MatchOp*> (a->exp1);
+		if (m1 != NULL)
+		{
+			m1->ins_access = e1->ins_access;
+			a->exp2->ins_access = e1->ins_access;
+			e1 = a->exp2;
+		}
+	}
+	else
+	{
+		m1 = dynamic_cast<MatchOp*> (e1);
+		if (m1 != NULL)
+		{
+			e1 = NULL;
+		}
+	}
+	a = dynamic_cast<AltOp*> (e2);
+	if (a != NULL)
+	{
+		m2 = dynamic_cast<MatchOp*> (a->exp1);
+		if (m2 != NULL)
+		{
+			m2->ins_access = e2->ins_access;
+			a->exp2->ins_access = e2->ins_access;
+			e2 = a->exp2;
+		}
+	}
+	else
+	{
+		m2 = dynamic_cast<MatchOp*> (e2);
+		if (m2 != NULL)
+		{
+			e2 = NULL;
+		}
+	}
+
+	return doAlt (merge (m1, m2), doAlt (e1, e2));
+}
+
+MatchOp * merge (MatchOp * m1, MatchOp * m2)
+{
+	if (!m1)
+	{
+		return m2;
+	}
+	if (!m2)
+	{
+		return m1;
+	}
+	MatchOp * m = new MatchOp (doUnion (m1->match, m2->match));
+	if (m1->ins_access == RegExp::PRIVATE
+		|| m2->ins_access == RegExp::PRIVATE)
+	{
+		m->ins_access = RegExp::PRIVATE;
+	}
+	return m;
+}
+
+RegExp * doCat (RegExp * e1, RegExp * e2)
+{
+	if (!e1)
+	{
+		return e2;
+	}
+	if (!e2)
+	{
+		return e1;
+	}
+	return new CatOp (e1, e2);
+}
+
+RegExp * mkDiff (RegExp * e1, RegExp * e2)
+{
+	MatchOp * m1 = dynamic_cast<MatchOp *> (e1);
+	MatchOp * m2 = dynamic_cast<MatchOp *> (e2);
+	if (m1 == NULL || m2 == NULL)
+	{
+		return NULL;
+	}
+	Range * r = doDiff (m1->match, m2->match);
+	return r
+		? (RegExp *) new MatchOp(r)
+		: (RegExp *) new NullOp;
+}
+
+Range * Scanner::getRange(SubStr &s) const
+{
+	uint32_t lb = unescape(s), ub;
+
+	if (s.len < 2 || *s.str != '-')
+	{
+		ub = lb;
+	}
+	else
+	{
+		s.len--;
+		s.str++;
+		ub = unescape(s);
+		if (ub < lb)
+		{
+			uint32_t tmp = lb;
+			lb = ub;
+			ub = tmp;
+		}
+	}
+
+	Range * r = encoding.encodeRange(lb, ub);
+	if (r == NULL)
+		fatalf("Bad code point range: '0x%X - 0x%X'", lb, ub);
+	return r;
+}
+
+RegExp * Scanner::matchSymbol(uint32_t c) const
+{
+	if (!encoding.encode(c))
+		fatalf("Bad code point: '0x%X'", c);
+
+	if (encoding.is(Enc::UTF16))
+		return UTF16Symbol(c);
+	else if (encoding.is(Enc::UTF8))
+		return UTF8Symbol(c);
+	else
+		return new MatchOp(new Range(c, c + 1));
+}
+
+RegExp * Scanner::strToRE (SubStr & s) const
+{
+	if (s.len == 0)
+		return new NullOp;
+
+	RegExp *re = matchSymbol(unescape(s));
+
+	while (s.len > 0)
+		re = new CatOp(re, matchSymbol(unescape(s)));
+
+	return re;
+}
+
+RegExp * Scanner::strToCaseInsensitiveRE (SubStr & s) const
+{
+	if (s.len == 0)
+		return new NullOp;
+
+	uint32_t c = unescape(s);
+
+	RegExp *re, *reL, *reU;
+
+	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+	{
+		reL = matchSymbol(tolower(c));
+		reU = matchSymbol(toupper(c));
+		re = mkAlt(reL, reU);
+	}
+	else
+	{
+		re = matchSymbol(c);
+	}
+
+	while (s.len > 0)
+	{
+		c = unescape(s);
+
+		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+		{
+			reL = matchSymbol(tolower(c));
+			reU = matchSymbol(toupper(c));
+			re = new CatOp(re, mkAlt(reL, reU));
+		}
+		else
+		{
+			re = new CatOp(re, matchSymbol(c));
+		}
+	}
+
+	return re;
+}
+
+Range * Scanner::mkRange(SubStr &s) const
+{
+	Range *r = getRange(s);
+	while (s.len > 0)
+		r = doUnion(r, getRange(s));
+
+	return r;
+}
+
+RegExp * Scanner::matchSymbolRange(Range * r) const
+{
+	if (encoding.is(Enc::UTF16))
+		return UTF16Range(r);
+	else if (encoding.is(Enc::UTF8))
+		return UTF8Range(r);
+	else
+		return new MatchOp(r);
+}
+
+RegExp * Scanner::ranToRE (SubStr & s) const
+{
+	s.len -= 2;
+	s.str += 1;
+
+	if (s.len == 0)
+		return new NullOp;
+
+	return matchSymbolRange(mkRange(s));
+}
+
+RegExp * Scanner::invToRE (SubStr & s) const
+{
+	s.len -= 3;
+	s.str += 2;
+
+	Range * full = encoding.fullRange();
+
+	Range * r = s.len == 0
+		? full
+		: doDiff(full, mkRange (s));
+
+	return matchSymbolRange(r);
+}
+
+RegExp * Scanner::mkDot() const
+{
+	Range * full = encoding.fullRange();
+	uint32_t c = '\n';
+	if (!encoding.encode(c))
+		fatalf("Bad code point: '0x%X'", c);
+	Range * ran = new Range(c, c + 1);
+	Range * inv = doDiff(full, ran);
+
+	return matchSymbolRange(inv);
+}
+
+/*
+ * Create a byte range that includes all possible input characters.
+ * This may include characters, which do not map to any valid symbol
+ * in current encoding. For encodings, which directly map symbols to
+ * input characters (ASCII, EBCDIC, UTF-32), it equals [^]. For other
+ * encodings (UTF-16, UTF-8), [^] and this range are different.
+ *
+ * Also note that default range doesn't respect encoding policy
+ * (the way invalid code points are treated).
+ */
+RegExp * Scanner::mkDefault() const
+{
+	Range * def = new Range(0, encoding.nCodeUnits());
+	return new MatchOp(def);
+}
+
+} // namespace re2c
diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h
new file mode 100644
index 00000000..758d3b4f
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp.h
@@ -0,0 +1,76 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_
+#define _RE2C_IR_REGEXP_REGEXP_
+
+#include <iosfwd>
+
+#include "src/ir/bytecode/charset.h"
+#include "src/ir/bytecode/ins.h"
+#include "src/globals.h"
+#include "src/util/forbid_copy.h"
+
+namespace re2c
+{
+
+typedef uint32_t Char;
+
+class RegExp
+{
+public:
+	static free_list <RegExp *> vFreeList;
+
+	uint32_t size;
+	/*
+	 * There're several different cases when the same regexp
+	 * can be used multiple times:
+	 * 	1) named definitions, e.g. digit = [0-9];
+	 * 	2) counted repetition, e.g. "a"{3}, "a"{3,}, "a"{3,5}
+	 * 	3) multiple DFA's sharing the same regexp , e.g. <c1, c2> "abc" { }
+	 * 	4) common suffixes, e.g. suffix [\x80-\xBF] in UTF-8 ranges
+	 * In cases 1-3, regexp must be recompiled each time it's reused.
+	 * In case 4, regexp should be compiled only once, and instructions
+	 * should be shared in order to reduce space.
+	 *
+	 * Note: if regexp must always be recompiled, it doesn't imply that
+	 * parts of this regexp must always be recompiled. It only forces
+	 * the compilation function to traverse the regexp after compilation
+	 * and reset compilation cache for each sub-regexp. E.g., for a regexp
+	 * [^]{3} in UTF-8 mode, each of sub-regexps [^] will have common suffix
+	 * [\x80-\xBF] factored out, but they won't share instructions.
+	 */
+	Ins * ins_cache; /* if non-NULL, points to compiled instructions */
+	enum InsAccess
+	{
+		SHARED,
+		PRIVATE
+	} ins_access;
+
+	inline RegExp ()
+		: size (0)
+		, ins_cache (NULL)
+		, ins_access (SHARED)
+	{
+		vFreeList.insert (this);
+	}
+	inline virtual ~RegExp ()
+	{
+		vFreeList.erase (this);
+	}
+	virtual void split (CharSet &) = 0;
+	virtual void calcSize (Char *) = 0;
+	virtual uint32_t fixedLength ();
+	virtual uint32_t compile (Char *, Ins *) = 0;
+	virtual void decompile () = 0;
+	virtual void display (std::ostream &) const = 0;
+	friend std::ostream & operator << (std::ostream & o, const RegExp & re);
+
+	FORBID_COPY (RegExp);
+};
+
+RegExp * doAlt (RegExp * e1, RegExp * e2);
+RegExp * mkAlt (RegExp * e1, RegExp * e2);
+RegExp * doCat (RegExp * e1, RegExp * e2);
+RegExp * mkDiff (RegExp * e1, RegExp * e2);
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_
diff --git a/re2c/src/ir/regexp/regexp_alt.h b/re2c/src/ir/regexp/regexp_alt.h
new file mode 100644
index 00000000..fb84be5e
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_alt.h
@@ -0,0 +1,32 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_
+#define _RE2C_IR_REGEXP_REGEXP_ALT_
+
+#include "src/ir/regexp/regexp.h"
+
+namespace re2c
+{
+
+class AltOp: public RegExp
+{
+	RegExp * exp1;
+	RegExp * exp2;
+
+public:
+	inline AltOp (RegExp * e1, RegExp * e2)
+		: exp1 (e1)
+		, exp2 (e2)
+	{}
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t fixedLength ();
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+	friend RegExp * mkAlt (RegExp *, RegExp *);
+
+	FORBID_COPY (AltOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_ALT_
diff --git a/re2c/src/ir/regexp/regexp_cat.h b/re2c/src/ir/regexp/regexp_cat.h
new file mode 100644
index 00000000..08ad5f07
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_cat.h
@@ -0,0 +1,31 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_
+#define _RE2C_IR_REGEXP_REGEXP_CAT_
+
+#include "src/ir/regexp/regexp.h"
+
+namespace re2c
+{
+
+class CatOp: public RegExp
+{
+	RegExp * exp1;
+	RegExp * exp2;
+
+public:
+	inline CatOp (RegExp * e1, RegExp * e2)
+		: exp1 (e1)
+		, exp2 (e2)
+	{}
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t fixedLength ();
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+
+	FORBID_COPY (CatOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_CAT_
diff --git a/re2c/src/ir/regexp/regexp_close.h b/re2c/src/ir/regexp/regexp_close.h
new file mode 100644
index 00000000..e147d8c6
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_close.h
@@ -0,0 +1,28 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_
+#define _RE2C_IR_REGEXP_REGEXP_CLOSE_
+
+#include "src/ir/regexp/regexp.h"
+
+namespace re2c
+{
+
+class CloseOp: public RegExp
+{
+	RegExp * exp;
+
+public:
+	inline CloseOp (RegExp * e)
+		: exp (e)
+	{}
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+
+	FORBID_COPY (CloseOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_
diff --git a/re2c/src/ir/regexp/regexp_closev.h b/re2c/src/ir/regexp/regexp_closev.h
new file mode 100644
index 00000000..8cd054ee
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_closev.h
@@ -0,0 +1,34 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSEV_
+#define _RE2C_IR_REGEXP_REGEXP_CLOSEV_
+
+#include "src/ir/regexp/regexp.h"
+
+namespace re2c
+{
+
+class CloseVOp: public RegExp
+{
+	RegExp * exp;
+	int32_t min;
+	int32_t max;
+
+public:
+	inline CloseVOp (RegExp * e, int lb, int ub)
+		: exp (e)
+		, min (lb)
+		, max (ub)
+	{
+		exp->ins_access = PRIVATE;
+	}
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+
+	FORBID_COPY (CloseVOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_CLOSEV_
diff --git a/re2c/src/ir/regexp/regexp_match.h b/re2c/src/ir/regexp/regexp_match.h
new file mode 100644
index 00000000..49059941
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_match.h
@@ -0,0 +1,30 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_
+#define _RE2C_IR_REGEXP_REGEXP_MATCH_
+
+#include "src/ir/regexp/regexp.h"
+#include "src/util/range.h"
+
+namespace re2c
+{
+
+class MatchOp: public RegExp
+{
+public:
+	Range * match;
+
+	inline MatchOp (Range * m)
+		: match (m)
+	{}
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t fixedLength ();
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+
+	FORBID_COPY (MatchOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_
diff --git a/re2c/src/ir/regexp/regexp_null.h b/re2c/src/ir/regexp/regexp_null.h
new file mode 100644
index 00000000..d2227b9d
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_null.h
@@ -0,0 +1,22 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_
+#define _RE2C_IR_REGEXP_REGEXP_NULL_
+
+#include "src/ir/regexp/regexp.h"
+
+namespace re2c
+{
+
+class NullOp: public RegExp
+{
+public:
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t fixedLength ();
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+	void display (std::ostream & o) const;
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_NULL_
diff --git a/re2c/src/ir/regexp/regexp_rule.h b/re2c/src/ir/regexp/regexp_rule.h
new file mode 100644
index 00000000..3e6718f3
--- /dev/null
+++ b/re2c/src/ir/regexp/regexp_rule.h
@@ -0,0 +1,47 @@
+#ifndef _RE2C_IR_REGEXP_REGEXP_RULE_
+#define _RE2C_IR_REGEXP_REGEXP_RULE_
+
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/rule_rank.h"
+#include "src/parse/token.h"
+
+namespace re2c
+{
+
+class RuleOp: public RegExp
+{
+	RegExp * exp;
+
+public:
+	RegExp * ctx;
+	Ins * ins;
+	rule_rank_t rank;
+	Token * code;
+	uint32_t line;
+
+	inline RuleOp (RegExp * e, RegExp * c, Token * t, rule_rank_t r, InsAccess access)
+		: exp (e)
+		, ctx (c)
+		, ins (NULL)
+		, rank (r)
+		, code (t)
+		, line (0)
+	{
+		ins_access = access;
+	}
+	inline ~RuleOp ()
+	{
+		delete code;
+	}
+	void display (std::ostream & o) const;
+	void split (CharSet &);
+	void calcSize (Char *);
+	uint32_t compile (Char *, Ins *);
+	void decompile ();
+
+	FORBID_COPY (RuleOp);
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_IR_REGEXP_REGEXP_RULE_
diff --git a/re2c/src/dfa/rule_rank.cc b/re2c/src/ir/rule_rank.cc
similarity index 94%
rename from re2c/src/dfa/rule_rank.cc
rename to re2c/src/ir/rule_rank.cc
index 4120ff28..ca19f689 100644
--- a/re2c/src/dfa/rule_rank.cc
+++ b/re2c/src/ir/rule_rank.cc
@@ -1,6 +1,6 @@
 #include <ostream>
 
-#include "src/dfa/rule_rank.h"
+#include "src/ir/rule_rank.h"
 
 namespace re2c
 {
diff --git a/re2c/src/dfa/rule_rank.h b/re2c/src/ir/rule_rank.h
similarity index 89%
rename from re2c/src/dfa/rule_rank.h
rename to re2c/src/ir/rule_rank.h
index f3e49ebd..39473b88 100644
--- a/re2c/src/dfa/rule_rank.h
+++ b/re2c/src/ir/rule_rank.h
@@ -1,5 +1,5 @@
-#ifndef _RE2C_DFA_RULE_RANK_
-#define _RE2C_DFA_RULE_RANK_
+#ifndef _RE2C_IR_RULE_RANK_
+#define _RE2C_IR_RULE_RANK_
 
 #include <iosfwd>
 
@@ -36,4 +36,4 @@ public:
 
 } // namespace re2c
 
-#endif // _RE2C_DFA_RULE_RANK_
+#endif // _RE2C_IR_RULE_RANK_
diff --git a/re2c/src/main.cc b/re2c/src/main.cc
index 7d005971..d97f7283 100644
--- a/re2c/src/main.cc
+++ b/re2c/src/main.cc
@@ -4,8 +4,6 @@
 #include <string.h>
 
 #include "config.h"
-#include "src/dfa/dfa.h"
-#include "src/dfa/encoding/enc.h"
 #include "src/globals.h"
 #include "src/mbo_getopt.h"
 #include "src/parse/parser.h"
diff --git a/re2c/src/parse/extop.h b/re2c/src/parse/extop.h
new file mode 100644
index 00000000..44b33ff4
--- /dev/null
+++ b/re2c/src/parse/extop.h
@@ -0,0 +1,18 @@
+#ifndef _RE2C_PARSE_EXTOP_
+#define _RE2C_PARSE_EXTOP_
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c
+{
+
+struct ExtOp
+{
+	char op;
+	int32_t minsize;
+	int32_t maxsize;
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_PARSE_EXTOP_
diff --git a/re2c/src/parse/parser.h b/re2c/src/parse/parser.h
index f4b3f295..2c973279 100644
--- a/re2c/src/parse/parser.h
+++ b/re2c/src/parse/parser.h
@@ -1,10 +1,12 @@
 #ifndef _RE2C_PARSE_PARSER_
 #define _RE2C_PARSE_PARSER_
 
+#include <list>
 #include <string>
 
 #include "src/codegen/output.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_rule.h"
 #include "src/parse/scanner.h"
 
 namespace re2c
diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp
index 164f7db5..114ad940 100644
--- a/re2c/src/parse/parser.ypp
+++ b/re2c/src/parse/parser.ypp
@@ -8,9 +8,14 @@
 #include <set>
 
 #include "config.h"
-#include "src/dfa/dfa.h"
+#include "src/ir/bytecode/bytecode.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_closev.h"
+#include "src/ir/regexp/regexp_null.h"
 #include "src/codegen/emit.h" // genTypes
 #include "src/globals.h"
+#include "src/parse/extop.h"
 #include "src/parse/parser.h"
 #include "src/util/c99_stdint.h"
 #include "src/util/smart_ptr.h"
diff --git a/re2c/src/parse/scanner.h b/re2c/src/parse/scanner.h
index 1475dd70..51236b5a 100644
--- a/re2c/src/parse/scanner.h
+++ b/re2c/src/parse/scanner.h
@@ -4,7 +4,7 @@
 #include <string>
 
 #include "src/codegen/output.h"
-#include "src/dfa/re.h"
+#include "src/ir/regexp/regexp.h"
 #include "src/globals.h"
 #include "src/parse/input.h"
 #include "src/parse/token.h"
diff --git a/re2c/src/parse/scanner_lex.re b/re2c/src/parse/scanner_lex.re
index 6ddc5082..1eca7603 100644
--- a/re2c/src/parse/scanner_lex.re
+++ b/re2c/src/parse/scanner_lex.re
@@ -3,8 +3,9 @@
 #include <iostream>
 #include <sstream>
 
-#include "src/dfa/dfa.h"
+#include "src/ir/dfa/dfa.h"
 #include "src/globals.h"
+#include "src/parse/extop.h"
 #include "src/parse/parser.h"
 #include "src/parse/scanner.h"
 #include "y.tab.h"
diff --git a/re2c/src/parse/unescape.cc b/re2c/src/parse/unescape.cc
new file mode 100644
index 00000000..5064d5e8
--- /dev/null
+++ b/re2c/src/parse/unescape.cc
@@ -0,0 +1,220 @@
+#include <string.h>
+
+#include "src/parse/scanner.h"
+#include "src/util/substr.h"
+
+namespace re2c {
+
+uint32_t Scanner::unescape(SubStr &s) const
+{
+	static const char * hex = "0123456789abcdef";
+	static const char * oct = "01234567";
+
+	s.len--;
+	uint32_t c, ucb = 0;
+
+	if ((c = *s.str++) != '\\' || s.len == 0)
+	{
+		return c;
+	}
+
+	s.len--;
+
+	switch (c = *s.str++)
+	{
+		case 'n': return '\n';
+		case 't': return '\t';
+		case 'v': return '\v';
+		case 'b': return '\b';
+		case 'r': return '\r';
+		case 'f': return '\f';
+		case 'a': return '\a';
+
+		case 'x':
+		{
+			if (s.len < 2)
+			{
+				fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required");
+				return ~0u;
+			}
+			
+			const char *p1 = strchr(hex, tolower(s.str[0]));
+			const char *p2 = strchr(hex, tolower(s.str[1]));
+
+			if (!p1 || !p2)
+			{
+				fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code");
+				return ~0u;
+			}
+			else
+			{
+				s.len -= 2;
+				s.str += 2;
+				
+				uint32_t v = (uint32_t)((p1 - hex) << 4) 
+				       + (uint32_t)((p2 - hex));
+	
+				return v;
+			}
+		}
+
+		case 'U':
+		{
+			if (s.len < 8)
+			{
+				fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required");
+				return ~0u;
+			}
+
+			uint32_t l = 0;
+			if (s.str[0] == '0')
+			{
+				l++;
+				if (s.str[1] == '0')
+				{
+					l++;
+					if (s.str[2] == '0' || (s.str[2] == '1' && encoding.szCodePoint() == 4))
+					{
+						l++;
+						if (encoding.szCodePoint() == 4)
+						{
+							const char *u3 = strchr(hex, tolower(s.str[2]));
+							const char *u4 = strchr(hex, tolower(s.str[3]));
+							if (u3 && u4)
+							{
+								ucb = (uint32_t)((u3 - hex) << 20)
+							        + (uint32_t)((u4 - hex) << 16);
+								l++;
+							}
+						}
+						else if (s.str[3] == '0')
+						{
+							l++;
+						}
+					}
+				}
+			}
+
+			if (l != 4)
+			{
+				fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required");
+			}
+
+			s.len -= 4;
+			s.str += 4;
+			
+			// no break;
+		}
+		case 'X':
+		case 'u':
+		{
+			if (s.len < 4)
+			{
+				fatal(s.ofs()+s.len, 
+					c == 'X'
+					? "Illegal hexadecimal character code, four hexadecimal digits are required"
+					: "Illegal unicode character, four hexadecimal digits are required");
+				return ~0u;
+			}
+			
+			const char *p1 = strchr(hex, tolower(s.str[0]));
+			const char *p2 = strchr(hex, tolower(s.str[1]));
+			const char *p3 = strchr(hex, tolower(s.str[2]));
+			const char *p4 = strchr(hex, tolower(s.str[3]));
+
+			if (!p1 || !p2 || !p3 || !p4)
+			{
+				fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), 
+					c == 'X'
+					? "Illegal hexadecimal character code, non hexxdecimal digit found"
+					: "Illegal unicode character, non hexadecimal digit found");
+				return ~0u;
+			}
+			else
+			{
+				s.len -= 4;
+				s.str += 4;
+				
+				uint32_t v = (uint32_t)((p1 - hex) << 12) 
+				       + (uint32_t)((p2 - hex) <<  8)
+				       + (uint32_t)((p3 - hex) <<  4)
+				       + (uint32_t)((p4 - hex))
+				       + ucb;
+	
+				if (v >= encoding.nCodePoints())
+				{
+					fatal(s.ofs(),
+						c == 'X'
+						? "Illegal hexadecimal character code, out of range"
+						: "Illegal unicode character, out of range");
+				}
+	
+				return v;
+			}
+		}
+
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		{
+			fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3");
+			return ~0u;
+		}
+
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		{
+			if (s.len < 2)
+			{
+				fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required");
+				return ~0u;
+			}
+
+			const char *p0 = strchr(oct, c);
+			const char *p1 = strchr(oct, s.str[0]);
+			const char *p2 = strchr(oct, s.str[1]);
+
+			if (!p0 || !p1 || !p2)
+			{
+				fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found");
+				return ~0u;
+			}
+			else
+			{
+				s.len -= 2;
+				s.str += 2;
+				
+				uint32_t v = (uint32_t)((p0 - oct) << 6) + (uint32_t)((p1 - oct) << 3) + (uint32_t)(p2 - oct);
+	
+				return v;
+			}
+		}
+
+		default:
+		return c;
+	}
+}
+
+std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const
+{
+	str_out.clear();
+
+	while(str_in.len)
+	{
+		uint32_t c = unescape(str_in);
+
+		if (c > 0xFF)
+		{
+			fatal(str_in.ofs(), "Illegal character");
+		}
+
+		str_out += static_cast<char>(c);
+	}
+
+	return str_out;
+}
+
+} // namespace re2c
-- 
2.40.0