bool is_better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool);
static bool compare_by_rule(const clos_t &c1, const clos_t &c2);
static void prune_final_items(closure_t &clos, std::valarray<Rule> &rules);
-static bool not_fin(const clos_t &c);
-static tagsave_t *merge_transition_tags(closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, tagver_t &maxver);
+static void update_versions(closure_t &clos, Tagpool &tagpool, tagver_t &maxver);
-tagsave_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
- tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver)
+void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
+ std::valarray<Rule> &rules, tagver_t &maxver)
{
// build tagged epsilon-closure of the given set of NFA states
clos2.clear();
std::sort(clos2.begin(), clos2.end(), compare_by_rule);
// merge tags from different rules, find nondeterministic tags
- return merge_transition_tags(clos2, tagpool, tcpool, maxver);
+ update_versions(clos2, tagpool, maxver);
}
/* note [epsilon-closures in tagged NFA]
clositer_t
b = clos.begin(),
e = clos.end(),
- f = std::partition(b, e, not_fin);
+ f = std::partition(b, e, clos_t::not_fin);
if (f != e) {
std::partial_sort(f, f, e, compare_by_rule);
// mark all rules except the first one as shadowed
}
}
-bool not_fin(const clos_t &c)
-{
- return c.state->type != nfa_state_t::FIN;
-}
-
-tagsave_t *merge_transition_tags(closure_t &clos, Tagpool &tagpool,
- tcpool_t &tcpool, tagver_t &maxver)
+void update_versions(closure_t &clos, Tagpool &tagpool, tagver_t &maxver)
{
const size_t ntag = tagpool.ntags;
tagver_t *cur = tagpool.buffer1,
c->tvers = tagpool.insert(ver);
}
-
- return tcpool.conv_to_save(bot, cur, ntag);
}
} // namespace re2c
size_t tvers; // tag versions
size_t ttran; // transition tags (lookahead tags of parent closure)
size_t tlook; // lookahead tags (transition tags of child closures)
+
+ static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
+ static inline bool not_fin(const clos_t &c) { return !fin(c); }
};
typedef std::vector<clos_t> closure_t;
typedef closure_t::iterator clositer_t;
typedef closure_t::const_iterator cclositer_t;
-tagsave_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
- tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver);
+void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
+ std::valarray<Rule> &rules, tagver_t &maxver);
} // namespace re2c
{
const size_t ntag = vartags.size();
Tagpool tagpool(ntag);
- kernels_t kernels(tagpool, tcpool);
+ kernels_t kernels(tagpool);
closure_t clos1, clos2;
dump_dfa_t dump(*this, tagpool, nfa);
// other versions: [ .. -(2*N+1)] and [2*N+1 .. ]
maxtagver = static_cast<tagver_t>(ntag) * 2;
+ // iterate while new kernels are added: for each alphabet symbol,
+ // build tagged epsilon-closure of all reachable NFA states,
+ // then find identical or mappable DFA state or add a new one
+
clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS, ZERO_TAGS};
clos1.push_back(c0);
- closure(clos1, clos2, tagpool, tcpool, rules, maxtagver);
- kernels.insert(clos2, NULL, maxtagver);
- dump.state0(clos2);
+ closure(clos1, clos2, tagpool, rules, maxtagver);
+ find_state(*this, dfa_t::NIL, 0/* any */, tagpool, kernels, clos2, dump);
- // closure kernels are in sync with DFA states
for (size_t i = 0; i < kernels.size(); ++i) {
- const kernel_t *kernel = kernels[i];
-
- // create new DFA state
- dfa_state_t *s = new dfa_state_t(nchars);
- states.push_back(s);
-
- // check if the new state is final
- // see note [at most one final item per closure]
- for (size_t j = 0; j < kernel->size; ++j) {
- const nfa_state_t *f = kernel->state[j];
- if (f->type == nfa_state_t::FIN) {
- s->rule = f->rule;
- const Rule &rule = rules[s->rule];
- s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tvers[j]],
- tagpool[kernel->tlook[j]], finvers, rule.lvar, rule.hvar);
- dump.final(i, f);
- break;
- }
- }
-
- // for each alphabet symbol, build tagged epsilon-closure
- // of all NFA states reachable on that symbol, then try to
- // find identical closure or add the new one
for (size_t c = 0; c < nchars; ++c) {
- reach(kernel, clos1, charset[c]);
- s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver);
- s->arcs[c] = kernels.insert(clos2, &s->tcmd[c], maxtagver);
- dump.state(clos2, i, c);
+ reach(kernels[i], clos1, charset[c]);
+ closure(clos1, clos2, tagpool, rules, maxtagver);
+ find_state(*this, i, c, tagpool, kernels, clos2, dump);
}
}
, tagpool(pool)
, uniqidx(0)
, base(n.states)
- , done()
{
if (!debug) return;
{
if (!debug) return;
- done.insert(0);
-
closure(clos, 0, true);
-
fprintf(stderr, " void [shape=point]\n");
for (cclositer_t c = clos.begin(); c != clos.end(); ++c) {
fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", index(c->state));
}
}
-void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol)
+void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew)
{
if (!debug) return;
if (state2 == dfa_t::NIL) return;
- const bool isnew = done.insert(state2).second;
const tagcopy_t *copy = s->tcmd[symbol].copy;
const uint32_t
a = static_cast<uint32_t>(symbol),
const char *prefix = isnew ? "" : "i";
closure(clos, z, isnew);
-
if (!isnew) {
fprintf(stderr, " i%u [style=dotted]\n"
" i%u -> %u [style=dotted label=\"", z, z, y);
}
fprintf(stderr, "\"]\n");
}
-
for (cclositer_t c = clos.begin(); c != clos.end(); ++c) {
fprintf(stderr, " %u:%u -> %s%u:%u [label=\"%u",
x, index(c->origin), prefix, z, index(c->state), a);
#ifndef _RE2C_IR_DFA_DUMP_
#define _RE2C_IR_DFA_DUMP_
-#include <set>
-
#include "src/ir/dfa/closure.h"
#include "src/ir/dfa/dfa.h"
const Tagpool &tagpool;
uint32_t uniqidx;
const nfa_state_t *base;
- std::set<size_t> done;
dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n);
~dump_dfa_t();
void closure(const closure_t &clos, uint32_t state, bool isnew);
void state0(const closure_t &clos);
- void state(const closure_t &clos, size_t state, size_t symbol);
+ void state(const closure_t &clos, size_t state, size_t symbol, bool isnew);
void final(size_t state, const nfa_state_t *port);
uint32_t index(const nfa_state_t *s);
FORBID_COPY(dump_dfa_t);
}
};
-mapping_t::mapping_t(Tagpool &tagp, tcpool_t &tcp)
- : cmd(NULL)
- , type(opts->dfa_mapping)
- , tagpool(tagp)
- , tcpool(tcp)
- , max(0)
+mapping_t::mapping_t(Tagpool &pool)
+ : type(opts->dfa_mapping)
, cap(0)
, mem(NULL)
+ , tagpool(pool)
+ , max(0)
, x2t(NULL)
, x2y(NULL)
, y2x(NULL)
delete[] mem;
}
-void mapping_t::init(tagver_t v, tcmd_t *c)
+void mapping_t::init(tagver_t v)
{
// +1 to ensure max tag version is not forgotten in loops
max = v + 1;
- cmd = c;
if (cap < max) {
cap = max * 2; // in advance
* subsequence for the given tag is monotonically increasing.
*/
-/* note [save(X), copy(Y,X) optimization]
- *
- * 'Save' command 'X <- ...' followed by a 'copy' command 'Y <- X'
- * can be optimized to 'save' command 'Y <- ...'. This way we end
- * up with less commands ans less tag versions (new version X is
- * gone), but more importantly, we can safely put 'copy' commands
- * in front of 'save' commands. This order is necessary when it
- * comes to fallback commands.
- * This optimization is applied after checking priorities, so it
- * cannot affect them.
-*/
-
static bool compatible_kernels(const kernel_t *x, const kernel_t *y)
{
return x->size == y->size
if (y <= pred[t]) return false;
pred[t] = y;
}
-
- // all good; finally convert mapping to commands
- // see note [save(X), copy(Y,X) optimization]
- for (tagsave_t *s = cmd->save; s; s = s->next) {
- tagver_t y = s->ver, x = y2x[y];
- if (x == TAGVER_ZERO) {
- y = -y;
- x = y2x[y];
- }
- if (x != TAGVER_ZERO) {
- y2x[y] = x2y[x] = TAGVER_ZERO;
- s->ver = abs(x);
- }
- }
- for (tagver_t x = -max; x < max; ++x) {
- const tagver_t y = x2y[x];
- if (y != TAGVER_ZERO && y != x) {
- cmd->copy = tcpool.make_copy(cmd->copy, abs(x), abs(y));
- }
- }
- tagcopy_t::topsort(&cmd->copy, indeg);
return true;
}
-kernels_t::kernels_t(Tagpool &tagpool, tcpool_t &tcpool)
+kernels_t::kernels_t(Tagpool &tagpool)
: lookup()
- , mapping(tagpool, tcpool)
+ , mapping(tagpool)
, maxsize(256) // usually ranges from one to some twenty
, buffer(new kernel_t(maxsize))
{}
return lookup[idx];
}
-size_t kernels_t::insert(const closure_t &clos, tcmd_t *cmd, tagver_t maxver)
+kernels_t::result_t kernels_t::insert(const closure_t &clos, tagver_t maxver)
{
const size_t nkern = clos.size();
+ size_t x = dfa_t::NIL;
// empty closure corresponds to default state
- if (nkern == 0) return dfa_t::NIL;
+ if (nkern == 0) return result_t(x, NULL, false);
// resize buffer if closure is too large
if (maxsize < nkern) {
// try to find identical kernel
kernel_eq_t eq;
- size_t idx = lookup.find_with(hash, buffer, eq);
- if (idx != index_t::NIL) return idx;
+ x = lookup.find_with(hash, buffer, eq);
+ if (x != index_t::NIL) return result_t(x, NULL, false);
// else try to find mappable kernel
- mapping.init(maxver, cmd);
- idx = lookup.find_with(hash, buffer, mapping);
- if (idx != index_t::NIL) return idx;
+ mapping.init(maxver);
+ x = lookup.find_with(hash, buffer, mapping);
+ if (x != index_t::NIL) return result_t(x, &mapping, false);
// otherwise add new kernel
- return lookup.push(hash, kernel_t::copy(*buffer));
+ x = lookup.push(hash, kernel_t::copy(*buffer));
+ return result_t(x, NULL, true);
+}
+
+/* note [save(X), copy(Y,X) optimization]
+ *
+ * 'Save' command 'X <- ...' followed by a 'copy' command 'Y <- X'
+ * can be optimized to 'save' command 'Y <- ...'. This way we end
+ * up with fewer commands and fewer tag versions (new version X is
+ * gone), but more importantly, we can safely put 'copy' commands
+ * in front of 'save' commands. This order is necessary when it
+ * comes to fallback commands.
+ * This optimization is applied after checking priorities, so it
+ * cannot affect them.
+*/
+
+// Builds the tag commands for a transition into the given closure.
+//
+// For every tag, a 'save' command is emitted for the first closure item
+// whose transition tags hold TAGVER_CURSOR for that tag, and a 'bottom'
+// save command for the first item whose transition tags hold TAGVER_BOTTOM.
+//
+// If 'mapping' is non-NULL (the target kernel was matched through a
+// mapping), save commands are retargeted to the mapped versions and the
+// remaining non-identity mappings are turned into 'copy' commands, which
+// are then topologically sorted. Note that this clears the consumed
+// entries of mapping->x2y and mapping->y2x.
+//
+// Returns the resulting pair of save and copy command lists.
+static tcmd_t commands(const closure_t &closure, const Tagpool &tagpool,
+	tcpool_t &tcpool, mapping_t *mapping)
+{
+	tagsave_t *save = NULL;
+	tagcopy_t *copy = NULL;
+	cclositer_t c1 = closure.begin(), c2 = closure.end(), c;
+
+	for (size_t t = 0; t < tagpool.ntags; ++t) {
+		// first item that sets this tag to the current position
+		for (c = c1; c != c2 && tagpool[c->ttran][t] != TAGVER_CURSOR; ++c);
+		if (c != c2) save = tcpool.make_save(save, tagpool[c->tvers][t], false);
+
+		// first item that sets this tag to bottom; the version is negated
+		// before being stored in the save command
+		for (c = c1; c != c2 && tagpool[c->ttran][t] != TAGVER_BOTTOM; ++c);
+		if (c != c2) save = tcpool.make_save(save, -tagpool[c->tvers][t], true);
+	}
+
+	if (mapping) {
+		tagver_t max = mapping->max,
+			*x2y = mapping->x2y,
+			*y2x = mapping->y2x;
+
+		// see note [save(X), copy(Y,X) optimization]
+		for (tagsave_t *s = save; s; s = s->next) {
+			// undo the negation applied to bottom versions above
+			const tagver_t
+				y = s->bottom ? -s->ver : s->ver,
+				x = y2x[y];
+			if (x != TAGVER_ZERO) {
+				// the save now writes the mapped version directly,
+				// so this pair must not produce a copy command below
+				y2x[y] = x2y[x] = TAGVER_ZERO;
+				s->ver = abs(x);
+			}
+		}
+		for (tagver_t x = -max; x < max; ++x) {
+			const tagver_t y = x2y[x];
+			if (y != TAGVER_ZERO && y != x) {
+				copy = tcpool.make_copy(copy, abs(x), abs(y));
+			}
+		}
+		// see note [topological ordering of copy commands]
+		tagcopy_t::topsort(&copy, mapping->indeg);
+	}
+
+	return tcmd_t(save, copy);
+}
+
+// Builds the tag commands executed when a final state accepts: for each
+// tag of the rule (range [rule.lvar, rule.hvar)) the final version is
+// either saved (when the item has a lookahead tag) or copied from the
+// absolute value of the item's current version of that tag.
+static tcmd_t finalizer(const clos_t &clos, const Rule &rule,
+	const tagver_t *fins, const Tagpool &tagpool, tcpool_t &tcpool)
+{
+	const tagver_t *versions = tagpool[clos.tvers];
+	const tagver_t *lookahead = tagpool[clos.tlook];
+
+	tagsave_t *saves = NULL;
+	tagcopy_t *copies = NULL;
+
+	for (size_t tag = rule.lvar; tag < rule.hvar; ++tag) {
+		const tagver_t target = fins[tag];
+		const tagver_t look = lookahead[tag];
+
+		if (look == TAGVER_ZERO) {
+			// no lookahead tag: take the value from the current version
+			copies = tcpool.make_copy(copies, target, abs(versions[tag]));
+		} else {
+			// lookahead tag present: save directly into the final version
+			saves = tcpool.make_save(saves, target, look == TAGVER_BOTTOM);
+		}
+	}
+
+	return tcmd_t(saves, copies);
+}
+
+// Finds the kernel matching the given closure (adding a new one, together
+// with a new DFA state, if there is none) and, unless 'state' is
+// dfa_t::NIL (initial state), records it as the target of the transition
+// from 'state' on 'symbol' along with the transition's tag commands.
+void find_state(dfa_t &dfa, size_t state, size_t symbol,
+	const Tagpool &tagpool, kernels_t &kernels,
+	const closure_t &closure, dump_dfa_t &dump)
+{
+	const kernels_t::result_t found = kernels.insert(closure, dfa.maxtagver);
+
+	if (found.isnew) {
+		// a DFA state for the newly added kernel
+		dfa_state_t *fresh = new dfa_state_t(dfa.nchars);
+		dfa.states.push_back(fresh);
+
+		// a final item in the closure makes the new state final
+		// see note [at most one final item per closure]
+		const cclositer_t last = closure.end();
+		const cclositer_t fin = std::find_if(closure.begin(), last, clos_t::fin);
+		if (fin != last) {
+			fresh->rule = fin->state->rule;
+			// slot 'nchars' of 'tcmd' receives the finalizer commands
+			fresh->tcmd[dfa.nchars] = finalizer(*fin, dfa.rules[fresh->rule],
+				dfa.finvers, tagpool, dfa.tcpool);
+			dump.final(found.state, fin->state);
+		}
+	}
+
+	if (state != dfa_t::NIL) {
+		// ordinary transition: connect origin state to the found state
+		dfa_state_t *origin = dfa.states[state];
+		origin->arcs[symbol] = found.state;
+		origin->tcmd[symbol] = commands(closure, tagpool, dfa.tcpool, found.mapping);
+		dump.state(closure, state, symbol, found.isnew);
+	} else {
+		// initial state: there is no incoming transition to record
+		dump.state0(closure);
+	}
+}
} // namespace re2c
#define _RE2C_IR_DFA_FIND_STATE_
#include "src/ir/dfa/closure.h"
+#include "src/ir/dfa/dump.h"
#include "src/util/forbid_copy.h"
#include "src/util/lookup.h"
{
enum type_t {BIJECTIVE, INJECTIVE};
- tcmd_t *cmd;
-
private:
const type_t type;
-
+ tagver_t cap; // capacity (greater or equal to max)
+ char *mem;
Tagpool &tagpool;
- tcpool_t &tcpool;
+public:
tagver_t max; // maximal tag version
- tagver_t cap; // capacity (greater or equal to max)
- char *mem;
size_t *x2t;
tagver_t *x2y;
tagver_t *y2x;
uint32_t *indeg;
-public:
- mapping_t(Tagpool &tagp, tcpool_t &tcp);
+ explicit mapping_t(Tagpool &pool);
~mapping_t();
- void init(tagver_t v, tcmd_t *c);
+ void init(tagver_t v);
bool operator()(const kernel_t *k1, const kernel_t *k2);
FORBID_COPY(mapping_t);
};
struct kernels_t
{
+ struct result_t // outcome of kernels_t::insert for one closure
+ {
+ size_t state; // kernel index, or dfa_t::NIL when the closure was empty
+ mapping_t *mapping; // non-NULL only when an existing kernel was matched through a mapping
+ bool isnew; // true only when a new kernel was added to the lookup
+
+ result_t(size_t s, mapping_t *m, bool n)
+ : state(s)
+ , mapping(m)
+ , isnew(n)
+ {}
+ };
+
private:
typedef lookup_t<const kernel_t*> index_t;
kernel_t *buffer;
public:
- kernels_t(Tagpool &tagpool, tcpool_t &tcpool);
+ explicit kernels_t(Tagpool &tagpool);
~kernels_t();
size_t size() const;
const kernel_t* operator[](size_t idx) const;
- size_t insert(const closure_t &clos, tcmd_t *cmd, tagver_t maxver);
+ result_t insert(const closure_t &clos, tagver_t maxver);
FORBID_COPY(kernels_t);
};
+void find_state(dfa_t &dfa, size_t state, size_t symbol,
+ const Tagpool &tagpool, kernels_t &kernels,
+ const closure_t &closure, dump_dfa_t &dump);
+
} // namespace re2c
#endif // _RE2C_IR_DFA_FIND_STATE_
return p;
}
-tagsave_t *tcpool_t::conv_to_save(const tagver_t *bottom, const tagver_t *cursor, size_t ntag)
-{
- tagsave_t *s = NULL;
- for (size_t t = ntag; t-- > 0;) {
- const tagver_t b = abs(bottom[t]), c = abs(cursor[t]);
- if (b != TAGVER_ZERO) {
- s = make_save(s, b, true);
- }
- if (c != TAGVER_ZERO) {
- s = make_save(s, c, false);
- }
- }
- return s;
-}
-
-tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *tran,
- const tagver_t *fins, size_t ltag, size_t htag)
-{
- tagsave_t *s = NULL;
- tagcopy_t *c = NULL;
- for (size_t t = ltag; t < htag; ++t) {
- const tagver_t u = tran[t], v = abs(vers[t]), f = fins[t];
- if (u != TAGVER_ZERO) {
- s = make_save(s, f, u == TAGVER_BOTTOM);
- } else {
- c = make_copy(c, f, v);
- }
- }
- return tcmd_t(s, c);
-}
-
uint32_t hash_tcmd(const tagsave_t *save, const tagcopy_t *copy)
{
uint32_t h = 0;
public:
tcpool_t();
-
tagsave_t *make_save(tagsave_t *next, tagver_t ver, bool bottom);
tagcopy_t *make_copy(tagcopy_t *next, tagver_t lhs, tagver_t rhs);
- tagsave_t *conv_to_save(const tagver_t *bottom, const tagver_t *cursor, size_t ntag);
- tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *tran, const tagver_t *fins, size_t ltag, size_t htag);
-
tcid_t insert(const tagsave_t *save, const tagcopy_t *copy);
const tccmd_t &operator[](tcid_t id) const;
};