A DFA may be very large, but have only a few tagged transitions.
In such cases doing tag optimizations on the whole DFA is a waste:
space allocation and (respectively) processing time are proportional
to the total number of DFA transitions.
Instead, we spend a little effort to create a CFG. It has one basic
block for each tagged transition or tagged final state, plus a root
basic block.
* We build just some cover (not necessarily minimal).
* The algorithm takes quadratic (in the number of tags) time.
*/
-tagver_t tag_allocation(const dfa_t &dfa, const bool *interf,
+tagver_t tag_allocation(const cfg_t &cfg, const bool *interf,
tagver_t *ver2new)
{
const tagver_t
END = std::numeric_limits<tagver_t>::max(),
- nver = dfa.maxtagver + 1;
- const size_t
- nsym = dfa.nchars,
- narc = dfa.states.size() * nsym;
+ nver = cfg.dfa.maxtagver + 1;
tagver_t *next = new tagver_t[nver]; // list of class members
tagver_t *repr = new tagver_t[nver]; // maps tag to class representative
tagver_t rx, ry, x, y, z;
std::fill(repr, repr + nver, END);
// copy coalescing: for each command X = Y, try to merge X and Y
- for (size_t a = 0; a < narc; ++a) {
- const tagcopy_t *p = dfa.states[a / nsym]->tags[a % nsym].copy;
- for (; p; p = p->next) {
+ const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+ for (; b < e; ++b) {
+ for (const tagcopy_t *p = b->cmd->copy; p; p = p->next) {
x = p->lhs;
y = p->rhs;
rx = repr[x];
namespace re2c
{
-/* Dead code elimination */
-void tag_dce(dfa_t &dfa, const bool *live)
+void tag_dce(cfg_t &cfg, const bool *live)
{
- const size_t
- nsym = dfa.nchars,
- narc = dfa.states.size() * nsym,
- nver = static_cast<size_t>(dfa.maxtagver) + 1;
+ const tagver_t nver = cfg.dfa.maxtagver + 1;
+ cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
- for (size_t a = 0; a < narc; ++a) {
- tagcmd_t &cmd = dfa.states[a / nsym]->tags[a % nsym];
- const bool *liv = &live[a * nver];
-
- for (tagsave_t *s, **ps = &cmd.save; (s = *ps);) {
- if (!liv[s->ver]) {
+ for (; b < e; ++b, live += nver) {
+ for (tagsave_t *s, **ps = &b->cmd->save; (s = *ps);) {
+ if (!live[s->ver]) {
*ps = s->next;
} else {
ps = &s->next;
}
}
-
// rule tags and copy tags can't be dead by construction
// (copy tags are only used for fallback tags)
}
* However, after indexing different commands may share representation
* in memory, so they must not be modified.
*/
-void tag_indexing(dfa_t &dfa)
+void tag_indexing(const cfg_t &cfg) // runs doindex over the save and copy lists of every CFG bblock command
{
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
+ cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock; // [b, e) spans all bblocks, root included
lookup_t<tagsave_t*> saveindex;
lookup_t<tagcopy_t*> copyindex;
- for (size_t i = 0; i < nstate; ++i) {
- dfa_state_t *s = dfa.states[i];
-
- for (size_t c = 0; c < nsym; ++c) {
- tagcmd_t &cmd = s->tags[c];
- doindex(&cmd.save, saveindex);
- doindex(&cmd.copy, copyindex);
- }
-
- tagcmd_t &cmd = s->rule_tags;
- doindex(&cmd.save, saveindex);
- doindex(&cmd.copy, copyindex);
+ for (; b < e; ++b) { // one lookup table per list kind is shared by all bblocks
+ doindex(&b->cmd->save, saveindex); // NOTE(review): doindex is defined elsewhere; presumably it replaces the list with a shared indexed copy -- confirm
+ doindex(&b->cmd->copy, copyindex);
}
}
static void interfere(const tagcmd_t &cmd, bool *interf, bool *live, bool *buf, size_t nver);
-void tag_interference(const dfa_t &dfa, const bool *live, bool *interf)
+void tag_interference(const cfg_t &cfg, const bool *live, bool *interf)
{
- const size_t
- nstate = dfa.states.size(),
- ntag = dfa.tags.size(),
- nver = static_cast<size_t>(dfa.maxtagver) + 1,
- nsym = dfa.nchars;
+ const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
bool *buf1 = new bool[nver];
bool *buf2 = new bool[nver];
+ const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
memset(interf, 0, nver * nver * sizeof(bool));
- for (size_t i = 0; i < nstate; ++i) {
- const dfa_state_t *s = dfa.states[i];
-
- if (s->rule != Rule::NONE) {
- const tagver_t *liv = dfa.tagpool[dfa.rules[s->rule].tags];
- memset(buf1, 0, nver * sizeof(bool));
- for (size_t t = 0; t < ntag; ++t) {
- const tagver_t v = liv[t];
- if (v != TAGVER_ZERO) {
- buf1[v] = true;
- }
- }
- interfere(s->rule_tags, interf, buf1, buf2, nver);
- }
-
- for (size_t c = 0; c < nsym; ++c) {
- memcpy(buf1, &live[(i * nsym + c) * nver], nver * sizeof(bool));
- interfere(s->tags[c], interf, buf1, buf2, nver);
- }
+ for (; b < e; ++b, live += nver) {
+ memcpy(buf1, live, nver * sizeof(bool));
+ interfere(*b->cmd, interf, buf1, buf2, nver);
}
delete[] buf1;
namespace re2c
{
-static void forwprop(const dfa_t &dfa, bool *been, size_t state, bool *live, const bool *need);
-
-void tag_liveness(const dfa_t &dfa, bool *live)
+void tag_liveness(const cfg_t &cfg, bool *live)
{
+ const Tagpool &tagpool = cfg.dfa.tagpool;
const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars,
- narc = nstate * nsym,
- ntag = dfa.tags.size(),
- nver = static_cast<size_t>(dfa.maxtagver) + 1;
+ nbb = cfg.nbblock,
+ nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1,
+ ntag = tagpool.ntags;
bool *buf1 = new bool[nver];
bool *buf2 = new bool[nver];
- bool *been = new bool[nstate];
/* note [control flow equations for tag liveness]
*
* need-in(x) = need-out(x) - define(x)
*
* Equations are solved by iteration until fix point.
- * Basic blocks are arcs. Successors of arc s1 -> s2 are:
- * - all outgoing arcs from state s2
- * - if state s2 is final, corresponding rule action
*/
- memset(live, 0, narc * nver * sizeof(bool));
+ memset(live, 0, nbb * nver * sizeof(bool));
for (bool loop = true; loop;) {
loop = false;
- for (size_t a = 0; a < narc; ++a) {
- const size_t i = dfa.states[a / nsym]->arcs[a % nsym];
- if (i == dfa_t::NIL) continue;
- const dfa_state_t *s = dfa.states[i];
+ for (cfg_ix_t i = 0; i < nbb; ++i) {
+ const cfg_bb_t *b = cfg.bblocks + i;
memset(buf1, 0, nver * sizeof(bool));
-
- if (s->rule != Rule::NONE) {
- const tagver_t *use = dfa.tagpool[dfa.rules[s->rule].tags];
+ if (b->use != TAGVER_ZERO) {
+ // final bblock, no successors
+ const tagver_t *use = tagpool[b->use];
for (size_t t = 0; t < ntag; ++t) {
const tagver_t u = use[t];
if (u != TAGVER_ZERO) {
buf1[u] = true;
}
}
+ } else {
+ // transition bblock, no final rule tags
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ const bool *liv = &live[*j * nver];
+ memcpy(buf2, liv, nver * sizeof(bool));
+
+ for (const tagsave_t *p = cfg.bblocks[*j].cmd->save; p; p = p->next) {
+ buf2[p->ver] = false;
+ }
- for (const tagsave_t *p = s->rule_tags.save; p; p = p->next) {
- buf1[p->ver] = false;
- }
- }
-
- for (size_t c = 0; c < nsym; ++c) {
- const bool *liv = &live[(i * nsym + c) * nver];
- memcpy(buf2, liv, nver * sizeof(bool));
-
- for (const tagsave_t *p = s->tags[c].save; p; p = p->next) {
- buf2[p->ver] = false;
- }
-
- // copy tags are only used for fallback tags,
- // their liveness is handled in a special way
+ // copy tags are only used for fallback tags,
+ // their liveness is handled in a special way
- for (size_t v = 0; v < nver; ++v) {
- buf1[v] |= buf2[v];
+ for (size_t v = 0; v < nver; ++v) {
+ buf1[v] |= buf2[v];
+ }
}
}
- bool *liv = &live[a * nver];
+ bool *liv = &live[i * nver];
if (memcmp(liv, buf1, nver * sizeof(bool)) != 0) {
memcpy(liv, buf1, nver * sizeof(bool));
loop = true;
* but still we should prevent it from merging with other tags
* (otherwise it may become overwritten).
*/
- for (size_t i = 0; i < nstate; ++i) {
- const dfa_state_t *s = dfa.states[i];
- if (!s->fallback) continue;
+ for (cfg_ix_t i = 0; i < nbb; ++i) {
+ const cfg_bb_t *b = cfg.bblocks + i;
+
+ if (b->use == TAGVER_ZERO) continue;
- const tagver_t *use = dfa.tagpool[dfa.rules[s->rule].tags];
+ const tagver_t *use = tagpool[b->use];
memset(buf1, 0, nver * sizeof(bool));
for (size_t t = 0; t < ntag; ++t) {
const tagver_t u = use[t];
buf1[u] = true;
}
}
-
- for (const tagsave_t *p = s->rule_tags.save; p; p = p->next) {
+ for (const tagsave_t *p = b->cmd->save; p; p = p->next) {
buf1[p->ver] = false;
}
// in rule tags copies are swapped: LHS is the origin, RHS is backup
- for (const tagcopy_t *p = s->rule_tags.copy; p; p = p->next) {
+ for (const tagcopy_t *p = b->cmd->copy; p; p = p->next) {
buf1[p->lhs] = false;
buf1[p->rhs] = true;
}
- memset(been, 0, nstate * sizeof(bool));
- forwprop(dfa, been, i, live, buf1);
- }
-
- delete[] buf1;
- delete[] buf2;
- delete[] been;
-}
-
-void forwprop(const dfa_t &dfa, bool *been, size_t state, bool *live,
- const bool *need)
-{
- if (been[state]) return;
- been[state] = true;
-
- const size_t
- nsym = dfa.nchars,
- nver = static_cast<size_t>(dfa.maxtagver) + 1;
- const dfa_state_t *s = dfa.states[state];
-
- for (size_t c = 0; c < nsym; ++c) {
- const size_t dest = s->arcs[c];
- if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) {
- bool *l = &live[(state * nsym + c) * nver];
+ // final bblock has no successors, instead it has the list
+ // of all bblocks reachable by non-accepting DFA paths
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ bool *liv = &live[*j * nver];
for (size_t v = 0; v < nver; ++v) {
- l[v] |= need[v];
+ liv[v] |= buf1[v];
}
- forwprop(dfa, been, dest, live, need);
}
}
+
+ delete[] buf1;
+ delete[] buf2;
}
} // namespace re2c
namespace re2c
{
+static cfg_ix_t map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb);
+static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbblock);
+static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, const tagcmd_t *cmd, size_t use);
+static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
+static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
+
void optimize_tags(dfa_t &dfa)
{
if (dfa.maxtagver == 0) return;
- const size_t
- narc = dfa.states.size() * dfa.nchars,
- nver = static_cast<size_t>(dfa.maxtagver) + 1;
- bool *live = new bool[narc * nver];
+ cfg_t cfg(dfa); // build the control flow graph once; every pass below works on it instead of the whole DFA
+
+ const size_t nver = static_cast<size_t>(dfa.maxtagver) + 1; // tag versions are used as indices 0..maxtagver
+ bool *live = new bool[cfg.nbblock * nver]; // liveness table: one row of nver flags per bblock
bool *interf = new bool[nver * nver];
tagver_t *ver2new = new tagver_t[nver];
- tag_liveness(dfa, live);
- tag_dce(dfa, live);
- tag_interference(dfa, live, interf);
- const tagver_t maxver = tag_allocation(dfa, interf, ver2new);
- tag_renaming(dfa, ver2new, maxver);
- tag_indexing(dfa);
+ tag_liveness(cfg, live); // fills `live`
+ tag_dce(cfg, live); // unlinks save commands whose version is not live
+ tag_interference(cfg, live, interf); // fills the nver x nver table `interf`
+ const tagver_t maxver = tag_allocation(cfg, interf, ver2new); // fills `ver2new`, returns the new maximal version
+ tag_renaming(cfg, ver2new, maxver); // applies `ver2new` to commands and rule tags
+ tag_indexing(cfg); // must stay last: per the note on tag_indexing, commands must not be modified after indexing
delete[] live;
delete[] interf;
delete[] ver2new;
}
+cfg_t::cfg_t(dfa_t &a) // builds the CFG for DFA `a`: first numbers the bblocks, then creates them
+ : dfa(a)
+ , bblocks(NULL)
+ , nbblock(0)
+{
+ const size_t
+ nstate = dfa.states.size(),
+ narc = nstate * dfa.nchars;
+ cfg_ix_t *arc2bb = new cfg_ix_t[narc + nstate]; // scratch map: one slot per DFA arc, followed by one slot per DFA state (for final bblocks)
+
+ nbblock = map_arcs_to_bblocks(dfa, arc2bb); // fills arc2bb and returns the bblock count (root included)
+ bblocks = create_bblocks(dfa, arc2bb, nbblock); // allocates the bblock array; released in ~cfg_t
+
+ delete[] arc2bb; // the map is needed only during construction
+}
+
+cfg_ix_t map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb) // numbers the bblocks: writes nstate * nsym arc slots, then nstate final-state slots into arc2bb; returns the total bblock count
+{
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+
+ // first bblock is CFG root: it has no counterpart in DFA, so index 0 doubles as the "no bblock" marker in arc2bb
+ cfg_ix_t nbblock = 1;
+
+ // bblocks for tagged transitions in DFA
+ for (size_t i = 0; i < nstate; ++i) {
+ const tagcmd_t *tags = dfa.states[i]->tags;
+ for (size_t c = 0; c < nsym; ++c) {
+ *arc2bb++ = tags[c].empty() ? 0 : nbblock++; // slot index is i * nsym + c
+ }
+ }
+
+ // bblocks for final DFA states with rules that have tags
+ for (size_t i = 0; i < nstate; ++i) {
+ const dfa_state_t *s = dfa.states[i];
+ *arc2bb++ = s->rule == Rule::NONE || dfa.rules[s->rule].tags == ZERO_TAGS // slot index is nstate * nsym + i
+ ? 0 : nbblock++;
+ }
+
+ return nbblock; // next free index == number of bblocks, root included
+}
+
+cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbblock) // allocates nbblock bblocks and fills them in the same order in which map_arcs_to_bblocks numbered them
+{
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+ const cfg_ix_t *a2b = arc2bb; // read cursor over arc2bb; advances in step with the numbering order
+ cfg_ix_t *succb = new cfg_ix_t[nbblock], *succe; // succb: scratch buffer for one successor list, succe: its current end
+ bool *been = new bool[nstate]; // per-state "visited" flags, reset before every walk
+
+ cfg_bb_t *bblocks = new cfg_bb_t[nbblock], *b = bblocks; // b: next bblock to fill
+
+ // root bblock
+ std::fill(been, been + nstate, false);
+ successors(dfa, arc2bb, been, succe = succb, 0); // `succe = succb` resets the end pointer and passes it by reference; the walk starts at DFA state 0
+ basic_block(b++, succb, succe, new tagcmd_t, TAGVER_ZERO); // root gets its own freshly allocated command (deleted in ~cfg_t)
+
+ // transition bblocks
+ for (size_t i = 0; i < nstate; ++i) {
+ const dfa_state_t *s = dfa.states[i];
+ for (size_t c = 0; c < nsym; ++c) {
+ if (*a2b++ == 0) continue; // untagged arc: no bblock
+
+ std::fill(been, been + nstate, false);
+ successors(dfa, arc2bb, been, succe = succb, s->arcs[c]); // walk starts at the arc's destination state
+ basic_block(b++, succb, succe, &s->tags[c], TAGVER_ZERO); // command points into the DFA state, it is not copied
+ }
+ }
+
+ // final bblocks
+ for (size_t i = 0; i < nstate; ++i) {
+ if (*a2b++ == 0) continue; // state has no final bblock
+
+ const dfa_state_t *s = dfa.states[i];
+ std::fill(been, been + nstate, false);
+ fallback(dfa, arc2bb, been, succe = succb, i); // the "successor" list of a final bblock holds bblocks collected by fallback()
+ basic_block(b++, succb, succe, &s->rule_tags, dfa.rules[s->rule].tags); // use = tags of the state's rule
+ }
+
+ delete[] succb; // each bblock keeps its own copy of the list, see basic_block
+ delete[] been;
+ return bblocks;
+}
+
+void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, // initializes *bb with a private copy of the successor list [succb, succe), the command pointer and the tag use
+ const cfg_ix_t *succe, const tagcmd_t *cmd, size_t use)
+{
+ const size_t n = static_cast<size_t>(succe - succb); // number of successors
+ cfg_ix_t *s = new cfg_ix_t[n]; // released with delete[] in ~cfg_t
+ memcpy(s, succb, n * sizeof(cfg_ix_t));
+
+ bb->succb = s;
+ bb->succe = s + n; // one past the last successor
+ bb->cmd = const_cast<tagcmd_t*>(cmd); // stored non-const: passes such as tag_dce edit commands through the bblock
+ bb->use = use;
+}
+
+// find immediate successors of the given bblock: walk the DFA depth-first from state x and append to succ the first bblock met on each path
+void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
+ cfg_ix_t *&succ, size_t x)
+{
+ if (x == dfa_t::NIL || been[x]) return; // no such state, or state already visited during this walk
+ been[x] = true;
+
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars,
+ *a = dfa.states[x]->arcs;
+ const cfg_ix_t
+ *a2b = &arc2bb[x * nsym], // bblock indices of the arcs leaving state x
+ *f2b = &arc2bb[nstate * nsym]; // bblock indices of final states (stored after all arc slots)
+
+ for (size_t c = 0; c < nsym; ++c) {
+ const cfg_ix_t b = a2b[c];
+ if (b != 0) {
+ *succ++ = b; // tagged arc: its bblock is a successor, do not look behind it
+ } else {
+ successors(dfa, arc2bb, been, succ, a[c]); // untagged arc: continue the walk from its destination state
+ }
+ }
+
+ const cfg_ix_t b = f2b[x]; // final bblock of state x, 0 if there is none
+ if (b != 0) {
+ *succ++ = b;
+ }
+}
+
+// find all bblocks reachable from this one by following
+// non-accepting DFA paths: this is the set of bblocks affected
+// by liveness of fallback tags
+void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, // appends the found bblock indices to succ, starting the walk at DFA state x
+ cfg_ix_t *&succ, size_t x)
+{
+ if (x == dfa_t::NIL || been[x]) return; // no such state, or state already visited during this walk
+ been[x] = true;
+
+ const size_t
+ nsym = dfa.nchars,
+ *a = dfa.states[x]->arcs;
+ const cfg_ix_t *a2b = &arc2bb[x * nsym]; // bblock indices of the arcs leaving state x
+
+ for (size_t c = 0; c < nsym; ++c) {
+ const size_t y = a[c];
+ if (y != dfa_t::NIL && dfa.states[y]->fallthru) { // follow only arcs whose destination state has the fallthru flag set
+ const cfg_ix_t b = a2b[c];
+ if (b != 0) {
+ *succ++ = b; // tagged arc on the path: record its bblock
+ }
+ fallback(dfa, arc2bb, been, succ, y); // unlike successors(), the walk continues behind tagged arcs too
+ }
+ }
+}
+
+cfg_t::~cfg_t() // releases everything allocated by create_bblocks and basic_block
+{
+ cfg_bb_t *b = bblocks, *e = b + nbblock;
+
+ delete b->cmd; // only the root command (first bblock) was allocated with new; all other commands point into the DFA
+
+ for (; b < e; ++b) {
+ delete[] b->succb; // per-bblock successor array allocated in basic_block
+ }
+
+ delete[] bblocks;
+}
+
} // namespace re2c
namespace re2c {
-void tag_liveness(const dfa_t &dfa, bool *live);
-void tag_dce(dfa_t &dfa, const bool *live);
-void tag_interference(const dfa_t &dfa, const bool *live, bool *interf);
-tagver_t tag_allocation(const dfa_t &dfa, const bool *interf, tagver_t *ver2new);
-void tag_renaming(dfa_t &dfa, const tagver_t *ver2new, tagver_t maxver);
-void tag_indexing(dfa_t &dfa);
+typedef uint32_t cfg_ix_t;
+
+// basic block: the root, one tagged DFA transition, or one final DFA state whose rule has tags
+struct cfg_bb_t
+{
+ cfg_ix_t *succb; // first element of the successor index array (for final bblocks: the list collected by fallback())
+ cfg_ix_t *succe; // one past the last element of that array
+ tagcmd_t *cmd; // tag commands of this bblock; points into the DFA for every bblock except the root
+ size_t use; // final bblocks: tags of the rule (used as a tagpool index); TAGVER_ZERO for root and transition bblocks
+};
+
+// control flow graph built over a DFA; holds a reference to the DFA and an array of basic blocks
+struct cfg_t
+{
+ dfa_t &dfa; // the DFA this graph was built from
+ cfg_bb_t *bblocks; // array of nbblock basic blocks; element 0 is the root
+ cfg_ix_t nbblock; // number of basic blocks, root included
+
+ explicit cfg_t(dfa_t &a);
+ ~cfg_t();
+ FORBID_COPY(cfg_t); // NOTE(review): macro defined elsewhere; presumably disables copy construction and assignment -- confirm
+};
+
+void tag_liveness(const cfg_t &cfg, bool *live);
+void tag_dce(cfg_t &cfg, const bool *live);
+void tag_interference(const cfg_t &cfg, const bool *live, bool *interf);
+tagver_t tag_allocation(const cfg_t &cfg, const bool *interf, tagver_t *ver2new);
+void tag_renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver);
+void tag_indexing(const cfg_t &cfg);
} // namespace re2c
static void rename_save(tagsave_t **psave, const tagver_t *ver2new);
static void rename_copy(tagcopy_t **pcopy, const tagver_t *ver2new);
-void tag_renaming(dfa_t &dfa, const tagver_t *ver2new, tagver_t maxver)
+void tag_renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver) // applies ver2new to every bblock command and every rule's tags, and lowers the DFA's maxtagver to maxver
{
- tagver_t &oldmax = dfa.maxtagver;
+ tagver_t &oldmax = cfg.dfa.maxtagver; // reference: the assignment below updates the DFA itself
if (maxver >= oldmax) {
assert(maxver == oldmax);
return;
}
oldmax = maxver;
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars,
- nrule = dfa.rules.size();
-
- for (size_t i = 0; i < nstate; ++i) {
- dfa_state_t *s = dfa.states[i];
-
- for (size_t c = 0; c < nsym; ++c) {
- tagcmd_t &cmd = s->tags[c];
- rename_save(&cmd.save, ver2new);
- rename_copy(&cmd.copy, ver2new);
- }
-
- tagcmd_t &cmd = s->rule_tags;
- rename_save(&cmd.save, ver2new);
- rename_copy(&cmd.copy, ver2new);
+ cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock; // [b, e) spans all bblocks, root included
+ for (; b < e; ++b) { // rename versions in the save and copy lists of each bblock
+ rename_save(&b->cmd->save, ver2new);
+ rename_copy(&b->cmd->copy, ver2new);
}
- for (size_t i = 0; i < nrule; ++i) {
- rename_rule(dfa.tagpool, dfa.rules[i].tags, ver2new);
+ std::valarray<Rule> &rules = cfg.dfa.rules;
+ Tagpool &tagpool = cfg.dfa.tagpool;
+ for (size_t i = 0; i < rules.size(); ++i) { // rule tags are renamed for all rules of the DFA, not per bblock
+ rename_rule(tagpool, rules[i].tags, ver2new); // NOTE(review): rename_rule is defined elsewhere; presumably it updates rules[i].tags in place -- confirm
}
}