typedef std::vector<std::string> code_lines_t;
-void emit_action(OutputFile &o, uint32_t ind, bool &readCh,
- const DFA &dfa, const State *s, const std::set<label_t> &used_labels);
-void gen_goto_plain(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback);
-void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback);
-void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback);
-void gen_settags(code_lines_t &code, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback);
+void emit_action(OutputFile &o, uint32_t ind, bool &readCh, const DFA &dfa, const State *s, const std::set<label_t> &used_labels);
+void gen_goto_plain(OutputFile &o, uint32_t ind, bool &readCh, const State *to, const DFA &dfa, const tagcmd_t &cmd);
+void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh, const State *to, const DFA &dfa, const tagcmd_t &cmd);
+void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh, const State *to, const DFA &dfa, const tagcmd_t &cmd);
+void gen_settags(code_lines_t &code, const DFA &dfa, const tagcmd_t &cmd);
std::string vartag_name(tagver_t ver);
std::string vartag_expr(tagver_t ver);
-std::string vartag_name_fallback(tagver_t ver);
-std::string vartag_expr_fallback(tagver_t ver);
} // namespace re2c
static void genYYFill(OutputFile &o, size_t need);
static void genSetCondition(OutputFile &o, uint32_t ind, const std::string &cond);
static void genSetState(OutputFile &o, uint32_t ind, uint32_t fillIndex);
-static void gen_goto(code_lines_t &code, bool &readCh, const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback);
+static void gen_goto(code_lines_t &code, bool &readCh, const State *to, const DFA &dfa, const tagcmd_t &cmd);
static void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule);
void emit_action(OutputFile &o, uint32_t ind, bool &readCh,
o.wind(--ind).ws("}\n");
} else {
const accept_t &acc = *s->action.info.accepts;
- gen_goto_plain(o, ind, readCh, acc[l].first, dfa, acc[l].second, true);
+ gen_goto_plain(o, ind, readCh, acc[l].first, dfa, acc[l].second);
}
}
// only one possible 'yyaccept' value: unconditional jump
if (nacc == 1) {
- gen_goto_plain(o, ind, readCh, acc[0].first, dfa, acc[0].second, true);
+ gen_goto_plain(o, ind, readCh, acc[0].first, dfa, acc[0].second);
return;
}
o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
for (uint32_t i = 0; i < nacc - 1; ++i) {
o.wind(ind).ws("case ").wu32(i).ws(": ");
- gen_goto_case(o, ind, readCh, acc[i].first, dfa, acc[i].second, true);
+ gen_goto_case(o, ind, readCh, acc[i].first, dfa, acc[i].second);
}
o.wind(ind).ws("default:");
- gen_goto_case(o, ind, readCh, acc[nacc - 1].first, dfa, acc[nacc - 1].second, true);
+ gen_goto_case(o, ind, readCh, acc[nacc - 1].first, dfa, acc[nacc - 1].second);
o.wind(ind).ws("}\n");
}
}
void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags,
- bool restore_fallback)
+ const State *to, const DFA &dfa, const tagcmd_t &cmd)
{
code_lines_t code;
- gen_goto(code, readCh, to, dfa, tags, restore_fallback);
+ gen_goto(code, readCh, to, dfa, cmd);
const size_t lines = code.size();
if (lines == 1) {
}
void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags,
- bool restore_fallback)
+ const State *to, const DFA &dfa, const tagcmd_t &cmd)
{
code_lines_t code;
- gen_goto(code, readCh, to, dfa, tags, restore_fallback);
+ gen_goto(code, readCh, to, dfa, cmd);
const size_t lines = code.size();
if (lines == 1) {
}
void gen_goto_plain(OutputFile &o, uint32_t ind, bool &readCh,
- const State *to, const DFA &dfa, const tagcmd_t &tags,
- bool restore_fallback)
+ const State *to, const DFA &dfa, const tagcmd_t &cmd)
{
code_lines_t code;
- gen_goto(code, readCh, to, dfa, tags, restore_fallback);
+ gen_goto(code, readCh, to, dfa, cmd);
const size_t lines = code.size();
for (size_t i = 0; i < lines; ++i) {
}
void gen_goto(code_lines_t &code, bool &readCh, const State *to,
- const DFA &dfa, const tagcmd_t &tags, bool restore_fallback)
+ const DFA &dfa, const tagcmd_t &cmd)
{
if (to == NULL) {
readCh = false;
code.push_back(opts->input_api.stmt_peek(0));
readCh = false;
}
- gen_settags(code, dfa, tags, restore_fallback);
+ gen_settags(code, dfa, cmd);
if (to) {
code.push_back("goto " + opts->labelPrefix
+ to_string(to->label) + ";\n");
}
}
-void gen_settags(code_lines_t &code, const DFA &dfa,
- const tagcmd_t &cmd, bool restore_fallback)
+void gen_settags(code_lines_t &code, const DFA &dfa, const tagcmd_t &cmd)
{
const bool generic = opts->input_api.type() == InputAPI::CUSTOM;
- const tagver_t
- *set = dfa.tagpool[cmd.set],
- *copy = dfa.tagpool[cmd.copy];
+ const tagver_t *set = dfa.tagpool[cmd.set];
const std::valarray<Tag> &tags = dfa.tags;
const size_t ntags = tags.size();
std::string line;
// single tag YYCTXMARKER, backwards compatibility
if (cmd.set != ZERO_TAGS && dfa.oldstyle_ctxmarker) {
- assert(cmd.copy == ZERO_TAGS);
+ assert(cmd.copy == NULL);
line = generic
? opts->yybackupctx + " ();\n"
: opts->yyctxmarker + " = " + opts->yycursor + ";\n";
}
// copy
- for (size_t i = 0; i < ntags; ++i) {
- const tagver_t v = copy[i];
- if (v != TAGVER_ZERO) {
- std::string
- x = vartag_expr(v),
- y = vartag_expr_fallback(v);
- if (restore_fallback) std::swap(x, y);
- line = generic
- ? opts->yycopytag + " (" + y + ", " + x + ");\n"
- : y + " = " + x + ";\n";
- code.push_back(line);
- }
+ for (const tagcopy_t *p = cmd.copy; p; p = p->next) {
+ std::string
+ l = vartag_expr(p->lhs),
+ r = vartag_expr(p->rhs);
+ line = generic
+ ? opts->yycopytag + " (" + l + ", " + r + ");\n"
+ : l + " = " + r + ";\n";
+ code.push_back(line);
}
// set
for (tagver_t v = 1; v <= maxtagver; ++v) {
tagnames.insert(vartag_name(v));
}
- const tagver_t *copy = tagpool[copy_tags];
- for (size_t i = 0; i < ntags; ++i) {
- const tagver_t v = copy[i];
- if (v != TAGVER_ZERO) {
- tagnames.insert(vartag_name_fallback(v));
- }
- }
ob.tags.insert(tagnames.begin(), tagnames.end());
}
return e;
}
-std::string vartag_name_fallback(tagver_t ver)
-{
- return vartag_name(ver) + "_";
-}
-
-std::string vartag_expr_fallback(tagver_t ver)
-{
- const std::string s = vartag_name_fallback(ver);
- std::string e = opts->tags_expression;
- strrreplace(e, "@@", s);
- return e;
-}
-
} // end namespace re2c
for (uint32_t i = 1; i < cases_size; ++i) {
const Case &c = cases[i];
c.emit(o, ind);
- gen_goto_case(o, ind, readCh, c.to, dfa, c.tags, false);
+ gen_goto_case(o, ind, readCh, c.to, dfa, c.tags);
}
// default case must be the last one
const Case &c = cases[0];
o.wind(ind).ws("default:");
- gen_goto_case(o, ind, readCh, c.to, dfa, c.tags, false);
+ gen_goto_case(o, ind, readCh, c.to, dfa, c.tags);
o.wind(ind).ws("}\n");
}
const Cond *cond = b.cond;
if (cond) {
output_if(o, ind, readCh, cond->compare, cond->value);
- gen_goto_if(o, ind, readCh, b.to, dfa, b.tags, false);
+ gen_goto_if(o, ind, readCh, b.to, dfa, b.tags);
} else {
- gen_goto_plain(o, ind, readCh, b.to, dfa, b.tags, false);
+ gen_goto_plain(o, ind, readCh, b.to, dfa, b.tags);
}
}
}
o.wu32(bitmap->m);
}
o.ws(") {\n");
- gen_goto_plain(o, ind + 1, readCh, bitmap_state, dfa, tagcmd_t(), false);
+ gen_goto_plain(o, ind + 1, readCh, bitmap_state, dfa, tagcmd_t());
o.wind(ind).ws("}\n");
if (lgo != NULL)
{
void Go::emit (OutputFile & o, uint32_t ind, const DFA &dfa, bool & readCh)
{
code_lines_t code;
- gen_settags(code, dfa, tags, NULL);
+ gen_settags(code, dfa, tags);
for (size_t i = 0; i < code.size(); ++i) {
o.wind(ind).wstring(code[i]);
}
, rules(dfa.rules)
, tags(dfa.tags)
, tagpool(dfa.tagpool)
- , copy_tags(dfa.copy_tags)
, max_fill (0)
, need_backup (false)
, need_accept (false)
std::valarray<Rule> &rules;
std::valarray<Tag> &tags;
Tagpool &tagpool;
- size_t copy_tags;
size_t max_fill;
bool need_backup;
bool need_accept;
// re2c should use old-style YYCTXMARKER for backwards compatibility.
// Note that with generic API fixed-length contexts are forbidden,
// which may cause additional overlaps.
- oldstyle_ctxmarker = maxtagver == 1
- && copy_tags == ZERO_TAGS
- && !opts->tags;
+ oldstyle_ctxmarker = maxtagver == 1 && !opts->tags;
// error if tags are not enabled, but we need them
- if (!opts->tags
- && (maxtagver > 1 || copy_tags != ZERO_TAGS)) {
+ if (!opts->tags && maxtagver > 1) {
error("line %u: overlapping trailing contexts need "
"multiple context markers, use '-t, --tags' "
"option and '/*!tags:re2c ... */' directive",
, rules(nfa.rules)
, tags(*nfa.tags)
, tagpool(*nfa.tagpool)
- , copy_tags(ZERO_TAGS)
+ , maxtagver(static_cast<tagver_t>(tags.size()))
{
clospool_t clospool;
closure_t clos1, clos2;
std::valarray<Rule> &rules;
std::valarray<Tag> &tags;
Tagpool &tagpool;
- size_t copy_tags;
+ tagver_t maxtagver;
dfa_t(const nfa_t &nfa, const charset_t &charset,
const std::string &cond);
namespace re2c
{
-static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, tagver_t *owrt);
+static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bool *owrt);
/* note [fallback tags]
*
*/
void find_overwritten_tags(const dfa_t &dfa, size_t state,
- bool *been, tagver_t *owrt)
+ bool *been, bool *owrt)
{
if (been[state]) return;
been[state] = true;
for (size_t t = 0; t < ntags; ++t) {
const tagver_t v = tags[t];
if (v != TAGVER_ZERO) {
- owrt[t] = v;
+ owrt[v] = true;
}
}
+ for (const tagcopy_t *p = s->tags[c].copy; p; p = p->next) {
+ owrt[p->lhs] = true;
+ }
size_t dest = s->arcs[c];
if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) {
// note [fallback states]
void insert_fallback_tags(dfa_t &dfa)
{
+ tagver_t maxver = dfa.maxtagver;
const size_t
nstates = dfa.states.size(),
- ntags = dfa.tags.size();
+ nsym = dfa.nchars,
+ ntags = dfa.tags.size(),
+ nver = static_cast<size_t>(maxver) + 1;
bool *been = new bool[nstates];
- tagver_t *total = dfa.tagpool.buffer2;
- std::fill(total, total + ntags, TAGVER_ZERO);
+ bool *owrt = new bool[nver];
for (size_t i = 0; i < nstates; ++i) {
dfa_state_t *s = dfa.states[i];
if (!s->fallback) continue;
- tagver_t *owrt = dfa.tagpool.buffer1;
- std::fill(owrt, owrt + ntags, TAGVER_ZERO);
std::fill(been, been + nstates, false);
+ std::fill(owrt, owrt + nver, false);
find_overwritten_tags(dfa, i, been, owrt);
const tagver_t
*fin = dfa.tagpool[dfa.rules[s->rule].tags],
*upd = dfa.tagpool[s->rule_tags.set];
+
for (size_t t = 0; t < ntags; ++t) {
- if (fin[t] == TAGVER_ZERO || upd[t] != TAGVER_ZERO) {
- owrt[t] = TAGVER_ZERO;
- }
- if (owrt[t] != TAGVER_ZERO) {
- total[t] = owrt[t];
- }
- }
- const size_t copy = dfa.tagpool.insert(owrt);
+ const tagver_t
+ f = fin[t],
+ b = static_cast<tagver_t>(nver + t);
+
+ if (f == TAGVER_ZERO || upd[t] != TAGVER_ZERO || !owrt[f]) continue;
- if (copy != ZERO_TAGS) {
- for (size_t c = 0; c < dfa.nchars; ++c) {
+ // patch commands (backups must go first)
+ tagcopy_t **p = &s->rule_tags.copy;
+ *p = new tagcopy_t(*p, f, b);
+ for (size_t c = 0; c < nsym; ++c) {
size_t j = s->arcs[c];
if (j != dfa_t::NIL && dfa.states[j]->fallthru) {
- s->tags[c].copy = copy;
+ p = &s->tags[c].copy;
+ *p = new tagcopy_t(*p, b, f);
}
}
- s->rule_tags.copy = copy;
+ maxver = std::max(maxver, b);
}
}
- dfa.copy_tags = dfa.tagpool.insert(total);
-
delete[] been;
+ delete[] owrt;
+
+ dfa.maxtagver = maxver;
}
} // namespace re2c
tagver_t tag_allocation(const dfa_t &dfa, const bool *interf,
tagver_t *ver2new)
{
+ const tagver_t
+ END = std::numeric_limits<tagver_t>::max(),
+ nver = dfa.maxtagver + 1;
const size_t
- END = std::numeric_limits<size_t>::max(),
- nver = dfa.tags.size() + 1;
- size_t *head = new size_t[nver]; // list of class representatives
- size_t *next = new size_t[nver]; // list of class members
- size_t h0 = END, h, n;
+ nsym = dfa.nchars,
+ narc = dfa.states.size() * nsym;
+ tagver_t *next = new tagver_t[nver]; // list of class members
+ tagver_t *repr = new tagver_t[nver]; // maps tag to class representative
+ tagver_t rx, ry, x, y, z;
- std::fill(head, head + nver, END);
std::fill(next, next + nver, END);
- for (size_t v = 0; v < nver; ++v) {
- const bool *interfv = &interf[v * nver];
+ std::fill(repr, repr + nver, END);
+
+ // copy coalescing: for each command X = Y, try to merge X and Y
+ for (size_t a = 0; a < narc; ++a) {
+ const tagcopy_t *p = dfa.states[a / nsym]->tags[a % nsym].copy;
+ for (; p; p = p->next) {
+ x = p->lhs;
+ y = p->rhs;
+ rx = repr[x];
+ ry = repr[y];
+
+ if (rx != END) {
+ if (ry != END) continue;
+ for (z = rx; z != END; z = next[z]) {
+ if (interf[z * nver + y]) break;
+ }
+ if (z == END) {
+ repr[y] = rx;
+ next[y] = next[rx];
+ next[rx] = y;
+ }
+ } else if (ry != END) {
+ for (z = ry; z != END; z = next[z]) {
+ if (interf[z * nver + x]) break;
+ }
+ if (z == END) {
+ repr[x] = ry;
+ next[x] = next[ry];
+ next[ry] = x;
+ }
+ } else if (!interf[x * nver + y]) {
+ repr[x] = repr[y] = x;
+ next[x] = y;
+ }
+ }
+ }
+
+ // try to merge equivalence classes left after copy coalescing
+ for (rx = 0; rx < nver; ++rx) {
+ if (rx != repr[rx]) continue;
+
+ for (ry = rx + 1; ry < nver; ++ry) {
+ if (ry != repr[ry]) continue;
+
+ for (x = rx; x != END; x = next[x]) {
+ for (y = ry; y != END; y = next[y]) {
+ if (interf[x * nver + y]) break;
+ }
+ if (y != END) break;
+ }
+
+ if (x == END) {
+ for (y = ry;; y = next[y]) {
+ repr[y] = rx;
+ if (next[y] == END) {
+ next[y] = next[rx];
+ next[rx] = ry;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // push each remaining tag to any non-interfering class
+ for (x = 0; x < nver; ++x) {
+ if (repr[x] != END) continue;
// try all existing classes
- for (h = h0; h != END; h = head[h]) {
+ for (rx = nver; --rx >= 0;) {
+ if (rx != repr[rx]) continue;
// check interference with class members
- for (n = h; n != END; n = next[n]) {
- if (interfv[n]) break;
+ for (y = rx; y != END; y = next[y]) {
+ if (interf[x * nver + y]) break;
}
// no interference; add to class
- if (n == END) {
- next[v] = next[h];
- next[h] = v;
+ if (y == END) {
+ repr[x] = rx;
+ next[x] = next[rx];
+ next[rx] = x;
break;
}
}
// make new equivalence class
- if (h == END) {
- head[v] = h0;
- h0 = v;
+ if (rx < 0) {
+ repr[x] = x;
}
}
tagver_t maxver = 0;
- for (h = h0; h != END; h = head[h]) {
+ for (rx = nver; --rx >= 0;) {
+ if (repr[rx] != rx) continue;
+
++maxver;
- for (n = h; n != END; n = next[n]) {
- ver2new[n] = maxver;
+ for (x = rx; x != END; x = next[x]) {
+ ver2new[x] = maxver;
}
}
- delete[] head;
delete[] next;
+ delete[] repr;
return maxver;
}
nsym = dfa.nchars,
narc = dfa.states.size() * nsym,
ntag = dfa.tags.size(),
- nver = ntag + 1;
+ nver = static_cast<size_t>(dfa.maxtagver) + 1;
for (size_t a = 0; a < narc; ++a) {
const size_t
c = a % nsym,
i = a / nsym;
const dfa_state_t *s = dfa.states[i];
- // rule tags can't be dead by construction
+ // rule tags and copy tags can't be dead by construction
+ // (copy tags are only used for fallback tags)
size_t *p = &s->tags[c].set;
if (*p != ZERO_TAGS) {
namespace re2c
{
-static void interfere(bool *interf, size_t nver, const bool *live, const bool *tags);
+static void interfere(const tagcmd_t &cmd, const Tagpool &pool, bool *interf, bool *live, bool *buf, size_t nver);
void tag_interference(const dfa_t &dfa, const bool *live, bool *interf)
{
+ const Tagpool &pool = dfa.tagpool;
const size_t
nstate = dfa.states.size(),
- ntag = dfa.tags.size(),
- nver = ntag + 1,
+ ntag = pool.ntags,
+ nver = static_cast<size_t>(dfa.maxtagver) + 1,
nsym = dfa.nchars;
bool *buf1 = new bool[nver];
bool *buf2 = new bool[nver];
const dfa_state_t *s = dfa.states[i];
if (s->rule != Rule::NONE) {
- unpack(nver, buf1, ntag, dfa.tagpool[dfa.rules[s->rule].tags]);
- unpack(nver, buf2, ntag, dfa.tagpool[s->rule_tags.set]);
- interfere(interf, nver, buf1, buf2);
+ unpack(nver, buf1, ntag, pool[dfa.rules[s->rule].tags]);
+ interfere(s->rule_tags, pool, interf, buf1, buf2, nver);
}
for (size_t c = 0; c < nsym; ++c) {
- const size_t x = s->tags[c].set;
- if (x != ZERO_TAGS) {
- const bool *liv = &live[(i * nsym + c) * nver];
- unpack(nver, buf1, ntag, dfa.tagpool[x]);
- interfere(interf, nver, liv, buf1);
- }
+ memcpy(buf1, &live[(i * nsym + c) * nver], nver * sizeof(bool));
+ interfere(s->tags[c], pool, interf, buf1, buf2, nver);
}
}
delete[] buf2;
}
-// tags that are updated here are pairwise incompatible
-// with all tags that are alive, but not updated here
-void interfere(bool *interf, size_t nver, const bool *live, const bool *tags)
+void interfere(const tagcmd_t &cmd, const Tagpool &pool,
+ bool *interf, bool *live, bool *buf, size_t nver)
{
- for (size_t i = 0; i < nver; ++i) {
- if (live[i] && !tags[i]) {
+ // Each tag updated by set command interferes with all tags
+ // alive after this command, except tags udated by other set
+ // commands (they all have the same value: current position).
+ if (cmd.set != ZERO_TAGS) {
+ // updated by set commands
+ unpack(nver, buf, pool.ntags, pool[cmd.set]);
+
+ // alive after, but not updated by set commands
+ // (a.k.a. all tags alive before the first set command)
+ for (size_t v = 0; v < nver; ++v) {
+ live[v] &= !buf[v];
+ }
+
+ for (size_t i = 0; i < nver; ++i) {
+ if (!buf[i]) continue;
for (size_t j = 0; j < nver; ++j) {
- if (tags[j]) {
- interf[i * nver + j] = interf[j * nver + i] = true;
- }
+ if (!live[j]) continue;
+ interf[i * nver + j] = interf[j * nver + i] = true;
+ }
+ }
+ }
+
+ // Each tag X updated by copy command X = Y interferes with
+ // all tags alive after this command except tags equal to Y.
+ // We don't track which tags are equal to Y at this point,
+ // but at least we can exclude Y itself and all tags assigned
+ // to Y by copy commands.
+ for (const tagcopy_t *p = cmd.copy; p; p = p->next) {
+ const tagver_t r = p->rhs;
+
+ // alive after this command: tags used by subsequent copy
+ // commands plus all tags alive before the first set command
+ memcpy(buf, live, nver * sizeof(bool));
+ for (const tagcopy_t *q = p->next; q; q = q->next) {
+ if (q->rhs != r) {
+ buf[q->rhs] = true;
}
}
+
+ // exclude: tags equal to RHS of this command
+ buf[r] = false;
+ for (const tagcopy_t *q = cmd.copy; q; q = q->next) {
+ if (q->rhs == r) {
+ buf[q->lhs] = false;
+ }
+ }
+
+ const size_t l = static_cast<size_t>(p->lhs);
+ for (size_t v = 0; v < nver; ++v) {
+ if (!buf[v]) continue;
+ interf[l * nver + v] = interf[v * nver + l] = true;
+ }
}
}
nsym = dfa.nchars,
narc = nstate * nsym,
ntag = dfa.tags.size(),
- nver = ntag + 1;
+ nver = static_cast<size_t>(dfa.maxtagver) + 1;
bool *buf1 = new bool[nver];
bool *buf2 = new bool[nver];
bool *been = new bool[nstate];
for (size_t v = 0; v < nver; ++v) {
buf1[v] |= use[v] && !buf2[v];
}
+ // copy tags are only used for fallback tags,
}
bool *liv = &live[a * nver];
* Liveness of fallback tag is propagated forward from fallback
* state (see note [fallback states]) and until there remain
* any fallthrough paths from current state.
+ *
+ * Fallback version of tag is either backup copy of tag's final
+ * version, or (if there's no backup) the final version itself.
+ * Absence of backup means that final version is not overwritten,
+ * but still we should prevent it from merging with other tags
+ * (otherwise it may become overwritten).
*/
for (size_t i = 0; i < nstate; ++i) {
const dfa_state_t *s = dfa.states[i];
- if (s->fallback) {
- const tagver_t
- *use = dfa.tagpool[dfa.rules[s->rule].tags],
- *def = dfa.tagpool[s->rule_tags.set];
- memset(buf1, 0, nver * sizeof(bool));
- for (size_t t = 0; t < ntag; ++t) {
- const tagver_t u = use[t], d = def[t];
- if (u != TAGVER_ZERO && d == TAGVER_ZERO) {
- buf1[u] = true;
- }
+ if (!s->fallback) continue;
+
+ const tagver_t
+ *use = dfa.tagpool[dfa.rules[s->rule].tags],
+ *def = dfa.tagpool[s->rule_tags.set];
+
+ memset(buf1, 0, nver * sizeof(bool));
+ for (size_t t = 0; t < ntag; ++t) {
+ const tagver_t u = use[t], d = def[t];
+ if (u != TAGVER_ZERO && d == TAGVER_ZERO) {
+ buf1[u] = true;
}
- memset(been, 0, nstate * sizeof(bool));
- forwprop(dfa, been, i, live, buf1);
}
+ for (const tagcopy_t *p = s->rule_tags.copy; p; p = p->next) {
+ // in rule tags copies are swapped:
+ // LHS is the origin, RHS is backup
+ buf1[p->lhs] = false;
+ buf1[p->rhs] = true;
+ }
+
+ memset(been, 0, nstate * sizeof(bool));
+ forwprop(dfa, been, i, live, buf1);
}
delete[] buf1;
const size_t
nsym = dfa.nchars,
- nver = dfa.tags.size() + 1;
+ nver = static_cast<size_t>(dfa.maxtagver) + 1;
const dfa_state_t *s = dfa.states[state];
for (size_t c = 0; c < nsym; ++c) {
const size_t
narc = dfa.states.size() * dfa.nchars,
- nver = dfa.tags.size() + 1;
+ nver = static_cast<size_t>(dfa.maxtagver) + 1;
bool *live = new bool[narc * nver];
bool *interf = new bool[nver * nver];
tagver_t *ver2new = new tagver_t[nver];
#include "src/ir/dfa/tag_optimize.h"
+#include "src/util/hash32.h"
namespace re2c
{
-static void rename(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new, bool unify);
+typedef lookup_t<tagcopy_t*> tagcopy_index_t;
+
+struct tagcopy_cmp_t
+{
+ bool operator()(const tagcopy_t *x, const tagcopy_t *y);
+};
+
+static void rename_set(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new, bool unify);
+static void rename_copy(tagcopy_t **phead, const tagver_t *ver2new, tagcopy_index_t &index);
+static int less(const tagcopy_t *x, const tagcopy_t *y);
+static int equal(const tagcopy_t *x, const tagcopy_t *y);
+static tagcopy_t *indexate(tagcopy_index_t &index, tagcopy_t *head);
void tag_renaming(dfa_t &dfa, const tagver_t *ver2new, tagver_t maxver)
{
- const tagver_t oldmax = static_cast<tagver_t>(dfa.tags.size());
- if (maxver >= oldmax) {
- assert(maxver == oldmax);
+ if (maxver >= dfa.maxtagver) {
+ assert(maxver == dfa.maxtagver);
return;
}
const size_t
nstates = dfa.states.size(),
nrule = dfa.rules.size();
+ tagcopy_index_t index;
for (size_t i = 0; i < nstates; ++i) {
dfa_state_t *s = dfa.states[i];
for (size_t c = 0; c < dfa.nchars; ++c) {
- rename(tagpool, s->tags[c].set, ver2new, true);
- rename(tagpool, s->tags[c].copy, ver2new, true);
+ rename_set(tagpool, s->tags[c].set, ver2new, true);
+ rename_copy(&s->tags[c].copy, ver2new, index);
}
- rename(tagpool, s->rule_tags.set, ver2new, true);
- rename(tagpool, s->rule_tags.copy, ver2new, true);
+ rename_set(tagpool, s->rule_tags.set, ver2new, true);
+ rename_copy(&s->rule_tags.copy, ver2new, index);
}
- rename(dfa.tagpool, dfa.copy_tags, ver2new, true);
for (size_t i = 0; i < nrule; ++i) {
- rename(tagpool, dfa.rules[i].tags, ver2new, false);
+ rename_set(tagpool, dfa.rules[i].tags, ver2new, false);
}
}
-void rename(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new,
+void rename_set(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new,
bool unify)
{
if (tags == ZERO_TAGS) return;
: v;
}
- // for transition tags, we don't care how versions map to
- // tags: identical sets of versions should compare equal
- // this doesn't apply to final tag versions in rules
+ // After renaming different tags may map to the same version,
+ // thus one set of versions may have different representations.
+ // Transition tags are treated as sets and should be unified.
+ // Rule tags are treated as mappings of tag to its final version.
if (unify) {
tagver_t *e = newver + ntag;
std::sort(newver, e);
tags = tagpool.insert(newver);
}
+void rename_copy(tagcopy_t **phead, const tagver_t *ver2new,
+ tagcopy_index_t &index)
+{
+ for (tagcopy_t **pp = phead, *p = *pp; p; p = *pp) {
+ p->lhs = ver2new[p->lhs];
+ p->rhs = ver2new[p->rhs];
+ if (p->lhs == p->rhs) {
+ *pp = p->next;
+ } else {
+ pp = &p->next;
+ }
+ }
+
+ // After renaming different lists may become identical
+ // up to reordering and deleting duplicates.
+
+ // sort lexicographically
+ for (tagcopy_t *p = *phead; p; p = p->next) {
+ for (tagcopy_t *q = p->next; q; q = q->next) {
+ if (less(q, p)) {
+ std::swap(p->lhs, q->lhs);
+ std::swap(p->rhs, q->rhs);
+ }
+ }
+ }
+ // delete duplicates
+ for (tagcopy_t *p = *phead; p; p = p->next) {
+ tagcopy_t *q = p->next;
+ if (q && equal(p, q)) {
+ p->next = q->next;
+ }
+ }
+
+ *phead = indexate(index, *phead);
+}
+
+int less(const tagcopy_t *x, const tagcopy_t *y)
+{
+ const tagver_t
+ lx = x->lhs, ly = y->lhs,
+ rx = x->rhs, ry = y->rhs;
+ return (lx < ly) || (lx == ly && rx < ry);
+}
+
+int equal(const tagcopy_t *x, const tagcopy_t *y)
+{
+ return x->lhs == y->lhs && x->rhs == y->rhs;
+}
+
+bool tagcopy_cmp_t::operator()(const tagcopy_t *x, const tagcopy_t *y)
+{
+ for (;;) {
+ if (!x && !y) return true;
+ if (!x || !y) return false;
+ if (x->lhs != y->lhs || x->rhs != y->rhs) return false;
+ x = x->next;
+ y = y->next;
+ }
+}
+
+tagcopy_t *indexate(tagcopy_index_t &index, tagcopy_t *head)
+{
+ uint32_t hash = 0;
+ for (const tagcopy_t *p = head; p; p = p->next) {
+ hash = hash32(hash, &p->lhs, sizeof(p->lhs));
+ hash = hash32(hash, &p->rhs, sizeof(p->rhs));
+ }
+
+ tagcopy_cmp_t eq;
+ const size_t idx = index.find_with(hash, head, eq);
+ if (idx != tagcopy_index_t::NIL) {
+ return index[idx];
+ }
+
+ index.push(hash, head);
+ return head;
+}
+
} // namespace re2c
return lookup[idx];
}
+free_list<tagcopy_t*> tagcopy_t::freelist;
+
+tagcopy_t::tagcopy_t(tagcopy_t *n, tagver_t l, tagver_t r)
+ : next(n)
+ , lhs(l)
+ , rhs(r)
+{
+ freelist.insert(this);
+}
+
+tagcopy_t::~tagcopy_t()
+{
+ freelist.erase(this);
+}
+
} // namespace re2c
#include "src/util/lookup.h"
#include "src/util/forbid_copy.h"
+#include "src/util/free_list.h"
namespace re2c
{
FORBID_COPY(Tagpool);
};
+struct tagcopy_t
+{
+ static free_list<tagcopy_t*> freelist;
+
+ tagcopy_t *next;
+ tagver_t lhs; // left hand side
+ tagver_t rhs; // right hand side
+
+ tagcopy_t(tagcopy_t *n, tagver_t l, tagver_t r);
+ ~tagcopy_t();
+ FORBID_COPY(tagcopy_t);
+};
+
/* must be packed */
struct tagcmd_t
{
size_t set;
- size_t copy;
+ tagcopy_t *copy;
- tagcmd_t(): set(ZERO_TAGS), copy(ZERO_TAGS) {}
- tagcmd_t(size_t s, size_t c): set(s), copy(c) {}
+ tagcmd_t(): set(ZERO_TAGS), copy(NULL) {}
+ tagcmd_t(size_t s, tagcopy_t *c): set(s), copy(c) {}
inline bool empty() const
{
- return set == ZERO_TAGS && copy == ZERO_TAGS;
+ return set == ZERO_TAGS && copy == NULL;
}
};
--- /dev/null
+/* Generated by re2c */
+// This test demonstrates the need for copy coalescing during
+// allocation of tag variables: low interference allows for many
+// ways of partitioning tag versions into equivalence classes.
+
+// Without copy coalescing the choice is arbitrary: operands of
+// copy command may or may not get into the same class.
+// Coalescing adds bias to this choice: we first try to merge
+// copy operands, then examine the rest of tags.
+
+yyt1
+yyt2
+
+
+{
+ YYCTYPE yych;
+ unsigned int yyaccept = 0;
+ if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy5;
+ default: goto yy3;
+ }
+yy2:
+ q = yyt2;
+ p = yyt1;
+ { p q }
+yy3:
+ ++YYCURSOR;
+yy4:
+ {}
+yy5:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'c':
+ yyt1 = YYCURSOR;
+ goto yy6;
+ case 'd':
+ yyt1 = YYCURSOR;
+ goto yy7;
+ default: goto yy4;
+ }
+yy6:
+ yyaccept = 0;
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy9;
+ default:
+ yyt2 = YYCURSOR;
+ goto yy2;
+ }
+yy7:
+ yyaccept = 1;
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy11;
+ default:
+ yyt2 = YYCURSOR;
+ goto yy8;
+ }
+yy8:
+ s = yyt2;
+ r = yyt1;
+ { r s }
+yy9:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'c':
+ yyt1 = YYCURSOR;
+ goto yy6;
+ default: goto yy10;
+ }
+yy10:
+ YYCURSOR = YYMARKER;
+ if (yyaccept == 0) {
+ yyt2 = YYCURSOR;
+ goto yy2;
+ } else {
+ yyt2 = YYCURSOR;
+ goto yy8;
+ }
+yy11:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'd':
+ yyt1 = YYCURSOR;
+ goto yy7;
+ default: goto yy10;
+ }
+}
+
+re2c: warning: line 12: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 13: rule matches empty string [-Wmatch-empty-string]
--- /dev/null
+// This test demonstrates the need for copy coalescing during
+// allocation of tag variables: low interference allows for many
+// ways of partitioning tag versions into equivalence classes.
+
+// Without copy coalescing the choice is arbitrary: operands of
+// copy command may or may not get into the same class.
+// Coalescing adds bias to this choice: we first try to merge
+// copy operands, then examine the rest of tags.
+
+/*!tags:re2c format = "@@\n"; */
+/*!re2c
+ ("a" @p "c" @q)* { p q }
+ ("a" @r "d" @s)* { r s }
+ * {}
+*/
--- /dev/null
+/* Generated by re2c */
+// This test shows the need for merging non-interfering classes
+// of tag versions after copy coalescing during allocation of
+// tag variables.
+
+yyt1
+
+
+{
+ YYCTYPE yych;
+ unsigned int yyaccept = 0;
+ if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy5;
+ default: goto yy3;
+ }
+yy2:
+ p = yyt1;
+ { p }
+yy3:
+ ++YYCURSOR;
+yy4:
+ {}
+yy5:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'c':
+ yyt1 = YYCURSOR;
+ goto yy6;
+ case 'd':
+ yyt1 = YYCURSOR;
+ goto yy7;
+ default: goto yy4;
+ }
+yy6:
+ yyaccept = 0;
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy9;
+ default: goto yy2;
+ }
+yy7:
+ yyaccept = 1;
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy11;
+ default: goto yy8;
+ }
+yy8:
+ q = yyt1;
+ { q }
+yy9:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'c':
+ yyt1 = YYCURSOR;
+ goto yy6;
+ default: goto yy10;
+ }
+yy10:
+ YYCURSOR = YYMARKER;
+ if (yyaccept == 0) {
+ goto yy2;
+ } else {
+ goto yy8;
+ }
+yy11:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'd':
+ yyt1 = YYCURSOR;
+ goto yy7;
+ default: goto yy10;
+ }
+}
+
+re2c: warning: line 7: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 8: rule matches empty string [-Wmatch-empty-string]
--- /dev/null
+// This test shows the need for merging non-interfering classes
+// of tag versions after copy coalescing during allocation of
+// tag variables.
+
+/*!tags:re2c format = "@@\n"; */
+/*!re2c
+ ("a" @p "c")* { p }
+ ("a" @q "d")* { q }
+ * {}
+*/
if (yyaccept == 0) {
goto yy3;
} else {
- yyt1 = yyt1_;
+ yyt1 = yyt2;
goto yy8;
}
yy7:
yych = *YYCURSOR;
switch (yych) {
case 'a':
- yyt1_ = yyt1;
+ yyt2 = yyt1;
goto yy9;
default: goto yy8;
}
--- /dev/null
+/* Generated by re2c */
+// This example shows that fallback tags should participate
+// in tag optimization: there is a chance that they will be
+// merged with other tags.
+
+yyt1
+yyt2
+
+
+{
+ YYCTYPE yych;
+ if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a':
+ yyt1 = YYCURSOR;
+ goto yy5;
+ default: goto yy3;
+ }
+yy2:
+ YYCURSOR = yyt1;
+ {}
+yy3:
+ ++YYCURSOR;
+yy4:
+ {}
+yy5:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'b':
+ yyt2 = YYCURSOR;
+ goto yy6;
+ default: goto yy4;
+ }
+yy6:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'c': goto yy7;
+ default: goto yy2;
+ }
+yy7:
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a':
+ yyt1 = yyt2;
+ goto yy9;
+ default: goto yy8;
+ }
+yy8:
+ p = yyt2;
+ { p }
+yy9:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'b':
+ yyt2 = YYCURSOR;
+ goto yy11;
+ default: goto yy10;
+ }
+yy10:
+ YYCURSOR = YYMARKER;
+ yyt2 = yyt1;
+ goto yy8;
+yy11:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'c': goto yy7;
+ default: goto yy10;
+ }
+}
+
+re2c: warning: line 9: rule matches empty string [-Wmatch-empty-string]
--- /dev/null
+// This example shows that fallback tags should participate
+// in tag optimization: there is a chance that they will be
+// merged with other tags.
+
+/*!tags:re2c format = "@@\n"; */
+/*!re2c
+
+ ("a" @p "bc")+ { p } // needs fallback tag for @p
+ "" / "ab"? {} // trailing context interferes with @p, but not with the fallback tag
+ * {}
+
+*/
--- /dev/null
+/* Generated by re2c */
+// This example shows the neccesity of tracking interference
+// induced by copy commands: fallback tag interferes with another
+// tag in a way that makes it impossible to spot interference
+// unless we examine the copy command.
+
+// On input "cdabe" 2nd rule should match with '@q' pointing to 'd'.
+
+yyt1
+yyt2
+yyt3
+
+
+{
+ YYCTYPE yych;
+ if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a':
+ yyt1 = YYCURSOR;
+ goto yy5;
+ case 'c': goto yy6;
+ case 'e': goto yy7;
+ default: goto yy3;
+ }
+yy2:
+ p = yyt1;
+ { p }
+yy3:
+ ++YYCURSOR;
+yy4:
+ {}
+yy5:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'b': goto yy9;
+ default: goto yy4;
+ }
+yy6:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'd':
+ yyt2 = YYCURSOR;
+ goto yy9;
+ default: goto yy4;
+ }
+yy7:
+ ++YYCURSOR;
+ q = yyt2;
+ { q }
+yy9:
+ YYMARKER = ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a':
+ yyt3 = yyt1;
+ yyt1 = YYCURSOR;
+ goto yy10;
+ case 'c':
+ yyt3 = yyt1;
+ goto yy12;
+ case 'e': goto yy7;
+ default: goto yy2;
+ }
+yy10:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'b': goto yy9;
+ default: goto yy11;
+ }
+yy11:
+ YYCURSOR = YYMARKER;
+ yyt1 = yyt3;
+ goto yy2;
+yy12:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'd':
+ yyt2 = YYCURSOR;
+ goto yy9;
+ default: goto yy11;
+ }
+}
+
+re2c: warning: line 11: rule matches empty string [-Wmatch-empty-string]
--- /dev/null
+// This example shows the neccesity of tracking interference
+// induced by copy commands: fallback tag interferes with another
+// tag in a way that makes it impossible to spot interference
+// unless we examine the copy command.
+
+// On input "cdabe" 2nd rule should match with '@q' pointing to 'd'.
+
+/*!tags:re2c format = "@@\n"; */
+/*!re2c
+
+ (@p "ab" | "c" "d")* { p } // fallback tag for @p
+ ( "ab" | "c" @q "d")* "e" { q } // @q interferes with fallback tag
+ * {}
+
+*/