From: Ulya Trofimovich Date: Sat, 29 Apr 2017 20:13:50 +0000 (+0100) Subject: Don't split tag history into individual sub-histories for tags. X-Git-Tag: 1.0~39^2~52 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=194d41a76ae68a23b515a6b74fd997b69de86cdc;p=re2c Don't split tag history into individual sub-histories for tags. This is necessary for correct comparison of orbit tag histories: if orbit tag is nested in an outer capture, this outer capture is under repetition and there is an epsilon-path through it, then this epsilon-path may contain pieces of orbit history that belong to different iterations of outer capture; these pieces will be glued together and the boundary between them will be lost. Example: ((""){0,3}){0,2}. However, in a common history we can always find boundaries (they are marked by tags that correspond to outer captures). --- diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index ae6a758a..37e32bd2 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -24,7 +24,6 @@ tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, // build tagged epsilon-closure of the given set of NFA states clos2.clear(); if (shadow) shadow->clear(); - tagpool.history.init(); for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) { closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules); } @@ -94,7 +93,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0, case nfa_state_t::TAG: tagtree.push(n->tag.info, n->tag.bottom ? 
TAGVER_BOTTOM : TAGVER_CURSOR); closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules); - tagtree.pop(n->tag.info); + tagtree.pop(); return; case nfa_state_t::RAN: for (; c != e && c->state != n; ++c); @@ -110,7 +109,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0, } clos_t c2 = {c0.origin, n, c0.tvers, c0.ttran, - tagpool.insert(tagtree.leaves()), c0.order, c0.index++}; + tagtree.tail, c0.order, c0.index++}; if (c == e) { clos.push_back(c2); } else { @@ -147,9 +146,10 @@ bool better(const clos_t &c1, const clos_t &c2, && c1.order == c2.order && c1.index == c2.index) return false; + const hidx_t + l1 = c1.tlook, l2 = c2.tlook, + t1 = c1.ttran, t2 = c2.ttran; const tagver_t - *l1 = tagpool[c1.tlook], *l2 = tagpool[c2.tlook], - *t1 = tagpool[c1.ttran], *t2 = tagpool[c2.ttran], *v1 = tagpool[c1.tvers], *v2 = tagpool[c2.tvers], *o1 = tagpool[c1.order], *o2 = tagpool[c2.order]; tagver_t x, y; @@ -164,7 +164,7 @@ bool better(const clos_t &c1, const clos_t &c2, if (x < y) return false; if (x > y) return true; - const int cmp = tagtree.compare_paths(l1[t], l2[t]); + const int cmp = tagtree.compare_orbits(l1, l2, t); if (cmp < 0) return false; if (cmp > 0) return true; @@ -175,14 +175,14 @@ bool better(const clos_t &c1, const clos_t &c2, // we don't use orders for minimize/maximize, because they are // already used for leftmost } else if (capture(tag)) { - x = tagtree.elem(l1[t]); - y = tagtree.elem(l2[t]); + x = tagtree.last(l1, t); + y = tagtree.last(l2, t); if (x < 0 || y < 0) goto leftmost; if (x > y) return false; if (x < y) return true; - x = tagtree.elem(t1[t]); - y = tagtree.elem(t2[t]); + x = tagtree.last(t1, t); + y = tagtree.last(t2, t); if (x < 0 || y < 0) goto leftmost; if (x > y) return false; if (x < y) return true; @@ -232,7 +232,7 @@ void lower_lookahead_to_transition(closure_t &clos) { for (clositer_t c = clos.begin(); c != clos.end(); ++c) { c->ttran = c->tlook; - c->tlook = ZERO_TAGS; + c->tlook = HROOT; } } @@ -252,25 
+252,26 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector &tags, // normal transition, however absolute value should be unique // among all versions of all tags) for (c = b; c != e; ++c) { - const tagver_t - *ls = tagpool[c->tlook], - *us = tagpool[c->ttran], - *vs = tagpool[c->tvers]; + const hidx_t l = c->tlook, h = c->ttran; + if (h == HROOT) continue; + + const tagver_t *vs = tagpool[c->tvers]; for (size_t t = 0; t < ntag; ++t) { const Tag &tag = tags[t]; - const tagver_t u = us[t], - u0 = tagtree.elem(u), - l = tagtree.elem(ls[t]); - if (u0 == TAGVER_ZERO) continue; + const tagver_t + h0 = tagtree.last(h, t), + l0 = tagtree.last(l, t); - const tagver_t h = history(tag) ? vs[t] : TAGVER_ZERO; - newver_t x = {t, h, u}; + if (h0 == TAGVER_ZERO) continue; + + const tagver_t v = history(tag) ? vs[t] : TAGVER_ZERO; + newver_t x = {t, v, h}; const tagver_t - n = (maxver + 1) * (u0 == TAGVER_BOTTOM ? -1 : 1), + n = (maxver + 1) * (h0 == TAGVER_BOTTOM ? -1 : 1), m = newvers.insert(std::make_pair(x, n)).first->second; if (n == m) ++maxver; - if (!fixed(tag) && (l == TAGVER_ZERO || history(tag))) { + if (!fixed(tag) && (l0 == TAGVER_ZERO || history(tag))) { newacts.insert(std::make_pair(x, m)); } } @@ -278,32 +279,31 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector &tags, // actions for (newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) { - const tagver_t - m = i->second, - h = i->first.ver, - u = i->first.act; - if (history(tags[i->first.tag])) { - cmd = tcpool.make_add(cmd, abs(m), abs(h), u, tagtree); + const tagver_t m = i->second, v = i->first.base; + const hidx_t h = i->first.history; + const size_t t = i->first.tag; + if (history(tags[t])) { + cmd = tcpool.make_add(cmd, abs(m), abs(v), tagtree, h, t); } else { - cmd = tcpool.make_set(cmd, abs(m), tagtree.elem(u)); + cmd = tcpool.make_set(cmd, abs(m), tagtree.last(h, t)); } } // update tag versions in closure for (c = b; c != e; ++c) { - if (c->ttran == ZERO_TAGS) 
continue; - const tagver_t - *us = tagpool[c->ttran], - *vs = tagpool[c->tvers]; + const hidx_t h = c->ttran; + if (h == HROOT) continue; + + const tagver_t *vs = tagpool[c->tvers]; for (size_t t = 0; t < ntag; ++t) { - const bool historic = history(tags[t]); - const tagver_t v = vs[t], u = us[t], - u0 = tagtree.elem(u), - h = historic ? v : TAGVER_ZERO; - if (u0 == TAGVER_ZERO) { - vers[t] = v; + const tagver_t + v0 = vs[t], + h0 = tagtree.last(h, t), + v = history(tags[t]) ? v : TAGVER_ZERO; + if (h0 == TAGVER_ZERO) { + vers[t] = v0; } else { - newver_t x = {t, h, u}; + newver_t x = {t, v, h}; vers[t] = newvers[x]; } } @@ -352,15 +352,16 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector &tags, * This part of the algorithm was invented by Christopher Kuklewicz. */ -typedef std::pair key1_t; +typedef std::pair key1_t; struct cmp_t { tagtree_t &tree; + size_t tag; bool operator()(const key1_t &x, const key1_t &y) { if (x.first < y.first) return true; if (x.first > y.first) return false; - return tree.compare_paths(x.second, y.second) < 0; + return tree.compare_orbits(x.second, y.second, tag) < 0; } }; @@ -372,12 +373,6 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) ntag = tagpool.ntags, nclos = clos.size(); - const cmp_t cmp = {tagtree}; - std::set keys1(cmp); - - typedef std::pair key2_t; - std::set keys2; - size_t &maxclos = tagpool.maxclos; tagver_t *&orders = tagpool.orders, *o; if (maxclos < nclos) { @@ -391,13 +386,14 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) // see note [POSIX disambiguation] if (orbit(tags[t])) { - keys1.clear(); + const cmp_t cmp = {tagtree, t}; + std::set keys1(cmp); for (c = b; c != e; ++c) { - keys1.insert(key1_t(tagpool[c->order][t], tagpool[c->tlook][t])); + keys1.insert(key1_t(tagpool[c->order][t], c->tlook)); } for (c = b; c != e; ++c, o += ntag) { const ptrdiff_t d = std::distance(keys1.begin(), - keys1.find(key1_t(tagpool[c->order][t], 
tagpool[c->tlook][t]))); + keys1.find(key1_t(tagpool[c->order][t], c->tlook))); o[t] = static_cast(d); } @@ -405,6 +401,8 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) // equals position of this item in leftmost NFA traversal // (it's the same for all tags) } else { + typedef std::pair key2_t; + std::set keys2; for (c = b; c != e; ++c) { keys2.insert(key2_t(tagpool[c->order][t], c->index)); } diff --git a/re2c/src/dfa/closure.h b/re2c/src/dfa/closure.h index f5744c41..e6cd4d11 100644 --- a/re2c/src/dfa/closure.h +++ b/re2c/src/dfa/closure.h @@ -15,8 +15,8 @@ struct clos_t nfa_state_t *origin; // for debug only nfa_state_t *state; size_t tvers; // vector of tag versions (including lookahead tags) - size_t ttran; // vector of transition tags - size_t tlook; // vector of lookahead tags + hidx_t ttran; // history of transition tags + hidx_t tlook; // history of lookahead tags size_t order; // vector of orders size_t index; // leftmost order in NFA traversal @@ -30,8 +30,8 @@ typedef closure_t::const_iterator cclositer_t; struct newver_t { size_t tag; - tagver_t ver; - tagver_t act; + tagver_t base; + hidx_t history; }; struct newver_cmp_t @@ -42,10 +42,10 @@ struct newver_cmp_t if (x.tag < y.tag) return true; if (x.tag > y.tag) return false; - if (x.ver < y.ver) return true; - if (x.ver > y.ver) return false; + if (x.base < y.base) return true; + if (x.base > y.base) return false; - return history.compare_paths(x.act, y.act) < 0; + return history.compare_actions(x.history, y.history, x.tag) < 0; } }; diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 34027029..868603ca 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -42,7 +42,7 @@ void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol) *s2 = transition(s1, symbol); if (s2) { clos_t c = {s1, s2, kernel->tvers[i], kernel->tlook[i], - ZERO_TAGS, kernel->order[i], 0}; + HROOT, kernel->order[i], 0}; 
clos.push_back(c); } } @@ -86,7 +86,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, // build tagged epsilon-closure of all reachable NFA states, // then find identical or mappable DFA state or add a new one - clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS, ZERO_TAGS, ZERO_TAGS, 0}; + clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, HROOT, HROOT, ZERO_TAGS, 0}; clos1.push_back(c0); acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags); find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump); diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc index 4d2fe704..e7a43a4b 100644 --- a/re2c/src/dfa/dump.cc +++ b/re2c/src/dfa/dump.cc @@ -7,7 +7,7 @@ namespace re2c static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sym, const tcpool_t &tcpool); static const char *tagname(const Tag &t); -static void dump_tags(const Tagpool &tagpool, size_t ttran, size_t tvers); +static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers); dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg) : debug(dbg) @@ -39,35 +39,44 @@ uint32_t dump_dfa_t::index(const nfa_state_t *s) return static_cast(s - base); } -void dump_dfa_t::closure_tags(cclositer_t c, - const tagver_t *lookahead, bool shadowed) +static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i) +{ + if (i == HROOT) { + fprintf(stderr, " /"); + return; + } + + dump_history(dfa, h, h.pred(i)); + + const Tag &t = dfa.tags[h.tag(i)]; + const tagver_t v = h.elem(i); + if (v == TAGVER_BOTTOM) fprintf(stderr, ""); + if (capture(t)) { + fprintf(stderr, "%u_", (uint32_t)t.ncap); + } else if (trailing(t)) { + fprintf(stderr, "*"); + } else { + fprintf(stderr, "%s", t.name->c_str()); + } + if (v == TAGVER_BOTTOM) fprintf(stderr, ""); + fprintf(stderr, " "); +} + +void dump_dfa_t::closure_tags(cclositer_t c) { if (!debug) return; if (c->tvers == ZERO_TAGS) return; - const tagver_t - *look 
= tagpool[c->tlook], - *vers = tagpool[c->tvers], - *ord = tagpool[c->order]; + const hidx_t l = c->tlook; + const tagver_t *vers = tagpool[c->tvers]; const size_t ntag = tagpool.ntags; + for (size_t t = 0; t < ntag; ++t) { - const Tag &tag = dfa.tags[t]; - - fprintf(stderr, " %s", tagname(tag)); - fprintf(stderr, "%d", abs(vers[t])); - if (lookahead[t]) { - const tagver_t l = tagpool.history.elem(look[t]); - if (l == TAGVER_BOTTOM) { - fprintf(stderr, " ↓"); - } else if (l == TAGVER_CURSOR) { - fprintf(stderr, " ↑"); - } else { - fprintf(stderr, " "); - } - } - if (!shadowed && capture(tag)) { - fprintf(stderr, "[%d]", ord[t]); - } + fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t])); + } + + if (l != HROOT) { + dump_history(dfa, tagpool.history, l); } } @@ -86,17 +95,10 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew) " CELLBORDER=\"1\"" ">", isnew ? "" : "i", state); - tagver_t *look = tagpool.buffer; - for (size_t t = 0; t < tagpool.ntags; ++t) { - for (c = c1; c != c2 && tagpool.history.elem(tagpool[c->tlook][t]) == TAGVER_ZERO; ++c); - for (s = s1; s != s2 && tagpool.history.elem(tagpool[s->tlook][t]) == TAGVER_ZERO; ++s); - look[t] = c != c2 || s != s2; - } - for (s = s1; s != s2; ++s) { fprintf(stderr, "%u", index(s->state), s - s1, color, style, color, index(s->state)); - closure_tags(s, look, true); + closure_tags(s); fprintf(stderr, ""); } if (!shadow->empty()) { @@ -105,7 +107,7 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew) for (c = c1; c != c2; ++c) { fprintf(stderr, "%u", index(c->state), style, index(c->state)); - closure_tags(c, look, true); + closure_tags(c); fprintf(stderr, ""); } fprintf(stderr, ">]\n"); @@ -285,20 +287,21 @@ const char *tagname(const Tag &t) return t.name ? 
t.name->c_str() : ""; } -void dump_tags(const Tagpool &tagpool, size_t ttran, size_t tvers) +void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers) { - if (ttran == ZERO_TAGS) return; + if (ttran == HROOT) return; + + const tagver_t *vers = tagpool[tvers]; + const tagtree_t &h = tagpool.history; fprintf(stderr, "/"); - const tagver_t - *tran = tagpool[ttran], - *vers = tagpool[tvers]; for (size_t i = 0; i < tagpool.ntags; ++i) { - tagver_t v = vers[i], t = tran[i]; - if (tagpool.history.elem(t) == TAGVER_ZERO) continue; - fprintf(stderr, "%d", abs(v)); - for (; t != -1; t = tagpool.history.pred(t)) { - if (tagpool.history.elem(t) < TAGVER_ZERO) { + if (h.last(ttran, i) == TAGVER_ZERO) continue; + + fprintf(stderr, "%d", abs(vers[i])); + for (hidx_t t = ttran; t != HROOT; t = h.pred(t)) { + if (h.tag(t) != i) continue; + if (h.elem(t) < TAGVER_ZERO) { fprintf(stderr, "↓"); } else if (t > TAGVER_ZERO) { fprintf(stderr, "↑"); diff --git a/re2c/src/dfa/dump.h b/re2c/src/dfa/dump.h index d323bb15..4043ffe8 100644 --- a/re2c/src/dfa/dump.h +++ b/re2c/src/dfa/dump.h @@ -18,7 +18,7 @@ struct dump_dfa_t dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg); ~dump_dfa_t(); - void closure_tags(cclositer_t c, const tagver_t *lookahead, bool shadowed); + void closure_tags(cclositer_t c); void closure(const closure_t &clos, uint32_t state, bool isnew); void state0(const closure_t &clos); void state(const closure_t &clos, size_t state, size_t symbol, bool isnew); diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index f2d9d0ba..f5966e7e 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -10,7 +10,7 @@ kernel_t::kernel_t(size_t n) : size(n) , state(new nfa_state_t*[size]) , tvers(new size_t[size]) - , tlook(new size_t[size]) + , tlook(new hidx_t[size]) , order(new size_t[size]) {} @@ -20,7 +20,7 @@ kernel_t *kernel_t::copy(const kernel_t &k) kernel_t *kcopy = new kernel_t(n); memcpy(kcopy->state, k.state, 
n * sizeof(void*)); memcpy(kcopy->tvers, k.tvers, n * sizeof(size_t)); - memcpy(kcopy->tlook, k.tlook, n * sizeof(size_t)); + memcpy(kcopy->tlook, k.tlook, n * sizeof(hidx_t)); memcpy(kcopy->order, k.order, n * sizeof(size_t)); return kcopy; } @@ -36,22 +36,19 @@ kernel_t::~kernel_t() static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, Tagpool &tagpool, const std::vector &tags) { - if (memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0) { + if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) { return true; } tagtree_t &h = tagpool.history; for (size_t i = 0; i < x->size; ++i) { - const tagver_t - *xls = tagpool[x->tlook[i]], - *yls = tagpool[y->tlook[i]]; + const hidx_t xl = x->tlook[i], yl = y->tlook[i]; for (size_t t = 0; t < tagpool.ntags; ++t) { - const tagver_t xl = xls[t], yl = yls[t]; if (history(tags[t])) { - // compare whole histories - if (h.compare_paths(xl, yl) != 0) return false; + // compare subhistories + if (h.compare_actions(xl, yl, t) != 0) return false; } else { // compare only the last tags - if (h.elem(xl) != h.elem(yl)) return false; + if (h.last(xl, t) != h.last(yl, t)) return false; } } } @@ -127,12 +124,12 @@ bool kernels_t::operator()(const kernel_t *k1, const kernel_t *k2) for (size_t i = 0; i < k1->size; ++i) { const tagver_t *xv = tagpool[k1->tvers[i]], - *yv = tagpool[k2->tvers[i]], - *xl = tagpool[k1->tlook[i]]; + *yv = tagpool[k2->tvers[i]]; + const hidx_t xl = k1->tlook[i]; for (size_t t = 0; t < ntag; ++t) { // see note [mapping ignores items with lookahead tags] - if (tagpool.history.elem(xl[t]) != TAGVER_ZERO + if (tagpool.history.last(xl, t) != TAGVER_ZERO && !history(tags[t])) continue; const tagver_t x = xv[t], y = yv[t]; @@ -347,19 +344,18 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx, { tcpool_t &tcpool = dfa.tcpool; const Rule &rule = dfa.rules[ridx]; - const tagver_t - *look = tagpool[clos.tlook], - *vers = tagpool[clos.tvers]; + const tagver_t *vers = tagpool[clos.tvers]; 
+ const tagtree_t &hist = tagpool.history; + const hidx_t look = clos.tlook; tcmd_t *copy = NULL, *save = NULL, **p; for (size_t t = rule.ltag; t < rule.htag; ++t) { - const bool historic = history(tags[t]); - const tagver_t v = abs(vers[t]), - l = tagpool.history.elem(look[t]); + const Tag &tag = tags[t]; + const tagver_t v = abs(vers[t]), l = hist.last(look, t); tagver_t &f = dfa.finvers[t]; // don't waste versions on fixed tags - if (fixed(dfa.tags[t])) continue; + if (fixed(tag)) continue; // pick a fresh version: final version is also used as fallback one if (f == TAGVER_ZERO) { @@ -368,8 +364,8 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx, if (l == TAGVER_ZERO) { copy = tcpool.make_copy(copy, f, v); - } else if (historic) { - save = tcpool.make_add(save, f, v, look[t], tagpool.history); + } else if (history(tag)) { + save = tcpool.make_add(save, f, v, hist, look, t); } else { save = tcpool.make_set(save, f, l); } diff --git a/re2c/src/dfa/find_state.h b/re2c/src/dfa/find_state.h index 17be9a0f..d5a6d13a 100644 --- a/re2c/src/dfa/find_state.h +++ b/re2c/src/dfa/find_state.h @@ -14,7 +14,7 @@ struct kernel_t size_t size; nfa_state_t **state; size_t *tvers; // tag versions - size_t *tlook; // lookahead tags + hidx_t *tlook; // lookahead tags size_t *order; // see note [orbit order of closure items] explicit kernel_t(size_t n); diff --git a/re2c/src/dfa/tagpool.cc b/re2c/src/dfa/tagpool.cc index f0adc14b..db121306 100644 --- a/re2c/src/dfa/tagpool.cc +++ b/re2c/src/dfa/tagpool.cc @@ -24,7 +24,7 @@ Tagpool::Tagpool(size_t n) , buffer(new tagver_t[n]) , maxclos(0) , orders(NULL) - , history(n) + , history() {} Tagpool::~Tagpool() diff --git a/re2c/src/dfa/tagtree.cc b/re2c/src/dfa/tagtree.cc index faf2946d..471ca4ce 100644 --- a/re2c/src/dfa/tagtree.cc +++ b/re2c/src/dfa/tagtree.cc @@ -6,68 +6,72 @@ namespace re2c { -tagtree_t::tagtree_t(size_t n) - : nodes() - , path1() - , path2() - , ntag(n) - , tags(new tagver_t[ntag]) -{ - node_t x = {-1, 
TAGVER_ZERO}; - nodes.push_back(x); - init(); -} - -tagtree_t::~tagtree_t() -{ - delete[] tags; -} - -void tagtree_t::init() -{ - memset(tags, 0, ntag * sizeof(tagver_t)); -} +tagtree_t::tagtree_t(): nodes(), tail(HROOT), path1(), path2() {} -tagver_t tagtree_t::elem(tagver_t i) const -{ - return nodes[static_cast(i)].elem; -} +tagver_t tagtree_t::elem(hidx_t i) const { return nodes[i].elem; } -tagver_t tagtree_t::pred(tagver_t i) const -{ - return nodes[static_cast(i)].pred; -} +hidx_t tagtree_t::pred(hidx_t i) const { return nodes[i].pred; } -const tagver_t *tagtree_t::leaves() const -{ - return tags; -} +size_t tagtree_t::tag(hidx_t i) const { return nodes[i].tag; } void tagtree_t::push(size_t t, tagver_t v) { - node_t x = {tags[t], v}; + node_t x = {tail, v, t}; nodes.push_back(x); - tags[t] = static_cast(nodes.size() - 1); + tail = static_cast(nodes.size() - 1); } -void tagtree_t::pop(size_t t) +void tagtree_t::pop() { // don't destroy the leaf itself, just update pointer to current leaf // (pointer to the the old leaf is stored in one of the closure items) - tags[t] = pred(tags[t]); + tail = pred(tail); } -int32_t tagtree_t::compare_paths(tagver_t x, tagver_t y) +// cut out subhistory of this tag (just skip all other tags) +static void subhistory(const tagtree_t &history, + std::vector &path, hidx_t idx, size_t tag) { - path1.clear(); - for (; x != -1; x = pred(x)) path1.push_back(elem(x)); + path.clear(); + for (hidx_t i = idx; i != HROOT; i = history.pred(i)) { + if (history.tag(i) == tag) { + path.push_back(history.elem(i)); + } + } +} - path2.clear(); - for (; y != -1; y = pred(y)) path2.push_back(elem(y)); +// cut out a list of subhistories of this tag separated by tags +// with higher priority (in POSIX they correspond to outer captures) +static void subhistories(const tagtree_t &history, + std::vector &path, hidx_t idx, size_t tag) +{ + // 0 -- bottom, 1 -- cursor, 2 -- subhistory delimiter, so that + // short history which is a prefix of a longer 
history dominates + path.clear(); + for (hidx_t i = idx;;) { + + // subhistory begins at the next occurence of this tag + for (; i != HROOT && history.tag(i) != tag; i = history.pred(i)); + if (i == HROOT) break; + path.push_back(2); + + // subhistory ends at the next occurence of tag with + // higher priority or when the whole history ends + for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) { + // skip tags with lower priority + if (history.tag(i) > tag) continue; + path.push_back(history.elem(i) == TAGVER_CURSOR ? 1 : 0); + } + } +} +static int32_t compare_reversed( + const std::vector &h1, + const std::vector &h2) +{ std::vector::const_reverse_iterator - i1 = path1.rbegin(), e1 = path1.rend(), - i2 = path2.rbegin(), e2 = path2.rend(); + i1 = h1.rbegin(), e1 = h1.rend(), + i2 = h2.rbegin(), e2 = h2.rend(); for (;;) { if (i1 == e1 && i2 == e2) break; @@ -81,4 +85,26 @@ int32_t tagtree_t::compare_paths(tagver_t x, tagver_t y) return 0; } +int32_t tagtree_t::compare_actions(hidx_t x, hidx_t y, size_t t) +{ + subhistory(*this, path1, x, t); + subhistory(*this, path2, y, t); + return compare_reversed(path1, path2); +} + +int32_t tagtree_t::compare_orbits(hidx_t x, hidx_t y, size_t t) +{ + subhistories(*this, path1, x, t); + subhistories(*this, path2, y, t); + return compare_reversed(path1, path2); +} + +tagver_t tagtree_t::last(hidx_t i, size_t t) const +{ + for (; i != HROOT; i = pred(i)) { + if (tag(i) == t) return elem(i); + } + return TAGVER_ZERO; +} + } // namespace re2c diff --git a/re2c/src/dfa/tagtree.h b/re2c/src/dfa/tagtree.h index fe4add97..ac997fb0 100644 --- a/re2c/src/dfa/tagtree.h +++ b/re2c/src/dfa/tagtree.h @@ -9,35 +9,35 @@ namespace re2c { -class tagtree_t +typedef uint32_t hidx_t; + +static const hidx_t HROOT = ~0u; + +struct tagtree_t { // the whole tree of tags found by the epsilon-closure // (a bunch of separate subtrees for each tag with common root) struct node_t { - tagver_t pred; + hidx_t pred; tagver_t elem; + size_t tag; }; 
std::vector nodes; + hidx_t tail; // reconstruct paths for comparison std::vector path1; std::vector path2; - // set of leaves (one leaf per tag) corresponding to - // current deep-first search path in the epsilon-closure - size_t ntag; - tagver_t *tags; - -public: - explicit tagtree_t(size_t n); - ~tagtree_t(); - void init(); - tagver_t pred(tagver_t i) const; - tagver_t elem(tagver_t i) const; - const tagver_t *leaves() const; + tagtree_t(); + hidx_t pred(hidx_t i) const; + tagver_t elem(hidx_t i) const; + size_t tag(hidx_t i) const; void push(size_t t, tagver_t v); - void pop(size_t t); - int32_t compare_paths(tagver_t x, tagver_t y); + void pop(); + int32_t compare_actions(hidx_t x, hidx_t y, size_t t); + int32_t compare_orbits(hidx_t x, hidx_t y, size_t t); + tagver_t last(hidx_t i, size_t t) const; FORBID_COPY(tagtree_t); }; diff --git a/re2c/src/dfa/tcmd.cc b/re2c/src/dfa/tcmd.cc index 5f117490..36079270 100644 --- a/re2c/src/dfa/tcmd.cc +++ b/re2c/src/dfa/tcmd.cc @@ -143,19 +143,25 @@ tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set) } tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, - tagver_t hidx, const tagtree_t &history) + const tagtree_t &history, hidx_t hidx, size_t tag) { size_t hlen = 0; - for (tagver_t i = hidx; i != -1; i = history.pred(i)) ++hlen; + for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { + if (history.tag(i) == tag) ++hlen; + } - const size_t size = sizeof(tcmd_t) + (hlen - 1) * sizeof(tagver_t); + const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); tcmd_t *p = static_cast(alc.alloc(size)); p->next = next; p->lhs = lhs; p->rhs = rhs; - for (tagver_t i = hidx, *h = p->history; i != -1; i = history.pred(i)) { - *h++ = history.elem(i); + tagver_t *h = p->history; + for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { + if (history.tag(i) == tag) { + *h++ = history.elem(i); + } } + *h++ = TAGVER_ZERO; return p; } diff --git a/re2c/src/dfa/tcmd.h b/re2c/src/dfa/tcmd.h index 
4dde594f..d48890bb 100644 --- a/re2c/src/dfa/tcmd.h +++ b/re2c/src/dfa/tcmd.h @@ -43,7 +43,7 @@ public: tcpool_t(); tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs); tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set); - tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, tagver_t hidx, const tagtree_t &history); + tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagtree_t &history, hidx_t hidx, size_t tag); tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history); tcid_t insert(const tcmd_t *tcmd); const tcmd_t *operator[](tcid_t id) const;