From f5671eb722c830810ddeea55270ccb5c8bd2d806 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 19 May 2017 22:11:24 +0100 Subject: [PATCH] Forbid mixing leftmost greedy and POSIX disambiguation. Mixing doesn't make much sense (there is no obvious use case). It might be possible, but for now better forbid it before someone starts to use it and it turns out to be impossible or very complex. --- re2c/src/dfa/closure.cc | 105 +++++++++++++++++--------------- re2c/src/dfa/closure.h | 2 +- re2c/src/dfa/determinization.cc | 9 ++- re2c/src/dfa/dump.cc | 4 +- re2c/src/dfa/dump.h | 2 +- re2c/src/dfa/tagpool.cc | 3 +- re2c/src/dfa/tagpool.h | 5 +- re2c/src/re/ast_to_re.cc | 4 ++ 8 files changed, 73 insertions(+), 61 deletions(-) diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index bf15db2e..ce5cead0 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -18,8 +18,7 @@ static bool cmpby_rule_state(const clos_t &x, const clos_t &y); tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, tcpool_t &tcpool, std::valarray &rules, tagver_t &maxver, - newvers_t &newvers, bool lookahead, closure_t *shadow, - const std::vector &tags) + newvers_t &newvers, closure_t *shadow, const std::vector &tags) { // build tagged epsilon-closure of the given set of NFA states raw_closure(clos1, clos2, shadow, tagpool, tags, rules); @@ -29,7 +28,7 @@ tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, std::sort(clos2.begin(), clos2.end(), cmpby_rule_state); // see note [the difference between TDFA(0) and TDFA(1)] - if (!lookahead) { + if (!tagpool.opts->lookahead) { lower_lookahead_to_transition(clos2); if (shadow) lower_lookahead_to_transition(*shadow); } @@ -237,54 +236,51 @@ void raw_closure(const closure_t &clos1, closure_t &done, closure_t *shadow, bool better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, const std::vector &tags) { - if (c1.ttran == c2.ttran - && c1.tvers == c2.tvers + if (tagpool.ntags == 0 + || (c1.order == c2.order && c1.tlook == c2.tlook - && c1.order == c2.order - && c1.index == c2.index) return false; + && c1.index == c2.index)) return false; - const hidx_t l1 = c1.tlook, l2 = c2.tlook; const tagver_t *o1 = tagpool[c1.order], *o2 = tagpool[c2.order]; - tagver_t x, y; - tagtree_t &tagtree = tagpool.history; + tagtree_t &history = tagpool.history; + + // leftmost greedy disambiguation + if (!tagpool.opts->posix_captures) { + const tagver_t x = o1[0], y = o2[0]; + if (x < y) return false; + if (x > y) return true; + const int32_t cmp = history.compare_full(c1.index, c2.index, Tag::RIGHTMOST); + if (cmp < 0) return false; + if (cmp > 0) return true; + + assert(false); // all paths have different indices + } + + // POSIX disambiguation for (size_t t = 0; t < tagpool.ntags; ++t) { - const Tag &tag = tags[t]; - // orbit capture tag: compare by orders and tag histories - if (orbit(tag)) { - x = o1[t]; y = o2[t]; + // orbit tag: compare by orders and tag histories + if (orbit(tags[t])) { + const tagver_t x = o1[t], y = o2[t]; if (x < y) return false; if (x > y) return true; - const int cmp = tagtree.compare_last(l1, l2, t); + const int32_t cmp = history.compare_last(c1.tlook, c2.tlook, t); if (cmp < 0) return false; if (cmp > 0) return true; - // open/close capture tag: maximize (first, lookahead, then orders) - } else if (capture(tag)) { - x = tagtree.last(l1, t); - y = tagtree.last(l2, t); + // open/close tag: maximize (first lookahead, then orders) + } else { + tagver_t x = history.last(c1.tlook, t), + y = history.last(c2.tlook, t); if (x == TAGVER_ZERO && y == TAGVER_ZERO) { x = o1[t]; y = o2[t]; } if (x > y) return false; if (x < y) return true; - - // simple tag: always prefer leftmost - } else { - x = o1[t]; y = o2[t]; - if (x < y) return false; - if (x > y) return true; - - const int cmp = tagtree.compare_full(c1.index, c2.index, Tag::RIGHTMOST); - if (cmp < 0) return false; - if (cmp > 0) return true; - - assert(false); // all indexes are different } } - return false; } @@ -462,6 +458,8 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) ntag = tagpool.ntags, nclos = clos.size(); + if (ntag == 0) return; + size_t &maxclos = tagpool.maxclos; tagver_t *&orders = tagpool.orders, *o; if (maxclos < nclos) { @@ -470,23 +468,44 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) orders = new tagver_t[ntag * maxclos]; } - for (size_t t = 0; t < ntag; ++t) { + // leftmost greedy disambiguation: order equals item's position + // in leftmost NFA traversal (it's the same for all tags) + if (!tagpool.opts->posix_captures) { + const cmp_leftmost_t cmp = {tagtree}; + std::set keys(cmp); + for (c = b; c != e; ++c) { + keys.insert(key_t(tagpool[c->order][0], c->index)); + } o = orders; + for (c = b; c != e; ++c, o += ntag) { + const ptrdiff_t d = std::distance(keys.begin(), + keys.find(key_t(tagpool[c->order][0], c->index))); + std::fill(o, o + ntag, static_cast(d)); + } + o = orders; + for (c = b; c != e; ++c, o += ntag) { + c->order = tagpool.insert(o); + } + return; + } + + // see note [POSIX disambiguation] + for (size_t t = 0; t < ntag; ++t) { - // see note [POSIX disambiguation] if (orbit(tags[t])) { const cmp_orbit_t cmp = {tagtree, t}; std::set keys(cmp); for (c = b; c != e; ++c) { keys.insert(key_t(tagpool[c->order][t], c->tlook)); } + o = orders; for (c = b; c != e; ++c, o += ntag) { const ptrdiff_t d = std::distance(keys.begin(), keys.find(key_t(tagpool[c->order][t], c->tlook))); o[t] = static_cast(d); } - } else if (capture(tags[t])) { + } else { std::set keys; for (c = b; c != e; ++c) { tagver_t u = tagtree.last(c->tlook, t); @@ -495,6 +514,7 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) } keys.insert(u); } + o = orders; for (c = b; c != e; ++c, o += ntag) { tagver_t u = tagtree.last(c->tlook, t); if (u == TAGVER_ZERO) { @@ -503,21 +523,6 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) const ptrdiff_t d = std::distance(keys.begin(), keys.find(u)); o[t] = static_cast(d); } - - // for simple tags and non-orbit capture tags item's order - // equals position of this item in leftmost NFA traversal - // (it's the same for all tags) - } else { - const cmp_leftmost_t cmp = {tagtree}; - std::set keys(cmp); - for (c = b; c != e; ++c) { - keys.insert(key_t(tagpool[c->order][t], c->index)); - } - for (c = b; c != e; ++c, o += ntag) { - const ptrdiff_t d = std::distance(keys.begin(), - keys.find(key_t(tagpool[c->order][t], c->index))); - o[t] = static_cast(d); - } } } diff --git a/re2c/src/dfa/closure.h b/re2c/src/dfa/closure.h index 8e9663ed..be7c33be 100644 --- a/re2c/src/dfa/closure.h +++ b/re2c/src/dfa/closure.h @@ -54,7 +54,7 @@ typedef std::map newvers_t; tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, tcpool_t &tcpool, std::valarray &rules, tagver_t &maxver, - newvers_t &newvers, bool lookahead, closure_t *shadow, const std::vector &tags); + newvers_t &newvers, closure_t *shadow, const std::vector &tags); } // namespace re2c diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index fba0ade1..a0b6c2c4 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -61,15 +61,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, , tcmd0(NULL) , tcid0(TCID0) { - const bool lookahead = opts->lookahead; const size_t ntag = tags.size(); - Tagpool tagpool(ntag); + Tagpool tagpool(opts, ntag); kernels_t kernels(tagpool, tcpool, tags); closure_t clos1, clos2; newver_cmp_t newvers_cmp = {tagpool.history}; newvers_t newvers(newvers_cmp); tcmd_t *acts; - dump_dfa_t dump(*this, tagpool, nfa, opts->dump_dfa_raw); + dump_dfa_t dump(*this, tagpool, nfa); // all-zero tag configuration must have static number zero assert(ZERO_TAGS == tagpool.insert_const(TAGVER_ZERO)); @@ -88,14 +87,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, clos_t c0 = {NULL, nfa.root, ZERO_TAGS, INITIAL_TAGS, HROOT, HROOT, HROOT}; clos1.push_back(c0); - acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags); + acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, dump.shadow, tags); find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump); for (size_t i = 0; i < kernels.size(); ++i) { newvers.clear(); for (size_t c = 0; c < nchars; ++c) { reach(kernels[i], clos1, charset[c]); - acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags); + acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, dump.shadow, tags); find_state(*this, i, c, kernels, clos2, acts, dump); } } diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc index d0f46bce..46425f93 100644 --- a/re2c/src/dfa/dump.cc +++ b/re2c/src/dfa/dump.cc @@ -9,8 +9,8 @@ static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sy static const char *tagname(const Tag &t); static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers); -dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg) - : debug(dbg) +dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n) + : debug(pool.opts->dump_dfa_raw) , dfa(d) , tagpool(pool) , uniqidx(0) diff --git a/re2c/src/dfa/dump.h b/re2c/src/dfa/dump.h index 4043ffe8..ae6b85a6 100644 --- a/re2c/src/dfa/dump.h +++ b/re2c/src/dfa/dump.h @@ -16,7 +16,7 @@ struct dump_dfa_t const nfa_state_t *base; closure_t *shadow; - dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg); + dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n); ~dump_dfa_t(); void closure_tags(cclositer_t c); void closure(const closure_t &clos, uint32_t state, bool isnew); diff --git a/re2c/src/dfa/tagpool.cc b/re2c/src/dfa/tagpool.cc index db121306..6bbaeb81 100644 --- a/re2c/src/dfa/tagpool.cc +++ b/re2c/src/dfa/tagpool.cc @@ -18,8 +18,9 @@ struct eqtag_t } }; -Tagpool::Tagpool(size_t n) +Tagpool::Tagpool(const opt_t *o, size_t n) : lookup() + , opts(o) , ntags(n) , buffer(new tagver_t[n]) , maxclos(0) diff --git a/re2c/src/dfa/tagpool.h b/re2c/src/dfa/tagpool.h index 71dcdad1..84511735 100644 --- a/re2c/src/dfa/tagpool.h +++ b/re2c/src/dfa/tagpool.h @@ -9,6 +9,8 @@ namespace re2c { +struct opt_t; + static const size_t ZERO_TAGS = 0; struct Tagpool @@ -18,6 +20,7 @@ private: taglookup_t lookup; public: + const opt_t *opts; const size_t ntags; tagver_t *buffer; @@ -26,7 +29,7 @@ public: tagtree_t history; - explicit Tagpool(size_t n); + Tagpool(const opt_t *o, size_t n); ~Tagpool(); size_t insert_const(tagver_t ver); size_t insert_succ(tagver_t fst); diff --git a/re2c/src/re/ast_to_re.cc b/re2c/src/re/ast_to_re.cc index 22081aad..fc1424d4 100644 --- a/re2c/src/re/ast_to_re.cc +++ b/re2c/src/re/ast_to_re.cc @@ -191,6 +191,10 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) fatal_error(ast->line, ast->column, "tags are only allowed with '-T, --tags' option"); } + if (opts->posix_captures) { + fatal_error(ast->line, ast->column, + "simple tags are not allowed with '--posix-captures' option"); + } RE *t = re_tag(alc, tags.size(), false); tags.push_back(Tag(ast->tag.name, ast->tag.history)); return t; -- 2.40.0