]> granicus.if.org Git - re2c/commitdiff
Forbid mixing leftmost greedy and POSIX disambiguation.
authorUlya Trofimovich <skvadrik@gmail.com>
Fri, 19 May 2017 21:11:24 +0000 (22:11 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Fri, 19 May 2017 21:11:24 +0000 (22:11 +0100)
Mixing doesn't make much sense (there is no obvious use case).
It might be possible, but for now better forbid it before someone
starts to use it and it turns out to be impossible or very complex.

re2c/src/dfa/closure.cc
re2c/src/dfa/closure.h
re2c/src/dfa/determinization.cc
re2c/src/dfa/dump.cc
re2c/src/dfa/dump.h
re2c/src/dfa/tagpool.cc
re2c/src/dfa/tagpool.h
re2c/src/re/ast_to_re.cc

index bf15db2e32b16eb54b004b9f19ef241624e3c5a0..ce5cead0949c05c615b11f2e0c9a0a74c3ea1827 100644 (file)
@@ -18,8 +18,7 @@ static bool cmpby_rule_state(const clos_t &x, const clos_t &y);
 
 tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
        tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver,
-       newvers_t &newvers, bool lookahead, closure_t *shadow,
-       const std::vector<Tag> &tags)
+       newvers_t &newvers, closure_t *shadow, const std::vector<Tag> &tags)
 {
        // build tagged epsilon-closure of the given set of NFA states
        raw_closure(clos1, clos2, shadow, tagpool, tags, rules);
@@ -29,7 +28,7 @@ tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
        std::sort(clos2.begin(), clos2.end(), cmpby_rule_state);
 
        // see note [the difference between TDFA(0) and TDFA(1)]
-       if (!lookahead) {
+       if (!tagpool.opts->lookahead) {
                lower_lookahead_to_transition(clos2);
                if (shadow) lower_lookahead_to_transition(*shadow);
        }
@@ -237,54 +236,51 @@ void raw_closure(const closure_t &clos1, closure_t &done, closure_t *shadow,
 bool better(const clos_t &c1, const clos_t &c2,
        Tagpool &tagpool, const std::vector<Tag> &tags)
 {
-       if (c1.ttran == c2.ttran
-               && c1.tvers == c2.tvers
+       if (tagpool.ntags == 0
+               || (c1.order == c2.order
                && c1.tlook == c2.tlook
-               && c1.order == c2.order
-               && c1.index == c2.index) return false;
+               && c1.index == c2.index)) return false;
 
-       const hidx_t l1 = c1.tlook, l2 = c2.tlook;
        const tagver_t *o1 = tagpool[c1.order], *o2 = tagpool[c2.order];
-       tagver_t x, y;
-       tagtree_t &tagtree = tagpool.history;
+       tagtree_t &history = tagpool.history;
+
+       // leftmost greedy disambiguation
+       if (!tagpool.opts->posix_captures) {
+               const tagver_t x = o1[0], y = o2[0];
+               if (x < y) return false;
+               if (x > y) return true;
 
+               const int32_t cmp = history.compare_full(c1.index, c2.index, Tag::RIGHTMOST);
+               if (cmp < 0) return false;
+               if (cmp > 0) return true;
+
+               assert(false); // all paths have different indices
+       }
+
+       // POSIX disambiguation
        for (size_t t = 0; t < tagpool.ntags; ++t) {
-               const Tag &tag = tags[t];
 
-               // orbit capture tag: compare by orders and tag histories
-               if (orbit(tag)) {
-                       x = o1[t]; y = o2[t];
+               // orbit tag: compare by orders and tag histories
+               if (orbit(tags[t])) {
+                       const tagver_t x = o1[t], y = o2[t];
                        if (x < y) return false;
                        if (x > y) return true;
 
-                       const int cmp = tagtree.compare_last(l1, l2, t);
+                       const int32_t cmp = history.compare_last(c1.tlook, c2.tlook, t);
                        if (cmp < 0) return false;
                        if (cmp > 0) return true;
 
-               // open/close capture tag: maximize (first, lookahead, then orders)
-               } else if (capture(tag)) {
-                       x = tagtree.last(l1, t);
-                       y = tagtree.last(l2, t);
+               // open/close tag: maximize (first lookahead, then orders)
+               } else {
+                       tagver_t x = history.last(c1.tlook, t),
+                               y = history.last(c2.tlook, t);
                        if (x == TAGVER_ZERO && y == TAGVER_ZERO) {
                                x = o1[t]; y = o2[t];
                        }
                        if (x > y) return false;
                        if (x < y) return true;
-
-               // simple tag: always prefer leftmost
-               } else {
-                       x = o1[t]; y = o2[t];
-                       if (x < y) return false;
-                       if (x > y) return true;
-
-                       const int cmp = tagtree.compare_full(c1.index, c2.index, Tag::RIGHTMOST);
-                       if (cmp < 0) return false;
-                       if (cmp > 0) return true;
-
-                       assert(false); // all indexes are different
                }
        }
-
        return false;
 }
 
@@ -462,6 +458,8 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                ntag = tagpool.ntags,
                nclos = clos.size();
 
+       if (ntag == 0) return;
+
        size_t &maxclos = tagpool.maxclos;
        tagver_t *&orders = tagpool.orders, *o;
        if (maxclos < nclos) {
@@ -470,23 +468,44 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                orders = new tagver_t[ntag * maxclos];
        }
 
-       for (size_t t = 0; t < ntag; ++t) {
+       // leftmost greedy disambiguation: order equals item's position
+       // in leftmost NFA traversal (it's the same for all tags)
+       if (!tagpool.opts->posix_captures) {
+               const cmp_leftmost_t cmp = {tagtree};
+               std::set<key_t, cmp_leftmost_t> keys(cmp);
+               for (c = b; c != e; ++c) {
+                       keys.insert(key_t(tagpool[c->order][0], c->index));
+               }
                o = orders;
+               for (c = b; c != e; ++c, o += ntag) {
+                       const ptrdiff_t d = std::distance(keys.begin(),
+                               keys.find(key_t(tagpool[c->order][0], c->index)));
+                       std::fill(o, o + ntag, static_cast<tagver_t>(d));
+               }
+               o = orders;
+               for (c = b; c != e; ++c, o += ntag) {
+                       c->order = tagpool.insert(o);
+               }
+               return;
+       }
+
+       // see note [POSIX disambiguation]
+       for (size_t t = 0; t < ntag; ++t) {
 
-               // see note [POSIX disambiguation]
                if (orbit(tags[t])) {
                        const cmp_orbit_t cmp = {tagtree, t};
                        std::set<key_t, cmp_orbit_t> keys(cmp);
                        for (c = b; c != e; ++c) {
                                keys.insert(key_t(tagpool[c->order][t], c->tlook));
                        }
+                       o = orders;
                        for (c = b; c != e; ++c, o += ntag) {
                                const ptrdiff_t d = std::distance(keys.begin(),
                                        keys.find(key_t(tagpool[c->order][t], c->tlook)));
                                o[t] = static_cast<tagver_t>(d);
                        }
 
-               } else if (capture(tags[t])) {
+               } else {
                        std::set<tagver_t> keys;
                        for (c = b; c != e; ++c) {
                                tagver_t u = tagtree.last(c->tlook, t);
@@ -495,6 +514,7 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                                }
                                keys.insert(u);
                        }
+                       o = orders;
                        for (c = b; c != e; ++c, o += ntag) {
                                tagver_t u = tagtree.last(c->tlook, t);
                                if (u == TAGVER_ZERO) {
@@ -503,21 +523,6 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                                const ptrdiff_t d = std::distance(keys.begin(), keys.find(u));
                                o[t] = static_cast<tagver_t>(d);
                        }
-
-               // for simple tags and non-orbit capture tags item's order
-               // equals position of this item in leftmost NFA traversal
-               // (it's the same for all tags)
-               } else {
-                       const cmp_leftmost_t cmp = {tagtree};
-                       std::set<key_t, cmp_leftmost_t> keys(cmp);
-                       for (c = b; c != e; ++c) {
-                               keys.insert(key_t(tagpool[c->order][t], c->index));
-                       }
-                       for (c = b; c != e; ++c, o += ntag) {
-                               const ptrdiff_t d = std::distance(keys.begin(),
-                                       keys.find(key_t(tagpool[c->order][t], c->index)));
-                               o[t] = static_cast<tagver_t>(d);
-                       }
                }
        }
 
index 8e9663ed3d7bfb40783237a61518c58aec6f2d04..be7c33beedc11662997601a1565818e167703d3f 100644 (file)
@@ -54,7 +54,7 @@ typedef std::map<newver_t, tagver_t, newver_cmp_t> newvers_t;
 
 tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
        tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver,
-       newvers_t &newvers, bool lookahead, closure_t *shadow, const std::vector<Tag> &tags);
+       newvers_t &newvers, closure_t *shadow, const std::vector<Tag> &tags);
 
 } // namespace re2c
 
index fba0ade14b4bbfecf52a776309fe31318419c39c..a0b6c2c49d9c55a2d94f0833e205f68ed9558951 100644 (file)
@@ -61,15 +61,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        , tcmd0(NULL)
        , tcid0(TCID0)
 {
-       const bool lookahead = opts->lookahead;
        const size_t ntag = tags.size();
-       Tagpool tagpool(ntag);
+       Tagpool tagpool(opts, ntag);
        kernels_t kernels(tagpool, tcpool, tags);
        closure_t clos1, clos2;
        newver_cmp_t newvers_cmp = {tagpool.history};
        newvers_t newvers(newvers_cmp);
        tcmd_t *acts;
-       dump_dfa_t dump(*this, tagpool, nfa, opts->dump_dfa_raw);
+       dump_dfa_t dump(*this, tagpool, nfa);
 
        // all-zero tag configuration must have static number zero
        assert(ZERO_TAGS == tagpool.insert_const(TAGVER_ZERO));
@@ -88,14 +87,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
 
        clos_t c0 = {NULL, nfa.root, ZERO_TAGS, INITIAL_TAGS, HROOT, HROOT, HROOT};
        clos1.push_back(c0);
-       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
+       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, dump.shadow, tags);
        find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump);
 
        for (size_t i = 0; i < kernels.size(); ++i) {
                newvers.clear();
                for (size_t c = 0; c < nchars; ++c) {
                        reach(kernels[i], clos1, charset[c]);
-                       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
+                       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, dump.shadow, tags);
                        find_state(*this, i, c, kernels, clos2, acts, dump);
                }
        }
index d0f46bce47d83057f9d175e1f0547d7834dbf3fd..46425f93f52a0302c3acf92817bda8b730eb60b9 100644 (file)
@@ -9,8 +9,8 @@ static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sy
 static const char *tagname(const Tag &t);
 static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers);
 
-dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg)
-       : debug(dbg)
+dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n)
+       : debug(pool.opts->dump_dfa_raw)
        , dfa(d)
        , tagpool(pool)
        , uniqidx(0)
index 4043ffe863055050c8a9228c7aa6c12ab800df42..ae6b85a60e7b63b9418a22554e91a5e03f5203a2 100644 (file)
@@ -16,7 +16,7 @@ struct dump_dfa_t
        const nfa_state_t *base;
        closure_t *shadow;
 
-       dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg);
+       dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n);
        ~dump_dfa_t();
        void closure_tags(cclositer_t c);
        void closure(const closure_t &clos, uint32_t state, bool isnew);
index db121306a11d082a178f395e92a299dad4122007..6bbaeb8113bbf2c3f7f08691314615ebefa5cd98 100644 (file)
@@ -18,8 +18,9 @@ struct eqtag_t
        }
 };
 
-Tagpool::Tagpool(size_t n)
+Tagpool::Tagpool(const opt_t *o, size_t n)
        : lookup()
+       , opts(o)
        , ntags(n)
        , buffer(new tagver_t[n])
        , maxclos(0)
index 71dcdad1b6dc976b567c8a761e3d4555a0f2e9da..845117359a43fb095d5a2c39c7074de146b331e0 100644 (file)
@@ -9,6 +9,8 @@
 namespace re2c
 {
 
+struct opt_t;
+
 static const size_t ZERO_TAGS = 0;
 
 struct Tagpool
@@ -18,6 +20,7 @@ private:
        taglookup_t lookup;
 
 public:
+       const opt_t *opts;
        const size_t ntags;
        tagver_t *buffer;
 
@@ -26,7 +29,7 @@ public:
 
        tagtree_t history;
 
-       explicit Tagpool(size_t n);
+       Tagpool(const opt_t *o, size_t n);
        ~Tagpool();
        size_t insert_const(tagver_t ver);
        size_t insert_succ(tagver_t fst);
index 22081aadfb7394ef8c22990c5271a99c2c96a503..fc1424d49540a65c499655ebcbee8b9366528f09 100644 (file)
@@ -191,6 +191,10 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                                fatal_error(ast->line, ast->column,
                                        "tags are only allowed with '-T, --tags' option");
                        }
+                       if (opts->posix_captures) {
+                               fatal_error(ast->line, ast->column,
+                                       "simple tags are not allowed with '--posix-captures' option");
+                       }
                        RE *t = re_tag(alc, tags.size(), false);
                        tags.push_back(Tag(ast->tag.name, ast->tag.history));
                        return t;