]> granicus.if.org Git - re2c/commitdiff
Keep the full history of tag occurences found by epsilon-closure.
authorUlya Trofimovich <skvadrik@gmail.com>
Fri, 3 Mar 2017 00:56:49 +0000 (00:56 +0000)
committerUlya Trofimovich <skvadrik@gmail.com>
Fri, 3 Mar 2017 00:56:49 +0000 (00:56 +0000)
Currently this makes no difference because tag duplication in NFA
is forbidden. Anyway, we are only interested in the last occurence
of each tag on the epsilon-path to the given NFA state.

This will make difference for POSIX disambiguation strategy: we'll
have to allow tag duplication in NFA (because of counted repetition
and iteration expansion).

re2c/Makefile.am
re2c/src/ir/dfa/closure.cc
re2c/src/ir/dfa/closure.h
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/tagtree.cc [new file with mode: 0644]
re2c/src/ir/dfa/tagtree.h [new file with mode: 0644]
re2c/src/ir/nfa/nfa.h

index 097695275eba03dc3f7040c1491be9c2361d5032..f0c83a0d793de2b9543dfed226198294875c2bc3 100644 (file)
@@ -28,6 +28,7 @@ SRC_HDR = \
        src/ir/dfa/dump.h \
        src/ir/dfa/find_state.h \
        src/ir/dfa/tagpool.h \
+       src/ir/dfa/tagtree.h \
        src/ir/nfa/nfa.h \
        src/ir/re/re.h \
        src/ir/regexp/encoding/case.h \
@@ -115,6 +116,7 @@ SRC = \
        src/ir/dfa/find_state.cc \
        src/ir/dfa/minimization.cc \
        src/ir/dfa/tagpool.cc \
+       src/ir/dfa/tagtree.cc \
        src/ir/regexp/encoding/enc.cc \
        src/ir/regexp/encoding/range_suffix.cc \
        src/ir/regexp/encoding/utf8/utf8_regexp.cc \
index 81b745678e78d1a09f6297753a7e027d39639781..7a07b0ac55f2d54655fcdd471a8cd6994a2105f4 100644 (file)
@@ -2,42 +2,43 @@
 
 #include "src/ir/dfa/closure.h"
 #include "src/ir/nfa/nfa.h"
+#include "src/util/local_increment.h"
 
 namespace re2c
 {
 
-static void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0, nfa_state_t *n, tagver_t *tags, closure_t *shadow, std::valarray<Rule> &rules);
+static void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree,
+       clos_t &c0, nfa_state_t *n, closure_t *shadow, std::valarray<Rule> &rules);
 static void assert_items_are_grouped_by_rule(const closure_t &clos);
-static void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool);
-static void update_versions(closure_t &clos, Tagpool &tagpool, tagver_t &maxver, tagver_t *newvers);
+static void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree);
+static void update_versions(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, tagver_t &maxver, tagver_t *newvers);
 static void find_nondet(const closure_t &clos, size_t *nondet, const Tagpool &tagpool, const std::valarray<Rule> &rules);
 
-void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
+void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, tagtree_t &tagtree,
        std::valarray<Rule> &rules, tagver_t &maxver, tagver_t *newvers,
        size_t *nondet, bool lookahead, closure_t *shadow)
 {
        // build tagged epsilon-closure of the given set of NFA states
        clos2.clear();
        if (shadow) shadow->clear();
-       tagver_t *tags = tagpool.buffer1;
-       std::fill(tags, tags + tagpool.ntags, TAGVER_ZERO);
+       tagtree.init();
        for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) {
-               closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules);
+               closure_one(clos2, tagpool, tagtree, *c, c->state, shadow, rules);
        }
 
        assert_items_are_grouped_by_rule(clos2);
 
        // see note [the difference between TDFA(0) and TDFA(1)]
        if (!lookahead) {
-               lower_lookahead_to_transition(clos2, tagpool);
-               if (shadow) lower_lookahead_to_transition(*shadow, tagpool);
+               lower_lookahead_to_transition(clos2, tagpool, tagtree);
+               if (shadow) lower_lookahead_to_transition(*shadow, tagpool, tagtree);
        }
 
        find_nondet(clos2, nondet, tagpool, rules);
 
        // merge tags from different rules, find nondeterministic tags
-       update_versions(clos2, tagpool, maxver, newvers);
-       if (shadow) update_versions(*shadow, tagpool, maxver, newvers);
+       update_versions(clos2, tagpool, tagtree, maxver, newvers);
+       if (shadow) update_versions(*shadow, tagpool, tagtree, maxver, newvers);
 }
 
 /* note [closure items are sorted by rule]
@@ -79,65 +80,47 @@ void assert_items_are_grouped_by_rule(const closure_t &clos)
  * of individual subexpressions: e.g., greedy iteration is compiled so that
  * the leftmost path re-iterates rather than leaves the subexpression..
  */
-void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
-       nfa_state_t *n, tagver_t *tags, closure_t *shadow,
+void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, clos_t &c0,
+       nfa_state_t *n, closure_t *shadow,
        std::valarray<Rule> &rules)
 {
        if (n->loop) return;
+       local_increment_t<uint8_t> _(n->loop);
 
-       n->loop = true;
+       clositer_t c = clos.begin(), e = clos.end();
        switch (n->type) {
                case nfa_state_t::NIL:
-                       closure_one(clos, tagpool, c0, n->nil.out, tags, shadow, rules);
-                       break;
+                       closure_one(clos, tagpool, tagtree, c0, n->nil.out, shadow, rules);
+                       return;
                case nfa_state_t::ALT:
-                       closure_one(clos, tagpool, c0, n->alt.out1, tags, shadow, rules);
-                       closure_one(clos, tagpool, c0, n->alt.out2, tags, shadow, rules);
-                       break;
-               case nfa_state_t::TAG: {
-                       const size_t t = n->tag.info;
-                       const tagver_t old = tags[t];
-                       tags[t] = n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR;
-                       closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules);
-                       tags[t] = old;
-                       break;
-               }
-               case nfa_state_t::RAN: {
-                       c0.state = n;
-                       c0.ttran = tagpool.insert(tags);
-                       clositer_t
-                               c = clos.begin(),
-                               e = clos.end();
+                       closure_one(clos, tagpool, tagtree, c0, n->alt.out1, shadow, rules);
+                       closure_one(clos, tagpool, tagtree, c0, n->alt.out2, shadow, rules);
+                       return;
+               case nfa_state_t::TAG:
+                       tagtree.push(n->tag.info, n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
+                       closure_one(clos, tagpool, tagtree, c0, n->tag.out, shadow, rules);
+                       tagtree.pop(n->tag.info);
+                       return;
+               case nfa_state_t::RAN:
                        for (; c != e && c->state != n; ++c);
-                       if (c == e) {
-                               clos.push_back(c0);
-                       } else if (shadow) {
-                               shadow->push_back(c0);
-                       }
                        break;
-               }
-               case nfa_state_t::FIN: {
+               case nfa_state_t::FIN:
                        // see note [at most one final item per closure]
-                       c0.state = n;
-                       c0.ttran = tagpool.insert(tags);
-                       clositer_t
-                               c = clos.begin(),
-                               e = clos.end();
                        for (; c != e && c->state->type != nfa_state_t::FIN; ++c);
-                       if (c == e) {
-                               clos.push_back(c0);
-                       } else {
-                               if (c->state != n) {
-                                       rules[n->rule].shadow.insert(rules[c->state->rule].code->fline);
-                               }
-                               if (shadow) {
-                                       shadow->push_back(c0);
-                               }
-                       }
                        break;
-               }
        }
-       n->loop = false;
+
+       clos_t c2 = {c0.origin, n, c0.tvers, tagpool.insert(tagtree.leaves())};
+       if (c == e) {
+               clos.push_back(c2);
+       } else {
+               clos_t &c1 = *c;
+               if (shadow) shadow->push_back(c2);
+               const size_t
+                       r1 = c1.state->rule,
+                       r2 = c2.state->rule;
+               if (r1 != r2) rules[r2].shadow.insert(rules[r1].code->fline);
+       }
 }
 
 /* note [at most one final item per closure]
@@ -184,7 +167,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
 // and 'lowering' is just a waste for TDFA(0). However, supporting two
 // different handlers for TDFA(0) and TDFA(1) is unmaintainable, and since
 // TDFA(1) is default, it has the fast path.
-void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool)
+void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree)
 {
        const size_t ntag = tagpool.ntags;
        tagver_t *vers = tagpool.buffer1;
@@ -196,7 +179,7 @@ void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool)
                        *look = tagpool[c->ttran],
                        *oldv = tagpool[c->tvers];
                for (size_t t = 0; t < ntag; ++t) {
-                       const tagver_t l = look[t];
+                       const tagver_t l = tagtree.elem(look[t]);
                        vers[t] = l == TAGVER_ZERO ? oldv[t] : l;
                }
                c->tvers = tagpool.insert(vers);
@@ -204,7 +187,7 @@ void lower_lookahead_to_transition(closure_t &clos, Tagpool &tagpool)
        }
 }
 
-void update_versions(closure_t &clos, Tagpool &tagpool,
+void update_versions(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree,
        tagver_t &maxver, tagver_t *newvers)
 {
        const size_t ntag = tagpool.ntags;
@@ -251,7 +234,7 @@ void update_versions(closure_t &clos, Tagpool &tagpool,
                        *oldv = tagpool[c->tvers];
 
                for (size_t i = 0; i < ntag; ++i) {
-                       const tagver_t o = oldv[i], l = look[i];
+                       const tagver_t o = oldv[i], l = tagtree.elem(look[i]);
                        tagver_t &v = vers[i], &t = tran[i];
 
                        if (l != TAGVER_ZERO) {
index 506026a7c2dabddb3f00429e80d2255ab433d154..970163a81c0e258b8628e8ac170495e2387c111c 100644 (file)
@@ -4,6 +4,7 @@
 #include "src/util/c99_stdint.h"
 
 #include "src/ir/dfa/dfa.h"
+#include "src/ir/dfa/tagtree.h"
 #include "src/ir/nfa/nfa.h"
 
 namespace re2c
@@ -23,7 +24,7 @@ typedef std::vector<clos_t> closure_t;
 typedef closure_t::iterator clositer_t;
 typedef closure_t::const_iterator cclositer_t;
 
-void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
+void closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, tagtree_t &tagtree,
        std::valarray<Rule> &rules, tagver_t &maxver, tagver_t *newvers,
        size_t *nondet, bool lookahead, closure_t *shadow);
 
index b80ef860eddc6a0c40086b7e6815995a514bd2d5..a33aed6409b521cdef41de5dabec9accd1402c8a 100644 (file)
@@ -64,6 +64,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        const bool lookahead = opts->lookahead;
        const size_t ntag = tags.size();
        Tagpool tagpool(ntag);
+       tagtree_t tagtree(ntag);
        kernels_t kernels(tagpool, tcpool);
        closure_t clos1, clos2;
        tagver_t *newvers = new tagver_t[ntag * 2];
@@ -88,14 +89,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS};
        clos1.push_back(c0);
        std::fill(newvers, newvers + ntag * 2, TAGVER_ZERO);
-       closure(clos1, clos2, tagpool, rules, maxtagver, newvers, nondet, lookahead, dump.shadow);
+       closure(clos1, clos2, tagpool, tagtree, rules, maxtagver, newvers, nondet, lookahead, dump.shadow);
        find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, dump);
 
        for (size_t i = 0; i < kernels.size(); ++i) {
                std::fill(newvers, newvers + ntag * 2, TAGVER_ZERO);
                for (size_t c = 0; c < nchars; ++c) {
                        reach(kernels[i], clos1, charset[c]);
-                       closure(clos1, clos2, tagpool, rules, maxtagver, newvers, nondet, lookahead, dump.shadow);
+                       closure(clos1, clos2, tagpool, tagtree, rules, maxtagver, newvers, nondet, lookahead, dump.shadow);
                        find_state(*this, i, c, kernels, clos2, dump);
                }
        }
diff --git a/re2c/src/ir/dfa/tagtree.cc b/re2c/src/ir/dfa/tagtree.cc
new file mode 100644 (file)
index 0000000..3607797
--- /dev/null
@@ -0,0 +1,59 @@
+#include <assert.h>
+#include <string.h>
+
+#include "src/ir/dfa/tagtree.h"
+
+namespace re2c
+{
+
+tagtree_t::tagtree_t(size_t n)
+       : nodes()
+       , ntag(n)
+       , tags(new tagver_t[ntag])
+{
+       init();
+}
+
+tagtree_t::~tagtree_t()
+{
+       delete[] tags;
+}
+
+void tagtree_t::init()
+{
+       nodes.clear();
+       node_t x = {-1, TAGVER_ZERO};
+       nodes.push_back(x);
+       memset(tags, 0, ntag * sizeof(tagver_t));
+}
+
+tagver_t tagtree_t::elem(tagver_t i) const
+{
+       return nodes[static_cast<size_t>(i)].elem;
+}
+
+tagver_t tagtree_t::pred(tagver_t i) const
+{
+       return nodes[static_cast<size_t>(i)].pred;
+}
+
+const tagver_t *tagtree_t::leaves() const
+{
+       return tags;
+}
+
+void tagtree_t::push(size_t t, tagver_t v)
+{
+       node_t x = {tags[t], v};
+       nodes.push_back(x);
+       tags[t] = static_cast<tagver_t>(nodes.size() - 1);
+}
+
+void tagtree_t::pop(size_t t)
+{
+       // don't destroy the leaf itself, just update pointer to current leaf
+       // (pointer to the the old leaf is stored in one of the closure items)
+       tags[t] = pred(tags[t]);
+}
+
+} // namespace re2c
diff --git a/re2c/src/ir/dfa/tagtree.h b/re2c/src/ir/dfa/tagtree.h
new file mode 100644 (file)
index 0000000..4a6eed3
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef _RE2C_IR_DFA_TAGTREE_
+#define _RE2C_IR_DFA_TAGTREE_
+
+#include <vector>
+
+#include "src/ir/tag.h"
+#include "src/util/forbid_copy.h"
+
+namespace re2c
+{
+
+class tagtree_t
+{
+       // the whole tree of tags found by the epsilon-closure
+       // (a bunch of separate subtrees for each tag with common root)
+       struct node_t {
+               tagver_t pred;
+               tagver_t elem;
+       };
+       std::vector<node_t> nodes;
+
+       // set of leaves (one leaf per tag) corresponding to
+       // current deep-first search path in the epsilon-closure
+       size_t ntag;
+       tagver_t *tags;
+
+public:
+       explicit tagtree_t(size_t n);
+       ~tagtree_t();
+       void init();
+       tagver_t pred(tagver_t i) const;
+       tagver_t elem(tagver_t i) const;
+       const tagver_t *leaves() const;
+       void push(size_t t, tagver_t v);
+       void pop(size_t t);
+       FORBID_COPY(tagtree_t);
+};
+
+} // namespace re2c
+
+#endif // _RE2C_IR_DFA_TAGTREE_
index 3586c45bc5da4b635085b6865d489ea906018a49..7112f08c632cfcd4b95f4e48875efe20077dacb5 100644 (file)
@@ -43,7 +43,7 @@ struct nfa_state_t
                } nil;
        };
        size_t rule;
-       bool loop;
+       uint8_t loop;
 
        void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2)
        {
@@ -51,7 +51,7 @@ struct nfa_state_t
                alt.out1 = s1;
                alt.out2 = s2;
                rule = r;
-               loop = false;
+               loop = 0;
        }
        void make_ran(size_t r, nfa_state_t *s, const Range *p)
        {
@@ -59,7 +59,7 @@ struct nfa_state_t
                ran.out = s;
                ran.ran = p;
                rule = r;
-               loop = false;
+               loop = 0;
        }
        void make_tag(size_t r, nfa_state_t *s, size_t i, bool bottom)
        {
@@ -68,20 +68,20 @@ struct nfa_state_t
                tag.info = i;
                tag.bottom = bottom;
                rule = r;
-               loop = false;
+               loop = 0;
        }
        void make_fin(size_t r)
        {
                type = FIN;
                rule = r;
-               loop = false;
+               loop = 0;
        }
        void make_nil(size_t r, nfa_state_t *s)
        {
                type = NIL;
                nil.out = s;
                rule = r;
-               loop = false;
+               loop = 0;
        }
 };