granicus.if.org Git - re2c/commitdiff
Deduplicated leftmost greedy closure implementations in re2c and libre2c.
author: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 4 Mar 2019 17:14:49 +0000 (17:14 +0000)
committer: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 4 Mar 2019 17:14:49 +0000 (17:14 +0000)
re2c/Makefile.am
re2c/Makefile.lib.am
re2c/lib/regexec_nfa_leftmost.cc
re2c/lib/regexec_nfa_leftmost_trie.cc
re2c/lib/regexec_nfa_posix_trie.cc
re2c/src/dfa/closure.cc
re2c/src/dfa/closure_leftmost.cc [deleted file]
re2c/src/dfa/determinization.h

diff --git a/re2c/Makefile.am b/re2c/Makefile.am
index d1d9293890eff17bd9a7e64b3dfa908e3af4fab6..9d11ad211d8cb5b0184023519141c15998ea24ad 100644 (file)
@@ -23,6 +23,7 @@ re2c_HDR = \
        src/adfa/action.h \
        src/adfa/adfa.h \
        src/cfg/cfg.h \
+       src/dfa/closure_leftmost.h \
        src/dfa/closure_posix.h \
        src/dfa/determinization.h \
        src/dfa/dfa.h \
@@ -110,7 +111,6 @@ re2c_SRC = \
        src/cfg/rename.cc \
        src/cfg/varalloc.cc \
        src/dfa/closure.cc \
-       src/dfa/closure_leftmost.cc \
        src/dfa/dead_rules.cc \
        src/dfa/determinization.cc \
        src/dfa/fallback_tags.cc \
diff --git a/re2c/Makefile.lib.am b/re2c/Makefile.lib.am
index 4f44f706821cf68e8e8192085c79ca24ef794b81..69c9c43615d52cbcad942e61d92eeec2b2531fa1 100644 (file)
@@ -20,6 +20,7 @@ libre2c_la_HDR = \
        src/adfa/action.h \
        src/adfa/adfa.h \
        src/cfg/cfg.h \
+       src/dfa/closure_leftmost.h \
        src/dfa/closure_posix.h \
        src/dfa/determinization.h \
        src/dfa/dfa.h \
@@ -97,7 +98,6 @@ libre2c_la_SRC = \
        src/cfg/rename.cc \
        src/cfg/varalloc.cc \
        src/dfa/closure.cc \
-       src/dfa/closure_leftmost.cc \
        src/debug/dump_adfa.cc \
        src/debug/dump_cfg.cc \
        src/debug/dump_dfa.cc \
diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc
index 19c49a7eac33db6c31a44aac9f474edd32e44e8a..d261cdfdb56bea56bf4ef9e3fe7849b14c648e83 100644 (file)
@@ -3,6 +3,7 @@
 #include "lib/regex_impl.h"
 #include "src/options/opt.h"
 #include "src/debug/debug.h"
+#include "src/dfa/closure_leftmost.h"
 #include "src/dfa/determinization.h"
 #include "src/nfa/nfa.h"
 
@@ -11,7 +12,6 @@ namespace re2c {
 namespace libre2c {
 
 static void reach_on_symbol(lsimctx_t &, uint32_t);
-static void closure_leftmost(lsimctx_t &);
 static void update_offsets(lsimctx_t &ctx, const conf_t &c);
 
 int regexec_nfa_leftmost(const regex_t *preg, const char *string
@@ -23,14 +23,14 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string
     // root state can be non-core, so we pass zero as origin to avoid checks
     const conf_t c0(ctx.nfa.root, 0, HROOT);
     ctx.reach.push_back(c0);
-    closure_leftmost(ctx);
+    closure_leftmost_dfs(ctx);
 
     for (;;) {
         const uint32_t sym = static_cast<uint8_t>(*ctx.cursor++);
         if (ctx.state.empty() || sym == 0) break;
         reach_on_symbol(ctx, sym);
         ++ctx.step;
-        closure_leftmost(ctx);
+        closure_leftmost_dfs(ctx);
     }
 
     for (cconfiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
@@ -93,40 +93,6 @@ void reach_on_symbol(lsimctx_t &ctx, uint32_t sym)
     ctx.history.init();
 }
 
-void closure_leftmost(lsimctx_t &ctx)
-{
-    confset_t &state = ctx.state, &wl = ctx.reach;
-    state.clear();
-    for (; !wl.empty(); ) {
-
-        const conf_t &x = wl.back();
-        nfa_state_t *n = x.state;
-        const uint32_t o = x.origin;
-        const int32_t h = x.thist;
-        wl.pop_back();
-
-        if (n->clos != NOCLOS) continue;
-
-        n->clos = 0;
-        state.push_back(x);
-
-        switch (n->type) {
-            case nfa_state_t::NIL:
-                wl.push_back(conf_t(n->nil.out, o, h));
-                break;
-            case nfa_state_t::ALT:
-                wl.push_back(conf_t(n->alt.out2, o, h));
-                wl.push_back(conf_t(n->alt.out1, o, h));
-                break;
-            case nfa_state_t::TAG:
-                wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x)));
-                break;
-            default:
-                break;
-        }
-    }
-}
-
 void update_offsets(lsimctx_t &ctx, const conf_t &c)
 {
     const size_t nsub = ctx.nsub;
diff --git a/re2c/lib/regexec_nfa_leftmost_trie.cc b/re2c/lib/regexec_nfa_leftmost_trie.cc
index 37e37fea320b3f0d86bb05c3bc791cbf67faeb4e..142244051c1310e809b591437dfd9ba76d9990f1 100644 (file)
@@ -3,6 +3,7 @@
 #include "lib/regex_impl.h"
 #include "src/options/opt.h"
 #include "src/debug/debug.h"
+#include "src/dfa/closure_leftmost.h"
 #include "src/dfa/determinization.h"
 #include "src/nfa/nfa.h"
 
@@ -10,8 +11,8 @@
 namespace re2c {
 namespace libre2c {
 
-static void reach_on_symbol(lzsimctx_t &, uint32_t);
-static void closure_leftmost(lzsimctx_t &);
+static void make_step(lzsimctx_t &, uint32_t);
+static void make_final_step(lzsimctx_t &ctx);
 
 int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string
     , size_t nmatch, regmatch_t pmatch[], int)
@@ -22,26 +23,19 @@ int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string
     nfa_state_t *s0 = ctx.nfa.root;
     const conf_t c0(s0, s0->coreid, HROOT);
     ctx.reach.push_back(c0);
-    closure_leftmost(ctx);
-
+    closure_leftmost_dfs(ctx);
     for (;;) {
         const uint32_t sym = static_cast<uint8_t>(*ctx.cursor++);
         if (ctx.state.empty() || sym == 0) break;
-        reach_on_symbol(ctx, sym);
-        ++ctx.step;
-        closure_leftmost(ctx);
-    }
-
-    const confset_t &state = ctx.state;
-    cconfiter_t b = state.begin(), e = state.end(), i;
-    for (i = b; i != e; ++i) {
-        i->state->clos = NOCLOS;
+        make_step(ctx, sym);
+        closure_leftmost_dfs(ctx);
     }
+    make_final_step(ctx);
 
     return finalize(ctx, string, nmatch, pmatch);
 }
 
-void reach_on_symbol(lzsimctx_t &ctx, uint32_t sym)
+void make_step(lzsimctx_t &ctx, uint32_t sym)
 {
     const confset_t &state = ctx.state;
     confset_t &reach = ctx.reach;
@@ -65,44 +59,28 @@ void reach_on_symbol(lzsimctx_t &ctx, uint32_t sym)
                 }
             }
         }
+        else if (s->type == nfa_state_t::FIN) {
+            ctx.marker = ctx.cursor;
+            ctx.hidx = i->thist;
+            ctx.rule = 0;
+        }
     }
+
+    ++ctx.step;
 }
 
-void closure_leftmost(lzsimctx_t &ctx)
+void make_final_step(lzsimctx_t &ctx)
 {
-    confset_t &state = ctx.state, &wl = ctx.reach;
-    state.clear();
-    for (; !wl.empty(); ) {
-
-        const conf_t &x = wl.back();
-        nfa_state_t *n = x.state;
-        const uint32_t o = x.origin;
-        const int32_t h = x.thist;
-        wl.pop_back();
-
-        if (n->clos != NOCLOS) continue;
-
-        n->clos = 0;
-        state.push_back(x);
-
-        switch (n->type) {
-            case nfa_state_t::NIL:
-                wl.push_back(conf_t(n->nil.out, o, h));
-                break;
-            case nfa_state_t::ALT:
-                wl.push_back(conf_t(n->alt.out2, o, h));
-                wl.push_back(conf_t(n->alt.out1, o, h));
-                break;
-            case nfa_state_t::TAG:
-                wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x)));
-                break;
-            case nfa_state_t::RAN:
-                break;
-            case nfa_state_t::FIN:
-                ctx.marker = ctx.cursor + 1;
-                ctx.hidx = x.thist;
-                ctx.rule = 0;
-                break;
+    for (confiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
+        nfa_state_t *s = i->state;
+
+        s->clos = NOCLOS;
+        DASSERT(s->active == 0);
+
+        if (s->type == nfa_state_t::FIN) {
+            ctx.marker = ctx.cursor;
+            ctx.hidx = i->thist;
+            ctx.rule = 0;
         }
     }
 }
diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc
index 080b402d7375e606b85a7ac32ce4ce8b6df895a4..577ec10b2ebb457a1eafa04661b7de6e0feb0546 100644 (file)
@@ -1,6 +1,3 @@
-#include <queue>
-#include <stdio.h>
-
 #include "lib/lex.h"
 #include "lib/regex.h"
 #include "lib/regex_impl.h"
diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc
index 24a11b1e76d1fa7bec2f45db13b60c08f07e48ff..d9f08aef9442e2db5e2cc97fe72bcc5ca331b2cd 100644 (file)
@@ -10,6 +10,7 @@
 #include "src/options/opt.h"
 #include "src/dfa/determinization.h"
 #include "src/dfa/dfa.h"
+#include "src/dfa/closure_leftmost.h"
 #include "src/dfa/closure_posix.h"
 #include "src/dfa/posix_precedence.h"
 #include "src/dfa/tcmd.h"
diff --git a/re2c/src/dfa/closure_leftmost.cc b/re2c/src/dfa/closure_leftmost.cc
deleted file mode 100644 (file)
index eb50bec..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "src/dfa/determinization.h"
-#include "src/nfa/nfa.h"
-
-
-namespace re2c
-{
-
-void closure_leftmost(ldetctx_t &ctx)
-{
-    closure_t &done = ctx.state, &todo = ctx.reach;
-    done.clear();
-
-    // DFS; linear complexity
-    for (; !todo.empty(); ) {
-        const clos_t &x = todo.back();
-        nfa_state_t *n = x.state;
-        todo.pop_back();
-
-        if (n->clos != NOCLOS) continue;
-
-        n->clos = static_cast<uint32_t>(done.size());
-        done.push_back(x);
-
-        switch (n->type) {
-            case nfa_state_t::NIL:
-                todo.push_back(clos_t(x, n->nil.out));
-                break;
-            case nfa_state_t::ALT:
-                todo.push_back(clos_t(x, n->alt.out2));
-                todo.push_back(clos_t(x, n->alt.out1));
-                break;
-            case nfa_state_t::TAG:
-                todo.push_back(clos_t(x, n->tag.out, ctx.history.link(ctx, x)));
-                break;
-            default:
-                break;
-        }
-    }
-
-    // reset associated closure items
-    // (do this before removing any states from closure)
-    for (clositer_t i = done.begin(); i != done.end(); ++i) {
-        i->state->clos = NOCLOS;
-    }
-}
-
-} // namespace re2c
diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h
index 6e8414753b560ea5a1a32b1f63508d17c9d9763f..55e12675b5420213366981c0f4eb88127f11528b 100644 (file)
@@ -18,8 +18,7 @@
 #include "src/util/slab_allocator.h"
 
 
-namespace re2c
-{
+namespace re2c {
 
 // fwd
 struct opt_t;
@@ -190,7 +189,6 @@ typedef determ_context_t<LEFTMOST> ldetctx_t;
 
 template<typename ctx_t> void tagged_epsilon_closure(ctx_t &ctx);
 template<typename ctx_t> void find_state(ctx_t &ctx);
-void closure_leftmost(ldetctx_t &);
 
 inline bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const
 {