src/cfg/cfg.h \
src/dfa/determinization.h \
src/dfa/dfa.h \
+ src/dfa/posix_precedence.h \
src/dfa/tag_history.h \
src/dfa/tagver_table.h \
src/dfa/tcmd.h \
src/dfa/fillpoints.cc \
src/dfa/find_state.cc \
src/dfa/minimization.cc \
- src/dfa/posix_precedence.cc \
src/dfa/tagver_table.cc \
src/dfa/tcmd.cc \
src/encoding/ebcdic/ebcdic_regexp.cc \
src/cfg/cfg.h \
src/dfa/determinization.h \
src/dfa/dfa.h \
+ src/dfa/posix_precedence.h \
src/dfa/tag_history.h \
src/dfa/tagver_table.h \
src/dfa/tcmd.h \
src/dfa/fillpoints.cc \
src/dfa/find_state.cc \
src/dfa/minimization.cc \
- src/dfa/posix_precedence.cc \
src/dfa/tagver_table.cc \
src/dfa/tcmd.cc \
src/nfa/estimate_size.cc \
dfa_t *dfa = NULL;
if (cflags & REG_NFA) {
- preg->simctx = new libre2c::simctx_t(nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::simctx_t(*nfa, preg->re_nsub, cflags);
}
else {
preg->char2class = new size_t[256];
: state(s), origin(o), thist(h) {}
};
-struct histleaf_t
-{
- uint32_t coreid;
- uint32_t origin;
- int32_t hidx;
- int32_t height;
-};
-
struct ran_or_fin_t
{
inline bool operator()(const conf_t &c);
struct simctx_t
{
- const nfa_t *nfa;
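+ // configuration set types; the shared precedence code in posix_precedence.h
+ // expects them as members of the context type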
+ typedef std::vector<conf_t> confset_t;
+ typedef confset_t::iterator confiter_t;
+ typedef confset_t::const_iterator cconfiter_t;
+ typedef confset_t::reverse_iterator rconfiter_t;
+ typedef confset_t::const_reverse_iterator rcconfiter_t;
+
+ const nfa_t &nfa;
const size_t nsub;
const int flags;
confset_t reach;
confset_t state;
- tag_history_t hist;
+ tag_history_t history;
int32_t hidx;
uint32_t step;
regoff_t *offsets1;
regoff_t *offsets2;
regoff_t *offsets3;
-
bool *done;
- int32_t *prectbl1;
- int32_t *prectbl2;
- cache_t cache;
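+ // precedence tables: newprectbl is filled for the current step, oldprectbl
+ // (of dimension oldprecdim) holds the table computed at the previous step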
+ int32_t *newprectbl;
+ int32_t *oldprectbl;
+ size_t oldprecdim;
std::vector<histleaf_t> histlevel;
- std::vector<const conf_t*> sortcores;
+ std::vector<uint32_t> sortcores;
std::vector<uint32_t> fincount;
std::vector<int32_t> worklist;
+ cache_t cache;
std::vector<nfa_state_t*> gor1_topsort;
std::vector<nfa_state_t*> gor1_linear;
std::vector<nfa_state_t*> gtop_heap_storage;
cmp_gtop_t gtop_cmp;
gtop_heap_t gtop_heap;
+ closure_stats_t dc_clstats;
- simctx_t(const nfa_t *nfa, size_t re_nsub, int flags);
+ simctx_t(const nfa_t &nfa, size_t re_nsub, int flags);
~simctx_t();
FORBID_COPY(simctx_t);
};
m->rm_so = 0;
m->rm_eo = ctx.marker - string - 1;
- const std::vector<Tag> &tags = ctx.nfa->tags;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
size_t todo = nmatch * 2;
bool *done = ctx.done;
memset(done, 0, ctx.nsub * sizeof(bool));
for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const Tag &tag = tags[n.info.idx];
const size_t t = tag.ncap;
if (!fictive(tag) && t < nmatch * 2 && !done[t]) {
done[t] = true;
--todo;
- const regoff_t off = n.info.neg ? -1 : static_cast<regoff_t>(ctx.hist.node2(i).step);
+ const regoff_t off = n.info.neg ? -1 : static_cast<regoff_t>(ctx.history.node2(i).step);
m = &pmatch[t / 2 + 1];
if (t % 2 == 0) {
m->rm_so = off;
return 0;
}
-simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags)
+simctx_t::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags)
: nfa(nfa)
, nsub(2 * (re_nsub - 1))
, flags(flags)
, reach()
, state()
- , hist()
+ , history()
, hidx(HROOT)
, step(0)
, rule(Rule::NONE)
, offsets2(NULL)
, offsets3(NULL)
, done(NULL)
- , prectbl1(NULL)
- , prectbl2(NULL)
- , cache()
+ , newprectbl(NULL)
+ , oldprectbl(NULL)
+ , oldprecdim(0)
, histlevel()
, sortcores()
, fincount()
, worklist()
+ , cache()
, gor1_topsort()
, gor1_linear()
, gtop_heap_storage()
, gtop_cmp()
, gtop_heap(gtop_cmp, gtop_heap_storage)
+ , dc_clstats()
{
const size_t
- nstates = nfa->size,
- ncores = nfa->ncores;
+ nstates = nfa.size,
+ ncores = nfa.ncores;
state.reserve(nstates);
reach.reserve(nstates);
offsets3 = new regoff_t[nsub];
}
if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- prectbl1 = new int32_t[ncores * ncores];
- prectbl2 = new int32_t[ncores * ncores];
+ newprectbl = new int32_t[ncores * ncores];
+ oldprectbl = new int32_t[ncores * ncores];
histlevel.reserve(ncores);
sortcores.reserve(ncores);
fincount.resize(ncores + 1);
delete[] offsets3;
}
if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- delete[] prectbl1;
- delete[] prectbl2;
+ delete[] newprectbl;
+ delete[] oldprectbl;
}
}
{
ctx.reach.clear();
ctx.state.clear();
- ctx.hist.init();
+ ctx.history.init();
ctx.hidx = HROOT;
ctx.step = 0;
ctx.rule = Rule::NONE;
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
- const conf_t c0(ctx.nfa->root, 0, HROOT);
+ const conf_t c0(ctx.nfa.root, 0, HROOT);
ctx.reach.push_back(c0);
closure_leftmost(ctx);
}
std::swap(ctx.offsets1, ctx.offsets2);
- ctx.hist.init();
+ ctx.history.init();
}
void closure_leftmost(simctx_t &ctx)
break;
case nfa_state_t::TAG:
wl.push_back(conf_t(n->tag.out, o
- , ctx.hist.push(h, n->tag.info)));
+ , ctx.history.push(h, n->tag.info)));
break;
default:
break;
memset(done, 0, nsub * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const size_t t = n.info.idx;
if (!done[t]) {
done[t] = true;
simctx_t &ctx = *preg->simctx;
init(ctx, string);
- nfa_state_t *s0 = ctx.nfa->root;
+ nfa_state_t *s0 = ctx.nfa.root;
const conf_t c0(s0, s0->coreid, HROOT);
ctx.reach.push_back(c0);
closure_leftmost(ctx);
break;
case nfa_state_t::TAG:
wl.push_back(conf_t(n->tag.out, o
- , ctx.hist.push2(h, ctx.step, n->tag.info, o)));
+ , ctx.history.push2(h, ctx.step, n->tag.info, o)));
break;
case nfa_state_t::RAN:
break;
#include "src/options/opt.h"
#include "src/debug/debug.h"
#include "src/dfa/determinization.h"
+#include "src/dfa/posix_precedence.h"
#include "src/nfa/nfa.h"
template<sssp_alg_t ALG> static void closure_posix(simctx_t &ctx);
static void make_one_step(simctx_t &, uint32_t);
static void make_final_step(simctx_t &);
-static void update_offsets(simctx_t &ctx, const conf_t &c);
-static void update_prectbl(simctx_t &);
-static void update_prectbl_naive(simctx_t &ctx);
-static int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y, int32_t &prec1, int32_t &prec2);
+static void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id);
+static void compute_prectbl_naive(simctx_t &ctx);
// we *do* want these to be inlined
static inline bool scan(simctx_t &ctx, nfa_state_t *q, bool all);
static inline bool relax_gor1(simctx_t &, const conf_t &);
static inline void relax_gtop(simctx_t &, const conf_t &);
-static inline int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2);
int regexec_nfa_posix(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int eflags)
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
- const conf_t c0(ctx.nfa->root, 0, HROOT);
+ const conf_t c0(ctx.nfa.root, 0, HROOT);
ctx.reach.push_back(c0);
closure_posix<ALG>(ctx);
for (;;) {
void make_one_step(simctx_t &ctx, uint32_t sym)
{
confset_t &state = ctx.state, &reach = ctx.reach;
- size_t j = 0;
+ uint32_t j = 0;
reach.clear();
for (cconfiter_t i = state.begin(), e = state.end(); i != e; ++i) {
if (s->type == nfa_state_t::RAN) {
for (const Range *r = s->ran.ran; r; r = r->next()) {
if (r->lower() <= sym && sym < r->upper()) {
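+ // the origin of the new configuration is its index j in the closure,
+ // which also indexes the offset arrays and the precedence table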
- const conf_t c(s->ran.out, s->coreid, HROOT);
+ const conf_t c(s->ran.out, j, HROOT);
reach.push_back(c);
- state[j++] = *i;
- update_offsets(ctx, *i);
+ state[j] = *i;
+ update_offsets(ctx, *i, j);
+ ++j;
break;
}
}
}
else if (s->type == nfa_state_t::FIN) {
- update_offsets(ctx, *i);
+ update_offsets(ctx, *i, NONCORE);
}
}
std::swap(ctx.offsets1, ctx.offsets2);
if (!(ctx.flags & REG_SLOWPREC)) {
- update_prectbl(ctx);
+ compute_prectable(ctx);
}
else {
- update_prectbl_naive(ctx);
+ compute_prectbl_naive(ctx);
}
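+ // the freshly computed table becomes the "old" one for the next step;
+ // j is the number of configurations that survived this step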
+ std::swap(ctx.newprectbl, ctx.oldprectbl);
+ ctx.oldprecdim = j;
- ctx.hist.init();
+ ctx.history.init();
++ctx.step;
}
DASSERT(s->status == GOR_NOPASS && s->active == 0);
if (s->type == nfa_state_t::FIN) {
- update_offsets(ctx, *i);
+ update_offsets(ctx, *i, NONCORE);
}
}
}
{
const uint32_t xo = x.origin, yo = y.origin;
return xo != yo
- && unpack_leftmost(ctx.prectbl1[xo * ctx.nfa->ncores + yo]) < 0;
+ && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
}
bool scan(simctx_t &ctx, nfa_state_t *q, bool all)
case nfa_state_t::TAG:
if (q->arcidx == 0) {
any |= relax_gor1(ctx, conf_t(q->tag.out, o
- , ctx.hist.push1(h, q->tag.info)));
+ , ctx.history.push1(h, q->tag.info)));
++q->arcidx;
}
break;
break;
case nfa_state_t::TAG:
relax_gtop(ctx, conf_t(q->tag.out, o
- , ctx.hist.push1(h, q->tag.info)));
+ , ctx.history.push1(h, q->tag.info)));
break;
default:
break;
}
}
-int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y
- , int32_t &prec1, int32_t &prec2)
-{
- const int32_t idx1 = x.thist, idx2 = y.thist;
- const uint32_t orig1 = x.origin, orig2 = y.origin;
-
- if (idx1 == idx2 && orig1 == orig2) {
- prec1 = prec2 = MAX_RHO;
- return 0;
- }
-
- const std::vector<Tag> &tags = ctx.nfa->tags;
- tag_history_t &hist = ctx.hist;
-
- const bool fork_frame = orig1 == orig2;
- if (fork_frame) {
- prec1 = prec2 = MAX_RHO;
- }
- else {
- prec1 = unpack_longest(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]);
- prec2 = unpack_longest(ctx.prectbl1[orig2 * ctx.nfa->ncores + orig1]);
- }
-
- tag_info_t info1, info2;
- int32_t i1 = idx1, i2 = idx2;
- for (; i1 != i2; ) {
- if (i1 > i2) {
- const tag_history_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- }
- else {
- const tag_history_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- }
- }
- if (i1 != HROOT) {
- DASSERT(fork_frame);
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- return fork_frame
- ? leftprec(ctx, info1, info2, i1 == idx1, i2 == idx2)
- : unpack_leftmost(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]);
-}
-
-int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2)
-{
- // equal => not less
- if (last1 && last2) return 0;
-
- // shorter => less
- if (last1) return -1;
- if (last2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- DASSERT(false);
- return 0;
-}
-
-void update_offsets(simctx_t &ctx, const conf_t &c)
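+// 'id' is the index of configuration 'c' in the new closure, or NONCORE for final states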
+void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id)
{
const size_t nsub = ctx.nsub;
regoff_t *o;
- const std::vector<Tag> &tags = ctx.nfa->tags;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
nfa_state_t *s = c.state;
bool *done = ctx.done;
o = ctx.offsets3;
}
else {
- o = ctx.offsets1 + s->coreid * nsub;
+ o = ctx.offsets1 + id * nsub;
}
memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t));
memset(done, 0, nsub * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const Tag &tag = tags[n.info.idx];
const size_t t = tag.ncap;
regoff_t *off = o + t;
}
}
-void update_prectbl(simctx_t &ctx)
-{
- const confset_t &state = ctx.state;
- const std::vector<Tag> &tags = ctx.nfa->tags;
- std::vector<const conf_t*> &sortcores = ctx.sortcores;
- std::vector<uint32_t> &fcount = ctx.fincount;
- std::vector<int32_t> stack = ctx.worklist;
- std::vector<histleaf_t> &level = ctx.histlevel;
- std::vector<histleaf_t>::reverse_iterator li, lj, lk, le;
- tag_history_t &hist = ctx.hist;
- const size_t ncores = ctx.nfa->ncores;
- int32_t *oldtbl = ctx.prectbl1, *newtbl = ctx.prectbl2;
-
- // Group core configurations by their history tree index, so that later
- // while traversing the tree we will know at once which configurations
- // (if any) are bound to the given tree node. We use counting sort, which
- // requires additional memory, but is fast and conveniently creates an
- // array of boundaries in the sorted configuration array.
- uint32_t maxfin = 0;
- for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
- uint32_t &x = hist.node1(c->thist).finidx;
- if (x >= USED) {
- x = maxfin++;
- fcount[x] = 0;
-
- // mark all nodes down to root as used (unless marked already)
- for (int32_t i = hist.node(c->thist).pred; i >= HROOT; ) {
- uint32_t &y = hist.node1(i).finidx;
- if (y <= USED) break;
- y = USED;
- i = hist.node(i).pred;
- }
- }
- ++fcount[x];
- }
- fcount[maxfin] = 0;
- for (size_t i = 1; i <= maxfin; ++i) {
- fcount[i] += fcount[i - 1];
- }
- sortcores.resize(state.size());
- for (rcconfiter_t c = state.rbegin(), e = state.rend(); c != e; ++c) {
- sortcores[--fcount[hist.node1(c->thist).finidx]] = &*c;
- }
-
- // Depth-first traversal of the history tree. During traversal we grow
- // an array of items (one item per core configuration). Items are added
- // in tree nodes that have core configurations associated with them.
- // Each item represents one history. Items have immutable part (core ID,
- // origin) and mutable part (current minimal height, current tree index)
- // that changes as we return down the tree.
- level.clear();
- stack.push_back(0);
- while (!stack.empty()) {
- const int32_t n = stack.back();
- tag_history_t::node1_t &node = hist.node1(n);
- const uint32_t fidx = node.finidx;
-
- if (fidx == NONFIN) {
- // aborted branch of search tree, don't waste time
- stack.pop_back();
- continue;
- }
-
- if (node.next != -1) {
- // start or continue visiting subtrees rooted at this node
- const tag_history_t::arc_t &arc = hist.arc(node.next);
- stack.push_back(arc.node);
- node.next = arc.next;
- continue;
- }
-
- // all subtrees visited, it's time to process this node
- const int32_t h = n == 0 ? MAX_RHO : tags[hist.node(n).info.idx].height;
- li = level.rbegin();
- le = level.rend();
-
- if (fidx < USED) {
- // this node has leaf configurations, add them to level
- for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) {
- const conf_t *c = sortcores[k];
- const histleaf_t l = {c->state->coreid, c->origin, HROOT, h};
- level.push_back(l);
- }
-
- // compute precedence for newly added configurations
- const int32_t p0 = pack(h, 0);
- for (lj = level.rbegin(); lj != li; ++lj) {
- for (lk = lj; lk != li; ++lk) {
- const uint32_t cj = lj->coreid, ck = lk->coreid;
- const uint32_t oj = lj->origin, ok = lk->origin;
- const bool fork = n != 0 || oj == ok;
- if (fork) {
- newtbl[cj * ncores + ck] = p0;
- newtbl[ck * ncores + cj] = p0;
- }
- else {
- newtbl[cj * ncores + ck] = oldtbl[oj * ncores + ok];
- newtbl[ck * ncores + cj] = oldtbl[ok * ncores + oj];
- }
- }
- }
- }
-
- // Each subtree appended a sequence of items to level. We can find
- // sequence boundaries by looking at tree index of each item: it is
- // equal to tree index of the corresponding subtree (except for the
- // leaf items added at this node; but we know where they start).
-
- // We must compute precedence for each pair of items from different
- // sequences (including leaf items added at this node), but not within
- // sequence boundaries: those histories fork higher up the subtree;
- // their precedence has already been computed and must not be touched.
-
- for (int32_t a = node.last; a != -1; ) {
- const tag_history_t::arc_t &arc = hist.arc(a);
- a = arc.prev;
-
- // for all the items of this subtree
- for (lk = li; li != le && li->hidx == arc.node; ++li) {
-
- // update height of each item coming from subtree
- li->height = std::min(li->height, h);
-
- // for all the level items to the right of this subtree
- for (lj = level.rbegin(); lj != lk; ++lj) {
-
- const uint32_t ci = li->coreid, cj = lj->coreid;
- const uint32_t oi = li->origin, oj = lj->origin;
- const bool fork = n != 0 || oi == oj;
- int32_t p1 = li->height, p2 = lj->height, p;
-
- if (!fork) {
- p1 = std::min(p1, unpack_longest(oldtbl[oi * ncores + oj]));
- p2 = std::min(p2, unpack_longest(oldtbl[oj * ncores + oi]));
- }
-
- if (p1 > p2) {
- p = -1;
- }
- else if (p1 < p2) {
- p = 1;
- }
- else if (fork) {
- const tag_info_t t1 = hist.node(li->hidx).info;
- const tag_info_t t2 = hist.node(lj->hidx).info;
- p = leftprec(ctx, t1, t2, t1 == NOINFO, t2 == NOINFO);
- }
- else {
- p = unpack_leftmost(oldtbl[oi * ncores + oj]);
- }
-
- newtbl[ci * ncores + cj] = pack(p1, p);
- newtbl[cj * ncores + ci] = pack(p2, -p);
- }
- }
- }
-
- // finally, downgrade tree index of all subtree items, making their
- // origins indistinguishable from each other for the previous level
- for (lj = level.rbegin(); lj != li; ++lj) {
- lj->hidx = n;
- }
-
- stack.pop_back();
- }
-
- std::swap(ctx.prectbl1, ctx.prectbl2);
-}
-
// Old naive algorithm that has cubic complexity in the size of TNFA.
// Example that exhibits cubic behaviour is ((a?){1,N})*. In this example
// closure has O(N) states, and the compared histories have O(N) length.
-void update_prectbl_naive(simctx_t &ctx)
+void compute_prectbl_naive(simctx_t &ctx)
{
const confset_t &state = ctx.state;
- const size_t ncores = ctx.nfa->ncores;
- int32_t *newtbl = ctx.prectbl2;
+ int32_t *newtbl = ctx.newprectbl;
+ const size_t newdim = state.size();
const int32_t p0 = pack(MAX_RHO, 0);
- for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
- nfa_state_t *s = c->state;
- DASSERT (s->type == nfa_state_t::RAN);
- newtbl[s->coreid * ncores + s->coreid] = p0;
-
- for (cconfiter_t d = c + 1; d != e; ++d) {
- nfa_state_t *q = d->state;
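+ // the table is now indexed by configuration positions in the closure, not by NFA core ids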
+ for (uint32_t i = 0; i < newdim; ++i) {
+ newtbl[i * newdim + i] = p0;
+ for (uint32_t j = i + 1; j < newdim; ++j) {
int32_t prec1, prec2;
- int32_t prec = precedence(ctx, *c, *d, prec1, prec2);
- newtbl[s->coreid * ncores + q->coreid] = pack(prec1, prec);
- newtbl[q->coreid * ncores + s->coreid] = pack(prec2, -prec);
+ int32_t prec = precedence(ctx, state[i], state[j], prec1, prec2);
+ newtbl[i * newdim + j] = pack(prec1, prec);
+ newtbl[j * newdim + i] = pack(prec2, -prec);
}
}
-
- std::swap(ctx.prectbl1, ctx.prectbl2);
}
} // namespace libre2c
simctx_t &ctx = *preg->simctx;
init(ctx, string);
- nfa_state_t *s0 = ctx.nfa->root;
+ nfa_state_t *s0 = ctx.nfa.root;
const conf_t c0(s0, s0->coreid, HROOT);
ctx.reach.push_back(c0);
closure_posix(ctx);
break;
case nfa_state_t::TAG:
relax(ctx, conf_t(q->tag.out, o
- , ctx.hist.push2(h, ctx.step, q->tag.info, o)));
+ , ctx.history.push2(h, ctx.step, q->tag.info, o)));
break;
default:
break;
return 0;
}
- const std::vector<Tag> &tags = ctx.nfa->tags;
- tag_history_t &hist = ctx.hist;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &hist = ctx.history;
int32_t prec = 0;
prec1 = prec2 = MAX_RHO;
T4("(ab|a)(bcd|c)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4);
if (!(flags & REG_NFA)) {
- T3("((a?){1,100})*", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0,50, 0,50, 49,50);
+ T3("((a?){1,300})*", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0,50, 0,50, 49,50);
}
else if (!(flags & REG_SLOWPREC)) {
T3("((a?){1,1000})*", "aaaa", 0,4, 0,4, 3,4);
{
if (!debug) return;
- const closure_t &closure = ctx.dc_closure;
+ const closure_t &closure = ctx.state;
cclositer_t b = closure.begin(), e = closure.end(), c;
const uint32_t origin = ctx.dc_origin;
const uint32_t target = ctx.dc_target;
const uint32_t symbol = ctx.dc_symbol;
- const dfa_t &dfa = ctx.dc_dfa;
+ const dfa_t &dfa = ctx.dfa;
const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
- const tag_history_t &thist = ctx.dc_taghistory;
+ const tag_history_t &thist = ctx.history;
uint32_t i;
if (target == dfa_t::NIL) return;
i = 0;
for (c = b; c != e; ++c, ++i) {
fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
- i, style, static_cast<uint32_t>(c->state - ctx.dc_nfa.states));
+ i, style, static_cast<uint32_t>(c->state - ctx.nfa.states));
if (c->tvers != ZERO_TAGS) {
const tagver_t *vers = tvtbl[c->tvers];
fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
}
- if (c->tlook != HROOT) {
- dump_history(dfa, thist, c->tlook);
+ if (c->thist != HROOT) {
+ dump_history(dfa, thist, c->thist);
}
}
#include "src/options/opt.h"
#include "src/dfa/determinization.h"
#include "src/dfa/dfa.h"
+#include "src/dfa/posix_precedence.h"
#include "src/dfa/tcmd.h"
#include "src/nfa/nfa.h"
#include "src/regexp/rule.h"
void tagged_epsilon_closure(determ_context_t &ctx)
{
- closure_t &closure = ctx.dc_closure;
+ closure_t &closure = ctx.state;
// build tagged epsilon-closure of the given set of NFA states
if (ctx.dc_opts->posix_semantics) {
closure_posix(ctx);
- prune(closure, ctx.dc_nfa.rules);
+ prune(closure, ctx.nfa.rules);
std::sort(closure.begin(), closure.end(), cmpby_rule_state);
- orders(ctx);
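+ // build the precedence table for the new closure (shared implementation in posix_precedence.h)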
+ compute_prectable(ctx);
} else {
closure_leftmost(ctx);
- prune(closure, ctx.dc_nfa.rules);
+ prune(closure, ctx.nfa.rules);
}
// see note [the difference between TDFA(0) and TDFA(1)]
void lower_lookahead_to_transition(closure_t &closure)
{
for (clositer_t c = closure.begin(); c != closure.end(); ++c) {
- c->ttran = c->tlook;
- c->tlook = HROOT;
+ c->ttran = c->thist;
+ c->thist = HROOT;
}
}
void generate_versions(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const std::vector<Tag> &tags = dfa.tags;
const size_t ntag = tags.size();
tagver_t &maxver = dfa.maxtagver;
tagver_table_t &tvtbl = ctx.dc_tagvertbl;
tagver_t *vers = tvtbl.buffer;
- closure_t &clos = ctx.dc_closure;
- tag_history_t &thist = ctx.dc_taghistory;
+ closure_t &clos = ctx.state;
+ tag_history_t &thist = ctx.history;
newvers_t &newvers = ctx.dc_newvers;
clositer_t b = clos.begin(), e = clos.end(), c;
// normal transition, however absolute value should be unique
// among all versions of all tags)
for (c = b; c != e; ++c) {
- const hidx_t l = c->tlook, h = c->ttran;
+ const hidx_t l = c->thist, h = c->ttran;
if (h == HROOT) continue;
const tagver_t *vs = tvtbl[c->tvers];
void closure_leftmost(determ_context_t &ctx)
{
const closure_t &init = ctx.dc_reached;
- closure_t &done = ctx.dc_closure;
+ closure_t &done = ctx.state;
std::stack<clos_t> &todo = ctx.dc_stack_dfs;
// enqueue all initial states
break;
case nfa_state_t::TAG:
x.state = n->tag.out;
- x.tlook = ctx.dc_taghistory.push(x.tlook, n->tag.info);
+ x.thist = ctx.history.push(x.thist, n->tag.info);
todo.push(x);
break;
case nfa_state_t::RAN:
#include <queue>
#include "src/dfa/determinization.h"
+#include "src/dfa/posix_precedence.h"
#include "src/nfa/nfa.h"
{
DRESET_CLSTATS(ctx);
- ctx.dc_taghistory.detach();
+ ctx.history.detach();
switch (ctx.dc_opts->posix_closure) {
case POSIX_CLOSURE_GOR1: closure_posix_gor1(ctx); break;
DDUMP_CLSTATS(ctx);
// cleanup
- closure_t &cl = ctx.dc_closure;
+ closure_t &cl = ctx.state;
for (clositer_t i = cl.begin(); i != cl.end(); ++i) {
nfa_state_t *q = i->state;
q->clos = NOCLOS;
*/
void closure_posix_gor1(determ_context_t &ctx)
{
- closure_t &state = ctx.dc_closure, &reach = ctx.dc_reached;
+ closure_t &state = ctx.state, &reach = ctx.dc_reached;
std::vector<nfa_state_t*>
&topsort = ctx.dc_gor1_topsort,
&linear = ctx.dc_gor1_linear;
inline bool cmp_gor1_t::operator()(const clos_t &x, const clos_t &y) const
{
- const uint32_t xo = x.origin, yo = y.origin;
- if (xo == yo) return false;
-
// if longest components differ, leftmost already incorporates that
- const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
- return unpack_leftmost(k->prectbl[xo * k->size + yo]) < 0;
+ const uint32_t xo = x.origin, yo = y.origin;
+ return xo != yo
+ && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
}
bool scan(determ_context_t &ctx, nfa_state_t *q, bool all)
{
bool any = false;
- clos_t x = ctx.dc_closure[q->clos];
+ clos_t x = ctx.state[q->clos];
switch (q->type) {
case nfa_state_t::NIL:
if (q->arcidx == 0) {
case nfa_state_t::TAG:
if (q->arcidx == 0) {
x.state = q->tag.out;
- x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info);
+ x.thist = ctx.history.push1(x.thist, q->tag.info);
any |= relax_gor1(ctx, x);
++q->arcidx;
}
bool relax_gor1(determ_context_t &ctx, const clos_t &x)
{
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
nfa_state_t *q = x.state;
const uint32_t idx = q->clos;
int32_t p1, p2;
void closure_posix_gtop(determ_context_t &ctx)
{
const closure_t &reach = ctx.dc_reached;
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
gtop_heap_t &heap = ctx.dc_gtop_heap;
state.clear();
q->active = 0;
DINCCOUNT_CLSCANS(ctx);
- clos_t x = ctx.dc_closure[q->clos];
+ clos_t x = ctx.state[q->clos];
switch (q->type) {
case nfa_state_t::NIL:
x.state = q->nil.out;
break;
case nfa_state_t::TAG:
x.state = q->tag.out;
- x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info);
+ x.thist = ctx.history.push1(x.thist, q->tag.info);
relax_gtop(ctx, x);
break;
default:
void relax_gtop(determ_context_t &ctx, const clos_t &c)
{
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
nfa_state_t *q = c.state;
const uint32_t idx = q->clos;
int32_t p1, p2;
}
}
-void orders(determ_context_t &ctx)
-{
- closure_t &closure = ctx.dc_closure;
- const size_t nclos = closure.size();
-
- prectable_t *prectbl = ctx.dc_prectbl;
- static const int32_t P0 = pack(MAX_RHO, 0);
-
- for (size_t i = 0; i < nclos; ++i) {
- for (size_t j = i + 1; j < nclos; ++j) {
- int32_t rho1, rho2, l;
- l = precedence (ctx, closure[i], closure[j], rho1, rho2);
- prectbl[i * nclos + j] = pack(rho1, l);
- prectbl[j * nclos + i] = pack(rho2, -l);
- }
- prectbl[i * nclos + i] = P0;
- }
-}
-
} // namespace re2c
{
static void clear_caches(determ_context_t &ctx);
-static void reach_on_symbol(determ_context_t &);
+static void reach_on_symbol(determ_context_t &ctx, uint32_t sym);
static nfa_state_t *transition(nfa_state_t *, uint32_t);
static uint32_t init_tag_versions(determ_context_t &);
static void warn_nondeterministic_tags(const determ_context_t &);
clear_caches(ctx);
for (uint32_t c = 0; c < nchars; ++c) {
- ctx.dc_symbol = c;
-
- reach_on_symbol(ctx);
+ reach_on_symbol(ctx, c);
tagged_epsilon_closure(ctx);
find_state(ctx);
}
{
ctx.dc_newvers.clear();
- const size_t ntags = ctx.dc_nfa.tags.size();
+ const size_t ntags = ctx.nfa.tags.size();
for (size_t t = 0; t < ntags; ++t) {
ctx.dc_hc_caches[t].clear();
}
}
-void reach_on_symbol(determ_context_t &ctx)
+void reach_on_symbol(determ_context_t &ctx, uint32_t sym)
{
+ ctx.dc_symbol = sym;
+ const uint32_t symbol = ctx.dfa.charset[ctx.dc_symbol];
+
const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin];
- closure_t &reached = ctx.dc_reached;
- const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol];
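+ // the origin kernel's precedence table serves as the "old" table for this transition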
+ ctx.oldprectbl = kernel->prectbl;
+ ctx.oldprecdim = kernel->size;
+ closure_t &reached = ctx.dc_reached;
reached.clear();
+
for (uint32_t i = 0; i < kernel->size; ++i) {
nfa_state_t *s = transition(kernel->state[i], symbol);
if (s) {
- clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT};
+ clos_t c = {s, i, kernel->tvers[i], kernel->thist[i], HROOT};
reached.push_back(c);
}
}
uint32_t init_tag_versions(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const size_t ntags = dfa.tags.size();
// all-zero tag configuration must have static number zero
Warn &warn = ctx.dc_msg.warn;
const kernels_t &kernels = ctx.dc_kernels;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- const std::valarray<Rule> &rules = ctx.dc_dfa.rules;
+ const std::vector<Tag> &tags = ctx.dfa.tags;
+ const std::valarray<Rule> &rules = ctx.dfa.rules;
const size_t
ntag = tags.size(),
: dc_opts(opts)
, dc_msg(msg)
, dc_condname(condname)
- , dc_nfa(nfa)
- , dc_dfa(dfa)
+ , nfa(nfa)
+ , dfa(dfa)
, dc_allocator()
, dc_origin(dfa_t::NIL)
, dc_target(dfa_t::NIL)
, dc_symbol(0)
, dc_actions(NULL)
, dc_reached()
- , dc_closure()
- , dc_prectbl()
+ , state()
, dc_tagvertbl(nfa.tags.size())
- , dc_taghistory()
+ , history()
, dc_kernels()
, dc_buffers(dc_allocator)
, dc_stack_dfs()
, dc_gtop_cmp()
, dc_gtop_heap(dc_gtop_cmp, dc_gtop_buffer)
, dc_hc_caches()
- , dc_newvers(newver_cmp_t(dc_taghistory, dc_hc_caches))
+ , dc_newvers(newver_cmp_t(history, dc_hc_caches))
, dc_path1()
, dc_path2()
, dc_path3()
, dc_tagcount()
+ , newprectbl(NULL)
+ , oldprectbl(NULL)
+ , oldprecdim(0)
+ , histlevel()
+ , sortcores()
+ , fincount()
+ , worklist()
, dc_dump(opts)
, dc_clstats()
{
dc_tagcount.resize(ntags);
if (opts->posix_semantics) {
- dc_prectbl = new prectable_t[ncores * ncores];
+ newprectbl = new prectable_t[ncores * ncores];
+ histlevel.reserve(ncores);
+ sortcores.reserve(ncores);
+ fincount.resize(ncores + 1);
+ worklist.reserve(nstates);
}
if (opts->posix_closure == POSIX_CLOSURE_GTOP) {
determ_context_t::~determ_context_t()
{
- delete[] dc_prectbl;
+ delete[] newprectbl;
}
typedef slab_allocator_t<1024 * 1024, sizeof(void*)> allocator_t;
+typedef int32_t prectable_t;
struct clos_t
uint32_t origin;
uint32_t tvers; // vector of tag versions (including lookahead tags)
hidx_t ttran; // history of transition tags
- hidx_t tlook; // history of lookahead tags
+ hidx_t thist; // history of lookahead tags
static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
const prectable_t *prectbl;
nfa_state_t **state;
uint32_t *tvers; // tag versions
- hidx_t *tlook; // lookahead tags
+ hidx_t *thist; // lookahead tags
FORBID_COPY(kernel_t);
};
};
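+// level item used by compute_prectable(): one history per core configuration, with
+// an immutable part (core id, origin) and a mutable part (minimal height, tree index)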
+struct histleaf_t
+{
+ uint32_t coreid;
+ uint32_t origin;
+ int32_t hidx;
+ int32_t height;
+};
+
+
typedef lookup_t<const kernel_t*> kernels_t;
typedef std::priority_queue<nfa_state_t*, std::vector<nfa_state_t*>
, cmp_gtop_t> gtop_heap_t;
struct determ_context_t
{
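+ // configuration set types expected by the shared precedence code in posix_precedence.h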
+ typedef std::vector<clos_t> confset_t;
+ typedef confset_t::iterator confiter_t;
+ typedef confset_t::const_iterator cconfiter_t;
+ typedef confset_t::reverse_iterator rconfiter_t;
+ typedef confset_t::const_reverse_iterator rcconfiter_t;
+
// determinization input
const opt_t *dc_opts; // options
Msg &dc_msg; // error messages and warnings
const std::string &dc_condname; // the name of current condition (with -c)
- const nfa_t &dc_nfa; // TNFA
+ const nfa_t &nfa; // TNFA
// determinization output
- dfa_t &dc_dfa; // resulting TDFA
+ dfa_t &dfa; // resulting TDFA
// temporary structures used by determinization
allocator_t dc_allocator;
uint32_t dc_symbol; // alphabet symbol of the current transition
tcmd_t *dc_actions; // tag actions of the current transition
closure_t dc_reached;
- closure_t dc_closure;
- prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation
+ closure_t state;
tagver_table_t dc_tagvertbl;
- tag_history_t dc_taghistory; // prefix trie of tag histories
+ tag_history_t history; // prefix trie of tag histories
kernels_t dc_kernels; // TDFA states under construction
kernel_buffers_t dc_buffers;
std::stack<clos_t> dc_stack_dfs; // stack used for DFS in leftmost greedy closure
tag_path_t dc_path3; // buffer 3 for tag history
std::vector<uint32_t> dc_tagcount; // buffer for counting sort on tag history
+ // precedence table and auxiliary data for POSIX disambiguation
+ int32_t *newprectbl;
+ const int32_t *oldprectbl;
+ size_t oldprecdim;
+ std::vector<histleaf_t> histlevel;
+ std::vector<uint32_t> sortcores;
+ std::vector<uint32_t> fincount;
+ std::vector<int32_t> worklist;
+
// debug
dump_dfa_t dc_dump;
closure_stats_t dc_clstats;
void tagged_epsilon_closure(determ_context_t &ctx);
void closure_posix(determ_context_t &);
void closure_leftmost(determ_context_t &);
-void orders(determ_context_t &);
void find_state(determ_context_t &ctx);
-int32_t precedence(determ_context_t &, const clos_t &, const clos_t &, int32_t &, int32_t &);
-int32_t unpack_longest(int32_t);
-int32_t unpack_leftmost(int32_t);
-int32_t pack(int32_t, int32_t);
bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const
{
return x->topord < y->topord;
}
-inline int32_t unpack_longest(int32_t packed)
-{
- // take lower 30 bits and sign-extend
- return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
-}
-
-inline int32_t unpack_leftmost(int32_t packed)
-{
- // take higher 2 bits and sign-extend
- return packed >> 30u;
-}
-
-inline int32_t pack(int32_t longest, int32_t leftmost)
-{
- // avoid signed overflows by using unsigned arithmetics
- uint32_t u_longest = static_cast<uint32_t>(longest);
- uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
-
- // leftmost: higher 2 bits, longest: lower 30 bits
- uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
- int32_t packed = static_cast<int32_t>(u_packed);
-
- DASSERT(unpack_longest(packed) == longest
- && unpack_leftmost(packed) == leftmost);
-
- return packed;
-}
-
} // namespace re2c
#endif // _RE2C_DFA_DETERMINIZATION_
void find_state(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
// find or add the new state in the existing set of states
const bool is_new = do_find_state(ctx);
// check if the new state is final
// see note [at most one final item per closure]
cclositer_t
- b = ctx.dc_closure.begin(),
- e = ctx.dc_closure.end(),
+ b = ctx.state.begin(),
+ e = ctx.state.end(),
f = std::find_if(b, e, clos_t::fin);
if (f != e) {
t->tcmd[dfa.nchars] = final_actions(ctx, *f);
bool do_find_state(determ_context_t &ctx)
{
kernels_t &kernels = ctx.dc_kernels;
- const closure_t &closure = ctx.dc_closure;
+ const closure_t &closure = ctx.state;
// empty closure corresponds to default state
if (closure.size() == 0) {
kernel_t *k = ctx.dc_buffers.kernel;
// copy closure to buffer kernel
- copy_to_buffer_kernel(closure, ctx.dc_prectbl, k);
+ copy_to_buffer_kernel(closure, ctx.newprectbl, k);
// hash "static" part of the kernel
const uint32_t hash = hash_kernel(k);
tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const Rule &rule = dfa.rules[fin.state->rule];
const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers];
- const hidx_t look = fin.tlook;
- const tag_history_t &thist = ctx.dc_taghistory;
+ const hidx_t look = fin.thist;
+ const tag_history_t &thist = ctx.history;
tcpool_t &tcpool = dfa.tcpool;
tcmd_t *copy = NULL, *save = NULL, **p;
k->prectbl = NULL;
k->state = alc.alloct<nfa_state_t*>(size);
k->tvers = alc.alloct<uint32_t>(size);
- k->tlook = alc.alloct<hidx_t>(size);
+ k->thist = alc.alloct<hidx_t>(size);
return k;
}
memcpy(k->state, kernel->state, n * sizeof(void*));
memcpy(k->tvers, kernel->tvers, n * sizeof(uint32_t));
- memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t));
+ memcpy(k->thist, kernel->thist, n * sizeof(hidx_t));
prectable_t *ptbl = NULL;
if (kernel->prectbl) {
const clos_t &c = closure[i];
buffer->state[i] = c.state;
buffer->tvers[i] = c.tvers;
- buffer->tlook[i] = c.tlook;
+ buffer->thist[i] = c.thist;
}
}
{
kernel_buffers_t &kbufs = ctx.dc_buffers;
allocator_t &alc = ctx.dc_allocator;
- const tagver_t maxver = ctx.dc_dfa.maxtagver;
- const size_t nkern = ctx.dc_closure.size();
+ const tagver_t maxver = ctx.dfa.maxtagver;
+ const size_t nkern = ctx.state.size();
if (kbufs.maxsize < nkern) {
kbufs.maxsize = nkern * 2; // in advance
{
DASSERT(x->size == y->size);
- if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
+ if (memcmp(x->thist, y->thist, x->size * sizeof(hidx_t)) == 0) {
return true;
}
- tag_history_t &thist = ctx.dc_taghistory;
+ tag_history_t &thist = ctx.history;
tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2, &p3 = ctx.dc_path3;
std::vector<uint32_t> &count = ctx.dc_tagcount;
for (size_t i = 0; i < x->size; ++i) {
- const hidx_t xl = x->tlook[i], yl = y->tlook[i];
+ const hidx_t xl = x->thist[i], yl = y->thist[i];
if (xl == yl) continue;
&& equal_lookahead_tags(ctx, x, y);
if (!compatible) return false;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+ const std::vector<Tag> &tags = ctx.dfa.tags;
const size_t ntag = tags.size();
kernel_buffers_t &bufs = ctx.dc_buffers;
tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max;
const tagver_t
*xvs = ctx.dc_tagvertbl[x->tvers[i]],
*yvs = ctx.dc_tagvertbl[y->tvers[i]];
- const hidx_t xl = x->tlook[i];
+ const hidx_t xl = x->thist[i];
for (size_t t = 0; t < ntag; ++t) {
// see note [mapping ignores items with lookahead tags]
if (!history(tags[t])
- && ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO) continue;
+ && ctx.history.last(xl, t) != TAGVER_ZERO) continue;
const tagver_t xv = xvs[t], yv = yvs[t];
tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv];
const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv);
if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) {
DASSERT(axv != ayv);
- copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv);
+ copy = ctx.dfa.tcpool.make_copy(copy, axv, ayv);
}
}
+++ /dev/null
-#include <stdlib.h>
-
-#include "src/debug/debug.h"
-#include "src/dfa/determinization.h"
-#include "src/dfa/tag_history.h"
-
-
-namespace re2c
-{
-
-int32_t precedence(determ_context_t &ctx, const clos_t &x, const clos_t &y
- , int32_t &prec1, int32_t &prec2)
-{
- const int32_t idx1 = x.tlook, idx2 = y.tlook;
- const uint32_t orig1 = x.origin, orig2 = y.origin;
- int prec = 0;
-
- if (idx1 == idx2 && orig1 == orig2) {
- prec1 = prec2 = MAX_RHO;
- return prec;
- }
-
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- tag_history_t &hist = ctx.dc_taghistory;
-
- const bool fork_frame = orig1 == orig2;
- if (fork_frame) {
- prec1 = prec2 = MAX_RHO;
- }
- else {
- const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
- prec = unpack_leftmost(k->prectbl[orig1 * k->size + orig2]);
- prec1 = unpack_longest(k->prectbl[orig1 * k->size + orig2]);
- prec2 = unpack_longest(k->prectbl[orig2 * k->size + orig1]);
- }
-
- tag_info_t info1, info2;
- int32_t i1 = idx1, i2 = idx2;
- for (; i1 != i2; ) {
- if (i1 > i2) {
- const tag_history_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- }
- else {
- const tag_history_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- }
- DINCCOUNT_CLLENGTH(ctx, 1);
- }
- if (i1 != HROOT) {
- DASSERT(fork_frame);
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
- DINCCOUNT_CLPREC(ctx);
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- if (fork_frame) {
- // equal => not less
- if (i1 == idx1 && i2 == idx2) return 0;
-
- // shorter => less
- if (i1 == idx1) return -1;
- if (i2 == idx2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- // positive vs positive: smaller wins
- // (this case is only possible because multiple
- // top-level RE don't have proper negative tags)
- if (tag1 < tag2) return -1;
- if (tag1 > tag2) return 1;
-
- DASSERT(false); // unreachable
- }
- return prec;
-}
-
-} // namespace re2c
--- /dev/null
+#ifndef _RE2C_DFA_POSIX_PRECEDENCE_
+#define _RE2C_DFA_POSIX_PRECEDENCE_
+
+#include "src/dfa/tag_history.h"
+#include "src/debug/debug.h"
+
+
+namespace re2c {
+
+inline int32_t unpack_longest(int32_t packed)
+{
+ // take lower 30 bits and sign-extend
+ return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
+}
+
+inline int32_t unpack_leftmost(int32_t packed)
+{
+ // take higher 2 bits and sign-extend
+ return packed >> 30u;
+}
+
+inline int32_t pack(int32_t longest, int32_t leftmost)
+{
+ // avoid signed overflows by using unsigned arithmetics
+ uint32_t u_longest = static_cast<uint32_t>(longest);
+ uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
+
+ // leftmost: higher 2 bits, longest: lower 30 bits
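+ // e.g. pack(3, -1) gives bit pattern 0xC0000003: leftmost -1 in bits 31..30, longest 3 in bits 29..0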
+ uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
+ int32_t packed = static_cast<int32_t>(u_packed);
+
+ DASSERT(unpack_longest(packed) == longest
+ && unpack_leftmost(packed) == leftmost);
+
+ return packed;
+}
+
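+// leftmost comparison of two histories that fork in the same configuration:
+// negative if the 1st history takes precedence, positive if the 2nd does, zero if equal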
+inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2)
+{
+ // equal => not less
+ if (last1 && last2) return 0;
+
+ // shorter => less
+ if (last1) return -1;
+ if (last2) return 1;
+
+ const uint32_t tag1 = info1.idx, tag2 = info2.idx;
+ const bool neg1 = info1.neg, neg2 = info2.neg;
+
+ // can't be both closing
+ DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
+
+ // closing vs opening: closing wins
+ if (tag1 % 2 == 1) return -1;
+ if (tag2 % 2 == 1) return 1;
+
+ // can't be both negative
+ DASSERT(!(neg1 && neg2));
+
+ // positive vs negative: positive wins
+ if (neg1) return 1;
+ if (neg2) return -1;
+
+ // positive vs positive: smaller wins
+ // (this case is only possible because multiple
+ // top-level RE don't have proper negative tags)
+ if (tag1 < tag2) return -1;
+ if (tag1 > tag2) return 1;
+
+ DASSERT(false);
+ return 0;
+}
+
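+// compare configurations x and y: fill prec1/prec2 with their longest-precedence values
+// (minimal tag heights along their histories) and return negative if x takes precedence,
+// positive if y does, zero if they are equal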
+template<typename ctx_t, typename conf_t>
+int32_t precedence(ctx_t &ctx, const conf_t &x, const conf_t &y
+ , int32_t &prec1, int32_t &prec2)
+{
+ prec1 = prec2 = MAX_RHO;
+ int32_t prec = 0;
+
+ const int32_t idx1 = x.thist, idx2 = y.thist;
+ const uint32_t orig1 = x.origin, orig2 = y.origin;
+
+ if (idx1 == idx2 && orig1 == orig2) {
+ return 0;
+ }
+
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &hist = ctx.history;
+
+ const bool fork_frame = orig1 == orig2;
+ if (!fork_frame) {
+ prec = unpack_leftmost(ctx.oldprectbl[orig1 * ctx.oldprecdim + orig2]);
+ prec1 = unpack_longest(ctx.oldprectbl[orig1 * ctx.oldprecdim + orig2]);
+ prec2 = unpack_longest(ctx.oldprectbl[orig2 * ctx.oldprecdim + orig1]);
+ }
+
+ tag_info_t info1, info2;
+ int32_t i1 = idx1, i2 = idx2;
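+ // walk both histories up to their join point, tracking the minimal tag height on each path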
+ for (; i1 != i2; ) {
+ if (i1 > i2) {
+ const tag_history_t::node_t &n = hist.node(i1);
+ info1 = n.info;
+ prec1 = std::min(prec1, tags[info1.idx].height);
+ i1 = n.pred;
+ }
+ else {
+ const tag_history_t::node_t &n = hist.node(i2);
+ info2 = n.info;
+ prec2 = std::min(prec2, tags[info2.idx].height);
+ i2 = n.pred;
+ }
+ DINCCOUNT_CLLENGTH(ctx, 1);
+ }
+ if (i1 != HROOT) {
+ DASSERT(fork_frame);
+ const int32_t h = tags[hist.node(i1).info.idx].height;
+ prec1 = std::min(prec1, h);
+ prec2 = std::min(prec2, h);
+ }
+ DINCCOUNT_CLPREC(ctx);
+
+ // longest precedence
+ if (prec1 > prec2) return -1;
+ if (prec1 < prec2) return 1;
+
+ // leftmost precedence
+ return !fork_frame ? prec
+ : leftprec(info1, info2, i1 == idx1, i2 == idx2);
+}
+
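+// compute the precedence table for the current closure from the previous table
+// (oldprectbl of dimension oldprecdim) in a single traversal of the tag history tree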
+template<typename ctx_t>
+void compute_prectable(ctx_t &ctx)
+{
+ const typename ctx_t::confset_t &state = ctx.state;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &history = ctx.history;
+
+ const prectable_t *oldtbl = ctx.oldprectbl;
+ prectable_t *newtbl = ctx.newprectbl;
+ const size_t olddim = ctx.oldprecdim, newdim = state.size();
+
+ std::vector<uint32_t> &sortcores = ctx.sortcores;
+ std::vector<uint32_t> &fcount = ctx.fincount;
+ std::vector<int32_t> &stack = ctx.worklist;
+ std::vector<histleaf_t> &level = ctx.histlevel;
+ std::vector<histleaf_t>::reverse_iterator li, lj, lk, le;
+
+ level.clear();
+ level.reserve(newdim);
+ sortcores.resize(newdim);
+
+ // Group core configurations by their history tree index, so that later
+ // while traversing the tree we will know at once which configurations
+ // (if any) are bound to the given tree node. We use counting sort, which
+ // requires additional memory, but is fast and conveniently creates an
+ // array of boundaries in the sorted configuration array.
+ uint32_t maxfin = 0;
+ for (typename ctx_t::cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
+ uint32_t &x = history.node1(c->thist).finidx;
+ if (x >= USED) {
+ x = maxfin++;
+ fcount[x] = 0;
+
+ // mark all nodes down to root as used (unless marked already)
+ for (int32_t i = history.node(c->thist).pred; i >= HROOT; ) {
+ uint32_t &y = history.node1(i).finidx;
+ if (y <= USED) break;
+ y = USED;
+ i = history.node(i).pred;
+ }
+ }
+ ++fcount[x];
+ }
+ fcount[maxfin] = 0;
+ for (size_t i = 1; i <= maxfin; ++i) {
+ fcount[i] += fcount[i - 1];
+ }
+ for (uint32_t i = static_cast<uint32_t>(newdim); i --> 0; ) {
+ sortcores[--fcount[history.node1(state[i].thist).finidx]] = i;
+ }
+
+ // Depth-first traversal of the history tree. During traversal we grow
+ // an array of items (one item per core configuration). Items are added
+ // in tree nodes that have core configurations associated with them.
+ // Each item represents one history. Items have immutable part (core ID,
+ // origin) and mutable part (current minimal height, current tree index)
+ // that changes as we return down the tree.
+ stack.push_back(0);
+ while (!stack.empty()) {
+ const int32_t n = stack.back();
+ tag_history_t::node1_t &node = history.node1(n);
+ const uint32_t fidx = node.finidx;
+
+ if (fidx == NONFIN) {
+ // aborted branch of search tree, don't waste time
+ stack.pop_back();
+ continue;
+ }
+
+ if (node.next != -1) {
+ // start or continue visiting subtrees rooted at this node
+ const tag_history_t::arc_t &arc = history.arc(node.next);
+ stack.push_back(arc.node);
+ node.next = arc.next;
+ continue;
+ }
+
+ // all subtrees visited, it's time to process this node
+ const int32_t h = n == 0 ? MAX_RHO : tags[history.node(n).info.idx].height;
+ li = level.rbegin();
+ le = level.rend();
+
+ if (fidx < USED) {
+ // this node has leaf configurations, add them to level
+ for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) {
+ const uint32_t j = sortcores[k];
+ const histleaf_t l = {j, state[j].origin, HROOT, h};
+ level.push_back(l);
+ }
+
+ // compute precedence for newly added configurations
+ const int32_t p0 = pack(h, 0);
+ for (lj = level.rbegin(); lj != li; ++lj) {
+ for (lk = lj; lk != li; ++lk) {
+ const uint32_t cj = lj->coreid, ck = lk->coreid;
+ const uint32_t oj = lj->origin, ok = lk->origin;
+ const bool fork = n != 0 || oj == ok;
+ if (fork) {
+ newtbl[cj * newdim + ck] = p0;
+ newtbl[ck * newdim + cj] = p0;
+ }
+ else {
+ newtbl[cj * newdim + ck] = oldtbl[oj * olddim + ok];
+ newtbl[ck * newdim + cj] = oldtbl[ok * olddim + oj];
+ }
+ }
+ }
+ }
+
+ // Each subtree appended a sequence of items to level. We can find
+ // sequence boundaries by looking at tree index of each item: it is
+ // equal to tree index of the corresponding subtree (except for the
+ // leaf items added at this node; but we know where they start).
+
+ // We must compute precedence for each pair of items from different
+ // sequences (including leaf items added at this node), but not within
+ // sequence boundaries: those histories fork higher up the subtree;
+ // their precedence has already been computed and must not be touched.
+
+ for (int32_t a = node.last; a != -1; ) {
+ const tag_history_t::arc_t &arc = history.arc(a);
+ a = arc.prev;
+
+ // for all the items of this subtree
+ for (lk = li; li != le && li->hidx == arc.node; ++li) {
+
+ // update height of each item coming from subtree
+ li->height = std::min(li->height, h);
+
+ // for all the level items to the right of this subtree
+ for (lj = level.rbegin(); lj != lk; ++lj) {
+
+ const uint32_t ci = li->coreid, cj = lj->coreid;
+ const uint32_t oi = li->origin, oj = lj->origin;
+ const bool fork = n != 0 || oi == oj;
+ int32_t p1 = li->height, p2 = lj->height, p;
+
+ if (!fork) {
+ p1 = std::min(p1, unpack_longest(oldtbl[oi * olddim + oj]));
+ p2 = std::min(p2, unpack_longest(oldtbl[oj * olddim + oi]));
+ }
+
+ if (p1 > p2) {
+ p = -1;
+ }
+ else if (p1 < p2) {
+ p = 1;
+ }
+ else if (fork) {
+ const tag_info_t t1 = history.node(li->hidx).info;
+ const tag_info_t t2 = history.node(lj->hidx).info;
+ p = leftprec(t1, t2, t1 == NOINFO, t2 == NOINFO);
+ }
+ else {
+ p = unpack_leftmost(oldtbl[oi * olddim + oj]);
+ }
+
+ newtbl[ci * newdim + cj] = pack(p1, p);
+ newtbl[cj * newdim + ci] = pack(p2, -p);
+ }
+ }
+ }
+
+ // finally, downgrade tree index of all subtree items, making their
+ // origins indistinguishable from each other for the previous level
+ for (lj = level.rbegin(); lj != li; ++lj) {
+ lj->hidx = n;
+ }
+
+ stack.pop_back();
+ }
+}
+
+} // namespace re2c
+
+#endif // _RE2C_DFA_POSIX_PRECEDENCE_
{
typedef int32_t hidx_t;
-typedef int32_t prectable_t;
typedef std::vector<tag_info_t> tag_path_t;
static const hidx_t HROOT = 0;