]> granicus.if.org Git - re2c/commitdiff
Use counting sort for tag histories (the number tags is limited, so it is faster).
authorUlya Trofimovich <skvadrik@gmail.com>
Fri, 8 Feb 2019 16:08:58 +0000 (16:08 +0000)
committerUlya Trofimovich <skvadrik@gmail.com>
Fri, 8 Feb 2019 16:08:58 +0000 (16:08 +0000)
re2c/src/dfa/determinization.cc
re2c/src/dfa/determinization.h
re2c/src/dfa/find_state.cc
re2c/src/dfa/posix_precedence.cc
re2c/src/dfa/tag_history.h

index 5451267a4138e96a3b313b0c869b596d04df263a..96a0d8fce1fdb16bf4cc7bb913e58d3a47328ee4 100644 (file)
@@ -213,16 +213,26 @@ determ_context_t::determ_context_t(const opt_t *opts, Warn &warn
     , dc_closure()
     , dc_prectbl(NULL)
     , dc_tagvertbl(nfa.tags.size())
-    , dc_taghistory(nfa.tags.size())
+    , dc_taghistory()
     , dc_kernels()
     , dc_buffers(dc_allocator)
     , dc_newvers(newver_cmp_t(dc_taghistory))
     , dc_stack_topsort()
     , dc_stack_linear()
     , dc_stack_dfs()
+    , dc_path1()
+    , dc_path2()
+    , dc_path3()
+    , dc_tagcount()
     , dc_dump(opts)
     , dc_clstats()
-{}
+{
+    const size_t ntags = nfa.tags.size();
+    dc_path1.reserve(ntags);
+    dc_path2.reserve(ntags);
+    dc_path3.reserve(ntags);
+    dc_tagcount.resize(ntags);
+}
 
 
 dfa_t::~dfa_t()
index 5b9029b105ddaf5b3cf0d6d28be79147df33ee7b..eecf2ba3552714ff2c4ca15777cef800dde6606d 100644 (file)
@@ -131,6 +131,11 @@ struct determ_context_t
     std::stack<nfa_state_t*> dc_stack_linear;
     std::stack<clos_t>       dc_stack_dfs;
 
+    tag_path_t               dc_path1;      // buffer 1 for tag history
+    tag_path_t               dc_path2;      // buffer 2 for tag history
+    tag_path_t               dc_path3;      // buffer 3 for tag history
+    std::vector<uint32_t>    dc_tagcount;   // buffer for counting sort on tag history
+
     // debug
     dump_dfa_t               dc_dump;
     closure_stats_t          dc_clstats;
index f8892f047a8d79260dbfd37e8b02cec40ac00b41..98d079473db8dd8efa50d5a1d666f4b3d89362bf 100644 (file)
@@ -95,7 +95,7 @@ static void copy_to_buffer_kernel(const closure_t &, const prectable_t *, kernel
 static void reserve_buffers(determ_context_t &);
 static uint32_t hash_kernel(const kernel_t *kernel);
 static bool equal_lookahead_tags(determ_context_t &, const kernel_t *, const kernel_t *);
-static void group_by_tag(tag_path_t &p);
+static void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector<uint32_t> &count);
 static void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx);
 static bool do_find_state(determ_context_t &ctx);
 static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin);
@@ -349,7 +349,8 @@ bool equal_lookahead_tags(determ_context_t &ctx
     }
 
     tag_history_t &thist = ctx.dc_taghistory;
-    tag_path_t &p1 = thist.path1, &p2 = thist.path2;
+    tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2, &p3 = ctx.dc_path3;
+    std::vector<uint32_t> &count = ctx.dc_tagcount;
 
     for (size_t i = 0; i < x->size; ++i) {
         const hidx_t xl = x->tlook[i], yl = y->tlook[i];
@@ -361,8 +362,8 @@ bool equal_lookahead_tags(determ_context_t &ctx
 
         if (p1.size() != p2.size()) return false;
 
-        group_by_tag(p1);
-        group_by_tag(p2);
+        group_by_tag(p1, p3, count);
+        group_by_tag(p2, p3, count);
 
         if (p1 != p2) return false;
     }
@@ -371,21 +372,28 @@ bool equal_lookahead_tags(determ_context_t &ctx
 }
 
 
-void group_by_tag(tag_path_t &p)
+void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector<uint32_t> &count)
 {
-    // insertion sort (must be careful to preserve order of elements
-    // with the same tag, but different negative bit)
-    size_t i, j, n = p.size();
+    // counting sort with tag index as key
+    // must preserve relative order of elements with the same tag
 
-    for (i = 1; i < n; ++i) {
-        const tag_info_t info = p[i];
-        const size_t tag = info.idx;
+    const size_t clen = count.size(), plen = path.size();
+    std::fill(count.begin(), count.end(), 0);
+    buf.resize(plen);
 
-        for (j = i; j > 0 && p[j - 1].idx > tag; --j) {
-            p[j] = p[j - 1];
-        }
-        p[j] = info;
+    for (size_t i = 0; i < plen; ++i) {
+        ++count[path[i].idx];
+    }
+
+    for (size_t i = 1; i < clen; ++i) {
+        count[i] += count[i - 1];
     }
+
+    for (size_t i = plen; i --> 0; ) {
+        buf[--count[path[i].idx]] = path[i];
+    }
+
+    path.swap(buf);
 }
 
 
index dfec7c085640ec8326c6fb5db861c7d084d0ccbb..fc6541879cdbf5e5aaf8a299b5102edd6d052e84 100644 (file)
@@ -24,7 +24,7 @@ int32_t precedence(determ_context_t &ctx,
     }
 
     tag_history_t &thist = ctx.dc_taghistory;
-    tag_path_t &p1 = thist.path1, &p2 = thist.path2;
+    tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2;
     reconstruct_history(thist, p1, xl);
     reconstruct_history(thist, p2, yl);
     tag_path_t::const_reverse_iterator
index bd45bff873c34ad24fb68d321cfe9ad561a2dbba..a3cb76095eca84ba3c852616546576065845c8eb 100644 (file)
@@ -30,11 +30,7 @@ struct tag_history_t
     };
     std::vector<node_t> nodes;
 
-    // reconstruct paths for comparison
-    tag_path_t path1;
-    tag_path_t path2;
-
-    inline explicit tag_history_t(size_t ntags);
+    inline tag_history_t(): nodes() {};
     inline hidx_t pred(hidx_t i) const { return nodes[i].pred; }
     inline tag_info_t info(hidx_t i) const { return nodes[i].info; }
     inline tagver_t elem(hidx_t i) const { return nodes[i].info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; }
@@ -45,15 +41,6 @@ struct tag_history_t
     FORBID_COPY(tag_history_t);
 };
 
-tag_history_t::tag_history_t(size_t ntags)
-    : nodes()
-    , path1()
-    , path2()
-{
-    path1.reserve(ntags);
-    path2.reserve(ntags);
-}
-
 hidx_t tag_history_t::push(hidx_t idx, tag_info_t info)
 {
     node_t x = {idx, info};