From 8838add4d50b4ef7f1dfdab88e2d082c5ebbe017 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 8 Feb 2019 16:08:58 +0000 Subject: [PATCH] Use counting sort for tag histories (the number tags is limited, so it is faster). --- re2c/src/dfa/determinization.cc | 14 ++++++++++-- re2c/src/dfa/determinization.h | 5 +++++ re2c/src/dfa/find_state.cc | 38 +++++++++++++++++++------------- re2c/src/dfa/posix_precedence.cc | 2 +- re2c/src/dfa/tag_history.h | 15 +------------ 5 files changed, 42 insertions(+), 32 deletions(-) diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 5451267a..96a0d8fc 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -213,16 +213,26 @@ determ_context_t::determ_context_t(const opt_t *opts, Warn &warn , dc_closure() , dc_prectbl(NULL) , dc_tagvertbl(nfa.tags.size()) - , dc_taghistory(nfa.tags.size()) + , dc_taghistory() , dc_kernels() , dc_buffers(dc_allocator) , dc_newvers(newver_cmp_t(dc_taghistory)) , dc_stack_topsort() , dc_stack_linear() , dc_stack_dfs() + , dc_path1() + , dc_path2() + , dc_path3() + , dc_tagcount() , dc_dump(opts) , dc_clstats() -{} +{ + const size_t ntags = nfa.tags.size(); + dc_path1.reserve(ntags); + dc_path2.reserve(ntags); + dc_path3.reserve(ntags); + dc_tagcount.resize(ntags); +} dfa_t::~dfa_t() diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 5b9029b1..eecf2ba3 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -131,6 +131,11 @@ struct determ_context_t std::stack dc_stack_linear; std::stack dc_stack_dfs; + tag_path_t dc_path1; // buffer 1 for tag history + tag_path_t dc_path2; // buffer 2 for tag history + tag_path_t dc_path3; // buffer 3 for tag history + std::vector dc_tagcount; // buffer for counting sort on tag history + // debug dump_dfa_t dc_dump; closure_stats_t dc_clstats; diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index f8892f04..98d07947 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -95,7 +95,7 @@ static void copy_to_buffer_kernel(const closure_t &, const prectable_t *, kernel static void reserve_buffers(determ_context_t &); static uint32_t hash_kernel(const kernel_t *kernel); static bool equal_lookahead_tags(determ_context_t &, const kernel_t *, const kernel_t *); -static void group_by_tag(tag_path_t &p); +static void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector &count); static void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx); static bool do_find_state(determ_context_t &ctx); static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin); @@ -349,7 +349,8 @@ bool equal_lookahead_tags(determ_context_t &ctx } tag_history_t &thist = ctx.dc_taghistory; - tag_path_t &p1 = thist.path1, &p2 = thist.path2; + tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2, &p3 = ctx.dc_path3; + std::vector &count = ctx.dc_tagcount; for (size_t i = 0; i < x->size; ++i) { const hidx_t xl = x->tlook[i], yl = y->tlook[i]; @@ -361,8 +362,8 @@ bool equal_lookahead_tags(determ_context_t &ctx if (p1.size() != p2.size()) return false; - group_by_tag(p1); - group_by_tag(p2); + group_by_tag(p1, p3, count); + group_by_tag(p2, p3, count); if (p1 != p2) return false; } @@ -371,21 +372,28 @@ bool equal_lookahead_tags(determ_context_t &ctx } -void group_by_tag(tag_path_t &p) +void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector &count) { - // insertion sort (must be careful to preserve order of elements - // with the same tag, but different negative bit) - size_t i, j, n = p.size(); + // counting sort with tag index as key + // must preserve relative order of elements with the same tag - for (i = 1; i < n; ++i) { - const tag_info_t info = p[i]; - const size_t tag = info.idx; + const size_t clen = count.size(), plen = path.size(); + std::fill(count.begin(), count.end(), 0); + buf.resize(plen); - for (j = i; j > 0 && p[j - 1].idx > tag; --j) { - p[j] = p[j - 1]; - } - p[j] = info; + for (size_t i = 0; i < plen; ++i) { + ++count[path[i].idx]; + } + + for (size_t i = 1; i < clen; ++i) { + count[i] += count[i - 1]; } + + for (size_t i = plen; i --> 0; ) { + buf[--count[path[i].idx]] = path[i]; + } + + path.swap(buf); } diff --git a/re2c/src/dfa/posix_precedence.cc b/re2c/src/dfa/posix_precedence.cc index dfec7c08..fc654187 100644 --- a/re2c/src/dfa/posix_precedence.cc +++ b/re2c/src/dfa/posix_precedence.cc @@ -24,7 +24,7 @@ int32_t precedence(determ_context_t &ctx, } tag_history_t &thist = ctx.dc_taghistory; - tag_path_t &p1 = thist.path1, &p2 = thist.path2; + tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2; reconstruct_history(thist, p1, xl); reconstruct_history(thist, p2, yl); tag_path_t::const_reverse_iterator diff --git a/re2c/src/dfa/tag_history.h b/re2c/src/dfa/tag_history.h index bd45bff8..a3cb7609 100644 --- a/re2c/src/dfa/tag_history.h +++ b/re2c/src/dfa/tag_history.h @@ -30,11 +30,7 @@ struct tag_history_t }; std::vector nodes; - // reconstruct paths for comparison - tag_path_t path1; - tag_path_t path2; - - inline explicit tag_history_t(size_t ntags); + inline tag_history_t(): nodes() {}; inline hidx_t pred(hidx_t i) const { return nodes[i].pred; } inline tag_info_t info(hidx_t i) const { return nodes[i].info; } inline tagver_t elem(hidx_t i) const { return nodes[i].info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; } @@ -45,15 +41,6 @@ struct tag_history_t FORBID_COPY(tag_history_t); }; -tag_history_t::tag_history_t(size_t ntags) - : nodes() - , path1() - , path2() -{ - path1.reserve(ntags); - path2.reserve(ntags); -} - hidx_t tag_history_t::push(hidx_t idx, tag_info_t info) { node_t x = {idx, info}; -- 2.40.0