From 79dc58fb2fcac24561806fb447050de058b20410 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 13 Aug 2018 23:21:57 +0100 Subject: [PATCH] Reordered function definitions. --- re2c/src/dfa/determinization.cc | 109 ++++++------ re2c/src/dfa/dump.cc | 43 ++--- re2c/src/dfa/find_state.cc | 300 ++++++++++++++++---------------- 3 files changed, 226 insertions(+), 226 deletions(-) diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 8de098a6..2c25bcf8 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -23,25 +23,56 @@ namespace re2c { -static nfa_state_t *transition(nfa_state_t *, uint32_t); static void reach_on_symbol(determ_context_t &); +static nfa_state_t *transition(nfa_state_t *, uint32_t); +static uint32_t init_tag_versions(determ_context_t &); static void warn_nondeterministic_tags(const determ_context_t &); const uint32_t dfa_t::NIL = ~0u; -nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) +dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn) + : states() + , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges + , charset(nfa.charset) + , rules(nfa.rules) + , tags(nfa.tags) + , mtagvers(*new std::set) + , finvers(NULL) + , tcpool(*new tcpool_t) + , maxtagver(0) + , tcmd0(NULL) + , tcid0(TCID0) { - if (state->type != nfa_state_t::RAN) { - return NULL; - } - for (const Range *r = state->ran.ran; r; r = r->next()) { - if ((r->lower() <= symbol) && (symbol < r->upper())) { - return state->ran.out; + determ_context_t ctx(opts, warn, cond, nfa, *this); + + const uint32_t INITIAL_TAGS = init_tag_versions(ctx); + + // initial state + const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; + ctx.dc_reached.push_back(c0); + tagged_epsilon_closure(ctx); + find_state(ctx); + + // iterate while new kernels are added: for each alphabet symbol, + // build tagged epsilon-closure of all reachable NFA states, + // then find identical or mappable DFA state or add a new one + for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { + + ctx.dc_origin = i; + ctx.dc_newvers.clear(); + + for (uint32_t c = 0; c < nchars; ++c) { + ctx.dc_symbol = c; + + reach_on_symbol(ctx); + tagged_epsilon_closure(ctx); + find_state(ctx); } } - return NULL; + + warn_nondeterministic_tags(ctx); } @@ -62,7 +93,21 @@ void reach_on_symbol(determ_context_t &ctx) } -static uint32_t init_tag_versions(determ_context_t &ctx) +nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) +{ + if (state->type != nfa_state_t::RAN) { + return NULL; + } + for (const Range *r = state->ran.ran; r; r = r->next()) { + if ((r->lower() <= symbol) && (symbol < r->upper())) { + return state->ran.out; + } + } + return NULL; +} + + +uint32_t init_tag_versions(determ_context_t &ctx) { dfa_t &dfa = ctx.dc_dfa; const size_t ntags = dfa.tags.size(); @@ -97,50 +142,6 @@ static uint32_t init_tag_versions(determ_context_t &ctx) } -dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn) - : states() - , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges - , charset(nfa.charset) - , rules(nfa.rules) - , tags(nfa.tags) - , mtagvers(*new std::set) - , finvers(NULL) - , tcpool(*new tcpool_t) - , maxtagver(0) - , tcmd0(NULL) - , tcid0(TCID0) -{ - determ_context_t ctx(opts, warn, cond, nfa, *this); - - const uint32_t INITIAL_TAGS = init_tag_versions(ctx); - - // initial state - const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; - ctx.dc_reached.push_back(c0); - tagged_epsilon_closure(ctx); - find_state(ctx); - - // iterate while new kernels are added: for each alphabet symbol, - // build tagged epsilon-closure of all reachable NFA states, - // then find identical or mappable DFA state or add a new one - for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { - - ctx.dc_origin = i; - ctx.dc_newvers.clear(); - - for (uint32_t c = 0; c < nchars; ++c) { - ctx.dc_symbol = c; - - reach_on_symbol(ctx); - tagged_epsilon_closure(ctx); - find_state(ctx); - } - } - - warn_nondeterministic_tags(ctx); -} - - // For each tag, find maximal number of parallel versions of this tag // used in each kernel (degree of non-determinism) and warn about tags with // maximum degree two or more. diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc index 32a1f1a8..08cc78ee 100644 --- a/re2c/src/dfa/dump.cc +++ b/re2c/src/dfa/dump.cc @@ -19,6 +19,7 @@ namespace re2c { +static void dump_history(const dfa_t &, const tag_history_t &, hidx_t); static void dump_tcmd_or_tcid(tcmd_t *const *, const tcid_t *, size_t, const tcpool_t &); static const char *tagname(const Tag &); static void dump_tags(const tagver_table_t &, const tag_history_t &, hidx_t, uint32_t); @@ -45,27 +46,6 @@ dump_dfa_t::~dump_dfa_t() } -static void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) -{ - if (i == HROOT) { - fprintf(stderr, " /"); - return; - } - - dump_history(dfa, h, h.pred(i)); - - const Tag &t = dfa.tags[h.tag(i)]; - const tagver_t v = h.elem(i); - if (capture(t)) { - fprintf(stderr, "%u", (uint32_t)t.ncap); - } else if (!trailing(t)) { - fprintf(stderr, "%s", t.name->c_str()); - } - fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑"); - fprintf(stderr, " "); -} - - void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) { if (!debug) return; @@ -171,6 +151,27 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) } +void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) +{ + if (i == HROOT) { + fprintf(stderr, " /"); + return; + } + + dump_history(dfa, h, h.pred(i)); + + const Tag &t = dfa.tags[h.tag(i)]; + const tagver_t v = h.elem(i); + if (capture(t)) { + fprintf(stderr, "%u", (uint32_t)t.ncap); + } else if (!trailing(t)) { + fprintf(stderr, "%s", t.name->c_str()); + } + fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑"); + fprintf(stderr, " "); +} + + void dump_dfa(const dfa_t &dfa) { const size_t diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index 2b682ec3..131ec79f 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -2,11 +2,9 @@ #include #include #include -#include #include "src/dfa/determinization.h" #include "src/dfa/dfa.h" -#include "src/dfa/dump.h" #include "src/dfa/tcmd.h" #include "src/nfa/nfa.h" #include "src/re/rule.h" @@ -101,6 +99,118 @@ static bool do_find_state(determ_context_t &ctx); static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin); +void find_state(determ_context_t &ctx) +{ + dfa_t &dfa = ctx.dc_dfa; + + // find or add the new state in the existing set of states + const bool is_new = do_find_state(ctx); + + if (is_new) { + // create new DFA state + dfa_state_t *t = new dfa_state_t(dfa.nchars); + dfa.states.push_back(t); + + // check if the new state is final + // see note [at most one final item per closure] + cclositer_t + b = ctx.dc_closure.begin(), + e = ctx.dc_closure.end(), + f = std::find_if(b, e, clos_t::fin); + if (f != e) { + t->tcmd[dfa.nchars] = final_actions(ctx, *f); + t->rule = f->state->rule; + } + } + + if (ctx.dc_origin == dfa_t::NIL) { + // initial state + dfa.tcmd0 = ctx.dc_actions; + } + else { + dfa_state_t *s = dfa.states[ctx.dc_origin]; + s->arcs[ctx.dc_symbol] = ctx.dc_target; + s->tcmd[ctx.dc_symbol] = ctx.dc_actions; + } + + ctx.dc_dump.state(ctx, is_new); +} + + +bool do_find_state(determ_context_t &ctx) +{ + kernels_t &kernels = ctx.dc_kernels; + const closure_t &closure = ctx.dc_closure; + + // empty closure corresponds to default state + if (closure.size() == 0) { + ctx.dc_target = dfa_t::NIL; + ctx.dc_actions = NULL; + return false; + } + + // resize buffer if closure is too large + reserve_buffers(ctx); + kernel_t *k = ctx.dc_buffers.kernel; + + // copy closure to buffer kernel + copy_to_buffer_kernel(closure, ctx.dc_prectbl, k); + + // hash "static" part of the kernel + const uint32_t hash = hash_kernel(k); + + // try to find identical kernel + kernel_eq_t cmp_eq = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_eq); + if (ctx.dc_target != kernels_t::NIL) return false; + + // else try to find mappable kernel + // see note [bijective mappings] + kernel_map_t cmp_map = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_map); + if (ctx.dc_target != kernels_t::NIL) return false; + + // otherwise add new kernel + kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator); + ctx.dc_target = kernels.push(hash, kcopy); + return true; +} + + +tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) +{ + dfa_t &dfa = ctx.dc_dfa; + const Rule &rule = dfa.rules[fin.state->rule]; + const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers]; + const hidx_t look = fin.tlook; + const tag_history_t &thist = ctx.dc_taghistory; + tcpool_t &tcpool = dfa.tcpool; + tcmd_t *copy = NULL, *save = NULL, **p; + + for (size_t t = rule.ltag; t < rule.htag; ++t) { + + const Tag &tag = dfa.tags[t]; + if (fixed(tag)) continue; + + const tagver_t v = abs(vers[t]), l = thist.last(look, t); + tagver_t &f = dfa.finvers[t]; + if (l == TAGVER_ZERO) { + copy = tcpool.make_copy(copy, f, v); + } else if (history(tag)) { + save = tcpool.make_add(save, f, v, thist, look, t); + } else { + save = tcpool.make_set(save, f, l); + } + } + + // join 'copy' and 'save' commands + for (p = © *p; p = &(*p)->next); + *p = save; + + return copy; +} + + kernel_buffers_t::kernel_buffers_t(allocator_t &alc) : maxsize(0) // usually ranges from one to some twenty , kernel(make_new_kernel(maxsize, alc)) @@ -148,6 +258,43 @@ kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc) } +uint32_t hash_kernel(const kernel_t *kernel) +{ + const size_t n = kernel->size; + + // seed + uint32_t h = static_cast(n); + + // TNFA states + h = hash32(h, kernel->state, n * sizeof(void*)); + + // precedence table + if (kernel->prectbl) { + h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t)); + } + + return h; +} + + +void copy_to_buffer_kernel(const closure_t &closure, + const prectable_t *prectbl, kernel_t *buffer) +{ + const size_t n = closure.size(); + + buffer->size = n; + + buffer->prectbl = prectbl; + + for (size_t i = 0; i < n; ++i) { + const clos_t &c = closure[i]; + buffer->state[i] = c.state; + buffer->tvers[i] = c.tvers; + buffer->tlook[i] = c.tlook; + } +} + + void reserve_buffers(determ_context_t &ctx) { kernel_buffers_t &kbufs = ctx.dc_buffers; @@ -190,43 +337,6 @@ void reserve_buffers(determ_context_t &ctx) } -uint32_t hash_kernel(const kernel_t *kernel) -{ - const size_t n = kernel->size; - - // seed - uint32_t h = static_cast(n); - - // TNFA states - h = hash32(h, kernel->state, n * sizeof(void*)); - - // precedence table - if (kernel->prectbl) { - h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t)); - } - - return h; -} - - -void copy_to_buffer_kernel(const closure_t &closure, - const prectable_t *prectbl, kernel_t *buffer) -{ - const size_t n = closure.size(); - - buffer->size = n; - - buffer->prectbl = prectbl; - - for (size_t i = 0; i < n; ++i) { - const clos_t &c = closure[i]; - buffer->state[i] = c.state; - buffer->tvers[i] = c.tvers; - buffer->tlook[i] = c.tlook; - } -} - - bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, const determ_context_t &ctx) { assert(x->size == y->size); @@ -361,116 +471,4 @@ bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) return !nontrivial_cycles; } - -bool do_find_state(determ_context_t &ctx) -{ - kernels_t &kernels = ctx.dc_kernels; - const closure_t &closure = ctx.dc_closure; - - // empty closure corresponds to default state - if (closure.size() == 0) { - ctx.dc_target = dfa_t::NIL; - ctx.dc_actions = NULL; - return false; - } - - // resize buffer if closure is too large - reserve_buffers(ctx); - kernel_t *k = ctx.dc_buffers.kernel; - - // copy closure to buffer kernel - copy_to_buffer_kernel(closure, ctx.dc_prectbl, k); - - // hash "static" part of the kernel - const uint32_t hash = hash_kernel(k); - - // try to find identical kernel - kernel_eq_t cmp_eq = {ctx}; - ctx.dc_target = kernels.find_with(hash, k, cmp_eq); - if (ctx.dc_target != kernels_t::NIL) return false; - - // else try to find mappable kernel - // see note [bijective mappings] - kernel_map_t cmp_map = {ctx}; - ctx.dc_target = kernels.find_with(hash, k, cmp_map); - if (ctx.dc_target != kernels_t::NIL) return false; - - // otherwise add new kernel - kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator); - ctx.dc_target = kernels.push(hash, kcopy); - return true; -} - - -tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) -{ - dfa_t &dfa = ctx.dc_dfa; - const Rule &rule = dfa.rules[fin.state->rule]; - const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers]; - const hidx_t look = fin.tlook; - const tag_history_t &thist = ctx.dc_taghistory; - tcpool_t &tcpool = dfa.tcpool; - tcmd_t *copy = NULL, *save = NULL, **p; - - for (size_t t = rule.ltag; t < rule.htag; ++t) { - - const Tag &tag = dfa.tags[t]; - if (fixed(tag)) continue; - - const tagver_t v = abs(vers[t]), l = thist.last(look, t); - tagver_t &f = dfa.finvers[t]; - if (l == TAGVER_ZERO) { - copy = tcpool.make_copy(copy, f, v); - } else if (history(tag)) { - save = tcpool.make_add(save, f, v, thist, look, t); - } else { - save = tcpool.make_set(save, f, l); - } - } - - // join 'copy' and 'save' commands - for (p = © *p; p = &(*p)->next); - *p = save; - - return copy; -} - - -void find_state(determ_context_t &ctx) -{ - dfa_t &dfa = ctx.dc_dfa; - - // find or add the new state in the existing set of states - const bool is_new = do_find_state(ctx); - - if (is_new) { - // create new DFA state - dfa_state_t *t = new dfa_state_t(dfa.nchars); - dfa.states.push_back(t); - - // check if the new state is final - // see note [at most one final item per closure] - cclositer_t - b = ctx.dc_closure.begin(), - e = ctx.dc_closure.end(), - f = std::find_if(b, e, clos_t::fin); - if (f != e) { - t->tcmd[dfa.nchars] = final_actions(ctx, *f); - t->rule = f->state->rule; - } - } - - if (ctx.dc_origin == dfa_t::NIL) { - // initial state - dfa.tcmd0 = ctx.dc_actions; - } - else { - dfa_state_t *s = dfa.states[ctx.dc_origin]; - s->arcs[ctx.dc_symbol] = ctx.dc_target; - s->tcmd[ctx.dc_symbol] = ctx.dc_actions; - } - - ctx.dc_dump.state(ctx, is_new); -} - } // namespace re2c -- 2.40.0