From 9c3c158e83ab673ac3693463f75bc7e3d76fd9b9 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 28 Nov 2016 15:32:46 +0000 Subject: [PATCH] Keep fixed and variable tags separately. This commit reverts commit de3f9e70a45c42fcb848a347ece3a727b8fb983e: Keep fixed and variable tags together in one array. Optimizations of variable tags get more complicated and fixed tags should not get in the way. This commit also drops check for tags in trailing context: there's nothing special about them; no technical reason to forbid them. --- re2c/src/codegen/emit_action.cc | 60 +++++++++---------- re2c/src/codegen/emit_dfa.cc | 11 +++- re2c/src/ir/adfa/adfa.cc | 8 ++- re2c/src/ir/adfa/adfa.h | 4 +- re2c/src/ir/dfa/cfg/cfg.cc | 10 ++-- re2c/src/ir/dfa/cfg/cfg.h | 2 +- re2c/src/ir/dfa/cfg/liveanal.cc | 27 ++++----- re2c/src/ir/dfa/cfg/rename.cc | 18 ++---- re2c/src/ir/dfa/closure.cc | 12 ++-- re2c/src/ir/dfa/determinization.cc | 36 +++++------- re2c/src/ir/dfa/dfa.h | 4 +- re2c/src/ir/nfa/init_rules.cc | 85 ++++++++++++--------------- re2c/src/ir/nfa/nfa.cc | 6 +- re2c/src/ir/nfa/nfa.h | 6 +- re2c/src/ir/nfa/regexps2nfa.cc | 45 +++++++------- re2c/src/ir/rule.h | 30 ++++------ re2c/src/ir/skeleton/generate_code.cc | 15 ++--- re2c/src/ir/skeleton/generate_data.cc | 21 ++++--- re2c/src/ir/skeleton/skeleton.cc | 3 +- re2c/src/ir/skeleton/skeleton.h | 3 +- re2c/src/ir/tag.cc | 26 +------- re2c/src/ir/tag.h | 27 ++++----- re2c/src/ir/tcmd.cc | 7 +-- re2c/src/ir/tcmd.h | 2 +- re2c/test/tags/fix4.i--tags.c | 4 +- re2c/test/tags/fix4_trail.i--tags.c | 4 +- re2c/test/tags/fix5.i--tags.c | 4 +- re2c/test/tags/fix5_trail.i--tags.c | 4 +- 28 files changed, 218 insertions(+), 266 deletions(-) diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 4b93a708..20047bd7 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -449,16 +449,24 @@ void gen_settags(code_lines_t &code, const DFA &dfa, tcid_t tcid) void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) { const bool generic = opts->input_api.type() == InputAPI::CUSTOM; - const std::valarray &tags = dfa.tags; - const tagver_t *vers = rule.tags; + const std::vector &vartags = dfa.vartags; + const std::vector &fixtags = dfa.fixtags; + const tagver_t *fins = dfa.finvers; - // trailing context - if (rule.trail != Tag::NONE) { - const Tag &tag = tags[rule.trail]; + // variable tags + for (size_t t = rule.lvar; t < rule.hvar; ++t) { + const VarTag &tag = vartags[t]; o.wind(ind); - if (tag.type == Tag::FIX) { - assert(!generic); - o.wstring(opts->yycursor).ws(" -= ").wu64(tag.fix.dist); + if (tag.name) { + const std::string + name = *tag.name, + expr = vartag_expr(fins[t]); + if (generic) { + o.wstring(opts->yycopytag).ws(" (").wstring(name) + .ws(", ").wstring(expr).ws(")"); + } else { + o.wstring(name).ws(" = ").wstring(expr); + } } else if (dfa.oldstyle_ctxmarker) { if (generic) { o.wstring(opts->yyrestorectx).ws(" ()"); @@ -466,7 +474,7 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker); } } else { - const std::string expr = vartag_expr(vers[rule.trail]); + const std::string expr = vartag_expr(fins[t]); if (generic) { o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); } else { @@ -476,34 +484,22 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) o.ws(";\n"); } - // named tags - for (size_t t = rule.ltag; t < rule.htag; ++t) { - const Tag &tag = tags[t]; - - // fixed - if (tag.type == Tag::FIX) { - assert(!generic); - o.wind(ind).wstring(*tag.name).ws(" = "); - if (tag.fix.base == Tag::NONE) { + // fixed tags + for (size_t t = rule.lfix; t < rule.hfix; ++t) { + assert(!generic); + const FixTag &tag = fixtags[t]; + o.wind(ind); + if (tag.name) { + o.wstring(*tag.name).ws(" = "); + if (tag.base == FixTag::RIGHTMOST) { // optimize '(YYCTXMARKER + ((YYCURSOR - YCTXMARKER) - tag))' // to '(YYCURSOR - tag)' - o.wstring(opts->yycursor).ws(" - ").wu64(tag.fix.dist); + o.wstring(opts->yycursor).ws(" - ").wu64(tag.dist); } else { - const tagver_t v = vers[tag.fix.base]; - o.wstring(vartag_expr(v)).ws(" - ").wu64(tag.fix.dist); + o.wstring(vartag_expr(fins[tag.base])).ws(" - ").wu64(tag.dist); } - o.ws(";\n"); - continue; - } - - // variable - const std::string expr = vartag_expr(vers[t]); - o.wind(ind); - if (generic) { - o.wstring(opts->yycopytag).ws(" (").wstring(*tag.name) - .ws(", ").wstring(expr).ws(")"); } else { - o.wstring(*tag.name).ws(" = ").wstring(expr); + o.wstring(opts->yycursor).ws(" -= ").wu64(tag.dist); } o.ws(";\n"); } diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 3172a7c9..c9d5cc67 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -145,9 +145,14 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra std::set tagnames, tagvars; if (!oldstyle_ctxmarker) { - const size_t ntags = tags.size(); - for (size_t i = 0; i < ntags; ++i) { - const std::string *name = tags[i].name; + for (size_t i = 0; i < vartags.size(); ++i) { + const std::string *name = vartags[i].name; + if (name) { + tagvars.insert(*name); + } + } + for (size_t i = 0; i < fixtags.size(); ++i) { + const std::string *name = fixtags[i].name; if (name) { tagvars.insert(*name); } diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 913de658..afd24de2 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -36,7 +36,9 @@ DFA::DFA , nStates(0) , head(NULL) , rules(dfa.rules) - , tags(dfa.tags) + , vartags(dfa.vartags) + , fixtags(dfa.fixtags) + , finvers(dfa.finvers) , tcpool(dfa.tcpool) , max_fill (0) , need_backup (false) @@ -100,7 +102,9 @@ DFA::~DFA() } delete &rules; - delete &tags; + delete &vartags; + delete &fixtags; + delete[] finvers; delete &tcpool; } diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index f676cd77..5d2eb639 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -67,7 +67,9 @@ struct DFA uint32_t nStates; State * head; std::valarray &rules; - std::valarray &tags; + std::vector &vartags; + std::vector &fixtags; + const tagver_t *finvers; tcpool_t &tcpool; size_t max_fill; bool need_backup; diff --git a/re2c/src/ir/dfa/cfg/cfg.cc b/re2c/src/ir/dfa/cfg/cfg.cc index 450ff7f9..1a944c5c 100644 --- a/re2c/src/ir/dfa/cfg/cfg.cc +++ b/re2c/src/ir/dfa/cfg/cfg.cc @@ -7,7 +7,7 @@ namespace re2c static void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb, cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall); static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbbfin, cfg_ix_t nbbfall); -static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, tcmd_t *cmd, tagver_t *use); +static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, tcmd_t *cmd, const Rule *rule); static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x); static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x); @@ -97,7 +97,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, for (size_t i = 0; i < nstate; ++i) { if (*a2b++ != 0) { const dfa_state_t *s = dfa.states[i]; - basic_block(b++, NULL, NULL, &s->tcmd[nsym], dfa.rules[s->rule].tags); + basic_block(b++, NULL, NULL, &s->tcmd[nsym], &dfa.rules[s->rule]); } } @@ -107,7 +107,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, const dfa_state_t *s = dfa.states[i]; std::fill(been, been + nstate, false); fallback(dfa, arc2bb, been, succe = succb, i); - basic_block(b++, succb, succe, &s->tcmd[nsym + 1], dfa.rules[s->rule].tags); + basic_block(b++, succb, succe, &s->tcmd[nsym + 1], &dfa.rules[s->rule]); } } @@ -117,7 +117,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, } void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, - const cfg_ix_t *succe, tcmd_t *cmd, tagver_t *use) + const cfg_ix_t *succe, tcmd_t *cmd, const Rule *rule) { const size_t n = static_cast(succe - succb); cfg_ix_t *s = new cfg_ix_t[n]; @@ -126,7 +126,7 @@ void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, bb->succb = s; bb->succe = s + n; bb->cmd = cmd; - bb->use = use; + bb->rule = rule; } // find immediate successors of the given bblock diff --git a/re2c/src/ir/dfa/cfg/cfg.h b/re2c/src/ir/dfa/cfg/cfg.h index d9d69be8..91db480d 100644 --- a/re2c/src/ir/dfa/cfg/cfg.h +++ b/re2c/src/ir/dfa/cfg/cfg.h @@ -13,7 +13,7 @@ struct cfg_bb_t cfg_ix_t *succb; cfg_ix_t *succe; tcmd_t *cmd; - tagver_t *use; + const Rule *rule; }; // control flow graph diff --git a/re2c/src/ir/dfa/cfg/liveanal.cc b/re2c/src/ir/dfa/cfg/liveanal.cc index 03ea6413..b64149ac 100644 --- a/re2c/src/ir/dfa/cfg/liveanal.cc +++ b/re2c/src/ir/dfa/cfg/liveanal.cc @@ -7,9 +7,8 @@ namespace re2c void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live) { - const size_t - nver = static_cast(cfg.dfa.maxtagver) + 1, - ntag = cfg.dfa.tags.size(); + const size_t nver = static_cast(cfg.dfa.maxtagver) + 1; + const tagver_t *fins = cfg.dfa.finvers; bool *buf1 = new bool[nver]; bool *buf2 = new bool[nver]; @@ -29,16 +28,14 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live) memset(live, 0, cfg.nbbfin * nver * sizeof(bool)); for (cfg_ix_t i = cfg.nbbarc; i < cfg.nbbfin; ++i) { const cfg_bb_t *b = cfg.bblocks + i; + const Rule *r = b->rule; bool *l = &live[i * nver]; // all final bblocks have USE tags, but no successors - assert(b->use && b->succb == b->succe); + assert(r && b->succb == b->succe); - for (size_t t = 0; t < ntag; ++t) { - const tagver_t u = b->use[t]; - if (u != TAGVER_ZERO) { - l[u] = true; - } + for (size_t t = r->lvar; t < r->hvar; ++t) { + l[fins[t]] = true; } } @@ -50,7 +47,7 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live) bool *old = &live[i * nver]; // transition bblocks have no USE tags - assert(!b->use); + assert(!b->rule); memcpy(buf1, old, nver * sizeof(bool)); for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { @@ -98,16 +95,14 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live) */ for (cfg_ix_t i = cfg.nbbfin; i < cfg.nbbfall; ++i) { const cfg_bb_t *b = cfg.bblocks + i; + const Rule *r = b->rule; // all fallback bblocks have USE tags - assert(b->use); + assert(r); memset(buf1, 0, nver * sizeof(bool)); - for (size_t t = 0; t < ntag; ++t) { - const tagver_t u = b->use[t]; - if (u != TAGVER_ZERO) { - buf1[u] = true; - } + for (size_t t = r->lvar; t < r->hvar; ++t) { + buf1[fins[t]] = true; } for (const tagsave_t *p = b->cmd->save; p; p = p->next) { buf1[p->ver] = false; diff --git a/re2c/src/ir/dfa/cfg/rename.cc b/re2c/src/ir/dfa/cfg/rename.cc index e2924537..ad3cd5f6 100644 --- a/re2c/src/ir/dfa/cfg/rename.cc +++ b/re2c/src/ir/dfa/cfg/rename.cc @@ -9,8 +9,8 @@ void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver) if (oldmax == maxver) return; oldmax = maxver; - cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; - for (; b < e; ++b) { + cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall; + for (; b < be; ++b) { // tag versions in save commands for (tagsave_t *p = b->cmd->save; p; p = p->next) { @@ -29,16 +29,10 @@ void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver) } } - // final tag versions in rules - std::valarray &rules = cfg.dfa.rules; - for (size_t r = 0, t = 0; r < rules.size(); ++r) { - Rule &rule = rules[r]; - for (; t < rule.htag; ++t) { - tagver_t &v = rule.tags[t]; - if (v != TAGVER_ZERO) { - v = ver2new[v]; - } - } + // final tag versions + tagver_t *f = cfg.dfa.finvers, *fe = f + cfg.dfa.vartags.size(); + for (; f < fe; ++f) { + *f = ver2new[*f]; } } diff --git a/re2c/src/ir/dfa/closure.cc b/re2c/src/ir/dfa/closure.cc index 2ef786ae..24defa6c 100644 --- a/re2c/src/ir/dfa/closure.cc +++ b/re2c/src/ir/dfa/closure.cc @@ -183,25 +183,25 @@ tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool, tagver_t *tags = tagpool.buffer1; std::fill(tags, tags + ntag, TAGVER_ZERO); - size_t r = 0, lt = 0, ht; + size_t r = 0; for (cclositer_t c = clos.begin(), e = clos.end(); c != e;) { const tagver_t *x = tagpool[c->tagidx]; // find next rule that occurs in closure - for (; r < c->state->rule; lt = rules[r].htag, ++r); - ht = rules[r].htag; + for (; r < c->state->rule; ++r); + const Rule &rule = rules[r]; // merge tags of the 1st item belonging to this rule - for (size_t t = lt; t < ht; ++t) { + for (size_t t = rule.lvar; t < rule.hvar; ++t) { tags[t] = x[t]; } - // check the remaining items with this for tag nondeterminism: + // check the remaining items for tag nondeterminism: // if some tag differs from that of the 1st item, then it is // nondeterministic (don't merge it, only note the conflict) for (++c; c != e && c->state->rule == r; ++c) { const tagver_t *y = tagpool[c->tagidx]; - for (size_t t = lt; t < ht; ++t) { + for (size_t t = rule.lvar; t < rule.hvar; ++t) { badtags[t] |= y[t] != x[t]; } } diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 6f81c4e1..59c731d5 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -13,10 +13,9 @@ namespace re2c { -static tagver_t vartag_maxver(const std::valarray &tags); static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol); static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol); -static void warn_bad_tags(const bool *badtags, const std::valarray &tags, +static void warn_bad_tags(const bool *badtags, const std::vector &tags, const std::valarray &rules, const std::string &cond); const size_t dfa_t::NIL = std::numeric_limits::max(); @@ -51,17 +50,23 @@ dfa_t::dfa_t(const nfa_t &nfa, : states() , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) - , tags(*nfa.tags) + , vartags(nfa.vartags) + , fixtags(nfa.fixtags) + , finvers(NULL) , tcpool(*new tcpool_t) , maxtagver(0) { - const size_t ntag = tags.size(); + const size_t ntag = vartags.size(); Tagpool tagpool(ntag); kernels_t kernels; closure_t clos1, clos2; bool *badtags = new bool[ntag](); - maxtagver = vartag_maxver(tags); + finvers = new tagver_t[ntag]; + for (size_t t = 0; t < ntag; ++t) { + finvers[t] = ++maxtagver; + } + maxtagver *= 2; clos1.push_back(clos_t(nfa.root, ZERO_TAGS)); closure(clos1, clos2, tagpool, tcpool, rules, badtags); kernels.insert(clos2); @@ -80,7 +85,9 @@ dfa_t::dfa_t(const nfa_t &nfa, const nfa_state_t *f = kernel->state[i]; if (f->type == nfa_state_t::FIN) { s->rule = f->rule; - s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], rules[s->rule].tags, ntag); + const Rule &rule = rules[s->rule]; + s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], + finvers, rule.lvar, rule.hvar, ntag); break; } } @@ -95,30 +102,19 @@ dfa_t::dfa_t(const nfa_t &nfa, } } - warn_bad_tags(badtags, tags, rules, cond); + warn_bad_tags(badtags, vartags, rules, cond); delete[] badtags; } -tagver_t vartag_maxver(const std::valarray &tags) -{ - const size_t ntag = tags.size(); - for (size_t t = ntag; t > 0; --t) { - if (tags[t - 1].type == Tag::VAR) { - return static_cast(ntag + t); - } - } - return 0; -} - void warn_bad_tags(const bool *badtags, - const std::valarray &tags, + const std::vector &tags, const std::valarray &rules, const std::string &cond) { const size_t ntags = tags.size(); for (size_t i = 0; i < ntags; ++i) { if (badtags[i]) { - const Tag &tag = tags[i]; + const VarTag &tag = tags[i]; const uint32_t line = rules[tag.rule].info->loc.line; warn.nondeterministic_tags(line, cond, tag.name); } diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index bf020235..e6492b40 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -51,7 +51,9 @@ struct dfa_t std::vector states; const size_t nchars; std::valarray &rules; - std::valarray &tags; + std::vector &vartags; + std::vector &fixtags; + tagver_t *finvers; tcpool_t &tcpool; tagver_t maxtagver; diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc index b48022e2..9afdf8d6 100644 --- a/re2c/src/ir/nfa/init_rules.cc +++ b/re2c/src/ir/nfa/init_rules.cc @@ -5,69 +5,58 @@ namespace re2c { -static void assert_no_tags_in_trailing_context(const Rule &rule, - const std::valarray &tags) -{ - // rule tags should not contain other trailing contexts - for (size_t i = rule.ltag; i < rule.htag; ++i) { - if (tags[i].name == NULL) { - error("line %u: tags in trailing context", - rule.info->loc.line); - exit(1); - } - } -} - static void assert_tags_used_once(const Rule &rule, - const std::valarray &tags) + const std::vector &vartags, const std::vector &fixtags) { std::set names; - for (size_t i = rule.ltag; i < rule.htag; ++i) { - const std::string *name = tags[i].name; - if (name && !names.insert(*name).second) { - error("line %u: tag '%s' is used multiple" - " times in the same rule", - rule.info->loc.line, name->c_str()); - exit(1); - } + const std::string *name = NULL; + + for (size_t t = rule.lvar; t < rule.hvar; ++t) { + name = vartags[t].name; + if (name && !names.insert(*name).second) goto error; } + + for (size_t t = rule.lfix; t < rule.hfix; ++t) { + name = fixtags[t].name; + if (name && !names.insert(*name).second) goto error; + } + + return; + +error: + error("line %u: tag '%s' is used multiple times in the same rule", + rule.info->loc.line, name->c_str()); + exit(1); } void init_rules(const std::vector ®exps, - std::valarray &rules, - const std::valarray &tags) + std::valarray &rules, const std::vector &vartags, + const std::vector &fixtags) { - const size_t nr = rules.size(); - const size_t nt = tags.size(); + const size_t + nr = rules.size(), + nv = vartags.size(), + nf = fixtags.size(); - for (size_t r = 0, t = 0; r < nr; ++r) { + for (size_t r = 0, v = 0, f = 0, t; r < nr; ++r) { Rule &rule = rules[r]; rule.info = regexps[r]->info; - rule.ltag = t; - for (; t < nt && tags[t].rule == r; ++t); - rule.htag = t; + rule.lvar = v; + for (; v < nv && vartags[v].rule == r; ++v); + rule.hvar = v; + + rule.lfix = f; + for (; f < nf && fixtags[f].rule == r; ++f); + rule.hfix = f; - // mark *all* variable tags, including trailing context - tagver_t *vers = new tagver_t[nt]; - std::fill(vers, vers + nt, TAGVER_ZERO); - for (size_t i = rule.ltag; i < rule.htag; ++i) { - if (tags[i].type == Tag::VAR) { - vers[i] = static_cast(i + 1); - } - } - rule.tags = vers; + for (t = rule.lvar; t < rule.hvar && vartags[t].name; ++t); + rule.tvar = t; - // tags in trailing context are forbidden (they make no sense), - // and since tags are constructed in reversed order, this implies - // that trailing context, if present, can only be the first tag - if (rule.ltag < rule.htag && tags[rule.ltag].name == NULL) { - rule.trail = rule.ltag++; - } + for (t = rule.lfix; t < rule.hfix && fixtags[t].name; ++t); + rule.tfix = t; - // sanity checks - assert_no_tags_in_trailing_context(rule, tags); - assert_tags_used_once(rule, tags); + assert_tags_used_once(rule, vartags, fixtags); } } diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index 07fe5524..048a663e 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -7,17 +7,17 @@ nfa_t::nfa_t(const std::vector ®exps) , size(0) , states(NULL) , rules(*new std::valarray(regexps.size())) - , tags(NULL) + , vartags(*new std::vector()) + , fixtags(*new std::vector()) , root(NULL) { size_t ntags = 0; max_size = counters(regexps, ntags); states = new nfa_state_t[max_size]; - tags = new std::valarray(ntags); regexps2nfa(regexps, *this); - init_rules(regexps, rules, *tags); + init_rules(regexps, rules, vartags, fixtags); } nfa_t::~nfa_t() diff --git a/re2c/src/ir/nfa/nfa.h b/re2c/src/ir/nfa/nfa.h index f0f65514..372ab215 100644 --- a/re2c/src/ir/nfa/nfa.h +++ b/re2c/src/ir/nfa/nfa.h @@ -89,7 +89,8 @@ struct nfa_t size_t size; nfa_state_t *states; std::valarray &rules; - std::valarray *tags; + std::vector &vartags; + std::vector &fixtags; nfa_state_t *root; nfa_t(const std::vector &rs); @@ -101,7 +102,8 @@ struct nfa_t size_t counters(const std::vector ®exps, size_t &ntags); void regexps2nfa(const std::vector ®exps, nfa_t &nfa); bool nullable_rule(const RegExpRule *rule); -void init_rules(const std::vector ®exps, std::valarray &rules, const std::valarray &tags); +void init_rules(const std::vector ®exps, std::valarray &rules, + const std::vector &vartags, const std::vector &fixtags); } // namespace re2c diff --git a/re2c/src/ir/nfa/regexps2nfa.cc b/re2c/src/ir/nfa/regexps2nfa.cc index 6db4e5a3..3ca3d357 100644 --- a/re2c/src/ir/nfa/regexps2nfa.cc +++ b/re2c/src/ir/nfa/regexps2nfa.cc @@ -27,9 +27,8 @@ static const size_t VARDIST = std::numeric_limits::max(); // the order of regexp traversal determines the order in which tags are // assigned indices. Splitting this in two passes would require maintaining // exactly the same order of traversal, which is fragile. -static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx, - size_t &dist, size_t &base, bool toplevel, const RegExp *re, - nfa_state_t *t) +static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &dist, + size_t &base, bool toplevel, const RegExp *re, nfa_state_t *t) { nfa_state_t *s = NULL; switch (re->type) { @@ -44,18 +43,18 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx, break; case RegExp::ALT: { nfa_state_t *s1, *s2, *t0, *t1, *t2, *q; - size_t d1 = dist, d2 = dist, i = tidx; + size_t d1 = dist, d2 = dist, i = nfa.vartags.size(); t0 = &nfa.states[nfa.size++]; - s1 = regexp2nfa(nfa, nrule, tidx, d1, base, false, re->alt.re1, t0); - for (t2 = t; i < tidx; ++i) { + s1 = regexp2nfa(nfa, nrule, d1, base, false, re->alt.re1, t0); + for (t2 = t; i < nfa.vartags.size(); ++i) { q = &nfa.states[nfa.size++]; q->make_tag(nrule, t2, i, true); t2 = q; } - s2 = regexp2nfa(nfa, nrule, tidx, d2, base, false, re->alt.re2, t2); - for (t1 = t; i < tidx; ++i) { + s2 = regexp2nfa(nfa, nrule, d2, base, false, re->alt.re2, t2); + for (t1 = t; i < nfa.vartags.size(); ++i) { q = &nfa.states[nfa.size++]; q->make_tag(nrule, t1, i, true); t1 = q; @@ -69,13 +68,13 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx, break; } case RegExp::CAT: - s = regexp2nfa(nfa, nrule, tidx, dist, base, toplevel, re->cat.re2, t); - s = regexp2nfa(nfa, nrule, tidx, dist, base, toplevel, re->cat.re1, s); + s = regexp2nfa(nfa, nrule, dist, base, toplevel, re->cat.re2, t); + s = regexp2nfa(nfa, nrule, dist, base, toplevel, re->cat.re1, s); break; case RegExp::ITER: { // see note [Kleene star is expressed in terms of plus] nfa_state_t *q = &nfa.states[nfa.size++]; - s = regexp2nfa(nfa, nrule, tidx, dist, base, false, re->iter, q); + s = regexp2nfa(nfa, nrule, dist, base, false, re->iter, q); q->make_alt(nrule, t, s); dist = VARDIST; @@ -84,35 +83,36 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx, case RegExp::TAG: { const std::string *name = re->tag; if (toplevel && dist != VARDIST) { - init_fix_tag((*nfa.tags)[tidx], nrule, name, base, dist); + FixTag fix = {name, nrule, base, dist}; + nfa.fixtags.push_back(fix); s = t; } else { + const size_t ntag = nfa.vartags.size(); + VarTag var = {name, nrule}; + nfa.vartags.push_back(var); if (toplevel) { - base = tidx; + base = ntag; dist = 0; } - init_var_tag((*nfa.tags)[tidx], nrule, name); s = &nfa.states[nfa.size++]; - s->make_tag(nrule, t, tidx, false); + s->make_tag(nrule, t, ntag, false); } if (name == NULL) dist = 0; - ++tidx; break; } } return s; } -static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, - size_t &tidx, const RegExpRule *rule) +static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, const RegExpRule *rule) { const bool generic = opts->input_api.type() == InputAPI::CUSTOM; - size_t base = Tag::NONE, dist = 0; + size_t base = FixTag::RIGHTMOST, dist = 0; nfa_state_t *s = &nfa.states[nfa.size++]; s->make_fin(nrule); - return regexp2nfa(nfa, nrule, tidx, dist, base, !generic, rule->re, s); + return regexp2nfa(nfa, nrule, dist, base, !generic, rule->re, s); } void regexps2nfa(const std::vector ®exps, nfa_t &nfa) @@ -123,11 +123,10 @@ void regexps2nfa(const std::vector ®exps, nfa_t &nfa) return; } - size_t tidx = 0; - nfa_state_t *s = regexp2nfa_rule(nfa, 0, tidx, regexps[0]); + nfa_state_t *s = regexp2nfa_rule(nfa, 0, regexps[0]); for (size_t i = 1; i < nregexps; ++i) { nfa_state_t *t = &nfa.states[nfa.size++]; - t->make_alt(i, s, regexp2nfa_rule(nfa, i, tidx, regexps[i])); + t->make_alt(i, s, regexp2nfa_rule(nfa, i, regexps[i])); s = t; } nfa.root = s; diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index 9d7437fa..e74683a0 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -34,26 +34,22 @@ struct Rule static const size_t NONE; const RuleInfo *info; - - size_t ltag; - size_t htag; - size_t trail; - tagver_t *tags; std::set shadow; - Rule() - : info(NULL) - , ltag(0) - , htag(0) - , trail(Tag::NONE) - , tags(NULL) - , shadow() - {} - ~Rule() - { - delete[] tags; - } + // variable tags + size_t lvar; // first + size_t hvar; // next to last + size_t tvar; // trailing context + // fixed tags + size_t lfix; // first + size_t hfix; // next to last + size_t tfix; // trailing context + + Rule(): info(NULL), shadow(), + lvar(0), hvar(0), tvar(0), + lfix(0), hfix(0), tfix(0) + {} FORBID_COPY(Rule); }; diff --git a/re2c/src/ir/skeleton/generate_code.cc b/re2c/src/ir/skeleton/generate_code.cc index a4342744..cbadee02 100644 --- a/re2c/src/ir/skeleton/generate_code.cc +++ b/re2c/src/ir/skeleton/generate_code.cc @@ -354,17 +354,18 @@ void emit_epilog(OutputFile &o, const std::set &names) void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid) { const std::string &name = dfa.name; - const Rule &rule = dfa.rules[rid]; - const size_t ltag = rule.ltag, htag = rule.htag, + const Rule &r = dfa.rules[rid]; + const size_t + ntag = 3 + r.hvar - r.lvar - (r.tvar != r.hvar), rkey = rule2key(rid, dfa.key_size, dfa.def_rule); - o.wind(ind).ws("status = check_key_count_").wstring(name) - .ws("(keys_count, i, ").wu64(3 + htag - ltag).ws(")\n") - .wind(ind + 1).ws(" || action_").wstring(name) + o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ") + .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name) .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")"); - for (size_t t = ltag; t < htag; ++t) { - const std::string &tag = *dfa.tags[t].name; + for (size_t t = r.lvar; t < r.hvar; ++t) { + if (t == r.tvar) continue; + const std::string &tag = *dfa.vartags[t].name; o.ws("\n").wind(ind + 1).ws(" || check_tag_").wstring(name) .ws("(&i, keys, ").wstring(tag).ws(", input, token, \"") .wstring(tag).ws("\")"); diff --git a/re2c/src/ir/skeleton/generate_data.cc b/re2c/src/ir/skeleton/generate_data.cc index b14c5f43..204125e3 100644 --- a/re2c/src/ir/skeleton/generate_data.cc +++ b/re2c/src/ir/skeleton/generate_data.cc @@ -176,28 +176,26 @@ static void write_keys(const path_t &path, const Skeleton &skel, } const size_t rule = path.node(skel, f).rule; - size_t matched = 0, ltag = 0, htag = 0; - const tagver_t *vers = NULL; + size_t matched = 0, ltag = 0, htag = 0, trail = 0; if (rule != Rule::NONE) { const Rule &r = skel.rules[rule]; - ltag = r.ltag; - htag = r.htag; - vers = r.tags; + ltag = r.lvar; + htag = r.hvar; + trail = r.tvar; // matched length might depend on tag values - const size_t t = r.trail; - if (t == Tag::NONE) { + if (trail == htag) { matched = f; } else { - assert(skel.tags[t].type == Tag::VAR); - matched = tags[vers[t]]; + assert(r.tfix == r.hfix); // no fixed trailing context + matched = tags[skel.finvers[trail]]; assert(matched != Skeleton::DEFTAG); } } // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags - const size_t nkey = 3 + htag - ltag; + const size_t nkey = 3 + htag - ltag - (trail != htag); key_t *keys = new key_t[nkey * width], *k = keys; for (size_t w = 0; w < width; ++w) { *k++ = to_le(static_cast(path.len())); @@ -205,7 +203,8 @@ static void write_keys(const path_t &path, const Skeleton &skel, *k++ = to_le(rule2key(rule, skel.defrule)); const size_t *ts = &tags[w * nver]; for (size_t t = ltag; t < htag; ++t) { - *k++ = to_le(static_cast(ts[vers[t]])); + if (t == trail) continue; + *k++ = to_le(static_cast(ts[skel.finvers[t]])); } } // dump to file diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 81aa5bb5..05c3bbf0 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -65,7 +65,8 @@ Skeleton::Skeleton( , defrule(def) , ntagver(static_cast(dfa.maxtagver) + 1) , rules(dfa.rules) - , tags(dfa.tags) + , vartags(dfa.vartags) + , finvers(dfa.finvers) { // initialize nodes const size_t nil = nodes_count - 1; diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index e8ebc336..c8c7c7dc 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -72,7 +72,8 @@ struct Skeleton size_t defrule; size_t ntagver; const std::valarray &rules; - const std::valarray &tags; + const std::vector &vartags; + const tagver_t *finvers; Skeleton(const dfa_t &dfa, const charset_t &cs, size_t def, const std::string &dfa_name, const std::string &dfa_cond, diff --git a/re2c/src/ir/tag.cc b/re2c/src/ir/tag.cc index 09254b59..90528a9e 100644 --- a/re2c/src/ir/tag.cc +++ b/re2c/src/ir/tag.cc @@ -1,34 +1,10 @@ #include -#include "src/ir/rule.h" #include "src/ir/tag.h" namespace re2c { -const size_t Tag::NONE = std::numeric_limits::max(); - -Tag::Tag() - : type(VAR) - , rule(Rule::NONE) - , name(NULL) - , fix() -{} - -void init_var_tag(Tag &tag, size_t r, const std::string *n) -{ - tag.type = Tag::VAR; - tag.rule = r; - tag.name = n; -} - -void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d) -{ - tag.type = Tag::FIX; - tag.rule = r; - tag.name = n; - tag.fix.base = b; - tag.fix.dist = d; -} +const size_t FixTag::RIGHTMOST = std::numeric_limits::max(); } // namespace re2c diff --git a/re2c/src/ir/tag.h b/re2c/src/ir/tag.h index 06b6700d..8eaceca9 100644 --- a/re2c/src/ir/tag.h +++ b/re2c/src/ir/tag.h @@ -4,7 +4,6 @@ #include #include "src/util/c99_stdint.h" -#include "src/util/forbid_copy.h" namespace re2c { @@ -14,25 +13,21 @@ typedef int32_t tagver_t; static const tagver_t TAGVER_BOTTOM = -1; // default value for tag static const tagver_t TAGVER_ZERO = 0; // absense of tag -struct Tag +struct VarTag { - static const size_t NONE; - - enum {VAR, FIX} type; - size_t rule; const std::string *name; - struct - { - size_t base; - size_t dist; - } fix; - - Tag(); - FORBID_COPY(Tag); + size_t rule; }; -void init_var_tag(Tag &tag, size_t r, const std::string *n); -void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d); +struct FixTag +{ + static const size_t RIGHTMOST; + + const std::string *name; + size_t rule; + size_t base; + size_t dist; +}; } // namespace re2c diff --git a/re2c/src/ir/tcmd.cc b/re2c/src/ir/tcmd.cc index 61fa2af6..476bc458 100644 --- a/re2c/src/ir/tcmd.cc +++ b/re2c/src/ir/tcmd.cc @@ -90,14 +90,13 @@ tagsave_t *tcpool_t::conv_to_save(const tagver_t *vers, size_t ntag) return s; } -tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag) +tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, + size_t ltag, size_t htag, size_t ntag) { tagsave_t *s = NULL; tagcopy_t *c = NULL; - for (size_t t = ntag; t-- > 0;) { + for (size_t t = ltag; t < htag; ++t) { const tagver_t v = vers[t], f = fins[t]; - if (f == TAGVER_ZERO) continue; - if (v != TAGVER_ZERO) { s = make_save(s, f, v == TAGVER_BOTTOM); } else { diff --git a/re2c/src/ir/tcmd.h b/re2c/src/ir/tcmd.h index af63bcc3..a9121fe8 100644 --- a/re2c/src/ir/tcmd.h +++ b/re2c/src/ir/tcmd.h @@ -70,7 +70,7 @@ public: tagsave_t *make_save(tagsave_t *next, tagver_t ver, bool bottom); tagcopy_t *make_copy(tagcopy_t *next, tagver_t lhs, tagver_t rhs); tagsave_t *conv_to_save(const tagver_t *vers, size_t ntag); - tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag); + tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ltag, size_t htag, size_t ntag); tcid_t insert(const tagsave_t *save, const tagcopy_t *copy); const tccmd_t &operator[](tcid_t id) const; diff --git a/re2c/test/tags/fix4.i--tags.c b/re2c/test/tags/fix4.i--tags.c index 7ba19c81..58563e15 100644 --- a/re2c/test/tags/fix4.i--tags.c +++ b/re2c/test/tags/fix4.i--tags.c @@ -61,10 +61,10 @@ yy9: } yy11: ++YYCURSOR; - p4 = YYCURSOR - 1; p3 = yyt1; - p2 = yyt1 - 1; p1 = yyt2; + p4 = YYCURSOR - 1; + p2 = yyt1 - 1; p0 = yyt2 - 1; { printf("'%.*s', '%.*s', '%.*s', '%.*s', '%.*s'\n", diff --git a/re2c/test/tags/fix4_trail.i--tags.c b/re2c/test/tags/fix4_trail.i--tags.c index 4963a7ac..8e913114 100644 --- a/re2c/test/tags/fix4_trail.i--tags.c +++ b/re2c/test/tags/fix4_trail.i--tags.c @@ -61,10 +61,10 @@ yy9: } yy11: ++YYCURSOR; - YYCURSOR -= 1; p3 = yyt1; - p2 = yyt1 - 1; p1 = yyt2; + YYCURSOR -= 1; + p2 = yyt1 - 1; p0 = yyt2 - 1; { printf("'%.*s', '%.*s', '%.*s', '%.*s', '%s'\n", diff --git a/re2c/test/tags/fix5.i--tags.c b/re2c/test/tags/fix5.i--tags.c index 42bb00a8..8f122844 100644 --- a/re2c/test/tags/fix5.i--tags.c +++ b/re2c/test/tags/fix5.i--tags.c @@ -79,10 +79,10 @@ yy12: goto yy15; yy13: p4 = yyt1; - p3 = yyt1 - 1; p2 = yyt2; - p1 = yyt2 - 1; p0 = yyt3; + p3 = yyt1 - 1; + p1 = yyt2 - 1; { printf("'%.*s', '%.*s', '%.*s', '%.*s', '%.*s'\n", p1 - p0, p0, diff --git a/re2c/test/tags/fix5_trail.i--tags.c b/re2c/test/tags/fix5_trail.i--tags.c index a21476e8..ee57949a 100644 --- a/re2c/test/tags/fix5_trail.i--tags.c +++ b/re2c/test/tags/fix5_trail.i--tags.c @@ -79,10 +79,10 @@ yy12: goto yy15; yy13: YYCURSOR = yyt1; - p3 = yyt1 - 1; p2 = yyt2; - p1 = yyt2 - 1; p0 = yyt3; + p3 = yyt1 - 1; + p1 = yyt2 - 1; { printf("'%.*s', '%.*s', '%.*s', '%.*s', '%s'\n", p1 - p0, p0, -- 2.50.0