From de3f9e70a45c42fcb848a347ece3a727b8fb983e Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 9 May 2016 15:13:42 +0100 Subject: [PATCH] Keep fixed and variable tags together in one array. This complicates tag deduplication a little (fixed tags have to be masked), but simplifies tag initialization and rule handling. --- re2c/Makefile.am | 2 +- re2c/src/codegen/emit.h | 2 + re2c/src/codegen/emit_action.cc | 42 ++++----- re2c/src/codegen/emit_dfa.cc | 25 ++++- re2c/src/codegen/go_emit.cc | 3 +- re2c/src/codegen/input_api.cc | 29 +++--- re2c/src/codegen/input_api.h | 10 +- re2c/src/conf/warn.cc | 6 +- re2c/src/conf/warn.h | 2 +- re2c/src/ir/adfa/adfa.cc | 6 +- re2c/src/ir/adfa/adfa.h | 3 +- re2c/src/ir/ctx.cc | 38 ++++---- re2c/src/ir/ctx.h | 76 ++++------------ re2c/src/ir/dfa/context_deduplication.cc | 46 ++++++---- re2c/src/ir/dfa/determinization.cc | 23 +++-- re2c/src/ir/dfa/dfa.h | 3 +- .../ir/nfa/{sizeof_regexps.cc => counters.cc} | 17 ++-- re2c/src/ir/nfa/init_rules.cc | 91 ++++++------------- re2c/src/ir/nfa/make_tags.cc | 55 +++++------ re2c/src/ir/nfa/nfa.cc | 19 ++-- re2c/src/ir/nfa/nfa.h | 24 ++--- re2c/src/ir/nfa/regexps2nfa.cc | 27 +++--- re2c/src/ir/rule.h | 16 ++-- re2c/src/ir/skeleton/path.h | 21 ++--- re2c/src/ir/skeleton/skeleton.cc | 4 +- re2c/src/ir/skeleton/skeleton.h | 2 +- 26 files changed, 264 insertions(+), 328 deletions(-) rename re2c/src/ir/nfa/{sizeof_regexps.cc => counters.cc} (54%) diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 721202c9..bcf79e5d 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -82,12 +82,12 @@ SRC = \ src/conf/msg.cc \ src/conf/opt.cc \ src/conf/warn.cc \ + src/ir/nfa/counters.cc \ src/ir/nfa/init_rules.cc \ src/ir/nfa/make_tags.cc \ src/ir/nfa/nfa.cc \ src/ir/nfa/nullable.cc \ src/ir/nfa/regexps2nfa.cc \ - src/ir/nfa/sizeof_regexps.cc \ src/ir/adfa/adfa.cc \ src/ir/adfa/prepare.cc \ src/ir/dfa/context_deduplication.cc \ diff --git a/re2c/src/codegen/emit.h b/re2c/src/codegen/emit.h index f094354f..c08af69f 100644 --- a/re2c/src/codegen/emit.h +++ b/re2c/src/codegen/emit.h @@ -15,6 +15,8 @@ void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh, void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh, const State *to, const DFA &dfa, size_t tags); void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags); +std::string vartag_name(const std::string *name, size_t rule); +std::string vartag_expr(const std::string *name, size_t rule); } // namespace re2c diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index aae5bdf5..b60ccc6e 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -209,20 +209,15 @@ void emit_accept(OutputFile &o, uint32_t ind, bool &readCh, o.wind(ind).ws("}\n"); } -static void subst_contexts(std::string &action, const Rule &rule, - const std::vector &vartags, - const std::vector &fixtags) +static void subst_contexts(std::string &action, + const Rule &rule, const std::valarray &tags) { - for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { - const CtxVar &ctx = vartags[i]; - strrreplace(action, "@" + *ctx.codename, - opts->input_api.expr_ctx(ctx.expr())); - } - - for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { - const CtxFix &ctx = fixtags[i]; - strrreplace(action, "@" + *ctx.codename, - opts->input_api.expr_ctx_fix(ctx, vartags)); + for (size_t i = rule.ltag; i < rule.htag; ++i) { + const Tag &tag = tags[i]; + const std::string s = tag.type == Tag::VAR + ? opts->input_api.expr_ctx(vartag_expr(tag.name, tag.rule)) + : opts->input_api.expr_ctx_fix(tag, tags); + strrreplace(action, "@" + *tag.name, s); } } @@ -231,21 +226,18 @@ void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) const Rule &rule = dfa.rules[rule_idx]; const RuleInfo *info = rule.info; - const Trail &trail = rule.trail; - switch (trail.type) { - case Trail::NONE: break; - case Trail::VAR: + if (rule.trail != Tag::NONE) { + const Tag &tag = dfa.tags[rule.trail]; + if (tag.type == Tag::VAR) { if (dfa.base_ctxmarker) { o.wstring(opts->input_api.stmt_restorectx_var_base(ind, - dfa.vartags[trail.var].expr())); + vartag_expr(tag.name, tag.rule))); } else { o.wstring(opts->input_api.stmt_restorectx_var(ind)); } - break; - case Trail::FIX: - o.wstring(opts->input_api.stmt_restorectx_fix(ind, - dfa.fixtags[trail.fix].dist)); - break; + } else { + o.wstring(opts->input_api.stmt_restorectx_fix(ind, tag.fix.dist)); + } } if (opts->target == opt_t::SKELETON) { @@ -261,7 +253,7 @@ void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) o.wind(ind).wstring(yySetupRule).ws("\n"); } std::string action = code->text; - subst_contexts(action, rule, dfa.vartags, dfa.fixtags); + subst_contexts(action, rule, dfa.tags); o.wline_info(code->loc.line, code->loc.filename.c_str()) .wind(ind).wstring(action).ws("\n") .wdelay_line_info(); @@ -398,7 +390,7 @@ void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags) if (tags != 0) { if (dfa.base_ctxmarker) { o.wstring(opts->input_api.stmt_dist(ind, - dfa.tagpool[tags], dfa.vartags)); + dfa.tagpool[tags], dfa.tags)); } else { o.wstring(opts->input_api.stmt_backupctx(ind)); } diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 66e3a150..aa6d7299 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -145,8 +145,11 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra std::set ctxnames; if (base_ctxmarker) { - for (size_t i = 0; i < vartags.size(); ++i) { - ctxnames.insert(vartags[i].name()); + for (size_t i = 0; i < tags.size(); ++i) { + const Tag &t = tags[i]; + if (t.type == Tag::VAR && t.var.orig == i) { + ctxnames.insert(vartag_name(t.name, t.rule)); + } } ob.contexts.insert(ctxnames.begin(), ctxnames.end()); } @@ -364,4 +367,22 @@ void genCondGoto(OutputFile & o, uint32_t ind, const std::vector & bWroteCondCheck = true; } +std::string vartag_name(const std::string *name, size_t rule) +{ + std::ostringstream s; + s << opts->contexts_prefix << rule; + if (name != NULL) { + s << *name; + } + return s.str(); +} + +std::string vartag_expr(const std::string *name, size_t rule) +{ + const std::string s = vartag_name(name, rule); + std::string e = opts->contexts_expr; + strrreplace(e, "@@", s); + return e; +} + } // end namespace re2c diff --git a/re2c/src/codegen/go_emit.cc b/re2c/src/codegen/go_emit.cc index e86b70f2..1593d826 100644 --- a/re2c/src/codegen/go_emit.cc +++ b/re2c/src/codegen/go_emit.cc @@ -221,7 +221,8 @@ void Dot::emit(OutputFile &o, const DFA &dfa) const bool *tags = dfa.tagpool[c.tags]; for (size_t j = 0; j < dfa.tagpool.ntags; ++j) { if (tags[j]) { - o.ws("<").wstring(dfa.vartags[j].name()).ws(">"); + const Tag &t = dfa.tags[dfa.tags[j].var.orig]; + o.ws("<").wstring(vartag_name(t.name, t.rule)).ws(">"); } } o.ws("\"]\n"); diff --git a/re2c/src/codegen/input_api.cc b/re2c/src/codegen/input_api.cc index 659c4d43..1aab82bf 100644 --- a/re2c/src/codegen/input_api.cc +++ b/re2c/src/codegen/input_api.cc @@ -1,6 +1,7 @@ #include #include +#include "src/codegen/emit.h" #include "src/codegen/input_api.h" #include "src/codegen/indent.h" #include "src/conf/opt.h" @@ -107,44 +108,46 @@ std::string InputAPI::expr_dist () const return s; } -std::string InputAPI::stmt_dist (uint32_t ind, const bool *tags, - const std::vector &contexts) const +std::string InputAPI::stmt_dist (uint32_t ind, const bool *mask, + const std::valarray &tags) const { std::string s = indent(ind); - for (size_t i = 0; i < contexts.size(); ++i) { - if (tags[i]) { - s += contexts[i].expr() + " = "; + for (size_t i = 0; i < tags.size(); ++i) { + if (mask[i]) { + const Tag &t = tags[tags[i].var.orig]; + s += vartag_expr(t.name, t.rule) + " = "; } } return s + expr_dist() + ";\n"; } -std::string InputAPI::expr_ctx(const std::string &ctx) const +std::string InputAPI::expr_ctx(const std::string &var) const { switch (type_) { - case DEFAULT: return "(" + opts->yyctxmarker + " + " + ctx + ")"; - case CUSTOM: return opts->yyctx + "(" + ctx + ")"; + case DEFAULT: return "(" + opts->yyctxmarker + " + " + var + ")"; + case CUSTOM: return opts->yyctx + "(" + var + ")"; default: assert(false); } } -std::string InputAPI::expr_ctx_fix(const CtxFix &ctx, const std::vector &ctxvars) const +std::string InputAPI::expr_ctx_fix(const Tag &tag, const std::valarray &tags) const { std::ostringstream s; - if (ctx.base == CtxFix::RIGHTMOST) { + if (tag.fix.base == Tag::NONE) { switch (type_) { case DEFAULT: // optimize '(YYCTXMARKER + ((YYCURSOR - YCTXMARKER) - yyctx))' // to '(YYCURSOR - yyctx)' - s << "(" << opts->yycursor << " - " << ctx.dist << ")"; + s << "(" << opts->yycursor << " - " << tag.fix.dist << ")"; break; case CUSTOM: - s << opts->yyctx << "(" << opts->yydist << "() - " << ctx.dist << ")"; + s << opts->yyctx << "(" << opts->yydist << "() - " << tag.fix.dist << ")"; break; } return s.str(); } else { - s << "(" << ctxvars[ctx.base].expr() << " - " << ctx.dist << ")"; + const Tag &t = tags[tags[tag.fix.base].var.orig]; + s << "(" << vartag_expr(t.name, t.rule) << " - " << tag.fix.dist << ")"; return expr_ctx(s.str()); } } diff --git a/re2c/src/codegen/input_api.h b/re2c/src/codegen/input_api.h index 4ef77a0f..47e08f12 100644 --- a/re2c/src/codegen/input_api.h +++ b/re2c/src/codegen/input_api.h @@ -4,7 +4,7 @@ #include "src/util/c99_stdint.h" #include #include -#include +#include #include "src/ir/ctx.h" @@ -33,10 +33,10 @@ public: std::string stmt_backup (uint32_t ind) const; std::string stmt_backupctx (uint32_t ind) const; std::string expr_dist () const; - std::string stmt_dist (uint32_t ind, const bool *tags, - const std::vector &contexts) const; - std::string expr_ctx (const std::string &ctx) const; - std::string expr_ctx_fix (const CtxFix &ctx, const std::vector &ctxvars) const; + std::string stmt_dist (uint32_t ind, const bool *tagmask, + const std::valarray &tags) const; + std::string expr_ctx(const std::string &var) const; + std::string expr_ctx_fix(const Tag &tag, const std::valarray &tags) const; std::string stmt_restore (uint32_t ind) const; std::string stmt_restorectx_fix (uint32_t ind, size_t dist) const; std::string stmt_restorectx_var (uint32_t ind) const; diff --git a/re2c/src/conf/warn.cc b/re2c/src/conf/warn.cc index b9c9be43..8c55a1f2 100644 --- a/re2c/src/conf/warn.cc +++ b/re2c/src/conf/warn.cc @@ -121,7 +121,7 @@ void Warn::match_empty_string (uint32_t line) void Warn::selfoverlapping_contexts( uint32_t line, const std::string &cond, - const CtxVar &ctx) + const std::string *tagname) { if (mask[SELFOVERLAPPING_CONTEXTS] & WARNING) { @@ -129,12 +129,12 @@ void Warn::selfoverlapping_contexts( error_accuml |= e; const char *trail, *name; - if (ctx.codename == NULL) { + if (tagname == NULL) { trail = "trailing context"; name = ""; } else { trail = "context "; - name = ctx.codename->c_str(); + name = tagname->c_str(); } warning(names[SELFOVERLAPPING_CONTEXTS], line, e, "%s%s %sis self-overlapping", trail, name, diff --git a/re2c/src/conf/warn.h b/re2c/src/conf/warn.h index 72089026..915c2a0b 100644 --- a/re2c/src/conf/warn.h +++ b/re2c/src/conf/warn.h @@ -59,7 +59,7 @@ public: void condition_order (uint32_t line); void empty_class (uint32_t line); void match_empty_string (uint32_t line); - void selfoverlapping_contexts(uint32_t line, const std::string &cond, const CtxVar &ctx); + void selfoverlapping_contexts(uint32_t line, const std::string &cond, const std::string *tagname); void swapped_range (uint32_t line, uint32_t l, uint32_t u); void undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow); void unreachable_rule (const std::string & cond, const Rule &rule); diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 2e4cb308..30546840 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -36,8 +36,7 @@ DFA::DFA , nStates(0) , head(NULL) , rules(dfa.rules) - , vartags(dfa.vartags) - , fixtags(dfa.fixtags) + , tags(dfa.tags) , tagpool(dfa.tagpool) // statistics @@ -117,8 +116,7 @@ DFA::~DFA() delete skeleton; delete &rules; - delete &vartags; - delete &fixtags; + delete &tags; delete &tagpool; } diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index e19fefa8..7c3d8205 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -67,8 +67,7 @@ struct DFA uint32_t nStates; State * head; std::valarray &rules; - std::vector &vartags; - std::vector &fixtags; + std::valarray &tags; Tagpool &tagpool; size_t max_fill; bool need_backup; diff --git a/re2c/src/ir/ctx.cc b/re2c/src/ir/ctx.cc index c079f4b2..43a0bb0d 100644 --- a/re2c/src/ir/ctx.cc +++ b/re2c/src/ir/ctx.cc @@ -9,31 +9,29 @@ namespace re2c { -CtxVar::CtxVar(const std::string *n, size_t r) - : rule(r) - , codename(n) - , uniqname() -{ - std::ostringstream s; - s << rule; - if (codename != NULL) { - s << *codename; - } - uniqname = s.str(); -} +const size_t Tag::NONE = std::numeric_limits::max(); -std::string CtxVar::name() const +Tag::Tag() + : type(VAR) + , rule(Rule::NONE) + , name(NULL) +{} + +void init_var_tag(Tag &tag, size_t r, const std::string *n, size_t o) { - return opts->contexts_prefix + uniqname; + tag.type = Tag::VAR; + tag.rule = r; + tag.name = n; + tag.var.orig = o; } -std::string CtxVar::expr() const +void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d) { - std::string e = opts->contexts_expr; - strrreplace(e, "@@", opts->contexts_prefix + uniqname); - return e; + tag.type = Tag::FIX; + tag.rule = r; + tag.name = n; + tag.fix.base = b; + tag.fix.dist = d; } -const size_t CtxFix::RIGHTMOST = std::numeric_limits::max(); - } // namespace re2c diff --git a/re2c/src/ir/ctx.h b/re2c/src/ir/ctx.h index 9449e047..b982d319 100644 --- a/re2c/src/ir/ctx.h +++ b/re2c/src/ir/ctx.h @@ -1,76 +1,40 @@ #ifndef _RE2C_IR_CTX_ #define _RE2C_IR_CTX_ -#include #include -namespace re2c -{ +#include "src/util/forbid_copy.h" -static const size_t NO_TAG = std::numeric_limits::max(); - -struct CtxVar +namespace re2c { - size_t rule; - const std::string *codename; - std::string uniqname; - - CtxVar(const std::string *n, size_t r); - CtxVar(const CtxVar &ctx) - : rule(ctx.rule) - , codename(ctx.codename) - , uniqname(ctx.uniqname) - {} - CtxVar& operator=(const CtxVar &ctx) - { - rule = ctx.rule; - codename = ctx.codename; - uniqname = ctx.uniqname; - return *this; - } - std::string name() const; - std::string expr() const; -}; -struct CtxFix +struct Tag { - static const size_t RIGHTMOST; + static const size_t NONE; + enum {VAR, FIX} type; size_t rule; - const std::string *codename; - size_t base; - size_t dist; - - CtxFix(const std::string *n, size_t r, size_t b, size_t d) - : rule(r) - , codename(n) - , base(b) - , dist(d) - {} -}; - -struct Trail -{ - enum {NONE, VAR, FIX} type; + const std::string *name; union { - size_t var; - size_t fix; + struct + { + size_t orig; + } var; + struct + { + size_t base; + size_t dist; + } fix; }; - Trail(): type(NONE) {} - void make_var(size_t v) - { - type = VAR; - var = v; - } - void make_fix(size_t f) - { - type = FIX; - fix = f; - } + Tag(); + FORBID_COPY(Tag); }; +void init_var_tag(Tag &tag, size_t r, const std::string *n, size_t o); +void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d); + } // namespace re2c #endif // _RE2C_IR_CTX_ diff --git a/re2c/src/ir/dfa/context_deduplication.cc b/re2c/src/ir/dfa/context_deduplication.cc index 084cd950..426a7c74 100644 --- a/re2c/src/ir/dfa/context_deduplication.cc +++ b/re2c/src/ir/dfa/context_deduplication.cc @@ -53,7 +53,7 @@ static void calc_live(const dfa_t &dfa, visited[i] = true; dfa_state_t *s = dfa.states[i]; - const size_t ntags = dfa.vartags.size(); + const size_t ntags = dfa.tags.size(); // add tags before recursing to child states, // so that tags propagate into loopbacks to this state @@ -89,7 +89,7 @@ static void mask_dead(dfa_t &dfa, const bool *livetags) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.vartags.size(); + const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { @@ -130,7 +130,7 @@ static void incompatibility_table(const dfa_t &dfa, bool *incompattbl) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.vartags.size(); + const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < nstates; ++i) { const dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { @@ -156,6 +156,18 @@ static void incompatibility_table(const dfa_t &dfa, } } } + + // fixed tags should not participate in deduplication, so + // each fixed tag is incompatible with all other tags + for (size_t i = 0; i < ntags; ++i) { + if (dfa.tags[i].type == Tag::FIX) { + for (size_t j = 0; j < ntags; ++j) { + incompattbl[i * ntags + j] + = incompattbl[j * ntags + i] + = j != i; + } + } + } } /* We have a binary relation on the set of all tags @@ -170,7 +182,7 @@ static void incompatibility_table(const dfa_t &dfa, * The algorithm takes quadratic (in the number of tags) time. * static void equivalence_classes(const std::vector &incompattbl, */ -static size_t equivalence_classes(const bool *incompattbl, +static void equivalence_classes(const bool *incompattbl, size_t ntags, std::vector &represent) { static const size_t END = std::numeric_limits::max(); @@ -201,14 +213,6 @@ static size_t equivalence_classes(const bool *incompattbl, head[0] = c; } } - - size_t nreps = 0; - for (size_t i = 0; i < ntags; ++i) { - if (represent[i] == i) { - ++nreps; - } - } - return nreps; } static size_t patch_tagset(Tagpool &tagpool, size_t oldidx, @@ -237,16 +241,19 @@ static void patch_tags(dfa_t &dfa, const std::vector &represent) s->rule_tags = patch_tagset(dfa.tagpool, s->rule_tags, represent); } - const size_t ntags = dfa.vartags.size(); + const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < ntags; ++i) { - dfa.vartags[i].uniqname = dfa.vartags[represent[i]].uniqname; + Tag &t = dfa.tags[i]; + if (t.type == Tag::VAR) { + t.var.orig = represent[i]; + } } } size_t deduplicate_contexts(dfa_t &dfa, const std::vector &fallback) { - const size_t ntags = dfa.vartags.size(); + const size_t ntags = dfa.tags.size(); if (ntags == 0) { return 0; } @@ -268,7 +275,14 @@ size_t deduplicate_contexts(dfa_t &dfa, incompatibility_table(dfa, live, fbctxs, incompattbl); std::vector represent(ntags, 0); - const size_t nreps = equivalence_classes(incompattbl, ntags, represent); + equivalence_classes(incompattbl, ntags, represent); + + size_t nreps = 0; + for (size_t i = 0; i < ntags; ++i) { + if (dfa.tags[i].type == Tag::VAR && represent[i] == i) { + ++nreps; + } + } if (nreps < ntags) { patch_tags(dfa, represent); diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 0f982050..90d5255e 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -136,11 +136,10 @@ dfa_t::dfa_t( : states() , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) - , vartags(nfa.vartags) - , fixtags(nfa.fixtags) - , tagpool(*new Tagpool(vartags.size())) + , tags(*nfa.tags) + , tagpool(*new Tagpool(tags.size())) { - const size_t ntags = vartags.size(); + const size_t ntags = tags.size(); const size_t nrules = rules.size(); const size_t mask_size = (nchars + 1) * ntags; @@ -148,7 +147,7 @@ dfa_t::dfa_t( kitem_t *kstart = new kitem_t[nfa.size], *kend = kstart; bool *ktags = new bool[ntags](); bool *badtags = new bool[ntags](); - bool *tags = new bool[mask_size]; + bool *arctags = new bool[mask_size]; bool *mask = new bool[mask_size]; bool *fin = new bool[nrules]; std::vector *arcs = new std::vector[nchars]; @@ -157,7 +156,7 @@ dfa_t::dfa_t( find_state(kstart, kend, kernels, tagpool); for (size_t i = 0; i < kernels.size(); ++i) { memset(fin, 0, nrules * sizeof(bool)); - memset(tags, 0, mask_size * sizeof(bool)); + memset(arctags, 0, mask_size * sizeof(bool)); memset(mask, 0, mask_size * sizeof(bool)); for(size_t c = 0; c < nchars; ++c) { arcs[c].clear(); @@ -178,7 +177,7 @@ dfa_t::dfa_t( for (const Range *r = n->value.ran.ran; r; r = r->next ()) { for (; charset[c] != r->lower(); ++c); for (; charset[c] != r->upper(); ++c) { - merge_tags_with_mask(&tags[c * ntags], newtags, + merge_tags_with_mask(&arctags[c * ntags], newtags, &mask[c * ntags], rules[m->rule].tags, badtags, ntags); arcs[c].push_back(m); @@ -187,7 +186,7 @@ dfa_t::dfa_t( break; } case nfa_state_t::FIN: - merge_tags_with_mask(&tags[nchars * ntags], newtags, + merge_tags_with_mask(&arctags[nchars * ntags], newtags, &mask[nchars * ntags], rules[n->rule].tags, badtags, ntags); fin[n->rule] = true; @@ -205,9 +204,9 @@ dfa_t::dfa_t( closure(kstart, kend, a[j], ktags, badtags, ntags); } s->arcs[c] = find_state(kstart, kend, kernels, tagpool); - s->tags[c] = tagpool.insert(&tags[c * ntags]); + s->tags[c] = tagpool.insert(&arctags[c * ntags]); } - s->rule_tags = tagpool.insert(&tags[nchars * ntags]); + s->rule_tags = tagpool.insert(&arctags[nchars * ntags]); // choose the first rule (the one with smallest rank) size_t r; @@ -228,14 +227,14 @@ dfa_t::dfa_t( for (size_t i = 0; i < ntags; ++i) { if (badtags[i]) { // TODO: use rule line, add rule reference to context struct - warn.selfoverlapping_contexts(line, cond, vartags[i]); + warn.selfoverlapping_contexts(line, cond, tags[i].name); } } delete[] kstart; delete[] ktags; delete[] badtags; - delete[] tags; + delete[] arctags; delete[] mask; delete[] fin; delete[] arcs; diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index f34b3028..a7eda5b9 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -46,8 +46,7 @@ struct dfa_t std::vector states; const size_t nchars; std::valarray &rules; - std::vector &vartags; - std::vector &fixtags; + std::valarray &tags; Tagpool &tagpool; dfa_t(const nfa_t &nfa, const charset_t &charset, diff --git a/re2c/src/ir/nfa/sizeof_regexps.cc b/re2c/src/ir/nfa/counters.cc similarity index 54% rename from re2c/src/ir/nfa/sizeof_regexps.cc rename to re2c/src/ir/nfa/counters.cc index 1dde2514..fb5f6569 100644 --- a/re2c/src/ir/nfa/sizeof_regexps.cc +++ b/re2c/src/ir/nfa/counters.cc @@ -2,7 +2,7 @@ namespace re2c { -static size_t sizeof_regexp(const RegExp *re) +static size_t count(const RegExp *re, size_t &ntags) { switch (re->type) { case RegExp::NIL: @@ -10,28 +10,29 @@ static size_t sizeof_regexp(const RegExp *re) case RegExp::SYM: return 1; case RegExp::ALT: - return sizeof_regexp(re->alt.re1) - + sizeof_regexp(re->alt.re2) + return count(re->alt.re1, ntags) + + count(re->alt.re2, ntags) + 1; case RegExp::CAT: - return sizeof_regexp(re->cat.re1) - + sizeof_regexp(re->cat.re2); + return count(re->cat.re1, ntags) + + count(re->cat.re2, ntags); case RegExp::ITER: - return sizeof_regexp(re->iter) + return count(re->iter, ntags) + 1; case RegExp::TAG: + ++ntags; return 1; default: assert(false); } } -size_t sizeof_regexps(const std::vector ®exps) +size_t counters(const std::vector ®exps, size_t &ntags) { const size_t nregexps = regexps.size(); size_t size = nregexps - 1; for (size_t i = 0; i < nregexps; ++i) { - size += sizeof_regexp(regexps[i]->re) + 1; + size += count(regexps[i]->re, ntags) + 1; } return size; } diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc index 58fe0ccf..921714c7 100644 --- a/re2c/src/ir/nfa/init_rules.cc +++ b/re2c/src/ir/nfa/init_rules.cc @@ -5,101 +5,66 @@ namespace re2c { -static void fatal_tags_in_trail(uint32_t line) -{ - error("line %u: tags in trailing context", line); - exit(1); -} - static void assert_no_tags_in_trailing_context(const Rule &rule, - const std::vector &vartags, - const std::vector &fixtags) + const std::valarray &tags) { - const uint32_t line = rule.info->loc.line; // rule tags should not contain other trailing contexts - for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { - if (fixtags[i].codename == NULL) { - fatal_tags_in_trail(line); + for (size_t i = rule.ltag; i < rule.htag; ++i) { + if (tags[i].name == NULL) { + error("line %u: tags in trailing context", + rule.info->loc.line); + exit(1); } } - for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { - if (vartags[i].codename == NULL) { - fatal_tags_in_trail(line); - } - } - // fixed trailing context must be fixed on cursor - if (rule.trail.type == Trail::FIX - && fixtags[rule.trail.fix].base != CtxFix::RIGHTMOST) { - fatal_tags_in_trail(line); - } -} - -static void fatal_tag_reuse(uint32_t line, const char *tag) -{ - error("line %u: tag '%s' is used multiple times in the same rule", line, tag); - exit(1); } static void assert_tags_used_once(const Rule &rule, - const std::vector &vartags, - const std::vector &fixtags) + const std::valarray &tags) { - const uint32_t line = rule.info->loc.line; std::set names; - for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { - const std::string *name = fixtags[i].codename; - if (name && !names.insert(*name).second) { - fatal_tag_reuse(line, name->c_str()); - } - } - for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { - const std::string *name = vartags[i].codename; + for (size_t i = rule.ltag; i < rule.htag; ++i) { + const std::string *name = tags[i].name; if (name && !names.insert(*name).second) { - fatal_tag_reuse(line, name->c_str()); + error("line %u: tag '%s' is used multiple" + " times in the same rule", + rule.info->loc.line, name->c_str()); + exit(1); } } } -void init_rules(std::valarray &rules, - const std::vector ®exps, - const std::vector &vartags, - const std::vector &fixtags) +void init_rules(const std::vector ®exps, + std::valarray &rules, + const std::valarray &tags) { - const size_t nf = fixtags.size(); - const size_t nv = vartags.size(); const size_t nr = rules.size(); + const size_t nt = tags.size(); - for (size_t r = 0, f = 0, v = 0; r < nr; ++r) { + for (size_t r = 0, t = 0; r < nr; ++r) { Rule &rule = rules[r]; rule.info = regexps[r]->info; rule.nullable = nullable_rule(regexps[r]); - rule.lfixtag = f; - for (; f < nf && fixtags[f].rule == r; ++f); - rule.hfixtag = f; - - rule.lvartag = v; - for (; v < nv && vartags[v].rule == r; ++v); - rule.hvartag = v; + rule.ltag = t; + for (; t < nt && tags[t].rule == r; ++t); + rule.htag = t; // mark *all* variable tags, including trailing context - rule.tags = new bool[nv](); - for (size_t t = rule.lvartag; t < rule.hvartag; ++t) { - rule.tags[t] = true; + rule.tags = new bool[nt](); + for (size_t i = rule.ltag; i < rule.htag; ++i) { + rule.tags[i] = tags[i].type == Tag::VAR; } // tags in trailing context are forbidden (they make no sense), // and since tags are constructed in reversed order, this implies // that trailing context, if present, can only be the first tag - if (rule.lfixtag < rule.hfixtag && fixtags[rule.lfixtag].codename == NULL) { - rule.trail.make_fix(rule.lfixtag++); - } else if (rule.lvartag < rule.hvartag && vartags[rule.lvartag].codename == NULL) { - rule.trail.make_var(rule.lvartag++); + if (rule.ltag < rule.htag && tags[rule.ltag].name == NULL) { + rule.trail = rule.ltag++; } // sanity checks - assert_no_tags_in_trailing_context(rule, vartags, fixtags); - assert_tags_used_once(rule, vartags, fixtags); + assert_no_tags_in_trailing_context(rule, tags); + assert_tags_used_once(rule, tags); } } diff --git a/re2c/src/ir/nfa/make_tags.cc b/re2c/src/ir/nfa/make_tags.cc index e82e0913..d1df82b2 100644 --- a/re2c/src/ir/nfa/make_tags.cc +++ b/re2c/src/ir/nfa/make_tags.cc @@ -9,10 +9,8 @@ namespace re2c { static const size_t VARDIST = std::numeric_limits::max(); static void make_tags_var(size_t nrule, - std::vector &vartags, - std::vector &tagidxs, - const RegExp *re, - size_t &dist) + std::valarray &tags, size_t &tagidx, + const RegExp *re, size_t &dist) { switch (re->type) { case RegExp::NIL: break; @@ -23,54 +21,50 @@ static void make_tags_var(size_t nrule, break; case RegExp::ALT: { size_t d1 = dist, d2 = dist; - make_tags_var(nrule, vartags, tagidxs, re->alt.re1, d1); - make_tags_var(nrule, vartags, tagidxs, re->alt.re2, d2); + make_tags_var(nrule, tags, tagidx, re->alt.re1, d1); + make_tags_var(nrule, tags, tagidx, re->alt.re2, d2); dist = (d1 == d2) ? d1 : VARDIST; break; } case RegExp::CAT: - make_tags_var(nrule, vartags, tagidxs, re->cat.re2, dist); - make_tags_var(nrule, vartags, tagidxs, re->cat.re1, dist); + make_tags_var(nrule, tags, tagidx, re->cat.re2, dist); + make_tags_var(nrule, tags, tagidx, re->cat.re1, dist); break; case RegExp::ITER: dist = VARDIST; - make_tags_var(nrule, vartags, tagidxs, re->iter, dist); + make_tags_var(nrule, tags, tagidx, re->iter, dist); break; - case RegExp::TAG: - tagidxs.push_back(vartags.size()); - vartags.push_back(CtxVar(re->tag, nrule)); + case RegExp::TAG: { + const size_t orig = tagidx; + init_var_tag(tags[tagidx++], nrule, re->tag, orig); break; + } } } static void make_tags_var_fix(size_t nrule, - std::vector &vartags, - std::vector &fixtags, - std::vector &tagidxs, - const RegExp *re, - size_t &dist, - size_t &base) + std::valarray &tags, size_t &tagidx, + const RegExp *re, size_t &dist, size_t &base) { switch (re->type) { case RegExp::NIL: case RegExp::SYM: case RegExp::ALT: case RegExp::ITER: - make_tags_var(nrule, vartags, tagidxs, re, dist); + make_tags_var(nrule, tags, tagidx, re, dist); break; case RegExp::CAT: - make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re2, dist, base); - make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re1, dist, base); + make_tags_var_fix(nrule, tags, tagidx, re->cat.re2, dist, base); + make_tags_var_fix(nrule, tags, tagidx, re->cat.re1, dist, base); break; case RegExp::TAG: { const std::string *name = re->tag; if (dist == VARDIST) { - tagidxs.push_back(base = vartags.size()); - vartags.push_back(CtxVar(name, nrule)); + base = tagidx; + init_var_tag(tags[tagidx++], nrule, name, base); dist = 0; } else { - tagidxs.push_back(NO_TAG); - fixtags.push_back(CtxFix(name, nrule, base, dist)); + init_fix_tag(tags[tagidx++], nrule, name, base, dist); } if (name == NULL) { dist = 0; @@ -101,14 +95,11 @@ static void make_tags_var_fix(size_t nrule, * calculate fixed tag value based on initialized value * (and spoil default value expected by the programmer). */ -void make_tags(const std::vector &rs, - std::vector &vartags, - std::vector &fixtags, - std::vector &tagidxs) +void make_tags(const std::vector &rs, std::valarray &tags) { const size_t nrs = rs.size(); - for (size_t i = 0; i < nrs; ++i) { - size_t base = CtxFix::RIGHTMOST, dist = 0; + for (size_t i = 0, tagidx = 0; i < nrs; ++i) { + size_t base = Tag::NONE, dist = 0; // don't optimize fixed-length trailing context with generic API // unless tags are explicitly enabled: generic API needs base tag // to restore fixed-length trailing context, and base existence @@ -116,7 +107,7 @@ void make_tags(const std::vector &rs, if (!opts->contexts && opts->input_api.type() == InputAPI::CUSTOM) { dist = VARDIST; } - make_tags_var_fix(i, vartags, fixtags, tagidxs, rs[i]->re, dist, base); + make_tags_var_fix(i, tags, tagidx, rs[i]->re, dist, base); } } diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index ed8c9e84..b2dc638a 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -3,20 +3,23 @@ namespace re2c { nfa_t::nfa_t(const std::vector ®exps) - : max_size(sizeof_regexps(regexps)) + : max_size(0) , size(0) - , states(new nfa_state_t[max_size]) + , states(NULL) , rules(*new std::valarray(regexps.size())) - , vartags(*new std::vector) - , fixtags(*new std::vector) + , tags(NULL) , root(NULL) { - std::vector tagidxs; - make_tags(regexps, vartags, fixtags, tagidxs); + size_t ntags = 0; + max_size = counters(regexps, ntags); - regexps2nfa(regexps, *this, tagidxs.begin()); + tags = new std::valarray(ntags); + make_tags(regexps, *tags); - init_rules(rules, regexps, vartags, fixtags); + states = new nfa_state_t[max_size]; + regexps2nfa(regexps, *this); + + init_rules(regexps, rules, *tags); } nfa_t::~nfa_t() diff --git a/re2c/src/ir/nfa/nfa.h b/re2c/src/ir/nfa/nfa.h index 729b355e..cccf62ef 100644 --- a/re2c/src/ir/nfa/nfa.h +++ b/re2c/src/ir/nfa/nfa.h @@ -77,12 +77,11 @@ struct nfa_state_t struct nfa_t { - const size_t max_size; + size_t max_size; size_t size; nfa_state_t *states; std::valarray &rules; - std::vector &vartags; - std::vector &fixtags; + std::valarray *tags; nfa_state_t *root; nfa_t(const std::vector &rs); @@ -91,20 +90,13 @@ struct nfa_t FORBID_COPY(nfa_t); }; -typedef std::vector::const_iterator tagidx_t; - -size_t sizeof_regexps(const std::vector ®exps); -void make_tags(const std::vector &rs, - std::vector &vartags, - std::vector &fixtags, - std::vector &tagidxs); -void regexps2nfa(const std::vector &rs, - nfa_t &nfa, tagidx_t tagidx); +size_t counters(const std::vector ®exps, size_t &ntags); +void make_tags(const std::vector ®exps, std::valarray &tags); +void regexps2nfa(const std::vector ®exps, nfa_t &nfa); bool nullable_rule(const RegExpRule *rule); -void init_rules(std::valarray &rules, - const std::vector ®exps, - const std::vector &vartags, - const std::vector &fixtags); +void init_rules(const std::vector ®exps, + std::valarray &rules, + const std::valarray &tags); } // namespace re2c diff --git a/re2c/src/ir/nfa/regexps2nfa.cc b/re2c/src/ir/nfa/regexps2nfa.cc index a7fff690..44287771 100644 --- a/re2c/src/ir/nfa/regexps2nfa.cc +++ b/re2c/src/ir/nfa/regexps2nfa.cc @@ -3,7 +3,7 @@ namespace re2c { static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, - tagidx_t &tagidx, const RegExp *re, nfa_state_t *t) + size_t &tagidx, const RegExp *re, nfa_state_t *t) { nfa_state_t *s = NULL; switch (re->type) { @@ -28,41 +28,40 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, s = &nfa.states[nfa.size++]; s->alt(nrule, t, regexp2nfa(nfa, nrule, tagidx, re->iter, s)); break; - case RegExp::TAG: { - const size_t idx = *tagidx++; - if (idx != NO_TAG) { + case RegExp::TAG: + if ((*nfa.tags)[tagidx].type == Tag::VAR) { s = &nfa.states[nfa.size++]; - s->ctx(nrule, t, idx); + s->ctx(nrule, t, tagidx); } else { s = t; } + ++tagidx; break; - } } return s; } static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, - tagidx_t &tagidx, const RegExpRule *rule) + size_t &tagidx, const RegExpRule *rule) { nfa_state_t *s = &nfa.states[nfa.size++]; s->fin(nrule); return regexp2nfa(nfa, nrule, tagidx, rule->re, s); } -void regexps2nfa(const std::vector &rs, - nfa_t &nfa, tagidx_t tagidx) +void regexps2nfa(const std::vector ®exps, nfa_t &nfa) { - const size_t nrs = rs.size(); + const size_t nregexps = regexps.size(); - if (nrs == 0) { + if (nregexps == 0) { return; } - nfa_state_t *s = regexp2nfa_rule(nfa, 0, tagidx, rs[0]); - for (size_t i = 1; i < nrs; ++i) { + size_t tagidx = 0; + nfa_state_t *s = regexp2nfa_rule(nfa, 0, tagidx, regexps[0]); + for (size_t i = 1; i < nregexps; ++i) { nfa_state_t *t = &nfa.states[nfa.size++]; - t->alt(i, s, regexp2nfa_rule(nfa, i, tagidx, rs[i])); + t->alt(i, s, regexp2nfa_rule(nfa, i, tagidx, regexps[i])); s = t; } nfa.root = s; diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index ddab677c..0056a869 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -35,11 +35,9 @@ struct Rule const RuleInfo *info; - size_t lvartag; - size_t hvartag; - size_t lfixtag; - size_t hfixtag; - Trail trail; + size_t ltag; + size_t htag; + size_t trail; bool nullable; bool *tags; std::set shadow; @@ -47,11 +45,9 @@ struct Rule Rule() : info(NULL) - , lvartag(0) - , hvartag(0) - , lfixtag(0) - , hfixtag(0) - , trail() + , ltag(0) + , htag(0) + , trail(Tag::NONE) , nullable(false) , tags(NULL) , shadow() diff --git a/re2c/src/ir/skeleton/path.h b/re2c/src/ir/skeleton/path.h index 418487c7..b0733c6f 100644 --- a/re2c/src/ir/skeleton/path.h +++ b/re2c/src/ir/skeleton/path.h @@ -50,22 +50,21 @@ public: continue; } size_t len = static_cast(head - tail) - 1; - const Trail &trail = skel.rules[rule_idx].trail; - switch (trail.type) { - case Trail::NONE: - return len; - case Trail::FIX: - return len - skel.fixtags[trail.fix].dist; - case Trail::VAR: { - const size_t ctx = trail.var; + const size_t trail = skel.rules[rule_idx].trail; + if (trail == Tag::NONE) { + return len; + } + const Tag &tag = skel.tags[trail]; + switch (tag.type) { + case Tag::VAR: for (; tail != head; ++tail) { - if (skel.nodes[*tail].tags[ctx]) { + if (skel.nodes[*tail].tags[trail]) { return static_cast(head - tail) - 1; } } assert(false); - break; - } + case Tag::FIX: + return len - tag.fix.dist; } } return 0; diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 6684fef5..b83b07b9 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -69,10 +69,10 @@ Skeleton::Skeleton( , sizeof_key(8) , rules(dfa.rules) , defrule(def) - , fixtags(dfa.fixtags) + , tags(dfa.tags) { const size_t nc = cs.size() - 1; - const size_t ntags = dfa.tagpool.ntags; + const size_t ntags = tags.size(); // initialize skeleton nodes for (size_t i = 0; i < nodes_count - 1; ++i) { diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index 90f0ea5c..819256fc 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -59,7 +59,7 @@ struct Skeleton size_t sizeof_key; std::valarray &rules; const size_t defrule; - std::vector &fixtags; + const std::valarray &tags; Skeleton(const dfa_t &dfa, const charset_t &cs, size_t def, const std::string &dfa_name, const std::string &dfa_cond, -- 2.40.0