granicus.if.org Git - re2c/commitdiff
Keep fixed and variable tags separately.
author Ulya Trofimovich <skvadrik@gmail.com>
Mon, 28 Nov 2016 15:32:46 +0000 (15:32 +0000)
committer Ulya Trofimovich <skvadrik@gmail.com>
Mon, 28 Nov 2016 15:39:06 +0000 (15:39 +0000)
This commit reverts commit de3f9e70a45c42fcb848a347ece3a727b8fb983e:
    Keep fixed and variable tags together in one array.
Optimizations of variable tags get more complicated and fixed tags
should not get in the way.

This commit also drops the check for tags in trailing context: there is
nothing special about them, and no technical reason to forbid them.

28 files changed:
re2c/src/codegen/emit_action.cc
re2c/src/codegen/emit_dfa.cc
re2c/src/ir/adfa/adfa.cc
re2c/src/ir/adfa/adfa.h
re2c/src/ir/dfa/cfg/cfg.cc
re2c/src/ir/dfa/cfg/cfg.h
re2c/src/ir/dfa/cfg/liveanal.cc
re2c/src/ir/dfa/cfg/rename.cc
re2c/src/ir/dfa/closure.cc
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/dfa.h
re2c/src/ir/nfa/init_rules.cc
re2c/src/ir/nfa/nfa.cc
re2c/src/ir/nfa/nfa.h
re2c/src/ir/nfa/regexps2nfa.cc
re2c/src/ir/rule.h
re2c/src/ir/skeleton/generate_code.cc
re2c/src/ir/skeleton/generate_data.cc
re2c/src/ir/skeleton/skeleton.cc
re2c/src/ir/skeleton/skeleton.h
re2c/src/ir/tag.cc
re2c/src/ir/tag.h
re2c/src/ir/tcmd.cc
re2c/src/ir/tcmd.h
re2c/test/tags/fix4.i--tags.c
re2c/test/tags/fix4_trail.i--tags.c
re2c/test/tags/fix5.i--tags.c
re2c/test/tags/fix5_trail.i--tags.c

index 4b93a708e4fc61a6e957ab57451bf5f9757282e3..20047bd77ba8fe0c6021414ad602b0f8c7f2c763 100644 (file)
@@ -449,16 +449,24 @@ void gen_settags(code_lines_t &code, const DFA &dfa, tcid_t tcid)
 void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule)
 {
        const bool generic = opts->input_api.type() == InputAPI::CUSTOM;
-       const std::valarray<Tag> &tags = dfa.tags;
-       const tagver_t *vers = rule.tags;
+       const std::vector<VarTag> &vartags = dfa.vartags;
+       const std::vector<FixTag> &fixtags = dfa.fixtags;
+       const tagver_t *fins = dfa.finvers;
 
-       // trailing context
-       if (rule.trail != Tag::NONE) {
-               const Tag &tag = tags[rule.trail];
+       // variable tags
+       for (size_t t = rule.lvar; t < rule.hvar; ++t) {
+               const VarTag &tag = vartags[t];
                o.wind(ind);
-               if (tag.type == Tag::FIX) {
-                       assert(!generic);
-                       o.wstring(opts->yycursor).ws(" -= ").wu64(tag.fix.dist);
+               if (tag.name) {
+                       const std::string
+                               name = *tag.name,
+                               expr = vartag_expr(fins[t]);
+                       if (generic) {
+                               o.wstring(opts->yycopytag).ws(" (").wstring(name)
+                                       .ws(", ").wstring(expr).ws(")");
+                       } else {
+                               o.wstring(name).ws(" = ").wstring(expr);
+                       }
                } else if (dfa.oldstyle_ctxmarker) {
                        if (generic) {
                                o.wstring(opts->yyrestorectx).ws(" ()");
@@ -466,7 +474,7 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule)
                                o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker);
                        }
                } else {
-                       const std::string expr = vartag_expr(vers[rule.trail]);
+                       const std::string expr = vartag_expr(fins[t]);
                        if (generic) {
                                o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")");
                        } else {
@@ -476,34 +484,22 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule)
                o.ws(";\n");
        }
 
-       // named tags
-       for (size_t t = rule.ltag; t < rule.htag; ++t) {
-               const Tag &tag = tags[t];
-
-               // fixed
-               if (tag.type == Tag::FIX) {
-                       assert(!generic);
-                       o.wind(ind).wstring(*tag.name).ws(" = ");
-                       if (tag.fix.base == Tag::NONE) {
+       // fixed tags
+       for (size_t t = rule.lfix; t < rule.hfix; ++t) {
+               assert(!generic);
+               const FixTag &tag = fixtags[t];
+               o.wind(ind);
+               if (tag.name) {
+                       o.wstring(*tag.name).ws(" = ");
+                       if (tag.base == FixTag::RIGHTMOST) {
                                // optimize '(YYCTXMARKER + ((YYCURSOR - YCTXMARKER) - tag))'
                                // to       '(YYCURSOR - tag)'
-                               o.wstring(opts->yycursor).ws(" - ").wu64(tag.fix.dist);
+                               o.wstring(opts->yycursor).ws(" - ").wu64(tag.dist);
                        } else {
-                               const tagver_t v = vers[tag.fix.base];
-                               o.wstring(vartag_expr(v)).ws(" - ").wu64(tag.fix.dist);
+                               o.wstring(vartag_expr(fins[tag.base])).ws(" - ").wu64(tag.dist);
                        }
-                       o.ws(";\n");
-                       continue;
-               }
-
-               // variable
-               const std::string expr = vartag_expr(vers[t]);
-               o.wind(ind);
-               if (generic) {
-                       o.wstring(opts->yycopytag).ws(" (").wstring(*tag.name)
-                               .ws(", ").wstring(expr).ws(")");
                } else {
-                       o.wstring(*tag.name).ws(" = ").wstring(expr);
+                       o.wstring(opts->yycursor).ws(" -= ").wu64(tag.dist);
                }
                o.ws(";\n");
        }
index 3172a7c9f4b0eee0f978ad6ef5a375813aa1f93b..c9d5cc673ed7c698db409936d61f3b95816e4169 100644 (file)
@@ -145,9 +145,14 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
 
        std::set<std::string> tagnames, tagvars;
        if (!oldstyle_ctxmarker) {
-               const size_t ntags = tags.size();
-               for (size_t i = 0; i < ntags; ++i) {
-                       const std::string *name = tags[i].name;
+               for (size_t i = 0; i < vartags.size(); ++i) {
+                       const std::string *name = vartags[i].name;
+                       if (name) {
+                               tagvars.insert(*name);
+                       }
+               }
+               for (size_t i = 0; i < fixtags.size(); ++i) {
+                       const std::string *name = fixtags[i].name;
                        if (name) {
                                tagvars.insert(*name);
                        }
index 913de6581d03dd5cf51c4f357250988744f4f58b..afd24de26354dcb13b0ba4fd8464538be4e12e43 100644 (file)
@@ -36,7 +36,9 @@ DFA::DFA
        , nStates(0)
        , head(NULL)
        , rules(dfa.rules)
-       , tags(dfa.tags)
+       , vartags(dfa.vartags)
+       , fixtags(dfa.fixtags)
+       , finvers(dfa.finvers)
        , tcpool(dfa.tcpool)
        , max_fill (0)
        , need_backup (false)
@@ -100,7 +102,9 @@ DFA::~DFA()
        }
 
        delete &rules;
-       delete &tags;
+       delete &vartags;
+       delete &fixtags;
+       delete[] finvers;
        delete &tcpool;
 }
 
index f676cd77643315bee903b8a939bb18b113d7c19a..5d2eb639fb23c53abf2fccf9b3722d18fc2e543a 100644 (file)
@@ -67,7 +67,9 @@ struct DFA
        uint32_t nStates;
        State * head;
        std::valarray<Rule> &rules;
-       std::valarray<Tag> &tags;
+       std::vector<VarTag> &vartags;
+       std::vector<FixTag> &fixtags;
+       const tagver_t *finvers;
        tcpool_t &tcpool;
        size_t max_fill;
        bool need_backup;
index 450ff7f90bab7ce17cbcbd1db1eb702c9aeb4e00..1a944c5ca78c08d6e9b493997c0f067ef016b75e 100644 (file)
@@ -7,7 +7,7 @@ namespace re2c
 
 static void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb, cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall);
 static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbbfin, cfg_ix_t nbbfall);
-static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, tcmd_t *cmd, tagver_t *use);
+static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, tcmd_t *cmd, const Rule *rule);
 static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
 static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
 
@@ -97,7 +97,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb,
        for (size_t i = 0; i < nstate; ++i) {
                if (*a2b++ != 0) {
                        const dfa_state_t *s = dfa.states[i];
-                       basic_block(b++, NULL, NULL, &s->tcmd[nsym], dfa.rules[s->rule].tags);
+                       basic_block(b++, NULL, NULL, &s->tcmd[nsym], &dfa.rules[s->rule]);
                }
        }
 
@@ -107,7 +107,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb,
                        const dfa_state_t *s = dfa.states[i];
                        std::fill(been, been + nstate, false);
                        fallback(dfa, arc2bb, been, succe = succb, i);
-                       basic_block(b++, succb, succe, &s->tcmd[nsym + 1], dfa.rules[s->rule].tags);
+                       basic_block(b++, succb, succe, &s->tcmd[nsym + 1], &dfa.rules[s->rule]);
                }
        }
 
@@ -117,7 +117,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb,
 }
 
 void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb,
-       const cfg_ix_t *succe, tcmd_t *cmd, tagver_t *use)
+       const cfg_ix_t *succe, tcmd_t *cmd, const Rule *rule)
 {
        const size_t n = static_cast<size_t>(succe - succb);
        cfg_ix_t *s = new cfg_ix_t[n];
@@ -126,7 +126,7 @@ void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb,
        bb->succb = s;
        bb->succe = s + n;
        bb->cmd = cmd;
-       bb->use = use;
+       bb->rule = rule;
 }
 
 // find immediate successors of the given bblock
index d9d69be8bef38ae04dc1552e1986d4829c08b3c2..91db480d699fecab27c673f33821c80ecb42c2b0 100644 (file)
@@ -13,7 +13,7 @@ struct cfg_bb_t
        cfg_ix_t *succb;
        cfg_ix_t *succe;
        tcmd_t *cmd;
-       tagver_t *use;
+       const Rule *rule;
 };
 
 // control flow graph
index 03ea6413fb2bf80760eb7ac664709d278e1887ea..b64149ac19f69bd43cb92add522aac3cf7139013 100644 (file)
@@ -7,9 +7,8 @@ namespace re2c
 
 void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
 {
-       const size_t
-               nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1,
-               ntag = cfg.dfa.tags.size();
+       const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
+       const tagver_t *fins = cfg.dfa.finvers;
        bool *buf1 = new bool[nver];
        bool *buf2 = new bool[nver];
 
@@ -29,16 +28,14 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
        memset(live, 0, cfg.nbbfin * nver * sizeof(bool));
        for (cfg_ix_t i = cfg.nbbarc; i < cfg.nbbfin; ++i) {
                const cfg_bb_t *b = cfg.bblocks + i;
+               const Rule *r = b->rule;
                bool *l = &live[i * nver];
 
                // all final bblocks have USE tags, but no successors
-               assert(b->use && b->succb == b->succe);
+               assert(r && b->succb == b->succe);
 
-               for (size_t t = 0; t < ntag; ++t) {
-                       const tagver_t u = b->use[t];
-                       if (u != TAGVER_ZERO) {
-                               l[u] = true;
-                       }
+               for (size_t t = r->lvar; t < r->hvar; ++t) {
+                       l[fins[t]] = true;
                }
        }
 
@@ -50,7 +47,7 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
                        bool *old = &live[i * nver];
 
                        // transition bblocks have no USE tags
-                       assert(!b->use);
+                       assert(!b->rule);
 
                        memcpy(buf1, old, nver * sizeof(bool));
                        for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
@@ -98,16 +95,14 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
         */
        for (cfg_ix_t i = cfg.nbbfin; i < cfg.nbbfall; ++i) {
                const cfg_bb_t *b = cfg.bblocks + i;
+               const Rule *r = b->rule;
 
                // all fallback bblocks have USE tags
-               assert(b->use);
+               assert(r);
 
                memset(buf1, 0, nver * sizeof(bool));
-               for (size_t t = 0; t < ntag; ++t) {
-                       const tagver_t u = b->use[t];
-                       if (u != TAGVER_ZERO) {
-                               buf1[u] = true;
-                       }
+               for (size_t t = r->lvar; t < r->hvar; ++t) {
+                       buf1[fins[t]] = true;
                }
                for (const tagsave_t *p = b->cmd->save; p; p = p->next) {
                        buf1[p->ver] = false;
index e2924537fcf214dc547724f2ab451cc4ea874628..ad3cd5f6d4abfe4913dabfee06218ff95b340168 100644 (file)
@@ -9,8 +9,8 @@ void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver)
        if (oldmax == maxver) return;
        oldmax = maxver;
 
-       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
-       for (; b < e; ++b) {
+       cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall;
+       for (; b < be; ++b) {
 
                // tag versions in save commands
                for (tagsave_t *p = b->cmd->save; p; p = p->next) {
@@ -29,16 +29,10 @@ void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver)
                }
        }
 
-       // final tag versions in rules
-       std::valarray<Rule> &rules = cfg.dfa.rules;
-       for (size_t r = 0, t = 0; r < rules.size(); ++r) {
-               Rule &rule = rules[r];
-               for (; t < rule.htag; ++t) {
-                       tagver_t &v = rule.tags[t];
-                       if (v != TAGVER_ZERO) {
-                               v = ver2new[v];
-                       }
-               }
+       // final tag versions
+       tagver_t *f = cfg.dfa.finvers, *fe = f + cfg.dfa.vartags.size();
+       for (; f < fe; ++f) {
+               *f = ver2new[*f];
        }
 }
 
index 2ef786aeab532dc6a147b2a5e1b9c0ab9a630c61..24defa6ce7938faa449f31744d907ed21bc3a13b 100644 (file)
@@ -183,25 +183,25 @@ tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool,
        tagver_t *tags = tagpool.buffer1;
        std::fill(tags, tags + ntag, TAGVER_ZERO);
 
-       size_t r = 0, lt = 0, ht;
+       size_t r = 0;
        for (cclositer_t c = clos.begin(), e = clos.end(); c != e;) {
                const tagver_t *x = tagpool[c->tagidx];
 
                // find next rule that occurs in closure
-               for (; r < c->state->rule; lt = rules[r].htag, ++r);
-               ht = rules[r].htag;
+               for (; r < c->state->rule; ++r);
+               const Rule &rule = rules[r];
 
                // merge tags of the 1st item belonging to this rule
-               for (size_t t = lt; t < ht; ++t) {
+               for (size_t t = rule.lvar; t < rule.hvar; ++t) {
                        tags[t] = x[t];
                }
 
-               // check the remaining items with this for tag nondeterminism:
+               // check the remaining items for tag nondeterminism:
                // if some tag differs from that of the 1st item, then it is
                // nondeterministic (don't merge it, only note the conflict)
                for (++c; c != e && c->state->rule == r; ++c) {
                        const tagver_t *y = tagpool[c->tagidx];
-                       for (size_t t = lt; t < ht; ++t) {
+                       for (size_t t = rule.lvar; t < rule.hvar; ++t) {
                                badtags[t] |= y[t] != x[t];
                        }
                }
index 6f81c4e11704ceeb13c751306406c786f6c08b22..59c731d5bcaa492363176fdb2168624965a47e58 100644 (file)
 namespace re2c
 {
 
-static tagver_t vartag_maxver(const std::valarray<Tag> &tags);
 static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol);
 static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol);
-static void warn_bad_tags(const bool *badtags, const std::valarray<Tag> &tags,
+static void warn_bad_tags(const bool *badtags, const std::vector<VarTag> &tags,
        const std::valarray<Rule> &rules, const std::string &cond);
 
 const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
@@ -51,17 +50,23 @@ dfa_t::dfa_t(const nfa_t &nfa,
        : states()
        , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
        , rules(nfa.rules)
-       , tags(*nfa.tags)
+       , vartags(nfa.vartags)
+       , fixtags(nfa.fixtags)
+       , finvers(NULL)
        , tcpool(*new tcpool_t)
        , maxtagver(0)
 {
-       const size_t ntag = tags.size();
+       const size_t ntag = vartags.size();
        Tagpool tagpool(ntag);
        kernels_t kernels;
        closure_t clos1, clos2;
        bool *badtags = new bool[ntag]();
 
-       maxtagver = vartag_maxver(tags);
+       finvers = new tagver_t[ntag];
+       for (size_t t = 0; t < ntag; ++t) {
+               finvers[t] = ++maxtagver;
+       }
+       maxtagver *= 2;
        clos1.push_back(clos_t(nfa.root, ZERO_TAGS));
        closure(clos1, clos2, tagpool, tcpool, rules, badtags);
        kernels.insert(clos2);
@@ -80,7 +85,9 @@ dfa_t::dfa_t(const nfa_t &nfa,
                        const nfa_state_t *f = kernel->state[i];
                        if (f->type == nfa_state_t::FIN) {
                                s->rule = f->rule;
-                               s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], rules[s->rule].tags, ntag);
+                               const Rule &rule = rules[s->rule];
+                               s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]],
+                                       finvers, rule.lvar, rule.hvar, ntag);
                                break;
                        }
                }
@@ -95,30 +102,19 @@ dfa_t::dfa_t(const nfa_t &nfa,
                }
        }
 
-       warn_bad_tags(badtags, tags, rules, cond);
+       warn_bad_tags(badtags, vartags, rules, cond);
        delete[] badtags;
 }
 
-tagver_t vartag_maxver(const std::valarray<Tag> &tags)
-{
-       const size_t ntag = tags.size();
-       for (size_t t = ntag; t > 0; --t) {
-               if (tags[t - 1].type == Tag::VAR) {
-                       return static_cast<tagver_t>(ntag + t);
-               }
-       }
-       return 0;
-}
-
 void warn_bad_tags(const bool *badtags,
-       const std::valarray<Tag> &tags,
+       const std::vector<VarTag> &tags,
        const std::valarray<Rule> &rules,
        const std::string &cond)
 {
        const size_t ntags = tags.size();
        for (size_t i = 0; i < ntags; ++i) {
                if (badtags[i]) {
-                       const Tag &tag = tags[i];
+                       const VarTag &tag = tags[i];
                        const uint32_t line = rules[tag.rule].info->loc.line;
                        warn.nondeterministic_tags(line, cond, tag.name);
                }
index bf0202351cbd19c5d10b18d705424890dcb73215..e6492b40bccb7e85ff049bd489bb82cb5888e7aa 100644 (file)
@@ -51,7 +51,9 @@ struct dfa_t
        std::vector<dfa_state_t*> states;
        const size_t nchars;
        std::valarray<Rule> &rules;
-       std::valarray<Tag> &tags;
+       std::vector<VarTag> &vartags;
+       std::vector<FixTag> &fixtags;
+       tagver_t *finvers;
        tcpool_t &tcpool;
        tagver_t maxtagver;
 
index b48022e2acb6034ae98e9f44ea9f96d4035c5b7d..9afdf8d665959e49b31f4115c631920b5f636691 100644 (file)
@@ -5,69 +5,58 @@
 
 namespace re2c {
 
-static void assert_no_tags_in_trailing_context(const Rule &rule,
-       const std::valarray<Tag> &tags)
-{
-       // rule tags should not contain other trailing contexts
-       for (size_t i = rule.ltag; i < rule.htag; ++i) {
-               if (tags[i].name == NULL) {
-                       error("line %u: tags in trailing context",
-                               rule.info->loc.line);
-                       exit(1);
-               }
-       }
-}
-
 static void assert_tags_used_once(const Rule &rule,
-       const std::valarray<Tag> &tags)
+       const std::vector<VarTag> &vartags, const std::vector<FixTag> &fixtags)
 {
        std::set<std::string> names;
-       for (size_t i = rule.ltag; i < rule.htag; ++i) {
-               const std::string *name = tags[i].name;
-               if (name && !names.insert(*name).second) {
-                       error("line %u: tag '%s' is used multiple"
-                               " times in the same rule",
-                               rule.info->loc.line, name->c_str());
-                       exit(1);
-               }
+       const std::string *name = NULL;
+
+       for (size_t t = rule.lvar; t < rule.hvar; ++t) {
+               name = vartags[t].name;
+               if (name && !names.insert(*name).second) goto error;
        }
+
+       for (size_t t = rule.lfix; t < rule.hfix; ++t) {
+               name = fixtags[t].name;
+               if (name && !names.insert(*name).second) goto error;
+       }
+
+       return;
+
+error:
+       error("line %u: tag '%s' is used multiple times in the same rule",
+               rule.info->loc.line, name->c_str());
+       exit(1);
 }
 
 void init_rules(const std::vector<const RegExpRule*> &regexps,
-       std::valarray<Rule> &rules,
-       const std::valarray<Tag> &tags)
+       std::valarray<Rule> &rules, const std::vector<VarTag> &vartags,
+       const std::vector<FixTag> &fixtags)
 {
-       const size_t nr = rules.size();
-       const size_t nt = tags.size();
+       const size_t
+               nr = rules.size(),
+               nv = vartags.size(),
+               nf = fixtags.size();
 
-       for (size_t r = 0, t = 0; r < nr; ++r) {
+       for (size_t r = 0, v = 0, f = 0, t; r < nr; ++r) {
                Rule &rule = rules[r];
                rule.info = regexps[r]->info;
 
-               rule.ltag = t;
-               for (; t < nt && tags[t].rule == r; ++t);
-               rule.htag = t;
+               rule.lvar = v;
+               for (; v < nv && vartags[v].rule == r; ++v);
+               rule.hvar = v;
+
+               rule.lfix = f;
+               for (; f < nf && fixtags[f].rule == r; ++f);
+               rule.hfix = f;
 
-               // mark *all* variable tags, including trailing context
-               tagver_t *vers = new tagver_t[nt];
-               std::fill(vers, vers + nt, TAGVER_ZERO);
-               for (size_t i = rule.ltag; i < rule.htag; ++i) {
-                       if (tags[i].type == Tag::VAR) {
-                               vers[i] = static_cast<tagver_t>(i + 1);
-                       }
-               }
-               rule.tags = vers;
+               for (t = rule.lvar; t < rule.hvar && vartags[t].name; ++t);
+               rule.tvar = t;
 
-               // tags in trailing context are forbidden (they make no sense),
-               // and since tags are constructed in reversed order, this implies
-               // that trailing context, if present, can only be the first tag
-               if (rule.ltag < rule.htag && tags[rule.ltag].name == NULL) {
-                       rule.trail = rule.ltag++;
-               }
+               for (t = rule.lfix; t < rule.hfix && fixtags[t].name; ++t);
+               rule.tfix = t;
 
-               // sanity checks
-               assert_no_tags_in_trailing_context(rule, tags);
-               assert_tags_used_once(rule, tags);
+               assert_tags_used_once(rule, vartags, fixtags);
        }
 }
 
index 07fe55245f70b459f2c5f879eebe5bc23a57123b..048a663eb9d2276a9e898bab192e1faba7b8bd95 100644 (file)
@@ -7,17 +7,17 @@ nfa_t::nfa_t(const std::vector<const RegExpRule*> &regexps)
        , size(0)
        , states(NULL)
        , rules(*new std::valarray<Rule>(regexps.size()))
-       , tags(NULL)
+       , vartags(*new std::vector<VarTag>())
+       , fixtags(*new std::vector<FixTag>())
        , root(NULL)
 {
        size_t ntags = 0;
        max_size = counters(regexps, ntags);
 
        states = new nfa_state_t[max_size];
-       tags = new std::valarray<Tag>(ntags);
        regexps2nfa(regexps, *this);
 
-       init_rules(regexps, rules, *tags);
+       init_rules(regexps, rules, vartags, fixtags);
 }
 
 nfa_t::~nfa_t()
index f0f65514bc91a4e04c061797d73fda149eb2053a..372ab215598570ff9e02ad536886311abdbf9565 100644 (file)
@@ -89,7 +89,8 @@ struct nfa_t
        size_t size;
        nfa_state_t *states;
        std::valarray<Rule> &rules;
-       std::valarray<Tag> *tags;
+       std::vector<VarTag> &vartags;
+       std::vector<FixTag> &fixtags;
        nfa_state_t *root;
 
        nfa_t(const std::vector<const RegExpRule*> &rs);
@@ -101,7 +102,8 @@ struct nfa_t
 size_t counters(const std::vector<const RegExpRule*> &regexps, size_t &ntags);
 void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa);
 bool nullable_rule(const RegExpRule *rule);
-void init_rules(const std::vector<const RegExpRule*> &regexps, std::valarray<Rule> &rules, const std::valarray<Tag> &tags);
+void init_rules(const std::vector<const RegExpRule*> &regexps, std::valarray<Rule> &rules,
+       const std::vector<VarTag> &vartags, const std::vector<FixTag> &fixtags);
 
 } // namespace re2c
 
index 6db4e5a3bbaa6d70bc1297eb8932cf7e11ff5cd1..3ca3d35773a0d5f299ffcc98d34c7573d3b318c1 100644 (file)
@@ -27,9 +27,8 @@ static const size_t VARDIST = std::numeric_limits<size_t>::max();
 // the order of regexp traversal determines the order in which tags are
 // assigned indices. Splitting this in two passes would require maintaining
 // exactly the same order of traversal, which is fragile.
-static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx,
-       size_t &dist, size_t &base, bool toplevel, const RegExp *re,
-       nfa_state_t *t)
+static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &dist,
+       size_t &base, bool toplevel, const RegExp *re, nfa_state_t *t)
 {
        nfa_state_t *s = NULL;
        switch (re->type) {
@@ -44,18 +43,18 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx,
                        break;
                case RegExp::ALT: {
                        nfa_state_t *s1, *s2, *t0, *t1, *t2, *q;
-                       size_t d1 = dist, d2 = dist, i = tidx;
+                       size_t d1 = dist, d2 = dist, i = nfa.vartags.size();
 
                        t0 = &nfa.states[nfa.size++];
-                       s1 = regexp2nfa(nfa, nrule, tidx, d1, base, false, re->alt.re1, t0);
-                       for (t2 = t; i < tidx; ++i) {
+                       s1 = regexp2nfa(nfa, nrule, d1, base, false, re->alt.re1, t0);
+                       for (t2 = t; i < nfa.vartags.size(); ++i) {
                                q = &nfa.states[nfa.size++];
                                q->make_tag(nrule, t2, i, true);
                                t2 = q;
                        }
 
-                       s2 = regexp2nfa(nfa, nrule, tidx, d2, base, false, re->alt.re2, t2);
-                       for (t1 = t; i < tidx; ++i) {
+                       s2 = regexp2nfa(nfa, nrule, d2, base, false, re->alt.re2, t2);
+                       for (t1 = t; i < nfa.vartags.size(); ++i) {
                                q = &nfa.states[nfa.size++];
                                q->make_tag(nrule, t1, i, true);
                                t1 = q;
@@ -69,13 +68,13 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx,
                        break;
                }
                case RegExp::CAT:
-                       s = regexp2nfa(nfa, nrule, tidx, dist, base, toplevel, re->cat.re2, t);
-                       s = regexp2nfa(nfa, nrule, tidx, dist, base, toplevel, re->cat.re1, s);
+                       s = regexp2nfa(nfa, nrule, dist, base, toplevel, re->cat.re2, t);
+                       s = regexp2nfa(nfa, nrule, dist, base, toplevel, re->cat.re1, s);
                        break;
                case RegExp::ITER: {
                        // see note [Kleene star is expressed in terms of plus]
                        nfa_state_t *q = &nfa.states[nfa.size++];
-                       s = regexp2nfa(nfa, nrule, tidx, dist, base, false, re->iter, q);
+                       s = regexp2nfa(nfa, nrule, dist, base, false, re->iter, q);
                        q->make_alt(nrule, t, s);
 
                        dist = VARDIST;
@@ -84,35 +83,36 @@ static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, size_t &tidx,
                case RegExp::TAG: {
                        const std::string *name = re->tag;
                        if (toplevel && dist != VARDIST) {
-                               init_fix_tag((*nfa.tags)[tidx], nrule, name, base, dist);
+                               FixTag fix = {name, nrule, base, dist};
+                               nfa.fixtags.push_back(fix);
                                s = t;
                        } else {
+                               const size_t ntag = nfa.vartags.size();
+                               VarTag var = {name, nrule};
+                               nfa.vartags.push_back(var);
                                if (toplevel) {
-                                       base = tidx;
+                                       base = ntag;
                                        dist = 0;
                                }
-                               init_var_tag((*nfa.tags)[tidx], nrule, name);
                                s = &nfa.states[nfa.size++];
-                               s->make_tag(nrule, t, tidx, false);
+                               s->make_tag(nrule, t, ntag, false);
                        }
                        if (name == NULL) dist = 0;
-                       ++tidx;
                        break;
                }
        }
        return s;
 }
 
-static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule,
-       size_t &tidx, const RegExpRule *rule)
+static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, const RegExpRule *rule)
 {
        const bool generic = opts->input_api.type() == InputAPI::CUSTOM;
-       size_t base = Tag::NONE, dist = 0;
+       size_t base = FixTag::RIGHTMOST, dist = 0;
 
        nfa_state_t *s = &nfa.states[nfa.size++];
        s->make_fin(nrule);
 
-       return regexp2nfa(nfa, nrule, tidx, dist, base, !generic, rule->re, s);
+       return regexp2nfa(nfa, nrule, dist, base, !generic, rule->re, s);
 }
 
 void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa)
@@ -123,11 +123,10 @@ void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa)
                return;
        }
 
-       size_t tidx = 0;
-       nfa_state_t *s = regexp2nfa_rule(nfa, 0, tidx, regexps[0]);
+       nfa_state_t *s = regexp2nfa_rule(nfa, 0, regexps[0]);
        for (size_t i = 1; i < nregexps; ++i) {
                nfa_state_t *t = &nfa.states[nfa.size++];
-               t->make_alt(i, s, regexp2nfa_rule(nfa, i, tidx, regexps[i]));
+               t->make_alt(i, s, regexp2nfa_rule(nfa, i, regexps[i]));
                s = t;
        }
        nfa.root = s;
index 9d7437fa5e85869fcd43cacd36aeb20e61edac0a..e74683a009bad99367b05b1295a544ba5f093283 100644 (file)
@@ -34,26 +34,22 @@ struct Rule
        static const size_t NONE;
 
        const RuleInfo *info;
-
-       size_t ltag;
-       size_t htag;
-       size_t trail;
-       tagver_t *tags;
        std::set<uint32_t> shadow;
 
-       Rule()
-               : info(NULL)
-               , ltag(0)
-               , htag(0)
-               , trail(Tag::NONE)
-               , tags(NULL)
-               , shadow()
-       {}
-       ~Rule()
-       {
-               delete[] tags;
-       }
+       // variable tags
+       size_t lvar; // first
+       size_t hvar; // one past the last
+       size_t tvar; // trailing context
 
+       // fixed tags
+       size_t lfix; // first
+       size_t hfix; // one past the last
+       size_t tfix; // trailing context
+
+       Rule(): info(NULL), shadow(),
+               lvar(0), hvar(0), tvar(0),
+               lfix(0), hfix(0), tfix(0)
+       {}
        FORBID_COPY(Rule);
 };
 
index a434274410bc82ea777ebc0d3fdbb0af9cd7dc73..cbadee02b7f78da844c9abe5a4245d8c1532827e 100644 (file)
@@ -354,17 +354,18 @@ void emit_epilog(OutputFile &o, const std::set<std::string> &names)
 void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid)
 {
        const std::string &name = dfa.name;
-       const Rule &rule = dfa.rules[rid];
-       const size_t ltag = rule.ltag, htag = rule.htag,
+       const Rule &r = dfa.rules[rid];
+       const size_t
+               ntag = 3 + r.hvar - r.lvar - (r.tvar != r.hvar),
                rkey = rule2key(rid, dfa.key_size, dfa.def_rule);
 
-       o.wind(ind).ws("status = check_key_count_").wstring(name)
-               .ws("(keys_count, i, ").wu64(3 + htag - ltag).ws(")\n")
-               .wind(ind + 1).ws(" || action_").wstring(name)
+       o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ")
+               .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name)
                .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")");
 
-       for (size_t t = ltag; t < htag; ++t) {
-               const std::string &tag = *dfa.tags[t].name;
+       for (size_t t = r.lvar; t < r.hvar; ++t) {
+               if (t == r.tvar) continue;
+               const std::string &tag = *dfa.vartags[t].name;
                o.ws("\n").wind(ind + 1).ws(" || check_tag_").wstring(name)
                        .ws("(&i, keys, ").wstring(tag).ws(", input, token, \"")
                        .wstring(tag).ws("\")");
index b14c5f43e079a21a8942fe3c8452f5a6f88ba3e4..204125e3a6f1a9805248a4643d6344ccbce907e5 100644 (file)
@@ -176,28 +176,26 @@ static void write_keys(const path_t &path, const Skeleton &skel,
        }
 
        const size_t rule = path.node(skel, f).rule;
-       size_t matched = 0, ltag = 0, htag = 0;
-       const tagver_t *vers = NULL;
+       size_t matched = 0, ltag = 0, htag = 0, trail = 0;
        if (rule != Rule::NONE) {
 
                const Rule &r = skel.rules[rule];
-               ltag = r.ltag;
-               htag = r.htag;
-               vers = r.tags;
+               ltag = r.lvar;
+               htag = r.hvar;
+               trail = r.tvar;
 
                // matched length might depend on tag values
-               const size_t t = r.trail;
-               if (t == Tag::NONE) {
+               if (trail == htag) {
                        matched = f;
                } else {
-                       assert(skel.tags[t].type == Tag::VAR);
-                       matched = tags[vers[t]];
+                       assert(r.tfix == r.hfix); // no fixed trailing context
+                       matched = tags[skel.finvers[trail]];
                        assert(matched != Skeleton::DEFTAG);
                }
        }
 
        // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags
-       const size_t nkey = 3 + htag - ltag;
+       const size_t nkey = 3 + htag - ltag - (trail != htag);
        key_t *keys = new key_t[nkey * width], *k = keys;
        for (size_t w = 0; w < width; ++w) {
                *k++ = to_le(static_cast<key_t>(path.len()));
@@ -205,7 +203,8 @@ static void write_keys(const path_t &path, const Skeleton &skel,
                *k++ = to_le(rule2key<key_t>(rule, skel.defrule));
                const size_t *ts = &tags[w * nver];
                for (size_t t = ltag; t < htag; ++t) {
-                       *k++ = to_le(static_cast<key_t>(ts[vers[t]]));
+                       if (t == trail) continue;
+                       *k++ = to_le(static_cast<key_t>(ts[skel.finvers[t]]));
                }
        }
        // dump to file
index 81aa5bb570878690bf5d46e80f2ed3e8b9e52898..05c3bbf08e5e2b267b682ad8295028cd074775bb 100644 (file)
@@ -65,7 +65,8 @@ Skeleton::Skeleton(
        , defrule(def)
        , ntagver(static_cast<size_t>(dfa.maxtagver) + 1)
        , rules(dfa.rules)
-       , tags(dfa.tags)
+       , vartags(dfa.vartags)
+       , finvers(dfa.finvers)
 {
        // initialize nodes
        const size_t nil = nodes_count - 1;
index e8ebc33623246898210c8225f01baa306ba3878c..c8c7c7dc2c625377d00599ff143b38e685d98a29 100644 (file)
@@ -72,7 +72,8 @@ struct Skeleton
        size_t defrule;
        size_t ntagver;
        const std::valarray<Rule> &rules;
-       const std::valarray<Tag> &tags;
+       const std::vector<VarTag> &vartags;
+       const tagver_t *finvers;
 
        Skeleton(const dfa_t &dfa, const charset_t &cs, size_t def,
                const std::string &dfa_name, const std::string &dfa_cond,
index 09254b5905a9b8981c88e51ea023f18688c68f3c..90528a9eb3b1d9b7e4c6c8ebeedb767f77d5d6e7 100644 (file)
@@ -1,34 +1,10 @@
 #include <limits>
 
-#include "src/ir/rule.h"
 #include "src/ir/tag.h"
 
 namespace re2c
 {
 
-const size_t Tag::NONE = std::numeric_limits<size_t>::max();
-
-Tag::Tag()
-       : type(VAR)
-       , rule(Rule::NONE)
-       , name(NULL)
-       , fix()
-{}
-
-void init_var_tag(Tag &tag, size_t r, const std::string *n)
-{
-       tag.type = Tag::VAR;
-       tag.rule = r;
-       tag.name = n;
-}
-
-void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d)
-{
-       tag.type = Tag::FIX;
-       tag.rule = r;
-       tag.name = n;
-       tag.fix.base = b;
-       tag.fix.dist = d;
-}
+const size_t FixTag::RIGHTMOST = std::numeric_limits<size_t>::max();
 
 } // namespace re2c
index 06b6700d88e5209f8c7ffb211eff2d75ec954460..8eaceca95cf5be1fae8096c497345a9394ab2c10 100644 (file)
@@ -4,7 +4,6 @@
 #include <string>
 
 #include "src/util/c99_stdint.h"
-#include "src/util/forbid_copy.h"
 
 namespace re2c
 {
@@ -14,25 +13,21 @@ typedef int32_t tagver_t;
 static const tagver_t TAGVER_BOTTOM = -1; // default value for tag
 static const tagver_t TAGVER_ZERO = 0; // absence of tag
 
-struct Tag
+struct VarTag
 {
-       static const size_t NONE;
-
-       enum {VAR, FIX} type;
-       size_t rule;
        const std::string *name;
-       struct
-       {
-               size_t base;
-               size_t dist;
-       } fix;
-
-       Tag();
-       FORBID_COPY(Tag);
+       size_t rule;
 };
 
-void init_var_tag(Tag &tag, size_t r, const std::string *n);
-void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d);
+struct FixTag
+{
+       static const size_t RIGHTMOST;
+
+       const std::string *name;
+       size_t rule;
+       size_t base;
+       size_t dist;
+};
 
 } // namespace re2c
 
index 61fa2af657fa57079a5b15c3f0cdc018b6cc6271..476bc458f6800d3feafeeb89beec8ebb4b4f66dd 100644 (file)
@@ -90,14 +90,13 @@ tagsave_t *tcpool_t::conv_to_save(const tagver_t *vers, size_t ntag)
        return s;
 }
 
-tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag)
+tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *fins,
+       size_t ltag, size_t htag, size_t ntag)
 {
        tagsave_t *s = NULL;
        tagcopy_t *c = NULL;
-       for (size_t t = ntag; t-- > 0;) {
+       for (size_t t = ltag; t < htag; ++t) {
                const tagver_t v = vers[t], f = fins[t];
-               if (f == TAGVER_ZERO) continue;
-
                if (v != TAGVER_ZERO) {
                        s = make_save(s, f, v == TAGVER_BOTTOM);
                } else {
index af63bcc34df7826c5eb429ec11e80c29f238e0fd..a9121fe80e805536c1a5f92744ca9185233e3c5e 100644 (file)
@@ -70,7 +70,7 @@ public:
        tagsave_t *make_save(tagsave_t *next, tagver_t ver, bool bottom);
        tagcopy_t *make_copy(tagcopy_t *next, tagver_t lhs, tagver_t rhs);
        tagsave_t *conv_to_save(const tagver_t *vers, size_t ntag);
-       tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag);
+       tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ltag, size_t htag, size_t ntag);
 
        tcid_t insert(const tagsave_t *save, const tagcopy_t *copy);
        const tccmd_t &operator[](tcid_t id) const;
index 7ba19c81049cd17a99c0bbf9ac24b01af093bd85..58563e1505ec6b220163b7a74d879b1d9487e2d6 100644 (file)
@@ -61,10 +61,10 @@ yy9:
        }
 yy11:
        ++YYCURSOR;
-       p4 = YYCURSOR - 1;
        p3 = yyt1;
-       p2 = yyt1 - 1;
        p1 = yyt2;
+       p4 = YYCURSOR - 1;
+       p2 = yyt1 - 1;
        p0 = yyt2 - 1;
        {
             printf("'%.*s', '%.*s', '%.*s', '%.*s', '%.*s'\n",
index 4963a7ac8c95de72e7ea725b7c24a086a93a6940..8e913114c8bc429d6560032afc1a4a8717479861 100644 (file)
@@ -61,10 +61,10 @@ yy9:
        }
 yy11:
        ++YYCURSOR;
-       YYCURSOR -= 1;
        p3 = yyt1;
-       p2 = yyt1 - 1;
        p1 = yyt2;
+       YYCURSOR -= 1;
+       p2 = yyt1 - 1;
        p0 = yyt2 - 1;
        {
             printf("'%.*s', '%.*s', '%.*s', '%.*s', '%s'\n",
index 42bb00a8de96259f6a972f4e35a5b08e90fca4d4..8f1228448e210e5f0073d137044c94b4ed9505cc 100644 (file)
@@ -79,10 +79,10 @@ yy12:
        goto yy15;
 yy13:
        p4 = yyt1;
-       p3 = yyt1 - 1;
        p2 = yyt2;
-       p1 = yyt2 - 1;
        p0 = yyt3;
+       p3 = yyt1 - 1;
+       p1 = yyt2 - 1;
        {
             printf("'%.*s', '%.*s', '%.*s', '%.*s', '%.*s'\n",
                 p1 - p0, p0,
index a21476e8352857bb185fe31d2acc6bd8e4582a3e..ee57949a1ab9a2a565c5c886ae00801b876ac2e2 100644 (file)
@@ -79,10 +79,10 @@ yy12:
        goto yy15;
 yy13:
        YYCURSOR = yyt1;
-       p3 = yyt1 - 1;
        p2 = yyt2;
-       p1 = yyt2 - 1;
        p0 = yyt3;
+       p3 = yyt1 - 1;
+       p1 = yyt2 - 1;
        {
             printf("'%.*s', '%.*s', '%.*s', '%.*s', '%s'\n",
                 p1 - p0, p0,