src/conf/msg.cc \
src/conf/opt.cc \
src/conf/warn.cc \
+ src/ir/nfa/counters.cc \
src/ir/nfa/init_rules.cc \
src/ir/nfa/make_tags.cc \
src/ir/nfa/nfa.cc \
src/ir/nfa/nullable.cc \
src/ir/nfa/regexps2nfa.cc \
- src/ir/nfa/sizeof_regexps.cc \
src/ir/adfa/adfa.cc \
src/ir/adfa/prepare.cc \
src/ir/dfa/context_deduplication.cc \
void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh,
const State *to, const DFA &dfa, size_t tags);
void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags);
+std::string vartag_name(const std::string *name, size_t rule);
+std::string vartag_expr(const std::string *name, size_t rule);
} // namespace re2c
o.wind(ind).ws("}\n");
}
-static void subst_contexts(std::string &action, const Rule &rule,
- const std::vector<CtxVar> &vartags,
- const std::vector<CtxFix> &fixtags)
+static void subst_contexts(std::string &action, // rewrites tag references inside a rule's action text, in place
+ const Rule &rule, const std::valarray<Tag> &tags) // only tags with index in [rule.ltag, rule.htag) are visited
{
- for (size_t i = rule.lvartag; i < rule.hvartag; ++i) {
- const CtxVar &ctx = vartags[i];
- strrreplace(action, "@" + *ctx.codename,
- opts->input_api.expr_ctx(ctx.expr()));
- }
-
- for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) {
- const CtxFix &ctx = fixtags[i];
- strrreplace(action, "@" + *ctx.codename,
- opts->input_api.expr_ctx_fix(ctx, vartags));
+ for (size_t i = rule.ltag; i < rule.htag; ++i) {
+ const Tag &tag = tags[i];
+ const std::string s = tag.type == Tag::VAR // choose the expression that yields this tag's value
+ ? opts->input_api.expr_ctx(vartag_expr(tag.name, tag.rule)) // variable tag: wrap the expression of its own variable
+ : opts->input_api.expr_ctx_fix(tag, tags); // otherwise (fixed tag): expression built by expr_ctx_fix from tag.fix
+ strrreplace(action, "@" + *tag.name, s); // pattern is "@" + tag name; tag.name is dereferenced, so it must be non-NULL here
}
}
const Rule &rule = dfa.rules[rule_idx];
const RuleInfo *info = rule.info;
- const Trail &trail = rule.trail;
- switch (trail.type) {
- case Trail::NONE: break;
- case Trail::VAR:
+ if (rule.trail != Tag::NONE) {
+ const Tag &tag = dfa.tags[rule.trail];
+ if (tag.type == Tag::VAR) {
if (dfa.base_ctxmarker) {
o.wstring(opts->input_api.stmt_restorectx_var_base(ind,
- dfa.vartags[trail.var].expr()));
+ vartag_expr(tag.name, tag.rule)));
} else {
o.wstring(opts->input_api.stmt_restorectx_var(ind));
}
- break;
- case Trail::FIX:
- o.wstring(opts->input_api.stmt_restorectx_fix(ind,
- dfa.fixtags[trail.fix].dist));
- break;
+ } else {
+ o.wstring(opts->input_api.stmt_restorectx_fix(ind, tag.fix.dist));
+ }
}
if (opts->target == opt_t::SKELETON) {
o.wind(ind).wstring(yySetupRule).ws("\n");
}
std::string action = code->text;
- subst_contexts(action, rule, dfa.vartags, dfa.fixtags);
+ subst_contexts(action, rule, dfa.tags);
o.wline_info(code->loc.line, code->loc.filename.c_str())
.wind(ind).wstring(action).ws("\n")
.wdelay_line_info();
if (tags != 0) {
if (dfa.base_ctxmarker) {
o.wstring(opts->input_api.stmt_dist(ind,
- dfa.tagpool[tags], dfa.vartags));
+ dfa.tagpool[tags], dfa.tags));
} else {
o.wstring(opts->input_api.stmt_backupctx(ind));
}
std::set<std::string> ctxnames;
if (base_ctxmarker) {
- for (size_t i = 0; i < vartags.size(); ++i) {
- ctxnames.insert(vartags[i].name());
+ for (size_t i = 0; i < tags.size(); ++i) {
+ const Tag &t = tags[i];
+ if (t.type == Tag::VAR && t.var.orig == i) {
+ ctxnames.insert(vartag_name(t.name, t.rule));
+ }
}
ob.contexts.insert(ctxnames.begin(), ctxnames.end());
}
bWroteCondCheck = true;
}
+std::string vartag_name(const std::string *name, size_t rule) // builds a variable tag's name: opts->contexts_prefix + rule number + optional tag name
+{
+ std::ostringstream s;
+ s << opts->contexts_prefix << rule; // prefix and rule number are always present
+ if (name != NULL) { // name may be NULL; then nothing more is appended
+ s << *name;
+ }
+ return s.str();
+}
+
+std::string vartag_expr(const std::string *name, size_t rule) // builds an expression for a variable tag from the opts->contexts_expr template
+{
+ const std::string s = vartag_name(name, rule);
+ std::string e = opts->contexts_expr; // work on a copy so the configured template itself is not modified
+ strrreplace(e, "@@", s); // presumably substitutes the "@@" placeholder with the tag name (strrreplace is defined elsewhere -- confirm)
+ return e;
+}
+
} // end namespace re2c
const bool *tags = dfa.tagpool[c.tags];
for (size_t j = 0; j < dfa.tagpool.ntags; ++j) {
if (tags[j]) {
- o.ws("<").wstring(dfa.vartags[j].name()).ws(">");
+ const Tag &t = dfa.tags[dfa.tags[j].var.orig];
+ o.ws("<").wstring(vartag_name(t.name, t.rule)).ws(">");
}
}
o.ws("\"]\n");
#include <assert.h>
#include <sstream>
+#include "src/codegen/emit.h"
#include "src/codegen/input_api.h"
#include "src/codegen/indent.h"
#include "src/conf/opt.h"
return s;
}
-std::string InputAPI::stmt_dist (uint32_t ind, const bool *tags,
- const std::vector<CtxVar> &contexts) const
+std::string InputAPI::stmt_dist (uint32_t ind, const bool *mask, // returns one indented statement assigning expr_dist() to every tag selected by mask
+ const std::valarray<Tag> &tags) const // mask is indexed in parallel with tags, so it is read at indices 0 .. tags.size()-1
{
std::string s = indent(ind);
- for (size_t i = 0; i < contexts.size(); ++i) {
- if (tags[i]) {
- s += contexts[i].expr() + " = ";
+ for (size_t i = 0; i < tags.size(); ++i) {
+ if (mask[i]) {
+ const Tag &t = tags[tags[i].var.orig]; // the variable is named after the tag recorded in var.orig, not after tag i itself
+ s += vartag_expr(t.name, t.rule) + " = "; // builds a chained assignment "a = b = ... = <dist>;"
}
}
return s + expr_dist() + ";\n";
}
-std::string InputAPI::expr_ctx(const std::string &ctx) const
+std::string InputAPI::expr_ctx(const std::string &var) const // wraps var into an expression whose form depends on the API type
{
switch (type_) {
- case DEFAULT: return "(" + opts->yyctxmarker + " + " + ctx + ")";
- case CUSTOM: return opts->yyctx + "(" + ctx + ")";
+ case DEFAULT: return "(" + opts->yyctxmarker + " + " + var + ")"; // var is added to opts->yyctxmarker
+ case CUSTOM: return opts->yyctx + "(" + var + ")"; // var is passed as the argument of opts->yyctx
default: assert(false);
}
}
-std::string InputAPI::expr_ctx_fix(const CtxFix &ctx, const std::vector<CtxVar> &ctxvars) const
+std::string InputAPI::expr_ctx_fix(const Tag &tag, const std::valarray<Tag> &tags) const // expression for a fixed tag: tag.fix.dist subtracted from a base position
{
std::ostringstream s;
- if (ctx.base == CtxFix::RIGHTMOST) {
+ if (tag.fix.base == Tag::NONE) { // no base tag: the distance is taken from the cursor (DEFAULT) or from yydist() (CUSTOM)
switch (type_) {
case DEFAULT:
// optimize '(YYCTXMARKER + ((YYCURSOR - YCTXMARKER) - yyctx))'
// to '(YYCURSOR - yyctx)'
- s << "(" << opts->yycursor << " - " << ctx.dist << ")";
+ s << "(" << opts->yycursor << " - " << tag.fix.dist << ")";
break;
case CUSTOM:
- s << opts->yyctx << "(" << opts->yydist << "() - " << ctx.dist << ")";
+ s << opts->yyctx << "(" << opts->yydist << "() - " << tag.fix.dist << ")";
break;
}
return s.str();
} else {
- s << "(" << ctxvars[ctx.base].expr() << " - " << ctx.dist << ")";
+ const Tag &t = tags[tags[tag.fix.base].var.orig]; // the base tag's variable is looked up through var.orig, which may point at another tag
+ s << "(" << vartag_expr(t.name, t.rule) << " - " << tag.fix.dist << ")"; // "(<base variable> - <dist>)", then wrapped by expr_ctx below
return expr_ctx(s.str());
}
}
#include "src/util/c99_stdint.h"
#include <set>
#include <string>
-#include <vector>
+#include <valarray>
#include "src/ir/ctx.h"
std::string stmt_backup (uint32_t ind) const;
std::string stmt_backupctx (uint32_t ind) const;
std::string expr_dist () const;
- std::string stmt_dist (uint32_t ind, const bool *tags,
- const std::vector<CtxVar> &contexts) const;
- std::string expr_ctx (const std::string &ctx) const;
- std::string expr_ctx_fix (const CtxFix &ctx, const std::vector<CtxVar> &ctxvars) const;
+ std::string stmt_dist (uint32_t ind, const bool *tagmask,
+ const std::valarray<Tag> &tags) const;
+ std::string expr_ctx(const std::string &var) const;
+ std::string expr_ctx_fix(const Tag &tag, const std::valarray<Tag> &tags) const;
std::string stmt_restore (uint32_t ind) const;
std::string stmt_restorectx_fix (uint32_t ind, size_t dist) const;
std::string stmt_restorectx_var (uint32_t ind) const;
void Warn::selfoverlapping_contexts(
uint32_t line,
const std::string &cond,
- const CtxVar &ctx)
+ const std::string *tagname)
{
if (mask[SELFOVERLAPPING_CONTEXTS] & WARNING)
{
error_accuml |= e;
const char *trail, *name;
- if (ctx.codename == NULL) {
+ if (tagname == NULL) {
trail = "trailing context";
name = "";
} else {
trail = "context ";
- name = ctx.codename->c_str();
+ name = tagname->c_str();
}
warning(names[SELFOVERLAPPING_CONTEXTS], line, e,
"%s%s %sis self-overlapping", trail, name,
void condition_order (uint32_t line);
void empty_class (uint32_t line);
void match_empty_string (uint32_t line);
- void selfoverlapping_contexts(uint32_t line, const std::string &cond, const CtxVar &ctx);
+ void selfoverlapping_contexts(uint32_t line, const std::string &cond, const std::string *tagname);
void swapped_range (uint32_t line, uint32_t l, uint32_t u);
void undefined_control_flow (const Skeleton &skel, std::vector<path_t> & paths, bool overflow);
void unreachable_rule (const std::string & cond, const Rule &rule);
, nStates(0)
, head(NULL)
, rules(dfa.rules)
- , vartags(dfa.vartags)
- , fixtags(dfa.fixtags)
+ , tags(dfa.tags)
, tagpool(dfa.tagpool)
// statistics
delete skeleton;
delete &rules;
- delete &vartags;
- delete &fixtags;
+ delete &tags;
delete &tagpool;
}
uint32_t nStates;
State * head;
std::valarray<Rule> &rules;
- std::vector<CtxVar> &vartags;
- std::vector<CtxFix> &fixtags;
+ std::valarray<Tag> &tags;
Tagpool &tagpool;
size_t max_fill;
bool need_backup;
namespace re2c
{
-CtxVar::CtxVar(const std::string *n, size_t r)
- : rule(r)
- , codename(n)
- , uniqname()
-{
- std::ostringstream s;
- s << rule;
- if (codename != NULL) {
- s << *codename;
- }
- uniqname = s.str();
-}
+const size_t Tag::NONE = std::numeric_limits<size_t>::max(); // sentinel index meaning "no tag"
-std::string CtxVar::name() const
+Tag::Tag() // default state: a variable tag with no rule and no name
+ : type(VAR)
+ , rule(Rule::NONE)
+ , name(NULL)
+{} // note: neither union member (var / fix) is initialized here
+
+void init_var_tag(Tag &tag, size_t r, const std::string *n, size_t o) // makes tag a variable tag of rule r named n, with var.orig set to o
{
- return opts->contexts_prefix + uniqname;
+ tag.type = Tag::VAR;
+ tag.rule = r;
+ tag.name = n;
+ tag.var.orig = o;
}
-std::string CtxVar::expr() const
+void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d) // makes tag a fixed tag of rule r named n, with base index b and distance d
{
- std::string e = opts->contexts_expr;
- strrreplace(e, "@@", opts->contexts_prefix + uniqname);
- return e;
+ tag.type = Tag::FIX;
+ tag.rule = r;
+ tag.name = n;
+ tag.fix.base = b; // b may be Tag::NONE, meaning there is no base tag
+ tag.fix.dist = d;
}
-const size_t CtxFix::RIGHTMOST = std::numeric_limits<size_t>::max();
-
} // namespace re2c
#ifndef _RE2C_IR_CTX_
#define _RE2C_IR_CTX_
-#include <limits>
#include <string>
-namespace re2c
-{
+#include "src/util/forbid_copy.h"
-static const size_t NO_TAG = std::numeric_limits<size_t>::max();
-
-struct CtxVar
+namespace re2c
{
- size_t rule;
- const std::string *codename;
- std::string uniqname;
-
- CtxVar(const std::string *n, size_t r);
- CtxVar(const CtxVar &ctx)
- : rule(ctx.rule)
- , codename(ctx.codename)
- , uniqname(ctx.uniqname)
- {}
- CtxVar& operator=(const CtxVar &ctx)
- {
- rule = ctx.rule;
- codename = ctx.codename;
- uniqname = ctx.uniqname;
- return *this;
- }
- std::string name() const;
- std::string expr() const;
-};
-struct CtxFix
+struct Tag // single tag record; the type field tells whether it is a variable or a fixed tag
{
- static const size_t RIGHTMOST;
+ static const size_t NONE; // sentinel index meaning "no tag"
+ enum {VAR, FIX} type; // selects which member of the union below is meaningful
size_t rule;
- const std::string *codename;
- size_t base;
- size_t dist;
-
- CtxFix(const std::string *n, size_t r, size_t b, size_t d)
- : rule(r)
- , codename(n)
- , base(b)
- , dist(d)
- {}
-};
-
-struct Trail
-{
- enum {NONE, VAR, FIX} type;
+ const std::string *name; // may be NULL; init_rules treats a leading NULL-named tag as the rule's trailing context
union
{
- size_t var;
- size_t fix;
+ struct
+ {
+ size_t orig; // index of the tag whose variable is used for this tag
+ } var;
+ struct
+ {
+ size_t base; // index of the base tag, or NONE
+ size_t dist; // distance subtracted from the base position
+ } fix;
};
- Trail(): type(NONE) {}
- void make_var(size_t v)
- {
- type = VAR;
- var = v;
- }
- void make_fix(size_t f)
- {
- type = FIX;
- fix = f;
- }
+ Tag(); // sets type, rule and name only
+ FORBID_COPY(Tag);
};
+void init_var_tag(Tag &tag, size_t r, const std::string *n, size_t o);
+void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d);
+
} // namespace re2c
#endif // _RE2C_IR_CTX_
visited[i] = true;
dfa_state_t *s = dfa.states[i];
- const size_t ntags = dfa.vartags.size();
+ const size_t ntags = dfa.tags.size();
// add tags before recursing to child states,
// so that tags propagate into loopbacks to this state
const bool *livetags)
{
const size_t nstates = dfa.states.size();
- const size_t ntags = dfa.vartags.size();
+ const size_t ntags = dfa.tags.size();
for (size_t i = 0; i < nstates; ++i) {
dfa_state_t *s = dfa.states[i];
for (size_t c = 0; c < dfa.nchars; ++c) {
bool *incompattbl)
{
const size_t nstates = dfa.states.size();
- const size_t ntags = dfa.vartags.size();
+ const size_t ntags = dfa.tags.size();
for (size_t i = 0; i < nstates; ++i) {
const dfa_state_t *s = dfa.states[i];
for (size_t c = 0; c < dfa.nchars; ++c) {
}
}
}
+
+ // fixed tags should not participate in deduplication, so
+ // each fixed tag is incompatible with all other tags
+ for (size_t i = 0; i < ntags; ++i) {
+ if (dfa.tags[i].type == Tag::FIX) {
+ for (size_t j = 0; j < ntags; ++j) {
+ incompattbl[i * ntags + j]
+ = incompattbl[j * ntags + i]
+ = j != i;
+ }
+ }
+ }
}
/* We have a binary relation on the set of all tags
* The algorithm takes quadratic (in the number of tags) time.
* static void equivalence_classes(const std::vector<bool> &incompattbl,
*/
-static size_t equivalence_classes(const bool *incompattbl,
+static void equivalence_classes(const bool *incompattbl,
size_t ntags, std::vector<size_t> &represent)
{
static const size_t END = std::numeric_limits<size_t>::max();
head[0] = c;
}
}
-
- size_t nreps = 0;
- for (size_t i = 0; i < ntags; ++i) {
- if (represent[i] == i) {
- ++nreps;
- }
- }
- return nreps;
}
static size_t patch_tagset(Tagpool &tagpool, size_t oldidx,
s->rule_tags = patch_tagset(dfa.tagpool, s->rule_tags, represent);
}
- const size_t ntags = dfa.vartags.size();
+ const size_t ntags = dfa.tags.size();
for (size_t i = 0; i < ntags; ++i) {
- dfa.vartags[i].uniqname = dfa.vartags[represent[i]].uniqname;
+ Tag &t = dfa.tags[i];
+ if (t.type == Tag::VAR) {
+ t.var.orig = represent[i];
+ }
}
}
size_t deduplicate_contexts(dfa_t &dfa,
const std::vector<size_t> &fallback)
{
- const size_t ntags = dfa.vartags.size();
+ const size_t ntags = dfa.tags.size();
if (ntags == 0) {
return 0;
}
incompatibility_table(dfa, live, fbctxs, incompattbl);
std::vector<size_t> represent(ntags, 0);
- const size_t nreps = equivalence_classes(incompattbl, ntags, represent);
+ equivalence_classes(incompattbl, ntags, represent);
+
+ size_t nreps = 0;
+ for (size_t i = 0; i < ntags; ++i) {
+ if (dfa.tags[i].type == Tag::VAR && represent[i] == i) {
+ ++nreps;
+ }
+ }
if (nreps < ntags) {
patch_tags(dfa, represent);
: states()
, nchars(charset.size() - 1) // (n + 1) bounds for n ranges
, rules(nfa.rules)
- , vartags(nfa.vartags)
- , fixtags(nfa.fixtags)
- , tagpool(*new Tagpool(vartags.size()))
+ , tags(*nfa.tags)
+ , tagpool(*new Tagpool(tags.size()))
{
- const size_t ntags = vartags.size();
+ const size_t ntags = tags.size();
const size_t nrules = rules.size();
const size_t mask_size = (nchars + 1) * ntags;
kitem_t *kstart = new kitem_t[nfa.size], *kend = kstart;
bool *ktags = new bool[ntags]();
bool *badtags = new bool[ntags]();
- bool *tags = new bool[mask_size];
+ bool *arctags = new bool[mask_size];
bool *mask = new bool[mask_size];
bool *fin = new bool[nrules];
std::vector<nfa_state_t*> *arcs = new std::vector<nfa_state_t*>[nchars];
find_state(kstart, kend, kernels, tagpool);
for (size_t i = 0; i < kernels.size(); ++i) {
memset(fin, 0, nrules * sizeof(bool));
- memset(tags, 0, mask_size * sizeof(bool));
+ memset(arctags, 0, mask_size * sizeof(bool));
memset(mask, 0, mask_size * sizeof(bool));
for(size_t c = 0; c < nchars; ++c) {
arcs[c].clear();
for (const Range *r = n->value.ran.ran; r; r = r->next ()) {
for (; charset[c] != r->lower(); ++c);
for (; charset[c] != r->upper(); ++c) {
- merge_tags_with_mask(&tags[c * ntags], newtags,
+ merge_tags_with_mask(&arctags[c * ntags], newtags,
&mask[c * ntags], rules[m->rule].tags,
badtags, ntags);
arcs[c].push_back(m);
break;
}
case nfa_state_t::FIN:
- merge_tags_with_mask(&tags[nchars * ntags], newtags,
+ merge_tags_with_mask(&arctags[nchars * ntags], newtags,
&mask[nchars * ntags], rules[n->rule].tags,
badtags, ntags);
fin[n->rule] = true;
closure(kstart, kend, a[j], ktags, badtags, ntags);
}
s->arcs[c] = find_state(kstart, kend, kernels, tagpool);
- s->tags[c] = tagpool.insert(&tags[c * ntags]);
+ s->tags[c] = tagpool.insert(&arctags[c * ntags]);
}
- s->rule_tags = tagpool.insert(&tags[nchars * ntags]);
+ s->rule_tags = tagpool.insert(&arctags[nchars * ntags]);
// choose the first rule (the one with smallest rank)
size_t r;
for (size_t i = 0; i < ntags; ++i) {
if (badtags[i]) {
// TODO: use rule line, add rule reference to context struct
- warn.selfoverlapping_contexts(line, cond, vartags[i]);
+ warn.selfoverlapping_contexts(line, cond, tags[i].name);
}
}
delete[] kstart;
delete[] ktags;
delete[] badtags;
- delete[] tags;
+ delete[] arctags;
delete[] mask;
delete[] fin;
delete[] arcs;
std::vector<dfa_state_t*> states;
const size_t nchars;
std::valarray<Rule> &rules;
- std::vector<CtxVar> &vartags;
- std::vector<CtxFix> &fixtags;
+ std::valarray<Tag> &tags;
Tagpool &tagpool;
dfa_t(const nfa_t &nfa, const charset_t &charset,
namespace re2c {
-static size_t sizeof_regexp(const RegExp *re)
+static size_t count(const RegExp *re, size_t &ntags) // returns the size of re and increments ntags once per TAG node found in it
{
switch (re->type) {
case RegExp::NIL:
case RegExp::SYM:
return 1;
case RegExp::ALT:
- return sizeof_regexp(re->alt.re1)
- + sizeof_regexp(re->alt.re2)
+ return count(re->alt.re1, ntags) // both alternatives are visited, so tags on either side are counted
+ + count(re->alt.re2, ntags)
+ 1;
case RegExp::CAT:
- return sizeof_regexp(re->cat.re1)
- + sizeof_regexp(re->cat.re2);
+ return count(re->cat.re1, ntags)
+ + count(re->cat.re2, ntags);
case RegExp::ITER:
- return sizeof_regexp(re->iter)
+ return count(re->iter, ntags)
+ 1;
case RegExp::TAG:
+ ++ntags; // ntags is a running total: it is never reset here
return 1;
default:
assert(false);
}
}
-size_t sizeof_regexps(const std::vector<const RegExpRule*> &regexps)
+size_t counters(const std::vector<const RegExpRule*> &regexps, size_t &ntags) // returns the total size over all rules; ntags is only incremented, so the caller must initialize it
{
const size_t nregexps = regexps.size();
size_t size = nregexps - 1;
for (size_t i = 0; i < nregexps; ++i) {
- size += sizeof_regexp(regexps[i]->re) + 1;
+ size += count(regexps[i]->re, ntags) + 1; // per rule: size of its regexp plus one
}
return size;
}
namespace re2c {
-static void fatal_tags_in_trail(uint32_t line)
-{
- error("line %u: tags in trailing context", line);
- exit(1);
-}
-
static void assert_no_tags_in_trailing_context(const Rule &rule,
- const std::vector<CtxVar> &vartags,
- const std::vector<CtxFix> &fixtags)
+ const std::valarray<Tag> &tags) // reports an error and exits if any tag in [rule.ltag, rule.htag) has a NULL name
{
- const uint32_t line = rule.info->loc.line;
// rule tags should not contain other trailing contexts
- for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) {
- if (fixtags[i].codename == NULL) {
- fatal_tags_in_trail(line);
+ for (size_t i = rule.ltag; i < rule.htag; ++i) {
+ if (tags[i].name == NULL) { // an unnamed tag is still inside the rule's tag range
+ error("line %u: tags in trailing context",
+ rule.info->loc.line);
+ exit(1); // fatal: the process terminates with status 1
}
}
- for (size_t i = rule.lvartag; i < rule.hvartag; ++i) {
- if (vartags[i].codename == NULL) {
- fatal_tags_in_trail(line);
- }
- }
- // fixed trailing context must be fixed on cursor
- if (rule.trail.type == Trail::FIX
- && fixtags[rule.trail.fix].base != CtxFix::RIGHTMOST) {
- fatal_tags_in_trail(line);
- }
-}
-
-static void fatal_tag_reuse(uint32_t line, const char *tag)
-{
- error("line %u: tag '%s' is used multiple times in the same rule", line, tag);
- exit(1);
}
static void assert_tags_used_once(const Rule &rule,
- const std::vector<CtxVar> &vartags,
- const std::vector<CtxFix> &fixtags)
+ const std::valarray<Tag> &tags) // reports an error and exits if two tags in [rule.ltag, rule.htag) share a name
{
- const uint32_t line = rule.info->loc.line;
std::set<std::string> names;
- for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) {
- const std::string *name = fixtags[i].codename;
- if (name && !names.insert(*name).second) {
- fatal_tag_reuse(line, name->c_str());
- }
- }
- for (size_t i = rule.lvartag; i < rule.hvartag; ++i) {
- const std::string *name = vartags[i].codename;
+ for (size_t i = rule.ltag; i < rule.htag; ++i) {
+ const std::string *name = tags[i].name; // NULL names are skipped by the check below
if (name && !names.insert(*name).second) {
- fatal_tag_reuse(line, name->c_str());
+ error("line %u: tag '%s' is used multiple"
+ " times in the same rule",
+ rule.info->loc.line, name->c_str());
+ exit(1); // fatal: the process terminates with status 1
}
}
}
-void init_rules(std::valarray<Rule> &rules,
- const std::vector<const RegExpRule*> &regexps,
- const std::vector<CtxVar> &vartags,
- const std::vector<CtxFix> &fixtags)
+void init_rules(const std::vector<const RegExpRule*> &regexps, // fills rules[r] from regexps[r] and from the tags that belong to rule r
+ std::valarray<Rule> &rules,
+ const std::valarray<Tag> &tags) // tags of one rule are expected to be contiguous and ordered by rule index
{
- const size_t nf = fixtags.size();
- const size_t nv = vartags.size();
const size_t nr = rules.size();
+ const size_t nt = tags.size();
- for (size_t r = 0, f = 0, v = 0; r < nr; ++r) {
+ for (size_t r = 0, t = 0; r < nr; ++r) { // t runs over all tags exactly once across the whole loop
Rule &rule = rules[r];
rule.info = regexps[r]->info;
rule.nullable = nullable_rule(regexps[r]);
- rule.lfixtag = f;
- for (; f < nf && fixtags[f].rule == r; ++f);
- rule.hfixtag = f;
-
- rule.lvartag = v;
- for (; v < nv && vartags[v].rule == r; ++v);
- rule.hvartag = v;
+ rule.ltag = t; // [ltag, htag) is the half-open range of this rule's tags
+ for (; t < nt && tags[t].rule == r; ++t);
+ rule.htag = t;
// mark *all* variable tags, including trailing context
- rule.tags = new bool[nv]();
- for (size_t t = rule.lvartag; t < rule.hvartag; ++t) {
- rule.tags[t] = true;
+ rule.tags = new bool[nt](); // zero-initialized mask over all tags, not only this rule's
+ for (size_t i = rule.ltag; i < rule.htag; ++i) {
+ rule.tags[i] = tags[i].type == Tag::VAR; // fixed tags stay unmarked
}
// tags in trailing context are forbidden (they make no sense),
// and since tags are constructed in reversed order, this implies
// that trailing context, if present, can only be the first tag
- if (rule.lfixtag < rule.hfixtag && fixtags[rule.lfixtag].codename == NULL) {
- rule.trail.make_fix(rule.lfixtag++);
- } else if (rule.lvartag < rule.hvartag && vartags[rule.lvartag].codename == NULL) {
- rule.trail.make_var(rule.lvartag++);
+ if (rule.ltag < rule.htag && tags[rule.ltag].name == NULL) {
+ rule.trail = rule.ltag++; // the trailing context is excluded from [ltag, htag) from here on
}
// sanity checks
- assert_no_tags_in_trailing_context(rule, vartags, fixtags);
- assert_tags_used_once(rule, vartags, fixtags);
+ assert_no_tags_in_trailing_context(rule, tags);
+ assert_tags_used_once(rule, tags);
}
}
static const size_t VARDIST = std::numeric_limits<size_t>::max();
static void make_tags_var(size_t nrule,
- std::vector<CtxVar> &vartags,
- std::vector<size_t> &tagidxs,
- const RegExp *re,
- size_t &dist)
+ std::valarray<Tag> &tags, size_t &tagidx,
+ const RegExp *re, size_t &dist)
{
switch (re->type) {
case RegExp::NIL: break;
break;
case RegExp::ALT: {
size_t d1 = dist, d2 = dist;
- make_tags_var(nrule, vartags, tagidxs, re->alt.re1, d1);
- make_tags_var(nrule, vartags, tagidxs, re->alt.re2, d2);
+ make_tags_var(nrule, tags, tagidx, re->alt.re1, d1);
+ make_tags_var(nrule, tags, tagidx, re->alt.re2, d2);
dist = (d1 == d2) ? d1 : VARDIST;
break;
}
case RegExp::CAT:
- make_tags_var(nrule, vartags, tagidxs, re->cat.re2, dist);
- make_tags_var(nrule, vartags, tagidxs, re->cat.re1, dist);
+ make_tags_var(nrule, tags, tagidx, re->cat.re2, dist);
+ make_tags_var(nrule, tags, tagidx, re->cat.re1, dist);
break;
case RegExp::ITER:
dist = VARDIST;
- make_tags_var(nrule, vartags, tagidxs, re->iter, dist);
+ make_tags_var(nrule, tags, tagidx, re->iter, dist);
break;
- case RegExp::TAG:
- tagidxs.push_back(vartags.size());
- vartags.push_back(CtxVar(re->tag, nrule));
+ case RegExp::TAG: {
+ const size_t orig = tagidx;
+ init_var_tag(tags[tagidx++], nrule, re->tag, orig);
break;
+ }
}
}
static void make_tags_var_fix(size_t nrule,
- std::vector<CtxVar> &vartags,
- std::vector<CtxFix> &fixtags,
- std::vector<size_t> &tagidxs,
- const RegExp *re,
- size_t &dist,
- size_t &base)
+ std::valarray<Tag> &tags, size_t &tagidx,
+ const RegExp *re, size_t &dist, size_t &base)
{
switch (re->type) {
case RegExp::NIL:
case RegExp::SYM:
case RegExp::ALT:
case RegExp::ITER:
- make_tags_var(nrule, vartags, tagidxs, re, dist);
+ make_tags_var(nrule, tags, tagidx, re, dist);
break;
case RegExp::CAT:
- make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re2, dist, base);
- make_tags_var_fix(nrule, vartags, fixtags, tagidxs, re->cat.re1, dist, base);
+ make_tags_var_fix(nrule, tags, tagidx, re->cat.re2, dist, base);
+ make_tags_var_fix(nrule, tags, tagidx, re->cat.re1, dist, base);
break;
case RegExp::TAG: {
const std::string *name = re->tag;
if (dist == VARDIST) {
- tagidxs.push_back(base = vartags.size());
- vartags.push_back(CtxVar(name, nrule));
+ base = tagidx;
+ init_var_tag(tags[tagidx++], nrule, name, base);
dist = 0;
} else {
- tagidxs.push_back(NO_TAG);
- fixtags.push_back(CtxFix(name, nrule, base, dist));
+ init_fix_tag(tags[tagidx++], nrule, name, base, dist);
}
if (name == NULL) {
dist = 0;
* calculate fixed tag value based on initialized value
* (and spoil default value expected by the programmer).
*/
-void make_tags(const std::vector<const RegExpRule*> &rs,
- std::vector<CtxVar> &vartags,
- std::vector<CtxFix> &fixtags,
- std::vector<size_t> &tagidxs)
+void make_tags(const std::vector<const RegExpRule*> &rs, std::valarray<Tag> &tags) // initializes tags in place, rule by rule; tags must already be sized for all tags
{
const size_t nrs = rs.size();
- for (size_t i = 0; i < nrs; ++i) {
- size_t base = CtxFix::RIGHTMOST, dist = 0;
+ for (size_t i = 0, tagidx = 0; i < nrs; ++i) { // tagidx is shared by all rules: it is the next free slot in tags
+ size_t base = Tag::NONE, dist = 0; // each rule starts with no base tag and zero distance
// don't optimize fixed-length trailing context with generic API
// unless tags are explicitly enabled: generic API needs base tag
// to restore fixed-length trailing context, and base existence
if (!opts->contexts && opts->input_api.type() == InputAPI::CUSTOM) {
dist = VARDIST;
}
- make_tags_var_fix(i, vartags, fixtags, tagidxs, rs[i]->re, dist, base);
+ make_tags_var_fix(i, tags, tagidx, rs[i]->re, dist, base);
}
}
namespace re2c {
nfa_t::nfa_t(const std::vector<const RegExpRule*> &regexps)
- : max_size(sizeof_regexps(regexps))
+ : max_size(0) // the real value is computed in the body, together with the tag count
, size(0)
- , states(new nfa_state_t[max_size])
+ , states(NULL) // allocated in the body once max_size is known
, rules(*new std::valarray<Rule>(regexps.size()))
- , vartags(*new std::vector<CtxVar>)
- , fixtags(*new std::vector<CtxFix>)
+ , tags(NULL) // allocated in the body once the number of tags is known
, root(NULL)
{
- std::vector<size_t> tagidxs;
- make_tags(regexps, vartags, fixtags, tagidxs);
+ size_t ntags = 0; // counters() only increments this, so it has to start at zero
+ max_size = counters(regexps, ntags);
- regexps2nfa(regexps, *this, tagidxs.begin());
+ tags = new std::valarray<Tag>(ntags);
+ make_tags(regexps, *tags); // must run before regexps2nfa, which reads the type of each tag
- init_rules(rules, regexps, vartags, fixtags);
+ states = new nfa_state_t[max_size];
+ regexps2nfa(regexps, *this);
+
+ init_rules(regexps, rules, *tags);
}
nfa_t::~nfa_t()
struct nfa_t
{
- const size_t max_size;
+ size_t max_size; // not const: the constructor assigns it in its body
size_t size;
nfa_state_t *states;
std::valarray<Rule> &rules;
- std::vector<CtxVar> &vartags;
- std::vector<CtxFix> &fixtags;
+ std::valarray<Tag> *tags; // pointer rather than reference: allocated in the constructor body
nfa_state_t *root;
nfa_t(const std::vector<const RegExpRule*> &rs);
FORBID_COPY(nfa_t);
};
-typedef std::vector<size_t>::const_iterator tagidx_t;
-
-size_t sizeof_regexps(const std::vector<const RegExpRule*> &regexps);
-void make_tags(const std::vector<const RegExpRule*> &rs,
- std::vector<CtxVar> &vartags,
- std::vector<CtxFix> &fixtags,
- std::vector<size_t> &tagidxs);
-void regexps2nfa(const std::vector<const RegExpRule*> &rs,
- nfa_t &nfa, tagidx_t tagidx);
+size_t counters(const std::vector<const RegExpRule*> &regexps, size_t &ntags); // returns the total size; adds the number of tags to ntags
+void make_tags(const std::vector<const RegExpRule*> &regexps, std::valarray<Tag> &tags); // initializes the pre-sized tags array in place
+void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa); // builds NFA states into nfa and sets nfa.root
bool nullable_rule(const RegExpRule *rule);
-void init_rules(std::valarray<Rule> &rules,
- const std::vector<const RegExpRule*> &regexps,
- const std::vector<CtxVar> &vartags,
- const std::vector<CtxFix> &fixtags);
+void init_rules(const std::vector<const RegExpRule*> &regexps, // fills rules from regexps and tags
+ std::valarray<Rule> &rules,
+ const std::valarray<Tag> &tags);
} // namespace re2c
namespace re2c {
static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule,
- tagidx_t &tagidx, const RegExp *re, nfa_state_t *t)
+ size_t &tagidx, const RegExp *re, nfa_state_t *t)
{
nfa_state_t *s = NULL;
switch (re->type) {
s = &nfa.states[nfa.size++];
s->alt(nrule, t, regexp2nfa(nfa, nrule, tagidx, re->iter, s));
break;
- case RegExp::TAG: {
- const size_t idx = *tagidx++;
- if (idx != NO_TAG) {
+ case RegExp::TAG:
+ if ((*nfa.tags)[tagidx].type == Tag::VAR) {
s = &nfa.states[nfa.size++];
- s->ctx(nrule, t, idx);
+ s->ctx(nrule, t, tagidx);
} else {
s = t;
}
+ ++tagidx;
break;
- }
}
return s;
}
static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule,
- tagidx_t &tagidx, const RegExpRule *rule)
+ size_t &tagidx, const RegExpRule *rule) // tagidx is an index into nfa.tags, passed by reference and advanced by regexp2nfa
{
nfa_state_t *s = &nfa.states[nfa.size++];
s->fin(nrule);
return regexp2nfa(nfa, nrule, tagidx, rule->re, s);
}
-void regexps2nfa(const std::vector<const RegExpRule*> &rs,
- nfa_t &nfa, tagidx_t tagidx)
+void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa)
{
- const size_t nrs = rs.size();
+ const size_t nregexps = regexps.size();
- if (nrs == 0) {
+ if (nregexps == 0) {
return;
}
- nfa_state_t *s = regexp2nfa_rule(nfa, 0, tagidx, rs[0]);
- for (size_t i = 1; i < nrs; ++i) {
+ size_t tagidx = 0;
+ nfa_state_t *s = regexp2nfa_rule(nfa, 0, tagidx, regexps[0]);
+ for (size_t i = 1; i < nregexps; ++i) {
nfa_state_t *t = &nfa.states[nfa.size++];
- t->alt(i, s, regexp2nfa_rule(nfa, i, tagidx, rs[i]));
+ t->alt(i, s, regexp2nfa_rule(nfa, i, tagidx, regexps[i]));
s = t;
}
nfa.root = s;
const RuleInfo *info;
- size_t lvartag;
- size_t hvartag;
- size_t lfixtag;
- size_t hfixtag;
- Trail trail;
+ size_t ltag;
+ size_t htag;
+ size_t trail;
bool nullable;
bool *tags;
std::set<uint32_t> shadow;
Rule()
: info(NULL)
- , lvartag(0)
- , hvartag(0)
- , lfixtag(0)
- , hfixtag(0)
- , trail()
+ , ltag(0)
+ , htag(0)
+ , trail(Tag::NONE)
, nullable(false)
, tags(NULL)
, shadow()
continue;
}
size_t len = static_cast<size_t>(head - tail) - 1;
- const Trail &trail = skel.rules[rule_idx].trail;
- switch (trail.type) {
- case Trail::NONE:
- return len;
- case Trail::FIX:
- return len - skel.fixtags[trail.fix].dist;
- case Trail::VAR: {
- const size_t ctx = trail.var;
+ const size_t trail = skel.rules[rule_idx].trail;
+ if (trail == Tag::NONE) {
+ return len;
+ }
+ const Tag &tag = skel.tags[trail];
+ switch (tag.type) {
+ case Tag::VAR:
for (; tail != head; ++tail) {
- if (skel.nodes[*tail].tags[ctx]) {
+ if (skel.nodes[*tail].tags[trail]) {
return static_cast<size_t>(head - tail) - 1;
}
}
assert(false);
- break;
- }
+ case Tag::FIX:
+ return len - tag.fix.dist;
}
}
return 0;
, sizeof_key(8)
, rules(dfa.rules)
, defrule(def)
- , fixtags(dfa.fixtags)
+ , tags(dfa.tags)
{
const size_t nc = cs.size() - 1;
- const size_t ntags = dfa.tagpool.ntags;
+ const size_t ntags = tags.size();
// initialize skeleton nodes
for (size_t i = 0; i < nodes_count - 1; ++i) {
size_t sizeof_key;
std::valarray<Rule> &rules;
const size_t defrule;
- std::vector<CtxFix> &fixtags;
+ const std::valarray<Tag> &tags;
Skeleton(const dfa_t &dfa, const charset_t &cs, size_t def,
const std::string &dfa_name, const std::string &dfa_cond,