struct Initial
{
- static const size_t NOSAVE;
+ static const size_t NOSAVE;
- label_t label;
- size_t save;
+ label_t label;
+ size_t save;
- inline Initial (label_t l, size_t s)
- : label (l)
- , save (s)
- {}
+ inline Initial (label_t l, size_t s)
+ : label (l)
+ , save (s)
+ {}
};
// Collection of (state, tcid_t) pairs. DFA::prepare() adds one entry per
// fallback state through find_or_add() and stores the returned index as
// that state's SAVE action value.
// NOTE(review): uniq_vector_t presumably deduplicates entries -- confirm.
typedef uniq_vector_t<std::pair<const State*, tcid_t> > accept_t;
class Action
{
public:
- enum type_t
- {
- MATCH,
- INITIAL,
- SAVE,
- MOVE,
- ACCEPT,
- RULE
- } type;
- union
- {
- Initial * initial;
- size_t save;
- const accept_t * accepts;
- size_t rule;
- } info;
+ enum type_t
+ {
+ MATCH,
+ INITIAL,
+ SAVE,
+ MOVE,
+ ACCEPT,
+ RULE
+ } type;
+ union
+ {
+ Initial * initial;
+ size_t save;
+ const accept_t * accepts;
+ size_t rule;
+ } info;
public:
- inline Action ()
- : type (MATCH)
- , info ()
- {}
- ~Action ()
- {
- if (type == INITIAL) {
- delete info.initial;
- }
- }
- void set_initial (label_t label)
- {
- if (type == MATCH) {
- // ordinary state with no special action
- type = INITIAL;
- info.initial = new Initial(label, Initial::NOSAVE);
- } else if (type == SAVE) {
- // fallback state: do not loose 'yyaccept'
- type = INITIAL;
- info.initial = new Initial(label, info.save);
- } else if (type == INITIAL) {
- // already marked as initial, probably reuse mode
- info.initial->label = label;
- } else {
- assert(false);
- }
- }
- void set_save (size_t save)
- {
- assert(type == MATCH);
- type = SAVE;
- info.save = save;
- }
- void set_move ()
- {
- assert(type == MATCH);
- type = MOVE;
- }
- void set_accept (const accept_t * accepts)
- {
- assert(type == MATCH);
- type = ACCEPT;
- info.accepts = accepts;
- }
- void set_rule (size_t rule)
- {
- assert(type == MATCH);
- type = RULE;
- info.rule = rule;
- }
+ inline Action ()
+ : type (MATCH)
+ , info ()
+ {}
+ ~Action ()
+ {
+ if (type == INITIAL) {
+ delete info.initial;
+ }
+ }
+ void set_initial (label_t label)
+ {
+ if (type == MATCH) {
+ // ordinary state with no special action
+ type = INITIAL;
+ info.initial = new Initial(label, Initial::NOSAVE);
+ } else if (type == SAVE) {
+ // fallback state: do not loose 'yyaccept'
+ type = INITIAL;
+ info.initial = new Initial(label, info.save);
+ } else if (type == INITIAL) {
+ // already marked as initial, probably reuse mode
+ info.initial->label = label;
+ } else {
+ assert(false);
+ }
+ }
+ void set_save (size_t save)
+ {
+ assert(type == MATCH);
+ type = SAVE;
+ info.save = save;
+ }
+ void set_move ()
+ {
+ assert(type == MATCH);
+ type = MOVE;
+ }
+ void set_accept (const accept_t * accepts)
+ {
+ assert(type == MATCH);
+ type = ACCEPT;
+ info.accepts = accepts;
+ }
+ void set_rule (size_t rule)
+ {
+ assert(type == MATCH);
+ type = RULE;
+ info.rule = rule;
+ }
};
} // namespace re2c
// Sentinel meaning "no save value": the largest representable size_t.
const size_t Initial::NOSAVE = std::numeric_limits<size_t>::max();
/* Builds the linked list of State objects from the intermediate 'dfa'.
 *
 * dfa  - source automaton (states, charset, rules, tags, ...)
 * fill - per-state value copied into State::fill, indexed like dfa.states
 * def  - stored as def_rule
 * key  - stored as key_size
 * nm, cn, ln, su - stored as name, cond, line and setup
 */
DFA::DFA
    ( const dfa_t &dfa
    , const std::vector<size_t> &fill
    , size_t def
    , size_t key
    , const std::string &nm
    , const std::string &cn
    , uint32_t ln
    , const std::string &su
    )
    : accepts ()
    , name (nm)
    , cond (cn)
    , line (ln)
    , lbChar(0)
    , ubChar(dfa.charset.back())
    , nStates(0)
    , head(NULL)
    , tags0(dfa.tcid0)
    , charset(dfa.charset)
    , rules(dfa.rules)
    , tags(dfa.tags)
    , mtagvers(dfa.mtagvers)
    , finvers(dfa.finvers)
    , tcpool(dfa.tcpool)
    , max_fill (0)
    , max_nmatch(0)
    , need_backup (false)
    , need_accept (false)
    , oldstyle_ctxmarker (false)
    , maxtagver (dfa.maxtagver)
    , def_rule (def)
    , key_size (key)
    , bitmaps (std::min(ubChar, 256u))
    , setup(su)
{
    const size_t nstates = dfa.states.size();
    const size_t nchars = dfa.nchars;

    // create every state up front so that arcs can point at any state
    // by its index, including states that come later in the list
    State **i2s = new State*[nstates];
    for (size_t i = 0; i < nstates; ++i)
    {
        i2s[i] = new State;
    }

    // 'p' always points at the 'next' link to be filled in
    State **p = &head;
    for (size_t i = 0; i < nstates; ++i)
    {
        dfa_state_t *t = dfa.states[i];
        State *s = i2s[i];

        ++nStates;
        *p = s;
        p = &s->next;

        s->rule = t->rule;
        // the two slots after the per-character entries hold the
        // rule tags and the fallback tags
        s->rule_tags = t->tcid[dfa.nchars];
        s->fall_tags = t->tcid[dfa.nchars + 1];
        s->fill = fill[i];
        s->fallback = t->fallback; // see note [fallback states]

        // collapse runs of characters that share target and tags into
        // a single span; at most 'nchars' spans are needed
        s->go.span = allocate<Span>(nchars);
        uint32_t j = 0;
        for (uint32_t c = 0; c < nchars; ++j)
        {
            const size_t to = t->arcs[c];
            const tcid_t tc = t->tcid[c];
            // advance 'c' to the first character not belonging to this run
            for (;++c < nchars && t->arcs[c] == to && t->tcid[c] == tc;);
            s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to];
            s->go.span[j].ub = charset[c];
            s->go.span[j].tags = tc;
        }
        s->go.nSpans = j;
    }
    *p = NULL;

    delete[] i2s;
}
/* Destroys all states in the list and the objects behind the reference
 * and pointer members taken from dfa_t in the constructor.
 * NOTE(review): deleting through references implies that DFA takes
 * ownership of those heap objects -- confirm against dfa_t. */
DFA::~DFA()
{
    State *s;

    while ((s = head))
    {
        head = s->next;
        delete s;
    }

    delete &charset;
    delete &rules;
    delete &tags;
    delete &mtagvers;
    delete[] finvers;
    delete &tcpool;
}
/* note [reordering DFA states]
*/
void DFA::reorder()
{
- std::vector<State*> ord;
- ord.reserve(nStates);
-
- std::queue<State*> todo;
- todo.push(head);
-
- std::set<State*> done;
- done.insert(head);
-
- for(;!todo.empty();)
- {
- State *s = todo.front();
- todo.pop();
- ord.push_back(s);
- for(uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- State *q = s->go.span[i].to;
- if(q && done.insert(q).second)
- {
- todo.push(q);
- }
- }
- }
-
- assert(nStates == ord.size());
-
- ord.push_back(NULL);
- for(uint32_t i = 0; i < nStates; ++i)
- {
- ord[i]->next = ord[i + 1];
- }
+ std::vector<State*> ord;
+ ord.reserve(nStates);
+
+ std::queue<State*> todo;
+ todo.push(head);
+
+ std::set<State*> done;
+ done.insert(head);
+
+ for(;!todo.empty();)
+ {
+ State *s = todo.front();
+ todo.pop();
+ ord.push_back(s);
+ for(uint32_t i = 0; i < s->go.nSpans; ++i)
+ {
+ State *q = s->go.span[i].to;
+ if(q && done.insert(q).second)
+ {
+ todo.push(q);
+ }
+ }
+ }
+
+ assert(nStates == ord.size());
+
+ ord.push_back(NULL);
+ for(uint32_t i = 0; i < nStates; ++i)
+ {
+ ord[i]->next = ord[i + 1];
+ }
}
/* Inserts state 's' into the list immediately after state 'next'
 * (despite its name, 'next' is the predecessor of the new state)
 * and bumps the state counter. */
void DFA::addState(State *s, State *next)
{
    ++nStates;
    s->next = next->next;
    next->next = s;
}
} // namespace re2c
struct State
{
- label_t label;
- State * next;
- size_t fill;
- bool fallback;
+ label_t label;
+ State * next;
+ size_t fill;
+ bool fallback;
- size_t rule;
- tcid_t rule_tags;
- tcid_t fall_tags;
- bool isBase;
- Go go;
- Action action;
+ size_t rule;
+ tcid_t rule_tags;
+ tcid_t fall_tags;
+ bool isBase;
+ Go go;
+ Action action;
- State ()
- : label (label_t::first ())
- , next (0)
- , fill (0)
- , fallback (false)
- , rule (Rule::NONE)
- , rule_tags (TCID0)
- , fall_tags (TCID0)
- , isBase (false)
- , go ()
- , action ()
- {}
- ~State ()
- {
- operator delete (go.span);
- }
+ State ()
+ : label (label_t::first ())
+ , next (0)
+ , fill (0)
+ , fallback (false)
+ , rule (Rule::NONE)
+ , rule_tags (TCID0)
+ , fall_tags (TCID0)
+ , isBase (false)
+ , go ()
+ , action ()
+ {}
+ ~State ()
+ {
+ operator delete (go.span);
+ }
- FORBID_COPY (State);
+ FORBID_COPY (State);
};
struct DFA
{
- accept_t accepts;
- const std::string name;
- const std::string cond;
- const uint32_t line;
- uint32_t lbChar;
- uint32_t ubChar;
- uint32_t nStates;
- State * head;
- const tcid_t tags0;
- std::vector<uint32_t> &charset;
- std::valarray<Rule> &rules;
- std::vector<Tag> &tags;
- std::set<tagver_t> &mtagvers;
- const tagver_t *finvers;
- tcpool_t &tcpool;
- size_t max_fill;
- size_t max_nmatch;
- bool need_backup;
- bool need_accept;
- bool oldstyle_ctxmarker;
- tagver_t maxtagver;
- const size_t def_rule;
- const size_t key_size;
- bitmaps_t bitmaps;
- std::string setup;
+ accept_t accepts;
+ const std::string name;
+ const std::string cond;
+ const uint32_t line;
+ uint32_t lbChar;
+ uint32_t ubChar;
+ uint32_t nStates;
+ State * head;
+ const tcid_t tags0;
+ std::vector<uint32_t> &charset;
+ std::valarray<Rule> &rules;
+ std::vector<Tag> &tags;
+ std::set<tagver_t> &mtagvers;
+ const tagver_t *finvers;
+ tcpool_t &tcpool;
+ size_t max_fill;
+ size_t max_nmatch;
+ bool need_backup;
+ bool need_accept;
+ bool oldstyle_ctxmarker;
+ tagver_t maxtagver;
+ const size_t def_rule;
+ const size_t key_size;
+ bitmaps_t bitmaps;
+ std::string setup;
- DFA ( const dfa_t &dfa
- , const std::vector<size_t> &fill
- , size_t def
- , size_t key
- , const std::string &nm
- , const std::string &cn
- , uint32_t ln
- , const std::string &su
- );
- ~DFA ();
- void reorder();
- void prepare(const opt_t *opts);
- void calc_stats(uint32_t ln, bool explicit_tags);
- void emit (Output &, uint32_t &, bool, bool &);
+ DFA ( const dfa_t &dfa
+ , const std::vector<size_t> &fill
+ , size_t def
+ , size_t key
+ , const std::string &nm
+ , const std::string &cn
+ , uint32_t ln
+ , const std::string &su
+ );
+ ~DFA ();
+ void reorder();
+ void prepare(const opt_t *opts);
+ void calc_stats(uint32_t ln, bool explicit_tags);
+ void emit (Output &, uint32_t &, bool, bool &);
private:
- void addState(State*, State *);
- void split (State *);
- void findBaseState ();
- void hoist_tags();
- void hoist_tags_and_skip(const opt_t *opts);
- void count_used_labels(std::set<label_t> &used, label_t start, label_t initial, bool force_start, bool fFlag) const;
- void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
- void emit_dot(OutputFile &o, bool last_cond) const;
+ void addState(State*, State *);
+ void split (State *);
+ void findBaseState ();
+ void hoist_tags();
+ void hoist_tags_and_skip(const opt_t *opts);
+ void count_used_labels(std::set<label_t> &used, label_t start, label_t initial, bool force_start, bool fFlag) const;
+ void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
+ void emit_dot(OutputFile &o, bool last_cond) const;
- FORBID_COPY (DFA);
+ FORBID_COPY (DFA);
};
} // namespace re2c
/* Prints the half-open character range [lower, upper) to stderr:
 * just "lower" when the range holds a single character, otherwise
 * "lower-last" where last == upper - 1. */
static void dump_adfa_range(uint32_t lower, uint32_t upper)
{
    fprintf(stderr, "%u", lower);
    if (--upper > lower) {
        fprintf(stderr, "-%u", upper);
    }
}
void dump_adfa(const DFA &dfa)
{
- fprintf(stderr,
- "digraph DFA {\n"
- " rankdir=LR\n"
- " node[shape=Mrecord fontname=fixed]\n"
- " edge[arrowhead=vee fontname=fixed]\n\n");
+ fprintf(stderr,
+ "digraph DFA {\n"
+ " rankdir=LR\n"
+ " node[shape=Mrecord fontname=fixed]\n"
+ " edge[arrowhead=vee fontname=fixed]\n\n");
- fprintf(stderr,
- " n [shape=point]"
- " n -> n%p [style=dotted label=\"", (void*)dfa.head);
- dump_tcmd(dfa.tcpool[dfa.tags0]);
- fprintf(stderr, "\"]\n");
+ fprintf(stderr,
+ " n [shape=point]"
+ " n -> n%p [style=dotted label=\"", (void*)dfa.head);
+ dump_tcmd(dfa.tcpool[dfa.tags0]);
+ fprintf(stderr, "\"]\n");
- for (const State *s = dfa.head; s; s = s->next) {
- const char *attr;
- Action::type_t action = s->action.type;
+ for (const State *s = dfa.head; s; s = s->next) {
+ const char *attr;
+ Action::type_t action = s->action.type;
- if (action == Action::ACCEPT) {
- attr = "style=filled fillcolor=gray";
- } else if (action == Action::RULE) {
- attr = "style=filled fillcolor=lightgray";
- } else {
- attr = "";
- }
- fprintf(stderr, " n%p [height=0.2 width=0.2 label=\"", (void*)s);
- if (s->fill && action != Action::MOVE) {
- fprintf(stderr, "F(%u) ", (uint32_t)s->fill);
- }
- if (action == Action::RULE) {
- const Rule &r = dfa.rules[s->action.info.rule];
- for (size_t t = r.ltag; t < r.htag; ++t) {
- if (t > r.ltag) fprintf(stderr, " ");
- const std::string *name = dfa.tags[t].name;
- fprintf(stderr, "%s(%d)",
- name ? name->c_str() : "/", dfa.finvers[t]);
- }
- }
- dump_tcmd(dfa.tcpool[s->go.tags]);
- fprintf(stderr, "\" %s]\n", attr);
+ if (action == Action::ACCEPT) {
+ attr = "style=filled fillcolor=gray";
+ } else if (action == Action::RULE) {
+ attr = "style=filled fillcolor=lightgray";
+ } else {
+ attr = "";
+ }
+ fprintf(stderr, " n%p [height=0.2 width=0.2 label=\"", (void*)s);
+ if (s->fill && action != Action::MOVE) {
+ fprintf(stderr, "F(%u) ", (uint32_t)s->fill);
+ }
+ if (action == Action::RULE) {
+ const Rule &r = dfa.rules[s->action.info.rule];
+ for (size_t t = r.ltag; t < r.htag; ++t) {
+ if (t > r.ltag) fprintf(stderr, " ");
+ const std::string *name = dfa.tags[t].name;
+ fprintf(stderr, "%s(%d)",
+ name ? name->c_str() : "/", dfa.finvers[t]);
+ }
+ }
+ dump_tcmd(dfa.tcpool[s->go.tags]);
+ fprintf(stderr, "\" %s]\n", attr);
- if (action == Action::ACCEPT) {
- const accept_t &accept = *s->action.info.accepts;
- for (uint32_t i = 0; i < accept.size(); ++i) {
- fprintf(stderr, " n%p -> n%p [label=\"",
- (void*)s, (void*)accept[i].first);
- dump_tcmd(dfa.tcpool[accept[i].second]);
- fprintf(stderr, "\" style=dotted]\n");
- }
- }
+ if (action == Action::ACCEPT) {
+ const accept_t &accept = *s->action.info.accepts;
+ for (uint32_t i = 0; i < accept.size(); ++i) {
+ fprintf(stderr, " n%p -> n%p [label=\"",
+ (void*)s, (void*)accept[i].first);
+ dump_tcmd(dfa.tcpool[accept[i].second]);
+ fprintf(stderr, "\" style=dotted]\n");
+ }
+ }
- const Span *x = s->go.span, *e = x + s->go.nSpans;
- for (uint32_t lb = 0; x < e; lb = x->ub, ++x) {
- if (!x->to) continue;
+ const Span *x = s->go.span, *e = x + s->go.nSpans;
+ for (uint32_t lb = 0; x < e; lb = x->ub, ++x) {
+ if (!x->to) continue;
- bool eat = true;
- const Action::type_t act = x->to->action.type;
- if (act == Action::MOVE || act == Action::RULE) {
- attr = "style=dotted";
- eat = false;
- } else {
- attr = "";
- }
- fprintf(stderr, " n%p -> n%p [label=\"", (void*)s, (void*)x->to);
- if (eat) dump_adfa_range(lb, x->ub);
- dump_tcmd(dfa.tcpool[x->tags]);
- fprintf(stderr, "\" %s]\n", attr);
- }
- }
+ bool eat = true;
+ const Action::type_t act = x->to->action.type;
+ if (act == Action::MOVE || act == Action::RULE) {
+ attr = "style=dotted";
+ eat = false;
+ } else {
+ attr = "";
+ }
+ fprintf(stderr, " n%p -> n%p [label=\"", (void*)s, (void*)x->to);
+ if (eat) dump_adfa_range(lb, x->ub);
+ dump_tcmd(dfa.tcpool[x->tags]);
+ fprintf(stderr, "\" %s]\n", attr);
+ }
+ }
- fprintf(stderr, "}\n");
+ fprintf(stderr, "}\n");
}
} // namespace re2c
/* Splits state 's' in two: a new MOVE state inserted right after 's'
 * takes over the rule, fill, spans and rule/fallback tags of 's',
 * while 's' itself keeps a single span covering everything up to
 * 'ubChar' that leads to the new state. */
void DFA::split(State *s)
{
    State *move = new State;
    addState(move, s);
    move->action.set_move ();
    move->rule = s->rule;
    move->fill = s->fill; /* used by tunneling, ignored by codegen */
    // the span array of 's' is handed over to 'move' (not copied) ...
    move->go = s->go;
    move->go.tags = TCID0; /* drop hoisted tags */
    move->rule_tags = s->rule_tags;
    move->fall_tags = s->fall_tags;
    s->rule = Rule::NONE;
    // ... and 's' gets a fresh one-element span array
    s->go.nSpans = 1;
    s->go.span = allocate<Span> (1);
    s->go.span[0].ub = ubChar;
    s->go.span[0].to = move;
    s->go.span[0].tags = TCID0;
}
/* Overlays the spans of 'fg' on those of 'bg' and writes the result
 * to 'x'. Wherever both states agree on target and tags, the output
 * span points at 'bg' with no tags; elsewhere it is copied from 'fg'.
 * Adjacent output spans with equal target and tags are coalesced.
 *
 * Returns the number of spans written. The caller provides enough
 * room in 'x'.
 * NOTE(review): the loop only stops once both inputs are exhausted
 * together, so it assumes both span lists end at the same upper bound. */
static uint32_t merge(Span *x, State *fg, State *bg)
{
    Span *f = fg->go.span;
    Span *b = bg->go.span;
    Span *const fe = f + fg->go.nSpans;
    Span *const be = b + bg->go.nSpans;
    Span *const x0 = x;

    for (;!(f == fe && b == be);) {
        if (f->to == b->to && f->tags == b->tags) {
            x->to = bg;
            x->tags = TCID0;
        } else {
            x->to = f->to;
            x->tags = f->tags;
        }
        // start a new output span unless it continues the previous one
        if (x == x0
            || x[-1].to != x->to
            || x[-1].tags != x->tags) {
            ++x;
        }
        x[-1].ub = std::min(f->ub, b->ub);

        // advance whichever input span ends first (both on a tie)
        if (f->ub < b->ub) {
            ++f;
        } else if (f->ub > b->ub) {
            ++b;
        } else {
            ++f;
            ++b;
        }
    }

    return static_cast<uint32_t>(x - x0);
}
void DFA::findBaseState()
{
- Span *span = allocate<Span> (ubChar - lbChar);
-
- for (State *s = head; s; s = s->next)
- {
- if (s->fill == 0)
- {
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- State *to = s->go.span[i].to;
-
- if (to->isBase)
- {
- to = to->go.span[0].to;
- uint32_t nSpans = merge(span, s, to);
-
- if (nSpans < s->go.nSpans)
- {
- operator delete (s->go.span);
- s->go.nSpans = nSpans;
- s->go.span = allocate<Span> (nSpans);
- memcpy(s->go.span, span, nSpans*sizeof(Span));
- break;
- }
- }
- }
- }
- }
-
- operator delete (span);
+ Span *span = allocate<Span> (ubChar - lbChar);
+
+ for (State *s = head; s; s = s->next)
+ {
+ if (s->fill == 0)
+ {
+ for (uint32_t i = 0; i < s->go.nSpans; ++i)
+ {
+ State *to = s->go.span[i].to;
+
+ if (to->isBase)
+ {
+ to = to->go.span[0].to;
+ uint32_t nSpans = merge(span, s, to);
+
+ if (nSpans < s->go.nSpans)
+ {
+ operator delete (s->go.span);
+ s->go.nSpans = nSpans;
+ s->go.span = allocate<Span> (nSpans);
+ memcpy(s->go.span, span, nSpans*sizeof(Span));
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ operator delete (span);
}
/* note [tag hoisting, skip hoisting and tunneling] */
void DFA::prepare(const opt_t *opts)
{
- // create rule states
- std::vector<State*> rule2state(rules.size());
- for (State *s = head; s; s = s->next) {
- if (s->rule != Rule::NONE) {
- if (!rule2state[s->rule]) {
- State *n = new State;
- n->action.set_rule(s->rule);
- rule2state[s->rule] = n;
- addState(n, s);
- }
- for (uint32_t i = 0; i < s->go.nSpans; ++i) {
- if (!s->go.span[i].to) {
- s->go.span[i].to = rule2state[s->rule];
- s->go.span[i].tags = s->rule_tags;
- }
- }
- }
- }
-
- // create default state (if needed)
- State * default_state = NULL;
- for (State * s = head; s; s = s->next)
- {
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- if (!s->go.span[i].to)
- {
- if (!default_state)
- {
- default_state = new State;
- addState(default_state, s);
- }
- s->go.span[i].to = default_state;
- }
- }
- }
-
- // bind save actions to fallback states and create accept state (if needed)
- if (default_state) {
- for (State *s = head; s; s = s->next) {
- if (s->fallback) {
- const std::pair<const State*, tcid_t> acc(rule2state[s->rule], s->fall_tags);
- s->action.set_save(accepts.find_or_add(acc));
- }
- }
- default_state->action.set_accept(&accepts);
- }
-
- // tag hoisting should be done after binding default arcs:
- // (which may introduce new tags)
- // see note [tag hoisting, skip hoisting and tunneling]
- if (!opts->eager_skip) {
- hoist_tags();
- }
-
- // split ``base'' states into two parts
- for (State * s = head; s; s = s->next)
- {
- s->isBase = false;
-
- if (s->fill != 0)
- {
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- if (s->go.span[i].to == s)
- {
- s->isBase = true;
- split(s);
-
- if (opts->bFlag) {
- bitmaps.insert(&s->next->go, s);
- }
-
- s = s->next;
- break;
- }
- }
- }
- }
- // find ``base'' state, if possible
- findBaseState();
-
- // see note [tag hoisting, skip hoisting and tunneling]
- if (opts->eager_skip) {
- hoist_tags_and_skip(opts);
- }
-
- for (State *s = head; s; s = s->next) {
- s->go.init(s, opts, bitmaps);
- }
+ // create rule states
+ std::vector<State*> rule2state(rules.size());
+ for (State *s = head; s; s = s->next) {
+ if (s->rule != Rule::NONE) {
+ if (!rule2state[s->rule]) {
+ State *n = new State;
+ n->action.set_rule(s->rule);
+ rule2state[s->rule] = n;
+ addState(n, s);
+ }
+ for (uint32_t i = 0; i < s->go.nSpans; ++i) {
+ if (!s->go.span[i].to) {
+ s->go.span[i].to = rule2state[s->rule];
+ s->go.span[i].tags = s->rule_tags;
+ }
+ }
+ }
+ }
+
+ // create default state (if needed)
+ State * default_state = NULL;
+ for (State * s = head; s; s = s->next)
+ {
+ for (uint32_t i = 0; i < s->go.nSpans; ++i)
+ {
+ if (!s->go.span[i].to)
+ {
+ if (!default_state)
+ {
+ default_state = new State;
+ addState(default_state, s);
+ }
+ s->go.span[i].to = default_state;
+ }
+ }
+ }
+
+ // bind save actions to fallback states and create accept state (if needed)
+ if (default_state) {
+ for (State *s = head; s; s = s->next) {
+ if (s->fallback) {
+ const std::pair<const State*, tcid_t> acc(rule2state[s->rule], s->fall_tags);
+ s->action.set_save(accepts.find_or_add(acc));
+ }
+ }
+ default_state->action.set_accept(&accepts);
+ }
+
+ // tag hoisting should be done after binding default arcs:
+ // (which may introduce new tags)
+ // see note [tag hoisting, skip hoisting and tunneling]
+ if (!opts->eager_skip) {
+ hoist_tags();
+ }
+
+ // split ``base'' states into two parts
+ for (State * s = head; s; s = s->next)
+ {
+ s->isBase = false;
+
+ if (s->fill != 0)
+ {
+ for (uint32_t i = 0; i < s->go.nSpans; ++i)
+ {
+ if (s->go.span[i].to == s)
+ {
+ s->isBase = true;
+ split(s);
+
+ if (opts->bFlag) {
+ bitmaps.insert(&s->next->go, s);
+ }
+
+ s = s->next;
+ break;
+ }
+ }
+ }
+ }
+ // find ``base'' state, if possible
+ findBaseState();
+
+ // see note [tag hoisting, skip hoisting and tunneling]
+ if (opts->eager_skip) {
+ hoist_tags_and_skip(opts);
+ }
+
+ for (State *s = head; s; s = s->next) {
+ s->go.init(s, opts, bitmaps);
+ }
}
void DFA::calc_stats(uint32_t ln, bool explicit_tags)
{
- // calculate 'YYMAXFILL'
- max_fill = 0;
- for (State * s = head; s; s = s->next)
- {
- if (max_fill < s->fill)
- {
- max_fill = s->fill;
- }
- }
-
- // calculate 'YYMAXNMATCH'
- max_nmatch = 0;
- const size_t nrule = rules.size();
- for (size_t i = 0; i < nrule; ++i) {
- max_nmatch = std::max(max_nmatch, rules[i].ncap);
- }
-
- // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used
- need_backup = accepts.size () > 0;
-
- // determine if 'yyaccept' variable is used
- need_accept = accepts.size () > 1;
-
- // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used
- // If tags are not enabled explicitely and trailing contexts
- // don't overlap (single variable is enough for all of them), then
- // re2c should use old-style YYCTXMARKER for backwards compatibility.
- // Note that with generic API fixed-length contexts are forbidden,
- // which may cause additional overlaps.
- oldstyle_ctxmarker = !explicit_tags && maxtagver == 1;
-
- // error if tags are not enabled, but we need them
- if (!explicit_tags && maxtagver > 1) {
- fatal_l(ln, "overlapping trailing contexts need "
- "multiple context markers, use '-t, --tags' "
- "option and '/*!stags:re2c ... */' directive");
- }
+ // calculate 'YYMAXFILL'
+ max_fill = 0;
+ for (State * s = head; s; s = s->next)
+ {
+ if (max_fill < s->fill)
+ {
+ max_fill = s->fill;
+ }
+ }
+
+ // calculate 'YYMAXNMATCH'
+ max_nmatch = 0;
+ const size_t nrule = rules.size();
+ for (size_t i = 0; i < nrule; ++i) {
+ max_nmatch = std::max(max_nmatch, rules[i].ncap);
+ }
+
+ // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used
+ need_backup = accepts.size () > 0;
+
+ // determine if 'yyaccept' variable is used
+ need_accept = accepts.size () > 1;
+
+ // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used
+ // If tags are not enabled explicitely and trailing contexts
+ // don't overlap (single variable is enough for all of them), then
+ // re2c should use old-style YYCTXMARKER for backwards compatibility.
+ // Note that with generic API fixed-length contexts are forbidden,
+ // which may cause additional overlaps.
+ oldstyle_ctxmarker = !explicit_tags && maxtagver == 1;
+
+ // error if tags are not enabled, but we need them
+ if (!explicit_tags && maxtagver > 1) {
+ fatal_l(ln, "overlapping trailing contexts need "
+ "multiple context markers, use '-t, --tags' "
+ "option and '/*!stags:re2c ... */' directive");
+ }
}
void DFA::hoist_tags()
{
- for (State * s = head; s; s = s->next) {
- Span *span = s->go.span;
- const size_t nspan = s->go.nSpans;
- if (nspan == 0) continue;
-
- tcid_t ts = span[0].tags;
- for (uint32_t i = 1; i < nspan; ++i) {
- if (span[i].tags != ts) {
- ts = TCID0;
- break;
- }
- }
- if (ts != TCID0) {
- s->go.tags = ts;
- for (uint32_t i = 0; i < nspan; ++i) {
- span[i].tags = TCID0;
- }
- }
- }
+ for (State * s = head; s; s = s->next) {
+ Span *span = s->go.span;
+ const size_t nspan = s->go.nSpans;
+ if (nspan == 0) continue;
+
+ tcid_t ts = span[0].tags;
+ for (uint32_t i = 1; i < nspan; ++i) {
+ if (span[i].tags != ts) {
+ ts = TCID0;
+ break;
+ }
+ }
+ if (ts != TCID0) {
+ s->go.tags = ts;
+ for (uint32_t i = 0; i < nspan; ++i) {
+ span[i].tags = TCID0;
+ }
+ }
+ }
}
void DFA::hoist_tags_and_skip(const opt_t *opts)
{
- assert(opts->eager_skip);
-
- for (State * s = head; s; s = s->next) {
- Span *span = s->go.span;
- const size_t nspan = s->go.nSpans;
- if (nspan == 0) continue;
-
- bool hoist_tags = true, hoist_skip = true;
-
- // do all spans agree on tags?
- for (uint32_t i = 1; i < nspan; ++i) {
- if (span[i].tags != span[0].tags) {
- hoist_tags = false;
- break;
- }
- }
-
- // do all spans agree on skip?
- for (uint32_t i = 0; i < nspan; ++i) {
- if (consume(span[i].to) != consume(span[0].to)) {
- hoist_skip = false;
- break;
- }
- }
-
- if (opts->lookahead) {
- // skip must go after tags
- hoist_skip &= hoist_tags;
- } else {
- // skip must go before tags
- hoist_tags &= hoist_skip;
- }
-
- // hoisting tags is possible
- if (hoist_tags) {
- s->go.tags = span[0].tags;
- for (uint32_t i = 0; i < nspan; ++i) {
- span[i].tags = TCID0;
- }
- }
-
- // hoisting skip is possible
- s->go.skip = hoist_skip && consume(span[0].to);
- }
+ assert(opts->eager_skip);
+
+ for (State * s = head; s; s = s->next) {
+ Span *span = s->go.span;
+ const size_t nspan = s->go.nSpans;
+ if (nspan == 0) continue;
+
+ bool hoist_tags = true, hoist_skip = true;
+
+ // do all spans agree on tags?
+ for (uint32_t i = 1; i < nspan; ++i) {
+ if (span[i].tags != span[0].tags) {
+ hoist_tags = false;
+ break;
+ }
+ }
+
+ // do all spans agree on skip?
+ for (uint32_t i = 0; i < nspan; ++i) {
+ if (consume(span[i].to) != consume(span[0].to)) {
+ hoist_skip = false;
+ break;
+ }
+ }
+
+ if (opts->lookahead) {
+ // skip must go after tags
+ hoist_skip &= hoist_tags;
+ } else {
+ // skip must go before tags
+ hoist_tags &= hoist_skip;
+ }
+
+ // hoisting tags is possible
+ if (hoist_tags) {
+ s->go.tags = span[0].tags;
+ for (uint32_t i = 0; i < nspan; ++i) {
+ span[i].tags = TCID0;
+ }
+ }
+
+ // hoisting skip is possible
+ s->go.skip = hoist_skip && consume(span[0].to);
+ }
}
} // namespace re2c
// Largest uint32_t value.
// NOTE(review): presumably marks an unbounded upper repetition count
// for ITER nodes -- confirm against callers.
const uint32_t AST::MANY = std::numeric_limits<uint32_t>::max();
/* Creates a node of type 't' located at line 'l', column 'c' and
 * registers it in the static free list 'flist'. The type-specific
 * union member is left for the caller to fill in. */
AST::AST(uint32_t l, uint32_t c, type_t t)
    : type(t), line(l), column(c)
{
    flist.insert(this);
}
/* Unregisters the node from 'flist' and deletes the heap object owned
 * by TAG, REF, STR and CLS nodes. Child nodes are not deleted here. */
AST::~AST()
{
    flist.erase(this);
    if (type == TAG) {
        delete tag.name;
    } else if (type == REF) {
        delete ref.name;
    } else if (type == STR) {
        delete str.chars;
    } else if (type == CLS) {
        delete cls.ranges;
    }
}
/* Creates a NIL node at line 'l', column 'c'. */
const AST *ast_nil(uint32_t l, uint32_t c)
{
    return new AST(l, c, AST::NIL);
}
/* Creates a STR node at line 'l', column 'c'.
 * 'chars' is stored as is and deleted by ~AST; 'icase' is stored in
 * the node's 'str.icase' flag. */
const AST *ast_str(uint32_t l, uint32_t c, std::vector<ASTChar> *chars, bool icase)
{
    AST *ast = new AST(l, c, AST::STR);
    ast->str.chars = chars;
    ast->str.icase = icase;
    return ast;
}
/* Creates a CLS node at line 'l', column 'c'.
 * 'ranges' is stored as is and deleted by ~AST; 'negated' is stored
 * in the node's 'cls.negated' flag. */
const AST *ast_cls(uint32_t l, uint32_t c, std::vector<ASTRange> *ranges, bool negated)
{
    AST *ast = new AST(l, c, AST::CLS);
    ast->cls.ranges = ranges;
    ast->cls.negated = negated;
    return ast;
}
/* Creates a DOT node at line 'l', column 'c'. */
const AST *ast_dot(uint32_t l, uint32_t c)
{
    return new AST(l, c, AST::DOT);
}
/* Creates a DEFAULT node at line 'l', column 'c'. */
const AST *ast_default(uint32_t l, uint32_t c)
{
    return new AST(l, c, AST::DEFAULT);
}
/* Creates an ALT node over 'a1' and 'a2', positioned at 'a1'.
 * If either operand is null, the other one is returned unchanged
 * (which may itself be null) and no node is created. */
const AST *ast_alt(const AST *a1, const AST *a2)
{
    if (!a1) return a2;
    if (!a2) return a1;
    AST *ast = new AST(a1->line, a1->column, AST::ALT);
    ast->alt.ast1 = a1;
    ast->alt.ast2 = a2;
    return ast;
}
/* Creates a CAT node over 'a1' and 'a2', positioned at 'a1'.
 * If either operand is null, the other one is returned unchanged
 * (which may itself be null) and no node is created. */
const AST *ast_cat(const AST *a1, const AST *a2)
{
    if (!a1) return a2;
    if (!a2) return a1;
    AST *ast = new AST(a1->line, a1->column, AST::CAT);
    ast->cat.ast1 = a1;
    ast->cat.ast2 = a2;
    return ast;
}
/* Creates an ITER node over 'a' with bounds 'n' (min) and 'm' (max),
 * positioned at 'a'. 'a' must not be null. */
const AST *ast_iter(const AST *a, uint32_t n, uint32_t m)
{
    AST *ast = new AST(a->line, a->column, AST::ITER);
    ast->iter.ast = a;
    ast->iter.min = n;
    ast->iter.max = m;
    return ast;
}
/* Creates a DIFF node over 'a1' and 'a2', positioned at 'a1'.
 * 'a1' must not be null. */
const AST *ast_diff(const AST *a1, const AST *a2)
{
    AST *ast = new AST(a1->line, a1->column, AST::DIFF);
    // fill the union member that matches the node type ('diff', not 'cat')
    ast->diff.ast1 = a1;
    ast->diff.ast2 = a2;
    return ast;
}
/* Creates a TAG node at line 'l', column 'c'.
 * 'n' is stored as the tag name and deleted by ~AST; 'h' is stored in
 * the node's 'tag.history' flag. */
const AST *ast_tag(uint32_t l, uint32_t c, const std::string *n, bool h)
{
    AST *ast = new AST(l, c, AST::TAG);
    ast->tag.name = n;
    ast->tag.history = h;
    return ast;
}
/* Creates a CAP node wrapping 'a', positioned at 'a'.
 * 'a' must not be null. */
const AST *ast_cap(const AST *a)
{
    AST *ast = new AST(a->line, a->column, AST::CAP);
    ast->cap = a;
    return ast;
}
/* Creates a REF node referring to 'a' under name 'n', positioned at
 * 'a'. The name is copied to the heap; the copy is deleted by ~AST.
 * 'a' must not be null. */
const AST *ast_ref(const AST *a, const std::string &n)
{
    AST *ast = new AST(a->line, a->column, AST::REF);
    ast->ref.ast = a;
    ast->ref.name = new std::string(n);
    return ast;
}
// Classifies a node by type: returns true for ALT, CAT, DIFF and REF, false
// for every other type. The switch lists all enumerators explicitly and has no
// default label.
// NOTE(review): presumably "wrap" means the caller must add an extra grouping
// around such a node - confirm at the call sites.
bool ast_need_wrap(const AST *a)
{
- switch (a->type) {
- case AST::ITER:
- case AST::NIL:
- case AST::STR:
- case AST::CLS:
- case AST::DOT:
- case AST::DEFAULT:
- case AST::TAG:
- case AST::CAP:
- return false;
- case AST::ALT:
- case AST::CAT:
- case AST::DIFF:
- case AST::REF:
- return true;
- }
- return false; /* unreachable */
+ switch (a->type) {
+ case AST::ITER:
+ case AST::NIL:
+ case AST::STR:
+ case AST::CLS:
+ case AST::DOT:
+ case AST::DEFAULT:
+ case AST::TAG:
+ case AST::CAP:
+ return false;
+ case AST::ALT:
+ case AST::CAT:
+ case AST::DIFF:
+ case AST::REF:
+ return true;
+ }
+ return false; /* unreachable */
}
} // namespace re2c
// One character value (chr) paired with the column it was recorded at.
// AST::str.chars points to a vector of these.
struct ASTChar
{
- uint32_t chr;
- uint32_t column;
- ASTChar(uint32_t x, uint32_t c)
- : chr(x), column(c) {}
+ uint32_t chr;
+ uint32_t column;
+ ASTChar(uint32_t x, uint32_t c)
+ : chr(x), column(c) {}
};
// A pair of bounds (lower, upper) paired with the column it was recorded at.
// AST::cls.ranges points to a vector of these.
// NOTE(review): whether `upper` is inclusive is not visible here - confirm.
struct ASTRange
{
- uint32_t lower;
- uint32_t upper;
- uint32_t column;
- ASTRange(uint32_t l, uint32_t u, uint32_t c)
- : lower(l), upper(u), column(c) {}
+ uint32_t lower;
+ uint32_t upper;
+ uint32_t column;
+ ASTRange(uint32_t l, uint32_t u, uint32_t c)
+ : lower(l), upper(u), column(c) {}
};
/* AST must be immutable and independent of options */
// Tagged node: `type` selects which member of the anonymous union is
// meaningful; `line` and `column` record the node's source position.
// Child nodes and payloads are held through raw const pointers.
struct AST
{
- static free_list<AST*> flist;
- static const uint32_t MANY;
-
- enum type_t
- { NIL, STR, CLS, DOT, DEFAULT, ALT
- , CAT, ITER, DIFF, TAG, CAP, REF } type;
- union {
- struct {
- const std::vector<ASTChar> *chars;
- bool icase;
- } str;
- struct {
- const std::vector<ASTRange> *ranges;
- bool negated;
- } cls;
- struct {
- const AST *ast1;
- const AST *ast2;
- } alt;
- struct {
- const AST *ast1;
- const AST *ast2;
- } cat;
- struct {
- const AST *ast;
- uint32_t min;
- uint32_t max;
- } iter;
- struct {
- const AST *ast1;
- const AST *ast2;
- } diff;
- struct {
- const std::string *name;
- bool history;
- } tag;
- const AST *cap;
- struct {
- const AST *ast;
- const std::string *name;
- } ref;
- };
- uint32_t line;
- uint32_t column;
-
- AST(uint32_t l, uint32_t c, type_t t);
- ~AST();
+ static free_list<AST*> flist; // NOTE(review): presumably tracks live nodes for bulk cleanup - confirm in ctor/dtor
+ static const uint32_t MANY; // NOTE(review): presumably the "unbounded" value for iter.max - confirm
+
+ enum type_t
+ { NIL, STR, CLS, DOT, DEFAULT, ALT
+ , CAT, ITER, DIFF, TAG, CAP, REF } type;
+ union {
+ struct {
+ const std::vector<ASTChar> *chars;
+ bool icase;
+ } str;
+ struct {
+ const std::vector<ASTRange> *ranges;
+ bool negated;
+ } cls;
+ struct {
+ const AST *ast1;
+ const AST *ast2;
+ } alt;
+ struct {
+ const AST *ast1;
+ const AST *ast2;
+ } cat;
+ struct {
+ const AST *ast;
+ uint32_t min;
+ uint32_t max;
+ } iter;
+ struct {
+ const AST *ast1;
+ const AST *ast2;
+ } diff;
+ struct {
+ const std::string *name;
+ bool history;
+ } tag;
+ const AST *cap;
+ struct {
+ const AST *ast;
+ const std::string *name;
+ } ref;
+ };
+ uint32_t line;
+ uint32_t column;
+
+ AST(uint32_t l, uint32_t c, type_t t);
+ ~AST();
};
// Pairs an AST with the Code attached to it. Both pointers are stored as
// given; nothing is copied.
struct ASTRule
{
- const AST *ast;
- const Code *code;
+ const AST *ast;
+ const Code *code;
- ASTRule(const AST *r, const Code *c)
- : ast(r)
- , code(c)
- {}
+ ASTRule(const AST *r, const Code *c)
+ : ast(r)
+ , code(c)
+ {}
};
// Plain pair of bounds (min, max).
// NOTE(review): presumably the repetition bounds later passed to ast_iter() -
// confirm against the parser.
struct ASTBounds
{
- uint32_t min;
- uint32_t max;
+ uint32_t min;
+ uint32_t max;
};
// Everything collected under one name: the rules plus two lists of Code
// pointers (`defs` and `setup`). The constructor sets the name and leaves all
// three lists empty.
struct spec_t
{
- std::string name;
- std::vector<ASTRule> rules;
- std::vector<const Code*> defs;
- std::vector<const Code*> setup;
+ std::string name;
+ std::vector<ASTRule> rules;
+ std::vector<const Code*> defs;
+ std::vector<const Code*> setup;
- explicit spec_t(const std::string &n):
- name(n), rules(), defs(), setup() {}
+ explicit spec_t(const std::string &n):
+ name(n), rules(), defs(), setup() {}
};
// List of spec_t entries, stored by value.
typedef std::vector<spec_t> specs_t;
namespace re2c {
// Records the file name; the file itself is not opened here (`file` starts as
// NULL, see open()). escaped_file_name starts as a copy of fn and is then
// passed through strrreplace with "\\" -> "\\\\".
// NOTE(review): presumably this doubles every backslash so the name can be
// emitted inside a string literal - confirm against strrreplace.
Input::Input (const char * fn)
- : file (NULL)
- , file_name (fn)
- , escaped_file_name (fn)
+ : file (NULL)
+ , file_name (fn)
+ , escaped_file_name (fn)
{
- strrreplace (escaped_file_name, "\\", "\\\\");
+ strrreplace (escaped_file_name, "\\", "\\\\");
}
// Opens the input. The special name "<stdin>" selects stdin; any other name is
// opened with fopen in binary read mode ("rb").
// Returns true when `file` is non-NULL afterwards.
bool Input::open ()
{
- if (file_name == "<stdin>")
- {
- file = stdin;
- }
- else
- {
- file = fopen (file_name.c_str (), "rb");
- }
- return file != NULL;
+ if (file_name == "<stdin>")
+ {
+ file = stdin;
+ }
+ else
+ {
+ file = fopen (file_name.c_str (), "rb");
+ }
+ return file != NULL;
}
// Closes the file if one was opened. stdin is deliberately left open.
Input::~Input ()
{
- if (file != NULL && file != stdin)
- {
- fclose (file);
- }
+ if (file != NULL && file != stdin)
+ {
+ fclose (file);
+ }
}
} // namespace re2c
// Owns the FILE handle for one input plus two spellings of its name: the name
// as given (const) and the escaped form built by the constructor.
// Copying is disabled via FORBID_COPY.
struct Input
{
- FILE * file;
- const std::string file_name;
- std::string escaped_file_name;
+ FILE * file;
+ const std::string file_name;
+ std::string escaped_file_name;
- explicit Input (const char * fn);
- ~Input ();
- bool open ();
+ explicit Input (const char * fn);
+ ~Input ();
+ bool open ();
- FORBID_COPY (Input);
+ FORBID_COPY (Input);
};
} // namespace re2c
// Rewrites `specs` in place, in three steps:
//  1. If a spec named "*" exists, its rules, defs and setup are appended to
//     every other spec except the one named "0", and the "*" spec is removed.
//  2. Every spec with a non-empty `defs` gets one extra rule appended, built
//     from ast_default() and defs[0].
//  3. If a spec named "0" exists and is not already first, it is moved to the
//     front.
void normalize_ast(specs_t &specs)
{
- specs_t::iterator i, b = specs.begin(), e = specs.end();
-
- // merge <*> rules and <!*> setup to all conditions except "0"
- // star rules must have lower priority than normal rules
- for (i = b; i != e && i->name != "*"; ++i);
- if (i != e) {
- const specs_t::iterator star = i;
-
- for (i = b; i != e; ++i) {
- if (i == star || i->name == "0") continue;
-
- i->rules.insert(i->rules.end(), star->rules.begin(), star->rules.end());
- i->defs.insert(i->defs.end(), star->defs.begin(), star->defs.end());
- i->setup.insert(i->setup.end(), star->setup.begin(), star->setup.end());
- }
-
- specs.erase(star);
- e = specs.end();
- }
-
- // merge default rule with the lowest priority
- for (i = b; i != e; ++i) {
- if (!i->defs.empty()) {
- const Code *c = i->defs[0];
- const AST *r = ast_default(c->fline, 0);
- i->rules.push_back(ASTRule(r, c));
- }
- }
-
- // "0" condition must be the first one
- for (i = b; i != e && i->name != "0"; ++i);
- if (i != e && i != b) {
- const spec_t zero = *i;
- specs.erase(i);
- specs.insert(specs.begin(), zero);
- }
+ specs_t::iterator i, b = specs.begin(), e = specs.end();
+
+ // merge <*> rules and <!*> setup to all conditions except "0"
+ // star rules must have lower priority than normal rules
+ for (i = b; i != e && i->name != "*"; ++i);
+ if (i != e) {
+ const specs_t::iterator star = i;
+
+ for (i = b; i != e; ++i) {
+ if (i == star || i->name == "0") continue;
+
+ i->rules.insert(i->rules.end(), star->rules.begin(), star->rules.end());
+ i->defs.insert(i->defs.end(), star->defs.begin(), star->defs.end());
+ i->setup.insert(i->setup.end(), star->setup.begin(), star->setup.end());
+ }
+
+ specs.erase(star);
+ // erase() invalidates every iterator at or after the erased element,
+ // which includes 'b' when "*" was the first spec: refresh both bounds
+ b = specs.begin();
+ e = specs.end();
+ }
+
+ // merge default rule with the lowest priority
+ for (i = b; i != e; ++i) {
+ if (!i->defs.empty()) {
+ const Code *c = i->defs[0];
+ const AST *r = ast_default(c->fline, 0);
+ i->rules.push_back(ASTRule(r, c));
+ }
+ }
+
+ // "0" condition must be the first one
+ for (i = b; i != e && i->name != "0"; ++i);
+ if (i != e && i != b) {
+ const spec_t zero = *i;
+ specs.erase(i);
+ specs.insert(specs.begin(), zero);
+ }
}
} // namespace re2c
// Bundle of references to the four objects that parse() also takes as
// arguments. Holds references only; it owns nothing.
struct context_t
{
- Scanner &input;
- specs_t &specs;
- symtab_t &symtab;
- Opt &opts;
+ Scanner &input;
+ specs_t &specs;
+ symtab_t &symtab;
+ Opt &opts;
};
void parse(Scanner &input, specs_t &specs, symtab_t &symtab, Opt &opts);
// Minimum refill size in bytes: Scanner::fill() raises any smaller request to
// this value.
const uint32_t Scanner::BSIZE = 8192;
// Starts with no buffer: every pointer is NULL, tchar is 0, the current line
// number is 1 and the lexer is in LEX_NORMAL state.
ScannerState::ScannerState ()
- : tok (NULL)
- , ptr (NULL)
- , cur (NULL)
- , mar (NULL)
- , pos (NULL)
- , ctx (NULL)
- , bot (NULL)
- , lim (NULL)
- , top (NULL)
- , eof (NULL)
- , tchar (0)
- , cline (1)
- , lexer_state (LEX_NORMAL)
+ : tok (NULL)
+ , ptr (NULL)
+ , cur (NULL)
+ , mar (NULL)
+ , pos (NULL)
+ , ctx (NULL)
+ , bot (NULL)
+ , lim (NULL)
+ , top (NULL)
+ , eof (NULL)
+ , tchar (0)
+ , cline (1)
+ , lexer_state (LEX_NORMAL)
{}
// Binds the scanner to its input and warning sink (both kept by reference) on
// top of a default-constructed ScannerState.
Scanner::Scanner(Input &i, Warn &w)
- : ScannerState(), in(i), warn(w) {}
+ : ScannerState(), in(i), warn(w) {}
// Refills the input buffer so that at least `need` more bytes are requested
// from the file. Does nothing once `eof` has been set.
// Steps:
//  1. Drop the bytes before `tok`: move the remainder to the start of the
//     buffer and shift all position pointers back by the same distance.
//  2. Raise `need` to at least BSIZE. If the free space after `lim` is smaller
//     than that, allocate a new buffer of (used bytes + need), copy the used
//     bytes, rebase all pointers and release the old buffer.
//  3. fread() up to `need` bytes at `lim`. On a short read a NUL byte is
//     written after the data and `eof` is set to point one past that NUL.
void Scanner::fill (uint32_t need)
{
- if(!eof)
- {
- /* Get rid of everything that was already parsed. */
- const ptrdiff_t diff = tok - bot;
- if (diff > 0)
- {
- const size_t move = static_cast<size_t> (top - tok);
- memmove (bot, tok, move);
- tok -= diff;
- mar -= diff;
- ptr -= diff;
- cur -= diff;
- pos -= diff;
- lim -= diff;
- ctx -= diff;
- }
- /* Increase buffer size. */
- if (BSIZE > need)
- {
- need = BSIZE;
- }
- if (static_cast<uint32_t> (top - lim) < need)
- {
- const size_t copy = static_cast<size_t> (lim - bot);
- char * buf = new char[copy + need];
- if (!buf)
- {
- fatal("Out of memory");
- }
- if (copy > 0) {
- memcpy (buf, bot, copy);
- }
- tok = &buf[tok - bot];
- mar = &buf[mar - bot];
- ptr = &buf[ptr - bot];
- cur = &buf[cur - bot];
- pos = &buf[pos - bot];
- lim = &buf[lim - bot];
- top = &lim[need];
- ctx = &buf[ctx - bot];
- delete [] bot;
- bot = buf;
- }
- /* Append to buffer. */
- const size_t have = fread (lim, 1, need, in.file);
- if (have != need)
- {
- eof = &lim[have];
- *eof++ = '\0';
- }
- lim += have;
- }
+ if(!eof)
+ {
+ /* Get rid of everything that was already parsed. */
+ const ptrdiff_t diff = tok - bot;
+ if (diff > 0)
+ {
+ const size_t move = static_cast<size_t> (top - tok); // whole tail up to 'top', not just up to 'lim'
+ memmove (bot, tok, move);
+ tok -= diff;
+ mar -= diff;
+ ptr -= diff;
+ cur -= diff;
+ pos -= diff;
+ lim -= diff;
+ ctx -= diff;
+ }
+ /* Increase buffer size. */
+ if (BSIZE > need)
+ {
+ need = BSIZE;
+ }
+ if (static_cast<uint32_t> (top - lim) < need)
+ {
+ const size_t copy = static_cast<size_t> (lim - bot);
+ char * buf = new char[copy + need];
+ if (!buf) // NOTE(review): plain 'new' throws std::bad_alloc instead of returning NULL, so this branch looks unreachable
+ {
+ fatal("Out of memory");
+ }
+ if (copy > 0) {
+ memcpy (buf, bot, copy);
+ }
+ tok = &buf[tok - bot];
+ mar = &buf[mar - bot];
+ ptr = &buf[ptr - bot];
+ cur = &buf[cur - bot];
+ pos = &buf[pos - bot];
+ lim = &buf[lim - bot];
+ top = &lim[need]; // uses the already rebased 'lim'
+ ctx = &buf[ctx - bot];
+ delete [] bot;
+ bot = buf;
+ }
+ /* Append to buffer. */
+ const size_t have = fread (lim, 1, need, in.file);
+ if (have != need)
+ {
+ eof = &lim[have];
+ *eof++ = '\0'; // have < need here, so this write stays below 'top'
+ }
+ lim += have;
+ }
}
// Releases the input buffer allocated by fill(). `bot` is NULL if fill() never
// ran, which delete[] accepts.
Scanner::~Scanner()
{
- delete [] bot;
+ delete [] bot;
}
} // namespace re2c
// Raw scanner state: a set of pointers into one character buffer plus a few
// counters. [bot, top) is the allocated buffer and [bot, lim) the part filled
// with input (see Scanner::fill); `eof` is non-NULL once the end of input has
// been read. Copying is disabled via FORBID_COPY.
// NOTE(review): the exact roles of ptr/mar/ctx are not visible in this
// section - confirm against the generated lexer code.
struct ScannerState
{
- enum lexer_state_t
- {
- LEX_NORMAL,
- LEX_FLEX_NAME
- };
-
- // positioning
- char * tok;
- char * ptr;
- char * cur;
- char * mar;
- char * pos;
- char * ctx;
-
- // buffer
- char * bot;
- char * lim;
- char * top;
- char * eof;
-
- ptrdiff_t tchar;
- uint32_t cline;
-
- lexer_state_t lexer_state;
-
- ScannerState();
- FORBID_COPY(ScannerState);
+ enum lexer_state_t
+ {
+ LEX_NORMAL,
+ LEX_FLEX_NAME
+ };
+
+ // positioning
+ char * tok; // start of the current token (see tok_len, get_column)
+ char * ptr;
+ char * cur; // cursor; tok <= cur is assumed by tok_len
+ char * mar;
+ char * pos; // get_column() reports tok - pos
+ char * ctx;
+
+ // buffer
+ char * bot;
+ char * lim;
+ char * top;
+ char * eof;
+
+ ptrdiff_t tchar;
+ uint32_t cline; // returned by get_cline(); starts at 1
+
+ lexer_state_t lexer_state;
+
+ ScannerState();
+ FORBID_COPY(ScannerState);
};
// Lexer front end. Privately inherits the buffer/position state from
// ScannerState and keeps references to its Input and Warn objects.
// The private section declares the buffer refill routine and the lexing
// helpers; the public section exposes the entry points (echo, scan, lex_conf)
// and position/file-name getters. Copying is disabled via FORBID_COPY.
class Scanner: private ScannerState
{
- static const uint32_t BSIZE;
- Input & in;
- Warn &warn;
-
- void fill(uint32_t need);
- void lex_end_of_comment(OutputFile &out);
- void lex_tags(OutputFile &out, bool mtags);
- void set_sourceline ();
- uint32_t lex_cls_chr();
- uint32_t lex_str_chr(char quote, bool &end);
- const AST *lex_cls(bool neg);
- const AST *lex_str(char quote);
- void lex_conf_encoding_policy(Opt &opts);
- void lex_conf_input(Opt &opts);
- void lex_conf_empty_class(Opt &opts);
- void lex_conf_dfa_minimization(Opt &opts);
- void lex_conf_enc(Enc::type_t enc, Opt &opts);
- void lex_conf_assign();
- void lex_conf_semicolon();
- int32_t lex_conf_number();
- bool lex_conf_bool();
- std::string lex_conf_string();
- size_t tok_len () const;
+ static const uint32_t BSIZE;
+ Input & in;
+ Warn &warn;
+
+ void fill(uint32_t need);
+ void lex_end_of_comment(OutputFile &out);
+ void lex_tags(OutputFile &out, bool mtags);
+ void set_sourceline ();
+ uint32_t lex_cls_chr();
+ uint32_t lex_str_chr(char quote, bool &end);
+ const AST *lex_cls(bool neg);
+ const AST *lex_str(char quote);
+ void lex_conf_encoding_policy(Opt &opts);
+ void lex_conf_input(Opt &opts);
+ void lex_conf_empty_class(Opt &opts);
+ void lex_conf_dfa_minimization(Opt &opts);
+ void lex_conf_enc(Enc::type_t enc, Opt &opts);
+ void lex_conf_assign();
+ void lex_conf_semicolon();
+ int32_t lex_conf_number();
+ bool lex_conf_bool();
+ std::string lex_conf_string();
+ size_t tok_len () const;
public:
- enum ParseMode {Stop, Parse, Reuse, Rules};
-
- Scanner(Input&, Warn &w);
- ~Scanner();
- ParseMode echo(OutputFile &out);
- int scan(const conopt_t *globopts);
- void lex_conf(Opt &opts);
- uint32_t get_cline() const;
- uint32_t get_column() const;
- const std::string & get_fname () const;
- FORBID_COPY (Scanner);
+ enum ParseMode {Stop, Parse, Reuse, Rules};
+
+ Scanner(Input&, Warn &w);
+ ~Scanner();
+ ParseMode echo(OutputFile &out);
+ int scan(const conopt_t *globopts);
+ void lex_conf(Opt &opts);
+ uint32_t get_cline() const;
+ uint32_t get_column() const;
+ const std::string & get_fname () const;
+ FORBID_COPY (Scanner);
};
// Length of the current token in bytes: the distance from `tok` to `cur`.
inline size_t Scanner::tok_len () const
{
- // lexing and fill procedures must maintain: token pointer <= cursor pointer
- return static_cast<size_t> (cur - tok);
+ // lexing and fill procedures must maintain: token pointer <= cursor pointer
+ return static_cast<size_t> (cur - tok);
}
// Returns the escaped form of the input file name (not the raw name).
inline const std::string & Scanner::get_fname () const
{
- return in.escaped_file_name;
+ return in.escaped_file_name;
}
// Returns the current line counter (`cline`).
inline uint32_t Scanner::get_cline() const
{
- return cline;
+ return cline;
}
// Returns the offset of the current token start from `pos`.
// NOTE(review): presumably `pos` marks the start of the current line, which
// would make this a 0-based column - confirm where `pos` is updated.
inline uint32_t Scanner::get_column() const
{
- return static_cast<uint32_t>(tok - pos);
+ return static_cast<uint32_t>(tok - pos);
}
} // end namespace re2c
// Converts one hexadecimal digit to its value (0..15); letters are accepted in
// either case.
// expected characters: [0-9a-fA-F]
// Any other character yields ~0u (all bits set) as an "unexpected" marker.
static inline uint32_t hex_digit (const char c)
{
- switch (c)
- {
- case '0': return 0;
- case '1': return 1;
- case '2': return 2;
- case '3': return 3;
- case '4': return 4;
- case '5': return 5;
- case '6': return 6;
- case '7': return 7;
- case '8': return 8;
- case '9': return 9;
- case 'a':
- case 'A': return 0xA;
- case 'b':
- case 'B': return 0xB;
- case 'c':
- case 'C': return 0xC;
- case 'd':
- case 'D': return 0xD;
- case 'e':
- case 'E': return 0xE;
- case 'f':
- case 'F': return 0xF;
- default: return ~0u; // unexpected
- }
+ switch (c)
+ {
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ case 'a':
+ case 'A': return 0xA;
+ case 'b':
+ case 'B': return 0xB;
+ case 'c':
+ case 'C': return 0xC;
+ case 'd':
+ case 'D': return 0xD;
+ case 'e':
+ case 'E': return 0xE;
+ case 'f':
+ case 'F': return 0xF;
+ default: return ~0u; // unexpected
+ }
}
// Decodes a hexadecimal escape sequence in [s, s_end) to its numeric value.
// expected string format: "\" [xXuU] [0-9a-fA-F]*
// The first two characters are skipped without being checked. Digits are not
// validated and overflow is not detected: the result is accumulated in 32 bits
// with unsigned wrap-around.
uint32_t unesc_hex (const char * s, const char * s_end)
{
- uint32_t n = 0;
- for (s += 2; s != s_end; ++s)
- {
- n <<= 4;
- n += hex_digit (*s);
- }
- return n;
+ uint32_t n = 0;
+ for (s += 2; s != s_end; ++s)
+ {
+ n <<= 4;
+ n += hex_digit (*s);
+ }
+ return n;
}
// Decodes an octal escape sequence in [s, s_end) to its numeric value.
// expected string format: "\" [0-7]*
// The first character is skipped without being checked. Digits are not
// validated: each character contributes (*s - '0') truncated to 8 bits.
uint32_t unesc_oct (const char * s, const char * s_end)
{
- uint32_t n = 0;
- for (++s; s != s_end; ++s)
- {
- n <<= 3;
- n += static_cast<uint8_t> (*s - '0');
- }
- return n;
+ uint32_t n = 0;
+ for (++s; s != s_end; ++s)
+ {
+ n <<= 3;
+ n += static_cast<uint8_t> (*s - '0');
+ }
+ return n;
}
} // namespace re2c
// Checks that the block kind (`mode`) is consistent with the -r flag (`rflag`)
// and with whether a 'rules:re2c' block was already seen (`rules`). Every
// violation is reported through fatal_l() at the scanner's current line:
//  - Rules block: needs -r and must be the first such block;
//  - Reuse block: needs -r and a preceding rules block;
//  - any other mode: not allowed together with -r.
void validate_mode(Scanner::ParseMode mode, bool rflag, bool rules, Scanner &input)
{
- const uint32_t l = input.get_cline();
- if (mode == Scanner::Rules) {
- if (!rflag) {
- fatal_l(l, "found 'rules:re2c' block without -r flag");
- } else if (rules) {
- fatal_l(l, "cannot have a second 'rules:re2c' block");
- }
- } else if (mode == Scanner::Reuse) {
- if (!rflag) {
- fatal_l(l, "found 'use:re2c' block without -r flag");
- } else if (!rules) {
- fatal_l(l, "got 'use:re2c' without 'rules:re2c'");
- }
- } else if (rflag) {
- fatal_l(l, "found standard 're2c' block while using -r flag");
- }
+ const uint32_t l = input.get_cline();
+ if (mode == Scanner::Rules) {
+ if (!rflag) {
+ fatal_l(l, "found 'rules:re2c' block without -r flag");
+ } else if (rules) {
+ fatal_l(l, "cannot have a second 'rules:re2c' block");
+ }
+ } else if (mode == Scanner::Reuse) {
+ if (!rflag) {
+ fatal_l(l, "found 'use:re2c' block without -r flag");
+ } else if (!rules) {
+ fatal_l(l, "got 'use:re2c' without 'rules:re2c'");
+ }
+ } else if (rflag) {
+ fatal_l(l, "found standard 're2c' block while using -r flag");
+ }
}
// Checks the collected specs for consistency and reports the first violation
// through fatal_l(). Checks, in order:
//  - no spec has more than one default-rule code (defs);
//  - without cflag: no spec has a non-empty name;
//  - with cflag: no spec has an empty name; no spec has more than one setup
//    code; no spec other than "*" has setup but no rules; a "*" spec must not
//    coexist with a situation where every spec has its own setup; the "0" spec
//    has at most one rule.
// NOTE(review): the two name checks read i->rules[0] without first checking
// that `rules` is non-empty - confirm the parser never produces such a spec.
void validate_ast(const specs_t &specs, bool cflag)
{
- specs_t::const_iterator i,
- b = specs.begin(),
- e = specs.end();
+ specs_t::const_iterator i,
+ b = specs.begin(),
+ e = specs.end();
- for (i = b; i != e; ++i) {
- if (i->defs.size() > 1) {
- fatal_l(i->defs[1]->fline,
- "code to default rule %sis already defined at line %u",
- incond(i->name).c_str(), i->defs[0]->fline);
- }
- }
+ for (i = b; i != e; ++i) {
+ if (i->defs.size() > 1) {
+ fatal_l(i->defs[1]->fline,
+ "code to default rule %sis already defined at line %u",
+ incond(i->name).c_str(), i->defs[0]->fline);
+ }
+ }
- if (!cflag) {
- for (i = b; i != e; ++i) {
- if (i->name != "") {
- fatal_l(i->rules[0].code->fline,
- "conditions are only allowed with '-c', '--conditions' option");
- }
- }
- } else {
- for (i = b; i != e; ++i) {
- if (i->name == "") {
- fatal_l(i->rules[0].code->fline,
- "non-conditional rules are not allowed"
- " with '-c', '--conditions' option");
- }
- }
+ if (!cflag) {
+ for (i = b; i != e; ++i) {
+ if (i->name != "") {
+ fatal_l(i->rules[0].code->fline,
+ "conditions are only allowed with '-c', '--conditions' option");
+ }
+ }
+ } else {
+ for (i = b; i != e; ++i) {
+ if (i->name == "") {
+ fatal_l(i->rules[0].code->fline,
+ "non-conditional rules are not allowed"
+ " with '-c', '--conditions' option");
+ }
+ }
- for (i = b; i != e; ++i) {
- if (i->setup.size() > 1) {
- fatal_l(i->setup[1]->fline,
- "code to setup rule '%s' is already defined at line %u",
- i->name.c_str(), i->setup[0]->fline);
- }
- }
+ for (i = b; i != e; ++i) {
+ if (i->setup.size() > 1) {
+ fatal_l(i->setup[1]->fline,
+ "code to setup rule '%s' is already defined at line %u",
+ i->name.c_str(), i->setup[0]->fline);
+ }
+ }
- for (i = b; i != e; ++i) {
- if (i->name != "*" && !i->setup.empty() && i->rules.empty()) {
- fatal_l(i->setup[0]->fline,
- "setup for non existing condition '%s' found",
- i->name.c_str());
- }
- }
+ for (i = b; i != e; ++i) {
+ if (i->name != "*" && !i->setup.empty() && i->rules.empty()) {
+ fatal_l(i->setup[0]->fline,
+ "setup for non existing condition '%s' found",
+ i->name.c_str());
+ }
+ }
- for (i = b; i != e && !i->setup.empty(); ++i);
- if (i == e) {
- for (i = b; i != e; ++i) {
- if (i->name == "*") {
- fatal_l(i->setup[0]->fline,
- "setup for all conditions '<!*>' is illegal "
- "if setup for each condition is defined explicitly");
- }
- }
- }
+ for (i = b; i != e && !i->setup.empty(); ++i); // stops at the first spec without setup
+ if (i == e) {
+ for (i = b; i != e; ++i) {
+ if (i->name == "*") {
+ fatal_l(i->setup[0]->fline,
+ "setup for all conditions '<!*>' is illegal "
+ "if setup for each condition is defined explicitly");
+ }
+ }
+ }
- for (i = b; i != e; ++i) {
- if (i->name == "0" && i->rules.size() > 1) {
- fatal_l(i->rules[1].code->fline,
- "startup code is already defined at line %u",
- i->rules[0].code->fline);
- }
- }
- }
+ for (i = b; i != e; ++i) {
+ if (i->name == "0" && i->rules.size() > 1) {
+ fatal_l(i->rules[1].code->fline,
+ "startup code is already defined at line %u",
+ i->rules[0].code->fline);
+ }
+ }
+ }
}
} // namespace re2c
static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m);
// Creates an empty bitmap collection for n code units and allocates the
// scratch buffer of n uint32_t entries that gen() reuses for every table.
// The buffer size is taken from the member `ncunit`, which is initialized
// first. `used` starts as false.
bitmaps_t::bitmaps_t(uint32_t n)
- : maps()
- , ncunit(n)
- , buffer(new uint32_t[ncunit])
- , used(false)
+ : maps()
+ , ncunit(n)
+ , buffer(new uint32_t[ncunit])
+ , used(false)
{}
// Releases the scratch buffer allocated by the constructor.
bitmaps_t::~bitmaps_t()
{
- delete[] buffer;
+ delete[] buffer;
}
// Registers the (go, s) pair unless an already stored pair matches it
// according to matches(). The search runs from the most recently added entry
// backwards. A new entry starts with offset i = 0 and mask m = 0; gen()
// assigns the real values.
void bitmaps_t::insert(const Go *go, const State *s)
{
- rciter_t i = maps.rbegin(), e = maps.rend();
- for (; i != e; ++i) {
- if (matches(i->go, i->on, go, s)) return;
- }
+ rciter_t i = maps.rbegin(), e = maps.rend();
+ for (; i != e; ++i) {
+ if (matches(i->go, i->on, go, s)) return;
+ }
- bitmap_t b = {go, s, 0, 0};
- maps.push_back(b);
+ bitmap_t b = {go, s, 0, 0};
+ maps.push_back(b);
}
// Looks up a stored bitmap usable for (go, s): the entry must target the same
// state s and match according to matches(). The search runs from the most
// recently added entry backwards. Returns NULL when nothing fits.
// The returned pointer refers to an element of `maps`.
const bitmap_t *bitmaps_t::find(const Go *go, const State *s) const
{
- rciter_t i = maps.rbegin(), e = maps.rend();
- for (; i != e; ++i) {
- if (i->on == s && matches(i->go, i->on, go, s)) return &(*i);
- }
- return NULL;
+ rciter_t i = maps.rbegin(), e = maps.rend();
+ for (; i != e; ++i) {
+ if (i->on == s && matches(i->go, i->on, go, s)) return &(*i);
+ }
+ return NULL;
}
// True when `maps` holds no entries.
bool bitmaps_t::empty() const { return maps.empty(); }
// Emits the bitmap table as a `static const unsigned char <yybm>[]` array at
// indentation level `ind`. Emits nothing if there are no maps or `used` is
// false.
// Maps are processed in reverse insertion order, in groups of up to eight.
// Each map in a group gets its own bit (0x80 down to 0x01) and the group's
// starting offset, both written back into the map entry (b->i, b->m). The
// group's ncunit values are then printed eight per row, in hex or as width-3
// decimals depending on opts->yybmHexTable. A "table N .. M: offset" comment
// precedes each group when there are more than eight maps.
void bitmaps_t::gen(OutputFile &o, uint32_t ind)
{
- if (empty() || !used) return;
-
- const opt_t *opts = o.block().opts;
- const uint32_t nmap = static_cast<uint32_t>(maps.size());
- riter_t b = maps.rbegin(), e = maps.rend();
-
- o.wind(ind).ws("static const unsigned char ")
- .wstring(opts->yybm).ws("[] = {");
-
- for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += 8) {
- memset(buffer, 0, ncunit * sizeof(uint32_t));
-
- for (uint32_t m = 0x80; b != e && m; m >>= 1, ++b) {
- b->i = i;
- b->m = m;
- doGen(b->go, b->on, buffer, 0, m);
- }
-
- if (nmap > 8) {
- o.ws("\n").wind(ind + 1).ws("/* table ").wu32(t).ws(" .. ")
- .wu32(std::min(nmap, t + 7)).ws(": ").wu32(i).ws(" */");
- }
-
- for (uint32_t c = 0; c < ncunit; ++c) {
- if (c % 8 == 0) {
- o.ws("\n").wind(ind + 1);
- }
- if (opts->yybmHexTable) {
- o.wu32_hex(buffer[c]);
- } else {
- o.wu32_width(buffer[c], 3);
- }
- o.ws(", ");
- }
- }
-
- o.ws("\n").wind(ind).ws("};\n");
+ if (empty() || !used) return;
+
+ const opt_t *opts = o.block().opts;
+ const uint32_t nmap = static_cast<uint32_t>(maps.size());
+ riter_t b = maps.rbegin(), e = maps.rend();
+
+ o.wind(ind).ws("static const unsigned char ")
+ .wstring(opts->yybm).ws("[] = {");
+
+ for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += 8) { // i: offset of this group, t: 1-based index of its first map
+ memset(buffer, 0, ncunit * sizeof(uint32_t));
+
+ for (uint32_t m = 0x80; b != e && m; m >>= 1, ++b) { // one bit per map, at most eight maps per group
+ b->i = i;
+ b->m = m;
+ doGen(b->go, b->on, buffer, 0, m);
+ }
+
+ if (nmap > 8) {
+ o.ws("\n").wind(ind + 1).ws("/* table ").wu32(t).ws(" .. ")
+ .wu32(std::min(nmap, t + 7)).ws(": ").wu32(i).ws(" */");
+ }
+
+ for (uint32_t c = 0; c < ncunit; ++c) {
+ if (c % 8 == 0) {
+ o.ws("\n").wind(ind + 1);
+ }
+ if (opts->yybmHexTable) {
+ o.wu32_hex(buffer[c]);
+ } else {
+ o.wu32_width(buffer[c], 3);
+ }
+ o.ws(", ");
+ }
+ }
+
+ o.ws("\n").wind(ind).ws("};\n");
}
// ORs the mask m into bm[c - f] for every symbol c below 256 that lies in a
// span of g leading to state s. Spans are described by their upper bound `ub`;
// each span covers [previous ub, ub), starting from 0.
// `f` is subtracted from the symbol to form the index; the only call visible
// here (bitmaps_t::gen) passes 0.
void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m)
{
- Span *b = g->span, *e = &b[g->nSpans];
- uint32_t lb = 0;
-
- for (; b < e; ++b)
- {
- if (b->to == s)
- {
- for (; lb < b->ub && lb < 256; ++lb)
- {
- bm[lb-f] |= m;
- }
- }
-
- lb = b->ub;
- }
+ Span *b = g->span, *e = &b[g->nSpans];
+ uint32_t lb = 0;
+
+ for (; b < e; ++b)
+ {
+ if (b->to == s)
+ {
+ for (; lb < b->ub && lb < 256; ++lb)
+ {
+ bm[lb-f] |= m;
+ }
+ }
+
+ lb = b->ub; // the next span starts where this one ends
+ }
}
// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2
// Walks both span lists in parallel, skipping spans that do not lead to the
// respective state. Returns true only if both lists run out together and every
// pair of selected spans has the same lower bound, the same upper bound and no
// tags (TCID0) on either side.
bool matches(const Go *go1, const State *s1, const Go *go2, const State *s2)
{
- const Span
- *b1 = go1->span, *e1 = &b1[go1->nSpans],
- *b2 = go2->span, *e2 = &b2[go2->nSpans];
- uint32_t lb1 = 0, lb2 = 0;
-
- for (;;)
- {
- for (; b1 < e1 && b1->to != s1; ++b1)
- {
- lb1 = b1->ub;
- }
- for (; b2 < e2 && b2->to != s2; ++b2)
- {
- lb2 = b2->ub;
- }
- if (b1 == e1)
- {
- return b2 == e2;
- }
- if (b2 == e2)
- {
- return false;
- }
- // tags are forbidden: transitions on different symbols
- // might go to the same state, but have different tag sets
- if (lb1 != lb2
- || b1->ub != b2->ub
- || b1->tags != TCID0
- || b2->tags != TCID0)
- {
- return false;
- }
- ++b1;
- ++b2;
- }
+ const Span
+ *b1 = go1->span, *e1 = &b1[go1->nSpans],
+ *b2 = go2->span, *e2 = &b2[go2->nSpans];
+ uint32_t lb1 = 0, lb2 = 0;
+
+ for (;;)
+ {
+ for (; b1 < e1 && b1->to != s1; ++b1)
+ {
+ lb1 = b1->ub;
+ }
+ for (; b2 < e2 && b2->to != s2; ++b2)
+ {
+ lb2 = b2->ub;
+ }
+ if (b1 == e1)
+ {
+ return b2 == e2;
+ }
+ if (b2 == e2)
+ {
+ return false;
+ }
+ // tags are forbidden: transitions on different symbols
+ // might go to the same state, but have different tag sets
+ if (lb1 != lb2
+ || b1->ub != b2->ub
+ || b1->tags != TCID0
+ || b2->tags != TCID0)
+ {
+ return false;
+ }
+ ++b1;
+ ++b2;
+ }
}
} // end namespace re2c
// One bitmap entry: the transition table (go) and target state (on) it was
// built for, plus its place in the generated table: `i` is the offset of its
// group and `m` its bit mask. Both are 0 until bitmaps_t::gen() assigns them.
struct bitmap_t
{
- const Go *go;
- const State *on;
- uint32_t i;
- uint32_t m;
+ const Go *go;
+ const State *on;
+ uint32_t i;
+ uint32_t m;
};
// Collection of bitmap entries plus the scratch buffer used to print them.
// insert() adds entries without duplicating matching ones, find() looks one
// up, and gen() writes the table to the output. gen() emits nothing unless the
// public flag `used` has been set. The class owns `buffer` and is
// non-copyable (FORBID_COPY).
class bitmaps_t
{
- typedef std::vector<bitmap_t> maps_t;
- typedef maps_t::reverse_iterator riter_t;
- typedef maps_t::const_reverse_iterator rciter_t;
+ typedef std::vector<bitmap_t> maps_t;
+ typedef maps_t::reverse_iterator riter_t;
+ typedef maps_t::const_reverse_iterator rciter_t;
- maps_t maps;
- uint32_t ncunit;
- uint32_t *buffer;
+ maps_t maps;
+ uint32_t ncunit;
+ uint32_t *buffer;
public:
- bool used;
-
- explicit bitmaps_t(uint32_t n);
- ~bitmaps_t();
- void insert(const Go *go, const State *s);
- const bitmap_t *find(const Go *go, const State *s) const;
- bool empty() const;
- void gen(OutputFile &o, uint32_t ind);
- FORBID_COPY(bitmaps_t);
+ bool used;
+
+ explicit bitmaps_t(uint32_t n);
+ ~bitmaps_t();
+ void insert(const Go *go, const State *s);
+ const bitmap_t *find(const Go *go, const State *s) const;
+ bool empty() const;
+ void gen(OutputFile &o, uint32_t ind);
+ FORBID_COPY(bitmaps_t);
};
} // namespace re2c
// Returns the string s repeated n times (empty for n == 0).
inline std::string indent(uint32_t n, const std::string &s)
{
- std::string ind;
- for (; n --> 0; ind += s);
- return ind;
+ std::string ind;
+ for (; n --> 0; ind += s); // "n-- > 0": runs exactly n times
+ return ind;
}
} // namespace re2c
static bool endstate (const State *s);
// Emits the code for the action attached to state s, dispatching on
// s->action.type:
//  - MATCH:   delayed skip, need() check for s->fill, delayed peek (the peek
//             is requested only when endstate(s) is false);
//  - INITIAL: optional 'yyaccept' assignment, delayed skip, the initial label
//             (only if it is in used_labels), optional debug call (dFlag),
//             need() check, delayed backup and peek;
//  - SAVE:    'yyaccept' assignment (only if the DFA has more than one accept
//             entry), delayed skip, backup, need() check and peek;
//  - MOVE:    nothing;
//  - ACCEPT / RULE: delegated to emit_accept() / emit_rule().
// NOTE(review): the wdelay_* writers are opaque here; presumably they record
// output that is materialized later - confirm in OutputFile.
void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa,
- const State *s, const std::set<label_t> &used_labels)
+ const State *s, const std::set<label_t> &used_labels)
{
- const opt_t *opts = o.block().opts;
- switch (s->action.type) {
- case Action::MATCH:
- o.wdelay_skip(ind, !opts->eager_skip);
- need(o, ind, s->fill);
- o.wdelay_peek(ind, !endstate(s));
- break;
- case Action::INITIAL: {
- const Initial &init = *s->action.info.initial;
- const bool
- backup = init.save != Initial::NOSAVE,
- ul1 = used_labels.count(s->label);
- if (ul1 && dfa.accepts.size() > 1 && backup) {
- o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(init.save).ws(";\n");
- }
- o.wdelay_skip(ind, ul1 && !opts->eager_skip);
- if (used_labels.count(init.label)) {
- o.wstring(opts->labelPrefix).wlabel(init.label).wstring(":\n");
- }
- if (opts->dFlag) {
- o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(init.label)
- .ws(", *").wstring(opts->yycursor).ws(");\n");
- }
- need(o, ind, s->fill);
- o.wdelay_backup(ind, backup);
- o.wdelay_peek(ind, !endstate(s));
- break;
- }
- case Action::SAVE:
- if (dfa.accepts.size() > 1) {
- o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(s->action.info.save).ws(";\n");
- }
- o.wdelay_skip(ind, !opts->eager_skip);
- o.wdelay_backup(ind, true);
- need(o, ind, s->fill);
- o.wdelay_peek(ind, true);
- break;
- case Action::MOVE:
- break;
- case Action::ACCEPT:
- emit_accept(o, ind, dfa, *s->action.info.accepts);
- break;
- case Action::RULE:
- emit_rule(o, ind, dfa, s->action.info.rule);
- break;
- }
+ const opt_t *opts = o.block().opts;
+ switch (s->action.type) {
+ case Action::MATCH:
+ o.wdelay_skip(ind, !opts->eager_skip);
+ need(o, ind, s->fill);
+ o.wdelay_peek(ind, !endstate(s));
+ break;
+ case Action::INITIAL: {
+ const Initial &init = *s->action.info.initial;
+ const bool
+ backup = init.save != Initial::NOSAVE,
+ ul1 = used_labels.count(s->label); // is the state's own label referenced?
+ if (ul1 && dfa.accepts.size() > 1 && backup) {
+ o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(init.save).ws(";\n");
+ }
+ o.wdelay_skip(ind, ul1 && !opts->eager_skip);
+ if (used_labels.count(init.label)) {
+ o.wstring(opts->labelPrefix).wlabel(init.label).wstring(":\n");
+ }
+ if (opts->dFlag) {
+ o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(init.label)
+ .ws(", *").wstring(opts->yycursor).ws(");\n");
+ }
+ need(o, ind, s->fill);
+ o.wdelay_backup(ind, backup);
+ o.wdelay_peek(ind, !endstate(s));
+ break;
+ }
+ case Action::SAVE:
+ if (dfa.accepts.size() > 1) {
+ o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(s->action.info.save).ws(";\n");
+ }
+ o.wdelay_skip(ind, !opts->eager_skip);
+ o.wdelay_backup(ind, true);
+ need(o, ind, s->fill);
+ o.wdelay_peek(ind, true);
+ break;
+ case Action::MOVE:
+ break;
+ case Action::ACCEPT:
+ emit_accept(o, ind, dfa, *s->action.info.accepts);
+ break;
+ case Action::RULE:
+ emit_rule(o, ind, dfa, s->action.info.rule);
+ break;
+ }
}
// Emits a binary decision tree of nested if/else statements over 'yyaccept'
// for the accept entries with indices in [l, r] (both inclusive).
// The range is split at m = (l + r) / 2: the "if" branch handles [l, m] and
// the "else" branch [m + 1, r]. A range of exactly two entries is tested with
// "==" instead of "<=". A single-entry range emits a plain goto to that entry.
void emit_accept_binary(OutputFile &o, uint32_t ind, const DFA &dfa,
- const accept_t &acc, size_t l, size_t r)
+ const accept_t &acc, size_t l, size_t r)
{
- const opt_t *opts = o.block().opts;
- if (l < r) {
- const size_t m = (l + r) >> 1;
- o.wind(ind).ws("if (").wstring(opts->yyaccept)
- .ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n");
- emit_accept_binary (o, ++ind, dfa, acc, l, m);
- o.wind(--ind).ws("} else {\n");
- emit_accept_binary (o, ++ind, dfa, acc, m + 1, r);
- o.wind(--ind).ws("}\n");
- } else {
- gen_goto_plain(o, ind, acc[l].first, dfa, acc[l].second, false);
- }
+ const opt_t *opts = o.block().opts;
+ if (l < r) {
+ const size_t m = (l + r) >> 1;
+ o.wind(ind).ws("if (").wstring(opts->yyaccept)
+ .ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n");
+ emit_accept_binary (o, ++ind, dfa, acc, l, m);
+ o.wind(--ind).ws("} else {\n");
+ emit_accept_binary (o, ++ind, dfa, acc, m + 1, r);
+ o.wind(--ind).ws("}\n");
+ } else {
+ gen_goto_plain(o, ind, acc[l].first, dfa, acc[l].second, false);
+ }
}
// Emits the dispatch on 'yyaccept' that jumps to one of the accept entries in
// `acc`. Emits nothing for an empty list. Otherwise the restore code
// (output_restore) is written first, followed by one of:
//  - a single unconditional goto when there is exactly one entry;
//  - a computed-goto jump table when gFlag is set, the number of entries
//    reaches cGotoThreshold and no entry carries tags;
//  - nested ifs (emit_accept_binary) when sFlag is set or there are exactly
//    two entries;
//  - a switch, in which the last entry is emitted as the "default" case.
void emit_accept(OutputFile &o, uint32_t ind, const DFA &dfa, const accept_t &acc)
{
- const opt_t *opts = o.block().opts;
- const size_t nacc = acc.size();
-
- if (nacc == 0) return;
-
- o.wstring(output_restore(ind, opts));
-
- // only one possible 'yyaccept' value: unconditional jump
- if (nacc == 1) {
- gen_goto_plain(o, ind, acc[0].first, dfa, acc[0].second, false);
- return;
- }
-
- bool have_tags = false;
- for (size_t i = 0; i < nacc; ++i) {
- if (acc[i].second != TCID0) {
- have_tags = true;
- break;
- }
- }
-
- // jump table
- if (opts->gFlag && nacc >= opts->cGotoThreshold && !have_tags) {
- o.wind(ind).ws("{\n")
- .wind(ind + 1).ws("static void *")
- .wstring(opts->yytarget).ws("[")
- .wu64(nacc).ws("] = {\n");
- for (uint32_t i = 0; i < nacc; ++i) {
- o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix)
- .wlabel(acc[i].first->label).ws(",\n");
- }
- o.wind(ind + 1).ws("};\n")
- .wind(ind + 1).ws("goto *")
- .wstring(opts->yytarget).ws("[")
- .wstring(opts->yyaccept).ws("];\n")
- .wind(ind).ws("}\n");
- return;
- }
-
- // nested ifs
- if (opts->sFlag || nacc == 2) {
- emit_accept_binary(o, ind, dfa, acc, 0, nacc - 1);
- return;
- }
-
- // switch
- o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
- for (uint32_t i = 0; i < nacc - 1; ++i) {
- o.wind(ind).ws("case ").wu32(i).ws(": ");
- gen_goto_case(o, ind, acc[i].first, dfa, acc[i].second, false);
- }
- o.wind(ind).ws("default:");
- gen_goto_case(o, ind, acc[nacc - 1].first, dfa, acc[nacc - 1].second, false);
- o.wind(ind).ws("}\n");
+ const opt_t *opts = o.block().opts;
+ const size_t nacc = acc.size();
+
+ if (nacc == 0) return;
+
+ o.wstring(output_restore(ind, opts));
+
+ // only one possible 'yyaccept' value: unconditional jump
+ if (nacc == 1) {
+ gen_goto_plain(o, ind, acc[0].first, dfa, acc[0].second, false);
+ return;
+ }
+
+ bool have_tags = false;
+ for (size_t i = 0; i < nacc; ++i) {
+ if (acc[i].second != TCID0) {
+ have_tags = true;
+ break;
+ }
+ }
+
+ // jump table
+ if (opts->gFlag && nacc >= opts->cGotoThreshold && !have_tags) {
+ o.wind(ind).ws("{\n")
+ .wind(ind + 1).ws("static void *")
+ .wstring(opts->yytarget).ws("[")
+ .wu64(nacc).ws("] = {\n");
+ for (uint32_t i = 0; i < nacc; ++i) {
+ o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix)
+ .wlabel(acc[i].first->label).ws(",\n");
+ }
+ o.wind(ind + 1).ws("};\n")
+ .wind(ind + 1).ws("goto *")
+ .wstring(opts->yytarget).ws("[")
+ .wstring(opts->yyaccept).ws("];\n")
+ .wind(ind).ws("}\n");
+ return;
+ }
+
+ // nested ifs
+ if (opts->sFlag || nacc == 2) {
+ emit_accept_binary(o, ind, dfa, acc, 0, nacc - 1);
+ return;
+ }
+
+ // switch
+ o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
+ for (uint32_t i = 0; i < nacc - 1; ++i) {
+ o.wind(ind).ws("case ").wu32(i).ws(": ");
+ gen_goto_case(o, ind, acc[i].first, dfa, acc[i].second, false);
+ }
+ o.wind(ind).ws("default:");
+ gen_goto_case(o, ind, acc[nacc - 1].first, dfa, acc[nacc - 1].second, false);
+ o.wind(ind).ws("}\n");
}
// Emits the code executed when rule `rule_idx` of `dfa` matches.
//
// Order of emission:
//   1. final tag assignments for the rule (gen_fintags);
//   2. for the skeleton target: the skeleton action (emit_action);
//   3. otherwise:
//      - if the rule's code names a condition different from `dfa.cond`,
//        the condition-set statement built from `opts->cond_set`, followed by
//        "(<condEnumPrefix><cond>);" unless `cond_set_naked` is set;
//      - for user-written code (`!code->autogen`): the optional `dfa.setup`
//        text, then the code text wrapped in delayed line-info directives;
//      - for autogenerated code with a condition: the `condGoto` statement.
//
// `ind` is the indentation level passed to `wind()` for every emitted line.
void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx)
{
- const opt_t *opts = o.block().opts;
- const Rule &rule = dfa.rules[rule_idx];
- const Code *code = rule.code;
- const std::string &cond = code->cond;
- std::string s;
-
- gen_fintags(o, ind, dfa, rule);
-
- if (opts->target == TARGET_SKELETON) {
- emit_action(o, ind, dfa, rule_idx);
- } else {
- if (!cond.empty() && dfa.cond != cond) {
- strrreplace(s = opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + cond);
- o.wind(ind).wstring(s);
- if (!opts->cond_set_naked) {
- o.ws("(").wstring(opts->condEnumPrefix).wstring(cond).ws(");");
- }
- o.ws("\n");
- }
- if (!code->autogen) {
- if (!dfa.setup.empty()) {
- o.wind(ind).wstring(dfa.setup).ws("\n");
- }
- o.wdelay_line_info_input(code->fline, code->fname)
- .wind(ind).wstring(code->text).ws("\n")
- .wdelay_line_info_output();
- } else if (!cond.empty()) {
- strrreplace(s = opts->condGoto, opts->condGotoParam, opts->condPrefix + cond);
- o.wind(ind).wstring(s).ws("\n");
- }
- }
+ const opt_t *opts = o.block().opts;
+ const Rule &rule = dfa.rules[rule_idx];
+ const Code *code = rule.code;
+ const std::string &cond = code->cond;
+ std::string s;
+
+ gen_fintags(o, ind, dfa, rule);
+
+ if (opts->target == TARGET_SKELETON) {
+ emit_action(o, ind, dfa, rule_idx);
+ } else {
+ if (!cond.empty() && dfa.cond != cond) {
+ strrreplace(s = opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + cond);
+ o.wind(ind).wstring(s);
+ if (!opts->cond_set_naked) {
+ o.ws("(").wstring(opts->condEnumPrefix).wstring(cond).ws(");");
+ }
+ o.ws("\n");
+ }
+ if (!code->autogen) {
+ if (!dfa.setup.empty()) {
+ o.wind(ind).wstring(dfa.setup).ws("\n");
+ }
+ o.wdelay_line_info_input(code->fline, code->fname)
+ .wind(ind).wstring(code->text).ws("\n")
+ .wdelay_line_info_output();
+ } else if (!cond.empty()) {
+ strrreplace(s = opts->condGoto, opts->condGotoParam, opts->condPrefix + cond);
+ o.wind(ind).wstring(s).ws("\n");
+ }
+ }
}
// Emits the "make sure `some` more input characters are available" sequence.
// Does nothing when `some` is zero.
//
// Emitted pieces, in order:
//   - with `opts->fFlag`: the state-set statement built from `opts->state_set`
//     using the current `o.fill_index`, followed by "(<fill_index>);" unless
//     `state_set_naked` is set;
//   - with `opts->fill_use`: the fill statement built from `opts->fill`,
//     optionally guarded by "if (<output_expr_lessthan(some)>) " when
//     `fill_check` is set, and followed by "(<some>)" / ";" depending on
//     `fill_arg_use` / `fill_naked`;
//   - with `opts->fFlag`: the label "<yyfilllabel><fill_index>:" after which
//     `o.fill_index` is incremented.
void need(OutputFile &o, uint32_t ind, size_t some)
{
- if (some == 0) return;
-
- const opt_t *opts = o.block().opts;
- std::string s;
-
- if (opts->fFlag) {
- strrreplace(s = opts->state_set, opts->state_set_arg, o.fill_index);
- o.wind(ind).wstring(s);
- if (!opts->state_set_naked) {
- o.ws("(").wu32(o.fill_index).ws(");");
- }
- o.ws("\n");
- }
-
- if (opts->fill_use) {
- o.wind(ind);
- if (opts->fill_check) {
- o.ws("if (").wstring(output_expr_lessthan(some, opts)).ws(") ");
- }
- strrreplace(s = opts->fill, opts->fill_arg, some);
- o.wstring(s);
- if (!opts->fill_naked) {
- if (opts->fill_arg_use) {
- o.ws("(").wu64(some).ws(")");
- }
- o.ws(";");
- }
- o.ws("\n");
- }
-
- if (opts->fFlag) {
- o.wstring(opts->yyfilllabel).wu32(o.fill_index).ws(":\n");
- ++o.fill_index;
- }
+ if (some == 0) return;
+
+ const opt_t *opts = o.block().opts;
+ std::string s;
+
+ if (opts->fFlag) {
+ strrreplace(s = opts->state_set, opts->state_set_arg, o.fill_index);
+ o.wind(ind).wstring(s);
+ if (!opts->state_set_naked) {
+ o.ws("(").wu32(o.fill_index).ws(");");
+ }
+ o.ws("\n");
+ }
+
+ if (opts->fill_use) {
+ o.wind(ind);
+ if (opts->fill_check) {
+ o.ws("if (").wstring(output_expr_lessthan(some, opts)).ws(") ");
+ }
+ strrreplace(s = opts->fill, opts->fill_arg, some);
+ o.wstring(s);
+ if (!opts->fill_naked) {
+ if (opts->fill_arg_use) {
+ o.ws("(").wu64(some).ws(")");
+ }
+ o.ws(";");
+ }
+ o.ws("\n");
+ }
+
+ if (opts->fFlag) {
+ o.wstring(opts->yyfilllabel).wu32(o.fill_index).ws(":\n");
+ ++o.fill_index;
+ }
}
// Writes the transition code produced by gen_goto() in the layout used after
// a `case`/`default` label.
//
// When gen_goto() yields exactly one line, it is written on the current output
// line after one indentation unit. Otherwise a newline is written first and
// every generated line goes on its own line at indentation `ind + 1` (this
// branch also covers the zero-line case, which then emits only the newline).
void gen_goto_case(OutputFile &o, uint32_t ind, const State *to,
- const DFA &dfa, tcid_t tcid, bool skip)
+ const DFA &dfa, tcid_t tcid, bool skip)
{
- code_lines_t code;
- gen_goto(code, to, dfa, tcid, o.block().opts, skip);
- const size_t lines = code.size();
-
- if (lines == 1) {
- o.wind(1).wstring(code[0]);
- } else {
- o.ws("\n");
- for (size_t i = 0; i < lines; ++i) {
- o.wind(ind + 1).wstring(code[i]);
- }
- }
+ code_lines_t code;
+ gen_goto(code, to, dfa, tcid, o.block().opts, skip);
+ const size_t lines = code.size();
+
+ if (lines == 1) {
+ o.wind(1).wstring(code[0]);
+ } else {
+ o.ws("\n");
+ for (size_t i = 0; i < lines; ++i) {
+ o.wind(ind + 1).wstring(code[i]);
+ }
+ }
}
// Writes the transition code produced by gen_goto() in the layout used as the
// body of an `if`.
//
// A single generated line is written as is (no indentation, no braces).
// Any other number of lines is wrapped in "{ ... }": the lines are written at
// indentation `ind + 1` and the closing brace at indentation `ind`.
void gen_goto_if(OutputFile &o, uint32_t ind, const State *to,
- const DFA &dfa, tcid_t tcid, bool skip)
+ const DFA &dfa, tcid_t tcid, bool skip)
{
- code_lines_t code;
- gen_goto(code, to, dfa, tcid, o.block().opts, skip);
- const size_t lines = code.size();
-
- if (lines == 1) {
- o.wstring(code[0]);
- } else {
- o.ws("{\n");
- for (size_t i = 0; i < lines; ++i) {
- o.wind(ind + 1).wstring(code[i]);
- }
- o.wind(ind).ws("}\n");
- }
+ code_lines_t code;
+ gen_goto(code, to, dfa, tcid, o.block().opts, skip);
+ const size_t lines = code.size();
+
+ if (lines == 1) {
+ o.wstring(code[0]);
+ } else {
+ o.ws("{\n");
+ for (size_t i = 0; i < lines; ++i) {
+ o.wind(ind + 1).wstring(code[i]);
+ }
+ o.wind(ind).ws("}\n");
+ }
}
// Writes the transition code produced by gen_goto() as plain statements:
// every generated line is written at indentation `ind`, with no braces and
// no special handling of the single-line case.
void gen_goto_plain(OutputFile &o, uint32_t ind, const State *to,
- const DFA &dfa, tcid_t tcid, bool skip)
+ const DFA &dfa, tcid_t tcid, bool skip)
{
- code_lines_t code;
- gen_goto(code, to, dfa, tcid, o.block().opts, skip);
- const size_t lines = code.size();
+ code_lines_t code;
+ gen_goto(code, to, dfa, tcid, o.block().opts, skip);
+ const size_t lines = code.size();
- for (size_t i = 0; i < lines; ++i) {
- o.wind(ind).wstring(code[i]);
- }
+ for (size_t i = 0; i < lines; ++i) {
+ o.wind(ind).wstring(code[i]);
+ }
}
// Appends to `code` the statements that perform one transition:
//   - the skip statement rendered by output_skip(), only when `skip` is true;
//     it goes before the tag commands when `opts->lookahead` is off and after
//     them when it is on;
//   - the tag commands for `tcid` (gen_settags);
//   - "goto <labelPrefix><label>;" when `to` is non-null.
// Each appended element is a complete line ending in '\n'.
void gen_goto(code_lines_t &code, const State *to, const DFA &dfa,
- tcid_t tcid, const opt_t *opts, bool skip)
+ tcid_t tcid, const opt_t *opts, bool skip)
{
- std::ostringstream s;
- output_skip(s, 0, opts);
-
- if (skip && !opts->lookahead) {
- code.push_back(s.str());
- }
- gen_settags(code, dfa, tcid, opts);
- if (skip && opts->lookahead) {
- code.push_back(s.str());
- }
- if (to) {
- code.push_back("goto " + opts->labelPrefix
- + to_string(to->label) + ";\n");
- }
+ std::ostringstream s;
+ output_skip(s, 0, opts); // rendered unconditionally; used only if `skip`
+
+ if (skip && !opts->lookahead) {
+ code.push_back(s.str());
+ }
+ gen_settags(code, dfa, tcid, opts);
+ if (skip && opts->lookahead) {
+ code.push_back(s.str());
+ }
+ if (to) {
+ code.push_back("goto " + opts->labelPrefix
+ + to_string(to->label) + ";\n");
+ }
}
// Appends to `code` one statement per tag command in the command list
// `dfa.tcpool[tcid]`.
//
// Old-style context marker (`dfa.oldstyle_ctxmarker`) with a non-empty list:
// a single backup statement is emitted ("<yybackupctx> ();" for the custom
// input API, "<yyctxmarker> = <yycursor>;" otherwise) and the list itself is
// not walked.
//
// Otherwise each command is translated according to its kind:
//   - copy:                  "<lhs> = <rhs>;"
//   - add (with history):    optional "<lhs> = <rhs>;" when lhs != rhs, then
//                            one <yymtagn>/<yymtagp> call per history entry
//                            up to the TAGVER_ZERO terminator, appended in
//                            reverse order of the history array;
//   - set, custom input API: one <yystagn>/<yystagp> call;
//   - set, default API:      consecutive set commands are merged into chained
//                            assignments ending in "NULL;" (TAGVER_BOTTOM
//                            history) and/or "<yycursor>;" (all others).
// Tag variables are spelled via vartag_expr() with `opts->tags_prefix` and
// `opts->tags_expression`.
void gen_settags(code_lines_t &code, const DFA &dfa, tcid_t tcid, const opt_t *opts)
{
- const bool generic = opts->input_api == INPUT_CUSTOM;
- const std::string
- &prefix = opts->tags_prefix,
- &expression = opts->tags_expression;
- const tcmd_t *cmd = dfa.tcpool[tcid];
-
- // single tag YYCTXMARKER, backwards compatibility
- if (cmd && dfa.oldstyle_ctxmarker) {
- const std::string s = generic
- ? opts->yybackupctx + " ();\n"
- : opts->yyctxmarker + " = " + opts->yycursor + ";\n";
- code.push_back(s);
- return;
- }
-
- for (const tcmd_t *p = cmd; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
-
- // copy command
- if (tcmd_t::iscopy(p)) {
- const std::string
- le = vartag_expr(l, prefix, expression),
- re = vartag_expr(r, prefix, expression),
- s = le + " = " + re + ";\n";
- code.push_back(s);
-
- // save command; history
- } else if (tcmd_t::isadd(p)) {
- const std::string
- le = vartag_expr(l, prefix, expression),
- re = vartag_expr(r, prefix, expression);
- if (l != r) {
- const std::string s = le + " = " + re + ";\n";
- code.push_back(s);
- }
- code_lines_t code1;
- for (; *h != TAGVER_ZERO; ++h) {
- const std::string s = *h == TAGVER_BOTTOM
- ? opts->yymtagn + " (" + le + ");\n"
- : opts->yymtagp + " (" + le + ");\n";
- code1.push_back(s);
- }
- code.insert(code.end(), code1.rbegin(), code1.rend());
-
- // save command; no history; generic API
- } else if (generic) {
- const std::string
- v = vartag_expr(l, prefix, expression),
- s = *h == TAGVER_BOTTOM
- ? opts->yystagn + " (" + v + ");\n"
- : opts->yystagp + " (" + v + ");\n";
- code.push_back(s);
-
- // save command; no history; default API
- } else {
- std::string s1 = "", s2 = "";
- for (const tcmd_t *q = p; q && tcmd_t::isset(q); p = q, q = q->next) {
- std::string &s = q->history[0] == TAGVER_BOTTOM ? s1 : s2;
- s += vartag_expr(q->lhs, prefix, expression) + " = ";
- }
- if (!s1.empty()) {
- s1 += "NULL;\n";
- code.push_back(s1);
- }
- if (!s2.empty()) {
- s2 += opts->yycursor + ";\n";
- code.push_back(s2);
- }
- }
- }
+ const bool generic = opts->input_api == INPUT_CUSTOM;
+ const std::string
+ &prefix = opts->tags_prefix,
+ &expression = opts->tags_expression;
+ const tcmd_t *cmd = dfa.tcpool[tcid];
+
+ // single tag YYCTXMARKER, backwards compatibility
+ if (cmd && dfa.oldstyle_ctxmarker) {
+ const std::string s = generic
+ ? opts->yybackupctx + " ();\n"
+ : opts->yyctxmarker + " = " + opts->yycursor + ";\n";
+ code.push_back(s);
+ return;
+ }
+
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+
+ // copy command
+ if (tcmd_t::iscopy(p)) {
+ const std::string
+ le = vartag_expr(l, prefix, expression),
+ re = vartag_expr(r, prefix, expression),
+ s = le + " = " + re + ";\n";
+ code.push_back(s);
+
+ // save command; history
+ } else if (tcmd_t::isadd(p)) {
+ const std::string
+ le = vartag_expr(l, prefix, expression),
+ re = vartag_expr(r, prefix, expression);
+ if (l != r) {
+ const std::string s = le + " = " + re + ";\n";
+ code.push_back(s);
+ }
+ code_lines_t code1;
+ for (; *h != TAGVER_ZERO; ++h) {
+ const std::string s = *h == TAGVER_BOTTOM
+ ? opts->yymtagn + " (" + le + ");\n"
+ : opts->yymtagp + " (" + le + ");\n";
+ code1.push_back(s);
+ }
+ code.insert(code.end(), code1.rbegin(), code1.rend()); // reversed history order
+
+ // save command; no history; generic API
+ } else if (generic) {
+ const std::string
+ v = vartag_expr(l, prefix, expression),
+ s = *h == TAGVER_BOTTOM
+ ? opts->yystagn + " (" + v + ");\n"
+ : opts->yystagp + " (" + v + ");\n";
+ code.push_back(s);
+
+ // save command; no history; default API
+ } else {
+ std::string s1 = "", s2 = "";
+ // 'p = q' leaves p on the last merged command, so the outer loop's
+ // 'p = p->next' resumes right after the merged run
+ for (const tcmd_t *q = p; q && tcmd_t::isset(q); p = q, q = q->next) {
+ std::string &s = q->history[0] == TAGVER_BOTTOM ? s1 : s2;
+ s += vartag_expr(q->lhs, prefix, expression) + " = ";
+ }
+ if (!s1.empty()) {
+ s1 += "NULL;\n";
+ code.push_back(s1);
+ }
+ if (!s2.empty()) {
+ s2 += opts->yycursor + ";\n";
+ code.push_back(s2);
+ }
+ }
+ }
}
// Emits the final tag assignments executed when `rule` matches.
//
//   1. "yynmatch = <ncap>;" when the rule has captures (`rule.ncap > 0`).
//   2. Variable tags in [rule.ltag, rule.htag) (fictive and fixed tags are
//      skipped): a non-trailing tag is assigned its final version expression;
//      a trailing tag restores the cursor instead, using <yyrestorectx> /
//      <yyrestoretag> for the custom input API and a plain assignment to
//      <yycursor> otherwise.
//   3. Fixed tags in the same range (fictive and variable tags are skipped):
//      the value is derived from a base expression, which is <yycursor> when
//      `tag.base == Tag::RIGHTMOST` and the final version of the base tag
//      otherwise, minus the distance `tag.dist` (the custom input API asserts
//      that the distance is zero).
//
// NOTE(review): in the fixed-tag loop the indentation and the trailing ";\n"
// are written unconditionally, so a trailing tag fixed on the cursor with
// zero distance produces a line containing only ";" — confirm this is
// intended.
void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule)
{
- const opt_t *opts = o.block().opts;
- const bool generic = opts->input_api == INPUT_CUSTOM;
- const std::string
- &prefix = opts->tags_prefix,
- &expression = opts->tags_expression;
- std::string expr;
- const std::vector<Tag> &tags = dfa.tags;
- const tagver_t *fins = dfa.finvers;
-
- if (rule.ncap > 0) {
- o.wind(ind).ws("yynmatch = ").wu64(rule.ncap).ws(";\n");
- }
-
- // variable tags
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
- const Tag &tag = tags[t];
-
- // see note [fixed and variable tags]
- if (fictive(tag) || fixed(tag)) continue;
-
- expr = vartag_expr(fins[t], prefix, expression);
-
- o.wind(ind);
- if (!trailing(tag)) {
- o.wstring(tagname(tag)).ws(" = ").wstring(expr);
- } else if (generic) {
- if (dfa.oldstyle_ctxmarker) {
- o.wstring(opts->yyrestorectx).ws(" ()");
- } else {
- o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")");
- }
- } else {
- if (dfa.oldstyle_ctxmarker) {
- o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker);
- } else {
- o.wstring(opts->yycursor).ws(" = ").wstring(expr);
- }
- }
- o.ws(";\n");
- }
-
- // fixed tags
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
- const Tag &tag = tags[t];
-
- // see note [fixed and variable tags]
- if (fictive(tag) || !fixed(tag)) continue;
-
- const size_t dist = tag.dist;
- const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST;
- expr = fixed_on_cursor ? opts->yycursor
- : vartag_expr(fins[tag.base], prefix, expression);
-
- o.wind(ind);
- if (generic) {
- assert(dist == 0);
- if (!trailing(tag)) {
- o.wstring(tagname(tag)).ws(" = ").wstring(expr);
- } else if (!fixed_on_cursor) {
- assert(!dfa.oldstyle_ctxmarker);
- o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")");
- }
- } else {
- if (!trailing(tag)) {
- o.wstring(tagname(tag)).ws(" = ").wstring(expr);
- if (dist > 0) o.ws(" - ").wu64(dist);
- } else if (!fixed_on_cursor) {
- o.wstring(opts->yycursor).ws(" = ").wstring(expr);
- if (dist > 0) o.ws(" - ").wu64(dist);
- } else if (dist > 0) {
- o.wstring(opts->yycursor).ws(" -= ").wu64(dist);
- }
- }
- o.ws(";\n");
- }
+ const opt_t *opts = o.block().opts;
+ const bool generic = opts->input_api == INPUT_CUSTOM;
+ const std::string
+ &prefix = opts->tags_prefix,
+ &expression = opts->tags_expression;
+ std::string expr;
+ const std::vector<Tag> &tags = dfa.tags;
+ const tagver_t *fins = dfa.finvers;
+
+ if (rule.ncap > 0) {
+ o.wind(ind).ws("yynmatch = ").wu64(rule.ncap).ws(";\n");
+ }
+
+ // variable tags
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ const Tag &tag = tags[t];
+
+ // see note [fixed and variable tags]
+ if (fictive(tag) || fixed(tag)) continue;
+
+ expr = vartag_expr(fins[t], prefix, expression);
+
+ o.wind(ind);
+ if (!trailing(tag)) {
+ o.wstring(tagname(tag)).ws(" = ").wstring(expr);
+ } else if (generic) {
+ if (dfa.oldstyle_ctxmarker) {
+ o.wstring(opts->yyrestorectx).ws(" ()");
+ } else {
+ o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")");
+ }
+ } else {
+ if (dfa.oldstyle_ctxmarker) {
+ o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker);
+ } else {
+ o.wstring(opts->yycursor).ws(" = ").wstring(expr);
+ }
+ }
+ o.ws(";\n");
+ }
+
+ // fixed tags
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ const Tag &tag = tags[t];
+
+ // see note [fixed and variable tags]
+ if (fictive(tag) || !fixed(tag)) continue;
+
+ const size_t dist = tag.dist;
+ const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST;
+ expr = fixed_on_cursor ? opts->yycursor
+ : vartag_expr(fins[tag.base], prefix, expression);
+
+ o.wind(ind);
+ if (generic) {
+ assert(dist == 0);
+ if (!trailing(tag)) {
+ o.wstring(tagname(tag)).ws(" = ").wstring(expr);
+ } else if (!fixed_on_cursor) {
+ assert(!dfa.oldstyle_ctxmarker);
+ o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")");
+ }
+ } else {
+ if (!trailing(tag)) {
+ o.wstring(tagname(tag)).ws(" = ").wstring(expr);
+ if (dist > 0) o.ws(" - ").wu64(dist);
+ } else if (!fixed_on_cursor) {
+ o.wstring(opts->yycursor).ws(" = ").wstring(expr);
+ if (dist > 0) o.ws(" - ").wu64(dist);
+ } else if (dist > 0) {
+ o.wstring(opts->yycursor).ws(" -= ").wu64(dist);
+ }
+ }
+ o.ws(";\n");
+ }
}
// Returns the name under which a non-trailing tag is exposed in the generated
// code: "yypmatch[<ncap>]" for capture tags, the tag's own name otherwise.
// Must not be called for trailing tags (asserted).
std::string tagname(const Tag &tag)
{
- assert(!trailing(tag));
- return capture(tag)
- ? "yypmatch[" + to_string(tag.ncap) + "]"
- : *tag.name;
+ assert(!trailing(tag));
+ return capture(tag)
+ ? "yypmatch[" + to_string(tag.ncap) + "]"
+ : *tag.name;
}
// Tells whether `s` is an 'end' state: it has exactly one span and that span
// leads to a state whose action is RULE or ACCEPT.
//
// NOTE(review): `s->go.span[0]` is dereferenced before `nSpans` is checked,
// so this relies on every state having at least one span — confirm.
bool endstate(const State *s)
{
- // 'end' state is a state which has no outgoing transitions on symbols
- // usually 'end' states are final states (not all final states are 'end'
- // states), but sometimes 'end' state happens to be initial non-accepting
- // state, e.g. in case of rule '[]'
- const Action::type_t &a = s->go.span[0].to->action.type;
- return s->go.nSpans == 1
- && (a == Action::RULE || a == Action::ACCEPT);
+ // 'end' state is a state which has no outgoing transitions on symbols
+ // usually 'end' states are final states (not all final states are 'end'
+ // states), but sometimes 'end' state happens to be initial non-accepting
+ // state, e.g. in case of rule '[]'
+ const Action::type_t &a = s->go.span[0].to->action.type;
+ return s->go.nSpans == 1
+ && (a == Action::RULE || a == Action::ACCEPT);
}
} // namespace re2c
// Emits the per-state preamble for state `s`:
//   - the label "<labelPrefix><label>:" when `used_label` is true;
//   - in debug mode (`opts->dFlag`), for every state whose action is not
//     INITIAL, the call "<yydebug>(<label>, <peek expression>);".
void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label)
{
- const opt_t *opts = o.block().opts;
- if (used_label)
- {
- o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n");
- }
- if (opts->dFlag && (s->action.type != Action::INITIAL))
- {
- o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(output_expr_peek(opts)).ws(");\n");
- }
+ const opt_t *opts = o.block().opts;
+ if (used_label)
+ {
+ o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n");
+ }
+ if (opts->dFlag && (s->action.type != Action::INITIAL))
+ {
+ o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(output_expr_peek(opts)).ws(");\n");
+ }
}
// Collects into `used` the labels that the generated code refers to:
//   - the first label, when `fFlag` is set;
//   - `start`, when `force_start` is set;
//   - every label reported by the transitions of each state
//     (`State::go.used_labels`);
//   - the label of every state listed in `accepts`;
//   - `initial`, but only if the head state's label is already in the set,
//     which is why this step has to come last.
void DFA::count_used_labels (std::set<label_t> & used, label_t start,
- label_t initial, bool force_start, bool fFlag) const
+ label_t initial, bool force_start, bool fFlag) const
{
- // In '-f' mode, default state is always state 0
- if (fFlag)
- {
- used.insert (label_t::first ());
- }
- if (force_start)
- {
- used.insert (start);
- }
- for (State * s = head; s; s = s->next)
- {
- s->go.used_labels (used);
- }
- for (uint32_t i = 0; i < accepts.size (); ++i)
- {
- used.insert (accepts[i].first->label);
- }
- // must go last: it needs the set of used labels
- if (used.count (head->label))
- {
- used.insert (initial);
- }
+ // In '-f' mode, default state is always state 0
+ if (fFlag)
+ {
+ used.insert (label_t::first ());
+ }
+ if (force_start)
+ {
+ used.insert (start);
+ }
+ for (State * s = head; s; s = s->next)
+ {
+ s->go.used_labels (used);
+ }
+ for (uint32_t i = 0; i < accepts.size (); ++i)
+ {
+ used.insert (accepts[i].first->label);
+ }
+ // must go last: it needs the set of used labels
+ if (used.count (head->label))
+ {
+ used.insert (initial);
+ }
}
// Emits the body of the automaton:
//   1. the tag commands identified by `tags0`, at indentation `ind`;
//   2. "goto <labelPrefix><initial>;" when the head state's label is in
//      `used_labels`;
//   3. for every state in list order: its preamble (emit_state), its action
//      (emit_action) and its transitions (`State::go.emit`).
void DFA::emit_body(OutputFile &o, uint32_t& ind,
- const std::set<label_t> &used_labels, label_t initial) const
+ const std::set<label_t> &used_labels, label_t initial) const
{
- code_lines_t code;
- gen_settags(code, *this, tags0, o.block().opts);
- for (size_t i = 0; i < code.size(); ++i) {
- o.wind(ind).wstring(code[i]);
- }
+ code_lines_t code;
+ gen_settags(code, *this, tags0, o.block().opts);
+ for (size_t i = 0; i < code.size(); ++i) {
+ o.wind(ind).wstring(code[i]);
+ }
- // If DFA has transitions to initial state, then initial state
- // has a piece of code that advances input position. Wee must
- // skip it when entering DFA.
- if (used_labels.count(head->label)) {
- o.wind(ind).ws("goto ").wstring(o.block().opts->labelPrefix)
- .wlabel(initial).ws(";\n");
- }
+ // If DFA has transitions to initial state, then initial state
+ // has a piece of code that advances input position. We must
+ // skip it when entering DFA.
+ if (used_labels.count(head->label)) {
+ o.wind(ind).ws("goto ").wstring(o.block().opts->labelPrefix)
+ .wlabel(initial).ws(";\n");
+ }
- for (State * s = head; s; s = s->next) {
- emit_state(o, ind, s, used_labels.count(s->label));
- emit_action(o, ind, *this, s, used_labels);
- s->go.emit(o, ind, *this);
- }
+ for (State * s = head; s; s = s->next) {
+ emit_state(o, ind, s, used_labels.count(s->label));
+ emit_action(o, ind, *this, s, used_labels);
+ s->go.emit(o, ind, *this);
+ }
}
// Emits the automaton as a DOT graph.
//
// The "digraph re2c {" header is written unless conditions are enabled
// (`opts->cFlag`) and `o.cond_goto` is already set; the closing "}" is
// written when conditions are disabled or `last_cond` is true. With
// conditions enabled an edge "<cond> -> <head label>" is written first.
//
// Per state:
//   - ACCEPT action: one edge per entry of the accept table, labelled
//     "yyaccept=<index>";
//   - RULE action with user-written code: a node label "<file>:<line>";
//   - then the state's transitions via `State::go.emit` at indentation 0.
void DFA::emit_dot(OutputFile &o, bool last_cond) const
{
- const opt_t *opts = o.block().opts;
- if (!opts->cFlag || !o.cond_goto) {
- o.ws("digraph re2c {\n");
- }
- o.wdelay_cond_goto(0);
- if (opts->cFlag) {
- o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
- }
- for (State *s = head; s; s = s->next) {
- if (s->action.type == Action::ACCEPT) {
- const accept_t &accs = *s->action.info.accepts;
- for (uint32_t i = 0; i < accs.size(); ++i) {
- o.wlabel(s->label).ws(" -> ")
- .wlabel(accs[i].first->label)
- .ws(" [label=\"yyaccept=")
- .wu32(i).ws("\"]").ws("\n");
- }
- } else if (s->action.type == Action::RULE) {
- const Code *code = rules[s->action.info.rule].code;
- if (!code->autogen) {
- o.wlabel(s->label).ws(" [label=\"")
- .wstring(code->fname)
- .ws(":").wu32(code->fline)
- .ws("\"]").ws("\n");
- }
- }
- s->go.emit(o, 0, *this);
- }
- if (!opts->cFlag || last_cond) {
- o.ws("}\n");
- }
+ const opt_t *opts = o.block().opts;
+ if (!opts->cFlag || !o.cond_goto) {
+ o.ws("digraph re2c {\n");
+ }
+ o.wdelay_cond_goto(0);
+ if (opts->cFlag) {
+ o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
+ }
+ for (State *s = head; s; s = s->next) {
+ if (s->action.type == Action::ACCEPT) {
+ const accept_t &accs = *s->action.info.accepts;
+ for (uint32_t i = 0; i < accs.size(); ++i) {
+ o.wlabel(s->label).ws(" -> ")
+ .wlabel(accs[i].first->label)
+ .ws(" [label=\"yyaccept=")
+ .wu32(i).ws("\"]").ws("\n");
+ }
+ } else if (s->action.type == Action::RULE) {
+ const Code *code = rules[s->action.info.rule].code;
+ if (!code->autogen) {
+ o.wlabel(s->label).ws(" [label=\"")
+ .wstring(code->fname)
+ .ws(":").wu32(code->fline)
+ .ws("\"]").ws("\n");
+ }
+ }
+ s->go.emit(o, 0, *this);
+ }
+ if (!opts->cFlag || last_cond) {
+ o.ws("}\n");
+ }
}
// Top-level code generation entry point for one automaton.
//
// Steps:
//   1. Unless the old-style context marker is in use, collect tag names
//      (user-visible names and generated version variables, split into
//      s-tags and m-tags) and register the generated variables in the
//      current output block.
//   2. Record the condition name in the block's `types`, if there is one.
//   3. Allocate labels: `start_label` for the block, `initial_label` for the
//      automaton (a distinct label only when a prolog is generated in '-c'
//      mode), then one label per state; compute the set of used labels and
//      mark the head state as initial.
//   4. Dispatch on the target:
//        - skeleton: emit start/body/end once per automaton `name`;
//        - dot:      emit_dot();
//        - default:  prolog (optional opening brace, yych declaration,
//                    delayed yyaccept init, tables, start labels, condition
//                    dispatch), condition label, optional bitmap scope, the
//                    body, and the matching closing braces.
//
// `ind` is updated in place as braces are opened and closed. `bPrologBrace`
// is set to true when the prolog opens a brace and is consulted when emitting
// the epilog, which in '-c' mode happens only for the last condition
// (`isLastCond`).
void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace)
{
- OutputFile &o = output.source;
- OutputBlock &ob = o.block();
- const opt_t *opts = ob.opts;
+ OutputFile &o = output.source;
+ OutputBlock &ob = o.block();
+ const opt_t *opts = ob.opts;
- std::set<std::string> stagnames, stagvars, mtagnames, mtagvars;
- if (!oldstyle_ctxmarker) {
- for (size_t i = 0; i < tags.size(); ++i) {
- const Tag &tag = tags[i];
- if (history(tag)) {
- mtagvars.insert(*tag.name);
- } else if (tag.name) {
- stagvars.insert(*tag.name);
- }
- }
- for (tagver_t v = 1; v <= maxtagver; ++v) {
- const std::string s = vartag_name(v, opts->tags_prefix);
- if (mtagvers.find(v) != mtagvers.end()) {
- mtagnames.insert(s);
- } else {
- stagnames.insert(s);
- }
- }
- ob.stags.insert(stagnames.begin(), stagnames.end());
- ob.mtags.insert(mtagnames.begin(), mtagnames.end());
- }
- if (!cond.empty()) o.block().types.push_back(cond);
+ std::set<std::string> stagnames, stagvars, mtagnames, mtagvars;
+ if (!oldstyle_ctxmarker) {
+ for (size_t i = 0; i < tags.size(); ++i) {
+ const Tag &tag = tags[i];
+ if (history(tag)) {
+ mtagvars.insert(*tag.name);
+ } else if (tag.name) {
+ stagvars.insert(*tag.name);
+ }
+ }
+ for (tagver_t v = 1; v <= maxtagver; ++v) {
+ const std::string s = vartag_name(v, opts->tags_prefix);
+ if (mtagvers.find(v) != mtagvers.end()) {
+ mtagnames.insert(s);
+ } else {
+ stagnames.insert(s);
+ }
+ }
+ ob.stags.insert(stagnames.begin(), stagnames.end());
+ ob.mtags.insert(mtagnames.begin(), mtagnames.end());
+ }
+ if (!cond.empty()) o.block().types.push_back(cond);
- bool bProlog = (!opts->cFlag || !o.cond_goto);
+ bool bProlog = (!opts->cFlag || !o.cond_goto);
- // start_label points to the beginning of current re2c block
- // (prior to condition dispatch in '-c' mode)
- // it can forced by configuration 're2c:startlabel = <integer>;'
- label_t start_label = o.label_counter.next ();
- // initial_label points to the beginning of DFA
- // in '-c' mode this is NOT equal to start_label
- label_t initial_label = bProlog && opts->cFlag
- ? o.label_counter.next ()
- : start_label;
- for (State * s = head; s; s = s->next)
- {
- s->label = o.label_counter.next ();
- }
- std::set<label_t> used_labels;
- count_used_labels (used_labels, start_label, initial_label,
- opts->startlabel_force && opts->startlabel.empty(), opts->fFlag);
+ // start_label points to the beginning of current re2c block
+ // (prior to condition dispatch in '-c' mode)
+ // it can be forced by configuration 're2c:startlabel = <integer>;'
+ label_t start_label = o.label_counter.next ();
+ // initial_label points to the beginning of DFA
+ // in '-c' mode this is NOT equal to start_label
+ label_t initial_label = bProlog && opts->cFlag
+ ? o.label_counter.next ()
+ : start_label;
+ for (State * s = head; s; s = s->next)
+ {
+ s->label = o.label_counter.next ();
+ }
+ std::set<label_t> used_labels;
+ count_used_labels (used_labels, start_label, initial_label,
+ opts->startlabel_force && opts->startlabel.empty(), opts->fFlag);
- head->action.set_initial(initial_label);
+ head->action.set_initial(initial_label);
- if (opts->target == TARGET_SKELETON) {
- if (output.skeletons.insert (name).second)
- {
- emit_start(o, max_fill, max_nmatch, name, key_size, def_rule,
- need_backup, need_accept, oldstyle_ctxmarker,
- stagnames, stagvars, mtagnames, mtagvars, bitmaps);
- uint32_t i = 2;
- emit_body (o, i, used_labels, initial_label);
- emit_end(o, name, need_backup, oldstyle_ctxmarker, mtagnames);
- }
- } else if (opts->target == TARGET_DOT) {
- emit_dot(o, isLastCond);
- } else {
- // Generate prolog
- if (bProlog)
- {
- o.ws("\n").wdelay_line_info_output ();
- if ((!opts->fFlag && ob.used_yyaccept)
- || (!opts->fFlag && opts->bEmitYYCh)
- || (opts->bFlag && !opts->cFlag && !bitmaps.empty())
- || (opts->cFlag && !o.cond_goto && opts->gFlag)
- || (opts->fFlag && !o.state_goto && opts->gFlag)
- )
- {
- bPrologBrace = true;
- o.wind(ind++).ws("{\n");
- }
- else if (ind == 0)
- {
- ind = 1;
- }
- if (!opts->fFlag)
- {
- if (opts->bEmitYYCh)
- {
- o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n");
- }
- o.wdelay_yyaccept_init (ind);
- }
- else
- {
- o.ws("\n");
- }
- }
- if (opts->bFlag && !opts->cFlag)
- {
- bitmaps.gen(o, ind);
- }
- if (bProlog)
- {
- o.wdelay_cond_table(ind);
- o.wdelay_state_goto (ind);
- if (opts->cFlag)
- {
- if (used_labels.count(start_label))
- {
- o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n");
- }
- }
- o.wuser_start_label ();
- o.wdelay_cond_goto(ind);
- }
- if (opts->cFlag && !cond.empty())
- {
- if (opts->condDivider.length())
- {
- std::string divider = opts->condDivider;
- strrreplace(divider, opts->condDividerParam, cond);
- o.wstring(divider).ws("\n");
- }
- o.wstring(opts->condPrefix).wstring(cond).ws(":\n");
- }
- if (opts->cFlag && opts->bFlag && !bitmaps.empty())
- {
- o.wind(ind++).ws("{\n");
- bitmaps.gen(o, ind);
- }
- // Generate code
- emit_body (o, ind, used_labels, initial_label);
- if (opts->cFlag && opts->bFlag && !bitmaps.empty())
- {
- o.wind(--ind).ws("}\n");
- }
- // Generate epilog
- if ((!opts->cFlag || isLastCond) && bPrologBrace)
- {
- o.wind(--ind).ws("}\n");
- }
- }
+ if (opts->target == TARGET_SKELETON) {
+ if (output.skeletons.insert (name).second)
+ {
+ emit_start(o, max_fill, max_nmatch, name, key_size, def_rule,
+ need_backup, need_accept, oldstyle_ctxmarker,
+ stagnames, stagvars, mtagnames, mtagvars, bitmaps);
+ uint32_t i = 2;
+ emit_body (o, i, used_labels, initial_label);
+ emit_end(o, name, need_backup, oldstyle_ctxmarker, mtagnames);
+ }
+ } else if (opts->target == TARGET_DOT) {
+ emit_dot(o, isLastCond);
+ } else {
+ // Generate prolog
+ if (bProlog)
+ {
+ o.ws("\n").wdelay_line_info_output ();
+ if ((!opts->fFlag && ob.used_yyaccept)
+ || (!opts->fFlag && opts->bEmitYYCh)
+ || (opts->bFlag && !opts->cFlag && !bitmaps.empty())
+ || (opts->cFlag && !o.cond_goto && opts->gFlag)
+ || (opts->fFlag && !o.state_goto && opts->gFlag)
+ )
+ {
+ bPrologBrace = true;
+ o.wind(ind++).ws("{\n");
+ }
+ else if (ind == 0)
+ {
+ ind = 1;
+ }
+ if (!opts->fFlag)
+ {
+ if (opts->bEmitYYCh)
+ {
+ o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n");
+ }
+ o.wdelay_yyaccept_init (ind);
+ }
+ else
+ {
+ o.ws("\n");
+ }
+ }
+ if (opts->bFlag && !opts->cFlag)
+ {
+ bitmaps.gen(o, ind);
+ }
+ if (bProlog)
+ {
+ o.wdelay_cond_table(ind);
+ o.wdelay_state_goto (ind);
+ if (opts->cFlag)
+ {
+ if (used_labels.count(start_label))
+ {
+ o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n");
+ }
+ }
+ o.wuser_start_label ();
+ o.wdelay_cond_goto(ind);
+ }
+ if (opts->cFlag && !cond.empty())
+ {
+ if (opts->condDivider.length())
+ {
+ std::string divider = opts->condDivider;
+ strrreplace(divider, opts->condDividerParam, cond);
+ o.wstring(divider).ws("\n");
+ }
+ o.wstring(opts->condPrefix).wstring(cond).ws(":\n");
+ }
+ if (opts->cFlag && opts->bFlag && !bitmaps.empty())
+ {
+ o.wind(ind++).ws("{\n");
+ bitmaps.gen(o, ind);
+ }
+ // Generate code
+ emit_body (o, ind, used_labels, initial_label);
+ if (opts->cFlag && opts->bFlag && !bitmaps.empty())
+ {
+ o.wind(--ind).ws("}\n");
+ }
+ // Generate epilog
+ if ((!opts->cFlag || isLastCond) && bPrologBrace)
+ {
+ o.wind(--ind).ws("}\n");
+ }
+ }
}
// Returns the name of the variable that holds tag version `ver`:
// `prefix` immediately followed by the version number as formatted by
// `operator<<` on a string stream.
std::string vartag_name(tagver_t ver, const std::string &prefix)
{
- std::ostringstream s;
- s << prefix << ver;
- return s.str();
+ std::ostringstream s;
+ s << prefix << ver;
+ return s.str();
}
// Returns the expression used to access tag version `ver` in generated code.
// Starts from a copy of the `expression` template and passes it through
// strrreplace() with the placeholder "@@" and the variable name from
// vartag_name(ver, prefix).
// NOTE(review): strrreplace() is defined elsewhere; presumably it substitutes
// the placeholder occurrences in place — confirm.
std::string vartag_expr(tagver_t ver, const std::string &prefix, const std::string &expression)
{
- const std::string s = vartag_name(ver, prefix);
- std::string e = expression;
- strrreplace(e, "@@", s);
- return e;
+ const std::string s = vartag_name(ver, prefix);
+ std::string e = expression;
+ strrreplace(e, "@@", s);
+ return e;
}
} // end namespace re2c
// One transition span of a state: a target state plus the id of the tag
// commands attached to the transition.
// NOTE(review): `ub` is presumably the upper bound of the symbol range covered
// by the span (whether inclusive or exclusive is not visible here) — confirm.
struct Span
{
- uint32_t ub;
- State * to;
- tcid_t tags;
+ uint32_t ub;
+ State * to;
+ tcid_t tags;
};
// One `case` group of a generated switch: a list of symbol ranges that share
// the same target state, tag command id and skip flag.
// Default-constructed with no ranges, a null target, TCID0 tags and
// skip == false.
// NOTE(review): each pair in `ranges` is presumably (lower bound, upper
// bound); FORBID_COPY presumably disables copying — confirm both.
struct Case
{
- std::vector<std::pair<uint32_t, uint32_t> > ranges;
- const State *to;
- tcid_t tags;
- bool skip;
-
- void emit(OutputFile &o, uint32_t ind) const;
- inline Case(): ranges(), to(NULL), tags(TCID0), skip(false) {}
- FORBID_COPY(Case);
+ std::vector<std::pair<uint32_t, uint32_t> > ranges;
+ const State *to;
+ tcid_t tags;
+ bool skip;
+
+ void emit(OutputFile &o, uint32_t ind) const;
+ inline Case(): ranges(), to(NULL), tags(TCID0), skip(false) {}
+ FORBID_COPY(Case);
};
// Switch-style transition table: an array of `cases_size` Case entries built
// from an array of spans.
// NOTE(review): `cases` looks like an owning raw array released by ~Cases();
// the definitions are not visible here — confirm.
struct Cases
{
- Case *cases;
- uint32_t cases_size;
-
- void add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip);
- Cases(const Span *spans, uint32_t nspans, bool skip);
- ~Cases();
- void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const;
- void used_labels(std::set<label_t> &used) const;
- FORBID_COPY(Cases);
+ Case *cases;
+ uint32_t cases_size;
+
+ void add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip);
+ Cases(const Span *spans, uint32_t nspans, bool skip);
+ ~Cases();
+ void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const;
+ void used_labels(std::set<label_t> &used) const;
+ FORBID_COPY(Cases);
};
// A comparison used as the condition of a generated `if`.
// NOTE(review): `compare` presumably holds the comparison operator text and
// `value` the constant compared against — confirm against the emitters.
struct Cond
{
- std::string compare;
- uint32_t value;
- Cond (const std::string & cmp, uint32_t val);
+ std::string compare;
+ uint32_t value;
+ Cond (const std::string & cmp, uint32_t val);
};
// Binary-decision form of a transition: a condition with a "then" sub-tree
// and an "else" sub-tree, each of type If.
// NOTE(review): the three pointers are presumably owned and released by
// ~Binary(); the definitions are not visible here — confirm.
struct Binary
{
- Cond * cond;
- If * thn;
- If * els;
- Binary (const Span * s, uint32_t n, const State * next, bool skip);
- ~Binary ();
- void emit (OutputFile &o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
-
- FORBID_COPY (Binary);
+ Cond * cond;
+ If * thn;
+ If * els;
+ Binary (const Span * s, uint32_t n, const State * next, bool skip);
+ ~Binary ();
+ void emit (OutputFile &o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
+
+ FORBID_COPY (Binary);
};
// Linear form of a transition: a flat array of `nbranches` branches, each
// pairing a condition with a target state, tag command id and skip flag.
// NOTE(review): a branch's `cond` is presumably allowed to be null for an
// unconditional branch, and `branches` presumably is an owning array released
// by ~Linear() — neither is visible here; confirm.
struct Linear
{
- struct Branch
- {
- const Cond *cond;
- const State *to;
- tcid_t tags;
- bool skip;
- };
-
- size_t nbranches;
- Branch *branches;
-
- Linear(const Span *s, uint32_t n, const State *next, bool skip);
- ~Linear();
- void add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip);
- void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const;
- void used_labels(std::set<label_t> &used) const;
- FORBID_COPY(Linear);
+ struct Branch
+ {
+ const Cond *cond;
+ const State *to;
+ tcid_t tags;
+ bool skip;
+ };
+
+ size_t nbranches;
+ Branch *branches;
+
+ Linear(const Span *s, uint32_t n, const State *next, bool skip);
+ ~Linear();
+ void add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip);
+ void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const;
+ void used_labels(std::set<label_t> &used) const;
+ FORBID_COPY(Linear);
};
// Tagged union over the two `if`-based transition forms: `type` selects
// which member of `info` (Binary or Linear) is in use.
// NOTE(review): the active pointer is presumably owned and released by
// ~If() — confirm.
struct If
{
- enum type_t
- {
- BINARY,
- LINEAR
- } type;
- union
- {
- Binary * binary;
- Linear * linear;
- } info;
- If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip);
- ~If ();
- void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
+ enum type_t
+ {
+ BINARY,
+ LINEAR
+ } type;
+ union
+ {
+ Binary * binary;
+ Linear * linear;
+ } info;
+ If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip);
+ ~If ();
+ void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
};
struct SwitchIf
{
- enum
- {
- SWITCH,
- IF
- } type;
- union
- {
- Cases * cases;
- If * ifs;
- } info;
- SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip);
- ~SwitchIf ();
- void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
+ enum
+ {
+ SWITCH,
+ IF
+ } type;
+ union
+ {
+ Cases * cases;
+ If * ifs;
+ } info;
+ SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip);
+ ~SwitchIf ();
+ void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
};
struct GoBitmap
{
- const bitmap_t * bitmap;
- const State * bitmap_state;
- SwitchIf * hgo;
- SwitchIf * lgo;
- GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan,
- uint32_t hSpans, const bitmap_t * bm, const State * bm_state,
- const State * next, bool sflag);
- ~GoBitmap ();
- void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
-
- FORBID_COPY (GoBitmap);
+ const bitmap_t * bitmap;
+ const State * bitmap_state;
+ SwitchIf * hgo;
+ SwitchIf * lgo;
+ GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan,
+ uint32_t hSpans, const bitmap_t * bm, const State * bm_state,
+ const State * next, bool sflag);
+ ~GoBitmap ();
+ void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
+
+ FORBID_COPY (GoBitmap);
};
struct CpgotoTable
{
- static const uint32_t TABLE_SIZE;
- const State ** table;
- CpgotoTable (const Span * span, uint32_t nSpans);
- ~CpgotoTable ();
- void emit (OutputFile & o, uint32_t ind) const;
- void used_labels (std::set<label_t> & used) const;
+ static const uint32_t TABLE_SIZE;
+ const State ** table;
+ CpgotoTable (const Span * span, uint32_t nSpans);
+ ~CpgotoTable ();
+ void emit (OutputFile & o, uint32_t ind) const;
+ void used_labels (std::set<label_t> & used) const;
private:
- label_t max_label () const;
+ label_t max_label () const;
- FORBID_COPY (CpgotoTable);
+ FORBID_COPY (CpgotoTable);
};
struct Cpgoto
{
- SwitchIf * hgo;
- CpgotoTable * table;
- Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan,
- uint32_t hSpans, const State * next, bool sflag);
- ~Cpgoto ();
- void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
-
- FORBID_COPY (Cpgoto);
+ SwitchIf * hgo;
+ CpgotoTable * table;
+ Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan,
+ uint32_t hSpans, const State * next, bool sflag);
+ ~Cpgoto ();
+ void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
+
+ FORBID_COPY (Cpgoto);
};
struct Dot
{
- const State * from;
- Cases * cases;
- Dot(const Span *sp, uint32_t nsp, const State *s);
- ~Dot ();
- void emit (OutputFile & o, const DFA &dfa) const;
+ const State * from;
+ Cases * cases;
+ Dot(const Span *sp, uint32_t nsp, const State *s);
+ ~Dot ();
+ void emit (OutputFile & o, const DFA &dfa) const;
- FORBID_COPY (Dot);
+ FORBID_COPY (Dot);
};
struct Go
{
- uint32_t nSpans; // number of spans
- Span * span;
- tcid_t tags;
- bool skip;
- enum
- {
- EMPTY,
- SWITCH_IF,
- BITMAP,
- CPGOTO,
- DOT
- } type;
- union
- {
- SwitchIf * switchif;
- GoBitmap * bitmap;
- Cpgoto * cpgoto;
- Dot * dot;
- } info;
-
- Go ();
- ~Go ();
- void init(const State* from, const opt_t *opts, bitmaps_t &bitmaps);
- void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
- void used_labels (std::set<label_t> & used) const;
-
- Go (const Go & g)
- : nSpans (g.nSpans)
- , span (g.span)
- , tags (g.tags)
- , skip (g.skip)
- , type (g.type)
- , info (g.info)
- {}
- Go & operator = (const Go & g)
- {
- nSpans = g.nSpans;
- span = g.span;
- tags = g.tags;
- skip = g.skip;
- type = g.type;
- info = g.info;
- return * this;
- }
+ uint32_t nSpans; // number of spans
+ Span * span;
+ tcid_t tags;
+ bool skip;
+ enum
+ {
+ EMPTY,
+ SWITCH_IF,
+ BITMAP,
+ CPGOTO,
+ DOT
+ } type;
+ union
+ {
+ SwitchIf * switchif;
+ GoBitmap * bitmap;
+ Cpgoto * cpgoto;
+ Dot * dot;
+ } info;
+
+ Go ();
+ ~Go ();
+ void init(const State* from, const opt_t *opts, bitmaps_t &bitmaps);
+ void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const;
+ void used_labels (std::set<label_t> & used) const;
+
+ Go (const Go & g)
+ : nSpans (g.nSpans)
+ , span (g.span)
+ , tags (g.tags)
+ , skip (g.skip)
+ , type (g.type)
+ , info (g.info)
+ {}
+ Go & operator = (const Go & g)
+ {
+ nSpans = g.nSpans;
+ span = g.span;
+ tags = g.tags;
+ skip = g.skip;
+ type = g.type;
+ info = g.info;
+ return * this;
+ }
};
bool consume(const State *s);
bool consume(const State *s)
{
- switch (s->action.type) {
- case Action::RULE:
- case Action::MOVE:
- case Action::ACCEPT: return false;
- case Action::MATCH:
- case Action::INITIAL:
- case Action::SAVE: return true;
- }
- return true; /* unreachable */
+ switch (s->action.type) {
+ case Action::RULE:
+ case Action::MOVE:
+ case Action::ACCEPT: return false;
+ case Action::MATCH:
+ case Action::INITIAL:
+ case Action::SAVE: return true;
+ }
+ return true; /* unreachable */
}
Cases::Cases(const Span *spans, uint32_t nspans, bool skip)
- : cases(new Case[nspans])
- , cases_size(0)
+ : cases(new Case[nspans])
+ , cases_size(0)
{
- assert(nspans > 0);
+ assert(nspans > 0);
- // first case is default case
- Case &c = cases[cases_size++];
- const Span *s = spans + (nspans - 1);
- c.to = s->to;
- c.tags = s->tags;
- c.skip = skip && consume(s->to);
+ // first case is default case
+ Case &c = cases[cases_size++];
+ const Span *s = spans + (nspans - 1);
+ c.to = s->to;
+ c.tags = s->tags;
+ c.skip = skip && consume(s->to);
- for (uint32_t i = 0, lb = 0; i < nspans; ++i) {
- s = spans + i;
- add(lb, s->ub, s->to, s->tags, skip && consume(s->to));
- lb = s->ub;
- }
+ for (uint32_t i = 0, lb = 0; i < nspans; ++i) {
+ s = spans + i;
+ add(lb, s->ub, s->to, s->tags, skip && consume(s->to));
+ lb = s->ub;
+ }
}
void Cases::add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip)
{
- for (uint32_t i = 0; i < cases_size; ++i) {
- Case &c = cases[i];
- if (c.to == to && c.tags == tags) {
- c.ranges.push_back(std::make_pair(lb, ub));
- return;
- }
- }
- Case &c = cases[cases_size++];
- c.ranges.push_back(std::make_pair(lb, ub));
- c.to = to;
- c.tags = tags;
- c.skip = skip;
+ for (uint32_t i = 0; i < cases_size; ++i) {
+ Case &c = cases[i];
+ if (c.to == to && c.tags == tags) {
+ c.ranges.push_back(std::make_pair(lb, ub));
+ return;
+ }
+ }
+ Case &c = cases[cases_size++];
+ c.ranges.push_back(std::make_pair(lb, ub));
+ c.to = to;
+ c.tags = tags;
+ c.skip = skip;
}
Cond::Cond (const std::string & cmp, uint32_t val)
- : compare (cmp)
- , value (val)
+ : compare (cmp)
+ , value (val)
{}
Binary::Binary (const Span * s, uint32_t n, const State * next, bool skip)
- : cond (NULL)
- , thn (NULL)
- , els (NULL)
+ : cond (NULL)
+ , thn (NULL)
+ , els (NULL)
{
- const uint32_t l = n / 2;
- const uint32_t h = n - l;
- cond = new Cond ("<=", s[l - 1].ub - 1);
- thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next, skip);
- els = new If (h > 4 ? If::BINARY : If::LINEAR, &s[l], h, next, skip);
+ const uint32_t l = n / 2;
+ const uint32_t h = n - l;
+ cond = new Cond ("<=", s[l - 1].ub - 1);
+ thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next, skip);
+ els = new If (h > 4 ? If::BINARY : If::LINEAR, &s[l], h, next, skip);
}
void Linear::add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip)
{
- Branch &b = branches[nbranches++];
- b.cond = cond;
- b.to = to;
- b.tags = tags;
- b.skip = skip;
+ Branch &b = branches[nbranches++];
+ b.cond = cond;
+ b.to = to;
+ b.tags = tags;
+ b.skip = skip;
}
Linear::Linear(const Span *s, uint32_t n, const State *next, bool skip)
- : nbranches(0)
- , branches(new Branch[n])
+ : nbranches(0)
+ , branches(new Branch[n])
{
- for (;;) {
- if (n == 1 && s[0].to == next) {
- add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to));
- return;
- } else if (n == 1) {
- add_branch(NULL, s[0].to, s[0].tags, skip && consume(s[0].to));
- return;
- } else if (n == 2 && s[0].to == next) {
- add_branch(new Cond(">=", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to));
- add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to));
- return;
- } else if (n == 3
- && s[1].to == next
- && s[1].ub - s[0].ub == 1
- && s[2].to == s[0].to
- && s[2].tags == s[0].tags) {
- add_branch(new Cond("!=", s[0].ub), s[0].to, s[0].tags, skip && consume(s[0].to));
- add_branch(NULL, NULL, s[1].tags, skip && consume(s[1].to));
- return;
- } else if (n >= 3
- && s[1].ub - s[0].ub == 1
- && s[2].to == s[0].to
- && s[2].tags == s[0].tags) {
- add_branch(new Cond("==", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to));
- n -= 2;
- s += 2;
- } else {
- add_branch(new Cond("<=", s[0].ub - 1), s[0].to, s[0].tags, skip && consume(s[0].to));
- n -= 1;
- s += 1;
- }
- }
+ for (;;) {
+ if (n == 1 && s[0].to == next) {
+ add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to));
+ return;
+ } else if (n == 1) {
+ add_branch(NULL, s[0].to, s[0].tags, skip && consume(s[0].to));
+ return;
+ } else if (n == 2 && s[0].to == next) {
+ add_branch(new Cond(">=", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to));
+ add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to));
+ return;
+ } else if (n == 3
+ && s[1].to == next
+ && s[1].ub - s[0].ub == 1
+ && s[2].to == s[0].to
+ && s[2].tags == s[0].tags) {
+ add_branch(new Cond("!=", s[0].ub), s[0].to, s[0].tags, skip && consume(s[0].to));
+ add_branch(NULL, NULL, s[1].tags, skip && consume(s[1].to));
+ return;
+ } else if (n >= 3
+ && s[1].ub - s[0].ub == 1
+ && s[2].to == s[0].to
+ && s[2].tags == s[0].tags) {
+ add_branch(new Cond("==", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to));
+ n -= 2;
+ s += 2;
+ } else {
+ add_branch(new Cond("<=", s[0].ub - 1), s[0].to, s[0].tags, skip && consume(s[0].to));
+ n -= 1;
+ s += 1;
+ }
+ }
}
If::If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip)
- : type (t)
- , info ()
+ : type (t)
+ , info ()
{
- switch (type)
- {
- case BINARY:
- info.binary = new Binary (sp, nsp, next, skip);
- break;
- case LINEAR:
- info.linear = new Linear (sp, nsp, next, skip);
- break;
- }
+ switch (type)
+ {
+ case BINARY:
+ info.binary = new Binary (sp, nsp, next, skip);
+ break;
+ case LINEAR:
+ info.linear = new Linear (sp, nsp, next, skip);
+ break;
+ }
}
SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip)
- : type (IF)
- , info ()
+ : type (IF)
+ , info ()
{
- if ((!sflag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2))))
- {
- type = SWITCH;
- info.cases = new Cases (sp, nsp, skip);
- }
- else if (nsp > 5)
- {
- info.ifs = new If (If::BINARY, sp, nsp, next, skip);
- }
- else
- {
- info.ifs = new If (If::LINEAR, sp, nsp, next, skip);
- }
+ if ((!sflag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2))))
+ {
+ type = SWITCH;
+ info.cases = new Cases (sp, nsp, skip);
+ }
+ else if (nsp > 5)
+ {
+ info.ifs = new If (If::BINARY, sp, nsp, next, skip);
+ }
+ else
+ {
+ info.ifs = new If (If::LINEAR, sp, nsp, next, skip);
+ }
}
GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan,
- uint32_t hSpans, const bitmap_t * bm, const State * bm_state,
- const State * next, bool sflag)
- : bitmap (bm)
- , bitmap_state (bm_state)
- , hgo (NULL)
- , lgo (NULL)
+ uint32_t hSpans, const bitmap_t * bm, const State * bm_state,
+ const State * next, bool sflag)
+ : bitmap (bm)
+ , bitmap_state (bm_state)
+ , hgo (NULL)
+ , lgo (NULL)
{
- Span * bspan = allocate<Span> (nSpans);
- uint32_t bSpans = unmap (bspan, span, nSpans, bm_state);
- lgo = bSpans == 0
- ? NULL
- : new SwitchIf (bspan, bSpans, next, sflag, false);
- // if there are any low spans, then next state for high spans
- // must be NULL to trigger explicit goto generation in linear 'if'
- hgo = hSpans == 0
- ? NULL
- : new SwitchIf (hspan, hSpans, lgo ? NULL : next, sflag, false);
- operator delete (bspan);
+ Span * bspan = allocate<Span> (nSpans);
+ uint32_t bSpans = unmap (bspan, span, nSpans, bm_state);
+ lgo = bSpans == 0
+ ? NULL
+ : new SwitchIf (bspan, bSpans, next, sflag, false);
+ // if there are any low spans, then next state for high spans
+ // must be NULL to trigger explicit goto generation in linear 'if'
+ hgo = hSpans == 0
+ ? NULL
+ : new SwitchIf (hspan, hSpans, lgo ? NULL : next, sflag, false);
+ operator delete (bspan);
}
const uint32_t CpgotoTable::TABLE_SIZE = 0x100;
CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans)
- : table (new const State * [TABLE_SIZE])
+ : table (new const State * [TABLE_SIZE])
{
- uint32_t c = 0;
- for (uint32_t i = 0; i < nSpans; ++i)
- {
- for(; c < span[i].ub && c < TABLE_SIZE; ++c)
- {
- table[c] = span[i].to;
- }
- }
+ uint32_t c = 0;
+ for (uint32_t i = 0; i < nSpans; ++i)
+ {
+ for(; c < span[i].ub && c < TABLE_SIZE; ++c)
+ {
+ table[c] = span[i].to;
+ }
+ }
}
Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan,
- uint32_t hSpans, const State * next, bool sflag)
- : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next, sflag, false))
- , table (new CpgotoTable (span, nSpans))
+ uint32_t hSpans, const State * next, bool sflag)
+ : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next, sflag, false))
+ , table (new CpgotoTable (span, nSpans))
{}
Dot::Dot (const Span * sp, uint32_t nsp, const State * s)
- : from (s)
- , cases (new Cases (sp, nsp, false))
+ : from (s)
+ , cases (new Cases (sp, nsp, false))
{}
Go::Go ()
- : nSpans (0)
- , span (NULL)
- , tags (TCID0)
- , skip (false)
- , type (EMPTY)
- , info ()
+ : nSpans (0)
+ , span (NULL)
+ , tags (TCID0)
+ , skip (false)
+ , type (EMPTY)
+ , info ()
{}
void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps)
{
- if (nSpans == 0)
- {
- return;
- }
+ if (nSpans == 0)
+ {
+ return;
+ }
- // initialize high (wide) spans
- uint32_t hSpans = 0;
- const Span * hspan = NULL;
- for (uint32_t i = 0; i < nSpans; ++i)
- {
- if (span[i].ub > 0x100)
- {
- hspan = &span[i];
- hSpans = nSpans - i;
- break;
- }
- }
+ // initialize high (wide) spans
+ uint32_t hSpans = 0;
+ const Span * hspan = NULL;
+ for (uint32_t i = 0; i < nSpans; ++i)
+ {
+ if (span[i].ub > 0x100)
+ {
+ hspan = &span[i];
+ hSpans = nSpans - i;
+ break;
+ }
+ }
- bool low_spans_have_tags = false;
- for (uint32_t i = 0; i < nSpans - hSpans; ++i) {
- if (span[i].tags != TCID0) {
- low_spans_have_tags = true;
- break;
- }
- }
+ bool low_spans_have_tags = false;
+ for (uint32_t i = 0; i < nSpans - hSpans; ++i) {
+ if (span[i].tags != TCID0) {
+ low_spans_have_tags = true;
+ break;
+ }
+ }
- // initialize bitmaps
- uint32_t nBitmaps = 0;
- const bitmap_t *bm = NULL;
- const State *bms = NULL;
+ // initialize bitmaps
+ uint32_t nBitmaps = 0;
+ const bitmap_t *bm = NULL;
+ const State *bms = NULL;
- for (uint32_t i = 0; i < nSpans; ++i) {
- const State *s = span[i].to;
- if (!s->isBase) continue;
+ for (uint32_t i = 0; i < nSpans; ++i) {
+ const State *s = span[i].to;
+ if (!s->isBase) continue;
- const bitmap_t *b = bitmaps.find(this, s);
- if (b) {
- if (bm == NULL) {
- bm = b;
- bms = s;
- }
- ++nBitmaps;
- }
- }
+ const bitmap_t *b = bitmaps.find(this, s);
+ if (b) {
+ if (bm == NULL) {
+ bm = b;
+ bms = s;
+ }
+ ++nBitmaps;
+ }
+ }
- const uint32_t dSpans = nSpans - hSpans - nBitmaps;
- const bool part_skip = opts->eager_skip && !skip;
- if (opts->target == TARGET_DOT)
- {
- type = DOT;
- info.dot = new Dot (span, nSpans, from);
- }
- else if (opts->gFlag && !part_skip && (dSpans >= opts->cGotoThreshold) && !low_spans_have_tags)
- {
- type = CPGOTO;
- info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next, opts->sFlag);
- }
- else if (opts->bFlag && !part_skip && (nBitmaps > 0))
- {
- type = BITMAP;
- info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bm, bms, from->next, opts->sFlag);
- bitmaps.used = true;
- }
- else
- {
- type = SWITCH_IF;
- info.switchif = new SwitchIf (span, nSpans, from->next, opts->sFlag, part_skip);
- }
+ const uint32_t dSpans = nSpans - hSpans - nBitmaps;
+ const bool part_skip = opts->eager_skip && !skip;
+ if (opts->target == TARGET_DOT)
+ {
+ type = DOT;
+ info.dot = new Dot (span, nSpans, from);
+ }
+ else if (opts->gFlag && !part_skip && (dSpans >= opts->cGotoThreshold) && !low_spans_have_tags)
+ {
+ type = CPGOTO;
+ info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next, opts->sFlag);
+ }
+ else if (opts->bFlag && !part_skip && (nBitmaps > 0))
+ {
+ type = BITMAP;
+ info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bm, bms, from->next, opts->sFlag);
+ bitmaps.used = true;
+ }
+ else
+ {
+ type = SWITCH_IF;
+ info.switchif = new SwitchIf (span, nSpans, from->next, opts->sFlag, part_skip);
+ }
}
/*
*/
uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x)
{
- uint32_t new_nspans = 0;
- for (uint32_t i = 0; i < old_nspans; ++i)
- {
- if (old_span[i].to != x)
- {
- if (new_nspans > 0
- && new_span[new_nspans - 1].to == old_span[i].to
- && new_span[new_nspans - 1].tags == old_span[i].tags)
- new_span[new_nspans - 1].ub = old_span[i].ub;
- else
- {
- new_span[new_nspans].to = old_span[i].to;
- new_span[new_nspans].ub = old_span[i].ub;
- new_span[new_nspans].tags = old_span[i].tags;
- ++new_nspans;
- }
- }
- }
- if (new_nspans > 0)
- new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub;
- return new_nspans;
+ uint32_t new_nspans = 0;
+ for (uint32_t i = 0; i < old_nspans; ++i)
+ {
+ if (old_span[i].to != x)
+ {
+ if (new_nspans > 0
+ && new_span[new_nspans - 1].to == old_span[i].to
+ && new_span[new_nspans - 1].tags == old_span[i].tags)
+ new_span[new_nspans - 1].ub = old_span[i].ub;
+ else
+ {
+ new_span[new_nspans].to = old_span[i].to;
+ new_span[new_nspans].ub = old_span[i].ub;
+ new_span[new_nspans].tags = old_span[i].tags;
+ ++new_nspans;
+ }
+ }
+ }
+ if (new_nspans > 0)
+ new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub;
+ return new_nspans;
}
} // namespace re2c
Cases::~Cases ()
{
- delete [] cases;
+ delete [] cases;
}
Binary::~Binary ()
{
- delete cond;
- delete thn;
- delete els;
+ delete cond;
+ delete thn;
+ delete els;
}
Linear::~Linear ()
{
- for (uint32_t i = 0; i < nbranches; ++i) {
- delete branches[i].cond;
- }
- delete[] branches;
+ for (uint32_t i = 0; i < nbranches; ++i) {
+ delete branches[i].cond;
+ }
+ delete[] branches;
}
If::~If ()
{
- switch (type)
- {
- case BINARY:
- delete info.binary;
- break;
- case LINEAR:
- delete info.linear;
- break;
- }
+ switch (type)
+ {
+ case BINARY:
+ delete info.binary;
+ break;
+ case LINEAR:
+ delete info.linear;
+ break;
+ }
}
SwitchIf::~SwitchIf ()
{
- switch (type)
- {
- case SWITCH:
- delete info.cases;
- break;
- case IF:
- delete info.ifs;
- break;
- }
+ switch (type)
+ {
+ case SWITCH:
+ delete info.cases;
+ break;
+ case IF:
+ delete info.ifs;
+ break;
+ }
}
GoBitmap::~GoBitmap ()
{
- delete hgo;
- delete lgo;
+ delete hgo;
+ delete lgo;
}
CpgotoTable::~CpgotoTable ()
{
- delete [] table;
+ delete [] table;
}
Cpgoto::~Cpgoto ()
{
- delete hgo;
- delete table;
+ delete hgo;
+ delete table;
}
Dot::~Dot ()
{
- delete cases;
+ delete cases;
}
Go::~Go ()
{
- switch (type)
- {
- case EMPTY:
- break;
- case SWITCH_IF:
- delete info.switchif;
- break;
- case BITMAP:
- delete info.bitmap;
- break;
- case CPGOTO:
- delete info.cpgoto;
- break;
- case DOT:
- delete info.dot;
- break;
- }
+ switch (type)
+ {
+ case EMPTY:
+ break;
+ case SWITCH_IF:
+ delete info.switchif;
+ break;
+ case BITMAP:
+ delete info.bitmap;
+ break;
+ case CPGOTO:
+ delete info.cpgoto;
+ break;
+ case DOT:
+ delete info.dot;
+ break;
+ }
}
} // namespace re2c
void output_if (OutputFile & o, uint32_t ind, const std::string & compare, uint32_t value)
{
- o.wind(ind).ws("if (").wstring(o.block().opts->yych).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") ");
+ o.wind(ind).ws("if (").wstring(o.block().opts->yych).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") ");
}
std::string output_hgo (OutputFile & o, uint32_t ind, const DFA &dfa, SwitchIf * hgo)
{
- const opt_t *opts = o.block().opts;
- std::string yych = opts->yych;
- if (hgo != NULL)
- {
- o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n");
- hgo->emit (o, ind + 1, dfa);
- o.wind(ind).ws("} else ");
- yych = opts->yych;
- }
- else
- {
- o.wind(ind);
- }
- return yych;
+ const opt_t *opts = o.block().opts;
+ std::string yych = opts->yych;
+ if (hgo != NULL)
+ {
+ o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n");
+ hgo->emit (o, ind + 1, dfa);
+ o.wind(ind).ws("} else ");
+ yych = opts->yych;
+ }
+ else
+ {
+ o.wind(ind);
+ }
+ return yych;
}
void Case::emit (OutputFile & o, uint32_t ind) const
{
- const opt_t *opts = o.block().opts;
- for (uint32_t i = 0; i < ranges.size (); ++i)
- {
- for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b)
- {
- o.wind(ind).ws("case ").wc_hex (b).ws(":");
- if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC)
- {
- const uint32_t c = opts->encoding.decodeUnsafe (b);
- if (is_print (c))
- o.ws(" /* ").wc(static_cast<char> (c)).ws(" */");
- }
- bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1;
- if (!last_case)
- {
- o.ws("\n");
- }
- }
- }
+ const opt_t *opts = o.block().opts;
+ for (uint32_t i = 0; i < ranges.size (); ++i)
+ {
+ for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b)
+ {
+ o.wind(ind).ws("case ").wc_hex (b).ws(":");
+ if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC)
+ {
+ const uint32_t c = opts->encoding.decodeUnsafe (b);
+ if (is_print (c))
+ o.ws(" /* ").wc(static_cast<char> (c)).ws(" */");
+ }
+ bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1;
+ if (!last_case)
+ {
+ o.ws("\n");
+ }
+ }
+ }
}
void Cases::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const
{
- o.wind(ind).ws("switch (").wstring(o.block().opts->yych).ws(") {\n");
+ o.wind(ind).ws("switch (").wstring(o.block().opts->yych).ws(") {\n");
- for (uint32_t i = 1; i < cases_size; ++i) {
- const Case &c = cases[i];
- c.emit(o, ind);
- gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip);
- }
+ for (uint32_t i = 1; i < cases_size; ++i) {
+ const Case &c = cases[i];
+ c.emit(o, ind);
+ gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip);
+ }
- // default case must be the last one
- const Case &c = cases[0];
- o.wind(ind).ws("default:");
- gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip);
+ // default case must be the last one
+ const Case &c = cases[0];
+ o.wind(ind).ws("default:");
+ gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip);
- o.wind(ind).ws("}\n");
+ o.wind(ind).ws("}\n");
}
void Binary::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const
{
- output_if(o, ind, cond->compare, cond->value);
- o.ws("{\n");
- thn->emit(o, ind + 1, dfa);
- o.wind(ind).ws("} else {\n");
- els->emit(o, ind + 1, dfa);
- o.wind(ind).ws("}\n");
+ output_if(o, ind, cond->compare, cond->value);
+ o.ws("{\n");
+ thn->emit(o, ind + 1, dfa);
+ o.wind(ind).ws("} else {\n");
+ els->emit(o, ind + 1, dfa);
+ o.wind(ind).ws("}\n");
}
void Linear::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const
{
- for (uint32_t i = 0; i < nbranches; ++i) {
- const Branch &b = branches[i];
- const Cond *cond = b.cond;
- if (cond) {
- output_if(o, ind, cond->compare, cond->value);
- gen_goto_if(o, ind, b.to, dfa, b.tags, b.skip);
- } else {
- gen_goto_plain(o, ind, b.to, dfa, b.tags, b.skip);
- }
- }
+ for (uint32_t i = 0; i < nbranches; ++i) {
+ const Branch &b = branches[i];
+ const Cond *cond = b.cond;
+ if (cond) {
+ output_if(o, ind, cond->compare, cond->value);
+ gen_goto_if(o, ind, b.to, dfa, b.tags, b.skip);
+ } else {
+ gen_goto_plain(o, ind, b.to, dfa, b.tags, b.skip);
+ }
+ }
}
void If::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const
{
- switch (type) {
- case BINARY: info.binary->emit(o, ind, dfa); break;
- case LINEAR: info.linear->emit(o, ind, dfa); break;
- }
+ switch (type) {
+ case BINARY: info.binary->emit(o, ind, dfa); break;
+ case LINEAR: info.linear->emit(o, ind, dfa); break;
+ }
}
void SwitchIf::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const
{
- switch (type) {
- case SWITCH: info.cases->emit(o, ind, dfa); break;
- case IF: info.ifs->emit(o, ind, dfa); break;
- }
+ switch (type) {
+ case SWITCH: info.cases->emit(o, ind, dfa); break;
+ case IF: info.ifs->emit(o, ind, dfa); break;
+ }
}
void GoBitmap::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const
{
- const opt_t *opts = o.block().opts;
- std::string yych = output_hgo (o, ind, dfa, hgo);
- o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & ");
- if (opts->yybmHexTable)
- {
- o.wu32_hex(bitmap->m);
- }
- else
- {
- o.wu32(bitmap->m);
- }
- o.ws(") {\n");
- gen_goto_plain(o, ind + 1, bitmap_state, dfa, TCID0, false);
- o.wind(ind).ws("}\n");
- if (lgo != NULL)
- {
- lgo->emit (o, ind, dfa);
- }
+ const opt_t *opts = o.block().opts;
+ std::string yych = output_hgo (o, ind, dfa, hgo);
+ o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & ");
+ if (opts->yybmHexTable)
+ {
+ o.wu32_hex(bitmap->m);
+ }
+ else
+ {
+ o.wu32(bitmap->m);
+ }
+ o.ws(") {\n");
+ gen_goto_plain(o, ind + 1, bitmap_state, dfa, TCID0, false);
+ o.wind(ind).ws("}\n");
+ if (lgo != NULL)
+ {
+ lgo->emit (o, ind, dfa);
+ }
}
label_t CpgotoTable::max_label () const
{
- label_t max = label_t::first ();
- for (uint32_t i = 0; i < TABLE_SIZE; ++i)
- {
- if (max < table[i]->label)
- {
- max = table[i]->label;
- };
- }
- return max;
+ label_t max = label_t::first ();
+ for (uint32_t i = 0; i < TABLE_SIZE; ++i)
+ {
+ if (max < table[i]->label)
+ {
+ max = table[i]->label;
+ };
+ }
+ return max;
}
void CpgotoTable::emit (OutputFile & o, uint32_t ind) const
{
- const opt_t *opts = o.block().opts;
- o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n");
- o.wind(++ind);
- const uint32_t max_digits = max_label ().width ();
- for (uint32_t i = 0; i < TABLE_SIZE; ++i)
- {
- o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label);
- if (i == TABLE_SIZE - 1)
- {
- o.ws("\n");
- }
- else if (i % 8 == 7)
- {
- o.ws(",\n").wind(ind);
- }
- else
- {
- const uint32_t padding = max_digits - table[i]->label.width () + 1;
- o.ws(",").wstring(std::string (padding, ' '));
- }
- }
- o.wind(--ind).ws("};\n");
+ const opt_t *opts = o.block().opts;
+ o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n");
+ o.wind(++ind);
+ const uint32_t max_digits = max_label ().width ();
+ for (uint32_t i = 0; i < TABLE_SIZE; ++i)
+ {
+ o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label);
+ if (i == TABLE_SIZE - 1)
+ {
+ o.ws("\n");
+ }
+ else if (i % 8 == 7)
+ {
+ o.ws(",\n").wind(ind);
+ }
+ else
+ {
+ const uint32_t padding = max_digits - table[i]->label.width () + 1;
+ o.ws(",").wstring(std::string (padding, ' '));
+ }
+ }
+ o.wind(--ind).ws("};\n");
}
void Cpgoto::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const
{
- std::string yych = output_hgo (o, ind, dfa, hgo);
- o.ws("{\n");
- table->emit (o, ++ind);
- o.wind(ind).ws("goto *").wstring(o.block().opts->yytarget).ws("[").wstring(yych).ws("];\n");
- o.wind(--ind).ws("}\n");
+ std::string yych = output_hgo (o, ind, dfa, hgo);
+ o.ws("{\n");
+ table->emit (o, ++ind);
+ o.wind(ind).ws("goto *").wstring(o.block().opts->yytarget).ws("[").wstring(yych).ws("];\n");
+ o.wind(--ind).ws("}\n");
}
void Dot::emit(OutputFile &o, const DFA &dfa) const
{
- const std::string &prefix = o.block().opts->tags_prefix;
- const uint32_t n = cases->cases_size;
- if (n == 1) {
- o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n");
- } else {
- for (uint32_t i = 0; i < n; ++i) {
- const Case &c = cases->cases[i];
- o.wlabel(from->label).ws(" -> ").wlabel(c.to->label).ws(" [label=\"");
- for (uint32_t j = 0; j < c.ranges.size(); ++j) {
- o.wrange(c.ranges[j].first, c.ranges[j].second);
- }
- const tcmd_t *cmd = dfa.tcpool[c.tags];
- for (const tcmd_t *p = cmd; p; p = p->next) {
- o.ws("<").wstring(vartag_name(p->lhs, prefix));
- if (tcmd_t::iscopy(p)) {
- o.ws("~").wstring(vartag_name(p->rhs, prefix));
- }
- o.ws(">");
- }
- o.ws("\"]\n");
- }
- }
+ const std::string &prefix = o.block().opts->tags_prefix;
+ const uint32_t n = cases->cases_size;
+ if (n == 1) {
+ o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n");
+ } else {
+ for (uint32_t i = 0; i < n; ++i) {
+ const Case &c = cases->cases[i];
+ o.wlabel(from->label).ws(" -> ").wlabel(c.to->label).ws(" [label=\"");
+ for (uint32_t j = 0; j < c.ranges.size(); ++j) {
+ o.wrange(c.ranges[j].first, c.ranges[j].second);
+ }
+ const tcmd_t *cmd = dfa.tcpool[c.tags];
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ o.ws("<").wstring(vartag_name(p->lhs, prefix));
+ if (tcmd_t::iscopy(p)) {
+ o.ws("~").wstring(vartag_name(p->rhs, prefix));
+ }
+ o.ws(">");
+ }
+ o.ws("\"]\n");
+ }
+ }
}
void Go::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const
{
- if (type == DOT) {
- info.dot->emit (o, dfa);
- return;
- }
-
- const bool lookahead = o.block().opts->lookahead;
- o.wdelay_skip(ind, skip && !lookahead);
- code_lines_t code;
- gen_settags(code, dfa, tags, o.block().opts);
- for (size_t i = 0; i < code.size(); ++i) {
- o.wind(ind).wstring(code[i]);
- }
- o.wdelay_skip(ind, skip && lookahead);
-
- if (type == SWITCH_IF) {
- info.switchif->emit (o, ind, dfa);
- } else if (type == BITMAP) {
- info.bitmap->emit (o, ind, dfa);
- } else if (type == CPGOTO) {
- info.cpgoto->emit (o, ind, dfa);
- }
+ if (type == DOT) {
+ info.dot->emit (o, dfa);
+ return;
+ }
+
+ const bool lookahead = o.block().opts->lookahead;
+ o.wdelay_skip(ind, skip && !lookahead);
+ code_lines_t code;
+ gen_settags(code, dfa, tags, o.block().opts);
+ for (size_t i = 0; i < code.size(); ++i) {
+ o.wind(ind).wstring(code[i]);
+ }
+ o.wdelay_skip(ind, skip && lookahead);
+
+ if (type == SWITCH_IF) {
+ info.switchif->emit (o, ind, dfa);
+ } else if (type == BITMAP) {
+ info.bitmap->emit (o, ind, dfa);
+ } else if (type == CPGOTO) {
+ info.cpgoto->emit (o, ind, dfa);
+ }
}
} // namespace re2c
void Cases::used_labels (std::set<label_t> & used) const
{
- for (uint32_t i = 0; i < cases_size; ++i)
- {
- used.insert (cases[i].to->label);
- }
+ for (uint32_t i = 0; i < cases_size; ++i)
+ {
+ used.insert (cases[i].to->label);
+ }
}
void Binary::used_labels (std::set<label_t> & used) const
{
- thn->used_labels (used);
- els->used_labels (used);
+ thn->used_labels (used);
+ els->used_labels (used);
}
void Linear::used_labels (std::set<label_t> & used) const
{
- for (uint32_t i = 0; i < nbranches; ++i) {
- const State *to = branches[i].to;
- if (to) {
- used.insert(to->label);
- }
- }
+ for (uint32_t i = 0; i < nbranches; ++i) {
+ const State *to = branches[i].to;
+ if (to) {
+ used.insert(to->label);
+ }
+ }
}
void If::used_labels (std::set<label_t> & used) const
{
- switch (type)
- {
- case BINARY:
- info.binary->used_labels (used);
- break;
- case LINEAR:
- info.linear->used_labels (used);
- break;
- }
+ switch (type)
+ {
+ case BINARY:
+ info.binary->used_labels (used);
+ break;
+ case LINEAR:
+ info.linear->used_labels (used);
+ break;
+ }
}
void SwitchIf::used_labels (std::set<label_t> & used) const
{
- switch (type)
- {
- case SWITCH:
- info.cases->used_labels (used);
- break;
- case IF:
- info.ifs->used_labels (used);
- break;
- }
+ switch (type)
+ {
+ case SWITCH:
+ info.cases->used_labels (used);
+ break;
+ case IF:
+ info.ifs->used_labels (used);
+ break;
+ }
}
void GoBitmap::used_labels (std::set<label_t> & used) const
{
- if (hgo != NULL)
- {
- hgo->used_labels (used);
- }
- used.insert (bitmap_state->label);
- if (lgo != NULL)
- {
- lgo->used_labels (used);
- }
+ if (hgo != NULL)
+ {
+ hgo->used_labels (used);
+ }
+ used.insert (bitmap_state->label);
+ if (lgo != NULL)
+ {
+ lgo->used_labels (used);
+ }
}
// Inserts the label of every one of the TABLE_SIZE table entries into 'used'.
// Each entry is dereferenced without a null check.
void CpgotoTable::used_labels (std::set<label_t> & used) const
{
- for (uint32_t i = 0; i < TABLE_SIZE; ++i)
- {
- used.insert (table[i]->label);
- }
+ for (uint32_t i = 0; i < TABLE_SIZE; ++i)
+ {
+ used.insert (table[i]->label);
+ }
}
// Collects labels from the optional 'hgo' part (when non-null) and then
// from 'table', which is dereferenced unconditionally.
void Cpgoto::used_labels (std::set<label_t> & used) const
{
- if (hgo != NULL)
- {
- hgo->used_labels (used);
- }
- table->used_labels (used);
+ if (hgo != NULL)
+ {
+ hgo->used_labels (used);
+ }
+ table->used_labels (used);
}
// Forwards label collection to the member of 'info' selected by 'type'
// (switchif, bitmap or cpgoto). EMPTY and DOT contribute no labels.
void Go::used_labels (std::set<label_t> & used) const
{
- switch (type)
- {
- case EMPTY:
- case DOT:
- break;
- case SWITCH_IF:
- info.switchif->used_labels (used);
- break;
- case BITMAP:
- info.bitmap->used_labels (used);
- break;
- case CPGOTO:
- info.cpgoto->used_labels (used);
- break;
- }
+ switch (type)
+ {
+ case EMPTY:
+ case DOT:
+ break;
+ case SWITCH_IF:
+ info.switchif->used_labels (used);
+ break;
+ case BITMAP:
+ info.bitmap->used_labels (used);
+ break;
+ case CPGOTO:
+ info.cpgoto->used_labels (used);
+ break;
+ }
}
} // namespace re2c
// Returns the text of a peek expression (no indentation, no terminator):
// "*" followed by opts->yycursor for INPUT_DEFAULT, otherwise opts->yypeek
// followed by " ()".
std::string output_expr_peek(const opt_t *opts)
{
- return opts->input_api == INPUT_DEFAULT
- ? "*" + opts->yycursor
- : opts->yypeek + " ()";
+ return opts->input_api == INPUT_DEFAULT
+ ? "*" + opts->yycursor
+ : opts->yypeek + " ()";
}
// Returns one indented, ";\n"-terminated statement: "<yycursor> = <yymarker>"
// for INPUT_DEFAULT, otherwise "<yyrestore> ()". Indentation is
// indent(ind, opts->indString).
std::string output_restore(uint32_t ind, const opt_t *opts)
{
- std::string s = opts->input_api == INPUT_DEFAULT
- ? opts->yycursor + " = " + opts->yymarker
- : opts->yyrestore + " ()";
- return indent(ind, opts->indString) + s + ";\n";
+ std::string s = opts->input_api == INPUT_DEFAULT
+ ? opts->yycursor + " = " + opts->yymarker
+ : opts->yyrestore + " ()";
+ return indent(ind, opts->indString) + s + ";\n";
}
// Returns the text of a condition involving 'n':
//  - INPUT_CUSTOM: "<yylessthan> (n)"
//  - n == 1:       "<yylimit> <= <yycursor>"
//  - otherwise:    "(<yylimit> - <yycursor>) < n"
std::string output_expr_lessthan(size_t n, const opt_t *opts)
{
- std::ostringstream s;
- if (opts->input_api == INPUT_CUSTOM) {
- s << opts->yylessthan << " (" << n << ")";
- } else if (n == 1) {
- s << opts->yylimit << " <= " << opts->yycursor;
- } else {
- s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n;
- }
- return s.str ();
+ std::ostringstream s;
+ if (opts->input_api == INPUT_CUSTOM) {
+ s << opts->yylessthan << " (" << n << ")";
+ } else if (n == 1) {
+ s << opts->yylimit << " <= " << opts->yycursor;
+ } else {
+ s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n;
+ }
+ return s.str ();
}
// Returns the cast prefix "(<yyctype>)" when opts->yychConversion is set,
// otherwise an empty string. Used by the output_*peek* helpers in front of
// the value assigned to yych.
static std::string yych_conv(const opt_t *opts)
{
- return opts->yychConversion
- ? "(" + opts->yyctype + ")"
- : "";
+ return opts->yychConversion
+ ? "(" + opts->yyctype + ")"
+ : "";
}
// Writes an indented "<yych> = <conv><expr>;\n" statement to 'o', where
// <expr> is "<yypeek> ()" for INPUT_CUSTOM and "*<yycursor>" otherwise.
void output_peek(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- o << indent(ind, opts->indString) << opts->yych << " = " << yych_conv(opts);
- if (opts->input_api == INPUT_CUSTOM) {
- o << opts->yypeek << " ()";
- } else {
- o << "*" << opts->yycursor;
- }
- o << ";\n";
+ o << indent(ind, opts->indString) << opts->yych << " = " << yych_conv(opts);
+ if (opts->input_api == INPUT_CUSTOM) {
+ o << opts->yypeek << " ()";
+ } else {
+ o << "*" << opts->yycursor;
+ }
+ o << ";\n";
}
// Writes an indented, ";\n"-terminated skip statement to 'o':
// "<yyskip> ()" for INPUT_CUSTOM, "++<yycursor>" otherwise.
void output_skip(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- o << indent(ind, opts->indString);
- if (opts->input_api == INPUT_CUSTOM) {
- o << opts->yyskip << " ()";
- } else {
- o << "++" << opts->yycursor;
- }
- o << ";\n";
+ o << indent(ind, opts->indString);
+ if (opts->input_api == INPUT_CUSTOM) {
+ o << opts->yyskip << " ()";
+ } else {
+ o << "++" << opts->yycursor;
+ }
+ o << ";\n";
}
// Writes an indented, ";\n"-terminated backup statement to 'o':
// "<yybackup> ()" for INPUT_CUSTOM, "<yymarker> = <yycursor>" otherwise.
void output_backup(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- o << indent(ind, opts->indString);
- if (opts->input_api == INPUT_CUSTOM) {
- o << opts->yybackup << " ()";
- } else {
- o << opts->yymarker << " = " << opts->yycursor;
- }
- o << ";\n";
+ o << indent(ind, opts->indString);
+ if (opts->input_api == INPUT_CUSTOM) {
+ o << opts->yybackup << " ()";
+ } else {
+ o << opts->yymarker << " = " << opts->yycursor;
+ }
+ o << ";\n";
}
// Writes the combined statement "<yych> = <conv>*++<yycursor>;\n" to 'o'.
// Asserts that the default input API is in use.
void output_skip_peek(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yych << " = "
- << yych_conv(opts) << "*++" << opts->yycursor << ";\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yych << " = "
+ << yych_conv(opts) << "*++" << opts->yycursor << ";\n";
}
// Writes the combined statement "<yych> = <conv>*<yycursor>++;\n" to 'o'.
// Asserts that the default input API is in use.
void output_peek_skip(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yych << " = "
- << yych_conv(opts) << "*" << opts->yycursor << "++;\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yych << " = "
+ << yych_conv(opts) << "*" << opts->yycursor << "++;\n";
}
// Writes the combined statement "<yymarker> = ++<yycursor>;\n" to 'o'.
// Asserts that the default input API is in use.
void output_skip_backup(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yymarker << " = ++"
- << opts->yycursor << ";\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yymarker << " = ++"
+ << opts->yycursor << ";\n";
}
// Writes the combined statement "<yymarker> = <yycursor>++;\n" to 'o'.
// Asserts that the default input API is in use.
void output_backup_skip(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yymarker << " = "
- << opts->yycursor << "++;\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yymarker << " = "
+ << opts->yycursor << "++;\n";
}
// Writes the combined statement
// "<yych> = <conv>*(<yymarker> = <yycursor>);\n" to 'o'.
// Asserts that the default input API is in use.
void output_backup_peek(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yych << " = "
- << yych_conv(opts) << "*(" << opts->yymarker << " = "
- << opts->yycursor << ");\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yych << " = "
+ << yych_conv(opts) << "*(" << opts->yymarker << " = "
+ << opts->yycursor << ");\n";
}
// Writes the combined statement
// "<yych> = <conv>*(<yymarker> = ++<yycursor>);\n" to 'o'.
// Asserts that the default input API is in use.
void output_skip_backup_peek(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yych << " = "
- << yych_conv(opts) << "*(" << opts->yymarker << " = ++"
- << opts->yycursor << ");\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yych << " = "
+ << yych_conv(opts) << "*(" << opts->yymarker << " = ++"
+ << opts->yycursor << ");\n";
}
// Writes the combined statement
// "<yych> = <conv>*(<yymarker> = <yycursor>++);\n" to 'o'.
// Asserts that the default input API is in use.
void output_backup_peek_skip(std::ostream &o, uint32_t ind, const opt_t *opts)
{
- assert(opts->input_api == INPUT_DEFAULT);
- o << indent(ind, opts->indString) << opts->yych << " = "
- << yych_conv(opts) << "*(" << opts->yymarker << " = "
- << opts->yycursor << "++);\n";
+ assert(opts->input_api == INPUT_DEFAULT);
+ o << indent(ind, opts->indString) << opts->yych << " = "
+ << yych_conv(opts) << "*(" << opts->yymarker << " = "
+ << opts->yycursor << "++);\n";
}
} // end namespace re2c
// Selects the form of generated input-access code. The output_* helpers
// emit direct expressions on yycursor/yymarker/yylimit for INPUT_DEFAULT
// and calls to the configured primitive names for INPUT_CUSTOM.
enum input_api_t
{
- INPUT_DEFAULT,
- INPUT_CUSTOM
+ INPUT_DEFAULT,
+ INPUT_CUSTOM
};
// Declaration: returns the text of a peek expression built from 'opts'.
std::string output_expr_peek (const opt_t *opts);
// Value carried by a default-constructed label.
const uint32_t label_t::FIRST = 0;
// Constructs a label whose value is FIRST.
label_t::label_t ()
- : value (FIRST)
+ : value (FIRST)
{}
// Advances this label to the next value (value + 1).
void label_t::inc ()
{
- ++value;
+ ++value;
}
// Returns a default-constructed label, i.e. one whose value is FIRST.
label_t label_t::first ()
{
- return label_t ();
+ return label_t ();
}
// Orders labels by their numeric value.
bool label_t::operator < (const label_t & l) const
{
- return value < l.value;
+ return value < l.value;
}
// Returns how many times the value can be divided by 10 before it becomes
// zero: 0 for values 0..9, 1 for 10..99, 2 for 100..999, and so on.
// This is one less than the number of decimal digits.
// NOTE(review): callers that need the printed width presumably account for
// the off-by-one themselves; confirm against call sites.
uint32_t label_t::width () const
{
- uint32_t v = value;
- uint32_t n = 0;
- while (v /= 10) ++n;
- return n;
+ uint32_t v = value;
+ uint32_t n = 0;
+ while (v /= 10) ++n;
+ return n;
}
// Streams the label's numeric value to 'o' and returns 'o'.
std::ostream & operator << (std::ostream & o, label_t l)
{
- o << l.value;
- return o;
+ o << l.value;
+ return o;
}
} // namespace re2c
// - get next label
// Wrapper around a uint32_t label value. The default constructor and inc()
// are private; counter_t<label_t> is a friend and so can call them.
// Other code can obtain a label via first(), compare labels, query width()
// and print them.
class label_t
{
- static const uint32_t FIRST;
- uint32_t value;
- label_t ();
- void inc ();
+ static const uint32_t FIRST;
+ uint32_t value;
+ label_t ();
+ void inc ();
public:
- static label_t first ();
- bool operator < (const label_t & l) const;
- uint32_t width () const;
- friend std::ostream & operator << (std::ostream & o, label_t l);
+ static label_t first ();
+ bool operator < (const label_t & l) const;
+ uint32_t width () const;
+ friend std::ostream & operator << (std::ostream & o, label_t l);
- friend class counter_t<label_t>;
+ friend class counter_t<label_t>;
};
} // namespace re2c
{
// Creates a fragment of type 't' with an empty stream and indent level 'i'.
// The 'tags' / 'line_info' payload is not set here.
OutputFragment::OutputFragment (type_t t, uint32_t i)
- : type (t)
- , stream ()
- , indent (i)
+ : type (t)
+ , stream ()
+ , indent (i)
{}
// Releases the payload that corresponds to the fragment type: 'tags' for
// STAGS and MTAGS fragments, 'line_info' for LINE_INFO_INPUT fragments.
// Other fragment types delete nothing.
OutputFragment::~OutputFragment()
{
- if (type == STAGS || type == MTAGS) {
- delete tags;
- } else if (type == LINE_INFO_INPUT) {
- delete line_info;
- }
+ if (type == STAGS || type == MTAGS) {
+ delete tags;
+ } else if (type == LINE_INFO_INPUT) {
+ delete line_info;
+ }
}
// Returns the number of '\n' characters in the text accumulated in this
// fragment's stream. The stream content is copied into a string first.
uint32_t OutputFragment::count_lines () const
{
- uint32_t lines = 0;
- const std::string content = stream.str ();
- const char * p = content.c_str ();
- for (uint32_t i = 0; i < content.size (); ++i)
- {
- if (p[i] == '\n')
- {
- ++lines;
- }
- }
- return lines;
+ uint32_t lines = 0;
+ const std::string content = stream.str ();
+ const char * p = content.c_str ();
+ for (uint32_t i = 0; i < content.size (); ++i)
+ {
+ if (p[i] == '\n')
+ {
+ ++lines;
+ }
+ }
+ return lines;
}
// Creates a block with no options (opts == NULL), cleared flags and empty
// lists, and pushes one initial CODE fragment so that 'fragments' is never
// empty (OutputFile::stream() reads fragments.back()).
OutputBlock::OutputBlock ()
- : fragments ()
- , used_yyaccept (false)
- , have_user_code (false)
- , line (0)
- , types ()
- , stags ()
- , mtags ()
- , opts(NULL)
+ : fragments ()
+ , used_yyaccept (false)
+ , have_user_code (false)
+ , line (0)
+ , types ()
+ , stags ()
+ , mtags ()
+ , opts(NULL)
{
- fragments.push_back (new OutputFragment (OutputFragment::CODE, 0));
+ fragments.push_back (new OutputFragment (OutputFragment::CODE, 0));
}
// Deletes every fragment held by the block and the block's options object.
OutputBlock::~OutputBlock ()
{
- for (unsigned int i = 0; i < fragments.size (); ++i)
- {
- delete fragments[i];
- }
- delete opts;
+ for (unsigned int i = 0; i < fragments.size (); ++i)
+ {
+ delete fragments[i];
+ }
+ delete opts;
}
// Creates an output file with no blocks, a default label counter, fill
// index 0, the state_goto/cond_goto flags cleared, condition-order warnings
// enabled, and a reference to the warning sink 'w'.
OutputFile::OutputFile(Warn &w)
- : blocks ()
- , label_counter ()
- , fill_index(0)
- , state_goto(false)
- , cond_goto(false)
- , warn_condition_order(true)
- , warn(w)
+ : blocks ()
+ , label_counter ()
+ , fill_index(0)
+ , state_goto(false)
+ , cond_goto(false)
+ , warn_condition_order(true)
+ , warn(w)
{}
// Deletes every block held by the file.
OutputFile::~OutputFile ()
{
- for (unsigned int i = 0; i < blocks.size(); ++i) {
- delete blocks[i];
- }
+ for (unsigned int i = 0; i < blocks.size(); ++i) {
+ delete blocks[i];
+ }
}
// Returns the most recently added block. 'blocks' must not be empty
// (blocks.back() is dereferenced without a check).
OutputBlock& OutputFile::block()
{
- return *blocks.back();
+ return *blocks.back();
}
// Returns the stream of the last fragment of the current block.
std::ostream & OutputFile::stream ()
{
- return block().fragments.back ()->stream;
+ return block().fragments.back ()->stream;
}
// Copies the raw text [s, e) into the current CODE fragment, dropping the
// '\r' of every "\r\n" pair. Does nothing when the range is empty or the
// block's target is not TARGET_CODE. Also sets the block's have_user_code
// flag as soon as a non-whitespace character is seen. Returns *this.
OutputFile &OutputFile::wraw(const char *s, const char *e)
{
- if (s != e && block().opts->target == TARGET_CODE) {
- insert_code();
-
- // scan for non-whitespace characters
- bool &code = block().have_user_code;
- for (const char *p = s; !code && p < e; ++p) {
- code = !isspace(*p);
- }
-
- // convert CR LF to LF
- std::ostream &o = stream();
- for (const char *p = s;; ++p) {
- std::streamsize l = p - s;
- if (p == e) {
- o.write(s, l);
- break;
- } else if (*p == '\n') {
- if (p > s && p[-1] == '\r') --l;
- o.write(s, l);
- s = p;
- }
- }
- }
- return *this;
+ if (s != e && block().opts->target == TARGET_CODE) {
+ insert_code();
+
+ // scan for non-whitespace characters
+ // (isspace requires a value representable as unsigned char: passing a
+ // plain char holding a byte >= 0x80 is undefined where char is signed)
+ bool &code = block().have_user_code;
+ for (const char *p = s; !code && p < e; ++p) {
+ code = !isspace(static_cast<unsigned char>(*p));
+ }
+
+ // convert CR LF to LF
+ // (each chunk is written up to, but excluding, the newline; the newline
+ // itself becomes the first character of the next chunk)
+ std::ostream &o = stream();
+ for (const char *p = s;; ++p) {
+ std::streamsize l = p - s;
+ if (p == e) {
+ o.write(s, l);
+ break;
+ } else if (*p == '\n') {
+ if (p > s && p[-1] == '\r') --l;
+ o.write(s, l);
+ s = p;
+ }
+ }
+ }
+ return *this;
}
// Appends 'n' via prtHex, sized by the block encoding's code unit size,
// to the current CODE fragment. Returns *this.
OutputFile & OutputFile::wu32_hex (uint32_t n)
{
- insert_code();
- prtHex(stream(), n, block().opts->encoding.szCodeUnit());
- return *this;
+ insert_code();
+ prtHex(stream(), n, block().opts->encoding.szCodeUnit());
+ return *this;
}
// Appends 'n' via prtChOrHex to the current CODE fragment, passing the
// encoding's code unit size, whether the encoding is EBCDIC and whether the
// target is TARGET_DOT. Returns *this.
OutputFile & OutputFile::wc_hex (uint32_t n)
{
- insert_code();
- const opt_t *opts = block().opts;
- const Enc &e = opts->encoding;
- prtChOrHex(stream(), n, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT);
- return *this;
+ insert_code();
+ const opt_t *opts = block().opts;
+ const Enc &e = opts->encoding;
+ prtChOrHex(stream(), n, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT);
+ return *this;
}
// Appends the span (l, u) via printSpan to the current CODE fragment,
// passing the encoding's code unit size, whether the encoding is EBCDIC and
// whether the target is TARGET_DOT. Returns *this.
OutputFile & OutputFile::wrange (uint32_t l, uint32_t u)
{
- insert_code();
- const opt_t *opts = block().opts;
- const Enc &e = opts->encoding;
- printSpan(stream(), l, u, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT);
- return *this;
+ insert_code();
+ const opt_t *opts = block().opts;
+ const Enc &e = opts->encoding;
+ printSpan(stream(), l, u, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT);
+ return *this;
}
// Appends 'n' to the current CODE fragment with field width 'w'
// (std::setw applies to this one insertion). Returns *this.
OutputFile & OutputFile::wu32_width (uint32_t n, int w)
{
- insert_code();
- stream () << std::setw (w);
- stream () << n;
- return *this;
+ insert_code();
+ stream () << std::setw (w);
+ stream () << n;
+ return *this;
}
// Appends the output of output_version_time to the current CODE fragment,
// passing the block's 'version' option and the negation of
// 'bNoGenerationDate'. Returns *this.
OutputFile & OutputFile::wversion_time ()
{
- insert_code();
- output_version_time(stream(), block().opts->version, !block().opts->bNoGenerationDate);
- return *this;
+ insert_code();
+ output_version_time(stream(), block().opts->version, !block().opts->bNoGenerationDate);
+ return *this;
}
// If the block's 'startlabel' option is non-empty, appends "<startlabel>:\n"
// to the current CODE fragment; otherwise appends nothing. Returns *this.
OutputFile & OutputFile::wuser_start_label ()
{
- insert_code();
- const std::string label = block().opts->startlabel;
- if (!label.empty())
- {
- wstring(label).ws(":\n");
- }
- return *this;
+ insert_code();
+ const std::string label = block().opts->startlabel;
+ if (!label.empty())
+ {
+ wstring(label).ws(":\n");
+ }
+ return *this;
}
// Appends the character 'c' to the current CODE fragment. Returns *this.
OutputFile & OutputFile::wc (char c)
{
- insert_code();
- stream () << c;
- return *this;
+ insert_code();
+ stream () << c;
+ return *this;
}
// Appends the 32-bit unsigned number 'n' to the current CODE fragment.
// Returns *this.
OutputFile & OutputFile::wu32 (uint32_t n)
{
- insert_code();
- stream () << n;
- return *this;
+ insert_code();
+ stream () << n;
+ return *this;
}
// Appends the 64-bit unsigned number 'n' to the current CODE fragment.
// Returns *this.
OutputFile & OutputFile::wu64 (uint64_t n)
{
- insert_code();
- stream () << n;
- return *this;
+ insert_code();
+ stream () << n;
+ return *this;
}
// Appends the string 's' to the current CODE fragment. Returns *this.
OutputFile & OutputFile::wstring (const std::string & s)
{
- insert_code();
- stream () << s;
- return *this;
+ insert_code();
+ stream () << s;
+ return *this;
}
// Appends the C string 's' to the current CODE fragment. Returns *this.
OutputFile & OutputFile::ws (const char * s)
{
- insert_code();
- stream () << s;
- return *this;
+ insert_code();
+ stream () << s;
+ return *this;
}
// Appends the label 'l' (printed through its operator<<) to the current
// CODE fragment. Returns *this.
OutputFile & OutputFile::wlabel (label_t l)
{
- insert_code();
- stream () << l;
- return *this;
+ insert_code();
+ stream () << l;
+ return *this;
}
// Appends indent(ind, indString), using the block's 'indString' option,
// to the current CODE fragment. Returns *this.
OutputFile & OutputFile::wind (uint32_t ind)
{
- insert_code();
- stream () << indent(ind, block().opts->indString);
- return *this;
+ insert_code();
+ stream () << indent(ind, block().opts->indString);
+ return *this;
}
// Ensures the last fragment of the current block is a CODE fragment by
// appending a new one (indent 0) when it is of any other type. Called by the
// w* writers before they write to stream().
void OutputFile::insert_code ()
{
- if (block().fragments.back()->type != OutputFragment::CODE) {
- block().fragments.push_back(new OutputFragment(OutputFragment::CODE, 0));
- }
+ if (block().fragments.back()->type != OutputFragment::CODE) {
+ block().fragments.push_back(new OutputFragment(OutputFragment::CODE, 0));
+ }
}
// For TARGET_CODE only: appends an MTAGS (when 'mtags' is true) or STAGS
// fragment carrying 'cf'. The fragment's destructor deletes its 'tags'
// pointer, so the fragment takes ownership of 'cf'. Returns *this.
OutputFile &OutputFile::wdelay_tags(const ConfTags *cf, bool mtags)
{
- if (block().opts->target == TARGET_CODE) {
- OutputFragment *frag = new OutputFragment(
- mtags ? OutputFragment::MTAGS : OutputFragment::STAGS, 0);
- frag->tags = cf;
- blocks.back()->fragments.push_back(frag);
- }
- return *this;
+ if (block().opts->target == TARGET_CODE) {
+ OutputFragment *frag = new OutputFragment(
+ mtags ? OutputFragment::MTAGS : OutputFragment::STAGS, 0);
+ frag->tags = cf;
+ blocks.back()->fragments.push_back(frag);
+ }
+ return *this;
}
// Appends a LINE_INFO_INPUT fragment that owns a new LineInfo(l, fn).
// Returns *this.
OutputFile & OutputFile::wdelay_line_info_input (uint32_t l, const std::string &fn)
{
- OutputFragment *frag = new OutputFragment(OutputFragment::LINE_INFO_INPUT, 0);
- frag->line_info = new LineInfo(l, fn);
- blocks.back()->fragments.push_back(frag);
- return *this;
+ OutputFragment *frag = new OutputFragment(OutputFragment::LINE_INFO_INPUT, 0);
+ frag->line_info = new LineInfo(l, fn);
+ blocks.back()->fragments.push_back(frag);
+ return *this;
}
// Appends a LINE_INFO_OUTPUT fragment to the current block. Returns *this.
OutputFile & OutputFile::wdelay_line_info_output ()
{
- block().fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO_OUTPUT, 0));
- return *this;
+ block().fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO_OUTPUT, 0));
+ return *this;
}
// Appends a COND_GOTO fragment (indent 'ind') when the block's cFlag option
// is set and no such fragment has been added to this file before; the
// 'cond_goto' flag records that it has been added. Returns *this.
OutputFile & OutputFile::wdelay_cond_goto(uint32_t ind)
{
- if (block().opts->cFlag && !cond_goto) {
- block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind));
- cond_goto = true;
- }
- return *this;
+ if (block().opts->cFlag && !cond_goto) {
+ block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind));
+ cond_goto = true;
+ }
+ return *this;
}
// Appends a COND_TABLE fragment (indent 'ind') when both gFlag and cFlag are
// set and 'cond_goto' is still false. Unlike wdelay_cond_goto, this does not
// modify 'cond_goto'. Returns *this.
OutputFile & OutputFile::wdelay_cond_table(uint32_t ind)
{
- if (block().opts->gFlag && block().opts->cFlag && !cond_goto) {
- block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind));
- }
- return *this;
+ if (block().opts->gFlag && block().opts->cFlag && !cond_goto) {
+ block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind));
+ }
+ return *this;
}
// Appends a STATE_GOTO fragment (indent 'ind') when the target is
// TARGET_CODE, fFlag is set and no such fragment has been added to this file
// before; the 'state_goto' flag records that it has been added.
// Returns *this.
OutputFile & OutputFile::wdelay_state_goto (uint32_t ind)
{
- if (block().opts->target == TARGET_CODE
- && block().opts->fFlag && !state_goto) {
- block().fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind));
- state_goto = true;
- }
- return *this;
+ if (block().opts->target == TARGET_CODE
+ && block().opts->fFlag && !state_goto) {
+ block().fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind));
+ state_goto = true;
+ }
+ return *this;
}
// For TARGET_CODE only: appends a TYPES fragment and turns off
// 'warn_condition_order' for the rest of this file. Returns *this.
OutputFile & OutputFile::wdelay_types ()
{
- if (block().opts->target == TARGET_CODE) {
- warn_condition_order = false; // see note [condition order]
- block().fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0));
- }
- return *this;
+ if (block().opts->target == TARGET_CODE) {
+ warn_condition_order = false; // see note [condition order]
+ block().fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0));
+ }
+ return *this;
}
// Unconditionally appends a YYACCEPT_INIT fragment with indent 'ind'.
// Returns *this.
OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind)
{
- block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind));
- return *this;
+ block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind));
+ return *this;
}
// For TARGET_CODE only: appends a YYMAXFILL fragment. Returns *this.
OutputFile & OutputFile::wdelay_yymaxfill ()
{
- if (block().opts->target == TARGET_CODE) {
- block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0));
- }
- return *this;
+ if (block().opts->target == TARGET_CODE) {
+ block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0));
+ }
+ return *this;
}
// Appends a YYMAXNMATCH fragment when the target is TARGET_CODE and the
// posix_captures option is set. Returns *this.
OutputFile& OutputFile::wdelay_yymaxnmatch()
{
- if (block().opts->target == TARGET_CODE
- && block().opts->posix_captures) {
- block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXNMATCH, 0));
- }
- return *this;
+ if (block().opts->target == TARGET_CODE
+ && block().opts->posix_captures) {
+ block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXNMATCH, 0));
+ }
+ return *this;
}
// Appends a SKIP fragment with indent 'ind' when 'skip' is true; otherwise
// does nothing. Returns *this.
OutputFile& OutputFile::wdelay_skip(uint32_t ind, bool skip)
{
- if (skip) {
- OutputFragment *f = new OutputFragment(OutputFragment::SKIP, ind);
- block().fragments.push_back(f);
- }
- return *this;
+ if (skip) {
+ OutputFragment *f = new OutputFragment(OutputFragment::SKIP, ind);
+ block().fragments.push_back(f);
+ }
+ return *this;
}
// Appends a PEEK fragment with indent 'ind' when 'peek' is true; otherwise
// does nothing. Returns *this.
OutputFile& OutputFile::wdelay_peek(uint32_t ind, bool peek)
{
- if (peek) {
- OutputFragment *f = new OutputFragment(OutputFragment::PEEK, ind);
- block().fragments.push_back(f);
- }
- return *this;
+ if (peek) {
+ OutputFragment *f = new OutputFragment(OutputFragment::PEEK, ind);
+ block().fragments.push_back(f);
+ }
+ return *this;
}
// Appends a BACKUP fragment with indent 'ind' when 'backup' is true;
// otherwise does nothing. Returns *this.
OutputFile& OutputFile::wdelay_backup(uint32_t ind, bool backup)
{
- if (backup) {
- OutputFragment *f = new OutputFragment(OutputFragment::BACKUP, ind);
- block().fragments.push_back(f);
- }
- return *this;
+ if (backup) {
+ OutputFragment *f = new OutputFragment(OutputFragment::BACKUP, ind);
+ block().fragments.push_back(f);
+ }
+ return *this;
}
// Starts a new output block whose options are a snapshot of 'opts' taken at
// this point, makes it the current block, and then resets the start label
// in 'opts'.
void OutputFile::new_block(Opt &opts)
{
- OutputBlock *b = new OutputBlock;
- b->opts = opts.snapshot();
- blocks.push_back(b);
+ OutputBlock *b = new OutputBlock;
+ b->opts = opts.snapshot();
+ blocks.push_back(b);
- // start label hapens to be the only option
- // that must be reset for each new block
- opts.reset_startlabel();
+ // start label happens to be the only option
+ // that must be reset for each new block
+ opts.reset_startlabel();
}
// Overwrites the first block's options with a copy of the second block's
// options when there are at least two blocks and the first one has no user
// code (have_user_code is false). The const_cast is needed because 'opts'
// is held as a pointer to const.
void OutputFile::fix_first_block_opts()
{
- // If the initial block contains only whitespace and no user code,
- // then re2c options specified in the first re2c block are also
- // applied to the initial block.
- if (blocks.size() >= 2) {
- OutputBlock
- *fst = blocks[0],
- *snd = blocks[1];
- if (!fst->have_user_code) {
- *const_cast<opt_t *>(fst->opts) = *snd->opts;
- }
- }
+ // If the initial block contains only whitespace and no user code,
+ // then re2c options specified in the first re2c block are also
+ // applied to the initial block.
+ if (blocks.size() >= 2) {
+ OutputBlock
+ *fst = blocks[0],
+ *snd = blocks[1];
+ if (!fst->have_user_code) {
+ *const_cast<opt_t *>(fst->opts) = *snd->opts;
+ }
+ }
}
// Merges the per-block lists of all blocks into the output parameters:
// every block's 'types' entries are added to 'types' via find_or_add, and
// every block's 'stags' / 'mtags' sets are inserted into 'stags' / 'mtags'.
void OutputFile::global_lists(uniq_vector_t<std::string> &types,
- std::set<std::string> &stags, std::set<std::string> &mtags) const
+ std::set<std::string> &stags, std::set<std::string> &mtags) const
{
- for (unsigned int i = 0; i < blocks.size(); ++i) {
- const std::vector<std::string> &cs = blocks[i]->types;
- for (size_t j = 0; j < cs.size(); ++j) {
- types.find_or_add(cs[j]);
- }
+ for (unsigned int i = 0; i < blocks.size(); ++i) {
+ const std::vector<std::string> &cs = blocks[i]->types;
+ for (size_t j = 0; j < cs.size(); ++j) {
+ types.find_or_add(cs[j]);
+ }
- const std::set<std::string>
- &st = blocks[i]->stags,
- &mt = blocks[i]->mtags;
- stags.insert(st.begin(), st.end());
- mtags.insert(mt.begin(), mt.end());
- }
+ const std::set<std::string>
+ &st = blocks[i]->stags,
+ &mt = blocks[i]->mtags;
+ stags.insert(st.begin(), st.end());
+ mtags.insert(mt.begin(), mt.end());
+ }
}
// Rewrites runs of adjacent BACKUP / PEEK / SKIP fragments into single
// combined fragment types, in place. The first fragment of a matched run
// receives the combined type and the remaining ones become EMPTY; the scan
// then continues after the run. Three-fragment patterns are tried before
// two-fragment ones. Recognized sequences:
//   BACKUP PEEK SKIP -> BACKUP_PEEK_SKIP    SKIP BACKUP PEEK -> SKIP_BACKUP_PEEK
//   PEEK SKIP -> PEEK_SKIP      SKIP PEEK -> SKIP_PEEK     SKIP BACKUP -> SKIP_BACKUP
//   BACKUP PEEK -> BACKUP_PEEK  BACKUP SKIP -> BACKUP_SKIP
// The caller (OutputFile::emit) only invokes this for INPUT_DEFAULT.
static void foldexpr(std::vector<OutputFragment*> &frags)
{
- const size_t n = frags.size();
- for (size_t i = 0; i < n;) {
-
- if (i + 2 < n) {
- OutputFragment::type_t
- &x = frags[i]->type,
- &y = frags[i + 1]->type,
- &z = frags[i + 2]->type;
- if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK && z == OutputFragment::SKIP) {
- x = OutputFragment::BACKUP_PEEK_SKIP;
- y = z = OutputFragment::EMPTY;
- i += 3;
- continue;
- } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP && z == OutputFragment::PEEK) {
- x = OutputFragment::SKIP_BACKUP_PEEK;
- y = z = OutputFragment::EMPTY;
- i += 3;
- continue;
- }
- }
-
- if (i + 1 < n) {
- OutputFragment::type_t
- &x = frags[i]->type,
- &y = frags[i + 1]->type;
- if (x == OutputFragment::PEEK && y == OutputFragment::SKIP) {
- x = OutputFragment::PEEK_SKIP;
- y = OutputFragment::EMPTY;
- i += 2;
- continue;
- } else if (x == OutputFragment::SKIP && y == OutputFragment::PEEK) {
- x = OutputFragment::SKIP_PEEK;
- y = OutputFragment::EMPTY;
- i += 2;
- continue;
- } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP) {
- x = OutputFragment::SKIP_BACKUP;
- y = OutputFragment::EMPTY;
- i += 2;
- continue;
- } else if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK) {
- x = OutputFragment::BACKUP_PEEK;
- y = OutputFragment::EMPTY;
- i += 2;
- continue;
- } else if (x == OutputFragment::BACKUP && y == OutputFragment::SKIP) {
- x = OutputFragment::BACKUP_SKIP;
- y = OutputFragment::EMPTY;
- i += 2;
- continue;
- }
- }
-
- ++i;
- }
+ const size_t n = frags.size();
+ for (size_t i = 0; i < n;) {
+
+ if (i + 2 < n) {
+ OutputFragment::type_t
+ &x = frags[i]->type,
+ &y = frags[i + 1]->type,
+ &z = frags[i + 2]->type;
+ if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK && z == OutputFragment::SKIP) {
+ x = OutputFragment::BACKUP_PEEK_SKIP;
+ y = z = OutputFragment::EMPTY;
+ i += 3;
+ continue;
+ } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP && z == OutputFragment::PEEK) {
+ x = OutputFragment::SKIP_BACKUP_PEEK;
+ y = z = OutputFragment::EMPTY;
+ i += 3;
+ continue;
+ }
+ }
+
+ if (i + 1 < n) {
+ OutputFragment::type_t
+ &x = frags[i]->type,
+ &y = frags[i + 1]->type;
+ if (x == OutputFragment::PEEK && y == OutputFragment::SKIP) {
+ x = OutputFragment::PEEK_SKIP;
+ y = OutputFragment::EMPTY;
+ i += 2;
+ continue;
+ } else if (x == OutputFragment::SKIP && y == OutputFragment::PEEK) {
+ x = OutputFragment::SKIP_PEEK;
+ y = OutputFragment::EMPTY;
+ i += 2;
+ continue;
+ } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP) {
+ x = OutputFragment::SKIP_BACKUP;
+ y = OutputFragment::EMPTY;
+ i += 2;
+ continue;
+ } else if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK) {
+ x = OutputFragment::BACKUP_PEEK;
+ y = OutputFragment::EMPTY;
+ i += 2;
+ continue;
+ } else if (x == OutputFragment::BACKUP && y == OutputFragment::SKIP) {
+ x = OutputFragment::BACKUP_SKIP;
+ y = OutputFragment::EMPTY;
+ i += 2;
+ continue;
+ }
+ }
+
+ ++i;
+ }
}
// Renders all blocks to the output file named by the last block's
// 'output_file' option, or to stdout when that name is empty.
// For each fragment, delayed content is generated into the fragment's own
// stream according to its type and the stream text is then written out.
// 'line_count' tracks the number of newlines written so far and feeds
// LINE_INFO_OUTPUT fragments.
// Returns false when the file cannot be opened or when writing or closing it
// fails (an error is reported in both cases); returns true otherwise.
bool OutputFile::emit(const uniq_vector_t<std::string> &global_types,
- const std::set<std::string> &global_stags,
- const std::set<std::string> &global_mtags,
- size_t max_fill, size_t max_nmatch)
-{
- FILE *file = NULL;
- std::string filename = block().opts->output_file;
- if (filename.empty()) {
- filename = "<stdout>";
- file = stdout;
- } else {
- file = fopen(filename.c_str(), "w");
- if (!file) {
- error("cannot open output file: %s", filename.c_str());
- return false;
- }
- }
-
- fix_first_block_opts();
-
- unsigned int line_count = 1;
- for (unsigned int j = 0; j < blocks.size(); ++j) {
- OutputBlock & b = * blocks[j];
- const opt_t *bopt = b.opts;
-
- if (bopt->input_api == INPUT_DEFAULT) {
- foldexpr(b.fragments);
- }
-
- const size_t n = b.fragments.size();
- for (size_t i = 0; i < n; ++i) {
- OutputFragment & f = * b.fragments[i];
- std::ostringstream &o = f.stream;
- const uint32_t ind = f.indent ? f.indent : bopt->topIndent;
-
- switch (f.type) {
- case OutputFragment::EMPTY:
- case OutputFragment::CODE: break;
- case OutputFragment::LINE_INFO_INPUT:
- output_line_info(o, f.line_info->line, f.line_info->filename, bopt->iFlag);
- break;
- case OutputFragment::LINE_INFO_OUTPUT:
- output_line_info(o, line_count + 1, filename, bopt->iFlag);
- break;
- case OutputFragment::COND_GOTO:
- output_cond_goto(o, ind, b.types,
- bopt, warn, warn_condition_order, b.line);
- break;
- case OutputFragment::COND_TABLE:
- output_cond_table(o, ind, b.types, bopt);
- break;
- case OutputFragment::STATE_GOTO:
- output_state_goto(o, ind, 0, fill_index, bopt);
- break;
- case OutputFragment::STAGS:
- output_tags(o, ind, *f.tags, global_stags, bopt);
- break;
- case OutputFragment::MTAGS:
- output_tags(o, ind, *f.tags, global_mtags, bopt);
- break;
- case OutputFragment::TYPES:
- output_types(o, ind, block().opts, global_types);
- break;
- case OutputFragment::YYACCEPT_INIT:
- output_yyaccept_init(o, ind, b.used_yyaccept, bopt);
- break;
- case OutputFragment::YYMAXFILL:
- output_yymaxfill(o, ind, max_fill, bopt);
- break;
- case OutputFragment::YYMAXNMATCH:
- output_yymaxnmatch(o, ind, max_nmatch, bopt);
- break;
- case OutputFragment::SKIP:
- output_skip(o, ind, bopt);
- break;
- case OutputFragment::PEEK:
- output_peek(o, ind, bopt);
- break;
- case OutputFragment::BACKUP:
- output_backup(o, ind, bopt);
- break;
- case OutputFragment::PEEK_SKIP:
- output_peek_skip(o, ind, bopt);
- break;
- case OutputFragment::SKIP_PEEK:
- output_skip_peek(o, ind, bopt);
- break;
- case OutputFragment::SKIP_BACKUP:
- output_skip_backup(o, ind, bopt);
- break;
- case OutputFragment::BACKUP_SKIP:
- output_backup_skip(o, ind, bopt);
- break;
- case OutputFragment::BACKUP_PEEK:
- output_backup_peek(o, ind, bopt);
- break;
- case OutputFragment::BACKUP_PEEK_SKIP:
- output_backup_peek_skip(o, ind, bopt);
- break;
- case OutputFragment::SKIP_BACKUP_PEEK:
- output_skip_backup_peek(o, ind, bopt);
- break;
- }
-
- std::string content = o.str();
- fwrite(content.c_str(), 1, content.size(), file);
- line_count += f.count_lines();
- }
- }
-
- fclose(file);
- return true;
+ const std::set<std::string> &global_stags,
+ const std::set<std::string> &global_mtags,
+ size_t max_fill, size_t max_nmatch)
+{
+ FILE *file = NULL;
+ std::string filename = block().opts->output_file;
+ if (filename.empty()) {
+ filename = "<stdout>";
+ file = stdout;
+ } else {
+ file = fopen(filename.c_str(), "w");
+ if (!file) {
+ error("cannot open output file: %s", filename.c_str());
+ return false;
+ }
+ }
+
+ fix_first_block_opts();
+
+ // cleared on the first failed write; reported once after the file is closed
+ bool ok = true;
+ unsigned int line_count = 1;
+ for (unsigned int j = 0; j < blocks.size(); ++j) {
+ OutputBlock & b = * blocks[j];
+ const opt_t *bopt = b.opts;
+
+ if (bopt->input_api == INPUT_DEFAULT) {
+ foldexpr(b.fragments);
+ }
+
+ const size_t n = b.fragments.size();
+ for (size_t i = 0; i < n; ++i) {
+ OutputFragment & f = * b.fragments[i];
+ std::ostringstream &o = f.stream;
+ // a fragment indent of 0 means "use the block's top-level indent"
+ const uint32_t ind = f.indent ? f.indent : bopt->topIndent;
+
+ switch (f.type) {
+ case OutputFragment::EMPTY:
+ case OutputFragment::CODE: break;
+ case OutputFragment::LINE_INFO_INPUT:
+ output_line_info(o, f.line_info->line, f.line_info->filename, bopt->iFlag);
+ break;
+ case OutputFragment::LINE_INFO_OUTPUT:
+ output_line_info(o, line_count + 1, filename, bopt->iFlag);
+ break;
+ case OutputFragment::COND_GOTO:
+ output_cond_goto(o, ind, b.types,
+ bopt, warn, warn_condition_order, b.line);
+ break;
+ case OutputFragment::COND_TABLE:
+ output_cond_table(o, ind, b.types, bopt);
+ break;
+ case OutputFragment::STATE_GOTO:
+ output_state_goto(o, ind, 0, fill_index, bopt);
+ break;
+ case OutputFragment::STAGS:
+ output_tags(o, ind, *f.tags, global_stags, bopt);
+ break;
+ case OutputFragment::MTAGS:
+ output_tags(o, ind, *f.tags, global_mtags, bopt);
+ break;
+ case OutputFragment::TYPES:
+ output_types(o, ind, block().opts, global_types);
+ break;
+ case OutputFragment::YYACCEPT_INIT:
+ output_yyaccept_init(o, ind, b.used_yyaccept, bopt);
+ break;
+ case OutputFragment::YYMAXFILL:
+ output_yymaxfill(o, ind, max_fill, bopt);
+ break;
+ case OutputFragment::YYMAXNMATCH:
+ output_yymaxnmatch(o, ind, max_nmatch, bopt);
+ break;
+ case OutputFragment::SKIP:
+ output_skip(o, ind, bopt);
+ break;
+ case OutputFragment::PEEK:
+ output_peek(o, ind, bopt);
+ break;
+ case OutputFragment::BACKUP:
+ output_backup(o, ind, bopt);
+ break;
+ case OutputFragment::PEEK_SKIP:
+ output_peek_skip(o, ind, bopt);
+ break;
+ case OutputFragment::SKIP_PEEK:
+ output_skip_peek(o, ind, bopt);
+ break;
+ case OutputFragment::SKIP_BACKUP:
+ output_skip_backup(o, ind, bopt);
+ break;
+ case OutputFragment::BACKUP_SKIP:
+ output_backup_skip(o, ind, bopt);
+ break;
+ case OutputFragment::BACKUP_PEEK:
+ output_backup_peek(o, ind, bopt);
+ break;
+ case OutputFragment::BACKUP_PEEK_SKIP:
+ output_backup_peek_skip(o, ind, bopt);
+ break;
+ case OutputFragment::SKIP_BACKUP_PEEK:
+ output_skip_backup_peek(o, ind, bopt);
+ break;
+ }
+
+ std::string content = o.str();
+ if (fwrite(content.c_str(), 1, content.size(), file) != content.size()) {
+ ok = false;
+ }
+ line_count += f.count_lines();
+ }
+ }
+
+ // buffered data may only fail to reach the file when fclose flushes it
+ if (fclose(file) != 0) {
+ ok = false;
+ }
+ if (!ok) {
+ error("cannot write output file: %s", filename.c_str());
+ }
+ return ok;
}
bool HeaderFile::emit(const opt_t *opts, const uniq_vector_t<std::string> &types)
{
- const std::string &filename = opts->header_file;
- if (filename.empty()) return true;
+ const std::string &filename = opts->header_file;
+ if (filename.empty()) return true;
- FILE *file = fopen(filename.c_str(), "w");
- if (!file) {
- error("cannot open header file: %s", filename.c_str());
- return false;
- }
+ FILE *file = fopen(filename.c_str(), "w");
+ if (!file) {
+ error("cannot open header file: %s", filename.c_str());
+ return false;
+ }
- output_version_time(stream, opts->version, !opts->bNoGenerationDate);
- output_line_info(stream, 3, filename, opts->iFlag);
- stream << "\n";
- output_types(stream, 0, opts, types);
+ output_version_time(stream, opts->version, !opts->bNoGenerationDate);
+ output_line_info(stream, 3, filename, opts->iFlag);
+ stream << "\n";
+ output_types(stream, 0, opts, types);
- std::string content = stream.str();
- fwrite(content.c_str(), 1, content.size(), file);
+ std::string content = stream.str();
+ fwrite(content.c_str(), 1, content.size(), file);
- fclose(file);
- return true;
+ fclose(file);
+ return true;
}
Output::Output(Warn &w)
- : source(w)
- , header()
- , skeletons()
- , max_fill(1)
- , max_nmatch(1)
+ : source(w)
+ , header()
+ , skeletons()
+ , max_fill(1)
+ , max_nmatch(1)
{}
bool Output::emit()
{
- if (source.warn.error()) {
- return false;
- }
+ if (source.warn.error()) {
+ return false;
+ }
- uniq_vector_t<std::string> types;
- std::set<std::string> stags, mtags;
- source.global_lists(types, stags, mtags);
+ uniq_vector_t<std::string> types;
+ std::set<std::string> stags, mtags;
+ source.global_lists(types, stags, mtags);
- // global options are last block's options
- const opt_t *opts = source.block().opts;
+ // global options are last block's options
+ const opt_t *opts = source.block().opts;
- return source.emit(types, stags, mtags, max_fill, max_nmatch)
- && header.emit(opts, types);
+ return source.emit(types, stags, mtags, max_fill, max_nmatch)
+ && header.emit(opts, types);
}
void output_tags(std::ostream &o, uint32_t ind, const ConfTags &conf,
- const std::set<std::string> &tags, const opt_t *opts)
-{
- std::set<std::string>::const_iterator
- tag = tags.begin(),
- end = tags.end();
- o << indent(ind, opts->indString);
- for (;tag != end;) {
- std::string fmt = conf.format;
- strrreplace(fmt, "@@", *tag);
- o << fmt;
- if (++tag == end) {
- break;
- }
- o << conf.separator;
- }
+ const std::set<std::string> &tags, const opt_t *opts)
+{
+ std::set<std::string>::const_iterator
+ tag = tags.begin(),
+ end = tags.end();
+ o << indent(ind, opts->indString);
+ for (;tag != end;) {
+ std::string fmt = conf.format;
+ strrreplace(fmt, "@@", *tag);
+ o << fmt;
+ if (++tag == end) {
+ break;
+ }
+ o << conf.separator;
+ }
}
void output_state_goto(std::ostream & o, uint32_t ind,
- uint32_t start_label, uint32_t fill_index, const opt_t *opts)
-{
- const std::string
- indstr = indent(ind, opts->indString),
- getstate = opts->state_get_naked
- ? opts->state_get
- : opts->state_get + "()";
-
- o << indstr << "switch (" << getstate << ") {\n";
- if (opts->bUseStateAbort)
- {
- o << indstr << "default: abort();\n";
- o << indstr << "case -1: goto " << opts->labelPrefix << start_label << ";\n";
- }
- else
- {
- o << indstr << "default: goto " << opts->labelPrefix << start_label << ";\n";
- }
- for (uint32_t i = 0; i < fill_index; ++i)
- {
- o << indstr << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n";
- }
- o << indstr << "}\n";
- if (opts->bUseStateNext)
- {
- o << opts->yynext << ":\n";
- }
+ uint32_t start_label, uint32_t fill_index, const opt_t *opts)
+{
+ const std::string
+ indstr = indent(ind, opts->indString),
+ getstate = opts->state_get_naked
+ ? opts->state_get
+ : opts->state_get + "()";
+
+ o << indstr << "switch (" << getstate << ") {\n";
+ if (opts->bUseStateAbort)
+ {
+ o << indstr << "default: abort();\n";
+ o << indstr << "case -1: goto " << opts->labelPrefix << start_label << ";\n";
+ }
+ else
+ {
+ o << indstr << "default: goto " << opts->labelPrefix << start_label << ";\n";
+ }
+ for (uint32_t i = 0; i < fill_index; ++i)
+ {
+ o << indstr << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n";
+ }
+ o << indstr << "}\n";
+ if (opts->bUseStateNext)
+ {
+ o << opts->yynext << ":\n";
+ }
}
void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept, const opt_t *opts)
{
- if (used_yyaccept)
- {
- o << indent(ind, opts->indString) << "unsigned int " << opts->yyaccept << " = 0;\n";
- }
+ if (used_yyaccept)
+ {
+ o << indent(ind, opts->indString) << "unsigned int " << opts->yyaccept << " = 0;\n";
+ }
}
void output_yymaxfill(std::ostream &o, uint32_t ind,
- size_t max_fill, const opt_t *opts)
+ size_t max_fill, const opt_t *opts)
{
- o << indent(ind, opts->indString) << "#define YYMAXFILL " << max_fill << "\n";
+ o << indent(ind, opts->indString) << "#define YYMAXFILL " << max_fill << "\n";
}
void output_yymaxnmatch(std::ostream &o, uint32_t ind,
- size_t max_nmatch, const opt_t *opts)
+ size_t max_nmatch, const opt_t *opts)
{
- o << indent(ind, opts->indString) << "#define YYMAXNMATCH " << max_nmatch << "\n";
+ o << indent(ind, opts->indString) << "#define YYMAXNMATCH " << max_nmatch << "\n";
}
void output_line_info(std::ostream &o, uint32_t line,
- const std::string &fname, bool iflag)
+ const std::string &fname, bool iflag)
{
- if (!iflag) {
- o << "#line " << line << " \"" << fname << "\"\n";
- }
+ if (!iflag) {
+ o << "#line " << line << " \"" << fname << "\"\n";
+ }
}
void output_types(std::ostream &o, uint32_t ind, const opt_t *opts,
- const uniq_vector_t<std::string> &types)
+ const uniq_vector_t<std::string> &types)
{
- const std::string indstr = opts->indString;
- o << indent(ind++, indstr) << "enum " << opts->yycondtype << " {\n";
- for (size_t i = 0; i < types.size(); ++i) {
- o << indent(ind, indstr) << opts->condEnumPrefix << types[i] << ",\n";
- }
- o << indent(--ind, indstr) << "};\n";
+ const std::string indstr = opts->indString;
+ o << indent(ind++, indstr) << "enum " << opts->yycondtype << " {\n";
+ for (size_t i = 0; i < types.size(); ++i) {
+ o << indent(ind, indstr) << opts->condEnumPrefix << types[i] << ",\n";
+ }
+ o << indent(--ind, indstr) << "};\n";
}
void output_version_time(std::ostream &o, bool version, bool date)
{
- o << "/* Generated by re2c";
- if (version) {
- o << " " << PACKAGE_VERSION;
- }
- if (date) {
- o << " on ";
- time_t now = time (NULL);
- o.write (ctime (&now), 24);
- }
- o << " */" << "\n";
+ o << "/* Generated by re2c";
+ if (version) {
+ o << " " << PACKAGE_VERSION;
+ }
+ if (date) {
+ o << " on ";
+ time_t now = time (NULL);
+ o.write (ctime (&now), 24);
+ }
+ o << " */" << "\n";
}
/*
static std::string output_cond_get(const opt_t *opts)
{
- return opts->cond_get + (opts->cond_get_naked ? "" : "()");
+ return opts->cond_get + (opts->cond_get_naked ? "" : "()");
}
static void output_cond_goto_binary(std::ostream &o, uint32_t ind,
- const std::vector<std::string> &conds, const opt_t *opts,
- size_t lower, size_t upper)
+ const std::vector<std::string> &conds, const opt_t *opts,
+ size_t lower, size_t upper)
{
- const std::string indstr = indent(ind, opts->indString);
+ const std::string indstr = indent(ind, opts->indString);
- if (lower == upper) {
- o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n";
- } else {
- const size_t middle = lower + (upper - lower + 1) / 2;
- o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n";
- output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1);
- o << indstr << "} else {\n";
- output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper);
- o << indstr << "}\n";
- }
+ if (lower == upper) {
+ o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n";
+ } else {
+ const size_t middle = lower + (upper - lower + 1) / 2;
+ o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n";
+ output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1);
+ o << indstr << "} else {\n";
+ output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper);
+ o << indstr << "}\n";
+ }
}
void output_cond_goto(std::ostream &o, uint32_t ind,
- const std::vector<std::string> &conds, const opt_t *opts,
- Warn &warn, bool warn_cond_order, uint32_t line)
-{
- const size_t ncond = conds.size();
- const std::string indstr = indent(ind, opts->indString);
-
- if (opts->target == TARGET_DOT) {
- for (size_t i = 0; i < ncond; ++i) {
- const std::string &cond = conds[i];
- o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n";
- }
- return;
- }
-
- if (opts->gFlag) {
- o << indstr << "goto *" << opts->yyctable
- << "[" << output_cond_get(opts) << "];\n";
- } else if (opts->sFlag) {
- if (ncond == 1) warn_cond_order = false;
- output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1);
- } else {
- warn_cond_order = false;
- o << indstr << "switch (" << output_cond_get(opts) << ") {\n";
- for (size_t i = 0; i < ncond; ++i) {
- const std::string &cond = conds[i];
- o << indstr << "case " << opts->condEnumPrefix << cond
- <<": goto " << opts->condPrefix << cond << ";\n";
- }
- o << indstr << "}\n";
- }
-
- warn_cond_order &= opts->header_file.empty();
-
- // see note [condition order]
- if (warn_cond_order) warn.condition_order(line);
+ const std::vector<std::string> &conds, const opt_t *opts,
+ Warn &warn, bool warn_cond_order, uint32_t line)
+{
+ const size_t ncond = conds.size();
+ const std::string indstr = indent(ind, opts->indString);
+
+ if (opts->target == TARGET_DOT) {
+ for (size_t i = 0; i < ncond; ++i) {
+ const std::string &cond = conds[i];
+ o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n";
+ }
+ return;
+ }
+
+ if (opts->gFlag) {
+ o << indstr << "goto *" << opts->yyctable
+ << "[" << output_cond_get(opts) << "];\n";
+ } else if (opts->sFlag) {
+ if (ncond == 1) warn_cond_order = false;
+ output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1);
+ } else {
+ warn_cond_order = false;
+ o << indstr << "switch (" << output_cond_get(opts) << ") {\n";
+ for (size_t i = 0; i < ncond; ++i) {
+ const std::string &cond = conds[i];
+ o << indstr << "case " << opts->condEnumPrefix << cond
+ <<": goto " << opts->condPrefix << cond << ";\n";
+ }
+ o << indstr << "}\n";
+ }
+
+ warn_cond_order &= opts->header_file.empty();
+
+ // see note [condition order]
+ if (warn_cond_order) warn.condition_order(line);
}
void output_cond_table(std::ostream &o, uint32_t ind,
- const std::vector<std::string> &conds, const opt_t *opts)
+ const std::vector<std::string> &conds, const opt_t *opts)
{
- const size_t ncond = conds.size();
- const std::string indstr = opts->indString;
+ const size_t ncond = conds.size();
+ const std::string indstr = opts->indString;
- o << indent(ind++, indstr) << "static void *" << opts->yyctable << "[" << ncond << "] = {\n";
- for (size_t i = 0; i < ncond; ++i) {
- o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n";
- }
- o << indent(--ind, indstr) << "};\n";
+ o << indent(ind++, indstr) << "static void *" << opts->yyctable << "[" << ncond << "] = {\n";
+ for (size_t i = 0; i < ncond; ++i) {
+ o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n";
+ }
+ o << indent(--ind, indstr) << "};\n";
}
} // namespace re2c
struct ConfTags
{
- std::string format;
- std::string separator;
+ std::string format;
+ std::string separator;
- ConfTags(const std::string &f, const std::string &s)
- : format(f), separator(s) {}
+ ConfTags(const std::string &f, const std::string &s)
+ : format(f), separator(s) {}
};
struct LineInfo
{
- uint32_t line;
- std::string filename;
+ uint32_t line;
+ std::string filename;
- LineInfo(uint32_t l, const std::string &fn)
- : line(l), filename(fn) {}
+ LineInfo(uint32_t l, const std::string &fn)
+ : line(l), filename(fn) {}
};
struct OutputFragment
{
- enum type_t
- { CODE
-// , CONFIG
- , COND_GOTO
- , COND_TABLE
- , LINE_INFO_INPUT
- , LINE_INFO_OUTPUT
- , STATE_GOTO
- , STAGS
- , MTAGS
- , TYPES
- , YYACCEPT_INIT
- , YYMAXFILL
- , YYMAXNMATCH
- , SKIP
- , PEEK
- , BACKUP
- , PEEK_SKIP
- , SKIP_PEEK
- , SKIP_BACKUP
- , BACKUP_SKIP
- , BACKUP_PEEK
- , BACKUP_PEEK_SKIP
- , SKIP_BACKUP_PEEK
- , EMPTY
- };
-
- type_t type;
- std::ostringstream stream;
- uint32_t indent;
- union
- {
- const ConfTags *tags;
- const LineInfo *line_info;
- };
-
- OutputFragment (type_t t, uint32_t i);
- ~OutputFragment ();
- uint32_t count_lines () const;
+ enum type_t
+ { CODE
+// , CONFIG
+ , COND_GOTO
+ , COND_TABLE
+ , LINE_INFO_INPUT
+ , LINE_INFO_OUTPUT
+ , STATE_GOTO
+ , STAGS
+ , MTAGS
+ , TYPES
+ , YYACCEPT_INIT
+ , YYMAXFILL
+ , YYMAXNMATCH
+ , SKIP
+ , PEEK
+ , BACKUP
+ , PEEK_SKIP
+ , SKIP_PEEK
+ , SKIP_BACKUP
+ , BACKUP_SKIP
+ , BACKUP_PEEK
+ , BACKUP_PEEK_SKIP
+ , SKIP_BACKUP_PEEK
+ , EMPTY
+ };
+
+ type_t type;
+ std::ostringstream stream;
+ uint32_t indent;
+ union
+ {
+ const ConfTags *tags;
+ const LineInfo *line_info;
+ };
+
+ OutputFragment (type_t t, uint32_t i);
+ ~OutputFragment ();
+ uint32_t count_lines () const;
};
struct OutputBlock
{
- std::vector<OutputFragment *> fragments;
- bool used_yyaccept;
- bool have_user_code;
- uint32_t line;
- std::vector<std::string> types;
- std::set<std::string> stags;
- std::set<std::string> mtags;
- const opt_t *opts;
-
- OutputBlock ();
- ~OutputBlock ();
- FORBID_COPY(OutputBlock);
+ std::vector<OutputFragment *> fragments;
+ bool used_yyaccept;
+ bool have_user_code;
+ uint32_t line;
+ std::vector<std::string> types;
+ std::set<std::string> stags;
+ std::set<std::string> mtags;
+ const opt_t *opts;
+
+ OutputBlock ();
+ ~OutputBlock ();
+ FORBID_COPY(OutputBlock);
};
class OutputFile
{
- std::vector<OutputBlock *> blocks;
+ std::vector<OutputBlock *> blocks;
public:
- counter_t<label_t> label_counter;
- uint32_t fill_index;
- bool state_goto;
- bool cond_goto;
- bool warn_condition_order;
- Warn &warn;
-
- explicit OutputFile(Warn &w);
- ~OutputFile();
-
- std::ostream & stream ();
- OutputBlock &block();
- void insert_code ();
- bool open ();
- void new_block(Opt &opts);
-
- // immediate output
- OutputFile & wraw (const char *s, const char *e);
- OutputFile & wc (char c);
- OutputFile & wc_hex (uint32_t n);
- OutputFile & wu32 (uint32_t n);
- OutputFile & wu32_hex (uint32_t n);
- OutputFile & wu32_width (uint32_t n, int w);
- OutputFile & wu64 (uint64_t n);
- OutputFile & wstring (const std::string & s);
- OutputFile & ws (const char * s);
- OutputFile & wlabel (label_t l);
- OutputFile & wrange (uint32_t u, uint32_t l);
- OutputFile & wversion_time ();
- OutputFile & wuser_start_label ();
- OutputFile & wind (uint32_t ind);
-
- // delayed output
- OutputFile & wdelay_tags(const ConfTags *cf, bool mtags);
- OutputFile & wdelay_line_info_input (uint32_t l, const std::string &fn);
- OutputFile & wdelay_line_info_output ();
- OutputFile & wdelay_cond_goto(uint32_t ind);
- OutputFile & wdelay_cond_table(uint32_t ind);
- OutputFile & wdelay_state_goto (uint32_t ind);
- OutputFile & wdelay_types ();
- OutputFile & wdelay_yyaccept_init (uint32_t ind);
- OutputFile & wdelay_yymaxfill ();
- OutputFile& wdelay_yymaxnmatch();
- OutputFile& wdelay_skip(uint32_t ind, bool skip);
- OutputFile& wdelay_peek(uint32_t ind, bool peek);
- OutputFile& wdelay_backup(uint32_t ind, bool backup);
-
- void fix_first_block_opts();
- void global_lists(uniq_vector_t<std::string> &types,
- std::set<std::string> &stags, std::set<std::string> &mtags) const;
-
- bool emit(const uniq_vector_t<std::string> &global_types,
- const std::set<std::string> &global_stags,
- const std::set<std::string> &global_mtags,
- size_t max_fill, size_t max_nmatch);
-
- FORBID_COPY (OutputFile);
+ counter_t<label_t> label_counter;
+ uint32_t fill_index;
+ bool state_goto;
+ bool cond_goto;
+ bool warn_condition_order;
+ Warn &warn;
+
+ explicit OutputFile(Warn &w);
+ ~OutputFile();
+
+ std::ostream & stream ();
+ OutputBlock &block();
+ void insert_code ();
+ bool open ();
+ void new_block(Opt &opts);
+
+ // immediate output
+ OutputFile & wraw (const char *s, const char *e);
+ OutputFile & wc (char c);
+ OutputFile & wc_hex (uint32_t n);
+ OutputFile & wu32 (uint32_t n);
+ OutputFile & wu32_hex (uint32_t n);
+ OutputFile & wu32_width (uint32_t n, int w);
+ OutputFile & wu64 (uint64_t n);
+ OutputFile & wstring (const std::string & s);
+ OutputFile & ws (const char * s);
+ OutputFile & wlabel (label_t l);
+ OutputFile & wrange (uint32_t u, uint32_t l);
+ OutputFile & wversion_time ();
+ OutputFile & wuser_start_label ();
+ OutputFile & wind (uint32_t ind);
+
+ // delayed output
+ OutputFile & wdelay_tags(const ConfTags *cf, bool mtags);
+ OutputFile & wdelay_line_info_input (uint32_t l, const std::string &fn);
+ OutputFile & wdelay_line_info_output ();
+ OutputFile & wdelay_cond_goto(uint32_t ind);
+ OutputFile & wdelay_cond_table(uint32_t ind);
+ OutputFile & wdelay_state_goto (uint32_t ind);
+ OutputFile & wdelay_types ();
+ OutputFile & wdelay_yyaccept_init (uint32_t ind);
+ OutputFile & wdelay_yymaxfill ();
+ OutputFile& wdelay_yymaxnmatch();
+ OutputFile& wdelay_skip(uint32_t ind, bool skip);
+ OutputFile& wdelay_peek(uint32_t ind, bool peek);
+ OutputFile& wdelay_backup(uint32_t ind, bool backup);
+
+ void fix_first_block_opts();
+ void global_lists(uniq_vector_t<std::string> &types,
+ std::set<std::string> &stags, std::set<std::string> &mtags) const;
+
+ bool emit(const uniq_vector_t<std::string> &global_types,
+ const std::set<std::string> &global_stags,
+ const std::set<std::string> &global_mtags,
+ size_t max_fill, size_t max_nmatch);
+
+ FORBID_COPY (OutputFile);
};
class HeaderFile
{
- std::ostringstream stream;
+ std::ostringstream stream;
public:
- HeaderFile(): stream() {}
- bool emit(const opt_t *opts, const uniq_vector_t<std::string> &types);
- FORBID_COPY (HeaderFile);
+ HeaderFile(): stream() {}
+ bool emit(const opt_t *opts, const uniq_vector_t<std::string> &types);
+ FORBID_COPY (HeaderFile);
};
struct Output
{
- OutputFile source;
- HeaderFile header;
- std::set<std::string> skeletons;
- size_t max_fill;
- size_t max_nmatch;
-
- explicit Output(Warn &w);
- bool emit();
+ OutputFile source;
+ HeaderFile header;
+ std::set<std::string> skeletons;
+ size_t max_fill;
+ size_t max_nmatch;
+
+ explicit Output(Warn &w);
+ bool emit();
};
void output_tags (std::ostream &o, uint32_t ind, const ConfTags &conf, const std::set<std::string> &tags, const opt_t *opts);
static bool is_space(uint32_t c)
{
- switch (c) {
- case '\t':
- case '\f':
- case '\v':
- case '\n':
- case '\r':
- case ' ': return true;
- default: return false;
- }
+ switch (c) {
+ case '\t':
+ case '\f':
+ case '\v':
+ case '\n':
+ case '\r':
+ case ' ': return true;
+ default: return false;
+ }
}
static inline char hex(uint32_t c)
{
- static const char * sHex = "0123456789ABCDEF";
- return sHex[c & 0x0F];
+ static const char * sHex = "0123456789ABCDEF";
+ return sHex[c & 0x0F];
}
static void prtCh(std::ostream& o, uint32_t c, bool dot)
{
- switch (c) {
- case '\'': o << (dot ? "'" : "\\'"); break;
- case '"': o << (dot ? "\\\"" : "\""); break;
- case '\n': o << (dot ? "\\\\n" : "\\n"); break;
- case '\t': o << (dot ? "\\\\t" : "\\t"); break;
- case '\v': o << (dot ? "\\\\v" : "\\v"); break;
- case '\b': o << (dot ? "\\\\b" : "\\b"); break;
- case '\r': o << (dot ? "\\\\r" : "\\r"); break;
- case '\f': o << (dot ? "\\\\f" : "\\f"); break;
- case '\a': o << (dot ? "\\\\a" : "\\a"); break;
- case '\\': o << "\\\\"; break; // both .dot and C/C++ code expect "\\"
- default: o << static_cast<char> (c); break;
- }
+ switch (c) {
+ case '\'': o << (dot ? "'" : "\\'"); break;
+ case '"': o << (dot ? "\\\"" : "\""); break;
+ case '\n': o << (dot ? "\\\\n" : "\\n"); break;
+ case '\t': o << (dot ? "\\\\t" : "\\t"); break;
+ case '\v': o << (dot ? "\\\\v" : "\\v"); break;
+ case '\b': o << (dot ? "\\\\b" : "\\b"); break;
+ case '\r': o << (dot ? "\\\\r" : "\\r"); break;
+ case '\f': o << (dot ? "\\\\f" : "\\f"); break;
+ case '\a': o << (dot ? "\\\\a" : "\\a"); break;
+ case '\\': o << "\\\\"; break; // both .dot and C/C++ code expect "\\"
+ default: o << static_cast<char> (c); break;
+ }
}
bool is_print(uint32_t c)
{
- return c >= 0x20 && c < 0x7F;
+ return c >= 0x20 && c < 0x7F;
}
void prtHex(std::ostream& o, uint32_t c, uint32_t szcunit)
{
- o << "0x";
+ o << "0x";
- if (szcunit >= 4) {
- o << hex(c >> 28u) << hex(c >> 24u) << hex(c >> 20u) << hex(c >> 16u);
- }
+ if (szcunit >= 4) {
+ o << hex(c >> 28u) << hex(c >> 24u) << hex(c >> 20u) << hex(c >> 16u);
+ }
- if (szcunit >= 2) {
- o << hex(c >> 12u) << hex(c >> 8u);
- }
+ if (szcunit >= 2) {
+ o << hex(c >> 12u) << hex(c >> 8u);
+ }
- o << hex(c >> 4u) << hex(c);
+ o << hex(c >> 4u) << hex(c);
}
void prtChOrHex(std::ostream& o, uint32_t c, uint32_t szcunit, bool ebcdic, bool dot)
{
- if (!ebcdic && (is_print(c) || is_space(c))) {
- o << '\'';
- prtCh(o, c, dot);
- o << '\'';
- } else {
- prtHex(o, c, szcunit);
- }
+ if (!ebcdic && (is_print(c) || is_space(c))) {
+ o << '\'';
+ prtCh(o, c, dot);
+ o << '\'';
+ } else {
+ prtHex(o, c, szcunit);
+ }
}
static void prtChOrHexForSpan(std::ostream& o, uint32_t c, uint32_t szcunit, bool ebcdic, bool dot)
{
- if (!ebcdic && c != ']' && is_print(c)) {
- prtCh(o, c, dot);
- } else {
- prtHex(o, c, szcunit);
- }
+ if (!ebcdic && c != ']' && is_print(c)) {
+ prtCh(o, c, dot);
+ } else {
+ prtHex(o, c, szcunit);
+ }
}
void printSpan(std::ostream& o, uint32_t l, uint32_t u, uint32_t szcunit, bool ebcdic, bool dot)
{
- o << "[";
- prtChOrHexForSpan(o, l, szcunit, ebcdic, dot);
- if (u - l > 1) {
- o << "-";
- prtChOrHexForSpan(o, u - 1, szcunit, ebcdic, dot);
- }
- o << "]";
+ o << "[";
+ prtChOrHexForSpan(o, l, szcunit, ebcdic, dot);
+ if (u - l > 1) {
+ o << "-";
+ prtChOrHexForSpan(o, u - 1, szcunit, ebcdic, dot);
+ }
+ o << "]";
}
} // end namespace re2c
static std::string make_name(const std::string &cond, uint32_t line)
{
- std::ostringstream os;
- os << "line" << line;
- std::string name = os.str();
- if (!cond.empty ())
- {
- name += "_";
- name += cond;
- }
- return name;
+ std::ostringstream os;
+ os << "line" << line;
+ std::string name = os.str();
+ if (!cond.empty ())
+ {
+ name += "_";
+ name += cond;
+ }
+ return name;
}
static smart_ptr<DFA> ast_to_dfa(const spec_t &spec, Output &output)
{
- const opt_t *opts = output.source.block().opts;
- Warn &warn = output.source.warn;
- const std::vector<ASTRule> &rules = spec.rules;
- const size_t defrule = spec.defs.empty()
- ? Rule::NONE
- : rules.size() - 1;
- const uint32_t line = output.source.block().line;
- const std::string
- &cond = spec.name,
- name = make_name(cond, line),
- &setup = spec.setup.empty() ? "" : spec.setup[0]->text;
-
- RESpec re(rules, opts, warn);
- split_charset(re);
- find_fixed_tags(re);
- insert_default_tags(re);
- warn_nullable(re, cond);
-
- nfa_t nfa(re);
- if (opts->dump_nfa) dump_nfa(nfa);
-
- dfa_t dfa(nfa, opts, cond, warn);
- if (opts->dump_dfa_det) dump_dfa(dfa);
-
- // skeleton must be constructed after DFA construction
- // but prior to any other DFA transformations
- Skeleton skeleton(dfa, opts, defrule, name, cond, line);
- warn_undefined_control_flow(skeleton, warn);
- if (opts->target == TARGET_SKELETON) {
- emit_data(skeleton);
- }
-
- cutoff_dead_rules(dfa, defrule, cond, warn);
-
- insert_fallback_tags(dfa);
-
- // try to minimize the number of tag variables
- compact_and_optimize_tags(dfa, opts->optimize_tags);
- if (opts->dump_dfa_tagopt) dump_dfa(dfa);
-
- freeze_tags(dfa);
-
- minimization(dfa, opts->dfa_minimization);
- if (opts->dump_dfa_min) dump_dfa(dfa);
-
- // find YYFILL states and calculate argument to YYFILL
- std::vector<size_t> fill;
- fillpoints(dfa, fill);
-
- // ADFA stands for 'DFA with actions'
- DFA *adfa = new DFA(dfa, fill, defrule, skeleton.sizeof_key,
- name, cond, line, setup);
-
- // see note [reordering DFA states]
- adfa->reorder();
-
- // skeleton is constructed, do further DFA transformations
- adfa->prepare(opts);
- if (opts->dump_adfa) dump_adfa(*adfa);
-
- // finally gather overall DFA statistics
- adfa->calc_stats(line, opts->tags);
-
- // accumulate global statistics from this particular DFA
- output.max_fill = std::max(output.max_fill, adfa->max_fill);
- output.max_nmatch = std::max(output.max_nmatch, adfa->max_nmatch);
- if (adfa->need_accept)
- {
- output.source.block().used_yyaccept = true;
- }
-
- return make_smart_ptr(adfa);
+ const opt_t *opts = output.source.block().opts;
+ Warn &warn = output.source.warn;
+ const std::vector<ASTRule> &rules = spec.rules;
+ const size_t defrule = spec.defs.empty()
+ ? Rule::NONE
+ : rules.size() - 1;
+ const uint32_t line = output.source.block().line;
+ const std::string
+ &cond = spec.name,
+ name = make_name(cond, line),
+ &setup = spec.setup.empty() ? "" : spec.setup[0]->text;
+
+ RESpec re(rules, opts, warn);
+ split_charset(re);
+ find_fixed_tags(re);
+ insert_default_tags(re);
+ warn_nullable(re, cond);
+
+ nfa_t nfa(re);
+ if (opts->dump_nfa) dump_nfa(nfa);
+
+ dfa_t dfa(nfa, opts, cond, warn);
+ if (opts->dump_dfa_det) dump_dfa(dfa);
+
+ // skeleton must be constructed after DFA construction
+ // but prior to any other DFA transformations
+ Skeleton skeleton(dfa, opts, defrule, name, cond, line);
+ warn_undefined_control_flow(skeleton, warn);
+ if (opts->target == TARGET_SKELETON) {
+ emit_data(skeleton);
+ }
+
+ cutoff_dead_rules(dfa, defrule, cond, warn);
+
+ insert_fallback_tags(dfa);
+
+ // try to minimize the number of tag variables
+ compact_and_optimize_tags(dfa, opts->optimize_tags);
+ if (opts->dump_dfa_tagopt) dump_dfa(dfa);
+
+ freeze_tags(dfa);
+
+ minimization(dfa, opts->dfa_minimization);
+ if (opts->dump_dfa_min) dump_dfa(dfa);
+
+ // find YYFILL states and calculate argument to YYFILL
+ std::vector<size_t> fill;
+ fillpoints(dfa, fill);
+
+ // ADFA stands for 'DFA with actions'
+ DFA *adfa = new DFA(dfa, fill, defrule, skeleton.sizeof_key,
+ name, cond, line, setup);
+
+ // see note [reordering DFA states]
+ adfa->reorder();
+
+ // skeleton is constructed, do further DFA transformations
+ adfa->prepare(opts);
+ if (opts->dump_adfa) dump_adfa(*adfa);
+
+ // finally gather overall DFA statistics
+ adfa->calc_stats(line, opts->tags);
+
+ // accumulate global statistics from this particular DFA
+ output.max_fill = std::max(output.max_fill, adfa->max_fill);
+ output.max_nmatch = std::max(output.max_nmatch, adfa->max_nmatch);
+ if (adfa->need_accept)
+ {
+ output.source.block().used_yyaccept = true;
+ }
+
+ return make_smart_ptr(adfa);
}
void compile(Scanner &input, Output &output, Opt &opts)
{
- specs_t rspecs;
- symtab_t symtab;
- const conopt_t *globopts = &opts.glob;
- const opt_t *ropts = NULL;
- OutputFile &o = output.source;
- typedef std::vector<smart_ptr<DFA> > dfas_t;
-
- o.new_block(opts);
- o.wversion_time();
- o.wdelay_line_info_input(input.get_cline(), input.get_fname());
- if (globopts->target == TARGET_SKELETON) {
- emit_prolog(o);
- }
-
- for (;;) {
- // parse everything up to the next re2c block
- Scanner::ParseMode mode = input.echo(o);
- if (mode == Scanner::Stop) break;
- validate_mode(mode, globopts->rFlag, ropts, input);
-
- // parse the next re2c block
- specs_t specs;
- if (mode == Scanner::Reuse) {
- specs = rspecs;
- opts.restore(ropts);
- opts.reset_mapCodeName();
- o.label_counter.reset();
- o.fill_index = 0;
- o.state_goto = false;
- o.cond_goto = false;
- }
- parse(input, specs, symtab, opts);
-
- // start new output block with accumulated options
- o.new_block(opts);
-
- if (mode == Scanner::Rules) {
- // save AST and options for future use
- rspecs = specs;
- ropts = o.block().opts;
- } else {
- validate_ast(specs, globopts->cFlag);
- normalize_ast(specs);
-
- // compile AST to DFA
- o.block().line = input.get_cline();
- dfas_t dfas;
- for (specs_t::const_iterator i = specs.begin(); i != specs.end(); ++i) {
- dfas.push_back(ast_to_dfa(*i, output));
- }
-
- // compile DFA to code
- bool prolog = false;
- uint32_t ind = o.block().opts->topIndent;
- for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
- (*i)->emit(output, ind, (i + 1) == dfas.end(), prolog);
- }
- }
-
- o.wdelay_line_info_input(input.get_cline(), input.get_fname());
- }
-
- if (globopts->target == TARGET_SKELETON) {
- emit_epilog (o, output.skeletons);
- }
-
- AST::flist.clear();
- Code::flist.clear();
- Range::vFreeList.clear();
- RangeSuffix::freeList.clear();
+ specs_t rspecs;
+ symtab_t symtab;
+ const conopt_t *globopts = &opts.glob;
+ const opt_t *ropts = NULL;
+ OutputFile &o = output.source;
+ typedef std::vector<smart_ptr<DFA> > dfas_t;
+
+ o.new_block(opts);
+ o.wversion_time();
+ o.wdelay_line_info_input(input.get_cline(), input.get_fname());
+ if (globopts->target == TARGET_SKELETON) {
+ emit_prolog(o);
+ }
+
+ for (;;) {
+ // parse everything up to the next re2c block
+ Scanner::ParseMode mode = input.echo(o);
+ if (mode == Scanner::Stop) break;
+ validate_mode(mode, globopts->rFlag, ropts, input);
+
+ // parse the next re2c block
+ specs_t specs;
+ if (mode == Scanner::Reuse) {
+ specs = rspecs;
+ opts.restore(ropts);
+ opts.reset_mapCodeName();
+ o.label_counter.reset();
+ o.fill_index = 0;
+ o.state_goto = false;
+ o.cond_goto = false;
+ }
+ parse(input, specs, symtab, opts);
+
+ // start new output block with accumulated options
+ o.new_block(opts);
+
+ if (mode == Scanner::Rules) {
+ // save AST and options for future use
+ rspecs = specs;
+ ropts = o.block().opts;
+ } else {
+ validate_ast(specs, globopts->cFlag);
+ normalize_ast(specs);
+
+ // compile AST to DFA
+ o.block().line = input.get_cline();
+ dfas_t dfas;
+ for (specs_t::const_iterator i = specs.begin(); i != specs.end(); ++i) {
+ dfas.push_back(ast_to_dfa(*i, output));
+ }
+
+ // compile DFA to code
+ bool prolog = false;
+ uint32_t ind = o.block().opts->topIndent;
+ for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
+ (*i)->emit(output, ind, (i + 1) == dfas.end(), prolog);
+ }
+ }
+
+ o.wdelay_line_info_input(input.get_cline(), input.get_fname());
+ }
+
+ if (globopts->target == TARGET_SKELETON) {
+ emit_epilog (o, output.skeletons);
+ }
+
+ AST::flist.clear();
+ Code::flist.clear();
+ Range::vFreeList.clear();
+ RangeSuffix::freeList.clear();
}
} // namespace re2c
void error (const char * fmt, ...)
{
- fprintf (stderr, "re2c: error: ");
+ fprintf (stderr, "re2c: error: ");
- va_list args;
- va_start (args, fmt);
- vfprintf (stderr, fmt, args);
- va_end (args);
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
- fprintf (stderr, "\n");
+ fprintf (stderr, "\n");
}
void fatal(const char *fmt, ...)
{
- fprintf (stderr, "re2c: error: ");
+ fprintf (stderr, "re2c: error: ");
- va_list args;
- va_start (args, fmt);
- vfprintf (stderr, fmt, args);
- va_end (args);
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
- fprintf (stderr, "\n");
- exit(1);
+ fprintf (stderr, "\n");
+ exit(1);
}
void fatal_l(uint32_t line, const char *fmt, ...)
{
- fprintf (stderr, "re2c: error: line %u: ", line);
+ fprintf (stderr, "re2c: error: line %u: ", line);
- va_list args;
- va_start (args, fmt);
- vfprintf (stderr, fmt, args);
- va_end (args);
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
- fprintf (stderr, "\n");
- exit(1);
+ fprintf (stderr, "\n");
+ exit(1);
}
void fatal_lc(uint32_t line, uint32_t column, const char *fmt, ...)
{
- fprintf (stderr, "re2c: error: line %u, column %u: ", line, column);
+ fprintf (stderr, "re2c: error: line %u, column %u: ", line, column);
- va_list args;
- va_start (args, fmt);
- vfprintf (stderr, fmt, args);
- va_end (args);
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
- fprintf (stderr, "\n");
- exit(1);
+ fprintf (stderr, "\n");
+ exit(1);
}
void error_arg (const char * option)
{
- error ("expected argument to option %s", option);
+ error ("expected argument to option %s", option);
}
void warning_start (uint32_t line, bool error)
{
- const char * msg = error ? "error" : "warning";
- fprintf (stderr, "re2c: %s: line %u: ", msg, line);
+ const char * msg = error ? "error" : "warning";
+ fprintf (stderr, "re2c: %s: line %u: ", msg, line);
}
void warning_end (const char * type, bool error)
{
- if (type != NULL)
- {
- const char * prefix = error ? "error-" : "";
- fprintf (stderr, " [-W%s%s]", prefix, type);
- }
- fprintf (stderr, "\n");
+ if (type != NULL)
+ {
+ const char * prefix = error ? "error-" : "";
+ fprintf (stderr, " [-W%s%s]", prefix, type);
+ }
+ fprintf (stderr, "\n");
}
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...)
{
- warning_start (line, error);
+ warning_start (line, error);
- va_list args;
- va_start (args, fmt);
- vfprintf (stderr, fmt, args);
- va_end (args);
+ va_list args;
+ va_start (args, fmt);
+ vfprintf (stderr, fmt, args);
+ va_end (args);
- warning_end (type, error);
+ warning_end (type, error);
}
void usage()
{
- fprintf(stdout, "%s", help);
+ fprintf(stdout, "%s", help);
}
void vernum ()
{
- std::string vernum (PACKAGE_VERSION);
- if (vernum[1] == '.')
- {
- vernum.insert(0, "0");
- }
- vernum.erase(2, 1);
- if (vernum[3] == '.')
- {
- vernum.insert(2, "0");
- }
- vernum.erase(4, 1);
- if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
- {
- vernum.insert(4, "0");
- }
- vernum.resize(6, '0');
-
- printf ("%s\n", vernum.c_str ());
+ std::string vernum (PACKAGE_VERSION);
+ if (vernum[1] == '.')
+ {
+ vernum.insert(0, "0");
+ }
+ vernum.erase(2, 1);
+ if (vernum[3] == '.')
+ {
+ vernum.insert(2, "0");
+ }
+ vernum.erase(4, 1);
+ if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
+ {
+ vernum.insert(4, "0");
+ }
+ vernum.resize(6, '0');
+
+ printf ("%s\n", vernum.c_str ());
}
void version ()
{
- printf ("re2c %s\n", PACKAGE_VERSION);
+ printf ("re2c %s\n", PACKAGE_VERSION);
}
std::string incond (const std::string & cond)
{
- std::string s;
- if (!cond.empty ())
- {
- s += "in condition '";
- s += cond;
- s += "' ";
- }
- return s;
+ std::string s;
+ if (!cond.empty ())
+ {
+ s += "in condition '";
+ s += cond;
+ s += "' ";
+ }
+ return s;
}
} // namespace re2c
void conopt_t::fix()
{
- if (target == TARGET_SKELETON) {
- fFlag = false;
- header_file = "";
- }
- if (!cFlag) {
- header_file = "";
- }
+ if (target == TARGET_SKELETON) {
+ fFlag = false;
+ header_file = "";
+ }
+ if (!cFlag) {
+ header_file = "";
+ }
}
void mutopt_t::fix(const conopt_t *globopts)
{
- // some options either make no sense or must have fixed value
- // with current target: reset them to default
- switch (globopts->target) {
- case TARGET_DOT:
- // default code generation options
- sFlag = Opt::baseopt.sFlag;
- bFlag = Opt::baseopt.bFlag;
- gFlag = Opt::baseopt.gFlag;
- cGotoThreshold = Opt::baseopt.cGotoThreshold;
- // default environment-insensitive formatting
- yybmHexTable = Opt::baseopt.yybmHexTable;
- // fallthrough
- case TARGET_SKELETON:
- // default line information
- iFlag = Opt::baseopt.iFlag;
- // default environment-sensitive formatting
- topIndent = Opt::baseopt.topIndent;
- indString = Opt::baseopt.indString;
- condDivider = Opt::baseopt.condDivider;
- condDividerParam = Opt::baseopt.condDividerParam;
- // default environment bindings
- yycondtype = Opt::baseopt.yycondtype;
- cond_get = Opt::baseopt.cond_get;
- cond_get_naked = Opt::baseopt.cond_get_naked;
- cond_set = Opt::baseopt.cond_set;
- cond_set_arg = Opt::baseopt.cond_set_arg;
- cond_set_naked = Opt::baseopt.cond_set_naked;
- yyctable = Opt::baseopt.yyctable;
- condPrefix = Opt::baseopt.condPrefix;
- condEnumPrefix = Opt::baseopt.condEnumPrefix;
- condGoto = Opt::baseopt.condGoto;
- condGotoParam = Opt::baseopt.condGotoParam;
- state_get = Opt::baseopt.state_get;
- state_get_naked = Opt::baseopt.state_get_naked;
- state_set = Opt::baseopt.state_set;
- state_set_arg = Opt::baseopt.state_set_arg;
- state_set_naked = Opt::baseopt.state_set_naked;
- tags_prefix = Opt::baseopt.tags_prefix;
- tags_expression = Opt::baseopt.tags_expression;
- optimize_tags = Opt::baseopt.optimize_tags;
- yyfilllabel = Opt::baseopt.yyfilllabel;
- yynext = Opt::baseopt.yynext;
- yyaccept = Opt::baseopt.yyaccept;
- bUseStateAbort = Opt::baseopt.bUseStateAbort;
- bUseStateNext = Opt::baseopt.bUseStateNext;
- yybm = Opt::baseopt.yybm;
- yytarget = Opt::baseopt.yytarget;
- input_api = Opt::baseopt.input_api;
- yycursor = Opt::baseopt.yycursor;
- yymarker = Opt::baseopt.yymarker;
- yyctxmarker = Opt::baseopt.yyctxmarker;
- yylimit = Opt::baseopt.yylimit;
- yypeek = Opt::baseopt.yypeek;
- yyskip = Opt::baseopt.yyskip;
- yybackup = Opt::baseopt.yybackup;
- yybackupctx = Opt::baseopt.yybackupctx;
- yyrestore = Opt::baseopt.yyrestore;
- yyrestorectx = Opt::baseopt.yyrestorectx;
- yyrestoretag = Opt::baseopt.yyrestoretag;
- yystagn = Opt::baseopt.yystagn;
- yystagp = Opt::baseopt.yystagp;
- yymtagn = Opt::baseopt.yymtagn;
- yymtagp = Opt::baseopt.yymtagp;
- yylessthan = Opt::baseopt.yylessthan;
- dFlag = Opt::baseopt.dFlag;
- yydebug = Opt::baseopt.yydebug;
- yyctype = Opt::baseopt.yyctype;
- yych = Opt::baseopt.yych;
- bEmitYYCh = Opt::baseopt.bEmitYYCh;
- yychConversion = Opt::baseopt.yychConversion;
- fill = Opt::baseopt.fill;
- fill_use = Opt::baseopt.fill_use;
- fill_check = Opt::baseopt.fill_check;
- fill_arg = Opt::baseopt.fill_arg;
- fill_arg_use = Opt::baseopt.fill_arg_use;
- fill_naked = Opt::baseopt.fill_naked;
- labelPrefix = Opt::baseopt.labelPrefix;
- startlabel = Opt::baseopt.startlabel;
- startlabel_force = Opt::baseopt.startlabel_force;
- break;
- case TARGET_CODE:
- break;
- }
+ // some options either make no sense or must have fixed value
+ // with current target: reset them to default
+ switch (globopts->target) {
+ case TARGET_DOT:
+ // default code generation options
+ sFlag = Opt::baseopt.sFlag;
+ bFlag = Opt::baseopt.bFlag;
+ gFlag = Opt::baseopt.gFlag;
+ cGotoThreshold = Opt::baseopt.cGotoThreshold;
+ // default environment-insensitive formatting
+ yybmHexTable = Opt::baseopt.yybmHexTable;
+ // fallthrough
+ case TARGET_SKELETON:
+ // default line information
+ iFlag = Opt::baseopt.iFlag;
+ // default environment-sensitive formatting
+ topIndent = Opt::baseopt.topIndent;
+ indString = Opt::baseopt.indString;
+ condDivider = Opt::baseopt.condDivider;
+ condDividerParam = Opt::baseopt.condDividerParam;
+ // default environment bindings
+ yycondtype = Opt::baseopt.yycondtype;
+ cond_get = Opt::baseopt.cond_get;
+ cond_get_naked = Opt::baseopt.cond_get_naked;
+ cond_set = Opt::baseopt.cond_set;
+ cond_set_arg = Opt::baseopt.cond_set_arg;
+ cond_set_naked = Opt::baseopt.cond_set_naked;
+ yyctable = Opt::baseopt.yyctable;
+ condPrefix = Opt::baseopt.condPrefix;
+ condEnumPrefix = Opt::baseopt.condEnumPrefix;
+ condGoto = Opt::baseopt.condGoto;
+ condGotoParam = Opt::baseopt.condGotoParam;
+ state_get = Opt::baseopt.state_get;
+ state_get_naked = Opt::baseopt.state_get_naked;
+ state_set = Opt::baseopt.state_set;
+ state_set_arg = Opt::baseopt.state_set_arg;
+ state_set_naked = Opt::baseopt.state_set_naked;
+ tags_prefix = Opt::baseopt.tags_prefix;
+ tags_expression = Opt::baseopt.tags_expression;
+ optimize_tags = Opt::baseopt.optimize_tags;
+ yyfilllabel = Opt::baseopt.yyfilllabel;
+ yynext = Opt::baseopt.yynext;
+ yyaccept = Opt::baseopt.yyaccept;
+ bUseStateAbort = Opt::baseopt.bUseStateAbort;
+ bUseStateNext = Opt::baseopt.bUseStateNext;
+ yybm = Opt::baseopt.yybm;
+ yytarget = Opt::baseopt.yytarget;
+ input_api = Opt::baseopt.input_api;
+ yycursor = Opt::baseopt.yycursor;
+ yymarker = Opt::baseopt.yymarker;
+ yyctxmarker = Opt::baseopt.yyctxmarker;
+ yylimit = Opt::baseopt.yylimit;
+ yypeek = Opt::baseopt.yypeek;
+ yyskip = Opt::baseopt.yyskip;
+ yybackup = Opt::baseopt.yybackup;
+ yybackupctx = Opt::baseopt.yybackupctx;
+ yyrestore = Opt::baseopt.yyrestore;
+ yyrestorectx = Opt::baseopt.yyrestorectx;
+ yyrestoretag = Opt::baseopt.yyrestoretag;
+ yystagn = Opt::baseopt.yystagn;
+ yystagp = Opt::baseopt.yystagp;
+ yymtagn = Opt::baseopt.yymtagn;
+ yymtagp = Opt::baseopt.yymtagp;
+ yylessthan = Opt::baseopt.yylessthan;
+ dFlag = Opt::baseopt.dFlag;
+ yydebug = Opt::baseopt.yydebug;
+ yyctype = Opt::baseopt.yyctype;
+ yych = Opt::baseopt.yych;
+ bEmitYYCh = Opt::baseopt.bEmitYYCh;
+ yychConversion = Opt::baseopt.yychConversion;
+ fill = Opt::baseopt.fill;
+ fill_use = Opt::baseopt.fill_use;
+ fill_check = Opt::baseopt.fill_check;
+ fill_arg = Opt::baseopt.fill_arg;
+ fill_arg_use = Opt::baseopt.fill_arg_use;
+ fill_naked = Opt::baseopt.fill_naked;
+ labelPrefix = Opt::baseopt.labelPrefix;
+ startlabel = Opt::baseopt.startlabel;
+ startlabel_force = Opt::baseopt.startlabel_force;
+ break;
+ case TARGET_CODE:
+ break;
+ }
- if (bCaseInsensitive)
- {
- bCaseInverted = Opt::baseopt.bCaseInverted;
- }
+ if (bCaseInsensitive)
+ {
+ bCaseInverted = Opt::baseopt.bCaseInverted;
+ }
- // respect hierarchy
- if (!globopts->cFlag) {
- yycondtype = Opt::baseopt.yycondtype;
- cond_get = Opt::baseopt.cond_get;
- cond_get_naked = Opt::baseopt.cond_get_naked;
- cond_set = Opt::baseopt.cond_set;
- cond_set_arg = Opt::baseopt.cond_set_arg;
- cond_set_naked = Opt::baseopt.cond_set_naked;
- yyctable = Opt::baseopt.yyctable;
- condPrefix = Opt::baseopt.condPrefix;
- condEnumPrefix = Opt::baseopt.condEnumPrefix;
- condDivider = Opt::baseopt.condDivider;
- condDividerParam = Opt::baseopt.condDividerParam;
- condGoto = Opt::baseopt.condGoto;
- condGotoParam = Opt::baseopt.condGotoParam;
- }
- if (!globopts->fFlag) {
- state_get = Opt::baseopt.state_get;
- state_get_naked = Opt::baseopt.state_get_naked;
- state_set = Opt::baseopt.state_set;
- state_set_arg = Opt::baseopt.state_set_arg;
- state_set_naked = Opt::baseopt.state_set_naked;
- yyfilllabel = Opt::baseopt.yyfilllabel;
- yynext = Opt::baseopt.yynext;
- yyaccept = Opt::baseopt.yyaccept;
- bUseStateAbort = Opt::baseopt.bUseStateAbort;
- bUseStateNext = Opt::baseopt.bUseStateNext;
- }
- if (posix_captures) {
- tags = true;
- }
- if (!tags)
- {
- tags_prefix = Opt::baseopt.tags_prefix;
- tags_expression = Opt::baseopt.tags_expression;
- lookahead = Opt::baseopt.lookahead;
- optimize_tags = Opt::baseopt.optimize_tags;
- }
- if (!bFlag)
- {
- yybmHexTable = Opt::baseopt.yybmHexTable;
- yybm = Opt::baseopt.yybm;
- }
- if (!gFlag)
- {
- cGotoThreshold = Opt::baseopt.cGotoThreshold;
- yytarget = Opt::baseopt.yytarget;
- }
- if (input_api != INPUT_DEFAULT)
- {
- yycursor = Opt::baseopt.yycursor;
- yymarker = Opt::baseopt.yymarker;
- yyctxmarker = Opt::baseopt.yyctxmarker;
- yylimit = Opt::baseopt.yylimit;
- }
- if (input_api != INPUT_CUSTOM)
- {
- yypeek = Opt::baseopt.yypeek;
- yyskip = Opt::baseopt.yyskip;
- yybackup = Opt::baseopt.yybackup;
- yybackupctx = Opt::baseopt.yybackupctx;
- yyrestore = Opt::baseopt.yyrestore;
- yyrestorectx = Opt::baseopt.yyrestorectx;
- yyrestoretag = Opt::baseopt.yyrestoretag;
- yystagn = Opt::baseopt.yystagn;
- yystagp = Opt::baseopt.yystagp;
- yymtagn = Opt::baseopt.yymtagn;
- yymtagp = Opt::baseopt.yymtagp;
- }
- if (!dFlag)
- {
- yydebug = Opt::baseopt.yydebug;
- }
- if (!fill_use)
- {
- fill = Opt::baseopt.fill;
- fill_check = Opt::baseopt.fill_check;
- fill_arg = Opt::baseopt.fill_arg;
- fill_arg_use = Opt::baseopt.fill_arg_use;
- fill_naked = Opt::baseopt.fill_naked;
- }
+ // respect hierarchy
+ if (!globopts->cFlag) {
+ yycondtype = Opt::baseopt.yycondtype;
+ cond_get = Opt::baseopt.cond_get;
+ cond_get_naked = Opt::baseopt.cond_get_naked;
+ cond_set = Opt::baseopt.cond_set;
+ cond_set_arg = Opt::baseopt.cond_set_arg;
+ cond_set_naked = Opt::baseopt.cond_set_naked;
+ yyctable = Opt::baseopt.yyctable;
+ condPrefix = Opt::baseopt.condPrefix;
+ condEnumPrefix = Opt::baseopt.condEnumPrefix;
+ condDivider = Opt::baseopt.condDivider;
+ condDividerParam = Opt::baseopt.condDividerParam;
+ condGoto = Opt::baseopt.condGoto;
+ condGotoParam = Opt::baseopt.condGotoParam;
+ }
+ if (!globopts->fFlag) {
+ state_get = Opt::baseopt.state_get;
+ state_get_naked = Opt::baseopt.state_get_naked;
+ state_set = Opt::baseopt.state_set;
+ state_set_arg = Opt::baseopt.state_set_arg;
+ state_set_naked = Opt::baseopt.state_set_naked;
+ yyfilllabel = Opt::baseopt.yyfilllabel;
+ yynext = Opt::baseopt.yynext;
+ yyaccept = Opt::baseopt.yyaccept;
+ bUseStateAbort = Opt::baseopt.bUseStateAbort;
+ bUseStateNext = Opt::baseopt.bUseStateNext;
+ }
+ if (posix_captures) {
+ tags = true;
+ }
+ if (!tags)
+ {
+ tags_prefix = Opt::baseopt.tags_prefix;
+ tags_expression = Opt::baseopt.tags_expression;
+ lookahead = Opt::baseopt.lookahead;
+ optimize_tags = Opt::baseopt.optimize_tags;
+ }
+ if (!bFlag)
+ {
+ yybmHexTable = Opt::baseopt.yybmHexTable;
+ yybm = Opt::baseopt.yybm;
+ }
+ if (!gFlag)
+ {
+ cGotoThreshold = Opt::baseopt.cGotoThreshold;
+ yytarget = Opt::baseopt.yytarget;
+ }
+ if (input_api != INPUT_DEFAULT)
+ {
+ yycursor = Opt::baseopt.yycursor;
+ yymarker = Opt::baseopt.yymarker;
+ yyctxmarker = Opt::baseopt.yyctxmarker;
+ yylimit = Opt::baseopt.yylimit;
+ }
+ if (input_api != INPUT_CUSTOM)
+ {
+ yypeek = Opt::baseopt.yypeek;
+ yyskip = Opt::baseopt.yyskip;
+ yybackup = Opt::baseopt.yybackup;
+ yybackupctx = Opt::baseopt.yybackupctx;
+ yyrestore = Opt::baseopt.yyrestore;
+ yyrestorectx = Opt::baseopt.yyrestorectx;
+ yyrestoretag = Opt::baseopt.yyrestoretag;
+ yystagn = Opt::baseopt.yystagn;
+ yystagp = Opt::baseopt.yystagp;
+ yymtagn = Opt::baseopt.yymtagn;
+ yymtagp = Opt::baseopt.yymtagp;
+ }
+ if (!dFlag)
+ {
+ yydebug = Opt::baseopt.yydebug;
+ }
+ if (!fill_use)
+ {
+ fill = Opt::baseopt.fill;
+ fill_check = Opt::baseopt.fill_check;
+ fill_arg = Opt::baseopt.fill_arg;
+ fill_arg_use = Opt::baseopt.fill_arg_use;
+ fill_naked = Opt::baseopt.fill_naked;
+ }
- // force individual options
- switch (globopts->target) {
- case TARGET_DOT:
- iFlag = true;
- break;
- case TARGET_SKELETON:
- iFlag = true;
- input_api = INPUT_CUSTOM;
- indString = " ";
- topIndent = 2;
- break;
- case TARGET_CODE:
- break;
- }
- switch (encoding.type()) {
- case Enc::UCS2:
- case Enc::UTF16:
- case Enc::UTF32:
- sFlag = true;
- break;
- case Enc::ASCII:
- case Enc::EBCDIC:
- case Enc::UTF8:
- break;
- }
- if (bFlag)
- {
- sFlag = true;
- }
- if (gFlag)
- {
- bFlag = true;
- sFlag = true;
- }
- if (!lookahead) {
- eager_skip = true;
- }
+ // force individual options
+ switch (globopts->target) {
+ case TARGET_DOT:
+ iFlag = true;
+ break;
+ case TARGET_SKELETON:
+ iFlag = true;
+ input_api = INPUT_CUSTOM;
+ indString = " ";
+ topIndent = 2;
+ break;
+ case TARGET_CODE:
+ break;
+ }
+ switch (encoding.type()) {
+ case Enc::UCS2:
+ case Enc::UTF16:
+ case Enc::UTF32:
+ sFlag = true;
+ break;
+ case Enc::ASCII:
+ case Enc::EBCDIC:
+ case Enc::UTF8:
+ break;
+ }
+ if (bFlag)
+ {
+ sFlag = true;
+ }
+ if (gFlag)
+ {
+ bFlag = true;
+ sFlag = true;
+ }
+ if (!lookahead) {
+ eager_skip = true;
+ }
}
const mutopt_t Opt::baseopt;
bool Opt::source (const char *s)
{
- if (source_file)
- {
- error ("multiple source files: %s, %s", source_file, s);
- return false;
- }
- else
- {
- source_file = s;
- return true;
- }
+ if (source_file)
+ {
+ error ("multiple source files: %s, %s", source_file, s);
+ return false;
+ }
+ else
+ {
+ source_file = s;
+ return true;
+ }
}
void Opt::reset_startlabel()
{
- set_startlabel(Opt::baseopt.startlabel);
- set_startlabel_force(Opt::baseopt.startlabel_force);
+ set_startlabel(Opt::baseopt.startlabel);
+ set_startlabel_force(Opt::baseopt.startlabel_force);
}
void Opt::reset_mapCodeName ()
{
- // historically arranged set of names
- // no actual reason why these particular options should be reset
- set_cond_get(Opt::baseopt.cond_get);
- set_cond_set(Opt::baseopt.cond_set);
- set_fill(Opt::baseopt.fill);
- set_state_get(Opt::baseopt.state_get);
- set_state_set(Opt::baseopt.state_set);
- set_yybackup(Opt::baseopt.yybackup);
- set_yybackupctx(Opt::baseopt.yybackupctx);
- set_yycondtype(Opt::baseopt.yycondtype);
- set_yyctxmarker(Opt::baseopt.yyctxmarker);
- set_yyctype(Opt::baseopt.yyctype);
- set_yycursor(Opt::baseopt.yycursor);
- set_yydebug(Opt::baseopt.yydebug);
- set_yylessthan(Opt::baseopt.yylessthan);
- set_yylimit(Opt::baseopt.yylimit);
- set_yymarker(Opt::baseopt.yymarker);
- set_yypeek(Opt::baseopt.yypeek);
- set_yyrestore(Opt::baseopt.yyrestore);
- set_yyrestorectx(Opt::baseopt.yyrestorectx);
- set_yyrestoretag(Opt::baseopt.yyrestoretag);
- set_yystagn(Opt::baseopt.yystagn);
- set_yystagp(Opt::baseopt.yystagp);
- set_yymtagn(Opt::baseopt.yymtagn);
- set_yymtagp(Opt::baseopt.yymtagp);
- set_yyskip(Opt::baseopt.yyskip);
- set_yyfilllabel(Opt::baseopt.yyfilllabel);
- set_yynext(Opt::baseopt.yynext);
- set_yyaccept(Opt::baseopt.yyaccept);
- set_yybm(Opt::baseopt.yybm);
- set_yych(Opt::baseopt.yych);
- set_yyctable(Opt::baseopt.yyctable);
- set_yytarget(Opt::baseopt.yytarget);
+ // historically arranged set of names
+ // no actual reason why these particular options should be reset
+ set_cond_get(Opt::baseopt.cond_get);
+ set_cond_set(Opt::baseopt.cond_set);
+ set_fill(Opt::baseopt.fill);
+ set_state_get(Opt::baseopt.state_get);
+ set_state_set(Opt::baseopt.state_set);
+ set_yybackup(Opt::baseopt.yybackup);
+ set_yybackupctx(Opt::baseopt.yybackupctx);
+ set_yycondtype(Opt::baseopt.yycondtype);
+ set_yyctxmarker(Opt::baseopt.yyctxmarker);
+ set_yyctype(Opt::baseopt.yyctype);
+ set_yycursor(Opt::baseopt.yycursor);
+ set_yydebug(Opt::baseopt.yydebug);
+ set_yylessthan(Opt::baseopt.yylessthan);
+ set_yylimit(Opt::baseopt.yylimit);
+ set_yymarker(Opt::baseopt.yymarker);
+ set_yypeek(Opt::baseopt.yypeek);
+ set_yyrestore(Opt::baseopt.yyrestore);
+ set_yyrestorectx(Opt::baseopt.yyrestorectx);
+ set_yyrestoretag(Opt::baseopt.yyrestoretag);
+ set_yystagn(Opt::baseopt.yystagn);
+ set_yystagp(Opt::baseopt.yystagp);
+ set_yymtagn(Opt::baseopt.yymtagn);
+ set_yymtagp(Opt::baseopt.yymtagp);
+ set_yyskip(Opt::baseopt.yyskip);
+ set_yyfilllabel(Opt::baseopt.yyfilllabel);
+ set_yynext(Opt::baseopt.yynext);
+ set_yyaccept(Opt::baseopt.yyaccept);
+ set_yybm(Opt::baseopt.yybm);
+ set_yych(Opt::baseopt.yych);
+ set_yyctable(Opt::baseopt.yyctable);
+ set_yytarget(Opt::baseopt.yytarget);
}
} // namespace re2c
enum target_t
{
- TARGET_CODE,
- TARGET_DOT,
- TARGET_SKELETON
+ TARGET_CODE,
+ TARGET_DOT,
+ TARGET_SKELETON
};
/* note [constant and mutable options]
*/
#define RE2C_CONSTOPTS \
- CONSTOPT1 (target_t, target, TARGET_CODE) \
- CONSTOPT (std::string, output_file, "") \
- CONSTOPT (std::string, header_file, "") \
- CONSTOPT (bool, bNoGenerationDate, false) \
- CONSTOPT (bool, version, true) \
- CONSTOPT (bool, cFlag, false) \
- CONSTOPT (bool, fFlag, false) \
- CONSTOPT (bool, rFlag, false) \
- CONSTOPT (bool, FFlag, false) \
- /* debug */ \
- CONSTOPT (bool, dump_nfa, false) \
- CONSTOPT (bool, dump_dfa_raw, false) \
- CONSTOPT (bool, dump_dfa_det, false) \
- CONSTOPT (bool, dump_dfa_tagopt, false) \
- CONSTOPT (bool, dump_dfa_min, false) \
- CONSTOPT (bool, dump_adfa, false)
+ CONSTOPT1 (target_t, target, TARGET_CODE) \
+ CONSTOPT (std::string, output_file, "") \
+ CONSTOPT (std::string, header_file, "") \
+ CONSTOPT (bool, bNoGenerationDate, false) \
+ CONSTOPT (bool, version, true) \
+ CONSTOPT (bool, cFlag, false) \
+ CONSTOPT (bool, fFlag, false) \
+ CONSTOPT (bool, rFlag, false) \
+ CONSTOPT (bool, FFlag, false) \
+ /* debug */ \
+ CONSTOPT (bool, dump_nfa, false) \
+ CONSTOPT (bool, dump_dfa_raw, false) \
+ CONSTOPT (bool, dump_dfa_det, false) \
+ CONSTOPT (bool, dump_dfa_tagopt, false) \
+ CONSTOPT (bool, dump_dfa_min, false) \
+ CONSTOPT (bool, dump_adfa, false)
#define RE2C_MUTOPTS \
- /* regular expressions */ \
- MUTOPT1 (Enc, encoding, Enc ()) \
- MUTOPT (bool, bCaseInsensitive, false) \
- MUTOPT (bool, bCaseInverted, false) \
- MUTOPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \
- /* conditions */ \
- MUTOPT (std::string, yycondtype, "YYCONDTYPE") \
- MUTOPT (std::string, cond_get, "YYGETCONDITION") \
- MUTOPT (bool, cond_get_naked, false) \
- MUTOPT (std::string, cond_set, "YYSETCONDITION" ) \
- MUTOPT (std::string, cond_set_arg, "@@" ) \
- MUTOPT (bool, cond_set_naked, false ) \
- MUTOPT (std::string, yyctable, "yyctable") \
- MUTOPT (std::string, condPrefix, "yyc_") \
- MUTOPT (std::string, condEnumPrefix, "yyc") \
- MUTOPT (std::string, condDivider, "/* *********************************** */") \
- MUTOPT (std::string, condDividerParam, "@@") \
- MUTOPT (std::string, condGoto, "goto @@;") \
- MUTOPT (std::string, condGotoParam, "@@") \
- /* states */ \
- MUTOPT (std::string, state_get, "YYGETSTATE") \
- MUTOPT (bool, state_get_naked, false) \
- MUTOPT (std::string, state_set, "YYSETSTATE") \
- MUTOPT (std::string, state_set_arg, "@@") \
- MUTOPT (bool, state_set_naked, false) \
- MUTOPT (std::string, yyfilllabel, "yyFillLabel") \
- MUTOPT (std::string, yynext, "yyNext") \
- MUTOPT (std::string, yyaccept, "yyaccept") \
- MUTOPT (bool, bUseStateAbort, false) \
- MUTOPT (bool, bUseStateNext, false) \
- /* tags */ \
- MUTOPT (bool, tags, false) \
- MUTOPT (std::string, tags_prefix, "yyt") \
- MUTOPT (std::string, tags_expression, "@@") \
- MUTOPT (bool, posix_captures, false) \
- MUTOPT (bool, optimize_tags, true) \
- /* code generation */ \
- MUTOPT (bool, sFlag, false) \
- MUTOPT (bool, bFlag, false) \
- MUTOPT (std::string, yybm, "yybm") \
- MUTOPT (bool, yybmHexTable, false) \
- MUTOPT (bool, gFlag, false) \
- MUTOPT (std::string, yytarget, "yytarget") \
- MUTOPT (uint32_t, cGotoThreshold, 9) \
- /* formatting */ \
- MUTOPT (uint32_t, topIndent, 0) \
- MUTOPT (std::string, indString, "\t") \
- /* input API */ \
- MUTOPT (input_api_t, input_api, INPUT_DEFAULT) \
- MUTOPT (std::string, yycursor, "YYCURSOR") \
- MUTOPT (std::string, yymarker, "YYMARKER") \
- MUTOPT (std::string, yyctxmarker, "YYCTXMARKER") \
- MUTOPT (std::string, yylimit, "YYLIMIT") \
- MUTOPT (std::string, yypeek, "YYPEEK") \
- MUTOPT (std::string, yyskip, "YYSKIP") \
- MUTOPT (std::string, yybackup, "YYBACKUP") \
- MUTOPT (std::string, yybackupctx, "YYBACKUPCTX") \
- MUTOPT (std::string, yyrestore, "YYRESTORE") \
- MUTOPT (std::string, yyrestorectx, "YYRESTORECTX") \
- MUTOPT (std::string, yyrestoretag, "YYRESTORETAG") \
- MUTOPT (std::string, yylessthan, "YYLESSTHAN") \
- MUTOPT (std::string, yystagn, "YYSTAGN") \
- MUTOPT (std::string, yystagp, "YYSTAGP") \
- MUTOPT (std::string, yymtagn, "YYMTAGN") \
- MUTOPT (std::string, yymtagp, "YYMTAGP") \
- /* #line directives */ \
- MUTOPT (bool, iFlag, false) \
- /* debug */ \
- MUTOPT (bool, dFlag, false) \
- MUTOPT (std::string, yydebug, "YYDEBUG") \
- /* yych */ \
- MUTOPT (std::string, yyctype, "YYCTYPE") \
- MUTOPT (std::string, yych, "yych") \
- MUTOPT (bool, bEmitYYCh, true) \
- MUTOPT (bool, yychConversion, false) \
- /* YYFILL */ \
- MUTOPT (std::string, fill, "YYFILL") \
- MUTOPT (bool, fill_use, true) \
- MUTOPT (bool, fill_check, true) \
- MUTOPT (std::string, fill_arg, "@@") \
- MUTOPT (bool, fill_arg_use, true) \
- MUTOPT (bool, fill_naked, false) \
- /* labels */ \
- MUTOPT (std::string, labelPrefix, "yy") \
- MUTOPT (std::string, startlabel, "") \
- MUTOPT (bool, startlabel_force, false) \
- /* internals */ \
- MUTOPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) \
- MUTOPT (bool, lookahead, true) \
- MUTOPT (bool, eager_skip, false)
+ /* regular expressions */ \
+ MUTOPT1 (Enc, encoding, Enc ()) \
+ MUTOPT (bool, bCaseInsensitive, false) \
+ MUTOPT (bool, bCaseInverted, false) \
+ MUTOPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \
+ /* conditions */ \
+ MUTOPT (std::string, yycondtype, "YYCONDTYPE") \
+ MUTOPT (std::string, cond_get, "YYGETCONDITION") \
+ MUTOPT (bool, cond_get_naked, false) \
+ MUTOPT (std::string, cond_set, "YYSETCONDITION" ) \
+ MUTOPT (std::string, cond_set_arg, "@@" ) \
+ MUTOPT (bool, cond_set_naked, false ) \
+ MUTOPT (std::string, yyctable, "yyctable") \
+ MUTOPT (std::string, condPrefix, "yyc_") \
+ MUTOPT (std::string, condEnumPrefix, "yyc") \
+ MUTOPT (std::string, condDivider, "/* *********************************** */") \
+ MUTOPT (std::string, condDividerParam, "@@") \
+ MUTOPT (std::string, condGoto, "goto @@;") \
+ MUTOPT (std::string, condGotoParam, "@@") \
+ /* states */ \
+ MUTOPT (std::string, state_get, "YYGETSTATE") \
+ MUTOPT (bool, state_get_naked, false) \
+ MUTOPT (std::string, state_set, "YYSETSTATE") \
+ MUTOPT (std::string, state_set_arg, "@@") \
+ MUTOPT (bool, state_set_naked, false) \
+ MUTOPT (std::string, yyfilllabel, "yyFillLabel") \
+ MUTOPT (std::string, yynext, "yyNext") \
+ MUTOPT (std::string, yyaccept, "yyaccept") \
+ MUTOPT (bool, bUseStateAbort, false) \
+ MUTOPT (bool, bUseStateNext, false) \
+ /* tags */ \
+ MUTOPT (bool, tags, false) \
+ MUTOPT (std::string, tags_prefix, "yyt") \
+ MUTOPT (std::string, tags_expression, "@@") \
+ MUTOPT (bool, posix_captures, false) \
+ MUTOPT (bool, optimize_tags, true) \
+ /* code generation */ \
+ MUTOPT (bool, sFlag, false) \
+ MUTOPT (bool, bFlag, false) \
+ MUTOPT (std::string, yybm, "yybm") \
+ MUTOPT (bool, yybmHexTable, false) \
+ MUTOPT (bool, gFlag, false) \
+ MUTOPT (std::string, yytarget, "yytarget") \
+ MUTOPT (uint32_t, cGotoThreshold, 9) \
+ /* formatting */ \
+ MUTOPT (uint32_t, topIndent, 0) \
+ MUTOPT (std::string, indString, "\t") \
+ /* input API */ \
+ MUTOPT (input_api_t, input_api, INPUT_DEFAULT) \
+ MUTOPT (std::string, yycursor, "YYCURSOR") \
+ MUTOPT (std::string, yymarker, "YYMARKER") \
+ MUTOPT (std::string, yyctxmarker, "YYCTXMARKER") \
+ MUTOPT (std::string, yylimit, "YYLIMIT") \
+ MUTOPT (std::string, yypeek, "YYPEEK") \
+ MUTOPT (std::string, yyskip, "YYSKIP") \
+ MUTOPT (std::string, yybackup, "YYBACKUP") \
+ MUTOPT (std::string, yybackupctx, "YYBACKUPCTX") \
+ MUTOPT (std::string, yyrestore, "YYRESTORE") \
+ MUTOPT (std::string, yyrestorectx, "YYRESTORECTX") \
+ MUTOPT (std::string, yyrestoretag, "YYRESTORETAG") \
+ MUTOPT (std::string, yylessthan, "YYLESSTHAN") \
+ MUTOPT (std::string, yystagn, "YYSTAGN") \
+ MUTOPT (std::string, yystagp, "YYSTAGP") \
+ MUTOPT (std::string, yymtagn, "YYMTAGN") \
+ MUTOPT (std::string, yymtagp, "YYMTAGP") \
+ /* #line directives */ \
+ MUTOPT (bool, iFlag, false) \
+ /* debug */ \
+ MUTOPT (bool, dFlag, false) \
+ MUTOPT (std::string, yydebug, "YYDEBUG") \
+ /* yych */ \
+ MUTOPT (std::string, yyctype, "YYCTYPE") \
+ MUTOPT (std::string, yych, "yych") \
+ MUTOPT (bool, bEmitYYCh, true) \
+ MUTOPT (bool, yychConversion, false) \
+ /* YYFILL */ \
+ MUTOPT (std::string, fill, "YYFILL") \
+ MUTOPT (bool, fill_use, true) \
+ MUTOPT (bool, fill_check, true) \
+ MUTOPT (std::string, fill_arg, "@@") \
+ MUTOPT (bool, fill_arg_use, true) \
+ MUTOPT (bool, fill_naked, false) \
+ /* labels */ \
+ MUTOPT (std::string, labelPrefix, "yy") \
+ MUTOPT (std::string, startlabel, "") \
+ MUTOPT (bool, startlabel_force, false) \
+ /* internals */ \
+ MUTOPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) \
+ MUTOPT (bool, lookahead, true) \
+ MUTOPT (bool, eager_skip, false)
struct conopt_t
{
-# define CONSTOPT1 CONSTOPT
-# define CONSTOPT(type, name, value) type name;
- RE2C_CONSTOPTS
-# undef CONSTOPT1
-# undef CONSTOPT
+# define CONSTOPT1 CONSTOPT
+# define CONSTOPT(type, name, value) type name;
+ RE2C_CONSTOPTS
+# undef CONSTOPT1
+# undef CONSTOPT
- conopt_t()
-# define CONSTOPT1(type, name, value) : name(value)
-# define CONSTOPT(type, name, value) , name(value)
- RE2C_CONSTOPTS
-# undef CONSTOPT1
-# undef CONSTOPT
- {}
- void fix();
- FORBID_COPY(conopt_t);
+ conopt_t()
+# define CONSTOPT1(type, name, value) : name(value)
+# define CONSTOPT(type, name, value) , name(value)
+ RE2C_CONSTOPTS
+# undef CONSTOPT1
+# undef CONSTOPT
+ {}
+ void fix();
+ FORBID_COPY(conopt_t);
};
struct mutopt_t
{
-# define MUTOPT1 MUTOPT
-# define MUTOPT(type, name, value) type name;
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
+# define MUTOPT1 MUTOPT
+# define MUTOPT(type, name, value) type name;
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
- mutopt_t()
-# define MUTOPT1(type, name, value) : name(value)
-# define MUTOPT(type, name, value) , name(value)
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
- {}
- void fix(const conopt_t *globopts);
- FORBID_COPY(mutopt_t);
+ mutopt_t()
+# define MUTOPT1(type, name, value) : name(value)
+# define MUTOPT(type, name, value) , name(value)
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
+ {}
+ void fix(const conopt_t *globopts);
+ FORBID_COPY(mutopt_t);
};
struct opt_t
{
-# define CONSTOPT1 CONSTOPT
-# define CONSTOPT(type, name, value) type name;
- RE2C_CONSTOPTS
-# undef CONSTOPT1
-# undef CONSTOPT
+# define CONSTOPT1 CONSTOPT
+# define CONSTOPT(type, name, value) type name;
+ RE2C_CONSTOPTS
+# undef CONSTOPT1
+# undef CONSTOPT
-# define MUTOPT1 MUTOPT
-# define MUTOPT(type, name, value) type name;
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
+# define MUTOPT1 MUTOPT
+# define MUTOPT(type, name, value) type name;
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
- opt_t(const conopt_t &con, const mutopt_t &mut)
-# define CONSTOPT1(type, name, value) : name(con.name)
-# define CONSTOPT(type, name, value) , name(con.name)
- RE2C_CONSTOPTS
-# undef CONSTOPT1
-# undef CONSTOPT
-# define MUTOPT1 MUTOPT
-# define MUTOPT(type, name, value) , name(mut.name)
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
- {}
+ opt_t(const conopt_t &con, const mutopt_t &mut)
+# define CONSTOPT1(type, name, value) : name(con.name)
+# define CONSTOPT(type, name, value) , name(con.name)
+ RE2C_CONSTOPTS
+# undef CONSTOPT1
+# undef CONSTOPT
+# define MUTOPT1 MUTOPT
+# define MUTOPT(type, name, value) , name(mut.name)
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
+ {}
};
// see note [constant and mutable options]
struct Opt
{
- static const mutopt_t baseopt;
+ static const mutopt_t baseopt;
- const char *source_file;
- const conopt_t &glob;
+ const char *source_file;
+ const conopt_t &glob;
private:
- mutopt_t user;
- mutopt_t real;
- bool diverge;
+ mutopt_t user;
+ mutopt_t real;
+ bool diverge;
- void sync()
- {
- if (!diverge) return;
-# define MUTOPT1 MUTOPT
-# define MUTOPT(type, name, value) real.name = user.name;
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
- real.fix(&glob);
- diverge = false;
- }
+ void sync()
+ {
+ if (!diverge) return;
+# define MUTOPT1 MUTOPT
+# define MUTOPT(type, name, value) real.name = user.name;
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
+ real.fix(&glob);
+ diverge = false;
+ }
public:
- explicit Opt(const conopt_t &globopts)
- : source_file(NULL)
- , glob(globopts)
- , user()
- , real()
- , diverge(true)
- {}
+ explicit Opt(const conopt_t &globopts)
+ : source_file(NULL)
+ , glob(globopts)
+ , user()
+ , real()
+ , diverge(true)
+ {}
- const opt_t *snapshot()
- {
- sync();
- return new opt_t(glob, real);
- }
+ const opt_t *snapshot()
+ {
+ sync();
+ return new opt_t(glob, real);
+ }
- void restore(const opt_t *opts)
- {
-# define MUTOPT1 MUTOPT
-# define MUTOPT(type, name, value) user.name = opts->name;
- RE2C_MUTOPTS
-# undef MUTOPT1
-# undef MUTOPT
- diverge = true;
- sync();
- }
+ void restore(const opt_t *opts)
+ {
+# define MUTOPT1 MUTOPT
+# define MUTOPT(type, name, value) user.name = opts->name;
+ RE2C_MUTOPTS
+# undef MUTOPT1
+# undef MUTOPT
+ diverge = true;
+ sync();
+ }
- bool source (const char *s);
+ bool source (const char *s);
- // RE2C allows to set configurations anywhere inside of a block
- // (in the beginning, intermixed with rules, in the end): they will
- // affect the whole block anyway. Thus one is not allowed to read
- // configurations until the whole block has been parsed. Immutable
- // options, on the contrary, are accessible for reading all the time
- // (the parser itself depends on them).
- void set_encoding(Enc::type_t t) { user.encoding.set(t); }
- void unset_encoding(Enc::type_t t) { user.encoding.unset(t); }
- void set_encoding_policy(Enc::policy_t p) { user.encoding.setPolicy(p); }
+ // RE2C allows to set configurations anywhere inside of a block
+ // (in the beginning, intermixed with rules, in the end): they will
+ // affect the whole block anyway. Thus one is not allowed to read
+ // configurations until the whole block has been parsed. Immutable
+ // options, on the contrary, are accessible for reading all the time
+ // (the parser itself depends on them).
+ void set_encoding(Enc::type_t t) { user.encoding.set(t); }
+ void unset_encoding(Enc::type_t t) { user.encoding.unset(t); }
+ void set_encoding_policy(Enc::policy_t p) { user.encoding.setPolicy(p); }
// Generate one setter 'set_<name>(const type &)' per entry of RE2C_MUTOPTS.
// Each generated setter stores its argument into 'user.<name>' and sets
// 'diverge' to true. MUTOPT1 is aliased to MUTOPT so both entry kinds expand
// the same way; both macros are undefined again right after the expansion.
#define MUTOPT1 MUTOPT
#define MUTOPT(type, name, value) void set_##name (const type &arg) { user.name = arg; diverge = true; }
- RE2C_MUTOPTS
+ RE2C_MUTOPTS
#undef MUTOPT1
#undef MUTOPT
- // bad temporary hacks, should be fixed by proper scoping of config (parts).
- void reset_startlabel();
- void reset_mapCodeName ();
+ // bad temporary hacks, should be fixed by proper scoping of config (parts).
+ void reset_startlabel();
+ void reset_mapCodeName ();
- FORBID_COPY (Opt);
+ FORBID_COPY (Opt);
};
// Result type of parse_opts(). Judging by the enumerator names, OK means
// "continue", EXIT_OK "stop successfully" and EXIT_FAIL "stop with failure";
// NOTE(review): exact semantics are defined by parse_opts() -- confirm there.
enum parse_opts_t
{
- OK,
- EXIT_OK,
- EXIT_FAIL
+ OK,
+ EXIT_OK,
+ EXIT_FAIL
};
parse_opts_t parse_opts(char **argv, conopt_t &globopts, Opt &opts, Warn &warn);
// Table of warning names indexed by Warn::type_t. It is built by expanding
// RE2C_WARNING_TYPES with W(x, y) selecting the second field (the string),
// so its order matches the enumerators produced from the same list.
const char * Warn::names [TYPES] =
{
#define W(x, y) y
- RE2C_WARNING_TYPES
+ RE2C_WARNING_TYPES
#undef W
};
// Construct with no accumulated error and with every warning type's mask
// explicitly set to SILENT.
Warn::Warn ()
- : mask ()
- , error_accuml (false)
+ : mask ()
+ , error_accuml (false)
{
- for (uint32_t i = 0; i < TYPES; ++i)
- {
- mask[i] = SILENT;
- }
+ for (uint32_t i = 0; i < TYPES; ++i)
+ {
+ mask[i] = SILENT;
+ }
}
// Returns the accumulated error flag: true once any warning routine has been
// triggered for a type whose mask has the ERROR bit set.
bool Warn::error () const
{
- return error_accuml;
+ return error_accuml;
}
// Update the mask of warning type 't' according to option 'o':
//   W        -- set the WARNING bit
//   WNO      -- clear the WARNING bit
//   WERROR   -- set both the WARNING and ERROR bits
//   WNOERROR -- clear the ERROR bit
// Any other value of 'o' leaves the mask unchanged (the switch has no default).
void Warn::set (type_t t, option_t o)
{
- switch (o)
- {
- case W:
- mask[t] |= WARNING;
- break;
- case WNO:
- mask[t] &= ~WARNING;
- break;
- case WERROR:
- // unlike -Werror, -Werror-<warning> implies -W<warning>
- mask[t] |= (WARNING | ERROR);
- break;
- case WNOERROR:
- mask[t] &= ~ERROR;
- break;
- }
+ switch (o)
+ {
+ case W:
+ mask[t] |= WARNING;
+ break;
+ case WNO:
+ mask[t] &= ~WARNING;
+ break;
+ case WERROR:
+ // unlike -Werror, -Werror-<warning> implies -W<warning>
+ mask[t] |= (WARNING | ERROR);
+ break;
+ case WNOERROR:
+ mask[t] &= ~ERROR;
+ break;
+ }
}
// Set the WARNING bit for every warning type; ERROR bits are left as they are.
void Warn::set_all ()
{
- for (uint32_t i = 0; i < TYPES; ++i)
- {
- mask[i] |= WARNING;
- }
+ for (uint32_t i = 0; i < TYPES; ++i)
+ {
+ mask[i] |= WARNING;
+ }
}
// -Werror doesn't enable any warnings: it only guarantees that if a warning
// has been enabled by now or is enabled later, it will result in an error.
// Implementation: set the ERROR bit for every warning type and leave the
// WARNING bits untouched.
void Warn::set_all_error ()
{
- for (uint32_t i = 0; i < TYPES; ++i)
- {
- mask[i] |= ERROR;
- }
+ for (uint32_t i = 0; i < TYPES; ++i)
+ {
+ mask[i] |= ERROR;
+ }
}
// Emit message 's' for warning type 't' at 'line', but only if the WARNING
// bit of 't' is set. The warning is always passed 'false' as its error flag
// and the accumulated error state is not modified (the method is const),
// i.e. the ERROR bit is ignored here.
void Warn::fail (type_t t, uint32_t line, const char * s) const
{
- if (mask[t] & WARNING)
- {
- // -Werror has no effect
- warning (names[t], line, false, "%s", s);
- }
+ if (mask[t] & WARNING)
+ {
+ // -Werror has no effect
+ warning (names[t], line, false, "%s", s);
+ }
}
// Warn (at 'line') that the program seems to rely on a fixed order of
// conditions. Does nothing unless the WARNING bit of CONDITION_ORDER is set;
// if its ERROR bit is set too, the accumulated error flag is raised and the
// message is reported with the error flag on.
void Warn::condition_order (uint32_t line)
{
- if (mask[CONDITION_ORDER] & WARNING)
- {
- const bool e = mask[CONDITION_ORDER] & ERROR;
- error_accuml |= e;
- warning (names[CONDITION_ORDER], line, e,
- "looks like you use hardcoded numbers instead of autogenerated condition names: "
- "better add '/*!types:re2c*/' directive or '-t, --type-header' option "
- "and don't rely on fixed condition order.");
- }
+ if (mask[CONDITION_ORDER] & WARNING)
+ {
+ const bool e = mask[CONDITION_ORDER] & ERROR;
+ error_accuml |= e;
+ warning (names[CONDITION_ORDER], line, e,
+ "looks like you use hardcoded numbers instead of autogenerated condition names: "
+ "better add '/*!types:re2c*/' directive or '-t, --type-header' option "
+ "and don't rely on fixed condition order.");
+ }
}
// Warn (at 'line') about an empty character class. Does nothing unless the
// WARNING bit of EMPTY_CHARACTER_CLASS is set; the ERROR bit is folded into
// the accumulated error flag and passed on to warning().
void Warn::empty_class (uint32_t line)
{
- if (mask[EMPTY_CHARACTER_CLASS] & WARNING)
- {
- const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR;
- error_accuml |= e;
- warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class");
- }
+ if (mask[EMPTY_CHARACTER_CLASS] & WARNING)
+ {
+ const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR;
+ error_accuml |= e;
+ warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class");
+ }
}
// Warn (at 'line') that a rule matches the empty string. 'cond' is passed
// through incond() to build the condition part of the message. Does nothing
// unless the WARNING bit of MATCH_EMPTY_STRING is set; the ERROR bit is
// folded into the accumulated error flag and passed on to warning().
void Warn::match_empty_string (uint32_t line, const std::string &cond)
{
- if (mask[MATCH_EMPTY_STRING] & WARNING)
- {
- const bool e = mask[MATCH_EMPTY_STRING] & ERROR;
- error_accuml |= e;
- warning (names[MATCH_EMPTY_STRING], line, e,
- "rule %smatches empty string", incond(cond).c_str());
- }
+ if (mask[MATCH_EMPTY_STRING] & WARNING)
+ {
+ const bool e = mask[MATCH_EMPTY_STRING] & ERROR;
+ error_accuml |= e;
+ warning (names[MATCH_EMPTY_STRING], line, e,
+ "rule %smatches empty string", incond(cond).c_str());
+ }
}
// Warn (at 'line') that a tag, or the trailing context when 'tagname' is
// NULL, has degree of nondeterminism 'nver'. 'cond' is passed through
// incond() to build the condition part of the message. Does nothing unless
// the WARNING bit of NONDETERMINISTIC_TAGS is set; the ERROR bit is folded
// into the accumulated error flag and passed to warning_start/warning_end.
// The degree is printed as an English ordinal ("2nd", "3rd", "11th", "22nd").
void Warn::nondeterministic_tags(uint32_t line, const std::string &cond,
- const std::string *tagname, size_t nver)
+ const std::string *tagname, size_t nver)
{
- if (mask[NONDETERMINISTIC_TAGS] & WARNING) {
- bool e = mask[NONDETERMINISTIC_TAGS] & ERROR;
- error_accuml |= e;
-
- warning_start(line, e);
- if (tagname == NULL) {
- fprintf(stderr, "trailing context");
- } else {
- fprintf(stderr, "tag '%s'", tagname->c_str());
- }
- fprintf(stderr,
- " %shas %u%s degree of nondeterminism",
- incond(cond).c_str(), static_cast<uint32_t>(nver),
- nver == 2 ? "nd" : nver == 3 ? "rd" : "th");
- warning_end(names[NONDETERMINISTIC_TAGS], e);
- }
+ if (mask[NONDETERMINISTIC_TAGS] & WARNING) {
+ bool e = mask[NONDETERMINISTIC_TAGS] & ERROR;
+ error_accuml |= e;
+
+ // English ordinal suffix: 11..13 (mod 100) always take "th",
+ // otherwise the last digit decides (1 -> "st", 2 -> "nd", 3 -> "rd").
+ const size_t d10 = nver % 10, d100 = nver % 100;
+ const char *suffix = (d100 >= 11 && d100 <= 13) ? "th"
+ : d10 == 1 ? "st"
+ : d10 == 2 ? "nd"
+ : d10 == 3 ? "rd"
+ : "th";
+
+ warning_start(line, e);
+ if (tagname == NULL) {
+ fprintf(stderr, "trailing context");
+ } else {
+ fprintf(stderr, "tag '%s'", tagname->c_str());
+ }
+ fprintf(stderr,
+ " %shas %u%s degree of nondeterminism",
+ incond(cond).c_str(), static_cast<uint32_t>(nver),
+ suffix);
+ warning_end(names[NONDETERMINISTIC_TAGS], e);
+ }
}
// Warn (at 'line') that a range was written with lower bound 'l' greater
// than upper bound 'u'; both are printed in hexadecimal. Does nothing unless
// the WARNING bit of SWAPPED_RANGE is set; the ERROR bit is folded into the
// accumulated error flag and passed on to warning(). The swap mentioned in
// the message is not performed here.
void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u)
{
- if (mask[SWAPPED_RANGE] & WARNING)
- {
- const bool e = mask[SWAPPED_RANGE] & ERROR;
- error_accuml |= e;
- warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u);
- }
+ if (mask[SWAPPED_RANGE] & WARNING)
+ {
+ const bool e = mask[SWAPPED_RANGE] & ERROR;
+ error_accuml |= e;
+ warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u);
+ }
}
// Warn that control flow is undefined for input matching the given 'paths'
// of skeleton 'skel' (line and condition are taken from 'skel').
// Does nothing unless the WARNING bit of UNDEFINED_CONTROL_FLOW is set; the
// ERROR bit is folded into the accumulated error flag.
// Side effect: 'paths' is sorted in place before printing.
// A single path is printed inline; otherwise each path goes on its own
// tab-indented line. If 'overflow' is true, " ... and a few more" is
// appended to signal that the list is incomplete.
void Warn::undefined_control_flow (const Skeleton &skel, std::vector<path_t> & paths, bool overflow)
{
- if (mask[UNDEFINED_CONTROL_FLOW] & WARNING)
- {
- const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR;
- error_accuml |= e;
-
- // report shorter patterns first
- std::sort (paths.begin (), paths.end ());
-
- warning_start (skel.line, e);
- fprintf (stderr, "control flow %sis undefined for strings that match ", incond (skel.cond).c_str ());
- const size_t count = paths.size ();
- if (count == 1)
- {
- fprint_default_path (stderr, skel, paths[0]);
- }
- else
- {
- for (size_t i = 0; i < count; ++i)
- {
- fprintf (stderr, "\n\t");
- fprint_default_path (stderr, skel, paths[i]);
- }
- fprintf (stderr, "\n");
- }
- if (overflow)
- {
- fprintf (stderr, " ... and a few more");
- }
- fprintf (stderr, ", use default rule '*'");
- warning_end (names[UNDEFINED_CONTROL_FLOW], e);
- }
+ if (mask[UNDEFINED_CONTROL_FLOW] & WARNING)
+ {
+ const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR;
+ error_accuml |= e;
+
+ // report shorter patterns first
+ std::sort (paths.begin (), paths.end ());
+
+ warning_start (skel.line, e);
+ fprintf (stderr, "control flow %sis undefined for strings that match ", incond (skel.cond).c_str ());
+ const size_t count = paths.size ();
+ if (count == 1)
+ {
+ fprint_default_path (stderr, skel, paths[0]);
+ }
+ else
+ {
+ for (size_t i = 0; i < count; ++i)
+ {
+ fprintf (stderr, "\n\t");
+ fprint_default_path (stderr, skel, paths[i]);
+ }
+ fprintf (stderr, "\n");
+ }
+ if (overflow)
+ {
+ fprintf (stderr, " ... and a few more");
+ }
+ fprintf (stderr, ", use default rule '*'");
+ warning_end (names[UNDEFINED_CONTROL_FLOW], e);
+ }
}
// Warn that 'rule' is unreachable; the reported line is 'rule.code->fline'
// and 'cond' is passed through incond() for the condition part.
// Does nothing unless the WARNING bit of UNREACHABLE_RULES is set; the ERROR
// bit is folded into the accumulated error flag.
// If 'rule.shadow' is non-empty, its elements are listed in the set's
// iteration order as "(shadowed by rule[s] at line[s] a, b, ...)", with the
// plural "s" used when there is more than one entry.
void Warn::unreachable_rule(const std::string &cond, const Rule &rule)
{
- if (mask[UNREACHABLE_RULES] & WARNING) {
- const bool e = mask[UNREACHABLE_RULES] & ERROR;
- error_accuml |= e;
-
- warning_start(rule.code->fline, e);
- fprintf(stderr, "unreachable rule %s", incond(cond).c_str());
- const size_t shadows = rule.shadow.size();
- if (shadows > 0) {
- const char * pl = shadows > 1
- ? "s"
- : "";
- std::set<uint32_t>::const_iterator i = rule.shadow.begin();
- fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, *i);
- for (++i; i != rule.shadow.end(); ++i) {
- fprintf(stderr, ", %u", *i);
- }
- fprintf(stderr, ")");
- }
- warning_end(names[UNREACHABLE_RULES], e);
- }
+ if (mask[UNREACHABLE_RULES] & WARNING) {
+ const bool e = mask[UNREACHABLE_RULES] & ERROR;
+ error_accuml |= e;
+
+ warning_start(rule.code->fline, e);
+ fprintf(stderr, "unreachable rule %s", incond(cond).c_str());
+ const size_t shadows = rule.shadow.size();
+ if (shadows > 0) {
+ const char * pl = shadows > 1
+ ? "s"
+ : "";
+ std::set<uint32_t>::const_iterator i = rule.shadow.begin();
+ fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, *i);
+ for (++i; i != rule.shadow.end(); ++i) {
+ fprintf(stderr, ", %u", *i);
+ }
+ fprintf(stderr, ")");
+ }
+ warning_end(names[UNREACHABLE_RULES], e);
+ }
}
// Warn (at 'line', column 'col') that escaping character 'c' has no effect.
// Does nothing unless the WARNING bit of USELESS_ESCAPE is set; the ERROR bit
// is folded into the accumulated error flag and passed on to warning().
void Warn::useless_escape (uint32_t line, uint32_t col, char c)
{
- if (mask[USELESS_ESCAPE] & WARNING)
- {
- const bool e = mask[USELESS_ESCAPE] & ERROR;
- error_accuml |= e;
- warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c);
- }
+ if (mask[USELESS_ESCAPE] & WARNING)
+ {
+ const bool e = mask[USELESS_ESCAPE] & ERROR;
+ error_accuml |= e;
+ warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c);
+ }
}
} // namespace re2c
struct Skeleton;
// X-macro list of all warning types as W(ENUMERATOR, "command-line-name")
// entries, each followed by a comma. Users define W(x, y) to pick the field
// they need before expanding the list and undefine it afterwards; this is
// how Warn::type_t and Warn::names are generated and kept in the same order.
#define RE2C_WARNING_TYPES \
- W (CONDITION_ORDER, "condition-order"), \
- W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \
- W (MATCH_EMPTY_STRING, "match-empty-string"), \
- W (NONDETERMINISTIC_TAGS, "nondeterministic-tags"), \
- W (SWAPPED_RANGE, "swapped-range"), \
- W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \
- W (UNREACHABLE_RULES, "unreachable-rules"), \
- W (USELESS_ESCAPE, "useless-escape"),
+ W (CONDITION_ORDER, "condition-order"), \
+ W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \
+ W (MATCH_EMPTY_STRING, "match-empty-string"), \
+ W (NONDETERMINISTIC_TAGS, "nondeterministic-tags"), \
+ W (SWAPPED_RANGE, "swapped-range"), \
+ W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \
+ W (UNREACHABLE_RULES, "unreachable-rules"), \
+ W (USELESS_ESCAPE, "useless-escape"),
// Warning manager. It keeps one bit mask per warning type (combinations of
// the SILENT / WARNING / ERROR constants, whose values are defined elsewhere)
// plus an accumulated "an error-level warning has fired" flag.
//
// type_t   -- one enumerator per entry of RE2C_WARNING_TYPES, followed by
//             TYPES, which therefore equals the number of warning types.
// option_t -- the kind of mask update requested through set().
//
// set(), set_all() and set_all_error() adjust the masks, error() reports the
// accumulated flag, and the remaining methods each emit one specific warning.
class Warn
{
public:
- enum type_t
- {
+ enum type_t
+ {
#define W(x, y) x
- RE2C_WARNING_TYPES
+ RE2C_WARNING_TYPES
#undef W
- TYPES // count
- };
- enum option_t
- {
- W,
- WNO,
- WERROR,
- WNOERROR
- };
+ TYPES // count
+ };
+ enum option_t
+ {
+ W,
+ WNO,
+ WERROR,
+ WNOERROR
+ };
private:
- static const uint32_t SILENT;
- static const uint32_t WARNING;
- static const uint32_t ERROR;
- static const char * names [TYPES];
- uint32_t mask[TYPES];
- bool error_accuml;
+ static const uint32_t SILENT;
+ static const uint32_t WARNING;
+ static const uint32_t ERROR;
+ static const char * names [TYPES];
+ uint32_t mask[TYPES];
+ bool error_accuml;
public:
- Warn ();
- bool error () const;
- void set (type_t t, option_t o);
- void set_all ();
- void set_all_error ();
- void fail (type_t t, uint32_t line, const char * s) const;
+ Warn ();
+ bool error () const;
+ void set (type_t t, option_t o);
+ void set_all ();
+ void set_all_error ();
+ void fail (type_t t, uint32_t line, const char * s) const;
- void condition_order (uint32_t line);
- void empty_class (uint32_t line);
- void match_empty_string (uint32_t line, const std::string &cond);
- void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname, size_t nver);
- void swapped_range (uint32_t line, uint32_t l, uint32_t u);
- void undefined_control_flow (const Skeleton &skel, std::vector<path_t> & paths, bool overflow);
- void unreachable_rule (const std::string & cond, const Rule &rule);
- void useless_escape (uint32_t line, uint32_t col, char c);
+ void condition_order (uint32_t line);
+ void empty_class (uint32_t line);
+ void match_empty_string (uint32_t line, const std::string &cond);
+ void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname, size_t nver);
+ void swapped_range (uint32_t line, uint32_t l, uint32_t u);
+ void undefined_control_flow (const Skeleton &skel, std::vector<path_t> & paths, bool overflow);
+ void unreachable_rule (const std::string & cond, const Rule &rule);
+ void useless_escape (uint32_t line, uint32_t col, char c);
};
} // namespace re2c
static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
// Build the control flow graph for DFA 'a' (kept by reference).
// A temporary table 'arc2bb' with nstate * (nsym + 2) entries is allocated:
// map_arcs_to_bblocks() fills it and sets the three bblock counters, then
// create_bblocks() uses it to construct the bblock array. The table is
// released before the constructor returns.
cfg_t::cfg_t(dfa_t &a)
- : dfa(a)
- , bblocks(NULL)
- , nbbarc(0)
- , nbbfin(0)
- , nbbfall(0)
+ : dfa(a)
+ , bblocks(NULL)
+ , nbbarc(0)
+ , nbbfin(0)
+ , nbbfall(0)
{
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
- cfg_ix_t *arc2bb = new cfg_ix_t[nstate * (nsym + 2)];
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+ cfg_ix_t *arc2bb = new cfg_ix_t[nstate * (nsym + 2)];
- map_arcs_to_bblocks(dfa, arc2bb, nbbarc, nbbfin, nbbfall);
- bblocks = create_bblocks(dfa, arc2bb, nbbfin, nbbfall);
+ map_arcs_to_bblocks(dfa, arc2bb, nbbarc, nbbfin, nbbfall);
+ bblocks = create_bblocks(dfa, arc2bb, nbbfin, nbbfall);
- delete[] arc2bb;
+ delete[] arc2bb;
}
// Assign a bblock index to every DFA arc that carries tag commands and
// write the mapping to 'arc2bb' (0 means "no bblock"; index 0 itself is
// reserved for the root bblock, so numbering starts from 1).
//
// Layout written to 'arc2bb', in this order:
//   nstate * nsym entries -- per state, per symbol: transition commands
//   nstate entries        -- per state: final command (tcmd[nsym])
//   nstate entries        -- per state: fallback bblock, assigned only when
//                            the state has 'fallback' set and a non-NULL
//                            final command
// The caller must provide room for nstate * (nsym + 2) entries.
//
// Outputs: 'nbbarc', 'nbbfin' and 'nbbfall' receive the running bblock count
// after the transition, final and fallback phases respectively.
void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb,
- cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall)
+ cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall)
{
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
-
- // root bblock for initial tagged epsilon-transition
- cfg_ix_t nbb = 1;
-
- // bblocks for tagged transitions
- for (size_t i = 0; i < nstate; ++i) {
- tcmd_t **c = dfa.states[i]->tcmd, **f = c + nsym;
- for (; c < f; ++c) {
- *arc2bb++ = *c == NULL ? 0 : nbb++;
- }
- }
- nbbarc = nbb;
-
- // bblock for final tagged epsilon-transition
- for (size_t i = 0; i < nstate; ++i) {
- tcmd_t *f = dfa.states[i]->tcmd[nsym];
- *arc2bb++ = f == NULL ? 0 : nbb++;
- }
- nbbfin = nbb;
-
- // bblock for fallback tagged epsilon-transition
- for (size_t i = 0; i < nstate; ++i) {
- const dfa_state_t *s = dfa.states[i];
- // (check final tags: fallback tags may be empty)
- *arc2bb++ = s->fallback && s->tcmd[nsym] ? nbb++ : 0;
- }
- nbbfall = nbb;
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+
+ // root bblock for initial tagged epsilon-transition
+ cfg_ix_t nbb = 1;
+
+ // bblocks for tagged transitions
+ for (size_t i = 0; i < nstate; ++i) {
+ tcmd_t **c = dfa.states[i]->tcmd, **f = c + nsym;
+ for (; c < f; ++c) {
+ *arc2bb++ = *c == NULL ? 0 : nbb++;
+ }
+ }
+ nbbarc = nbb;
+
+ // bblock for final tagged epsilon-transition
+ for (size_t i = 0; i < nstate; ++i) {
+ tcmd_t *f = dfa.states[i]->tcmd[nsym];
+ *arc2bb++ = f == NULL ? 0 : nbb++;
+ }
+ nbbfin = nbb;
+
+ // bblock for fallback tagged epsilon-transition
+ for (size_t i = 0; i < nstate; ++i) {
+ const dfa_state_t *s = dfa.states[i];
+ // (check final tags: fallback tags may be empty)
+ *arc2bb++ = s->fallback && s->tcmd[nsym] ? nbb++ : 0;
+ }
+ nbbfall = nbb;
}
// Construct the array of 'nbbfall' bblocks described by 'arc2bb'.
//
// Storage comes from allocate<cfg_bb_t>() and the bblocks are built with
// placement new. NOTE(review): allocate<>() is defined elsewhere and is
// presumably raw, unconstructed storage -- confirm.
//
// 'arc2bb' is walked sequentially through 'a2b', so bblocks are constructed
// in exactly the order the indices were assigned: root, transition bblocks,
// final bblocks, fallback bblocks. Keep both traversals in sync.
//
// 'succb' is a scratch buffer of 'nbbfin' entries that collects the
// successor list of the bblock under construction ('succe' is its end); the
// cfg_bb_t constructor copies the list. 'been' is a per-state visited array,
// reset before every traversal. Final bblocks get no successors and carry
// the rule of their state. Fallback bblocks use tcmd[nsym + 1] and collect
// their successors with fallback() rather than successors().
//
// Returns the bblock array; both scratch buffers are freed before returning.
cfg_bb_t *create_bblocks(dfa_t &dfa, const cfg_ix_t *arc2bb,
- cfg_ix_t nbbfin, cfg_ix_t nbbfall)
+ cfg_ix_t nbbfin, cfg_ix_t nbbfall)
{
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
- const cfg_ix_t *a2b = arc2bb;
- cfg_ix_t *succb = new cfg_ix_t[nbbfin], *succe;
- bool *been = new bool[nstate];
-
- cfg_bb_t *bblocks = allocate<cfg_bb_t>(nbbfall), *b = bblocks;
-
- // root bblock
- std::fill(been, been + nstate, false);
- successors(dfa, arc2bb, been, succe = succb, 0);
- new(b++) cfg_bb_t(succb, succe, dfa.tcmd0, NULL);
-
- // transition bblocks
- for (size_t i = 0; i < nstate; ++i) {
- const dfa_state_t *s = dfa.states[i];
- for (size_t c = 0; c < nsym; ++c) {
- if (*a2b++ != 0) {
- std::fill(been, been + nstate, false);
- successors(dfa, arc2bb, been, succe = succb, s->arcs[c]);
- new(b++) cfg_bb_t(succb, succe, s->tcmd[c], NULL);
- }
- }
- }
-
- // final bblocks
- for (size_t i = 0; i < nstate; ++i) {
- if (*a2b++ != 0) {
- const dfa_state_t *s = dfa.states[i];
- new(b++) cfg_bb_t(NULL, NULL, s->tcmd[nsym], &dfa.rules[s->rule]);
- }
- }
-
- // fallback bblocks
- for (size_t i = 0; i < nstate; ++i) {
- if (*a2b++ != 0) {
- const dfa_state_t *s = dfa.states[i];
- std::fill(been, been + nstate, false);
- fallback(dfa, arc2bb, been, succe = succb, i);
- new(b++) cfg_bb_t(succb, succe, s->tcmd[nsym + 1], &dfa.rules[s->rule]);
- }
- }
-
- delete[] succb;
- delete[] been;
- return bblocks;
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+ const cfg_ix_t *a2b = arc2bb;
+ cfg_ix_t *succb = new cfg_ix_t[nbbfin], *succe;
+ bool *been = new bool[nstate];
+
+ cfg_bb_t *bblocks = allocate<cfg_bb_t>(nbbfall), *b = bblocks;
+
+ // root bblock
+ std::fill(been, been + nstate, false);
+ successors(dfa, arc2bb, been, succe = succb, 0);
+ new(b++) cfg_bb_t(succb, succe, dfa.tcmd0, NULL);
+
+ // transition bblocks
+ for (size_t i = 0; i < nstate; ++i) {
+ const dfa_state_t *s = dfa.states[i];
+ for (size_t c = 0; c < nsym; ++c) {
+ if (*a2b++ != 0) {
+ std::fill(been, been + nstate, false);
+ successors(dfa, arc2bb, been, succe = succb, s->arcs[c]);
+ new(b++) cfg_bb_t(succb, succe, s->tcmd[c], NULL);
+ }
+ }
+ }
+
+ // final bblocks
+ for (size_t i = 0; i < nstate; ++i) {
+ if (*a2b++ != 0) {
+ const dfa_state_t *s = dfa.states[i];
+ new(b++) cfg_bb_t(NULL, NULL, s->tcmd[nsym], &dfa.rules[s->rule]);
+ }
+ }
+
+ // fallback bblocks
+ for (size_t i = 0; i < nstate; ++i) {
+ if (*a2b++ != 0) {
+ const dfa_state_t *s = dfa.states[i];
+ std::fill(been, been + nstate, false);
+ fallback(dfa, arc2bb, been, succe = succb, i);
+ new(b++) cfg_bb_t(succb, succe, s->tcmd[nsym + 1], &dfa.rules[s->rule]);
+ }
+ }
+
+ delete[] succb;
+ delete[] been;
+ return bblocks;
}
// Construct a bblock with its own copy of the successor list [sb, se).
// The list is copied into a newly allocated array (allocated even when the
// range is empty, in which case nothing is copied), so the caller may reuse
// its buffer. 'c' is bound by reference: the bblock refers to the caller's
// command-list pointer rather than to a copy of it. 'r' is stored as is and
// may be NULL.
cfg_bb_t::cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se,
- tcmd_t *&c, const Rule *r)
- : succb(NULL)
- , succe(NULL)
- , cmd(c)
- , rule(r)
+ tcmd_t *&c, const Rule *r)
+ : succb(NULL)
+ , succe(NULL)
+ , cmd(c)
+ , rule(r)
{
- const size_t n = static_cast<size_t>(se - sb);
- succb = new cfg_ix_t[n];
- if (n > 0) memcpy(succb, sb, n * sizeof(cfg_ix_t));
- succe = succb + n;
+ const size_t n = static_cast<size_t>(se - sb);
+ succb = new cfg_ix_t[n];
+ if (n > 0) memcpy(succb, sb, n * sizeof(cfg_ix_t));
+ succe = succb + n;
}
// find immediate successors of the given bblock
//
// Recursive traversal starting at DFA state 'x'; it returns immediately if
// 'x' is dfa_t::NIL or already marked in 'been'. For each outgoing arc of
// 'x': if the arc has a bblock (non-zero entry in 'arc2bb'), that bblock is
// appended to 'succ' and the traversal stops along this arc; otherwise the
// traversal continues into the arc's target state. Finally, if state 'x'
// has a final bblock (entry at arc2bb[nstate * nsym + x]), it is appended
// as well.
//
// 'succ' is an output cursor, advanced past every appended index; the caller
// must provide a large enough buffer. 'been' must hold one flag per state.
void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
- cfg_ix_t *&succ, size_t x)
+ cfg_ix_t *&succ, size_t x)
{
- if (x == dfa_t::NIL || been[x]) return;
- been[x] = true;
-
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars,
- *a = dfa.states[x]->arcs;
- const cfg_ix_t *a2b = &arc2bb[x * nsym];
-
- for (size_t c = 0; c < nsym; ++c) {
- const cfg_ix_t b = a2b[c];
- if (b != 0) {
- *succ++ = b;
- } else {
- successors(dfa, arc2bb, been, succ, a[c]);
- }
- }
-
- const cfg_ix_t f = arc2bb[nstate * nsym + x];
- if (f != 0) {
- *succ++ = f;
- }
+ if (x == dfa_t::NIL || been[x]) return;
+ been[x] = true;
+
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars,
+ *a = dfa.states[x]->arcs;
+ const cfg_ix_t *a2b = &arc2bb[x * nsym];
+
+ for (size_t c = 0; c < nsym; ++c) {
+ const cfg_ix_t b = a2b[c];
+ if (b != 0) {
+ *succ++ = b;
+ } else {
+ successors(dfa, arc2bb, been, succ, a[c]);
+ }
+ }
+
+ const cfg_ix_t f = arc2bb[nstate * nsym + x];
+ if (f != 0) {
+ *succ++ = f;
+ }
}
// find all bblocks reachable from this one by following
// non-accepting DFA paths: this is the set of bblocks affected
// by liveness of fallback tags
//
// Recursive traversal starting at DFA state 'x'; it returns immediately if
// 'x' is dfa_t::NIL or already marked in 'been'. Only arcs whose target
// state has 'fallthru' set are followed. For such an arc, its bblock (if
// any) is appended to 'succ', and -- unlike successors() -- the traversal
// continues into the target state whether or not the arc has a bblock.
// Final bblocks are not collected here.
//
// 'succ' is an output cursor, advanced past every appended index.
void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
- cfg_ix_t *&succ, size_t x)
+ cfg_ix_t *&succ, size_t x)
{
- if (x == dfa_t::NIL || been[x]) return;
- been[x] = true;
-
- const size_t
- nsym = dfa.nchars,
- *a = dfa.states[x]->arcs;
- const cfg_ix_t *a2b = &arc2bb[x * nsym];
-
- for (size_t c = 0; c < nsym; ++c) {
- const size_t y = a[c];
- if (y != dfa_t::NIL && dfa.states[y]->fallthru) {
- const cfg_ix_t b = a2b[c];
- if (b != 0) {
- *succ++ = b;
- }
- fallback(dfa, arc2bb, been, succ, y);
- }
- }
+ if (x == dfa_t::NIL || been[x]) return;
+ been[x] = true;
+
+ const size_t
+ nsym = dfa.nchars,
+ *a = dfa.states[x]->arcs;
+ const cfg_ix_t *a2b = &arc2bb[x * nsym];
+
+ for (size_t c = 0; c < nsym; ++c) {
+ const size_t y = a[c];
+ if (y != dfa_t::NIL && dfa.states[y]->fallthru) {
+ const cfg_ix_t b = a2b[c];
+ if (b != 0) {
+ *succ++ = b;
+ }
+ fallback(dfa, arc2bb, been, succ, y);
+ }
+ }
}
// Release every bblock's successor array, then the bblock storage itself.
// The storage is freed with a plain 'operator delete' and no cfg_bb_t
// destructor is invoked, which pairs with construction by placement new in
// create_bblocks().
cfg_t::~cfg_t()
{
- cfg_bb_t *b = bblocks, *e = b + nbbfall;
- for (; b < e; ++b) {
- delete[] b->succb;
- }
+ cfg_bb_t *b = bblocks, *e = b + nbbfall;
+ for (; b < e; ++b) {
+ delete[] b->succb;
+ }
- operator delete(bblocks);
+ operator delete(bblocks);
}
} // namespace re2c
// basic block
//
// succb / succe -- begin / end of this bblock's array of successor indices
// cmd           -- reference to a tag command list pointer that lives
//                  outside the bblock, so changes made through it are
//                  visible to the owner of that pointer
// rule          -- associated rule, or NULL
struct cfg_bb_t
{
- cfg_ix_t *succb;
- cfg_ix_t *succe;
- tcmd_t *&cmd;
- const Rule *rule;
+ cfg_ix_t *succb;
+ cfg_ix_t *succe;
+ tcmd_t *&cmd;
+ const Rule *rule;
- cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se, tcmd_t *&c, const Rule *r);
- FORBID_COPY(cfg_bb_t);
+ cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se, tcmd_t *&c, const Rule *r);
+ FORBID_COPY(cfg_bb_t);
};
// control flow graph
//
// dfa     -- the underlying DFA (held by reference, not owned)
// bblocks -- array of 'nbbfall' basic blocks
// nbbarc, nbbfin, nbbfall -- cumulative bblock counts: indices below nbbarc
//            are the root and transition bblocks, indices below nbbfin also
//            include final bblocks, and indices below nbbfall also include
//            fallback bblocks
//
// The static member functions are the individual passes run over the graph.
struct cfg_t
{
- dfa_t &dfa;
- cfg_bb_t *bblocks;
- cfg_ix_t nbbarc;
- cfg_ix_t nbbfin;
- cfg_ix_t nbbfall;
-
- explicit cfg_t(dfa_t &a);
- ~cfg_t();
- static tagver_t compact(const cfg_t &cfg, tagver_t *ver2new);
- static void liveness_analysis(const cfg_t &cfg, bool *live);
- static void live_through_bblock(const tcmd_t *cmd, bool *live);
- static void dead_code_elimination(cfg_t &cfg, const bool *live);
- static void interference(const cfg_t &cfg, const bool *live, bool *interf);
- static tagver_t variable_allocation(const cfg_t &cfg, const bool *interf, tagver_t *ver2new);
- static void renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver);
- static void normalization(cfg_t &cfg);
- FORBID_COPY(cfg_t);
+ dfa_t &dfa;
+ cfg_bb_t *bblocks;
+ cfg_ix_t nbbarc;
+ cfg_ix_t nbbfin;
+ cfg_ix_t nbbfall;
+
+ explicit cfg_t(dfa_t &a);
+ ~cfg_t();
+ static tagver_t compact(const cfg_t &cfg, tagver_t *ver2new);
+ static void liveness_analysis(const cfg_t &cfg, bool *live);
+ static void live_through_bblock(const tcmd_t *cmd, bool *live);
+ static void dead_code_elimination(cfg_t &cfg, const bool *live);
+ static void interference(const cfg_t &cfg, const bool *live, bool *interf);
+ static tagver_t variable_allocation(const cfg_t &cfg, const bool *interf, tagver_t *ver2new);
+ static void renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver);
+ static void normalization(cfg_t &cfg);
+ FORBID_COPY(cfg_t);
};
void dump_cfg(const cfg_t &cfg, const bool *live);
// Compute a renumbering that squeezes out unused tag versions.
//
// A version counts as used if it is the final version of some tag (and is
// not TAGVER_ZERO), or if it appears as the LHS or a non-zero RHS of any
// command in any of the 'nbbfall' bblocks.
//
// On return 'ver2new[v]' holds, for every v in [0, maxtagver], either the
// new version number (numbers are handed out consecutively from 1 in
// increasing order of v) or TAGVER_ZERO if v is unused. 'ver2new' must have
// room for maxtagver + 1 entries. The graph itself is not modified.
//
// Returns the highest new version number.
//
// NOTE(review): versions are used directly as array indices, so they are
// assumed to be in [0, maxtagver] -- confirm for 'finvers' entries.
tagver_t cfg_t::compact(const cfg_t &cfg, tagver_t *ver2new)
{
- const std::vector<Tag> &tags = cfg.dfa.tags;
- const size_t
- nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1,
- ntag = tags.size();
- const tagver_t *fins = cfg.dfa.finvers;
- bool *used = new bool[nver];
-
- std::fill(used, used + nver, false);
- for (size_t t = 0; t < ntag; ++t) {
- const tagver_t f = fins[t];
- used[f] = f != TAGVER_ZERO; // fixed tag or unreachable rule
- }
- for (size_t i = 0; i < cfg.nbbfall; ++i) {
- const cfg_bb_t &b = cfg.bblocks[i];
- for (const tcmd_t *p = b.cmd; p; p = p->next) {
- const tagver_t r = p->rhs;
- if (r != TAGVER_ZERO) {
- used[r] = true;
- }
- used[p->lhs] = true;
- }
- }
-
- tagver_t maxver = 0;
- for (size_t v = 0; v < nver; ++v) {
- ver2new[v] = used[v] ? ++maxver : TAGVER_ZERO;
- }
-
- delete[] used;
- return maxver;
+ const std::vector<Tag> &tags = cfg.dfa.tags;
+ const size_t
+ nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1,
+ ntag = tags.size();
+ const tagver_t *fins = cfg.dfa.finvers;
+ bool *used = new bool[nver];
+
+ std::fill(used, used + nver, false);
+ for (size_t t = 0; t < ntag; ++t) {
+ const tagver_t f = fins[t];
+ used[f] = f != TAGVER_ZERO; // fixed tag or unreachable rule
+ }
+ for (size_t i = 0; i < cfg.nbbfall; ++i) {
+ const cfg_bb_t &b = cfg.bblocks[i];
+ for (const tcmd_t *p = b.cmd; p; p = p->next) {
+ const tagver_t r = p->rhs;
+ if (r != TAGVER_ZERO) {
+ used[r] = true;
+ }
+ used[p->lhs] = true;
+ }
+ }
+
+ tagver_t maxver = 0;
+ for (size_t v = 0; v < nver; ++v) {
+ ver2new[v] = used[v] ? ++maxver : TAGVER_ZERO;
+ }
+
+ delete[] used;
+ return maxver;
}
} // namespace re2c
// Remove tag commands whose LHS version is not live.
//
// 'live' is read as consecutive rows of (maxtagver + 1) flags, one row per
// bblock in bblock order; the pointer is advanced by one row per bblock.
// Only the first 'nbbarc' bblocks (root and transition bblocks) are
// processed. A dead command is unlinked from its list by redirecting the
// incoming link to its successor; since 'cmd' is a reference, the owner's
// list head is updated too. Unlinked commands are not freed here.
void cfg_t::dead_code_elimination(cfg_t &cfg, const bool *live)
{
- const tagver_t nver = cfg.dfa.maxtagver + 1;
- // final and fallback tags can't be dead by construction
- cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbarc;
+ const tagver_t nver = cfg.dfa.maxtagver + 1;
+ // final and fallback tags can't be dead by construction
+ cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbarc;
- // ignore possible local liveness inside of bblock:
- // by construction we have no versions local to bblock
- for (; b < e; ++b, live += nver) {
- for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) {
- if (!live[p->lhs]) {
- *pp = p->next;
- } else {
- pp = &p->next;
- }
- }
- }
+ // ignore possible local liveness inside of bblock:
+ // by construction we have no versions local to bblock
+ for (; b < e; ++b, live += nver) {
+ for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) {
+ if (!live[p->lhs]) {
+ *pp = p->next;
+ } else {
+ pp = &p->next;
+ }
+ }
+ }
}
} // namespace re2c
// Debug helper: print the control flow graph to stderr as a Graphviz
// "digraph CFG".
//
// Each bblock i becomes node "n<i>". Its label lists the bblock's commands:
// copy commands as "lhs=rhs"; other commands as "lhs", then "=rhs" if rhs is
// non-zero, then one arrow per history item (down arrow for TAGVER_BOTTOM,
// up arrow otherwise). A "/" follows, and for bblocks with a rule the
// non-zero final versions of the rule's tags in [ltag, htag). For bblocks
// with index below 'nbbfin' the label also gets a "need:" list of live
// versions, read from 'live' as rows of (maxtagver + 1) flags per bblock.
// Edges to successors are drawn dotted for bblocks with a rule and solid
// otherwise.
void dump_cfg(const cfg_t &cfg, const bool *live)
{
- const tagver_t nver = cfg.dfa.maxtagver + 1;
+ const tagver_t nver = cfg.dfa.maxtagver + 1;
- fprintf(stderr, "digraph CFG {\n"
- " rankdir=LR\n"
- " node[shape=Mrecord fontname=Terminus height=0.2 width=0.2]\n"
- " edge[arrowhead=vee fontname=Terminus]\n\n");
+ fprintf(stderr, "digraph CFG {\n"
+ " rankdir=LR\n"
+ " node[shape=Mrecord fontname=Terminus height=0.2 width=0.2]\n"
+ " edge[arrowhead=vee fontname=Terminus]\n\n");
- for (cfg_ix_t i = 0; i < cfg.nbbfall; ++i, live += nver) {
- const cfg_bb_t *b = cfg.bblocks + i;
+ for (cfg_ix_t i = 0; i < cfg.nbbfall; ++i, live += nver) {
+ const cfg_bb_t *b = cfg.bblocks + i;
- fprintf(stderr, " n%u [label=\"%u\\n", i, i);
- for (const tcmd_t *p = b->cmd; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
- if (tcmd_t::iscopy(p)) {
- fprintf(stderr, "%d=%d ", l, r);
- } else {
- fprintf(stderr, "%d", l);
- if (r != TAGVER_ZERO) {
- fprintf(stderr, "=%d", r);
- }
- for (; *h != TAGVER_ZERO; ++h) {
- fprintf(stderr, "%s ", *h == TAGVER_BOTTOM ? "↓" : "↑");
- }
- }
- }
- fprintf(stderr, "/");
- if (b->rule) {
- for (size_t t = b->rule->ltag; t < b->rule->htag; ++t) {
- const tagver_t v = cfg.dfa.finvers[t];
- if (v != TAGVER_ZERO) {
- fprintf(stderr, "%i ", v);
- }
- }
- }
+ fprintf(stderr, " n%u [label=\"%u\\n", i, i);
+ for (const tcmd_t *p = b->cmd; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+ if (tcmd_t::iscopy(p)) {
+ fprintf(stderr, "%d=%d ", l, r);
+ } else {
+ fprintf(stderr, "%d", l);
+ if (r != TAGVER_ZERO) {
+ fprintf(stderr, "=%d", r);
+ }
+ for (; *h != TAGVER_ZERO; ++h) {
+ fprintf(stderr, "%s ", *h == TAGVER_BOTTOM ? "↓" : "↑");
+ }
+ }
+ }
+ fprintf(stderr, "/");
+ if (b->rule) {
+ for (size_t t = b->rule->ltag; t < b->rule->htag; ++t) {
+ const tagver_t v = cfg.dfa.finvers[t];
+ if (v != TAGVER_ZERO) {
+ fprintf(stderr, "%i ", v);
+ }
+ }
+ }
- if (i < cfg.nbbfin) {
- fprintf(stderr, "\\nneed:");
- for (tagver_t v = 0; v < nver; ++v) {
- if (live[v]) {
- fprintf(stderr, " %i", v);
- }
- }
- }
+ if (i < cfg.nbbfin) {
+ fprintf(stderr, "\\nneed:");
+ for (tagver_t v = 0; v < nver; ++v) {
+ if (live[v]) {
+ fprintf(stderr, " %i", v);
+ }
+ }
+ }
- fprintf(stderr, "\"]\n");
+ fprintf(stderr, "\"]\n");
- const char *style = b->rule ? "dotted" : "solid";
- for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
- fprintf(stderr, " n%u -> n%u [style=%s]\n", i, *j, style);
- }
- }
+ const char *style = b->rule ? "dotted" : "solid";
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ fprintf(stderr, " n%u -> n%u [style=%s]\n", i, *j, style);
+ }
+ }
- fprintf(stderr, "}\n");
+ fprintf(stderr, "}\n");
}
// Debug helper: print the interference matrix to stderr.
// 'interf' is read as a square row-major matrix of (maxtagver + 1) rows and
// columns. Version 0 is skipped: the header row lists versions 1..maxtagver,
// then one row per version follows with '*' where the pair interferes and
// '.' where it does not.
void dump_interf(const cfg_t &cfg, const bool *interf)
{
- const tagver_t nver = cfg.dfa.maxtagver + 1;
- for (tagver_t y = 1; y < nver; ++y) {
- fprintf(stderr, "%2d ", y);
- }
- fprintf(stderr, "\n");
- for (tagver_t x = 1; x < nver; ++x) {
- for (tagver_t y = 1; y < nver; ++y) {
- fprintf(stderr, "%2c ", interf[x * nver + y] ? '*' : '.');
- }
- fprintf(stderr, "\n");
- }
+ const tagver_t nver = cfg.dfa.maxtagver + 1;
+ for (tagver_t y = 1; y < nver; ++y) {
+ fprintf(stderr, "%2d ", y);
+ }
+ fprintf(stderr, "\n");
+ for (tagver_t x = 1; x < nver; ++x) {
+ for (tagver_t y = 1; y < nver; ++y) {
+ fprintf(stderr, "%2c ", interf[x * nver + y] ? '*' : '.');
+ }
+ fprintf(stderr, "\n");
+ }
}
} // namespace re2c
*/
// Replace per-arc tag command lists with command ids from the DFA's pool.
//
// The initial command list 'tcmd0' is inserted into 'dfa.tcpool', its id is
// stored in 'tcid0' and the pointer is reset to NULL. Then, for every state,
// a new 'tcid' array of nsym + 2 ids is allocated and filled by inserting,
// in order, the nsym transition commands, the final command and the fallback
// command. The state's 'tcmd' pointer array is then deleted and set to NULL,
// so states must not be accessed through 'tcmd' afterwards.
// NOTE(review): ownership of the command lists themselves presumably passes
// to the pool on insert() -- confirm in tcpool_t.
void freeze_tags(dfa_t &dfa)
{
- tcpool_t &pool = dfa.tcpool;
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
-
- dfa.tcid0 = pool.insert(dfa.tcmd0);
- dfa.tcmd0 = NULL;
-
- for (size_t i = 0; i < nstate; ++i) {
- dfa_state_t *s = dfa.states[i];
- tcmd_t **cmd = s->tcmd,
- **const fin = cmd + nsym,
- **const fall = fin + 1;
- tcid_t *id = s->tcid = new tcid_t[nsym + 2];
-
- // transition commands
- for(; cmd < fin; ++cmd) {
- *id++ = pool.insert(*cmd);
- }
-
- // final epsilon-transition command
- *id++ = pool.insert(*fin);
-
- // fallback epsilon-transition command
- *id++ = pool.insert(*fall);
-
- delete[] s->tcmd;
- s->tcmd = NULL;
- }
+ tcpool_t &pool = dfa.tcpool;
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+
+ dfa.tcid0 = pool.insert(dfa.tcmd0);
+ dfa.tcmd0 = NULL;
+
+ for (size_t i = 0; i < nstate; ++i) {
+ dfa_state_t *s = dfa.states[i];
+ tcmd_t **cmd = s->tcmd,
+ **const fin = cmd + nsym,
+ **const fall = fin + 1;
+ tcid_t *id = s->tcid = new tcid_t[nsym + 2];
+
+ // transition commands
+ for(; cmd < fin; ++cmd) {
+ *id++ = pool.insert(*cmd);
+ }
+
+ // final epsilon-transition command
+ *id++ = pool.insert(*fin);
+
+ // fallback epsilon-transition command
+ *id++ = pool.insert(*fall);
+
+ delete[] s->tcmd;
+ s->tcmd = NULL;
+ }
}
} // namespace re2c
// Build the interference matrix of tag versions.
//
// 'interf' is a caller-provided square row-major matrix with
// (maxtagver + 1) rows and columns; it is zeroed first. interfere() is then
// run on the command list of each of the first 'nbbfin' bblocks, with 'live'
// read as rows of (maxtagver + 1) flags per bblock. 'buf' and 'vals' are
// scratch storage shared by those calls and freed at the end.
//
// Finally every version in 'dfa.mtagvers' is marked as interfering, in both
// directions, with every version that is not in that set.
void cfg_t::interference(const cfg_t &cfg, const bool *live, bool *interf)
{
- const tagver_t maxver = cfg.dfa.maxtagver + 1;
- const size_t nver = static_cast<size_t>(maxver);
- bool *buf = new bool[nver];
- vals_t *vals = new vals_t[nver]();
- const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfin;
+ const tagver_t maxver = cfg.dfa.maxtagver + 1;
+ const size_t nver = static_cast<size_t>(maxver);
+ bool *buf = new bool[nver];
+ vals_t *vals = new vals_t[nver]();
+ const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfin;
- memset(interf, 0, nver * nver * sizeof(bool));
- for (; b < e; ++b, live += nver) {
- interfere(b->cmd, live, interf, buf, vals, nver);
- }
+ memset(interf, 0, nver * nver * sizeof(bool));
+ for (; b < e; ++b, live += nver) {
+ interfere(b->cmd, live, interf, buf, vals, nver);
+ }
- // versions of tags with/without history interfere
- std::set<tagver_t> &mt = cfg.dfa.mtagvers;
- for (std::set<tagver_t>::iterator i = mt.begin(); i != mt.end(); ++i) {
- for (tagver_t u = *i, v = 0; v < maxver; ++v) {
- if (mt.find(v) == mt.end()) {
- interf[v * maxver + u] = interf[u * maxver + v] = true;
- }
- }
- }
+ // versions of tags with/without history interfere
+ std::set<tagver_t> &mt = cfg.dfa.mtagvers;
+ for (std::set<tagver_t>::iterator i = mt.begin(); i != mt.end(); ++i) {
+ for (tagver_t u = *i, v = 0; v < maxver; ++v) {
+ if (mt.find(v) == mt.end()) {
+ interf[v * maxver + u] = interf[u * maxver + v] = true;
+ }
+ }
+ }
- delete[] buf;
- delete[] vals;
+ delete[] buf;
+ delete[] vals;
}
void interfere(const tcmd_t *cmd, const bool *live, bool *interf,
- bool *buf, vals_t *vals, size_t nver)
+ bool *buf, vals_t *vals, size_t nver)
{
- // initialize value of RHS for all commands in this basic block
- for (const tcmd_t *p = cmd; p; p = p->next) {
- const tagver_t r = p->rhs;
- if (r != TAGVER_ZERO) {
- vals[r].clear();
- vals[r].push_back(r);
- }
- }
+ // initialize value of RHS for all commands in this basic block
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ const tagver_t r = p->rhs;
+ if (r != TAGVER_ZERO) {
+ vals[r].clear();
+ vals[r].push_back(r);
+ }
+ }
- // find interference list for LHS of each command
- for (const tcmd_t *p = cmd; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
- vals_t &vl = vals[l], &vr = vals[r];
+ // find interference list for LHS of each command
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+ vals_t &vl = vals[l], &vr = vals[r];
- // alive after this command
- memcpy(buf, live, nver * sizeof(bool));
- cfg_t::live_through_bblock(p->next, buf);
+ // alive after this command
+ memcpy(buf, live, nver * sizeof(bool));
+ cfg_t::live_through_bblock(p->next, buf);
- // if copy command, exclude RHS
- if (tcmd_t::iscopy(p)) buf[r] = false;
+ // if copy command, exclude RHS
+ if (tcmd_t::iscopy(p)) buf[r] = false;
- // update value of current command's LHS
- if (tcmd_t::iscopy(p)) {
- vl = vr;
- } else if (tcmd_t::isset(p)) {
- vl.clear();
- vl.push_back(*h);
- } else {
- if (l != r) vl = vr;
- for (; *++h != TAGVER_ZERO;); // history is reversed
- for (; h-- != p->history;) {
- vl.push_back(*h);
- }
- }
- // Exclude from interference list all LHS from preceding commands
- // which value is equal to current LHS value. Subsequent commands
- // are ignored: if subsequent command that sets LHS to the same value
- // precedes any use of it, liveness propagation through basic block
- // would mark this LHS as dead and not interfering anyway; otherwise
- // (if use precedes setting to the same value), then it indeed
- // interferes with current LHS.
- for (const tcmd_t *q = cmd; q != p; q = q->next) {
- if (vals[q->lhs] == vl) {
- buf[q->lhs] = false;
- }
- }
+ // update value of current command's LHS
+ if (tcmd_t::iscopy(p)) {
+ vl = vr;
+ } else if (tcmd_t::isset(p)) {
+ vl.clear();
+ vl.push_back(*h);
+ } else {
+ if (l != r) vl = vr;
+ for (; *++h != TAGVER_ZERO;); // history is reversed
+ for (; h-- != p->history;) {
+ vl.push_back(*h);
+ }
+ }
+ // Exclude from interference list all LHS from preceding commands
+ // whose value is equal to current LHS value. Subsequent commands
+ // are ignored: if subsequent command that sets LHS to the same value
+ // precedes any use of it, liveness propagation through basic block
+ // would mark this LHS as dead and not interfering anyway; otherwise
+ // (if use precedes setting to the same value), then it indeed
+ // interferes with current LHS.
+ for (const tcmd_t *q = cmd; q != p; q = q->next) {
+ if (vals[q->lhs] == vl) {
+ buf[q->lhs] = false;
+ }
+ }
- const size_t u = static_cast<size_t>(l);
- for (size_t v = 0; v < nver; ++v) {
- if (!buf[v]) continue;
- interf[u * nver + v] = interf[v * nver + u] = true;
- }
- }
+ const size_t u = static_cast<size_t>(l);
+ for (size_t v = 0; v < nver; ++v) {
+ if (!buf[v]) continue;
+ interf[u * nver + v] = interf[v * nver + u] = true;
+ }
+ }
}
} // namespace re2c
// calculates deep-first search postorder of transition nodes,
// skips final and fallback nodes (they have no successors anyway)
static cfg_ix_t *postorder(const cfg_t &cfg, bool *done,
- cfg_ix_t *ord, cfg_ix_t i)
+ cfg_ix_t *ord, cfg_ix_t i)
{
- if (i >= cfg.nbbarc || done[i]) return ord;
- done[i] = true;
+ if (i >= cfg.nbbarc || done[i]) return ord;
+ done[i] = true;
- const cfg_bb_t *b = cfg.bblocks + i;
- for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
- ord = postorder(cfg, done, ord, *j);
- }
+ const cfg_bb_t *b = cfg.bblocks + i;
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ ord = postorder(cfg, done, ord, *j);
+ }
- *ord = i;
- return ++ord;
+ *ord = i;
+ return ++ord;
}
void cfg_t::live_through_bblock(const tcmd_t *cmd, bool *live)
{
- if (!cmd) return;
-
- live_through_bblock(cmd->next, live);
-
- const tagver_t l = cmd->lhs, r = cmd->rhs;
- if (live[l]) {
- // first reset, than set: LHS might be equal to history
- live[l] = false;
- if (r != TAGVER_ZERO) {
- live[r] = true;
- }
- }
+ if (!cmd) return;
+
+ live_through_bblock(cmd->next, live);
+
+ const tagver_t l = cmd->lhs, r = cmd->rhs;
+ if (live[l]) {
+ // first reset, then set: LHS might be equal to history
+ live[l] = false;
+ if (r != TAGVER_ZERO) {
+ live[r] = true;
+ }
+ }
}
void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
{
- const std::vector<Tag> &tags = cfg.dfa.tags;
- const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
- const cfg_ix_t
- narc = cfg.nbbarc,
- nfin = cfg.nbbfin;
- const tagver_t *fins = cfg.dfa.finvers;
- bool *buf1 = new bool[nver];
- bool *buf2 = new bool[nver];
- bool *done = new bool[narc];
- cfg_ix_t *pord = new cfg_ix_t[narc];
-
- /* note [control flow equations for tag liveness]
- *
- * Liveness in bblock B is given by control flow equations:
- * live-out(B) = union of live-in(C), for all successors C
- * live-in(B) = live-out(B) except defined(B)
- * Equations are solved by iteration until fix point.
- *
- * Live set can only grow on each iteration, it never shrinks.
- * Initially all final tag versions used in rules are alive;
- * we pre-calculate them and then only update table by adding
- * new versions.
- */
-
- memset(live, 0, nfin * nver * sizeof(bool));
- for (cfg_ix_t i = narc; i < nfin; ++i) {
- const cfg_bb_t *b = cfg.bblocks + i;
- const Rule *r = b->rule;
- bool *l = &live[i * nver];
-
- // all final bblocks have USE tags, but no successors
- assert(r && b->succb == b->succe);
-
- for (size_t t = r->ltag; t < r->htag; ++t) {
- l[fins[t]] = !fixed(tags[t]);
- }
- }
-
- memset(done, 0, narc * sizeof(bool));
- postorder(cfg, done, pord, 0);
-
- for (bool loop = true; loop;) {
- loop = false;
-
- // iterate nodes in postorder
- for (cfg_ix_t a = 0; a < narc; ++a) {
- const cfg_ix_t i = pord[a];
- const cfg_bb_t *b = cfg.bblocks + i;
- bool *old = &live[i * nver];
-
- // transition bblocks have no USE tags
- assert(!b->rule);
-
- memcpy(buf1, old, nver * sizeof(bool));
- for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
- const bool *l = &live[*j * nver];
- const tcmd_t *cmd = cfg.bblocks[*j].cmd;
- memcpy(buf2, l, nver * sizeof(bool));
-
- cfg_t::live_through_bblock(cmd, buf2);
-
- for (size_t v = 0; v < nver; ++v) {
- buf1[v] |= buf2[v];
- }
- }
-
- if (memcmp(old, buf1, nver * sizeof(bool)) != 0) {
- memcpy(old, buf1, nver * sizeof(bool));
- loop = true;
- }
- }
- }
-
- /* note [fallback tag liveness]
- *
- * Liveness of fallback tag is propagated forward from fallback
- * state (see note [fallback states]) and until there remain
- * any fallthrough paths from current state.
- *
- * Fallback version of tag is either backup copy of tag's final
- * version, or (if there's no backup) the final version itself.
- * Absence of backup means that final version is not overwritten,
- * but still we should prevent it from merging with other tags
- * (otherwise it may become overwritten).
- */
- for (cfg_ix_t i = nfin; i < cfg.nbbfall; ++i) {
- const cfg_bb_t *b = cfg.bblocks + i;
- const Rule *r = b->rule;
-
- // all fallback bblocks have USE tags
- assert(r);
-
- memset(buf1, 0, nver * sizeof(bool));
- for (size_t t = r->ltag; t < r->htag; ++t) {
- buf1[fins[t]] = !fixed(tags[t]);
- }
-
- // need two passes: same version may occur as both LHS and RHS
- // not the same as backward propagation of liveness through bblock
- for (const tcmd_t *p = b->cmd; p; p = p->next) {
- buf1[p->lhs] = false;
- }
- for (const tcmd_t *p = b->cmd; p; p = p->next) {
- const tagver_t v = p->rhs;
- if (v != TAGVER_ZERO) {
- buf1[v] = true;
- }
- }
-
- for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
- bool *liv = &live[*j * nver];
- for (size_t v = 0; v < nver; ++v) {
- liv[v] |= buf1[v];
- }
- }
- }
-
- delete[] buf1;
- delete[] buf2;
- delete[] done;
- delete[] pord;
+ const std::vector<Tag> &tags = cfg.dfa.tags;
+ const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
+ const cfg_ix_t
+ narc = cfg.nbbarc,
+ nfin = cfg.nbbfin;
+ const tagver_t *fins = cfg.dfa.finvers;
+ bool *buf1 = new bool[nver];
+ bool *buf2 = new bool[nver];
+ bool *done = new bool[narc];
+ cfg_ix_t *pord = new cfg_ix_t[narc];
+
+ /* note [control flow equations for tag liveness]
+ *
+ * Liveness in bblock B is given by control flow equations:
+ * live-out(B) = union of live-in(C), for all successors C
+ * live-in(B) = live-out(B) except defined(B)
+ * Equations are solved by iteration until fix point.
+ *
+ * Live set can only grow on each iteration, it never shrinks.
+ * Initially all final tag versions used in rules are alive;
+ * we pre-calculate them and then only update table by adding
+ * new versions.
+ */
+
+ memset(live, 0, nfin * nver * sizeof(bool));
+ for (cfg_ix_t i = narc; i < nfin; ++i) {
+ const cfg_bb_t *b = cfg.bblocks + i;
+ const Rule *r = b->rule;
+ bool *l = &live[i * nver];
+
+ // all final bblocks have USE tags, but no successors
+ assert(r && b->succb == b->succe);
+
+ for (size_t t = r->ltag; t < r->htag; ++t) {
+ l[fins[t]] = !fixed(tags[t]);
+ }
+ }
+
+ memset(done, 0, narc * sizeof(bool));
+ postorder(cfg, done, pord, 0);
+
+ for (bool loop = true; loop;) {
+ loop = false;
+
+ // iterate nodes in postorder
+ for (cfg_ix_t a = 0; a < narc; ++a) {
+ const cfg_ix_t i = pord[a];
+ const cfg_bb_t *b = cfg.bblocks + i;
+ bool *old = &live[i * nver];
+
+ // transition bblocks have no USE tags
+ assert(!b->rule);
+
+ memcpy(buf1, old, nver * sizeof(bool));
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ const bool *l = &live[*j * nver];
+ const tcmd_t *cmd = cfg.bblocks[*j].cmd;
+ memcpy(buf2, l, nver * sizeof(bool));
+
+ cfg_t::live_through_bblock(cmd, buf2);
+
+ for (size_t v = 0; v < nver; ++v) {
+ buf1[v] |= buf2[v];
+ }
+ }
+
+ if (memcmp(old, buf1, nver * sizeof(bool)) != 0) {
+ memcpy(old, buf1, nver * sizeof(bool));
+ loop = true;
+ }
+ }
+ }
+
+ /* note [fallback tag liveness]
+ *
+ * Liveness of fallback tag is propagated forward from fallback
+ * state (see note [fallback states]) for as long as there remain
+ * any fallthrough paths from current state.
+ *
+ * Fallback version of tag is either backup copy of tag's final
+ * version, or (if there's no backup) the final version itself.
+ * Absence of backup means that final version is not overwritten,
+ * but still we should prevent it from merging with other tags
+ * (otherwise it may become overwritten).
+ */
+ for (cfg_ix_t i = nfin; i < cfg.nbbfall; ++i) {
+ const cfg_bb_t *b = cfg.bblocks + i;
+ const Rule *r = b->rule;
+
+ // all fallback bblocks have USE tags
+ assert(r);
+
+ memset(buf1, 0, nver * sizeof(bool));
+ for (size_t t = r->ltag; t < r->htag; ++t) {
+ buf1[fins[t]] = !fixed(tags[t]);
+ }
+
+ // need two passes: same version may occur as both LHS and RHS
+ // not the same as backward propagation of liveness through bblock
+ for (const tcmd_t *p = b->cmd; p; p = p->next) {
+ buf1[p->lhs] = false;
+ }
+ for (const tcmd_t *p = b->cmd; p; p = p->next) {
+ const tagver_t v = p->rhs;
+ if (v != TAGVER_ZERO) {
+ buf1[v] = true;
+ }
+ }
+
+ for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
+ bool *liv = &live[*j * nver];
+ for (size_t v = 0; v < nver; ++v) {
+ liv[v] |= buf1[v];
+ }
+ }
+ }
+
+ delete[] buf1;
+ delete[] buf2;
+ delete[] done;
+ delete[] pord;
}
} // namespace re2c
void cfg_t::normalization(cfg_t &cfg)
{
- const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
- uint32_t *indeg = new uint32_t[nver];
- memset(indeg, 0, nver * sizeof(uint32_t));
-
- cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
- for (; b < e; ++b) {
-
- // We cannot normalize the list of commands as a whole: the
- // relative order of some commands might be significant.
- // Therefore we split the list in continuous sublists of
- // 'copy', 'save without history' and 'save with history'
- // commands and normalize each sublist in a proper way.
- tcmd_t **px, *x;
- for (px = &b->cmd; (x = *px);) {
- if (tcmd_t::iscopy(x)) {
- for (x = *px; x && tcmd_t::iscopy(x); x = x->next);
- *normalize(px, x) = NULL; // topsort expects NULL terminator
- tcmd_t::topsort(px, indeg);
- for (; *px; px = &(*px)->next); // find tail
- *px = x; // restore tail
- } else if (tcmd_t::isset(x)) {
- for (x = *px; x && tcmd_t::isset(x); x = x->next);
- px = normalize(px, x);
- } else {
- for (; (x = *px) && tcmd_t::isadd(x); px = &x->next);
- // don't normalize, histories may have complex dependencies
- }
- }
- }
-
- delete[] indeg;
+ const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
+ uint32_t *indeg = new uint32_t[nver];
+ memset(indeg, 0, nver * sizeof(uint32_t));
+
+ cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
+ for (; b < e; ++b) {
+
+ // We cannot normalize the list of commands as a whole: the
+ // relative order of some commands might be significant.
+ // Therefore we split the list in contiguous sublists of
+ // 'copy', 'save without history' and 'save with history'
+ // commands and normalize each sublist in a proper way.
+ tcmd_t **px, *x;
+ for (px = &b->cmd; (x = *px);) {
+ if (tcmd_t::iscopy(x)) {
+ for (x = *px; x && tcmd_t::iscopy(x); x = x->next);
+ *normalize(px, x) = NULL; // topsort expects NULL terminator
+ tcmd_t::topsort(px, indeg);
+ for (; *px; px = &(*px)->next); // find tail
+ *px = x; // restore tail
+ } else if (tcmd_t::isset(x)) {
+ for (x = *px; x && tcmd_t::isset(x); x = x->next);
+ px = normalize(px, x);
+ } else {
+ for (; (x = *px) && tcmd_t::isadd(x); px = &x->next);
+ // don't normalize, histories may have complex dependencies
+ }
+ }
+ }
+
+ delete[] indeg;
}
static void swap(tcmd_t &x, tcmd_t &y)
{
- assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y));
- std::swap(x.lhs, y.lhs);
- std::swap(x.rhs, y.rhs);
- std::swap(x.history[0], y.history[0]);
+ assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y));
+ std::swap(x.lhs, y.lhs);
+ std::swap(x.rhs, y.rhs);
+ std::swap(x.history[0], y.history[0]);
}
static bool less(const tcmd_t &x, const tcmd_t &y)
{
- assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y));
- tagver_t u, v;
+ assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y));
+ tagver_t u, v;
- u = x.lhs; v = y.lhs;
- if (u < v) return true;
- if (u > v) return false;
+ u = x.lhs; v = y.lhs;
+ if (u < v) return true;
+ if (u > v) return false;
- u = x.rhs; v = y.rhs;
- if (u < v) return true;
- if (u > v) return false;
+ u = x.rhs; v = y.rhs;
+ if (u < v) return true;
+ if (u > v) return false;
- u = x.history[0]; v = y.history[0];
- if (u < v) return true;
- if (u > v) return false;
+ u = x.history[0]; v = y.history[0];
+ if (u < v) return true;
+ if (u > v) return false;
- return false;
+ return false;
}
tcmd_t **normalize(tcmd_t **ps, tcmd_t *e)
{
- // sort lexicographically
- for (tcmd_t *p = *ps; p != e; p = p->next) {
- for (tcmd_t *q = p->next; q != e; q = q->next) {
- if (less(*q, *p)) {
- swap(*p, *q);
- }
- }
- }
-
- // delete duplicates
- for (tcmd_t *p = *ps; p != e;) {
- tcmd_t *q = p->next;
- if (q != e && tcmd_t::equal(*p, *q)) {
- p->next = q->next;
- } else {
- p = q;
- }
- }
-
- for (; *ps != e; ps = &(*ps)->next);
- return ps;
+ // sort lexicographically
+ for (tcmd_t *p = *ps; p != e; p = p->next) {
+ for (tcmd_t *q = p->next; q != e; q = q->next) {
+ if (less(*q, *p)) {
+ swap(*p, *q);
+ }
+ }
+ }
+
+ // delete duplicates
+ for (tcmd_t *p = *ps; p != e;) {
+ tcmd_t *q = p->next;
+ if (q != e && tcmd_t::equal(*p, *q)) {
+ p->next = q->next;
+ } else {
+ p = q;
+ }
+ }
+
+ for (; *ps != e; ps = &(*ps)->next);
+ return ps;
}
} // namespace re2c
void compact_and_optimize_tags(dfa_t &dfa, bool optimize)
{
- tagver_t maxver = dfa.maxtagver;
- if (maxver > 0) {
- cfg_t cfg(dfa);
-
- size_t nver = static_cast<size_t>(maxver) + 1;
- tagver_t *ver2new = new tagver_t[nver];
-
- maxver = cfg_t::compact(cfg, ver2new);
- cfg_t::renaming(cfg, ver2new, maxver);
-
- if (optimize && maxver > 0) {
- nver = static_cast<size_t>(maxver) + 1;
- bool *live = new bool[cfg.nbbfin * nver];
- bool *interf = new bool[nver * nver];
-
- static const uint32_t NPASS = 2;
- for (uint32_t n = 0; n < NPASS; ++n) {
- cfg_t::liveness_analysis(cfg, live);
- cfg_t::dead_code_elimination(cfg, live);
- cfg_t::interference(cfg, live, interf);
- maxver = cfg_t::variable_allocation(cfg, interf, ver2new);
- cfg_t::renaming(cfg, ver2new, maxver);
- cfg_t::normalization(cfg);
- }
-
- delete[] live;
- delete[] interf;
- }
-
- delete[] ver2new;
- }
+ tagver_t maxver = dfa.maxtagver;
+ if (maxver > 0) {
+ cfg_t cfg(dfa);
+
+ size_t nver = static_cast<size_t>(maxver) + 1;
+ tagver_t *ver2new = new tagver_t[nver];
+
+ maxver = cfg_t::compact(cfg, ver2new);
+ cfg_t::renaming(cfg, ver2new, maxver);
+
+ if (optimize && maxver > 0) {
+ nver = static_cast<size_t>(maxver) + 1;
+ bool *live = new bool[cfg.nbbfin * nver];
+ bool *interf = new bool[nver * nver];
+
+ static const uint32_t NPASS = 2;
+ for (uint32_t n = 0; n < NPASS; ++n) {
+ cfg_t::liveness_analysis(cfg, live);
+ cfg_t::dead_code_elimination(cfg, live);
+ cfg_t::interference(cfg, live, interf);
+ maxver = cfg_t::variable_allocation(cfg, interf, ver2new);
+ cfg_t::renaming(cfg, ver2new, maxver);
+ cfg_t::normalization(cfg);
+ }
+
+ delete[] live;
+ delete[] interf;
+ }
+
+ delete[] ver2new;
+ }
}
} // namespace re2c
void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver)
{
- tagver_t &oldmax = cfg.dfa.maxtagver;
- if (oldmax == maxver) return;
- oldmax = maxver;
-
- cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall;
- for (; b < be; ++b) {
- for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) {
- tagver_t &l = p->lhs, &r = p->rhs, h = p->history[0];
-
- l = ver2new[l];
- if (r != TAGVER_ZERO) {
- r = ver2new[r];
- }
- if (l == r && h == TAGVER_ZERO) {
- *pp = p->next;
- } else {
- pp = &p->next;
- }
- }
- }
-
- // final tag versions
- tagver_t *fins = cfg.dfa.finvers;
- const std::vector<Tag> &tags = cfg.dfa.tags;
- for (size_t t = 0; t < tags.size(); ++t) {
- tagver_t &f = fins[t];
- if (f != TAGVER_ZERO) { // fixed tag or unreachable rule
- f = ver2new[f];
- }
- }
-
- // versions of tags with history
- std::set<tagver_t> newmt, &oldmt = cfg.dfa.mtagvers;
- for (std::set<tagver_t>::iterator i = oldmt.begin(); i != oldmt.end(); ++i) {
- newmt.insert(ver2new[*i]);
- }
- oldmt.swap(newmt);
+ tagver_t &oldmax = cfg.dfa.maxtagver;
+ if (oldmax == maxver) return;
+ oldmax = maxver;
+
+ cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall;
+ for (; b < be; ++b) {
+ for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) {
+ tagver_t &l = p->lhs, &r = p->rhs, h = p->history[0];
+
+ l = ver2new[l];
+ if (r != TAGVER_ZERO) {
+ r = ver2new[r];
+ }
+ if (l == r && h == TAGVER_ZERO) {
+ *pp = p->next;
+ } else {
+ pp = &p->next;
+ }
+ }
+ }
+
+ // final tag versions
+ tagver_t *fins = cfg.dfa.finvers;
+ const std::vector<Tag> &tags = cfg.dfa.tags;
+ for (size_t t = 0; t < tags.size(); ++t) {
+ tagver_t &f = fins[t];
+ if (f != TAGVER_ZERO) { // fixed tag or unreachable rule
+ f = ver2new[f];
+ }
+ }
+
+ // versions of tags with history
+ std::set<tagver_t> newmt, &oldmt = cfg.dfa.mtagvers;
+ for (std::set<tagver_t>::iterator i = oldmt.begin(); i != oldmt.end(); ++i) {
+ newmt.insert(ver2new[*i]);
+ }
+ oldmt.swap(newmt);
}
} // namespace re2c
* The algorithm takes quadratic (in the number of tags) time.
*/
tagver_t cfg_t::variable_allocation(const cfg_t &cfg, const bool *interf,
- tagver_t *ver2new)
+ tagver_t *ver2new)
{
- const tagver_t
- END = std::numeric_limits<tagver_t>::max(),
- nver = cfg.dfa.maxtagver + 1;
- tagver_t *next = new tagver_t[nver]; // list of class members
- tagver_t *repr = new tagver_t[nver]; // maps tag to class representative
- tagver_t rx, ry, x, y, z;
-
- std::fill(next, next + nver, END);
- std::fill(repr, repr + nver, END);
-
- // copy coalescing: for each command X = Y, try to merge X and Y
- const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
- for (; b < e; ++b) {
- for (const tcmd_t *p = b->cmd; p; p = p->next) {
- x = p->lhs;
- y = p->rhs;
- if (y == TAGVER_ZERO || y == x) continue;
-
- rx = repr[x];
- ry = repr[y];
-
- if (rx != END) {
- if (ry != END) continue;
- for (z = rx; z != END; z = next[z]) {
- if (interf[z * nver + y]) break;
- }
- if (z == END) {
- repr[y] = rx;
- next[y] = next[rx];
- next[rx] = y;
- }
- } else if (ry != END) {
- for (z = ry; z != END; z = next[z]) {
- if (interf[z * nver + x]) break;
- }
- if (z == END) {
- repr[x] = ry;
- next[x] = next[ry];
- next[ry] = x;
- }
- } else if (!interf[x * nver + y]) {
- repr[x] = repr[y] = x;
- next[x] = y;
- }
- }
- }
-
- // try to merge equivalence classes left after copy coalescing
- for (rx = 0; rx < nver; ++rx) {
- if (rx != repr[rx]) continue;
-
- for (ry = rx + 1; ry < nver; ++ry) {
- if (ry != repr[ry]) continue;
-
- for (x = rx; x != END; x = next[x]) {
- for (y = ry; y != END; y = next[y]) {
- if (interf[x * nver + y]) break;
- }
- if (y != END) break;
- }
-
- if (x == END) {
- for (y = ry;; y = next[y]) {
- repr[y] = rx;
- if (next[y] == END) {
- next[y] = next[rx];
- next[rx] = ry;
- break;
- }
- }
- }
- }
- }
-
- // push each remaining tag to any non-interfering class
- for (x = 0; x < nver; ++x) {
- if (repr[x] != END) continue;
-
- // try all existing classes
- for (rx = 0; rx < nver; ++rx) {
- if (rx != repr[rx]) continue;
-
- // check interference with class members
- for (y = rx; y != END; y = next[y]) {
- if (interf[x * nver + y]) break;
- }
-
- // no interference; add to class
- if (y == END) {
- repr[x] = rx;
- next[x] = next[rx];
- next[rx] = x;
- break;
- }
- }
-
- // make new equivalence class
- if (rx == nver) {
- repr[x] = x;
- }
- }
-
- tagver_t maxver = 0;
- for (rx = 0; rx < nver; ++rx) {
- if (repr[rx] != rx) continue;
-
- ++maxver;
- for (x = rx; x != END; x = next[x]) {
- ver2new[x] = maxver;
- }
- }
-
- delete[] next;
- delete[] repr;
-
- return maxver;
+ const tagver_t
+ END = std::numeric_limits<tagver_t>::max(),
+ nver = cfg.dfa.maxtagver + 1;
+ tagver_t *next = new tagver_t[nver]; // list of class members
+ tagver_t *repr = new tagver_t[nver]; // maps tag to class representative
+ tagver_t rx, ry, x, y, z;
+
+ std::fill(next, next + nver, END);
+ std::fill(repr, repr + nver, END);
+
+ // copy coalescing: for each command X = Y, try to merge X and Y
+ const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
+ for (; b < e; ++b) {
+ for (const tcmd_t *p = b->cmd; p; p = p->next) {
+ x = p->lhs;
+ y = p->rhs;
+ if (y == TAGVER_ZERO || y == x) continue;
+
+ rx = repr[x];
+ ry = repr[y];
+
+ if (rx != END) {
+ if (ry != END) continue;
+ for (z = rx; z != END; z = next[z]) {
+ if (interf[z * nver + y]) break;
+ }
+ if (z == END) {
+ repr[y] = rx;
+ next[y] = next[rx];
+ next[rx] = y;
+ }
+ } else if (ry != END) {
+ for (z = ry; z != END; z = next[z]) {
+ if (interf[z * nver + x]) break;
+ }
+ if (z == END) {
+ repr[x] = ry;
+ next[x] = next[ry];
+ next[ry] = x;
+ }
+ } else if (!interf[x * nver + y]) {
+ repr[x] = repr[y] = x;
+ next[x] = y;
+ }
+ }
+ }
+
+ // try to merge equivalence classes left after copy coalescing
+ for (rx = 0; rx < nver; ++rx) {
+ if (rx != repr[rx]) continue;
+
+ for (ry = rx + 1; ry < nver; ++ry) {
+ if (ry != repr[ry]) continue;
+
+ for (x = rx; x != END; x = next[x]) {
+ for (y = ry; y != END; y = next[y]) {
+ if (interf[x * nver + y]) break;
+ }
+ if (y != END) break;
+ }
+
+ if (x == END) {
+ for (y = ry;; y = next[y]) {
+ repr[y] = rx;
+ if (next[y] == END) {
+ next[y] = next[rx];
+ next[rx] = ry;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // push each remaining tag to any non-interfering class
+ for (x = 0; x < nver; ++x) {
+ if (repr[x] != END) continue;
+
+ // try all existing classes
+ for (rx = 0; rx < nver; ++rx) {
+ if (rx != repr[rx]) continue;
+
+ // check interference with class members
+ for (y = rx; y != END; y = next[y]) {
+ if (interf[x * nver + y]) break;
+ }
+
+ // no interference; add to class
+ if (y == END) {
+ repr[x] = rx;
+ next[x] = next[rx];
+ next[rx] = x;
+ break;
+ }
+ }
+
+ // make new equivalence class
+ if (rx == nver) {
+ repr[x] = x;
+ }
+ }
+
+ tagver_t maxver = 0;
+ for (rx = 0; rx < nver; ++rx) {
+ if (repr[rx] != rx) continue;
+
+ ++maxver;
+ for (x = rx; x != END; x = next[x]) {
+ ver2new[x] = maxver;
+ }
+ }
+
+ delete[] next;
+ delete[] repr;
+
+ return maxver;
}
} // namespace re2c
void tagged_epsilon_closure(determ_context_t &ctx)
{
- closure_t &closure = ctx.dc_closure;
-
- // build tagged epsilon-closure of the given set of NFA states
- if (ctx.dc_opts->posix_captures) {
- closure_posix(ctx);
- prune(closure, ctx.dc_nfa.rules);
- std::sort(closure.begin(), closure.end(), cmpby_rule_state);
- orders(ctx);
- } else {
- closure_leftmost(ctx);
- prune(closure, ctx.dc_nfa.rules);
- }
-
- // see note [the difference between TDFA(0) and TDFA(1)]
- if (!ctx.dc_opts->lookahead) {
- lower_lookahead_to_transition(closure);
- }
-
- // merge tags from different rules, find nondeterministic tags
- generate_versions(ctx);
+ closure_t &closure = ctx.dc_closure;
+
+ // build tagged epsilon-closure of the given set of NFA states
+ if (ctx.dc_opts->posix_captures) {
+ closure_posix(ctx);
+ prune(closure, ctx.dc_nfa.rules);
+ std::sort(closure.begin(), closure.end(), cmpby_rule_state);
+ orders(ctx);
+ } else {
+ closure_leftmost(ctx);
+ prune(closure, ctx.dc_nfa.rules);
+ }
+
+ // see note [the difference between TDFA(0) and TDFA(1)]
+ if (!ctx.dc_opts->lookahead) {
+ lower_lookahead_to_transition(closure);
+ }
+
+ // merge tags from different rules, find nondeterministic tags
+ generate_versions(ctx);
}
bool cmpby_rule_state(const clos_t &x, const clos_t &y)
{
- const nfa_state_t *sx = x.state, *sy = y.state;
- const size_t rx = sx->rule, ry = sy->rule;
- if (rx < ry) return true;
- if (rx > ry) return false;
- if (sx < sy) return true;
- if (sx > sy) return false;
- // all items in closute have different states
- return false;
+ const nfa_state_t *sx = x.state, *sy = y.state;
+ const size_t rx = sx->rule, ry = sy->rule;
+ if (rx < ry) return true;
+ if (rx > ry) return false;
+ if (sx < sy) return true;
+ if (sx > sy) return false;
+ // all items in closure have different states
+ return false;
}
nfa_state_t *relax(determ_context_t &ctx, clos_t x)
{
- closure_t &done = ctx.dc_closure;
- nfa_state_t *q = x.state;
- const uint32_t idx = q->clos;
- int32_t h1, h2;
-
- // first time we see this state
- if (idx == NOCLOS) {
- q->clos = static_cast<uint32_t>(done.size());
- done.push_back(x);
- }
-
- // States of in-degree less than 2 are not joint points;
- // the fact that we are re-scanning this state means that we found
- // a better path to some previous state. Due to the right distributivity
- // of path comparison over path concatenation (X < Y => XZ < YZ) we
- // can just propagate the new path up to the next join point.
- else if (q->indeg < 2) {
- done[idx] = x;
- }
-
- // join point; compare the new path and the old path
- else if (precedence(ctx, x, done[idx], h1, h2) < 0) {
- done[idx] = x;
- }
-
- // the previous path was better, discard the new one
- else {
- q = NULL;
- }
-
- return q;
+ closure_t &done = ctx.dc_closure;
+ nfa_state_t *q = x.state;
+ const uint32_t idx = q->clos;
+ int32_t h1, h2;
+
+ // first time we see this state
+ if (idx == NOCLOS) {
+ q->clos = static_cast<uint32_t>(done.size());
+ done.push_back(x);
+ }
+
+ // States of in-degree less than 2 are not join points;
+ // the fact that we are re-scanning this state means that we found
+ // a better path to some previous state. Due to the right distributivity
+ // of path comparison over path concatenation (X < Y => XZ < YZ) we
+ // can just propagate the new path up to the next join point.
+ else if (q->indeg < 2) {
+ done[idx] = x;
+ }
+
+ // join point; compare the new path and the old path
+ else if (precedence(ctx, x, done[idx], h1, h2) < 0) {
+ done[idx] = x;
+ }
+
+ // the previous path was better, discard the new one
+ else {
+ q = NULL;
+ }
+
+ return q;
}
nfa_state_t *explore(determ_context_t &ctx, nfa_state_t *q)
{
- // find the next admissible transition, adjust the index
- // of the next transition and return the to-state
- nfa_state_t *p = NULL;
- clos_t x = ctx.dc_closure[q->clos];
- switch (q->type) {
- case nfa_state_t::NIL:
- if (q->arcidx == 0) {
- x.state = q->nil.out;
- p = relax(ctx, x);
- ++q->arcidx;
- }
- break;
- case nfa_state_t::ALT:
- if (q->arcidx == 0) {
- x.state = q->alt.out1;
- p = relax(ctx, x);
- ++q->arcidx;
- }
- if (q->arcidx == 1 && !p) {
- x.state = q->alt.out2;
- p = relax(ctx, x);
- ++q->arcidx;
- }
- break;
- case nfa_state_t::TAG:
- if (q->arcidx == 0) {
- x.state = q->tag.out;
- x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info);
- p = relax(ctx, x);
- ++q->arcidx;
- }
- break;
- case nfa_state_t::RAN:
- case nfa_state_t::FIN:
- break;
- }
- return p;
+ // find the next admissible transition, adjust the index
+ // of the next transition and return the to-state
+ nfa_state_t *p = NULL;
+ clos_t x = ctx.dc_closure[q->clos];
+ switch (q->type) {
+ case nfa_state_t::NIL:
+ if (q->arcidx == 0) {
+ x.state = q->nil.out;
+ p = relax(ctx, x);
+ ++q->arcidx;
+ }
+ break;
+ case nfa_state_t::ALT:
+ if (q->arcidx == 0) {
+ x.state = q->alt.out1;
+ p = relax(ctx, x);
+ ++q->arcidx;
+ }
+ if (q->arcidx == 1 && !p) {
+ x.state = q->alt.out2;
+ p = relax(ctx, x);
+ ++q->arcidx;
+ }
+ break;
+ case nfa_state_t::TAG:
+ if (q->arcidx == 0) {
+ x.state = q->tag.out;
+ x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info);
+ p = relax(ctx, x);
+ ++q->arcidx;
+ }
+ break;
+ case nfa_state_t::RAN:
+ case nfa_state_t::FIN:
+ break;
+ }
+ return p;
}
void closure_posix(determ_context_t &ctx)
{
- const closure_t &init = ctx.dc_reached;
- closure_t &done = ctx.dc_closure;
- std::stack<nfa_state_t*>
- &topsort = ctx.dc_stack_topsort,
- &linear = ctx.dc_stack_linear;
- nfa_state_t *q, *p;
-
- done.clear();
-
- // enqueue all initial states (there might be duplicates)
- for (cclositer_t c = init.begin(); c != init.end(); ++c) {
- q = relax(ctx, *c);
- if (q) {
- topsort.push(q);
- q->status = GOR_TOPSORT;
- }
- }
-
- // Gordberg-Radzik 'shortest path' algorithm.
- // Papers: 1993, "A heuristic improvement of the Bellman-Ford
- // algorithm" by Goldberg, Radzik and 1996, Shortest paths algorithms:
- // Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik.
- // Complexity for digraph G=(V,E) is O(|V|*|E|).
- for (; !topsort.empty(); ) {
-
- // 1st pass: scan admissible subgraph reachable from B-stack
- // and topologically sort it (this can be done by a single
- // depth-first postorder traversal)
- for (; !topsort.empty(); ) {
- q = topsort.top();
- topsort.pop();
-
- if (q->status != GOR_LINEAR) {
- q->status = GOR_TOPSORT;
-
- // find next admissible transition
- while ((p = explore(ctx, q))
- && p->status != GOR_NOPASS) {
- p->active = 1;
- }
-
- // follow the admissible transition
- if (p) {
- topsort.push(q);
- topsort.push(p);
- p->arcidx = 0;
- }
- // done with this state: all deps visited
- else {
- q->status = GOR_LINEAR;
- linear.push(q);
- }
- }
- }
-
- // 2nd pass: scan topologically ordered states from A-stack
- // and push head states of relaxed transitions to B-stack
- for (; !linear.empty(); ) {
- q = linear.top();
- linear.pop();
-
- if (q->active) {
- // scan admissible transitions
- q->arcidx = 0;
- while ((p = explore(ctx, q))) {
- if (p->status == GOR_NOPASS) {
- topsort.push(p);
- p->arcidx = 0;
- }
- else if (p->status == GOR_LINEAR) {
- p->active = 1;
- }
- }
- }
-
- q->status = GOR_NOPASS;
- q->active = 0;
- }
- }
-
- // clean up (do this before removing any states from closure)
- for (clositer_t i = done.begin(); i != done.end(); ++i) {
- q = i->state;
- q->clos = NOCLOS;
- q->arcidx = 0;
- assert(q->status == GOR_NOPASS && q->active == 0);
- }
+ const closure_t &init = ctx.dc_reached;
+ closure_t &done = ctx.dc_closure;
+ std::stack<nfa_state_t*>
+ &topsort = ctx.dc_stack_topsort,
+ &linear = ctx.dc_stack_linear;
+ nfa_state_t *q, *p;
+
+ done.clear();
+
+ // enqueue all initial states (there might be duplicates)
+ for (cclositer_t c = init.begin(); c != init.end(); ++c) {
+ q = relax(ctx, *c);
+ if (q) {
+ topsort.push(q);
+ q->status = GOR_TOPSORT;
+ }
+ }
+
+ // Goldberg-Radzik 'shortest path' algorithm.
+ // Papers: 1993, "A heuristic improvement of the Bellman-Ford
+ // algorithm" by Goldberg, Radzik and 1996, "Shortest paths algorithms:
+ // Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik.
+ // Complexity for digraph G=(V,E) is O(|V|*|E|).
+ for (; !topsort.empty(); ) {
+
+ // 1st pass: scan admissible subgraph reachable from B-stack
+ // and topologically sort it (this can be done by a single
+ // depth-first postorder traversal)
+ for (; !topsort.empty(); ) {
+ q = topsort.top();
+ topsort.pop();
+
+ if (q->status != GOR_LINEAR) {
+ q->status = GOR_TOPSORT;
+
+ // find next admissible transition
+ while ((p = explore(ctx, q))
+ && p->status != GOR_NOPASS) {
+ p->active = 1;
+ }
+
+ // follow the admissible transition
+ if (p) {
+ topsort.push(q);
+ topsort.push(p);
+ p->arcidx = 0;
+ }
+ // done with this state: all deps visited
+ else {
+ q->status = GOR_LINEAR;
+ linear.push(q);
+ }
+ }
+ }
+
+ // 2nd pass: scan topologically ordered states from A-stack
+ // and push head states of relaxed transitions to B-stack
+ for (; !linear.empty(); ) {
+ q = linear.top();
+ linear.pop();
+
+ if (q->active) {
+ // scan admissible transitions
+ q->arcidx = 0;
+ while ((p = explore(ctx, q))) {
+ if (p->status == GOR_NOPASS) {
+ topsort.push(p);
+ p->arcidx = 0;
+ }
+ else if (p->status == GOR_LINEAR) {
+ p->active = 1;
+ }
+ }
+ }
+
+ q->status = GOR_NOPASS;
+ q->active = 0;
+ }
+ }
+
+ // clean up (do this before removing any states from closure)
+ for (clositer_t i = done.begin(); i != done.end(); ++i) {
+ q = i->state;
+ q->clos = NOCLOS;
+ q->arcidx = 0;
+ assert(q->status == GOR_NOPASS && q->active == 0);
+ }
}
// Builds the epsilon-closure of the items in ctx.dc_reached and stores it
// in ctx.dc_closure, using ctx.dc_stack_dfs as the work stack of a
// depth-first walk. A state joins the closure the first time it is popped;
// later visits of the same state are ignored.
void closure_leftmost(determ_context_t &ctx)
{
    const closure_t &reached = ctx.dc_reached;
    closure_t &closure = ctx.dc_closure;
    std::stack<clos_t> &pending = ctx.dc_stack_dfs;

    closure.clear();

    // seed the stack back-to-front, so that the first reached item ends
    // up on top of the stack and is explored first
    rcclositer_t seed = reached.rbegin();
    while (seed != reached.rend()) {
        pending.push(*seed);
        ++seed;
    }

    // depth-first search: every state is added to the closure at most once
    while (!pending.empty()) {
        clos_t item = pending.top();
        pending.pop();

        nfa_state_t *state = item.state;
        if (state->clos != NOCLOS) {
            // this state already has a closure item
            continue;
        }

        // remember the index of the closure item in the state itself
        state->clos = static_cast<uint32_t>(closure.size());
        closure.push_back(item);

        switch (state->type) {
            case nfa_state_t::NIL:
                item.state = state->nil.out;
                pending.push(item);
                break;
            case nfa_state_t::ALT:
                // second alternative goes in first, so that the first
                // alternative is popped (and explored) before it
                item.state = state->alt.out2;
                pending.push(item);
                item.state = state->alt.out1;
                pending.push(item);
                break;
            case nfa_state_t::TAG:
                // extend lookahead tag history with this tag
                item.state = state->tag.out;
                item.tlook = ctx.dc_taghistory.push(item.tlook, state->tag.info);
                pending.push(item);
                break;
            case nfa_state_t::RAN:
            case nfa_state_t::FIN:
                // no outgoing epsilon-transitions to follow
                break;
        }
    }

    // reset the per-state closure index
    // (do this before removing any states from closure)
    for (clositer_t it = closure.begin(); it != closure.end(); ++it) {
        it->state->clos = NOCLOS;
    }
}
// Shrinks the closure to the items that matter for the DFA state:
// all items on RAN states followed by at most one item on a FIN state.
// Rules of the dropped final items are marked as shadowed by the rule of
// the final item that is kept.
void prune(closure_t &closure, std::valarray<Rule> &rules)
{
    clositer_t first = closure.begin();
    clositer_t last = closure.end();

    // drop "inner" states (non-final without outgoing non-epsilon
    // transitions): RAN items are moved to the front, FIN items follow,
    // everything else is left behind 'fin_end' and cut off below
    clositer_t fin_begin = std::stable_partition(first, last, clos_t::ran);
    clositer_t fin_end = std::stable_partition(fin_begin, last, clos_t::fin);

    size_t kept = static_cast<size_t>(fin_end - first);

    // drop all final states except one; mark dropped rules as shadowed
    // see note [at most one final item per closure]
    if (fin_begin != fin_end) {
        std::sort(fin_begin, fin_end, cmpby_rule_state);

        // line of the rule that survives (first after sorting)
        const uint32_t winner_line = rules[fin_begin->state->rule].code->fline;

        for (clositer_t loser = fin_begin + 1; loser < fin_end; ++loser) {
            rules[loser->state->rule].shadow.insert(winner_line);
        }

        kept = static_cast<size_t>(fin_begin - first) + 1;
    }

    closure.resize(kept);
}
// For every closure item, moves the lookahead tag history into the
// transition tag history and resets the lookahead history to HROOT.
void lower_lookahead_to_transition(closure_t &closure)
{
    clositer_t item = closure.begin();
    while (item != closure.end()) {
        item->ttran = item->tlook;
        item->tlook = HROOT;
        ++item;
    }
}
// Assigns new tag versions to the items of ctx.dc_closure according to
// their transition tag histories, builds the list of tag commands for the
// current transition (stored in ctx.dc_actions) and rewrites the version
// vector of every closure item that has transition tags.
void generate_versions(determ_context_t &ctx)
{
    dfa_t &dfa = ctx.dc_dfa;
    const std::vector<Tag> &tags = dfa.tags;
    const size_t ntag = tags.size();
    tagver_t &maxver = dfa.maxtagver;
    tagver_table_t &table = ctx.dc_tagvertbl;
    tagver_t *scratch = table.buffer;
    closure_t &closure = ctx.dc_closure;
    tag_history_t &trie = ctx.dc_taghistory;
    newvers_t &versions = ctx.dc_newvers;

    clositer_t first = closure.begin();
    clositer_t end_item = closure.end();
    newver_cmp_t order(trie);
    newvers_t actions(order);
    tcmd_t *cmd = NULL;

    // for each tag, if there is at least one tagged transition,
    // allocate new version (negative for bottom and positive for
    // normal transition, however absolute value should be unique
    // among all versions of all tags)
    for (clositer_t item = first; item != end_item; ++item) {
        const hidx_t look = item->tlook;
        const hidx_t tran = item->ttran;
        if (tran == HROOT) {
            continue; // no transition tags on this item
        }

        const tagver_t *base = table[item->tvers];
        for (size_t t = 0; t < ntag; ++t) {
            const Tag &tag = tags[t];
            const tagver_t tran_last = trie.last(tran, t);
            const tagver_t look_last = trie.last(look, t);

            if (tran_last == TAGVER_ZERO) {
                continue; // this tag is not in the transition history
            }

            // only tags with history depend on the previous version
            const tagver_t origin = history(tag) ? base[t] : TAGVER_ZERO;
            newver_t key = {t, origin, tran};

            // candidate version; it is consumed only if the key is new
            const tagver_t sign = (tran_last == TAGVER_BOTTOM) ? -1 : 1;
            const tagver_t fresh = (maxver + 1) * sign;
            const tagver_t actual
                = versions.insert(std::make_pair(key, fresh)).first->second;
            if (fresh == actual) {
                ++maxver;
            }

            if (!fixed(tag) && (look_last == TAGVER_ZERO || history(tag))) {
                actions.insert(std::make_pair(key, actual));
            }
        }
    }

    // actions
    for (newvers_t::iterator act = actions.begin(); act != actions.end(); ++act) {
        const tagver_t version = act->second;
        const tagver_t origin = act->first.base;
        const hidx_t tran = act->first.history;
        const size_t t = act->first.tag;

        if (history(tags[t])) {
            cmd = dfa.tcpool.make_add(cmd, abs(version), abs(origin), trie, tran, t);
        } else {
            cmd = dfa.tcpool.make_set(cmd, abs(version), trie.last(tran, t));
        }
    }

    // mark tags with history
    for (newvers_t::iterator ver = versions.begin(); ver != versions.end(); ++ver) {
        if (history(tags[ver->first.tag])) {
            dfa.mtagvers.insert(abs(ver->second));
        }
    }

    // update tag versions in closure
    for (clositer_t item = first; item != end_item; ++item) {
        const hidx_t tran = item->ttran;
        if (tran == HROOT) {
            continue;
        }

        const tagver_t *base = table[item->tvers];
        for (size_t t = 0; t < ntag; ++t) {
            const tagver_t old_version = base[t];
            const tagver_t tran_last = trie.last(tran, t);
            const tagver_t origin = history(tags[t]) ? old_version : TAGVER_ZERO;

            if (tran_last == TAGVER_ZERO) {
                // tag untouched by this transition: keep the old version
                scratch[t] = old_version;
            } else {
                newver_t key = {t, origin, tran};
                scratch[t] = versions[key];
            }
        }
        item->tvers = table.insert(scratch);
    }

    ctx.dc_actions = cmd;
}
// Packs two precedence values into a single 32-bit word.
//   longest  - value stored in the lower 30 bits
//   leftmost - value stored in the higher 2 bits
// Returns 'longest' OR-ed with 'leftmost' scaled by 2^30.
int32_t pack(int32_t longest, int32_t leftmost)
{
    // leftmost: higher 2 bits, longest: lower 30 bits
    // NB: multiply instead of shifting: 'leftmost' may be negative, and
    // left shift of a negative signed value is undefined before C++20
    return longest | (leftmost * (1 << 30));
}
// Fills a square precedence table (nclos x nclos, row-major) for all pairs
// of items in ctx.dc_closure and stores it in ctx.dc_prectbl. The table is
// taken from ctx.dc_allocator.
void orders(determ_context_t &ctx)
{
    closure_t &closure = ctx.dc_closure;
    const size_t nclos = closure.size();

    prectable_t *table = ctx.dc_allocator.alloct<prectable_t>(nclos * nclos);

    for (size_t row = 0; row < nclos; ++row) {
        // compare each unordered pair once and fill both mirrored cells
        for (size_t col = row + 1; col < nclos; ++col) {
            int32_t rho_row, rho_col;
            const int32_t leftmost
                = precedence (ctx, closure[row], closure[col], rho_row, rho_col);

            table[row * nclos + col] = pack(rho_row, leftmost);
            // the mirrored cell gets the opposite leftmost precedence
            table[col * nclos + row] = pack(rho_col, -leftmost);
        }

        // an item compared to itself
        table[row * nclos + row] = 0;
    }

    ctx.dc_prectbl = table;
}
} // namespace re2c
// reversed DFA
struct rdfa_t
{
- struct arc_t
- {
- size_t dest;
- arc_t *next;
- };
-
- struct state_t
- {
- arc_t *arcs;
- size_t rule;
- bool fallthru;
- };
-
- size_t nstates;
- size_t nrules;
- state_t *states;
- arc_t *arcs;
-
- explicit rdfa_t(const dfa_t &dfa)
- : nstates(dfa.states.size())
- , nrules(dfa.rules.size())
- , states(new state_t[nstates]())
- , arcs(new arc_t[nstates * dfa.nchars])
- {
- // init states
- for (size_t i = 0; i < nstates; ++i) {
- state_t &s = states[i];
- s.arcs = NULL;
- const size_t r = dfa.states[i]->rule;
- s.rule = r == Rule::NONE ? nrules : r;
- s.fallthru = false;
- }
- // init arcs
- arc_t *a = arcs;
- for (size_t i = 0; i < nstates; ++i) {
- dfa_state_t *s = dfa.states[i];
- for (size_t c = 0; c < dfa.nchars; ++c) {
- const size_t j = s->arcs[c];
- if (j != dfa_t::NIL) {
- a->dest = i;
- a->next = states[j].arcs;
- states[j].arcs = a++;
- } else {
- states[i].fallthru = true;
- }
- }
- }
- }
-
- ~rdfa_t()
- {
- delete[] states;
- delete[] arcs;
- }
-
- FORBID_COPY(rdfa_t);
+ struct arc_t
+ {
+ size_t dest;
+ arc_t *next;
+ };
+
+ struct state_t
+ {
+ arc_t *arcs;
+ size_t rule;
+ bool fallthru;
+ };
+
+ size_t nstates;
+ size_t nrules;
+ state_t *states;
+ arc_t *arcs;
+
+ explicit rdfa_t(const dfa_t &dfa)
+ : nstates(dfa.states.size())
+ , nrules(dfa.rules.size())
+ , states(new state_t[nstates]())
+ , arcs(new arc_t[nstates * dfa.nchars])
+ {
+ // init states
+ for (size_t i = 0; i < nstates; ++i) {
+ state_t &s = states[i];
+ s.arcs = NULL;
+ const size_t r = dfa.states[i]->rule;
+ s.rule = r == Rule::NONE ? nrules : r;
+ s.fallthru = false;
+ }
+ // init arcs
+ arc_t *a = arcs;
+ for (size_t i = 0; i < nstates; ++i) {
+ dfa_state_t *s = dfa.states[i];
+ for (size_t c = 0; c < dfa.nchars; ++c) {
+ const size_t j = s->arcs[c];
+ if (j != dfa_t::NIL) {
+ a->dest = i;
+ a->next = states[j].arcs;
+ states[j].arcs = a++;
+ } else {
+ states[i].fallthru = true;
+ }
+ }
+ }
+ }
+
+ ~rdfa_t()
+ {
+ delete[] states;
+ delete[] arcs;
+ }
+
+ FORBID_COPY(rdfa_t);
};
static void backprop(const rdfa_t &rdfa, bool *live,
- size_t rule, size_t state)
+ size_t rule, size_t state)
{
- // "none-rule" is unreachable from final states:
- // be careful to mask it before propagating
- const rdfa_t::state_t &s = rdfa.states[state];
- if (rule == rdfa.nrules) {
- rule = s.rule;
- }
-
- // if the rule has already been set, than either it's a loop
- // or another branch of back propagation has already been here,
- // in both cases we should stop: there's nothing new to propagate
- bool &l = live[rule * rdfa.nstates + state];
- if (l) return;
- l = true;
-
- for (const rdfa_t::arc_t *a = s.arcs; a; a = a->next) {
- backprop(rdfa, live, rule, a->dest);
- }
+ // "none-rule" is unreachable from final states:
+ // be careful to mask it before propagating
+ const rdfa_t::state_t &s = rdfa.states[state];
+ if (rule == rdfa.nrules) {
+ rule = s.rule;
+ }
+
+ // if the rule has already been set, than either it's a loop
+ // or another branch of back propagation has already been here,
+ // in both cases we should stop: there's nothing new to propagate
+ bool &l = live[rule * rdfa.nstates + state];
+ if (l) return;
+ l = true;
+
+ for (const rdfa_t::arc_t *a = s.arcs; a; a = a->next) {
+ backprop(rdfa, live, rule, a->dest);
+ }
}
static void liveness_analyses(const rdfa_t &rdfa, bool *live)
{
- for (size_t i = 0; i < rdfa.nstates; ++i) {
- const rdfa_t::state_t &s = rdfa.states[i];
- if (s.fallthru) {
- backprop(rdfa, live, s.rule, i);
- }
- }
+ for (size_t i = 0; i < rdfa.nstates; ++i) {
+ const rdfa_t::state_t &s = rdfa.states[i];
+ if (s.fallthru) {
+ backprop(rdfa, live, s.rule, i);
+ }
+ }
}
static void warn_dead_rules(const dfa_t &dfa, size_t defrule,
- const std::string &cond, const bool *live, Warn &warn)
+ const std::string &cond, const bool *live, Warn &warn)
{
- const size_t nstates = dfa.states.size();
- const size_t nrules = dfa.rules.size();
-
- for (size_t i = 0; i < nstates; ++i) {
- const size_t r = dfa.states[i]->rule;
- if (r != Rule::NONE && !live[r * nstates + i]) {
- // skip last rule (it's the NONE-rule)
- for (size_t j = 0; j < nrules; ++j) {
- if (live[j * nstates + i]) {
- dfa.rules[r].shadow.insert(dfa.rules[j].code->fline);
- }
- }
- }
- }
-
- for (size_t i = 0; i < nrules; ++i) {
- // default rule '*' should not be reported
- if (i != defrule && !live[i * nstates]) {
- warn.unreachable_rule(cond, dfa.rules[i]);
- }
- }
+ const size_t nstates = dfa.states.size();
+ const size_t nrules = dfa.rules.size();
+
+ for (size_t i = 0; i < nstates; ++i) {
+ const size_t r = dfa.states[i]->rule;
+ if (r != Rule::NONE && !live[r * nstates + i]) {
+ // skip last rule (it's the NONE-rule)
+ for (size_t j = 0; j < nrules; ++j) {
+ if (live[j * nstates + i]) {
+ dfa.rules[r].shadow.insert(dfa.rules[j].code->fline);
+ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < nrules; ++i) {
+ // default rule '*' should not be reported
+ if (i != defrule && !live[i * nstates]) {
+ warn.unreachable_rule(cond, dfa.rules[i]);
+ }
+ }
}
// Turns final states into non-final ones when every transition from the
// state exists and leads to a state that is not marked in 'fallthru'.
// For such a state the rule is reset to Rule::NONE and the tag commands in
// slot 'nchars' are dropped.
static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru)
{
    const size_t nstates = dfa.states.size();
    const size_t nsym = dfa.nchars;

    for (size_t i = 0; i < nstates; ++i) {
        dfa_state_t *state = dfa.states[i];
        if (state->rule == Rule::NONE) {
            continue;
        }

        // final state is useful iff there is at least one
        // non-accepting path from this state: look for the first
        // symbol that proves it
        size_t sym = 0;
        while (sym < nsym) {
            const size_t to = state->arcs[sym];
            if (to == dfa_t::NIL || fallthru[to]) {
                break;
            }
            ++sym;
        }

        // no such symbol: the state is shadowed
        if (sym == nsym) {
            state->rule = Rule::NONE;
            state->tcmd[nsym] = NULL;
        }
    }
}
// Copies the 'fallthru' flags into DFA states and marks as fallback every
// final state that has a transition to a fallthru state.
static void find_fallback_states(dfa_t &dfa, const bool *fallthru)
{
    const size_t nstate = dfa.states.size();
    const size_t nsym = dfa.nchars;

    for (size_t i = 0; i < nstate; ++i) {
        dfa_state_t *state = dfa.states[i];

        state->fallthru = fallthru[i];

        // only final states can be fallback states
        if (state->rule == Rule::NONE) {
            continue;
        }

        for (size_t sym = 0; sym < nsym; ++sym) {
            const size_t to = state->arcs[sym];
            if (to == dfa_t::NIL || !fallthru[to]) {
                continue;
            }
            state->fallback = true;
            break;
        }
    }
}
// Runs liveness analysis on the reversed DFA, then warns about dead rules,
// removes dead final states and finds fallback states.
//   defrule - index of the default rule (passed on to the warning pass)
//   cond    - condition name (passed on to the warning pass)
void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond, Warn &warn)
{
    const rdfa_t rdfa(dfa);
    const size_t nstates = rdfa.nstates;
    const size_t ncells = (rdfa.nrules + 1) * nstates;

    // one row per rule plus one extra row for the "no rule" case,
    // all cells start as 'false'
    bool *live = new bool[ncells]();

    // the last row tells which states are fallthru
    bool *fallthru = live + ncells - nstates;

    liveness_analyses(rdfa, live);
    warn_dead_rules(dfa, defrule, cond, live, warn);
    remove_dead_final_states(dfa, fallthru);
    find_fallback_states(dfa, fallthru);

    delete[] live;
}
} // namespace re2c
// Returns the target of the transition from 'state' on 'symbol', or NULL
// if 'state' is not a RAN state or none of its ranges contains 'symbol'.
// A range contains the symbol if lower() <= symbol < upper().
nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
{
    if (state->type == nfa_state_t::RAN) {
        const Range *range = state->ran.ran;
        while (range) {
            const bool hit = (symbol >= range->lower()) && (symbol < range->upper());
            if (hit) {
                return state->ran.out;
            }
            range = range->next();
        }
    }
    return NULL;
}
// Refills ctx.dc_reached with one item per kernel item of the origin state
// (ctx.dc_origin) that has a transition on the current symbol
// (ctx.dc_symbol).
void reach_on_symbol(determ_context_t &ctx)
{
    const kernel_t *origin = ctx.dc_kernels[ctx.dc_origin];
    const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol];
    closure_t &reached = ctx.dc_reached;

    reached.clear();

    for (uint32_t idx = 0; idx < origin->size; ++idx) {
        nfa_state_t *target = transition(origin->state[idx], symbol);
        if (!target) {
            continue;
        }

        // field order: state, origin, tag versions, transition tags
        // (taken from the lookahead tags of the kernel), lookahead tags
        clos_t item = {target, idx, origin->tvers[idx], origin->tlook[idx], HROOT};
        reached.push_back(item);
    }
}
// Registers the initial tag version vectors in ctx.dc_tagvertbl and sets up
// version bookkeeping in the DFA (maxtagver, finvers, mtagvers).
// Returns the index of the vector of initial tag versions [1 .. N].
static uint32_t init_tag_versions(determ_context_t &ctx)
{
    dfa_t &dfa = ctx.dc_dfa;
    const size_t ntags = dfa.tags.size();

    // all-zero tag configuration must have static number zero
    // NB: the insertion must not live inside assert(): with NDEBUG the
    // argument of assert() is not evaluated, so the configuration would
    // never be inserted and all subsequent indices would be off
    const bool zero_tags_ok
        = (ZERO_TAGS == ctx.dc_tagvertbl.insert_const(TAGVER_ZERO));
    assert(zero_tags_ok);
    (void) zero_tags_ok; // unused when assertions are disabled

    // initial tag versions: [1 .. N]
    const uint32_t INITIAL_TAGS = ctx.dc_tagvertbl.insert_succ(1);

    // other versions: [ .. -(N + 1)] and [N + 1 .. ]
    dfa.maxtagver = static_cast<tagver_t>(ntags);

    // final/fallback versions will be assigned on the go
    // (fixed tags get no final version)
    dfa.finvers = new tagver_t[ntags];
    for (size_t i = 0; i < ntags; ++i) {
        dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver;
    }

    // mark tags with history (initial and final)
    for (size_t i = 0; i < ntags; ++i) {
        if (history(dfa.tags[i])) {
            tagver_t v = static_cast<tagver_t>(i) + 1, f = dfa.finvers[i];
            if (f != TAGVER_ZERO) {
                dfa.mtagvers.insert(f);
            }
            dfa.mtagvers.insert(v);
        }
    }

    return INITIAL_TAGS;
}
// Builds the DFA from the NFA: creates the initial state from the closure
// of the NFA root, then keeps processing kernels (including the ones added
// along the way) until no unprocessed kernels are left.
dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn)
    : states()
    , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges
    , charset(nfa.charset)
    , rules(nfa.rules)
    , tags(nfa.tags)
    , mtagvers(*new std::set<tagver_t>)
    , finvers(NULL)
    , tcpool(*new tcpool_t)
    , maxtagver(0)
    , tcmd0(NULL)
    , tcid0(TCID0)
{
    determ_context_t ctx(opts, warn, cond, nfa, *this);

    const uint32_t INITIAL_TAGS = init_tag_versions(ctx);

    // initial state
    const clos_t root_item = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT};
    ctx.dc_reached.push_back(root_item);
    tagged_epsilon_closure(ctx);
    find_state(ctx);

    // iterate while new kernels are added: for each alphabet symbol,
    // build tagged epsilon-closure of all reachable NFA states,
    // then find identical or mappable DFA state or add a new one
    uint32_t origin = 0;
    while (origin < ctx.dc_kernels.size()) {
        ctx.dc_origin = origin;
        ctx.dc_newvers.clear();

        uint32_t symbol = 0;
        while (symbol < nchars) {
            ctx.dc_symbol = symbol;

            reach_on_symbol(ctx);
            tagged_epsilon_closure(ctx);
            find_state(ctx);

            ++symbol;
        }

        ++origin;
    }

    warn_nondeterministic_tags(ctx);
}
// WARNING: this function assumes that kernel items are grouped by rule
void warn_nondeterministic_tags(const determ_context_t &ctx)
{
- if (ctx.dc_opts->posix_captures) return;
-
- Warn &warn = ctx.dc_warn;
- const kernels_t &kernels = ctx.dc_kernels;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- const std::valarray<Rule> &rules = ctx.dc_dfa.rules;
-
- const size_t
- ntag = tags.size(),
- nkrn = kernels.size(),
- nrule = rules.size();
- std::vector<size_t> maxv(ntag, 0);
- std::set<tagver_t> uniq;
-
- for (uint32_t i = 0; i < nkrn; ++i) {
- const kernel_t *k = kernels[i];
- nfa_state_t **s = k->state;
- const size_t n = k->size;
- const uint32_t *v = k->tvers;
-
- for (size_t u = 0; u < n;) {
- const size_t r = s[u]->rule;
- const Rule &rule = rules[r];
-
- const size_t l = u;
- for (; ++u < n && s[u]->rule == r;);
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
- uniq.clear();
- for (size_t m = l; m < u; ++m) {
- uniq.insert(ctx.dc_tagvertbl[v[m]][t]);
- }
- maxv[t] = std::max(maxv[t], uniq.size());
- }
- }
- }
-
- for (uint32_t r = 0; r < nrule; ++r) {
- const Rule &rule = rules[r];
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
- const size_t m = maxv[t];
- if (m > 1) {
- const uint32_t line = rule.code->fline;
- warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m);
- }
- }
- }
+ if (ctx.dc_opts->posix_captures) return;
+
+ Warn &warn = ctx.dc_warn;
+ const kernels_t &kernels = ctx.dc_kernels;
+ const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+ const std::valarray<Rule> &rules = ctx.dc_dfa.rules;
+
+ const size_t
+ ntag = tags.size(),
+ nkrn = kernels.size(),
+ nrule = rules.size();
+ std::vector<size_t> maxv(ntag, 0);
+ std::set<tagver_t> uniq;
+
+ for (uint32_t i = 0; i < nkrn; ++i) {
+ const kernel_t *k = kernels[i];
+ nfa_state_t **s = k->state;
+ const size_t n = k->size;
+ const uint32_t *v = k->tvers;
+
+ for (size_t u = 0; u < n;) {
+ const size_t r = s[u]->rule;
+ const Rule &rule = rules[r];
+
+ const size_t l = u;
+ for (; ++u < n && s[u]->rule == r;);
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ uniq.clear();
+ for (size_t m = l; m < u; ++m) {
+ uniq.insert(ctx.dc_tagvertbl[v[m]][t]);
+ }
+ maxv[t] = std::max(maxv[t], uniq.size());
+ }
+ }
+ }
+
+ for (uint32_t r = 0; r < nrule; ++r) {
+ const Rule &rule = rules[r];
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ const size_t m = maxv[t];
+ if (m > 1) {
+ const uint32_t line = rule.code->fline;
+ warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m);
+ }
+ }
+ }
}
// Stores the determinization input and output and default-initializes the
// temporary structures. The initializers are listed in the declaration
// order of the members: dc_buffers and dc_newvers refer to dc_allocator
// and dc_taghistory, which are declared (and thus initialized) earlier.
determ_context_t::determ_context_t(const opt_t *opts, Warn &warn,
    const std::string &condname, const nfa_t &nfa, dfa_t &dfa)
    // input
    : dc_opts(opts),
      dc_warn(warn),
      dc_condname(condname),
      dc_nfa(nfa),
      // output
      dc_dfa(dfa),
      // temporary structures
      dc_allocator(),
      dc_origin(dfa_t::NIL),
      dc_target(dfa_t::NIL),
      dc_symbol(0),
      dc_actions(NULL),
      dc_reached(),
      dc_closure(),
      dc_prectbl(NULL),
      dc_tagvertbl(nfa.tags.size()),
      dc_taghistory(),
      dc_kernels(),
      dc_buffers(dc_allocator),
      dc_newvers(newver_cmp_t(dc_taghistory)),
      dc_stack_topsort(),
      dc_stack_linear(),
      dc_stack_dfs(),
      dc_dump(opts)
{
}
// Deletes all DFA states owned by the 'states' vector.
dfa_t::~dfa_t()
{
    for (size_t i = 0; i < states.size(); ++i) {
        delete states[i];
    }
}
// Strict ordering of version keys: by tag, then by base version, then by
// reversed comparison of the tag histories (restricted to the tag of 'x').
bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const
{
    if (x.tag != y.tag) {
        return x.tag < y.tag;
    }
    if (x.base != y.base) {
        return x.base < y.base;
    }
    return history.compare_reversed(x.history, y.history, x.tag) < 0;
}
} // namespace re2c
struct clos_t
{
- nfa_state_t *state;
- uint32_t origin;
- uint32_t tvers; // vector of tag versions (including lookahead tags)
- hidx_t ttran; // history of transition tags
- hidx_t tlook; // history of lookahead tags
-
- static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
- static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
+ nfa_state_t *state;
+ uint32_t origin;
+ uint32_t tvers; // vector of tag versions (including lookahead tags)
+ hidx_t ttran; // history of transition tags
+ hidx_t tlook; // history of lookahead tags
+
+ static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
+ static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
};
struct newver_t
{
- size_t tag;
- tagver_t base;
- hidx_t history;
+ size_t tag;
+ tagver_t base;
+ hidx_t history;
};
struct newver_cmp_t
{
- tag_history_t &history;
+ tag_history_t &history;
- explicit newver_cmp_t(tag_history_t &h) : history(h) {}
- bool operator()(const newver_t &, const newver_t &) const;
+ explicit newver_cmp_t(tag_history_t &h) : history(h) {}
+ bool operator()(const newver_t &, const newver_t &) const;
};
struct kernel_t
{
- size_t size;
- const prectable_t *prectbl;
- nfa_state_t **state;
- uint32_t *tvers; // tag versions
- hidx_t *tlook; // lookahead tags
+ size_t size;
+ const prectable_t *prectbl;
+ nfa_state_t **state;
+ uint32_t *tvers; // tag versions
+ hidx_t *tlook; // lookahead tags
- FORBID_COPY(kernel_t);
+ FORBID_COPY(kernel_t);
};
struct kernel_buffers_t
{
- size_t maxsize;
- kernel_t *kernel;
- tagver_t cap; // capacity (greater or equal to max)
- tagver_t max; // maximal tag version
- char *memory;
- tagver_t *x2y;
- tagver_t *y2x;
- size_t *x2t;
- uint32_t *indegree;
- tcmd_t *backup_actions;
-
- explicit kernel_buffers_t(allocator_t &alc);
+ size_t maxsize;
+ kernel_t *kernel;
+ tagver_t cap; // capacity (greater or equal to max)
+ tagver_t max; // maximal tag version
+ char *memory;
+ tagver_t *x2y;
+ tagver_t *y2x;
+ size_t *x2t;
+ uint32_t *indegree;
+ tcmd_t *backup_actions;
+
+ explicit kernel_buffers_t(allocator_t &alc);
};
struct determ_context_t
{
- // determinization input
- const opt_t *dc_opts; // options
- Warn &dc_warn; // warnings
- const std::string &dc_condname; // the name of current condition (with -c)
- const nfa_t &dc_nfa; // TNFA
-
- // determinization output
- dfa_t &dc_dfa; // resulting TDFA
-
- // temporary structures used by determinization
- allocator_t dc_allocator;
- uint32_t dc_origin; // from-state of the current transition
- uint32_t dc_target; // to-state of the current transition
- uint32_t dc_symbol; // alphabet symbol of the current transition
- tcmd_t *dc_actions; // tag actions of the current transition
- closure_t dc_reached;
- closure_t dc_closure;
- prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation
- tagver_table_t dc_tagvertbl;
- tag_history_t dc_taghistory; // prefix trie of tag histories
- kernels_t dc_kernels; // TDFA states under construction
- kernel_buffers_t dc_buffers;
- newvers_t dc_newvers;
- std::stack<nfa_state_t*> dc_stack_topsort;
- std::stack<nfa_state_t*> dc_stack_linear;
- std::stack<clos_t> dc_stack_dfs;
- dump_dfa_t dc_dump;
-
- determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &);
- FORBID_COPY(determ_context_t);
+ // determinization input
+ const opt_t *dc_opts; // options
+ Warn &dc_warn; // warnings
+ const std::string &dc_condname; // the name of current condition (with -c)
+ const nfa_t &dc_nfa; // TNFA
+
+ // determinization output
+ dfa_t &dc_dfa; // resulting TDFA
+
+ // temporary structures used by determinization
+ allocator_t dc_allocator;
+ uint32_t dc_origin; // from-state of the current transition
+ uint32_t dc_target; // to-state of the current transition
+ uint32_t dc_symbol; // alphabet symbol of the current transition
+ tcmd_t *dc_actions; // tag actions of the current transition
+ closure_t dc_reached;
+ closure_t dc_closure;
+ prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation
+ tagver_table_t dc_tagvertbl;
+ tag_history_t dc_taghistory; // prefix trie of tag histories
+ kernels_t dc_kernels; // TDFA states under construction
+ kernel_buffers_t dc_buffers;
+ newvers_t dc_newvers;
+ std::stack<nfa_state_t*> dc_stack_topsort;
+ std::stack<nfa_state_t*> dc_stack_linear;
+ std::stack<clos_t> dc_stack_dfs;
+ dump_dfa_t dc_dump;
+
+ determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &);
+ FORBID_COPY(determ_context_t);
};
// One state of the DFA being built: its outgoing arcs and the tag command
// lists attached to them. The three arrays are owned by the state and are
// released in the destructor; copying is disabled with FORBID_COPY.
struct dfa_state_t
{
- size_t *arcs;
- tcmd_t **tcmd;
- tcid_t *tcid;
- size_t rule;
- bool fallthru;
- bool fallback;
+ size_t *arcs; // owned, nchars entries, allocated uninitialized by the constructor
+ tcmd_t **tcmd; // owned, nchars + 2 list heads, all null after construction
+ tcid_t *tcid; // owned, NULL after construction; released with delete[]
+ size_t rule; // Rule::NONE after construction
+ bool fallthru; // false after construction
+ bool fallback; // false after construction
- explicit dfa_state_t(size_t nchars)
- : arcs(new size_t[nchars])
- , tcmd(new tcmd_t*[nchars + 2]()) // +2 for final and fallback epsilon-transitions
- , tcid(NULL)
- , rule(Rule::NONE)
- , fallthru(false)
- , fallback(false)
- {}
- ~dfa_state_t()
- {
- delete[] arcs;
- delete[] tcmd;
- delete[] tcid;
- }
- FORBID_COPY(dfa_state_t);
+ explicit dfa_state_t(size_t nchars)
+ : arcs(new size_t[nchars])
+ , tcmd(new tcmd_t*[nchars + 2]()) // +2 for final and fallback epsilon-transitions
+ , tcid(NULL)
+ , rule(Rule::NONE)
+ , fallthru(false)
+ , fallback(false)
+ {}
+ ~dfa_state_t()
+ {
+ delete[] arcs;
+ delete[] tcmd;
+ delete[] tcid; // deleting NULL is a no-op, so an unassigned tcid is fine
+ }
+ FORBID_COPY(dfa_state_t);
};
// The DFA itself: a vector of state pointers plus references to data that is
// declared elsewhere (character set, rules, tags, tag command pool). The
// constructor and destructor are only declared here; copying is disabled.
struct dfa_t
{
- static const uint32_t NIL;
+ static const uint32_t NIL; // sentinel compared against arc targets and origin/target state indices
- std::vector<dfa_state_t*> states;
- const size_t nchars;
- std::vector<uint32_t> &charset;
- std::valarray<Rule> &rules;
- std::vector<Tag> &tags;
- std::set<tagver_t> &mtagvers;
- tagver_t *finvers;
- tcpool_t &tcpool;
- tagver_t maxtagver;
- tcmd_t *tcmd0;
- tcid_t tcid0;
+ std::vector<dfa_state_t*> states;
+ const size_t nchars; // number of symbols; also the length passed to dfa_state_t
+ std::vector<uint32_t> &charset;
+ std::valarray<Rule> &rules; // indexed by dfa_state_t::rule
+ std::vector<Tag> &tags;
+ std::set<tagver_t> &mtagvers;
+ tagver_t *finvers; // indexed by tag number (see the dump routines)
+ tcpool_t &tcpool; // pool used to create and look up tag commands
+ tagver_t maxtagver; // buffers elsewhere are sized from maxtagver + 1
+ tcmd_t *tcmd0; // printed by dump_dfa() as the initializer commands
+ tcid_t tcid0; // id alternative to tcmd0, used when tcmd0 is null
- dfa_t(const nfa_t &nfa, const opt_t *opts,
- const std::string &cond, Warn &warn);
- ~dfa_t();
+ dfa_t(const nfa_t &nfa, const opt_t *opts,
+ const std::string &cond, Warn &warn);
+ ~dfa_t();
- FORBID_COPY(dfa_t);
+ FORBID_COPY(dfa_t);
};
// Selector passed to minimization(); the two enumerators presumably name the
// available algorithms (table-based and Moore) -- the implementation is not
// visible here, confirm against the definition.
enum dfa_minimization_t
{
- DFA_MINIMIZATION_TABLE,
- DFA_MINIMIZATION_MOORE
+ DFA_MINIMIZATION_TABLE,
+ DFA_MINIMIZATION_MOORE
};
// Declared only; takes the DFA by non-const reference together with the
// selector above.
void minimization(dfa_t &dfa, dfa_minimization_t type);
// Initializes the dumper from the options: 'debug' mirrors
// opts->dump_dfa_raw and the intermediate-node counter starts at zero.
// When dumping is enabled, the opening part of a Graphviz "digraph DFA"
// (graph, node and edge defaults) is written to stderr; the matching closing
// brace is written by the destructor.
dump_dfa_t::dump_dfa_t(const opt_t *opts)
- : debug(opts->dump_dfa_raw)
- , uniqidx(0)
+ : debug(opts->dump_dfa_raw)
+ , uniqidx(0)
{
- if (!debug) return;
+ if (!debug) return;
- fprintf(stderr, "digraph DFA {\n"
- " rankdir=LR\n"
- " node[shape=plaintext fontname=Courier]\n"
- " edge[arrowhead=vee fontname=Courier]\n\n");
+ fprintf(stderr, "digraph DFA {\n"
+ " rankdir=LR\n"
+ " node[shape=plaintext fontname=Courier]\n"
+ " edge[arrowhead=vee fontname=Courier]\n\n");
}
// Closes the Graphviz graph opened by the constructor (only when dumping is
// enabled).
dump_dfa_t::~dump_dfa_t()
{
- if (!debug) return;
+ if (!debug) return;
- fprintf(stderr, "}\n");
+ fprintf(stderr, "}\n");
}
// Prints the tag history that ends at node 'i' to stderr.
// The function recurses to the predecessor before printing the current
// node, so entries appear from the root towards 'i'. The root itself is
// printed as " /". For each node: capture tags print their 'ncap' number,
// other non-trailing tags print their name, trailing tags print nothing;
// then a down arrow is printed if the element is TAGVER_BOTTOM, otherwise an
// up arrow, followed by a space.
static void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i)
{
- if (i == HROOT) {
- fprintf(stderr, " /");
- return;
- }
-
- dump_history(dfa, h, h.pred(i));
-
- const Tag &t = dfa.tags[h.tag(i)];
- const tagver_t v = h.elem(i);
- if (capture(t)) {
- fprintf(stderr, "%u", (uint32_t)t.ncap);
- } else if (!trailing(t)) {
- fprintf(stderr, "%s", t.name->c_str());
- }
- fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑");
- fprintf(stderr, " ");
+ if (i == HROOT) {
+ fprintf(stderr, " /");
+ return;
+ }
+
+ dump_history(dfa, h, h.pred(i)); // older entries first
+
+ const Tag &t = dfa.tags[h.tag(i)];
+ const tagver_t v = h.elem(i);
+ if (capture(t)) {
+ fprintf(stderr, "%u", (uint32_t)t.ncap);
+ } else if (!trailing(t)) {
+ fprintf(stderr, "%s", t.name->c_str());
+ }
+ fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑");
+ fprintf(stderr, " ");
}
// Debug output for one step of determinization (no-op unless 'debug' is set
// and ctx.dc_target is not dfa_t::NIL). Everything goes to stderr in
// Graphviz syntax:
//   1. a node holding a table with one row per item of ctx.dc_closure (NFA
//      state number, tag versions, lookahead history);
//   2. edges into that node: from the "void" point if ctx.dc_origin is NIL
//      (initial state), otherwise from the rows of the origin state;
//   3. for states that have a rule, a finalizer node "r<N>" with the final
//      tag versions and a dotted edge labelled with the final commands.
// isnew == true: the node is named after the target state index.
// isnew == false: the node gets a fresh number from 'uniqidx', is prefixed
// with "i" and drawn dotted, and an extra dotted edge is printed.
// NOTE(review): that extra edge points at node 'origin', while its label is
// the command list of the origin->symbol transition; 'target' may have been
// intended -- confirm.
// NOTE(review): the finalizer edge prints 'state' without 'prefix', so for
// isnew == false it refers to a node name without the leading "i" -- confirm
// this is intended.
// The inner declarations of 'i' and 't' shadow the outer ones.
void dump_dfa_t::state(const determ_context_t &ctx, bool isnew)
{
- if (!debug) return;
-
- const closure_t &closure = ctx.dc_closure;
- cclositer_t b = closure.begin(), e = closure.end(), c;
- const uint32_t origin = ctx.dc_origin;
- const uint32_t target = ctx.dc_target;
- const uint32_t symbol = ctx.dc_symbol;
- const dfa_t &dfa = ctx.dc_dfa;
- const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
- const tag_history_t &thist = ctx.dc_taghistory;
- uint32_t i;
-
- if (target == dfa_t::NIL) return;
-
- const uint32_t state = isnew ? target : ++uniqidx;
- const char *prefix = isnew ? "" : "i";
- const char *style = isnew ? "" : " STYLE=\"dotted\"";
-
- // closure
- fprintf(stderr, " %s%u [label=<<TABLE"
- " BORDER=\"0\""
- " CELLBORDER=\"1\""
- ">", prefix, state);
- i = 0;
- for (c = b; c != e; ++c, ++i) {
- fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
- i, style, static_cast<uint32_t>(c->state - ctx.dc_nfa.states));
-
- if (c->tvers != ZERO_TAGS) {
- const tagver_t *vers = tvtbl[c->tvers];
- const size_t ntag = dfa.tags.size();
-
- for (size_t t = 0; t < ntag; ++t) {
- fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
- }
-
- if (c->tlook != HROOT) {
- dump_history(dfa, thist, c->tlook);
- }
- }
-
- fprintf(stderr, "</TD></TR>");
- }
- fprintf(stderr, "</TABLE>>]\n");
-
- // transitions (initial state)
- if (origin == dfa_t::NIL) {
- fprintf(stderr, " void [shape=point]\n");
-
- uint32_t i = 0;
- for (c = b; c != e; ++c, ++i) {
- fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i);
- dump_tags(tvtbl, thist, c->ttran, c->tvers);
- fprintf(stderr, "\"]\n");
- }
- }
-
- // transitions (other states)
- else {
- if (!isnew) {
- fprintf(stderr,
- " i%u [style=dotted]\n"
- " i%u:s -> %u:s [style=dotted label=\"",
- state, state, origin);
- dump_tcmd(dfa.states[origin]->tcmd[symbol]);
- fprintf(stderr, "\"]\n");
- }
-
- uint32_t i = 0;
- for (c = b; c != e; ++c, ++i) {
- fprintf(stderr,
- " %u:%u:e -> %s%u:%u:w [label=\"%u",
- origin, c->origin, prefix, state, i, symbol);
- dump_tags(tvtbl, thist, c->ttran, c->tvers);
- fprintf(stderr, "\"]\n");
- }
- }
-
- // if final state, dump finalizer
- const dfa_state_t *t = dfa.states[target];
- if (t->rule != Rule::NONE) {
- const Rule &r = dfa.rules[t->rule];
- const tcmd_t *cmd = t->tcmd[dfa.nchars];
-
- // see note [at most one final item per closure]
- c = std::find_if(b, e, clos_t::fin);
- assert(c != e);
-
- fprintf(stderr, " r%u [shape=none label=\"(", state);
- for (size_t t = r.ltag; t < r.htag; ++t) {
- if (t > r.ltag) fprintf(stderr, " ");
- fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t]));
- }
- fprintf(stderr, ")\"]\n");
-
- fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"",
- state, c->origin, state);
- dump_tcmd(cmd);
- fprintf(stderr, "\"]\n");
- }
+ if (!debug) return;
+
+ const closure_t &closure = ctx.dc_closure;
+ cclositer_t b = closure.begin(), e = closure.end(), c;
+ const uint32_t origin = ctx.dc_origin;
+ const uint32_t target = ctx.dc_target;
+ const uint32_t symbol = ctx.dc_symbol;
+ const dfa_t &dfa = ctx.dc_dfa;
+ const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
+ const tag_history_t &thist = ctx.dc_taghistory;
+ uint32_t i;
+
+ if (target == dfa_t::NIL) return;
+
+ const uint32_t state = isnew ? target : ++uniqidx;
+ const char *prefix = isnew ? "" : "i";
+ const char *style = isnew ? "" : " STYLE=\"dotted\"";
+
+ // closure
+ fprintf(stderr, " %s%u [label=<<TABLE"
+ " BORDER=\"0\""
+ " CELLBORDER=\"1\""
+ ">", prefix, state);
+ i = 0;
+ for (c = b; c != e; ++c, ++i) {
+ fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
+ i, style, static_cast<uint32_t>(c->state - ctx.dc_nfa.states)); // NFA state printed as its offset from the states array
+
+ if (c->tvers != ZERO_TAGS) {
+ const tagver_t *vers = tvtbl[c->tvers];
+ const size_t ntag = dfa.tags.size();
+
+ for (size_t t = 0; t < ntag; ++t) {
+ fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
+ }
+
+ if (c->tlook != HROOT) {
+ dump_history(dfa, thist, c->tlook);
+ }
+ }
+
+ fprintf(stderr, "</TD></TR>");
+ }
+ fprintf(stderr, "</TABLE>>]\n");
+
+ // transitions (initial state)
+ if (origin == dfa_t::NIL) {
+ fprintf(stderr, " void [shape=point]\n");
+
+ uint32_t i = 0; // shadows the outer 'i'
+ for (c = b; c != e; ++c, ++i) {
+ fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i);
+ dump_tags(tvtbl, thist, c->ttran, c->tvers);
+ fprintf(stderr, "\"]\n");
+ }
+ }
+
+ // transitions (other states)
+ else {
+ if (!isnew) {
+ fprintf(stderr,
+ " i%u [style=dotted]\n"
+ " i%u:s -> %u:s [style=dotted label=\"",
+ state, state, origin);
+ dump_tcmd(dfa.states[origin]->tcmd[symbol]);
+ fprintf(stderr, "\"]\n");
+ }
+
+ uint32_t i = 0; // shadows the outer 'i'
+ for (c = b; c != e; ++c, ++i) {
+ fprintf(stderr,
+ " %u:%u:e -> %s%u:%u:w [label=\"%u",
+ origin, c->origin, prefix, state, i, symbol);
+ dump_tags(tvtbl, thist, c->ttran, c->tvers);
+ fprintf(stderr, "\"]\n");
+ }
+ }
+
+ // if final state, dump finalizer
+ const dfa_state_t *t = dfa.states[target];
+ if (t->rule != Rule::NONE) {
+ const Rule &r = dfa.rules[t->rule];
+ const tcmd_t *cmd = t->tcmd[dfa.nchars]; // slot nchars holds the final commands
+
+ // see note [at most one final item per closure]
+ c = std::find_if(b, e, clos_t::fin);
+ assert(c != e);
+
+ fprintf(stderr, " r%u [shape=none label=\"(", state);
+ for (size_t t = r.ltag; t < r.htag; ++t) { // 't' shadows the state pointer above
+ if (t > r.ltag) fprintf(stderr, " ");
+ fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t]));
+ }
+ fprintf(stderr, ")\"]\n");
+
+ fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"",
+ state, c->origin, state);
+ dump_tcmd(cmd);
+ fprintf(stderr, "\"]\n");
+ }
}
// Writes the whole DFA to stderr as a self-contained Graphviz digraph:
//   - a point node "n" with a dotted edge to "n0" labelled with the
//     initializer commands (tcmd0, or tcid0 looked up in the pool);
//   - one node "n<i>" per state;
//   - for each state with a rule, a filled state node plus a "dr<i>" node
//     that shows the final commands (slot 'nsym') and the final tag versions
//     of the rule's tag range;
//   - one labelled edge per symbol whose arc is not dfa_t::NIL.
void dump_dfa(const dfa_t &dfa)
{
- const size_t
- nstate = dfa.states.size(),
- nsym = dfa.nchars;
-
- fprintf(stderr,
- "digraph DFA {\n"
- " rankdir=LR\n"
- " node[shape=Mrecord fontname=Courier]\n"
- " edge[arrowhead=vee fontname=Courier]\n\n");
-
- // initializer
- fprintf(stderr,
- " n [shape=point]"
- " n -> n0 [style=dotted label=\"");
- dump_tcmd_or_tcid(dfa.tcmd0 ? &dfa.tcmd0 : NULL, &dfa.tcid0, 0, dfa.tcpool);
- fprintf(stderr, "\"]\n");
-
- for (uint32_t i = 0; i < nstate; ++i) {
- const dfa_state_t *s = dfa.states[i];
-
- // state
- fprintf(stderr, " n%u [height=0.2 width=0.2 label=\"%u\"]\n", i, i);
-
- // finalizer
- if (s->rule != Rule::NONE) {
- const Rule &r = dfa.rules[s->rule];
-
- fprintf(stderr,
- "subgraph { rank=same"
- " n%u [style=filled fillcolor=lightgray]"
- " dr%u [shape=none label=\"",
- i, i);
- dump_tcmd_or_tcid(s->tcmd, s->tcid, nsym, dfa.tcpool);
-
- fprintf(stderr, "(");
- for (size_t t = r.ltag; t < r.htag; ++t) {
- if (t > r.ltag) fprintf(stderr, " ");
- fprintf(stderr, "%d", dfa.finvers[t]);
- }
- fprintf(stderr, ")");
-
- fprintf(stderr, "\"]"
- " n%u:s -> dr%u:n [style=dotted minlen=0]}\n",
- i, i);
- }
-
- // transitions
- for (uint32_t c = 0; c < nsym; ++c) {
- const size_t j = s->arcs[c];
- if (j != dfa_t::NIL) {
- fprintf(stderr, " n%u -> n%u [label=\"%u",
- i, static_cast<uint32_t>(j), c);
- dump_tcmd_or_tcid(s->tcmd, s->tcid, c, dfa.tcpool);
- fprintf(stderr, "\"]\n");
- }
- }
- }
-
- fprintf(stderr, "}\n");
+ const size_t
+ nstate = dfa.states.size(),
+ nsym = dfa.nchars;
+
+ fprintf(stderr,
+ "digraph DFA {\n"
+ " rankdir=LR\n"
+ " node[shape=Mrecord fontname=Courier]\n"
+ " edge[arrowhead=vee fontname=Courier]\n\n");
+
+ // initializer
+ fprintf(stderr,
+ " n [shape=point]"
+ " n -> n0 [style=dotted label=\"");
+ dump_tcmd_or_tcid(dfa.tcmd0 ? &dfa.tcmd0 : NULL, &dfa.tcid0, 0, dfa.tcpool); // index 0 of a one-element "array"
+ fprintf(stderr, "\"]\n");
+
+ for (uint32_t i = 0; i < nstate; ++i) {
+ const dfa_state_t *s = dfa.states[i];
+
+ // state
+ fprintf(stderr, " n%u [height=0.2 width=0.2 label=\"%u\"]\n", i, i);
+
+ // finalizer
+ if (s->rule != Rule::NONE) {
+ const Rule &r = dfa.rules[s->rule];
+
+ fprintf(stderr,
+ "subgraph { rank=same"
+ " n%u [style=filled fillcolor=lightgray]"
+ " dr%u [shape=none label=\"",
+ i, i);
+ dump_tcmd_or_tcid(s->tcmd, s->tcid, nsym, dfa.tcpool); // slot nsym: final commands
+
+ fprintf(stderr, "(");
+ for (size_t t = r.ltag; t < r.htag; ++t) {
+ if (t > r.ltag) fprintf(stderr, " ");
+ fprintf(stderr, "%d", dfa.finvers[t]);
+ }
+ fprintf(stderr, ")");
+
+ fprintf(stderr, "\"]"
+ " n%u:s -> dr%u:n [style=dotted minlen=0]}\n",
+ i, i);
+ }
+
+ // transitions
+ for (uint32_t c = 0; c < nsym; ++c) {
+ const size_t j = s->arcs[c];
+ if (j != dfa_t::NIL) {
+ fprintf(stderr, " n%u -> n%u [label=\"%u",
+ i, static_cast<uint32_t>(j), c);
+ dump_tcmd_or_tcid(s->tcmd, s->tcid, c, dfa.tcpool);
+ fprintf(stderr, "\"]\n");
+ }
+ }
+ }
+
+ fprintf(stderr, "}\n");
}
// Prints the command list for symbol 'sym'. If 'tcmd' is non-null the list
// head is taken from tcmd[sym]; otherwise it is looked up in the pool by the
// id tcid[sym] (so 'tcid' must be valid whenever 'tcmd' is null).
void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid,
- size_t sym, const tcpool_t &tcpool)
+ size_t sym, const tcpool_t &tcpool)
{
- const tcmd_t *cmd = tcmd ? tcmd[sym] : tcpool[tcid[sym]];
- dump_tcmd(cmd);
+ const tcmd_t *cmd = tcmd ? tcmd[sym] : tcpool[tcid[sym]];
+ dump_tcmd(cmd);
}
// Prints a linked list of tag commands to stderr; prints nothing for an
// empty list. Output starts with "/" and each command is followed by a
// space:
//   - copy commands (tcmd_t::iscopy) print "lhs=rhs";
//   - other commands print "lhs", then "=rhs" if rhs is not TAGVER_ZERO,
//     then one arrow per history element (down for TAGVER_BOTTOM, up
//     otherwise). The history array is read until a TAGVER_ZERO terminator.
void dump_tcmd(const tcmd_t *p)
{
- if (!p) return;
-
- fprintf(stderr, "/");
- for (; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
- if (tcmd_t::iscopy(p)) {
- fprintf(stderr, "%d=%d ", l, r);
- } else {
- fprintf(stderr, "%d", l);
- if (r != TAGVER_ZERO) {
- fprintf(stderr, "=%d", r);
- }
- for (; *h != TAGVER_ZERO; ++h) {
- fprintf(stderr, "%s", *h == TAGVER_BOTTOM ? "↓" : "↑");
- }
- fprintf(stderr, " ");
- }
- }
+ if (!p) return;
+
+ fprintf(stderr, "/");
+ for (; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+ if (tcmd_t::iscopy(p)) {
+ fprintf(stderr, "%d=%d ", l, r);
+ } else {
+ fprintf(stderr, "%d", l);
+ if (r != TAGVER_ZERO) {
+ fprintf(stderr, "=%d", r);
+ }
+ for (; *h != TAGVER_ZERO; ++h) { // history is terminated by TAGVER_ZERO
+ fprintf(stderr, "%s", *h == TAGVER_BOTTOM ? "↓" : "↑");
+ }
+ fprintf(stderr, " ");
+ }
+ }
}
// Returns the tag's name as a C string, or "" when the tag has no name.
// The returned pointer refers to storage owned by the tag's name string.
const char *tagname(const Tag &t)
{
- return t.name ? t.name->c_str() : "";
+ return t.name ? t.name->c_str() : "";
}
// Prints the tags set on a transition to stderr; prints nothing when the
// transition history 'ttran' is HROOT (empty).
// Output starts with "/". For every tag index that occurs in the history
// (taghistory.last() != TAGVER_ZERO) it prints the absolute value of the
// tag's version taken from row 'tvers' of the version table, then one arrow
// per occurrence of that tag while walking the history from 'ttran' back to
// the root (down arrow for a negative element, up arrow for a positive one),
// then a space.
// Fix on the '+' side: the up-arrow branch used to test the history index
// 't' against TAGVER_ZERO instead of the history element, so it did not
// mirror the preceding down-arrow test.
void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory,
- hidx_t ttran, uint32_t tvers)
+ hidx_t ttran, uint32_t tvers)
{
- if (ttran == HROOT) return;
-
- fprintf(stderr, "/");
- const tagver_t *vers = tagvertbl[tvers];
- for (size_t i = 0; i < tagvertbl.ntags; ++i) {
-
- if (taghistory.last(ttran, i) == TAGVER_ZERO) {
- continue;
- }
-
- fprintf(stderr, "%d", abs(vers[i]));
- for (hidx_t t = ttran; t != HROOT; t = taghistory.pred(t)) {
- if (taghistory.tag(t) != i) {
- continue;
- }
- else if (taghistory.elem(t) < TAGVER_ZERO) {
- fprintf(stderr, "↓");
- }
- else if (t > TAGVER_ZERO) {
- fprintf(stderr, "↑");
- }
- }
- fprintf(stderr, " ");
- }
+ if (ttran == HROOT) return;
+
+ fprintf(stderr, "/");
+ const tagver_t *vers = tagvertbl[tvers];
+ for (size_t i = 0; i < tagvertbl.ntags; ++i) {
+
+ if (taghistory.last(ttran, i) == TAGVER_ZERO) {
+ continue; // tag i does not occur on this transition
+ }
+
+ fprintf(stderr, "%d", abs(vers[i]));
+ for (hidx_t t = ttran; t != HROOT; t = taghistory.pred(t)) {
+ if (taghistory.tag(t) != i) {
+ continue;
+ }
+ else if (taghistory.elem(t) < TAGVER_ZERO) {
+ fprintf(stderr, "↓");
+ }
+ else if (taghistory.elem(t) > TAGVER_ZERO) { // test the element, not the history index
+ fprintf(stderr, "↑");
+ }
+ }
+ fprintf(stderr, " ");
+ }
}
} // namespace re2c
// Helper that emits a Graphviz trace of determinization to stderr.
// Construction opens the graph and destruction closes it (both only when
// 'debug' is set); state() is called in between to add nodes and edges.
struct dump_dfa_t
{
- const bool debug;
- uint32_t uniqidx;
+ const bool debug; // taken from opts->dump_dfa_raw; when false all methods do nothing
+ uint32_t uniqidx; // counter used to number nodes for states that are not new
- explicit dump_dfa_t(const opt_t *);
- ~dump_dfa_t();
- void state(const determ_context_t &, bool);
+ explicit dump_dfa_t(const opt_t *);
+ ~dump_dfa_t();
+ void state(const determ_context_t &, bool);
};
// Dumps a complete DFA as its own Graphviz graph (independent of dump_dfa_t).
void dump_dfa(const dfa_t &dfa);
// Depth-first walk that records which tag versions get written.
// Starting at 'state', every command on every symbol transition of a visited
// state marks owrt[lhs] = true. The walk continues only into destination
// states whose 'fallthru' flag is set. 'been' marks visited states so each
// state is processed once; both arrays are caller-provided and must be large
// enough for all state indices / tag versions that can occur.
void find_overwritten_tags(const dfa_t &dfa, size_t state,
- bool *been, bool *owrt)
+ bool *been, bool *owrt)
{
- if (been[state]) return;
- been[state] = true;
-
- const dfa_state_t *s = dfa.states[state];
- for (size_t c = 0; c < dfa.nchars; ++c) {
- for (const tcmd_t *p = s->tcmd[c]; p; p = p->next) {
- owrt[p->lhs] = true;
- }
-
- size_t dest = s->arcs[c];
- if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) {
- find_overwritten_tags(dfa, dest, been, owrt);
- }
- }
+ if (been[state]) return;
+ been[state] = true;
+
+ const dfa_state_t *s = dfa.states[state];
+ for (size_t c = 0; c < dfa.nchars; ++c) {
+ for (const tcmd_t *p = s->tcmd[c]; p; p = p->next) {
+ owrt[p->lhs] = true;
+ }
+
+ size_t dest = s->arcs[c];
+ if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) {
+ find_overwritten_tags(dfa, dest, been, owrt);
+ }
+ }
}
// Adds a copy command (l, r) to every symbol transition of 's' whose
// destination state has 'fallthru' set. The new list head returned by
// tcpool.make_copy(p, l, r) replaces the old head 'p' -- presumably the new
// command is linked in front of the existing list, which is what the note
// below requires.
// ('copy' commands must go first, before potential overwrites)
static void backup(dfa_t &dfa, dfa_state_t *s, tagver_t l, tagver_t r)
{
- for (size_t c = 0; c < dfa.nchars; ++c) {
- size_t i = s->arcs[c];
- if (i != dfa_t::NIL && dfa.states[i]->fallthru) {
- tcmd_t *&p = s->tcmd[c];
- p = dfa.tcpool.make_copy(p, l, r);
- }
- }
+ for (size_t c = 0; c < dfa.nchars; ++c) {
+ size_t i = s->arcs[c];
+ if (i != dfa_t::NIL && dfa.states[i]->fallthru) {
+ tcmd_t *&p = s->tcmd[c]; // reference: assignment below updates the state's list head
+ p = dfa.tcpool.make_copy(p, l, r);
+ }
+ }
}
// note [fallback states]
// For every state with the 'fallback' flag, builds the fallback command list
// (slot nsym + 1 of the state's tcmd array) from its final command list
// (slot nsym). First, find_overwritten_tags() computes which tag versions
// are written on paths from the state through fallthru states. Then each
// final command is handled by kind:
//   - copy: if the source version is not overwritten, the copy is appended
//     to the fallback list; otherwise it is moved onto the outgoing
//     transitions with backup();
//   - save without history: re-created with make_set and appended to the
//     'save' list;
//   - save with history: re-created with copy_add; if the source version is
//     overwritten, the command is made to read from its own lhs and the
//     original value is preserved via backup().
// Finally the 'save' list is linked after the copy commands.
// The two scratch arrays are allocated once and reset for every fallback
// state.
void insert_fallback_tags(dfa_t &dfa)
{
- tcpool_t &pool = dfa.tcpool;
- const size_t
- nstates = dfa.states.size(),
- nsym = dfa.nchars,
- nver = static_cast<size_t>(dfa.maxtagver) + 1;
- bool *been = new bool[nstates];
- bool *owrt = new bool[nver];
-
- for (size_t i = 0; i < nstates; ++i) {
- dfa_state_t *s = dfa.states[i];
- if (!s->fallback) continue;
-
- std::fill(been, been + nstates, false);
- std::fill(owrt, owrt + nver, false);
- find_overwritten_tags(dfa, i, been, owrt);
-
- tcmd_t *p = s->tcmd[nsym],
- *save = NULL, **ps = &save,
- **pc = &s->tcmd[nsym + 1];
- for (; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
-
- // 'copy' commands
- if (tcmd_t::iscopy(p)) {
- if (!owrt[r]) {
- *pc = pool.make_copy(NULL, l, r);
- pc = &(*pc)->next;
- } else {
- backup(dfa, s, l, r);
- }
-
- // 'save without history' commands
- } else if (tcmd_t::isset(p)) {
- *ps = pool.make_set(*ps, l, h[0]);
- ps = &(*ps)->next;
-
- // 'save with history' commands
- } else {
- if (!owrt[r]) {
- *ps = pool.copy_add(NULL, l, r, h);
- } else {
- *ps = pool.copy_add(NULL, l, l, h);
- backup(dfa, s, l, r);
- }
- ps = &(*ps)->next;
- }
- }
-
- // join 'copy' (fallback) and 'save' commands
- *pc = save;
- }
-
- delete[] been;
- delete[] owrt;
+ tcpool_t &pool = dfa.tcpool;
+ const size_t
+ nstates = dfa.states.size(),
+ nsym = dfa.nchars,
+ nver = static_cast<size_t>(dfa.maxtagver) + 1; // versions 0 .. maxtagver
+ bool *been = new bool[nstates];
+ bool *owrt = new bool[nver];
+
+ for (size_t i = 0; i < nstates; ++i) {
+ dfa_state_t *s = dfa.states[i];
+ if (!s->fallback) continue;
+
+ std::fill(been, been + nstates, false);
+ std::fill(owrt, owrt + nver, false);
+ find_overwritten_tags(dfa, i, been, owrt);
+
+ tcmd_t *p = s->tcmd[nsym], // final commands (input)
+ *save = NULL, **ps = &save, // tail pointer of the 'save' list
+ **pc = &s->tcmd[nsym + 1]; // tail pointer of the fallback list (output)
+ for (; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+
+ // 'copy' commands
+ if (tcmd_t::iscopy(p)) {
+ if (!owrt[r]) {
+ *pc = pool.make_copy(NULL, l, r);
+ pc = &(*pc)->next;
+ } else {
+ backup(dfa, s, l, r);
+ }
+
+ // 'save without history' commands
+ } else if (tcmd_t::isset(p)) {
+ *ps = pool.make_set(*ps, l, h[0]);
+ ps = &(*ps)->next;
+
+ // 'save with history' commands
+ } else {
+ if (!owrt[r]) {
+ *ps = pool.copy_add(NULL, l, r, h);
+ } else {
+ *ps = pool.copy_add(NULL, l, l, h);
+ backup(dfa, s, l, r);
+ }
+ ps = &(*ps)->next;
+ }
+ }
+
+ // join 'copy' (fallback) and 'save' commands
+ *pc = save;
+ }
+
+ delete[] been;
+ delete[] owrt;
}
} // namespace re2c
// Returns true if any of the first 'narcs' entries of 'arcs' equals 'node',
// i.e. the node has a transition to itself.
static bool loopback(size_t node, size_t narcs, const size_t *arcs)
{
- for (size_t i = 0; i < narcs; ++i)
- {
- if (arcs[i] == node)
- {
- return true;
- }
- }
- return false;
+ for (size_t i = 0; i < narcs; ++i)
+ {
+ if (arcs[i] == node)
+ {
+ return true;
+ }
+ }
+ return false;
}
// Recursive strongly-connected-component search over the DFA graph starting
// at state 'i'. 'lowlink' must hold SCC_UND for unvisited states; a visited
// state gets the stack depth at the time of its visit, lowered to the
// smallest value seen among its successors. When a state's value is
// unchanged after visiting all successors it is the root of a component: the
// component is popped off 'stack', every popped state gets lowlink SCC_INF,
// and trivial[i] is set for the root only if the component is that single
// state and it has no arc to itself. 'trivial' entries of non-root states
// are left as the caller initialized them.
static void scc(
- const dfa_t &dfa,
- std::stack<size_t> &stack,
- std::vector<size_t> &lowlink,
- std::vector<bool> &trivial,
- size_t i)
+ const dfa_t &dfa,
+ std::stack<size_t> &stack,
+ std::vector<size_t> &lowlink,
+ std::vector<bool> &trivial,
+ size_t i)
{
- const size_t link = stack.size();
- lowlink[i] = link;
- stack.push(i);
-
- const size_t *arcs = dfa.states[i]->arcs;
- for (size_t c = 0; c < dfa.nchars; ++c)
- {
- const size_t j = arcs[c];
- if (j != dfa_t::NIL)
- {
- if (lowlink[j] == SCC_UND)
- {
- scc(dfa, stack, lowlink, trivial, j);
- }
- if (lowlink[j] < lowlink[i])
- {
- lowlink[i] = lowlink[j];
- }
- }
- }
-
- if (lowlink[i] == link)
- {
- // SCC is non-trivial (has loops) iff it either:
- // - consists of multiple nodes (they all must be interconnected)
- // - consists of single node which loops back to itself
- trivial[i] = i == stack.top()
- && !loopback(i, dfa.nchars, arcs);
-
- size_t j;
- do
- {
- j = stack.top();
- stack.pop();
- lowlink[j] = SCC_INF;
- }
- while (j != i);
- }
+ const size_t link = stack.size(); // depth at which this state is pushed
+ lowlink[i] = link;
+ stack.push(i);
+
+ const size_t *arcs = dfa.states[i]->arcs;
+ for (size_t c = 0; c < dfa.nchars; ++c)
+ {
+ const size_t j = arcs[c];
+ if (j != dfa_t::NIL)
+ {
+ if (lowlink[j] == SCC_UND)
+ {
+ scc(dfa, stack, lowlink, trivial, j);
+ }
+ if (lowlink[j] < lowlink[i])
+ {
+ lowlink[i] = lowlink[j];
+ }
+ }
+ }
+
+ if (lowlink[i] == link)
+ {
+ // SCC is non-trivial (has loops) iff it either:
+ // - consists of multiple nodes (they all must be interconnected)
+ // - consists of single node which loops back to itself
+ trivial[i] = i == stack.top()
+ && !loopback(i, dfa.nchars, arcs);
+
+ size_t j;
+ do
+ {
+ j = stack.top();
+ stack.pop();
+ lowlink[j] = SCC_INF; // finished states no longer lower anyone's lowlink
+ }
+ while (j != i);
+ }
}
// Recursively computes fill[i] for state 'i' and everything reachable from
// it. A state is processed only while its entry is still SCC_UND; the entry
// is set to 0 before the successors are visited, so a state reached again
// during its own computation is seen with the value 0. For each existing
// arc to state j the candidate is 1, plus fill[j] if j is marked trivial;
// fill[i] becomes the maximum candidate (0 if the state has no arcs).
static void calc_fill(
- const dfa_t &dfa,
- const std::vector<bool> &trivial,
- std::vector<size_t> &fill,
- size_t i)
+ const dfa_t &dfa,
+ const std::vector<bool> &trivial,
+ std::vector<size_t> &fill,
+ size_t i)
{
- if (fill[i] == SCC_UND)
- {
- fill[i] = 0;
- const size_t *arcs = dfa.states[i]->arcs;
- for (size_t c = 0; c < dfa.nchars; ++c)
- {
- const size_t j = arcs[c];
- if (j != dfa_t::NIL)
- {
- calc_fill(dfa, trivial, fill, j);
- size_t max = 1;
- if (trivial[j])
- {
- max += fill[j];
- }
- if (max > fill[i])
- {
- fill[i] = max;
- }
- }
- }
- }
+ if (fill[i] == SCC_UND)
+ {
+ fill[i] = 0; // also marks the state as visited
+ const size_t *arcs = dfa.states[i]->arcs;
+ for (size_t c = 0; c < dfa.nchars; ++c)
+ {
+ const size_t j = arcs[c];
+ if (j != dfa_t::NIL)
+ {
+ calc_fill(dfa, trivial, fill, j);
+ size_t max = 1;
+ if (trivial[j])
+ {
+ max += fill[j];
+ }
+ if (max > fill[i])
+ {
+ fill[i] = max;
+ }
+ }
+ }
+ }
}
// Computes the YYFILL argument for every DFA state into 'fill' (resized to
// the number of states). Steps: run scc() from state 0 to find the states
// flagged as trivial, run calc_fill() from state 0, then reset fill to zero
// for every trivial state except state 0.
// Both searches start at state 0, so the DFA is expected to have at least
// one state; entries of states not reachable from state 0 keep the value
// SCC_UND given by resize (assuming 'fill' is passed in empty).
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
{
- const size_t size = dfa.states.size();
-
- // find DFA states that belong to non-trivial SCC
- std::stack<size_t> stack;
- std::vector<size_t> lowlink(size, SCC_UND);
- std::vector<bool> trivial(size, false);
- scc(dfa, stack, lowlink, trivial, 0);
-
- // for each DFA state, calculate YYFILL argument:
- // maximal path length to the next YYFILL state
- fill.resize(size, SCC_UND);
- calc_fill(dfa, trivial, fill, 0);
-
- // The following states must trigger YYFILL:
- // - inital state
- // - all states in non-trivial SCCs
- // for other states, reset YYFILL argument to zero
- for (size_t i = 1; i < size; ++i)
- {
- if (trivial[i])
- {
- fill[i] = 0;
- }
- }
+ const size_t size = dfa.states.size();
+
+ // find DFA states that belong to non-trivial SCC
+ std::stack<size_t> stack;
+ std::vector<size_t> lowlink(size, SCC_UND);
+ std::vector<bool> trivial(size, false);
+ scc(dfa, stack, lowlink, trivial, 0);
+
+ // for each DFA state, calculate YYFILL argument:
+ // maximal path length to the next YYFILL state
+ fill.resize(size, SCC_UND);
+ calc_fill(dfa, trivial, fill, 0);
+
+ // The following states must trigger YYFILL:
+ // - initial state
+ // - all states in non-trivial SCCs
+ // for other states, reset YYFILL argument to zero
+ for (size_t i = 1; i < size; ++i) // starts at 1: state 0 always keeps its value
+ {
+ if (trivial[i])
+ {
+ fill[i] = 0;
+ }
+ }
}
} // namespace re2c
// Binary predicate on kernels that holds a read-only reference to the
// determinization context; the call operator is const.
struct kernel_eq_t
{
- const determ_context_t &ctx;
- bool operator()(const kernel_t *, const kernel_t *) const;
+ const determ_context_t &ctx;
+ bool operator()(const kernel_t *, const kernel_t *) const;
};
// Binary predicate on kernels that holds a mutable reference to the
// determinization context; unlike kernel_eq_t its call operator is
// non-const because it writes to buffers and actions inside the context.
struct kernel_map_t
{
- determ_context_t &ctx;
- bool operator()(const kernel_t *, const kernel_t *);
+ determ_context_t &ctx;
+ bool operator()(const kernel_t *, const kernel_t *);
};
// Creates empty buffers: a zero-capacity scratch kernel allocated from 'alc'
// and null pointers for all version-mapping buffers. The real storage is
// allocated later by reserve_buffers().
// 'kernel' is initialized from 'maxsize', so this relies on 'maxsize' being
// declared before 'kernel' in the class (declaration not visible here).
kernel_buffers_t::kernel_buffers_t(allocator_t &alc)
- : maxsize(0) // usually ranges from one to some twenty
- , kernel(make_new_kernel(maxsize, alc))
- , cap(0)
- , max(0)
- , memory(NULL)
- , x2y(NULL)
- , y2x(NULL)
- , x2t(NULL)
- , indegree(NULL)
- , backup_actions(NULL)
+ : maxsize(0) // usually ranges from one to some twenty
+ , kernel(make_new_kernel(maxsize, alc))
+ , cap(0)
+ , max(0)
+ , memory(NULL)
+ , x2y(NULL)
+ , y2x(NULL)
+ , x2t(NULL)
+ , indegree(NULL)
+ , backup_actions(NULL)
{}
// Allocates a kernel with room for 'size' items from 'alc' and returns it.
// The three per-item arrays (NFA state pointers, uint32_t tag version
// indices, lookahead history indices) are allocated but their contents are
// not set here; 'prectbl' starts as NULL. Nothing is freed individually --
// the memory belongs to the allocator.
kernel_t *make_new_kernel(size_t size, allocator_t &alc)
{
- kernel_t *k = alc.alloct<kernel_t>(1);
- k->size = size;
- k->prectbl = NULL;
- k->state = alc.alloct<nfa_state_t*>(size);
- k->tvers = alc.alloct<uint32_t>(size);
- k->tlook = alc.alloct<hidx_t>(size);
- return k;
+ kernel_t *k = alc.alloct<kernel_t>(1);
+ k->size = size;
+ k->prectbl = NULL;
+ k->state = alc.alloct<nfa_state_t*>(size);
+ k->tvers = alc.alloct<uint32_t>(size);
+ k->tlook = alc.alloct<hidx_t>(size);
+ return k;
}
// Returns a deep copy of 'kernel' allocated from 'alc': a new kernel of the
// same size whose state, tag version and lookahead arrays are byte copies of
// the source arrays, plus a copy of the n*n precedence table if the source
// has one (otherwise 'prectbl' stays NULL).
// Fix on the '+' side: the tag version array is allocated as uint32_t[n] in
// make_new_kernel(), but was copied with n * sizeof(size_t) bytes. Where
// size_t is wider than uint32_t that writes past the destination and reads
// past the source. The byte count is now derived from the element type.
kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc)
{
- const size_t n = kernel->size;
+ const size_t n = kernel->size;
- kernel_t *k = make_new_kernel(n, alc);
+ kernel_t *k = make_new_kernel(n, alc);
- memcpy(k->state, kernel->state, n * sizeof(void*));
- memcpy(k->tvers, kernel->tvers, n * sizeof(size_t));
- memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t));
+ memcpy(k->state, kernel->state, n * sizeof(void*));
+ memcpy(k->tvers, kernel->tvers, n * sizeof(*k->tvers)); // element size of tvers, not size_t
+ memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t));
- prectable_t *ptbl = NULL;
- if (kernel->prectbl) {
- ptbl = alc.alloct<prectable_t>(n * n);
- memcpy(ptbl, kernel->prectbl, n * n * sizeof(prectable_t));
- }
- k->prectbl = ptbl;
+ prectable_t *ptbl = NULL;
+ if (kernel->prectbl) {
+ ptbl = alc.alloct<prectable_t>(n * n);
+ memcpy(ptbl, kernel->prectbl, n * n * sizeof(prectable_t));
+ }
+ k->prectbl = ptbl;
- return k;
+ return k;
}
// Makes sure the scratch buffers in ctx.dc_buffers are large enough for the
// current closure and the current maximal tag version.
//   - If the closure has more items than the scratch kernel can hold, a new
//     scratch kernel of twice the closure size is allocated.
//   - 'max' is set to maxtagver + 1. If it exceeds the capacity 'cap', the
//     capacity is doubled relative to 'max' and one block is allocated and
//     carved into: x2y and y2x (2n+1 versions each, pointers set to the
//     middle so indices -n..n are valid), x2t (2n+1 size_t, also centred),
//     indegree (n uint32_t) and backup_actions (n tcmd_t), where n = cap.
// Old buffers are not released here; all storage comes from
// ctx.dc_allocator.
void reserve_buffers(determ_context_t &ctx)
{
- kernel_buffers_t &kbufs = ctx.dc_buffers;
- allocator_t &alc = ctx.dc_allocator;
- const tagver_t maxver = ctx.dc_dfa.maxtagver;
- const size_t nkern = ctx.dc_closure.size();
-
- if (kbufs.maxsize < nkern) {
- kbufs.maxsize = nkern * 2; // in advance
- kbufs.kernel = make_new_kernel(kbufs.maxsize, alc);
- }
-
- // +1 to ensure max tag version is not forgotten in loops
- kbufs.max = maxver + 1;
- if (kbufs.cap < kbufs.max) {
- kbufs.cap = kbufs.max * 2; // in advance
-
- const size_t
- n = static_cast<size_t>(kbufs.cap),
- m = 2 * n + 1,
- sz_x2y = 2 * m * sizeof(tagver_t),
- sz_x2t = m * sizeof(size_t),
- sz_idg = n * sizeof(uint32_t),
- sz_act = n * sizeof(tcmd_t);
-
- char *p = alc.alloct<char>(sz_x2y + sz_x2t + sz_idg + sz_act);
- kbufs.memory = p;
-
- // point to the center (zero index) of each buffer
- // indexes in range [-N .. N] must be valid, where N is capacity
- kbufs.x2y = reinterpret_cast<tagver_t*>(p) + n;
- kbufs.y2x = kbufs.x2y + m;
- p += sz_x2y;
- kbufs.x2t = reinterpret_cast<size_t*>(p) + n;
- p += sz_x2t;
- kbufs.indegree = reinterpret_cast<uint32_t*>(p);
- p += sz_idg;
- kbufs.backup_actions = reinterpret_cast<tcmd_t*>(p);
- }
+ kernel_buffers_t &kbufs = ctx.dc_buffers;
+ allocator_t &alc = ctx.dc_allocator;
+ const tagver_t maxver = ctx.dc_dfa.maxtagver;
+ const size_t nkern = ctx.dc_closure.size();
+
+ if (kbufs.maxsize < nkern) {
+ kbufs.maxsize = nkern * 2; // in advance
+ kbufs.kernel = make_new_kernel(kbufs.maxsize, alc);
+ }
+
+ // +1 to ensure max tag version is not forgotten in loops
+ kbufs.max = maxver + 1;
+ if (kbufs.cap < kbufs.max) {
+ kbufs.cap = kbufs.max * 2; // in advance
+
+ const size_t
+ n = static_cast<size_t>(kbufs.cap),
+ m = 2 * n + 1, // entries for indices -n .. n
+ sz_x2y = 2 * m * sizeof(tagver_t), // x2y and y2x share this region
+ sz_x2t = m * sizeof(size_t),
+ sz_idg = n * sizeof(uint32_t),
+ sz_act = n * sizeof(tcmd_t);
+
+ char *p = alc.alloct<char>(sz_x2y + sz_x2t + sz_idg + sz_act);
+ kbufs.memory = p;
+
+ // point to the center (zero index) of each buffer
+ // indexes in range [-N .. N] must be valid, where N is capacity
+ kbufs.x2y = reinterpret_cast<tagver_t*>(p) + n;
+ kbufs.y2x = kbufs.x2y + m;
+ p += sz_x2y;
+ kbufs.x2t = reinterpret_cast<size_t*>(p) + n;
+ p += sz_x2t;
+ kbufs.indegree = reinterpret_cast<uint32_t*>(p);
+ p += sz_idg;
+ kbufs.backup_actions = reinterpret_cast<tcmd_t*>(p);
+ }
}
// Computes a 32-bit hash of a kernel. The seed is the kernel size; the raw
// bytes of the NFA state pointer array are mixed in, followed by the raw
// bytes of the n*n precedence table if the kernel has one. Tag versions and
// lookahead tags are not part of the hash.
uint32_t hash_kernel(const kernel_t *kernel)
{
- const size_t n = kernel->size;
+ const size_t n = kernel->size;
- // seed
- uint32_t h = static_cast<uint32_t>(n);
+ // seed
+ uint32_t h = static_cast<uint32_t>(n);
- // TNFA states
- h = hash32(h, kernel->state, n * sizeof(void*));
+ // TNFA states
+ h = hash32(h, kernel->state, n * sizeof(void*));
- // precedence table
- if (kernel->prectbl) {
- h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t));
- }
+ // precedence table
+ if (kernel->prectbl) {
+ h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t));
+ }
- return h;
+ return h;
}
// Fills the scratch kernel 'buffer' from a closure: sets its size to the
// closure size, stores the given precedence table pointer (not a copy), and
// copies state, tag version index and lookahead index of every closure item.
// The buffer's arrays must already have room for closure.size() items; no
// allocation happens here.
void copy_to_buffer_kernel(const closure_t &closure,
- const prectable_t *prectbl, kernel_t *buffer)
+ const prectable_t *prectbl, kernel_t *buffer)
{
- const size_t n = closure.size();
+ const size_t n = closure.size();
- buffer->size = n;
+ buffer->size = n;
- buffer->prectbl = prectbl;
+ buffer->prectbl = prectbl;
- for (size_t i = 0; i < n; ++i) {
- const clos_t &c = closure[i];
- buffer->state[i] = c.state;
- buffer->tvers[i] = c.tvers;
- buffer->tlook[i] = c.tlook;
- }
+ for (size_t i = 0; i < n; ++i) {
+ const clos_t &c = closure[i];
+ buffer->state[i] = c.state;
+ buffer->tvers[i] = c.tvers;
+ buffer->tlook[i] = c.tlook;
+ }
}
// Returns true if two kernels of equal size have equivalent lookahead tags.
// Fast path: the lookahead index arrays are bytewise identical. Otherwise
// the histories are compared item by item and tag by tag: for tags with
// history() the complete sequences must match (compare_reversed), for other
// tags only the last element for that tag must match.
// Precondition (asserted): x->size == y->size.
bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, const determ_context_t &ctx)
{
- assert(x->size == y->size);
-
- if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
- return true;
- }
-
- const tag_history_t &thist = ctx.dc_taghistory;
- const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
-
- for (size_t i = 0; i < x->size; ++i) {
- const hidx_t xl = x->tlook[i], yl = y->tlook[i];
- for (size_t t = 0; t < tvtbl.ntags; ++t) {
- if (history(tags[t])) {
- // compare full tag sequences
- if (thist.compare_reversed(xl, yl, t) != 0) return false;
- } else {
- // compare only the last pair of tags
- if (thist.last(xl, t) != thist.last(yl, t)) return false;
- }
- }
- }
-
- return true;
+ assert(x->size == y->size);
+
+ if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
+ return true; // identical indices imply identical histories
+ }
+
+ const tag_history_t &thist = ctx.dc_taghistory;
+ const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
+ const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+
+ for (size_t i = 0; i < x->size; ++i) {
+ const hidx_t xl = x->tlook[i], yl = y->tlook[i];
+ for (size_t t = 0; t < tvtbl.ntags; ++t) {
+ if (history(tags[t])) {
+ // compare full tag sequences
+ if (thist.compare_reversed(xl, yl, t) != 0) return false;
+ } else {
+ // compare only the last pair of tags
+ if (thist.last(xl, t) != thist.last(yl, t)) return false;
+ }
+ }
+ }
+
+ return true;
}
// Exact kernel equality: same size, bytewise identical NFA state arrays and
// tag version arrays, identical precedence tables (only compared when x has
// one), and equivalent lookahead tags.
// Fix on the '+' side: the tag version arrays hold uint32_t elements (see
// make_new_kernel) but were compared over n * sizeof(size_t) bytes. Where
// size_t is wider than uint32_t that reads past the end of both arrays and
// can make equal kernels compare unequal. The byte count is now derived
// from the element type.
bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const
{
- // check that kernel sizes, NFA states, tags versions,
- // lookahead tags and precedence table coincide
- const size_t n = x->size;
- return n == y->size
- && memcmp(x->state, y->state, n * sizeof(void*)) == 0
- && memcmp(x->tvers, y->tvers, n * sizeof(size_t)) == 0
- && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
- && equal_lookahead_tags(x, y, ctx);
+ // check that kernel sizes, NFA states, tags versions,
+ // lookahead tags and precedence table coincide
+ const size_t n = x->size;
+ return n == y->size
+ && memcmp(x->state, y->state, n * sizeof(void*)) == 0
+ && memcmp(x->tvers, y->tvers, n * sizeof(*x->tvers)) == 0 // element size of tvers, not size_t
+ && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
+ && equal_lookahead_tags(x, y, ctx);
}
bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y)
{
- // check that kernel sizes, NFA states lookahead tags
- // and precedence table coincide (versions might differ)
- const size_t n = x->size;
- const bool compatible = n == y->size
- && memcmp(x->state, y->state, n * sizeof(void*)) == 0
- && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
- && equal_lookahead_tags(x, y, ctx);
- if (!compatible) return false;
-
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- const size_t ntag = tags.size();
- kernel_buffers_t &bufs = ctx.dc_buffers;
- tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max;
- size_t *x2t = bufs.x2t;
-
- // map tag versions of one kernel to that of another
- // and check that lookahead versions (if any) coincide
- std::fill(x2y - max, x2y + max, TAGVER_ZERO);
- std::fill(y2x - max, y2x + max, TAGVER_ZERO);
- for (size_t i = 0; i < n; ++i) {
- const tagver_t
- *xvs = ctx.dc_tagvertbl[x->tvers[i]],
- *yvs = ctx.dc_tagvertbl[y->tvers[i]];
- const hidx_t xl = x->tlook[i];
-
- for (size_t t = 0; t < ntag; ++t) {
- // see note [mapping ignores items with lookahead tags]
- if (ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO
- && !history(tags[t])) continue;
-
- const tagver_t xv = xvs[t], yv = yvs[t];
- tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv];
-
- if (yv0 == TAGVER_ZERO && xv0 == TAGVER_ZERO) {
- xv0 = xv;
- yv0 = yv;
- x2t[xv] = t;
- } else if (yv != yv0 || xv != xv0) {
- return false;
- }
- }
- }
-
- // we have bijective mapping; now try to create list of commands
- tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL;
- tcmd_t *b1 = bufs.backup_actions, *b2 = b1;
-
- // backup 'save' commands: if topsort finds cycles, this mapping
- // will be rejected and we'll have to revert all changes
- for (b2->next = a = *pacts; a; a = a->next) {
- *++b2 = *a;
- }
-
- // fix LHS of 'save' commands to reuse old version
- // see note [save(X), copy(Y,X) optimization]
- for (a = *pacts; a; a = a->next) {
- const tagver_t
- yv = a->lhs * (a->history[0] == TAGVER_BOTTOM ? -1 : 1),
- xv = y2x[yv];
- a->lhs = abs(xv);
- y2x[yv] = x2y[xv] = TAGVER_ZERO;
- }
-
- // create 'copy' commands
- for (tagver_t xv = -max; xv < max; ++xv) {
- const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv);
- if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) {
- assert(axv != ayv);
- copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv);
- }
- }
-
- // join 'copy' and 'save' commands
- for (pa = &copy; (a = *pa); pa = &a->next);
- *pa = *pacts;
- *pacts = copy;
-
- // see note [topological ordering of copy commands]
- const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree);
-
- // in case of cycles restore 'save' commands and fail
- if (nontrivial_cycles) {
- for (*pacts = a = b1->next; a; a = a->next) {
- *a = *++b1;
- }
- }
-
- return !nontrivial_cycles;
+ // check that kernel sizes, NFA states lookahead tags
+ // and precedence table coincide (versions might differ)
+ const size_t n = x->size;
+ const bool compatible = n == y->size
+ && memcmp(x->state, y->state, n * sizeof(void*)) == 0
+ && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
+ && equal_lookahead_tags(x, y, ctx);
+ if (!compatible) return false;
+
+ const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+ const size_t ntag = tags.size();
+ kernel_buffers_t &bufs = ctx.dc_buffers;
+ tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max;
+ size_t *x2t = bufs.x2t;
+
+ // map tag versions of one kernel to that of another
+ // and check that lookahead versions (if any) coincide
+ std::fill(x2y - max, x2y + max, TAGVER_ZERO);
+ std::fill(y2x - max, y2x + max, TAGVER_ZERO);
+ for (size_t i = 0; i < n; ++i) {
+ const tagver_t
+ *xvs = ctx.dc_tagvertbl[x->tvers[i]],
+ *yvs = ctx.dc_tagvertbl[y->tvers[i]];
+ const hidx_t xl = x->tlook[i];
+
+ for (size_t t = 0; t < ntag; ++t) {
+ // see note [mapping ignores items with lookahead tags]
+ if (ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO
+ && !history(tags[t])) continue;
+
+ const tagver_t xv = xvs[t], yv = yvs[t];
+ tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv];
+
+ if (yv0 == TAGVER_ZERO && xv0 == TAGVER_ZERO) {
+ xv0 = xv;
+ yv0 = yv;
+ x2t[xv] = t;
+ } else if (yv != yv0 || xv != xv0) {
+ return false;
+ }
+ }
+ }
+
+ // we have bijective mapping; now try to create list of commands
+ tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL;
+ tcmd_t *b1 = bufs.backup_actions, *b2 = b1;
+
+ // backup 'save' commands: if topsort finds cycles, this mapping
+ // will be rejected and we'll have to revert all changes
+ for (b2->next = a = *pacts; a; a = a->next) {
+ *++b2 = *a;
+ }
+
+ // fix LHS of 'save' commands to reuse old version
+ // see note [save(X), copy(Y,X) optimization]
+ for (a = *pacts; a; a = a->next) {
+ const tagver_t
+ yv = a->lhs * (a->history[0] == TAGVER_BOTTOM ? -1 : 1),
+ xv = y2x[yv];
+ a->lhs = abs(xv);
+ y2x[yv] = x2y[xv] = TAGVER_ZERO;
+ }
+
+ // create 'copy' commands
+ for (tagver_t xv = -max; xv < max; ++xv) {
+ const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv);
+ if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) {
+ assert(axv != ayv);
+ copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv);
+ }
+ }
+
+ // join 'copy' and 'save' commands
+ for (pa = &copy; (a = *pa); pa = &a->next);
+ *pa = *pacts;
+ *pacts = copy;
+
+ // see note [topological ordering of copy commands]
+ const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree);
+
+ // in case of cycles restore 'save' commands and fail
+ if (nontrivial_cycles) {
+ for (*pacts = a = b1->next; a; a = a->next) {
+ *a = *++b1;
+ }
+ }
+
+ return !nontrivial_cycles;
}
bool do_find_state(determ_context_t &ctx)
{
- kernels_t &kernels = ctx.dc_kernels;
- const closure_t &closure = ctx.dc_closure;
-
- // empty closure corresponds to default state
- if (closure.size() == 0) {
- ctx.dc_target = dfa_t::NIL;
- ctx.dc_actions = NULL;
- return false;
- }
-
- // resize buffer if closure is too large
- reserve_buffers(ctx);
- kernel_t *k = ctx.dc_buffers.kernel;
-
- // copy closure to buffer kernel
- copy_to_buffer_kernel(closure, ctx.dc_prectbl, k);
-
- // hash "static" part of the kernel
- const uint32_t hash = hash_kernel(k);
-
- // try to find identical kernel
- kernel_eq_t cmp_eq = {ctx};
- ctx.dc_target = kernels.find_with(hash, k, cmp_eq);
- if (ctx.dc_target != kernels_t::NIL) return false;
-
- // else try to find mappable kernel
- // see note [bijective mappings]
- kernel_map_t cmp_map = {ctx};
- ctx.dc_target = kernels.find_with(hash, k, cmp_map);
- if (ctx.dc_target != kernels_t::NIL) return false;
-
- // otherwise add new kernel
- kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator);
- ctx.dc_target = kernels.push(hash, kcopy);
- return true;
+ kernels_t &kernels = ctx.dc_kernels;
+ const closure_t &closure = ctx.dc_closure;
+
+ // empty closure corresponds to default state
+ if (closure.size() == 0) {
+ ctx.dc_target = dfa_t::NIL;
+ ctx.dc_actions = NULL;
+ return false;
+ }
+
+ // resize buffer if closure is too large
+ reserve_buffers(ctx);
+ kernel_t *k = ctx.dc_buffers.kernel;
+
+ // copy closure to buffer kernel
+ copy_to_buffer_kernel(closure, ctx.dc_prectbl, k);
+
+ // hash "static" part of the kernel
+ const uint32_t hash = hash_kernel(k);
+
+ // try to find identical kernel
+ kernel_eq_t cmp_eq = {ctx};
+ ctx.dc_target = kernels.find_with(hash, k, cmp_eq);
+ if (ctx.dc_target != kernels_t::NIL) return false;
+
+ // else try to find mappable kernel
+ // see note [bijective mappings]
+ kernel_map_t cmp_map = {ctx};
+ ctx.dc_target = kernels.find_with(hash, k, cmp_map);
+ if (ctx.dc_target != kernels_t::NIL) return false;
+
+ // otherwise add new kernel
+ kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator);
+ ctx.dc_target = kernels.push(hash, kcopy);
+ return true;
}
tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin)
{
- dfa_t &dfa = ctx.dc_dfa;
- const Rule &rule = dfa.rules[fin.state->rule];
- const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers];
- const hidx_t look = fin.tlook;
- const tag_history_t &thist = ctx.dc_taghistory;
- tcpool_t &tcpool = dfa.tcpool;
- tcmd_t *copy = NULL, *save = NULL, **p;
-
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
-
- const Tag &tag = dfa.tags[t];
- if (fixed(tag)) continue;
-
- const tagver_t v = abs(vers[t]), l = thist.last(look, t);
- tagver_t &f = dfa.finvers[t];
- if (l == TAGVER_ZERO) {
- copy = tcpool.make_copy(copy, f, v);
- } else if (history(tag)) {
- save = tcpool.make_add(save, f, v, thist, look, t);
- } else {
- save = tcpool.make_set(save, f, l);
- }
- }
-
- // join 'copy' and 'save' commands
- for (p = &copy; *p; p = &(*p)->next);
- *p = save;
-
- return copy;
+ dfa_t &dfa = ctx.dc_dfa;
+ const Rule &rule = dfa.rules[fin.state->rule];
+ const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers];
+ const hidx_t look = fin.tlook;
+ const tag_history_t &thist = ctx.dc_taghistory;
+ tcpool_t &tcpool = dfa.tcpool;
+ tcmd_t *copy = NULL, *save = NULL, **p;
+
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+
+ const Tag &tag = dfa.tags[t];
+ if (fixed(tag)) continue;
+
+ const tagver_t v = abs(vers[t]), l = thist.last(look, t);
+ tagver_t &f = dfa.finvers[t];
+ if (l == TAGVER_ZERO) {
+ copy = tcpool.make_copy(copy, f, v);
+ } else if (history(tag)) {
+ save = tcpool.make_add(save, f, v, thist, look, t);
+ } else {
+ save = tcpool.make_set(save, f, l);
+ }
+ }
+
+ // join 'copy' and 'save' commands
+ for (p = &copy; *p; p = &(*p)->next);
+ *p = save;
+
+ return copy;
}
void find_state(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
-
- // find or add the new state in the existing set of states
- const bool is_new = do_find_state(ctx);
-
- if (is_new) {
- // create new DFA state
- dfa_state_t *t = new dfa_state_t(dfa.nchars);
- dfa.states.push_back(t);
-
- // check if the new state is final
- // see note [at most one final item per closure]
- cclositer_t
- b = ctx.dc_closure.begin(),
- e = ctx.dc_closure.end(),
- f = std::find_if(b, e, clos_t::fin);
- if (f != e) {
- t->tcmd[dfa.nchars] = final_actions(ctx, *f);
- t->rule = f->state->rule;
- }
- }
-
- if (ctx.dc_origin == dfa_t::NIL) {
- // initial state
- dfa.tcmd0 = ctx.dc_actions;
- }
- else {
- dfa_state_t *s = dfa.states[ctx.dc_origin];
- s->arcs[ctx.dc_symbol] = ctx.dc_target;
- s->tcmd[ctx.dc_symbol] = ctx.dc_actions;
- }
-
- ctx.dc_dump.state(ctx, is_new);
+ dfa_t &dfa = ctx.dc_dfa;
+
+ // find or add the new state in the existing set of states
+ const bool is_new = do_find_state(ctx);
+
+ if (is_new) {
+ // create new DFA state
+ dfa_state_t *t = new dfa_state_t(dfa.nchars);
+ dfa.states.push_back(t);
+
+ // check if the new state is final
+ // see note [at most one final item per closure]
+ cclositer_t
+ b = ctx.dc_closure.begin(),
+ e = ctx.dc_closure.end(),
+ f = std::find_if(b, e, clos_t::fin);
+ if (f != e) {
+ t->tcmd[dfa.nchars] = final_actions(ctx, *f);
+ t->rule = f->state->rule;
+ }
+ }
+
+ if (ctx.dc_origin == dfa_t::NIL) {
+ // initial state
+ dfa.tcmd0 = ctx.dc_actions;
+ }
+ else {
+ dfa_state_t *s = dfa.states[ctx.dc_origin];
+ s->arcs[ctx.dc_symbol] = ctx.dc_target;
+ s->tcmd[ctx.dc_symbol] = ctx.dc_actions;
+ }
+
+ ctx.dc_dump.state(ctx, is_new);
}
} // namespace re2c
static void minimization_table(
- size_t *part,
- const std::vector<dfa_state_t*> &states,
- size_t nchars)
+ size_t *part,
+ const std::vector<dfa_state_t*> &states,
+ size_t nchars)
{
- const size_t count = states.size();
+ const size_t count = states.size();
- bool **tbl = new bool*[count];
- tbl[0] = new bool[count * (count - 1) / 2];
- for (size_t i = 0; i < count - 1; ++i)
- {
- tbl[i + 1] = tbl[i] + i;
- }
+ bool **tbl = new bool*[count];
+ tbl[0] = new bool[count * (count - 1) / 2];
+ for (size_t i = 0; i < count - 1; ++i)
+ {
+ tbl[i + 1] = tbl[i] + i;
+ }
- // see note [distinguish states by tags]
- for (size_t i = 0; i < count; ++i)
- {
- dfa_state_t *s1 = states[i];
- for (size_t j = 0; j < i; ++j)
- {
- dfa_state_t *s2 = states[j];
- tbl[i][j] = s1->rule != s2->rule
- || s1->tcid[nchars] != s2->tcid[nchars];
- }
- }
+ // see note [distinguish states by tags]
+ for (size_t i = 0; i < count; ++i)
+ {
+ dfa_state_t *s1 = states[i];
+ for (size_t j = 0; j < i; ++j)
+ {
+ dfa_state_t *s2 = states[j];
+ tbl[i][j] = s1->rule != s2->rule
+ || s1->tcid[nchars] != s2->tcid[nchars];
+ }
+ }
- for (bool loop = true; loop;)
- {
- loop = false;
- for (size_t i = 0; i < count; ++i)
- {
- for (size_t j = 0; j < i; ++j)
- {
- if (!tbl[i][j])
- {
- for (size_t k = 0; k < nchars; ++k)
- {
- size_t oi = states[i]->arcs[k];
- size_t oj = states[j]->arcs[k];
- if (oi < oj)
- {
- std::swap(oi, oj);
- }
- if (states[i]->tcid[k] != states[j]->tcid[k]
- || (oi != oj
- && (oi == dfa_t::NIL
- || oj == dfa_t::NIL
- || tbl[oi][oj])))
- {
- tbl[i][j] = true;
- loop = true;
- break;
- }
- }
- }
- }
- }
- }
+ for (bool loop = true; loop;)
+ {
+ loop = false;
+ for (size_t i = 0; i < count; ++i)
+ {
+ for (size_t j = 0; j < i; ++j)
+ {
+ if (!tbl[i][j])
+ {
+ for (size_t k = 0; k < nchars; ++k)
+ {
+ size_t oi = states[i]->arcs[k];
+ size_t oj = states[j]->arcs[k];
+ if (oi < oj)
+ {
+ std::swap(oi, oj);
+ }
+ if (states[i]->tcid[k] != states[j]->tcid[k]
+ || (oi != oj
+ && (oi == dfa_t::NIL
+ || oj == dfa_t::NIL
+ || tbl[oi][oj])))
+ {
+ tbl[i][j] = true;
+ loop = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
- // Equivalence relation defined by the matrix is transitive
- // by construction. Thus we can simply find the first state
- // which is not distinguishable from current and choose it as a
- // representative: all other states with the same representative
- // have to be equivalent to current state due to transitivity.
- //
- // The only requirement is to deterministically choose the
- // representative: e.g. always choose the one with the lowest
- // index.
- //
- // Note that transitivity is crucial: without it the problem
- // would be equivalent to the clique cover problem.
+ // Equivalence relation defined by the matrix is transitive
+ // by construction. Thus we can simply find the first state
+ // which is not distinguishable from current and choose it as a
+ // representative: all other states with the same representative
+ // have to be equivalent to current state due to transitivity.
+ //
+ // The only requirement is to deterministically choose the
+ // representative: e.g. always choose the one with the lowest
+ // index.
+ //
+ // Note that transitivity is crucial: without it the problem
+ // would be equivalent to the clique cover problem.
- for (size_t i = 0; i < count; ++i)
- {
- part[i] = i;
- for (size_t j = 0; j < i; ++j)
- {
- if (!tbl[i][j])
- {
- part[i] = j;
- break;
- }
- }
- }
+ for (size_t i = 0; i < count; ++i)
+ {
+ part[i] = i;
+ for (size_t j = 0; j < i; ++j)
+ {
+ if (!tbl[i][j])
+ {
+ part[i] = j;
+ break;
+ }
+ }
+ }
- delete[] tbl[0];
- delete[] tbl;
+ delete[] tbl[0];
+ delete[] tbl;
}
static void minimization_moore(
- size_t *part,
- const std::vector<dfa_state_t*> &states,
- size_t nchars)
+ size_t *part,
+ const std::vector<dfa_state_t*> &states,
+ size_t nchars)
{
- const size_t count = states.size();
+ const size_t count = states.size();
- size_t *next = new size_t[count];
+ size_t *next = new size_t[count];
- // see note [distinguish states by tags]
- std::map<std::pair<size_t, tcid_t>, size_t> init;
- for (size_t i = 0; i < count; ++i)
- {
- dfa_state_t *s = states[i];
- std::pair<size_t, tcid_t> key(s->rule, s->tcid[nchars]);
- if (init.insert(std::make_pair(key, i)).second)
- {
- part[i] = i;
- next[i] = dfa_t::NIL;
- }
- else
- {
- const size_t j = init[key];
- part[i] = j;
- next[i] = next[j];
- next[j] = i;
- }
- }
+ // see note [distinguish states by tags]
+ std::map<std::pair<size_t, tcid_t>, size_t> init;
+ for (size_t i = 0; i < count; ++i)
+ {
+ dfa_state_t *s = states[i];
+ std::pair<size_t, tcid_t> key(s->rule, s->tcid[nchars]);
+ if (init.insert(std::make_pair(key, i)).second)
+ {
+ part[i] = i;
+ next[i] = dfa_t::NIL;
+ }
+ else
+ {
+ const size_t j = init[key];
+ part[i] = j;
+ next[i] = next[j];
+ next[j] = i;
+ }
+ }
- size_t *out = new size_t[nchars * count];
- size_t *diff = new size_t[count];
- for (bool loop = true; loop;)
- {
- loop = false;
- for (size_t i = 0; i < count; ++i)
- {
- if (i != part[i] || next[i] == dfa_t::NIL)
- {
- continue;
- }
+ size_t *out = new size_t[nchars * count];
+ size_t *diff = new size_t[count];
+ for (bool loop = true; loop;)
+ {
+ loop = false;
+ for (size_t i = 0; i < count; ++i)
+ {
+ if (i != part[i] || next[i] == dfa_t::NIL)
+ {
+ continue;
+ }
- for (size_t j = i; j != dfa_t::NIL; j = next[j])
- {
- size_t *o = &out[j * nchars];
- size_t *a = states[j]->arcs;
- for (size_t c = 0; c < nchars; ++c)
- {
- o[c] = a[c] == dfa_t::NIL
- ? dfa_t::NIL
- : part[a[c]];
- }
- }
+ for (size_t j = i; j != dfa_t::NIL; j = next[j])
+ {
+ size_t *o = &out[j * nchars];
+ size_t *a = states[j]->arcs;
+ for (size_t c = 0; c < nchars; ++c)
+ {
+ o[c] = a[c] == dfa_t::NIL
+ ? dfa_t::NIL
+ : part[a[c]];
+ }
+ }
- size_t diff_count = 0;
- for (size_t j = i; j != dfa_t::NIL;)
- {
- const size_t j_next = next[j];
- size_t n = 0;
- for (; n < diff_count; ++n)
- {
- size_t k = diff[n];
- if (memcmp(&out[j * nchars],
- &out[k * nchars],
- nchars * sizeof(size_t)) == 0
- && memcmp(states[j]->tcid,
- states[k]->tcid,
- nchars * sizeof(tcid_t)) == 0
- ) {
- part[j] = k;
- next[j] = next[k];
- next[k] = j;
- break;
- }
- }
- if (n == diff_count)
- {
- diff[diff_count++] = j;
- part[j] = j;
- next[j] = dfa_t::NIL;
- }
- j = j_next;
- }
- loop |= diff_count > 1;
- }
- }
- delete[] out;
- delete[] diff;
- delete[] next;
+ size_t diff_count = 0;
+ for (size_t j = i; j != dfa_t::NIL;)
+ {
+ const size_t j_next = next[j];
+ size_t n = 0;
+ for (; n < diff_count; ++n)
+ {
+ size_t k = diff[n];
+ if (memcmp(&out[j * nchars],
+ &out[k * nchars],
+ nchars * sizeof(size_t)) == 0
+ && memcmp(states[j]->tcid,
+ states[k]->tcid,
+ nchars * sizeof(tcid_t)) == 0
+ ) {
+ part[j] = k;
+ next[j] = next[k];
+ next[k] = j;
+ break;
+ }
+ }
+ if (n == diff_count)
+ {
+ diff[diff_count++] = j;
+ part[j] = j;
+ next[j] = dfa_t::NIL;
+ }
+ j = j_next;
+ }
+ loop |= diff_count > 1;
+ }
+ }
+ delete[] out;
+ delete[] diff;
+ delete[] next;
}
void minimization(dfa_t &dfa, dfa_minimization_t type)
{
- const size_t count = dfa.states.size();
+ const size_t count = dfa.states.size();
- size_t *part = new size_t[count];
+ size_t *part = new size_t[count];
- switch (type) {
- case DFA_MINIMIZATION_TABLE:
- minimization_table(part, dfa.states, dfa.nchars); break;
- case DFA_MINIMIZATION_MOORE:
- minimization_moore(part, dfa.states, dfa.nchars); break;
- }
+ switch (type) {
+ case DFA_MINIMIZATION_TABLE:
+ minimization_table(part, dfa.states, dfa.nchars); break;
+ case DFA_MINIMIZATION_MOORE:
+ minimization_moore(part, dfa.states, dfa.nchars); break;
+ }
- size_t *compact = new size_t[count];
- for (size_t i = 0, j = 0; i < count; ++i)
- {
- if (i == part[i])
- {
- compact[i] = j++;
- }
- }
+ size_t *compact = new size_t[count];
+ for (size_t i = 0, j = 0; i < count; ++i)
+ {
+ if (i == part[i])
+ {
+ compact[i] = j++;
+ }
+ }
- size_t new_count = 0;
- for (size_t i = 0; i < count; ++i)
- {
- dfa_state_t *s = dfa.states[i];
- if (i == part[i])
- {
- size_t *arcs = s->arcs;
- for (size_t c = 0; c < dfa.nchars; ++c)
- {
- if (arcs[c] != dfa_t::NIL)
- {
- arcs[c] = compact[part[arcs[c]]];
- }
- }
- dfa.states[new_count++] = s;
- }
- else
- {
- delete s;
- }
- }
- dfa.states.resize(new_count);
+ size_t new_count = 0;
+ for (size_t i = 0; i < count; ++i)
+ {
+ dfa_state_t *s = dfa.states[i];
+ if (i == part[i])
+ {
+ size_t *arcs = s->arcs;
+ for (size_t c = 0; c < dfa.nchars; ++c)
+ {
+ if (arcs[c] != dfa_t::NIL)
+ {
+ arcs[c] = compact[part[arcs[c]]];
+ }
+ }
+ dfa.states[new_count++] = s;
+ }
+ else
+ {
+ delete s;
+ }
+ }
+ dfa.states.resize(new_count);
- delete[] compact;
- delete[] part;
+ delete[] compact;
+ delete[] part;
}
} // namespace re2c
hidx_t tag_history_t::push(hidx_t idx, tag_info_t info)
{
- node_t x = {idx, info};
- nodes.push_back(x);
- return static_cast<hidx_t>(nodes.size() - 1);
+ node_t x = {idx, info};
+ nodes.push_back(x);
+ return static_cast<hidx_t>(nodes.size() - 1);
}
tagver_t tag_history_t::last(hidx_t i, size_t t) const
{
- for (; i != HROOT; i = pred(i)) {
- if (tag(i) == t) return elem(i);
- }
- return TAGVER_ZERO;
+ for (; i != HROOT; i = pred(i)) {
+ if (tag(i) == t) return elem(i);
+ }
+ return TAGVER_ZERO;
}
int32_t tag_history_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const
{
- // compare in reverse, from tail to head: direction makes
- // no difference when comparing for exact coincidence
- for (;;) {
- for (; x != HROOT && tag(x) != t; x = pred(x));
- for (; y != HROOT && tag(y) != t; y = pred(y));
- if (x == HROOT && y == HROOT) return 0;
- if (x == HROOT) return -1;
- if (y == HROOT) return 1;
- if (elem(x) > elem(y)) return -1;
- if (elem(x) < elem(y)) return 1;
- x = pred(x);
- y = pred(y);
- }
+ // compare in reverse, from tail to head: direction makes
+ // no difference when comparing for exact coincidence
+ for (;;) {
+ for (; x != HROOT && tag(x) != t; x = pred(x));
+ for (; y != HROOT && tag(y) != t; y = pred(y));
+ if (x == HROOT && y == HROOT) return 0;
+ if (x == HROOT) return -1;
+ if (y == HROOT) return 1;
+ if (elem(x) > elem(y)) return -1;
+ if (elem(x) < elem(y)) return 1;
+ x = pred(x);
+ y = pred(y);
+ }
}
static void reconstruct_history(const tag_history_t &history,
- tag_path_t &path, hidx_t idx)
+ tag_path_t &path, hidx_t idx)
{
- path.clear();
- for (; idx != HROOT; idx = history.pred(idx)) {
- path.push_back(history.info(idx));
- }
+ path.clear();
+ for (; idx != HROOT; idx = history.pred(idx)) {
+ path.push_back(history.info(idx));
+ }
}
static inline int32_t unpack_longest(int32_t value)
{
- // lower 30 bits
- return value & 0x3fffFFFF;
+ // lower 30 bits
+ return value & 0x3fffFFFF;
}
static inline int32_t unpack_leftmost(int32_t value)
{
- // higher 2 bits
- return value >> 30u;
+ // higher 2 bits
+ return value >> 30u;
}
int32_t precedence(determ_context_t &ctx,
- const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy)
+ const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy)
{
- const hidx_t xl = x.tlook, yl = y.tlook;
- const uint32_t xo = x.origin, yo = y.origin;
-
- if (xl == yl && xo == yo) {
- rhox = rhoy = -1;
- return 0;
- }
-
- tag_history_t &thist = ctx.dc_taghistory;
- tag_path_t &p1 = thist.path1, &p2 = thist.path2;
- reconstruct_history(thist, p1, xl);
- reconstruct_history(thist, p2, yl);
- tag_path_t::const_reverse_iterator
- i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1,
- i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2;
-
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- size_t nclos = 0;
- const prectable_t *prectbl = NULL;
- const bool fork_frame = xo == yo;
-
- if (fork_frame) {
- // find fork
- for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2);
- }
- else {
- // get precedence table and size of the origin state
- const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
- nclos = k->size;
- prectbl = k->prectbl;
- }
-
- // longest precedence
- if (!fork_frame) {
- rhox = unpack_longest(prectbl[xo * nclos + yo]);
- rhoy = unpack_longest(prectbl[yo * nclos + xo]);
- }
- else {
- rhox = rhoy = std::numeric_limits<int>::max();
- if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height;
- }
- for (g1 = j1; g1 != e1; ++g1) {
- rhox = std::min(rhox, tags[g1->idx].height);
- }
- for (g2 = j2; g2 != e2; ++g2) {
- rhoy = std::min(rhoy, tags[g2->idx].height);
- }
- if (rhox > rhoy) return -1;
- if (rhox < rhoy) return 1;
-
- // leftmost precedence
- if (!fork_frame) {
- return unpack_leftmost(prectbl[xo * nclos + yo]);
- }
- else {
- // equal => not less
- if (j1 == e1 && j2 == e2) return 0;
-
- // shorter => less
- if (j1 == e1) return -1;
- if (j2 == e2) return 1;
-
- const uint32_t idx1 = j1->idx, idx2 = j2->idx;
- const bool neg1 = j1->neg, neg2 = j2->neg;
-
- // can't be both closing
- assert(!(idx1 % 2 == 1 && idx2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (idx1 % 2 == 1) return -1;
- if (idx2 % 2 == 1) return 1;
-
- // can't be both negative
- assert(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- // positive vs positive: smaller wins
- // (this case is only possible because multiple
- // top-level RE don't have proper negative tags)
- if (idx1 < idx2) return -1;
- if (idx1 > idx2) return 1;
- }
-
- // unreachable
- assert(false);
- return 0;
+ const hidx_t xl = x.tlook, yl = y.tlook;
+ const uint32_t xo = x.origin, yo = y.origin;
+
+ if (xl == yl && xo == yo) {
+ rhox = rhoy = -1;
+ return 0;
+ }
+
+ tag_history_t &thist = ctx.dc_taghistory;
+ tag_path_t &p1 = thist.path1, &p2 = thist.path2;
+ reconstruct_history(thist, p1, xl);
+ reconstruct_history(thist, p2, yl);
+ tag_path_t::const_reverse_iterator
+ i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1,
+ i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2;
+
+ const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+ size_t nclos = 0;
+ const prectable_t *prectbl = NULL;
+ const bool fork_frame = xo == yo;
+
+ if (fork_frame) {
+ // find fork
+ for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2);
+ }
+ else {
+ // get precedence table and size of the origin state
+ const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
+ nclos = k->size;
+ prectbl = k->prectbl;
+ }
+
+ // longest precedence
+ if (!fork_frame) {
+ rhox = unpack_longest(prectbl[xo * nclos + yo]);
+ rhoy = unpack_longest(prectbl[yo * nclos + xo]);
+ }
+ else {
+ rhox = rhoy = std::numeric_limits<int>::max();
+ if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height;
+ }
+ for (g1 = j1; g1 != e1; ++g1) {
+ rhox = std::min(rhox, tags[g1->idx].height);
+ }
+ for (g2 = j2; g2 != e2; ++g2) {
+ rhoy = std::min(rhoy, tags[g2->idx].height);
+ }
+ if (rhox > rhoy) return -1;
+ if (rhox < rhoy) return 1;
+
+ // leftmost precedence
+ if (!fork_frame) {
+ return unpack_leftmost(prectbl[xo * nclos + yo]);
+ }
+ else {
+ // equal => not less
+ if (j1 == e1 && j2 == e2) return 0;
+
+ // shorter => less
+ if (j1 == e1) return -1;
+ if (j2 == e2) return 1;
+
+ const uint32_t idx1 = j1->idx, idx2 = j2->idx;
+ const bool neg1 = j1->neg, neg2 = j2->neg;
+
+ // can't be both closing
+ assert(!(idx1 % 2 == 1 && idx2 % 2 == 1));
+
+ // closing vs opening: closing wins
+ if (idx1 % 2 == 1) return -1;
+ if (idx2 % 2 == 1) return 1;
+
+ // can't be both negative
+ assert(!(neg1 && neg2));
+
+ // positive vs negative: positive wins
+ if (neg1) return 1;
+ if (neg2) return -1;
+
+ // positive vs positive: smaller wins
+ // (this case is only possible because multiple
+ // top-level RE don't have proper negative tags)
+ if (idx1 < idx2) return -1;
+ if (idx1 > idx2) return 1;
+ }
+
+ // unreachable
+ assert(false);
+ return 0;
}
} // namespace re2c
struct tag_history_t
{
- // the whole tree of tags found by the epsilon-closure
- // (a bunch of separate subtrees for each tag with common root)
- struct node_t {
- hidx_t pred;
- tag_info_t info;
- };
- std::vector<node_t> nodes;
-
- // reconstruct paths for comparison
- tag_path_t path1;
- tag_path_t path2;
-
- tag_history_t();
- hidx_t pred(hidx_t i) const;
- tag_info_t info(hidx_t i) const;
- tagver_t elem(hidx_t i) const;
- size_t tag(hidx_t i) const;
- hidx_t push(hidx_t i, tag_info_t info);
- tagver_t last(hidx_t i, size_t t) const;
- int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const;
-
- FORBID_COPY(tag_history_t);
+ // the whole tree of tags found by the epsilon-closure
+ // (a bunch of separate subtrees for each tag with common root)
+ struct node_t {
+ hidx_t pred;
+ tag_info_t info;
+ };
+ std::vector<node_t> nodes;
+
+ // reconstruct paths for comparison
+ tag_path_t path1;
+ tag_path_t path2;
+
+ tag_history_t();
+ hidx_t pred(hidx_t i) const;
+ tag_info_t info(hidx_t i) const;
+ tagver_t elem(hidx_t i) const;
+ size_t tag(hidx_t i) const;
+ hidx_t push(hidx_t i, tag_info_t info);
+ tagver_t last(hidx_t i, size_t t) const;
+ int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const;
+
+ FORBID_COPY(tag_history_t);
};
} // namespace re2c
struct eqtag_t
{
- size_t ntags;
+ size_t ntags;
- explicit eqtag_t(size_t n): ntags(n) {}
- inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const
- {
- return memcmp(x, y, ntags * sizeof(tagver_t)) == 0;
- }
+ explicit eqtag_t(size_t n): ntags(n) {}
+ inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const
+ {
+ return memcmp(x, y, ntags * sizeof(tagver_t)) == 0;
+ }
};
tagver_table_t::tagver_table_t(size_t n)
- : lookup()
- , ntags(n)
- , buffer(new tagver_t[n])
+ : lookup()
+ , ntags(n)
+ , buffer(new tagver_t[n])
{}
tagver_table_t::~tagver_table_t()
{
- delete[] buffer;
- const size_t n = lookup.size();
- for (uint32_t i = 0; i < n; ++i) {
- free(const_cast<tagver_t*>(lookup[i]));
- }
+ delete[] buffer;
+ const size_t n = lookup.size();
+ for (uint32_t i = 0; i < n; ++i) {
+ free(const_cast<tagver_t*>(lookup[i]));
+ }
}
uint32_t tagver_table_t::insert_const(tagver_t ver)
{
- std::fill(buffer, buffer + ntags, ver);
- return insert(buffer);
+ std::fill(buffer, buffer + ntags, ver);
+ return insert(buffer);
}
uint32_t tagver_table_t::insert_succ(tagver_t fst)
{
- for (uint32_t i = 0; i < ntags; ++i) {
- buffer[i] = fst++;
- }
- return insert(buffer);
+ for (uint32_t i = 0; i < ntags; ++i) {
+ buffer[i] = fst++;
+ }
+ return insert(buffer);
}
uint32_t tagver_table_t::insert(const tagver_t *tags)
{
- const size_t size = ntags * sizeof(tagver_t);
- const uint32_t hash = hash32(0, tags, size);
-
- eqtag_t eq(ntags);
- const uint32_t idx = lookup.find_with(hash, tags, eq);
- if (idx != taglookup_t::NIL) {
- return idx;
- }
-
- tagver_t *copy = static_cast<tagver_t*>(malloc(size));
- memcpy(copy, tags, size);
- return lookup.push(hash, copy);
+ const size_t size = ntags * sizeof(tagver_t);
+ const uint32_t hash = hash32(0, tags, size);
+
+ eqtag_t eq(ntags);
+ const uint32_t idx = lookup.find_with(hash, tags, eq);
+ if (idx != taglookup_t::NIL) {
+ return idx;
+ }
+
+ tagver_t *copy = static_cast<tagver_t*>(malloc(size));
+ memcpy(copy, tags, size);
+ return lookup.push(hash, copy);
}
const tagver_t *tagver_table_t::operator[](uint32_t idx) const
{
- return lookup[idx];
+ return lookup[idx];
}
} // namespace re2c
struct tagver_table_t
{
private:
- typedef lookup_t<const tagver_t*> taglookup_t;
- taglookup_t lookup;
+ typedef lookup_t<const tagver_t*> taglookup_t;
+ taglookup_t lookup;
public:
- const size_t ntags;
- tagver_t *buffer;
-
- explicit tagver_table_t(size_t n);
- ~tagver_table_t();
- uint32_t insert_const(tagver_t ver);
- uint32_t insert_succ(tagver_t fst);
- uint32_t insert(const tagver_t *tags);
- const tagver_t *operator[](uint32_t idx) const;
- FORBID_COPY(tagver_table_t);
+ const size_t ntags;
+ tagver_t *buffer;
+
+ explicit tagver_table_t(size_t n);
+ ~tagver_table_t();
+ uint32_t insert_const(tagver_t ver);
+ uint32_t insert_succ(tagver_t fst);
+ uint32_t insert(const tagver_t *tags);
+ const tagver_t *operator[](uint32_t idx) const;
+ FORBID_COPY(tagver_table_t);
};
} // namespace re2c
bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y)
{
- return x.lhs == y.lhs
- && x.rhs == y.rhs
- && equal_history(x.history, y.history);
+ return x.lhs == y.lhs
+ && x.rhs == y.rhs
+ && equal_history(x.history, y.history);
}
bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g)
{
- for (;;) {
- if (*h != *g) return false;
- if (*h == TAGVER_ZERO) return true;
- ++h; ++g;
- }
+ for (;;) {
+ if (*h != *g) return false;
+ if (*h == TAGVER_ZERO) return true;
+ ++h; ++g;
+ }
}
bool tcmd_t::iscopy(const tcmd_t *x)
{
- return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO;
+ return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO;
}
bool tcmd_t::isset(const tcmd_t *x)
{
- if (x->rhs == TAGVER_ZERO) {
- assert(x->history[0] != TAGVER_ZERO);
- return true;
- }
- return false;
+ if (x->rhs == TAGVER_ZERO) {
+ assert(x->history[0] != TAGVER_ZERO);
+ return true;
+ }
+ return false;
}
// An "add" command has both a non-zero right hand side and a non-empty history.
bool tcmd_t::isadd(const tcmd_t *x)
{
- return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO;
+ return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO;
}
// Reorders the singly linked command list '*phead' in place.
//
// 'indeg' is caller-provided scratch storage indexed by the 'lhs'/'rhs' values
// of the commands; after initialisation indeg[v] is the number of commands in
// the list whose 'rhs' equals v. (Assumes every lhs/rhs is a valid index into
// 'indeg' -- TODO confirm against callers.)
//
// Each pass over the remaining list moves every command whose 'lhs' currently
// has in-degree zero to the end of the output list (decrementing the in-degree
// of its 'rhs') and keeps the others, in their original relative order. When a
// pass moves nothing, the remaining commands are appended to the output as is.
//
// On return '*phead' points to the reordered list. The result is true iff the
// commands left over at that point include one with lhs != rhs.
bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg)
{
- tcmd_t *x0 = *phead, *x, *y0 = NULL, **py;
- bool nontrivial_cycles = false;
-
- // initialize in-degree
- for (x = x0; x; x = x->next) {
- indeg[x->lhs] = indeg[x->rhs] = 0;
- }
- for (x = x0; x; x = x->next) {
- ++indeg[x->rhs];
- }
-
- for (py = &y0;;) {
- // reached end of list
- if (!x0) break;
-
- tcmd_t **px = &x0, **py1 = py;
- for (x = x0; x; x = x->next) {
- if (indeg[x->lhs] == 0) {
- --indeg[x->rhs];
- *py = x;
- py = &x->next;
- } else {
- *px = x;
- px = &x->next;
- }
- }
- *px = NULL;
-
- // only cycles left
- if (py == py1) {
- // look for cycles of length 2 or more
- for (x = x0; x && x->lhs == x->rhs; x = x->next);
- nontrivial_cycles = x != NULL;
- break;
- }
- }
- *py = x0;
-
- *phead = y0;
- return nontrivial_cycles;
+ tcmd_t *x0 = *phead, *x, *y0 = NULL, **py;
+ bool nontrivial_cycles = false;
+
+ // initialize in-degree
+ for (x = x0; x; x = x->next) {
+ indeg[x->lhs] = indeg[x->rhs] = 0;
+ }
+ for (x = x0; x; x = x->next) {
+ ++indeg[x->rhs];
+ }
+
+ for (py = &y0;;) {
+ // reached end of list
+ if (!x0) break;
+
+ tcmd_t **px = &x0, **py1 = py; // py1 remembers the output tail before this pass
+ for (x = x0; x; x = x->next) {
+ if (indeg[x->lhs] == 0) {
+ --indeg[x->rhs];
+ *py = x;
+ py = &x->next;
+ } else {
+ *px = x;
+ px = &x->next;
+ }
+ }
+ *px = NULL;
+
+ // only cycles left
+ if (py == py1) {
+ // look for cycles of length 2 or more
+ for (x = x0; x && x->lhs == x->rhs; x = x->next);
+ nontrivial_cycles = x != NULL;
+ break;
+ }
+ }
+ *py = x0; // append whatever is left (NULL if everything was sorted)
+
+ *phead = y0;
+ return nontrivial_cycles;
}
// Constructs an empty command pool and registers the empty command list
// (NULL) first, so that it receives the static identifier TCID0.
tcpool_t::tcpool_t()
- : alc()
- , index()
+ : alc()
+ , index()
{
- // empty command must have static number zero
- assert(TCID0 == insert(NULL));
+ // empty command must have static number zero; insert() is called outside
+ // of assert() so that it still runs when NDEBUG compiles assertions out
+ const tcid_t id0 = insert(NULL);
+ assert(TCID0 == id0);
+ (void) id0; // otherwise unused when NDEBUG is defined
}
// Allocates a command from the pool allocator with the given 'lhs' and 'rhs'
// and an empty history (history[0] is the TAGVER_ZERO terminator), linked in
// front of 'next'. Returns the new command.
tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs)
{
- tcmd_t *p = alc.alloct<tcmd_t>(1);
- p->next = next;
- p->lhs = lhs;
- p->rhs = rhs;
- p->history[0] = TAGVER_ZERO;
- return p;
+ tcmd_t *p = alc.alloct<tcmd_t>(1);
+ p->next = next;
+ p->lhs = lhs;
+ p->rhs = rhs;
+ p->history[0] = TAGVER_ZERO;
+ return p;
}
// Allocates a command with rhs == TAGVER_ZERO whose history holds the single
// value 'set' followed by the terminator, linked in front of 'next'.
// tcmd_t itself provides one history slot, so one extra tagver_t is allocated
// to make room for the second element.
tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set)
{
- const size_t size = sizeof(tcmd_t) + sizeof(tagver_t);
- tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
- p->next = next;
- p->lhs = lhs;
- p->rhs = TAGVER_ZERO;
- p->history[0] = set;
- p->history[1] = TAGVER_ZERO;
- return p;
+ const size_t size = sizeof(tcmd_t) + sizeof(tagver_t);
+ tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
+ p->next = next;
+ p->lhs = lhs;
+ p->rhs = TAGVER_ZERO;
+ p->history[0] = set;
+ p->history[1] = TAGVER_ZERO;
+ return p;
}
// Allocates a command with the given 'lhs'/'rhs' whose history is collected
// from 'history': the chain starting at 'hidx' is followed through pred()
// until HROOT, and elem(i) of every node with tag(i) == tag is stored, in the
// order the chain is walked. The stored history is TAGVER_ZERO-terminated.
// The first walk only counts matches so that the command can be allocated
// with hlen extra slots (the slot inside tcmd_t holds the terminator's share).
tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
- const tag_history_t &history, hidx_t hidx, size_t tag)
-{
- size_t hlen = 0;
- for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
- if (history.tag(i) == tag) ++hlen;
- }
-
- const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t);
- tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
- p->next = next;
- p->lhs = lhs;
- p->rhs = rhs;
- tagver_t *h = p->history;
- for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
- if (history.tag(i) == tag) {
- *h++ = history.elem(i);
- }
- }
- *h++ = TAGVER_ZERO;
- return p;
+ const tag_history_t &history, hidx_t hidx, size_t tag)
+{
+ size_t hlen = 0;
+ for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
+ if (history.tag(i) == tag) ++hlen;
+ }
+
+ const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t);
+ tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
+ p->next = next;
+ p->lhs = lhs;
+ p->rhs = rhs;
+ tagver_t *h = p->history;
+ for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
+ if (history.tag(i) == tag) {
+ *h++ = history.elem(i);
+ }
+ }
+ *h++ = TAGVER_ZERO;
+ return p;
}
// Allocates a command with the given 'lhs'/'rhs' and a copy of the
// TAGVER_ZERO-terminated array 'history' (terminator included), linked in
// front of 'next'. 'history' must be terminated: its length is found by
// scanning for TAGVER_ZERO.
tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
- const tagver_t *history)
-{
- size_t hlen = 0;
- for (const tagver_t *h = history; *h != TAGVER_ZERO; ++h) ++hlen;
-
- const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t);
- tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
- p->next = next;
- p->lhs = lhs;
- p->rhs = rhs;
- memcpy(p->history, history, (hlen + 1) * sizeof(tagver_t));
- return p;
+ const tagver_t *history)
+{
+ size_t hlen = 0;
+ for (const tagver_t *h = history; *h != TAGVER_ZERO; ++h) ++hlen;
+
+ const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t);
+ tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
+ p->next = next;
+ p->lhs = lhs;
+ p->rhs = rhs;
+ memcpy(p->history, history, (hlen + 1) * sizeof(tagver_t)); // +1 copies the terminator
+ return p;
}
// Hashes a command list by folding, for every command in list order, its
// 'lhs', 'rhs' and first history element into a running hash32 value.
// Only history[0] is hashed, so lists that differ in later history elements
// get the same hash. A NULL list hashes to 0.
uint32_t hash_tcmd(const tcmd_t *tcmd)
{
- uint32_t h = 0;
- for (const tcmd_t *p = tcmd; p; p = p->next) {
- h = hash32(h, &p->lhs, sizeof(p->lhs));
- h = hash32(h, &p->rhs, sizeof(p->rhs));
- h = hash32(h, &p->history[0], sizeof(p->history[0]));
- }
- return h;
+ uint32_t h = 0;
+ for (const tcmd_t *p = tcmd; p; p = p->next) {
+ h = hash32(h, &p->lhs, sizeof(p->lhs));
+ h = hash32(h, &p->rhs, sizeof(p->rhs));
+ h = hash32(h, &p->history[0], sizeof(p->history[0]));
+ }
+ return h;
}
// Equality predicate on command lists: two lists are equal when they have the
// same length and their commands are pairwise equal per tcmd_t::equal.
// Two NULL lists compare equal.
struct tcmd_eq_t
{
- bool operator()(const tcmd_t *x, const tcmd_t *y) const
- {
- for (;;) {
- if (!x && !y) return true;
- if (!x || !y) return false;
- if (!tcmd_t::equal(*x, *y)) return false;
- x = x->next;
- y = y->next;
- }
- }
+ bool operator()(const tcmd_t *x, const tcmd_t *y) const
+ {
+ for (;;) {
+ if (!x && !y) return true;
+ if (!x || !y) return false;
+ if (!tcmd_t::equal(*x, *y)) return false;
+ x = x->next;
+ y = y->next;
+ }
+ }
};
// Returns the identifier of command list 'tcmd' in the index.
// The index is searched by hash (hash_tcmd) using tcmd_eq_t for comparison;
// if no equal list is found, 'tcmd' is pushed and its new index is returned.
tcid_t tcpool_t::insert(const tcmd_t *tcmd)
{
- const uint32_t h = hash_tcmd(tcmd);
+ const uint32_t h = hash_tcmd(tcmd);
- tcmd_eq_t eq;
- size_t id = index.find_with(h, tcmd, eq);
- if (id == index_t::NIL) {
- id = index.push(h, tcmd);
- }
+ tcmd_eq_t eq;
+ size_t id = index.find_with(h, tcmd, eq);
+ if (id == index_t::NIL) {
+ id = index.push(h, tcmd);
+ }
- return static_cast<tcid_t>(id);
+ return static_cast<tcid_t>(id);
}
// Returns the command list stored in the index under identifier 'id'.
const tcmd_t *tcpool_t::operator[](tcid_t id) const
{
- return index[id];
+ return index[id];
}
} // namespace re2c
// Node of a singly linked list of tag commands.
// 'history' is declared with one element but is used as a variable-length,
// TAGVER_ZERO-terminated array: the tcpool_t::make_* functions allocate extra
// space directly behind the struct for the additional elements.
struct tcmd_t
{
- tcmd_t *next;
- tagver_t lhs; // left hand side
- tagver_t rhs; // right hand side
- tagver_t history[1];
-
- static bool equal(const tcmd_t &x, const tcmd_t &y);
- static bool equal_history(const tagver_t *h, const tagver_t *g);
- static bool topsort(tcmd_t **phead, uint32_t *indeg);
- static bool iscopy(const tcmd_t *x);
- static bool isset(const tcmd_t *x);
- static bool isadd(const tcmd_t *x);
+ tcmd_t *next;
+ tagver_t lhs; // left hand side
+ tagver_t rhs; // right hand side
+ tagver_t history[1]; // first slot of a TAGVER_ZERO-terminated array
+
+ static bool equal(const tcmd_t &x, const tcmd_t &y);
+ static bool equal_history(const tagver_t *h, const tagver_t *g);
+ static bool topsort(tcmd_t **phead, uint32_t *indeg);
+ static bool iscopy(const tcmd_t *x);
+ static bool isset(const tcmd_t *x);
+ static bool isadd(const tcmd_t *x);
};
typedef uint32_t tcid_t; // identifier of a command list, as returned by tcpool_t::insert
// Pool of tag commands.
// Commands are allocated from a slab allocator ('alc') by the make_*/copy_add
// functions; whole command lists are registered in 'index' by insert(), which
// hands out tcid_t identifiers that operator[] maps back to the list.
class tcpool_t
{
- typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t;
- typedef lookup_t<const tcmd_t*> index_t;
+ typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t;
+ typedef lookup_t<const tcmd_t*> index_t;
- alc_t alc;
- index_t index;
+ alc_t alc;
+ index_t index;
public:
- tcpool_t();
- tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs);
- tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set);
- tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag);
- tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history);
- tcid_t insert(const tcmd_t *tcmd);
- const tcmd_t *operator[](tcid_t id) const;
+ tcpool_t();
+ tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs);
+ tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set);
+ tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag);
+ tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history);
+ tcid_t insert(const tcmd_t *tcmd);
+ const tcmd_t *operator[](tcid_t id) const;
};
} // namespace re2c
// Program entry point.
// Parses the command line, opens the source file, runs compile() and emits
// the output. Exit status: 0 when option parsing returns EXIT_OK or when
// everything succeeds without warn.error(); 1 when option parsing fails, the
// source file cannot be opened, output.emit() fails, or warn.error() is set.
int main(int, char *argv[])
{
- conopt_t globopts;
- Opt opts(globopts);
- Warn warn;
+ conopt_t globopts;
+ Opt opts(globopts);
+ Warn warn;
- switch (parse_opts(argv, globopts, opts, warn)) {
- case OK: break;
- case EXIT_OK: return 0;
- case EXIT_FAIL: return 1;
- }
+ switch (parse_opts(argv, globopts, opts, warn)) {
+ case OK: break;
+ case EXIT_OK: return 0;
+ case EXIT_FAIL: return 1;
+ }
- re2c::Input input(opts.source_file);
- if (!input.open()) {
- error("cannot open source file: %s", opts.source_file);
- return 1;
- }
- Scanner scanner(input, warn);
- Output output(warn);
+ re2c::Input input(opts.source_file);
+ if (!input.open()) {
+ error("cannot open source file: %s", opts.source_file);
+ return 1;
+ }
+ Scanner scanner(input, warn);
+ Output output(warn);
- compile(scanner, output, opts);
- if (!output.emit()) return 1;
+ compile(scanner, output, opts);
+ if (!output.emit()) return 1;
- return warn.error() ? 1 : 0;
+ return warn.error() ? 1 : 0;
}
// Returns the position of state 's' inside the array nfa.states.
// 's' must point into that array.
static uint32_t index(const nfa_t &nfa, const nfa_state_t *s)
{
- return static_cast<uint32_t>(s - nfa.states);
+ return static_cast<uint32_t>(s - nfa.states);
}
// Writes the NFA to stderr as a Graphviz "digraph NFA" description.
// States are visited from the highest index down to 0. Each state becomes a
// node labelled "index(indeg)" (FIN states get an extra fillcolor attribute),
// followed by its outgoing edges:
//   ALT - two unlabelled edges, the second one drawn in light gray;
//   RAN - one edge labelled with the comma-separated ranges (upper bound
//         printed inclusively, i.e. upper() - 1);
//   TAG - one edge labelled with the capture number or tag name, an arrow
//         (down if the tag info is negative, up otherwise) and the height;
//   NIL - one unlabelled edge;
//   FIN - no edges.
void dump_nfa(const nfa_t &nfa)
{
- fprintf(stderr,
- "digraph NFA {\n"
- " rankdir=LR\n"
- " node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n"
- " edge[arrowhead=vee fontname=Courier label=\" \"]\n\n");
+ fprintf(stderr,
+ "digraph NFA {\n"
+ " rankdir=LR\n"
+ " node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n"
+ " edge[arrowhead=vee fontname=Courier label=\" \"]\n\n");
- for (uint32_t i = static_cast<uint32_t>(nfa.size); i --> 0;) {
- const nfa_state_t *n = &nfa.states[i];
+ for (uint32_t i = static_cast<uint32_t>(nfa.size); i --> 0;) {
+ const nfa_state_t *n = &nfa.states[i];
- fprintf(stderr, " n%u [label=\"%u(%d)\"]", i, i, n->indeg);
- if (n->type == nfa_state_t::FIN) {
- fprintf(stderr, " [fillcolor=gray]");
- }
- fprintf(stderr, "\n");
+ fprintf(stderr, " n%u [label=\"%u(%d)\"]", i, i, n->indeg);
+ if (n->type == nfa_state_t::FIN) {
+ fprintf(stderr, " [fillcolor=gray]");
+ }
+ fprintf(stderr, "\n");
- switch (n->type) {
- case nfa_state_t::ALT:
- fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->alt.out1));
- fprintf(stderr, " n%u -> n%u [color=lightgray]\n", i, index(nfa, n->alt.out2));
- break;
- case nfa_state_t::RAN: {
- fprintf(stderr, " n%u -> n%u [label=\"", i, index(nfa, n->ran.out));
- for (const Range *r = n->ran.ran; r; r = r->next()) {
- const uint32_t
- l = r->lower(),
- u = r->upper() - 1;
- fprintf(stderr, "%u", l);
- if (u > l) fprintf(stderr, "-%u", u);
- if (r->next()) fprintf(stderr, ",");
- }
- fprintf(stderr, "\"]\n");
- break;
- }
- case nfa_state_t::TAG: {
- const Tag &tag = nfa.tags[n->tag.info.idx];
- fprintf(stderr, " n%u -> n%u [label=\"/", i, index(nfa, n->tag.out));
- if (capture(tag)) {
- fprintf(stderr, "%u", (uint32_t)tag.ncap);
- } else if (!trailing(tag)) {
- fprintf(stderr, "%s", tag.name->c_str());
- }
- if (n->tag.info.neg) {
- fprintf(stderr, "↓");
- } else {
- fprintf(stderr, "↑");
- }
- fprintf(stderr, "(%d)", tag.height);
- fprintf(stderr, "\"]\n");
- break;
- }
- case nfa_state_t::FIN:
- break;
- case nfa_state_t::NIL:
- fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->nil.out));
- break;
- }
- }
+ switch (n->type) {
+ case nfa_state_t::ALT:
+ fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->alt.out1));
+ fprintf(stderr, " n%u -> n%u [color=lightgray]\n", i, index(nfa, n->alt.out2));
+ break;
+ case nfa_state_t::RAN: {
+ fprintf(stderr, " n%u -> n%u [label=\"", i, index(nfa, n->ran.out));
+ for (const Range *r = n->ran.ran; r; r = r->next()) {
+ const uint32_t
+ l = r->lower(),
+ u = r->upper() - 1;
+ fprintf(stderr, "%u", l);
+ if (u > l) fprintf(stderr, "-%u", u);
+ if (r->next()) fprintf(stderr, ",");
+ }
+ fprintf(stderr, "\"]\n");
+ break;
+ }
+ case nfa_state_t::TAG: {
+ const Tag &tag = nfa.tags[n->tag.info.idx];
+ fprintf(stderr, " n%u -> n%u [label=\"/", i, index(nfa, n->tag.out));
+ if (capture(tag)) {
+ fprintf(stderr, "%u", (uint32_t)tag.ncap);
+ } else if (!trailing(tag)) {
+ fprintf(stderr, "%s", tag.name->c_str());
+ }
+ if (n->tag.info.neg) {
+ fprintf(stderr, "↓");
+ } else {
+ fprintf(stderr, "↑");
+ }
+ fprintf(stderr, "(%d)", tag.height);
+ fprintf(stderr, "\"]\n");
+ break;
+ }
+ case nfa_state_t::FIN:
+ break;
+ case nfa_state_t::NIL:
+ fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->nil.out));
+ break;
+ }
+ }
- fprintf(stderr, "}\n");
+ fprintf(stderr, "}\n");
}
} // namespace re2c
// Recursively estimates the number of NFA states needed for regexp 're'.
// The per-node costs mirror the allocations made by re_to_nfa:
//   NIL  - 0;
//   SYM, TAG - 1;
//   ALT  - both branches plus one state;
//   CAT  - both parts;
//   ITER - unbounded (max == AST::MANY): 'min' copies of the body plus one
//          state; bounded: 'max' copies of the body plus (max - min) states.
static size_t estimate(const RE *re)
{
- switch (re->type) {
- case RE::NIL: return 0;
- case RE::SYM: return 1;
- case RE::TAG: return 1;
- case RE::ALT:
- return estimate(re->alt.re1)
- + estimate(re->alt.re2)
- + 1;
- case RE::CAT:
- return estimate(re->cat.re1)
- + estimate(re->cat.re2);
- case RE::ITER: {
- const size_t
- iter = estimate(re->iter.re),
- min = re->iter.min,
- max = re->iter.max;
- return max == AST::MANY
- ? iter * min + 1
- : iter * max + (max - min);
- }
- }
- return 0; /* unreachable */
+ switch (re->type) {
+ case RE::NIL: return 0;
+ case RE::SYM: return 1;
+ case RE::TAG: return 1;
+ case RE::ALT:
+ return estimate(re->alt.re1)
+ + estimate(re->alt.re2)
+ + 1;
+ case RE::CAT:
+ return estimate(re->cat.re1)
+ + estimate(re->cat.re2);
+ case RE::ITER: {
+ const size_t
+ iter = estimate(re->iter.re),
+ min = re->iter.min,
+ max = re->iter.max;
+ return max == AST::MANY
+ ? iter * min + 1
+ : iter * max + (max - min);
+ }
+ }
+ return 0; /* unreachable */
}
// Estimates the number of NFA states needed for the whole set of regexps:
// estimate() of each regexp plus one extra state per regexp, plus (nre - 1)
// states for joining the regexps together.
// Returns 0 for an empty set.
size_t estimate_size(const std::vector<RE*> &res)
{
- const size_t nre = res.size();
- size_t size = nre - 1;
- for (size_t i = 0; i < nre; ++i) {
- size += estimate(res[i]) + 1;
- }
- return size;
+ const size_t nre = res.size();
+ // no regexps: without this guard 'nre - 1' wraps around to SIZE_MAX
+ if (nre == 0) return 0;
+ size_t size = nre - 1;
+ for (size_t i = 0; i < nre; ++i) {
+ size += estimate(res[i]) + 1;
+ }
+ return size;
}
} // namespace re2c
// One NFA state. 'type' selects which member of the anonymous union is valid:
//   ALT - two successors (alt.out1, alt.out2);
//   RAN - one successor (ran.out) and the range list ran.ran;
//   TAG - one successor (tag.out) and tag information;
//   NIL - one successor (nil.out);
//   FIN - no successors.
// The make_* functions set the type and successors and then call init(),
// which stores the rule number and resets the bookkeeping fields.
struct nfa_state_t
{
- enum type_t {ALT, RAN, TAG, FIN, NIL} type;
- union
- {
- struct
- {
- nfa_state_t *out1;
- nfa_state_t *out2;
- } alt;
- struct
- {
- nfa_state_t *out;
- const Range *ran;
- } ran;
- struct
- {
- nfa_state_t *out;
- tag_info_t info;
- } tag;
- struct
- {
- nfa_state_t *out;
- } nil;
- };
- size_t rule;
-
- // stuff needed for GOR1
- uint32_t clos;
- gor_status_t status : 2; // values 0, 1, 2
- uint32_t arcidx : 2; // maximum out-dergee is 2
- uint32_t active : 1; // boolean
- uint32_t indeg : 27; // the rest; we are unlikely to have more than 2^27 states
-
- void init(size_t r)
- {
- rule = r;
- clos = NOCLOS;
- status = GOR_NOPASS;
- arcidx = 0;
- active = 0;
- indeg = 0;
- }
-
- void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2)
- {
- type = ALT;
- alt.out1 = s1;
- alt.out2 = s2;
- init(r);
- }
- void make_ran(size_t r, nfa_state_t *s, const Range *p)
- {
- type = RAN;
- ran.out = s;
- ran.ran = p;
- init(r);
- }
- void make_tag(size_t r, nfa_state_t *s, tag_info_t info)
- {
- type = TAG;
- tag.out = s;
- tag.info = info;
- init(r);
- }
- void make_fin(size_t r)
- {
- type = FIN;
- init(r);
- }
- void make_nil(size_t r, nfa_state_t *s)
- {
- type = NIL;
- nil.out = s;
- init(r);
- }
+ enum type_t {ALT, RAN, TAG, FIN, NIL} type;
+ union
+ {
+ struct
+ {
+ nfa_state_t *out1;
+ nfa_state_t *out2;
+ } alt;
+ struct
+ {
+ nfa_state_t *out;
+ const Range *ran;
+ } ran;
+ struct
+ {
+ nfa_state_t *out;
+ tag_info_t info;
+ } tag;
+ struct
+ {
+ nfa_state_t *out;
+ } nil;
+ };
+ size_t rule;
+
+ // stuff needed for GOR1
+ uint32_t clos;
+ gor_status_t status : 2; // values 0, 1, 2
+ uint32_t arcidx : 2; // maximum out-degree is 2
+ uint32_t active : 1; // boolean
+ uint32_t indeg : 27; // the rest; we are unlikely to have more than 2^27 states
+
+ void init(size_t r)
+ {
+ rule = r;
+ clos = NOCLOS;
+ status = GOR_NOPASS;
+ arcidx = 0;
+ active = 0;
+ indeg = 0;
+ }
+
+ void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2)
+ {
+ type = ALT;
+ alt.out1 = s1;
+ alt.out2 = s2;
+ init(r);
+ }
+ void make_ran(size_t r, nfa_state_t *s, const Range *p)
+ {
+ type = RAN;
+ ran.out = s;
+ ran.ran = p;
+ init(r);
+ }
+ void make_tag(size_t r, nfa_state_t *s, tag_info_t info)
+ {
+ type = TAG;
+ tag.out = s;
+ tag.info = info;
+ init(r);
+ }
+ void make_fin(size_t r)
+ {
+ type = FIN;
+ init(r);
+ }
+ void make_nil(size_t r, nfa_state_t *s)
+ {
+ type = NIL;
+ nil.out = s;
+ init(r);
+ }
};
// NFA built from a regexp specification.
// 'states' is an array of 'max_size' states allocated by the constructor and
// released by the destructor; 'size' is the number of states in use and
// 'root' is the entry state (NULL when the specification has no regexps).
// 'charset', 'rules' and 'tags' are references to objects of the RESpec the
// NFA was constructed from.
struct nfa_t
{
- size_t max_size;
- size_t size;
- nfa_state_t *states;
- std::vector<uint32_t> &charset;
- std::valarray<Rule> &rules;
- std::vector<Tag> &tags;
- nfa_state_t *root;
-
- explicit nfa_t(const RESpec &spec);
- ~nfa_t();
-
- FORBID_COPY(nfa_t);
+ size_t max_size;
+ size_t size;
+ nfa_state_t *states;
+ std::vector<uint32_t> &charset;
+ std::valarray<Rule> &rules;
+ std::vector<Tag> &tags;
+ nfa_state_t *root;
+
+ explicit nfa_t(const RESpec &spec);
+ ~nfa_t();
+
+ FORBID_COPY(nfa_t);
};
size_t estimate_size(const std::vector<RE*> &res); // state-count estimate used to size nfa_t::states
// Builds the NFA fragment for regexp 're' of rule 'nrule' and returns its
// entry state. The fragment is constructed backwards: 't' is the state that
// the fragment continues to once 're' has been matched.
//
// New states are taken from nfa.states by incrementing nfa.size; no capacity
// check is made here (the array is sized by the nfa_t constructor).
//
//   NIL  - no state, the result is 't' itself;
//   SYM  - one RAN state;
//   ALT  - both branches lead to 't', joined by one ALT state;
//   CAT  - the second part is built first, then the first part in front of it;
//   ITER - unbounded: an ALT state loops back into the body or leaves to 't';
//          bounded: (max - min) optional copies, each guarded by an ALT state;
//          in both cases (min - 1) further mandatory copies are prepended;
//   TAG  - one TAG state, unless the tag is fixed and not a capture, in which
//          case no state is created.
static nfa_state_t *re_to_nfa(nfa_t &nfa, size_t nrule, const RE *re, nfa_state_t *t)
{
- nfa_state_t *s = NULL;
- switch (re->type) {
- case RE::NIL:
- s = t;
- break;
- case RE::SYM:
- s = &nfa.states[nfa.size++];
- s->make_ran(nrule, t, re->sym);
- break;
- case RE::ALT: {
- nfa_state_t
- *s1 = re_to_nfa(nfa, nrule, re->alt.re1, t),
- *s2 = re_to_nfa(nfa, nrule, re->alt.re2, t);
- s = &nfa.states[nfa.size++];
- s->make_alt(nrule, s1, s2);
- break;
- }
- case RE::CAT:
- s = re_to_nfa(nfa, nrule, re->cat.re2, t);
- s = re_to_nfa(nfa, nrule, re->cat.re1, s);
- break;
- case RE::ITER: {
- const uint32_t
- min = re->iter.min,
- max = re->iter.max;
- const RE *iter = re->iter.re;
- // see note [counted repetition and iteration expansion]
- if (max == AST::MANY) {
- nfa_state_t *q = &nfa.states[nfa.size++];
- s = re_to_nfa(nfa, nrule, iter, q);
- q->make_alt(nrule, s, t);
- } else {
- s = re_to_nfa(nfa, nrule, iter, t);
- for (uint32_t i = min; i < max; ++i) {
- nfa_state_t *q = &nfa.states[nfa.size++];
- q->make_alt(nrule, s, t);
- s = re_to_nfa(nfa, nrule, iter, q);
- }
- }
- for (uint32_t i = 1; i < min; ++i) {
- s = re_to_nfa(nfa, nrule, iter, s);
- }
- break;
- }
- case RE::TAG: {
- const Tag &tag = nfa.tags[re->tag.idx];
- if (fixed(tag) && !capture(tag)) {
- s = t;
- } else {
- s = &nfa.states[nfa.size++];
- s->make_tag(nrule, t, re->tag);
- }
- break;
- }
- }
- return s;
+ nfa_state_t *s = NULL;
+ switch (re->type) {
+ case RE::NIL:
+ s = t;
+ break;
+ case RE::SYM:
+ s = &nfa.states[nfa.size++];
+ s->make_ran(nrule, t, re->sym);
+ break;
+ case RE::ALT: {
+ nfa_state_t
+ *s1 = re_to_nfa(nfa, nrule, re->alt.re1, t),
+ *s2 = re_to_nfa(nfa, nrule, re->alt.re2, t);
+ s = &nfa.states[nfa.size++];
+ s->make_alt(nrule, s1, s2);
+ break;
+ }
+ case RE::CAT:
+ s = re_to_nfa(nfa, nrule, re->cat.re2, t);
+ s = re_to_nfa(nfa, nrule, re->cat.re1, s);
+ break;
+ case RE::ITER: {
+ const uint32_t
+ min = re->iter.min,
+ max = re->iter.max;
+ const RE *iter = re->iter.re;
+ // see note [counted repetition and iteration expansion]
+ if (max == AST::MANY) {
+ nfa_state_t *q = &nfa.states[nfa.size++];
+ s = re_to_nfa(nfa, nrule, iter, q);
+ q->make_alt(nrule, s, t);
+ } else {
+ s = re_to_nfa(nfa, nrule, iter, t);
+ for (uint32_t i = min; i < max; ++i) {
+ nfa_state_t *q = &nfa.states[nfa.size++];
+ q->make_alt(nrule, s, t);
+ s = re_to_nfa(nfa, nrule, iter, q);
+ }
+ }
+ for (uint32_t i = 1; i < min; ++i) {
+ s = re_to_nfa(nfa, nrule, iter, s);
+ }
+ break;
+ }
+ case RE::TAG: {
+ const Tag &tag = nfa.tags[re->tag.idx];
+ if (fixed(tag) && !capture(tag)) {
+ s = t;
+ } else {
+ s = &nfa.states[nfa.size++];
+ s->make_tag(nrule, t, re->tag);
+ }
+ break;
+ }
+ }
+ return s;
}
// Computes 'indeg' for every state reachable from 'n' by a depth-first walk.
// Each call increments the in-degree of the state it is called on; successors
// are visited only on the first visit (when indeg becomes 1), so the walk
// terminates on cyclic graphs and every edge is counted exactly once.
// The initial call counts as one incoming edge for the start state.
void calc_indegrees(nfa_state_t *n)
{
- ++n->indeg;
- if (n->indeg > 1) return;
+ ++n->indeg;
+ if (n->indeg > 1) return;
- switch (n->type) {
- case nfa_state_t::NIL:
- calc_indegrees(n->nil.out);
- break;
- case nfa_state_t::ALT:
- calc_indegrees(n->alt.out1);
- calc_indegrees(n->alt.out2);
- break;
- case nfa_state_t::TAG:
- calc_indegrees(n->tag.out);
- break;
- case nfa_state_t::RAN:
- calc_indegrees(n->ran.out);
- case nfa_state_t::FIN:
- break;
- }
+ switch (n->type) {
+ case nfa_state_t::NIL:
+ calc_indegrees(n->nil.out);
+ break;
+ case nfa_state_t::ALT:
+ calc_indegrees(n->alt.out1);
+ calc_indegrees(n->alt.out2);
+ break;
+ case nfa_state_t::TAG:
+ calc_indegrees(n->tag.out);
+ break;
+ case nfa_state_t::RAN:
+ calc_indegrees(n->ran.out);
+ break; // explicit: do not rely on falling through into FIN
+ case nfa_state_t::FIN:
+ break;
+ }
}
// Builds the NFA for all regexps of 'spec'.
// The state array is allocated up front with estimate_size(spec.res) entries.
// For regexp number i a FIN state for rule i is created and the regexp is
// compiled in front of it with re_to_nfa. The per-rule fragments are chained
// with ALT states: each new root has the previous root as its first successor
// and the new fragment as its second. Finally in-degrees are computed from the
// root. With no regexps, 'root' stays NULL and nothing else is done.
nfa_t::nfa_t(const RESpec &spec)
- : max_size(estimate_size(spec.res))
- , size(0)
- , states(new nfa_state_t[max_size])
- , charset(spec.charset)
- , rules(spec.rules)
- , tags(spec.tags)
- , root(NULL)
+ : max_size(estimate_size(spec.res))
+ , size(0)
+ , states(new nfa_state_t[max_size])
+ , charset(spec.charset)
+ , rules(spec.rules)
+ , tags(spec.tags)
+ , root(NULL)
{
- const size_t nre = spec.res.size();
+ const size_t nre = spec.res.size();
- if (nre == 0) return;
+ if (nre == 0) return;
- for (size_t i = 0; i < nre; ++i) {
- nfa_state_t *s = &states[size++];
- s->make_fin(i);
- s = re_to_nfa(*this, i, spec.res[i], s);
+ for (size_t i = 0; i < nre; ++i) {
+ nfa_state_t *s = &states[size++];
+ s->make_fin(i);
+ s = re_to_nfa(*this, i, spec.res[i], s);
- if (root) {
- nfa_state_t *t = &states[size++];
- t->make_alt(i, root, s);
- root = t;
- } else {
- root = s;
- }
- }
+ if (root) {
+ nfa_state_t *t = &states[size++];
+ t->make_alt(i, root, s);
+ root = t;
+ } else {
+ root = s;
+ }
+ }
- calc_indegrees(root);
+ calc_indegrees(root);
}
// Releases the state array allocated by the constructor.
nfa_t::~nfa_t()
{
- delete[] states;
+ delete[] states;
}
} // namespace re2c
// Builds the regexp specification from the parsed rules.
// 'charset', 'tags' and 'rules' are bound to freshly heap-allocated objects;
// NOTE(review): nothing in this constructor releases them -- confirm where
// ownership is handed over.
// For every AST rule the regexp is produced by ast_to_re (which may append to
// 'tags' and counts captures in 'ncap') and the matching Rule is initialised
// from the rule's code, the tags added for it (starting at 'ltag') and 'ncap'.
RESpec::RESpec(const std::vector<ASTRule> &ast, const opt_t *o, Warn &w)
- : alc()
- , res()
- , charset(*new std::vector<uint32_t>)
- , tags(*new std::vector<Tag>)
- , rules(*new std::valarray<Rule>(ast.size()))
- , opts(o)
- , warn(w)
+ : alc()
+ , res()
+ , charset(*new std::vector<uint32_t>)
+ , tags(*new std::vector<Tag>)
+ , rules(*new std::valarray<Rule>(ast.size()))
+ , opts(o)
+ , warn(w)
{
- for (size_t i = 0; i < ast.size(); ++i) {
- size_t ltag = tags.size(), ncap = 0;
- res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0));
- init_rule(rules[i], ast[i].code, tags, ltag, ncap);
- }
+ for (size_t i = 0; i < ast.size(); ++i) {
+ size_t ltag = tags.size(), ncap = 0;
+ res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0));
+ init_rule(rules[i], ast[i].code, tags, ltag, ncap);
+ }
}
// Returns true if the AST contains a TAG or CAP node.
// Leaf kinds (NIL, STR, CLS, DOT, DEFAULT) and DIFF never do; ALT and CAT are
// checked on both operands; REF and ITER are checked on the AST they wrap.
bool has_tags(const AST *ast)
{
- switch (ast->type) {
- case AST::NIL:
- case AST::STR:
- case AST::CLS:
- case AST::DOT:
- case AST::DEFAULT:
- case AST::DIFF: return false;
- case AST::TAG:
- case AST::CAP: return true;
- case AST::ALT: return has_tags(ast->alt.ast1) || has_tags(ast->alt.ast2);
- case AST::CAT: return has_tags(ast->cat.ast1) || has_tags(ast->cat.ast2);
- case AST::REF: return has_tags(ast->ref.ast);
- case AST::ITER: return has_tags(ast->iter.ast);
- }
- return false; /* unreachable */
+ switch (ast->type) {
+ case AST::NIL:
+ case AST::STR:
+ case AST::CLS:
+ case AST::DOT:
+ case AST::DEFAULT:
+ case AST::DIFF: return false;
+ case AST::TAG:
+ case AST::CAP: return true;
+ case AST::ALT: return has_tags(ast->alt.ast1) || has_tags(ast->alt.ast2);
+ case AST::CAT: return has_tags(ast->cat.ast1) || has_tags(ast->cat.ast2);
+ case AST::REF: return has_tags(ast->ref.ast);
+ case AST::ITER: return has_tags(ast->iter.ast);
+ }
+ return false; /* unreachable */
}
RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap, int32_t height)
{
- RE::alc_t &alc = spec.alc;
- std::vector<Tag> &tags = spec.tags;
- const opt_t *opts = spec.opts;
- Warn &warn = spec.warn;
-
- if (ast->type != AST::CAP && ast->type != AST::REF) ++height;
-
- switch (ast->type) {
- case AST::NIL:
- return re_nil(alc);
- case AST::STR: {
- const bool icase = opts->bCaseInsensitive
- || (ast->str.icase != opts->bCaseInverted);
- RE *x = NULL;
- std::vector<ASTChar>::const_iterator
- i = ast->str.chars->begin(),
- e = ast->str.chars->end();
- for (; i != e; ++i) {
- x = re_cat(alc, x, icase
- ? re_ichar(alc, ast->line, i->column, i->chr, opts)
- : re_schar(alc, ast->line, i->column, i->chr, opts));
- }
- return x ? x : re_nil(alc);
- }
- case AST::CLS: {
- Range *r = NULL;
- std::vector<ASTRange>::const_iterator
- i = ast->cls.ranges->begin(),
- e = ast->cls.ranges->end();
- for (; i != e; ++i) {
- Range *s = opts->encoding.encodeRange(i->lower, i->upper);
- if (!s) fatal_lc(ast->line, i->column,
- "bad code point range: '0x%X - 0x%X'", i->lower, i->upper);
- r = Range::add(r, s);
- }
- if (ast->cls.negated) {
- r = Range::sub(opts->encoding.fullRange(), r);
- }
- return re_class(alc, ast->line, ast->column, r, opts, warn);
- }
- case AST::DOT: {
- uint32_t c = '\n';
- if (!opts->encoding.encode(c)) {
- fatal_lc(ast->line, ast->column, "bad code point: '0x%X'", c);
- }
- return re_class(alc, ast->line, ast->column,
- Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn);
- }
- case AST::DEFAULT:
- // see note [default regexp]
- return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits()));
- case AST::ALT: {
- RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y;
- if (opts->posix_captures && has_tags(ast)) {
- // see note [POSIX subexpression hierarchy]
- if (ast->cat.ast1->type != AST::CAP) {
- t1 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
- t2 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height));
- }
- if (ast->cat.ast2->type != AST::CAP) {
- t3 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
- t4 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height));
- }
- }
- x = ast_to_re(spec, ast->alt.ast1, ncap, height);
- x = re_cat(alc, t1, re_cat(alc, x, t2));
- y = ast_to_re(spec, ast->alt.ast2, ncap, height);
- y = re_cat(alc, t3, re_cat(alc, y, t4));
- return re_alt(alc, x, y);
- }
- case AST::DIFF: {
- RE *x = ast_to_re(spec, ast->diff.ast1, ncap, height);
- RE *y = ast_to_re(spec, ast->diff.ast2, ncap, height);
- if (x->type != RE::SYM || y->type != RE::SYM) {
- fatal_lc(ast->line, ast->column, "can only difference char sets");
- }
- return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn);
- }
- case AST::CAT: {
- RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y;
- if (opts->posix_captures && has_tags(ast)) {
- // see note [POSIX subexpression hierarchy]
- if (ast->cat.ast1->type != AST::CAP) {
- t1 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
- t2 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height));
- }
- if (ast->cat.ast2->type != AST::CAP) {
- t3 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
- t4 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(Tag::FICTIVE, false, height));
- }
- }
- x = ast_to_re(spec, ast->cat.ast1, ncap, height);
- x = re_cat(alc, t1, re_cat(alc, x, t2));
- y = ast_to_re(spec, ast->cat.ast2, ncap, height);
- y = re_cat(alc, t3, re_cat(alc, y, t4));
- return re_cat(alc, x, y);
- }
- case AST::TAG: {
- if (ast->tag.name && !opts->tags) {
- fatal_lc(ast->line, ast->column,
- "tags are only allowed with '-T, --tags' option");
- }
- if (opts->posix_captures) {
- fatal_lc(ast->line, ast->column,
- "simple tags are not allowed with '--posix-captures' option");
- }
- RE *t = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(ast->tag.name, ast->tag.history, height));
- return t;
- }
- case AST::CAP: {
- if (!opts->posix_captures) {
- return ast_to_re(spec, ast->cap, ncap, height);
- }
- const AST *x = ast->cap;
- if (x->type == AST::REF) x = x->ref.ast;
-
- RE *t1 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(2 * ncap, false, height + 1));
-
- RE *t2 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(2 * ncap + 1, false, height));
-
- ++ncap;
- return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, height), t2));
- }
- case AST::REF:
- if (!opts->posix_captures) {
- return ast_to_re(spec, ast->ref.ast, ncap, height);
- }
- fatal_l(ast->line,
- "implicit grouping is forbidden with '--posix-captures'"
- " option, please wrap '%s' in capturing parenthesis",
- ast->ref.name->c_str());
- return NULL;
- case AST::ITER: {
- const uint32_t
- n = ast->iter.min,
- n1 = std::max(n, 1u),
- m = std::max(n, ast->iter.max);
- const AST *x = ast->iter.ast;
-
- RE *t1 = NULL, *t2 = NULL;
- if (opts->posix_captures && x->type == AST::CAP) {
- x = x->cap;
- if (x->type == AST::REF) x = x->ref.ast;
-
- t1 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(2 * ncap, m > 1, height + 1));
-
- t2 = re_tag(alc, tags.size(), false);
- tags.push_back(Tag(2 * ncap + 1, m > 1, height));
-
- ++ncap;
- }
-
- RE *y = NULL;
- if (m == 0) {
- y = re_cat(alc, t1, t2);
- } else if (m == 1) {
- y = ast_to_re(spec, x, ncap, height);
- y = re_cat(alc, t1, re_cat(alc, y, t2));
- } else {
- y = ast_to_re(spec, x, ncap, height);
- y = re_cat(alc, t1, y);
- y = re_cat(alc, y, t2);
- y = re_iter(alc, y, n1, m);
- }
- if (n == 0) {
- y = re_alt(alc, y, re_nil(alc));
- }
- return y;
- }
- }
- return NULL; /* unreachable */
+ RE::alc_t &alc = spec.alc;
+ std::vector<Tag> &tags = spec.tags;
+ const opt_t *opts = spec.opts;
+ Warn &warn = spec.warn;
+
+ if (ast->type != AST::CAP && ast->type != AST::REF) ++height;
+
+ switch (ast->type) {
+ case AST::NIL:
+ return re_nil(alc);
+ case AST::STR: {
+ const bool icase = opts->bCaseInsensitive
+ || (ast->str.icase != opts->bCaseInverted);
+ RE *x = NULL;
+ std::vector<ASTChar>::const_iterator
+ i = ast->str.chars->begin(),
+ e = ast->str.chars->end();
+ for (; i != e; ++i) {
+ x = re_cat(alc, x, icase
+ ? re_ichar(alc, ast->line, i->column, i->chr, opts)
+ : re_schar(alc, ast->line, i->column, i->chr, opts));
+ }
+ return x ? x : re_nil(alc);
+ }
+ case AST::CLS: {
+ Range *r = NULL;
+ std::vector<ASTRange>::const_iterator
+ i = ast->cls.ranges->begin(),
+ e = ast->cls.ranges->end();
+ for (; i != e; ++i) {
+ Range *s = opts->encoding.encodeRange(i->lower, i->upper);
+ if (!s) fatal_lc(ast->line, i->column,
+ "bad code point range: '0x%X - 0x%X'", i->lower, i->upper);
+ r = Range::add(r, s);
+ }
+ if (ast->cls.negated) {
+ r = Range::sub(opts->encoding.fullRange(), r);
+ }
+ return re_class(alc, ast->line, ast->column, r, opts, warn);
+ }
+ case AST::DOT: {
+ uint32_t c = '\n';
+ if (!opts->encoding.encode(c)) {
+ fatal_lc(ast->line, ast->column, "bad code point: '0x%X'", c);
+ }
+ return re_class(alc, ast->line, ast->column,
+ Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn);
+ }
+ case AST::DEFAULT:
+ // see note [default regexp]
+ return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits()));
+ case AST::ALT: {
+ RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y;
+ if (opts->posix_captures && has_tags(ast)) {
+ // see note [POSIX subexpression hierarchy]
+ if (ast->cat.ast1->type != AST::CAP) {
+ t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
+ t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height));
+ }
+ if (ast->cat.ast2->type != AST::CAP) {
+ t3 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
+ t4 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height));
+ }
+ }
+ x = ast_to_re(spec, ast->alt.ast1, ncap, height);
+ x = re_cat(alc, t1, re_cat(alc, x, t2));
+ y = ast_to_re(spec, ast->alt.ast2, ncap, height);
+ y = re_cat(alc, t3, re_cat(alc, y, t4));
+ return re_alt(alc, x, y);
+ }
+ case AST::DIFF: {
+ RE *x = ast_to_re(spec, ast->diff.ast1, ncap, height);
+ RE *y = ast_to_re(spec, ast->diff.ast2, ncap, height);
+ if (x->type != RE::SYM || y->type != RE::SYM) {
+ fatal_lc(ast->line, ast->column, "can only difference char sets");
+ }
+ return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn);
+ }
+ case AST::CAT: {
+ RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y;
+ if (opts->posix_captures && has_tags(ast)) {
+ // see note [POSIX subexpression hierarchy]
+ if (ast->cat.ast1->type != AST::CAP) {
+ t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
+ t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height));
+ }
+ if (ast->cat.ast2->type != AST::CAP) {
+ t3 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height + 1));
+ t4 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE, false, height));
+ }
+ }
+ x = ast_to_re(spec, ast->cat.ast1, ncap, height);
+ x = re_cat(alc, t1, re_cat(alc, x, t2));
+ y = ast_to_re(spec, ast->cat.ast2, ncap, height);
+ y = re_cat(alc, t3, re_cat(alc, y, t4));
+ return re_cat(alc, x, y);
+ }
+ case AST::TAG: {
+ if (ast->tag.name && !opts->tags) {
+ fatal_lc(ast->line, ast->column,
+ "tags are only allowed with '-T, --tags' option");
+ }
+ if (opts->posix_captures) {
+ fatal_lc(ast->line, ast->column,
+ "simple tags are not allowed with '--posix-captures' option");
+ }
+ RE *t = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(ast->tag.name, ast->tag.history, height));
+ return t;
+ }
+ case AST::CAP: {
+ if (!opts->posix_captures) {
+ return ast_to_re(spec, ast->cap, ncap, height);
+ }
+ const AST *x = ast->cap;
+ if (x->type == AST::REF) x = x->ref.ast;
+
+ RE *t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(2 * ncap, false, height + 1));
+
+ RE *t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(2 * ncap + 1, false, height));
+
+ ++ncap;
+ return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, height), t2));
+ }
+ case AST::REF:
+ if (!opts->posix_captures) {
+ return ast_to_re(spec, ast->ref.ast, ncap, height);
+ }
+ fatal_l(ast->line,
+ "implicit grouping is forbidden with '--posix-captures'"
+ " option, please wrap '%s' in capturing parenthesis",
+ ast->ref.name->c_str());
+ return NULL;
+ case AST::ITER: {
+ const uint32_t
+ n = ast->iter.min,
+ n1 = std::max(n, 1u),
+ m = std::max(n, ast->iter.max);
+ const AST *x = ast->iter.ast;
+
+ RE *t1 = NULL, *t2 = NULL;
+ if (opts->posix_captures && x->type == AST::CAP) {
+ x = x->cap;
+ if (x->type == AST::REF) x = x->ref.ast;
+
+ t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(2 * ncap, m > 1, height + 1));
+
+ t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(2 * ncap + 1, m > 1, height));
+
+ ++ncap;
+ }
+
+ RE *y = NULL;
+ if (m == 0) {
+ y = re_cat(alc, t1, t2);
+ } else if (m == 1) {
+ y = ast_to_re(spec, x, ncap, height);
+ y = re_cat(alc, t1, re_cat(alc, y, t2));
+ } else {
+ y = ast_to_re(spec, x, ncap, height);
+ y = re_cat(alc, t1, y);
+ y = re_cat(alc, y, t2);
+ y = re_iter(alc, y, n1, m);
+ }
+ if (n == 0) {
+ y = re_alt(alc, y, re_nil(alc));
+ }
+ return y;
+ }
+ }
+ return NULL; /* unreachable */
}
RE *re_schar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts)
{
- if (!opts->encoding.encode(c)) {
- fatal_lc(line, column, "bad code point: '0x%X'", c);
- }
- switch (opts->encoding.type()) {
- case Enc::UTF16:
- return UTF16Symbol(alc, c);
- case Enc::UTF8:
- return UTF8Symbol(alc, c);
- case Enc::ASCII:
- case Enc::EBCDIC:
- case Enc::UTF32:
- case Enc::UCS2:
- return re_sym(alc, Range::sym(c));
- }
- return NULL; /* unreachable */
+ if (!opts->encoding.encode(c)) {
+ fatal_lc(line, column, "bad code point: '0x%X'", c);
+ }
+ switch (opts->encoding.type()) {
+ case Enc::UTF16:
+ return UTF16Symbol(alc, c);
+ case Enc::UTF8:
+ return UTF8Symbol(alc, c);
+ case Enc::ASCII:
+ case Enc::EBCDIC:
+ case Enc::UTF32:
+ case Enc::UCS2:
+ return re_sym(alc, Range::sym(c));
+ }
+ return NULL; /* unreachable */
}
RE *re_ichar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts)
{
- if (is_alpha(c)) {
- return re_alt(alc,
- re_schar(alc, line, column, to_lower_unsafe(c), opts),
- re_schar(alc, line, column, to_upper_unsafe(c), opts));
- } else {
- return re_schar(alc, line, column, c, opts);
- }
+ if (is_alpha(c)) {
+ return re_alt(alc,
+ re_schar(alc, line, column, to_lower_unsafe(c), opts),
+ re_schar(alc, line, column, to_upper_unsafe(c), opts));
+ } else {
+ return re_schar(alc, line, column, c, opts);
+ }
}
RE *re_class(RE::alc_t &alc, uint32_t line, uint32_t column, const Range *r, const opt_t *opts, Warn &warn)
{
- if (!r) {
- switch (opts->empty_class_policy) {
- case EMPTY_CLASS_MATCH_EMPTY:
- warn.empty_class(line);
- return re_nil(alc);
- case EMPTY_CLASS_MATCH_NONE:
- warn.empty_class(line);
- break;
- case EMPTY_CLASS_ERROR:
- fatal_lc(line, column, "empty character class");
- }
- }
- switch (opts->encoding.type()) {
- case Enc::UTF16:
- return UTF16Range(alc, r);
- case Enc::UTF8:
- return UTF8Range(alc, r);
- case Enc::ASCII:
- case Enc::EBCDIC:
- case Enc::UTF32:
- case Enc::UCS2:
- return re_sym(alc, r);
- }
- return NULL; /* unreachable */
+ if (!r) {
+ switch (opts->empty_class_policy) {
+ case EMPTY_CLASS_MATCH_EMPTY:
+ warn.empty_class(line);
+ return re_nil(alc);
+ case EMPTY_CLASS_MATCH_NONE:
+ warn.empty_class(line);
+ break;
+ case EMPTY_CLASS_ERROR:
+ fatal_lc(line, column, "empty character class");
+ }
+ }
+ switch (opts->encoding.type()) {
+ case Enc::UTF16:
+ return UTF16Range(alc, r);
+ case Enc::UTF8:
+ return UTF8Range(alc, r);
+ case Enc::ASCII:
+ case Enc::EBCDIC:
+ case Enc::UTF32:
+ case Enc::UCS2:
+ return re_sym(alc, r);
+ }
+ return NULL; /* unreachable */
}
void assert_tags_used_once(const Rule &rule, const std::vector<Tag> &tags)
{
- std::set<std::string> names;
- const std::string *name = NULL;
-
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
- name = tags[t].name;
- if (name && !names.insert(*name).second) {
- fatal_l(rule.code->fline,
- "tag '%s' is used multiple times in the same rule",
- name->c_str());
- }
- }
+ std::set<std::string> names;
+ const std::string *name = NULL;
+
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ name = tags[t].name;
+ if (name && !names.insert(*name).second) {
+ fatal_l(rule.code->fline,
+ "tag '%s' is used multiple times in the same rule",
+ name->c_str());
+ }
+ }
}
void init_rule(Rule &rule, const Code *code, const std::vector<Tag> &tags,
- size_t ltag, size_t ncap)
+ size_t ltag, size_t ncap)
{
- rule.code = code;
- rule.ltag = ltag;
- rule.htag = tags.size();
- for (rule.ttag = ltag; rule.ttag < rule.htag && !trailing(tags[rule.ttag]); ++rule.ttag);
- rule.ncap = ncap;
- assert_tags_used_once(rule, tags);
+ rule.code = code;
+ rule.ltag = ltag;
+ rule.htag = tags.size();
+ for (rule.ttag = ltag; rule.ttag < rule.htag && !trailing(tags[rule.ttag]); ++rule.ttag);
+ rule.ncap = ncap;
+ assert_tags_used_once(rule, tags);
}
} // namespace re2c
// in future it might change.
static void insert_default_tags(RESpec &spec, RE *re, size_t *&tidx)
{
- RE::alc_t &alc = spec.alc;
- switch (re->type) {
- case RE::NIL: break;
- case RE::SYM: break;
- case RE::ALT: {
- size_t *i = tidx;
- RE *x = NULL, *y = NULL;
- insert_default_tags(spec, re->alt.re1, tidx);
- for (; i < tidx; ++i) {
- x = re_cat(alc, x, re_tag(alc, *i, true));
- }
- insert_default_tags(spec, re->alt.re2, tidx);
- for (; i < tidx; ++i) {
- y = re_cat(alc, y, re_tag(alc, *i, true));
- }
- re->alt.re1 = re_cat(alc, re->alt.re1, y);
- re->alt.re2 = spec.opts->posix_captures
- ? re_cat(alc, x, re->alt.re2)
- : re_cat(alc, re->alt.re2, x);
- break;
- }
- case RE::CAT:
- insert_default_tags(spec, re->cat.re1, tidx);
- insert_default_tags(spec, re->cat.re2, tidx);
- break;
- case RE::ITER:
- insert_default_tags(spec, re->iter.re, tidx);
- break;
- case RE::TAG:
- *tidx++ = re->tag.idx;
- break;
- }
+ RE::alc_t &alc = spec.alc;
+ switch (re->type) {
+ case RE::NIL: break;
+ case RE::SYM: break;
+ case RE::ALT: {
+ size_t *i = tidx;
+ RE *x = NULL, *y = NULL;
+ insert_default_tags(spec, re->alt.re1, tidx);
+ for (; i < tidx; ++i) {
+ x = re_cat(alc, x, re_tag(alc, *i, true));
+ }
+ insert_default_tags(spec, re->alt.re2, tidx);
+ for (; i < tidx; ++i) {
+ y = re_cat(alc, y, re_tag(alc, *i, true));
+ }
+ re->alt.re1 = re_cat(alc, re->alt.re1, y);
+ re->alt.re2 = spec.opts->posix_captures
+ ? re_cat(alc, x, re->alt.re2)
+ : re_cat(alc, re->alt.re2, x);
+ break;
+ }
+ case RE::CAT:
+ insert_default_tags(spec, re->cat.re1, tidx);
+ insert_default_tags(spec, re->cat.re2, tidx);
+ break;
+ case RE::ITER:
+ insert_default_tags(spec, re->iter.re, tidx);
+ break;
+ case RE::TAG:
+ *tidx++ = re->tag.idx;
+ break;
+ }
}
void insert_default_tags(RESpec &spec)
{
- size_t *tidx0 = new size_t[spec.tags.size()], *tidx = tidx0;
- std::vector<RE*>::iterator
- i = spec.res.begin(),
- e = spec.res.end();
- for (; i != e; ++i) {
- insert_default_tags(spec, *i, tidx);
- }
- delete[] tidx0;
+ size_t *tidx0 = new size_t[spec.tags.size()], *tidx = tidx0;
+ std::vector<RE*>::iterator
+ i = spec.res.begin(),
+ e = spec.res.end();
+ for (; i != e; ++i) {
+ insert_default_tags(spec, *i, tidx);
+ }
+ delete[] tidx0;
}
} // namespace re2c
enum empty_class_policy_t
{
- EMPTY_CLASS_MATCH_EMPTY, // match on empty input
- EMPTY_CLASS_MATCH_NONE, // fail to match on any input
- EMPTY_CLASS_ERROR // compilation error
+ EMPTY_CLASS_MATCH_EMPTY, // match on empty input
+ EMPTY_CLASS_MATCH_NONE, // fail to match on any input
+ EMPTY_CLASS_ERROR // compilation error
};
} // namespace re2c
inline bool is_alpha (uint32_t c)
{
- return (c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z');
+ return (c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z');
}
inline uint32_t to_lower_unsafe (uint32_t c)
{
- return c | 0x20u;
+ return c | 0x20u;
}
inline uint32_t to_upper_unsafe (uint32_t c)
{
- return c & ~0x20u;
+ return c & ~0x20u;
}
}
*/
bool Enc::encode(uint32_t & c) const
{
- if (c >= nCodePoints ())
- {
- return false;
- }
+ if (c >= nCodePoints ())
+ {
+ return false;
+ }
- switch (type_)
- {
- case ASCII:
- return true;
- case EBCDIC:
- c = asc2ebc[c];
- return true;
- case UCS2:
- case UTF16:
- case UTF32:
- case UTF8:
- if (c < SURR_MIN || c > SURR_MAX)
- return true;
- else
- {
- switch (policy_)
- {
- case POLICY_FAIL:
- return false;
- case POLICY_SUBSTITUTE:
- c = UNICODE_ERROR;
- return true;
- case POLICY_IGNORE:
- return true;
- }
- }
- }
- return false; // to silence gcc warning
+ switch (type_)
+ {
+ case ASCII:
+ return true;
+ case EBCDIC:
+ c = asc2ebc[c];
+ return true;
+ case UCS2:
+ case UTF16:
+ case UTF32:
+ case UTF8:
+ if (c < SURR_MIN || c > SURR_MAX)
+ return true;
+ else
+ {
+ switch (policy_)
+ {
+ case POLICY_FAIL:
+ return false;
+ case POLICY_SUBSTITUTE:
+ c = UNICODE_ERROR;
+ return true;
+ case POLICY_IGNORE:
+ return true;
+ }
+ }
+ }
+ return false; // to silence gcc warning
}
/*
*/
uint32_t Enc::decodeUnsafe(uint32_t c) const
{
- switch (type_)
- {
- case EBCDIC:
- c = ebc2asc[c & 0xFF];
- break;
- case ASCII:
- case UCS2:
- case UTF16:
- case UTF32:
- case UTF8:
- break;
- }
- return c;
+ switch (type_)
+ {
+ case EBCDIC:
+ c = ebc2asc[c & 0xFF];
+ break;
+ case ASCII:
+ case UCS2:
+ case UTF16:
+ case UTF32:
+ case UTF8:
+ break;
+ }
+ return c;
}
/*
*/
Range * Enc::encodeRange(uint32_t l, uint32_t h) const
{
- if (l >= nCodePoints () || h >= nCodePoints ())
- {
- return NULL;
- }
+ if (l >= nCodePoints () || h >= nCodePoints ())
+ {
+ return NULL;
+ }
- Range * r = NULL;
- switch (type_)
- {
- case ASCII:
- r = Range::ran (l, h + 1);
- break;
- case EBCDIC:
- {
- const uint32_t el = asc2ebc[l];
- r = Range::sym (el);
- for (uint32_t c = l + 1; c <= h; ++c)
- {
- const uint32_t ec = asc2ebc[c];
- r = Range::add (r, Range::sym (ec));
- }
- break;
- }
- case UCS2:
- case UTF16:
- case UTF32:
- case UTF8:
- r = Range::ran (l, h + 1);
- if (l <= SURR_MAX && h >= SURR_MIN)
- {
- switch (policy_)
- {
- case POLICY_FAIL:
- r = NULL;
- break;
- case POLICY_SUBSTITUTE:
- {
- Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
- Range * error = Range::sym (UNICODE_ERROR);
- r = Range::sub (r, surrs);
- r = Range::add (r, error);
- break;
- }
- case POLICY_IGNORE:
- break;
- }
- }
- break;
- }
- return r;
+ Range * r = NULL;
+ switch (type_)
+ {
+ case ASCII:
+ r = Range::ran (l, h + 1);
+ break;
+ case EBCDIC:
+ {
+ const uint32_t el = asc2ebc[l];
+ r = Range::sym (el);
+ for (uint32_t c = l + 1; c <= h; ++c)
+ {
+ const uint32_t ec = asc2ebc[c];
+ r = Range::add (r, Range::sym (ec));
+ }
+ break;
+ }
+ case UCS2:
+ case UTF16:
+ case UTF32:
+ case UTF8:
+ r = Range::ran (l, h + 1);
+ if (l <= SURR_MAX && h >= SURR_MIN)
+ {
+ switch (policy_)
+ {
+ case POLICY_FAIL:
+ r = NULL;
+ break;
+ case POLICY_SUBSTITUTE:
+ {
+ Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+ Range * error = Range::sym (UNICODE_ERROR);
+ r = Range::sub (r, surrs);
+ r = Range::add (r, error);
+ break;
+ }
+ case POLICY_IGNORE:
+ break;
+ }
+ }
+ break;
+ }
+ return r;
}
/*
*/
Range * Enc::fullRange() const
{
- Range * r = Range::ran (0, nCodePoints());
- if (policy_ != POLICY_IGNORE)
- {
- Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
- r = Range::sub (r, surrs);
- }
- return r;
+ Range * r = Range::ran (0, nCodePoints());
+ if (policy_ != POLICY_IGNORE)
+ {
+ Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
+ r = Range::sub (r, surrs);
+ }
+ return r;
}
} // namespace re2c
class Enc
{
public:
- // Supported encodings.
- enum type_t
- { ASCII
- , EBCDIC
- , UCS2
- , UTF16
- , UTF32
- , UTF8
- };
-
- // What to do with invalid code points
- enum policy_t
- { POLICY_FAIL
- , POLICY_SUBSTITUTE
- , POLICY_IGNORE
- };
+ // Supported encodings.
+ enum type_t
+ { ASCII
+ , EBCDIC
+ , UCS2
+ , UTF16
+ , UTF32
+ , UTF8
+ };
+
+ // What to do with invalid code points
+ enum policy_t
+ { POLICY_FAIL
+ , POLICY_SUBSTITUTE
+ , POLICY_IGNORE
+ };
private:
- static const uint32_t asc2ebc[256];
- static const uint32_t ebc2asc[256];
- static const uint32_t SURR_MIN;
- static const uint32_t SURR_MAX;
- static const uint32_t UNICODE_ERROR;
+ static const uint32_t asc2ebc[256];
+ static const uint32_t ebc2asc[256];
+ static const uint32_t SURR_MIN;
+ static const uint32_t SURR_MAX;
+ static const uint32_t UNICODE_ERROR;
- type_t type_;
- policy_t policy_;
+ type_t type_;
+ policy_t policy_;
public:
- Enc()
- : type_ (ASCII)
- , policy_ (POLICY_IGNORE)
- { }
+ Enc()
+ : type_ (ASCII)
+ , policy_ (POLICY_IGNORE)
+ { }
- static const char * name (type_t t);
+ static const char * name (type_t t);
- bool operator != (const Enc & e) const { return type_ != e.type_; }
+ bool operator != (const Enc & e) const { return type_ != e.type_; }
- inline uint32_t nCodePoints() const;
- inline uint32_t nCodeUnits() const;
- inline uint32_t szCodePoint() const;
- inline uint32_t szCodeUnit() const;
+ inline uint32_t nCodePoints() const;
+ inline uint32_t nCodeUnits() const;
+ inline uint32_t szCodePoint() const;
+ inline uint32_t szCodeUnit() const;
- inline void set(type_t t);
- inline void unset(type_t);
- inline type_t type () const;
+ inline void set(type_t t);
+ inline void unset(type_t);
+ inline type_t type () const;
- inline void setPolicy(policy_t t);
+ inline void setPolicy(policy_t t);
- bool encode(uint32_t & c) const;
- uint32_t decodeUnsafe(uint32_t c) const;
- Range * encodeRange(uint32_t l, uint32_t h) const;
- Range * fullRange() const;
+ bool encode(uint32_t & c) const;
+ uint32_t decodeUnsafe(uint32_t c) const;
+ Range * encodeRange(uint32_t l, uint32_t h) const;
+ Range * fullRange() const;
};
inline const char * Enc::name (type_t t)
{
- switch (t) {
- case ASCII: return "ASCII";
- case EBCDIC: return "EBCDIC";
- case UTF8: return "UTF8";
- case UCS2: return "USC2";
- case UTF16: return "UTF16";
- case UTF32: return "UTF32";
- }
- return "<bad encoding>"; /* error */
+ switch (t) {
+ case ASCII: return "ASCII";
+ case EBCDIC: return "EBCDIC";
+ case UTF8: return "UTF8";
+ case UCS2: return "USC2";
+ case UTF16: return "UTF16";
+ case UTF32: return "UTF32";
+ }
+ return "<bad encoding>"; /* error */
}
inline uint32_t Enc::nCodePoints() const
{
- switch (type_) {
- case ASCII:
- case EBCDIC: return 0x100;
- case UCS2: return 0x10000;
- case UTF16:
- case UTF32:
- case UTF8: return 0x110000;
- }
- return 0; /* error */
+ switch (type_) {
+ case ASCII:
+ case EBCDIC: return 0x100;
+ case UCS2: return 0x10000;
+ case UTF16:
+ case UTF32:
+ case UTF8: return 0x110000;
+ }
+ return 0; /* error */
}
inline uint32_t Enc::nCodeUnits() const
{
- switch (type_) {
- case ASCII:
- case EBCDIC:
- case UTF8: return 0x100;
- case UCS2:
- case UTF16: return 0x10000;
- case UTF32: return 0x110000;
- }
- return 0; /* error */
+ switch (type_) {
+ case ASCII:
+ case EBCDIC:
+ case UTF8: return 0x100;
+ case UCS2:
+ case UTF16: return 0x10000;
+ case UTF32: return 0x110000;
+ }
+ return 0; /* error */
}
// returns *maximal* code point size for encoding
inline uint32_t Enc::szCodePoint() const
{
- switch (type_) {
- case ASCII:
- case EBCDIC: return 1;
- case UCS2: return 2;
- case UTF16:
- case UTF32:
- case UTF8: return 4;
- }
- return 0; /* error */
+ switch (type_) {
+ case ASCII:
+ case EBCDIC: return 1;
+ case UCS2: return 2;
+ case UTF16:
+ case UTF32:
+ case UTF8: return 4;
+ }
+ return 0; /* error */
}
inline uint32_t Enc::szCodeUnit() const
{
- switch (type_) {
- case ASCII:
- case EBCDIC:
- case UTF8: return 1;
- case UCS2:
- case UTF16: return 2;
- case UTF32: return 4;
- }
- return 0; /* error */
+ switch (type_) {
+ case ASCII:
+ case EBCDIC:
+ case UTF8: return 1;
+ case UCS2:
+ case UTF16: return 2;
+ case UTF32: return 4;
+ }
+ return 0; /* error */
}
inline void Enc::set(type_t t)
{
- type_ = t;
+ type_ = t;
}
inline void Enc::unset(type_t t)
{
- if (type_ == t)
- type_ = ASCII;
+ if (type_ == t)
+ type_ = ASCII;
}
inline Enc::type_t Enc::type () const
{
- return type_;
+ return type_;
}
inline void Enc::setPolicy(policy_t t)
{
- policy_ = t;
+ policy_ = t;
}
} // namespace re2c
RE *to_regexp(RE::alc_t &alc, RangeSuffix *p)
{
- return p ? emit(alc, p, NULL) : re_sym(alc, NULL);
+ return p ? emit(alc, p, NULL) : re_sym(alc, NULL);
}
/*
*/
RE *emit(RE::alc_t &alc, RangeSuffix *p, RE *re)
{
- if (p == NULL) {
- return re;
- } else {
- RE *regexp = NULL;
- for (; p != NULL; p = p->next) {
- RE *re1 = re_cat(alc, re_sym(alc, Range::ran(p->l, p->h + 1)), re);
- regexp = re_alt(alc, regexp, emit(alc, p->child, re1));
- }
- return regexp;
- }
+ if (p == NULL) {
+ return re;
+ } else {
+ RE *regexp = NULL;
+ for (; p != NULL; p = p->next) {
+ RE *re1 = re_cat(alc, re_sym(alc, Range::ran(p->l, p->h + 1)), re);
+ regexp = re_alt(alc, regexp, emit(alc, p->child, re1));
+ }
+ return regexp;
+ }
}
} // namespace re2c
struct RangeSuffix
{
- static free_list<RangeSuffix *> freeList;
-
- uint32_t l;
- uint32_t h;
- RangeSuffix * next;
- RangeSuffix * child;
-
- RangeSuffix (uint32_t lo, uint32_t hi)
- : l (lo)
- , h (hi)
- , next (NULL)
- , child (NULL)
- {
- freeList.insert(this);
- }
-
- FORBID_COPY (RangeSuffix);
+ static free_list<RangeSuffix *> freeList;
+
+ uint32_t l;
+ uint32_t h;
+ RangeSuffix * next;
+ RangeSuffix * child;
+
+ RangeSuffix (uint32_t lo, uint32_t hi)
+ : l (lo)
+ , h (hi)
+ , next (NULL)
+ , child (NULL)
+ {
+ freeList.insert(this);
+ }
+
+ FORBID_COPY (RangeSuffix);
};
RE *to_regexp(RE::alc_t &alc, RangeSuffix *p);
namespace re2c {
-const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu;
-const uint32_t utf16::MIN_LEAD_SURR = 0xD800u;
-const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u;
-const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu;
+const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu;
+const uint32_t utf16::MIN_LEAD_SURR = 0xD800u;
+const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u;
+const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu;
} // namespace re2c
class utf16
{
public:
- typedef uint32_t rune;
+ typedef uint32_t rune;
- static const uint32_t MAX_1WORD_RUNE;
- static const uint32_t MIN_LEAD_SURR;
- static const uint32_t MIN_TRAIL_SURR;
- static const uint32_t MAX_TRAIL_SURR;
+ static const uint32_t MAX_1WORD_RUNE;
+ static const uint32_t MIN_LEAD_SURR;
+ static const uint32_t MIN_TRAIL_SURR;
+ static const uint32_t MAX_TRAIL_SURR;
- /* leading surrogate of UTF-16 symbol */
- static inline uint32_t lead_surr(rune r);
+ /* leading surrogate of UTF-16 symbol */
+ static inline uint32_t lead_surr(rune r);
- /* trailing surrogate of UTF-16 symbol */
- static inline uint32_t trail_surr(rune r);
+ /* trailing surrogate of UTF-16 symbol */
+ static inline uint32_t trail_surr(rune r);
};
inline uint32_t utf16::lead_surr(rune r)
{
- return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR;
+ return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR;
}
inline uint32_t utf16::trail_surr(rune r)
{
- return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR;
+ return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR;
}
} // namespace re2c
*/
void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h)
{
- RangeSuffix ** p = &root;
- for (;;)
- {
- if (*p == NULL)
- {
- *p = new RangeSuffix(l, h);
- break;
- }
- else if ((*p)->l == l && (*p)->h == h)
- {
- break;
- }
- else
- p = &(*p)->next;
- }
+ RangeSuffix ** p = &root;
+ for (;;)
+ {
+ if (*p == NULL)
+ {
+ *p = new RangeSuffix(l, h);
+ break;
+ }
+ else if ((*p)->l == l && (*p)->h == h)
+ {
+ break;
+ }
+ else
+ p = &(*p)->next;
+ }
}
/*
*/
void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr)
{
- RangeSuffix ** p = &root;
- for (;;)
- {
- if (*p == NULL)
- {
- *p = new RangeSuffix(l_tr, h_tr);
- p = &(*p)->child;
- break;
- }
- else if ((*p)->l == l_tr && (*p)->h == h_tr)
- {
- p = &(*p)->child;
- break;
- }
- else
- p = &(*p)->next;
- }
- for (;;)
- {
- if (*p == NULL)
- {
- *p = new RangeSuffix(l_ld, h_ld);
- break;
- }
- else if ((*p)->l == l_ld && (*p)->h == h_ld)
- {
- break;
- }
- else
- p = &(*p)->next;
- }
+ RangeSuffix ** p = &root;
+ for (;;)
+ {
+ if (*p == NULL)
+ {
+ *p = new RangeSuffix(l_tr, h_tr);
+ p = &(*p)->child;
+ break;
+ }
+ else if ((*p)->l == l_tr && (*p)->h == h_tr)
+ {
+ p = &(*p)->child;
+ break;
+ }
+ else
+ p = &(*p)->next;
+ }
+ for (;;)
+ {
+ if (*p == NULL)
+ {
+ *p = new RangeSuffix(l_ld, h_ld);
+ break;
+ }
+ else if ((*p)->l == l_ld && (*p)->h == h_ld)
+ {
+ break;
+ }
+ else
+ p = &(*p)->next;
+ }
}
/*
* This is only possible if the following condition holds:
* if L1 /= H1, then L2 == 0xdc00 and H2 == 0xdfff.
* This condition ensures that:
- * 1) all possible UTF-16 sequences between L and H are allowed
- * 2) no word ranges [w1 - w2] appear, such that w1 > w2
+ * 1) all possible UTF-16 sequences between L and H are allowed
+ * 2) no word ranges [w1 - w2] appear, such that w1 > w2
*
* E.g.:
* [\U00010001-\U00010400] => [d800-d801],[dc01-dc00].
*/
void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr)
{
- if (l_ld != h_ld)
- {
- if (l_tr > utf16::MIN_TRAIL_SURR)
- {
- UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR);
- UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
- return;
- }
- if (h_tr < utf16::MAX_TRAIL_SURR)
- {
- UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR);
- UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
- return;
- }
- }
- UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr);
+ if (l_ld != h_ld)
+ {
+ if (l_tr > utf16::MIN_TRAIL_SURR)
+ {
+ UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR);
+ UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
+ return;
+ }
+ if (h_tr < utf16::MAX_TRAIL_SURR)
+ {
+ UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR);
+ UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
+ return;
+ }
+ }
+ UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr);
}
/*
*/
void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h)
{
- if (l <= utf16::MAX_1WORD_RUNE)
- {
- if (h <= utf16::MAX_1WORD_RUNE)
- {
- UTF16addContinuous1(root, l, h);
- }
- else
- {
- UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE);
- const uint32_t h_ld = utf16::lead_surr(h);
- const uint32_t h_tr = utf16::trail_surr(h);
- UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
- }
- }
- else
- {
- const uint32_t l_ld = utf16::lead_surr(l);
- const uint32_t l_tr = utf16::trail_surr(l);
- const uint32_t h_ld = utf16::lead_surr(h);
- const uint32_t h_tr = utf16::trail_surr(h);
- UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr);
- }
+ if (l <= utf16::MAX_1WORD_RUNE)
+ {
+ if (h <= utf16::MAX_1WORD_RUNE)
+ {
+ UTF16addContinuous1(root, l, h);
+ }
+ else
+ {
+ UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE);
+ const uint32_t h_ld = utf16::lead_surr(h);
+ const uint32_t h_tr = utf16::trail_surr(h);
+ UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
+ }
+ }
+ else
+ {
+ const uint32_t l_ld = utf16::lead_surr(l);
+ const uint32_t l_tr = utf16::trail_surr(l);
+ const uint32_t h_ld = utf16::lead_surr(h);
+ const uint32_t h_tr = utf16::trail_surr(h);
+ UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr);
+ }
}
} // namespace re2c
RE *UTF16Symbol(RE::alc_t &alc, utf16::rune r)
{
- if (r <= utf16::MAX_1WORD_RUNE) {
- return re_sym(alc, Range::sym(r));
- } else {
- const uint32_t ld = utf16::lead_surr(r);
- const uint32_t tr = utf16::trail_surr(r);
- return re_cat(alc,
- re_sym(alc, Range::sym(ld)),
- re_sym(alc, Range::sym(tr)));
- }
+ if (r <= utf16::MAX_1WORD_RUNE) {
+ return re_sym(alc, Range::sym(r));
+ } else {
+ const uint32_t ld = utf16::lead_surr(r);
+ const uint32_t tr = utf16::trail_surr(r);
+ return re_cat(alc,
+ re_sym(alc, Range::sym(ld)),
+ re_sym(alc, Range::sym(tr)));
+ }
}
/*
*/
RE *UTF16Range(RE::alc_t &alc, const Range *r)
{
- RangeSuffix * root = NULL;
- for (; r != NULL; r = r->next ())
- UTF16splitByRuneLength(root, r->lower (), r->upper () - 1);
- return to_regexp(alc, root);
+ RangeSuffix * root = NULL;
+ for (; r != NULL; r = r->next ())
+ UTF16splitByRuneLength(root, r->lower (), r->upper () - 1);
+ return to_regexp(alc, root);
}
} // namespace re2c
uint32_t utf8::rune_to_bytes(uint32_t *str, rune c)
{
- // one byte sequence: 0-0x7F => 0xxxxxxx
- if (c <= MAX_1BYTE_RUNE)
- {
- str[0] = PREFIX_1BYTE | c;
- return 1;
- }
+ // one byte sequence: 0-0x7F => 0xxxxxxx
+ if (c <= MAX_1BYTE_RUNE)
+ {
+ str[0] = PREFIX_1BYTE | c;
+ return 1;
+ }
- // two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx
- if (c <= MAX_2BYTE_RUNE)
- {
- str[0] = PREFIX_2BYTE | (c >> 1*SHIFT);
- str[1] = INFIX | (c & MASK);
- return 2;
- }
+ // two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx
+ if (c <= MAX_2BYTE_RUNE)
+ {
+ str[0] = PREFIX_2BYTE | (c >> 1*SHIFT);
+ str[1] = INFIX | (c & MASK);
+ return 2;
+ }
- // If the Rune is out of range, convert it to the error rune.
- // Do this test here because the error rune encodes to three bytes.
- // Doing it earlier would duplicate work, since an out of range
- // Rune wouldn't have fit in one or two bytes.
- if (c > MAX_RUNE)
- c = ERROR;
+ // If the Rune is out of range, convert it to the error rune.
+ // Do this test here because the error rune encodes to three bytes.
+ // Doing it earlier would duplicate work, since an out of range
+ // Rune wouldn't have fit in one or two bytes.
+ if (c > MAX_RUNE)
+ c = ERROR;
- // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx
- if (c <= MAX_3BYTE_RUNE)
- {
- str[0] = PREFIX_3BYTE | (c >> 2*SHIFT);
- str[1] = INFIX | ((c >> 1*SHIFT) & MASK);
- str[2] = INFIX | (c & MASK);
- return 3;
- }
+ // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx
+ if (c <= MAX_3BYTE_RUNE)
+ {
+ str[0] = PREFIX_3BYTE | (c >> 2*SHIFT);
+ str[1] = INFIX | ((c >> 1*SHIFT) & MASK);
+ str[2] = INFIX | (c & MASK);
+ return 3;
+ }
- // four byte sequence (21-bit value):
- // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- str[0] = PREFIX_4BYTE | (c >> 3*SHIFT);
- str[1] = INFIX | ((c >> 2*SHIFT) & MASK);
- str[2] = INFIX | ((c >> 1*SHIFT) & MASK);
- str[3] = INFIX | (c & MASK);
- return 4;
+ // four byte sequence (21-bit value):
+ // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ str[0] = PREFIX_4BYTE | (c >> 3*SHIFT);
+ str[1] = INFIX | ((c >> 2*SHIFT) & MASK);
+ str[2] = INFIX | ((c >> 1*SHIFT) & MASK);
+ str[3] = INFIX | (c & MASK);
+ return 4;
}
uint32_t utf8::rune_length(rune r)
{
- if (r <= MAX_2BYTE_RUNE)
- return r <= MAX_1BYTE_RUNE ? 1 : 2;
- else
- return r <= MAX_3BYTE_RUNE ? 3 : 4;
+ if (r <= MAX_2BYTE_RUNE)
+ return r <= MAX_1BYTE_RUNE ? 1 : 2;
+ else
+ return r <= MAX_3BYTE_RUNE ? 3 : 4;
}
utf8::rune utf8::max_rune(uint32_t i)
{
- switch (i)
- {
- case 1: return MAX_1BYTE_RUNE;
- case 2: return MAX_2BYTE_RUNE;
- case 3: return MAX_3BYTE_RUNE;
- case 4: return MAX_4BYTE_RUNE;
- default: return ERROR;
- }
+ switch (i)
+ {
+ case 1: return MAX_1BYTE_RUNE;
+ case 2: return MAX_2BYTE_RUNE;
+ case 3: return MAX_3BYTE_RUNE;
+ case 4: return MAX_4BYTE_RUNE;
+ default: return ERROR;
+ }
}
} // namespace re2c
class utf8
{
public:
- typedef uint32_t rune;
+ typedef uint32_t rune;
- // maximum characters per rune
- // enum instead of static const member because of [-Wvla]
- enum { MAX_RUNE_LENGTH = 4u };
+ // maximum characters per rune
+ // enum instead of static const member because of [-Wvla]
+ enum { MAX_RUNE_LENGTH = 4u };
- // decoding error
- static const uint32_t ERROR;
+ // decoding error
+ static const uint32_t ERROR;
- // maximal runes for each rune length
- static const rune MAX_1BYTE_RUNE;
- static const rune MAX_2BYTE_RUNE;
- static const rune MAX_3BYTE_RUNE;
- static const rune MAX_4BYTE_RUNE;
- static const rune MAX_RUNE;
+ // maximal runes for each rune length
+ static const rune MAX_1BYTE_RUNE;
+ static const rune MAX_2BYTE_RUNE;
+ static const rune MAX_3BYTE_RUNE;
+ static const rune MAX_4BYTE_RUNE;
+ static const rune MAX_RUNE;
- static const uint32_t PREFIX_1BYTE;
- static const uint32_t INFIX;
- static const uint32_t PREFIX_2BYTE;
- static const uint32_t PREFIX_3BYTE;
- static const uint32_t PREFIX_4BYTE;
+ static const uint32_t PREFIX_1BYTE;
+ static const uint32_t INFIX;
+ static const uint32_t PREFIX_2BYTE;
+ static const uint32_t PREFIX_3BYTE;
+ static const uint32_t PREFIX_4BYTE;
- static const uint32_t SHIFT;
- static const uint32_t MASK;
+ static const uint32_t SHIFT;
+ static const uint32_t MASK;
- // UTF-8 bytestring for given Unicode rune
- static uint32_t rune_to_bytes(uint32_t * s, rune r);
+ // UTF-8 bytestring for given Unicode rune
+ static uint32_t rune_to_bytes(uint32_t * s, rune r);
- // length of UTF-8 bytestring for given Unicode rune
- static uint32_t rune_length(rune r);
+ // length of UTF-8 bytestring for given Unicode rune
+ static uint32_t rune_length(rune r);
- // maximal Unicode rune with given length of UTF-8 bytestring
- static rune max_rune(uint32_t i);
+ // maximal Unicode rune with given length of UTF-8 bytestring
+ static rune max_rune(uint32_t i);
};
} // namespace re2c
*/
void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n)
{
- uint32_t lcs[utf8::MAX_RUNE_LENGTH];
- uint32_t hcs[utf8::MAX_RUNE_LENGTH];
- utf8::rune_to_bytes(lcs, l);
- utf8::rune_to_bytes(hcs, h);
+ uint32_t lcs[utf8::MAX_RUNE_LENGTH];
+ uint32_t hcs[utf8::MAX_RUNE_LENGTH];
+ utf8::rune_to_bytes(lcs, l);
+ utf8::rune_to_bytes(hcs, h);
- RangeSuffix ** p = &root;
- for (uint32_t i = 1; i <= n; ++i)
- {
- const uint32_t lc = lcs[n - i];
- const uint32_t hc = hcs[n - i];
- for (;;)
- {
- if (*p == NULL)
- {
- *p = new RangeSuffix(lc, hc);
- p = &(*p)->child;
- break;
- }
- else if ((*p)->l == lc && (*p)->h == hc)
- {
- p = &(*p)->child;
- break;
- }
- else
- p = &(*p)->next;
- }
- }
+ RangeSuffix ** p = &root;
+ for (uint32_t i = 1; i <= n; ++i)
+ {
+ const uint32_t lc = lcs[n - i];
+ const uint32_t hc = hcs[n - i];
+ for (;;)
+ {
+ if (*p == NULL)
+ {
+ *p = new RangeSuffix(lc, hc);
+ p = &(*p)->child;
+ break;
+ }
+ else if ((*p)->l == lc && (*p)->h == hc)
+ {
+ p = &(*p)->child;
+ break;
+ }
+ else
+ p = &(*p)->next;
+ }
+ }
}
/*
* This is only possible if for all i > 1:
* if L_i /= H_i, then L_(i+1) == 0x80 and H_(i+1) == 0xbf.
* This condition ensures that:
- * 1) all possible UTF-8 sequences between L and H are allowed
- * 2) no byte ranges [b1 - b2] appear, such that b1 > b2
+ * 1) all possible UTF-8 sequences between L and H are allowed
+ * 2) no byte ranges [b1 - b2] appear, such that b1 > b2
*
* E.g.:
* [\U000e0031-\U000e0043] => [f3-f3],[a0-a0],[80-81],[b1-83].
*/
void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n)
{
- for (uint32_t i = 1; i < n; ++i)
- {
- uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence
- if ((l & ~m) != (h & ~m))
- {
- if ((l & m) != 0)
- {
- UTF8splitByContinuity(root, l, l | m, n);
- UTF8splitByContinuity(root, (l | m) + 1, h, n);
- return;
- }
- if ((h & m) != m)
- {
- UTF8splitByContinuity(root, l, (h & ~m) - 1, n);
- UTF8splitByContinuity(root, h & ~m, h, n);
- return;
- }
- }
- }
- UTF8addContinuous(root, l, h, n);
+ for (uint32_t i = 1; i < n; ++i)
+ {
+ uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence
+ if ((l & ~m) != (h & ~m))
+ {
+ if ((l & m) != 0)
+ {
+ UTF8splitByContinuity(root, l, l | m, n);
+ UTF8splitByContinuity(root, (l | m) + 1, h, n);
+ return;
+ }
+ if ((h & m) != m)
+ {
+ UTF8splitByContinuity(root, l, (h & ~m) - 1, n);
+ UTF8splitByContinuity(root, h & ~m, h, n);
+ return;
+ }
+ }
+ }
+ UTF8addContinuous(root, l, h, n);
}
/*
*/
void UTF8splitByRuneLength(RangeSuffix * & root, utf8::rune l, utf8::rune h)
{
- const uint32_t nh = utf8::rune_length(h);
- for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl)
- {
- utf8::rune r = utf8::max_rune(nl);
- UTF8splitByContinuity(root, l, r, nl);
- l = r + 1;
- }
- UTF8splitByContinuity(root, l, h, nh);
+ const uint32_t nh = utf8::rune_length(h);
+ for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl)
+ {
+ utf8::rune r = utf8::max_rune(nl);
+ UTF8splitByContinuity(root, l, r, nl);
+ l = r + 1;
+ }
+ UTF8splitByContinuity(root, l, h, nh);
}
} // namespace re2c
RE *UTF8Symbol(RE::alc_t &alc, utf8::rune r)
{
- uint32_t chars[utf8::MAX_RUNE_LENGTH];
- const uint32_t chars_count = utf8::rune_to_bytes(chars, r);
- RE *re = re_sym(alc, Range::sym(chars[0]));
- for (uint32_t i = 1; i < chars_count; ++i) {
- re = re_cat(alc, re, re_sym(alc, Range::sym(chars[i])));
- }
- return re;
+ uint32_t chars[utf8::MAX_RUNE_LENGTH];
+ const uint32_t chars_count = utf8::rune_to_bytes(chars, r);
+ RE *re = re_sym(alc, Range::sym(chars[0]));
+ for (uint32_t i = 1; i < chars_count; ++i) {
+ re = re_cat(alc, re, re_sym(alc, Range::sym(chars[i])));
+ }
+ return re;
}
/*
*/
RE *UTF8Range(RE::alc_t &alc, const Range *r)
{
- RangeSuffix * root = NULL;
- for (; r != NULL; r = r->next ())
- UTF8splitByRuneLength(root, r->lower (), r->upper () - 1);
- return to_regexp(alc, root);
+ RangeSuffix * root = NULL;
+ for (; r != NULL; r = r->next ())
+ UTF8splitByRuneLength(root, r->lower (), r->upper () - 1);
+ return to_regexp(alc, root);
}
} // namespace re2c
*/
static void find_fixed_tags(RE *re, std::vector<Tag> &tags,
- size_t &dist, size_t &base, bool toplevel)
+ size_t &dist, size_t &base, bool toplevel)
{
- switch (re->type) {
- case RE::NIL: break;
- case RE::SYM:
- if (dist != Tag::VARDIST) ++dist;
- break;
- case RE::ALT: {
- size_t d1 = dist, d2 = dist;
- find_fixed_tags(re->alt.re1, tags, d1, base, false);
- find_fixed_tags(re->alt.re2, tags, d2, base, false);
- dist = (d1 == d2) ? d1 : Tag::VARDIST;
- break;
- }
- case RE::CAT:
- find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
- find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
- break;
- case RE::ITER:
- find_fixed_tags(re->iter.re, tags, dist, base, false);
- dist = Tag::VARDIST;
- break;
- case RE::TAG: {
- // see note [fixed and variable tags]
- Tag &tag = tags[re->tag.idx];
- if (fictive(tag)) {
- tag.base = tag.dist = 0;
- } else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
- tag.base = base;
- tag.dist = dist;
- } else if (toplevel) {
- base = re->tag.idx;
- dist = 0;
- }
- if (trailing(tag)) dist = 0;
- break;
- }
- }
+ switch (re->type) {
+ case RE::NIL: break;
+ case RE::SYM:
+ if (dist != Tag::VARDIST) ++dist;
+ break;
+ case RE::ALT: {
+ size_t d1 = dist, d2 = dist;
+ find_fixed_tags(re->alt.re1, tags, d1, base, false);
+ find_fixed_tags(re->alt.re2, tags, d2, base, false);
+ dist = (d1 == d2) ? d1 : Tag::VARDIST;
+ break;
+ }
+ case RE::CAT:
+ find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
+ find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
+ break;
+ case RE::ITER:
+ find_fixed_tags(re->iter.re, tags, dist, base, false);
+ dist = Tag::VARDIST;
+ break;
+ case RE::TAG: {
+ // see note [fixed and variable tags]
+ Tag &tag = tags[re->tag.idx];
+ if (fictive(tag)) {
+ tag.base = tag.dist = 0;
+ } else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
+ tag.base = base;
+ tag.dist = dist;
+ } else if (toplevel) {
+ base = re->tag.idx;
+ dist = 0;
+ }
+ if (trailing(tag)) dist = 0;
+ break;
+ }
+ }
}
void find_fixed_tags(RESpec &spec)
{
- const bool generic = spec.opts->input_api == INPUT_CUSTOM;
- std::vector<RE*>::iterator
- i = spec.res.begin(),
- e = spec.res.end();
- for (; i != e; ++i) {
- size_t base = Tag::RIGHTMOST, dist = 0;
- find_fixed_tags(*i, spec.tags, dist, base, !generic);
- }
+ const bool generic = spec.opts->input_api == INPUT_CUSTOM;
+ std::vector<RE*>::iterator
+ i = spec.res.begin(),
+ e = spec.res.end();
+ for (; i != e; ++i) {
+ size_t base = Tag::RIGHTMOST, dist = 0;
+ find_fixed_tags(*i, spec.tags, dist, base, !generic);
+ }
}
} // namespace re2c
static bool nullable(const RESpec &spec, const RE *re, bool &trail)
{
- if (trail) return true;
+ if (trail) return true;
- switch (re->type) {
- case RE::NIL: return true;
- case RE::SYM: return false;
- case RE::ITER:
- return nullable(spec, re->iter.re, trail);
- case RE::TAG:
- trail |= trailing(spec.tags[re->tag.idx]);
- return true;
- case RE::ALT:
- return nullable(spec, re->alt.re1, trail)
- || nullable(spec, re->alt.re2, trail);
- case RE::CAT:
- return nullable(spec, re->cat.re1, trail)
- && nullable(spec, re->cat.re2, trail);
- }
- return false; /* unreachable */
+ switch (re->type) {
+ case RE::NIL: return true;
+ case RE::SYM: return false;
+ case RE::ITER:
+ return nullable(spec, re->iter.re, trail);
+ case RE::TAG:
+ trail |= trailing(spec.tags[re->tag.idx]);
+ return true;
+ case RE::ALT:
+ return nullable(spec, re->alt.re1, trail)
+ || nullable(spec, re->alt.re2, trail);
+ case RE::CAT:
+ return nullable(spec, re->cat.re1, trail)
+ && nullable(spec, re->cat.re2, trail);
+ }
+ return false; /* unreachable */
}
/*
*/
void warn_nullable(const RESpec &spec, const std::string &cond)
{
- const size_t nre = spec.res.size();
- for (size_t i = 0; i < nre; ++i) {
- bool trail = false;
- if (nullable(spec, spec.res[i], trail)) {
- spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
- }
- }
+ const size_t nre = spec.res.size();
+ for (size_t i = 0; i < nre; ++i) {
+ bool trail = false;
+ if (nullable(spec, spec.res[i], trail)) {
+ spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
+ }
+ }
}
} // namespace re2c
struct RE
{
- typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t;
- enum type_t {NIL, SYM, ALT, CAT, ITER, TAG} type;
- union {
- const Range *sym;
- struct {
- RE *re1;
- RE *re2;
- } alt;
- struct {
- RE *re1;
- RE *re2;
- } cat;
- struct {
- RE *re;
- uint32_t min;
- uint32_t max;
- } iter;
- tag_info_t tag;
- };
+ typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t;
+ enum type_t {NIL, SYM, ALT, CAT, ITER, TAG} type;
+ union {
+ const Range *sym;
+ struct {
+ RE *re1;
+ RE *re2;
+ } alt;
+ struct {
+ RE *re1;
+ RE *re2;
+ } cat;
+ struct {
+ RE *re;
+ uint32_t min;
+ uint32_t max;
+ } iter;
+ tag_info_t tag;
+ };
};
struct RESpec
{
- RE::alc_t alc;
- std::vector<RE*> res;
- std::vector<uint32_t> &charset;
- std::vector<Tag> &tags;
- std::valarray<Rule> &rules;
- const opt_t *opts;
- Warn &warn;
-
- explicit RESpec(const std::vector<ASTRule> &ast, const opt_t *o, Warn &w);
- FORBID_COPY(RESpec);
+ RE::alc_t alc;
+ std::vector<RE*> res;
+ std::vector<uint32_t> &charset;
+ std::vector<Tag> &tags;
+ std::valarray<Rule> &rules;
+ const opt_t *opts;
+ Warn &warn;
+
+ explicit RESpec(const std::vector<ASTRule> &ast, const opt_t *o, Warn &w);
+ FORBID_COPY(RESpec);
};
void split_charset(RESpec &spec);
inline RE *re_nil(RE::alc_t &alc)
{
- RE *x = alc.alloct<RE>(1);
- x->type = RE::NIL;
- return x;
+ RE *x = alc.alloct<RE>(1);
+ x->type = RE::NIL;
+ return x;
}
inline RE *re_sym(RE::alc_t &alc, const Range *r)
{
- RE *x = alc.alloct<RE>(1);
- x->type = RE::SYM;
- x->sym = r;
- return x;
+ RE *x = alc.alloct<RE>(1);
+ x->type = RE::SYM;
+ x->sym = r;
+ return x;
}
inline RE *re_alt(RE::alc_t &alc, RE *x, RE *y)
{
- if (!x) return y;
- if (!y) return x;
- if (x->type == RE::SYM && y->type == RE::SYM) {
- return re_sym(alc, Range::add(x->sym, y->sym));
- }
-
- RE *z = alc.alloct<RE>(1);
- z->type = RE::ALT;
- z->alt.re1 = x;
- z->alt.re2 = y;
- return z;
+ if (!x) return y;
+ if (!y) return x;
+ if (x->type == RE::SYM && y->type == RE::SYM) {
+ return re_sym(alc, Range::add(x->sym, y->sym));
+ }
+
+ RE *z = alc.alloct<RE>(1);
+ z->type = RE::ALT;
+ z->alt.re1 = x;
+ z->alt.re2 = y;
+ return z;
}
inline RE *re_cat(RE::alc_t &alc, RE *x, RE *y)
{
- if (!x) return y;
- if (!y) return x;
-
- RE *z = alc.alloct<RE>(1);
- z->type = RE::CAT;
- z->cat.re1 = x;
- z->cat.re2 = y;
- return z;
+ if (!x) return y;
+ if (!y) return x;
+
+ RE *z = alc.alloct<RE>(1);
+ z->type = RE::CAT;
+ z->cat.re1 = x;
+ z->cat.re2 = y;
+ return z;
}
inline RE *re_iter(RE::alc_t &alc, RE *x, uint32_t n, uint32_t m)
{
- RE *y = alc.alloct<RE>(1);
- y->type = RE::ITER;
- y->iter.re = x;
- y->iter.min = n;
- y->iter.max = m;
- return y;
+ RE *y = alc.alloct<RE>(1);
+ y->type = RE::ITER;
+ y->iter.re = x;
+ y->iter.min = n;
+ y->iter.max = m;
+ return y;
}
inline RE *re_tag(RE::alc_t &alc, size_t idx, bool neg)
{
- RE *x = alc.alloct<RE>(1);
- x->type = RE::TAG;
- x->tag.idx = idx & 0x7FFFffff;
- assert(idx == x->tag.idx);
- x->tag.neg = neg;
- return x;
+ RE *x = alc.alloct<RE>(1);
+ x->type = RE::TAG;
+ x->tag.idx = idx & 0x7FFFffff;
+ assert(idx == x->tag.idx);
+ x->tag.neg = neg;
+ return x;
}
} // namespace re2c
struct Code
{
- static free_list<Code*> flist;
+ static free_list<Code*> flist;
- std::string fname;
- uint32_t fline;
- bool autogen;
- const std::string text;
- std::string cond;
+ std::string fname;
+ uint32_t fline;
+ bool autogen;
+ const std::string text;
+ std::string cond;
- Code(const std::string &file, uint32_t line)
- : fname(file)
- , fline(line)
- , autogen(true)
- , text("")
- , cond("")
- {
- flist.insert(this);
- }
- Code(const std::string &file, uint32_t line, const char *s, size_t slen)
- : fname(file)
- , fline(line)
- , autogen(false)
- , text(s, slen)
- , cond("")
- {
- flist.insert(this);
- }
- ~Code()
- {
- flist.erase(this);
- }
+ Code(const std::string &file, uint32_t line)
+ : fname(file)
+ , fline(line)
+ , autogen(true)
+ , text("")
+ , cond("")
+ {
+ flist.insert(this);
+ }
+ Code(const std::string &file, uint32_t line, const char *s, size_t slen)
+ : fname(file)
+ , fline(line)
+ , autogen(false)
+ , text(s, slen)
+ , cond("")
+ {
+ flist.insert(this);
+ }
+ ~Code()
+ {
+ flist.erase(this);
+ }
};
struct Rule
{
- static const size_t NONE;
+ static const size_t NONE;
- const Code *code;
- std::set<uint32_t> shadow;
+ const Code *code;
+ std::set<uint32_t> shadow;
- // tags
- size_t ltag; // first
- size_t htag; // next to last
- size_t ttag; // trailing context
+ // tags
+ size_t ltag; // first
+ size_t htag; // next to last
+ size_t ttag; // trailing context
- size_t ncap; // number of POSIX captures
+ size_t ncap; // number of POSIX captures
- Rule(): code(NULL), shadow(),
- ltag(0), htag(0), ttag(0), ncap(0) {}
+ Rule(): code(NULL), shadow(),
+ ltag(0), htag(0), ttag(0), ncap(0) {}
- // copy ctor and assignment are required for containers on macOS
- Rule(const Rule &r)
- : code(r.code)
- , shadow(r.shadow)
- , ltag(r.ltag)
- , htag(r.htag)
- , ttag(r.ttag)
- , ncap(r.ncap)
- {}
- Rule& operator= (const Rule &r)
- {
- code = r.code;
- shadow = r.shadow;
- ltag = r.ltag;
- htag = r.htag;
- ttag = r.ttag;
- ncap = r.ncap;
- return *this;
- }
+ // copy ctor and assignment are required for containers on macOS
+ Rule(const Rule &r)
+ : code(r.code)
+ , shadow(r.shadow)
+ , ltag(r.ltag)
+ , htag(r.htag)
+ , ttag(r.ttag)
+ , ncap(r.ncap)
+ {}
+ Rule& operator= (const Rule &r)
+ {
+ code = r.code;
+ shadow = r.shadow;
+ ltag = r.ltag;
+ htag = r.htag;
+ ttag = r.ttag;
+ ncap = r.ncap;
+ return *this;
+ }
};
} // namespace re2c
*/
void split_charset(RESpec &spec)
{
- std::set<uint32_t> cs;
- std::stack<const RE*> todo;
+ std::set<uint32_t> cs;
+ std::stack<const RE*> todo;
- std::vector<RE*>::const_iterator
- i = spec.res.begin(),
- e = spec.res.end();
- for (; i != e; ++i) todo.push(*i);
- while (!todo.empty()) {
- const RE *re = todo.top();
- todo.pop();
- switch (re->type) {
- case RE::NIL: break;
- case RE::TAG: break;
- case RE::SYM:
- for (const Range *r = re->sym; r; r = r->next()) {
- cs.insert(r->lower());
- cs.insert(r->upper());
- }
- break;
- case RE::ALT:
- todo.push(re->alt.re2);
- todo.push(re->alt.re1);
- break;
- case RE::CAT:
- todo.push(re->cat.re2);
- todo.push(re->cat.re1);
- break;
- case RE::ITER:
- todo.push(re->iter.re);
- break;
- }
- }
- cs.insert(0);
- cs.insert(spec.opts->encoding.nCodeUnits());
+ std::vector<RE*>::const_iterator
+ i = spec.res.begin(),
+ e = spec.res.end();
+ for (; i != e; ++i) todo.push(*i);
+ while (!todo.empty()) {
+ const RE *re = todo.top();
+ todo.pop();
+ switch (re->type) {
+ case RE::NIL: break;
+ case RE::TAG: break;
+ case RE::SYM:
+ for (const Range *r = re->sym; r; r = r->next()) {
+ cs.insert(r->lower());
+ cs.insert(r->upper());
+ }
+ break;
+ case RE::ALT:
+ todo.push(re->alt.re2);
+ todo.push(re->alt.re1);
+ break;
+ case RE::CAT:
+ todo.push(re->cat.re2);
+ todo.push(re->cat.re1);
+ break;
+ case RE::ITER:
+ todo.push(re->iter.re);
+ break;
+ }
+ }
+ cs.insert(0);
+ cs.insert(spec.opts->encoding.nCodeUnits());
- spec.charset.insert(spec.charset.end(), cs.begin(), cs.end());
+ spec.charset.insert(spec.charset.end(), cs.begin(), cs.end());
}
} // namespace re2c
Tag::Tag(const std::string *nm, bool hi, int32_t ht)
- : name(nm)
- , ncap(Tag::RIGHTMOST)
- , base(Tag::RIGHTMOST)
- , dist(Tag::VARDIST)
- , history(hi)
- , orbit(false)
- , height(ht)
+ : name(nm)
+ , ncap(Tag::RIGHTMOST)
+ , base(Tag::RIGHTMOST)
+ , dist(Tag::VARDIST)
+ , history(hi)
+ , orbit(false)
+ , height(ht)
{}
Tag::Tag(size_t nc, bool ob, int32_t ht)
- : name(NULL)
- , ncap(nc)
- , base(Tag::RIGHTMOST)
- , dist(Tag::VARDIST)
- , history(false)
- , orbit(ob)
- , height(ht)
+ : name(NULL)
+ , ncap(nc)
+ , base(Tag::RIGHTMOST)
+ , dist(Tag::VARDIST)
+ , history(false)
+ , orbit(ob)
+ , height(ht)
{}
} // namespace re2c
struct tag_info_t
{
- uint32_t idx : 31;
- uint32_t neg : 1;
+ uint32_t idx : 31;
+ uint32_t neg : 1;
};
struct Tag
{
- static const size_t RIGHTMOST;
- static const size_t VARDIST;
- static const size_t FICTIVE;
-
- const std::string *name;
- size_t ncap;
- size_t base;
- size_t dist;
- bool history;
- bool orbit;
- int32_t height;
-
- Tag(const std::string *nm, bool hi, int32_t ht);
- Tag(size_t nc, bool ob, int32_t ht);
+ static const size_t RIGHTMOST;
+ static const size_t VARDIST;
+ static const size_t FICTIVE;
+
+ const std::string *name;
+ size_t ncap;
+ size_t base;
+ size_t dist;
+ bool history;
+ bool orbit;
+ int32_t height;
+
+ Tag(const std::string *nm, bool hi, int32_t ht);
+ Tag(size_t nc, bool ob, int32_t ht);
};
inline bool operator == (const tag_info_t &x, const tag_info_t &y)
{
- // per-component comparison is slower
- RE2C_STATIC_ASSERT(sizeof(tag_info_t) == sizeof(uint32_t));
- return *reinterpret_cast<const uint32_t*>(&x)
- == *reinterpret_cast<const uint32_t*>(&y);
+ // per-component comparison is slower
+ RE2C_STATIC_ASSERT(sizeof(tag_info_t) == sizeof(uint32_t));
+ return *reinterpret_cast<const uint32_t*>(&x)
+ == *reinterpret_cast<const uint32_t*>(&y);
}
inline bool fixed(const Tag &tag)
{
- return tag.dist != Tag::VARDIST;
+ return tag.dist != Tag::VARDIST;
}
inline bool fictive(const Tag &tag)
{
- return tag.ncap == Tag::FICTIVE;
+ return tag.ncap == Tag::FICTIVE;
}
inline bool capture(const Tag &tag)
{
- return tag.ncap != Tag::RIGHTMOST;
+ return tag.ncap != Tag::RIGHTMOST;
}
inline bool orbit(const Tag &tag)
{
- return tag.orbit;
+ return tag.orbit;
}
inline bool trailing(const Tag &tag)
{
- return !capture(tag) && tag.name == NULL;
+ return !capture(tag) && tag.name == NULL;
}
inline bool history(const Tag &tag)
{
- return tag.history;
+ return tag.history;
}
} // namespace re2c
// UCF stands for 'undefined control flow'
struct ucf_t
{
- std::valarray<bool> loops;
- std::vector<path_t> paths;
- path_t prefix;
- ucf_size_t size;
+ std::valarray<bool> loops;
+ std::vector<path_t> paths;
+ path_t prefix;
+ ucf_size_t size;
- explicit ucf_t(size_t nnodes): loops(nnodes), paths(),
- prefix(0), size(ucf_size_t::from32(0u)) {}
+ explicit ucf_t(size_t nnodes): loops(nnodes), paths(),
+ prefix(0), size(ucf_size_t::from32(0u)) {}
};
// We don't need all patterns that cause undefined behaviour.
// We only need some examples, the shorter the better.
static void naked_paths(
- const Skeleton &skel,
- ucf_t &ucf,
- size_t i)
+ const Skeleton &skel,
+ ucf_t &ucf,
+ size_t i)
{
- const Node &node = skel.nodes[i];
- bool &loop = ucf.loops[i];
- path_t &prefix = ucf.prefix;
- ucf_size_t &size = ucf.size;
+ const Node &node = skel.nodes[i];
+ bool &loop = ucf.loops[i];
+ path_t &prefix = ucf.prefix;
+ ucf_size_t &size = ucf.size;
- if (node.rule != Rule::NONE) {
- return;
- } else if (node.end()) {
- ucf.paths.push_back(prefix);
- size = size + ucf_size_t::from64(prefix.len());
- } else if (!loop) {
- loop = true;
- Node::arcs_t::const_iterator
- arc = node.arcs.begin(),
- end = node.arcs.end();
- for (; arc != end && !size.overflow(); ++arc) {
- const size_t j = arc->first;
- prefix.push(j);
- naked_paths(skel, ucf, j);
- prefix.pop();
- }
- }
+ if (node.rule != Rule::NONE) {
+ return;
+ } else if (node.end()) {
+ ucf.paths.push_back(prefix);
+ size = size + ucf_size_t::from64(prefix.len());
+ } else if (!loop) {
+ loop = true;
+ Node::arcs_t::const_iterator
+ arc = node.arcs.begin(),
+ end = node.arcs.end();
+ for (; arc != end && !size.overflow(); ++arc) {
+ const size_t j = arc->first;
+ prefix.push(j);
+ naked_paths(skel, ucf, j);
+ prefix.pop();
+ }
+ }
}
void warn_undefined_control_flow(const Skeleton &skel, Warn &warn)
{
- ucf_t ucf(skel.nodes_count);
- naked_paths(skel, ucf, 0);
- if (!ucf.paths.empty()) {
- warn.undefined_control_flow(skel, ucf.paths, ucf.size.overflow());
- } else if (ucf.size.overflow()) {
- warn.fail(Warn::UNDEFINED_CONTROL_FLOW, skel.line,
- "DFA is too large to check undefined control flow");
- }
+ ucf_t ucf(skel.nodes_count);
+ naked_paths(skel, ucf, 0);
+ if (!ucf.paths.empty()) {
+ warn.undefined_control_flow(skel, ucf.paths, ucf.size.overflow());
+ } else if (ucf.size.overflow()) {
+ warn.fail(Warn::UNDEFINED_CONTROL_FLOW, skel.line,
+ "DFA is too large to check undefined control flow");
+ }
}
static void fprint_default_arc(FILE *f, const Node::arc_t &arc)
{
- const size_t ranges = arc.size();
- if (ranges == 1 && arc[0].lower == arc[0].upper) {
- fprintf(f, "\\x%X", arc[0].lower);
- } else {
- fprintf(f, "[");
- for (size_t i = 0; i < ranges; ++i) {
- const uint32_t
- l = arc[i].lower,
- u = arc[i].upper;
- fprintf(f, "\\x%X", l);
- if (l != u) {
- fprintf(f, "-\\x%X", u);
- }
- }
- fprintf(f, "]");
- }
+ const size_t ranges = arc.size();
+ if (ranges == 1 && arc[0].lower == arc[0].upper) {
+ fprintf(f, "\\x%X", arc[0].lower);
+ } else {
+ fprintf(f, "[");
+ for (size_t i = 0; i < ranges; ++i) {
+ const uint32_t
+ l = arc[i].lower,
+ u = arc[i].upper;
+ fprintf(f, "\\x%X", l);
+ if (l != u) {
+ fprintf(f, "-\\x%X", u);
+ }
+ }
+ fprintf(f, "]");
+ }
}
void fprint_default_path(
- FILE *f,
- const Skeleton &skel,
- const path_t &p)
+ FILE *f,
+ const Skeleton &skel,
+ const path_t &p)
{
- fprintf(f, "'");
- const size_t len = p.len();
- for (size_t i = 0; i < len; ++i) {
- if (i > 0) {
- fprintf(f, " ");
- }
- const Node::arc_t &arc = p.arc(skel, i);
- fprint_default_arc(stderr, arc);
- }
- fprintf(f, "'");
+ fprintf(f, "'");
+ const size_t len = p.len();
+ for (size_t i = 0; i < len; ++i) {
+ if (i > 0) {
+ fprintf(f, " ");
+ }
+ const Node::arc_t &arc = p.arc(skel, i);
+ fprint_default_arc(stderr, arc);
+ }
+ fprintf(f, "'");
}
} // namespace re2c
static void exact_uint(OutputFile &o, size_t width)
{
- if (width == sizeof(char)) {
- o.ws("unsigned char");
- } else if (width == sizeof(short)) {
- o.ws("unsigned short");
- } else if (width == sizeof(int)) {
- o.ws("unsigned int");
- } else if (width == sizeof(long)) {
- o.ws("unsigned long");
- } else {
- o.ws("uint").wu64(width * 8).ws("_t");
- }
+ if (width == sizeof(char)) {
+ o.ws("unsigned char");
+ } else if (width == sizeof(short)) {
+ o.ws("unsigned short");
+ } else if (width == sizeof(int)) {
+ o.ws("unsigned int");
+ } else if (width == sizeof(long)) {
+ o.ws("unsigned long");
+ } else {
+ o.ws("uint").wu64(width * 8).ws("_t");
+ }
}
static void from_le(OutputFile &o, uint32_t ind, size_t size, const char *expr)
{
- o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */");
- o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";");
- o.ws("\n").wind(ind).ws(expr).ws(" = p[0]");
- for (uint32_t i = 1; i < size; ++i) {
- o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)");
- }
- o.ws(";");
+ o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */");
+ o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";");
+ o.ws("\n").wind(ind).ws(expr).ws(" = p[0]");
+ for (uint32_t i = 1; i < size; ++i) {
+ o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)");
+ }
+ o.ws(";");
}
void emit_prolog(OutputFile &o)
{
- o.ws("\n#include <stddef.h> /* size_t */");
- o.ws("\n#include <stdio.h>");
- o.ws("\n#include <stdlib.h> /* malloc, free */");
- o.ws("\n#include <string.h> /* memcpy */");
- o.ws("\n");
- o.ws("\nstatic void *read_file");
- o.ws("\n").wind(1).ws("( const char *fname");
- o.ws("\n").wind(1).ws(", size_t unit");
- o.ws("\n").wind(1).ws(", size_t padding");
- o.ws("\n").wind(1).ws(", size_t *pfsize");
- o.ws("\n").wind(1).ws(")");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("void *buffer = NULL;");
- o.ws("\n").wind(1).ws("size_t fsize = 0;");
- o.ws("\n");
- o.ws("\n").wind(1).ws("/* open file */");
- o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");");
- o.ws("\n").wind(1).ws("if(f == NULL) {");
- o.ws("\n").wind(2).ws("goto error;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- o.ws("\n").wind(1).ws("/* get file size */");
- o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);");
- o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;");
- o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);");
- o.ws("\n");
- o.ws("\n").wind(1).ws("/* allocate memory for file and padding */");
- o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));");
- o.ws("\n").wind(1).ws("if (buffer == NULL) {");
- o.ws("\n").wind(2).ws("goto error;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- o.ws("\n").wind(1).ws("/* read the whole file in memory */");
- o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {");
- o.ws("\n").wind(2).ws("goto error;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- o.ws("\n").wind(1).ws("fclose(f);");
- o.ws("\n").wind(1).ws("*pfsize = fsize;");
- o.ws("\n").wind(1).ws("return buffer;");
- o.ws("\n");
- o.ws("\nerror:");
- o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);");
- o.ws("\n").wind(1).ws("free(buffer);");
- o.ws("\n").wind(1).ws("if (f != NULL) {");
- o.ws("\n").wind(2).ws("fclose(f);");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("return NULL;");
- o.ws("\n}");
- o.ws("\n");
+ o.ws("\n#include <stddef.h> /* size_t */");
+ o.ws("\n#include <stdio.h>");
+ o.ws("\n#include <stdlib.h> /* malloc, free */");
+ o.ws("\n#include <string.h> /* memcpy */");
+ o.ws("\n");
+ o.ws("\nstatic void *read_file");
+ o.ws("\n").wind(1).ws("( const char *fname");
+ o.ws("\n").wind(1).ws(", size_t unit");
+ o.ws("\n").wind(1).ws(", size_t padding");
+ o.ws("\n").wind(1).ws(", size_t *pfsize");
+ o.ws("\n").wind(1).ws(")");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("void *buffer = NULL;");
+ o.ws("\n").wind(1).ws("size_t fsize = 0;");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("/* open file */");
+ o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");");
+ o.ws("\n").wind(1).ws("if(f == NULL) {");
+ o.ws("\n").wind(2).ws("goto error;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("/* get file size */");
+ o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);");
+ o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;");
+ o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("/* allocate memory for file and padding */");
+ o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));");
+ o.ws("\n").wind(1).ws("if (buffer == NULL) {");
+ o.ws("\n").wind(2).ws("goto error;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("/* read the whole file in memory */");
+ o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {");
+ o.ws("\n").wind(2).ws("goto error;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("fclose(f);");
+ o.ws("\n").wind(1).ws("*pfsize = fsize;");
+ o.ws("\n").wind(1).ws("return buffer;");
+ o.ws("\n");
+ o.ws("\nerror:");
+ o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);");
+ o.ws("\n").wind(1).ws("free(buffer);");
+ o.ws("\n").wind(1).ws("if (f != NULL) {");
+ o.ws("\n").wind(2).ws("fclose(f);");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("return NULL;");
+ o.ws("\n}");
+ o.ws("\n");
}
void emit_start(OutputFile &o, size_t maxfill, size_t maxnmatch, const std::string &name,
- size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker,
- const std::set<std::string> &stagnames, const std::set<std::string> &stagvars,
- const std::set<std::string> &mtagnames, const std::set<std::string> &mtagvars,
- bitmaps_t &bitmaps)
+ size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker,
+ const std::set<std::string> &stagnames, const std::set<std::string> &stagvars,
+ const std::set<std::string> &mtagnames, const std::set<std::string> &mtagvars,
+ bitmaps_t &bitmaps)
{
- const opt_t *opts = o.block().opts;
- const size_t sizeof_cunit = opts->encoding.szCodeUnit();
- const uint64_t norule = rule2key(Rule::NONE, sizeof_key, def);
- std::string filename = opts->output_file;
- if (filename.empty()) {
- filename = "<stdout>";
- }
+ const opt_t *opts = o.block().opts;
+ const size_t sizeof_cunit = opts->encoding.szCodeUnit();
+ const uint64_t norule = rule2key(Rule::NONE, sizeof_key, def);
+ std::string filename = opts->output_file;
+ if (filename.empty()) {
+ filename = "<stdout>";
+ }
- o.ws("\n#define YYCTYPE ");
- exact_uint (o, sizeof_cunit);
- o.ws("\n#define YYKEYTYPE ");
- exact_uint (o, sizeof_key);
- o.ws("\n#define YYPEEK() *cursor");
- o.ws("\n#define YYSKIP() ++cursor");
- if (backup) {
- o.ws("\n#define YYBACKUP() marker = cursor");
- o.ws("\n#define YYRESTORE() cursor = marker");
- }
- if (oldstyle_ctxmarker) {
- o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor");
- o.ws("\n#define YYRESTORECTX() cursor = ctxmarker");
- }
- if (opts->tags) {
- o.ws("\n#define YYSTAGP(t) t = cursor");
- o.ws("\n#define YYSTAGN(t) t = NULL");
- o.ws("\n#define YYMTAGP(t) yymtag(&t, cursor, &yytp)");
- o.ws("\n#define YYMTAGN(t) yymtag(&t, NULL, &yytp)");
- o.ws("\n#define YYRESTORETAG(t) cursor = t");
- }
- o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n");
- o.ws("\n#define YYFILL(n) { break; }");
- o.ws("\n");
+ o.ws("\n#define YYCTYPE ");
+ exact_uint (o, sizeof_cunit);
+ o.ws("\n#define YYKEYTYPE ");
+ exact_uint (o, sizeof_key);
+ o.ws("\n#define YYPEEK() *cursor");
+ o.ws("\n#define YYSKIP() ++cursor");
+ if (backup) {
+ o.ws("\n#define YYBACKUP() marker = cursor");
+ o.ws("\n#define YYRESTORE() cursor = marker");
+ }
+ if (oldstyle_ctxmarker) {
+ o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor");
+ o.ws("\n#define YYRESTORECTX() cursor = ctxmarker");
+ }
+ if (opts->tags) {
+ o.ws("\n#define YYSTAGP(t) t = cursor");
+ o.ws("\n#define YYSTAGN(t) t = NULL");
+ o.ws("\n#define YYMTAGP(t) yymtag(&t, cursor, &yytp)");
+ o.ws("\n#define YYMTAGN(t) yymtag(&t, NULL, &yytp)");
+ o.ws("\n#define YYRESTORETAG(t) cursor = t");
+ }
+ o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n");
+ o.ws("\n#define YYFILL(n) { break; }");
+ o.ws("\n");
- o.ws("\nstatic int action_").wstring(name);
- o.ws("\n").wind(1).ws("( unsigned *pkix");
- o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys");
- o.ws("\n").wind(1).ws(", const YYCTYPE *start");
- o.ws("\n").wind(1).ws(", const YYCTYPE *token");
- o.ws("\n").wind(1).ws(", const YYCTYPE **cursor");
- o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act");
- o.ws("\n").wind(1).ws(")");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
- o.ws("\n").wind(1).ws("const long pos = token - start;");
- o.ws("\n").wind(1).ws("const long len_act = *cursor - token;");
- o.ws("\n").wind(1).ws("const long len_exp = (long) keys[kix + 1];");
- o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys[kix + 2];");
- o.ws("\n").wind(1).ws("*pkix = kix + 3;");
- o.ws("\n").wind(1).ws("if (rule_exp == ").wu64(norule).ws(") {");
- o.ws("\n").wind(2).ws("fprintf");
- o.ws("\n").wind(3).ws("( stderr");
- o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\"");
- o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\"");
- o.ws("\n").wind(3).ws(", pos");
- o.ws("\n").wind(3).ws(");");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {");
- o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[kix];");
- o.ws("\n").wind(2).ws("*cursor = token + offset;");
- o.ws("\n").wind(2).ws("return 0;");
- o.ws("\n").wind(1).ws("} else {");
- o.ws("\n").wind(2).ws("fprintf");
- o.ws("\n").wind(3).ws("( stderr");
- o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (key %u):\\n\"");
- o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\"");
- o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\"");
- o.ws("\n").wind(3).ws(", pos");
- o.ws("\n").wind(3).ws(", kix");
- o.ws("\n").wind(3).ws(", len_exp");
- o.ws("\n").wind(3).ws(", rule_exp");
- o.ws("\n").wind(3).ws(", len_act");
- o.ws("\n").wind(3).ws(", rule_act");
- o.ws("\n").wind(3).ws(");");
- o.ws("\n").wind(2).ws("return 1;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n}");
+ o.ws("\nstatic int action_").wstring(name);
+ o.ws("\n").wind(1).ws("( unsigned *pkix");
+ o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys");
+ o.ws("\n").wind(1).ws(", const YYCTYPE *start");
+ o.ws("\n").wind(1).ws(", const YYCTYPE *token");
+ o.ws("\n").wind(1).ws(", const YYCTYPE **cursor");
+ o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act");
+ o.ws("\n").wind(1).ws(")");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
+ o.ws("\n").wind(1).ws("const long pos = token - start;");
+ o.ws("\n").wind(1).ws("const long len_act = *cursor - token;");
+ o.ws("\n").wind(1).ws("const long len_exp = (long) keys[kix + 1];");
+ o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys[kix + 2];");
+ o.ws("\n").wind(1).ws("*pkix = kix + 3;");
+ o.ws("\n").wind(1).ws("if (rule_exp == ").wu64(norule).ws(") {");
+ o.ws("\n").wind(2).ws("fprintf");
+ o.ws("\n").wind(3).ws("( stderr");
+ o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\"");
+ o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\"");
+ o.ws("\n").wind(3).ws(", pos");
+ o.ws("\n").wind(3).ws(");");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {");
+ o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[kix];");
+ o.ws("\n").wind(2).ws("*cursor = token + offset;");
+ o.ws("\n").wind(2).ws("return 0;");
+ o.ws("\n").wind(1).ws("} else {");
+ o.ws("\n").wind(2).ws("fprintf");
+ o.ws("\n").wind(3).ws("( stderr");
+ o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (key %u):\\n\"");
+ o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\"");
+ o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\"");
+ o.ws("\n").wind(3).ws(", pos");
+ o.ws("\n").wind(3).ws(", kix");
+ o.ws("\n").wind(3).ws(", len_exp");
+ o.ws("\n").wind(3).ws(", rule_exp");
+ o.ws("\n").wind(3).ws(", len_act");
+ o.ws("\n").wind(3).ws(", rule_act");
+ o.ws("\n").wind(3).ws(");");
+ o.ws("\n").wind(2).ws("return 1;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n}");
- if (!stagnames.empty()) {
- o.ws("\n");
- o.ws("\nstatic int check_stag_").wstring(name)
- .ws("(unsigned *pkix, YYKEYTYPE *keys, const YYCTYPE *tag,\n")
- .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
- o.ws("\n").wind(1).ws("const YYKEYTYPE\n")
- .wind(2).ws("exp = keys[kix],\n")
- .wind(2).ws("act = (YYKEYTYPE)(tag - token),\n")
- .wind(2).ws("NIL = (YYKEYTYPE)~0u;");
- o.ws("\n").wind(1).ws("*pkix = kix + 1;");
- o.ws("\n");
- o.ws("\n").wind(1).ws("if (exp == act || (exp == NIL && tag == NULL)) return 0;");
- o.ws("\n");
- o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
- .ws("\n").wind(2).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",")
- .ws("\n").wind(2).ws("token - input, kix, name, exp, act);");
- o.ws("\n").wind(1).ws("return 1;");
- o.ws("\n}");
- }
+ if (!stagnames.empty()) {
+ o.ws("\n");
+ o.ws("\nstatic int check_stag_").wstring(name)
+ .ws("(unsigned *pkix, YYKEYTYPE *keys, const YYCTYPE *tag,\n")
+ .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
+ o.ws("\n").wind(1).ws("const YYKEYTYPE\n")
+ .wind(2).ws("exp = keys[kix],\n")
+ .wind(2).ws("act = (YYKEYTYPE)(tag - token),\n")
+ .wind(2).ws("NIL = (YYKEYTYPE)~0u;");
+ o.ws("\n").wind(1).ws("*pkix = kix + 1;");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("if (exp == act || (exp == NIL && tag == NULL)) return 0;");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
+ .ws("\n").wind(2).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",")
+ .ws("\n").wind(2).ws("token - input, kix, name, exp, act);");
+ o.ws("\n").wind(1).ws("return 1;");
+ o.ws("\n}");
+ }
- if (!mtagnames.empty()) {
- o.ws("\n");
- o.ws("\ntypedef struct yymtag_t {");
- o.ws("\n").wind(1).ws("struct yymtag_t *pred;");
- o.ws("\n").wind(1).ws("const YYCTYPE *elem;");
- o.ws("\n} yymtag_t;");
+ if (!mtagnames.empty()) {
+ o.ws("\n");
+ o.ws("\ntypedef struct yymtag_t {");
+ o.ws("\n").wind(1).ws("struct yymtag_t *pred;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *elem;");
+ o.ws("\n} yymtag_t;");
- o.ws("\n");
- o.ws("\ntypedef struct yymtagpool_t {");
- o.ws("\n").wind(1).ws("yymtag_t *head;");
- o.ws("\n").wind(1).ws("yymtag_t *next;");
- o.ws("\n").wind(1).ws("yymtag_t *last;");
- o.ws("\n} yymtagpool_t;");
+ o.ws("\n");
+ o.ws("\ntypedef struct yymtagpool_t {");
+ o.ws("\n").wind(1).ws("yymtag_t *head;");
+ o.ws("\n").wind(1).ws("yymtag_t *next;");
+ o.ws("\n").wind(1).ws("yymtag_t *last;");
+ o.ws("\n} yymtagpool_t;");
- o.ws("\n");
- o.ws("\nstatic void yymtagpool_clear(yymtagpool_t *tp)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("tp->next = tp->head;");
- o.ws("\n}");
+ o.ws("\n");
+ o.ws("\nstatic void yymtagpool_clear(yymtagpool_t *tp)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("tp->next = tp->head;");
+ o.ws("\n}");
- o.ws("\n");
- o.ws("\nstatic void yymtagpool_init(yymtagpool_t *tp)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("static const unsigned size = 256;");
- o.ws("\n").wind(1).ws("tp->head = (yymtag_t*)malloc(size * sizeof(yymtag_t));");
- o.ws("\n").wind(1).ws("tp->next = tp->head;");
- o.ws("\n").wind(1).ws("tp->last = tp->head + size;");
- o.ws("\n}");
+ o.ws("\n");
+ o.ws("\nstatic void yymtagpool_init(yymtagpool_t *tp)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("static const unsigned size = 256;");
+ o.ws("\n").wind(1).ws("tp->head = (yymtag_t*)malloc(size * sizeof(yymtag_t));");
+ o.ws("\n").wind(1).ws("tp->next = tp->head;");
+ o.ws("\n").wind(1).ws("tp->last = tp->head + size;");
+ o.ws("\n}");
- o.ws("\n");
- o.ws("\nstatic void yymtagpool_free(yymtagpool_t *tp)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("free(tp->head);");
- o.ws("\n").wind(1).ws("tp->head = tp->next = tp->last = NULL;");
- o.ws("\n}");
+ o.ws("\n");
+ o.ws("\nstatic void yymtagpool_free(yymtagpool_t *tp)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("free(tp->head);");
+ o.ws("\n").wind(1).ws("tp->head = tp->next = tp->last = NULL;");
+ o.ws("\n}");
- o.ws("\n");
- o.ws("\nstatic yymtag_t *yymtagpool_next(yymtagpool_t *tp)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("if (tp->next == tp->last) {");
- o.ws("\n").wind(2).ws("const unsigned size = tp->last - tp->head;");
- o.ws("\n").wind(2).ws("yymtag_t *head = (yymtag_t*)malloc(2 * size * sizeof(yymtag_t));");
- o.ws("\n").wind(2).ws("memcpy(head, tp->head, size * sizeof(yymtag_t));");
- o.ws("\n").wind(2).ws("free(tp->head);");
- o.ws("\n").wind(2).ws("tp->head = head;");
- o.ws("\n").wind(2).ws("tp->next = head + size;");
- o.ws("\n").wind(2).ws("tp->last = head + size * 2;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("return tp->next++;");
- o.ws("\n}");
+ o.ws("\n");
+ o.ws("\nstatic yymtag_t *yymtagpool_next(yymtagpool_t *tp)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("if (tp->next == tp->last) {");
+ o.ws("\n").wind(2).ws("const unsigned size = tp->last - tp->head;");
+ o.ws("\n").wind(2).ws("yymtag_t *head = (yymtag_t*)malloc(2 * size * sizeof(yymtag_t));");
+ o.ws("\n").wind(2).ws("memcpy(head, tp->head, size * sizeof(yymtag_t));");
+ o.ws("\n").wind(2).ws("free(tp->head);");
+ o.ws("\n").wind(2).ws("tp->head = head;");
+ o.ws("\n").wind(2).ws("tp->next = head + size;");
+ o.ws("\n").wind(2).ws("tp->last = head + size * 2;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("return tp->next++;");
+ o.ws("\n}");
- o.ws("\n");
- o.ws("\nstatic void yymtag(yymtag_t **pt, const YYCTYPE *t, yymtagpool_t *tp)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("yymtag_t *n = yymtagpool_next(tp);");
- o.ws("\n").wind(1).ws("n->pred = *pt;");
- o.ws("\n").wind(1).ws("n->elem = t;");
- o.ws("\n").wind(1).ws("*pt = n;");
- o.ws("\n}");
+ o.ws("\n");
+ o.ws("\nstatic void yymtag(yymtag_t **pt, const YYCTYPE *t, yymtagpool_t *tp)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("yymtag_t *n = yymtagpool_next(tp);");
+ o.ws("\n").wind(1).ws("n->pred = *pt;");
+ o.ws("\n").wind(1).ws("n->elem = t;");
+ o.ws("\n").wind(1).ws("*pt = n;");
+ o.ws("\n}");
- o.ws("\n");
- o.ws("\nstatic int check_mtag_").wstring(name)
- .ws("(unsigned *pkix, YYKEYTYPE *keys, const yymtag_t *mtag,\n")
- .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)");
- o.ws("\n{");
-// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(1) && return 1;");
- o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
- o.ws("\n").wind(1).ws("YYKEYTYPE n = keys[kix];");
- o.ws("\n").wind(1).ws("*pkix = kix + n + 1;");
-// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(n) && return 1;");
- o.ws("\n").wind(1).ws("for (; n > 0; --n) {");
- o.ws("\n").wind(2).ws("if (mtag == NULL) {");
- o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
- .ws("\n").wind(4).ws("\"history for tag '%s' is too short\\n\",")
- .ws("\n").wind(4).ws("token - input, kix + n, name);");
- o.ws("\n").wind(3).ws("return 1;");
- o.ws("\n").wind(2).ws("}");
- o.ws("\n").wind(2).ws("const YYCTYPE *tag = mtag->elem;");
- o.ws("\n").wind(2).ws("mtag = mtag->pred;");
- o.ws("\n").wind(2).ws("const YYKEYTYPE\n")
- .wind(3).ws("exp = keys[kix + n],\n")
- .wind(3).ws("act = (YYKEYTYPE)(tag - token),\n")
- .wind(3).ws("NIL = (YYKEYTYPE)~0u;");
- o.ws("\n").wind(2).ws("if (!(exp == act || (exp == NIL && tag == NULL))) {");
- o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
- .ws("\n").wind(4).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",")
- .ws("\n").wind(4).ws("token - input, kix + n, name, exp, act);");
- o.ws("\n").wind(3).ws("return 1;");
- o.ws("\n").wind(2).ws("}");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("if (mtag != NULL) {");
- o.ws("\n").wind(2).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
- .ws("\n").wind(3).ws("\"history for tag '%s' is too long\\n\",")
- .ws("\n").wind(3).ws("token - input, kix, name);");
- o.ws("\n").wind(2).ws("return 1;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("return 0;");
- o.ws("\n}");
- }
+ o.ws("\n");
+ o.ws("\nstatic int check_mtag_").wstring(name)
+ .ws("(unsigned *pkix, YYKEYTYPE *keys, const yymtag_t *mtag,\n")
+ .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)");
+ o.ws("\n{");
+// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(1) && return 1;");
+ o.ws("\n").wind(1).ws("const unsigned kix = *pkix;");
+ o.ws("\n").wind(1).ws("YYKEYTYPE n = keys[kix];");
+ o.ws("\n").wind(1).ws("*pkix = kix + n + 1;");
+// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(n) && return 1;");
+ o.ws("\n").wind(1).ws("for (; n > 0; --n) {");
+ o.ws("\n").wind(2).ws("if (mtag == NULL) {");
+ o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
+ .ws("\n").wind(4).ws("\"history for tag '%s' is too short\\n\",")
+ .ws("\n").wind(4).ws("token - input, kix + n, name);");
+ o.ws("\n").wind(3).ws("return 1;");
+ o.ws("\n").wind(2).ws("}");
+ o.ws("\n").wind(2).ws("const YYCTYPE *tag = mtag->elem;");
+ o.ws("\n").wind(2).ws("mtag = mtag->pred;");
+ o.ws("\n").wind(2).ws("const YYKEYTYPE\n")
+ .wind(3).ws("exp = keys[kix + n],\n")
+ .wind(3).ws("act = (YYKEYTYPE)(tag - token),\n")
+ .wind(3).ws("NIL = (YYKEYTYPE)~0u;");
+ o.ws("\n").wind(2).ws("if (!(exp == act || (exp == NIL && tag == NULL))) {");
+ o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
+ .ws("\n").wind(4).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",")
+ .ws("\n").wind(4).ws("token - input, kix + n, name, exp, act);");
+ o.ws("\n").wind(3).ws("return 1;");
+ o.ws("\n").wind(2).ws("}");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("if (mtag != NULL) {");
+ o.ws("\n").wind(2).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"")
+ .ws("\n").wind(3).ws("\"history for tag '%s' is too long\\n\",")
+ .ws("\n").wind(3).ws("token - input, kix, name);");
+ o.ws("\n").wind(2).ws("return 1;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("return 0;");
+ o.ws("\n}");
+ }
- o.ws("\n");
- o.ws("\nstatic int check_key_count_").wstring(name).ws("(unsigned have, unsigned used, unsigned need)");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("if (used + need <= have) return 0;");
- o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": not enough keys\\n\");");
- o.ws("\n").wind(1).ws("return 1;");
- o.ws("\n}");
- o.ws("\n");
+ o.ws("\n");
+ o.ws("\nstatic int check_key_count_").wstring(name).ws("(unsigned have, unsigned used, unsigned need)");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("if (used + need <= have) return 0;");
+ o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": not enough keys\\n\");");
+ o.ws("\n").wind(1).ws("return 1;");
+ o.ws("\n}");
+ o.ws("\n");
- o.ws("\nint lex_").wstring(name).ws("()");
- o.ws("\n{");
- o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */");
- o.ws("\n").wind(1).ws("int status = 0;");
- o.ws("\n").wind(1).ws("size_t input_len = 0;");
- o.ws("\n").wind(1).ws("size_t keys_count = 0;");
- o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;");
- o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;");
- o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;");
- o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;");
- o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;");
- o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;");
- if (opts->posix_captures) {
- o.ws("\n").wind(1).ws("size_t yynmatch;");
- o.ws("\n").wind(1).ws("const YYCTYPE *yypmatch[").wu64(maxnmatch).ws(" * 2];");
- }
- o.ws("\n").wind(1).ws("unsigned int i = 0;");
- if (!mtagnames.empty()) {
- o.ws("\n");
- o.ws("\n").wind(1).ws("yymtagpool_t yytp;");
- o.ws("\n").wind(1).ws("yymtagpool_init(&yytp);");
- }
- o.ws("\n");
- o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file");
- o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".input\"");
- o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)");
- o.ws("\n").wind(2).ws(", padding");
- o.ws("\n").wind(2).ws(", &input_len");
- o.ws("\n").wind(2).ws(");");
- o.ws("\n").wind(1).ws("if (input == NULL) {");
- o.ws("\n").wind(2).ws("status = 1;");
- o.ws("\n").wind(2).ws("goto end;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- if (sizeof_cunit > 1) {
- o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {");
- from_le(o, 2, sizeof_cunit, "input[i]");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- }
- o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file");
- o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".keys\"");
- o.ws("\n").wind(2).ws(", sizeof (YYKEYTYPE)");
- o.ws("\n").wind(2).ws(", 0");
- o.ws("\n").wind(2).ws(", &keys_count");
- o.ws("\n").wind(2).ws(");");
- o.ws("\n").wind(1).ws("if (keys == NULL) {");
- o.ws("\n").wind(2).ws("status = 1;");
- o.ws("\n").wind(2).ws("goto end;");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- if (sizeof_key > 1)
- {
- o.ws("\n").wind(1).ws("for (i = 0; i < keys_count; ++i) {");
- from_le(o, 2, sizeof_key, "keys[i]");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- }
- o.ws("\n").wind(1).ws("cursor = input;");
- o.ws("\n").wind(1).ws("limit = input + input_len + padding;");
- o.ws("\n").wind(1).ws("eof = input + input_len;");
- o.ws("\n");
- o.ws("\n").wind(1).ws("for (i = 0; status == 0 && cursor < eof && i < keys_count;) {");
- o.ws("\n").wind(2).ws("token = cursor;");
- if (backup) {
- o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;");
- }
- if (oldstyle_ctxmarker) {
- o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;");
- }
- o.ws("\n").wind(2).ws("YYCTYPE yych;");
- if (accept) {
- o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;");
- }
+ o.ws("\nint lex_").wstring(name).ws("()");
+ o.ws("\n{");
+ o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */");
+ o.ws("\n").wind(1).ws("int status = 0;");
+ o.ws("\n").wind(1).ws("size_t input_len = 0;");
+ o.ws("\n").wind(1).ws("size_t keys_count = 0;");
+ o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;");
+ o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;");
+ if (opts->posix_captures) {
+ o.ws("\n").wind(1).ws("size_t yynmatch;");
+ o.ws("\n").wind(1).ws("const YYCTYPE *yypmatch[").wu64(maxnmatch).ws(" * 2];");
+ }
+ o.ws("\n").wind(1).ws("unsigned int i = 0;");
+ if (!mtagnames.empty()) {
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("yymtagpool_t yytp;");
+ o.ws("\n").wind(1).ws("yymtagpool_init(&yytp);");
+ }
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file");
+ o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".input\"");
+ o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)");
+ o.ws("\n").wind(2).ws(", padding");
+ o.ws("\n").wind(2).ws(", &input_len");
+ o.ws("\n").wind(2).ws(");");
+ o.ws("\n").wind(1).ws("if (input == NULL) {");
+ o.ws("\n").wind(2).ws("status = 1;");
+ o.ws("\n").wind(2).ws("goto end;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ if (sizeof_cunit > 1) {
+ o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {");
+ from_le(o, 2, sizeof_cunit, "input[i]");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ }
+ o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file");
+ o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".keys\"");
+ o.ws("\n").wind(2).ws(", sizeof (YYKEYTYPE)");
+ o.ws("\n").wind(2).ws(", 0");
+ o.ws("\n").wind(2).ws(", &keys_count");
+ o.ws("\n").wind(2).ws(");");
+ o.ws("\n").wind(1).ws("if (keys == NULL) {");
+ o.ws("\n").wind(2).ws("status = 1;");
+ o.ws("\n").wind(2).ws("goto end;");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ if (sizeof_key > 1)
+ {
+ o.ws("\n").wind(1).ws("for (i = 0; i < keys_count; ++i) {");
+ from_le(o, 2, sizeof_key, "keys[i]");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ }
+ o.ws("\n").wind(1).ws("cursor = input;");
+ o.ws("\n").wind(1).ws("limit = input + input_len + padding;");
+ o.ws("\n").wind(1).ws("eof = input + input_len;");
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("for (i = 0; status == 0 && cursor < eof && i < keys_count;) {");
+ o.ws("\n").wind(2).ws("token = cursor;");
+ if (backup) {
+ o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;");
+ }
+ if (oldstyle_ctxmarker) {
+ o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;");
+ }
+ o.ws("\n").wind(2).ws("YYCTYPE yych;");
+ if (accept) {
+ o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;");
+ }
- // autogenerated stag variables
- ConfTags conf("\n" + indent(2, opts->indString) + "const YYCTYPE *@@ = NULL;", "");
- output_tags(o.stream(), 0, conf, stagnames, opts);
- // user-defined stag variables
- std::set<std::string>::const_iterator
- var1 = stagvars.begin(),
- var2 = stagvars.end();
- if (var1 != var2) {
- o.ws("\n").wind(2).ws("const YYCTYPE *").wstring(*var1);
- for (++var1; var1 != var2; ++var1) {
- o.ws(", *").wstring(*var1);
- }
- o.ws(";");
- }
- if (!mtagnames.empty()) {
- o.ws("\n").wind(2).ws("yymtagpool_clear(&yytp);");
- // autogenerated mtag variables
- conf.format = "yymtag_t *@@ = NULL;";
- output_tags(o.stream(), 0, conf, mtagnames, opts);
- // user-defined mtag variables
- var1 = mtagvars.begin();
- var2 = mtagvars.end();
- if (var1 != var2) {
- o.ws("\n").wind(2).ws("yymtag_t *").wstring(*var1);
- for (++var1; var1 != var2; ++var1) {
- o.ws(", *").wstring(*var1);
- }
- o.ws(";");
- }
- }
+ // autogenerated stag variables
+ ConfTags conf("\n" + indent(2, opts->indString) + "const YYCTYPE *@@ = NULL;", "");
+ output_tags(o.stream(), 0, conf, stagnames, opts);
+ // user-defined stag variables
+ std::set<std::string>::const_iterator
+ var1 = stagvars.begin(),
+ var2 = stagvars.end();
+ if (var1 != var2) {
+ o.ws("\n").wind(2).ws("const YYCTYPE *").wstring(*var1);
+ for (++var1; var1 != var2; ++var1) {
+ o.ws(", *").wstring(*var1);
+ }
+ o.ws(";");
+ }
+ if (!mtagnames.empty()) {
+ o.ws("\n").wind(2).ws("yymtagpool_clear(&yytp);");
+ // autogenerated mtag variables
+ conf.format = "yymtag_t *@@ = NULL;";
+ output_tags(o.stream(), 0, conf, mtagnames, opts);
+ // user-defined mtag variables
+ var1 = mtagvars.begin();
+ var2 = mtagvars.end();
+ if (var1 != var2) {
+ o.ws("\n").wind(2).ws("yymtag_t *").wstring(*var1);
+ for (++var1; var1 != var2; ++var1) {
+ o.ws(", *").wstring(*var1);
+ }
+ o.ws(";");
+ }
+ }
- o.ws("\n");
- if (opts->bFlag) {
- bitmaps.gen(o, 2);
- }
- o.ws("\n");
+ o.ws("\n");
+ if (opts->bFlag) {
+ bitmaps.gen(o, 2);
+ }
+ o.ws("\n");
}
void emit_end(OutputFile &o, const std::string &name, bool backup, bool oldstyle_ctxmarker,
- const std::set<std::string> &mtagnames)
+ const std::set<std::string> &mtagnames)
{
- o.ws("\n").wind(1).ws("}");
- o.ws("\n").wind(1).ws("if (status == 0) {");
- o.ws("\n").wind(2).ws("if (cursor != eof) {");
- o.ws("\n").wind(3).ws("status = 1;");
- o.ws("\n").wind(3).ws("const long pos = token - input;");
- o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);");
- o.ws("\n").wind(2).ws("}");
- o.ws("\n").wind(2).ws("if (i != keys_count) {");
- o.ws("\n").wind(3).ws("status = 1;");
- o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u keys\\n\", i);");
- o.ws("\n").wind(2).ws("}");
- o.ws("\n").wind(1).ws("}");
- o.ws("\n");
- o.ws("\nend:");
- o.ws("\n").wind(1).ws("free(input);");
- o.ws("\n").wind(1).ws("free(keys);");
- if (!mtagnames.empty()) {
- o.ws("\n").wind(1).ws("yymtagpool_free(&yytp);");
- }
- o.ws("\n");
- o.ws("\n").wind(1).ws("return status;");
- o.ws("\n}");
- o.ws("\n");
- o.ws("\n#undef YYCTYPE");
- o.ws("\n#undef YYKEYTYPE");
- o.ws("\n#undef YYPEEK");
- o.ws("\n#undef YYSKIP");
- if (backup) {
- o.ws("\n#undef YYBACKUP");
- o.ws("\n#undef YYRESTORE");
- }
- if (oldstyle_ctxmarker) {
- o.ws("\n#undef YYBACKUPCTX");
- o.ws("\n#undef YYRESTORECTX");
- }
- if (o.block().opts->tags) {
- o.ws("\n#undef YYBACKUPTAG");
- o.ws("\n#undef YYRESTORETAG");
- o.ws("\n#undef YYCOPYTAG");
- }
- o.ws("\n#undef YYLESSTHAN");
- o.ws("\n#undef YYFILL");
- o.ws("\n");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n").wind(1).ws("if (status == 0) {");
+ o.ws("\n").wind(2).ws("if (cursor != eof) {");
+ o.ws("\n").wind(3).ws("status = 1;");
+ o.ws("\n").wind(3).ws("const long pos = token - input;");
+ o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);");
+ o.ws("\n").wind(2).ws("}");
+ o.ws("\n").wind(2).ws("if (i != keys_count) {");
+ o.ws("\n").wind(3).ws("status = 1;");
+ o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u keys\\n\", i);");
+ o.ws("\n").wind(2).ws("}");
+ o.ws("\n").wind(1).ws("}");
+ o.ws("\n");
+ o.ws("\nend:");
+ o.ws("\n").wind(1).ws("free(input);");
+ o.ws("\n").wind(1).ws("free(keys);");
+ if (!mtagnames.empty()) {
+ o.ws("\n").wind(1).ws("yymtagpool_free(&yytp);");
+ }
+ o.ws("\n");
+ o.ws("\n").wind(1).ws("return status;");
+ o.ws("\n}");
+ o.ws("\n");
+ o.ws("\n#undef YYCTYPE");
+ o.ws("\n#undef YYKEYTYPE");
+ o.ws("\n#undef YYPEEK");
+ o.ws("\n#undef YYSKIP");
+ if (backup) {
+ o.ws("\n#undef YYBACKUP");
+ o.ws("\n#undef YYRESTORE");
+ }
+ if (oldstyle_ctxmarker) {
+ o.ws("\n#undef YYBACKUPCTX");
+ o.ws("\n#undef YYRESTORECTX");
+ }
+ if (o.block().opts->tags) {
+ o.ws("\n#undef YYBACKUPTAG");
+ o.ws("\n#undef YYRESTORETAG");
+ o.ws("\n#undef YYCOPYTAG");
+ }
+ o.ws("\n#undef YYLESSTHAN");
+ o.ws("\n#undef YYFILL");
+ o.ws("\n");
}
void emit_epilog(OutputFile &o, const std::set<std::string> &names)
{
- o.ws("\n").ws("int main()");
- o.ws("\n").ws("{");
+ o.ws("\n").ws("int main()");
+ o.ws("\n").ws("{");
- for (std::set<std::string>::const_iterator i = names.begin(); i != names.end(); ++i) {
- o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {");
- o.ws("\n").wind(2).ws("return 1;");
- o.ws("\n").wind(1).ws("}");
- }
+ for (std::set<std::string>::const_iterator i = names.begin(); i != names.end(); ++i) {
+ o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {");
+ o.ws("\n").wind(2).ws("return 1;");
+ o.ws("\n").wind(1).ws("}");
+ }
- o.ws("\n").wind(1).ws("return 0;");
- o.ws("\n}");
- o.ws("\n");
+ o.ws("\n").wind(1).ws("return 0;");
+ o.ws("\n}");
+ o.ws("\n");
}
void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid)
{
- const std::string &name = dfa.name;
- const Rule &r = dfa.rules[rid];
- const uint64_t rkey = rule2key(rid, dfa.key_size, dfa.def_rule);
- size_t ntag = 3;
- for (size_t t = r.ltag; t < r.htag; ++t) {
- const Tag &tag = dfa.tags[t];
- if (t != r.ttag && !fictive(tag)) ++ntag;
- }
+ const std::string &name = dfa.name;
+ const Rule &r = dfa.rules[rid];
+ const uint64_t rkey = rule2key(rid, dfa.key_size, dfa.def_rule);
+ size_t ntag = 3;
+ for (size_t t = r.ltag; t < r.htag; ++t) {
+ const Tag &tag = dfa.tags[t];
+ if (t != r.ttag && !fictive(tag)) ++ntag;
+ }
- o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ")
- .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name)
- .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")");
+ o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ")
+ .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name)
+ .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")");
- for (size_t t = r.ltag; t < r.htag; ++t) {
- const Tag &tag = dfa.tags[t];
- if (t == r.ttag || fictive(tag)) continue;
- const std::string tname = tagname(tag),
- prefix = history(tag) ? "m" : "s";
- o.ws("\n").wind(ind + 1).ws(" || check_").wstring(prefix).ws("tag").ws("_").wstring(name)
- .ws("(&i, keys, ").wstring(tname).ws(", input, token, \"")
- .wstring(tname).ws("\")");
- }
+ for (size_t t = r.ltag; t < r.htag; ++t) {
+ const Tag &tag = dfa.tags[t];
+ if (t == r.ttag || fictive(tag)) continue;
+ const std::string tname = tagname(tag),
+ prefix = history(tag) ? "m" : "s";
+ o.ws("\n").wind(ind + 1).ws(" || check_").wstring(prefix).ws("tag").ws("_").wstring(name)
+ .ws("(&i, keys, ").wstring(tname).ws(", input, token, \"")
+ .wstring(tname).ws("\")");
+ }
- o.ws(";\n");
- o.wind(ind).ws("continue;\n");
+ o.ws(";\n");
+ o.wind(ind).ws("continue;\n");
}
} // namespace re2c
struct cover_t
{
- FILE *input;
- FILE *keys;
- std::vector<uint8_t> loops;
- std::vector<suffix_t> suffixes;
- path_t prefix;
- cover_size_t size;
-
- cover_t(FILE *fi, FILE *fk, size_t nnodes):
- input(fi), keys(fk), loops(nnodes),
- suffixes(nnodes), prefix(0),
- size(cover_size_t::from32(0u)) {}
-
- FORBID_COPY(cover_t);
+ FILE *input;
+ FILE *keys;
+ std::vector<uint8_t> loops;
+ std::vector<suffix_t> suffixes;
+ path_t prefix;
+ cover_size_t size;
+
+ cover_t(FILE *fi, FILE *fk, size_t nnodes):
+ input(fi), keys(fk), loops(nnodes),
+ suffixes(nnodes), prefix(0),
+ size(cover_size_t::from32(0u)) {}
+
+ FORBID_COPY(cover_t);
};
// Returns a value whose in-memory byte i holds bits [8*i, 8*i + 8) of n,
// i.e. n stored least-significant byte first independently of the host
// byte order. The result is meant to be written out as raw bytes.
template<typename uintn_t> static uintn_t to_le(uintn_t n)
{
- uintn_t m;
- uint8_t *p = reinterpret_cast<uint8_t*>(&m);
- for (size_t i = 0; i < sizeof(uintn_t); ++i) {
- p[i] = static_cast<uint8_t>(n >> (i * 8));
- }
- return m;
+ uintn_t m;
+ uint8_t *p = reinterpret_cast<uint8_t*>(&m);
+ for (size_t i = 0; i < sizeof(uintn_t); ++i) {
+ p[i] = static_cast<uint8_t>(n >> (i * 8));
+ }
+ return m;
}
// Stride used when sampling characters from the range [lower, upper].
// - pick at most 0x100 unique edges from this range: the stride grows by one
//   for every 0x100 values the range spans
// - values should be deterministic: the stride depends only on the bounds
static uint32_t step(uint32_t lower, uint32_t upper)
{
- return 1 + (upper - lower) / 0x100;
+ return 1 + (upper - lower) / 0x100;
}
// Number of characters sampled from [lower, upper] when walking it with
// stride step(lower, upper) and always including the upper bound.
// The arithmetic is unsigned 32-bit: for lower == upper the subtraction
// wraps around and the final sum wraps back to 1.
static uint32_t nsteps(uint32_t lower, uint32_t upper)
{
- return 2 + (upper - lower - 1) / step(lower, upper);
+ return 2 + (upper - lower - 1) / step(lower, upper);
}
// Applies the linked list of tag commands starting at cmd to tags, an array
// of value histories indexed by tag version. pos is the value recorded by
// commands that store the current position.
//   copy command:  history of lhs becomes a copy of the history of rhs
//   set command:   history of lhs is reset to a single value, DEFTAG if the
//                  first history entry is TAGVER_BOTTOM, otherwise pos
//   otherwise:     lhs starts from the history of rhs (when lhs != rhs), then
//                  one value per entry of the command's history array (up to
//                  the TAGVER_ZERO terminator) is appended in reverse order
static void apply(std::vector<size_t> *tags, const tcmd_t *cmd, size_t pos)
{
- for (const tcmd_t *p = cmd; p; p = p->next) {
- const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
- std::vector<size_t> &t = tags[l];
- if (tcmd_t::iscopy(p)) {
- t = tags[r];
- } else if (tcmd_t::isset(p)) {
- t.clear();
- t.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos);
- } else {
- if (l != r) t = tags[r];
- std::vector<size_t> x;
- for (; *h != TAGVER_ZERO; ++h) {
- x.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos);
- }
- t.insert(t.end(), x.rbegin(), x.rend());
- }
- }
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
+ std::vector<size_t> &t = tags[l];
+ if (tcmd_t::iscopy(p)) {
+ t = tags[r];
+ } else if (tcmd_t::isset(p)) {
+ t.clear();
+ t.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos);
+ } else {
+ if (l != r) t = tags[r];
+ std::vector<size_t> x;
+ for (; *h != TAGVER_ZERO; ++h) {
+ x.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos);
+ }
+ t.insert(t.end(), x.rbegin(), x.rend());
+ }
+ }
}
// Returns the width of a path: for every arc on the path sum nsteps() over
// the arc's ranges, then take the maximum of these sums. Returns 0 for a
// path of length 0.
static size_t path_width(const path_t &path, const Skeleton &skel)
{
- size_t width = 0;
- for (size_t i = 0; i < path.len(); ++i) {
-
- // width of multiarc: total number of characters picked from all ranges
- size_t w = 0;
- const Node::arc_t &arc = path.arc(skel, i);
- for (Node::citer_t a = arc.begin(); a != arc.end(); ++a) {
- w += nsteps(a->lower, a->upper);
- }
-
- // width of multipath: maximal width of multiarc
- width = std::max(width, w);
- }
- return width;
+ size_t width = 0;
+ for (size_t i = 0; i < path.len(); ++i) {
+
+ // width of multiarc: total number of characters picked from all ranges
+ size_t w = 0;
+ const Node::arc_t &arc = path.arc(skel, i);
+ for (Node::citer_t a = arc.begin(); a != arc.end(); ++a) {
+ w += nsteps(a->lower, a->upper);
+ }
+
+ // width of multipath: maximal width of multiarc
+ width = std::max(width, w);
+ }
+ return width;
}
// Writes `width` input strings for `path` to `file`, each path.len() code
// units long, as one contiguous block of cunit_t values converted by to_le.
// The buffer is laid out string by string: element (w, i) lives at index
// w * len + i. For arc i the characters are sampled from the arc's ranges
// with stride step(l, u); std::min(m, u) clamps the last sample to the upper
// bound. Node::wciter_t is defined elsewhere; presumably it wraps around to
// the first range once the arc is exhausted, so arcs narrower than `width`
// repeat their characters - confirm.
// NOTE(review): `u + d` is evaluated in uint32_t, so the inner loop assumes
// range bounds small enough for the sum not to wrap.
// NOTE(review): the return value of fwrite is not checked.
template<typename cunit_t>
static void write_input(const path_t &path, const Skeleton &skel,
- size_t width, FILE *file)
+ size_t width, FILE *file)
{
- const size_t
- len = path.len(),
- size = len * width;
- cunit_t *buffer = new cunit_t[size];
-
- // pick characters from ranges
- for (size_t i = 0; i < len; ++i) {
- Node::wciter_t a(path.arc(skel, i));
- for (size_t w = 0; w < width; ++a) {
- const uint32_t
- l = a->lower,
- u = a->upper,
- d = step(l, u);
- for (uint32_t m = l; m < u + d && w < width; m += d, ++w) {
- buffer[w * len + i] = to_le(static_cast<cunit_t>(std::min(m, u)));
- }
- }
- }
-
- fwrite(buffer, sizeof(cunit_t), size, file);
-
- delete[] buffer;
+ const size_t
+ len = path.len(),
+ size = len * width;
+ cunit_t *buffer = new cunit_t[size];
+
+ // pick characters from ranges
+ for (size_t i = 0; i < len; ++i) {
+ Node::wciter_t a(path.arc(skel, i));
+ for (size_t w = 0; w < width; ++a) {
+ const uint32_t
+ l = a->lower,
+ u = a->upper,
+ d = step(l, u);
+ for (uint32_t m = l; m < u + d && w < width; m += d, ++w) {
+ buffer[w * len + i] = to_le(static_cast<cunit_t>(std::min(m, u)));
+ }
+ }
+ }
+
+ fwrite(buffer, sizeof(cunit_t), size, file);
+
+ delete[] buffer;
}
// Writes the keys (expected results) for the `width` input strings generated
// from `path` to `file`, one record per string, every value converted with
// to_le.
//
// Steps, as implemented below:
//  1. f is found by scanning back from the last node of the path while the
//     node's rule is Rule::NONE; the scan stops at index 0 without looking
//     at that node's rule, so the rule taken from node f may still be NONE.
//  2. Each of the `width` rows owns nver tag-version histories (row w starts
//     at tags[w * nver]). skel.cmd0 is applied at position 0, then the
//     commands of arcs 0..f-1 at position i + offby (offby is 0 when
//     opts->lookahead is set, 1 otherwise), then the command of node f at
//     position f. Arc commands are distributed over rows in the same
//     range order and nsteps() counts as the characters in write_input.
//  3. If node f has a rule, its tag interval [ltag, htag) and trailing tag
//     ttag are taken from skel.rules. The matched length is f when
//     ttag == htag, otherwise the last value of the trailing tag's final
//     version.
//     NOTE(review): that value is read from tags[skel.finvers[trail]], i.e.
//     from row 0 only, and is then written for every row.
//  4. Keys are counted, allocated and filled. Per row: path.len(), matched
//     length, rule2key(rule, defrule); then for every tag in [ltag, htag)
//     that is neither the trailing tag nor fictive: for history tags the
//     history length followed by all values, otherwise the last value only.
//     Fixed tags read the version of their base tag.
// NOTE(review): the return value of fwrite is not checked.
template<typename key_t>
static void write_keys(const path_t &path, const Skeleton &skel,
- size_t width, FILE *file)
+ size_t width, FILE *file)
{
- // find last accepting node
- size_t f;
- for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f);
-
- // calculate tags: start with default and apply commands step by step
- const size_t
- nver = skel.ntagver,
- ntag = width * nver,
- offby = skel.opts->lookahead ? 0 : 1;
- std::vector<size_t> *tags = new std::vector<size_t>[ntag];
- for (size_t w = 0; w < width; ++w) {
- apply(&tags[w * nver], skel.cmd0, 0); // absent in LATDFA
- }
- for (size_t i = 0; i < f; ++i) {
- Node::wciter_t a(path.arc(skel, i));
- for (size_t w = 0; w < width; ++a) {
- uint32_t n = nsteps(a->lower, a->upper);
- for (; n --> 0 && w < width; ++w) {
- apply(&tags[w * nver], a->cmd, i + offby);
- }
- }
- }
- const tcmd_t *fcmd = path.node(skel, f).cmd;
- for (size_t w = 0; w < width; ++w) {
- apply(&tags[w * nver], fcmd, f); // only present in LATDFA
- }
-
- const size_t rule = path.node(skel, f).rule;
- size_t matched = 0, ltag = 0, htag = 0, trail = 0;
- if (rule != Rule::NONE) {
-
- const Rule &r = skel.rules[rule];
- ltag = r.ltag;
- htag = r.htag;
- trail = r.ttag;
-
- // matched length might depend on tag values
- if (trail == htag) {
- matched = f;
- } else {
- assert(!fixed(skel.tags[trail])); // no fixed trailing context
- matched = tags[skel.finvers[trail]].back();
- assert(matched != Skeleton::DEFTAG);
- }
- }
-
- // count keys
- size_t nkey = 0;
- for (size_t w = 0; w < width; ++w) {
- nkey += 3;
- for (size_t t = ltag; t < htag; ++t) {
- const Tag &tag = skel.tags[t];
- if (t == trail || fictive(tag)) continue;
- const size_t
- base = fixed(tag) ? tag.base : t,
- bver = static_cast<size_t>(skel.finvers[base]);
- if (history(tag)) nkey += tags[w * nver + bver].size();
- ++nkey;
- }
- }
-
- // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags
- key_t *keys = new key_t[nkey], *k = keys;
- for (size_t w = 0; w < width; ++w) {
- *k++ = to_le(static_cast<key_t>(path.len()));
- *k++ = to_le(static_cast<key_t>(matched));
- *k++ = to_le(rule2key<key_t>(rule, skel.defrule));
-
- for (size_t t = ltag; t < htag; ++t) {
- const Tag &tag = skel.tags[t];
- if (t == trail || fictive(tag)) continue;
-
- const size_t
- base = fixed(tag) ? tag.base : t,
- bver = static_cast<size_t>(skel.finvers[base]);
- const std::vector<size_t> &h = tags[w * nver + bver];
- if (history(tag)) {
- const size_t hlen = h.size();
- *k++ = to_le(static_cast<key_t>(hlen));
- for (size_t i = 0; i < hlen; ++i) {
- *k++ = to_le(static_cast<key_t>(h[i]));
- }
- } else {
- *k++ = to_le(static_cast<key_t>(h.back()));
- }
- }
- }
-
- // dump to file
- fwrite(keys, sizeof(key_t), nkey, file);
-
- delete[] tags;
- delete[] keys;
+ // find last accepting node
+ size_t f;
+ for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f);
+
+ // calculate tags: start with default and apply commands step by step
+ const size_t
+ nver = skel.ntagver,
+ ntag = width * nver,
+ offby = skel.opts->lookahead ? 0 : 1;
+ std::vector<size_t> *tags = new std::vector<size_t>[ntag];
+ for (size_t w = 0; w < width; ++w) {
+ apply(&tags[w * nver], skel.cmd0, 0); // absent in LATDFA
+ }
+ for (size_t i = 0; i < f; ++i) {
+ Node::wciter_t a(path.arc(skel, i));
+ for (size_t w = 0; w < width; ++a) {
+ uint32_t n = nsteps(a->lower, a->upper);
+ for (; n --> 0 && w < width; ++w) {
+ apply(&tags[w * nver], a->cmd, i + offby);
+ }
+ }
+ }
+ const tcmd_t *fcmd = path.node(skel, f).cmd;
+ for (size_t w = 0; w < width; ++w) {
+ apply(&tags[w * nver], fcmd, f); // only present in LATDFA
+ }
+
+ const size_t rule = path.node(skel, f).rule;
+ size_t matched = 0, ltag = 0, htag = 0, trail = 0;
+ if (rule != Rule::NONE) {
+
+ const Rule &r = skel.rules[rule];
+ ltag = r.ltag;
+ htag = r.htag;
+ trail = r.ttag;
+
+ // matched length might depend on tag values
+ if (trail == htag) {
+ matched = f;
+ } else {
+ assert(!fixed(skel.tags[trail])); // no fixed trailing context
+ matched = tags[skel.finvers[trail]].back();
+ assert(matched != Skeleton::DEFTAG);
+ }
+ }
+
+ // count keys
+ size_t nkey = 0;
+ for (size_t w = 0; w < width; ++w) {
+ nkey += 3;
+ for (size_t t = ltag; t < htag; ++t) {
+ const Tag &tag = skel.tags[t];
+ if (t == trail || fictive(tag)) continue;
+ const size_t
+ base = fixed(tag) ? tag.base : t,
+ bver = static_cast<size_t>(skel.finvers[base]);
+ if (history(tag)) nkey += tags[w * nver + bver].size();
+ ++nkey;
+ }
+ }
+
+ // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags
+ key_t *keys = new key_t[nkey], *k = keys;
+ for (size_t w = 0; w < width; ++w) {
+ *k++ = to_le(static_cast<key_t>(path.len()));
+ *k++ = to_le(static_cast<key_t>(matched));
+ *k++ = to_le(rule2key<key_t>(rule, skel.defrule));
+
+ for (size_t t = ltag; t < htag; ++t) {
+ const Tag &tag = skel.tags[t];
+ if (t == trail || fictive(tag)) continue;
+
+ const size_t
+ base = fixed(tag) ? tag.base : t,
+ bver = static_cast<size_t>(skel.finvers[base]);
+ const std::vector<size_t> &h = tags[w * nver + bver];
+ if (history(tag)) {
+ const size_t hlen = h.size();
+ *k++ = to_le(static_cast<key_t>(hlen));
+ for (size_t i = 0; i < hlen; ++i) {
+ *k++ = to_le(static_cast<key_t>(h[i]));
+ }
+ } else {
+ *k++ = to_le(static_cast<key_t>(h.back()));
+ }
+ }
+ }
+
+ // dump to file
+ fwrite(keys, sizeof(key_t), nkey, file);
+
+ delete[] tags;
+ delete[] keys;
}
// Emits input strings and keys for the path currently held in cover.prefix.
// The size of this contribution is path length times path width, computed in
// cover_size_t; nothing is written when that product reports overflow.
// Returns the computed size (whether or not anything was written).
template<typename cunit_t, typename key_t>
static cover_size_t cover_one(const Skeleton &skel, cover_t &cover)
{
- const path_t &path = cover.prefix;
+ const path_t &path = cover.prefix;
- const size_t width = path_width(path, skel);
+ const size_t width = path_width(path, skel);
- const cover_size_t size
- = cover_size_t::from64(path.len())
- * cover_size_t::from64(width);
+ const cover_size_t size
+ = cover_size_t::from64(path.len())
+ * cover_size_t::from64(width);
- if (!size.overflow()) {
- write_input<cunit_t>(path, skel, width, cover.input);
- write_keys<key_t>(path, skel, width, cover.keys);
- }
+ if (!size.overflow()) {
+ write_input<cunit_t>(path, skel, width, cover.input);
+ write_keys<key_t>(path, skel, width, cover.keys);
+ }
- return size;
+ return size;
}
/*
 * Recursive path-cover generation starting from node i, with cover.prefix
 * holding the path that leads to i.
 *
 * - A node without outgoing arcs marks its suffix as initialised.
 * - If the node's suffix is initialised, the suffix is appended to the
 *   prefix, the resulting path is emitted through cover_one() (its size is
 *   added to cover.size) and the suffix is removed from the prefix again.
 * - Otherwise, while the node's loop counter is below 2, every child is
 *   visited recursively (stopping early once cover.size reports overflow).
 *   Among children whose suffix is initialised the shortest one is chosen
 *   and this node's suffix becomes that child's suffix plus the child index.
 *   local_inc is defined elsewhere; presumably it increments the counter for
 *   the duration of the scope - confirm.
 *
 * min_idx is left uninitialised on purpose-looking grounds: it is only read
 * when min_sfx is non-NULL, and both are assigned together.
 */
template <typename cunit_t, typename key_t> static void gencover(
- const Skeleton &skel,
- cover_t &cover,
- size_t i)
+ const Skeleton &skel,
+ cover_t &cover,
+ size_t i)
{
- const Node &node = skel.nodes[i];
- uint8_t &loop = cover.loops[i];
- suffix_t &suffix = cover.suffixes[i];
- path_t &prefix = cover.prefix;
- cover_size_t &size = cover.size;
-
- if (node.end()) {
- suffix.init = true;
- }
-
- if (suffix.init)
- {
- prefix.push_sfx(suffix);
- size = size + cover_one<cunit_t, key_t>(skel, cover);
- prefix.pop_sfx(suffix);
- }
-
- // unroll one iteration of the loop
- else if (loop < 2) {
- local_inc _(loop);
-
- Node::arcs_t::const_iterator
- arc = node.arcs.begin(),
- end = node.arcs.end();
- const suffix_t *min_sfx = NULL;
- size_t min_idx;
-
- // pick the shortest suffix to minimize cover size
- // handle all child states before setting this state's suffix
- for (; arc != end && !size.overflow(); ++arc) {
- const size_t j = arc->first;
-
- prefix.push(j);
- gencover<cunit_t, key_t>(skel, cover, j);
- prefix.pop();
-
- const suffix_t &sfx = cover.suffixes[j];
- if (sfx.init && (!min_sfx || sfx.length() < min_sfx->length())) {
- min_sfx = &sfx;
- min_idx = j;
- }
- }
-
- if (min_sfx == NULL) {
- // all outgoing paths loop back into this node
- // this can happen in cases like [^]*
- }
- else {
- suffix = *min_sfx;
- suffix.push(min_idx);
- }
- }
+ const Node &node = skel.nodes[i];
+ uint8_t &loop = cover.loops[i];
+ suffix_t &suffix = cover.suffixes[i];
+ path_t &prefix = cover.prefix;
+ cover_size_t &size = cover.size;
+
+ if (node.end()) {
+ suffix.init = true;
+ }
+
+ if (suffix.init)
+ {
+ prefix.push_sfx(suffix);
+ size = size + cover_one<cunit_t, key_t>(skel, cover);
+ prefix.pop_sfx(suffix);
+ }
+
+ // unroll one iteration of the loop
+ else if (loop < 2) {
+ local_inc _(loop);
+
+ Node::arcs_t::const_iterator
+ arc = node.arcs.begin(),
+ end = node.arcs.end();
+ const suffix_t *min_sfx = NULL;
+ size_t min_idx;
+
+ // pick the shortest suffix to minimize cover size
+ // handle all child states before setting this state's suffix
+ for (; arc != end && !size.overflow(); ++arc) {
+ const size_t j = arc->first;
+
+ prefix.push(j);
+ gencover<cunit_t, key_t>(skel, cover, j);
+ prefix.pop();
+
+ const suffix_t &sfx = cover.suffixes[j];
+ if (sfx.init && (!min_sfx || sfx.length() < min_sfx->length())) {
+ min_sfx = &sfx;
+ min_idx = j;
+ }
+ }
+
+ if (min_sfx == NULL) {
+ // all outgoing paths loop back into this node
+ // this can happen in cases like [^]*
+ }
+ else {
+ suffix = *min_sfx;
+ suffix.push(min_idx);
+ }
+ }
}
// Generates the path cover from node 0 for fixed code-unit and key types and
// emits a warning if the accumulated cover size overflowed, in which case
// only a partial cover has been written.
template<typename cunit_t, typename key_t>
- static void generate_paths_cunit_key(const Skeleton &skel, cover_t &cover)
+ static void generate_paths_cunit_key(const Skeleton &skel, cover_t &cover)
{
- gencover<cunit_t, key_t>(skel, cover, 0);
- if (cover.size.overflow()) {
- warning(NULL, skel.line, false,
- "DFA %sis too large: can only generate partial path cover",
- incond(skel.cond).c_str());
- }
+ gencover<cunit_t, key_t>(skel, cover, 0);
+ if (cover.size.overflow()) {
+ warning(NULL, skel.line, false,
+ "DFA %sis too large: can only generate partial path cover",
+ incond(skel.cond).c_str());
+ }
}
// Selects the key type from skel.sizeof_key (8, 4, 2 or 1 bytes) and
// forwards to generate_paths_cunit_key. The switch has no default: any other
// value generates nothing.
template<typename cunit_t>
- static void generate_paths_cunit(const Skeleton &skel, cover_t &cover)
+ static void generate_paths_cunit(const Skeleton &skel, cover_t &cover)
{
- switch (skel.sizeof_key) {
- case 8: generate_paths_cunit_key<cunit_t, uint64_t>(skel, cover); break;
- case 4: generate_paths_cunit_key<cunit_t, uint32_t>(skel, cover); break;
- case 2: generate_paths_cunit_key<cunit_t, uint16_t>(skel, cover); break;
- case 1: generate_paths_cunit_key<cunit_t, uint8_t>(skel, cover); break;
- }
+ switch (skel.sizeof_key) {
+ case 8: generate_paths_cunit_key<cunit_t, uint64_t>(skel, cover); break;
+ case 4: generate_paths_cunit_key<cunit_t, uint32_t>(skel, cover); break;
+ case 2: generate_paths_cunit_key<cunit_t, uint16_t>(skel, cover); break;
+ case 1: generate_paths_cunit_key<cunit_t, uint8_t>(skel, cover); break;
+ }
}
// Selects the code-unit type from the encoding's code-unit size (4, 2 or 1
// bytes) and forwards to generate_paths_cunit. The switch has no default:
// any other size generates nothing.
static void generate_paths(const Skeleton &skel, cover_t &cover)
{
- switch (skel.opts->encoding.szCodeUnit()) {
- case 4: generate_paths_cunit<uint32_t>(skel, cover); break;
- case 2: generate_paths_cunit<uint16_t>(skel, cover); break;
- case 1: generate_paths_cunit<uint8_t>(skel, cover); break;
- }
+ switch (skel.opts->encoding.szCodeUnit()) {
+ case 4: generate_paths_cunit<uint32_t>(skel, cover); break;
+ case 2: generate_paths_cunit<uint16_t>(skel, cover); break;
+ case 1: generate_paths_cunit<uint8_t>(skel, cover); break;
+ }
}
// Generates the data files for one skeleton: "<base>.<name>.input" and
// "<base>.<name>.keys", both opened in binary write mode, where <base> is
// opts->output_file (or the literal "<stdout>" when that is empty) and
// <name> is skel.name. fatal() is called when a file cannot be opened; it is
// defined elsewhere and presumably does not return - confirm, otherwise a
// NULL stream would be used below. Both files are closed after generation.
void emit_data(const Skeleton &skel)
{
- std::string fname = skel.opts->output_file;
- if (fname.empty()) {
- fname = "<stdout>";
- }
-
- const std::string input_name = fname + "." + skel.name + ".input";
- FILE *input = fopen(input_name.c_str(), "wb");
- if (!input) {
- fatal("cannot open file: %s", input_name.c_str());
- }
- const std::string keys_name = std::string(fname) + "." + skel.name + ".keys";
- FILE *keys = fopen (keys_name.c_str(), "wb");
- if (!keys) {
- fatal("cannot open file: %s", keys_name.c_str());
- }
-
- cover_t cover(input, keys, skel.nodes_count);
- generate_paths(skel, cover);
-
- fclose(input);
- fclose(keys);
+ std::string fname = skel.opts->output_file;
+ if (fname.empty()) {
+ fname = "<stdout>";
+ }
+
+ const std::string input_name = fname + "." + skel.name + ".input";
+ FILE *input = fopen(input_name.c_str(), "wb");
+ if (!input) {
+ fatal("cannot open file: %s", input_name.c_str());
+ }
+ const std::string keys_name = std::string(fname) + "." + skel.name + ".keys";
+ FILE *keys = fopen (keys_name.c_str(), "wb");
+ if (!keys) {
+ fatal("cannot open file: %s", keys_name.c_str());
+ }
+
+ cover_t cover(input, keys, skel.nodes_count);
+ generate_paths(skel, cover);
+
+ fclose(input);
+ fclose(keys);
}
} // namespace re2c
// Computes the maximal path length from node i, memoised in dists
// (DIST_ERROR marks "not computed yet").
// different from YYMAXFILL calculation
// in the way it handles loops and empty regexp
//   - a cached value is returned as is
//   - a node without outgoing arcs has distance 0
//   - a node whose loop counter exceeds 1 returns 0 without caching
//   - otherwise the result is 1 + the maximum over all children, capped at
//     DIST_MAX, and is stored in dists[i]
// local_inc is defined elsewhere; presumably it increments loops[i] for the
// duration of the scope - confirm.
static uint32_t calc_dist(const Skeleton &skel
- , std::vector<uint8_t> &loops
- , std::vector<uint32_t> &dists
- , size_t i)
+ , std::vector<uint8_t> &loops
+ , std::vector<uint32_t> &dists
+ , size_t i)
{
- const Node &node = skel.nodes[i];
- uint32_t dist = dists[i];
+ const Node &node = skel.nodes[i];
+ uint32_t dist = dists[i];
- if (dist != DIST_ERROR) {
- return dist;
- }
+ if (dist != DIST_ERROR) {
+ return dist;
+ }
- else if (node.end()) {
- return dists[i] = 0;
- }
+ else if (node.end()) {
+ return dists[i] = 0;
+ }
- // we cut the looping path, so the current node is like
- // the "end" node; but the actual value for this node
- // is yet to be calculated on the recursive return
- else if (loops[i] > 1) {
- return 0;
- }
+ // we cut the looping path, so the current node is like
+ // the "end" node; but the actual value for this node
+ // is yet to be calculated on the recursive return
+ else if (loops[i] > 1) {
+ return 0;
+ }
- // unroll one iteration of loops
- // (must be consistent with skeleton data generation)
- else {
- local_inc _(loops[i]);
+ // unroll one iteration of loops
+ // (must be consistent with skeleton data generation)
+ else {
+ local_inc _(loops[i]);
- Node::arcs_t::const_iterator
- arc = node.arcs.begin(),
- end = node.arcs.end();
+ Node::arcs_t::const_iterator
+ arc = node.arcs.begin(),
+ end = node.arcs.end();
- // handle all child states before setting this state's suffix
- for (; arc != end; ++arc) {
- const uint32_t d = calc_dist(skel, loops, dists, arc->first);
+ // handle all child states before setting this state's suffix
+ for (; arc != end; ++arc) {
+ const uint32_t d = calc_dist(skel, loops, dists, arc->first);
- // not necessarily true for dists[arc->first]
- assert (d != DIST_ERROR);
+ // not necessarily true for dists[arc->first]
+ assert (d != DIST_ERROR);
- dist = (dist == DIST_ERROR) ? d : std::max(dist, d);
- }
+ dist = (dist == DIST_ERROR) ? d : std::max(dist, d);
+ }
- return dists[i] = std::min(dist + 1, DIST_MAX);
- }
+ return dists[i] = std::min(dist + 1, DIST_MAX);
+ }
}
// calculate maximal path length, check overflow
// Runs calc_dist from node 0 with fresh loop counters and distance cache.
// A result equal to DIST_MAX (the cap applied by calc_dist) is reported
// through fatal(); otherwise the length is returned.
uint32_t maxpath(const Skeleton &skel)
{
- std::vector<uint8_t> loops(skel.nodes_count);
- std::vector<uint32_t> dists(skel.nodes_count, DIST_ERROR);
- const uint32_t maxlen = calc_dist(skel, loops, dists, 0);
- if (maxlen == DIST_MAX) {
- fatal("DFA path %sis too long", incond(skel.cond).c_str());
- }
- return maxlen;
+ std::vector<uint8_t> loops(skel.nodes_count);
+ std::vector<uint32_t> dists(skel.nodes_count, DIST_ERROR);
+ const uint32_t maxlen = calc_dist(skel, loops, dists, 0);
+ if (maxlen == DIST_MAX) {
+ fatal("DFA path %sis too long", incond(skel.cond).c_str());
+ }
+ return maxlen;
}
} // namespace re2c
// A sequence of node indices plus an `init` flag (false after construction).
// Indices are appended with push(); path_t is a friend and reads the
// sequence in reverse when appending a suffix to a path.
struct suffix_t
{
- bool init;
+ bool init;
private:
- std::vector<size_t> arcs;
+ std::vector<size_t> arcs;
public:
- suffix_t(): init(false), arcs() {}
- size_t length () const
- {
- return arcs.size ();
- }
- void push(size_t i)
- {
- arcs.push_back(i);
- }
- friend class path_t;
+ suffix_t(): init(false), arcs() {}
+ size_t length () const
+ {
+ return arcs.size ();
+ }
+ void push(size_t i)
+ {
+ arcs.push_back(i);
+ }
+ friend class path_t;
};
// A path through the skeleton stored as a sequence of node indices; it always
// starts with the index passed to the constructor.
//   len()      - number of arcs, i.e. number of stored nodes minus one
//   node(i)    - the i-th node on the path
//   arc(i)     - the arc from the i-th node to the (i+1)-th node; the result
//                of find() is dereferenced without a check, so the arc must
//                exist in the source node's arc map
//   push/pop   - append / remove the last node
//   push_sfx   - append the nodes of a suffix in reverse storage order
//   pop_sfx    - drop as many trailing nodes as the suffix holds
//   operator<  - shorter paths order first; equal-length paths are compared
//                lexicographically
class path_t
{
- std::vector<size_t> arcs;
+ std::vector<size_t> arcs;
public:
- explicit path_t(size_t i) : arcs()
- {
- arcs.push_back(i);
- }
- size_t len() const
- {
- return arcs.size() - 1;
- }
- const Node& node(const Skeleton &skel, size_t i) const
- {
- return skel.nodes[arcs[i]];
- }
- const Node::arc_t& arc(const Skeleton &skel, size_t i) const
- {
- return skel.nodes[arcs[i]].arcs.find(arcs[i + 1])->second;
- }
- void push(size_t n)
- {
- arcs.push_back(n);
- }
- void pop()
- {
- arcs.pop_back();
- }
- void push_sfx(const suffix_t &suffix)
- {
- arcs.insert(arcs.end(), suffix.arcs.rbegin(), suffix.arcs.rend());
- }
- void pop_sfx(const suffix_t &suffix)
- {
- arcs.resize(arcs.size() - suffix.arcs.size());
- }
- bool operator<(const path_t &p) const
- {
- const size_t
- s1 = arcs.size(),
- s2 = p.arcs.size();
- return (s1 == s2 && arcs < p.arcs)
- || s1 < s2;
- }
+ explicit path_t(size_t i) : arcs()
+ {
+ arcs.push_back(i);
+ }
+ size_t len() const
+ {
+ return arcs.size() - 1;
+ }
+ const Node& node(const Skeleton &skel, size_t i) const
+ {
+ return skel.nodes[arcs[i]];
+ }
+ const Node::arc_t& arc(const Skeleton &skel, size_t i) const
+ {
+ return skel.nodes[arcs[i]].arcs.find(arcs[i + 1])->second;
+ }
+ void push(size_t n)
+ {
+ arcs.push_back(n);
+ }
+ void pop()
+ {
+ arcs.pop_back();
+ }
+ void push_sfx(const suffix_t &suffix)
+ {
+ arcs.insert(arcs.end(), suffix.arcs.rbegin(), suffix.arcs.rend());
+ }
+ void pop_sfx(const suffix_t &suffix)
+ {
+ arcs.resize(arcs.size() - suffix.arcs.size());
+ }
+ bool operator<(const path_t &p) const
+ {
+ const size_t
+ s1 = arcs.size(),
+ s2 = p.arcs.size();
+ return (s1 == s2 && arcs < p.arcs)
+ || s1 < s2;
+ }
};
} // namespace re2c
struct tcmd_t;
// Creates a node with no arcs, no rule (Rule::NONE) and no tag command.
Node::Node()
- : arcs()
- , rule(Rule::NONE)
- , cmd(NULL)
+ : arcs()
+ , rule(Rule::NONE)
+ , cmd(NULL)
{}
// Fills this node from DFA state s.
// charset holds nc + 1 boundaries; symbol class c covers the characters
// [charset[c], charset[c + 1] - 1]. Consecutive classes that share both the
// target state and the tag command are merged into one range, which is
// appended to the arc leading to that target. Targets equal to dfa_t::NIL are
// redirected to the node index `nil`. If a single merged group spans all
// classes and leads to `nil`, no arc is created at all, which makes the node
// final (see Node::end). Finally the state's rule and the tag command stored
// at index nc are copied.
void Node::init(const dfa_state_t *s,
- const std::vector<uint32_t> &charset, size_t nil)
+ const std::vector<uint32_t> &charset, size_t nil)
{
- const size_t nc = charset.size() - 1;
- for (uint32_t c = 0, l = 0; c < nc;) {
+ const size_t nc = charset.size() - 1;
+ for (uint32_t c = 0, l = 0; c < nc;) {
- size_t j = s->arcs[c];
- const tcmd_t *t = s->tcmd[c];
- for (; ++c < nc && s->arcs[c] == j && s->tcmd[c] == t;);
- if (j == dfa_t::NIL) j = nil;
+ size_t j = s->arcs[c];
+ const tcmd_t *t = s->tcmd[c];
+ for (; ++c < nc && s->arcs[c] == j && s->tcmd[c] == t;);
+ if (j == dfa_t::NIL) j = nil;
- // all arcs go to default node => this node is final
- if (l == 0 && c == nc && j == nil) break;
+ // all arcs go to default node => this node is final
+ if (l == 0 && c == nc && j == nil) break;
- const uint32_t u = charset[c];
- arcs[j].push_back(Node::range_t(l, u - 1, t));
+ const uint32_t u = charset[c];
+ arcs[j].push_back(Node::range_t(l, u - 1, t));
- l = u;
- }
+ l = u;
+ }
- rule = s->rule;
- cmd = s->tcmd[nc];
+ rule = s->rule;
+ cmd = s->tcmd[nc];
}
// True when the node has no outgoing arcs.
bool Node::end() const
{
- return arcs.size() == 0;
+ return arcs.size() == 0;
}
// Sentinel tag value: the largest size_t.
const size_t Skeleton::DEFTAG = std::numeric_limits<size_t>::max();
// Builds the skeleton of a DFA.
// One node is allocated per DFA state plus one extra node (the last index,
// `nil`) that serves as the default target; only the first nodes_count - 1
// nodes are initialised from DFA states, the extra one keeps its
// default-constructed state. charset, rules and tags are stored as
// references to the DFA's containers and finvers as the DFA's pointer, so
// the DFA must outlive the skeleton.
// sizeof_key starts at 8 and is narrowed to 1, 2 or 4 bytes when the larger
// of the maximal path length and (number of rules + 1) fits the
// corresponding unsigned type. maxpath(*this) is called from the constructor
// body, after the nodes have been initialised.
Skeleton::Skeleton(
- const dfa_t &dfa,
- const opt_t *op,
- size_t def,
- const std::string &dfa_name,
- const std::string &dfa_cond,
- uint32_t dfa_line)
- : opts(op)
- , name(dfa_name)
- , cond(dfa_cond)
- , line(dfa_line)
- , nodes_count(dfa.states.size() + 1) // +1 for default state
- , nodes(new Node[nodes_count])
- , cmd0(dfa.tcmd0)
- , sizeof_key(8)
- , defrule(def)
- , ntagver(static_cast<size_t>(dfa.maxtagver) + 1)
- , charset(dfa.charset)
- , rules(dfa.rules)
- , tags(dfa.tags)
- , finvers(dfa.finvers)
+ const dfa_t &dfa,
+ const opt_t *op,
+ size_t def,
+ const std::string &dfa_name,
+ const std::string &dfa_cond,
+ uint32_t dfa_line)
+ : opts(op)
+ , name(dfa_name)
+ , cond(dfa_cond)
+ , line(dfa_line)
+ , nodes_count(dfa.states.size() + 1) // +1 for default state
+ , nodes(new Node[nodes_count])
+ , cmd0(dfa.tcmd0)
+ , sizeof_key(8)
+ , defrule(def)
+ , ntagver(static_cast<size_t>(dfa.maxtagver) + 1)
+ , charset(dfa.charset)
+ , rules(dfa.rules)
+ , tags(dfa.tags)
+ , finvers(dfa.finvers)
{
- // initialize nodes
- const size_t nil = nodes_count - 1;
- for (size_t i = 0; i < nil; ++i) {
- nodes[i].init(dfa.states[i], charset, nil);
- }
-
- // initialize size of key
- const size_t maxlen = maxpath(*this);
- const size_t maxrule = dfa.rules.size() + 1; // +1 for none-rule
- const size_t max = std::max(maxlen, maxrule);
- if (max <= std::numeric_limits<uint8_t>::max()) {
- sizeof_key = 1;
- } else if (max <= std::numeric_limits<uint16_t>::max()) {
- sizeof_key = 2;
- } else if (max <= std::numeric_limits<uint32_t>::max()) {
- sizeof_key = 4;
- }
+ // initialize nodes
+ const size_t nil = nodes_count - 1;
+ for (size_t i = 0; i < nil; ++i) {
+ nodes[i].init(dfa.states[i], charset, nil);
+ }
+
+ // initialize size of key
+ const size_t maxlen = maxpath(*this);
+ const size_t maxrule = dfa.rules.size() + 1; // +1 for none-rule
+ const size_t max = std::max(maxlen, maxrule);
+ if (max <= std::numeric_limits<uint8_t>::max()) {
+ sizeof_key = 1;
+ } else if (max <= std::numeric_limits<uint16_t>::max()) {
+ sizeof_key = 2;
+ } else if (max <= std::numeric_limits<uint32_t>::max()) {
+ sizeof_key = 4;
+ }
}
// Releases the node array allocated by the constructor.
Skeleton::~Skeleton()
{
- delete[] nodes;
+ delete[] nodes;
}
// Runtime dispatch to rule2key<key_t> by key size in bytes (8, 4, 2 or 1);
// the result is widened to uint64_t. Any other size trips the assertion;
// when assertions are compiled out, control falls through from `default`
// into the 8-byte case.
uint64_t rule2key(size_t rule, size_t key, size_t def)
{
- switch (key) {
- default: assert(false); // shouldn't happen
- case 8: return rule2key<uint64_t>(rule, def);
- case 4: return rule2key<uint32_t>(rule, def);
- case 2: return rule2key<uint16_t>(rule, def);
- case 1: return rule2key<uint8_t>(rule, def);
- }
+ switch (key) {
+ default: assert(false); // shouldn't happen
+ case 8: return rule2key<uint64_t>(rule, def);
+ case 4: return rule2key<uint32_t>(rule, def);
+ case 2: return rule2key<uint16_t>(rule, def);
+ case 1: return rule2key<uint8_t>(rule, def);
+ }
}
} // namespace re2c
// Skeleton node.
//   range_t - a character range [lower, upper] together with the tag command
//             attached to it (default: 0, 0, NULL)
//   arc_t   - all ranges leading to one target node
//   arcs_t  - map from target node index to the arc leading there
//   arcs    - outgoing arcs of this node
//   rule    - rule index of this node
//   cmd     - tag command of this node
// wrap_citer_t is defined elsewhere; presumably a const iterator that wraps
// around at the end of the container - confirm.
// FORBID_COPY is a project macro defined elsewhere; presumably it disables
// copying - confirm.
struct Node
{
- struct range_t {
- uint32_t lower;
- uint32_t upper;
- const tcmd_t *cmd;
-
- range_t(): lower(0), upper(0), cmd(NULL) {}
- range_t(uint32_t l, uint32_t u, const tcmd_t *c)
- : lower(l), upper(u), cmd(c) {}
- };
-
- typedef std::vector<range_t> arc_t;
- typedef std::map<size_t, arc_t> arcs_t;
- typedef arc_t::const_iterator citer_t;
- typedef wrap_citer_t<arc_t> wciter_t;
-
- arcs_t arcs;
- size_t rule;
- const tcmd_t *cmd;
-
- Node();
- void init(const dfa_state_t *s,
- const std::vector<uint32_t> &charset, size_t nil);
- bool end() const;
-
- FORBID_COPY(Node);
+ struct range_t {
+ uint32_t lower;
+ uint32_t upper;
+ const tcmd_t *cmd;
+
+ range_t(): lower(0), upper(0), cmd(NULL) {}
+ range_t(uint32_t l, uint32_t u, const tcmd_t *c)
+ : lower(l), upper(u), cmd(c) {}
+ };
+
+ typedef std::vector<range_t> arc_t;
+ typedef std::map<size_t, arc_t> arcs_t;
+ typedef arc_t::const_iterator citer_t;
+ typedef wrap_citer_t<arc_t> wciter_t;
+
+ arcs_t arcs;
+ size_t rule;
+ const tcmd_t *cmd;
+
+ Node();
+ void init(const dfa_state_t *s,
+ const std::vector<uint32_t> &charset, size_t nil);
+ bool end() const;
+
+ FORBID_COPY(Node);
};
// Skeleton of a DFA.
//   DEFTAG      - sentinel tag value
//   opts        - options (not owned, stored as passed)
//   name, cond  - DFA name and condition name (copied)
//   line        - line number passed at construction
//   nodes_count - number of entries in `nodes`
//   nodes       - node array, owned (allocated in the constructor, freed in
//                 the destructor)
//   cmd0        - initial tag command
//   sizeof_key  - key size in bytes
//   defrule     - index of the default rule
//   ntagver     - number of tag versions
//   charset, rules, tags - references to containers owned elsewhere
//   finvers     - pointer to tag version data owned elsewhere
// FORBID_COPY is a project macro defined elsewhere; presumably it disables
// copying - confirm.
struct Skeleton
{
- static const size_t DEFTAG;
-
- const opt_t *opts;
- const std::string name;
- const std::string cond;
- const uint32_t line;
-
- const size_t nodes_count;
- Node *nodes;
- const tcmd_t *cmd0;
-
- size_t sizeof_key;
- size_t defrule;
- size_t ntagver;
- const std::vector<uint32_t> &charset;
- const std::valarray<Rule> &rules;
- const std::vector<Tag> &tags;
- const tagver_t *finvers;
-
- Skeleton(const dfa_t &dfa, const opt_t *op, size_t def,
- const std::string &dfa_name, const std::string &dfa_cond,
- uint32_t dfa_line);
- ~Skeleton ();
- FORBID_COPY(Skeleton);
+ static const size_t DEFTAG;
+
+ const opt_t *opts;
+ const std::string name;
+ const std::string cond;
+ const uint32_t line;
+
+ const size_t nodes_count;
+ Node *nodes;
+ const tcmd_t *cmd0;
+
+ size_t sizeof_key;
+ size_t defrule;
+ size_t ntagver;
+ const std::vector<uint32_t> &charset;
+ const std::valarray<Rule> &rules;
+ const std::vector<Tag> &tags;
+ const tagver_t *finvers;
+
+ Skeleton(const dfa_t &dfa, const opt_t *op, size_t def,
+ const std::string &dfa_name, const std::string &dfa_cond,
+ uint32_t dfa_line);
+ ~Skeleton ();
+ FORBID_COPY(Skeleton);
};
// Maps a rule index to its key of type key_t:
//   Rule::NONE       -> the maximum value of key_t
//   the default rule -> the maximum value of key_t minus one
//   any other rule   -> the rule index converted to key_t
// The Rule::NONE check comes first, so it wins when def is Rule::NONE too.
template<typename key_t> key_t rule2key(size_t r, size_t def)
{
- if (r == Rule::NONE) {
- return std::numeric_limits<key_t>::max();
- } else if (r == def) {
- key_t k = std::numeric_limits<key_t>::max();
- return --k;
- } else {
- return static_cast<key_t>(r);
- }
+ if (r == Rule::NONE) {
+ return std::numeric_limits<key_t>::max();
+ } else if (r == def) {
+ key_t k = std::numeric_limits<key_t>::max();
+ return --k;
+ } else {
+ return static_cast<key_t>(r);
+ }
}
uint64_t rule2key(size_t rule, size_t key, size_t def);
void emit_data(const Skeleton &skel);
void emit_prolog(OutputFile & o);
void emit_start(OutputFile &o, size_t maxfill, size_t maxnmatch, const std::string &name,
- size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker,
- const std::set<std::string> &stagnames, const std::set<std::string> &stagvars,
- const std::set<std::string> &mtagnames, const std::set<std::string> &mtagvars,
- bitmaps_t &bitmaps);
+ size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker,
+ const std::set<std::string> &stagnames, const std::set<std::string> &stagvars,
+ const std::set<std::string> &mtagnames, const std::set<std::string> &mtagvars,
+ bitmaps_t &bitmaps);
void emit_end(OutputFile &o, const std::string &name, bool backup, bool oldstyle_ctxmarker,
- const std::set<std::string> &mtagnames);
+ const std::set<std::string> &mtagnames);
void emit_epilog(OutputFile &o, const std::set<std::string> &names);
void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid);
// True when bit number `bit` (0 = least significant) of n is set.
// `bit` must be below 32 for the shift to be defined.
static inline bool bit_set (uint32_t n, uint32_t bit)
{
- return n & (1u << bit);
+ return n & (1u << bit);
}
// Builds a Range list from the low BITS bits of the bitmask n: every maximal
// run of set bits starting at bit lb and ending just before bit i is passed
// to Range::append (p, lb, i). Bits at position BITS and above are ignored.
// Returns NULL when none of the low BITS bits is set.
// Range::append is defined elsewhere; presumably it appends the half-open
// range [lb, i) and advances p - confirm.
template <uint8_t BITS>
re2c::Range * range (uint32_t n)
{
- RE2C_STATIC_ASSERT (BITS <= 31);
-
- re2c::Range * r = NULL;
- re2c::Range ** p = &r;
- for (uint32_t i = 0; i < BITS; ++i)
- {
- for (; i < BITS && !bit_set (n, i); ++i);
- if (i == BITS && !bit_set (n, BITS - 1))
- {
- break;
- }
- const uint32_t lb = i;
- for (; i < BITS && bit_set (n, i); ++i);
- re2c::Range::append (p, lb, i);
- }
- return r;
+ RE2C_STATIC_ASSERT (BITS <= 31);
+
+ re2c::Range * r = NULL;
+ re2c::Range ** p = &r;
+ for (uint32_t i = 0; i < BITS; ++i)
+ {
+ for (; i < BITS && !bit_set (n, i); ++i);
+ if (i == BITS && !bit_set (n, BITS - 1))
+ {
+ break;
+ }
+ const uint32_t lb = i;
+ for (; i < BITS && bit_set (n, i); ++i);
+ re2c::Range::append (p, lb, i);
+ }
+ return r;
}
// Reference union: the Range list of the bitwise OR of the two bitmasks.
template <uint8_t BITS>
re2c::Range * add (uint32_t n1, uint32_t n2)
{
- return range<BITS> (n1 | n2);
+ return range<BITS> (n1 | n2);
}
// Reference difference: the Range list of the bits set in n1 but not in n2.
template <uint8_t BITS>
re2c::Range * sub (uint32_t n1, uint32_t n2)
{
- return range<BITS> (n1 & ~n2);
+ return range<BITS> (n1 & ~n2);
}
} // namespace re2c_test
// True when both Range lists have the same length and pairwise equal
// lower/upper bounds. Two NULL lists are equal.
static bool equal (const re2c::Range * r1, const re2c::Range * r2)
{
- for (; r1 && r2; r1 = r1->next (), r2 = r2->next ())
- {
- if (r1->lower () != r2->lower ()
- || r1->upper () != r2->upper ())
- {
- return false;
- }
- }
- return !r1 && !r2;
+ for (; r1 && r2; r1 = r1->next (), r2 = r2->next ())
+ {
+ if (r1->lower () != r2->lower ()
+ || r1->upper () != r2->upper ())
+ {
+ return false;
+ }
+ }
+ return !r1 && !r2;
}
// Prints a Range list to stderr in hexadecimal. A NULL list prints "[]";
// every element prints as "[l-u]" or, when l is not below u, as "[l]", where
// u is upper () - 1.
static void show (const re2c::Range * r)
{
- if (!r)
- {
- fprintf (stderr, "[]");
- }
- for (; r; r = r->next ())
- {
- const uint32_t l = r->lower ();
- const uint32_t u = r->upper () - 1;
- if (l < u)
- {
- fprintf (stderr, "[%X-%X]", l, u);
- }
- else
- {
- fprintf (stderr, "[%X]", l);
- }
- }
+ if (!r)
+ {
+ fprintf (stderr, "[]");
+ }
+ for (; r; r = r->next ())
+ {
+ const uint32_t l = r->lower ();
+ const uint32_t u = r->upper () - 1;
+ if (l < u)
+ {
+ fprintf (stderr, "[%X-%X]", l, u);
+ }
+ else
+ {
+ fprintf (stderr, "[%X]", l);
+ }
+ }
}
// Compares two results of an operation on r1 and r2.
// Returns 0 when op1 and op2 are equal. Otherwise prints a report to stderr
// (operands r1 and r2, then op2 after "====>" and op1 after "=/=") and
// returns 1. `op` is the operation name used in the report.
static int32_t diff
- ( const re2c::Range * r1
- , const re2c::Range * r2
- , const re2c::Range * op1
- , const re2c::Range * op2
- , const char * op)
+ ( const re2c::Range * r1
+ , const re2c::Range * r2
+ , const re2c::Range * op1
+ , const re2c::Range * op2
+ , const char * op)
{
- if (equal (op1, op2))
- {
- return 0;
- }
- else
- {
- fprintf (stderr, "%s error: ", op);
- show (r1);
- fprintf (stderr, " %s ", op);
- show (r2);
- fprintf (stderr, " ====> ");
- show (op2);
- fprintf (stderr, " =/= ");
- show (op1);
- fprintf (stderr, "\n");
- return 1;
- }
+ if (equal (op1, op2))
+ {
+ return 0;
+ }
+ else
+ {
+ fprintf (stderr, "%s error: ", op);
+ show (r1);
+ fprintf (stderr, " %s ", op);
+ show (r2);
+ fprintf (stderr, " ====> ");
+ show (op2);
+ fprintf (stderr, " =/= ");
+ show (op1);
+ fprintf (stderr, "\n");
+ return 1;
+ }
}
static int32_t test ()
{
- int32_t ok = 0;
+ int32_t ok = 0;
- static const uint32_t BITS = 8;
- static const uint32_t N = 1u << BITS;
- for (uint32_t i = 0; i <= N; ++i)
- {
- for (uint32_t j = 0; j <= N; ++j)
- {
- re2c::Range * r1 = range<BITS> (i);
- re2c::Range * r2 = range<BITS> (j);
- ok |= diff (r1, r2, add<BITS> (i, j), re2c::Range::add (r1, r2), "U");
- ok |= diff (r1, r2, sub<BITS> (i, j), re2c::Range::sub (r1, r2), "D");
- re2c::Range::vFreeList.clear ();
- }
- }
+ static const uint32_t BITS = 8;
+ static const uint32_t N = 1u << BITS;
+ for (uint32_t i = 0; i <= N; ++i)
+ {
+ for (uint32_t j = 0; j <= N; ++j)
+ {
+ re2c::Range * r1 = range<BITS> (i);
+ re2c::Range * r2 = range<BITS> (j);
+ ok |= diff (r1, r2, add<BITS> (i, j), re2c::Range::add (r1, r2), "U");
+ ok |= diff (r1, r2, sub<BITS> (i, j), re2c::Range::sub (r1, r2), "D");
+ re2c::Range::vFreeList.clear ();
+ }
+ }
- return ok;
+ return ok;
}
} // namespace re2c_test
int main ()
{
- return re2c_test::test ();
+ return re2c_test::test ();
}
// Writes the decimal digits of u backwards, ending just before s, and returns
// a pointer to the first digit. No terminating null is written. For u == 0
// nothing is written and s itself is returned.
static char * u64_to_s_fastest_ever (uint64_t u, char * s)
{
    for (; u != 0; u /= 10)
    {
        --s;
        *s = static_cast<char> ('0' + u % 10);
    }
    return s;
}
static int32_t test_u (uint64_t i)
{
- char s [DIGITS];
- char * const s_end = s + DIGITS;
- char * const s_start = u64_to_s_fastest_ever (i, s_end);
- uint32_t u = i == 0; // not equal to i
- if (s_to_u32_unsafe (s_start, s_end, u) && u != i)
- {
- fprintf (stderr, "unsigned: expected: %lu, got: %u\n", i, u);
- return 1;
- }
- return 0;
+ char s [DIGITS];
+ char * const s_end = s + DIGITS;
+ char * const s_start = u64_to_s_fastest_ever (i, s_end);
+ uint32_t u = i == 0; // not equal to i
+ if (s_to_u32_unsafe (s_start, s_end, u) && u != i)
+ {
+ fprintf (stderr, "unsigned: expected: %lu, got: %u\n", i, u);
+ return 1;
+ }
+ return 0;
}
static int32_t test_i (int64_t i)
{
- char s [DIGITS];
- char * const s_end = s + DIGITS;
- const uint64_t i_abs = i < 0
- ? static_cast<uint64_t> (-i)
- : static_cast<uint64_t> (i);
- char * s_start = u64_to_s_fastest_ever (i_abs, s_end);
- if (i < 0)
- {
- *--s_start = '-';
- }
- int32_t j = i == 0; // not equal to i
- if (s_to_i32_unsafe (s_start, s_end, j) && j != i)
- {
- fprintf (stderr, "signed: expected: %ld, got: %d\n", i, j);
- return 1;
- }
- return 0;
+ char s [DIGITS];
+ char * const s_end = s + DIGITS;
+ const uint64_t i_abs = i < 0
+ ? static_cast<uint64_t> (-i)
+ : static_cast<uint64_t> (i);
+ char * s_start = u64_to_s_fastest_ever (i_abs, s_end);
+ if (i < 0)
+ {
+ *--s_start = '-';
+ }
+ int32_t j = i == 0; // not equal to i
+ if (s_to_i32_unsafe (s_start, s_end, j) && j != i)
+ {
+ fprintf (stderr, "signed: expected: %ld, got: %d\n", i, j);
+ return 1;
+ }
+ return 0;
}
static int32_t test ()
{
- int32_t ok = 0;
+ int32_t ok = 0;
- static const uint64_t UDELTA = 0xFFFF;
- // zero neighbourhood
- for (uint64_t i = 0; i <= UDELTA; ++i)
- {
- ok |= test_u (i);
- }
- // u32_max neighbourhood
- static const uint64_t u32_max = std::numeric_limits<uint32_t>::max();
- for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i)
- {
- ok |= test_u (i);
- }
+ static const uint64_t UDELTA = 0xFFFF;
+ // zero neighbourhood
+ for (uint64_t i = 0; i <= UDELTA; ++i)
+ {
+ ok |= test_u (i);
+ }
+ // u32_max neighbourhood
+ static const uint64_t u32_max = std::numeric_limits<uint32_t>::max();
+ for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i)
+ {
+ ok |= test_u (i);
+ }
- static const int64_t IDELTA = 0xFFFF;
- // i32_min neighbourhood
- static const int64_t i32_min = std::numeric_limits<int32_t>::min();
- for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i)
- {
- ok |= test_i (i);
- }
- // zero neighbourhood
- for (int64_t i = -IDELTA; i <= IDELTA; ++i)
- {
- ok |= test_i (i);
- }
- // i32_max neighbourhood
- static const int64_t i32_max = std::numeric_limits<int32_t>::max();
- for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i)
- {
- ok |= test_i (i);
- }
+ static const int64_t IDELTA = 0xFFFF;
+ // i32_min neighbourhood
+ static const int64_t i32_min = std::numeric_limits<int32_t>::min();
+ for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i)
+ {
+ ok |= test_i (i);
+ }
+ // zero neighbourhood
+ for (int64_t i = -IDELTA; i <= IDELTA; ++i)
+ {
+ ok |= test_i (i);
+ }
+ // i32_max neighbourhood
+ static const int64_t i32_max = std::numeric_limits<int32_t>::max();
+ for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i)
+ {
+ ok |= test_i (i);
+ }
- return ok;
+ return ok;
}
} // namespace re2c_test
int main ()
{
- return re2c_test::test ();
+ return re2c_test::test ();
}
// Returns raw storage for n objects of type T obtained from 'operator new'.
// No constructors are run on the returned memory.
template <typename T> T * allocate (size_t n)
{
    return static_cast<T *> (operator new (n * sizeof (T)));
}
} // namespace re2c
// Hands out successive values of num_t. num_t must be default-constructible,
// copyable and provide inc().
template <typename num_t>
class counter_t
{
    num_t num;

public:
    counter_t ()
        : num ()
    {}

    // returns the current value and advances the counter
    num_t next ()
    {
        const num_t current (num);
        num.inc ();
        return current;
    }

    // restarts from a default-constructed num_t
    void reset ()
    {
        num = num_t ();
    }
};
} // namespace re2c
// Declares (without defining) a private copy constructor and copy assignment
// operator for 'klass'. Use it as the last thing in a class definition:
// everything after it is in the private section.
#define FORBID_COPY(klass) \
    private: \
        klass (const klass &); \
        klass & operator = (const klass &)
#endif // _RE2C_UTIL_FORBID_COPY_
// Set of owned pointers. clear() (also run by the destructor) deletes every
// stored pointer. While clear() is running, erase() does nothing, so that an
// element whose destructor calls erase() on this list does not modify the
// set during the iteration.
template<class _Ty>
class free_list: protected std::set<_Ty>
{
public:
    typedef typename std::set<_Ty>::iterator iterator;
    typedef typename std::set<_Ty>::size_type size_type;
    typedef typename std::set<_Ty>::key_type key_type;

    free_list(): in_clear(false)
    {
    }

    using std::set<_Ty>::insert;

    // Removes 'key' without deleting it; returns the number of elements
    // removed, or 0 when called during clear().
    size_type erase(const key_type& key)
    {
        return in_clear ? 0 : std::set<_Ty>::erase(key);
    }

    // Deletes all stored pointers and empties the set.
    void clear()
    {
        in_clear = true;

        const iterator last = this->end();
        for (iterator it = this->begin(); it != last; ++it)
        {
            delete *it;
        }
        std::set<_Ty>::clear();

        in_clear = false;
    }

    ~free_list()
    {
        clear();
    }

protected:
    bool in_clear; // true only while clear() is iterating
};
} // end namespace re2c
// Folds 'size' bytes starting at 'data' into the running hash 'h', one byte
// at a time, and returns the updated hash. With size == 0 the hash is
// returned unchanged.
inline uint32_t hash32(uint32_t h, const void *data, size_t size)
{
    const uint8_t *p = static_cast<const uint8_t*>(data);
    const uint8_t *const stop = p + size;
    while (p != stop) {
        h ^= (h << 5) + (h >> 2) + *p;
        ++p;
    }
    return h;
}
} // namespace re2c
// Scope guard: increments the given counter on construction and decrements
// it again on destruction.
template <typename counter_t>
struct local_increment_t
{
    counter_t & counter;

    inline explicit local_increment_t (counter_t & c)
        : counter (++c)
    {}

    inline ~local_increment_t ()
    {
        --counter;
    }
};
} // namespace re2c
// Append-only store of data_t values chained by hash. 'lookup' maps a hash to
// the index of the element most recently pushed with that hash; each
// element's 'next' is the index of the element pushed before it with the same
// hash, or NIL at the end of the chain.
template<typename data_t, typename hash_t = uint32_t>
struct lookup_t
{
    static const uint32_t NIL;

private:
    struct elem_t
    {
        uint32_t next;
        data_t data;

        elem_t(uint32_t n, const data_t &d)
            : next(n)
            , data(d)
        {}
    };

    std::vector<elem_t> elems;
    std::map<hash_t, uint32_t> lookup;

public:
    lookup_t();

    uint32_t size() const;

    data_t& operator[](uint32_t idx);
    const data_t& operator[](uint32_t idx) const;

    uint32_t push(hash_t hash, const data_t &data);

    template<typename pred_t>
    uint32_t find_with(hash_t hash, const data_t &data, pred_t &pred) const;

    template<typename pred_t>
    uint32_t find_next_with(uint32_t prev, const data_t &data, pred_t &pred) const;

private:
    uint32_t head(hash_t) const;

    template<typename pred_t>
    uint32_t find(uint32_t next, const data_t &data, pred_t &pred) const;
};
// Creates an empty table.
// NOTE(review): NIL is declared in the class, but its definition is not
// visible in this part of the file - confirm it is defined elsewhere.
template<typename data_t, typename hash_t>
lookup_t<data_t, hash_t>::lookup_t()
    : elems()
    , lookup()
{}
// Number of stored elements, narrowed to uint32_t.
template<typename data_t, typename hash_t>
uint32_t lookup_t<data_t, hash_t>::size() const
{
    const size_t count = elems.size();
    return static_cast<uint32_t>(count);
}
// Mutable access to the data stored at index 'idx' (no bounds check).
template<typename data_t, typename hash_t>
data_t& lookup_t<data_t, hash_t>::operator[](uint32_t idx)
{
    elem_t &slot = elems[idx];
    return slot.data;
}
// Read-only access to the data stored at index 'idx' (no bounds check).
template<typename data_t, typename hash_t>
const data_t& lookup_t<data_t, hash_t>::operator[](uint32_t idx) const
{
    const elem_t &slot = elems[idx];
    return slot.data;
}
// Index of the first element of the chain for hash 'h', or NIL when no
// element has been pushed with that hash.
template<typename data_t, typename hash_t>
uint32_t lookup_t<data_t, hash_t>::head(hash_t h) const
{
    typedef typename std::map<hash_t, uint32_t>::const_iterator iter_t;
    const iter_t entry = lookup.find(h);
    if (entry == lookup.end()) {
        return NIL;
    }
    return entry->second;
}
// Appends 'data' and makes it the new head of the chain for 'hash'; the
// previous head (or NIL) becomes its successor. Returns the index of the new
// element. Asserts that the element count is below NIL.
template<typename data_t, typename hash_t>
uint32_t lookup_t<data_t, hash_t>::push(hash_t hash, const data_t &data)
{
    assert(elems.size() < NIL);
    const uint32_t new_idx = static_cast<uint32_t>(elems.size());
    const uint32_t old_head = head(hash);
    elems.push_back(elem_t(old_head, data));
    lookup[hash] = new_idx;
    return new_idx;
}
// Walks the chain starting at index 'next' and returns the index of the first
// element for which pred(element, data) holds, or NIL if none does.
template<typename data_t, typename hash_t>
template<typename pred_t>
uint32_t lookup_t<data_t, hash_t>::find(uint32_t next, const data_t &data, pred_t &pred) const
{
    uint32_t idx = next;
    while (idx != NIL) {
        const elem_t &candidate = elems[idx];
        if (pred(candidate.data, data)) {
            return idx;
        }
        idx = candidate.next;
    }
    return NIL;
}
// Searches the chain of 'hash' from its head; returns the index of the first
// element accepted by 'pred', or NIL.
template<typename data_t, typename hash_t>
template<typename pred_t>
uint32_t lookup_t<data_t, hash_t>::find_with(hash_t hash, const data_t &data, pred_t &pred) const
{
    const uint32_t first = head(hash);
    return find(first, data, pred);
}
// Continues a chain search after the element at index 'prev'; returns the
// index of the next element accepted by 'pred', or NIL.
template<typename data_t, typename hash_t>
template<typename pred_t>
uint32_t lookup_t<data_t, hash_t>::find_next_with(uint32_t prev, const data_t &data, pred_t &pred) const
{
    const uint32_t after_prev = elems[prev].next;
    return find(after_prev, data, pred);
}
} // namespace re2c
// Adds the bounds of 'r' to the list (head, tail). A new node is created when
// the list is empty or when 'r' starts after the end of the tail node;
// otherwise the tail node is extended if 'r' reaches further than it.
void Range::append_overlapping (Range * & head, Range * & tail, const Range * r)
{
    if (!head)
    {
        // first node of the result
        head = tail = Range::ran (r->lb, r->ub);
        return;
    }
    if (tail->ub < r->lb)
    {
        // starts past the tail: link a new node
        Range * const fresh = Range::ran (r->lb, r->ub);
        tail->nx = fresh;
        tail = fresh;
        return;
    }
    if (tail->ub < r->ub)
    {
        // reaches past the tail: widen it
        tail->ub = r->ub;
    }
}
// Builds a new list from the nodes of r1 and r2 taken in order of ascending
// lower bound (r2 first on ties); each node is added with append_overlapping.
// Returns the head of the new list, or NULL when both inputs are empty.
Range * Range::add (const Range * r1, const Range * r2)
{
    Range * head = NULL;
    Range * tail = NULL;

    while (r1 && r2)
    {
        // advance whichever list currently has the smaller lower bound
        const Range * & pick = r1->lb < r2->lb ? r1 : r2;
        append_overlapping (head, tail, pick);
        pick = pick->nx;
    }

    // at most one of the lists still has nodes left
    for (const Range * rest = r1 ? r1 : r2; rest; rest = rest->nx)
    {
        append_overlapping (head, tail, rest);
    }
    return head;
}
// Stores a new node [l, u) into the slot that 'ptail' points to and moves
// 'ptail' to the 'nx' slot of that new node.
void Range::append (Range ** & ptail, uint32_t l, uint32_t u)
{
    Range * const node = Range::ran (l, u);
    *ptail = node;
    ptail = &node->nx;
}
// Builds a new list holding the parts of r1 that are not covered by r2.
// Both lists are walked front to back; returns the head of the new list
// (NULL when nothing remains).
Range * Range::sub (const Range * r1, const Range * r2)
{
    Range * head = NULL;
    Range ** ptail = &head;

    while (r1)
    {
        if (!r2 || r2->lb >= r1->ub)
        {
            // nothing left to subtract from this node: copy it whole
            append (ptail, r1->lb, r1->ub);
            r1 = r1->nx;
            continue;
        }
        if (r2->ub <= r1->lb)
        {
            // this subtrahend node ends before r1 starts: skip it
            r2 = r2->nx;
            continue;
        }

        // the two nodes overlap
        if (r1->lb < r2->lb)
        {
            // part of r1 in front of r2
            append (ptail, r1->lb, r2->lb);
        }
        while (r2 && r2->ub < r1->ub)
        {
            // gap between the end of this r2 node and either the start of
            // the next r2 node or the end of r1, whichever comes first
            const uint32_t gap_begin = r2->ub;
            r2 = r2->nx;
            uint32_t gap_end = r1->ub;
            if (r2 && r2->lb < r1->ub)
            {
                gap_end = r2->lb;
            }
            append (ptail, gap_begin, gap_end);
        }
        r1 = r1->nx;
    }
    return head;
}
} // namespace re2c
class Range
{
public:
- static free_list<Range*> vFreeList;
+ static free_list<Range*> vFreeList;
private:
- Range * nx;
- // [lb,ub)
- uint32_t lb;
- uint32_t ub;
+ Range * nx;
+ // [lb,ub)
+ uint32_t lb;
+ uint32_t ub;
public:
- static Range * sym (uint32_t c)
- {
- return new Range (NULL, c, c + 1);
- }
- static Range * ran (uint32_t l, uint32_t u)
- {
- return new Range (NULL, l, u);
- }
- ~Range ()
- {
- vFreeList.erase (this);
- }
- Range * next () const { return nx; }
- uint32_t lower () const { return lb; }
- uint32_t upper () const { return ub; }
- static Range * add (const Range * r1, const Range * r2);
- static Range * sub (const Range * r1, const Range * r2);
+ static Range * sym (uint32_t c)
+ {
+ return new Range (NULL, c, c + 1);
+ }
+ static Range * ran (uint32_t l, uint32_t u)
+ {
+ return new Range (NULL, l, u);
+ }
+ ~Range ()
+ {
+ vFreeList.erase (this);
+ }
+ Range * next () const { return nx; }
+ uint32_t lower () const { return lb; }
+ uint32_t upper () const { return ub; }
+ static Range * add (const Range * r1, const Range * r2);
+ static Range * sub (const Range * r1, const Range * r2);
private:
- Range (Range * n, uint32_t l, uint32_t u)
- : nx (n)
- , lb (l)
- , ub (u)
- {
- assert (lb < ub);
- vFreeList.insert (this);
- }
- static void append_overlapping (Range * & head, Range * & tail, const Range * r);
- static void append (Range ** & ptail, uint32_t l, uint32_t u);
+ Range (Range * n, uint32_t l, uint32_t u)
+ : nx (n)
+ , lb (l)
+ , ub (u)
+ {
+ assert (lb < ub);
+ vFreeList.insert (this);
+ }
+ static void append_overlapping (Range * & head, Range * & tail, const Range * r);
+ static void append (Range ** & ptail, uint32_t l, uint32_t u);
- // test addition and subtraction
- template <uint8_t> friend Range * re2c_test::range (uint32_t n);
+ // test addition and subtraction
+ template <uint8_t> friend Range * re2c_test::range (uint32_t n);
- FORBID_COPY (Range);
+ FORBID_COPY (Range);
};
} // namespace re2c
// Parses the characters in [s, s_end) as an unsigned decimal number. The
// characters are not validated. Returns false, leaving 'number' untouched, as
// soon as the accumulated value reaches or exceeds the uint32_t maximum (so
// the maximum itself is rejected too); otherwise stores the value in 'number'
// and returns true. An empty input yields 0.
bool s_to_u32_unsafe (const char * s, const char * s_end, uint32_t & number)
{
    const uint64_t limit = std::numeric_limits<uint32_t>::max();
    uint64_t acc = 0;
    while (s != s_end)
    {
        const uint32_t digit = static_cast<uint32_t> (*s) - 0x30;
        acc = acc * 10 + digit;
        if (acc >= limit)
        {
            return false;
        }
        ++s;
    }
    number = static_cast<uint32_t> (acc);
    return true;
}
// Parses [s, s_end) as a signed decimal number; the input is assumed to match
// "-"? [0-9]+ and is not validated (the first character is read
// unconditionally). Returns false, leaving 'number' untouched, as soon as the
// accumulated value leaves the int32_t range; otherwise stores the value in
// 'number' and returns true.
bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number)
{
    const int64_t lowest = std::numeric_limits<int32_t>::min();
    const int64_t highest = std::numeric_limits<int32_t>::max();

    const bool negative = *s == '-';
    if (negative)
    {
        ++s;
    }

    int64_t acc = 0;
    for (; s != s_end; ++s)
    {
        const int digit = *s - 0x30;
        if (negative)
        {
            // accumulate downwards so that the int32_t minimum is reachable
            acc = acc * 10 - digit;
            if (acc < lowest)
            {
                return false;
            }
        }
        else
        {
            acc = acc * 10 + digit;
            if (acc > highest)
            {
                return false;
            }
        }
    }
    number = static_cast<int32_t> (acc);
    return true;
}
 * Works ~20 times faster than Linux's glibc allocator :]
*/
template<uint32_t MAXIMUM_INLINE = 4 * 1024,
- uint32_t SLAB_SIZE = 1024 * 1024,
- size_t ALIGN = 1>
+ uint32_t SLAB_SIZE = 1024 * 1024,
+ size_t ALIGN = 1>
class slab_allocator_t
{
- typedef std::vector<char*> slabs_t;
+ typedef std::vector<char*> slabs_t;
- slabs_t slabs_; /* quasilist of allocated slabs of 'SLAB_SIZE' bytes */
- char *current_slab_;
- char *current_slab_end_;
+ slabs_t slabs_; /* quasilist of allocated slabs of 'SLAB_SIZE' bytes */
+ char *current_slab_;
+ char *current_slab_end_;
public:
- slab_allocator_t(): slabs_(), current_slab_(0), current_slab_end_(0) {}
+ slab_allocator_t(): slabs_(), current_slab_(0), current_slab_end_(0) {}
- ~slab_allocator_t() { std::for_each(slabs_.rbegin(), slabs_.rend(), free); }
+ ~slab_allocator_t() { std::for_each(slabs_.rbegin(), slabs_.rend(), free); }
- void *alloc(size_t size)
- {
- char *result;
+ void *alloc(size_t size)
+ {
+ char *result;
- /* alignment */
- size += ALIGN - size % ALIGN;
+ /* alignment */
+ size += ALIGN - size % ALIGN;
- /* very large objects */
- if (size > MAXIMUM_INLINE) {
- result = static_cast<char*>(malloc(size));
- slabs_.push_back(result);
- return result;
- }
+ /* very large objects */
+ if (size > MAXIMUM_INLINE) {
+ result = static_cast<char*>(malloc(size));
+ slabs_.push_back(result);
+ return result;
+ }
- /* no space in slab */
- const size_t yet_in_slab = static_cast<size_t>(current_slab_end_ - current_slab_);
- if (yet_in_slab < size) {
- current_slab_ = static_cast<char*>(malloc(SLAB_SIZE));
- current_slab_end_ = current_slab_ + SLAB_SIZE;
- slabs_.push_back(current_slab_);
- }
+ /* no space in slab */
+ const size_t yet_in_slab = static_cast<size_t>(current_slab_end_ - current_slab_);
+ if (yet_in_slab < size) {
+ current_slab_ = static_cast<char*>(malloc(SLAB_SIZE));
+ current_slab_end_ = current_slab_ + SLAB_SIZE;
+ slabs_.push_back(current_slab_);
+ }
- result = current_slab_;
- current_slab_ += size;
+ result = current_slab_;
+ current_slab_ += size;
- return result;
- }
+ return result;
+ }
- template<typename data_t>
- inline data_t *alloct(size_t n)
- {
- return static_cast<data_t*>(alloc(n * sizeof(data_t)));
- }
+ template<typename data_t>
+ inline data_t *alloct(size_t n)
+ {
+ return static_cast<data_t*>(alloc(n * sizeof(data_t)));
+ }
- FORBID_COPY(slab_allocator_t);
+ FORBID_COPY(slab_allocator_t);
};
#endif // _RE2C_UTIL_SLAB_ALLOCATOR_
namespace re2c
{
    // Reference-counted owning pointer. All copies share one heap-allocated
    // counter; the pointee and the counter are deleted when the last copy is
    // destroyed or reassigned.
    template <class T>
    class smart_ptr
    {
    private:
        T* ptr;
        long* count; // shared number of owners

    public:
        // Takes ownership of p. If the counter cannot be allocated, p is
        // deleted before the exception is passed on, so it does not leak.
        explicit smart_ptr (T* p=0)
            : ptr(p), count(0)
        {
            try
            {
                count = new long(1);
            }
            catch (...)
            {
                delete p;
                throw;
            }
        }

        smart_ptr (const smart_ptr<T>& p) throw()
            : ptr(p.ptr), count(p.count)
        {
            ++*count;
        }

        ~smart_ptr ()
        {
            dispose();
        }

        smart_ptr<T>& operator= (const smart_ptr<T>& p)
        {
            if (this != &p)
            {
                // give up the current pointee, then share p's
                dispose();
                ptr = p.ptr;
                count = p.count;
                ++*count;
            }
            return *this;
        }

        T& operator*() const
        {
            return *ptr;
        }

        T* operator->() const
        {
            return ptr;
        }

    private:
        // Drops one owner; the last owner deletes the counter and the pointee.
        void dispose()
        {
            if (--*count == 0)
            {
                delete count;
                delete ptr;
            }
        }
    };

    // Wraps p in a smart_ptr, deducing T from the argument.
    template <typename T>
    smart_ptr<T> make_smart_ptr(T* p)
    {
        return smart_ptr<T>(p);
    }
}
#endif // _RE2C_UTIL_SMART_PTR_
{
// Replaces every occurrence of s1 in s with the text obtained by streaming v
// into an ostringstream. The search resumes right after each inserted
// replacement, so the replacement text itself is never rescanned. An empty
// s1 leaves s unchanged.
template<typename type_t> void strrreplace(
    std::string &s,
    const std::string &s1,
    const type_t &v)
{
    std::ostringstream formatted;
    formatted << v;
    const std::string &replacement = formatted.str();

    const size_t from_len = s1.length();
    const size_t to_len = replacement.length();
    if (from_len == 0) {
        return;
    }

    for (std::string::size_type at = s.find(s1);
        at != std::string::npos;
        at = s.find(s1, at + to_len)) {
        s.replace(at, from_len, replacement);
    }
}
// Returns the text produced by streaming v into an ostringstream.
template<typename T>
static std::string to_string(const T &v)
{
    std::ostringstream out;
    out << v;
    const std::string text = out.str();
    return text;
}
} // namespace re2c
// Unsigned 32-bit value that saturates at LIMIT: any value that is not below
// LIMIT is stored as LIMIT, and overflow() reports that state. Sums and
// products are computed in 64 bits before being clamped.
template<uint32_t LIMIT>
class u32lim_t
{
    uint32_t value;

    explicit u32lim_t (uint32_t x)
        : value (x < LIMIT ? x : LIMIT)
    {}

    explicit u32lim_t (uint64_t x)
        : value (x < LIMIT ? static_cast<uint32_t> (x) : LIMIT)
    {}

public:
    // implicit conversion is forbidden, because
    // operands should be converted before operation:
    //     uint32_t x, y; ... u32lim_t z = x + y;
    // will result in 32-bit addition and may overflow
    // Don't export overloaded constructors: it breaks OS X builds
    // ('size_t' causes resolution ambiguity)
    static u32lim_t from32 (uint32_t x)
    {
        return u32lim_t (x);
    }

    static u32lim_t from64 (uint64_t x)
    {
        return u32lim_t (x);
    }

    // the saturated value
    static u32lim_t limit ()
    {
        return u32lim_t (LIMIT);
    }

    uint32_t uint32 () const
    {
        return value;
    }

    bool overflow () const
    {
        return value == LIMIT;
    }

    friend u32lim_t operator + (u32lim_t x, u32lim_t y)
    {
        const uint64_t wide_x = x.value;
        const uint64_t wide_y = y.value;
        // the 64-bit constructor clamps the sum to LIMIT
        return u32lim_t (wide_x + wide_y);
    }

    friend u32lim_t operator * (u32lim_t x, u32lim_t y)
    {
        const uint64_t wide_x = x.value;
        const uint64_t wide_y = y.value;
        // the 64-bit constructor clamps the product to LIMIT
        return u32lim_t (wide_x * wide_y);
    }

    friend bool operator < (u32lim_t x, u32lim_t y)
    {
        return x.value < y.value;
    }
};
#endif // _RE2C_UTIL_U32LIM_
// Vector without duplicates: find_or_add() returns the index of an existing
// equal element (found by linear search with operator ==) or appends the
// value and returns its new index.
template <typename value_t>
class uniq_vector_t
{
    typedef std::vector<value_t> elems_t;
    elems_t elems;

public:
    uniq_vector_t ()
        : elems ()
    {}

    size_t size () const
    {
        return elems.size ();
    }

    const value_t & operator [] (size_t i) const
    {
        return elems[i];
    }

    size_t find_or_add (const value_t & v)
    {
        const size_t count = elems.size ();
        size_t pos = 0;
        while (pos < count && !(elems[pos] == v))
        {
            ++pos;
        }
        if (pos == count)
        {
            // not present yet: it becomes the last element
            elems.push_back (v);
        }
        return pos;
    }
};
} // namespace re2c
// Const iterator over a container that wraps around: incrementing past the
// last element moves back to the first one. The begin/end iterators are
// captured once, at construction.
template<typename container_t>
class wrap_citer_t
{
    typedef typename container_t::const_iterator citer_t;
    typedef const typename container_t::value_type* cpval_t;

    const citer_t beg;
    const citer_t end;
    citer_t cur;

public:
    explicit wrap_citer_t(const container_t &c)
        : beg(c.begin())
        , end(c.end())
        , cur(beg)
    {}

    wrap_citer_t& operator++()
    {
        ++cur;
        if (cur == end) {
            cur = beg;
        }
        return *this;
    }

    cpval_t operator->() const
    {
        return cur.operator->();
    }
};
} // namespace re2c