From: Ulya Trofimovich Date: Mon, 13 Aug 2018 21:49:44 +0000 (+0100) Subject: Converted tabs to spaces. X-Git-Tag: 1.1~9 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cc1ae4e877f063593c5833b7178d91ae1bd89228;p=re2c Converted tabs to spaces. --- diff --git a/re2c/src/adfa/action.h b/re2c/src/adfa/action.h index 5e7762be..7671499d 100644 --- a/re2c/src/adfa/action.h +++ b/re2c/src/adfa/action.h @@ -17,15 +17,15 @@ struct State; struct Initial { - static const size_t NOSAVE; + static const size_t NOSAVE; - label_t label; - size_t save; + label_t label; + size_t save; - inline Initial (label_t l, size_t s) - : label (l) - , save (s) - {} + inline Initial (label_t l, size_t s) + : label (l) + , save (s) + {} }; typedef uniq_vector_t > accept_t; @@ -33,74 +33,74 @@ typedef uniq_vector_t > accept_t; class Action { public: - enum type_t - { - MATCH, - INITIAL, - SAVE, - MOVE, - ACCEPT, - RULE - } type; - union - { - Initial * initial; - size_t save; - const accept_t * accepts; - size_t rule; - } info; + enum type_t + { + MATCH, + INITIAL, + SAVE, + MOVE, + ACCEPT, + RULE + } type; + union + { + Initial * initial; + size_t save; + const accept_t * accepts; + size_t rule; + } info; public: - inline Action () - : type (MATCH) - , info () - {} - ~Action () - { - if (type == INITIAL) { - delete info.initial; - } - } - void set_initial (label_t label) - { - if (type == MATCH) { - // ordinary state with no special action - type = INITIAL; - info.initial = new Initial(label, Initial::NOSAVE); - } else if (type == SAVE) { - // fallback state: do not loose 'yyaccept' - type = INITIAL; - info.initial = new Initial(label, info.save); - } else if (type == INITIAL) { - // already marked as initial, probably reuse mode - info.initial->label = label; - } else { - assert(false); - } - } - void set_save (size_t save) - { - assert(type == MATCH); - type = SAVE; - info.save = save; - } - void set_move () - { - assert(type == MATCH); - type = MOVE; - } - void set_accept (const accept_t * accepts) - { - assert(type == MATCH); - type = ACCEPT; - info.accepts = accepts; - } - void set_rule (size_t rule) - { - assert(type == MATCH); - type = RULE; - info.rule = rule; - } + inline Action () + : type (MATCH) + , info () + {} + ~Action () + { + if (type == INITIAL) { + delete info.initial; + } + } + void set_initial (label_t label) + { + if (type == MATCH) { + // ordinary state with no special action + type = INITIAL; + info.initial = new Initial(label, Initial::NOSAVE); + } else if (type == SAVE) { + // fallback state: do not loose 'yyaccept' + type = INITIAL; + info.initial = new Initial(label, info.save); + } else if (type == INITIAL) { + // already marked as initial, probably reuse mode + info.initial->label = label; + } else { + assert(false); + } + } + void set_save (size_t save) + { + assert(type == MATCH); + type = SAVE; + info.save = save; + } + void set_move () + { + assert(type == MATCH); + type = MOVE; + } + void set_accept (const accept_t * accepts) + { + assert(type == MATCH); + type = ACCEPT; + info.accepts = accepts; + } + void set_rule (size_t rule) + { + assert(type == MATCH); + type = RULE; + info.rule = rule; + } }; } // namespace re2c diff --git a/re2c/src/adfa/adfa.cc b/re2c/src/adfa/adfa.cc index 99147e23..4ae81005 100644 --- a/re2c/src/adfa/adfa.cc +++ b/re2c/src/adfa/adfa.cc @@ -17,100 +17,100 @@ namespace re2c const size_t Initial::NOSAVE = std::numeric_limits::max(); DFA::DFA - ( const dfa_t &dfa - , const std::vector &fill - , size_t def - , size_t key - , const std::string &nm - , const std::string &cn - , uint32_t ln - , const std::string &su - ) - : accepts () - , name (nm) - , cond (cn) - , line (ln) - , lbChar(0) - , ubChar(dfa.charset.back()) - , nStates(0) - , head(NULL) - , tags0(dfa.tcid0) - , charset(dfa.charset) - , rules(dfa.rules) - , tags(dfa.tags) - , mtagvers(dfa.mtagvers) - , finvers(dfa.finvers) - , tcpool(dfa.tcpool) - , max_fill (0) - , max_nmatch(0) - , need_backup (false) - , need_accept (false) - , oldstyle_ctxmarker (false) - , maxtagver (dfa.maxtagver) - , def_rule (def) - , key_size (key) - , bitmaps (std::min(ubChar, 256u)) - , setup(su) + ( const dfa_t &dfa + , const std::vector &fill + , size_t def + , size_t key + , const std::string &nm + , const std::string &cn + , uint32_t ln + , const std::string &su + ) + : accepts () + , name (nm) + , cond (cn) + , line (ln) + , lbChar(0) + , ubChar(dfa.charset.back()) + , nStates(0) + , head(NULL) + , tags0(dfa.tcid0) + , charset(dfa.charset) + , rules(dfa.rules) + , tags(dfa.tags) + , mtagvers(dfa.mtagvers) + , finvers(dfa.finvers) + , tcpool(dfa.tcpool) + , max_fill (0) + , max_nmatch(0) + , need_backup (false) + , need_accept (false) + , oldstyle_ctxmarker (false) + , maxtagver (dfa.maxtagver) + , def_rule (def) + , key_size (key) + , bitmaps (std::min(ubChar, 256u)) + , setup(su) { - const size_t nstates = dfa.states.size(); - const size_t nchars = dfa.nchars; - - State **i2s = new State*[nstates]; - for (size_t i = 0; i < nstates; ++i) - { - i2s[i] = new State; - } - - State **p = &head; - for (size_t i = 0; i < nstates; ++i) - { - dfa_state_t *t = dfa.states[i]; - State *s = i2s[i]; - - ++nStates; - *p = s; - p = &s->next; - - s->rule = t->rule; - s->rule_tags = t->tcid[dfa.nchars]; - s->fall_tags = t->tcid[dfa.nchars + 1]; - s->fill = fill[i]; - s->fallback = t->fallback; // see note [fallback states] - - s->go.span = allocate(nchars); - uint32_t j = 0; - for (uint32_t c = 0; c < nchars; ++j) - { - const size_t to = t->arcs[c]; - const tcid_t tc = t->tcid[c]; - for (;++c < nchars && t->arcs[c] == to && t->tcid[c] == tc;); - s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to]; - s->go.span[j].ub = charset[c]; - s->go.span[j].tags = tc; - } - s->go.nSpans = j; - } - *p = NULL; - - delete[] i2s; + const size_t nstates = dfa.states.size(); + const size_t nchars = dfa.nchars; + + State **i2s = new State*[nstates]; + for (size_t i = 0; i < nstates; ++i) + { + i2s[i] = new State; + } + + State **p = &head; + for (size_t i = 0; i < nstates; ++i) + { + dfa_state_t *t = dfa.states[i]; + State *s = i2s[i]; + + ++nStates; + *p = s; + p = &s->next; + + s->rule = t->rule; + s->rule_tags = t->tcid[dfa.nchars]; + s->fall_tags = t->tcid[dfa.nchars + 1]; + s->fill = fill[i]; + s->fallback = t->fallback; // see note [fallback states] + + s->go.span = allocate(nchars); + uint32_t j = 0; + for (uint32_t c = 0; c < nchars; ++j) + { + const size_t to = t->arcs[c]; + const tcid_t tc = t->tcid[c]; + for (;++c < nchars && t->arcs[c] == to && t->tcid[c] == tc;); + s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to]; + s->go.span[j].ub = charset[c]; + s->go.span[j].tags = tc; + } + s->go.nSpans = j; + } + *p = NULL; + + delete[] i2s; } DFA::~DFA() { - State *s; - - while ((s = head)) - { - head = s->next; - delete s; - } - - delete &charset; - delete &rules; - delete &tags; - delete &mtagvers; - delete[] finvers; - delete &tcpool; + State *s; + + while ((s = head)) + { + head = s->next; + delete s; + } + + delete &charset; + delete &rules; + delete &tags; + delete &mtagvers; + delete[] finvers; + delete &tcpool; } /* note [reordering DFA states] @@ -133,44 +133,44 @@ DFA::~DFA() */ void DFA::reorder() { - std::vector ord; - ord.reserve(nStates); - - std::queue todo; - todo.push(head); - - std::set done; - done.insert(head); - - for(;!todo.empty();) - { - State *s = todo.front(); - todo.pop(); - ord.push_back(s); - for(uint32_t i = 0; i < s->go.nSpans; ++i) - { - State *q = s->go.span[i].to; - if(q && done.insert(q).second) - { - todo.push(q); - } - } - } - - assert(nStates == ord.size()); - - ord.push_back(NULL); - for(uint32_t i = 0; i < nStates; ++i) - { - ord[i]->next = ord[i + 1]; - } + std::vector ord; + ord.reserve(nStates); + + std::queue todo; + todo.push(head); + + std::set done; + done.insert(head); + + for(;!todo.empty();) + { + State *s = todo.front(); + todo.pop(); + ord.push_back(s); + for(uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *q = s->go.span[i].to; + if(q && done.insert(q).second) + { + todo.push(q); + } + } + } + + assert(nStates == ord.size()); + + ord.push_back(NULL); + for(uint32_t i = 0; i < nStates; ++i) + { + ord[i]->next = ord[i + 1]; + } } void DFA::addState(State *s, State *next) { - ++nStates; - s->next = next->next; - next->next = s; + ++nStates; + s->next = next->next; + next->next = s; } } // namespace re2c diff --git a/re2c/src/adfa/adfa.h b/re2c/src/adfa/adfa.h index 48259dd3..e3a425a9 100644 --- a/re2c/src/adfa/adfa.h +++ b/re2c/src/adfa/adfa.h @@ -27,92 +27,92 @@ struct dfa_t; struct State { - label_t label; - State * next; - size_t fill; - bool fallback; + label_t label; + State * next; + size_t fill; + bool fallback; - size_t rule; - tcid_t rule_tags; - tcid_t fall_tags; - bool isBase; - Go go; - Action action; + size_t rule; + tcid_t rule_tags; + tcid_t fall_tags; + bool isBase; + Go go; + Action action; - State () - : label (label_t::first ()) - , next (0) - , fill (0) - , fallback (false) - , rule (Rule::NONE) - , rule_tags (TCID0) - , fall_tags (TCID0) - , isBase (false) - , go () - , action () - {} - ~State () - { - operator delete (go.span); - } + State () + : label (label_t::first ()) + , next (0) + , fill (0) + , fallback (false) + , rule (Rule::NONE) + , rule_tags (TCID0) + , fall_tags (TCID0) + , isBase (false) + , go () + , action () + {} + ~State () + { + operator delete (go.span); + } - FORBID_COPY (State); + FORBID_COPY (State); }; struct DFA { - accept_t accepts; - const std::string name; - const std::string cond; - const uint32_t line; - uint32_t lbChar; - uint32_t ubChar; - uint32_t nStates; - State * head; - const tcid_t tags0; - std::vector &charset; - std::valarray &rules; - std::vector &tags; - std::set &mtagvers; - const tagver_t *finvers; - tcpool_t &tcpool; - size_t max_fill; - size_t max_nmatch; - bool need_backup; - bool need_accept; - bool oldstyle_ctxmarker; - tagver_t maxtagver; - const size_t def_rule; - const size_t key_size; - bitmaps_t bitmaps; - std::string setup; + accept_t accepts; + const std::string name; + const std::string cond; + const uint32_t line; + uint32_t lbChar; + uint32_t ubChar; + uint32_t nStates; + State * head; + const tcid_t tags0; + std::vector &charset; + std::valarray &rules; + std::vector &tags; + std::set &mtagvers; + const tagver_t *finvers; + tcpool_t &tcpool; + size_t max_fill; + size_t max_nmatch; + bool need_backup; + bool need_accept; + bool oldstyle_ctxmarker; + tagver_t maxtagver; + const size_t def_rule; + const size_t key_size; + bitmaps_t bitmaps; + std::string setup; - DFA ( const dfa_t &dfa - , const std::vector &fill - , size_t def - , size_t key - , const std::string &nm - , const std::string &cn - , uint32_t ln - , const std::string &su - ); - ~DFA (); - void reorder(); - void prepare(const opt_t *opts); - void calc_stats(uint32_t ln, bool explicit_tags); - void emit (Output &, uint32_t &, bool, bool &); + DFA ( const dfa_t &dfa + , const std::vector &fill + , size_t def + , size_t key + , const std::string &nm + , const std::string &cn + , uint32_t ln + , const std::string &su + ); + ~DFA (); + void reorder(); + void prepare(const opt_t *opts); + void calc_stats(uint32_t ln, bool explicit_tags); + void emit (Output &, uint32_t &, bool, bool &); private: - void addState(State*, State *); - void split (State *); - void findBaseState (); - void hoist_tags(); - void hoist_tags_and_skip(const opt_t *opts); - void count_used_labels(std::set &used, label_t start, label_t initial, bool force_start, bool fFlag) const; - void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; - void emit_dot(OutputFile &o, bool last_cond) const; + void addState(State*, State *); + void split (State *); + void findBaseState (); + void hoist_tags(); + void hoist_tags_and_skip(const opt_t *opts); + void count_used_labels(std::set &used, label_t start, label_t initial, bool force_start, bool fFlag) const; + void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; + void emit_dot(OutputFile &o, bool last_cond) const; - FORBID_COPY (DFA); + FORBID_COPY (DFA); }; } // namespace re2c diff --git a/re2c/src/adfa/dump.cc b/re2c/src/adfa/dump.cc index fd7c70e4..771c3561 100644 --- a/re2c/src/adfa/dump.cc +++ b/re2c/src/adfa/dump.cc @@ -19,83 +19,83 @@ namespace re2c static void dump_adfa_range(uint32_t lower, uint32_t upper) { - fprintf(stderr, "%u", lower); - if (--upper > lower) { - fprintf(stderr, "-%u", upper); - } + fprintf(stderr, "%u", lower); + if (--upper > lower) { + fprintf(stderr, "-%u", upper); + } } void dump_adfa(const DFA &dfa) { - fprintf(stderr, - "digraph DFA {\n" - " rankdir=LR\n" - " node[shape=Mrecord fontname=fixed]\n" - " edge[arrowhead=vee fontname=fixed]\n\n"); + fprintf(stderr, + "digraph DFA {\n" + " rankdir=LR\n" + " node[shape=Mrecord fontname=fixed]\n" + " edge[arrowhead=vee fontname=fixed]\n\n"); - fprintf(stderr, - " n [shape=point]" - " n -> n%p [style=dotted label=\"", (void*)dfa.head); - dump_tcmd(dfa.tcpool[dfa.tags0]); - fprintf(stderr, "\"]\n"); + fprintf(stderr, + " n [shape=point]" + " n -> n%p [style=dotted label=\"", (void*)dfa.head); + dump_tcmd(dfa.tcpool[dfa.tags0]); + fprintf(stderr, "\"]\n"); - for (const State *s = dfa.head; s; s = s->next) { - const char *attr; - Action::type_t action = s->action.type; + for (const State *s = dfa.head; s; s = s->next) { + const char *attr; + Action::type_t action = s->action.type; - if (action == Action::ACCEPT) { - attr = "style=filled fillcolor=gray"; - } else if (action == Action::RULE) { - attr = "style=filled fillcolor=lightgray"; - } else { - attr = ""; - } - fprintf(stderr, " n%p [height=0.2 width=0.2 label=\"", (void*)s); - if (s->fill && action != Action::MOVE) { - fprintf(stderr, "F(%u) ", (uint32_t)s->fill); - } - if (action == Action::RULE) { - const Rule &r = dfa.rules[s->action.info.rule]; - for (size_t t = r.ltag; t < r.htag; ++t) { - if (t > r.ltag) fprintf(stderr, " "); - const std::string *name = dfa.tags[t].name; - fprintf(stderr, "%s(%d)", - name ? name->c_str() : "/", dfa.finvers[t]); - } - } - dump_tcmd(dfa.tcpool[s->go.tags]); - fprintf(stderr, "\" %s]\n", attr); + if (action == Action::ACCEPT) { + attr = "style=filled fillcolor=gray"; + } else if (action == Action::RULE) { + attr = "style=filled fillcolor=lightgray"; + } else { + attr = ""; + } + fprintf(stderr, " n%p [height=0.2 width=0.2 label=\"", (void*)s); + if (s->fill && action != Action::MOVE) { + fprintf(stderr, "F(%u) ", (uint32_t)s->fill); + } + if (action == Action::RULE) { + const Rule &r = dfa.rules[s->action.info.rule]; + for (size_t t = r.ltag; t < r.htag; ++t) { + if (t > r.ltag) fprintf(stderr, " "); + const std::string *name = dfa.tags[t].name; + fprintf(stderr, "%s(%d)", + name ? name->c_str() : "/", dfa.finvers[t]); + } + } + dump_tcmd(dfa.tcpool[s->go.tags]); + fprintf(stderr, "\" %s]\n", attr); - if (action == Action::ACCEPT) { - const accept_t &accept = *s->action.info.accepts; - for (uint32_t i = 0; i < accept.size(); ++i) { - fprintf(stderr, " n%p -> n%p [label=\"", - (void*)s, (void*)accept[i].first); - dump_tcmd(dfa.tcpool[accept[i].second]); - fprintf(stderr, "\" style=dotted]\n"); - } - } + if (action == Action::ACCEPT) { + const accept_t &accept = *s->action.info.accepts; + for (uint32_t i = 0; i < accept.size(); ++i) { + fprintf(stderr, " n%p -> n%p [label=\"", + (void*)s, (void*)accept[i].first); + dump_tcmd(dfa.tcpool[accept[i].second]); + fprintf(stderr, "\" style=dotted]\n"); + } + } - const Span *x = s->go.span, *e = x + s->go.nSpans; - for (uint32_t lb = 0; x < e; lb = x->ub, ++x) { - if (!x->to) continue; + const Span *x = s->go.span, *e = x + s->go.nSpans; + for (uint32_t lb = 0; x < e; lb = x->ub, ++x) { + if (!x->to) continue; - bool eat = true; - const Action::type_t act = x->to->action.type; - if (act == Action::MOVE || act == Action::RULE) { - attr = "style=dotted"; - eat = false; - } else { - attr = ""; - } - fprintf(stderr, " n%p -> n%p [label=\"", (void*)s, (void*)x->to); - if (eat) dump_adfa_range(lb, x->ub); - dump_tcmd(dfa.tcpool[x->tags]); - fprintf(stderr, "\" %s]\n", attr); - } - } + bool eat = true; + const Action::type_t act = x->to->action.type; + if (act == Action::MOVE || act == Action::RULE) { + attr = "style=dotted"; + eat = false; + } else { + attr = ""; + } + fprintf(stderr, " n%p -> n%p [label=\"", (void*)s, (void*)x->to); + if (eat) dump_adfa_range(lb, x->ub); + dump_tcmd(dfa.tcpool[x->tags]); + fprintf(stderr, "\" %s]\n", attr); + } + } - fprintf(stderr, "}\n"); + fprintf(stderr, "}\n"); } } // namespace re2c diff --git a/re2c/src/adfa/prepare.cc b/re2c/src/adfa/prepare.cc index 362308d1..a9032d67 100644 --- a/re2c/src/adfa/prepare.cc +++ b/re2c/src/adfa/prepare.cc @@ -20,90 +20,90 @@ namespace re2c { void DFA::split(State *s) { - State *move = new State; - addState(move, s); - move->action.set_move (); - move->rule = s->rule; - move->fill = s->fill; /* used by tunneling, ignored by codegen */ - move->go = s->go; - move->go.tags = TCID0; /* drop hoisted tags */ - move->rule_tags = s->rule_tags; - move->fall_tags = s->fall_tags; - s->rule = Rule::NONE; - s->go.nSpans = 1; - s->go.span = allocate (1); - s->go.span[0].ub = ubChar; - s->go.span[0].to = move; - s->go.span[0].tags = TCID0; + State *move = new State; + addState(move, s); + move->action.set_move (); + move->rule = s->rule; + move->fill = s->fill; /* used by tunneling, ignored by codegen */ + move->go = s->go; + move->go.tags = TCID0; /* drop hoisted tags */ + move->rule_tags = s->rule_tags; + move->fall_tags = s->fall_tags; + s->rule = Rule::NONE; + s->go.nSpans = 1; + s->go.span = allocate (1); + s->go.span[0].ub = ubChar; + s->go.span[0].to = move; + s->go.span[0].tags = TCID0; } static uint32_t merge(Span *x, State *fg, State *bg) { - Span *f = fg->go.span; - Span *b = bg->go.span; - Span *const fe = f + fg->go.nSpans; - Span *const be = b + bg->go.nSpans; - Span *const x0 = x; - - for (;!(f == fe && b == be);) { - if (f->to == b->to && f->tags == b->tags) { - x->to = bg; - x->tags = TCID0; - } else { - x->to = f->to; - x->tags = f->tags; - } - if (x == x0 - || x[-1].to != x->to - || x[-1].tags != x->tags) { - ++x; - } - x[-1].ub = std::min(f->ub, b->ub); - - if (f->ub < b->ub) { - ++f; - } else if (f->ub > b->ub) { - ++b; - } else { - ++f; - ++b; - } - } - - return static_cast(x - x0); + Span *f = fg->go.span; + Span *b = bg->go.span; + Span *const fe = f + fg->go.nSpans; + Span *const be = b + bg->go.nSpans; + Span *const x0 = x; + + for (;!(f == fe && b == be);) { + if (f->to == b->to && f->tags == b->tags) { + x->to = bg; + x->tags = TCID0; + } else { + x->to = f->to; + x->tags = f->tags; + } + if (x == x0 + || x[-1].to != x->to + || x[-1].tags != x->tags) { + ++x; + } + x[-1].ub = std::min(f->ub, b->ub); + + if (f->ub < b->ub) { + ++f; + } else if (f->ub > b->ub) { + ++b; + } else { + ++f; + ++b; + } + } + + return static_cast(x - x0); } void DFA::findBaseState() { - Span *span = allocate (ubChar - lbChar); - - for (State *s = head; s; s = s->next) - { - if (s->fill == 0) - { - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - State *to = s->go.span[i].to; - - if (to->isBase) - { - to = to->go.span[0].to; - uint32_t nSpans = merge(span, s, to); - - if (nSpans < s->go.nSpans) - { - operator delete (s->go.span); - s->go.nSpans = nSpans; - s->go.span = allocate (nSpans); - memcpy(s->go.span, span, nSpans*sizeof(Span)); - break; - } - } - } - } - } - - operator delete (span); + Span *span = allocate (ubChar - lbChar); + + for (State *s = head; s; s = s->next) + { + if (s->fill == 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *to = s->go.span[i].to; + + if (to->isBase) + { + to = to->go.span[0].to; + uint32_t nSpans = merge(span, s, to); + + if (nSpans < s->go.nSpans) + { + operator delete (s->go.span); + s->go.nSpans = nSpans; + s->go.span = allocate (nSpans); + memcpy(s->go.span, span, nSpans*sizeof(Span)); + break; + } + } + } + } + } + + operator delete (span); } /* note [tag hoisting, skip hoisting and tunneling] @@ -141,208 +141,208 @@ void DFA::findBaseState() void DFA::prepare(const opt_t *opts) { - // create rule states - std::vector rule2state(rules.size()); - for (State *s = head; s; s = s->next) { - if (s->rule != Rule::NONE) { - if (!rule2state[s->rule]) { - State *n = new State; - n->action.set_rule(s->rule); - rule2state[s->rule] = n; - addState(n, s); - } - for (uint32_t i = 0; i < s->go.nSpans; ++i) { - if (!s->go.span[i].to) { - s->go.span[i].to = rule2state[s->rule]; - s->go.span[i].tags = s->rule_tags; - } - } - } - } - - // create default state (if needed) - State * default_state = NULL; - for (State * s = head; s; s = s->next) - { - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - if (!s->go.span[i].to) - { - if (!default_state) - { - default_state = new State; - addState(default_state, s); - } - s->go.span[i].to = default_state; - } - } - } - - // bind save actions to fallback states and create accept state (if needed) - if (default_state) { - for (State *s = head; s; s = s->next) { - if (s->fallback) { - const std::pair acc(rule2state[s->rule], s->fall_tags); - s->action.set_save(accepts.find_or_add(acc)); - } - } - default_state->action.set_accept(&accepts); - } - - // tag hoisting should be done after binding default arcs: - // (which may introduce new tags) - // see note [tag hoisting, skip hoisting and tunneling] - if (!opts->eager_skip) { - hoist_tags(); - } - - // split ``base'' states into two parts - for (State * s = head; s; s = s->next) - { - s->isBase = false; - - if (s->fill != 0) - { - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - if (s->go.span[i].to == s) - { - s->isBase = true; - split(s); - - if (opts->bFlag) { - bitmaps.insert(&s->next->go, s); - } - - s = s->next; - break; - } - } - } - } - // find ``base'' state, if possible - findBaseState(); - - // see note [tag hoisting, skip hoisting and tunneling] - if (opts->eager_skip) { - hoist_tags_and_skip(opts); - } - - for (State *s = head; s; s = s->next) { - s->go.init(s, opts, bitmaps); - } + // create rule states + std::vector rule2state(rules.size()); + for (State *s = head; s; s = s->next) { + if (s->rule != Rule::NONE) { + if (!rule2state[s->rule]) { + State *n = new State; + n->action.set_rule(s->rule); + rule2state[s->rule] = n; + addState(n, s); + } + for (uint32_t i = 0; i < s->go.nSpans; ++i) { + if (!s->go.span[i].to) { + s->go.span[i].to = rule2state[s->rule]; + s->go.span[i].tags = s->rule_tags; + } + } + } + } + + // create default state (if needed) + State * default_state = NULL; + for (State * s = head; s; s = s->next) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + if (!default_state) + { + default_state = new State; + addState(default_state, s); + } + s->go.span[i].to = default_state; + } + } + } + + // bind save actions to fallback states and create accept state (if needed) + if (default_state) { + for (State *s = head; s; s = s->next) { + if (s->fallback) { + const std::pair acc(rule2state[s->rule], s->fall_tags); + s->action.set_save(accepts.find_or_add(acc)); + } + } + default_state->action.set_accept(&accepts); + } + + // tag hoisting should be done after binding default arcs: + // (which may introduce new tags) + // see note [tag hoisting, skip hoisting and tunneling] + if (!opts->eager_skip) { + hoist_tags(); + } + + // split ``base'' states into two parts + for (State * s = head; s; s = s->next) + { + s->isBase = false; + + if (s->fill != 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (s->go.span[i].to == s) + { + s->isBase = true; + split(s); + + if (opts->bFlag) { + bitmaps.insert(&s->next->go, s); + } + + s = s->next; + break; + } + } + } + } + // find ``base'' state, if possible + findBaseState(); + + // see note [tag hoisting, skip hoisting and tunneling] + if (opts->eager_skip) { + hoist_tags_and_skip(opts); + } + + for (State *s = head; s; s = s->next) { + s->go.init(s, opts, bitmaps); + } } void DFA::calc_stats(uint32_t ln, bool explicit_tags) { - // calculate 'YYMAXFILL' - max_fill = 0; - for (State * s = head; s; s = s->next) - { - if (max_fill < s->fill) - { - max_fill = s->fill; - } - } - - // calculate 'YYMAXNMATCH' - max_nmatch = 0; - const size_t nrule = rules.size(); - for (size_t i = 0; i < nrule; ++i) { - max_nmatch = std::max(max_nmatch, rules[i].ncap); - } - - // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used - need_backup = accepts.size () > 0; - - // determine if 'yyaccept' variable is used - need_accept = accepts.size () > 1; - - // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used - // If tags are not enabled explicitely and trailing contexts - // don't overlap (single variable is enough for all of them), then - // re2c should use old-style YYCTXMARKER for backwards compatibility. - // Note that with generic API fixed-length contexts are forbidden, - // which may cause additional overlaps. - oldstyle_ctxmarker = !explicit_tags && maxtagver == 1; - - // error if tags are not enabled, but we need them - if (!explicit_tags && maxtagver > 1) { - fatal_l(ln, "overlapping trailing contexts need " - "multiple context markers, use '-t, --tags' " - "option and '/*!stags:re2c ... */' directive"); - } + // calculate 'YYMAXFILL' + max_fill = 0; + for (State * s = head; s; s = s->next) + { + if (max_fill < s->fill) + { + max_fill = s->fill; + } + } + + // calculate 'YYMAXNMATCH' + max_nmatch = 0; + const size_t nrule = rules.size(); + for (size_t i = 0; i < nrule; ++i) { + max_nmatch = std::max(max_nmatch, rules[i].ncap); + } + + // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used + need_backup = accepts.size () > 0; + + // determine if 'yyaccept' variable is used + need_accept = accepts.size () > 1; + + // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used + // If tags are not enabled explicitely and trailing contexts + // don't overlap (single variable is enough for all of them), then + // re2c should use old-style YYCTXMARKER for backwards compatibility. + // Note that with generic API fixed-length contexts are forbidden, + // which may cause additional overlaps. + oldstyle_ctxmarker = !explicit_tags && maxtagver == 1; + + // error if tags are not enabled, but we need them + if (!explicit_tags && maxtagver > 1) { + fatal_l(ln, "overlapping trailing contexts need " + "multiple context markers, use '-t, --tags' " + "option and '/*!stags:re2c ... */' directive"); + } } void DFA::hoist_tags() { - for (State * s = head; s; s = s->next) { - Span *span = s->go.span; - const size_t nspan = s->go.nSpans; - if (nspan == 0) continue; - - tcid_t ts = span[0].tags; - for (uint32_t i = 1; i < nspan; ++i) { - if (span[i].tags != ts) { - ts = TCID0; - break; - } - } - if (ts != TCID0) { - s->go.tags = ts; - for (uint32_t i = 0; i < nspan; ++i) { - span[i].tags = TCID0; - } - } - } + for (State * s = head; s; s = s->next) { + Span *span = s->go.span; + const size_t nspan = s->go.nSpans; + if (nspan == 0) continue; + + tcid_t ts = span[0].tags; + for (uint32_t i = 1; i < nspan; ++i) { + if (span[i].tags != ts) { + ts = TCID0; + break; + } + } + if (ts != TCID0) { + s->go.tags = ts; + for (uint32_t i = 0; i < nspan; ++i) { + span[i].tags = TCID0; + } + } + } } void DFA::hoist_tags_and_skip(const opt_t *opts) { - assert(opts->eager_skip); - - for (State * s = head; s; s = s->next) { - Span *span = s->go.span; - const size_t nspan = s->go.nSpans; - if (nspan == 0) continue; - - bool hoist_tags = true, hoist_skip = true; - - // do all spans agree on tags? - for (uint32_t i = 1; i < nspan; ++i) { - if (span[i].tags != span[0].tags) { - hoist_tags = false; - break; - } - } - - // do all spans agree on skip? - for (uint32_t i = 0; i < nspan; ++i) { - if (consume(span[i].to) != consume(span[0].to)) { - hoist_skip = false; - break; - } - } - - if (opts->lookahead) { - // skip must go after tags - hoist_skip &= hoist_tags; - } else { - // skip must go before tags - hoist_tags &= hoist_skip; - } - - // hoisting tags is possible - if (hoist_tags) { - s->go.tags = span[0].tags; - for (uint32_t i = 0; i < nspan; ++i) { - span[i].tags = TCID0; - } - } - - // hoisting skip is possible - s->go.skip = hoist_skip && consume(span[0].to); - } + assert(opts->eager_skip); + + for (State * s = head; s; s = s->next) { + Span *span = s->go.span; + const size_t nspan = s->go.nSpans; + if (nspan == 0) continue; + + bool hoist_tags = true, hoist_skip = true; + + // do all spans agree on tags? + for (uint32_t i = 1; i < nspan; ++i) { + if (span[i].tags != span[0].tags) { + hoist_tags = false; + break; + } + } + + // do all spans agree on skip? + for (uint32_t i = 0; i < nspan; ++i) { + if (consume(span[i].to) != consume(span[0].to)) { + hoist_skip = false; + break; + } + } + + if (opts->lookahead) { + // skip must go after tags + hoist_skip &= hoist_tags; + } else { + // skip must go before tags + hoist_tags &= hoist_skip; + } + + // hoisting tags is possible + if (hoist_tags) { + s->go.tags = span[0].tags; + for (uint32_t i = 0; i < nspan; ++i) { + span[i].tags = TCID0; + } + } + + // hoisting skip is possible + s->go.skip = hoist_skip && consume(span[0].to); + } } } // namespace re2c diff --git a/re2c/src/ast/ast.cc b/re2c/src/ast/ast.cc index 612641e9..36bd12be 100644 --- a/re2c/src/ast/ast.cc +++ b/re2c/src/ast/ast.cc @@ -11,135 +11,135 @@ free_list AST::flist; const uint32_t AST::MANY = std::numeric_limits::max(); AST::AST(uint32_t l, uint32_t c, type_t t) - : type(t), line(l), column(c) + : type(t), line(l), column(c) { - flist.insert(this); + flist.insert(this); } AST::~AST() { - flist.erase(this); - if (type == TAG) { - delete tag.name; - } else if (type == REF) { - delete ref.name; - } else if (type == STR) { - delete str.chars; - } else if (type == CLS) { - delete cls.ranges; - } + flist.erase(this); + if (type == TAG) { + delete tag.name; + } else if (type == REF) { + delete ref.name; + } else if (type == STR) { + delete str.chars; + } else if (type == CLS) { + delete cls.ranges; + } } const AST *ast_nil(uint32_t l, uint32_t c) { - return new AST(l, c, AST::NIL); + return new AST(l, c, AST::NIL); } const AST *ast_str(uint32_t l, uint32_t c, std::vector *chars, bool icase) { - AST *ast = new AST(l, c, AST::STR); - ast->str.chars = chars; - ast->str.icase = icase; - return ast; + AST *ast = new AST(l, c, AST::STR); + ast->str.chars = chars; + ast->str.icase = icase; + return ast; } const AST *ast_cls(uint32_t l, uint32_t c, std::vector *ranges, bool negated) { - AST *ast = new AST(l, c, AST::CLS); - ast->cls.ranges = ranges; - ast->cls.negated = negated; - return ast; + AST *ast = new AST(l, c, AST::CLS); + ast->cls.ranges = ranges; + ast->cls.negated = negated; + return ast; } const AST *ast_dot(uint32_t l, uint32_t c) { - return new AST(l, c, AST::DOT); + return new AST(l, c, AST::DOT); } const AST *ast_default(uint32_t l, uint32_t c) { - return new AST(l, c, AST::DEFAULT); + return new AST(l, c, AST::DEFAULT); } const AST *ast_alt(const AST *a1, const AST *a2) { - if (!a1) return a2; - if (!a2) return a1; - AST *ast = new AST(a1->line, a1->column, AST::ALT); - ast->alt.ast1 = a1; - ast->alt.ast2 = a2; - return ast; + if (!a1) return a2; + if (!a2) return a1; + AST *ast = new AST(a1->line, a1->column, AST::ALT); + ast->alt.ast1 = a1; + ast->alt.ast2 = a2; + return ast; } const AST *ast_cat(const AST *a1, const AST *a2) { - if (!a1) return a2; - if (!a2) return a1; - AST *ast = new AST(a1->line, a1->column, AST::CAT); - ast->cat.ast1 = a1; - ast->cat.ast2 = a2; - return ast; + if (!a1) return a2; + if (!a2) return a1; + AST *ast = new AST(a1->line, a1->column, AST::CAT); + ast->cat.ast1 = a1; + ast->cat.ast2 = a2; + return ast; } const AST *ast_iter(const AST *a, uint32_t n, uint32_t m) { - AST *ast = new AST(a->line, a->column, AST::ITER); - ast->iter.ast = a; - ast->iter.min = n; - ast->iter.max = m; - return ast; + AST *ast = new AST(a->line, a->column, AST::ITER); + ast->iter.ast = a; + ast->iter.min = n; + ast->iter.max = m; + return ast; } const AST *ast_diff(const AST *a1, const AST *a2) { - AST *ast = new AST(a1->line, a1->column, AST::DIFF); - ast->cat.ast1 = a1; - ast->cat.ast2 = a2; - return ast; + AST *ast = new AST(a1->line, a1->column, AST::DIFF); + ast->cat.ast1 = a1; + ast->cat.ast2 = a2; + return ast; } const AST *ast_tag(uint32_t l, uint32_t c, const std::string *n, bool h) { - AST *ast = new AST(l, c, AST::TAG); - ast->tag.name = n; - ast->tag.history = h; - return ast; + AST *ast = new AST(l, c, AST::TAG); + ast->tag.name = n; + ast->tag.history = h; + return ast; } const AST *ast_cap(const AST *a) { - AST *ast = new AST(a->line, a->column, AST::CAP); - ast->cap = a; - return ast; + AST *ast = new AST(a->line, a->column, AST::CAP); + ast->cap = a; + return ast; } const AST *ast_ref(const AST *a, const std::string &n) { - AST *ast = new AST(a->line, a->column, AST::REF); - ast->ref.ast = a; - ast->ref.name = new std::string(n); - return ast; + AST *ast = new AST(a->line, a->column, AST::REF); + ast->ref.ast = a; + ast->ref.name = new std::string(n); + return ast; } bool ast_need_wrap(const AST *a) { - switch (a->type) { - case AST::ITER: - case AST::NIL: - case AST::STR: - case AST::CLS: - case AST::DOT: - case AST::DEFAULT: - case AST::TAG: - case AST::CAP: - return false; - case AST::ALT: - case AST::CAT: - case AST::DIFF: - case AST::REF: - return true; - } - return false; /* unreachable */ + switch (a->type) { + case AST::ITER: + case AST::NIL: + case AST::STR: + case AST::CLS: + case AST::DOT: + case AST::DEFAULT: + case AST::TAG: + case AST::CAP: + return false; + case AST::ALT: + case AST::CAT: + case AST::DIFF: + case AST::REF: + return true; + } + return false; /* unreachable */ } } // namespace re2c diff --git a/re2c/src/ast/ast.h b/re2c/src/ast/ast.h index e4685f76..a1f5248f 100644 --- a/re2c/src/ast/ast.h +++ b/re2c/src/ast/ast.h @@ -19,99 +19,99 @@ template class free_list; struct ASTChar { - uint32_t chr; - uint32_t column; - ASTChar(uint32_t x, uint32_t c) - : chr(x), column(c) {} + uint32_t chr; + uint32_t column; + ASTChar(uint32_t x, uint32_t c) + : chr(x), column(c) {} }; struct ASTRange { - uint32_t lower; - uint32_t upper; - uint32_t column; - ASTRange(uint32_t l, uint32_t u, uint32_t c) - : lower(l), upper(u), column(c) {} + uint32_t lower; + uint32_t upper; + uint32_t column; + ASTRange(uint32_t l, uint32_t u, uint32_t c) + : lower(l), upper(u), column(c) {} }; /* AST must be immutable and independent of options */ struct AST { - static free_list flist; - static const uint32_t MANY; - - enum type_t - { NIL, STR, CLS, DOT, DEFAULT, ALT - , CAT, ITER, DIFF, TAG, CAP, REF } type; - union { - struct { - const std::vector *chars; - bool icase; - } str; - struct { - const std::vector *ranges; - bool negated; - } cls; - struct { - const AST *ast1; - const AST *ast2; - } alt; - struct { - const AST *ast1; - const AST *ast2; - } cat; - struct { - const AST *ast; - uint32_t min; - uint32_t max; - } iter; - struct { - const AST *ast1; - const AST *ast2; - } diff; - struct { - const std::string *name; - bool history; - } tag; - const AST *cap; - struct { - const AST *ast; - const std::string *name; - } ref; - }; - uint32_t line; - uint32_t column; - - AST(uint32_t l, uint32_t c, type_t t); - ~AST(); + static free_list flist; + static const uint32_t MANY; + + enum type_t + { NIL, STR, CLS, DOT, DEFAULT, ALT + , CAT, ITER, DIFF, TAG, CAP, REF } type; + union { + struct { + const std::vector *chars; + bool icase; + } str; + struct { + const std::vector *ranges; + bool negated; + } cls; + struct { + const AST *ast1; + const AST *ast2; + } alt; + struct { + const AST *ast1; + const AST *ast2; + } cat; + struct { + const AST *ast; + uint32_t min; + uint32_t max; + } iter; + struct { + const AST *ast1; + const AST *ast2; + } diff; + struct { + const std::string *name; + bool history; + } tag; + const AST *cap; + struct { + const AST *ast; + const std::string *name; + } ref; + }; + uint32_t line; + uint32_t column; + + AST(uint32_t l, uint32_t c, type_t t); + ~AST(); }; struct ASTRule { - const AST *ast; - const Code *code; + const AST *ast; + const Code *code; - ASTRule(const AST *r, const Code *c) - : ast(r) - , code(c) - {} + ASTRule(const AST *r, const Code *c) + : ast(r) + , code(c) + {} }; struct ASTBounds { - uint32_t min; - uint32_t max; + uint32_t min; + uint32_t max; }; struct spec_t { - std::string name; - std::vector rules; - std::vector defs; - std::vector setup; + std::string name; + std::vector rules; + std::vector defs; + std::vector setup; - explicit spec_t(const std::string &n): - name(n), rules(), defs(), setup() {} + explicit spec_t(const std::string &n): + name(n), rules(), defs(), setup() {} }; typedef std::vector specs_t; diff --git a/re2c/src/ast/input.cc b/re2c/src/ast/input.cc index e12b4198..b7073ec8 100644 --- a/re2c/src/ast/input.cc +++ b/re2c/src/ast/input.cc @@ -4,32 +4,32 @@ namespace re2c { Input::Input (const char * fn) - : file (NULL) - , file_name (fn) - , escaped_file_name (fn) + : file (NULL) + , file_name (fn) + , escaped_file_name (fn) { - strrreplace (escaped_file_name, "\\", "\\\\"); + strrreplace (escaped_file_name, "\\", "\\\\"); } bool Input::open () { - if (file_name == "") - { - file = stdin; - } - else - { - file = fopen (file_name.c_str (), "rb"); - } - return file != NULL; + if (file_name == "") + { + file = stdin; + } + else + { + file = fopen (file_name.c_str (), "rb"); + } + return file != NULL; } Input::~Input () { - if (file != NULL && file != stdin) - { - fclose (file); - } + if (file != NULL && file != stdin) + { + fclose (file); + } } } // namespace re2c diff --git a/re2c/src/ast/input.h b/re2c/src/ast/input.h index f4fb8f8b..1da3dfc8 100644 --- a/re2c/src/ast/input.h +++ b/re2c/src/ast/input.h @@ -10,15 +10,15 @@ namespace re2c { struct Input { - FILE * file; - const std::string file_name; - std::string escaped_file_name; + FILE * file; + const std::string file_name; + std::string escaped_file_name; - explicit Input (const char * fn); - ~Input (); - bool open (); + explicit Input (const char * fn); + ~Input (); + bool open (); - FORBID_COPY (Input); + FORBID_COPY (Input); }; } // namespace re2c diff --git a/re2c/src/ast/normalize.cc b/re2c/src/ast/normalize.cc index dda268de..959694fc 100644 --- a/re2c/src/ast/normalize.cc +++ b/re2c/src/ast/normalize.cc @@ -8,42 +8,42 @@ namespace re2c { void normalize_ast(specs_t &specs) { - specs_t::iterator i, b = specs.begin(), e = specs.end(); - - // merge <*> rules and setup to all conditions except "0" - // star rules must have lower priority than normal rules - for (i = b; i != e && i->name != "*"; ++i); - if (i != e) { - const specs_t::iterator star = i; - - for (i = b; i != e; ++i) { - if (i == star || i->name == "0") continue; - - i->rules.insert(i->rules.end(), star->rules.begin(), star->rules.end()); - i->defs.insert(i->defs.end(), star->defs.begin(), star->defs.end()); - i->setup.insert(i->setup.end(), star->setup.begin(), star->setup.end()); - } - - specs.erase(star); - e = specs.end(); - } - - // merge default rule with the lowest priority - for (i = b; i != e; ++i) { - if (!i->defs.empty()) { - const Code *c = i->defs[0]; - const AST *r = ast_default(c->fline, 0); - i->rules.push_back(ASTRule(r, c)); - } - } - - // "0" condition must be the first one - for (i = b; i != e && i->name != "0"; ++i); - if (i != e && i != b) { - const spec_t zero = *i; - specs.erase(i); - specs.insert(specs.begin(), zero); - } + specs_t::iterator i, b = specs.begin(), e = specs.end(); + + // merge <*> rules and setup to all conditions except "0" + // star rules must have lower priority than normal rules + for (i = b; i != e && i->name != "*"; ++i); + if (i != e) { + const specs_t::iterator star = i; + + for (i = b; i != e; ++i) { + if (i == star || i->name == "0") continue; + + i->rules.insert(i->rules.end(), star->rules.begin(), star->rules.end()); + i->defs.insert(i->defs.end(), star->defs.begin(), star->defs.end()); + i->setup.insert(i->setup.end(), star->setup.begin(), star->setup.end()); + } + + specs.erase(star); + e = specs.end(); + } + + // merge default rule with the lowest priority + for (i = b; i != e; ++i) { + if (!i->defs.empty()) { + const Code *c = i->defs[0]; + const AST *r = ast_default(c->fline, 0); + i->rules.push_back(ASTRule(r, c)); + } + } + + // "0" condition must be the first one + for (i = b; i != e && i->name != "0"; ++i); + if (i != e && i != b) { + const spec_t zero = *i; + specs.erase(i); + specs.insert(specs.begin(), zero); + } } } // namespace re2c diff --git a/re2c/src/ast/parser.h b/re2c/src/ast/parser.h index 88469ae1..8d1e5d34 100644 --- a/re2c/src/ast/parser.h +++ b/re2c/src/ast/parser.h @@ -19,10 +19,10 @@ typedef std::set CondList; struct context_t { - Scanner &input; - specs_t &specs; - symtab_t &symtab; - Opt &opts; + Scanner &input; + specs_t &specs; + symtab_t &symtab; + Opt &opts; }; void parse(Scanner &input, specs_t &specs, symtab_t &symtab, Opt &opts); diff --git a/re2c/src/ast/scanner.cc b/re2c/src/ast/scanner.cc index 9dd4dcfe..f643996a 100644 --- a/re2c/src/ast/scanner.cc +++ b/re2c/src/ast/scanner.cc @@ -16,83 +16,83 @@ class Warn; const uint32_t Scanner::BSIZE = 8192; ScannerState::ScannerState () - : tok (NULL) - , ptr (NULL) - , cur (NULL) - , mar (NULL) - , pos (NULL) - , ctx (NULL) - , bot (NULL) - , lim (NULL) - , top (NULL) - , eof (NULL) - , tchar (0) - , cline (1) - , lexer_state (LEX_NORMAL) + : tok (NULL) + , ptr (NULL) + , cur (NULL) + , mar (NULL) + , pos (NULL) + , ctx (NULL) + , bot (NULL) + , lim (NULL) + , top (NULL) + , eof (NULL) + , tchar (0) + , cline (1) + , lexer_state (LEX_NORMAL) {} Scanner::Scanner(Input &i, Warn &w) - : ScannerState(), in(i), warn(w) {} + : ScannerState(), in(i), warn(w) {} void Scanner::fill (uint32_t need) { - if(!eof) - { - /* Get rid of everything that was already parsed. */ - const ptrdiff_t diff = tok - bot; - if (diff > 0) - { - const size_t move = static_cast (top - tok); - memmove (bot, tok, move); - tok -= diff; - mar -= diff; - ptr -= diff; - cur -= diff; - pos -= diff; - lim -= diff; - ctx -= diff; - } - /* Increase buffer size. */ - if (BSIZE > need) - { - need = BSIZE; - } - if (static_cast (top - lim) < need) - { - const size_t copy = static_cast (lim - bot); - char * buf = new char[copy + need]; - if (!buf) - { - fatal("Out of memory"); - } - if (copy > 0) { - memcpy (buf, bot, copy); - } - tok = &buf[tok - bot]; - mar = &buf[mar - bot]; - ptr = &buf[ptr - bot]; - cur = &buf[cur - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[need]; - ctx = &buf[ctx - bot]; - delete [] bot; - bot = buf; - } - /* Append to buffer. */ - const size_t have = fread (lim, 1, need, in.file); - if (have != need) - { - eof = &lim[have]; - *eof++ = '\0'; - } - lim += have; - } + if(!eof) + { + /* Get rid of everything that was already parsed. */ + const ptrdiff_t diff = tok - bot; + if (diff > 0) + { + const size_t move = static_cast (top - tok); + memmove (bot, tok, move); + tok -= diff; + mar -= diff; + ptr -= diff; + cur -= diff; + pos -= diff; + lim -= diff; + ctx -= diff; + } + /* Increase buffer size. */ + if (BSIZE > need) + { + need = BSIZE; + } + if (static_cast (top - lim) < need) + { + const size_t copy = static_cast (lim - bot); + char * buf = new char[copy + need]; + if (!buf) + { + fatal("Out of memory"); + } + if (copy > 0) { + memcpy (buf, bot, copy); + } + tok = &buf[tok - bot]; + mar = &buf[mar - bot]; + ptr = &buf[ptr - bot]; + cur = &buf[cur - bot]; + pos = &buf[pos - bot]; + lim = &buf[lim - bot]; + top = &lim[need]; + ctx = &buf[ctx - bot]; + delete [] bot; + bot = buf; + } + /* Append to buffer. */ + const size_t have = fread (lim, 1, need, in.file); + if (have != need) + { + eof = &lim[have]; + *eof++ = '\0'; + } + lim += have; + } } Scanner::~Scanner() { - delete [] bot; + delete [] bot; } } // namespace re2c diff --git a/re2c/src/ast/scanner.h b/re2c/src/ast/scanner.h index 45ed38ca..6db16ab1 100644 --- a/re2c/src/ast/scanner.h +++ b/re2c/src/ast/scanner.h @@ -25,94 +25,94 @@ struct AST; struct ScannerState { - enum lexer_state_t - { - LEX_NORMAL, - LEX_FLEX_NAME - }; - - // positioning - char * tok; - char * ptr; - char * cur; - char * mar; - char * pos; - char * ctx; - - // buffer - char * bot; - char * lim; - char * top; - char * eof; - - ptrdiff_t tchar; - uint32_t cline; - - lexer_state_t lexer_state; - - ScannerState(); - FORBID_COPY(ScannerState); + enum lexer_state_t + { + LEX_NORMAL, + LEX_FLEX_NAME + }; + + // positioning + char * tok; + char * ptr; + char * cur; + char * mar; + char * pos; + char * ctx; + + // buffer + char * bot; + char * lim; + char * top; + char * eof; + + ptrdiff_t tchar; + uint32_t cline; + + lexer_state_t lexer_state; + + ScannerState(); + FORBID_COPY(ScannerState); }; class Scanner: private ScannerState { - static const uint32_t BSIZE; - Input & in; - Warn &warn; - - void fill(uint32_t need); - void lex_end_of_comment(OutputFile &out); - void lex_tags(OutputFile &out, bool mtags); - void set_sourceline (); - uint32_t lex_cls_chr(); - uint32_t lex_str_chr(char quote, bool &end); - const AST *lex_cls(bool neg); - const AST *lex_str(char quote); - void lex_conf_encoding_policy(Opt &opts); - void lex_conf_input(Opt &opts); - void lex_conf_empty_class(Opt &opts); - void lex_conf_dfa_minimization(Opt &opts); - void lex_conf_enc(Enc::type_t enc, Opt &opts); - void lex_conf_assign(); - void lex_conf_semicolon(); - int32_t lex_conf_number(); - bool lex_conf_bool(); - std::string lex_conf_string(); - size_t tok_len () const; + static const uint32_t BSIZE; + Input & in; + Warn &warn; + + void fill(uint32_t need); + void lex_end_of_comment(OutputFile &out); + void lex_tags(OutputFile &out, bool mtags); + void set_sourceline (); + uint32_t lex_cls_chr(); + uint32_t lex_str_chr(char quote, bool &end); + const AST *lex_cls(bool neg); + const AST *lex_str(char quote); + void lex_conf_encoding_policy(Opt &opts); + void lex_conf_input(Opt &opts); + void lex_conf_empty_class(Opt &opts); + void lex_conf_dfa_minimization(Opt &opts); + void lex_conf_enc(Enc::type_t enc, Opt &opts); + void lex_conf_assign(); + void lex_conf_semicolon(); + int32_t lex_conf_number(); + bool lex_conf_bool(); + std::string lex_conf_string(); + size_t tok_len () const; public: - enum ParseMode {Stop, Parse, Reuse, Rules}; - - Scanner(Input&, Warn &w); - ~Scanner(); - ParseMode echo(OutputFile &out); - int scan(const conopt_t *globopts); - void lex_conf(Opt &opts); - uint32_t get_cline() const; - uint32_t get_column() const; - const std::string & get_fname () const; - FORBID_COPY (Scanner); + enum ParseMode {Stop, Parse, Reuse, Rules}; + + Scanner(Input&, Warn &w); + ~Scanner(); + ParseMode echo(OutputFile &out); + int scan(const conopt_t *globopts); + void lex_conf(Opt &opts); + uint32_t get_cline() const; + uint32_t get_column() const; + const std::string & get_fname () const; + FORBID_COPY (Scanner); }; inline size_t Scanner::tok_len () const { - // lexing and fill procedures must maintain: token pointer <= cursor pointer - return static_cast (cur - tok); + // lexing and fill procedures must maintain: token pointer <= cursor pointer + return static_cast (cur - tok); } inline const std::string & Scanner::get_fname () const { - return in.escaped_file_name; + return in.escaped_file_name; } inline uint32_t Scanner::get_cline() const { - return cline; + return cline; } inline uint32_t Scanner::get_column() const { - return static_cast(tok - pos); + return static_cast(tok - pos); } } // end namespace re2c diff --git a/re2c/src/ast/unescape.cc b/re2c/src/ast/unescape.cc index e685cdc6..04bd2efa 100644 --- a/re2c/src/ast/unescape.cc +++ b/re2c/src/ast/unescape.cc @@ -5,56 +5,56 @@ namespace re2c { // expected characters: [0-9a-zA-Z] static inline uint32_t hex_digit (const char c) { - switch (c) - { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 9; - case 'a': - case 'A': return 0xA; - case 'b': - case 'B': return 0xB; - case 'c': - case 'C': return 0xC; - case 'd': - case 'D': return 0xD; - case 'e': - case 'E': return 0xE; - case 'f': - case 'F': return 0xF; - default: return ~0u; // unexpected - } + switch (c) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 0xA; + case 'b': + case 'B': return 0xB; + case 'c': + case 'C': return 0xC; + case 'd': + case 'D': return 0xD; + case 'e': + case 'E': return 0xE; + case 'f': + case 'F': return 0xF; + default: return ~0u; // unexpected + } } // expected string format: "\" [xXuU] [0-9a-zA-Z]* uint32_t unesc_hex (const char * s, const char * s_end) { - uint32_t n = 0; - for (s += 2; s != s_end; ++s) - { - n <<= 4; - n += hex_digit (*s); - } - return n; + uint32_t n = 0; + for (s += 2; s != s_end; ++s) + { + n <<= 4; + n += hex_digit (*s); + } + return n; } // expected string format: "\" [0-7]* uint32_t unesc_oct (const char * s, const char * s_end) { - uint32_t n = 0; - for (++s; s != s_end; ++s) - { - n <<= 3; - n += static_cast (*s - '0'); - } - return n; + uint32_t n = 0; + for (++s; s != s_end; ++s) + { + n <<= 3; + n += static_cast (*s - '0'); + } + return n; } } // namespace re2c diff --git a/re2c/src/ast/validate.cc b/re2c/src/ast/validate.cc index 9f95f767..66ec19b0 100644 --- a/re2c/src/ast/validate.cc +++ b/re2c/src/ast/validate.cc @@ -11,89 +11,89 @@ namespace re2c { void validate_mode(Scanner::ParseMode mode, bool rflag, bool rules, Scanner &input) { - const uint32_t l = input.get_cline(); - if (mode == Scanner::Rules) { - if (!rflag) { - fatal_l(l, "found 'rules:re2c' block without -r flag"); - } else if (rules) { - fatal_l(l, "cannot have a second 'rules:re2c' block"); - } - } else if (mode == Scanner::Reuse) { - if (!rflag) { - fatal_l(l, "found 'use:re2c' block without -r flag"); - } else if (!rules) { - fatal_l(l, "got 'use:re2c' without 'rules:re2c'"); - } - } else if (rflag) { - fatal_l(l, "found standard 're2c' block while using -r flag"); - } + const uint32_t l = input.get_cline(); + if (mode == Scanner::Rules) { + if (!rflag) { + fatal_l(l, "found 'rules:re2c' block without -r flag"); + } else if (rules) { + fatal_l(l, "cannot have a second 'rules:re2c' block"); + } + } else if (mode == Scanner::Reuse) { + if (!rflag) { + fatal_l(l, "found 'use:re2c' block without -r flag"); + } else if (!rules) { + fatal_l(l, "got 'use:re2c' without 'rules:re2c'"); + } + } else if (rflag) { + fatal_l(l, "found standard 're2c' block while using -r flag"); + } } void validate_ast(const specs_t &specs, bool cflag) { - specs_t::const_iterator i, - b = specs.begin(), - e = specs.end(); + specs_t::const_iterator i, + b = specs.begin(), + e = specs.end(); - for (i = b; i != e; ++i) { - if (i->defs.size() > 1) { - fatal_l(i->defs[1]->fline, - "code to default rule %sis already defined at line %u", - incond(i->name).c_str(), i->defs[0]->fline); - } - } + for (i = b; i != e; ++i) { + if (i->defs.size() > 1) { + fatal_l(i->defs[1]->fline, + "code to default rule %sis already defined at line %u", + incond(i->name).c_str(), i->defs[0]->fline); + } + } - if (!cflag) { - for (i = b; i != e; ++i) { - if (i->name != "") { - fatal_l(i->rules[0].code->fline, - "conditions are only allowed with '-c', '--conditions' option"); - } - } - } else { - for (i = b; i != e; ++i) { - if (i->name == "") { - fatal_l(i->rules[0].code->fline, - "non-conditional rules are not allowed" - " with '-c', '--conditions' option"); - } - } + if (!cflag) { + for (i = b; i != e; ++i) { + if (i->name != "") { + fatal_l(i->rules[0].code->fline, + "conditions are only allowed with '-c', '--conditions' option"); + } + } + } else { + for (i = b; i != e; ++i) { + if (i->name == "") { + fatal_l(i->rules[0].code->fline, + "non-conditional rules are not allowed" + " with '-c', '--conditions' option"); + } + } - for (i = b; i != e; ++i) { - if (i->setup.size() > 1) { - fatal_l(i->setup[1]->fline, - "code to setup rule '%s' is already defined at line %u", - i->name.c_str(), i->setup[0]->fline); - } - } + for (i = b; i != e; ++i) { + if (i->setup.size() > 1) { + fatal_l(i->setup[1]->fline, + "code to setup rule '%s' is already defined at line %u", + i->name.c_str(), i->setup[0]->fline); + } + } - for (i = b; i != e; ++i) { - if (i->name != "*" && !i->setup.empty() && i->rules.empty()) { - fatal_l(i->setup[0]->fline, - "setup for non existing condition '%s' found", - i->name.c_str()); - } - } + for (i = b; i != e; ++i) { + if (i->name != "*" && !i->setup.empty() && i->rules.empty()) { + fatal_l(i->setup[0]->fline, + "setup for non existing condition '%s' found", + i->name.c_str()); + } + } - for (i = b; i != e && !i->setup.empty(); ++i); - if (i == e) { - for (i = b; i != e; ++i) { - if (i->name == "*") { - fatal_l(i->setup[0]->fline, - "setup for all conditions '' is illegal " - "if setup for each condition is defined explicitly"); - } - } - } + for (i = b; i != e && !i->setup.empty(); ++i); + if (i == e) { + for (i = b; i != e; ++i) { + if (i->name == "*") { + fatal_l(i->setup[0]->fline, + "setup for all conditions '' is illegal " + "if setup for each condition is defined explicitly"); + } + } + } - for (i = b; i != e; ++i) { - if (i->name == "0" && i->rules.size() > 1) { - fatal_l(i->rules[1].code->fline, - "startup code is already defined at line %u", - i->rules[0].code->fline); - } - } - } + for (i = b; i != e; ++i) { + if (i->name == "0" && i->rules.size() > 1) { + fatal_l(i->rules[1].code->fline, + "startup code is already defined at line %u", + i->rules[0].code->fline); + } + } + } } } // namespace re2c diff --git a/re2c/src/code/bitmap.cc b/re2c/src/code/bitmap.cc index 36a9617d..7c7bb534 100644 --- a/re2c/src/code/bitmap.cc +++ b/re2c/src/code/bitmap.cc @@ -14,137 +14,137 @@ static bool matches(const Go *go1, const State *s1, const Go *go2, const State * static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m); bitmaps_t::bitmaps_t(uint32_t n) - : maps() - , ncunit(n) - , buffer(new uint32_t[ncunit]) - , used(false) + : maps() + , ncunit(n) + , buffer(new uint32_t[ncunit]) + , used(false) {} bitmaps_t::~bitmaps_t() { - delete[] buffer; + delete[] buffer; } void bitmaps_t::insert(const Go *go, const State *s) { - rciter_t i = maps.rbegin(), e = maps.rend(); - for (; i != e; ++i) { - if (matches(i->go, i->on, go, s)) return; - } + rciter_t i = maps.rbegin(), e = maps.rend(); + for (; i != e; ++i) { + if (matches(i->go, i->on, go, s)) return; + } - bitmap_t b = {go, s, 0, 0}; - maps.push_back(b); + bitmap_t b = {go, s, 0, 0}; + maps.push_back(b); } const bitmap_t *bitmaps_t::find(const Go *go, const State *s) const { - rciter_t i = maps.rbegin(), e = maps.rend(); - for (; i != e; ++i) { - if (i->on == s && matches(i->go, i->on, go, s)) return &(*i); - } - return NULL; + rciter_t i = maps.rbegin(), e = maps.rend(); + for (; i != e; ++i) { + if (i->on == s && matches(i->go, i->on, go, s)) return &(*i); + } + return NULL; } bool bitmaps_t::empty() const { return maps.empty(); } void bitmaps_t::gen(OutputFile &o, uint32_t ind) { - if (empty() || !used) return; - - const opt_t *opts = o.block().opts; - const uint32_t nmap = static_cast(maps.size()); - riter_t b = maps.rbegin(), e = maps.rend(); - - o.wind(ind).ws("static const unsigned char ") - .wstring(opts->yybm).ws("[] = {"); - - for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += 8) { - memset(buffer, 0, ncunit * sizeof(uint32_t)); - - for (uint32_t m = 0x80; b != e && m; m >>= 1, ++b) { - b->i = i; - b->m = m; - doGen(b->go, b->on, buffer, 0, m); - } - - if (nmap > 8) { - o.ws("\n").wind(ind + 1).ws("/* table ").wu32(t).ws(" .. ") - .wu32(std::min(nmap, t + 7)).ws(": ").wu32(i).ws(" */"); - } - - for (uint32_t c = 0; c < ncunit; ++c) { - if (c % 8 == 0) { - o.ws("\n").wind(ind + 1); - } - if (opts->yybmHexTable) { - o.wu32_hex(buffer[c]); - } else { - o.wu32_width(buffer[c], 3); - } - o.ws(", "); - } - } - - o.ws("\n").wind(ind).ws("};\n"); + if (empty() || !used) return; + + const opt_t *opts = o.block().opts; + const uint32_t nmap = static_cast(maps.size()); + riter_t b = maps.rbegin(), e = maps.rend(); + + o.wind(ind).ws("static const unsigned char ") + .wstring(opts->yybm).ws("[] = {"); + + for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += 8) { + memset(buffer, 0, ncunit * sizeof(uint32_t)); + + for (uint32_t m = 0x80; b != e && m; m >>= 1, ++b) { + b->i = i; + b->m = m; + doGen(b->go, b->on, buffer, 0, m); + } + + if (nmap > 8) { + o.ws("\n").wind(ind + 1).ws("/* table ").wu32(t).ws(" .. ") + .wu32(std::min(nmap, t + 7)).ws(": ").wu32(i).ws(" */"); + } + + for (uint32_t c = 0; c < ncunit; ++c) { + if (c % 8 == 0) { + o.ws("\n").wind(ind + 1); + } + if (opts->yybmHexTable) { + o.wu32_hex(buffer[c]); + } else { + o.wu32_width(buffer[c], 3); + } + o.ws(", "); + } + } + + o.ws("\n").wind(ind).ws("};\n"); } void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m) { - Span *b = g->span, *e = &b[g->nSpans]; - uint32_t lb = 0; - - for (; b < e; ++b) - { - if (b->to == s) - { - for (; lb < b->ub && lb < 256; ++lb) - { - bm[lb-f] |= m; - } - } - - lb = b->ub; - } + Span *b = g->span, *e = &b[g->nSpans]; + uint32_t lb = 0; + + for (; b < e; ++b) + { + if (b->to == s) + { + for (; lb < b->ub && lb < 256; ++lb) + { + bm[lb-f] |= m; + } + } + + lb = b->ub; + } } // All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2 bool matches(const Go *go1, const State *s1, const Go *go2, const State *s2) { - const Span - *b1 = go1->span, *e1 = &b1[go1->nSpans], - *b2 = go2->span, *e2 = &b2[go2->nSpans]; - uint32_t lb1 = 0, lb2 = 0; - - for (;;) - { - for (; b1 < e1 && b1->to != s1; ++b1) - { - lb1 = b1->ub; - } - for (; b2 < e2 && b2->to != s2; ++b2) - { - lb2 = b2->ub; - } - if (b1 == e1) - { - return b2 == e2; - } - if (b2 == e2) - { - return false; - } - // tags are forbidden: transitions on different symbols - // might go to the same state, but have different tag sets - if (lb1 != lb2 - || b1->ub != b2->ub - || b1->tags != TCID0 - || b2->tags != TCID0) - { - return false; - } - ++b1; - ++b2; - } + const Span + *b1 = go1->span, *e1 = &b1[go1->nSpans], + *b2 = go2->span, *e2 = &b2[go2->nSpans]; + uint32_t lb1 = 0, lb2 = 0; + + for (;;) + { + for (; b1 < e1 && b1->to != s1; ++b1) + { + lb1 = b1->ub; + } + for (; b2 < e2 && b2->to != s2; ++b2) + { + lb2 = b2->ub; + } + if (b1 == e1) + { + return b2 == e2; + } + if (b2 == e2) + { + return false; + } + // tags are forbidden: transitions on different symbols + // might go to the same state, but have different tag sets + if (lb1 != lb2 + || b1->ub != b2->ub + || b1->tags != TCID0 + || b2->tags != TCID0) + { + return false; + } + ++b1; + ++b2; + } } } // end namespace re2c diff --git a/re2c/src/code/bitmap.h b/re2c/src/code/bitmap.h index 4ece4726..0a996488 100644 --- a/re2c/src/code/bitmap.h +++ b/re2c/src/code/bitmap.h @@ -15,32 +15,32 @@ struct State; struct bitmap_t { - const Go *go; - const State *on; - uint32_t i; - uint32_t m; + const Go *go; + const State *on; + uint32_t i; + uint32_t m; }; class bitmaps_t { - typedef std::vector maps_t; - typedef maps_t::reverse_iterator riter_t; - typedef maps_t::const_reverse_iterator rciter_t; + typedef std::vector maps_t; + typedef maps_t::reverse_iterator riter_t; + typedef maps_t::const_reverse_iterator rciter_t; - maps_t maps; - uint32_t ncunit; - uint32_t *buffer; + maps_t maps; + uint32_t ncunit; + uint32_t *buffer; public: - bool used; - - explicit bitmaps_t(uint32_t n); - ~bitmaps_t(); - void insert(const Go *go, const State *s); - const bitmap_t *find(const Go *go, const State *s) const; - bool empty() const; - void gen(OutputFile &o, uint32_t ind); - FORBID_COPY(bitmaps_t); + bool used; + + explicit bitmaps_t(uint32_t n); + ~bitmaps_t(); + void insert(const Go *go, const State *s); + const bitmap_t *find(const Go *go, const State *s) const; + bool empty() const; + void gen(OutputFile &o, uint32_t ind); + FORBID_COPY(bitmaps_t); }; } // namespace re2c diff --git a/re2c/src/code/emit.h b/re2c/src/code/emit.h index ddef595b..2f7276fa 100644 --- a/re2c/src/code/emit.h +++ b/re2c/src/code/emit.h @@ -19,9 +19,9 @@ std::string tagname(const Tag &tag); inline std::string indent(uint32_t n, const std::string &s) { - std::string ind; - for (; n --> 0; ind += s); - return ind; + std::string ind; + for (; n --> 0; ind += s); + return ind; } } // namespace re2c diff --git a/re2c/src/code/emit_action.cc b/re2c/src/code/emit_action.cc index 732e0cef..af0797b6 100644 --- a/re2c/src/code/emit_action.cc +++ b/re2c/src/code/emit_action.cc @@ -34,440 +34,440 @@ static void gen_goto (code_lines_t &code, const State *to, const DFA & static bool endstate (const State *s); void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, - const State *s, const std::set &used_labels) + const State *s, const std::set &used_labels) { - const opt_t *opts = o.block().opts; - switch (s->action.type) { - case Action::MATCH: - o.wdelay_skip(ind, !opts->eager_skip); - need(o, ind, s->fill); - o.wdelay_peek(ind, !endstate(s)); - break; - case Action::INITIAL: { - const Initial &init = *s->action.info.initial; - const bool - backup = init.save != Initial::NOSAVE, - ul1 = used_labels.count(s->label); - if (ul1 && dfa.accepts.size() > 1 && backup) { - o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(init.save).ws(";\n"); - } - o.wdelay_skip(ind, ul1 && !opts->eager_skip); - if (used_labels.count(init.label)) { - o.wstring(opts->labelPrefix).wlabel(init.label).wstring(":\n"); - } - if (opts->dFlag) { - o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(init.label) - .ws(", *").wstring(opts->yycursor).ws(");\n"); - } - need(o, ind, s->fill); - o.wdelay_backup(ind, backup); - o.wdelay_peek(ind, !endstate(s)); - break; - } - case Action::SAVE: - if (dfa.accepts.size() > 1) { - o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(s->action.info.save).ws(";\n"); - } - o.wdelay_skip(ind, !opts->eager_skip); - o.wdelay_backup(ind, true); - need(o, ind, s->fill); - o.wdelay_peek(ind, true); - break; - case Action::MOVE: - break; - case Action::ACCEPT: - emit_accept(o, ind, dfa, *s->action.info.accepts); - break; - case Action::RULE: - emit_rule(o, ind, dfa, s->action.info.rule); - break; - } + const opt_t *opts = o.block().opts; + switch (s->action.type) { + case Action::MATCH: + o.wdelay_skip(ind, !opts->eager_skip); + need(o, ind, s->fill); + o.wdelay_peek(ind, !endstate(s)); + break; + case Action::INITIAL: { + const Initial &init = *s->action.info.initial; + const bool + backup = init.save != Initial::NOSAVE, + ul1 = used_labels.count(s->label); + if (ul1 && dfa.accepts.size() > 1 && backup) { + o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(init.save).ws(";\n"); + } + o.wdelay_skip(ind, ul1 && !opts->eager_skip); + if (used_labels.count(init.label)) { + o.wstring(opts->labelPrefix).wlabel(init.label).wstring(":\n"); + } + if (opts->dFlag) { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(init.label) + .ws(", *").wstring(opts->yycursor).ws(");\n"); + } + need(o, ind, s->fill); + o.wdelay_backup(ind, backup); + o.wdelay_peek(ind, !endstate(s)); + break; + } + case Action::SAVE: + if (dfa.accepts.size() > 1) { + o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu64(s->action.info.save).ws(";\n"); + } + o.wdelay_skip(ind, !opts->eager_skip); + o.wdelay_backup(ind, true); + need(o, ind, s->fill); + o.wdelay_peek(ind, true); + break; + case Action::MOVE: + break; + case Action::ACCEPT: + emit_accept(o, ind, dfa, *s->action.info.accepts); + break; + case Action::RULE: + emit_rule(o, ind, dfa, s->action.info.rule); + break; + } } void emit_accept_binary(OutputFile &o, uint32_t ind, const DFA &dfa, - const accept_t &acc, size_t l, size_t r) + const accept_t &acc, size_t l, size_t r) { - const opt_t *opts = o.block().opts; - if (l < r) { - const size_t m = (l + r) >> 1; - o.wind(ind).ws("if (").wstring(opts->yyaccept) - .ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n"); - emit_accept_binary (o, ++ind, dfa, acc, l, m); - o.wind(--ind).ws("} else {\n"); - emit_accept_binary (o, ++ind, dfa, acc, m + 1, r); - o.wind(--ind).ws("}\n"); - } else { - gen_goto_plain(o, ind, acc[l].first, dfa, acc[l].second, false); - } + const opt_t *opts = o.block().opts; + if (l < r) { + const size_t m = (l + r) >> 1; + o.wind(ind).ws("if (").wstring(opts->yyaccept) + .ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n"); + emit_accept_binary (o, ++ind, dfa, acc, l, m); + o.wind(--ind).ws("} else {\n"); + emit_accept_binary (o, ++ind, dfa, acc, m + 1, r); + o.wind(--ind).ws("}\n"); + } else { + gen_goto_plain(o, ind, acc[l].first, dfa, acc[l].second, false); + } } void emit_accept(OutputFile &o, uint32_t ind, const DFA &dfa, const accept_t &acc) { - const opt_t *opts = o.block().opts; - const size_t nacc = acc.size(); - - if (nacc == 0) return; - - o.wstring(output_restore(ind, opts)); - - // only one possible 'yyaccept' value: unconditional jump - if (nacc == 1) { - gen_goto_plain(o, ind, acc[0].first, dfa, acc[0].second, false); - return; - } - - bool have_tags = false; - for (size_t i = 0; i < nacc; ++i) { - if (acc[i].second != TCID0) { - have_tags = true; - break; - } - } - - // jump table - if (opts->gFlag && nacc >= opts->cGotoThreshold && !have_tags) { - o.wind(ind).ws("{\n") - .wind(ind + 1).ws("static void *") - .wstring(opts->yytarget).ws("[") - .wu64(nacc).ws("] = {\n"); - for (uint32_t i = 0; i < nacc; ++i) { - o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix) - .wlabel(acc[i].first->label).ws(",\n"); - } - o.wind(ind + 1).ws("};\n") - .wind(ind + 1).ws("goto *") - .wstring(opts->yytarget).ws("[") - .wstring(opts->yyaccept).ws("];\n") - .wind(ind).ws("}\n"); - return; - } - - // nested ifs - if (opts->sFlag || nacc == 2) { - emit_accept_binary(o, ind, dfa, acc, 0, nacc - 1); - return; - } - - // switch - o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n"); - for (uint32_t i = 0; i < nacc - 1; ++i) { - o.wind(ind).ws("case ").wu32(i).ws(": "); - gen_goto_case(o, ind, acc[i].first, dfa, acc[i].second, false); - } - o.wind(ind).ws("default:"); - gen_goto_case(o, ind, acc[nacc - 1].first, dfa, acc[nacc - 1].second, false); - o.wind(ind).ws("}\n"); + const opt_t *opts = o.block().opts; + const size_t nacc = acc.size(); + + if (nacc == 0) return; + + o.wstring(output_restore(ind, opts)); + + // only one possible 'yyaccept' value: unconditional jump + if (nacc == 1) { + gen_goto_plain(o, ind, acc[0].first, dfa, acc[0].second, false); + return; + } + + bool have_tags = false; + for (size_t i = 0; i < nacc; ++i) { + if (acc[i].second != TCID0) { + have_tags = true; + break; + } + } + + // jump table + if (opts->gFlag && nacc >= opts->cGotoThreshold && !have_tags) { + o.wind(ind).ws("{\n") + .wind(ind + 1).ws("static void *") + .wstring(opts->yytarget).ws("[") + .wu64(nacc).ws("] = {\n"); + for (uint32_t i = 0; i < nacc; ++i) { + o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix) + .wlabel(acc[i].first->label).ws(",\n"); + } + o.wind(ind + 1).ws("};\n") + .wind(ind + 1).ws("goto *") + .wstring(opts->yytarget).ws("[") + .wstring(opts->yyaccept).ws("];\n") + .wind(ind).ws("}\n"); + return; + } + + // nested ifs + if (opts->sFlag || nacc == 2) { + emit_accept_binary(o, ind, dfa, acc, 0, nacc - 1); + return; + } + + // switch + o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n"); + for (uint32_t i = 0; i < nacc - 1; ++i) { + o.wind(ind).ws("case ").wu32(i).ws(": "); + gen_goto_case(o, ind, acc[i].first, dfa, acc[i].second, false); + } + o.wind(ind).ws("default:"); + gen_goto_case(o, ind, acc[nacc - 1].first, dfa, acc[nacc - 1].second, false); + o.wind(ind).ws("}\n"); } void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) { - const opt_t *opts = o.block().opts; - const Rule &rule = dfa.rules[rule_idx]; - const Code *code = rule.code; - const std::string &cond = code->cond; - std::string s; - - gen_fintags(o, ind, dfa, rule); - - if (opts->target == TARGET_SKELETON) { - emit_action(o, ind, dfa, rule_idx); - } else { - if (!cond.empty() && dfa.cond != cond) { - strrreplace(s = opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + cond); - o.wind(ind).wstring(s); - if (!opts->cond_set_naked) { - o.ws("(").wstring(opts->condEnumPrefix).wstring(cond).ws(");"); - } - o.ws("\n"); - } - if (!code->autogen) { - if (!dfa.setup.empty()) { - o.wind(ind).wstring(dfa.setup).ws("\n"); - } - o.wdelay_line_info_input(code->fline, code->fname) - .wind(ind).wstring(code->text).ws("\n") - .wdelay_line_info_output(); - } else if (!cond.empty()) { - strrreplace(s = opts->condGoto, opts->condGotoParam, opts->condPrefix + cond); - o.wind(ind).wstring(s).ws("\n"); - } - } + const opt_t *opts = o.block().opts; + const Rule &rule = dfa.rules[rule_idx]; + const Code *code = rule.code; + const std::string &cond = code->cond; + std::string s; + + gen_fintags(o, ind, dfa, rule); + + if (opts->target == TARGET_SKELETON) { + emit_action(o, ind, dfa, rule_idx); + } else { + if (!cond.empty() && dfa.cond != cond) { + strrreplace(s = opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + cond); + o.wind(ind).wstring(s); + if (!opts->cond_set_naked) { + o.ws("(").wstring(opts->condEnumPrefix).wstring(cond).ws(");"); + } + o.ws("\n"); + } + if (!code->autogen) { + if (!dfa.setup.empty()) { + o.wind(ind).wstring(dfa.setup).ws("\n"); + } + o.wdelay_line_info_input(code->fline, code->fname) + .wind(ind).wstring(code->text).ws("\n") + .wdelay_line_info_output(); + } else if (!cond.empty()) { + strrreplace(s = opts->condGoto, opts->condGotoParam, opts->condPrefix + cond); + o.wind(ind).wstring(s).ws("\n"); + } + } } void need(OutputFile &o, uint32_t ind, size_t some) { - if (some == 0) return; - - const opt_t *opts = o.block().opts; - std::string s; - - if (opts->fFlag) { - strrreplace(s = opts->state_set, opts->state_set_arg, o.fill_index); - o.wind(ind).wstring(s); - if (!opts->state_set_naked) { - o.ws("(").wu32(o.fill_index).ws(");"); - } - o.ws("\n"); - } - - if (opts->fill_use) { - o.wind(ind); - if (opts->fill_check) { - o.ws("if (").wstring(output_expr_lessthan(some, opts)).ws(") "); - } - strrreplace(s = opts->fill, opts->fill_arg, some); - o.wstring(s); - if (!opts->fill_naked) { - if (opts->fill_arg_use) { - o.ws("(").wu64(some).ws(")"); - } - o.ws(";"); - } - o.ws("\n"); - } - - if (opts->fFlag) { - o.wstring(opts->yyfilllabel).wu32(o.fill_index).ws(":\n"); - ++o.fill_index; - } + if (some == 0) return; + + const opt_t *opts = o.block().opts; + std::string s; + + if (opts->fFlag) { + strrreplace(s = opts->state_set, opts->state_set_arg, o.fill_index); + o.wind(ind).wstring(s); + if (!opts->state_set_naked) { + o.ws("(").wu32(o.fill_index).ws(");"); + } + o.ws("\n"); + } + + if (opts->fill_use) { + o.wind(ind); + if (opts->fill_check) { + o.ws("if (").wstring(output_expr_lessthan(some, opts)).ws(") "); + } + strrreplace(s = opts->fill, opts->fill_arg, some); + o.wstring(s); + if (!opts->fill_naked) { + if (opts->fill_arg_use) { + o.ws("(").wu64(some).ws(")"); + } + o.ws(";"); + } + o.ws("\n"); + } + + if (opts->fFlag) { + o.wstring(opts->yyfilllabel).wu32(o.fill_index).ws(":\n"); + ++o.fill_index; + } } void gen_goto_case(OutputFile &o, uint32_t ind, const State *to, - const DFA &dfa, tcid_t tcid, bool skip) + const DFA &dfa, tcid_t tcid, bool skip) { - code_lines_t code; - gen_goto(code, to, dfa, tcid, o.block().opts, skip); - const size_t lines = code.size(); - - if (lines == 1) { - o.wind(1).wstring(code[0]); - } else { - o.ws("\n"); - for (size_t i = 0; i < lines; ++i) { - o.wind(ind + 1).wstring(code[i]); - } - } + code_lines_t code; + gen_goto(code, to, dfa, tcid, o.block().opts, skip); + const size_t lines = code.size(); + + if (lines == 1) { + o.wind(1).wstring(code[0]); + } else { + o.ws("\n"); + for (size_t i = 0; i < lines; ++i) { + o.wind(ind + 1).wstring(code[i]); + } + } } void gen_goto_if(OutputFile &o, uint32_t ind, const State *to, - const DFA &dfa, tcid_t tcid, bool skip) + const DFA &dfa, tcid_t tcid, bool skip) { - code_lines_t code; - gen_goto(code, to, dfa, tcid, o.block().opts, skip); - const size_t lines = code.size(); - - if (lines == 1) { - o.wstring(code[0]); - } else { - o.ws("{\n"); - for (size_t i = 0; i < lines; ++i) { - o.wind(ind + 1).wstring(code[i]); - } - o.wind(ind).ws("}\n"); - } + code_lines_t code; + gen_goto(code, to, dfa, tcid, o.block().opts, skip); + const size_t lines = code.size(); + + if (lines == 1) { + o.wstring(code[0]); + } else { + o.ws("{\n"); + for (size_t i = 0; i < lines; ++i) { + o.wind(ind + 1).wstring(code[i]); + } + o.wind(ind).ws("}\n"); + } } void gen_goto_plain(OutputFile &o, uint32_t ind, const State *to, - const DFA &dfa, tcid_t tcid, bool skip) + const DFA &dfa, tcid_t tcid, bool skip) { - code_lines_t code; - gen_goto(code, to, dfa, tcid, o.block().opts, skip); - const size_t lines = code.size(); + code_lines_t code; + gen_goto(code, to, dfa, tcid, o.block().opts, skip); + const size_t lines = code.size(); - for (size_t i = 0; i < lines; ++i) { - o.wind(ind).wstring(code[i]); - } + for (size_t i = 0; i < lines; ++i) { + o.wind(ind).wstring(code[i]); + } } void gen_goto(code_lines_t &code, const State *to, const DFA &dfa, - tcid_t tcid, const opt_t *opts, bool skip) + tcid_t tcid, const opt_t *opts, bool skip) { - std::ostringstream s; - output_skip(s, 0, opts); - - if (skip && !opts->lookahead) { - code.push_back(s.str()); - } - gen_settags(code, dfa, tcid, opts); - if (skip && opts->lookahead) { - code.push_back(s.str()); - } - if (to) { - code.push_back("goto " + opts->labelPrefix - + to_string(to->label) + ";\n"); - } + std::ostringstream s; + output_skip(s, 0, opts); + + if (skip && !opts->lookahead) { + code.push_back(s.str()); + } + gen_settags(code, dfa, tcid, opts); + if (skip && opts->lookahead) { + code.push_back(s.str()); + } + if (to) { + code.push_back("goto " + opts->labelPrefix + + to_string(to->label) + ";\n"); + } } void gen_settags(code_lines_t &code, const DFA &dfa, tcid_t tcid, const opt_t *opts) { - const bool generic = opts->input_api == INPUT_CUSTOM; - const std::string - &prefix = opts->tags_prefix, - &expression = opts->tags_expression; - const tcmd_t *cmd = dfa.tcpool[tcid]; - - // single tag YYCTXMARKER, backwards compatibility - if (cmd && dfa.oldstyle_ctxmarker) { - const std::string s = generic - ? opts->yybackupctx + " ();\n" - : opts->yyctxmarker + " = " + opts->yycursor + ";\n"; - code.push_back(s); - return; - } - - for (const tcmd_t *p = cmd; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - - // copy command - if (tcmd_t::iscopy(p)) { - const std::string - le = vartag_expr(l, prefix, expression), - re = vartag_expr(r, prefix, expression), - s = le + " = " + re + ";\n"; - code.push_back(s); - - // save command; history - } else if (tcmd_t::isadd(p)) { - const std::string - le = vartag_expr(l, prefix, expression), - re = vartag_expr(r, prefix, expression); - if (l != r) { - const std::string s = le + " = " + re + ";\n"; - code.push_back(s); - } - code_lines_t code1; - for (; *h != TAGVER_ZERO; ++h) { - const std::string s = *h == TAGVER_BOTTOM - ? opts->yymtagn + " (" + le + ");\n" - : opts->yymtagp + " (" + le + ");\n"; - code1.push_back(s); - } - code.insert(code.end(), code1.rbegin(), code1.rend()); - - // save command; no history; generic API - } else if (generic) { - const std::string - v = vartag_expr(l, prefix, expression), - s = *h == TAGVER_BOTTOM - ? opts->yystagn + " (" + v + ");\n" - : opts->yystagp + " (" + v + ");\n"; - code.push_back(s); - - // save command; no history; default API - } else { - std::string s1 = "", s2 = ""; - for (const tcmd_t *q = p; q && tcmd_t::isset(q); p = q, q = q->next) { - std::string &s = q->history[0] == TAGVER_BOTTOM ? s1 : s2; - s += vartag_expr(q->lhs, prefix, expression) + " = "; - } - if (!s1.empty()) { - s1 += "NULL;\n"; - code.push_back(s1); - } - if (!s2.empty()) { - s2 += opts->yycursor + ";\n"; - code.push_back(s2); - } - } - } + const bool generic = opts->input_api == INPUT_CUSTOM; + const std::string + &prefix = opts->tags_prefix, + &expression = opts->tags_expression; + const tcmd_t *cmd = dfa.tcpool[tcid]; + + // single tag YYCTXMARKER, backwards compatibility + if (cmd && dfa.oldstyle_ctxmarker) { + const std::string s = generic + ? opts->yybackupctx + " ();\n" + : opts->yyctxmarker + " = " + opts->yycursor + ";\n"; + code.push_back(s); + return; + } + + for (const tcmd_t *p = cmd; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + + // copy command + if (tcmd_t::iscopy(p)) { + const std::string + le = vartag_expr(l, prefix, expression), + re = vartag_expr(r, prefix, expression), + s = le + " = " + re + ";\n"; + code.push_back(s); + + // save command; history + } else if (tcmd_t::isadd(p)) { + const std::string + le = vartag_expr(l, prefix, expression), + re = vartag_expr(r, prefix, expression); + if (l != r) { + const std::string s = le + " = " + re + ";\n"; + code.push_back(s); + } + code_lines_t code1; + for (; *h != TAGVER_ZERO; ++h) { + const std::string s = *h == TAGVER_BOTTOM + ? opts->yymtagn + " (" + le + ");\n" + : opts->yymtagp + " (" + le + ");\n"; + code1.push_back(s); + } + code.insert(code.end(), code1.rbegin(), code1.rend()); + + // save command; no history; generic API + } else if (generic) { + const std::string + v = vartag_expr(l, prefix, expression), + s = *h == TAGVER_BOTTOM + ? opts->yystagn + " (" + v + ");\n" + : opts->yystagp + " (" + v + ");\n"; + code.push_back(s); + + // save command; no history; default API + } else { + std::string s1 = "", s2 = ""; + for (const tcmd_t *q = p; q && tcmd_t::isset(q); p = q, q = q->next) { + std::string &s = q->history[0] == TAGVER_BOTTOM ? s1 : s2; + s += vartag_expr(q->lhs, prefix, expression) + " = "; + } + if (!s1.empty()) { + s1 += "NULL;\n"; + code.push_back(s1); + } + if (!s2.empty()) { + s2 += opts->yycursor + ";\n"; + code.push_back(s2); + } + } + } } void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) { - const opt_t *opts = o.block().opts; - const bool generic = opts->input_api == INPUT_CUSTOM; - const std::string - &prefix = opts->tags_prefix, - &expression = opts->tags_expression; - std::string expr; - const std::vector &tags = dfa.tags; - const tagver_t *fins = dfa.finvers; - - if (rule.ncap > 0) { - o.wind(ind).ws("yynmatch = ").wu64(rule.ncap).ws(";\n"); - } - - // variable tags - for (size_t t = rule.ltag; t < rule.htag; ++t) { - const Tag &tag = tags[t]; - - // see note [fixed and variable tags] - if (fictive(tag) || fixed(tag)) continue; - - expr = vartag_expr(fins[t], prefix, expression); - - o.wind(ind); - if (!trailing(tag)) { - o.wstring(tagname(tag)).ws(" = ").wstring(expr); - } else if (generic) { - if (dfa.oldstyle_ctxmarker) { - o.wstring(opts->yyrestorectx).ws(" ()"); - } else { - o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); - } - } else { - if (dfa.oldstyle_ctxmarker) { - o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker); - } else { - o.wstring(opts->yycursor).ws(" = ").wstring(expr); - } - } - o.ws(";\n"); - } - - // fixed tags - for (size_t t = rule.ltag; t < rule.htag; ++t) { - const Tag &tag = tags[t]; - - // see note [fixed and variable tags] - if (fictive(tag) || !fixed(tag)) continue; - - const size_t dist = tag.dist; - const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST; - expr = fixed_on_cursor ? opts->yycursor - : vartag_expr(fins[tag.base], prefix, expression); - - o.wind(ind); - if (generic) { - assert(dist == 0); - if (!trailing(tag)) { - o.wstring(tagname(tag)).ws(" = ").wstring(expr); - } else if (!fixed_on_cursor) { - assert(!dfa.oldstyle_ctxmarker); - o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); - } - } else { - if (!trailing(tag)) { - o.wstring(tagname(tag)).ws(" = ").wstring(expr); - if (dist > 0) o.ws(" - ").wu64(dist); - } else if (!fixed_on_cursor) { - o.wstring(opts->yycursor).ws(" = ").wstring(expr); - if (dist > 0) o.ws(" - ").wu64(dist); - } else if (dist > 0) { - o.wstring(opts->yycursor).ws(" -= ").wu64(dist); - } - } - o.ws(";\n"); - } + const opt_t *opts = o.block().opts; + const bool generic = opts->input_api == INPUT_CUSTOM; + const std::string + &prefix = opts->tags_prefix, + &expression = opts->tags_expression; + std::string expr; + const std::vector &tags = dfa.tags; + const tagver_t *fins = dfa.finvers; + + if (rule.ncap > 0) { + o.wind(ind).ws("yynmatch = ").wu64(rule.ncap).ws(";\n"); + } + + // variable tags + for (size_t t = rule.ltag; t < rule.htag; ++t) { + const Tag &tag = tags[t]; + + // see note [fixed and variable tags] + if (fictive(tag) || fixed(tag)) continue; + + expr = vartag_expr(fins[t], prefix, expression); + + o.wind(ind); + if (!trailing(tag)) { + o.wstring(tagname(tag)).ws(" = ").wstring(expr); + } else if (generic) { + if (dfa.oldstyle_ctxmarker) { + o.wstring(opts->yyrestorectx).ws(" ()"); + } else { + o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); + } + } else { + if (dfa.oldstyle_ctxmarker) { + o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker); + } else { + o.wstring(opts->yycursor).ws(" = ").wstring(expr); + } + } + o.ws(";\n"); + } + + // fixed tags + for (size_t t = rule.ltag; t < rule.htag; ++t) { + const Tag &tag = tags[t]; + + // see note [fixed and variable tags] + if (fictive(tag) || !fixed(tag)) continue; + + const size_t dist = tag.dist; + const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST; + expr = fixed_on_cursor ? opts->yycursor + : vartag_expr(fins[tag.base], prefix, expression); + + o.wind(ind); + if (generic) { + assert(dist == 0); + if (!trailing(tag)) { + o.wstring(tagname(tag)).ws(" = ").wstring(expr); + } else if (!fixed_on_cursor) { + assert(!dfa.oldstyle_ctxmarker); + o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); + } + } else { + if (!trailing(tag)) { + o.wstring(tagname(tag)).ws(" = ").wstring(expr); + if (dist > 0) o.ws(" - ").wu64(dist); + } else if (!fixed_on_cursor) { + o.wstring(opts->yycursor).ws(" = ").wstring(expr); + if (dist > 0) o.ws(" - ").wu64(dist); + } else if (dist > 0) { + o.wstring(opts->yycursor).ws(" -= ").wu64(dist); + } + } + o.ws(";\n"); + } } std::string tagname(const Tag &tag) { - assert(!trailing(tag)); - return capture(tag) - ? "yypmatch[" + to_string(tag.ncap) + "]" - : *tag.name; + assert(!trailing(tag)); + return capture(tag) + ? "yypmatch[" + to_string(tag.ncap) + "]" + : *tag.name; } bool endstate(const State *s) { - // 'end' state is a state which has no outgoing transitions on symbols - // usually 'end' states are final states (not all final states are 'end' - // states), but sometimes 'end' state happens to be initial non-accepting - // state, e.g. in case of rule '[]' - const Action::type_t &a = s->go.span[0].to->action.type; - return s->go.nSpans == 1 - && (a == Action::RULE || a == Action::ACCEPT); + // 'end' state is a state which has no outgoing transitions on symbols + // usually 'end' states are final states (not all final states are 'end' + // states), but sometimes 'end' state happens to be initial non-accepting + // state, e.g. in case of rule '[]' + const Action::type_t &a = s->go.span[0].to->action.type; + return s->go.nSpans == 1 + && (a == Action::RULE || a == Action::ACCEPT); } } // namespace re2c diff --git a/re2c/src/code/emit_dfa.cc b/re2c/src/code/emit_dfa.cc index 33a040e9..dd0c56e6 100644 --- a/re2c/src/code/emit_dfa.cc +++ b/re2c/src/code/emit_dfa.cc @@ -29,257 +29,257 @@ static void emit_state(OutputFile & o, uint32_t ind, const State * s, bool used_ void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label) { - const opt_t *opts = o.block().opts; - if (used_label) - { - o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n"); - } - if (opts->dFlag && (s->action.type != Action::INITIAL)) - { - o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(output_expr_peek(opts)).ws(");\n"); - } + const opt_t *opts = o.block().opts; + if (used_label) + { + o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n"); + } + if (opts->dFlag && (s->action.type != Action::INITIAL)) + { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(output_expr_peek(opts)).ws(");\n"); + } } void DFA::count_used_labels (std::set & used, label_t start, - label_t initial, bool force_start, bool fFlag) const + label_t initial, bool force_start, bool fFlag) const { - // In '-f' mode, default state is always state 0 - if (fFlag) - { - used.insert (label_t::first ()); - } - if (force_start) - { - used.insert (start); - } - for (State * s = head; s; s = s->next) - { - s->go.used_labels (used); - } - for (uint32_t i = 0; i < accepts.size (); ++i) - { - used.insert (accepts[i].first->label); - } - // must go last: it needs the set of used labels - if (used.count (head->label)) - { - used.insert (initial); - } + // In '-f' mode, default state is always state 0 + if (fFlag) + { + used.insert (label_t::first ()); + } + if (force_start) + { + used.insert (start); + } + for (State * s = head; s; s = s->next) + { + s->go.used_labels (used); + } + for (uint32_t i = 0; i < accepts.size (); ++i) + { + used.insert (accepts[i].first->label); + } + // must go last: it needs the set of used labels + if (used.count (head->label)) + { + used.insert (initial); + } } void DFA::emit_body(OutputFile &o, uint32_t& ind, - const std::set &used_labels, label_t initial) const + const std::set &used_labels, label_t initial) const { - code_lines_t code; - gen_settags(code, *this, tags0, o.block().opts); - for (size_t i = 0; i < code.size(); ++i) { - o.wind(ind).wstring(code[i]); - } + code_lines_t code; + gen_settags(code, *this, tags0, o.block().opts); + for (size_t i = 0; i < code.size(); ++i) { + o.wind(ind).wstring(code[i]); + } - // If DFA has transitions to initial state, then initial state - // has a piece of code that advances input position. Wee must - // skip it when entering DFA. - if (used_labels.count(head->label)) { - o.wind(ind).ws("goto ").wstring(o.block().opts->labelPrefix) - .wlabel(initial).ws(";\n"); - } + // If DFA has transitions to initial state, then initial state + // has a piece of code that advances input position. Wee must + // skip it when entering DFA. + if (used_labels.count(head->label)) { + o.wind(ind).ws("goto ").wstring(o.block().opts->labelPrefix) + .wlabel(initial).ws(";\n"); + } - for (State * s = head; s; s = s->next) { - emit_state(o, ind, s, used_labels.count(s->label)); - emit_action(o, ind, *this, s, used_labels); - s->go.emit(o, ind, *this); - } + for (State * s = head; s; s = s->next) { + emit_state(o, ind, s, used_labels.count(s->label)); + emit_action(o, ind, *this, s, used_labels); + s->go.emit(o, ind, *this); + } } void DFA::emit_dot(OutputFile &o, bool last_cond) const { - const opt_t *opts = o.block().opts; - if (!opts->cFlag || !o.cond_goto) { - o.ws("digraph re2c {\n"); - } - o.wdelay_cond_goto(0); - if (opts->cFlag) { - o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n"); - } - for (State *s = head; s; s = s->next) { - if (s->action.type == Action::ACCEPT) { - const accept_t &accs = *s->action.info.accepts; - for (uint32_t i = 0; i < accs.size(); ++i) { - o.wlabel(s->label).ws(" -> ") - .wlabel(accs[i].first->label) - .ws(" [label=\"yyaccept=") - .wu32(i).ws("\"]").ws("\n"); - } - } else if (s->action.type == Action::RULE) { - const Code *code = rules[s->action.info.rule].code; - if (!code->autogen) { - o.wlabel(s->label).ws(" [label=\"") - .wstring(code->fname) - .ws(":").wu32(code->fline) - .ws("\"]").ws("\n"); - } - } - s->go.emit(o, 0, *this); - } - if (!opts->cFlag || last_cond) { - o.ws("}\n"); - } + const opt_t *opts = o.block().opts; + if (!opts->cFlag || !o.cond_goto) { + o.ws("digraph re2c {\n"); + } + o.wdelay_cond_goto(0); + if (opts->cFlag) { + o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n"); + } + for (State *s = head; s; s = s->next) { + if (s->action.type == Action::ACCEPT) { + const accept_t &accs = *s->action.info.accepts; + for (uint32_t i = 0; i < accs.size(); ++i) { + o.wlabel(s->label).ws(" -> ") + .wlabel(accs[i].first->label) + .ws(" [label=\"yyaccept=") + .wu32(i).ws("\"]").ws("\n"); + } + } else if (s->action.type == Action::RULE) { + const Code *code = rules[s->action.info.rule].code; + if (!code->autogen) { + o.wlabel(s->label).ws(" [label=\"") + .wstring(code->fname) + .ws(":").wu32(code->fline) + .ws("\"]").ws("\n"); + } + } + s->go.emit(o, 0, *this); + } + if (!opts->cFlag || last_cond) { + o.ws("}\n"); + } } void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace) { - OutputFile &o = output.source; - OutputBlock &ob = o.block(); - const opt_t *opts = ob.opts; + OutputFile &o = output.source; + OutputBlock &ob = o.block(); + const opt_t *opts = ob.opts; - std::set stagnames, stagvars, mtagnames, mtagvars; - if (!oldstyle_ctxmarker) { - for (size_t i = 0; i < tags.size(); ++i) { - const Tag &tag = tags[i]; - if (history(tag)) { - mtagvars.insert(*tag.name); - } else if (tag.name) { - stagvars.insert(*tag.name); - } - } - for (tagver_t v = 1; v <= maxtagver; ++v) { - const std::string s = vartag_name(v, opts->tags_prefix); - if (mtagvers.find(v) != mtagvers.end()) { - mtagnames.insert(s); - } else { - stagnames.insert(s); - } - } - ob.stags.insert(stagnames.begin(), stagnames.end()); - ob.mtags.insert(mtagnames.begin(), mtagnames.end()); - } - if (!cond.empty()) o.block().types.push_back(cond); + std::set stagnames, stagvars, mtagnames, mtagvars; + if (!oldstyle_ctxmarker) { + for (size_t i = 0; i < tags.size(); ++i) { + const Tag &tag = tags[i]; + if (history(tag)) { + mtagvars.insert(*tag.name); + } else if (tag.name) { + stagvars.insert(*tag.name); + } + } + for (tagver_t v = 1; v <= maxtagver; ++v) { + const std::string s = vartag_name(v, opts->tags_prefix); + if (mtagvers.find(v) != mtagvers.end()) { + mtagnames.insert(s); + } else { + stagnames.insert(s); + } + } + ob.stags.insert(stagnames.begin(), stagnames.end()); + ob.mtags.insert(mtagnames.begin(), mtagnames.end()); + } + if (!cond.empty()) o.block().types.push_back(cond); - bool bProlog = (!opts->cFlag || !o.cond_goto); + bool bProlog = (!opts->cFlag || !o.cond_goto); - // start_label points to the beginning of current re2c block - // (prior to condition dispatch in '-c' mode) - // it can forced by configuration 're2c:startlabel = ;' - label_t start_label = o.label_counter.next (); - // initial_label points to the beginning of DFA - // in '-c' mode this is NOT equal to start_label - label_t initial_label = bProlog && opts->cFlag - ? o.label_counter.next () - : start_label; - for (State * s = head; s; s = s->next) - { - s->label = o.label_counter.next (); - } - std::set used_labels; - count_used_labels (used_labels, start_label, initial_label, - opts->startlabel_force && opts->startlabel.empty(), opts->fFlag); + // start_label points to the beginning of current re2c block + // (prior to condition dispatch in '-c' mode) + // it can forced by configuration 're2c:startlabel = ;' + label_t start_label = o.label_counter.next (); + // initial_label points to the beginning of DFA + // in '-c' mode this is NOT equal to start_label + label_t initial_label = bProlog && opts->cFlag + ? o.label_counter.next () + : start_label; + for (State * s = head; s; s = s->next) + { + s->label = o.label_counter.next (); + } + std::set used_labels; + count_used_labels (used_labels, start_label, initial_label, + opts->startlabel_force && opts->startlabel.empty(), opts->fFlag); - head->action.set_initial(initial_label); + head->action.set_initial(initial_label); - if (opts->target == TARGET_SKELETON) { - if (output.skeletons.insert (name).second) - { - emit_start(o, max_fill, max_nmatch, name, key_size, def_rule, - need_backup, need_accept, oldstyle_ctxmarker, - stagnames, stagvars, mtagnames, mtagvars, bitmaps); - uint32_t i = 2; - emit_body (o, i, used_labels, initial_label); - emit_end(o, name, need_backup, oldstyle_ctxmarker, mtagnames); - } - } else if (opts->target == TARGET_DOT) { - emit_dot(o, isLastCond); - } else { - // Generate prolog - if (bProlog) - { - o.ws("\n").wdelay_line_info_output (); - if ((!opts->fFlag && ob.used_yyaccept) - || (!opts->fFlag && opts->bEmitYYCh) - || (opts->bFlag && !opts->cFlag && !bitmaps.empty()) - || (opts->cFlag && !o.cond_goto && opts->gFlag) - || (opts->fFlag && !o.state_goto && opts->gFlag) - ) - { - bPrologBrace = true; - o.wind(ind++).ws("{\n"); - } - else if (ind == 0) - { - ind = 1; - } - if (!opts->fFlag) - { - if (opts->bEmitYYCh) - { - o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n"); - } - o.wdelay_yyaccept_init (ind); - } - else - { - o.ws("\n"); - } - } - if (opts->bFlag && !opts->cFlag) - { - bitmaps.gen(o, ind); - } - if (bProlog) - { - o.wdelay_cond_table(ind); - o.wdelay_state_goto (ind); - if (opts->cFlag) - { - if (used_labels.count(start_label)) - { - o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n"); - } - } - o.wuser_start_label (); - o.wdelay_cond_goto(ind); - } - if (opts->cFlag && !cond.empty()) - { - if (opts->condDivider.length()) - { - std::string divider = opts->condDivider; - strrreplace(divider, opts->condDividerParam, cond); - o.wstring(divider).ws("\n"); - } - o.wstring(opts->condPrefix).wstring(cond).ws(":\n"); - } - if (opts->cFlag && opts->bFlag && !bitmaps.empty()) - { - o.wind(ind++).ws("{\n"); - bitmaps.gen(o, ind); - } - // Generate code - emit_body (o, ind, used_labels, initial_label); - if (opts->cFlag && opts->bFlag && !bitmaps.empty()) - { - o.wind(--ind).ws("}\n"); - } - // Generate epilog - if ((!opts->cFlag || isLastCond) && bPrologBrace) - { - o.wind(--ind).ws("}\n"); - } - } + if (opts->target == TARGET_SKELETON) { + if (output.skeletons.insert (name).second) + { + emit_start(o, max_fill, max_nmatch, name, key_size, def_rule, + need_backup, need_accept, oldstyle_ctxmarker, + stagnames, stagvars, mtagnames, mtagvars, bitmaps); + uint32_t i = 2; + emit_body (o, i, used_labels, initial_label); + emit_end(o, name, need_backup, oldstyle_ctxmarker, mtagnames); + } + } else if (opts->target == TARGET_DOT) { + emit_dot(o, isLastCond); + } else { + // Generate prolog + if (bProlog) + { + o.ws("\n").wdelay_line_info_output (); + if ((!opts->fFlag && ob.used_yyaccept) + || (!opts->fFlag && opts->bEmitYYCh) + || (opts->bFlag && !opts->cFlag && !bitmaps.empty()) + || (opts->cFlag && !o.cond_goto && opts->gFlag) + || (opts->fFlag && !o.state_goto && opts->gFlag) + ) + { + bPrologBrace = true; + o.wind(ind++).ws("{\n"); + } + else if (ind == 0) + { + ind = 1; + } + if (!opts->fFlag) + { + if (opts->bEmitYYCh) + { + o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n"); + } + o.wdelay_yyaccept_init (ind); + } + else + { + o.ws("\n"); + } + } + if (opts->bFlag && !opts->cFlag) + { + bitmaps.gen(o, ind); + } + if (bProlog) + { + o.wdelay_cond_table(ind); + o.wdelay_state_goto (ind); + if (opts->cFlag) + { + if (used_labels.count(start_label)) + { + o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n"); + } + } + o.wuser_start_label (); + o.wdelay_cond_goto(ind); + } + if (opts->cFlag && !cond.empty()) + { + if (opts->condDivider.length()) + { + std::string divider = opts->condDivider; + strrreplace(divider, opts->condDividerParam, cond); + o.wstring(divider).ws("\n"); + } + o.wstring(opts->condPrefix).wstring(cond).ws(":\n"); + } + if (opts->cFlag && opts->bFlag && !bitmaps.empty()) + { + o.wind(ind++).ws("{\n"); + bitmaps.gen(o, ind); + } + // Generate code + emit_body (o, ind, used_labels, initial_label); + if (opts->cFlag && opts->bFlag && !bitmaps.empty()) + { + o.wind(--ind).ws("}\n"); + } + // Generate epilog + if ((!opts->cFlag || isLastCond) && bPrologBrace) + { + o.wind(--ind).ws("}\n"); + } + } } std::string vartag_name(tagver_t ver, const std::string &prefix) { - std::ostringstream s; - s << prefix << ver; - return s.str(); + std::ostringstream s; + s << prefix << ver; + return s.str(); } std::string vartag_expr(tagver_t ver, const std::string &prefix, const std::string &expression) { - const std::string s = vartag_name(ver, prefix); - std::string e = expression; - strrreplace(e, "@@", s); - return e; + const std::string s = vartag_name(ver, prefix); + std::string e = expression; + strrreplace(e, "@@", s); + return e; } } // end namespace re2c diff --git a/re2c/src/code/go.h b/re2c/src/code/go.h index 767965b8..ac03e015 100644 --- a/re2c/src/code/go.h +++ b/re2c/src/code/go.h @@ -21,214 +21,214 @@ struct If; struct Span { - uint32_t ub; - State * to; - tcid_t tags; + uint32_t ub; + State * to; + tcid_t tags; }; struct Case { - std::vector > ranges; - const State *to; - tcid_t tags; - bool skip; - - void emit(OutputFile &o, uint32_t ind) const; - inline Case(): ranges(), to(NULL), tags(TCID0), skip(false) {} - FORBID_COPY(Case); + std::vector > ranges; + const State *to; + tcid_t tags; + bool skip; + + void emit(OutputFile &o, uint32_t ind) const; + inline Case(): ranges(), to(NULL), tags(TCID0), skip(false) {} + FORBID_COPY(Case); }; struct Cases { - Case *cases; - uint32_t cases_size; - - void add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip); - Cases(const Span *spans, uint32_t nspans, bool skip); - ~Cases(); - void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const; - void used_labels(std::set &used) const; - FORBID_COPY(Cases); + Case *cases; + uint32_t cases_size; + + void add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip); + Cases(const Span *spans, uint32_t nspans, bool skip); + ~Cases(); + void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const; + void used_labels(std::set &used) const; + FORBID_COPY(Cases); }; struct Cond { - std::string compare; - uint32_t value; - Cond (const std::string & cmp, uint32_t val); + std::string compare; + uint32_t value; + Cond (const std::string & cmp, uint32_t val); }; struct Binary { - Cond * cond; - If * thn; - If * els; - Binary (const Span * s, uint32_t n, const State * next, bool skip); - ~Binary (); - void emit (OutputFile &o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; - - FORBID_COPY (Binary); + Cond * cond; + If * thn; + If * els; + Binary (const Span * s, uint32_t n, const State * next, bool skip); + ~Binary (); + void emit (OutputFile &o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; + + FORBID_COPY (Binary); }; struct Linear { - struct Branch - { - const Cond *cond; - const State *to; - tcid_t tags; - bool skip; - }; - - size_t nbranches; - Branch *branches; - - Linear(const Span *s, uint32_t n, const State *next, bool skip); - ~Linear(); - void add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip); - void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const; - void used_labels(std::set &used) const; - FORBID_COPY(Linear); + struct Branch + { + const Cond *cond; + const State *to; + tcid_t tags; + bool skip; + }; + + size_t nbranches; + Branch *branches; + + Linear(const Span *s, uint32_t n, const State *next, bool skip); + ~Linear(); + void add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip); + void emit(OutputFile &o, uint32_t ind, const DFA &dfa) const; + void used_labels(std::set &used) const; + FORBID_COPY(Linear); }; struct If { - enum type_t - { - BINARY, - LINEAR - } type; - union - { - Binary * binary; - Linear * linear; - } info; - If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip); - ~If (); - void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; + enum type_t + { + BINARY, + LINEAR + } type; + union + { + Binary * binary; + Linear * linear; + } info; + If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip); + ~If (); + void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; }; struct SwitchIf { - enum - { - SWITCH, - IF - } type; - union - { - Cases * cases; - If * ifs; - } info; - SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip); - ~SwitchIf (); - void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; + enum + { + SWITCH, + IF + } type; + union + { + Cases * cases; + If * ifs; + } info; + SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip); + ~SwitchIf (); + void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; }; struct GoBitmap { - const bitmap_t * bitmap; - const State * bitmap_state; - SwitchIf * hgo; - SwitchIf * lgo; - GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, - uint32_t hSpans, const bitmap_t * bm, const State * bm_state, - const State * next, bool sflag); - ~GoBitmap (); - void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; - - FORBID_COPY (GoBitmap); + const bitmap_t * bitmap; + const State * bitmap_state; + SwitchIf * hgo; + SwitchIf * lgo; + GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, + uint32_t hSpans, const bitmap_t * bm, const State * bm_state, + const State * next, bool sflag); + ~GoBitmap (); + void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; + + FORBID_COPY (GoBitmap); }; struct CpgotoTable { - static const uint32_t TABLE_SIZE; - const State ** table; - CpgotoTable (const Span * span, uint32_t nSpans); - ~CpgotoTable (); - void emit (OutputFile & o, uint32_t ind) const; - void used_labels (std::set & used) const; + static const uint32_t TABLE_SIZE; + const State ** table; + CpgotoTable (const Span * span, uint32_t nSpans); + ~CpgotoTable (); + void emit (OutputFile & o, uint32_t ind) const; + void used_labels (std::set & used) const; private: - label_t max_label () const; + label_t max_label () const; - FORBID_COPY (CpgotoTable); + FORBID_COPY (CpgotoTable); }; struct Cpgoto { - SwitchIf * hgo; - CpgotoTable * table; - Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, - uint32_t hSpans, const State * next, bool sflag); - ~Cpgoto (); - void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; - - FORBID_COPY (Cpgoto); + SwitchIf * hgo; + CpgotoTable * table; + Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, + uint32_t hSpans, const State * next, bool sflag); + ~Cpgoto (); + void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; + + FORBID_COPY (Cpgoto); }; struct Dot { - const State * from; - Cases * cases; - Dot(const Span *sp, uint32_t nsp, const State *s); - ~Dot (); - void emit (OutputFile & o, const DFA &dfa) const; + const State * from; + Cases * cases; + Dot(const Span *sp, uint32_t nsp, const State *s); + ~Dot (); + void emit (OutputFile & o, const DFA &dfa) const; - FORBID_COPY (Dot); + FORBID_COPY (Dot); }; struct Go { - uint32_t nSpans; // number of spans - Span * span; - tcid_t tags; - bool skip; - enum - { - EMPTY, - SWITCH_IF, - BITMAP, - CPGOTO, - DOT - } type; - union - { - SwitchIf * switchif; - GoBitmap * bitmap; - Cpgoto * cpgoto; - Dot * dot; - } info; - - Go (); - ~Go (); - void init(const State* from, const opt_t *opts, bitmaps_t &bitmaps); - void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; - void used_labels (std::set & used) const; - - Go (const Go & g) - : nSpans (g.nSpans) - , span (g.span) - , tags (g.tags) - , skip (g.skip) - , type (g.type) - , info (g.info) - {} - Go & operator = (const Go & g) - { - nSpans = g.nSpans; - span = g.span; - tags = g.tags; - skip = g.skip; - type = g.type; - info = g.info; - return * this; - } + uint32_t nSpans; // number of spans + Span * span; + tcid_t tags; + bool skip; + enum + { + EMPTY, + SWITCH_IF, + BITMAP, + CPGOTO, + DOT + } type; + union + { + SwitchIf * switchif; + GoBitmap * bitmap; + Cpgoto * cpgoto; + Dot * dot; + } info; + + Go (); + ~Go (); + void init(const State* from, const opt_t *opts, bitmaps_t &bitmaps); + void emit (OutputFile & o, uint32_t ind, const DFA &dfa) const; + void used_labels (std::set & used) const; + + Go (const Go & g) + : nSpans (g.nSpans) + , span (g.span) + , tags (g.tags) + , skip (g.skip) + , type (g.type) + , info (g.info) + {} + Go & operator = (const Go & g) + { + nSpans = g.nSpans; + span = g.span; + tags = g.tags; + skip = g.skip; + type = g.type; + info = g.info; + return * this; + } }; bool consume(const State *s); diff --git a/re2c/src/code/go_construct.cc b/re2c/src/code/go_construct.cc index eec4cef5..d3940804 100644 --- a/re2c/src/code/go_construct.cc +++ b/re2c/src/code/go_construct.cc @@ -20,277 +20,277 @@ static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspa bool consume(const State *s) { - switch (s->action.type) { - case Action::RULE: - case Action::MOVE: - case Action::ACCEPT: return false; - case Action::MATCH: - case Action::INITIAL: - case Action::SAVE: return true; - } - return true; /* unreachable */ + switch (s->action.type) { + case Action::RULE: + case Action::MOVE: + case Action::ACCEPT: return false; + case Action::MATCH: + case Action::INITIAL: + case Action::SAVE: return true; + } + return true; /* unreachable */ } Cases::Cases(const Span *spans, uint32_t nspans, bool skip) - : cases(new Case[nspans]) - , cases_size(0) + : cases(new Case[nspans]) + , cases_size(0) { - assert(nspans > 0); + assert(nspans > 0); - // first case is default case - Case &c = cases[cases_size++]; - const Span *s = spans + (nspans - 1); - c.to = s->to; - c.tags = s->tags; - c.skip = skip && consume(s->to); + // first case is default case + Case &c = cases[cases_size++]; + const Span *s = spans + (nspans - 1); + c.to = s->to; + c.tags = s->tags; + c.skip = skip && consume(s->to); - for (uint32_t i = 0, lb = 0; i < nspans; ++i) { - s = spans + i; - add(lb, s->ub, s->to, s->tags, skip && consume(s->to)); - lb = s->ub; - } + for (uint32_t i = 0, lb = 0; i < nspans; ++i) { + s = spans + i; + add(lb, s->ub, s->to, s->tags, skip && consume(s->to)); + lb = s->ub; + } } void Cases::add(uint32_t lb, uint32_t ub, State *to, tcid_t tags, bool skip) { - for (uint32_t i = 0; i < cases_size; ++i) { - Case &c = cases[i]; - if (c.to == to && c.tags == tags) { - c.ranges.push_back(std::make_pair(lb, ub)); - return; - } - } - Case &c = cases[cases_size++]; - c.ranges.push_back(std::make_pair(lb, ub)); - c.to = to; - c.tags = tags; - c.skip = skip; + for (uint32_t i = 0; i < cases_size; ++i) { + Case &c = cases[i]; + if (c.to == to && c.tags == tags) { + c.ranges.push_back(std::make_pair(lb, ub)); + return; + } + } + Case &c = cases[cases_size++]; + c.ranges.push_back(std::make_pair(lb, ub)); + c.to = to; + c.tags = tags; + c.skip = skip; } Cond::Cond (const std::string & cmp, uint32_t val) - : compare (cmp) - , value (val) + : compare (cmp) + , value (val) {} Binary::Binary (const Span * s, uint32_t n, const State * next, bool skip) - : cond (NULL) - , thn (NULL) - , els (NULL) + : cond (NULL) + , thn (NULL) + , els (NULL) { - const uint32_t l = n / 2; - const uint32_t h = n - l; - cond = new Cond ("<=", s[l - 1].ub - 1); - thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next, skip); - els = new If (h > 4 ? If::BINARY : If::LINEAR, &s[l], h, next, skip); + const uint32_t l = n / 2; + const uint32_t h = n - l; + cond = new Cond ("<=", s[l - 1].ub - 1); + thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next, skip); + els = new If (h > 4 ? If::BINARY : If::LINEAR, &s[l], h, next, skip); } void Linear::add_branch(const Cond *cond, const State *to, tcid_t tags, bool skip) { - Branch &b = branches[nbranches++]; - b.cond = cond; - b.to = to; - b.tags = tags; - b.skip = skip; + Branch &b = branches[nbranches++]; + b.cond = cond; + b.to = to; + b.tags = tags; + b.skip = skip; } Linear::Linear(const Span *s, uint32_t n, const State *next, bool skip) - : nbranches(0) - , branches(new Branch[n]) + : nbranches(0) + , branches(new Branch[n]) { - for (;;) { - if (n == 1 && s[0].to == next) { - add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to)); - return; - } else if (n == 1) { - add_branch(NULL, s[0].to, s[0].tags, skip && consume(s[0].to)); - return; - } else if (n == 2 && s[0].to == next) { - add_branch(new Cond(">=", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to)); - add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to)); - return; - } else if (n == 3 - && s[1].to == next - && s[1].ub - s[0].ub == 1 - && s[2].to == s[0].to - && s[2].tags == s[0].tags) { - add_branch(new Cond("!=", s[0].ub), s[0].to, s[0].tags, skip && consume(s[0].to)); - add_branch(NULL, NULL, s[1].tags, skip && consume(s[1].to)); - return; - } else if (n >= 3 - && s[1].ub - s[0].ub == 1 - && s[2].to == s[0].to - && s[2].tags == s[0].tags) { - add_branch(new Cond("==", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to)); - n -= 2; - s += 2; - } else { - add_branch(new Cond("<=", s[0].ub - 1), s[0].to, s[0].tags, skip && consume(s[0].to)); - n -= 1; - s += 1; - } - } + for (;;) { + if (n == 1 && s[0].to == next) { + add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to)); + return; + } else if (n == 1) { + add_branch(NULL, s[0].to, s[0].tags, skip && consume(s[0].to)); + return; + } else if (n == 2 && s[0].to == next) { + add_branch(new Cond(">=", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to)); + add_branch(NULL, NULL, s[0].tags, skip && consume(s[0].to)); + return; + } else if (n == 3 + && s[1].to == next + && s[1].ub - s[0].ub == 1 + && s[2].to == s[0].to + && s[2].tags == s[0].tags) { + add_branch(new Cond("!=", s[0].ub), s[0].to, s[0].tags, skip && consume(s[0].to)); + add_branch(NULL, NULL, s[1].tags, skip && consume(s[1].to)); + return; + } else if (n >= 3 + && s[1].ub - s[0].ub == 1 + && s[2].to == s[0].to + && s[2].tags == s[0].tags) { + add_branch(new Cond("==", s[0].ub), s[1].to, s[1].tags, skip && consume(s[1].to)); + n -= 2; + s += 2; + } else { + add_branch(new Cond("<=", s[0].ub - 1), s[0].to, s[0].tags, skip && consume(s[0].to)); + n -= 1; + s += 1; + } + } } If::If (type_t t, const Span * sp, uint32_t nsp, const State * next, bool skip) - : type (t) - , info () + : type (t) + , info () { - switch (type) - { - case BINARY: - info.binary = new Binary (sp, nsp, next, skip); - break; - case LINEAR: - info.linear = new Linear (sp, nsp, next, skip); - break; - } + switch (type) + { + case BINARY: + info.binary = new Binary (sp, nsp, next, skip); + break; + case LINEAR: + info.linear = new Linear (sp, nsp, next, skip); + break; + } } SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next, bool sflag, bool skip) - : type (IF) - , info () + : type (IF) + , info () { - if ((!sflag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) - { - type = SWITCH; - info.cases = new Cases (sp, nsp, skip); - } - else if (nsp > 5) - { - info.ifs = new If (If::BINARY, sp, nsp, next, skip); - } - else - { - info.ifs = new If (If::LINEAR, sp, nsp, next, skip); - } + if ((!sflag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) + { + type = SWITCH; + info.cases = new Cases (sp, nsp, skip); + } + else if (nsp > 5) + { + info.ifs = new If (If::BINARY, sp, nsp, next, skip); + } + else + { + info.ifs = new If (If::LINEAR, sp, nsp, next, skip); + } } GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, - uint32_t hSpans, const bitmap_t * bm, const State * bm_state, - const State * next, bool sflag) - : bitmap (bm) - , bitmap_state (bm_state) - , hgo (NULL) - , lgo (NULL) + uint32_t hSpans, const bitmap_t * bm, const State * bm_state, + const State * next, bool sflag) + : bitmap (bm) + , bitmap_state (bm_state) + , hgo (NULL) + , lgo (NULL) { - Span * bspan = allocate (nSpans); - uint32_t bSpans = unmap (bspan, span, nSpans, bm_state); - lgo = bSpans == 0 - ? NULL - : new SwitchIf (bspan, bSpans, next, sflag, false); - // if there are any low spans, then next state for high spans - // must be NULL to trigger explicit goto generation in linear 'if' - hgo = hSpans == 0 - ? NULL - : new SwitchIf (hspan, hSpans, lgo ? NULL : next, sflag, false); - operator delete (bspan); + Span * bspan = allocate (nSpans); + uint32_t bSpans = unmap (bspan, span, nSpans, bm_state); + lgo = bSpans == 0 + ? NULL + : new SwitchIf (bspan, bSpans, next, sflag, false); + // if there are any low spans, then next state for high spans + // must be NULL to trigger explicit goto generation in linear 'if' + hgo = hSpans == 0 + ? NULL + : new SwitchIf (hspan, hSpans, lgo ? NULL : next, sflag, false); + operator delete (bspan); } const uint32_t CpgotoTable::TABLE_SIZE = 0x100; CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans) - : table (new const State * [TABLE_SIZE]) + : table (new const State * [TABLE_SIZE]) { - uint32_t c = 0; - for (uint32_t i = 0; i < nSpans; ++i) - { - for(; c < span[i].ub && c < TABLE_SIZE; ++c) - { - table[c] = span[i].to; - } - } + uint32_t c = 0; + for (uint32_t i = 0; i < nSpans; ++i) + { + for(; c < span[i].ub && c < TABLE_SIZE; ++c) + { + table[c] = span[i].to; + } + } } Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, - uint32_t hSpans, const State * next, bool sflag) - : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next, sflag, false)) - , table (new CpgotoTable (span, nSpans)) + uint32_t hSpans, const State * next, bool sflag) + : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next, sflag, false)) + , table (new CpgotoTable (span, nSpans)) {} Dot::Dot (const Span * sp, uint32_t nsp, const State * s) - : from (s) - , cases (new Cases (sp, nsp, false)) + : from (s) + , cases (new Cases (sp, nsp, false)) {} Go::Go () - : nSpans (0) - , span (NULL) - , tags (TCID0) - , skip (false) - , type (EMPTY) - , info () + : nSpans (0) + , span (NULL) + , tags (TCID0) + , skip (false) + , type (EMPTY) + , info () {} void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps) { - if (nSpans == 0) - { - return; - } + if (nSpans == 0) + { + return; + } - // initialize high (wide) spans - uint32_t hSpans = 0; - const Span * hspan = NULL; - for (uint32_t i = 0; i < nSpans; ++i) - { - if (span[i].ub > 0x100) - { - hspan = &span[i]; - hSpans = nSpans - i; - break; - } - } + // initialize high (wide) spans + uint32_t hSpans = 0; + const Span * hspan = NULL; + for (uint32_t i = 0; i < nSpans; ++i) + { + if (span[i].ub > 0x100) + { + hspan = &span[i]; + hSpans = nSpans - i; + break; + } + } - bool low_spans_have_tags = false; - for (uint32_t i = 0; i < nSpans - hSpans; ++i) { - if (span[i].tags != TCID0) { - low_spans_have_tags = true; - break; - } - } + bool low_spans_have_tags = false; + for (uint32_t i = 0; i < nSpans - hSpans; ++i) { + if (span[i].tags != TCID0) { + low_spans_have_tags = true; + break; + } + } - // initialize bitmaps - uint32_t nBitmaps = 0; - const bitmap_t *bm = NULL; - const State *bms = NULL; + // initialize bitmaps + uint32_t nBitmaps = 0; + const bitmap_t *bm = NULL; + const State *bms = NULL; - for (uint32_t i = 0; i < nSpans; ++i) { - const State *s = span[i].to; - if (!s->isBase) continue; + for (uint32_t i = 0; i < nSpans; ++i) { + const State *s = span[i].to; + if (!s->isBase) continue; - const bitmap_t *b = bitmaps.find(this, s); - if (b) { - if (bm == NULL) { - bm = b; - bms = s; - } - ++nBitmaps; - } - } + const bitmap_t *b = bitmaps.find(this, s); + if (b) { + if (bm == NULL) { + bm = b; + bms = s; + } + ++nBitmaps; + } + } - const uint32_t dSpans = nSpans - hSpans - nBitmaps; - const bool part_skip = opts->eager_skip && !skip; - if (opts->target == TARGET_DOT) - { - type = DOT; - info.dot = new Dot (span, nSpans, from); - } - else if (opts->gFlag && !part_skip && (dSpans >= opts->cGotoThreshold) && !low_spans_have_tags) - { - type = CPGOTO; - info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next, opts->sFlag); - } - else if (opts->bFlag && !part_skip && (nBitmaps > 0)) - { - type = BITMAP; - info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bm, bms, from->next, opts->sFlag); - bitmaps.used = true; - } - else - { - type = SWITCH_IF; - info.switchif = new SwitchIf (span, nSpans, from->next, opts->sFlag, part_skip); - } + const uint32_t dSpans = nSpans - hSpans - nBitmaps; + const bool part_skip = opts->eager_skip && !skip; + if (opts->target == TARGET_DOT) + { + type = DOT; + info.dot = new Dot (span, nSpans, from); + } + else if (opts->gFlag && !part_skip && (dSpans >= opts->cGotoThreshold) && !low_spans_have_tags) + { + type = CPGOTO; + info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next, opts->sFlag); + } + else if (opts->bFlag && !part_skip && (nBitmaps > 0)) + { + type = BITMAP; + info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bm, bms, from->next, opts->sFlag); + bitmaps.used = true; + } + else + { + type = SWITCH_IF; + info.switchif = new SwitchIf (span, nSpans, from->next, opts->sFlag, part_skip); + } } /* @@ -302,27 +302,27 @@ void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps) */ uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x) { - uint32_t new_nspans = 0; - for (uint32_t i = 0; i < old_nspans; ++i) - { - if (old_span[i].to != x) - { - if (new_nspans > 0 - && new_span[new_nspans - 1].to == old_span[i].to - && new_span[new_nspans - 1].tags == old_span[i].tags) - new_span[new_nspans - 1].ub = old_span[i].ub; - else - { - new_span[new_nspans].to = old_span[i].to; - new_span[new_nspans].ub = old_span[i].ub; - new_span[new_nspans].tags = old_span[i].tags; - ++new_nspans; - } - } - } - if (new_nspans > 0) - new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub; - return new_nspans; + uint32_t new_nspans = 0; + for (uint32_t i = 0; i < old_nspans; ++i) + { + if (old_span[i].to != x) + { + if (new_nspans > 0 + && new_span[new_nspans - 1].to == old_span[i].to + && new_span[new_nspans - 1].tags == old_span[i].tags) + new_span[new_nspans - 1].ub = old_span[i].ub; + else + { + new_span[new_nspans].to = old_span[i].to; + new_span[new_nspans].ub = old_span[i].ub; + new_span[new_nspans].tags = old_span[i].tags; + ++new_nspans; + } + } + } + if (new_nspans > 0) + new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub; + return new_nspans; } } // namespace re2c diff --git a/re2c/src/code/go_destruct.cc b/re2c/src/code/go_destruct.cc index 9876491b..6ee1e13c 100644 --- a/re2c/src/code/go_destruct.cc +++ b/re2c/src/code/go_destruct.cc @@ -7,91 +7,91 @@ namespace re2c Cases::~Cases () { - delete [] cases; + delete [] cases; } Binary::~Binary () { - delete cond; - delete thn; - delete els; + delete cond; + delete thn; + delete els; } Linear::~Linear () { - for (uint32_t i = 0; i < nbranches; ++i) { - delete branches[i].cond; - } - delete[] branches; + for (uint32_t i = 0; i < nbranches; ++i) { + delete branches[i].cond; + } + delete[] branches; } If::~If () { - switch (type) - { - case BINARY: - delete info.binary; - break; - case LINEAR: - delete info.linear; - break; - } + switch (type) + { + case BINARY: + delete info.binary; + break; + case LINEAR: + delete info.linear; + break; + } } SwitchIf::~SwitchIf () { - switch (type) - { - case SWITCH: - delete info.cases; - break; - case IF: - delete info.ifs; - break; - } + switch (type) + { + case SWITCH: + delete info.cases; + break; + case IF: + delete info.ifs; + break; + } } GoBitmap::~GoBitmap () { - delete hgo; - delete lgo; + delete hgo; + delete lgo; } CpgotoTable::~CpgotoTable () { - delete [] table; + delete [] table; } Cpgoto::~Cpgoto () { - delete hgo; - delete table; + delete hgo; + delete table; } Dot::~Dot () { - delete cases; + delete cases; } Go::~Go () { - switch (type) - { - case EMPTY: - break; - case SWITCH_IF: - delete info.switchif; - break; - case BITMAP: - delete info.bitmap; - break; - case CPGOTO: - delete info.cpgoto; - break; - case DOT: - delete info.dot; - break; - } + switch (type) + { + case EMPTY: + break; + case SWITCH_IF: + delete info.switchif; + break; + case BITMAP: + delete info.bitmap; + break; + case CPGOTO: + delete info.cpgoto; + break; + case DOT: + delete info.dot; + break; + } } } // namespace re2c diff --git a/re2c/src/code/go_emit.cc b/re2c/src/code/go_emit.cc index d96bd7cb..e3c19622 100644 --- a/re2c/src/code/go_emit.cc +++ b/re2c/src/code/go_emit.cc @@ -23,227 +23,227 @@ static std::string output_hgo (OutputFile & o, uint32_t ind, const DFA &dfa, Swi void output_if (OutputFile & o, uint32_t ind, const std::string & compare, uint32_t value) { - o.wind(ind).ws("if (").wstring(o.block().opts->yych).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") "); + o.wind(ind).ws("if (").wstring(o.block().opts->yych).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") "); } std::string output_hgo (OutputFile & o, uint32_t ind, const DFA &dfa, SwitchIf * hgo) { - const opt_t *opts = o.block().opts; - std::string yych = opts->yych; - if (hgo != NULL) - { - o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n"); - hgo->emit (o, ind + 1, dfa); - o.wind(ind).ws("} else "); - yych = opts->yych; - } - else - { - o.wind(ind); - } - return yych; + const opt_t *opts = o.block().opts; + std::string yych = opts->yych; + if (hgo != NULL) + { + o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n"); + hgo->emit (o, ind + 1, dfa); + o.wind(ind).ws("} else "); + yych = opts->yych; + } + else + { + o.wind(ind); + } + return yych; } void Case::emit (OutputFile & o, uint32_t ind) const { - const opt_t *opts = o.block().opts; - for (uint32_t i = 0; i < ranges.size (); ++i) - { - for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b) - { - o.wind(ind).ws("case ").wc_hex (b).ws(":"); - if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC) - { - const uint32_t c = opts->encoding.decodeUnsafe (b); - if (is_print (c)) - o.ws(" /* ").wc(static_cast (c)).ws(" */"); - } - bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; - if (!last_case) - { - o.ws("\n"); - } - } - } + const opt_t *opts = o.block().opts; + for (uint32_t i = 0; i < ranges.size (); ++i) + { + for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b) + { + o.wind(ind).ws("case ").wc_hex (b).ws(":"); + if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC) + { + const uint32_t c = opts->encoding.decodeUnsafe (b); + if (is_print (c)) + o.ws(" /* ").wc(static_cast (c)).ws(" */"); + } + bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; + if (!last_case) + { + o.ws("\n"); + } + } + } } void Cases::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const { - o.wind(ind).ws("switch (").wstring(o.block().opts->yych).ws(") {\n"); + o.wind(ind).ws("switch (").wstring(o.block().opts->yych).ws(") {\n"); - for (uint32_t i = 1; i < cases_size; ++i) { - const Case &c = cases[i]; - c.emit(o, ind); - gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip); - } + for (uint32_t i = 1; i < cases_size; ++i) { + const Case &c = cases[i]; + c.emit(o, ind); + gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip); + } - // default case must be the last one - const Case &c = cases[0]; - o.wind(ind).ws("default:"); - gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip); + // default case must be the last one + const Case &c = cases[0]; + o.wind(ind).ws("default:"); + gen_goto_case(o, ind, c.to, dfa, c.tags, c.skip); - o.wind(ind).ws("}\n"); + o.wind(ind).ws("}\n"); } void Binary::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const { - output_if(o, ind, cond->compare, cond->value); - o.ws("{\n"); - thn->emit(o, ind + 1, dfa); - o.wind(ind).ws("} else {\n"); - els->emit(o, ind + 1, dfa); - o.wind(ind).ws("}\n"); + output_if(o, ind, cond->compare, cond->value); + o.ws("{\n"); + thn->emit(o, ind + 1, dfa); + o.wind(ind).ws("} else {\n"); + els->emit(o, ind + 1, dfa); + o.wind(ind).ws("}\n"); } void Linear::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const { - for (uint32_t i = 0; i < nbranches; ++i) { - const Branch &b = branches[i]; - const Cond *cond = b.cond; - if (cond) { - output_if(o, ind, cond->compare, cond->value); - gen_goto_if(o, ind, b.to, dfa, b.tags, b.skip); - } else { - gen_goto_plain(o, ind, b.to, dfa, b.tags, b.skip); - } - } + for (uint32_t i = 0; i < nbranches; ++i) { + const Branch &b = branches[i]; + const Cond *cond = b.cond; + if (cond) { + output_if(o, ind, cond->compare, cond->value); + gen_goto_if(o, ind, b.to, dfa, b.tags, b.skip); + } else { + gen_goto_plain(o, ind, b.to, dfa, b.tags, b.skip); + } + } } void If::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const { - switch (type) { - case BINARY: info.binary->emit(o, ind, dfa); break; - case LINEAR: info.linear->emit(o, ind, dfa); break; - } + switch (type) { + case BINARY: info.binary->emit(o, ind, dfa); break; + case LINEAR: info.linear->emit(o, ind, dfa); break; + } } void SwitchIf::emit(OutputFile &o, uint32_t ind, const DFA &dfa) const { - switch (type) { - case SWITCH: info.cases->emit(o, ind, dfa); break; - case IF: info.ifs->emit(o, ind, dfa); break; - } + switch (type) { + case SWITCH: info.cases->emit(o, ind, dfa); break; + case IF: info.ifs->emit(o, ind, dfa); break; + } } void GoBitmap::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const { - const opt_t *opts = o.block().opts; - std::string yych = output_hgo (o, ind, dfa, hgo); - o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & "); - if (opts->yybmHexTable) - { - o.wu32_hex(bitmap->m); - } - else - { - o.wu32(bitmap->m); - } - o.ws(") {\n"); - gen_goto_plain(o, ind + 1, bitmap_state, dfa, TCID0, false); - o.wind(ind).ws("}\n"); - if (lgo != NULL) - { - lgo->emit (o, ind, dfa); - } + const opt_t *opts = o.block().opts; + std::string yych = output_hgo (o, ind, dfa, hgo); + o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & "); + if (opts->yybmHexTable) + { + o.wu32_hex(bitmap->m); + } + else + { + o.wu32(bitmap->m); + } + o.ws(") {\n"); + gen_goto_plain(o, ind + 1, bitmap_state, dfa, TCID0, false); + o.wind(ind).ws("}\n"); + if (lgo != NULL) + { + lgo->emit (o, ind, dfa); + } } label_t CpgotoTable::max_label () const { - label_t max = label_t::first (); - for (uint32_t i = 0; i < TABLE_SIZE; ++i) - { - if (max < table[i]->label) - { - max = table[i]->label; - }; - } - return max; + label_t max = label_t::first (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + if (max < table[i]->label) + { + max = table[i]->label; + }; + } + return max; } void CpgotoTable::emit (OutputFile & o, uint32_t ind) const { - const opt_t *opts = o.block().opts; - o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n"); - o.wind(++ind); - const uint32_t max_digits = max_label ().width (); - for (uint32_t i = 0; i < TABLE_SIZE; ++i) - { - o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label); - if (i == TABLE_SIZE - 1) - { - o.ws("\n"); - } - else if (i % 8 == 7) - { - o.ws(",\n").wind(ind); - } - else - { - const uint32_t padding = max_digits - table[i]->label.width () + 1; - o.ws(",").wstring(std::string (padding, ' ')); - } - } - o.wind(--ind).ws("};\n"); + const opt_t *opts = o.block().opts; + o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n"); + o.wind(++ind); + const uint32_t max_digits = max_label ().width (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label); + if (i == TABLE_SIZE - 1) + { + o.ws("\n"); + } + else if (i % 8 == 7) + { + o.ws(",\n").wind(ind); + } + else + { + const uint32_t padding = max_digits - table[i]->label.width () + 1; + o.ws(",").wstring(std::string (padding, ' ')); + } + } + o.wind(--ind).ws("};\n"); } void Cpgoto::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const { - std::string yych = output_hgo (o, ind, dfa, hgo); - o.ws("{\n"); - table->emit (o, ++ind); - o.wind(ind).ws("goto *").wstring(o.block().opts->yytarget).ws("[").wstring(yych).ws("];\n"); - o.wind(--ind).ws("}\n"); + std::string yych = output_hgo (o, ind, dfa, hgo); + o.ws("{\n"); + table->emit (o, ++ind); + o.wind(ind).ws("goto *").wstring(o.block().opts->yytarget).ws("[").wstring(yych).ws("];\n"); + o.wind(--ind).ws("}\n"); } void Dot::emit(OutputFile &o, const DFA &dfa) const { - const std::string &prefix = o.block().opts->tags_prefix; - const uint32_t n = cases->cases_size; - if (n == 1) { - o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n"); - } else { - for (uint32_t i = 0; i < n; ++i) { - const Case &c = cases->cases[i]; - o.wlabel(from->label).ws(" -> ").wlabel(c.to->label).ws(" [label=\""); - for (uint32_t j = 0; j < c.ranges.size(); ++j) { - o.wrange(c.ranges[j].first, c.ranges[j].second); - } - const tcmd_t *cmd = dfa.tcpool[c.tags]; - for (const tcmd_t *p = cmd; p; p = p->next) { - o.ws("<").wstring(vartag_name(p->lhs, prefix)); - if (tcmd_t::iscopy(p)) { - o.ws("~").wstring(vartag_name(p->rhs, prefix)); - } - o.ws(">"); - } - o.ws("\"]\n"); - } - } + const std::string &prefix = o.block().opts->tags_prefix; + const uint32_t n = cases->cases_size; + if (n == 1) { + o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n"); + } else { + for (uint32_t i = 0; i < n; ++i) { + const Case &c = cases->cases[i]; + o.wlabel(from->label).ws(" -> ").wlabel(c.to->label).ws(" [label=\""); + for (uint32_t j = 0; j < c.ranges.size(); ++j) { + o.wrange(c.ranges[j].first, c.ranges[j].second); + } + const tcmd_t *cmd = dfa.tcpool[c.tags]; + for (const tcmd_t *p = cmd; p; p = p->next) { + o.ws("<").wstring(vartag_name(p->lhs, prefix)); + if (tcmd_t::iscopy(p)) { + o.ws("~").wstring(vartag_name(p->rhs, prefix)); + } + o.ws(">"); + } + o.ws("\"]\n"); + } + } } void Go::emit (OutputFile & o, uint32_t ind, const DFA &dfa) const { - if (type == DOT) { - info.dot->emit (o, dfa); - return; - } - - const bool lookahead = o.block().opts->lookahead; - o.wdelay_skip(ind, skip && !lookahead); - code_lines_t code; - gen_settags(code, dfa, tags, o.block().opts); - for (size_t i = 0; i < code.size(); ++i) { - o.wind(ind).wstring(code[i]); - } - o.wdelay_skip(ind, skip && lookahead); - - if (type == SWITCH_IF) { - info.switchif->emit (o, ind, dfa); - } else if (type == BITMAP) { - info.bitmap->emit (o, ind, dfa); - } else if (type == CPGOTO) { - info.cpgoto->emit (o, ind, dfa); - } + if (type == DOT) { + info.dot->emit (o, dfa); + return; + } + + const bool lookahead = o.block().opts->lookahead; + o.wdelay_skip(ind, skip && !lookahead); + code_lines_t code; + gen_settags(code, dfa, tags, o.block().opts); + for (size_t i = 0; i < code.size(); ++i) { + o.wind(ind).wstring(code[i]); + } + o.wdelay_skip(ind, skip && lookahead); + + if (type == SWITCH_IF) { + info.switchif->emit (o, ind, dfa); + } else if (type == BITMAP) { + info.bitmap->emit (o, ind, dfa); + } else if (type == CPGOTO) { + info.cpgoto->emit (o, ind, dfa); + } } } // namespace re2c diff --git a/re2c/src/code/go_used_labels.cc b/re2c/src/code/go_used_labels.cc index 5ce70d94..391bc2a6 100644 --- a/re2c/src/code/go_used_labels.cc +++ b/re2c/src/code/go_used_labels.cc @@ -12,101 +12,101 @@ class label_t; void Cases::used_labels (std::set & used) const { - for (uint32_t i = 0; i < cases_size; ++i) - { - used.insert (cases[i].to->label); - } + for (uint32_t i = 0; i < cases_size; ++i) + { + used.insert (cases[i].to->label); + } } void Binary::used_labels (std::set & used) const { - thn->used_labels (used); - els->used_labels (used); + thn->used_labels (used); + els->used_labels (used); } void Linear::used_labels (std::set & used) const { - for (uint32_t i = 0; i < nbranches; ++i) { - const State *to = branches[i].to; - if (to) { - used.insert(to->label); - } - } + for (uint32_t i = 0; i < nbranches; ++i) { + const State *to = branches[i].to; + if (to) { + used.insert(to->label); + } + } } void If::used_labels (std::set & used) const { - switch (type) - { - case BINARY: - info.binary->used_labels (used); - break; - case LINEAR: - info.linear->used_labels (used); - break; - } + switch (type) + { + case BINARY: + info.binary->used_labels (used); + break; + case LINEAR: + info.linear->used_labels (used); + break; + } } void SwitchIf::used_labels (std::set & used) const { - switch (type) - { - case SWITCH: - info.cases->used_labels (used); - break; - case IF: - info.ifs->used_labels (used); - break; - } + switch (type) + { + case SWITCH: + info.cases->used_labels (used); + break; + case IF: + info.ifs->used_labels (used); + break; + } } void GoBitmap::used_labels (std::set & used) const { - if (hgo != NULL) - { - hgo->used_labels (used); - } - used.insert (bitmap_state->label); - if (lgo != NULL) - { - lgo->used_labels (used); - } + if (hgo != NULL) + { + hgo->used_labels (used); + } + used.insert (bitmap_state->label); + if (lgo != NULL) + { + lgo->used_labels (used); + } } void CpgotoTable::used_labels (std::set & used) const { - for (uint32_t i = 0; i < TABLE_SIZE; ++i) - { - used.insert (table[i]->label); - } + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + used.insert (table[i]->label); + } } void Cpgoto::used_labels (std::set & used) const { - if (hgo != NULL) - { - hgo->used_labels (used); - } - table->used_labels (used); + if (hgo != NULL) + { + hgo->used_labels (used); + } + table->used_labels (used); } void Go::used_labels (std::set & used) const { - switch (type) - { - case EMPTY: - case DOT: - break; - case SWITCH_IF: - info.switchif->used_labels (used); - break; - case BITMAP: - info.bitmap->used_labels (used); - break; - case CPGOTO: - info.cpgoto->used_labels (used); - break; - } + switch (type) + { + case EMPTY: + case DOT: + break; + case SWITCH_IF: + info.switchif->used_labels (used); + break; + case BITMAP: + info.bitmap->used_labels (used); + break; + case CPGOTO: + info.cpgoto->used_labels (used); + break; + } } } // namespace re2c diff --git a/re2c/src/code/input_api.cc b/re2c/src/code/input_api.cc index 3b91ec2b..77e67d1a 100644 --- a/re2c/src/code/input_api.cc +++ b/re2c/src/code/input_api.cc @@ -10,122 +10,122 @@ namespace re2c std::string output_expr_peek(const opt_t *opts) { - return opts->input_api == INPUT_DEFAULT - ? "*" + opts->yycursor - : opts->yypeek + " ()"; + return opts->input_api == INPUT_DEFAULT + ? "*" + opts->yycursor + : opts->yypeek + " ()"; } std::string output_restore(uint32_t ind, const opt_t *opts) { - std::string s = opts->input_api == INPUT_DEFAULT - ? opts->yycursor + " = " + opts->yymarker - : opts->yyrestore + " ()"; - return indent(ind, opts->indString) + s + ";\n"; + std::string s = opts->input_api == INPUT_DEFAULT + ? opts->yycursor + " = " + opts->yymarker + : opts->yyrestore + " ()"; + return indent(ind, opts->indString) + s + ";\n"; } std::string output_expr_lessthan(size_t n, const opt_t *opts) { - std::ostringstream s; - if (opts->input_api == INPUT_CUSTOM) { - s << opts->yylessthan << " (" << n << ")"; - } else if (n == 1) { - s << opts->yylimit << " <= " << opts->yycursor; - } else { - s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n; - } - return s.str (); + std::ostringstream s; + if (opts->input_api == INPUT_CUSTOM) { + s << opts->yylessthan << " (" << n << ")"; + } else if (n == 1) { + s << opts->yylimit << " <= " << opts->yycursor; + } else { + s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n; + } + return s.str (); } static std::string yych_conv(const opt_t *opts) { - return opts->yychConversion - ? "(" + opts->yyctype + ")" - : ""; + return opts->yychConversion + ? "(" + opts->yyctype + ")" + : ""; } void output_peek(std::ostream &o, uint32_t ind, const opt_t *opts) { - o << indent(ind, opts->indString) << opts->yych << " = " << yych_conv(opts); - if (opts->input_api == INPUT_CUSTOM) { - o << opts->yypeek << " ()"; - } else { - o << "*" << opts->yycursor; - } - o << ";\n"; + o << indent(ind, opts->indString) << opts->yych << " = " << yych_conv(opts); + if (opts->input_api == INPUT_CUSTOM) { + o << opts->yypeek << " ()"; + } else { + o << "*" << opts->yycursor; + } + o << ";\n"; } void output_skip(std::ostream &o, uint32_t ind, const opt_t *opts) { - o << indent(ind, opts->indString); - if (opts->input_api == INPUT_CUSTOM) { - o << opts->yyskip << " ()"; - } else { - o << "++" << opts->yycursor; - } - o << ";\n"; + o << indent(ind, opts->indString); + if (opts->input_api == INPUT_CUSTOM) { + o << opts->yyskip << " ()"; + } else { + o << "++" << opts->yycursor; + } + o << ";\n"; } void output_backup(std::ostream &o, uint32_t ind, const opt_t *opts) { - o << indent(ind, opts->indString); - if (opts->input_api == INPUT_CUSTOM) { - o << opts->yybackup << " ()"; - } else { - o << opts->yymarker << " = " << opts->yycursor; - } - o << ";\n"; + o << indent(ind, opts->indString); + if (opts->input_api == INPUT_CUSTOM) { + o << opts->yybackup << " ()"; + } else { + o << opts->yymarker << " = " << opts->yycursor; + } + o << ";\n"; } void output_skip_peek(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yych << " = " - << yych_conv(opts) << "*++" << opts->yycursor << ";\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yych << " = " + << yych_conv(opts) << "*++" << opts->yycursor << ";\n"; } void output_peek_skip(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yych << " = " - << yych_conv(opts) << "*" << opts->yycursor << "++;\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yych << " = " + << yych_conv(opts) << "*" << opts->yycursor << "++;\n"; } void output_skip_backup(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yymarker << " = ++" - << opts->yycursor << ";\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yymarker << " = ++" + << opts->yycursor << ";\n"; } void output_backup_skip(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yymarker << " = " - << opts->yycursor << "++;\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yymarker << " = " + << opts->yycursor << "++;\n"; } void output_backup_peek(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yych << " = " - << yych_conv(opts) << "*(" << opts->yymarker << " = " - << opts->yycursor << ");\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yych << " = " + << yych_conv(opts) << "*(" << opts->yymarker << " = " + << opts->yycursor << ");\n"; } void output_skip_backup_peek(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yych << " = " - << yych_conv(opts) << "*(" << opts->yymarker << " = ++" - << opts->yycursor << ");\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yych << " = " + << yych_conv(opts) << "*(" << opts->yymarker << " = ++" + << opts->yycursor << ");\n"; } void output_backup_peek_skip(std::ostream &o, uint32_t ind, const opt_t *opts) { - assert(opts->input_api == INPUT_DEFAULT); - o << indent(ind, opts->indString) << opts->yych << " = " - << yych_conv(opts) << "*(" << opts->yymarker << " = " - << opts->yycursor << "++);\n"; + assert(opts->input_api == INPUT_DEFAULT); + o << indent(ind, opts->indString) << opts->yych << " = " + << yych_conv(opts) << "*(" << opts->yymarker << " = " + << opts->yycursor << "++);\n"; } } // end namespace re2c diff --git a/re2c/src/code/input_api.h b/re2c/src/code/input_api.h index abb6289c..5c92a45d 100644 --- a/re2c/src/code/input_api.h +++ b/re2c/src/code/input_api.h @@ -13,8 +13,8 @@ struct opt_t; enum input_api_t { - INPUT_DEFAULT, - INPUT_CUSTOM + INPUT_DEFAULT, + INPUT_CUSTOM }; std::string output_expr_peek (const opt_t *opts); diff --git a/re2c/src/code/label.cc b/re2c/src/code/label.cc index 12e1ee24..96a54c65 100644 --- a/re2c/src/code/label.cc +++ b/re2c/src/code/label.cc @@ -7,36 +7,36 @@ namespace re2c { const uint32_t label_t::FIRST = 0; label_t::label_t () - : value (FIRST) + : value (FIRST) {} void label_t::inc () { - ++value; + ++value; } label_t label_t::first () { - return label_t (); + return label_t (); } bool label_t::operator < (const label_t & l) const { - return value < l.value; + return value < l.value; } uint32_t label_t::width () const { - uint32_t v = value; - uint32_t n = 0; - while (v /= 10) ++n; - return n; + uint32_t v = value; + uint32_t n = 0; + while (v /= 10) ++n; + return n; } std::ostream & operator << (std::ostream & o, label_t l) { - o << l.value; - return o; + o << l.value; + return o; } } // namespace re2c diff --git a/re2c/src/code/label.h b/re2c/src/code/label.h index bf29628a..0eecf0cf 100644 --- a/re2c/src/code/label.h +++ b/re2c/src/code/label.h @@ -19,18 +19,18 @@ template class counter_t; // - get next label class label_t { - static const uint32_t FIRST; - uint32_t value; - label_t (); - void inc (); + static const uint32_t FIRST; + uint32_t value; + label_t (); + void inc (); public: - static label_t first (); - bool operator < (const label_t & l) const; - uint32_t width () const; - friend std::ostream & operator << (std::ostream & o, label_t l); + static label_t first (); + bool operator < (const label_t & l) const; + uint32_t width () const; + friend std::ostream & operator << (std::ostream & o, label_t l); - friend class counter_t; + friend class counter_t; }; } // namespace re2c diff --git a/re2c/src/code/output.cc b/re2c/src/code/output.cc index fc0fff49..af2798e6 100644 --- a/re2c/src/code/output.cc +++ b/re2c/src/code/output.cc @@ -18,693 +18,693 @@ namespace re2c { OutputFragment::OutputFragment (type_t t, uint32_t i) - : type (t) - , stream () - , indent (i) + : type (t) + , stream () + , indent (i) {} OutputFragment::~OutputFragment() { - if (type == STAGS || type == MTAGS) { - delete tags; - } else if (type == LINE_INFO_INPUT) { - delete line_info; - } + if (type == STAGS || type == MTAGS) { + delete tags; + } else if (type == LINE_INFO_INPUT) { + delete line_info; + } } uint32_t OutputFragment::count_lines () const { - uint32_t lines = 0; - const std::string content = stream.str (); - const char * p = content.c_str (); - for (uint32_t i = 0; i < content.size (); ++i) - { - if (p[i] == '\n') - { - ++lines; - } - } - return lines; + uint32_t lines = 0; + const std::string content = stream.str (); + const char * p = content.c_str (); + for (uint32_t i = 0; i < content.size (); ++i) + { + if (p[i] == '\n') + { + ++lines; + } + } + return lines; } OutputBlock::OutputBlock () - : fragments () - , used_yyaccept (false) - , have_user_code (false) - , line (0) - , types () - , stags () - , mtags () - , opts(NULL) + : fragments () + , used_yyaccept (false) + , have_user_code (false) + , line (0) + , types () + , stags () + , mtags () + , opts(NULL) { - fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); + fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); } OutputBlock::~OutputBlock () { - for (unsigned int i = 0; i < fragments.size (); ++i) - { - delete fragments[i]; - } - delete opts; + for (unsigned int i = 0; i < fragments.size (); ++i) + { + delete fragments[i]; + } + delete opts; } OutputFile::OutputFile(Warn &w) - : blocks () - , label_counter () - , fill_index(0) - , state_goto(false) - , cond_goto(false) - , warn_condition_order(true) - , warn(w) + : blocks () + , label_counter () + , fill_index(0) + , state_goto(false) + , cond_goto(false) + , warn_condition_order(true) + , warn(w) {} OutputFile::~OutputFile () { - for (unsigned int i = 0; i < blocks.size(); ++i) { - delete blocks[i]; - } + for (unsigned int i = 0; i < blocks.size(); ++i) { + delete blocks[i]; + } } OutputBlock& OutputFile::block() { - return *blocks.back(); + return *blocks.back(); } std::ostream & OutputFile::stream () { - return block().fragments.back ()->stream; + return block().fragments.back ()->stream; } OutputFile &OutputFile::wraw(const char *s, const char *e) { - if (s != e && block().opts->target == TARGET_CODE) { - insert_code(); - - // scan for non-whitespace characters - bool &code = block().have_user_code; - for (const char *p = s; !code && p < e; ++p) { - code = !isspace(*p); - } - - // convert CR LF to LF - std::ostream &o = stream(); - for (const char *p = s;; ++p) { - std::streamsize l = p - s; - if (p == e) { - o.write(s, l); - break; - } else if (*p == '\n') { - if (p > s && p[-1] == '\r') --l; - o.write(s, l); - s = p; - } - } - } - return *this; + if (s != e && block().opts->target == TARGET_CODE) { + insert_code(); + + // scan for non-whitespace characters + bool &code = block().have_user_code; + for (const char *p = s; !code && p < e; ++p) { + code = !isspace(*p); + } + + // convert CR LF to LF + std::ostream &o = stream(); + for (const char *p = s;; ++p) { + std::streamsize l = p - s; + if (p == e) { + o.write(s, l); + break; + } else if (*p == '\n') { + if (p > s && p[-1] == '\r') --l; + o.write(s, l); + s = p; + } + } + } + return *this; } OutputFile & OutputFile::wu32_hex (uint32_t n) { - insert_code(); - prtHex(stream(), n, block().opts->encoding.szCodeUnit()); - return *this; + insert_code(); + prtHex(stream(), n, block().opts->encoding.szCodeUnit()); + return *this; } OutputFile & OutputFile::wc_hex (uint32_t n) { - insert_code(); - const opt_t *opts = block().opts; - const Enc &e = opts->encoding; - prtChOrHex(stream(), n, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT); - return *this; + insert_code(); + const opt_t *opts = block().opts; + const Enc &e = opts->encoding; + prtChOrHex(stream(), n, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT); + return *this; } OutputFile & OutputFile::wrange (uint32_t l, uint32_t u) { - insert_code(); - const opt_t *opts = block().opts; - const Enc &e = opts->encoding; - printSpan(stream(), l, u, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT); - return *this; + insert_code(); + const opt_t *opts = block().opts; + const Enc &e = opts->encoding; + printSpan(stream(), l, u, e.szCodeUnit(), e.type() == Enc::EBCDIC, opts->target == TARGET_DOT); + return *this; } OutputFile & OutputFile::wu32_width (uint32_t n, int w) { - insert_code(); - stream () << std::setw (w); - stream () << n; - return *this; + insert_code(); + stream () << std::setw (w); + stream () << n; + return *this; } OutputFile & OutputFile::wversion_time () { - insert_code(); - output_version_time(stream(), block().opts->version, !block().opts->bNoGenerationDate); - return *this; + insert_code(); + output_version_time(stream(), block().opts->version, !block().opts->bNoGenerationDate); + return *this; } OutputFile & OutputFile::wuser_start_label () { - insert_code(); - const std::string label = block().opts->startlabel; - if (!label.empty()) - { - wstring(label).ws(":\n"); - } - return *this; + insert_code(); + const std::string label = block().opts->startlabel; + if (!label.empty()) + { + wstring(label).ws(":\n"); + } + return *this; } OutputFile & OutputFile::wc (char c) { - insert_code(); - stream () << c; - return *this; + insert_code(); + stream () << c; + return *this; } OutputFile & OutputFile::wu32 (uint32_t n) { - insert_code(); - stream () << n; - return *this; + insert_code(); + stream () << n; + return *this; } OutputFile & OutputFile::wu64 (uint64_t n) { - insert_code(); - stream () << n; - return *this; + insert_code(); + stream () << n; + return *this; } OutputFile & OutputFile::wstring (const std::string & s) { - insert_code(); - stream () << s; - return *this; + insert_code(); + stream () << s; + return *this; } OutputFile & OutputFile::ws (const char * s) { - insert_code(); - stream () << s; - return *this; + insert_code(); + stream () << s; + return *this; } OutputFile & OutputFile::wlabel (label_t l) { - insert_code(); - stream () << l; - return *this; + insert_code(); + stream () << l; + return *this; } OutputFile & OutputFile::wind (uint32_t ind) { - insert_code(); - stream () << indent(ind, block().opts->indString); - return *this; + insert_code(); + stream () << indent(ind, block().opts->indString); + return *this; } void OutputFile::insert_code () { - if (block().fragments.back()->type != OutputFragment::CODE) { - block().fragments.push_back(new OutputFragment(OutputFragment::CODE, 0)); - } + if (block().fragments.back()->type != OutputFragment::CODE) { + block().fragments.push_back(new OutputFragment(OutputFragment::CODE, 0)); + } } OutputFile &OutputFile::wdelay_tags(const ConfTags *cf, bool mtags) { - if (block().opts->target == TARGET_CODE) { - OutputFragment *frag = new OutputFragment( - mtags ? OutputFragment::MTAGS : OutputFragment::STAGS, 0); - frag->tags = cf; - blocks.back()->fragments.push_back(frag); - } - return *this; + if (block().opts->target == TARGET_CODE) { + OutputFragment *frag = new OutputFragment( + mtags ? OutputFragment::MTAGS : OutputFragment::STAGS, 0); + frag->tags = cf; + blocks.back()->fragments.push_back(frag); + } + return *this; } OutputFile & OutputFile::wdelay_line_info_input (uint32_t l, const std::string &fn) { - OutputFragment *frag = new OutputFragment(OutputFragment::LINE_INFO_INPUT, 0); - frag->line_info = new LineInfo(l, fn); - blocks.back()->fragments.push_back(frag); - return *this; + OutputFragment *frag = new OutputFragment(OutputFragment::LINE_INFO_INPUT, 0); + frag->line_info = new LineInfo(l, fn); + blocks.back()->fragments.push_back(frag); + return *this; } OutputFile & OutputFile::wdelay_line_info_output () { - block().fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO_OUTPUT, 0)); - return *this; + block().fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO_OUTPUT, 0)); + return *this; } OutputFile & OutputFile::wdelay_cond_goto(uint32_t ind) { - if (block().opts->cFlag && !cond_goto) { - block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind)); - cond_goto = true; - } - return *this; + if (block().opts->cFlag && !cond_goto) { + block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind)); + cond_goto = true; + } + return *this; } OutputFile & OutputFile::wdelay_cond_table(uint32_t ind) { - if (block().opts->gFlag && block().opts->cFlag && !cond_goto) { - block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind)); - } - return *this; + if (block().opts->gFlag && block().opts->cFlag && !cond_goto) { + block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind)); + } + return *this; } OutputFile & OutputFile::wdelay_state_goto (uint32_t ind) { - if (block().opts->target == TARGET_CODE - && block().opts->fFlag && !state_goto) { - block().fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind)); - state_goto = true; - } - return *this; + if (block().opts->target == TARGET_CODE + && block().opts->fFlag && !state_goto) { + block().fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind)); + state_goto = true; + } + return *this; } OutputFile & OutputFile::wdelay_types () { - if (block().opts->target == TARGET_CODE) { - warn_condition_order = false; // see note [condition order] - block().fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0)); - } - return *this; + if (block().opts->target == TARGET_CODE) { + warn_condition_order = false; // see note [condition order] + block().fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0)); + } + return *this; } OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind) { - block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind)); - return *this; + block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind)); + return *this; } OutputFile & OutputFile::wdelay_yymaxfill () { - if (block().opts->target == TARGET_CODE) { - block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0)); - } - return *this; + if (block().opts->target == TARGET_CODE) { + block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0)); + } + return *this; } OutputFile& OutputFile::wdelay_yymaxnmatch() { - if (block().opts->target == TARGET_CODE - && block().opts->posix_captures) { - block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXNMATCH, 0)); - } - return *this; + if (block().opts->target == TARGET_CODE + && block().opts->posix_captures) { + block().fragments.push_back (new OutputFragment (OutputFragment::YYMAXNMATCH, 0)); + } + return *this; } OutputFile& OutputFile::wdelay_skip(uint32_t ind, bool skip) { - if (skip) { - OutputFragment *f = new OutputFragment(OutputFragment::SKIP, ind); - block().fragments.push_back(f); - } - return *this; + if (skip) { + OutputFragment *f = new OutputFragment(OutputFragment::SKIP, ind); + block().fragments.push_back(f); + } + return *this; } OutputFile& OutputFile::wdelay_peek(uint32_t ind, bool peek) { - if (peek) { - OutputFragment *f = new OutputFragment(OutputFragment::PEEK, ind); - block().fragments.push_back(f); - } - return *this; + if (peek) { + OutputFragment *f = new OutputFragment(OutputFragment::PEEK, ind); + block().fragments.push_back(f); + } + return *this; } OutputFile& OutputFile::wdelay_backup(uint32_t ind, bool backup) { - if (backup) { - OutputFragment *f = new OutputFragment(OutputFragment::BACKUP, ind); - block().fragments.push_back(f); - } - return *this; + if (backup) { + OutputFragment *f = new OutputFragment(OutputFragment::BACKUP, ind); + block().fragments.push_back(f); + } + return *this; } void OutputFile::new_block(Opt &opts) { - OutputBlock *b = new OutputBlock; - b->opts = opts.snapshot(); - blocks.push_back(b); + OutputBlock *b = new OutputBlock; + b->opts = opts.snapshot(); + blocks.push_back(b); - // start label hapens to be the only option - // that must be reset for each new block - opts.reset_startlabel(); + // start label hapens to be the only option + // that must be reset for each new block + opts.reset_startlabel(); } void OutputFile::fix_first_block_opts() { - // If the initial block contains only whitespace and no user code, - // then re2c options specified in the first re2c block are also - // applied to the initial block. - if (blocks.size() >= 2) { - OutputBlock - *fst = blocks[0], - *snd = blocks[1]; - if (!fst->have_user_code) { - *const_cast(fst->opts) = *snd->opts; - } - } + // If the initial block contains only whitespace and no user code, + // then re2c options specified in the first re2c block are also + // applied to the initial block. + if (blocks.size() >= 2) { + OutputBlock + *fst = blocks[0], + *snd = blocks[1]; + if (!fst->have_user_code) { + *const_cast(fst->opts) = *snd->opts; + } + } } void OutputFile::global_lists(uniq_vector_t &types, - std::set &stags, std::set &mtags) const + std::set &stags, std::set &mtags) const { - for (unsigned int i = 0; i < blocks.size(); ++i) { - const std::vector &cs = blocks[i]->types; - for (size_t j = 0; j < cs.size(); ++j) { - types.find_or_add(cs[j]); - } + for (unsigned int i = 0; i < blocks.size(); ++i) { + const std::vector &cs = blocks[i]->types; + for (size_t j = 0; j < cs.size(); ++j) { + types.find_or_add(cs[j]); + } - const std::set - &st = blocks[i]->stags, - &mt = blocks[i]->mtags; - stags.insert(st.begin(), st.end()); - mtags.insert(mt.begin(), mt.end()); - } + const std::set + &st = blocks[i]->stags, + &mt = blocks[i]->mtags; + stags.insert(st.begin(), st.end()); + mtags.insert(mt.begin(), mt.end()); + } } static void foldexpr(std::vector &frags) { - const size_t n = frags.size(); - for (size_t i = 0; i < n;) { - - if (i + 2 < n) { - OutputFragment::type_t - &x = frags[i]->type, - &y = frags[i + 1]->type, - &z = frags[i + 2]->type; - if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK && z == OutputFragment::SKIP) { - x = OutputFragment::BACKUP_PEEK_SKIP; - y = z = OutputFragment::EMPTY; - i += 3; - continue; - } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP && z == OutputFragment::PEEK) { - x = OutputFragment::SKIP_BACKUP_PEEK; - y = z = OutputFragment::EMPTY; - i += 3; - continue; - } - } - - if (i + 1 < n) { - OutputFragment::type_t - &x = frags[i]->type, - &y = frags[i + 1]->type; - if (x == OutputFragment::PEEK && y == OutputFragment::SKIP) { - x = OutputFragment::PEEK_SKIP; - y = OutputFragment::EMPTY; - i += 2; - continue; - } else if (x == OutputFragment::SKIP && y == OutputFragment::PEEK) { - x = OutputFragment::SKIP_PEEK; - y = OutputFragment::EMPTY; - i += 2; - continue; - } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP) { - x = OutputFragment::SKIP_BACKUP; - y = OutputFragment::EMPTY; - i += 2; - continue; - } else if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK) { - x = OutputFragment::BACKUP_PEEK; - y = OutputFragment::EMPTY; - i += 2; - continue; - } else if (x == OutputFragment::BACKUP && y == OutputFragment::SKIP) { - x = OutputFragment::BACKUP_SKIP; - y = OutputFragment::EMPTY; - i += 2; - continue; - } - } - - ++i; - } + const size_t n = frags.size(); + for (size_t i = 0; i < n;) { + + if (i + 2 < n) { + OutputFragment::type_t + &x = frags[i]->type, + &y = frags[i + 1]->type, + &z = frags[i + 2]->type; + if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK && z == OutputFragment::SKIP) { + x = OutputFragment::BACKUP_PEEK_SKIP; + y = z = OutputFragment::EMPTY; + i += 3; + continue; + } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP && z == OutputFragment::PEEK) { + x = OutputFragment::SKIP_BACKUP_PEEK; + y = z = OutputFragment::EMPTY; + i += 3; + continue; + } + } + + if (i + 1 < n) { + OutputFragment::type_t + &x = frags[i]->type, + &y = frags[i + 1]->type; + if (x == OutputFragment::PEEK && y == OutputFragment::SKIP) { + x = OutputFragment::PEEK_SKIP; + y = OutputFragment::EMPTY; + i += 2; + continue; + } else if (x == OutputFragment::SKIP && y == OutputFragment::PEEK) { + x = OutputFragment::SKIP_PEEK; + y = OutputFragment::EMPTY; + i += 2; + continue; + } else if (x == OutputFragment::SKIP && y == OutputFragment::BACKUP) { + x = OutputFragment::SKIP_BACKUP; + y = OutputFragment::EMPTY; + i += 2; + continue; + } else if (x == OutputFragment::BACKUP && y == OutputFragment::PEEK) { + x = OutputFragment::BACKUP_PEEK; + y = OutputFragment::EMPTY; + i += 2; + continue; + } else if (x == OutputFragment::BACKUP && y == OutputFragment::SKIP) { + x = OutputFragment::BACKUP_SKIP; + y = OutputFragment::EMPTY; + i += 2; + continue; + } + } + + ++i; + } } bool OutputFile::emit(const uniq_vector_t &global_types, - const std::set &global_stags, - const std::set &global_mtags, - size_t max_fill, size_t max_nmatch) -{ - FILE *file = NULL; - std::string filename = block().opts->output_file; - if (filename.empty()) { - filename = ""; - file = stdout; - } else { - file = fopen(filename.c_str(), "w"); - if (!file) { - error("cannot open output file: %s", filename.c_str()); - return false; - } - } - - fix_first_block_opts(); - - unsigned int line_count = 1; - for (unsigned int j = 0; j < blocks.size(); ++j) { - OutputBlock & b = * blocks[j]; - const opt_t *bopt = b.opts; - - if (bopt->input_api == INPUT_DEFAULT) { - foldexpr(b.fragments); - } - - const size_t n = b.fragments.size(); - for (size_t i = 0; i < n; ++i) { - OutputFragment & f = * b.fragments[i]; - std::ostringstream &o = f.stream; - const uint32_t ind = f.indent ? f.indent : bopt->topIndent; - - switch (f.type) { - case OutputFragment::EMPTY: - case OutputFragment::CODE: break; - case OutputFragment::LINE_INFO_INPUT: - output_line_info(o, f.line_info->line, f.line_info->filename, bopt->iFlag); - break; - case OutputFragment::LINE_INFO_OUTPUT: - output_line_info(o, line_count + 1, filename, bopt->iFlag); - break; - case OutputFragment::COND_GOTO: - output_cond_goto(o, ind, b.types, - bopt, warn, warn_condition_order, b.line); - break; - case OutputFragment::COND_TABLE: - output_cond_table(o, ind, b.types, bopt); - break; - case OutputFragment::STATE_GOTO: - output_state_goto(o, ind, 0, fill_index, bopt); - break; - case OutputFragment::STAGS: - output_tags(o, ind, *f.tags, global_stags, bopt); - break; - case OutputFragment::MTAGS: - output_tags(o, ind, *f.tags, global_mtags, bopt); - break; - case OutputFragment::TYPES: - output_types(o, ind, block().opts, global_types); - break; - case OutputFragment::YYACCEPT_INIT: - output_yyaccept_init(o, ind, b.used_yyaccept, bopt); - break; - case OutputFragment::YYMAXFILL: - output_yymaxfill(o, ind, max_fill, bopt); - break; - case OutputFragment::YYMAXNMATCH: - output_yymaxnmatch(o, ind, max_nmatch, bopt); - break; - case OutputFragment::SKIP: - output_skip(o, ind, bopt); - break; - case OutputFragment::PEEK: - output_peek(o, ind, bopt); - break; - case OutputFragment::BACKUP: - output_backup(o, ind, bopt); - break; - case OutputFragment::PEEK_SKIP: - output_peek_skip(o, ind, bopt); - break; - case OutputFragment::SKIP_PEEK: - output_skip_peek(o, ind, bopt); - break; - case OutputFragment::SKIP_BACKUP: - output_skip_backup(o, ind, bopt); - break; - case OutputFragment::BACKUP_SKIP: - output_backup_skip(o, ind, bopt); - break; - case OutputFragment::BACKUP_PEEK: - output_backup_peek(o, ind, bopt); - break; - case OutputFragment::BACKUP_PEEK_SKIP: - output_backup_peek_skip(o, ind, bopt); - break; - case OutputFragment::SKIP_BACKUP_PEEK: - output_skip_backup_peek(o, ind, bopt); - break; - } - - std::string content = o.str(); - fwrite(content.c_str(), 1, content.size(), file); - line_count += f.count_lines(); - } - } - - fclose(file); - return true; + const std::set &global_stags, + const std::set &global_mtags, + size_t max_fill, size_t max_nmatch) +{ + FILE *file = NULL; + std::string filename = block().opts->output_file; + if (filename.empty()) { + filename = ""; + file = stdout; + } else { + file = fopen(filename.c_str(), "w"); + if (!file) { + error("cannot open output file: %s", filename.c_str()); + return false; + } + } + + fix_first_block_opts(); + + unsigned int line_count = 1; + for (unsigned int j = 0; j < blocks.size(); ++j) { + OutputBlock & b = * blocks[j]; + const opt_t *bopt = b.opts; + + if (bopt->input_api == INPUT_DEFAULT) { + foldexpr(b.fragments); + } + + const size_t n = b.fragments.size(); + for (size_t i = 0; i < n; ++i) { + OutputFragment & f = * b.fragments[i]; + std::ostringstream &o = f.stream; + const uint32_t ind = f.indent ? f.indent : bopt->topIndent; + + switch (f.type) { + case OutputFragment::EMPTY: + case OutputFragment::CODE: break; + case OutputFragment::LINE_INFO_INPUT: + output_line_info(o, f.line_info->line, f.line_info->filename, bopt->iFlag); + break; + case OutputFragment::LINE_INFO_OUTPUT: + output_line_info(o, line_count + 1, filename, bopt->iFlag); + break; + case OutputFragment::COND_GOTO: + output_cond_goto(o, ind, b.types, + bopt, warn, warn_condition_order, b.line); + break; + case OutputFragment::COND_TABLE: + output_cond_table(o, ind, b.types, bopt); + break; + case OutputFragment::STATE_GOTO: + output_state_goto(o, ind, 0, fill_index, bopt); + break; + case OutputFragment::STAGS: + output_tags(o, ind, *f.tags, global_stags, bopt); + break; + case OutputFragment::MTAGS: + output_tags(o, ind, *f.tags, global_mtags, bopt); + break; + case OutputFragment::TYPES: + output_types(o, ind, block().opts, global_types); + break; + case OutputFragment::YYACCEPT_INIT: + output_yyaccept_init(o, ind, b.used_yyaccept, bopt); + break; + case OutputFragment::YYMAXFILL: + output_yymaxfill(o, ind, max_fill, bopt); + break; + case OutputFragment::YYMAXNMATCH: + output_yymaxnmatch(o, ind, max_nmatch, bopt); + break; + case OutputFragment::SKIP: + output_skip(o, ind, bopt); + break; + case OutputFragment::PEEK: + output_peek(o, ind, bopt); + break; + case OutputFragment::BACKUP: + output_backup(o, ind, bopt); + break; + case OutputFragment::PEEK_SKIP: + output_peek_skip(o, ind, bopt); + break; + case OutputFragment::SKIP_PEEK: + output_skip_peek(o, ind, bopt); + break; + case OutputFragment::SKIP_BACKUP: + output_skip_backup(o, ind, bopt); + break; + case OutputFragment::BACKUP_SKIP: + output_backup_skip(o, ind, bopt); + break; + case OutputFragment::BACKUP_PEEK: + output_backup_peek(o, ind, bopt); + break; + case OutputFragment::BACKUP_PEEK_SKIP: + output_backup_peek_skip(o, ind, bopt); + break; + case OutputFragment::SKIP_BACKUP_PEEK: + output_skip_backup_peek(o, ind, bopt); + break; + } + + std::string content = o.str(); + fwrite(content.c_str(), 1, content.size(), file); + line_count += f.count_lines(); + } + } + + fclose(file); + return true; } bool HeaderFile::emit(const opt_t *opts, const uniq_vector_t &types) { - const std::string &filename = opts->header_file; - if (filename.empty()) return true; + const std::string &filename = opts->header_file; + if (filename.empty()) return true; - FILE *file = fopen(filename.c_str(), "w"); - if (!file) { - error("cannot open header file: %s", filename.c_str()); - return false; - } + FILE *file = fopen(filename.c_str(), "w"); + if (!file) { + error("cannot open header file: %s", filename.c_str()); + return false; + } - output_version_time(stream, opts->version, !opts->bNoGenerationDate); - output_line_info(stream, 3, filename, opts->iFlag); - stream << "\n"; - output_types(stream, 0, opts, types); + output_version_time(stream, opts->version, !opts->bNoGenerationDate); + output_line_info(stream, 3, filename, opts->iFlag); + stream << "\n"; + output_types(stream, 0, opts, types); - std::string content = stream.str(); - fwrite(content.c_str(), 1, content.size(), file); + std::string content = stream.str(); + fwrite(content.c_str(), 1, content.size(), file); - fclose(file); - return true; + fclose(file); + return true; } Output::Output(Warn &w) - : source(w) - , header() - , skeletons() - , max_fill(1) - , max_nmatch(1) + : source(w) + , header() + , skeletons() + , max_fill(1) + , max_nmatch(1) {} bool Output::emit() { - if (source.warn.error()) { - return false; - } + if (source.warn.error()) { + return false; + } - uniq_vector_t types; - std::set stags, mtags; - source.global_lists(types, stags, mtags); + uniq_vector_t types; + std::set stags, mtags; + source.global_lists(types, stags, mtags); - // global options are last block's options - const opt_t *opts = source.block().opts; + // global options are last block's options + const opt_t *opts = source.block().opts; - return source.emit(types, stags, mtags, max_fill, max_nmatch) - && header.emit(opts, types); + return source.emit(types, stags, mtags, max_fill, max_nmatch) + && header.emit(opts, types); } void output_tags(std::ostream &o, uint32_t ind, const ConfTags &conf, - const std::set &tags, const opt_t *opts) -{ - std::set::const_iterator - tag = tags.begin(), - end = tags.end(); - o << indent(ind, opts->indString); - for (;tag != end;) { - std::string fmt = conf.format; - strrreplace(fmt, "@@", *tag); - o << fmt; - if (++tag == end) { - break; - } - o << conf.separator; - } + const std::set &tags, const opt_t *opts) +{ + std::set::const_iterator + tag = tags.begin(), + end = tags.end(); + o << indent(ind, opts->indString); + for (;tag != end;) { + std::string fmt = conf.format; + strrreplace(fmt, "@@", *tag); + o << fmt; + if (++tag == end) { + break; + } + o << conf.separator; + } } void output_state_goto(std::ostream & o, uint32_t ind, - uint32_t start_label, uint32_t fill_index, const opt_t *opts) -{ - const std::string - indstr = indent(ind, opts->indString), - getstate = opts->state_get_naked - ? opts->state_get - : opts->state_get + "()"; - - o << indstr << "switch (" << getstate << ") {\n"; - if (opts->bUseStateAbort) - { - o << indstr << "default: abort();\n"; - o << indstr << "case -1: goto " << opts->labelPrefix << start_label << ";\n"; - } - else - { - o << indstr << "default: goto " << opts->labelPrefix << start_label << ";\n"; - } - for (uint32_t i = 0; i < fill_index; ++i) - { - o << indstr << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n"; - } - o << indstr << "}\n"; - if (opts->bUseStateNext) - { - o << opts->yynext << ":\n"; - } + uint32_t start_label, uint32_t fill_index, const opt_t *opts) +{ + const std::string + indstr = indent(ind, opts->indString), + getstate = opts->state_get_naked + ? opts->state_get + : opts->state_get + "()"; + + o << indstr << "switch (" << getstate << ") {\n"; + if (opts->bUseStateAbort) + { + o << indstr << "default: abort();\n"; + o << indstr << "case -1: goto " << opts->labelPrefix << start_label << ";\n"; + } + else + { + o << indstr << "default: goto " << opts->labelPrefix << start_label << ";\n"; + } + for (uint32_t i = 0; i < fill_index; ++i) + { + o << indstr << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n"; + } + o << indstr << "}\n"; + if (opts->bUseStateNext) + { + o << opts->yynext << ":\n"; + } } void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept, const opt_t *opts) { - if (used_yyaccept) - { - o << indent(ind, opts->indString) << "unsigned int " << opts->yyaccept << " = 0;\n"; - } + if (used_yyaccept) + { + o << indent(ind, opts->indString) << "unsigned int " << opts->yyaccept << " = 0;\n"; + } } void output_yymaxfill(std::ostream &o, uint32_t ind, - size_t max_fill, const opt_t *opts) + size_t max_fill, const opt_t *opts) { - o << indent(ind, opts->indString) << "#define YYMAXFILL " << max_fill << "\n"; + o << indent(ind, opts->indString) << "#define YYMAXFILL " << max_fill << "\n"; } void output_yymaxnmatch(std::ostream &o, uint32_t ind, - size_t max_nmatch, const opt_t *opts) + size_t max_nmatch, const opt_t *opts) { - o << indent(ind, opts->indString) << "#define YYMAXNMATCH " << max_nmatch << "\n"; + o << indent(ind, opts->indString) << "#define YYMAXNMATCH " << max_nmatch << "\n"; } void output_line_info(std::ostream &o, uint32_t line, - const std::string &fname, bool iflag) + const std::string &fname, bool iflag) { - if (!iflag) { - o << "#line " << line << " \"" << fname << "\"\n"; - } + if (!iflag) { + o << "#line " << line << " \"" << fname << "\"\n"; + } } void output_types(std::ostream &o, uint32_t ind, const opt_t *opts, - const uniq_vector_t &types) + const uniq_vector_t &types) { - const std::string indstr = opts->indString; - o << indent(ind++, indstr) << "enum " << opts->yycondtype << " {\n"; - for (size_t i = 0; i < types.size(); ++i) { - o << indent(ind, indstr) << opts->condEnumPrefix << types[i] << ",\n"; - } - o << indent(--ind, indstr) << "};\n"; + const std::string indstr = opts->indString; + o << indent(ind++, indstr) << "enum " << opts->yycondtype << " {\n"; + for (size_t i = 0; i < types.size(); ++i) { + o << indent(ind, indstr) << opts->condEnumPrefix << types[i] << ",\n"; + } + o << indent(--ind, indstr) << "};\n"; } void output_version_time(std::ostream &o, bool version, bool date) { - o << "/* Generated by re2c"; - if (version) { - o << " " << PACKAGE_VERSION; - } - if (date) { - o << " on "; - time_t now = time (NULL); - o.write (ctime (&now), 24); - } - o << " */" << "\n"; + o << "/* Generated by re2c"; + if (version) { + o << " " << PACKAGE_VERSION; + } + if (date) { + o << " on "; + time_t now = time (NULL); + o.write (ctime (&now), 24); + } + o << " */" << "\n"; } /* @@ -733,76 +733,76 @@ void output_version_time(std::ostream &o, bool version, bool date) static std::string output_cond_get(const opt_t *opts) { - return opts->cond_get + (opts->cond_get_naked ? "" : "()"); + return opts->cond_get + (opts->cond_get_naked ? "" : "()"); } static void output_cond_goto_binary(std::ostream &o, uint32_t ind, - const std::vector &conds, const opt_t *opts, - size_t lower, size_t upper) + const std::vector &conds, const opt_t *opts, + size_t lower, size_t upper) { - const std::string indstr = indent(ind, opts->indString); + const std::string indstr = indent(ind, opts->indString); - if (lower == upper) { - o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n"; - } else { - const size_t middle = lower + (upper - lower + 1) / 2; - o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n"; - output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1); - o << indstr << "} else {\n"; - output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper); - o << indstr << "}\n"; - } + if (lower == upper) { + o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n"; + } else { + const size_t middle = lower + (upper - lower + 1) / 2; + o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n"; + output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1); + o << indstr << "} else {\n"; + output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper); + o << indstr << "}\n"; + } } void output_cond_goto(std::ostream &o, uint32_t ind, - const std::vector &conds, const opt_t *opts, - Warn &warn, bool warn_cond_order, uint32_t line) -{ - const size_t ncond = conds.size(); - const std::string indstr = indent(ind, opts->indString); - - if (opts->target == TARGET_DOT) { - for (size_t i = 0; i < ncond; ++i) { - const std::string &cond = conds[i]; - o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n"; - } - return; - } - - if (opts->gFlag) { - o << indstr << "goto *" << opts->yyctable - << "[" << output_cond_get(opts) << "];\n"; - } else if (opts->sFlag) { - if (ncond == 1) warn_cond_order = false; - output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1); - } else { - warn_cond_order = false; - o << indstr << "switch (" << output_cond_get(opts) << ") {\n"; - for (size_t i = 0; i < ncond; ++i) { - const std::string &cond = conds[i]; - o << indstr << "case " << opts->condEnumPrefix << cond - <<": goto " << opts->condPrefix << cond << ";\n"; - } - o << indstr << "}\n"; - } - - warn_cond_order &= opts->header_file.empty(); - - // see note [condition order] - if (warn_cond_order) warn.condition_order(line); + const std::vector &conds, const opt_t *opts, + Warn &warn, bool warn_cond_order, uint32_t line) +{ + const size_t ncond = conds.size(); + const std::string indstr = indent(ind, opts->indString); + + if (opts->target == TARGET_DOT) { + for (size_t i = 0; i < ncond; ++i) { + const std::string &cond = conds[i]; + o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n"; + } + return; + } + + if (opts->gFlag) { + o << indstr << "goto *" << opts->yyctable + << "[" << output_cond_get(opts) << "];\n"; + } else if (opts->sFlag) { + if (ncond == 1) warn_cond_order = false; + output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1); + } else { + warn_cond_order = false; + o << indstr << "switch (" << output_cond_get(opts) << ") {\n"; + for (size_t i = 0; i < ncond; ++i) { + const std::string &cond = conds[i]; + o << indstr << "case " << opts->condEnumPrefix << cond + <<": goto " << opts->condPrefix << cond << ";\n"; + } + o << indstr << "}\n"; + } + + warn_cond_order &= opts->header_file.empty(); + + // see note [condition order] + if (warn_cond_order) warn.condition_order(line); } void output_cond_table(std::ostream &o, uint32_t ind, - const std::vector &conds, const opt_t *opts) + const std::vector &conds, const opt_t *opts) { - const size_t ncond = conds.size(); - const std::string indstr = opts->indString; + const size_t ncond = conds.size(); + const std::string indstr = opts->indString; - o << indent(ind++, indstr) << "static void *" << opts->yyctable << "[" << ncond << "] = {\n"; - for (size_t i = 0; i < ncond; ++i) { - o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n"; - } - o << indent(--ind, indstr) << "};\n"; + o << indent(ind++, indstr) << "static void *" << opts->yyctable << "[" << ncond << "] = {\n"; + for (size_t i = 0; i < ncond; ++i) { + o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n"; + } + o << indent(--ind, indstr) << "};\n"; } } // namespace re2c diff --git a/re2c/src/code/output.h b/re2c/src/code/output.h index 2f040ffb..ec2dd326 100644 --- a/re2c/src/code/output.h +++ b/re2c/src/code/output.h @@ -27,165 +27,165 @@ template class uniq_vector_t; struct ConfTags { - std::string format; - std::string separator; + std::string format; + std::string separator; - ConfTags(const std::string &f, const std::string &s) - : format(f), separator(s) {} + ConfTags(const std::string &f, const std::string &s) + : format(f), separator(s) {} }; struct LineInfo { - uint32_t line; - std::string filename; + uint32_t line; + std::string filename; - LineInfo(uint32_t l, const std::string &fn) - : line(l), filename(fn) {} + LineInfo(uint32_t l, const std::string &fn) + : line(l), filename(fn) {} }; struct OutputFragment { - enum type_t - { CODE -// , CONFIG - , COND_GOTO - , COND_TABLE - , LINE_INFO_INPUT - , LINE_INFO_OUTPUT - , STATE_GOTO - , STAGS - , MTAGS - , TYPES - , YYACCEPT_INIT - , YYMAXFILL - , YYMAXNMATCH - , SKIP - , PEEK - , BACKUP - , PEEK_SKIP - , SKIP_PEEK - , SKIP_BACKUP - , BACKUP_SKIP - , BACKUP_PEEK - , BACKUP_PEEK_SKIP - , SKIP_BACKUP_PEEK - , EMPTY - }; - - type_t type; - std::ostringstream stream; - uint32_t indent; - union - { - const ConfTags *tags; - const LineInfo *line_info; - }; - - OutputFragment (type_t t, uint32_t i); - ~OutputFragment (); - uint32_t count_lines () const; + enum type_t + { CODE +// , CONFIG + , COND_GOTO + , COND_TABLE + , LINE_INFO_INPUT + , LINE_INFO_OUTPUT + , STATE_GOTO + , STAGS + , MTAGS + , TYPES + , YYACCEPT_INIT + , YYMAXFILL + , YYMAXNMATCH + , SKIP + , PEEK + , BACKUP + , PEEK_SKIP + , SKIP_PEEK + , SKIP_BACKUP + , BACKUP_SKIP + , BACKUP_PEEK + , BACKUP_PEEK_SKIP + , SKIP_BACKUP_PEEK + , EMPTY + }; + + type_t type; + std::ostringstream stream; + uint32_t indent; + union + { + const ConfTags *tags; + const LineInfo *line_info; + }; + + OutputFragment (type_t t, uint32_t i); + ~OutputFragment (); + uint32_t count_lines () const; }; struct OutputBlock { - std::vector fragments; - bool used_yyaccept; - bool have_user_code; - uint32_t line; - std::vector types; - std::set stags; - std::set mtags; - const opt_t *opts; - - OutputBlock (); - ~OutputBlock (); - FORBID_COPY(OutputBlock); + std::vector fragments; + bool used_yyaccept; + bool have_user_code; + uint32_t line; + std::vector types; + std::set stags; + std::set mtags; + const opt_t *opts; + + OutputBlock (); + ~OutputBlock (); + FORBID_COPY(OutputBlock); }; class OutputFile { - std::vector blocks; + std::vector blocks; public: - counter_t label_counter; - uint32_t fill_index; - bool state_goto; - bool cond_goto; - bool warn_condition_order; - Warn &warn; - - explicit OutputFile(Warn &w); - ~OutputFile(); - - std::ostream & stream (); - OutputBlock &block(); - void insert_code (); - bool open (); - void new_block(Opt &opts); - - // immediate output - OutputFile & wraw (const char *s, const char *e); - OutputFile & wc (char c); - OutputFile & wc_hex (uint32_t n); - OutputFile & wu32 (uint32_t n); - OutputFile & wu32_hex (uint32_t n); - OutputFile & wu32_width (uint32_t n, int w); - OutputFile & wu64 (uint64_t n); - OutputFile & wstring (const std::string & s); - OutputFile & ws (const char * s); - OutputFile & wlabel (label_t l); - OutputFile & wrange (uint32_t u, uint32_t l); - OutputFile & wversion_time (); - OutputFile & wuser_start_label (); - OutputFile & wind (uint32_t ind); - - // delayed output - OutputFile & wdelay_tags(const ConfTags *cf, bool mtags); - OutputFile & wdelay_line_info_input (uint32_t l, const std::string &fn); - OutputFile & wdelay_line_info_output (); - OutputFile & wdelay_cond_goto(uint32_t ind); - OutputFile & wdelay_cond_table(uint32_t ind); - OutputFile & wdelay_state_goto (uint32_t ind); - OutputFile & wdelay_types (); - OutputFile & wdelay_yyaccept_init (uint32_t ind); - OutputFile & wdelay_yymaxfill (); - OutputFile& wdelay_yymaxnmatch(); - OutputFile& wdelay_skip(uint32_t ind, bool skip); - OutputFile& wdelay_peek(uint32_t ind, bool peek); - OutputFile& wdelay_backup(uint32_t ind, bool backup); - - void fix_first_block_opts(); - void global_lists(uniq_vector_t &types, - std::set &stags, std::set &mtags) const; - - bool emit(const uniq_vector_t &global_types, - const std::set &global_stags, - const std::set &global_mtags, - size_t max_fill, size_t max_nmatch); - - FORBID_COPY (OutputFile); + counter_t label_counter; + uint32_t fill_index; + bool state_goto; + bool cond_goto; + bool warn_condition_order; + Warn &warn; + + explicit OutputFile(Warn &w); + ~OutputFile(); + + std::ostream & stream (); + OutputBlock &block(); + void insert_code (); + bool open (); + void new_block(Opt &opts); + + // immediate output + OutputFile & wraw (const char *s, const char *e); + OutputFile & wc (char c); + OutputFile & wc_hex (uint32_t n); + OutputFile & wu32 (uint32_t n); + OutputFile & wu32_hex (uint32_t n); + OutputFile & wu32_width (uint32_t n, int w); + OutputFile & wu64 (uint64_t n); + OutputFile & wstring (const std::string & s); + OutputFile & ws (const char * s); + OutputFile & wlabel (label_t l); + OutputFile & wrange (uint32_t u, uint32_t l); + OutputFile & wversion_time (); + OutputFile & wuser_start_label (); + OutputFile & wind (uint32_t ind); + + // delayed output + OutputFile & wdelay_tags(const ConfTags *cf, bool mtags); + OutputFile & wdelay_line_info_input (uint32_t l, const std::string &fn); + OutputFile & wdelay_line_info_output (); + OutputFile & wdelay_cond_goto(uint32_t ind); + OutputFile & wdelay_cond_table(uint32_t ind); + OutputFile & wdelay_state_goto (uint32_t ind); + OutputFile & wdelay_types (); + OutputFile & wdelay_yyaccept_init (uint32_t ind); + OutputFile & wdelay_yymaxfill (); + OutputFile& wdelay_yymaxnmatch(); + OutputFile& wdelay_skip(uint32_t ind, bool skip); + OutputFile& wdelay_peek(uint32_t ind, bool peek); + OutputFile& wdelay_backup(uint32_t ind, bool backup); + + void fix_first_block_opts(); + void global_lists(uniq_vector_t &types, + std::set &stags, std::set &mtags) const; + + bool emit(const uniq_vector_t &global_types, + const std::set &global_stags, + const std::set &global_mtags, + size_t max_fill, size_t max_nmatch); + + FORBID_COPY (OutputFile); }; class HeaderFile { - std::ostringstream stream; + std::ostringstream stream; public: - HeaderFile(): stream() {} - bool emit(const opt_t *opts, const uniq_vector_t &types); - FORBID_COPY (HeaderFile); + HeaderFile(): stream() {} + bool emit(const opt_t *opts, const uniq_vector_t &types); + FORBID_COPY (HeaderFile); }; struct Output { - OutputFile source; - HeaderFile header; - std::set skeletons; - size_t max_fill; - size_t max_nmatch; - - explicit Output(Warn &w); - bool emit(); + OutputFile source; + HeaderFile header; + std::set skeletons; + size_t max_fill; + size_t max_nmatch; + + explicit Output(Warn &w); + bool emit(); }; void output_tags (std::ostream &o, uint32_t ind, const ConfTags &conf, const std::set &tags, const opt_t *opts); diff --git a/re2c/src/code/print.cc b/re2c/src/code/print.cc index d24f06d0..2b98f9f7 100644 --- a/re2c/src/code/print.cc +++ b/re2c/src/code/print.cc @@ -7,89 +7,89 @@ namespace re2c static bool is_space(uint32_t c) { - switch (c) { - case '\t': - case '\f': - case '\v': - case '\n': - case '\r': - case ' ': return true; - default: return false; - } + switch (c) { + case '\t': + case '\f': + case '\v': + case '\n': + case '\r': + case ' ': return true; + default: return false; + } } static inline char hex(uint32_t c) { - static const char * sHex = "0123456789ABCDEF"; - return sHex[c & 0x0F]; + static const char * sHex = "0123456789ABCDEF"; + return sHex[c & 0x0F]; } static void prtCh(std::ostream& o, uint32_t c, bool dot) { - switch (c) { - case '\'': o << (dot ? "'" : "\\'"); break; - case '"': o << (dot ? "\\\"" : "\""); break; - case '\n': o << (dot ? "\\\\n" : "\\n"); break; - case '\t': o << (dot ? "\\\\t" : "\\t"); break; - case '\v': o << (dot ? "\\\\v" : "\\v"); break; - case '\b': o << (dot ? "\\\\b" : "\\b"); break; - case '\r': o << (dot ? "\\\\r" : "\\r"); break; - case '\f': o << (dot ? "\\\\f" : "\\f"); break; - case '\a': o << (dot ? "\\\\a" : "\\a"); break; - case '\\': o << "\\\\"; break; // both .dot and C/C++ code expect "\\" - default: o << static_cast (c); break; - } + switch (c) { + case '\'': o << (dot ? "'" : "\\'"); break; + case '"': o << (dot ? "\\\"" : "\""); break; + case '\n': o << (dot ? "\\\\n" : "\\n"); break; + case '\t': o << (dot ? "\\\\t" : "\\t"); break; + case '\v': o << (dot ? "\\\\v" : "\\v"); break; + case '\b': o << (dot ? "\\\\b" : "\\b"); break; + case '\r': o << (dot ? "\\\\r" : "\\r"); break; + case '\f': o << (dot ? "\\\\f" : "\\f"); break; + case '\a': o << (dot ? "\\\\a" : "\\a"); break; + case '\\': o << "\\\\"; break; // both .dot and C/C++ code expect "\\" + default: o << static_cast (c); break; + } } bool is_print(uint32_t c) { - return c >= 0x20 && c < 0x7F; + return c >= 0x20 && c < 0x7F; } void prtHex(std::ostream& o, uint32_t c, uint32_t szcunit) { - o << "0x"; + o << "0x"; - if (szcunit >= 4) { - o << hex(c >> 28u) << hex(c >> 24u) << hex(c >> 20u) << hex(c >> 16u); - } + if (szcunit >= 4) { + o << hex(c >> 28u) << hex(c >> 24u) << hex(c >> 20u) << hex(c >> 16u); + } - if (szcunit >= 2) { - o << hex(c >> 12u) << hex(c >> 8u); - } + if (szcunit >= 2) { + o << hex(c >> 12u) << hex(c >> 8u); + } - o << hex(c >> 4u) << hex(c); + o << hex(c >> 4u) << hex(c); } void prtChOrHex(std::ostream& o, uint32_t c, uint32_t szcunit, bool ebcdic, bool dot) { - if (!ebcdic && (is_print(c) || is_space(c))) { - o << '\''; - prtCh(o, c, dot); - o << '\''; - } else { - prtHex(o, c, szcunit); - } + if (!ebcdic && (is_print(c) || is_space(c))) { + o << '\''; + prtCh(o, c, dot); + o << '\''; + } else { + prtHex(o, c, szcunit); + } } static void prtChOrHexForSpan(std::ostream& o, uint32_t c, uint32_t szcunit, bool ebcdic, bool dot) { - if (!ebcdic && c != ']' && is_print(c)) { - prtCh(o, c, dot); - } else { - prtHex(o, c, szcunit); - } + if (!ebcdic && c != ']' && is_print(c)) { + prtCh(o, c, dot); + } else { + prtHex(o, c, szcunit); + } } void printSpan(std::ostream& o, uint32_t l, uint32_t u, uint32_t szcunit, bool ebcdic, bool dot) { - o << "["; - prtChOrHexForSpan(o, l, szcunit, ebcdic, dot); - if (u - l > 1) { - o << "-"; - prtChOrHexForSpan(o, u - 1, szcunit, ebcdic, dot); - } - o << "]"; + o << "["; + prtChOrHexForSpan(o, l, szcunit, ebcdic, dot); + if (u - l > 1) { + o << "-"; + prtChOrHexForSpan(o, u - 1, szcunit, ebcdic, dot); + } + o << "]"; } } // end namespace re2c diff --git a/re2c/src/compile.cc b/re2c/src/compile.cc index 7e088dea..391d728d 100644 --- a/re2c/src/compile.cc +++ b/re2c/src/compile.cc @@ -32,165 +32,165 @@ class Warn; static std::string make_name(const std::string &cond, uint32_t line) { - std::ostringstream os; - os << "line" << line; - std::string name = os.str(); - if (!cond.empty ()) - { - name += "_"; - name += cond; - } - return name; + std::ostringstream os; + os << "line" << line; + std::string name = os.str(); + if (!cond.empty ()) + { + name += "_"; + name += cond; + } + return name; } static smart_ptr ast_to_dfa(const spec_t &spec, Output &output) { - const opt_t *opts = output.source.block().opts; - Warn &warn = output.source.warn; - const std::vector &rules = spec.rules; - const size_t defrule = spec.defs.empty() - ? Rule::NONE - : rules.size() - 1; - const uint32_t line = output.source.block().line; - const std::string - &cond = spec.name, - name = make_name(cond, line), - &setup = spec.setup.empty() ? "" : spec.setup[0]->text; - - RESpec re(rules, opts, warn); - split_charset(re); - find_fixed_tags(re); - insert_default_tags(re); - warn_nullable(re, cond); - - nfa_t nfa(re); - if (opts->dump_nfa) dump_nfa(nfa); - - dfa_t dfa(nfa, opts, cond, warn); - if (opts->dump_dfa_det) dump_dfa(dfa); - - // skeleton must be constructed after DFA construction - // but prior to any other DFA transformations - Skeleton skeleton(dfa, opts, defrule, name, cond, line); - warn_undefined_control_flow(skeleton, warn); - if (opts->target == TARGET_SKELETON) { - emit_data(skeleton); - } - - cutoff_dead_rules(dfa, defrule, cond, warn); - - insert_fallback_tags(dfa); - - // try to minimize the number of tag variables - compact_and_optimize_tags(dfa, opts->optimize_tags); - if (opts->dump_dfa_tagopt) dump_dfa(dfa); - - freeze_tags(dfa); - - minimization(dfa, opts->dfa_minimization); - if (opts->dump_dfa_min) dump_dfa(dfa); - - // find YYFILL states and calculate argument to YYFILL - std::vector fill; - fillpoints(dfa, fill); - - // ADFA stands for 'DFA with actions' - DFA *adfa = new DFA(dfa, fill, defrule, skeleton.sizeof_key, - name, cond, line, setup); - - // see note [reordering DFA states] - adfa->reorder(); - - // skeleton is constructed, do further DFA transformations - adfa->prepare(opts); - if (opts->dump_adfa) dump_adfa(*adfa); - - // finally gather overall DFA statistics - adfa->calc_stats(line, opts->tags); - - // accumulate global statistics from this particular DFA - output.max_fill = std::max(output.max_fill, adfa->max_fill); - output.max_nmatch = std::max(output.max_nmatch, adfa->max_nmatch); - if (adfa->need_accept) - { - output.source.block().used_yyaccept = true; - } - - return make_smart_ptr(adfa); + const opt_t *opts = output.source.block().opts; + Warn &warn = output.source.warn; + const std::vector &rules = spec.rules; + const size_t defrule = spec.defs.empty() + ? Rule::NONE + : rules.size() - 1; + const uint32_t line = output.source.block().line; + const std::string + &cond = spec.name, + name = make_name(cond, line), + &setup = spec.setup.empty() ? "" : spec.setup[0]->text; + + RESpec re(rules, opts, warn); + split_charset(re); + find_fixed_tags(re); + insert_default_tags(re); + warn_nullable(re, cond); + + nfa_t nfa(re); + if (opts->dump_nfa) dump_nfa(nfa); + + dfa_t dfa(nfa, opts, cond, warn); + if (opts->dump_dfa_det) dump_dfa(dfa); + + // skeleton must be constructed after DFA construction + // but prior to any other DFA transformations + Skeleton skeleton(dfa, opts, defrule, name, cond, line); + warn_undefined_control_flow(skeleton, warn); + if (opts->target == TARGET_SKELETON) { + emit_data(skeleton); + } + + cutoff_dead_rules(dfa, defrule, cond, warn); + + insert_fallback_tags(dfa); + + // try to minimize the number of tag variables + compact_and_optimize_tags(dfa, opts->optimize_tags); + if (opts->dump_dfa_tagopt) dump_dfa(dfa); + + freeze_tags(dfa); + + minimization(dfa, opts->dfa_minimization); + if (opts->dump_dfa_min) dump_dfa(dfa); + + // find YYFILL states and calculate argument to YYFILL + std::vector fill; + fillpoints(dfa, fill); + + // ADFA stands for 'DFA with actions' + DFA *adfa = new DFA(dfa, fill, defrule, skeleton.sizeof_key, + name, cond, line, setup); + + // see note [reordering DFA states] + adfa->reorder(); + + // skeleton is constructed, do further DFA transformations + adfa->prepare(opts); + if (opts->dump_adfa) dump_adfa(*adfa); + + // finally gather overall DFA statistics + adfa->calc_stats(line, opts->tags); + + // accumulate global statistics from this particular DFA + output.max_fill = std::max(output.max_fill, adfa->max_fill); + output.max_nmatch = std::max(output.max_nmatch, adfa->max_nmatch); + if (adfa->need_accept) + { + output.source.block().used_yyaccept = true; + } + + return make_smart_ptr(adfa); } void compile(Scanner &input, Output &output, Opt &opts) { - specs_t rspecs; - symtab_t symtab; - const conopt_t *globopts = &opts.glob; - const opt_t *ropts = NULL; - OutputFile &o = output.source; - typedef std::vector > dfas_t; - - o.new_block(opts); - o.wversion_time(); - o.wdelay_line_info_input(input.get_cline(), input.get_fname()); - if (globopts->target == TARGET_SKELETON) { - emit_prolog(o); - } - - for (;;) { - // parse everything up to the next re2c block - Scanner::ParseMode mode = input.echo(o); - if (mode == Scanner::Stop) break; - validate_mode(mode, globopts->rFlag, ropts, input); - - // parse the next re2c block - specs_t specs; - if (mode == Scanner::Reuse) { - specs = rspecs; - opts.restore(ropts); - opts.reset_mapCodeName(); - o.label_counter.reset(); - o.fill_index = 0; - o.state_goto = false; - o.cond_goto = false; - } - parse(input, specs, symtab, opts); - - // start new output block with accumulated options - o.new_block(opts); - - if (mode == Scanner::Rules) { - // save AST and options for future use - rspecs = specs; - ropts = o.block().opts; - } else { - validate_ast(specs, globopts->cFlag); - normalize_ast(specs); - - // compile AST to DFA - o.block().line = input.get_cline(); - dfas_t dfas; - for (specs_t::const_iterator i = specs.begin(); i != specs.end(); ++i) { - dfas.push_back(ast_to_dfa(*i, output)); - } - - // compile DFA to code - bool prolog = false; - uint32_t ind = o.block().opts->topIndent; - for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { - (*i)->emit(output, ind, (i + 1) == dfas.end(), prolog); - } - } - - o.wdelay_line_info_input(input.get_cline(), input.get_fname()); - } - - if (globopts->target == TARGET_SKELETON) { - emit_epilog (o, output.skeletons); - } - - AST::flist.clear(); - Code::flist.clear(); - Range::vFreeList.clear(); - RangeSuffix::freeList.clear(); + specs_t rspecs; + symtab_t symtab; + const conopt_t *globopts = &opts.glob; + const opt_t *ropts = NULL; + OutputFile &o = output.source; + typedef std::vector > dfas_t; + + o.new_block(opts); + o.wversion_time(); + o.wdelay_line_info_input(input.get_cline(), input.get_fname()); + if (globopts->target == TARGET_SKELETON) { + emit_prolog(o); + } + + for (;;) { + // parse everything up to the next re2c block + Scanner::ParseMode mode = input.echo(o); + if (mode == Scanner::Stop) break; + validate_mode(mode, globopts->rFlag, ropts, input); + + // parse the next re2c block + specs_t specs; + if (mode == Scanner::Reuse) { + specs = rspecs; + opts.restore(ropts); + opts.reset_mapCodeName(); + o.label_counter.reset(); + o.fill_index = 0; + o.state_goto = false; + o.cond_goto = false; + } + parse(input, specs, symtab, opts); + + // start new output block with accumulated options + o.new_block(opts); + + if (mode == Scanner::Rules) { + // save AST and options for future use + rspecs = specs; + ropts = o.block().opts; + } else { + validate_ast(specs, globopts->cFlag); + normalize_ast(specs); + + // compile AST to DFA + o.block().line = input.get_cline(); + dfas_t dfas; + for (specs_t::const_iterator i = specs.begin(); i != specs.end(); ++i) { + dfas.push_back(ast_to_dfa(*i, output)); + } + + // compile DFA to code + bool prolog = false; + uint32_t ind = o.block().opts->topIndent; + for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { + (*i)->emit(output, ind, (i + 1) == dfas.end(), prolog); + } + } + + o.wdelay_line_info_input(input.get_cline(), input.get_fname()); + } + + if (globopts->target == TARGET_SKELETON) { + emit_epilog (o, output.skeletons); + } + + AST::flist.clear(); + Code::flist.clear(); + Range::vFreeList.clear(); + RangeSuffix::freeList.clear(); } } // namespace re2c diff --git a/re2c/src/conf/msg.cc b/re2c/src/conf/msg.cc index 68e9cd14..72eaa452 100644 --- a/re2c/src/conf/msg.cc +++ b/re2c/src/conf/msg.cc @@ -12,130 +12,130 @@ namespace re2c { void error (const char * fmt, ...) { - fprintf (stderr, "re2c: error: "); + fprintf (stderr, "re2c: error: "); - va_list args; - va_start (args, fmt); - vfprintf (stderr, fmt, args); - va_end (args); + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); - fprintf (stderr, "\n"); + fprintf (stderr, "\n"); } void fatal(const char *fmt, ...) { - fprintf (stderr, "re2c: error: "); + fprintf (stderr, "re2c: error: "); - va_list args; - va_start (args, fmt); - vfprintf (stderr, fmt, args); - va_end (args); + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); - fprintf (stderr, "\n"); - exit(1); + fprintf (stderr, "\n"); + exit(1); } void fatal_l(uint32_t line, const char *fmt, ...) { - fprintf (stderr, "re2c: error: line %u: ", line); + fprintf (stderr, "re2c: error: line %u: ", line); - va_list args; - va_start (args, fmt); - vfprintf (stderr, fmt, args); - va_end (args); + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); - fprintf (stderr, "\n"); - exit(1); + fprintf (stderr, "\n"); + exit(1); } void fatal_lc(uint32_t line, uint32_t column, const char *fmt, ...) { - fprintf (stderr, "re2c: error: line %u, column %u: ", line, column); + fprintf (stderr, "re2c: error: line %u, column %u: ", line, column); - va_list args; - va_start (args, fmt); - vfprintf (stderr, fmt, args); - va_end (args); + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); - fprintf (stderr, "\n"); - exit(1); + fprintf (stderr, "\n"); + exit(1); } void error_arg (const char * option) { - error ("expected argument to option %s", option); + error ("expected argument to option %s", option); } void warning_start (uint32_t line, bool error) { - const char * msg = error ? "error" : "warning"; - fprintf (stderr, "re2c: %s: line %u: ", msg, line); + const char * msg = error ? "error" : "warning"; + fprintf (stderr, "re2c: %s: line %u: ", msg, line); } void warning_end (const char * type, bool error) { - if (type != NULL) - { - const char * prefix = error ? "error-" : ""; - fprintf (stderr, " [-W%s%s]", prefix, type); - } - fprintf (stderr, "\n"); + if (type != NULL) + { + const char * prefix = error ? "error-" : ""; + fprintf (stderr, " [-W%s%s]", prefix, type); + } + fprintf (stderr, "\n"); } void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) { - warning_start (line, error); + warning_start (line, error); - va_list args; - va_start (args, fmt); - vfprintf (stderr, fmt, args); - va_end (args); + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); - warning_end (type, error); + warning_end (type, error); } void usage() { - fprintf(stdout, "%s", help); + fprintf(stdout, "%s", help); } void vernum () { - std::string vernum (PACKAGE_VERSION); - if (vernum[1] == '.') - { - vernum.insert(0, "0"); - } - vernum.erase(2, 1); - if (vernum[3] == '.') - { - vernum.insert(2, "0"); - } - vernum.erase(4, 1); - if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9') - { - vernum.insert(4, "0"); - } - vernum.resize(6, '0'); - - printf ("%s\n", vernum.c_str ()); + std::string vernum (PACKAGE_VERSION); + if (vernum[1] == '.') + { + vernum.insert(0, "0"); + } + vernum.erase(2, 1); + if (vernum[3] == '.') + { + vernum.insert(2, "0"); + } + vernum.erase(4, 1); + if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9') + { + vernum.insert(4, "0"); + } + vernum.resize(6, '0'); + + printf ("%s\n", vernum.c_str ()); } void version () { - printf ("re2c %s\n", PACKAGE_VERSION); + printf ("re2c %s\n", PACKAGE_VERSION); } std::string incond (const std::string & cond) { - std::string s; - if (!cond.empty ()) - { - s += "in condition '"; - s += cond; - s += "' "; - } - return s; + std::string s; + if (!cond.empty ()) + { + s += "in condition '"; + s += cond; + s += "' "; + } + return s; } } // namespace re2c diff --git a/re2c/src/conf/opt.cc b/re2c/src/conf/opt.cc index 33b30518..96c65e93 100644 --- a/re2c/src/conf/opt.cc +++ b/re2c/src/conf/opt.cc @@ -6,284 +6,284 @@ namespace re2c void conopt_t::fix() { - if (target == TARGET_SKELETON) { - fFlag = false; - header_file = ""; - } - if (!cFlag) { - header_file = ""; - } + if (target == TARGET_SKELETON) { + fFlag = false; + header_file = ""; + } + if (!cFlag) { + header_file = ""; + } } void mutopt_t::fix(const conopt_t *globopts) { - // some options either make no sense or must have fixed value - // with current target: reset them to default - switch (globopts->target) { - case TARGET_DOT: - // default code generation options - sFlag = Opt::baseopt.sFlag; - bFlag = Opt::baseopt.bFlag; - gFlag = Opt::baseopt.gFlag; - cGotoThreshold = Opt::baseopt.cGotoThreshold; - // default environment-insensitive formatting - yybmHexTable = Opt::baseopt.yybmHexTable; - // fallthrough - case TARGET_SKELETON: - // default line information - iFlag = Opt::baseopt.iFlag; - // default environment-sensitive formatting - topIndent = Opt::baseopt.topIndent; - indString = Opt::baseopt.indString; - condDivider = Opt::baseopt.condDivider; - condDividerParam = Opt::baseopt.condDividerParam; - // default environment bindings - yycondtype = Opt::baseopt.yycondtype; - cond_get = Opt::baseopt.cond_get; - cond_get_naked = Opt::baseopt.cond_get_naked; - cond_set = Opt::baseopt.cond_set; - cond_set_arg = Opt::baseopt.cond_set_arg; - cond_set_naked = Opt::baseopt.cond_set_naked; - yyctable = Opt::baseopt.yyctable; - condPrefix = Opt::baseopt.condPrefix; - condEnumPrefix = Opt::baseopt.condEnumPrefix; - condGoto = Opt::baseopt.condGoto; - condGotoParam = Opt::baseopt.condGotoParam; - state_get = Opt::baseopt.state_get; - state_get_naked = Opt::baseopt.state_get_naked; - state_set = Opt::baseopt.state_set; - state_set_arg = Opt::baseopt.state_set_arg; - state_set_naked = Opt::baseopt.state_set_naked; - tags_prefix = Opt::baseopt.tags_prefix; - tags_expression = Opt::baseopt.tags_expression; - optimize_tags = Opt::baseopt.optimize_tags; - yyfilllabel = Opt::baseopt.yyfilllabel; - yynext = Opt::baseopt.yynext; - yyaccept = Opt::baseopt.yyaccept; - bUseStateAbort = Opt::baseopt.bUseStateAbort; - bUseStateNext = Opt::baseopt.bUseStateNext; - yybm = Opt::baseopt.yybm; - yytarget = Opt::baseopt.yytarget; - input_api = Opt::baseopt.input_api; - yycursor = Opt::baseopt.yycursor; - yymarker = Opt::baseopt.yymarker; - yyctxmarker = Opt::baseopt.yyctxmarker; - yylimit = Opt::baseopt.yylimit; - yypeek = Opt::baseopt.yypeek; - yyskip = Opt::baseopt.yyskip; - yybackup = Opt::baseopt.yybackup; - yybackupctx = Opt::baseopt.yybackupctx; - yyrestore = Opt::baseopt.yyrestore; - yyrestorectx = Opt::baseopt.yyrestorectx; - yyrestoretag = Opt::baseopt.yyrestoretag; - yystagn = Opt::baseopt.yystagn; - yystagp = Opt::baseopt.yystagp; - yymtagn = Opt::baseopt.yymtagn; - yymtagp = Opt::baseopt.yymtagp; - yylessthan = Opt::baseopt.yylessthan; - dFlag = Opt::baseopt.dFlag; - yydebug = Opt::baseopt.yydebug; - yyctype = Opt::baseopt.yyctype; - yych = Opt::baseopt.yych; - bEmitYYCh = Opt::baseopt.bEmitYYCh; - yychConversion = Opt::baseopt.yychConversion; - fill = Opt::baseopt.fill; - fill_use = Opt::baseopt.fill_use; - fill_check = Opt::baseopt.fill_check; - fill_arg = Opt::baseopt.fill_arg; - fill_arg_use = Opt::baseopt.fill_arg_use; - fill_naked = Opt::baseopt.fill_naked; - labelPrefix = Opt::baseopt.labelPrefix; - startlabel = Opt::baseopt.startlabel; - startlabel_force = Opt::baseopt.startlabel_force; - break; - case TARGET_CODE: - break; - } + // some options either make no sense or must have fixed value + // with current target: reset them to default + switch (globopts->target) { + case TARGET_DOT: + // default code generation options + sFlag = Opt::baseopt.sFlag; + bFlag = Opt::baseopt.bFlag; + gFlag = Opt::baseopt.gFlag; + cGotoThreshold = Opt::baseopt.cGotoThreshold; + // default environment-insensitive formatting + yybmHexTable = Opt::baseopt.yybmHexTable; + // fallthrough + case TARGET_SKELETON: + // default line information + iFlag = Opt::baseopt.iFlag; + // default environment-sensitive formatting + topIndent = Opt::baseopt.topIndent; + indString = Opt::baseopt.indString; + condDivider = Opt::baseopt.condDivider; + condDividerParam = Opt::baseopt.condDividerParam; + // default environment bindings + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + tags_prefix = Opt::baseopt.tags_prefix; + tags_expression = Opt::baseopt.tags_expression; + optimize_tags = Opt::baseopt.optimize_tags; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + yybm = Opt::baseopt.yybm; + yytarget = Opt::baseopt.yytarget; + input_api = Opt::baseopt.input_api; + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yyrestoretag = Opt::baseopt.yyrestoretag; + yystagn = Opt::baseopt.yystagn; + yystagp = Opt::baseopt.yystagp; + yymtagn = Opt::baseopt.yymtagn; + yymtagp = Opt::baseopt.yymtagp; + yylessthan = Opt::baseopt.yylessthan; + dFlag = Opt::baseopt.dFlag; + yydebug = Opt::baseopt.yydebug; + yyctype = Opt::baseopt.yyctype; + yych = Opt::baseopt.yych; + bEmitYYCh = Opt::baseopt.bEmitYYCh; + yychConversion = Opt::baseopt.yychConversion; + fill = Opt::baseopt.fill; + fill_use = Opt::baseopt.fill_use; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = Opt::baseopt.fill_naked; + labelPrefix = Opt::baseopt.labelPrefix; + startlabel = Opt::baseopt.startlabel; + startlabel_force = Opt::baseopt.startlabel_force; + break; + case TARGET_CODE: + break; + } - if (bCaseInsensitive) - { - bCaseInverted = Opt::baseopt.bCaseInverted; - } + if (bCaseInsensitive) + { + bCaseInverted = Opt::baseopt.bCaseInverted; + } - // respect hierarchy - if (!globopts->cFlag) { - yycondtype = Opt::baseopt.yycondtype; - cond_get = Opt::baseopt.cond_get; - cond_get_naked = Opt::baseopt.cond_get_naked; - cond_set = Opt::baseopt.cond_set; - cond_set_arg = Opt::baseopt.cond_set_arg; - cond_set_naked = Opt::baseopt.cond_set_naked; - yyctable = Opt::baseopt.yyctable; - condPrefix = Opt::baseopt.condPrefix; - condEnumPrefix = Opt::baseopt.condEnumPrefix; - condDivider = Opt::baseopt.condDivider; - condDividerParam = Opt::baseopt.condDividerParam; - condGoto = Opt::baseopt.condGoto; - condGotoParam = Opt::baseopt.condGotoParam; - } - if (!globopts->fFlag) { - state_get = Opt::baseopt.state_get; - state_get_naked = Opt::baseopt.state_get_naked; - state_set = Opt::baseopt.state_set; - state_set_arg = Opt::baseopt.state_set_arg; - state_set_naked = Opt::baseopt.state_set_naked; - yyfilllabel = Opt::baseopt.yyfilllabel; - yynext = Opt::baseopt.yynext; - yyaccept = Opt::baseopt.yyaccept; - bUseStateAbort = Opt::baseopt.bUseStateAbort; - bUseStateNext = Opt::baseopt.bUseStateNext; - } - if (posix_captures) { - tags = true; - } - if (!tags) - { - tags_prefix = Opt::baseopt.tags_prefix; - tags_expression = Opt::baseopt.tags_expression; - lookahead = Opt::baseopt.lookahead; - optimize_tags = Opt::baseopt.optimize_tags; - } - if (!bFlag) - { - yybmHexTable = Opt::baseopt.yybmHexTable; - yybm = Opt::baseopt.yybm; - } - if (!gFlag) - { - cGotoThreshold = Opt::baseopt.cGotoThreshold; - yytarget = Opt::baseopt.yytarget; - } - if (input_api != INPUT_DEFAULT) - { - yycursor = Opt::baseopt.yycursor; - yymarker = Opt::baseopt.yymarker; - yyctxmarker = Opt::baseopt.yyctxmarker; - yylimit = Opt::baseopt.yylimit; - } - if (input_api != INPUT_CUSTOM) - { - yypeek = Opt::baseopt.yypeek; - yyskip = Opt::baseopt.yyskip; - yybackup = Opt::baseopt.yybackup; - yybackupctx = Opt::baseopt.yybackupctx; - yyrestore = Opt::baseopt.yyrestore; - yyrestorectx = Opt::baseopt.yyrestorectx; - yyrestoretag = Opt::baseopt.yyrestoretag; - yystagn = Opt::baseopt.yystagn; - yystagp = Opt::baseopt.yystagp; - yymtagn = Opt::baseopt.yymtagn; - yymtagp = Opt::baseopt.yymtagp; - } - if (!dFlag) - { - yydebug = Opt::baseopt.yydebug; - } - if (!fill_use) - { - fill = Opt::baseopt.fill; - fill_check = Opt::baseopt.fill_check; - fill_arg = Opt::baseopt.fill_arg; - fill_arg_use = Opt::baseopt.fill_arg_use; - fill_naked = Opt::baseopt.fill_naked; - } + // respect hierarchy + if (!globopts->cFlag) { + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condDivider = Opt::baseopt.condDivider; + condDividerParam = Opt::baseopt.condDividerParam; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + } + if (!globopts->fFlag) { + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + } + if (posix_captures) { + tags = true; + } + if (!tags) + { + tags_prefix = Opt::baseopt.tags_prefix; + tags_expression = Opt::baseopt.tags_expression; + lookahead = Opt::baseopt.lookahead; + optimize_tags = Opt::baseopt.optimize_tags; + } + if (!bFlag) + { + yybmHexTable = Opt::baseopt.yybmHexTable; + yybm = Opt::baseopt.yybm; + } + if (!gFlag) + { + cGotoThreshold = Opt::baseopt.cGotoThreshold; + yytarget = Opt::baseopt.yytarget; + } + if (input_api != INPUT_DEFAULT) + { + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + } + if (input_api != INPUT_CUSTOM) + { + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yyrestoretag = Opt::baseopt.yyrestoretag; + yystagn = Opt::baseopt.yystagn; + yystagp = Opt::baseopt.yystagp; + yymtagn = Opt::baseopt.yymtagn; + yymtagp = Opt::baseopt.yymtagp; + } + if (!dFlag) + { + yydebug = Opt::baseopt.yydebug; + } + if (!fill_use) + { + fill = Opt::baseopt.fill; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = Opt::baseopt.fill_naked; + } - // force individual options - switch (globopts->target) { - case TARGET_DOT: - iFlag = true; - break; - case TARGET_SKELETON: - iFlag = true; - input_api = INPUT_CUSTOM; - indString = " "; - topIndent = 2; - break; - case TARGET_CODE: - break; - } - switch (encoding.type()) { - case Enc::UCS2: - case Enc::UTF16: - case Enc::UTF32: - sFlag = true; - break; - case Enc::ASCII: - case Enc::EBCDIC: - case Enc::UTF8: - break; - } - if (bFlag) - { - sFlag = true; - } - if (gFlag) - { - bFlag = true; - sFlag = true; - } - if (!lookahead) { - eager_skip = true; - } + // force individual options + switch (globopts->target) { + case TARGET_DOT: + iFlag = true; + break; + case TARGET_SKELETON: + iFlag = true; + input_api = INPUT_CUSTOM; + indString = " "; + topIndent = 2; + break; + case TARGET_CODE: + break; + } + switch (encoding.type()) { + case Enc::UCS2: + case Enc::UTF16: + case Enc::UTF32: + sFlag = true; + break; + case Enc::ASCII: + case Enc::EBCDIC: + case Enc::UTF8: + break; + } + if (bFlag) + { + sFlag = true; + } + if (gFlag) + { + bFlag = true; + sFlag = true; + } + if (!lookahead) { + eager_skip = true; + } } const mutopt_t Opt::baseopt; bool Opt::source (const char *s) { - if (source_file) - { - error ("multiple source files: %s, %s", source_file, s); - return false; - } - else - { - source_file = s; - return true; - } + if (source_file) + { + error ("multiple source files: %s, %s", source_file, s); + return false; + } + else + { + source_file = s; + return true; + } } void Opt::reset_startlabel() { - set_startlabel(Opt::baseopt.startlabel); - set_startlabel_force(Opt::baseopt.startlabel_force); + set_startlabel(Opt::baseopt.startlabel); + set_startlabel_force(Opt::baseopt.startlabel_force); } void Opt::reset_mapCodeName () { - // historically arranged set of names - // no actual reason why these particular options should be reset - set_cond_get(Opt::baseopt.cond_get); - set_cond_set(Opt::baseopt.cond_set); - set_fill(Opt::baseopt.fill); - set_state_get(Opt::baseopt.state_get); - set_state_set(Opt::baseopt.state_set); - set_yybackup(Opt::baseopt.yybackup); - set_yybackupctx(Opt::baseopt.yybackupctx); - set_yycondtype(Opt::baseopt.yycondtype); - set_yyctxmarker(Opt::baseopt.yyctxmarker); - set_yyctype(Opt::baseopt.yyctype); - set_yycursor(Opt::baseopt.yycursor); - set_yydebug(Opt::baseopt.yydebug); - set_yylessthan(Opt::baseopt.yylessthan); - set_yylimit(Opt::baseopt.yylimit); - set_yymarker(Opt::baseopt.yymarker); - set_yypeek(Opt::baseopt.yypeek); - set_yyrestore(Opt::baseopt.yyrestore); - set_yyrestorectx(Opt::baseopt.yyrestorectx); - set_yyrestoretag(Opt::baseopt.yyrestoretag); - set_yystagn(Opt::baseopt.yystagn); - set_yystagp(Opt::baseopt.yystagp); - set_yymtagn(Opt::baseopt.yymtagn); - set_yymtagp(Opt::baseopt.yymtagp); - set_yyskip(Opt::baseopt.yyskip); - set_yyfilllabel(Opt::baseopt.yyfilllabel); - set_yynext(Opt::baseopt.yynext); - set_yyaccept(Opt::baseopt.yyaccept); - set_yybm(Opt::baseopt.yybm); - set_yych(Opt::baseopt.yych); - set_yyctable(Opt::baseopt.yyctable); - set_yytarget(Opt::baseopt.yytarget); + // historically arranged set of names + // no actual reason why these particular options should be reset + set_cond_get(Opt::baseopt.cond_get); + set_cond_set(Opt::baseopt.cond_set); + set_fill(Opt::baseopt.fill); + set_state_get(Opt::baseopt.state_get); + set_state_set(Opt::baseopt.state_set); + set_yybackup(Opt::baseopt.yybackup); + set_yybackupctx(Opt::baseopt.yybackupctx); + set_yycondtype(Opt::baseopt.yycondtype); + set_yyctxmarker(Opt::baseopt.yyctxmarker); + set_yyctype(Opt::baseopt.yyctype); + set_yycursor(Opt::baseopt.yycursor); + set_yydebug(Opt::baseopt.yydebug); + set_yylessthan(Opt::baseopt.yylessthan); + set_yylimit(Opt::baseopt.yylimit); + set_yymarker(Opt::baseopt.yymarker); + set_yypeek(Opt::baseopt.yypeek); + set_yyrestore(Opt::baseopt.yyrestore); + set_yyrestorectx(Opt::baseopt.yyrestorectx); + set_yyrestoretag(Opt::baseopt.yyrestoretag); + set_yystagn(Opt::baseopt.yystagn); + set_yystagp(Opt::baseopt.yystagp); + set_yymtagn(Opt::baseopt.yymtagn); + set_yymtagp(Opt::baseopt.yymtagp); + set_yyskip(Opt::baseopt.yyskip); + set_yyfilllabel(Opt::baseopt.yyfilllabel); + set_yynext(Opt::baseopt.yynext); + set_yyaccept(Opt::baseopt.yyaccept); + set_yybm(Opt::baseopt.yybm); + set_yych(Opt::baseopt.yych); + set_yyctable(Opt::baseopt.yyctable); + set_yytarget(Opt::baseopt.yytarget); } } // namespace re2c diff --git a/re2c/src/conf/opt.h b/re2c/src/conf/opt.h index 8e7387fe..ebc51b8d 100644 --- a/re2c/src/conf/opt.h +++ b/re2c/src/conf/opt.h @@ -19,9 +19,9 @@ class Warn; enum target_t { - TARGET_CODE, - TARGET_DOT, - TARGET_SKELETON + TARGET_CODE, + TARGET_DOT, + TARGET_SKELETON }; /* note [constant and mutable options] @@ -41,261 +41,261 @@ enum target_t */ #define RE2C_CONSTOPTS \ - CONSTOPT1 (target_t, target, TARGET_CODE) \ - CONSTOPT (std::string, output_file, "") \ - CONSTOPT (std::string, header_file, "") \ - CONSTOPT (bool, bNoGenerationDate, false) \ - CONSTOPT (bool, version, true) \ - CONSTOPT (bool, cFlag, false) \ - CONSTOPT (bool, fFlag, false) \ - CONSTOPT (bool, rFlag, false) \ - CONSTOPT (bool, FFlag, false) \ - /* debug */ \ - CONSTOPT (bool, dump_nfa, false) \ - CONSTOPT (bool, dump_dfa_raw, false) \ - CONSTOPT (bool, dump_dfa_det, false) \ - CONSTOPT (bool, dump_dfa_tagopt, false) \ - CONSTOPT (bool, dump_dfa_min, false) \ - CONSTOPT (bool, dump_adfa, false) + CONSTOPT1 (target_t, target, TARGET_CODE) \ + CONSTOPT (std::string, output_file, "") \ + CONSTOPT (std::string, header_file, "") \ + CONSTOPT (bool, bNoGenerationDate, false) \ + CONSTOPT (bool, version, true) \ + CONSTOPT (bool, cFlag, false) \ + CONSTOPT (bool, fFlag, false) \ + CONSTOPT (bool, rFlag, false) \ + CONSTOPT (bool, FFlag, false) \ + /* debug */ \ + CONSTOPT (bool, dump_nfa, false) \ + CONSTOPT (bool, dump_dfa_raw, false) \ + CONSTOPT (bool, dump_dfa_det, false) \ + CONSTOPT (bool, dump_dfa_tagopt, false) \ + CONSTOPT (bool, dump_dfa_min, false) \ + CONSTOPT (bool, dump_adfa, false) #define RE2C_MUTOPTS \ - /* regular expressions */ \ - MUTOPT1 (Enc, encoding, Enc ()) \ - MUTOPT (bool, bCaseInsensitive, false) \ - MUTOPT (bool, bCaseInverted, false) \ - MUTOPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \ - /* conditions */ \ - MUTOPT (std::string, yycondtype, "YYCONDTYPE") \ - MUTOPT (std::string, cond_get, "YYGETCONDITION") \ - MUTOPT (bool, cond_get_naked, false) \ - MUTOPT (std::string, cond_set, "YYSETCONDITION" ) \ - MUTOPT (std::string, cond_set_arg, "@@" ) \ - MUTOPT (bool, cond_set_naked, false ) \ - MUTOPT (std::string, yyctable, "yyctable") \ - MUTOPT (std::string, condPrefix, "yyc_") \ - MUTOPT (std::string, condEnumPrefix, "yyc") \ - MUTOPT (std::string, condDivider, "/* *********************************** */") \ - MUTOPT (std::string, condDividerParam, "@@") \ - MUTOPT (std::string, condGoto, "goto @@;") \ - MUTOPT (std::string, condGotoParam, "@@") \ - /* states */ \ - MUTOPT (std::string, state_get, "YYGETSTATE") \ - MUTOPT (bool, state_get_naked, false) \ - MUTOPT (std::string, state_set, "YYSETSTATE") \ - MUTOPT (std::string, state_set_arg, "@@") \ - MUTOPT (bool, state_set_naked, false) \ - MUTOPT (std::string, yyfilllabel, "yyFillLabel") \ - MUTOPT (std::string, yynext, "yyNext") \ - MUTOPT (std::string, yyaccept, "yyaccept") \ - MUTOPT (bool, bUseStateAbort, false) \ - MUTOPT (bool, bUseStateNext, false) \ - /* tags */ \ - MUTOPT (bool, tags, false) \ - MUTOPT (std::string, tags_prefix, "yyt") \ - MUTOPT (std::string, tags_expression, "@@") \ - MUTOPT (bool, posix_captures, false) \ - MUTOPT (bool, optimize_tags, true) \ - /* code generation */ \ - MUTOPT (bool, sFlag, false) \ - MUTOPT (bool, bFlag, false) \ - MUTOPT (std::string, yybm, "yybm") \ - MUTOPT (bool, yybmHexTable, false) \ - MUTOPT (bool, gFlag, false) \ - MUTOPT (std::string, yytarget, "yytarget") \ - MUTOPT (uint32_t, cGotoThreshold, 9) \ - /* formatting */ \ - MUTOPT (uint32_t, topIndent, 0) \ - MUTOPT (std::string, indString, "\t") \ - /* input API */ \ - MUTOPT (input_api_t, input_api, INPUT_DEFAULT) \ - MUTOPT (std::string, yycursor, "YYCURSOR") \ - MUTOPT (std::string, yymarker, "YYMARKER") \ - MUTOPT (std::string, yyctxmarker, "YYCTXMARKER") \ - MUTOPT (std::string, yylimit, "YYLIMIT") \ - MUTOPT (std::string, yypeek, "YYPEEK") \ - MUTOPT (std::string, yyskip, "YYSKIP") \ - MUTOPT (std::string, yybackup, "YYBACKUP") \ - MUTOPT (std::string, yybackupctx, "YYBACKUPCTX") \ - MUTOPT (std::string, yyrestore, "YYRESTORE") \ - MUTOPT (std::string, yyrestorectx, "YYRESTORECTX") \ - MUTOPT (std::string, yyrestoretag, "YYRESTORETAG") \ - MUTOPT (std::string, yylessthan, "YYLESSTHAN") \ - MUTOPT (std::string, yystagn, "YYSTAGN") \ - MUTOPT (std::string, yystagp, "YYSTAGP") \ - MUTOPT (std::string, yymtagn, "YYMTAGN") \ - MUTOPT (std::string, yymtagp, "YYMTAGP") \ - /* #line directives */ \ - MUTOPT (bool, iFlag, false) \ - /* debug */ \ - MUTOPT (bool, dFlag, false) \ - MUTOPT (std::string, yydebug, "YYDEBUG") \ - /* yych */ \ - MUTOPT (std::string, yyctype, "YYCTYPE") \ - MUTOPT (std::string, yych, "yych") \ - MUTOPT (bool, bEmitYYCh, true) \ - MUTOPT (bool, yychConversion, false) \ - /* YYFILL */ \ - MUTOPT (std::string, fill, "YYFILL") \ - MUTOPT (bool, fill_use, true) \ - MUTOPT (bool, fill_check, true) \ - MUTOPT (std::string, fill_arg, "@@") \ - MUTOPT (bool, fill_arg_use, true) \ - MUTOPT (bool, fill_naked, false) \ - /* labels */ \ - MUTOPT (std::string, labelPrefix, "yy") \ - MUTOPT (std::string, startlabel, "") \ - MUTOPT (bool, startlabel_force, false) \ - /* internals */ \ - MUTOPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) \ - MUTOPT (bool, lookahead, true) \ - MUTOPT (bool, eager_skip, false) + /* regular expressions */ \ + MUTOPT1 (Enc, encoding, Enc ()) \ + MUTOPT (bool, bCaseInsensitive, false) \ + MUTOPT (bool, bCaseInverted, false) \ + MUTOPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \ + /* conditions */ \ + MUTOPT (std::string, yycondtype, "YYCONDTYPE") \ + MUTOPT (std::string, cond_get, "YYGETCONDITION") \ + MUTOPT (bool, cond_get_naked, false) \ + MUTOPT (std::string, cond_set, "YYSETCONDITION" ) \ + MUTOPT (std::string, cond_set_arg, "@@" ) \ + MUTOPT (bool, cond_set_naked, false ) \ + MUTOPT (std::string, yyctable, "yyctable") \ + MUTOPT (std::string, condPrefix, "yyc_") \ + MUTOPT (std::string, condEnumPrefix, "yyc") \ + MUTOPT (std::string, condDivider, "/* *********************************** */") \ + MUTOPT (std::string, condDividerParam, "@@") \ + MUTOPT (std::string, condGoto, "goto @@;") \ + MUTOPT (std::string, condGotoParam, "@@") \ + /* states */ \ + MUTOPT (std::string, state_get, "YYGETSTATE") \ + MUTOPT (bool, state_get_naked, false) \ + MUTOPT (std::string, state_set, "YYSETSTATE") \ + MUTOPT (std::string, state_set_arg, "@@") \ + MUTOPT (bool, state_set_naked, false) \ + MUTOPT (std::string, yyfilllabel, "yyFillLabel") \ + MUTOPT (std::string, yynext, "yyNext") \ + MUTOPT (std::string, yyaccept, "yyaccept") \ + MUTOPT (bool, bUseStateAbort, false) \ + MUTOPT (bool, bUseStateNext, false) \ + /* tags */ \ + MUTOPT (bool, tags, false) \ + MUTOPT (std::string, tags_prefix, "yyt") \ + MUTOPT (std::string, tags_expression, "@@") \ + MUTOPT (bool, posix_captures, false) \ + MUTOPT (bool, optimize_tags, true) \ + /* code generation */ \ + MUTOPT (bool, sFlag, false) \ + MUTOPT (bool, bFlag, false) \ + MUTOPT (std::string, yybm, "yybm") \ + MUTOPT (bool, yybmHexTable, false) \ + MUTOPT (bool, gFlag, false) \ + MUTOPT (std::string, yytarget, "yytarget") \ + MUTOPT (uint32_t, cGotoThreshold, 9) \ + /* formatting */ \ + MUTOPT (uint32_t, topIndent, 0) \ + MUTOPT (std::string, indString, "\t") \ + /* input API */ \ + MUTOPT (input_api_t, input_api, INPUT_DEFAULT) \ + MUTOPT (std::string, yycursor, "YYCURSOR") \ + MUTOPT (std::string, yymarker, "YYMARKER") \ + MUTOPT (std::string, yyctxmarker, "YYCTXMARKER") \ + MUTOPT (std::string, yylimit, "YYLIMIT") \ + MUTOPT (std::string, yypeek, "YYPEEK") \ + MUTOPT (std::string, yyskip, "YYSKIP") \ + MUTOPT (std::string, yybackup, "YYBACKUP") \ + MUTOPT (std::string, yybackupctx, "YYBACKUPCTX") \ + MUTOPT (std::string, yyrestore, "YYRESTORE") \ + MUTOPT (std::string, yyrestorectx, "YYRESTORECTX") \ + MUTOPT (std::string, yyrestoretag, "YYRESTORETAG") \ + MUTOPT (std::string, yylessthan, "YYLESSTHAN") \ + MUTOPT (std::string, yystagn, "YYSTAGN") \ + MUTOPT (std::string, yystagp, "YYSTAGP") \ + MUTOPT (std::string, yymtagn, "YYMTAGN") \ + MUTOPT (std::string, yymtagp, "YYMTAGP") \ + /* #line directives */ \ + MUTOPT (bool, iFlag, false) \ + /* debug */ \ + MUTOPT (bool, dFlag, false) \ + MUTOPT (std::string, yydebug, "YYDEBUG") \ + /* yych */ \ + MUTOPT (std::string, yyctype, "YYCTYPE") \ + MUTOPT (std::string, yych, "yych") \ + MUTOPT (bool, bEmitYYCh, true) \ + MUTOPT (bool, yychConversion, false) \ + /* YYFILL */ \ + MUTOPT (std::string, fill, "YYFILL") \ + MUTOPT (bool, fill_use, true) \ + MUTOPT (bool, fill_check, true) \ + MUTOPT (std::string, fill_arg, "@@") \ + MUTOPT (bool, fill_arg_use, true) \ + MUTOPT (bool, fill_naked, false) \ + /* labels */ \ + MUTOPT (std::string, labelPrefix, "yy") \ + MUTOPT (std::string, startlabel, "") \ + MUTOPT (bool, startlabel_force, false) \ + /* internals */ \ + MUTOPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) \ + MUTOPT (bool, lookahead, true) \ + MUTOPT (bool, eager_skip, false) struct conopt_t { -# define CONSTOPT1 CONSTOPT -# define CONSTOPT(type, name, value) type name; - RE2C_CONSTOPTS -# undef CONSTOPT1 -# undef CONSTOPT +# define CONSTOPT1 CONSTOPT +# define CONSTOPT(type, name, value) type name; + RE2C_CONSTOPTS +# undef CONSTOPT1 +# undef CONSTOPT - conopt_t() -# define CONSTOPT1(type, name, value) : name(value) -# define CONSTOPT(type, name, value) , name(value) - RE2C_CONSTOPTS -# undef CONSTOPT1 -# undef CONSTOPT - {} - void fix(); - FORBID_COPY(conopt_t); + conopt_t() +# define CONSTOPT1(type, name, value) : name(value) +# define CONSTOPT(type, name, value) , name(value) + RE2C_CONSTOPTS +# undef CONSTOPT1 +# undef CONSTOPT + {} + void fix(); + FORBID_COPY(conopt_t); }; struct mutopt_t { -# define MUTOPT1 MUTOPT -# define MUTOPT(type, name, value) type name; - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT +# define MUTOPT1 MUTOPT +# define MUTOPT(type, name, value) type name; + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT - mutopt_t() -# define MUTOPT1(type, name, value) : name(value) -# define MUTOPT(type, name, value) , name(value) - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT - {} - void fix(const conopt_t *globopts); - FORBID_COPY(mutopt_t); + mutopt_t() +# define MUTOPT1(type, name, value) : name(value) +# define MUTOPT(type, name, value) , name(value) + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT + {} + void fix(const conopt_t *globopts); + FORBID_COPY(mutopt_t); }; struct opt_t { -# define CONSTOPT1 CONSTOPT -# define CONSTOPT(type, name, value) type name; - RE2C_CONSTOPTS -# undef CONSTOPT1 -# undef CONSTOPT +# define CONSTOPT1 CONSTOPT +# define CONSTOPT(type, name, value) type name; + RE2C_CONSTOPTS +# undef CONSTOPT1 +# undef CONSTOPT -# define MUTOPT1 MUTOPT -# define MUTOPT(type, name, value) type name; - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT +# define MUTOPT1 MUTOPT +# define MUTOPT(type, name, value) type name; + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT - opt_t(const conopt_t &con, const mutopt_t &mut) -# define CONSTOPT1(type, name, value) : name(con.name) -# define CONSTOPT(type, name, value) , name(con.name) - RE2C_CONSTOPTS -# undef CONSTOPT1 -# undef CONSTOPT -# define MUTOPT1 MUTOPT -# define MUTOPT(type, name, value) , name(mut.name) - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT - {} + opt_t(const conopt_t &con, const mutopt_t &mut) +# define CONSTOPT1(type, name, value) : name(con.name) +# define CONSTOPT(type, name, value) , name(con.name) + RE2C_CONSTOPTS +# undef CONSTOPT1 +# undef CONSTOPT +# define MUTOPT1 MUTOPT +# define MUTOPT(type, name, value) , name(mut.name) + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT + {} }; // see note [constant and mutable options] struct Opt { - static const mutopt_t baseopt; + static const mutopt_t baseopt; - const char *source_file; - const conopt_t &glob; + const char *source_file; + const conopt_t &glob; private: - mutopt_t user; - mutopt_t real; - bool diverge; + mutopt_t user; + mutopt_t real; + bool diverge; - void sync() - { - if (!diverge) return; -# define MUTOPT1 MUTOPT -# define MUTOPT(type, name, value) real.name = user.name; - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT - real.fix(&glob); - diverge = false; - } + void sync() + { + if (!diverge) return; +# define MUTOPT1 MUTOPT +# define MUTOPT(type, name, value) real.name = user.name; + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT + real.fix(&glob); + diverge = false; + } public: - explicit Opt(const conopt_t &globopts) - : source_file(NULL) - , glob(globopts) - , user() - , real() - , diverge(true) - {} + explicit Opt(const conopt_t &globopts) + : source_file(NULL) + , glob(globopts) + , user() + , real() + , diverge(true) + {} - const opt_t *snapshot() - { - sync(); - return new opt_t(glob, real); - } + const opt_t *snapshot() + { + sync(); + return new opt_t(glob, real); + } - void restore(const opt_t *opts) - { -# define MUTOPT1 MUTOPT -# define MUTOPT(type, name, value) user.name = opts->name; - RE2C_MUTOPTS -# undef MUTOPT1 -# undef MUTOPT - diverge = true; - sync(); - } + void restore(const opt_t *opts) + { +# define MUTOPT1 MUTOPT +# define MUTOPT(type, name, value) user.name = opts->name; + RE2C_MUTOPTS +# undef MUTOPT1 +# undef MUTOPT + diverge = true; + sync(); + } - bool source (const char *s); + bool source (const char *s); - // RE2C allows to set configurations anywhere inside of a block - // (in the beginning, intermixed with rules, in the end): they will - // affect the whole block anyway. Thus one is not allowed to read - // configurations until the whole block has been parsed. Immutable - // options, on the contrary, are accessible for reading all the time - // (the parser itself depends on them). - void set_encoding(Enc::type_t t) { user.encoding.set(t); } - void unset_encoding(Enc::type_t t) { user.encoding.unset(t); } - void set_encoding_policy(Enc::policy_t p) { user.encoding.setPolicy(p); } + // RE2C allows to set configurations anywhere inside of a block + // (in the beginning, intermixed with rules, in the end): they will + // affect the whole block anyway. Thus one is not allowed to read + // configurations until the whole block has been parsed. Immutable + // options, on the contrary, are accessible for reading all the time + // (the parser itself depends on them). + void set_encoding(Enc::type_t t) { user.encoding.set(t); } + void unset_encoding(Enc::type_t t) { user.encoding.unset(t); } + void set_encoding_policy(Enc::policy_t p) { user.encoding.setPolicy(p); } #define MUTOPT1 MUTOPT #define MUTOPT(type, name, value) void set_##name (const type &arg) { user.name = arg; diverge = true; } - RE2C_MUTOPTS + RE2C_MUTOPTS #undef MUTOPT1 #undef MUTOPT - // bad temporary hacks, should be fixed by proper scoping of config (parts). - void reset_startlabel(); - void reset_mapCodeName (); + // bad temporary hacks, should be fixed by proper scoping of config (parts). + void reset_startlabel(); + void reset_mapCodeName (); - FORBID_COPY (Opt); + FORBID_COPY (Opt); }; enum parse_opts_t { - OK, - EXIT_OK, - EXIT_FAIL + OK, + EXIT_OK, + EXIT_FAIL }; parse_opts_t parse_opts(char **argv, conopt_t &globopts, Opt &opts, Warn &warn); diff --git a/re2c/src/conf/warn.cc b/re2c/src/conf/warn.cc index 2bf406b1..56f60a1f 100644 --- a/re2c/src/conf/warn.cc +++ b/re2c/src/conf/warn.cc @@ -18,204 +18,204 @@ const uint32_t Warn::ERROR = 1u << 1; const char * Warn::names [TYPES] = { #define W(x, y) y - RE2C_WARNING_TYPES + RE2C_WARNING_TYPES #undef W }; Warn::Warn () - : mask () - , error_accuml (false) + : mask () + , error_accuml (false) { - for (uint32_t i = 0; i < TYPES; ++i) - { - mask[i] = SILENT; - } + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] = SILENT; + } } bool Warn::error () const { - return error_accuml; + return error_accuml; } void Warn::set (type_t t, option_t o) { - switch (o) - { - case W: - mask[t] |= WARNING; - break; - case WNO: - mask[t] &= ~WARNING; - break; - case WERROR: - // unlike -Werror, -Werror- implies -W - mask[t] |= (WARNING | ERROR); - break; - case WNOERROR: - mask[t] &= ~ERROR; - break; - } + switch (o) + { + case W: + mask[t] |= WARNING; + break; + case WNO: + mask[t] &= ~WARNING; + break; + case WERROR: + // unlike -Werror, -Werror- implies -W + mask[t] |= (WARNING | ERROR); + break; + case WNOERROR: + mask[t] &= ~ERROR; + break; + } } void Warn::set_all () { - for (uint32_t i = 0; i < TYPES; ++i) - { - mask[i] |= WARNING; - } + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= WARNING; + } } // -Werror doesn't set any warnings: it only guarantees that if a warning // has been set by now or will be set later then it will result into error. void Warn::set_all_error () { - for (uint32_t i = 0; i < TYPES; ++i) - { - mask[i] |= ERROR; - } + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= ERROR; + } } void Warn::fail (type_t t, uint32_t line, const char * s) const { - if (mask[t] & WARNING) - { - // -Werror has no effect - warning (names[t], line, false, "%s", s); - } + if (mask[t] & WARNING) + { + // -Werror has no effect + warning (names[t], line, false, "%s", s); + } } void Warn::condition_order (uint32_t line) { - if (mask[CONDITION_ORDER] & WARNING) - { - const bool e = mask[CONDITION_ORDER] & ERROR; - error_accuml |= e; - warning (names[CONDITION_ORDER], line, e, - "looks like you use hardcoded numbers instead of autogenerated condition names: " - "better add '/*!types:re2c*/' directive or '-t, --type-header' option " - "and don't rely on fixed condition order."); - } + if (mask[CONDITION_ORDER] & WARNING) + { + const bool e = mask[CONDITION_ORDER] & ERROR; + error_accuml |= e; + warning (names[CONDITION_ORDER], line, e, + "looks like you use hardcoded numbers instead of autogenerated condition names: " + "better add '/*!types:re2c*/' directive or '-t, --type-header' option " + "and don't rely on fixed condition order."); + } } void Warn::empty_class (uint32_t line) { - if (mask[EMPTY_CHARACTER_CLASS] & WARNING) - { - const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR; - error_accuml |= e; - warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class"); - } + if (mask[EMPTY_CHARACTER_CLASS] & WARNING) + { + const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR; + error_accuml |= e; + warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class"); + } } void Warn::match_empty_string (uint32_t line, const std::string &cond) { - if (mask[MATCH_EMPTY_STRING] & WARNING) - { - const bool e = mask[MATCH_EMPTY_STRING] & ERROR; - error_accuml |= e; - warning (names[MATCH_EMPTY_STRING], line, e, - "rule %smatches empty string", incond(cond).c_str()); - } + if (mask[MATCH_EMPTY_STRING] & WARNING) + { + const bool e = mask[MATCH_EMPTY_STRING] & ERROR; + error_accuml |= e; + warning (names[MATCH_EMPTY_STRING], line, e, + "rule %smatches empty string", incond(cond).c_str()); + } } void Warn::nondeterministic_tags(uint32_t line, const std::string &cond, - const std::string *tagname, size_t nver) + const std::string *tagname, size_t nver) { - if (mask[NONDETERMINISTIC_TAGS] & WARNING) { - bool e = mask[NONDETERMINISTIC_TAGS] & ERROR; - error_accuml |= e; - - warning_start(line, e); - if (tagname == NULL) { - fprintf(stderr, "trailing context"); - } else { - fprintf(stderr, "tag '%s'", tagname->c_str()); - } - fprintf(stderr, - " %shas %u%s degree of nondeterminism", - incond(cond).c_str(), static_cast(nver), - nver == 2 ? "nd" : nver == 3 ? "rd" : "th"); - warning_end(names[NONDETERMINISTIC_TAGS], e); - } + if (mask[NONDETERMINISTIC_TAGS] & WARNING) { + bool e = mask[NONDETERMINISTIC_TAGS] & ERROR; + error_accuml |= e; + + warning_start(line, e); + if (tagname == NULL) { + fprintf(stderr, "trailing context"); + } else { + fprintf(stderr, "tag '%s'", tagname->c_str()); + } + fprintf(stderr, + " %shas %u%s degree of nondeterminism", + incond(cond).c_str(), static_cast(nver), + nver == 2 ? "nd" : nver == 3 ? "rd" : "th"); + warning_end(names[NONDETERMINISTIC_TAGS], e); + } } void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u) { - if (mask[SWAPPED_RANGE] & WARNING) - { - const bool e = mask[SWAPPED_RANGE] & ERROR; - error_accuml |= e; - warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u); - } + if (mask[SWAPPED_RANGE] & WARNING) + { + const bool e = mask[SWAPPED_RANGE] & ERROR; + error_accuml |= e; + warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u); + } } void Warn::undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow) { - if (mask[UNDEFINED_CONTROL_FLOW] & WARNING) - { - const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR; - error_accuml |= e; - - // report shorter patterns first - std::sort (paths.begin (), paths.end ()); - - warning_start (skel.line, e); - fprintf (stderr, "control flow %sis undefined for strings that match ", incond (skel.cond).c_str ()); - const size_t count = paths.size (); - if (count == 1) - { - fprint_default_path (stderr, skel, paths[0]); - } - else - { - for (size_t i = 0; i < count; ++i) - { - fprintf (stderr, "\n\t"); - fprint_default_path (stderr, skel, paths[i]); - } - fprintf (stderr, "\n"); - } - if (overflow) - { - fprintf (stderr, " ... and a few more"); - } - fprintf (stderr, ", use default rule '*'"); - warning_end (names[UNDEFINED_CONTROL_FLOW], e); - } + if (mask[UNDEFINED_CONTROL_FLOW] & WARNING) + { + const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR; + error_accuml |= e; + + // report shorter patterns first + std::sort (paths.begin (), paths.end ()); + + warning_start (skel.line, e); + fprintf (stderr, "control flow %sis undefined for strings that match ", incond (skel.cond).c_str ()); + const size_t count = paths.size (); + if (count == 1) + { + fprint_default_path (stderr, skel, paths[0]); + } + else + { + for (size_t i = 0; i < count; ++i) + { + fprintf (stderr, "\n\t"); + fprint_default_path (stderr, skel, paths[i]); + } + fprintf (stderr, "\n"); + } + if (overflow) + { + fprintf (stderr, " ... and a few more"); + } + fprintf (stderr, ", use default rule '*'"); + warning_end (names[UNDEFINED_CONTROL_FLOW], e); + } } void Warn::unreachable_rule(const std::string &cond, const Rule &rule) { - if (mask[UNREACHABLE_RULES] & WARNING) { - const bool e = mask[UNREACHABLE_RULES] & ERROR; - error_accuml |= e; - - warning_start(rule.code->fline, e); - fprintf(stderr, "unreachable rule %s", incond(cond).c_str()); - const size_t shadows = rule.shadow.size(); - if (shadows > 0) { - const char * pl = shadows > 1 - ? "s" - : ""; - std::set::const_iterator i = rule.shadow.begin(); - fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, *i); - for (++i; i != rule.shadow.end(); ++i) { - fprintf(stderr, ", %u", *i); - } - fprintf(stderr, ")"); - } - warning_end(names[UNREACHABLE_RULES], e); - } + if (mask[UNREACHABLE_RULES] & WARNING) { + const bool e = mask[UNREACHABLE_RULES] & ERROR; + error_accuml |= e; + + warning_start(rule.code->fline, e); + fprintf(stderr, "unreachable rule %s", incond(cond).c_str()); + const size_t shadows = rule.shadow.size(); + if (shadows > 0) { + const char * pl = shadows > 1 + ? "s" + : ""; + std::set::const_iterator i = rule.shadow.begin(); + fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, *i); + for (++i; i != rule.shadow.end(); ++i) { + fprintf(stderr, ", %u", *i); + } + fprintf(stderr, ")"); + } + warning_end(names[UNREACHABLE_RULES], e); + } } void Warn::useless_escape (uint32_t line, uint32_t col, char c) { - if (mask[USELESS_ESCAPE] & WARNING) - { - const bool e = mask[USELESS_ESCAPE] & ERROR; - error_accuml |= e; - warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c); - } + if (mask[USELESS_ESCAPE] & WARNING) + { + const bool e = mask[USELESS_ESCAPE] & ERROR; + error_accuml |= e; + warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c); + } } } // namespace re2c diff --git a/re2c/src/conf/warn.h b/re2c/src/conf/warn.h index c89cfbb2..0b14a24b 100644 --- a/re2c/src/conf/warn.h +++ b/re2c/src/conf/warn.h @@ -15,57 +15,57 @@ struct Rule; struct Skeleton; #define RE2C_WARNING_TYPES \ - W (CONDITION_ORDER, "condition-order"), \ - W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \ - W (MATCH_EMPTY_STRING, "match-empty-string"), \ - W (NONDETERMINISTIC_TAGS, "nondeterministic-tags"), \ - W (SWAPPED_RANGE, "swapped-range"), \ - W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \ - W (UNREACHABLE_RULES, "unreachable-rules"), \ - W (USELESS_ESCAPE, "useless-escape"), + W (CONDITION_ORDER, "condition-order"), \ + W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \ + W (MATCH_EMPTY_STRING, "match-empty-string"), \ + W (NONDETERMINISTIC_TAGS, "nondeterministic-tags"), \ + W (SWAPPED_RANGE, "swapped-range"), \ + W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \ + W (UNREACHABLE_RULES, "unreachable-rules"), \ + W (USELESS_ESCAPE, "useless-escape"), class Warn { public: - enum type_t - { + enum type_t + { #define W(x, y) x - RE2C_WARNING_TYPES + RE2C_WARNING_TYPES #undef W - TYPES // count - }; - enum option_t - { - W, - WNO, - WERROR, - WNOERROR - }; + TYPES // count + }; + enum option_t + { + W, + WNO, + WERROR, + WNOERROR + }; private: - static const uint32_t SILENT; - static const uint32_t WARNING; - static const uint32_t ERROR; - static const char * names [TYPES]; - uint32_t mask[TYPES]; - bool error_accuml; + static const uint32_t SILENT; + static const uint32_t WARNING; + static const uint32_t ERROR; + static const char * names [TYPES]; + uint32_t mask[TYPES]; + bool error_accuml; public: - Warn (); - bool error () const; - void set (type_t t, option_t o); - void set_all (); - void set_all_error (); - void fail (type_t t, uint32_t line, const char * s) const; + Warn (); + bool error () const; + void set (type_t t, option_t o); + void set_all (); + void set_all_error (); + void fail (type_t t, uint32_t line, const char * s) const; - void condition_order (uint32_t line); - void empty_class (uint32_t line); - void match_empty_string (uint32_t line, const std::string &cond); - void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname, size_t nver); - void swapped_range (uint32_t line, uint32_t l, uint32_t u); - void undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow); - void unreachable_rule (const std::string & cond, const Rule &rule); - void useless_escape (uint32_t line, uint32_t col, char c); + void condition_order (uint32_t line); + void empty_class (uint32_t line); + void match_empty_string (uint32_t line, const std::string &cond); + void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname, size_t nver); + void swapped_range (uint32_t line, uint32_t l, uint32_t u); + void undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow); + void unreachable_rule (const std::string & cond, const Rule &rule); + void useless_escape (uint32_t line, uint32_t col, char c); }; } // namespace re2c diff --git a/re2c/src/dfa/cfg/cfg.cc b/re2c/src/dfa/cfg/cfg.cc index 00acf968..07ea09f8 100644 --- a/re2c/src/dfa/cfg/cfg.cc +++ b/re2c/src/dfa/cfg/cfg.cc @@ -20,185 +20,185 @@ static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x); cfg_t::cfg_t(dfa_t &a) - : dfa(a) - , bblocks(NULL) - , nbbarc(0) - , nbbfin(0) - , nbbfall(0) + : dfa(a) + , bblocks(NULL) + , nbbarc(0) + , nbbfin(0) + , nbbfall(0) { - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - cfg_ix_t *arc2bb = new cfg_ix_t[nstate * (nsym + 2)]; + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + cfg_ix_t *arc2bb = new cfg_ix_t[nstate * (nsym + 2)]; - map_arcs_to_bblocks(dfa, arc2bb, nbbarc, nbbfin, nbbfall); - bblocks = create_bblocks(dfa, arc2bb, nbbfin, nbbfall); + map_arcs_to_bblocks(dfa, arc2bb, nbbarc, nbbfin, nbbfall); + bblocks = create_bblocks(dfa, arc2bb, nbbfin, nbbfall); - delete[] arc2bb; + delete[] arc2bb; } void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb, - cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall) + cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall) { - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - - // root bblock for initial tagged epsilon-transition - cfg_ix_t nbb = 1; - - // bblocks for tagged transitions - for (size_t i = 0; i < nstate; ++i) { - tcmd_t **c = dfa.states[i]->tcmd, **f = c + nsym; - for (; c < f; ++c) { - *arc2bb++ = *c == NULL ? 0 : nbb++; - } - } - nbbarc = nbb; - - // bblock for final tagged epsilon-transition - for (size_t i = 0; i < nstate; ++i) { - tcmd_t *f = dfa.states[i]->tcmd[nsym]; - *arc2bb++ = f == NULL ? 0 : nbb++; - } - nbbfin = nbb; - - // bblock for fallback tagged epsilon-transition - for (size_t i = 0; i < nstate; ++i) { - const dfa_state_t *s = dfa.states[i]; - // (check final tags: fallback tags may be empty) - *arc2bb++ = s->fallback && s->tcmd[nsym] ? nbb++ : 0; - } - nbbfall = nbb; + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + + // root bblock for initial tagged epsilon-transition + cfg_ix_t nbb = 1; + + // bblocks for tagged transitions + for (size_t i = 0; i < nstate; ++i) { + tcmd_t **c = dfa.states[i]->tcmd, **f = c + nsym; + for (; c < f; ++c) { + *arc2bb++ = *c == NULL ? 0 : nbb++; + } + } + nbbarc = nbb; + + // bblock for final tagged epsilon-transition + for (size_t i = 0; i < nstate; ++i) { + tcmd_t *f = dfa.states[i]->tcmd[nsym]; + *arc2bb++ = f == NULL ? 0 : nbb++; + } + nbbfin = nbb; + + // bblock for fallback tagged epsilon-transition + for (size_t i = 0; i < nstate; ++i) { + const dfa_state_t *s = dfa.states[i]; + // (check final tags: fallback tags may be empty) + *arc2bb++ = s->fallback && s->tcmd[nsym] ? nbb++ : 0; + } + nbbfall = nbb; } cfg_bb_t *create_bblocks(dfa_t &dfa, const cfg_ix_t *arc2bb, - cfg_ix_t nbbfin, cfg_ix_t nbbfall) + cfg_ix_t nbbfin, cfg_ix_t nbbfall) { - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - const cfg_ix_t *a2b = arc2bb; - cfg_ix_t *succb = new cfg_ix_t[nbbfin], *succe; - bool *been = new bool[nstate]; - - cfg_bb_t *bblocks = allocate(nbbfall), *b = bblocks; - - // root bblock - std::fill(been, been + nstate, false); - successors(dfa, arc2bb, been, succe = succb, 0); - new(b++) cfg_bb_t(succb, succe, dfa.tcmd0, NULL); - - // transition bblocks - for (size_t i = 0; i < nstate; ++i) { - const dfa_state_t *s = dfa.states[i]; - for (size_t c = 0; c < nsym; ++c) { - if (*a2b++ != 0) { - std::fill(been, been + nstate, false); - successors(dfa, arc2bb, been, succe = succb, s->arcs[c]); - new(b++) cfg_bb_t(succb, succe, s->tcmd[c], NULL); - } - } - } - - // final bblocks - for (size_t i = 0; i < nstate; ++i) { - if (*a2b++ != 0) { - const dfa_state_t *s = dfa.states[i]; - new(b++) cfg_bb_t(NULL, NULL, s->tcmd[nsym], &dfa.rules[s->rule]); - } - } - - // fallback bblocks - for (size_t i = 0; i < nstate; ++i) { - if (*a2b++ != 0) { - const dfa_state_t *s = dfa.states[i]; - std::fill(been, been + nstate, false); - fallback(dfa, arc2bb, been, succe = succb, i); - new(b++) cfg_bb_t(succb, succe, s->tcmd[nsym + 1], &dfa.rules[s->rule]); - } - } - - delete[] succb; - delete[] been; - return bblocks; + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + const cfg_ix_t *a2b = arc2bb; + cfg_ix_t *succb = new cfg_ix_t[nbbfin], *succe; + bool *been = new bool[nstate]; + + cfg_bb_t *bblocks = allocate(nbbfall), *b = bblocks; + + // root bblock + std::fill(been, been + nstate, false); + successors(dfa, arc2bb, been, succe = succb, 0); + new(b++) cfg_bb_t(succb, succe, dfa.tcmd0, NULL); + + // transition bblocks + for (size_t i = 0; i < nstate; ++i) { + const dfa_state_t *s = dfa.states[i]; + for (size_t c = 0; c < nsym; ++c) { + if (*a2b++ != 0) { + std::fill(been, been + nstate, false); + successors(dfa, arc2bb, been, succe = succb, s->arcs[c]); + new(b++) cfg_bb_t(succb, succe, s->tcmd[c], NULL); + } + } + } + + // final bblocks + for (size_t i = 0; i < nstate; ++i) { + if (*a2b++ != 0) { + const dfa_state_t *s = dfa.states[i]; + new(b++) cfg_bb_t(NULL, NULL, s->tcmd[nsym], &dfa.rules[s->rule]); + } + } + + // fallback bblocks + for (size_t i = 0; i < nstate; ++i) { + if (*a2b++ != 0) { + const dfa_state_t *s = dfa.states[i]; + std::fill(been, been + nstate, false); + fallback(dfa, arc2bb, been, succe = succb, i); + new(b++) cfg_bb_t(succb, succe, s->tcmd[nsym + 1], &dfa.rules[s->rule]); + } + } + + delete[] succb; + delete[] been; + return bblocks; } cfg_bb_t::cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se, - tcmd_t *&c, const Rule *r) - : succb(NULL) - , succe(NULL) - , cmd(c) - , rule(r) + tcmd_t *&c, const Rule *r) + : succb(NULL) + , succe(NULL) + , cmd(c) + , rule(r) { - const size_t n = static_cast(se - sb); - succb = new cfg_ix_t[n]; - if (n > 0) memcpy(succb, sb, n * sizeof(cfg_ix_t)); - succe = succb + n; + const size_t n = static_cast(se - sb); + succb = new cfg_ix_t[n]; + if (n > 0) memcpy(succb, sb, n * sizeof(cfg_ix_t)); + succe = succb + n; } // find immediate successors of the given bblock void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, - cfg_ix_t *&succ, size_t x) + cfg_ix_t *&succ, size_t x) { - if (x == dfa_t::NIL || been[x]) return; - been[x] = true; - - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars, - *a = dfa.states[x]->arcs; - const cfg_ix_t *a2b = &arc2bb[x * nsym]; - - for (size_t c = 0; c < nsym; ++c) { - const cfg_ix_t b = a2b[c]; - if (b != 0) { - *succ++ = b; - } else { - successors(dfa, arc2bb, been, succ, a[c]); - } - } - - const cfg_ix_t f = arc2bb[nstate * nsym + x]; - if (f != 0) { - *succ++ = f; - } + if (x == dfa_t::NIL || been[x]) return; + been[x] = true; + + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars, + *a = dfa.states[x]->arcs; + const cfg_ix_t *a2b = &arc2bb[x * nsym]; + + for (size_t c = 0; c < nsym; ++c) { + const cfg_ix_t b = a2b[c]; + if (b != 0) { + *succ++ = b; + } else { + successors(dfa, arc2bb, been, succ, a[c]); + } + } + + const cfg_ix_t f = arc2bb[nstate * nsym + x]; + if (f != 0) { + *succ++ = f; + } } // find all bblocks reachable from this one by following // non-accepting DFA paths: this is the set of bblocks affected // by liveness of fallback tags void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, - cfg_ix_t *&succ, size_t x) + cfg_ix_t *&succ, size_t x) { - if (x == dfa_t::NIL || been[x]) return; - been[x] = true; - - const size_t - nsym = dfa.nchars, - *a = dfa.states[x]->arcs; - const cfg_ix_t *a2b = &arc2bb[x * nsym]; - - for (size_t c = 0; c < nsym; ++c) { - const size_t y = a[c]; - if (y != dfa_t::NIL && dfa.states[y]->fallthru) { - const cfg_ix_t b = a2b[c]; - if (b != 0) { - *succ++ = b; - } - fallback(dfa, arc2bb, been, succ, y); - } - } + if (x == dfa_t::NIL || been[x]) return; + been[x] = true; + + const size_t + nsym = dfa.nchars, + *a = dfa.states[x]->arcs; + const cfg_ix_t *a2b = &arc2bb[x * nsym]; + + for (size_t c = 0; c < nsym; ++c) { + const size_t y = a[c]; + if (y != dfa_t::NIL && dfa.states[y]->fallthru) { + const cfg_ix_t b = a2b[c]; + if (b != 0) { + *succ++ = b; + } + fallback(dfa, arc2bb, been, succ, y); + } + } } cfg_t::~cfg_t() { - cfg_bb_t *b = bblocks, *e = b + nbbfall; - for (; b < e; ++b) { - delete[] b->succb; - } + cfg_bb_t *b = bblocks, *e = b + nbbfall; + for (; b < e; ++b) { + delete[] b->succb; + } - operator delete(bblocks); + operator delete(bblocks); } } // namespace re2c diff --git a/re2c/src/dfa/cfg/cfg.h b/re2c/src/dfa/cfg/cfg.h index ccb54fd9..c95825c6 100644 --- a/re2c/src/dfa/cfg/cfg.h +++ b/re2c/src/dfa/cfg/cfg.h @@ -18,35 +18,35 @@ typedef uint32_t cfg_ix_t; // basic block struct cfg_bb_t { - cfg_ix_t *succb; - cfg_ix_t *succe; - tcmd_t *&cmd; - const Rule *rule; + cfg_ix_t *succb; + cfg_ix_t *succe; + tcmd_t *&cmd; + const Rule *rule; - cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se, tcmd_t *&c, const Rule *r); - FORBID_COPY(cfg_bb_t); + cfg_bb_t(const cfg_ix_t *sb, const cfg_ix_t *se, tcmd_t *&c, const Rule *r); + FORBID_COPY(cfg_bb_t); }; // control flow graph struct cfg_t { - dfa_t &dfa; - cfg_bb_t *bblocks; - cfg_ix_t nbbarc; - cfg_ix_t nbbfin; - cfg_ix_t nbbfall; - - explicit cfg_t(dfa_t &a); - ~cfg_t(); - static tagver_t compact(const cfg_t &cfg, tagver_t *ver2new); - static void liveness_analysis(const cfg_t &cfg, bool *live); - static void live_through_bblock(const tcmd_t *cmd, bool *live); - static void dead_code_elimination(cfg_t &cfg, const bool *live); - static void interference(const cfg_t &cfg, const bool *live, bool *interf); - static tagver_t variable_allocation(const cfg_t &cfg, const bool *interf, tagver_t *ver2new); - static void renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver); - static void normalization(cfg_t &cfg); - FORBID_COPY(cfg_t); + dfa_t &dfa; + cfg_bb_t *bblocks; + cfg_ix_t nbbarc; + cfg_ix_t nbbfin; + cfg_ix_t nbbfall; + + explicit cfg_t(dfa_t &a); + ~cfg_t(); + static tagver_t compact(const cfg_t &cfg, tagver_t *ver2new); + static void liveness_analysis(const cfg_t &cfg, bool *live); + static void live_through_bblock(const tcmd_t *cmd, bool *live); + static void dead_code_elimination(cfg_t &cfg, const bool *live); + static void interference(const cfg_t &cfg, const bool *live, bool *interf); + static tagver_t variable_allocation(const cfg_t &cfg, const bool *interf, tagver_t *ver2new); + static void renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver); + static void normalization(cfg_t &cfg); + FORBID_COPY(cfg_t); }; void dump_cfg(const cfg_t &cfg, const bool *live); diff --git a/re2c/src/dfa/cfg/compact.cc b/re2c/src/dfa/cfg/compact.cc index e67e8e5e..3a244d5a 100644 --- a/re2c/src/dfa/cfg/compact.cc +++ b/re2c/src/dfa/cfg/compact.cc @@ -12,36 +12,36 @@ namespace re2c tagver_t cfg_t::compact(const cfg_t &cfg, tagver_t *ver2new) { - const std::vector &tags = cfg.dfa.tags; - const size_t - nver = static_cast(cfg.dfa.maxtagver) + 1, - ntag = tags.size(); - const tagver_t *fins = cfg.dfa.finvers; - bool *used = new bool[nver]; - - std::fill(used, used + nver, false); - for (size_t t = 0; t < ntag; ++t) { - const tagver_t f = fins[t]; - used[f] = f != TAGVER_ZERO; // fixed tag or unreachable rule - } - for (size_t i = 0; i < cfg.nbbfall; ++i) { - const cfg_bb_t &b = cfg.bblocks[i]; - for (const tcmd_t *p = b.cmd; p; p = p->next) { - const tagver_t r = p->rhs; - if (r != TAGVER_ZERO) { - used[r] = true; - } - used[p->lhs] = true; - } - } - - tagver_t maxver = 0; - for (size_t v = 0; v < nver; ++v) { - ver2new[v] = used[v] ? ++maxver : TAGVER_ZERO; - } - - delete[] used; - return maxver; + const std::vector &tags = cfg.dfa.tags; + const size_t + nver = static_cast(cfg.dfa.maxtagver) + 1, + ntag = tags.size(); + const tagver_t *fins = cfg.dfa.finvers; + bool *used = new bool[nver]; + + std::fill(used, used + nver, false); + for (size_t t = 0; t < ntag; ++t) { + const tagver_t f = fins[t]; + used[f] = f != TAGVER_ZERO; // fixed tag or unreachable rule + } + for (size_t i = 0; i < cfg.nbbfall; ++i) { + const cfg_bb_t &b = cfg.bblocks[i]; + for (const tcmd_t *p = b.cmd; p; p = p->next) { + const tagver_t r = p->rhs; + if (r != TAGVER_ZERO) { + used[r] = true; + } + used[p->lhs] = true; + } + } + + tagver_t maxver = 0; + for (size_t v = 0; v < nver; ++v) { + ver2new[v] = used[v] ? ++maxver : TAGVER_ZERO; + } + + delete[] used; + return maxver; } } // namespace re2c diff --git a/re2c/src/dfa/cfg/dce.cc b/re2c/src/dfa/cfg/dce.cc index 88b97efd..3c4786d5 100644 --- a/re2c/src/dfa/cfg/dce.cc +++ b/re2c/src/dfa/cfg/dce.cc @@ -8,21 +8,21 @@ namespace re2c void cfg_t::dead_code_elimination(cfg_t &cfg, const bool *live) { - const tagver_t nver = cfg.dfa.maxtagver + 1; - // final and fallback tags can't be dead by construction - cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbarc; + const tagver_t nver = cfg.dfa.maxtagver + 1; + // final and fallback tags can't be dead by construction + cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbarc; - // ignore possible local liveness inside of bblock: - // by construction we have no versions local to bblock - for (; b < e; ++b, live += nver) { - for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) { - if (!live[p->lhs]) { - *pp = p->next; - } else { - pp = &p->next; - } - } - } + // ignore possible local liveness inside of bblock: + // by construction we have no versions local to bblock + for (; b < e; ++b, live += nver) { + for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) { + if (!live[p->lhs]) { + *pp = p->next; + } else { + pp = &p->next; + } + } + } } } // namespace re2c diff --git a/re2c/src/dfa/cfg/dump.cc b/re2c/src/dfa/cfg/dump.cc index d3757c1f..02f54b8f 100644 --- a/re2c/src/dfa/cfg/dump.cc +++ b/re2c/src/dfa/cfg/dump.cc @@ -11,74 +11,74 @@ namespace re2c void dump_cfg(const cfg_t &cfg, const bool *live) { - const tagver_t nver = cfg.dfa.maxtagver + 1; + const tagver_t nver = cfg.dfa.maxtagver + 1; - fprintf(stderr, "digraph CFG {\n" - " rankdir=LR\n" - " node[shape=Mrecord fontname=Terminus height=0.2 width=0.2]\n" - " edge[arrowhead=vee fontname=Terminus]\n\n"); + fprintf(stderr, "digraph CFG {\n" + " rankdir=LR\n" + " node[shape=Mrecord fontname=Terminus height=0.2 width=0.2]\n" + " edge[arrowhead=vee fontname=Terminus]\n\n"); - for (cfg_ix_t i = 0; i < cfg.nbbfall; ++i, live += nver) { - const cfg_bb_t *b = cfg.bblocks + i; + for (cfg_ix_t i = 0; i < cfg.nbbfall; ++i, live += nver) { + const cfg_bb_t *b = cfg.bblocks + i; - fprintf(stderr, " n%u [label=\"%u\\n", i, i); - for (const tcmd_t *p = b->cmd; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - if (tcmd_t::iscopy(p)) { - fprintf(stderr, "%d=%d ", l, r); - } else { - fprintf(stderr, "%d", l); - if (r != TAGVER_ZERO) { - fprintf(stderr, "=%d", r); - } - for (; *h != TAGVER_ZERO; ++h) { - fprintf(stderr, "%s ", *h == TAGVER_BOTTOM ? "↓" : "↑"); - } - } - } - fprintf(stderr, "/"); - if (b->rule) { - for (size_t t = b->rule->ltag; t < b->rule->htag; ++t) { - const tagver_t v = cfg.dfa.finvers[t]; - if (v != TAGVER_ZERO) { - fprintf(stderr, "%i ", v); - } - } - } + fprintf(stderr, " n%u [label=\"%u\\n", i, i); + for (const tcmd_t *p = b->cmd; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + if (tcmd_t::iscopy(p)) { + fprintf(stderr, "%d=%d ", l, r); + } else { + fprintf(stderr, "%d", l); + if (r != TAGVER_ZERO) { + fprintf(stderr, "=%d", r); + } + for (; *h != TAGVER_ZERO; ++h) { + fprintf(stderr, "%s ", *h == TAGVER_BOTTOM ? "↓" : "↑"); + } + } + } + fprintf(stderr, "/"); + if (b->rule) { + for (size_t t = b->rule->ltag; t < b->rule->htag; ++t) { + const tagver_t v = cfg.dfa.finvers[t]; + if (v != TAGVER_ZERO) { + fprintf(stderr, "%i ", v); + } + } + } - if (i < cfg.nbbfin) { - fprintf(stderr, "\\nneed:"); - for (tagver_t v = 0; v < nver; ++v) { - if (live[v]) { - fprintf(stderr, " %i", v); - } - } - } + if (i < cfg.nbbfin) { + fprintf(stderr, "\\nneed:"); + for (tagver_t v = 0; v < nver; ++v) { + if (live[v]) { + fprintf(stderr, " %i", v); + } + } + } - fprintf(stderr, "\"]\n"); + fprintf(stderr, "\"]\n"); - const char *style = b->rule ? "dotted" : "solid"; - for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { - fprintf(stderr, " n%u -> n%u [style=%s]\n", i, *j, style); - } - } + const char *style = b->rule ? "dotted" : "solid"; + for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { + fprintf(stderr, " n%u -> n%u [style=%s]\n", i, *j, style); + } + } - fprintf(stderr, "}\n"); + fprintf(stderr, "}\n"); } void dump_interf(const cfg_t &cfg, const bool *interf) { - const tagver_t nver = cfg.dfa.maxtagver + 1; - for (tagver_t y = 1; y < nver; ++y) { - fprintf(stderr, "%2d ", y); - } - fprintf(stderr, "\n"); - for (tagver_t x = 1; x < nver; ++x) { - for (tagver_t y = 1; y < nver; ++y) { - fprintf(stderr, "%2c ", interf[x * nver + y] ? '*' : '.'); - } - fprintf(stderr, "\n"); - } + const tagver_t nver = cfg.dfa.maxtagver + 1; + for (tagver_t y = 1; y < nver; ++y) { + fprintf(stderr, "%2d ", y); + } + fprintf(stderr, "\n"); + for (tagver_t x = 1; x < nver; ++x) { + for (tagver_t y = 1; y < nver; ++y) { + fprintf(stderr, "%2c ", interf[x * nver + y] ? '*' : '.'); + } + fprintf(stderr, "\n"); + } } } // namespace re2c diff --git a/re2c/src/dfa/cfg/freeze.cc b/re2c/src/dfa/cfg/freeze.cc index 926089d3..aa4d15ff 100644 --- a/re2c/src/dfa/cfg/freeze.cc +++ b/re2c/src/dfa/cfg/freeze.cc @@ -21,35 +21,35 @@ namespace re2c */ void freeze_tags(dfa_t &dfa) { - tcpool_t &pool = dfa.tcpool; - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - - dfa.tcid0 = pool.insert(dfa.tcmd0); - dfa.tcmd0 = NULL; - - for (size_t i = 0; i < nstate; ++i) { - dfa_state_t *s = dfa.states[i]; - tcmd_t **cmd = s->tcmd, - **const fin = cmd + nsym, - **const fall = fin + 1; - tcid_t *id = s->tcid = new tcid_t[nsym + 2]; - - // transition commands - for(; cmd < fin; ++cmd) { - *id++ = pool.insert(*cmd); - } - - // final epsilon-transition command - *id++ = pool.insert(*fin); - - // fallback epsilon-transition command - *id++ = pool.insert(*fall); - - delete[] s->tcmd; - s->tcmd = NULL; - } + tcpool_t &pool = dfa.tcpool; + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + + dfa.tcid0 = pool.insert(dfa.tcmd0); + dfa.tcmd0 = NULL; + + for (size_t i = 0; i < nstate; ++i) { + dfa_state_t *s = dfa.states[i]; + tcmd_t **cmd = s->tcmd, + **const fin = cmd + nsym, + **const fall = fin + 1; + tcid_t *id = s->tcid = new tcid_t[nsym + 2]; + + // transition commands + for(; cmd < fin; ++cmd) { + *id++ = pool.insert(*cmd); + } + + // final epsilon-transition command + *id++ = pool.insert(*fin); + + // fallback epsilon-transition command + *id++ = pool.insert(*fall); + + delete[] s->tcmd; + s->tcmd = NULL; + } } } // namespace re2c diff --git a/re2c/src/dfa/cfg/interfere.cc b/re2c/src/dfa/cfg/interfere.cc index 45da84bc..ce0046d9 100644 --- a/re2c/src/dfa/cfg/interfere.cc +++ b/re2c/src/dfa/cfg/interfere.cc @@ -15,87 +15,87 @@ static void interfere(const tcmd_t *cmd, const bool *live, bool *interf, bool *b void cfg_t::interference(const cfg_t &cfg, const bool *live, bool *interf) { - const tagver_t maxver = cfg.dfa.maxtagver + 1; - const size_t nver = static_cast(maxver); - bool *buf = new bool[nver]; - vals_t *vals = new vals_t[nver](); - const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfin; + const tagver_t maxver = cfg.dfa.maxtagver + 1; + const size_t nver = static_cast(maxver); + bool *buf = new bool[nver]; + vals_t *vals = new vals_t[nver](); + const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfin; - memset(interf, 0, nver * nver * sizeof(bool)); - for (; b < e; ++b, live += nver) { - interfere(b->cmd, live, interf, buf, vals, nver); - } + memset(interf, 0, nver * nver * sizeof(bool)); + for (; b < e; ++b, live += nver) { + interfere(b->cmd, live, interf, buf, vals, nver); + } - // versions of tags with/without history interfere - std::set &mt = cfg.dfa.mtagvers; - for (std::set::iterator i = mt.begin(); i != mt.end(); ++i) { - for (tagver_t u = *i, v = 0; v < maxver; ++v) { - if (mt.find(v) == mt.end()) { - interf[v * maxver + u] = interf[u * maxver + v] = true; - } - } - } + // versions of tags with/without history interfere + std::set &mt = cfg.dfa.mtagvers; + for (std::set::iterator i = mt.begin(); i != mt.end(); ++i) { + for (tagver_t u = *i, v = 0; v < maxver; ++v) { + if (mt.find(v) == mt.end()) { + interf[v * maxver + u] = interf[u * maxver + v] = true; + } + } + } - delete[] buf; - delete[] vals; + delete[] buf; + delete[] vals; } void interfere(const tcmd_t *cmd, const bool *live, bool *interf, - bool *buf, vals_t *vals, size_t nver) + bool *buf, vals_t *vals, size_t nver) { - // initialize value of RHS for all commands in this basic block - for (const tcmd_t *p = cmd; p; p = p->next) { - const tagver_t r = p->rhs; - if (r != TAGVER_ZERO) { - vals[r].clear(); - vals[r].push_back(r); - } - } + // initialize value of RHS for all commands in this basic block + for (const tcmd_t *p = cmd; p; p = p->next) { + const tagver_t r = p->rhs; + if (r != TAGVER_ZERO) { + vals[r].clear(); + vals[r].push_back(r); + } + } - // find interference list for LHS of each command - for (const tcmd_t *p = cmd; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - vals_t &vl = vals[l], &vr = vals[r]; + // find interference list for LHS of each command + for (const tcmd_t *p = cmd; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + vals_t &vl = vals[l], &vr = vals[r]; - // alive after this command - memcpy(buf, live, nver * sizeof(bool)); - cfg_t::live_through_bblock(p->next, buf); + // alive after this command + memcpy(buf, live, nver * sizeof(bool)); + cfg_t::live_through_bblock(p->next, buf); - // if copy command, exclude RHS - if (tcmd_t::iscopy(p)) buf[r] = false; + // if copy command, exclude RHS + if (tcmd_t::iscopy(p)) buf[r] = false; - // update value of current command's LHS - if (tcmd_t::iscopy(p)) { - vl = vr; - } else if (tcmd_t::isset(p)) { - vl.clear(); - vl.push_back(*h); - } else { - if (l != r) vl = vr; - for (; *++h != TAGVER_ZERO;); // history is reversed - for (; h-- != p->history;) { - vl.push_back(*h); - } - } - // Exclude from interference list all LHS from preceding commands - // which value is equal to current LHS value. Subsequent commands - // are ignored: if subsequent command that sets LHS to the same value - // precedes any use of it, liveness propagation through basic block - // would mark this LHS as dead and not interfering anyway; otherwise - // (if use precedes setting to the same value), then it indeed - // interferes with current LHS. - for (const tcmd_t *q = cmd; q != p; q = q->next) { - if (vals[q->lhs] == vl) { - buf[q->lhs] = false; - } - } + // update value of current command's LHS + if (tcmd_t::iscopy(p)) { + vl = vr; + } else if (tcmd_t::isset(p)) { + vl.clear(); + vl.push_back(*h); + } else { + if (l != r) vl = vr; + for (; *++h != TAGVER_ZERO;); // history is reversed + for (; h-- != p->history;) { + vl.push_back(*h); + } + } + // Exclude from interference list all LHS from preceding commands + // which value is equal to current LHS value. Subsequent commands + // are ignored: if subsequent command that sets LHS to the same value + // precedes any use of it, liveness propagation through basic block + // would mark this LHS as dead and not interfering anyway; otherwise + // (if use precedes setting to the same value), then it indeed + // interferes with current LHS. + for (const tcmd_t *q = cmd; q != p; q = q->next) { + if (vals[q->lhs] == vl) { + buf[q->lhs] = false; + } + } - const size_t u = static_cast(l); - for (size_t v = 0; v < nver; ++v) { - if (!buf[v]) continue; - interf[u * nver + v] = interf[v * nver + u] = true; - } - } + const size_t u = static_cast(l); + for (size_t v = 0; v < nver; ++v) { + if (!buf[v]) continue; + interf[u * nver + v] = interf[v * nver + u] = true; + } + } } } // namespace re2c diff --git a/re2c/src/dfa/cfg/liveanal.cc b/re2c/src/dfa/cfg/liveanal.cc index 4123f3fd..62f9781a 100644 --- a/re2c/src/dfa/cfg/liveanal.cc +++ b/re2c/src/dfa/cfg/liveanal.cc @@ -14,159 +14,159 @@ namespace re2c // calculates deep-first search postorder of transition nodes, // skips final and fallback nodes (they have no successors anyway) static cfg_ix_t *postorder(const cfg_t &cfg, bool *done, - cfg_ix_t *ord, cfg_ix_t i) + cfg_ix_t *ord, cfg_ix_t i) { - if (i >= cfg.nbbarc || done[i]) return ord; - done[i] = true; + if (i >= cfg.nbbarc || done[i]) return ord; + done[i] = true; - const cfg_bb_t *b = cfg.bblocks + i; - for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { - ord = postorder(cfg, done, ord, *j); - } + const cfg_bb_t *b = cfg.bblocks + i; + for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { + ord = postorder(cfg, done, ord, *j); + } - *ord = i; - return ++ord; + *ord = i; + return ++ord; } void cfg_t::live_through_bblock(const tcmd_t *cmd, bool *live) { - if (!cmd) return; - - live_through_bblock(cmd->next, live); - - const tagver_t l = cmd->lhs, r = cmd->rhs; - if (live[l]) { - // first reset, than set: LHS might be equal to history - live[l] = false; - if (r != TAGVER_ZERO) { - live[r] = true; - } - } + if (!cmd) return; + + live_through_bblock(cmd->next, live); + + const tagver_t l = cmd->lhs, r = cmd->rhs; + if (live[l]) { + // first reset, than set: LHS might be equal to history + live[l] = false; + if (r != TAGVER_ZERO) { + live[r] = true; + } + } } void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live) { - const std::vector &tags = cfg.dfa.tags; - const size_t nver = static_cast(cfg.dfa.maxtagver) + 1; - const cfg_ix_t - narc = cfg.nbbarc, - nfin = cfg.nbbfin; - const tagver_t *fins = cfg.dfa.finvers; - bool *buf1 = new bool[nver]; - bool *buf2 = new bool[nver]; - bool *done = new bool[narc]; - cfg_ix_t *pord = new cfg_ix_t[narc]; - - /* note [control flow equations for tag liveness] - * - * Liveness in bblock B is given by control flow equations: - * live-out(B) = union of live-in(C), for all successors C - * live-in(B) = live-out(B) except defined(B) - * Equations are solved by iteration until fix point. - * - * Live set can only grow on each iteration, it never shrinks. - * Initially all final tag versions used in rules are alive; - * we pre-calculate them and then only update table by adding - * new versions. - */ - - memset(live, 0, nfin * nver * sizeof(bool)); - for (cfg_ix_t i = narc; i < nfin; ++i) { - const cfg_bb_t *b = cfg.bblocks + i; - const Rule *r = b->rule; - bool *l = &live[i * nver]; - - // all final bblocks have USE tags, but no successors - assert(r && b->succb == b->succe); - - for (size_t t = r->ltag; t < r->htag; ++t) { - l[fins[t]] = !fixed(tags[t]); - } - } - - memset(done, 0, narc * sizeof(bool)); - postorder(cfg, done, pord, 0); - - for (bool loop = true; loop;) { - loop = false; - - // iterate nodes in postorder - for (cfg_ix_t a = 0; a < narc; ++a) { - const cfg_ix_t i = pord[a]; - const cfg_bb_t *b = cfg.bblocks + i; - bool *old = &live[i * nver]; - - // transition bblocks have no USE tags - assert(!b->rule); - - memcpy(buf1, old, nver * sizeof(bool)); - for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { - const bool *l = &live[*j * nver]; - const tcmd_t *cmd = cfg.bblocks[*j].cmd; - memcpy(buf2, l, nver * sizeof(bool)); - - cfg_t::live_through_bblock(cmd, buf2); - - for (size_t v = 0; v < nver; ++v) { - buf1[v] |= buf2[v]; - } - } - - if (memcmp(old, buf1, nver * sizeof(bool)) != 0) { - memcpy(old, buf1, nver * sizeof(bool)); - loop = true; - } - } - } - - /* note [fallback tag liveness] - * - * Liveness of fallback tag is propagated forward from fallback - * state (see note [fallback states]) and until there remain - * any fallthrough paths from current state. - * - * Fallback version of tag is either backup copy of tag's final - * version, or (if there's no backup) the final version itself. - * Absence of backup means that final version is not overwritten, - * but still we should prevent it from merging with other tags - * (otherwise it may become overwritten). - */ - for (cfg_ix_t i = nfin; i < cfg.nbbfall; ++i) { - const cfg_bb_t *b = cfg.bblocks + i; - const Rule *r = b->rule; - - // all fallback bblocks have USE tags - assert(r); - - memset(buf1, 0, nver * sizeof(bool)); - for (size_t t = r->ltag; t < r->htag; ++t) { - buf1[fins[t]] = !fixed(tags[t]); - } - - // need two passes: same version may occur as both LHS and RHS - // not the same as backward propagation of liveness through bblock - for (const tcmd_t *p = b->cmd; p; p = p->next) { - buf1[p->lhs] = false; - } - for (const tcmd_t *p = b->cmd; p; p = p->next) { - const tagver_t v = p->rhs; - if (v != TAGVER_ZERO) { - buf1[v] = true; - } - } - - for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { - bool *liv = &live[*j * nver]; - for (size_t v = 0; v < nver; ++v) { - liv[v] |= buf1[v]; - } - } - } - - delete[] buf1; - delete[] buf2; - delete[] done; - delete[] pord; + const std::vector &tags = cfg.dfa.tags; + const size_t nver = static_cast(cfg.dfa.maxtagver) + 1; + const cfg_ix_t + narc = cfg.nbbarc, + nfin = cfg.nbbfin; + const tagver_t *fins = cfg.dfa.finvers; + bool *buf1 = new bool[nver]; + bool *buf2 = new bool[nver]; + bool *done = new bool[narc]; + cfg_ix_t *pord = new cfg_ix_t[narc]; + + /* note [control flow equations for tag liveness] + * + * Liveness in bblock B is given by control flow equations: + * live-out(B) = union of live-in(C), for all successors C + * live-in(B) = live-out(B) except defined(B) + * Equations are solved by iteration until fix point. + * + * Live set can only grow on each iteration, it never shrinks. + * Initially all final tag versions used in rules are alive; + * we pre-calculate them and then only update table by adding + * new versions. + */ + + memset(live, 0, nfin * nver * sizeof(bool)); + for (cfg_ix_t i = narc; i < nfin; ++i) { + const cfg_bb_t *b = cfg.bblocks + i; + const Rule *r = b->rule; + bool *l = &live[i * nver]; + + // all final bblocks have USE tags, but no successors + assert(r && b->succb == b->succe); + + for (size_t t = r->ltag; t < r->htag; ++t) { + l[fins[t]] = !fixed(tags[t]); + } + } + + memset(done, 0, narc * sizeof(bool)); + postorder(cfg, done, pord, 0); + + for (bool loop = true; loop;) { + loop = false; + + // iterate nodes in postorder + for (cfg_ix_t a = 0; a < narc; ++a) { + const cfg_ix_t i = pord[a]; + const cfg_bb_t *b = cfg.bblocks + i; + bool *old = &live[i * nver]; + + // transition bblocks have no USE tags + assert(!b->rule); + + memcpy(buf1, old, nver * sizeof(bool)); + for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { + const bool *l = &live[*j * nver]; + const tcmd_t *cmd = cfg.bblocks[*j].cmd; + memcpy(buf2, l, nver * sizeof(bool)); + + cfg_t::live_through_bblock(cmd, buf2); + + for (size_t v = 0; v < nver; ++v) { + buf1[v] |= buf2[v]; + } + } + + if (memcmp(old, buf1, nver * sizeof(bool)) != 0) { + memcpy(old, buf1, nver * sizeof(bool)); + loop = true; + } + } + } + + /* note [fallback tag liveness] + * + * Liveness of fallback tag is propagated forward from fallback + * state (see note [fallback states]) and until there remain + * any fallthrough paths from current state. + * + * Fallback version of tag is either backup copy of tag's final + * version, or (if there's no backup) the final version itself. + * Absence of backup means that final version is not overwritten, + * but still we should prevent it from merging with other tags + * (otherwise it may become overwritten). + */ + for (cfg_ix_t i = nfin; i < cfg.nbbfall; ++i) { + const cfg_bb_t *b = cfg.bblocks + i; + const Rule *r = b->rule; + + // all fallback bblocks have USE tags + assert(r); + + memset(buf1, 0, nver * sizeof(bool)); + for (size_t t = r->ltag; t < r->htag; ++t) { + buf1[fins[t]] = !fixed(tags[t]); + } + + // need two passes: same version may occur as both LHS and RHS + // not the same as backward propagation of liveness through bblock + for (const tcmd_t *p = b->cmd; p; p = p->next) { + buf1[p->lhs] = false; + } + for (const tcmd_t *p = b->cmd; p; p = p->next) { + const tagver_t v = p->rhs; + if (v != TAGVER_ZERO) { + buf1[v] = true; + } + } + + for (cfg_ix_t *j = b->succb; j < b->succe; ++j) { + bool *liv = &live[*j * nver]; + for (size_t v = 0; v < nver; ++v) { + liv[v] |= buf1[v]; + } + } + } + + delete[] buf1; + delete[] buf2; + delete[] done; + delete[] pord; } } // namespace re2c diff --git a/re2c/src/dfa/cfg/normalize.cc b/re2c/src/dfa/cfg/normalize.cc index ac6f09e0..0c81abb5 100644 --- a/re2c/src/dfa/cfg/normalize.cc +++ b/re2c/src/dfa/cfg/normalize.cc @@ -26,90 +26,90 @@ static tcmd_t **normalize(tcmd_t **ps, tcmd_t *e); void cfg_t::normalization(cfg_t &cfg) { - const size_t nver = static_cast(cfg.dfa.maxtagver) + 1; - uint32_t *indeg = new uint32_t[nver]; - memset(indeg, 0, nver * sizeof(uint32_t)); - - cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; - for (; b < e; ++b) { - - // We cannot normalize the list of commands as a whole: the - // relative order of some commands might be significant. - // Therefore we split the list in continuous sublists of - // 'copy', 'save without history' and 'save with history' - // commands and normalize each sublist in a proper way. - tcmd_t **px, *x; - for (px = &b->cmd; (x = *px);) { - if (tcmd_t::iscopy(x)) { - for (x = *px; x && tcmd_t::iscopy(x); x = x->next); - *normalize(px, x) = NULL; // topsort expects NULL terminator - tcmd_t::topsort(px, indeg); - for (; *px; px = &(*px)->next); // find tail - *px = x; // restore tail - } else if (tcmd_t::isset(x)) { - for (x = *px; x && tcmd_t::isset(x); x = x->next); - px = normalize(px, x); - } else { - for (; (x = *px) && tcmd_t::isadd(x); px = &x->next); - // don't normalize, histories may have complex dependencies - } - } - } - - delete[] indeg; + const size_t nver = static_cast(cfg.dfa.maxtagver) + 1; + uint32_t *indeg = new uint32_t[nver]; + memset(indeg, 0, nver * sizeof(uint32_t)); + + cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; + for (; b < e; ++b) { + + // We cannot normalize the list of commands as a whole: the + // relative order of some commands might be significant. + // Therefore we split the list in continuous sublists of + // 'copy', 'save without history' and 'save with history' + // commands and normalize each sublist in a proper way. + tcmd_t **px, *x; + for (px = &b->cmd; (x = *px);) { + if (tcmd_t::iscopy(x)) { + for (x = *px; x && tcmd_t::iscopy(x); x = x->next); + *normalize(px, x) = NULL; // topsort expects NULL terminator + tcmd_t::topsort(px, indeg); + for (; *px; px = &(*px)->next); // find tail + *px = x; // restore tail + } else if (tcmd_t::isset(x)) { + for (x = *px; x && tcmd_t::isset(x); x = x->next); + px = normalize(px, x); + } else { + for (; (x = *px) && tcmd_t::isadd(x); px = &x->next); + // don't normalize, histories may have complex dependencies + } + } + } + + delete[] indeg; } static void swap(tcmd_t &x, tcmd_t &y) { - assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y)); - std::swap(x.lhs, y.lhs); - std::swap(x.rhs, y.rhs); - std::swap(x.history[0], y.history[0]); + assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y)); + std::swap(x.lhs, y.lhs); + std::swap(x.rhs, y.rhs); + std::swap(x.history[0], y.history[0]); } static bool less(const tcmd_t &x, const tcmd_t &y) { - assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y)); - tagver_t u, v; + assert(!tcmd_t::isadd(&x) && !tcmd_t::isadd(&y)); + tagver_t u, v; - u = x.lhs; v = y.lhs; - if (u < v) return true; - if (u > v) return false; + u = x.lhs; v = y.lhs; + if (u < v) return true; + if (u > v) return false; - u = x.rhs; v = y.rhs; - if (u < v) return true; - if (u > v) return false; + u = x.rhs; v = y.rhs; + if (u < v) return true; + if (u > v) return false; - u = x.history[0]; v = y.history[0]; - if (u < v) return true; - if (u > v) return false; + u = x.history[0]; v = y.history[0]; + if (u < v) return true; + if (u > v) return false; - return false; + return false; } tcmd_t **normalize(tcmd_t **ps, tcmd_t *e) { - // sort lexicographically - for (tcmd_t *p = *ps; p != e; p = p->next) { - for (tcmd_t *q = p->next; q != e; q = q->next) { - if (less(*q, *p)) { - swap(*p, *q); - } - } - } - - // delete duplicates - for (tcmd_t *p = *ps; p != e;) { - tcmd_t *q = p->next; - if (q != e && tcmd_t::equal(*p, *q)) { - p->next = q->next; - } else { - p = q; - } - } - - for (; *ps != e; ps = &(*ps)->next); - return ps; + // sort lexicographically + for (tcmd_t *p = *ps; p != e; p = p->next) { + for (tcmd_t *q = p->next; q != e; q = q->next) { + if (less(*q, *p)) { + swap(*p, *q); + } + } + } + + // delete duplicates + for (tcmd_t *p = *ps; p != e;) { + tcmd_t *q = p->next; + if (q != e && tcmd_t::equal(*p, *q)) { + p->next = q->next; + } else { + p = q; + } + } + + for (; *ps != e; ps = &(*ps)->next); + return ps; } } // namespace re2c diff --git a/re2c/src/dfa/cfg/optimize.cc b/re2c/src/dfa/cfg/optimize.cc index c2ea2ed6..723bf448 100644 --- a/re2c/src/dfa/cfg/optimize.cc +++ b/re2c/src/dfa/cfg/optimize.cc @@ -10,37 +10,37 @@ namespace re2c void compact_and_optimize_tags(dfa_t &dfa, bool optimize) { - tagver_t maxver = dfa.maxtagver; - if (maxver > 0) { - cfg_t cfg(dfa); - - size_t nver = static_cast(maxver) + 1; - tagver_t *ver2new = new tagver_t[nver]; - - maxver = cfg_t::compact(cfg, ver2new); - cfg_t::renaming(cfg, ver2new, maxver); - - if (optimize && maxver > 0) { - nver = static_cast(maxver) + 1; - bool *live = new bool[cfg.nbbfin * nver]; - bool *interf = new bool[nver * nver]; - - static const uint32_t NPASS = 2; - for (uint32_t n = 0; n < NPASS; ++n) { - cfg_t::liveness_analysis(cfg, live); - cfg_t::dead_code_elimination(cfg, live); - cfg_t::interference(cfg, live, interf); - maxver = cfg_t::variable_allocation(cfg, interf, ver2new); - cfg_t::renaming(cfg, ver2new, maxver); - cfg_t::normalization(cfg); - } - - delete[] live; - delete[] interf; - } - - delete[] ver2new; - } + tagver_t maxver = dfa.maxtagver; + if (maxver > 0) { + cfg_t cfg(dfa); + + size_t nver = static_cast(maxver) + 1; + tagver_t *ver2new = new tagver_t[nver]; + + maxver = cfg_t::compact(cfg, ver2new); + cfg_t::renaming(cfg, ver2new, maxver); + + if (optimize && maxver > 0) { + nver = static_cast(maxver) + 1; + bool *live = new bool[cfg.nbbfin * nver]; + bool *interf = new bool[nver * nver]; + + static const uint32_t NPASS = 2; + for (uint32_t n = 0; n < NPASS; ++n) { + cfg_t::liveness_analysis(cfg, live); + cfg_t::dead_code_elimination(cfg, live); + cfg_t::interference(cfg, live, interf); + maxver = cfg_t::variable_allocation(cfg, interf, ver2new); + cfg_t::renaming(cfg, ver2new, maxver); + cfg_t::normalization(cfg); + } + + delete[] live; + delete[] interf; + } + + delete[] ver2new; + } } } // namespace re2c diff --git a/re2c/src/dfa/cfg/rename.cc b/re2c/src/dfa/cfg/rename.cc index 615bbf08..24683b82 100644 --- a/re2c/src/dfa/cfg/rename.cc +++ b/re2c/src/dfa/cfg/rename.cc @@ -12,43 +12,43 @@ namespace re2c void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver) { - tagver_t &oldmax = cfg.dfa.maxtagver; - if (oldmax == maxver) return; - oldmax = maxver; - - cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall; - for (; b < be; ++b) { - for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) { - tagver_t &l = p->lhs, &r = p->rhs, h = p->history[0]; - - l = ver2new[l]; - if (r != TAGVER_ZERO) { - r = ver2new[r]; - } - if (l == r && h == TAGVER_ZERO) { - *pp = p->next; - } else { - pp = &p->next; - } - } - } - - // final tag versions - tagver_t *fins = cfg.dfa.finvers; - const std::vector &tags = cfg.dfa.tags; - for (size_t t = 0; t < tags.size(); ++t) { - tagver_t &f = fins[t]; - if (f != TAGVER_ZERO) { // fixed tag or unreachable rule - f = ver2new[f]; - } - } - - // versions of tags with history - std::set newmt, &oldmt = cfg.dfa.mtagvers; - for (std::set::iterator i = oldmt.begin(); i != oldmt.end(); ++i) { - newmt.insert(ver2new[*i]); - } - oldmt.swap(newmt); + tagver_t &oldmax = cfg.dfa.maxtagver; + if (oldmax == maxver) return; + oldmax = maxver; + + cfg_bb_t *b = cfg.bblocks, *be = b + cfg.nbbfall; + for (; b < be; ++b) { + for (tcmd_t *p, **pp = &b->cmd; (p = *pp);) { + tagver_t &l = p->lhs, &r = p->rhs, h = p->history[0]; + + l = ver2new[l]; + if (r != TAGVER_ZERO) { + r = ver2new[r]; + } + if (l == r && h == TAGVER_ZERO) { + *pp = p->next; + } else { + pp = &p->next; + } + } + } + + // final tag versions + tagver_t *fins = cfg.dfa.finvers; + const std::vector &tags = cfg.dfa.tags; + for (size_t t = 0; t < tags.size(); ++t) { + tagver_t &f = fins[t]; + if (f != TAGVER_ZERO) { // fixed tag or unreachable rule + f = ver2new[f]; + } + } + + // versions of tags with history + std::set newmt, &oldmt = cfg.dfa.mtagvers; + for (std::set::iterator i = oldmt.begin(); i != oldmt.end(); ++i) { + newmt.insert(ver2new[*i]); + } + oldmt.swap(newmt); } } // namespace re2c diff --git a/re2c/src/dfa/cfg/varalloc.cc b/re2c/src/dfa/cfg/varalloc.cc index 75d449c7..f5a064c4 100644 --- a/re2c/src/dfa/cfg/varalloc.cc +++ b/re2c/src/dfa/cfg/varalloc.cc @@ -21,124 +21,124 @@ namespace re2c * The algorithm takes quadratic (in the number of tags) time. */ tagver_t cfg_t::variable_allocation(const cfg_t &cfg, const bool *interf, - tagver_t *ver2new) + tagver_t *ver2new) { - const tagver_t - END = std::numeric_limits::max(), - nver = cfg.dfa.maxtagver + 1; - tagver_t *next = new tagver_t[nver]; // list of class members - tagver_t *repr = new tagver_t[nver]; // maps tag to class representative - tagver_t rx, ry, x, y, z; - - std::fill(next, next + nver, END); - std::fill(repr, repr + nver, END); - - // copy coalescing: for each command X = Y, try to merge X and Y - const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; - for (; b < e; ++b) { - for (const tcmd_t *p = b->cmd; p; p = p->next) { - x = p->lhs; - y = p->rhs; - if (y == TAGVER_ZERO || y == x) continue; - - rx = repr[x]; - ry = repr[y]; - - if (rx != END) { - if (ry != END) continue; - for (z = rx; z != END; z = next[z]) { - if (interf[z * nver + y]) break; - } - if (z == END) { - repr[y] = rx; - next[y] = next[rx]; - next[rx] = y; - } - } else if (ry != END) { - for (z = ry; z != END; z = next[z]) { - if (interf[z * nver + x]) break; - } - if (z == END) { - repr[x] = ry; - next[x] = next[ry]; - next[ry] = x; - } - } else if (!interf[x * nver + y]) { - repr[x] = repr[y] = x; - next[x] = y; - } - } - } - - // try to merge equivalence classes left after copy coalescing - for (rx = 0; rx < nver; ++rx) { - if (rx != repr[rx]) continue; - - for (ry = rx + 1; ry < nver; ++ry) { - if (ry != repr[ry]) continue; - - for (x = rx; x != END; x = next[x]) { - for (y = ry; y != END; y = next[y]) { - if (interf[x * nver + y]) break; - } - if (y != END) break; - } - - if (x == END) { - for (y = ry;; y = next[y]) { - repr[y] = rx; - if (next[y] == END) { - next[y] = next[rx]; - next[rx] = ry; - break; - } - } - } - } - } - - // push each remaining tag to any non-interfering class - for (x = 0; x < nver; ++x) { - if (repr[x] != END) continue; - - // try all existing classes - for (rx = 0; rx < nver; ++rx) { - if (rx != repr[rx]) continue; - - // check interference with class members - for (y = rx; y != END; y = next[y]) { - if (interf[x * nver + y]) break; - } - - // no interference; add to class - if (y == END) { - repr[x] = rx; - next[x] = next[rx]; - next[rx] = x; - break; - } - } - - // make new equivalence class - if (rx == nver) { - repr[x] = x; - } - } - - tagver_t maxver = 0; - for (rx = 0; rx < nver; ++rx) { - if (repr[rx] != rx) continue; - - ++maxver; - for (x = rx; x != END; x = next[x]) { - ver2new[x] = maxver; - } - } - - delete[] next; - delete[] repr; - - return maxver; + const tagver_t + END = std::numeric_limits::max(), + nver = cfg.dfa.maxtagver + 1; + tagver_t *next = new tagver_t[nver]; // list of class members + tagver_t *repr = new tagver_t[nver]; // maps tag to class representative + tagver_t rx, ry, x, y, z; + + std::fill(next, next + nver, END); + std::fill(repr, repr + nver, END); + + // copy coalescing: for each command X = Y, try to merge X and Y + const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; + for (; b < e; ++b) { + for (const tcmd_t *p = b->cmd; p; p = p->next) { + x = p->lhs; + y = p->rhs; + if (y == TAGVER_ZERO || y == x) continue; + + rx = repr[x]; + ry = repr[y]; + + if (rx != END) { + if (ry != END) continue; + for (z = rx; z != END; z = next[z]) { + if (interf[z * nver + y]) break; + } + if (z == END) { + repr[y] = rx; + next[y] = next[rx]; + next[rx] = y; + } + } else if (ry != END) { + for (z = ry; z != END; z = next[z]) { + if (interf[z * nver + x]) break; + } + if (z == END) { + repr[x] = ry; + next[x] = next[ry]; + next[ry] = x; + } + } else if (!interf[x * nver + y]) { + repr[x] = repr[y] = x; + next[x] = y; + } + } + } + + // try to merge equivalence classes left after copy coalescing + for (rx = 0; rx < nver; ++rx) { + if (rx != repr[rx]) continue; + + for (ry = rx + 1; ry < nver; ++ry) { + if (ry != repr[ry]) continue; + + for (x = rx; x != END; x = next[x]) { + for (y = ry; y != END; y = next[y]) { + if (interf[x * nver + y]) break; + } + if (y != END) break; + } + + if (x == END) { + for (y = ry;; y = next[y]) { + repr[y] = rx; + if (next[y] == END) { + next[y] = next[rx]; + next[rx] = ry; + break; + } + } + } + } + } + + // push each remaining tag to any non-interfering class + for (x = 0; x < nver; ++x) { + if (repr[x] != END) continue; + + // try all existing classes + for (rx = 0; rx < nver; ++rx) { + if (rx != repr[rx]) continue; + + // check interference with class members + for (y = rx; y != END; y = next[y]) { + if (interf[x * nver + y]) break; + } + + // no interference; add to class + if (y == END) { + repr[x] = rx; + next[x] = next[rx]; + next[rx] = x; + break; + } + } + + // make new equivalence class + if (rx == nver) { + repr[x] = x; + } + } + + tagver_t maxver = 0; + for (rx = 0; rx < nver; ++rx) { + if (repr[rx] != rx) continue; + + ++maxver; + for (x = rx; x != END; x = next[x]) { + ver2new[x] = maxver; + } + } + + delete[] next; + delete[] repr; + + return maxver; } } // namespace re2c diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index 26c95982..a00ec1b4 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -90,414 +90,414 @@ static int32_t pack(int32_t, int32_t); void tagged_epsilon_closure(determ_context_t &ctx) { - closure_t &closure = ctx.dc_closure; - - // build tagged epsilon-closure of the given set of NFA states - if (ctx.dc_opts->posix_captures) { - closure_posix(ctx); - prune(closure, ctx.dc_nfa.rules); - std::sort(closure.begin(), closure.end(), cmpby_rule_state); - orders(ctx); - } else { - closure_leftmost(ctx); - prune(closure, ctx.dc_nfa.rules); - } - - // see note [the difference between TDFA(0) and TDFA(1)] - if (!ctx.dc_opts->lookahead) { - lower_lookahead_to_transition(closure); - } - - // merge tags from different rules, find nondeterministic tags - generate_versions(ctx); + closure_t &closure = ctx.dc_closure; + + // build tagged epsilon-closure of the given set of NFA states + if (ctx.dc_opts->posix_captures) { + closure_posix(ctx); + prune(closure, ctx.dc_nfa.rules); + std::sort(closure.begin(), closure.end(), cmpby_rule_state); + orders(ctx); + } else { + closure_leftmost(ctx); + prune(closure, ctx.dc_nfa.rules); + } + + // see note [the difference between TDFA(0) and TDFA(1)] + if (!ctx.dc_opts->lookahead) { + lower_lookahead_to_transition(closure); + } + + // merge tags from different rules, find nondeterministic tags + generate_versions(ctx); } bool cmpby_rule_state(const clos_t &x, const clos_t &y) { - const nfa_state_t *sx = x.state, *sy = y.state; - const size_t rx = sx->rule, ry = sy->rule; - if (rx < ry) return true; - if (rx > ry) return false; - if (sx < sy) return true; - if (sx > sy) return false; - // all items in closute have different states - return false; + const nfa_state_t *sx = x.state, *sy = y.state; + const size_t rx = sx->rule, ry = sy->rule; + if (rx < ry) return true; + if (rx > ry) return false; + if (sx < sy) return true; + if (sx > sy) return false; + // all items in closute have different states + return false; } nfa_state_t *relax(determ_context_t &ctx, clos_t x) { - closure_t &done = ctx.dc_closure; - nfa_state_t *q = x.state; - const uint32_t idx = q->clos; - int32_t h1, h2; - - // first time we see this state - if (idx == NOCLOS) { - q->clos = static_cast(done.size()); - done.push_back(x); - } - - // States of in-degree less than 2 are not joint points; - // the fact that we are re-scanning this state means that we found - // a better path to some previous state. Due to the right distributivity - // of path comparison over path concatenation (X < Y => XZ < YZ) we - // can just propagate the new path up to the next join point. - else if (q->indeg < 2) { - done[idx] = x; - } - - // join point; compare the new path and the old path - else if (precedence(ctx, x, done[idx], h1, h2) < 0) { - done[idx] = x; - } - - // the previous path was better, discard the new one - else { - q = NULL; - } - - return q; + closure_t &done = ctx.dc_closure; + nfa_state_t *q = x.state; + const uint32_t idx = q->clos; + int32_t h1, h2; + + // first time we see this state + if (idx == NOCLOS) { + q->clos = static_cast(done.size()); + done.push_back(x); + } + + // States of in-degree less than 2 are not joint points; + // the fact that we are re-scanning this state means that we found + // a better path to some previous state. Due to the right distributivity + // of path comparison over path concatenation (X < Y => XZ < YZ) we + // can just propagate the new path up to the next join point. + else if (q->indeg < 2) { + done[idx] = x; + } + + // join point; compare the new path and the old path + else if (precedence(ctx, x, done[idx], h1, h2) < 0) { + done[idx] = x; + } + + // the previous path was better, discard the new one + else { + q = NULL; + } + + return q; } nfa_state_t *explore(determ_context_t &ctx, nfa_state_t *q) { - // find the next admissible transition, adjust the index - // of the next transition and return the to-state - nfa_state_t *p = NULL; - clos_t x = ctx.dc_closure[q->clos]; - switch (q->type) { - case nfa_state_t::NIL: - if (q->arcidx == 0) { - x.state = q->nil.out; - p = relax(ctx, x); - ++q->arcidx; - } - break; - case nfa_state_t::ALT: - if (q->arcidx == 0) { - x.state = q->alt.out1; - p = relax(ctx, x); - ++q->arcidx; - } - if (q->arcidx == 1 && !p) { - x.state = q->alt.out2; - p = relax(ctx, x); - ++q->arcidx; - } - break; - case nfa_state_t::TAG: - if (q->arcidx == 0) { - x.state = q->tag.out; - x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info); - p = relax(ctx, x); - ++q->arcidx; - } - break; - case nfa_state_t::RAN: - case nfa_state_t::FIN: - break; - } - return p; + // find the next admissible transition, adjust the index + // of the next transition and return the to-state + nfa_state_t *p = NULL; + clos_t x = ctx.dc_closure[q->clos]; + switch (q->type) { + case nfa_state_t::NIL: + if (q->arcidx == 0) { + x.state = q->nil.out; + p = relax(ctx, x); + ++q->arcidx; + } + break; + case nfa_state_t::ALT: + if (q->arcidx == 0) { + x.state = q->alt.out1; + p = relax(ctx, x); + ++q->arcidx; + } + if (q->arcidx == 1 && !p) { + x.state = q->alt.out2; + p = relax(ctx, x); + ++q->arcidx; + } + break; + case nfa_state_t::TAG: + if (q->arcidx == 0) { + x.state = q->tag.out; + x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info); + p = relax(ctx, x); + ++q->arcidx; + } + break; + case nfa_state_t::RAN: + case nfa_state_t::FIN: + break; + } + return p; } void closure_posix(determ_context_t &ctx) { - const closure_t &init = ctx.dc_reached; - closure_t &done = ctx.dc_closure; - std::stack - &topsort = ctx.dc_stack_topsort, - &linear = ctx.dc_stack_linear; - nfa_state_t *q, *p; - - done.clear(); - - // enqueue all initial states (there might be duplicates) - for (cclositer_t c = init.begin(); c != init.end(); ++c) { - q = relax(ctx, *c); - if (q) { - topsort.push(q); - q->status = GOR_TOPSORT; - } - } - - // Gordberg-Radzik 'shortest path' algorithm. - // Papers: 1993, "A heuristic improvement of the Bellman-Ford - // algorithm" by Goldberg, Radzik and 1996, Shortest paths algorithms: - // Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik. - // Complexity for digraph G=(V,E) is O(|V|*|E|). - for (; !topsort.empty(); ) { - - // 1st pass: scan admissible subgraph reachable from B-stack - // and topologically sort it (this can be done by a single - // depth-first postorder traversal) - for (; !topsort.empty(); ) { - q = topsort.top(); - topsort.pop(); - - if (q->status != GOR_LINEAR) { - q->status = GOR_TOPSORT; - - // find next admissible transition - while ((p = explore(ctx, q)) - && p->status != GOR_NOPASS) { - p->active = 1; - } - - // follow the admissible transition - if (p) { - topsort.push(q); - topsort.push(p); - p->arcidx = 0; - } - // done with this state: all deps visited - else { - q->status = GOR_LINEAR; - linear.push(q); - } - } - } - - // 2nd pass: scan topologically ordered states from A-stack - // and push head states of relaxed transitions to B-stack - for (; !linear.empty(); ) { - q = linear.top(); - linear.pop(); - - if (q->active) { - // scan admissible transitions - q->arcidx = 0; - while ((p = explore(ctx, q))) { - if (p->status == GOR_NOPASS) { - topsort.push(p); - p->arcidx = 0; - } - else if (p->status == GOR_LINEAR) { - p->active = 1; - } - } - } - - q->status = GOR_NOPASS; - q->active = 0; - } - } - - // clean up (do this before removing any states from closure) - for (clositer_t i = done.begin(); i != done.end(); ++i) { - q = i->state; - q->clos = NOCLOS; - q->arcidx = 0; - assert(q->status == GOR_NOPASS && q->active == 0); - } + const closure_t &init = ctx.dc_reached; + closure_t &done = ctx.dc_closure; + std::stack + &topsort = ctx.dc_stack_topsort, + &linear = ctx.dc_stack_linear; + nfa_state_t *q, *p; + + done.clear(); + + // enqueue all initial states (there might be duplicates) + for (cclositer_t c = init.begin(); c != init.end(); ++c) { + q = relax(ctx, *c); + if (q) { + topsort.push(q); + q->status = GOR_TOPSORT; + } + } + + // Gordberg-Radzik 'shortest path' algorithm. + // Papers: 1993, "A heuristic improvement of the Bellman-Ford + // algorithm" by Goldberg, Radzik and 1996, Shortest paths algorithms: + // Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik. + // Complexity for digraph G=(V,E) is O(|V|*|E|). + for (; !topsort.empty(); ) { + + // 1st pass: scan admissible subgraph reachable from B-stack + // and topologically sort it (this can be done by a single + // depth-first postorder traversal) + for (; !topsort.empty(); ) { + q = topsort.top(); + topsort.pop(); + + if (q->status != GOR_LINEAR) { + q->status = GOR_TOPSORT; + + // find next admissible transition + while ((p = explore(ctx, q)) + && p->status != GOR_NOPASS) { + p->active = 1; + } + + // follow the admissible transition + if (p) { + topsort.push(q); + topsort.push(p); + p->arcidx = 0; + } + // done with this state: all deps visited + else { + q->status = GOR_LINEAR; + linear.push(q); + } + } + } + + // 2nd pass: scan topologically ordered states from A-stack + // and push head states of relaxed transitions to B-stack + for (; !linear.empty(); ) { + q = linear.top(); + linear.pop(); + + if (q->active) { + // scan admissible transitions + q->arcidx = 0; + while ((p = explore(ctx, q))) { + if (p->status == GOR_NOPASS) { + topsort.push(p); + p->arcidx = 0; + } + else if (p->status == GOR_LINEAR) { + p->active = 1; + } + } + } + + q->status = GOR_NOPASS; + q->active = 0; + } + } + + // clean up (do this before removing any states from closure) + for (clositer_t i = done.begin(); i != done.end(); ++i) { + q = i->state; + q->clos = NOCLOS; + q->arcidx = 0; + assert(q->status == GOR_NOPASS && q->active == 0); + } } void closure_leftmost(determ_context_t &ctx) { - const closure_t &init = ctx.dc_reached; - closure_t &done = ctx.dc_closure; - std::stack &todo = ctx.dc_stack_dfs; - - // enqueue all initial states - done.clear(); - for (rcclositer_t c = init.rbegin(); c != init.rend(); ++c) { - todo.push(*c); - } - - // DFS; linear complexity - for (; !todo.empty(); ) { - clos_t x = todo.top(); - todo.pop(); - nfa_state_t *n = x.state; - - if (n->clos == NOCLOS) { - n->clos = static_cast(done.size()); - done.push_back(x); - - switch (n->type) { - case nfa_state_t::NIL: - x.state = n->nil.out; - todo.push(x); - break; - case nfa_state_t::ALT: - x.state = n->alt.out2; - todo.push(x); - x.state = n->alt.out1; - todo.push(x); - break; - case nfa_state_t::TAG: - x.state = n->tag.out; - x.tlook = ctx.dc_taghistory.push(x.tlook, n->tag.info); - todo.push(x); - break; - case nfa_state_t::RAN: - case nfa_state_t::FIN: - break; - } - } - } - - // reset associated closure items - // (do this before removing any states from closure) - for (clositer_t i = done.begin(); i != done.end(); ++i) { - i->state->clos = NOCLOS; - } + const closure_t &init = ctx.dc_reached; + closure_t &done = ctx.dc_closure; + std::stack &todo = ctx.dc_stack_dfs; + + // enqueue all initial states + done.clear(); + for (rcclositer_t c = init.rbegin(); c != init.rend(); ++c) { + todo.push(*c); + } + + // DFS; linear complexity + for (; !todo.empty(); ) { + clos_t x = todo.top(); + todo.pop(); + nfa_state_t *n = x.state; + + if (n->clos == NOCLOS) { + n->clos = static_cast(done.size()); + done.push_back(x); + + switch (n->type) { + case nfa_state_t::NIL: + x.state = n->nil.out; + todo.push(x); + break; + case nfa_state_t::ALT: + x.state = n->alt.out2; + todo.push(x); + x.state = n->alt.out1; + todo.push(x); + break; + case nfa_state_t::TAG: + x.state = n->tag.out; + x.tlook = ctx.dc_taghistory.push(x.tlook, n->tag.info); + todo.push(x); + break; + case nfa_state_t::RAN: + case nfa_state_t::FIN: + break; + } + } + } + + // reset associated closure items + // (do this before removing any states from closure) + for (clositer_t i = done.begin(); i != done.end(); ++i) { + i->state->clos = NOCLOS; + } } void prune(closure_t &closure, std::valarray &rules) { - clositer_t b = closure.begin(), e = closure.end(), i, j; - - // drop "inner" states (non-final without outgoing non-epsilon transitions) - j = std::stable_partition(b, e, clos_t::ran); - e = std::stable_partition(j, e, clos_t::fin); - size_t n = static_cast(e - b); - - // drop all final states except one; mark dropped rules as shadowed - // see note [at most one final item per closure] - if (j != e) { - std::sort(j, e, cmpby_rule_state); - const uint32_t l = rules[j->state->rule].code->fline; - for (i = j; ++i < e;) { - rules[i->state->rule].shadow.insert(l); - } - n = static_cast(j - b) + 1; - } - - closure.resize(n); + clositer_t b = closure.begin(), e = closure.end(), i, j; + + // drop "inner" states (non-final without outgoing non-epsilon transitions) + j = std::stable_partition(b, e, clos_t::ran); + e = std::stable_partition(j, e, clos_t::fin); + size_t n = static_cast(e - b); + + // drop all final states except one; mark dropped rules as shadowed + // see note [at most one final item per closure] + if (j != e) { + std::sort(j, e, cmpby_rule_state); + const uint32_t l = rules[j->state->rule].code->fline; + for (i = j; ++i < e;) { + rules[i->state->rule].shadow.insert(l); + } + n = static_cast(j - b) + 1; + } + + closure.resize(n); } void lower_lookahead_to_transition(closure_t &closure) { - for (clositer_t c = closure.begin(); c != closure.end(); ++c) { - c->ttran = c->tlook; - c->tlook = HROOT; - } + for (clositer_t c = closure.begin(); c != closure.end(); ++c) { + c->ttran = c->tlook; + c->tlook = HROOT; + } } void generate_versions(determ_context_t &ctx) { - dfa_t &dfa = ctx.dc_dfa; - const std::vector &tags = dfa.tags; - const size_t ntag = tags.size(); - tagver_t &maxver = dfa.maxtagver; - tagver_table_t &tvtbl = ctx.dc_tagvertbl; - tagver_t *vers = tvtbl.buffer; - closure_t &clos = ctx.dc_closure; - tag_history_t &thist = ctx.dc_taghistory; - newvers_t &newvers = ctx.dc_newvers; - - clositer_t b = clos.begin(), e = clos.end(), c; - newver_cmp_t cmp(thist); - newvers_t newacts(cmp); - tcmd_t *cmd = NULL; - - // for each tag, if there is at least one tagged transition, - // allocate new version (negative for bottom and positive for - // normal transition, however absolute value should be unique - // among all versions of all tags) - for (c = b; c != e; ++c) { - const hidx_t l = c->tlook, h = c->ttran; - if (h == HROOT) continue; - - const tagver_t *vs = tvtbl[c->tvers]; - for (size_t t = 0; t < ntag; ++t) { - const Tag &tag = tags[t]; - const tagver_t - h0 = thist.last(h, t), - l0 = thist.last(l, t); - - if (h0 == TAGVER_ZERO) continue; - - const tagver_t v = history(tag) ? vs[t] : TAGVER_ZERO; - newver_t x = {t, v, h}; - const tagver_t - n = (maxver + 1) * (h0 == TAGVER_BOTTOM ? -1 : 1), - m = newvers.insert(std::make_pair(x, n)).first->second; - if (n == m) ++maxver; - - if (!fixed(tag) && (l0 == TAGVER_ZERO || history(tag))) { - newacts.insert(std::make_pair(x, m)); - } - } - } - - // actions - for (newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) { - const tagver_t m = i->second, v = i->first.base; - const hidx_t h = i->first.history; - const size_t t = i->first.tag; - if (history(tags[t])) { - cmd = dfa.tcpool.make_add(cmd, abs(m), abs(v), thist, h, t); - } else { - cmd = dfa.tcpool.make_set(cmd, abs(m), thist.last(h, t)); - } - } - - // mark tags with history - for (newvers_t::iterator j = newvers.begin(); j != newvers.end(); ++j) { - if (history(tags[j->first.tag])) { - dfa.mtagvers.insert(abs(j->second)); - } - } - - // update tag versions in closure - for (c = b; c != e; ++c) { - const hidx_t h = c->ttran; - if (h == HROOT) continue; - - const tagver_t *vs = tvtbl[c->tvers]; - for (size_t t = 0; t < ntag; ++t) { - const tagver_t - v0 = vs[t], - h0 = thist.last(h, t), - v = history(tags[t]) ? v0 : TAGVER_ZERO; - if (h0 == TAGVER_ZERO) { - vers[t] = v0; - } else { - newver_t x = {t, v, h}; - vers[t] = newvers[x]; - } - } - c->tvers = tvtbl.insert(vers); - } - - ctx.dc_actions = cmd; + dfa_t &dfa = ctx.dc_dfa; + const std::vector &tags = dfa.tags; + const size_t ntag = tags.size(); + tagver_t &maxver = dfa.maxtagver; + tagver_table_t &tvtbl = ctx.dc_tagvertbl; + tagver_t *vers = tvtbl.buffer; + closure_t &clos = ctx.dc_closure; + tag_history_t &thist = ctx.dc_taghistory; + newvers_t &newvers = ctx.dc_newvers; + + clositer_t b = clos.begin(), e = clos.end(), c; + newver_cmp_t cmp(thist); + newvers_t newacts(cmp); + tcmd_t *cmd = NULL; + + // for each tag, if there is at least one tagged transition, + // allocate new version (negative for bottom and positive for + // normal transition, however absolute value should be unique + // among all versions of all tags) + for (c = b; c != e; ++c) { + const hidx_t l = c->tlook, h = c->ttran; + if (h == HROOT) continue; + + const tagver_t *vs = tvtbl[c->tvers]; + for (size_t t = 0; t < ntag; ++t) { + const Tag &tag = tags[t]; + const tagver_t + h0 = thist.last(h, t), + l0 = thist.last(l, t); + + if (h0 == TAGVER_ZERO) continue; + + const tagver_t v = history(tag) ? vs[t] : TAGVER_ZERO; + newver_t x = {t, v, h}; + const tagver_t + n = (maxver + 1) * (h0 == TAGVER_BOTTOM ? -1 : 1), + m = newvers.insert(std::make_pair(x, n)).first->second; + if (n == m) ++maxver; + + if (!fixed(tag) && (l0 == TAGVER_ZERO || history(tag))) { + newacts.insert(std::make_pair(x, m)); + } + } + } + + // actions + for (newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) { + const tagver_t m = i->second, v = i->first.base; + const hidx_t h = i->first.history; + const size_t t = i->first.tag; + if (history(tags[t])) { + cmd = dfa.tcpool.make_add(cmd, abs(m), abs(v), thist, h, t); + } else { + cmd = dfa.tcpool.make_set(cmd, abs(m), thist.last(h, t)); + } + } + + // mark tags with history + for (newvers_t::iterator j = newvers.begin(); j != newvers.end(); ++j) { + if (history(tags[j->first.tag])) { + dfa.mtagvers.insert(abs(j->second)); + } + } + + // update tag versions in closure + for (c = b; c != e; ++c) { + const hidx_t h = c->ttran; + if (h == HROOT) continue; + + const tagver_t *vs = tvtbl[c->tvers]; + for (size_t t = 0; t < ntag; ++t) { + const tagver_t + v0 = vs[t], + h0 = thist.last(h, t), + v = history(tags[t]) ? v0 : TAGVER_ZERO; + if (h0 == TAGVER_ZERO) { + vers[t] = v0; + } else { + newver_t x = {t, v, h}; + vers[t] = newvers[x]; + } + } + c->tvers = tvtbl.insert(vers); + } + + ctx.dc_actions = cmd; } int32_t pack(int32_t longest, int32_t leftmost) { - // leftmost: higher 2 bits, longest: lower 30 bits - return longest | (leftmost << 30); + // leftmost: higher 2 bits, longest: lower 30 bits + return longest | (leftmost << 30); } void orders(determ_context_t &ctx) { - closure_t &closure = ctx.dc_closure; - const size_t nclos = closure.size(); - - prectable_t *prectbl = ctx.dc_allocator.alloct(nclos * nclos); - - for (size_t i = 0; i < nclos; ++i) { - for (size_t j = i + 1; j < nclos; ++j) { - int32_t rho1, rho2, l; - l = precedence (ctx, closure[i], closure[j], rho1, rho2); - prectbl[i * nclos + j] = pack(rho1, l); - prectbl[j * nclos + i] = pack(rho2, -l); - } - prectbl[i * nclos + i] = 0; - } - - ctx.dc_prectbl = prectbl; + closure_t &closure = ctx.dc_closure; + const size_t nclos = closure.size(); + + prectable_t *prectbl = ctx.dc_allocator.alloct(nclos * nclos); + + for (size_t i = 0; i < nclos; ++i) { + for (size_t j = i + 1; j < nclos; ++j) { + int32_t rho1, rho2, l; + l = precedence (ctx, closure[i], closure[j], rho1, rho2); + prectbl[i * nclos + j] = pack(rho1, l); + prectbl[j * nclos + i] = pack(rho2, -l); + } + prectbl[i * nclos + i] = 0; + } + + ctx.dc_prectbl = prectbl; } } // namespace re2c diff --git a/re2c/src/dfa/dead_rules.cc b/re2c/src/dfa/dead_rules.cc index d0960149..70d91d5b 100644 --- a/re2c/src/dfa/dead_rules.cc +++ b/re2c/src/dfa/dead_rules.cc @@ -66,195 +66,195 @@ struct tcmd_t; // reversed DFA struct rdfa_t { - struct arc_t - { - size_t dest; - arc_t *next; - }; - - struct state_t - { - arc_t *arcs; - size_t rule; - bool fallthru; - }; - - size_t nstates; - size_t nrules; - state_t *states; - arc_t *arcs; - - explicit rdfa_t(const dfa_t &dfa) - : nstates(dfa.states.size()) - , nrules(dfa.rules.size()) - , states(new state_t[nstates]()) - , arcs(new arc_t[nstates * dfa.nchars]) - { - // init states - for (size_t i = 0; i < nstates; ++i) { - state_t &s = states[i]; - s.arcs = NULL; - const size_t r = dfa.states[i]->rule; - s.rule = r == Rule::NONE ? nrules : r; - s.fallthru = false; - } - // init arcs - arc_t *a = arcs; - for (size_t i = 0; i < nstates; ++i) { - dfa_state_t *s = dfa.states[i]; - for (size_t c = 0; c < dfa.nchars; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL) { - a->dest = i; - a->next = states[j].arcs; - states[j].arcs = a++; - } else { - states[i].fallthru = true; - } - } - } - } - - ~rdfa_t() - { - delete[] states; - delete[] arcs; - } - - FORBID_COPY(rdfa_t); + struct arc_t + { + size_t dest; + arc_t *next; + }; + + struct state_t + { + arc_t *arcs; + size_t rule; + bool fallthru; + }; + + size_t nstates; + size_t nrules; + state_t *states; + arc_t *arcs; + + explicit rdfa_t(const dfa_t &dfa) + : nstates(dfa.states.size()) + , nrules(dfa.rules.size()) + , states(new state_t[nstates]()) + , arcs(new arc_t[nstates * dfa.nchars]) + { + // init states + for (size_t i = 0; i < nstates; ++i) { + state_t &s = states[i]; + s.arcs = NULL; + const size_t r = dfa.states[i]->rule; + s.rule = r == Rule::NONE ? nrules : r; + s.fallthru = false; + } + // init arcs + arc_t *a = arcs; + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL) { + a->dest = i; + a->next = states[j].arcs; + states[j].arcs = a++; + } else { + states[i].fallthru = true; + } + } + } + } + + ~rdfa_t() + { + delete[] states; + delete[] arcs; + } + + FORBID_COPY(rdfa_t); }; static void backprop(const rdfa_t &rdfa, bool *live, - size_t rule, size_t state) + size_t rule, size_t state) { - // "none-rule" is unreachable from final states: - // be careful to mask it before propagating - const rdfa_t::state_t &s = rdfa.states[state]; - if (rule == rdfa.nrules) { - rule = s.rule; - } - - // if the rule has already been set, than either it's a loop - // or another branch of back propagation has already been here, - // in both cases we should stop: there's nothing new to propagate - bool &l = live[rule * rdfa.nstates + state]; - if (l) return; - l = true; - - for (const rdfa_t::arc_t *a = s.arcs; a; a = a->next) { - backprop(rdfa, live, rule, a->dest); - } + // "none-rule" is unreachable from final states: + // be careful to mask it before propagating + const rdfa_t::state_t &s = rdfa.states[state]; + if (rule == rdfa.nrules) { + rule = s.rule; + } + + // if the rule has already been set, than either it's a loop + // or another branch of back propagation has already been here, + // in both cases we should stop: there's nothing new to propagate + bool &l = live[rule * rdfa.nstates + state]; + if (l) return; + l = true; + + for (const rdfa_t::arc_t *a = s.arcs; a; a = a->next) { + backprop(rdfa, live, rule, a->dest); + } } static void liveness_analyses(const rdfa_t &rdfa, bool *live) { - for (size_t i = 0; i < rdfa.nstates; ++i) { - const rdfa_t::state_t &s = rdfa.states[i]; - if (s.fallthru) { - backprop(rdfa, live, s.rule, i); - } - } + for (size_t i = 0; i < rdfa.nstates; ++i) { + const rdfa_t::state_t &s = rdfa.states[i]; + if (s.fallthru) { + backprop(rdfa, live, s.rule, i); + } + } } static void warn_dead_rules(const dfa_t &dfa, size_t defrule, - const std::string &cond, const bool *live, Warn &warn) + const std::string &cond, const bool *live, Warn &warn) { - const size_t nstates = dfa.states.size(); - const size_t nrules = dfa.rules.size(); - - for (size_t i = 0; i < nstates; ++i) { - const size_t r = dfa.states[i]->rule; - if (r != Rule::NONE && !live[r * nstates + i]) { - // skip last rule (it's the NONE-rule) - for (size_t j = 0; j < nrules; ++j) { - if (live[j * nstates + i]) { - dfa.rules[r].shadow.insert(dfa.rules[j].code->fline); - } - } - } - } - - for (size_t i = 0; i < nrules; ++i) { - // default rule '*' should not be reported - if (i != defrule && !live[i * nstates]) { - warn.unreachable_rule(cond, dfa.rules[i]); - } - } + const size_t nstates = dfa.states.size(); + const size_t nrules = dfa.rules.size(); + + for (size_t i = 0; i < nstates; ++i) { + const size_t r = dfa.states[i]->rule; + if (r != Rule::NONE && !live[r * nstates + i]) { + // skip last rule (it's the NONE-rule) + for (size_t j = 0; j < nrules; ++j) { + if (live[j * nstates + i]) { + dfa.rules[r].shadow.insert(dfa.rules[j].code->fline); + } + } + } + } + + for (size_t i = 0; i < nrules; ++i) { + // default rule '*' should not be reported + if (i != defrule && !live[i * nstates]) { + warn.unreachable_rule(cond, dfa.rules[i]); + } + } } static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru) { - const size_t - nstates = dfa.states.size(), - nsym = dfa.nchars; - - for (size_t i = 0; i < nstates; ++i) { - dfa_state_t *s = dfa.states[i]; - if (s->rule == Rule::NONE) continue; - - // final state is useful iff there is at least one - // non-accepting path from this state - bool shadowed = true; - for (size_t c = 0; c < nsym; ++c) { - const size_t j = s->arcs[c]; - if (j == dfa_t::NIL || fallthru[j]) { - shadowed = false; - break; - } - } - - if (shadowed) { - s->rule = Rule::NONE; - s->tcmd[nsym] = NULL; - } - } + const size_t + nstates = dfa.states.size(), + nsym = dfa.nchars; + + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + if (s->rule == Rule::NONE) continue; + + // final state is useful iff there is at least one + // non-accepting path from this state + bool shadowed = true; + for (size_t c = 0; c < nsym; ++c) { + const size_t j = s->arcs[c]; + if (j == dfa_t::NIL || fallthru[j]) { + shadowed = false; + break; + } + } + + if (shadowed) { + s->rule = Rule::NONE; + s->tcmd[nsym] = NULL; + } + } } static void find_fallback_states(dfa_t &dfa, const bool *fallthru) { - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - - for (size_t i = 0; i < nstate; ++i) { - dfa_state_t *s = dfa.states[i]; - - s->fallthru = fallthru[i]; - - if (s->rule != Rule::NONE) { - for (size_t c = 0; c < nsym; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL && fallthru[j]) { - s->fallback = true; - break; - } - } - } - } + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + + for (size_t i = 0; i < nstate; ++i) { + dfa_state_t *s = dfa.states[i]; + + s->fallthru = fallthru[i]; + + if (s->rule != Rule::NONE) { + for (size_t c = 0; c < nsym; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL && fallthru[j]) { + s->fallback = true; + break; + } + } + } + } } void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond, Warn &warn) { - const rdfa_t rdfa(dfa); - const size_t - ns = rdfa.nstates, - nl = (rdfa.nrules + 1) * ns; - bool *live = new bool[nl], - *fallthru = live + nl - ns; - memset(live, 0, nl * sizeof(bool)); - - liveness_analyses(rdfa, live); - warn_dead_rules(dfa, defrule, cond, live, warn); - remove_dead_final_states(dfa, fallthru); - find_fallback_states(dfa, fallthru); - - delete[] live; + const rdfa_t rdfa(dfa); + const size_t + ns = rdfa.nstates, + nl = (rdfa.nrules + 1) * ns; + bool *live = new bool[nl], + *fallthru = live + nl - ns; + memset(live, 0, nl * sizeof(bool)); + + liveness_analyses(rdfa, live); + warn_dead_rules(dfa, defrule, cond, live, warn); + remove_dead_final_states(dfa, fallthru); + find_fallback_states(dfa, fallthru); + + delete[] live; } } // namespace re2c diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 5e0a5ab3..8de098a6 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -33,111 +33,111 @@ const uint32_t dfa_t::NIL = ~0u; nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) { - if (state->type != nfa_state_t::RAN) { - return NULL; - } - for (const Range *r = state->ran.ran; r; r = r->next()) { - if ((r->lower() <= symbol) && (symbol < r->upper())) { - return state->ran.out; - } - } - return NULL; + if (state->type != nfa_state_t::RAN) { + return NULL; + } + for (const Range *r = state->ran.ran; r; r = r->next()) { + if ((r->lower() <= symbol) && (symbol < r->upper())) { + return state->ran.out; + } + } + return NULL; } void reach_on_symbol(determ_context_t &ctx) { - const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin]; - closure_t &reached = ctx.dc_reached; - const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol]; - - reached.clear(); - for (uint32_t i = 0; i < kernel->size; ++i) { - nfa_state_t *s = transition(kernel->state[i], symbol); - if (s) { - clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT}; - reached.push_back(c); - } - } + const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin]; + closure_t &reached = ctx.dc_reached; + const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol]; + + reached.clear(); + for (uint32_t i = 0; i < kernel->size; ++i) { + nfa_state_t *s = transition(kernel->state[i], symbol); + if (s) { + clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT}; + reached.push_back(c); + } + } } static uint32_t init_tag_versions(determ_context_t &ctx) { - dfa_t &dfa = ctx.dc_dfa; - const size_t ntags = dfa.tags.size(); - - // all-zero tag configuration must have static number zero - assert(ZERO_TAGS == ctx.dc_tagvertbl.insert_const(TAGVER_ZERO)); - - // initial tag versions: [1 .. N] - const uint32_t INITIAL_TAGS = ctx.dc_tagvertbl.insert_succ(1); - - // other versions: [ .. -(N + 1)] and [N + 1 .. ] - dfa.maxtagver = static_cast(ntags); - - // final/fallback versions will be assigned on the go - dfa.finvers = new tagver_t[ntags]; - for (size_t i = 0; i < ntags; ++i) { - dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver; - } - - // mark tags with history (initial and final) - for (size_t i = 0; i < ntags; ++i) { - if (history(dfa.tags[i])) { - tagver_t v = static_cast(i) + 1, f = dfa.finvers[i]; - if (f != TAGVER_ZERO) { - dfa.mtagvers.insert(f); - } - dfa.mtagvers.insert(v); - } - } - - return INITIAL_TAGS; + dfa_t &dfa = ctx.dc_dfa; + const size_t ntags = dfa.tags.size(); + + // all-zero tag configuration must have static number zero + assert(ZERO_TAGS == ctx.dc_tagvertbl.insert_const(TAGVER_ZERO)); + + // initial tag versions: [1 .. N] + const uint32_t INITIAL_TAGS = ctx.dc_tagvertbl.insert_succ(1); + + // other versions: [ .. -(N + 1)] and [N + 1 .. ] + dfa.maxtagver = static_cast(ntags); + + // final/fallback versions will be assigned on the go + dfa.finvers = new tagver_t[ntags]; + for (size_t i = 0; i < ntags; ++i) { + dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver; + } + + // mark tags with history (initial and final) + for (size_t i = 0; i < ntags; ++i) { + if (history(dfa.tags[i])) { + tagver_t v = static_cast(i) + 1, f = dfa.finvers[i]; + if (f != TAGVER_ZERO) { + dfa.mtagvers.insert(f); + } + dfa.mtagvers.insert(v); + } + } + + return INITIAL_TAGS; } dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn) - : states() - , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges - , charset(nfa.charset) - , rules(nfa.rules) - , tags(nfa.tags) - , mtagvers(*new std::set) - , finvers(NULL) - , tcpool(*new tcpool_t) - , maxtagver(0) - , tcmd0(NULL) - , tcid0(TCID0) + : states() + , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges + , charset(nfa.charset) + , rules(nfa.rules) + , tags(nfa.tags) + , mtagvers(*new std::set) + , finvers(NULL) + , tcpool(*new tcpool_t) + , maxtagver(0) + , tcmd0(NULL) + , tcid0(TCID0) { - determ_context_t ctx(opts, warn, cond, nfa, *this); + determ_context_t ctx(opts, warn, cond, nfa, *this); - const uint32_t INITIAL_TAGS = init_tag_versions(ctx); + const uint32_t INITIAL_TAGS = init_tag_versions(ctx); - // initial state - const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; - ctx.dc_reached.push_back(c0); - tagged_epsilon_closure(ctx); - find_state(ctx); + // initial state + const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; + ctx.dc_reached.push_back(c0); + tagged_epsilon_closure(ctx); + find_state(ctx); - // iterate while new kernels are added: for each alphabet symbol, - // build tagged epsilon-closure of all reachable NFA states, - // then find identical or mappable DFA state or add a new one - for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { + // iterate while new kernels are added: for each alphabet symbol, + // build tagged epsilon-closure of all reachable NFA states, + // then find identical or mappable DFA state or add a new one + for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { - ctx.dc_origin = i; - ctx.dc_newvers.clear(); + ctx.dc_origin = i; + ctx.dc_newvers.clear(); - for (uint32_t c = 0; c < nchars; ++c) { - ctx.dc_symbol = c; + for (uint32_t c = 0; c < nchars; ++c) { + ctx.dc_symbol = c; - reach_on_symbol(ctx); - tagged_epsilon_closure(ctx); - find_state(ctx); - } - } + reach_on_symbol(ctx); + tagged_epsilon_closure(ctx); + find_state(ctx); + } + } - warn_nondeterministic_tags(ctx); + warn_nondeterministic_tags(ctx); } @@ -147,103 +147,103 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn // WARNING: this function assumes that kernel items are grouped by rule void warn_nondeterministic_tags(const determ_context_t &ctx) { - if (ctx.dc_opts->posix_captures) return; - - Warn &warn = ctx.dc_warn; - const kernels_t &kernels = ctx.dc_kernels; - const std::vector &tags = ctx.dc_dfa.tags; - const std::valarray &rules = ctx.dc_dfa.rules; - - const size_t - ntag = tags.size(), - nkrn = kernels.size(), - nrule = rules.size(); - std::vector maxv(ntag, 0); - std::set uniq; - - for (uint32_t i = 0; i < nkrn; ++i) { - const kernel_t *k = kernels[i]; - nfa_state_t **s = k->state; - const size_t n = k->size; - const uint32_t *v = k->tvers; - - for (size_t u = 0; u < n;) { - const size_t r = s[u]->rule; - const Rule &rule = rules[r]; - - const size_t l = u; - for (; ++u < n && s[u]->rule == r;); - for (size_t t = rule.ltag; t < rule.htag; ++t) { - uniq.clear(); - for (size_t m = l; m < u; ++m) { - uniq.insert(ctx.dc_tagvertbl[v[m]][t]); - } - maxv[t] = std::max(maxv[t], uniq.size()); - } - } - } - - for (uint32_t r = 0; r < nrule; ++r) { - const Rule &rule = rules[r]; - for (size_t t = rule.ltag; t < rule.htag; ++t) { - const size_t m = maxv[t]; - if (m > 1) { - const uint32_t line = rule.code->fline; - warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m); - } - } - } + if (ctx.dc_opts->posix_captures) return; + + Warn &warn = ctx.dc_warn; + const kernels_t &kernels = ctx.dc_kernels; + const std::vector &tags = ctx.dc_dfa.tags; + const std::valarray &rules = ctx.dc_dfa.rules; + + const size_t + ntag = tags.size(), + nkrn = kernels.size(), + nrule = rules.size(); + std::vector maxv(ntag, 0); + std::set uniq; + + for (uint32_t i = 0; i < nkrn; ++i) { + const kernel_t *k = kernels[i]; + nfa_state_t **s = k->state; + const size_t n = k->size; + const uint32_t *v = k->tvers; + + for (size_t u = 0; u < n;) { + const size_t r = s[u]->rule; + const Rule &rule = rules[r]; + + const size_t l = u; + for (; ++u < n && s[u]->rule == r;); + for (size_t t = rule.ltag; t < rule.htag; ++t) { + uniq.clear(); + for (size_t m = l; m < u; ++m) { + uniq.insert(ctx.dc_tagvertbl[v[m]][t]); + } + maxv[t] = std::max(maxv[t], uniq.size()); + } + } + } + + for (uint32_t r = 0; r < nrule; ++r) { + const Rule &rule = rules[r]; + for (size_t t = rule.ltag; t < rule.htag; ++t) { + const size_t m = maxv[t]; + if (m > 1) { + const uint32_t line = rule.code->fline; + warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m); + } + } + } } determ_context_t::determ_context_t(const opt_t *opts, Warn &warn - , const std::string &condname, const nfa_t &nfa, dfa_t &dfa) - : dc_opts(opts) - , dc_warn(warn) - , dc_condname(condname) - , dc_nfa(nfa) - , dc_dfa(dfa) - , dc_allocator() - , dc_origin(dfa_t::NIL) - , dc_target(dfa_t::NIL) - , dc_symbol(0) - , dc_actions(NULL) - , dc_reached() - , dc_closure() - , dc_prectbl(NULL) - , dc_tagvertbl(nfa.tags.size()) - , dc_taghistory() - , dc_kernels() - , dc_buffers(dc_allocator) - , dc_newvers(newver_cmp_t(dc_taghistory)) - , dc_stack_topsort() - , dc_stack_linear() - , dc_stack_dfs() - , dc_dump(opts) + , const std::string &condname, const nfa_t &nfa, dfa_t &dfa) + : dc_opts(opts) + , dc_warn(warn) + , dc_condname(condname) + , dc_nfa(nfa) + , dc_dfa(dfa) + , dc_allocator() + , dc_origin(dfa_t::NIL) + , dc_target(dfa_t::NIL) + , dc_symbol(0) + , dc_actions(NULL) + , dc_reached() + , dc_closure() + , dc_prectbl(NULL) + , dc_tagvertbl(nfa.tags.size()) + , dc_taghistory() + , dc_kernels() + , dc_buffers(dc_allocator) + , dc_newvers(newver_cmp_t(dc_taghistory)) + , dc_stack_topsort() + , dc_stack_linear() + , dc_stack_dfs() + , dc_dump(opts) {} dfa_t::~dfa_t() { - std::vector::iterator - i = states.begin(), - e = states.end(); - for (; i != e; ++i) - { - delete *i; - } + std::vector::iterator + i = states.begin(), + e = states.end(); + for (; i != e; ++i) + { + delete *i; + } } bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const { - if (x.tag < y.tag) return true; - if (x.tag > y.tag) return false; + if (x.tag < y.tag) return true; + if (x.tag > y.tag) return false; - if (x.base < y.base) return true; - if (x.base > y.base) return false; + if (x.base < y.base) return true; + if (x.base > y.base) return false; - return history.compare_reversed(x.history, y.history, x.tag) < 0; + return history.compare_reversed(x.history, y.history, x.tag) < 0; } } // namespace re2c diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 35a88d46..9d9a45b3 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -32,14 +32,14 @@ typedef slab_allocator_t<> allocator_t; struct clos_t { - nfa_state_t *state; - uint32_t origin; - uint32_t tvers; // vector of tag versions (including lookahead tags) - hidx_t ttran; // history of transition tags - hidx_t tlook; // history of lookahead tags - - static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; } - static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; } + nfa_state_t *state; + uint32_t origin; + uint32_t tvers; // vector of tag versions (including lookahead tags) + hidx_t ttran; // history of transition tags + hidx_t tlook; // history of lookahead tags + + static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; } + static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; } }; @@ -52,18 +52,18 @@ typedef closure_t::const_reverse_iterator rcclositer_t; struct newver_t { - size_t tag; - tagver_t base; - hidx_t history; + size_t tag; + tagver_t base; + hidx_t history; }; struct newver_cmp_t { - tag_history_t &history; + tag_history_t &history; - explicit newver_cmp_t(tag_history_t &h) : history(h) {} - bool operator()(const newver_t &, const newver_t &) const; + explicit newver_cmp_t(tag_history_t &h) : history(h) {} + bool operator()(const newver_t &, const newver_t &) const; }; @@ -72,30 +72,30 @@ typedef std::map newvers_t; struct kernel_t { - size_t size; - const prectable_t *prectbl; - nfa_state_t **state; - uint32_t *tvers; // tag versions - hidx_t *tlook; // lookahead tags + size_t size; + const prectable_t *prectbl; + nfa_state_t **state; + uint32_t *tvers; // tag versions + hidx_t *tlook; // lookahead tags - FORBID_COPY(kernel_t); + FORBID_COPY(kernel_t); }; struct kernel_buffers_t { - size_t maxsize; - kernel_t *kernel; - tagver_t cap; // capacity (greater or equal to max) - tagver_t max; // maximal tag version - char *memory; - tagver_t *x2y; - tagver_t *y2x; - size_t *x2t; - uint32_t *indegree; - tcmd_t *backup_actions; - - explicit kernel_buffers_t(allocator_t &alc); + size_t maxsize; + kernel_t *kernel; + tagver_t cap; // capacity (greater or equal to max) + tagver_t max; // maximal tag version + char *memory; + tagver_t *x2y; + tagver_t *y2x; + size_t *x2t; + uint32_t *indegree; + tcmd_t *backup_actions; + + explicit kernel_buffers_t(allocator_t &alc); }; @@ -104,36 +104,36 @@ typedef lookup_t kernels_t; struct determ_context_t { - // determinization input - const opt_t *dc_opts; // options - Warn &dc_warn; // warnings - const std::string &dc_condname; // the name of current condition (with -c) - const nfa_t &dc_nfa; // TNFA - - // determinization output - dfa_t &dc_dfa; // resulting TDFA - - // temporary structures used by determinization - allocator_t dc_allocator; - uint32_t dc_origin; // from-state of the current transition - uint32_t dc_target; // to-state of the current transition - uint32_t dc_symbol; // alphabet symbol of the current transition - tcmd_t *dc_actions; // tag actions of the current transition - closure_t dc_reached; - closure_t dc_closure; - prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation - tagver_table_t dc_tagvertbl; - tag_history_t dc_taghistory; // prefix trie of tag histories - kernels_t dc_kernels; // TDFA states under construction - kernel_buffers_t dc_buffers; - newvers_t dc_newvers; - std::stack dc_stack_topsort; - std::stack dc_stack_linear; - std::stack dc_stack_dfs; - dump_dfa_t dc_dump; - - determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &); - FORBID_COPY(determ_context_t); + // determinization input + const opt_t *dc_opts; // options + Warn &dc_warn; // warnings + const std::string &dc_condname; // the name of current condition (with -c) + const nfa_t &dc_nfa; // TNFA + + // determinization output + dfa_t &dc_dfa; // resulting TDFA + + // temporary structures used by determinization + allocator_t dc_allocator; + uint32_t dc_origin; // from-state of the current transition + uint32_t dc_target; // to-state of the current transition + uint32_t dc_symbol; // alphabet symbol of the current transition + tcmd_t *dc_actions; // tag actions of the current transition + closure_t dc_reached; + closure_t dc_closure; + prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation + tagver_table_t dc_tagvertbl; + tag_history_t dc_taghistory; // prefix trie of tag histories + kernels_t dc_kernels; // TDFA states under construction + kernel_buffers_t dc_buffers; + newvers_t dc_newvers; + std::stack dc_stack_topsort; + std::stack dc_stack_linear; + std::stack dc_stack_dfs; + dump_dfa_t dc_dump; + + determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &); + FORBID_COPY(determ_context_t); }; diff --git a/re2c/src/dfa/dfa.h b/re2c/src/dfa/dfa.h index ce27789a..a52536dc 100644 --- a/re2c/src/dfa/dfa.h +++ b/re2c/src/dfa/dfa.h @@ -20,57 +20,57 @@ struct opt_t; struct dfa_state_t { - size_t *arcs; - tcmd_t **tcmd; - tcid_t *tcid; - size_t rule; - bool fallthru; - bool fallback; + size_t *arcs; + tcmd_t **tcmd; + tcid_t *tcid; + size_t rule; + bool fallthru; + bool fallback; - explicit dfa_state_t(size_t nchars) - : arcs(new size_t[nchars]) - , tcmd(new tcmd_t*[nchars + 2]()) // +2 for final and fallback epsilon-transitions - , tcid(NULL) - , rule(Rule::NONE) - , fallthru(false) - , fallback(false) - {} - ~dfa_state_t() - { - delete[] arcs; - delete[] tcmd; - delete[] tcid; - } - FORBID_COPY(dfa_state_t); + explicit dfa_state_t(size_t nchars) + : arcs(new size_t[nchars]) + , tcmd(new tcmd_t*[nchars + 2]()) // +2 for final and fallback epsilon-transitions + , tcid(NULL) + , rule(Rule::NONE) + , fallthru(false) + , fallback(false) + {} + ~dfa_state_t() + { + delete[] arcs; + delete[] tcmd; + delete[] tcid; + } + FORBID_COPY(dfa_state_t); }; struct dfa_t { - static const uint32_t NIL; + static const uint32_t NIL; - std::vector states; - const size_t nchars; - std::vector &charset; - std::valarray &rules; - std::vector &tags; - std::set &mtagvers; - tagver_t *finvers; - tcpool_t &tcpool; - tagver_t maxtagver; - tcmd_t *tcmd0; - tcid_t tcid0; + std::vector states; + const size_t nchars; + std::vector &charset; + std::valarray &rules; + std::vector &tags; + std::set &mtagvers; + tagver_t *finvers; + tcpool_t &tcpool; + tagver_t maxtagver; + tcmd_t *tcmd0; + tcid_t tcid0; - dfa_t(const nfa_t &nfa, const opt_t *opts, - const std::string &cond, Warn &warn); - ~dfa_t(); + dfa_t(const nfa_t &nfa, const opt_t *opts, + const std::string &cond, Warn &warn); + ~dfa_t(); - FORBID_COPY(dfa_t); + FORBID_COPY(dfa_t); }; enum dfa_minimization_t { - DFA_MINIMIZATION_TABLE, - DFA_MINIMIZATION_MOORE + DFA_MINIMIZATION_TABLE, + DFA_MINIMIZATION_MOORE }; void minimization(dfa_t &dfa, dfa_minimization_t type); diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc index eaf96e9d..32a1f1a8 100644 --- a/re2c/src/dfa/dump.cc +++ b/re2c/src/dfa/dump.cc @@ -25,280 +25,280 @@ static void dump_tags(const tagver_table_t &, const tag_history_t &, hidx_t, uin dump_dfa_t::dump_dfa_t(const opt_t *opts) - : debug(opts->dump_dfa_raw) - , uniqidx(0) + : debug(opts->dump_dfa_raw) + , uniqidx(0) { - if (!debug) return; + if (!debug) return; - fprintf(stderr, "digraph DFA {\n" - " rankdir=LR\n" - " node[shape=plaintext fontname=Courier]\n" - " edge[arrowhead=vee fontname=Courier]\n\n"); + fprintf(stderr, "digraph DFA {\n" + " rankdir=LR\n" + " node[shape=plaintext fontname=Courier]\n" + " edge[arrowhead=vee fontname=Courier]\n\n"); } dump_dfa_t::~dump_dfa_t() { - if (!debug) return; + if (!debug) return; - fprintf(stderr, "}\n"); + fprintf(stderr, "}\n"); } static void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) { - if (i == HROOT) { - fprintf(stderr, " /"); - return; - } - - dump_history(dfa, h, h.pred(i)); - - const Tag &t = dfa.tags[h.tag(i)]; - const tagver_t v = h.elem(i); - if (capture(t)) { - fprintf(stderr, "%u", (uint32_t)t.ncap); - } else if (!trailing(t)) { - fprintf(stderr, "%s", t.name->c_str()); - } - fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑"); - fprintf(stderr, " "); + if (i == HROOT) { + fprintf(stderr, " /"); + return; + } + + dump_history(dfa, h, h.pred(i)); + + const Tag &t = dfa.tags[h.tag(i)]; + const tagver_t v = h.elem(i); + if (capture(t)) { + fprintf(stderr, "%u", (uint32_t)t.ncap); + } else if (!trailing(t)) { + fprintf(stderr, "%s", t.name->c_str()); + } + fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑"); + fprintf(stderr, " "); } void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) { - if (!debug) return; - - const closure_t &closure = ctx.dc_closure; - cclositer_t b = closure.begin(), e = closure.end(), c; - const uint32_t origin = ctx.dc_origin; - const uint32_t target = ctx.dc_target; - const uint32_t symbol = ctx.dc_symbol; - const dfa_t &dfa = ctx.dc_dfa; - const tagver_table_t &tvtbl = ctx.dc_tagvertbl; - const tag_history_t &thist = ctx.dc_taghistory; - uint32_t i; - - if (target == dfa_t::NIL) return; - - const uint32_t state = isnew ? target : ++uniqidx; - const char *prefix = isnew ? "" : "i"; - const char *style = isnew ? "" : " STYLE=\"dotted\""; - - // closure - fprintf(stderr, " %s%u [label=<", prefix, state); - i = 0; - for (c = b; c != e; ++c, ++i) { - fprintf(stderr, "%u", - i, style, static_cast(c->state - ctx.dc_nfa.states)); - - if (c->tvers != ZERO_TAGS) { - const tagver_t *vers = tvtbl[c->tvers]; - const size_t ntag = dfa.tags.size(); - - for (size_t t = 0; t < ntag; ++t) { - fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t])); - } - - if (c->tlook != HROOT) { - dump_history(dfa, thist, c->tlook); - } - } - - fprintf(stderr, ""); - } - fprintf(stderr, ">]\n"); - - // transitions (initial state) - if (origin == dfa_t::NIL) { - fprintf(stderr, " void [shape=point]\n"); - - uint32_t i = 0; - for (c = b; c != e; ++c, ++i) { - fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i); - dump_tags(tvtbl, thist, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); - } - } - - // transitions (other states) - else { - if (!isnew) { - fprintf(stderr, - " i%u [style=dotted]\n" - " i%u:s -> %u:s [style=dotted label=\"", - state, state, origin); - dump_tcmd(dfa.states[origin]->tcmd[symbol]); - fprintf(stderr, "\"]\n"); - } - - uint32_t i = 0; - for (c = b; c != e; ++c, ++i) { - fprintf(stderr, - " %u:%u:e -> %s%u:%u:w [label=\"%u", - origin, c->origin, prefix, state, i, symbol); - dump_tags(tvtbl, thist, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); - } - } - - // if final state, dump finalizer - const dfa_state_t *t = dfa.states[target]; - if (t->rule != Rule::NONE) { - const Rule &r = dfa.rules[t->rule]; - const tcmd_t *cmd = t->tcmd[dfa.nchars]; - - // see note [at most one final item per closure] - c = std::find_if(b, e, clos_t::fin); - assert(c != e); - - fprintf(stderr, " r%u [shape=none label=\"(", state); - for (size_t t = r.ltag; t < r.htag; ++t) { - if (t > r.ltag) fprintf(stderr, " "); - fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t])); - } - fprintf(stderr, ")\"]\n"); - - fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"", - state, c->origin, state); - dump_tcmd(cmd); - fprintf(stderr, "\"]\n"); - } + if (!debug) return; + + const closure_t &closure = ctx.dc_closure; + cclositer_t b = closure.begin(), e = closure.end(), c; + const uint32_t origin = ctx.dc_origin; + const uint32_t target = ctx.dc_target; + const uint32_t symbol = ctx.dc_symbol; + const dfa_t &dfa = ctx.dc_dfa; + const tagver_table_t &tvtbl = ctx.dc_tagvertbl; + const tag_history_t &thist = ctx.dc_taghistory; + uint32_t i; + + if (target == dfa_t::NIL) return; + + const uint32_t state = isnew ? target : ++uniqidx; + const char *prefix = isnew ? "" : "i"; + const char *style = isnew ? "" : " STYLE=\"dotted\""; + + // closure + fprintf(stderr, " %s%u [label=<", prefix, state); + i = 0; + for (c = b; c != e; ++c, ++i) { + fprintf(stderr, "%u", + i, style, static_cast(c->state - ctx.dc_nfa.states)); + + if (c->tvers != ZERO_TAGS) { + const tagver_t *vers = tvtbl[c->tvers]; + const size_t ntag = dfa.tags.size(); + + for (size_t t = 0; t < ntag; ++t) { + fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t])); + } + + if (c->tlook != HROOT) { + dump_history(dfa, thist, c->tlook); + } + } + + fprintf(stderr, ""); + } + fprintf(stderr, ">]\n"); + + // transitions (initial state) + if (origin == dfa_t::NIL) { + fprintf(stderr, " void [shape=point]\n"); + + uint32_t i = 0; + for (c = b; c != e; ++c, ++i) { + fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i); + dump_tags(tvtbl, thist, c->ttran, c->tvers); + fprintf(stderr, "\"]\n"); + } + } + + // transitions (other states) + else { + if (!isnew) { + fprintf(stderr, + " i%u [style=dotted]\n" + " i%u:s -> %u:s [style=dotted label=\"", + state, state, origin); + dump_tcmd(dfa.states[origin]->tcmd[symbol]); + fprintf(stderr, "\"]\n"); + } + + uint32_t i = 0; + for (c = b; c != e; ++c, ++i) { + fprintf(stderr, + " %u:%u:e -> %s%u:%u:w [label=\"%u", + origin, c->origin, prefix, state, i, symbol); + dump_tags(tvtbl, thist, c->ttran, c->tvers); + fprintf(stderr, "\"]\n"); + } + } + + // if final state, dump finalizer + const dfa_state_t *t = dfa.states[target]; + if (t->rule != Rule::NONE) { + const Rule &r = dfa.rules[t->rule]; + const tcmd_t *cmd = t->tcmd[dfa.nchars]; + + // see note [at most one final item per closure] + c = std::find_if(b, e, clos_t::fin); + assert(c != e); + + fprintf(stderr, " r%u [shape=none label=\"(", state); + for (size_t t = r.ltag; t < r.htag; ++t) { + if (t > r.ltag) fprintf(stderr, " "); + fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t])); + } + fprintf(stderr, ")\"]\n"); + + fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"", + state, c->origin, state); + dump_tcmd(cmd); + fprintf(stderr, "\"]\n"); + } } void dump_dfa(const dfa_t &dfa) { - const size_t - nstate = dfa.states.size(), - nsym = dfa.nchars; - - fprintf(stderr, - "digraph DFA {\n" - " rankdir=LR\n" - " node[shape=Mrecord fontname=Courier]\n" - " edge[arrowhead=vee fontname=Courier]\n\n"); - - // initializer - fprintf(stderr, - " n [shape=point]" - " n -> n0 [style=dotted label=\""); - dump_tcmd_or_tcid(dfa.tcmd0 ? &dfa.tcmd0 : NULL, &dfa.tcid0, 0, dfa.tcpool); - fprintf(stderr, "\"]\n"); - - for (uint32_t i = 0; i < nstate; ++i) { - const dfa_state_t *s = dfa.states[i]; - - // state - fprintf(stderr, " n%u [height=0.2 width=0.2 label=\"%u\"]\n", i, i); - - // finalizer - if (s->rule != Rule::NONE) { - const Rule &r = dfa.rules[s->rule]; - - fprintf(stderr, - "subgraph { rank=same" - " n%u [style=filled fillcolor=lightgray]" - " dr%u [shape=none label=\"", - i, i); - dump_tcmd_or_tcid(s->tcmd, s->tcid, nsym, dfa.tcpool); - - fprintf(stderr, "("); - for (size_t t = r.ltag; t < r.htag; ++t) { - if (t > r.ltag) fprintf(stderr, " "); - fprintf(stderr, "%d", dfa.finvers[t]); - } - fprintf(stderr, ")"); - - fprintf(stderr, "\"]" - " n%u:s -> dr%u:n [style=dotted minlen=0]}\n", - i, i); - } - - // transitions - for (uint32_t c = 0; c < nsym; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL) { - fprintf(stderr, " n%u -> n%u [label=\"%u", - i, static_cast(j), c); - dump_tcmd_or_tcid(s->tcmd, s->tcid, c, dfa.tcpool); - fprintf(stderr, "\"]\n"); - } - } - } - - fprintf(stderr, "}\n"); + const size_t + nstate = dfa.states.size(), + nsym = dfa.nchars; + + fprintf(stderr, + "digraph DFA {\n" + " rankdir=LR\n" + " node[shape=Mrecord fontname=Courier]\n" + " edge[arrowhead=vee fontname=Courier]\n\n"); + + // initializer + fprintf(stderr, + " n [shape=point]" + " n -> n0 [style=dotted label=\""); + dump_tcmd_or_tcid(dfa.tcmd0 ? &dfa.tcmd0 : NULL, &dfa.tcid0, 0, dfa.tcpool); + fprintf(stderr, "\"]\n"); + + for (uint32_t i = 0; i < nstate; ++i) { + const dfa_state_t *s = dfa.states[i]; + + // state + fprintf(stderr, " n%u [height=0.2 width=0.2 label=\"%u\"]\n", i, i); + + // finalizer + if (s->rule != Rule::NONE) { + const Rule &r = dfa.rules[s->rule]; + + fprintf(stderr, + "subgraph { rank=same" + " n%u [style=filled fillcolor=lightgray]" + " dr%u [shape=none label=\"", + i, i); + dump_tcmd_or_tcid(s->tcmd, s->tcid, nsym, dfa.tcpool); + + fprintf(stderr, "("); + for (size_t t = r.ltag; t < r.htag; ++t) { + if (t > r.ltag) fprintf(stderr, " "); + fprintf(stderr, "%d", dfa.finvers[t]); + } + fprintf(stderr, ")"); + + fprintf(stderr, "\"]" + " n%u:s -> dr%u:n [style=dotted minlen=0]}\n", + i, i); + } + + // transitions + for (uint32_t c = 0; c < nsym; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL) { + fprintf(stderr, " n%u -> n%u [label=\"%u", + i, static_cast(j), c); + dump_tcmd_or_tcid(s->tcmd, s->tcid, c, dfa.tcpool); + fprintf(stderr, "\"]\n"); + } + } + } + + fprintf(stderr, "}\n"); } void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, - size_t sym, const tcpool_t &tcpool) + size_t sym, const tcpool_t &tcpool) { - const tcmd_t *cmd = tcmd ? tcmd[sym] : tcpool[tcid[sym]]; - dump_tcmd(cmd); + const tcmd_t *cmd = tcmd ? tcmd[sym] : tcpool[tcid[sym]]; + dump_tcmd(cmd); } void dump_tcmd(const tcmd_t *p) { - if (!p) return; - - fprintf(stderr, "/"); - for (; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - if (tcmd_t::iscopy(p)) { - fprintf(stderr, "%d=%d ", l, r); - } else { - fprintf(stderr, "%d", l); - if (r != TAGVER_ZERO) { - fprintf(stderr, "=%d", r); - } - for (; *h != TAGVER_ZERO; ++h) { - fprintf(stderr, "%s", *h == TAGVER_BOTTOM ? "↓" : "↑"); - } - fprintf(stderr, " "); - } - } + if (!p) return; + + fprintf(stderr, "/"); + for (; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + if (tcmd_t::iscopy(p)) { + fprintf(stderr, "%d=%d ", l, r); + } else { + fprintf(stderr, "%d", l); + if (r != TAGVER_ZERO) { + fprintf(stderr, "=%d", r); + } + for (; *h != TAGVER_ZERO; ++h) { + fprintf(stderr, "%s", *h == TAGVER_BOTTOM ? "↓" : "↑"); + } + fprintf(stderr, " "); + } + } } const char *tagname(const Tag &t) { - return t.name ? t.name->c_str() : ""; + return t.name ? t.name->c_str() : ""; } void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory, - hidx_t ttran, uint32_t tvers) + hidx_t ttran, uint32_t tvers) { - if (ttran == HROOT) return; - - fprintf(stderr, "/"); - const tagver_t *vers = tagvertbl[tvers]; - for (size_t i = 0; i < tagvertbl.ntags; ++i) { - - if (taghistory.last(ttran, i) == TAGVER_ZERO) { - continue; - } - - fprintf(stderr, "%d", abs(vers[i])); - for (hidx_t t = ttran; t != HROOT; t = taghistory.pred(t)) { - if (taghistory.tag(t) != i) { - continue; - } - else if (taghistory.elem(t) < TAGVER_ZERO) { - fprintf(stderr, "↓"); - } - else if (t > TAGVER_ZERO) { - fprintf(stderr, "↑"); - } - } - fprintf(stderr, " "); - } + if (ttran == HROOT) return; + + fprintf(stderr, "/"); + const tagver_t *vers = tagvertbl[tvers]; + for (size_t i = 0; i < tagvertbl.ntags; ++i) { + + if (taghistory.last(ttran, i) == TAGVER_ZERO) { + continue; + } + + fprintf(stderr, "%d", abs(vers[i])); + for (hidx_t t = ttran; t != HROOT; t = taghistory.pred(t)) { + if (taghistory.tag(t) != i) { + continue; + } + else if (taghistory.elem(t) < TAGVER_ZERO) { + fprintf(stderr, "↓"); + } + else if (t > TAGVER_ZERO) { + fprintf(stderr, "↑"); + } + } + fprintf(stderr, " "); + } } } // namespace re2c diff --git a/re2c/src/dfa/dump.h b/re2c/src/dfa/dump.h index 52a41d9e..f37c0759 100644 --- a/re2c/src/dfa/dump.h +++ b/re2c/src/dfa/dump.h @@ -14,12 +14,12 @@ struct tcmd_t; struct dump_dfa_t { - const bool debug; - uint32_t uniqidx; + const bool debug; + uint32_t uniqidx; - explicit dump_dfa_t(const opt_t *); - ~dump_dfa_t(); - void state(const determ_context_t &, bool); + explicit dump_dfa_t(const opt_t *); + ~dump_dfa_t(); + void state(const determ_context_t &, bool); }; void dump_dfa(const dfa_t &dfa); diff --git a/re2c/src/dfa/fallback_tags.cc b/re2c/src/dfa/fallback_tags.cc index 06353320..5556985a 100644 --- a/re2c/src/dfa/fallback_tags.cc +++ b/re2c/src/dfa/fallback_tags.cc @@ -39,22 +39,22 @@ static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bo void find_overwritten_tags(const dfa_t &dfa, size_t state, - bool *been, bool *owrt) + bool *been, bool *owrt) { - if (been[state]) return; - been[state] = true; - - const dfa_state_t *s = dfa.states[state]; - for (size_t c = 0; c < dfa.nchars; ++c) { - for (const tcmd_t *p = s->tcmd[c]; p; p = p->next) { - owrt[p->lhs] = true; - } - - size_t dest = s->arcs[c]; - if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) { - find_overwritten_tags(dfa, dest, been, owrt); - } - } + if (been[state]) return; + been[state] = true; + + const dfa_state_t *s = dfa.states[state]; + for (size_t c = 0; c < dfa.nchars; ++c) { + for (const tcmd_t *p = s->tcmd[c]; p; p = p->next) { + owrt[p->lhs] = true; + } + + size_t dest = s->arcs[c]; + if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) { + find_overwritten_tags(dfa, dest, been, owrt); + } + } } @@ -62,13 +62,13 @@ void find_overwritten_tags(const dfa_t &dfa, size_t state, // ('copy' commands must go first, before potential overwrites) static void backup(dfa_t &dfa, dfa_state_t *s, tagver_t l, tagver_t r) { - for (size_t c = 0; c < dfa.nchars; ++c) { - size_t i = s->arcs[c]; - if (i != dfa_t::NIL && dfa.states[i]->fallthru) { - tcmd_t *&p = s->tcmd[c]; - p = dfa.tcpool.make_copy(p, l, r); - } - } + for (size_t c = 0; c < dfa.nchars; ++c) { + size_t i = s->arcs[c]; + if (i != dfa_t::NIL && dfa.states[i]->fallthru) { + tcmd_t *&p = s->tcmd[c]; + p = dfa.tcpool.make_copy(p, l, r); + } + } } @@ -77,60 +77,60 @@ static void backup(dfa_t &dfa, dfa_state_t *s, tagver_t l, tagver_t r) // note [fallback states] void insert_fallback_tags(dfa_t &dfa) { - tcpool_t &pool = dfa.tcpool; - const size_t - nstates = dfa.states.size(), - nsym = dfa.nchars, - nver = static_cast(dfa.maxtagver) + 1; - bool *been = new bool[nstates]; - bool *owrt = new bool[nver]; - - for (size_t i = 0; i < nstates; ++i) { - dfa_state_t *s = dfa.states[i]; - if (!s->fallback) continue; - - std::fill(been, been + nstates, false); - std::fill(owrt, owrt + nver, false); - find_overwritten_tags(dfa, i, been, owrt); - - tcmd_t *p = s->tcmd[nsym], - *save = NULL, **ps = &save, - **pc = &s->tcmd[nsym + 1]; - for (; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - - // 'copy' commands - if (tcmd_t::iscopy(p)) { - if (!owrt[r]) { - *pc = pool.make_copy(NULL, l, r); - pc = &(*pc)->next; - } else { - backup(dfa, s, l, r); - } - - // 'save without history' commands - } else if (tcmd_t::isset(p)) { - *ps = pool.make_set(*ps, l, h[0]); - ps = &(*ps)->next; - - // 'save with history' commands - } else { - if (!owrt[r]) { - *ps = pool.copy_add(NULL, l, r, h); - } else { - *ps = pool.copy_add(NULL, l, l, h); - backup(dfa, s, l, r); - } - ps = &(*ps)->next; - } - } - - // join 'copy' (fallback) and 'save' commands - *pc = save; - } - - delete[] been; - delete[] owrt; + tcpool_t &pool = dfa.tcpool; + const size_t + nstates = dfa.states.size(), + nsym = dfa.nchars, + nver = static_cast(dfa.maxtagver) + 1; + bool *been = new bool[nstates]; + bool *owrt = new bool[nver]; + + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + if (!s->fallback) continue; + + std::fill(been, been + nstates, false); + std::fill(owrt, owrt + nver, false); + find_overwritten_tags(dfa, i, been, owrt); + + tcmd_t *p = s->tcmd[nsym], + *save = NULL, **ps = &save, + **pc = &s->tcmd[nsym + 1]; + for (; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + + // 'copy' commands + if (tcmd_t::iscopy(p)) { + if (!owrt[r]) { + *pc = pool.make_copy(NULL, l, r); + pc = &(*pc)->next; + } else { + backup(dfa, s, l, r); + } + + // 'save without history' commands + } else if (tcmd_t::isset(p)) { + *ps = pool.make_set(*ps, l, h[0]); + ps = &(*ps)->next; + + // 'save with history' commands + } else { + if (!owrt[r]) { + *ps = pool.copy_add(NULL, l, r, h); + } else { + *ps = pool.copy_add(NULL, l, l, h); + backup(dfa, s, l, r); + } + ps = &(*ps)->next; + } + } + + // join 'copy' (fallback) and 'save' commands + *pc = save; + } + + delete[] been; + delete[] owrt; } } // namespace re2c diff --git a/re2c/src/dfa/fillpoints.cc b/re2c/src/dfa/fillpoints.cc index c43af1a9..419ad38d 100644 --- a/re2c/src/dfa/fillpoints.cc +++ b/re2c/src/dfa/fillpoints.cc @@ -40,122 +40,122 @@ static const size_t SCC_UND = SCC_INF - 1; static bool loopback(size_t node, size_t narcs, const size_t *arcs) { - for (size_t i = 0; i < narcs; ++i) - { - if (arcs[i] == node) - { - return true; - } - } - return false; + for (size_t i = 0; i < narcs; ++i) + { + if (arcs[i] == node) + { + return true; + } + } + return false; } static void scc( - const dfa_t &dfa, - std::stack &stack, - std::vector &lowlink, - std::vector &trivial, - size_t i) + const dfa_t &dfa, + std::stack &stack, + std::vector &lowlink, + std::vector &trivial, + size_t i) { - const size_t link = stack.size(); - lowlink[i] = link; - stack.push(i); - - const size_t *arcs = dfa.states[i]->arcs; - for (size_t c = 0; c < dfa.nchars; ++c) - { - const size_t j = arcs[c]; - if (j != dfa_t::NIL) - { - if (lowlink[j] == SCC_UND) - { - scc(dfa, stack, lowlink, trivial, j); - } - if (lowlink[j] < lowlink[i]) - { - lowlink[i] = lowlink[j]; - } - } - } - - if (lowlink[i] == link) - { - // SCC is non-trivial (has loops) iff it either: - // - consists of multiple nodes (they all must be interconnected) - // - consists of single node which loops back to itself - trivial[i] = i == stack.top() - && !loopback(i, dfa.nchars, arcs); - - size_t j; - do - { - j = stack.top(); - stack.pop(); - lowlink[j] = SCC_INF; - } - while (j != i); - } + const size_t link = stack.size(); + lowlink[i] = link; + stack.push(i); + + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + if (lowlink[j] == SCC_UND) + { + scc(dfa, stack, lowlink, trivial, j); + } + if (lowlink[j] < lowlink[i]) + { + lowlink[i] = lowlink[j]; + } + } + } + + if (lowlink[i] == link) + { + // SCC is non-trivial (has loops) iff it either: + // - consists of multiple nodes (they all must be interconnected) + // - consists of single node which loops back to itself + trivial[i] = i == stack.top() + && !loopback(i, dfa.nchars, arcs); + + size_t j; + do + { + j = stack.top(); + stack.pop(); + lowlink[j] = SCC_INF; + } + while (j != i); + } } static void calc_fill( - const dfa_t &dfa, - const std::vector &trivial, - std::vector &fill, - size_t i) + const dfa_t &dfa, + const std::vector &trivial, + std::vector &fill, + size_t i) { - if (fill[i] == SCC_UND) - { - fill[i] = 0; - const size_t *arcs = dfa.states[i]->arcs; - for (size_t c = 0; c < dfa.nchars; ++c) - { - const size_t j = arcs[c]; - if (j != dfa_t::NIL) - { - calc_fill(dfa, trivial, fill, j); - size_t max = 1; - if (trivial[j]) - { - max += fill[j]; - } - if (max > fill[i]) - { - fill[i] = max; - } - } - } - } + if (fill[i] == SCC_UND) + { + fill[i] = 0; + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + calc_fill(dfa, trivial, fill, j); + size_t max = 1; + if (trivial[j]) + { + max += fill[j]; + } + if (max > fill[i]) + { + fill[i] = max; + } + } + } + } } void fillpoints(const dfa_t &dfa, std::vector &fill) { - const size_t size = dfa.states.size(); - - // find DFA states that belong to non-trivial SCC - std::stack stack; - std::vector lowlink(size, SCC_UND); - std::vector trivial(size, false); - scc(dfa, stack, lowlink, trivial, 0); - - // for each DFA state, calculate YYFILL argument: - // maximal path length to the next YYFILL state - fill.resize(size, SCC_UND); - calc_fill(dfa, trivial, fill, 0); - - // The following states must trigger YYFILL: - // - inital state - // - all states in non-trivial SCCs - // for other states, reset YYFILL argument to zero - for (size_t i = 1; i < size; ++i) - { - if (trivial[i]) - { - fill[i] = 0; - } - } + const size_t size = dfa.states.size(); + + // find DFA states that belong to non-trivial SCC + std::stack stack; + std::vector lowlink(size, SCC_UND); + std::vector trivial(size, false); + scc(dfa, stack, lowlink, trivial, 0); + + // for each DFA state, calculate YYFILL argument: + // maximal path length to the next YYFILL state + fill.resize(size, SCC_UND); + calc_fill(dfa, trivial, fill, 0); + + // The following states must trigger YYFILL: + // - inital state + // - all states in non-trivial SCCs + // for other states, reset YYFILL argument to zero + for (size_t i = 1; i < size; ++i) + { + if (trivial[i]) + { + fill[i] = 0; + } + } } } // namespace re2c diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index 05a057b2..2b682ec3 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -79,15 +79,15 @@ namespace re2c struct kernel_eq_t { - const determ_context_t &ctx; - bool operator()(const kernel_t *, const kernel_t *) const; + const determ_context_t &ctx; + bool operator()(const kernel_t *, const kernel_t *) const; }; struct kernel_map_t { - determ_context_t &ctx; - bool operator()(const kernel_t *, const kernel_t *); + determ_context_t &ctx; + bool operator()(const kernel_t *, const kernel_t *); }; @@ -102,375 +102,375 @@ static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin); kernel_buffers_t::kernel_buffers_t(allocator_t &alc) - : maxsize(0) // usually ranges from one to some twenty - , kernel(make_new_kernel(maxsize, alc)) - , cap(0) - , max(0) - , memory(NULL) - , x2y(NULL) - , y2x(NULL) - , x2t(NULL) - , indegree(NULL) - , backup_actions(NULL) + : maxsize(0) // usually ranges from one to some twenty + , kernel(make_new_kernel(maxsize, alc)) + , cap(0) + , max(0) + , memory(NULL) + , x2y(NULL) + , y2x(NULL) + , x2t(NULL) + , indegree(NULL) + , backup_actions(NULL) {} kernel_t *make_new_kernel(size_t size, allocator_t &alc) { - kernel_t *k = alc.alloct(1); - k->size = size; - k->prectbl = NULL; - k->state = alc.alloct(size); - k->tvers = alc.alloct(size); - k->tlook = alc.alloct(size); - return k; + kernel_t *k = alc.alloct(1); + k->size = size; + k->prectbl = NULL; + k->state = alc.alloct(size); + k->tvers = alc.alloct(size); + k->tlook = alc.alloct(size); + return k; } kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc) { - const size_t n = kernel->size; + const size_t n = kernel->size; - kernel_t *k = make_new_kernel(n, alc); + kernel_t *k = make_new_kernel(n, alc); - memcpy(k->state, kernel->state, n * sizeof(void*)); - memcpy(k->tvers, kernel->tvers, n * sizeof(size_t)); - memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t)); + memcpy(k->state, kernel->state, n * sizeof(void*)); + memcpy(k->tvers, kernel->tvers, n * sizeof(size_t)); + memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t)); - prectable_t *ptbl = NULL; - if (kernel->prectbl) { - ptbl = alc.alloct(n * n); - memcpy(ptbl, kernel->prectbl, n * n * sizeof(prectable_t)); - } - k->prectbl = ptbl; + prectable_t *ptbl = NULL; + if (kernel->prectbl) { + ptbl = alc.alloct(n * n); + memcpy(ptbl, kernel->prectbl, n * n * sizeof(prectable_t)); + } + k->prectbl = ptbl; - return k; + return k; } void reserve_buffers(determ_context_t &ctx) { - kernel_buffers_t &kbufs = ctx.dc_buffers; - allocator_t &alc = ctx.dc_allocator; - const tagver_t maxver = ctx.dc_dfa.maxtagver; - const size_t nkern = ctx.dc_closure.size(); - - if (kbufs.maxsize < nkern) { - kbufs.maxsize = nkern * 2; // in advance - kbufs.kernel = make_new_kernel(kbufs.maxsize, alc); - } - - // +1 to ensure max tag version is not forgotten in loops - kbufs.max = maxver + 1; - if (kbufs.cap < kbufs.max) { - kbufs.cap = kbufs.max * 2; // in advance - - const size_t - n = static_cast(kbufs.cap), - m = 2 * n + 1, - sz_x2y = 2 * m * sizeof(tagver_t), - sz_x2t = m * sizeof(size_t), - sz_idg = n * sizeof(uint32_t), - sz_act = n * sizeof(tcmd_t); - - char *p = alc.alloct(sz_x2y + sz_x2t + sz_idg + sz_act); - kbufs.memory = p; - - // point to the center (zero index) of each buffer - // indexes in range [-N .. N] must be valid, where N is capacity - kbufs.x2y = reinterpret_cast(p) + n; - kbufs.y2x = kbufs.x2y + m; - p += sz_x2y; - kbufs.x2t = reinterpret_cast(p) + n; - p += sz_x2t; - kbufs.indegree = reinterpret_cast(p); - p += sz_idg; - kbufs.backup_actions = reinterpret_cast(p); - } + kernel_buffers_t &kbufs = ctx.dc_buffers; + allocator_t &alc = ctx.dc_allocator; + const tagver_t maxver = ctx.dc_dfa.maxtagver; + const size_t nkern = ctx.dc_closure.size(); + + if (kbufs.maxsize < nkern) { + kbufs.maxsize = nkern * 2; // in advance + kbufs.kernel = make_new_kernel(kbufs.maxsize, alc); + } + + // +1 to ensure max tag version is not forgotten in loops + kbufs.max = maxver + 1; + if (kbufs.cap < kbufs.max) { + kbufs.cap = kbufs.max * 2; // in advance + + const size_t + n = static_cast(kbufs.cap), + m = 2 * n + 1, + sz_x2y = 2 * m * sizeof(tagver_t), + sz_x2t = m * sizeof(size_t), + sz_idg = n * sizeof(uint32_t), + sz_act = n * sizeof(tcmd_t); + + char *p = alc.alloct(sz_x2y + sz_x2t + sz_idg + sz_act); + kbufs.memory = p; + + // point to the center (zero index) of each buffer + // indexes in range [-N .. N] must be valid, where N is capacity + kbufs.x2y = reinterpret_cast(p) + n; + kbufs.y2x = kbufs.x2y + m; + p += sz_x2y; + kbufs.x2t = reinterpret_cast(p) + n; + p += sz_x2t; + kbufs.indegree = reinterpret_cast(p); + p += sz_idg; + kbufs.backup_actions = reinterpret_cast(p); + } } uint32_t hash_kernel(const kernel_t *kernel) { - const size_t n = kernel->size; + const size_t n = kernel->size; - // seed - uint32_t h = static_cast(n); + // seed + uint32_t h = static_cast(n); - // TNFA states - h = hash32(h, kernel->state, n * sizeof(void*)); + // TNFA states + h = hash32(h, kernel->state, n * sizeof(void*)); - // precedence table - if (kernel->prectbl) { - h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t)); - } + // precedence table + if (kernel->prectbl) { + h = hash32(h, kernel->prectbl, n * n * sizeof(prectable_t)); + } - return h; + return h; } void copy_to_buffer_kernel(const closure_t &closure, - const prectable_t *prectbl, kernel_t *buffer) + const prectable_t *prectbl, kernel_t *buffer) { - const size_t n = closure.size(); + const size_t n = closure.size(); - buffer->size = n; + buffer->size = n; - buffer->prectbl = prectbl; + buffer->prectbl = prectbl; - for (size_t i = 0; i < n; ++i) { - const clos_t &c = closure[i]; - buffer->state[i] = c.state; - buffer->tvers[i] = c.tvers; - buffer->tlook[i] = c.tlook; - } + for (size_t i = 0; i < n; ++i) { + const clos_t &c = closure[i]; + buffer->state[i] = c.state; + buffer->tvers[i] = c.tvers; + buffer->tlook[i] = c.tlook; + } } bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, const determ_context_t &ctx) { - assert(x->size == y->size); - - if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) { - return true; - } - - const tag_history_t &thist = ctx.dc_taghistory; - const tagver_table_t &tvtbl = ctx.dc_tagvertbl; - const std::vector &tags = ctx.dc_dfa.tags; - - for (size_t i = 0; i < x->size; ++i) { - const hidx_t xl = x->tlook[i], yl = y->tlook[i]; - for (size_t t = 0; t < tvtbl.ntags; ++t) { - if (history(tags[t])) { - // compare full tag sequences - if (thist.compare_reversed(xl, yl, t) != 0) return false; - } else { - // compare only the last pair of tags - if (thist.last(xl, t) != thist.last(yl, t)) return false; - } - } - } - - return true; + assert(x->size == y->size); + + if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) { + return true; + } + + const tag_history_t &thist = ctx.dc_taghistory; + const tagver_table_t &tvtbl = ctx.dc_tagvertbl; + const std::vector &tags = ctx.dc_dfa.tags; + + for (size_t i = 0; i < x->size; ++i) { + const hidx_t xl = x->tlook[i], yl = y->tlook[i]; + for (size_t t = 0; t < tvtbl.ntags; ++t) { + if (history(tags[t])) { + // compare full tag sequences + if (thist.compare_reversed(xl, yl, t) != 0) return false; + } else { + // compare only the last pair of tags + if (thist.last(xl, t) != thist.last(yl, t)) return false; + } + } + } + + return true; } bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const { - // check that kernel sizes, NFA states, tags versions, - // lookahead tags and precedence table coincide - const size_t n = x->size; - return n == y->size - && memcmp(x->state, y->state, n * sizeof(void*)) == 0 - && memcmp(x->tvers, y->tvers, n * sizeof(size_t)) == 0 - && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) - && equal_lookahead_tags(x, y, ctx); + // check that kernel sizes, NFA states, tags versions, + // lookahead tags and precedence table coincide + const size_t n = x->size; + return n == y->size + && memcmp(x->state, y->state, n * sizeof(void*)) == 0 + && memcmp(x->tvers, y->tvers, n * sizeof(size_t)) == 0 + && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) + && equal_lookahead_tags(x, y, ctx); } bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) { - // check that kernel sizes, NFA states lookahead tags - // and precedence table coincide (versions might differ) - const size_t n = x->size; - const bool compatible = n == y->size - && memcmp(x->state, y->state, n * sizeof(void*)) == 0 - && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) - && equal_lookahead_tags(x, y, ctx); - if (!compatible) return false; - - const std::vector &tags = ctx.dc_dfa.tags; - const size_t ntag = tags.size(); - kernel_buffers_t &bufs = ctx.dc_buffers; - tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max; - size_t *x2t = bufs.x2t; - - // map tag versions of one kernel to that of another - // and check that lookahead versions (if any) coincide - std::fill(x2y - max, x2y + max, TAGVER_ZERO); - std::fill(y2x - max, y2x + max, TAGVER_ZERO); - for (size_t i = 0; i < n; ++i) { - const tagver_t - *xvs = ctx.dc_tagvertbl[x->tvers[i]], - *yvs = ctx.dc_tagvertbl[y->tvers[i]]; - const hidx_t xl = x->tlook[i]; - - for (size_t t = 0; t < ntag; ++t) { - // see note [mapping ignores items with lookahead tags] - if (ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO - && !history(tags[t])) continue; - - const tagver_t xv = xvs[t], yv = yvs[t]; - tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv]; - - if (yv0 == TAGVER_ZERO && xv0 == TAGVER_ZERO) { - xv0 = xv; - yv0 = yv; - x2t[xv] = t; - } else if (yv != yv0 || xv != xv0) { - return false; - } - } - } - - // we have bijective mapping; now try to create list of commands - tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL; - tcmd_t *b1 = bufs.backup_actions, *b2 = b1; - - // backup 'save' commands: if topsort finds cycles, this mapping - // will be rejected and we'll have to revert all changes - for (b2->next = a = *pacts; a; a = a->next) { - *++b2 = *a; - } - - // fix LHS of 'save' commands to reuse old version - // see note [save(X), copy(Y,X) optimization] - for (a = *pacts; a; a = a->next) { - const tagver_t - yv = a->lhs * (a->history[0] == TAGVER_BOTTOM ? -1 : 1), - xv = y2x[yv]; - a->lhs = abs(xv); - y2x[yv] = x2y[xv] = TAGVER_ZERO; - } - - // create 'copy' commands - for (tagver_t xv = -max; xv < max; ++xv) { - const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv); - if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) { - assert(axv != ayv); - copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv); - } - } - - // join 'copy' and 'save' commands - for (pa = © (a = *pa); pa = &a->next); - *pa = *pacts; - *pacts = copy; - - // see note [topological ordering of copy commands] - const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree); - - // in case of cycles restore 'save' commands and fail - if (nontrivial_cycles) { - for (*pacts = a = b1->next; a; a = a->next) { - *a = *++b1; - } - } - - return !nontrivial_cycles; + // check that kernel sizes, NFA states lookahead tags + // and precedence table coincide (versions might differ) + const size_t n = x->size; + const bool compatible = n == y->size + && memcmp(x->state, y->state, n * sizeof(void*)) == 0 + && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) + && equal_lookahead_tags(x, y, ctx); + if (!compatible) return false; + + const std::vector &tags = ctx.dc_dfa.tags; + const size_t ntag = tags.size(); + kernel_buffers_t &bufs = ctx.dc_buffers; + tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max; + size_t *x2t = bufs.x2t; + + // map tag versions of one kernel to that of another + // and check that lookahead versions (if any) coincide + std::fill(x2y - max, x2y + max, TAGVER_ZERO); + std::fill(y2x - max, y2x + max, TAGVER_ZERO); + for (size_t i = 0; i < n; ++i) { + const tagver_t + *xvs = ctx.dc_tagvertbl[x->tvers[i]], + *yvs = ctx.dc_tagvertbl[y->tvers[i]]; + const hidx_t xl = x->tlook[i]; + + for (size_t t = 0; t < ntag; ++t) { + // see note [mapping ignores items with lookahead tags] + if (ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO + && !history(tags[t])) continue; + + const tagver_t xv = xvs[t], yv = yvs[t]; + tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv]; + + if (yv0 == TAGVER_ZERO && xv0 == TAGVER_ZERO) { + xv0 = xv; + yv0 = yv; + x2t[xv] = t; + } else if (yv != yv0 || xv != xv0) { + return false; + } + } + } + + // we have bijective mapping; now try to create list of commands + tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL; + tcmd_t *b1 = bufs.backup_actions, *b2 = b1; + + // backup 'save' commands: if topsort finds cycles, this mapping + // will be rejected and we'll have to revert all changes + for (b2->next = a = *pacts; a; a = a->next) { + *++b2 = *a; + } + + // fix LHS of 'save' commands to reuse old version + // see note [save(X), copy(Y,X) optimization] + for (a = *pacts; a; a = a->next) { + const tagver_t + yv = a->lhs * (a->history[0] == TAGVER_BOTTOM ? -1 : 1), + xv = y2x[yv]; + a->lhs = abs(xv); + y2x[yv] = x2y[xv] = TAGVER_ZERO; + } + + // create 'copy' commands + for (tagver_t xv = -max; xv < max; ++xv) { + const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv); + if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) { + assert(axv != ayv); + copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv); + } + } + + // join 'copy' and 'save' commands + for (pa = © (a = *pa); pa = &a->next); + *pa = *pacts; + *pacts = copy; + + // see note [topological ordering of copy commands] + const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree); + + // in case of cycles restore 'save' commands and fail + if (nontrivial_cycles) { + for (*pacts = a = b1->next; a; a = a->next) { + *a = *++b1; + } + } + + return !nontrivial_cycles; } bool do_find_state(determ_context_t &ctx) { - kernels_t &kernels = ctx.dc_kernels; - const closure_t &closure = ctx.dc_closure; - - // empty closure corresponds to default state - if (closure.size() == 0) { - ctx.dc_target = dfa_t::NIL; - ctx.dc_actions = NULL; - return false; - } - - // resize buffer if closure is too large - reserve_buffers(ctx); - kernel_t *k = ctx.dc_buffers.kernel; - - // copy closure to buffer kernel - copy_to_buffer_kernel(closure, ctx.dc_prectbl, k); - - // hash "static" part of the kernel - const uint32_t hash = hash_kernel(k); - - // try to find identical kernel - kernel_eq_t cmp_eq = {ctx}; - ctx.dc_target = kernels.find_with(hash, k, cmp_eq); - if (ctx.dc_target != kernels_t::NIL) return false; - - // else try to find mappable kernel - // see note [bijective mappings] - kernel_map_t cmp_map = {ctx}; - ctx.dc_target = kernels.find_with(hash, k, cmp_map); - if (ctx.dc_target != kernels_t::NIL) return false; - - // otherwise add new kernel - kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator); - ctx.dc_target = kernels.push(hash, kcopy); - return true; + kernels_t &kernels = ctx.dc_kernels; + const closure_t &closure = ctx.dc_closure; + + // empty closure corresponds to default state + if (closure.size() == 0) { + ctx.dc_target = dfa_t::NIL; + ctx.dc_actions = NULL; + return false; + } + + // resize buffer if closure is too large + reserve_buffers(ctx); + kernel_t *k = ctx.dc_buffers.kernel; + + // copy closure to buffer kernel + copy_to_buffer_kernel(closure, ctx.dc_prectbl, k); + + // hash "static" part of the kernel + const uint32_t hash = hash_kernel(k); + + // try to find identical kernel + kernel_eq_t cmp_eq = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_eq); + if (ctx.dc_target != kernels_t::NIL) return false; + + // else try to find mappable kernel + // see note [bijective mappings] + kernel_map_t cmp_map = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_map); + if (ctx.dc_target != kernels_t::NIL) return false; + + // otherwise add new kernel + kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator); + ctx.dc_target = kernels.push(hash, kcopy); + return true; } tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) { - dfa_t &dfa = ctx.dc_dfa; - const Rule &rule = dfa.rules[fin.state->rule]; - const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers]; - const hidx_t look = fin.tlook; - const tag_history_t &thist = ctx.dc_taghistory; - tcpool_t &tcpool = dfa.tcpool; - tcmd_t *copy = NULL, *save = NULL, **p; - - for (size_t t = rule.ltag; t < rule.htag; ++t) { - - const Tag &tag = dfa.tags[t]; - if (fixed(tag)) continue; - - const tagver_t v = abs(vers[t]), l = thist.last(look, t); - tagver_t &f = dfa.finvers[t]; - if (l == TAGVER_ZERO) { - copy = tcpool.make_copy(copy, f, v); - } else if (history(tag)) { - save = tcpool.make_add(save, f, v, thist, look, t); - } else { - save = tcpool.make_set(save, f, l); - } - } - - // join 'copy' and 'save' commands - for (p = © *p; p = &(*p)->next); - *p = save; - - return copy; + dfa_t &dfa = ctx.dc_dfa; + const Rule &rule = dfa.rules[fin.state->rule]; + const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers]; + const hidx_t look = fin.tlook; + const tag_history_t &thist = ctx.dc_taghistory; + tcpool_t &tcpool = dfa.tcpool; + tcmd_t *copy = NULL, *save = NULL, **p; + + for (size_t t = rule.ltag; t < rule.htag; ++t) { + + const Tag &tag = dfa.tags[t]; + if (fixed(tag)) continue; + + const tagver_t v = abs(vers[t]), l = thist.last(look, t); + tagver_t &f = dfa.finvers[t]; + if (l == TAGVER_ZERO) { + copy = tcpool.make_copy(copy, f, v); + } else if (history(tag)) { + save = tcpool.make_add(save, f, v, thist, look, t); + } else { + save = tcpool.make_set(save, f, l); + } + } + + // join 'copy' and 'save' commands + for (p = © *p; p = &(*p)->next); + *p = save; + + return copy; } void find_state(determ_context_t &ctx) { - dfa_t &dfa = ctx.dc_dfa; - - // find or add the new state in the existing set of states - const bool is_new = do_find_state(ctx); - - if (is_new) { - // create new DFA state - dfa_state_t *t = new dfa_state_t(dfa.nchars); - dfa.states.push_back(t); - - // check if the new state is final - // see note [at most one final item per closure] - cclositer_t - b = ctx.dc_closure.begin(), - e = ctx.dc_closure.end(), - f = std::find_if(b, e, clos_t::fin); - if (f != e) { - t->tcmd[dfa.nchars] = final_actions(ctx, *f); - t->rule = f->state->rule; - } - } - - if (ctx.dc_origin == dfa_t::NIL) { - // initial state - dfa.tcmd0 = ctx.dc_actions; - } - else { - dfa_state_t *s = dfa.states[ctx.dc_origin]; - s->arcs[ctx.dc_symbol] = ctx.dc_target; - s->tcmd[ctx.dc_symbol] = ctx.dc_actions; - } - - ctx.dc_dump.state(ctx, is_new); + dfa_t &dfa = ctx.dc_dfa; + + // find or add the new state in the existing set of states + const bool is_new = do_find_state(ctx); + + if (is_new) { + // create new DFA state + dfa_state_t *t = new dfa_state_t(dfa.nchars); + dfa.states.push_back(t); + + // check if the new state is final + // see note [at most one final item per closure] + cclositer_t + b = ctx.dc_closure.begin(), + e = ctx.dc_closure.end(), + f = std::find_if(b, e, clos_t::fin); + if (f != e) { + t->tcmd[dfa.nchars] = final_actions(ctx, *f); + t->rule = f->state->rule; + } + } + + if (ctx.dc_origin == dfa_t::NIL) { + // initial state + dfa.tcmd0 = ctx.dc_actions; + } + else { + dfa_state_t *s = dfa.states[ctx.dc_origin]; + s->arcs[ctx.dc_symbol] = ctx.dc_target; + s->tcmd[ctx.dc_symbol] = ctx.dc_actions; + } + + ctx.dc_dump.state(ctx, is_new); } } // namespace re2c diff --git a/re2c/src/dfa/minimization.cc b/re2c/src/dfa/minimization.cc index 000e1a9d..1a42d37f 100644 --- a/re2c/src/dfa/minimization.cc +++ b/re2c/src/dfa/minimization.cc @@ -52,233 +52,233 @@ namespace re2c static void minimization_table( - size_t *part, - const std::vector &states, - size_t nchars) + size_t *part, + const std::vector &states, + size_t nchars) { - const size_t count = states.size(); + const size_t count = states.size(); - bool **tbl = new bool*[count]; - tbl[0] = new bool[count * (count - 1) / 2]; - for (size_t i = 0; i < count - 1; ++i) - { - tbl[i + 1] = tbl[i] + i; - } + bool **tbl = new bool*[count]; + tbl[0] = new bool[count * (count - 1) / 2]; + for (size_t i = 0; i < count - 1; ++i) + { + tbl[i + 1] = tbl[i] + i; + } - // see note [distinguish states by tags] - for (size_t i = 0; i < count; ++i) - { - dfa_state_t *s1 = states[i]; - for (size_t j = 0; j < i; ++j) - { - dfa_state_t *s2 = states[j]; - tbl[i][j] = s1->rule != s2->rule - || s1->tcid[nchars] != s2->tcid[nchars]; - } - } + // see note [distinguish states by tags] + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s1 = states[i]; + for (size_t j = 0; j < i; ++j) + { + dfa_state_t *s2 = states[j]; + tbl[i][j] = s1->rule != s2->rule + || s1->tcid[nchars] != s2->tcid[nchars]; + } + } - for (bool loop = true; loop;) - { - loop = false; - for (size_t i = 0; i < count; ++i) - { - for (size_t j = 0; j < i; ++j) - { - if (!tbl[i][j]) - { - for (size_t k = 0; k < nchars; ++k) - { - size_t oi = states[i]->arcs[k]; - size_t oj = states[j]->arcs[k]; - if (oi < oj) - { - std::swap(oi, oj); - } - if (states[i]->tcid[k] != states[j]->tcid[k] - || (oi != oj - && (oi == dfa_t::NIL - || oj == dfa_t::NIL - || tbl[oi][oj]))) - { - tbl[i][j] = true; - loop = true; - break; - } - } - } - } - } - } + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + for (size_t k = 0; k < nchars; ++k) + { + size_t oi = states[i]->arcs[k]; + size_t oj = states[j]->arcs[k]; + if (oi < oj) + { + std::swap(oi, oj); + } + if (states[i]->tcid[k] != states[j]->tcid[k] + || (oi != oj + && (oi == dfa_t::NIL + || oj == dfa_t::NIL + || tbl[oi][oj]))) + { + tbl[i][j] = true; + loop = true; + break; + } + } + } + } + } + } - // Equivalence relation defined by the matrix is transitive - // by construction. Thus we can simply find the first state - // which is not distinguishable from current and choose it as a - // representative: all other states with the same representative - // have to be equivalent to current state due to transitivity. - // - // The only requirement is to deterministically choose the - // representative: e.g. always choose the one with the lowest - // index. - // - // Note that transitivity is crucial: without it the problem - // would be equivalent to the clique cover problem. + // Equivalence relation defined by the matrix is transitive + // by construction. Thus we can simply find the first state + // which is not distinguishable from current and choose it as a + // representative: all other states with the same representative + // have to be equivalent to current state due to transitivity. + // + // The only requirement is to deterministically choose the + // representative: e.g. always choose the one with the lowest + // index. + // + // Note that transitivity is crucial: without it the problem + // would be equivalent to the clique cover problem. - for (size_t i = 0; i < count; ++i) - { - part[i] = i; - for (size_t j = 0; j < i; ++j) - { - if (!tbl[i][j]) - { - part[i] = j; - break; - } - } - } + for (size_t i = 0; i < count; ++i) + { + part[i] = i; + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + part[i] = j; + break; + } + } + } - delete[] tbl[0]; - delete[] tbl; + delete[] tbl[0]; + delete[] tbl; } static void minimization_moore( - size_t *part, - const std::vector &states, - size_t nchars) + size_t *part, + const std::vector &states, + size_t nchars) { - const size_t count = states.size(); + const size_t count = states.size(); - size_t *next = new size_t[count]; + size_t *next = new size_t[count]; - // see note [distinguish states by tags] - std::map, size_t> init; - for (size_t i = 0; i < count; ++i) - { - dfa_state_t *s = states[i]; - std::pair key(s->rule, s->tcid[nchars]); - if (init.insert(std::make_pair(key, i)).second) - { - part[i] = i; - next[i] = dfa_t::NIL; - } - else - { - const size_t j = init[key]; - part[i] = j; - next[i] = next[j]; - next[j] = i; - } - } + // see note [distinguish states by tags] + std::map, size_t> init; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = states[i]; + std::pair key(s->rule, s->tcid[nchars]); + if (init.insert(std::make_pair(key, i)).second) + { + part[i] = i; + next[i] = dfa_t::NIL; + } + else + { + const size_t j = init[key]; + part[i] = j; + next[i] = next[j]; + next[j] = i; + } + } - size_t *out = new size_t[nchars * count]; - size_t *diff = new size_t[count]; - for (bool loop = true; loop;) - { - loop = false; - for (size_t i = 0; i < count; ++i) - { - if (i != part[i] || next[i] == dfa_t::NIL) - { - continue; - } + size_t *out = new size_t[nchars * count]; + size_t *diff = new size_t[count]; + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + if (i != part[i] || next[i] == dfa_t::NIL) + { + continue; + } - for (size_t j = i; j != dfa_t::NIL; j = next[j]) - { - size_t *o = &out[j * nchars]; - size_t *a = states[j]->arcs; - for (size_t c = 0; c < nchars; ++c) - { - o[c] = a[c] == dfa_t::NIL - ? dfa_t::NIL - : part[a[c]]; - } - } + for (size_t j = i; j != dfa_t::NIL; j = next[j]) + { + size_t *o = &out[j * nchars]; + size_t *a = states[j]->arcs; + for (size_t c = 0; c < nchars; ++c) + { + o[c] = a[c] == dfa_t::NIL + ? dfa_t::NIL + : part[a[c]]; + } + } - size_t diff_count = 0; - for (size_t j = i; j != dfa_t::NIL;) - { - const size_t j_next = next[j]; - size_t n = 0; - for (; n < diff_count; ++n) - { - size_t k = diff[n]; - if (memcmp(&out[j * nchars], - &out[k * nchars], - nchars * sizeof(size_t)) == 0 - && memcmp(states[j]->tcid, - states[k]->tcid, - nchars * sizeof(tcid_t)) == 0 - ) { - part[j] = k; - next[j] = next[k]; - next[k] = j; - break; - } - } - if (n == diff_count) - { - diff[diff_count++] = j; - part[j] = j; - next[j] = dfa_t::NIL; - } - j = j_next; - } - loop |= diff_count > 1; - } - } - delete[] out; - delete[] diff; - delete[] next; + size_t diff_count = 0; + for (size_t j = i; j != dfa_t::NIL;) + { + const size_t j_next = next[j]; + size_t n = 0; + for (; n < diff_count; ++n) + { + size_t k = diff[n]; + if (memcmp(&out[j * nchars], + &out[k * nchars], + nchars * sizeof(size_t)) == 0 + && memcmp(states[j]->tcid, + states[k]->tcid, + nchars * sizeof(tcid_t)) == 0 + ) { + part[j] = k; + next[j] = next[k]; + next[k] = j; + break; + } + } + if (n == diff_count) + { + diff[diff_count++] = j; + part[j] = j; + next[j] = dfa_t::NIL; + } + j = j_next; + } + loop |= diff_count > 1; + } + } + delete[] out; + delete[] diff; + delete[] next; } void minimization(dfa_t &dfa, dfa_minimization_t type) { - const size_t count = dfa.states.size(); + const size_t count = dfa.states.size(); - size_t *part = new size_t[count]; + size_t *part = new size_t[count]; - switch (type) { - case DFA_MINIMIZATION_TABLE: - minimization_table(part, dfa.states, dfa.nchars); break; - case DFA_MINIMIZATION_MOORE: - minimization_moore(part, dfa.states, dfa.nchars); break; - } + switch (type) { + case DFA_MINIMIZATION_TABLE: + minimization_table(part, dfa.states, dfa.nchars); break; + case DFA_MINIMIZATION_MOORE: + minimization_moore(part, dfa.states, dfa.nchars); break; + } - size_t *compact = new size_t[count]; - for (size_t i = 0, j = 0; i < count; ++i) - { - if (i == part[i]) - { - compact[i] = j++; - } - } + size_t *compact = new size_t[count]; + for (size_t i = 0, j = 0; i < count; ++i) + { + if (i == part[i]) + { + compact[i] = j++; + } + } - size_t new_count = 0; - for (size_t i = 0; i < count; ++i) - { - dfa_state_t *s = dfa.states[i]; - if (i == part[i]) - { - size_t *arcs = s->arcs; - for (size_t c = 0; c < dfa.nchars; ++c) - { - if (arcs[c] != dfa_t::NIL) - { - arcs[c] = compact[part[arcs[c]]]; - } - } - dfa.states[new_count++] = s; - } - else - { - delete s; - } - } - dfa.states.resize(new_count); + size_t new_count = 0; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = dfa.states[i]; + if (i == part[i]) + { + size_t *arcs = s->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + if (arcs[c] != dfa_t::NIL) + { + arcs[c] = compact[part[arcs[c]]]; + } + } + dfa.states[new_count++] = s; + } + else + { + delete s; + } + } + dfa.states.resize(new_count); - delete[] compact; - delete[] part; + delete[] compact; + delete[] part; } } // namespace re2c diff --git a/re2c/src/dfa/tag_history.cc b/re2c/src/dfa/tag_history.cc index b2a82f09..fbaeadb2 100644 --- a/re2c/src/dfa/tag_history.cc +++ b/re2c/src/dfa/tag_history.cc @@ -28,155 +28,155 @@ size_t tag_history_t::tag(hidx_t i) const { return nodes[i].info.idx; } hidx_t tag_history_t::push(hidx_t idx, tag_info_t info) { - node_t x = {idx, info}; - nodes.push_back(x); - return static_cast(nodes.size() - 1); + node_t x = {idx, info}; + nodes.push_back(x); + return static_cast(nodes.size() - 1); } tagver_t tag_history_t::last(hidx_t i, size_t t) const { - for (; i != HROOT; i = pred(i)) { - if (tag(i) == t) return elem(i); - } - return TAGVER_ZERO; + for (; i != HROOT; i = pred(i)) { + if (tag(i) == t) return elem(i); + } + return TAGVER_ZERO; } int32_t tag_history_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const { - // compare in reverse, from tail to head: direction makes - // no difference when comparing for exact coincidence - for (;;) { - for (; x != HROOT && tag(x) != t; x = pred(x)); - for (; y != HROOT && tag(y) != t; y = pred(y)); - if (x == HROOT && y == HROOT) return 0; - if (x == HROOT) return -1; - if (y == HROOT) return 1; - if (elem(x) > elem(y)) return -1; - if (elem(x) < elem(y)) return 1; - x = pred(x); - y = pred(y); - } + // compare in reverse, from tail to head: direction makes + // no difference when comparing for exact coincidence + for (;;) { + for (; x != HROOT && tag(x) != t; x = pred(x)); + for (; y != HROOT && tag(y) != t; y = pred(y)); + if (x == HROOT && y == HROOT) return 0; + if (x == HROOT) return -1; + if (y == HROOT) return 1; + if (elem(x) > elem(y)) return -1; + if (elem(x) < elem(y)) return 1; + x = pred(x); + y = pred(y); + } } static void reconstruct_history(const tag_history_t &history, - tag_path_t &path, hidx_t idx) + tag_path_t &path, hidx_t idx) { - path.clear(); - for (; idx != HROOT; idx = history.pred(idx)) { - path.push_back(history.info(idx)); - } + path.clear(); + for (; idx != HROOT; idx = history.pred(idx)) { + path.push_back(history.info(idx)); + } } static inline int32_t unpack_longest(int32_t value) { - // lower 30 bits - return value & 0x3fffFFFF; + // lower 30 bits + return value & 0x3fffFFFF; } static inline int32_t unpack_leftmost(int32_t value) { - // higher 2 bits - return value >> 30u; + // higher 2 bits + return value >> 30u; } int32_t precedence(determ_context_t &ctx, - const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy) + const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy) { - const hidx_t xl = x.tlook, yl = y.tlook; - const uint32_t xo = x.origin, yo = y.origin; - - if (xl == yl && xo == yo) { - rhox = rhoy = -1; - return 0; - } - - tag_history_t &thist = ctx.dc_taghistory; - tag_path_t &p1 = thist.path1, &p2 = thist.path2; - reconstruct_history(thist, p1, xl); - reconstruct_history(thist, p2, yl); - tag_path_t::const_reverse_iterator - i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1, - i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2; - - const std::vector &tags = ctx.dc_dfa.tags; - size_t nclos = 0; - const prectable_t *prectbl = NULL; - const bool fork_frame = xo == yo; - - if (fork_frame) { - // find fork - for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2); - } - else { - // get precedence table and size of the origin state - const kernel_t *k = ctx.dc_kernels[ctx.dc_origin]; - nclos = k->size; - prectbl = k->prectbl; - } - - // longest precedence - if (!fork_frame) { - rhox = unpack_longest(prectbl[xo * nclos + yo]); - rhoy = unpack_longest(prectbl[yo * nclos + xo]); - } - else { - rhox = rhoy = std::numeric_limits::max(); - if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height; - } - for (g1 = j1; g1 != e1; ++g1) { - rhox = std::min(rhox, tags[g1->idx].height); - } - for (g2 = j2; g2 != e2; ++g2) { - rhoy = std::min(rhoy, tags[g2->idx].height); - } - if (rhox > rhoy) return -1; - if (rhox < rhoy) return 1; - - // leftmost precedence - if (!fork_frame) { - return unpack_leftmost(prectbl[xo * nclos + yo]); - } - else { - // equal => not less - if (j1 == e1 && j2 == e2) return 0; - - // shorter => less - if (j1 == e1) return -1; - if (j2 == e2) return 1; - - const uint32_t idx1 = j1->idx, idx2 = j2->idx; - const bool neg1 = j1->neg, neg2 = j2->neg; - - // can't be both closing - assert(!(idx1 % 2 == 1 && idx2 % 2 == 1)); - - // closing vs opening: closing wins - if (idx1 % 2 == 1) return -1; - if (idx2 % 2 == 1) return 1; - - // can't be both negative - assert(!(neg1 && neg2)); - - // positive vs negative: positive wins - if (neg1) return 1; - if (neg2) return -1; - - // positive vs positive: smaller wins - // (this case is only possible because multiple - // top-level RE don't have proper negative tags) - if (idx1 < idx2) return -1; - if (idx1 > idx2) return 1; - } - - // unreachable - assert(false); - return 0; + const hidx_t xl = x.tlook, yl = y.tlook; + const uint32_t xo = x.origin, yo = y.origin; + + if (xl == yl && xo == yo) { + rhox = rhoy = -1; + return 0; + } + + tag_history_t &thist = ctx.dc_taghistory; + tag_path_t &p1 = thist.path1, &p2 = thist.path2; + reconstruct_history(thist, p1, xl); + reconstruct_history(thist, p2, yl); + tag_path_t::const_reverse_iterator + i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1, + i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2; + + const std::vector &tags = ctx.dc_dfa.tags; + size_t nclos = 0; + const prectable_t *prectbl = NULL; + const bool fork_frame = xo == yo; + + if (fork_frame) { + // find fork + for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2); + } + else { + // get precedence table and size of the origin state + const kernel_t *k = ctx.dc_kernels[ctx.dc_origin]; + nclos = k->size; + prectbl = k->prectbl; + } + + // longest precedence + if (!fork_frame) { + rhox = unpack_longest(prectbl[xo * nclos + yo]); + rhoy = unpack_longest(prectbl[yo * nclos + xo]); + } + else { + rhox = rhoy = std::numeric_limits::max(); + if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height; + } + for (g1 = j1; g1 != e1; ++g1) { + rhox = std::min(rhox, tags[g1->idx].height); + } + for (g2 = j2; g2 != e2; ++g2) { + rhoy = std::min(rhoy, tags[g2->idx].height); + } + if (rhox > rhoy) return -1; + if (rhox < rhoy) return 1; + + // leftmost precedence + if (!fork_frame) { + return unpack_leftmost(prectbl[xo * nclos + yo]); + } + else { + // equal => not less + if (j1 == e1 && j2 == e2) return 0; + + // shorter => less + if (j1 == e1) return -1; + if (j2 == e2) return 1; + + const uint32_t idx1 = j1->idx, idx2 = j2->idx; + const bool neg1 = j1->neg, neg2 = j2->neg; + + // can't be both closing + assert(!(idx1 % 2 == 1 && idx2 % 2 == 1)); + + // closing vs opening: closing wins + if (idx1 % 2 == 1) return -1; + if (idx2 % 2 == 1) return 1; + + // can't be both negative + assert(!(neg1 && neg2)); + + // positive vs negative: positive wins + if (neg1) return 1; + if (neg2) return -1; + + // positive vs positive: smaller wins + // (this case is only possible because multiple + // top-level RE don't have proper negative tags) + if (idx1 < idx2) return -1; + if (idx1 > idx2) return 1; + } + + // unreachable + assert(false); + return 0; } } // namespace re2c diff --git a/re2c/src/dfa/tag_history.h b/re2c/src/dfa/tag_history.h index bca3d1d9..9b907578 100644 --- a/re2c/src/dfa/tag_history.h +++ b/re2c/src/dfa/tag_history.h @@ -22,28 +22,28 @@ typedef std::vector tag_path_t; struct tag_history_t { - // the whole tree of tags found by the epsilon-closure - // (a bunch of separate subtrees for each tag with common root) - struct node_t { - hidx_t pred; - tag_info_t info; - }; - std::vector nodes; - - // reconstruct paths for comparison - tag_path_t path1; - tag_path_t path2; - - tag_history_t(); - hidx_t pred(hidx_t i) const; - tag_info_t info(hidx_t i) const; - tagver_t elem(hidx_t i) const; - size_t tag(hidx_t i) const; - hidx_t push(hidx_t i, tag_info_t info); - tagver_t last(hidx_t i, size_t t) const; - int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const; - - FORBID_COPY(tag_history_t); + // the whole tree of tags found by the epsilon-closure + // (a bunch of separate subtrees for each tag with common root) + struct node_t { + hidx_t pred; + tag_info_t info; + }; + std::vector nodes; + + // reconstruct paths for comparison + tag_path_t path1; + tag_path_t path2; + + tag_history_t(); + hidx_t pred(hidx_t i) const; + tag_info_t info(hidx_t i) const; + tagver_t elem(hidx_t i) const; + size_t tag(hidx_t i) const; + hidx_t push(hidx_t i, tag_info_t info); + tagver_t last(hidx_t i, size_t t) const; + int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const; + + FORBID_COPY(tag_history_t); }; } // namespace re2c diff --git a/re2c/src/dfa/tagver_table.cc b/re2c/src/dfa/tagver_table.cc index e4ee24cb..8e9e225f 100644 --- a/re2c/src/dfa/tagver_table.cc +++ b/re2c/src/dfa/tagver_table.cc @@ -10,69 +10,69 @@ namespace re2c struct eqtag_t { - size_t ntags; + size_t ntags; - explicit eqtag_t(size_t n): ntags(n) {} - inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const - { - return memcmp(x, y, ntags * sizeof(tagver_t)) == 0; - } + explicit eqtag_t(size_t n): ntags(n) {} + inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const + { + return memcmp(x, y, ntags * sizeof(tagver_t)) == 0; + } }; tagver_table_t::tagver_table_t(size_t n) - : lookup() - , ntags(n) - , buffer(new tagver_t[n]) + : lookup() + , ntags(n) + , buffer(new tagver_t[n]) {} tagver_table_t::~tagver_table_t() { - delete[] buffer; - const size_t n = lookup.size(); - for (uint32_t i = 0; i < n; ++i) { - free(const_cast(lookup[i])); - } + delete[] buffer; + const size_t n = lookup.size(); + for (uint32_t i = 0; i < n; ++i) { + free(const_cast(lookup[i])); + } } uint32_t tagver_table_t::insert_const(tagver_t ver) { - std::fill(buffer, buffer + ntags, ver); - return insert(buffer); + std::fill(buffer, buffer + ntags, ver); + return insert(buffer); } uint32_t tagver_table_t::insert_succ(tagver_t fst) { - for (uint32_t i = 0; i < ntags; ++i) { - buffer[i] = fst++; - } - return insert(buffer); + for (uint32_t i = 0; i < ntags; ++i) { + buffer[i] = fst++; + } + return insert(buffer); } uint32_t tagver_table_t::insert(const tagver_t *tags) { - const size_t size = ntags * sizeof(tagver_t); - const uint32_t hash = hash32(0, tags, size); - - eqtag_t eq(ntags); - const uint32_t idx = lookup.find_with(hash, tags, eq); - if (idx != taglookup_t::NIL) { - return idx; - } - - tagver_t *copy = static_cast(malloc(size)); - memcpy(copy, tags, size); - return lookup.push(hash, copy); + const size_t size = ntags * sizeof(tagver_t); + const uint32_t hash = hash32(0, tags, size); + + eqtag_t eq(ntags); + const uint32_t idx = lookup.find_with(hash, tags, eq); + if (idx != taglookup_t::NIL) { + return idx; + } + + tagver_t *copy = static_cast(malloc(size)); + memcpy(copy, tags, size); + return lookup.push(hash, copy); } const tagver_t *tagver_table_t::operator[](uint32_t idx) const { - return lookup[idx]; + return lookup[idx]; } } // namespace re2c diff --git a/re2c/src/dfa/tagver_table.h b/re2c/src/dfa/tagver_table.h index 4d584fa6..69d41a97 100644 --- a/re2c/src/dfa/tagver_table.h +++ b/re2c/src/dfa/tagver_table.h @@ -17,20 +17,20 @@ static const size_t ZERO_TAGS = 0; struct tagver_table_t { private: - typedef lookup_t taglookup_t; - taglookup_t lookup; + typedef lookup_t taglookup_t; + taglookup_t lookup; public: - const size_t ntags; - tagver_t *buffer; - - explicit tagver_table_t(size_t n); - ~tagver_table_t(); - uint32_t insert_const(tagver_t ver); - uint32_t insert_succ(tagver_t fst); - uint32_t insert(const tagver_t *tags); - const tagver_t *operator[](uint32_t idx) const; - FORBID_COPY(tagver_table_t); + const size_t ntags; + tagver_t *buffer; + + explicit tagver_table_t(size_t n); + ~tagver_table_t(); + uint32_t insert_const(tagver_t ver); + uint32_t insert_succ(tagver_t fst); + uint32_t insert(const tagver_t *tags); + const tagver_t *operator[](uint32_t idx) const; + FORBID_COPY(tagver_table_t); }; } // namespace re2c diff --git a/re2c/src/dfa/tcmd.cc b/re2c/src/dfa/tcmd.cc index 9fd9258d..7d3503bd 100644 --- a/re2c/src/dfa/tcmd.cc +++ b/re2c/src/dfa/tcmd.cc @@ -40,206 +40,206 @@ static uint32_t hash_tcmd(const tcmd_t *tcmd); bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y) { - return x.lhs == y.lhs - && x.rhs == y.rhs - && equal_history(x.history, y.history); + return x.lhs == y.lhs + && x.rhs == y.rhs + && equal_history(x.history, y.history); } bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g) { - for (;;) { - if (*h != *g) return false; - if (*h == TAGVER_ZERO) return true; - ++h; ++g; - } + for (;;) { + if (*h != *g) return false; + if (*h == TAGVER_ZERO) return true; + ++h; ++g; + } } bool tcmd_t::iscopy(const tcmd_t *x) { - return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO; + return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO; } bool tcmd_t::isset(const tcmd_t *x) { - if (x->rhs == TAGVER_ZERO) { - assert(x->history[0] != TAGVER_ZERO); - return true; - } - return false; + if (x->rhs == TAGVER_ZERO) { + assert(x->history[0] != TAGVER_ZERO); + return true; + } + return false; } bool tcmd_t::isadd(const tcmd_t *x) { - return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO; + return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO; } bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg) { - tcmd_t *x0 = *phead, *x, *y0 = NULL, **py; - bool nontrivial_cycles = false; - - // initialize in-degree - for (x = x0; x; x = x->next) { - indeg[x->lhs] = indeg[x->rhs] = 0; - } - for (x = x0; x; x = x->next) { - ++indeg[x->rhs]; - } - - for (py = &y0;;) { - // reached end of list - if (!x0) break; - - tcmd_t **px = &x0, **py1 = py; - for (x = x0; x; x = x->next) { - if (indeg[x->lhs] == 0) { - --indeg[x->rhs]; - *py = x; - py = &x->next; - } else { - *px = x; - px = &x->next; - } - } - *px = NULL; - - // only cycles left - if (py == py1) { - // look for cycles of length 2 or more - for (x = x0; x && x->lhs == x->rhs; x = x->next); - nontrivial_cycles = x != NULL; - break; - } - } - *py = x0; - - *phead = y0; - return nontrivial_cycles; + tcmd_t *x0 = *phead, *x, *y0 = NULL, **py; + bool nontrivial_cycles = false; + + // initialize in-degree + for (x = x0; x; x = x->next) { + indeg[x->lhs] = indeg[x->rhs] = 0; + } + for (x = x0; x; x = x->next) { + ++indeg[x->rhs]; + } + + for (py = &y0;;) { + // reached end of list + if (!x0) break; + + tcmd_t **px = &x0, **py1 = py; + for (x = x0; x; x = x->next) { + if (indeg[x->lhs] == 0) { + --indeg[x->rhs]; + *py = x; + py = &x->next; + } else { + *px = x; + px = &x->next; + } + } + *px = NULL; + + // only cycles left + if (py == py1) { + // look for cycles of length 2 or more + for (x = x0; x && x->lhs == x->rhs; x = x->next); + nontrivial_cycles = x != NULL; + break; + } + } + *py = x0; + + *phead = y0; + return nontrivial_cycles; } tcpool_t::tcpool_t() - : alc() - , index() + : alc() + , index() { - // empty command must have static number zero - assert(TCID0 == insert(NULL)); + // empty command must have static number zero + assert(TCID0 == insert(NULL)); } tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs) { - tcmd_t *p = alc.alloct(1); - p->next = next; - p->lhs = lhs; - p->rhs = rhs; - p->history[0] = TAGVER_ZERO; - return p; + tcmd_t *p = alc.alloct(1); + p->next = next; + p->lhs = lhs; + p->rhs = rhs; + p->history[0] = TAGVER_ZERO; + return p; } tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set) { - const size_t size = sizeof(tcmd_t) + sizeof(tagver_t); - tcmd_t *p = static_cast(alc.alloc(size)); - p->next = next; - p->lhs = lhs; - p->rhs = TAGVER_ZERO; - p->history[0] = set; - p->history[1] = TAGVER_ZERO; - return p; + const size_t size = sizeof(tcmd_t) + sizeof(tagver_t); + tcmd_t *p = static_cast(alc.alloc(size)); + p->next = next; + p->lhs = lhs; + p->rhs = TAGVER_ZERO; + p->history[0] = set; + p->history[1] = TAGVER_ZERO; + return p; } tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, - const tag_history_t &history, hidx_t hidx, size_t tag) -{ - size_t hlen = 0; - for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { - if (history.tag(i) == tag) ++hlen; - } - - const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); - tcmd_t *p = static_cast(alc.alloc(size)); - p->next = next; - p->lhs = lhs; - p->rhs = rhs; - tagver_t *h = p->history; - for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { - if (history.tag(i) == tag) { - *h++ = history.elem(i); - } - } - *h++ = TAGVER_ZERO; - return p; + const tag_history_t &history, hidx_t hidx, size_t tag) +{ + size_t hlen = 0; + for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { + if (history.tag(i) == tag) ++hlen; + } + + const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); + tcmd_t *p = static_cast(alc.alloc(size)); + p->next = next; + p->lhs = lhs; + p->rhs = rhs; + tagver_t *h = p->history; + for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { + if (history.tag(i) == tag) { + *h++ = history.elem(i); + } + } + *h++ = TAGVER_ZERO; + return p; } tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, - const tagver_t *history) -{ - size_t hlen = 0; - for (const tagver_t *h = history; *h != TAGVER_ZERO; ++h) ++hlen; - - const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); - tcmd_t *p = static_cast(alc.alloc(size)); - p->next = next; - p->lhs = lhs; - p->rhs = rhs; - memcpy(p->history, history, (hlen + 1) * sizeof(tagver_t)); - return p; + const tagver_t *history) +{ + size_t hlen = 0; + for (const tagver_t *h = history; *h != TAGVER_ZERO; ++h) ++hlen; + + const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); + tcmd_t *p = static_cast(alc.alloc(size)); + p->next = next; + p->lhs = lhs; + p->rhs = rhs; + memcpy(p->history, history, (hlen + 1) * sizeof(tagver_t)); + return p; } uint32_t hash_tcmd(const tcmd_t *tcmd) { - uint32_t h = 0; - for (const tcmd_t *p = tcmd; p; p = p->next) { - h = hash32(h, &p->lhs, sizeof(p->lhs)); - h = hash32(h, &p->rhs, sizeof(p->rhs)); - h = hash32(h, &p->history[0], sizeof(p->history[0])); - } - return h; + uint32_t h = 0; + for (const tcmd_t *p = tcmd; p; p = p->next) { + h = hash32(h, &p->lhs, sizeof(p->lhs)); + h = hash32(h, &p->rhs, sizeof(p->rhs)); + h = hash32(h, &p->history[0], sizeof(p->history[0])); + } + return h; } struct tcmd_eq_t { - bool operator()(const tcmd_t *x, const tcmd_t *y) const - { - for (;;) { - if (!x && !y) return true; - if (!x || !y) return false; - if (!tcmd_t::equal(*x, *y)) return false; - x = x->next; - y = y->next; - } - } + bool operator()(const tcmd_t *x, const tcmd_t *y) const + { + for (;;) { + if (!x && !y) return true; + if (!x || !y) return false; + if (!tcmd_t::equal(*x, *y)) return false; + x = x->next; + y = y->next; + } + } }; tcid_t tcpool_t::insert(const tcmd_t *tcmd) { - const uint32_t h = hash_tcmd(tcmd); + const uint32_t h = hash_tcmd(tcmd); - tcmd_eq_t eq; - size_t id = index.find_with(h, tcmd, eq); - if (id == index_t::NIL) { - id = index.push(h, tcmd); - } + tcmd_eq_t eq; + size_t id = index.find_with(h, tcmd, eq); + if (id == index_t::NIL) { + id = index.push(h, tcmd); + } - return static_cast(id); + return static_cast(id); } const tcmd_t *tcpool_t::operator[](tcid_t id) const { - return index[id]; + return index[id]; } } // namespace re2c diff --git a/re2c/src/dfa/tcmd.h b/re2c/src/dfa/tcmd.h index 40f70b28..46ce86fc 100644 --- a/re2c/src/dfa/tcmd.h +++ b/re2c/src/dfa/tcmd.h @@ -14,17 +14,17 @@ namespace re2c struct tcmd_t { - tcmd_t *next; - tagver_t lhs; // left hand side - tagver_t rhs; // right hand side - tagver_t history[1]; - - static bool equal(const tcmd_t &x, const tcmd_t &y); - static bool equal_history(const tagver_t *h, const tagver_t *g); - static bool topsort(tcmd_t **phead, uint32_t *indeg); - static bool iscopy(const tcmd_t *x); - static bool isset(const tcmd_t *x); - static bool isadd(const tcmd_t *x); + tcmd_t *next; + tagver_t lhs; // left hand side + tagver_t rhs; // right hand side + tagver_t history[1]; + + static bool equal(const tcmd_t &x, const tcmd_t &y); + static bool equal_history(const tagver_t *h, const tagver_t *g); + static bool topsort(tcmd_t **phead, uint32_t *indeg); + static bool iscopy(const tcmd_t *x); + static bool isset(const tcmd_t *x); + static bool isadd(const tcmd_t *x); }; typedef uint32_t tcid_t; @@ -33,20 +33,20 @@ static const tcid_t TCID0 = 0; class tcpool_t { - typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t; - typedef lookup_t index_t; + typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t; + typedef lookup_t index_t; - alc_t alc; - index_t index; + alc_t alc; + index_t index; public: - tcpool_t(); - tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs); - tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set); - tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag); - tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history); - tcid_t insert(const tcmd_t *tcmd); - const tcmd_t *operator[](tcid_t id) const; + tcpool_t(); + tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs); + tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set); + tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag); + tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history); + tcid_t insert(const tcmd_t *tcmd); + const tcmd_t *operator[](tcid_t id) const; }; } // namespace re2c diff --git a/re2c/src/main.cc b/re2c/src/main.cc index 55d0499e..999ffed7 100644 --- a/re2c/src/main.cc +++ b/re2c/src/main.cc @@ -10,26 +10,26 @@ using namespace re2c; int main(int, char *argv[]) { - conopt_t globopts; - Opt opts(globopts); - Warn warn; + conopt_t globopts; + Opt opts(globopts); + Warn warn; - switch (parse_opts(argv, globopts, opts, warn)) { - case OK: break; - case EXIT_OK: return 0; - case EXIT_FAIL: return 1; - } + switch (parse_opts(argv, globopts, opts, warn)) { + case OK: break; + case EXIT_OK: return 0; + case EXIT_FAIL: return 1; + } - re2c::Input input(opts.source_file); - if (!input.open()) { - error("cannot open source file: %s", opts.source_file); - return 1; - } - Scanner scanner(input, warn); - Output output(warn); + re2c::Input input(opts.source_file); + if (!input.open()) { + error("cannot open source file: %s", opts.source_file); + return 1; + } + Scanner scanner(input, warn); + Output output(warn); - compile(scanner, output, opts); - if (!output.emit()) return 1; + compile(scanner, output, opts); + if (!output.emit()) return 1; - return warn.error() ? 1 : 0; + return warn.error() ? 1 : 0; } diff --git a/re2c/src/nfa/dump.cc b/re2c/src/nfa/dump.cc index 8516b2fa..13f6142f 100644 --- a/re2c/src/nfa/dump.cc +++ b/re2c/src/nfa/dump.cc @@ -12,70 +12,70 @@ namespace re2c static uint32_t index(const nfa_t &nfa, const nfa_state_t *s) { - return static_cast(s - nfa.states); + return static_cast(s - nfa.states); } void dump_nfa(const nfa_t &nfa) { - fprintf(stderr, - "digraph NFA {\n" - " rankdir=LR\n" - " node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n" - " edge[arrowhead=vee fontname=Courier label=\" \"]\n\n"); + fprintf(stderr, + "digraph NFA {\n" + " rankdir=LR\n" + " node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n" + " edge[arrowhead=vee fontname=Courier label=\" \"]\n\n"); - for (uint32_t i = static_cast(nfa.size); i --> 0;) { - const nfa_state_t *n = &nfa.states[i]; + for (uint32_t i = static_cast(nfa.size); i --> 0;) { + const nfa_state_t *n = &nfa.states[i]; - fprintf(stderr, " n%u [label=\"%u(%d)\"]", i, i, n->indeg); - if (n->type == nfa_state_t::FIN) { - fprintf(stderr, " [fillcolor=gray]"); - } - fprintf(stderr, "\n"); + fprintf(stderr, " n%u [label=\"%u(%d)\"]", i, i, n->indeg); + if (n->type == nfa_state_t::FIN) { + fprintf(stderr, " [fillcolor=gray]"); + } + fprintf(stderr, "\n"); - switch (n->type) { - case nfa_state_t::ALT: - fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->alt.out1)); - fprintf(stderr, " n%u -> n%u [color=lightgray]\n", i, index(nfa, n->alt.out2)); - break; - case nfa_state_t::RAN: { - fprintf(stderr, " n%u -> n%u [label=\"", i, index(nfa, n->ran.out)); - for (const Range *r = n->ran.ran; r; r = r->next()) { - const uint32_t - l = r->lower(), - u = r->upper() - 1; - fprintf(stderr, "%u", l); - if (u > l) fprintf(stderr, "-%u", u); - if (r->next()) fprintf(stderr, ","); - } - fprintf(stderr, "\"]\n"); - break; - } - case nfa_state_t::TAG: { - const Tag &tag = nfa.tags[n->tag.info.idx]; - fprintf(stderr, " n%u -> n%u [label=\"/", i, index(nfa, n->tag.out)); - if (capture(tag)) { - fprintf(stderr, "%u", (uint32_t)tag.ncap); - } else if (!trailing(tag)) { - fprintf(stderr, "%s", tag.name->c_str()); - } - if (n->tag.info.neg) { - fprintf(stderr, "↓"); - } else { - fprintf(stderr, "↑"); - } - fprintf(stderr, "(%d)", tag.height); - fprintf(stderr, "\"]\n"); - break; - } - case nfa_state_t::FIN: - break; - case nfa_state_t::NIL: - fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->nil.out)); - break; - } - } + switch (n->type) { + case nfa_state_t::ALT: + fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->alt.out1)); + fprintf(stderr, " n%u -> n%u [color=lightgray]\n", i, index(nfa, n->alt.out2)); + break; + case nfa_state_t::RAN: { + fprintf(stderr, " n%u -> n%u [label=\"", i, index(nfa, n->ran.out)); + for (const Range *r = n->ran.ran; r; r = r->next()) { + const uint32_t + l = r->lower(), + u = r->upper() - 1; + fprintf(stderr, "%u", l); + if (u > l) fprintf(stderr, "-%u", u); + if (r->next()) fprintf(stderr, ","); + } + fprintf(stderr, "\"]\n"); + break; + } + case nfa_state_t::TAG: { + const Tag &tag = nfa.tags[n->tag.info.idx]; + fprintf(stderr, " n%u -> n%u [label=\"/", i, index(nfa, n->tag.out)); + if (capture(tag)) { + fprintf(stderr, "%u", (uint32_t)tag.ncap); + } else if (!trailing(tag)) { + fprintf(stderr, "%s", tag.name->c_str()); + } + if (n->tag.info.neg) { + fprintf(stderr, "↓"); + } else { + fprintf(stderr, "↑"); + } + fprintf(stderr, "(%d)", tag.height); + fprintf(stderr, "\"]\n"); + break; + } + case nfa_state_t::FIN: + break; + case nfa_state_t::NIL: + fprintf(stderr, " n%u -> n%u\n", i, index(nfa, n->nil.out)); + break; + } + } - fprintf(stderr, "}\n"); + fprintf(stderr, "}\n"); } } // namespace re2c diff --git a/re2c/src/nfa/estimate_size.cc b/re2c/src/nfa/estimate_size.cc index 98c2143f..2ee804dc 100644 --- a/re2c/src/nfa/estimate_size.cc +++ b/re2c/src/nfa/estimate_size.cc @@ -9,38 +9,38 @@ namespace re2c { static size_t estimate(const RE *re) { - switch (re->type) { - case RE::NIL: return 0; - case RE::SYM: return 1; - case RE::TAG: return 1; - case RE::ALT: - return estimate(re->alt.re1) - + estimate(re->alt.re2) - + 1; - case RE::CAT: - return estimate(re->cat.re1) - + estimate(re->cat.re2); - case RE::ITER: { - const size_t - iter = estimate(re->iter.re), - min = re->iter.min, - max = re->iter.max; - return max == AST::MANY - ? iter * min + 1 - : iter * max + (max - min); - } - } - return 0; /* unreachable */ + switch (re->type) { + case RE::NIL: return 0; + case RE::SYM: return 1; + case RE::TAG: return 1; + case RE::ALT: + return estimate(re->alt.re1) + + estimate(re->alt.re2) + + 1; + case RE::CAT: + return estimate(re->cat.re1) + + estimate(re->cat.re2); + case RE::ITER: { + const size_t + iter = estimate(re->iter.re), + min = re->iter.min, + max = re->iter.max; + return max == AST::MANY + ? iter * min + 1 + : iter * max + (max - min); + } + } + return 0; /* unreachable */ } size_t estimate_size(const std::vector &res) { - const size_t nre = res.size(); - size_t size = nre - 1; - for (size_t i = 0; i < nre; ++i) { - size += estimate(res[i]) + 1; - } - return size; + const size_t nre = res.size(); + size_t size = nre - 1; + for (size_t i = 0; i < nre; ++i) { + size += estimate(res[i]) + 1; + } + return size; } } // namespace re2c diff --git a/re2c/src/nfa/nfa.h b/re2c/src/nfa/nfa.h index 880f32fb..24286a59 100644 --- a/re2c/src/nfa/nfa.h +++ b/re2c/src/nfa/nfa.h @@ -24,96 +24,96 @@ static const uint32_t NOCLOS = ~0u; struct nfa_state_t { - enum type_t {ALT, RAN, TAG, FIN, NIL} type; - union - { - struct - { - nfa_state_t *out1; - nfa_state_t *out2; - } alt; - struct - { - nfa_state_t *out; - const Range *ran; - } ran; - struct - { - nfa_state_t *out; - tag_info_t info; - } tag; - struct - { - nfa_state_t *out; - } nil; - }; - size_t rule; - - // stuff needed for GOR1 - uint32_t clos; - gor_status_t status : 2; // values 0, 1, 2 - uint32_t arcidx : 2; // maximum out-dergee is 2 - uint32_t active : 1; // boolean - uint32_t indeg : 27; // the rest; we are unlikely to have more than 2^27 states - - void init(size_t r) - { - rule = r; - clos = NOCLOS; - status = GOR_NOPASS; - arcidx = 0; - active = 0; - indeg = 0; - } - - void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2) - { - type = ALT; - alt.out1 = s1; - alt.out2 = s2; - init(r); - } - void make_ran(size_t r, nfa_state_t *s, const Range *p) - { - type = RAN; - ran.out = s; - ran.ran = p; - init(r); - } - void make_tag(size_t r, nfa_state_t *s, tag_info_t info) - { - type = TAG; - tag.out = s; - tag.info = info; - init(r); - } - void make_fin(size_t r) - { - type = FIN; - init(r); - } - void make_nil(size_t r, nfa_state_t *s) - { - type = NIL; - nil.out = s; - init(r); - } + enum type_t {ALT, RAN, TAG, FIN, NIL} type; + union + { + struct + { + nfa_state_t *out1; + nfa_state_t *out2; + } alt; + struct + { + nfa_state_t *out; + const Range *ran; + } ran; + struct + { + nfa_state_t *out; + tag_info_t info; + } tag; + struct + { + nfa_state_t *out; + } nil; + }; + size_t rule; + + // stuff needed for GOR1 + uint32_t clos; + gor_status_t status : 2; // values 0, 1, 2 + uint32_t arcidx : 2; // maximum out-dergee is 2 + uint32_t active : 1; // boolean + uint32_t indeg : 27; // the rest; we are unlikely to have more than 2^27 states + + void init(size_t r) + { + rule = r; + clos = NOCLOS; + status = GOR_NOPASS; + arcidx = 0; + active = 0; + indeg = 0; + } + + void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2) + { + type = ALT; + alt.out1 = s1; + alt.out2 = s2; + init(r); + } + void make_ran(size_t r, nfa_state_t *s, const Range *p) + { + type = RAN; + ran.out = s; + ran.ran = p; + init(r); + } + void make_tag(size_t r, nfa_state_t *s, tag_info_t info) + { + type = TAG; + tag.out = s; + tag.info = info; + init(r); + } + void make_fin(size_t r) + { + type = FIN; + init(r); + } + void make_nil(size_t r, nfa_state_t *s) + { + type = NIL; + nil.out = s; + init(r); + } }; struct nfa_t { - size_t max_size; - size_t size; - nfa_state_t *states; - std::vector &charset; - std::valarray &rules; - std::vector &tags; - nfa_state_t *root; - - explicit nfa_t(const RESpec &spec); - ~nfa_t(); - - FORBID_COPY(nfa_t); + size_t max_size; + size_t size; + nfa_state_t *states; + std::vector &charset; + std::valarray &rules; + std::vector &tags; + nfa_state_t *root; + + explicit nfa_t(const RESpec &spec); + ~nfa_t(); + + FORBID_COPY(nfa_t); }; size_t estimate_size(const std::vector &res); diff --git a/re2c/src/nfa/re_to_nfa.cc b/re2c/src/nfa/re_to_nfa.cc index 81e81726..a855a09d 100644 --- a/re2c/src/nfa/re_to_nfa.cc +++ b/re2c/src/nfa/re_to_nfa.cc @@ -31,120 +31,120 @@ namespace re2c { static nfa_state_t *re_to_nfa(nfa_t &nfa, size_t nrule, const RE *re, nfa_state_t *t) { - nfa_state_t *s = NULL; - switch (re->type) { - case RE::NIL: - s = t; - break; - case RE::SYM: - s = &nfa.states[nfa.size++]; - s->make_ran(nrule, t, re->sym); - break; - case RE::ALT: { - nfa_state_t - *s1 = re_to_nfa(nfa, nrule, re->alt.re1, t), - *s2 = re_to_nfa(nfa, nrule, re->alt.re2, t); - s = &nfa.states[nfa.size++]; - s->make_alt(nrule, s1, s2); - break; - } - case RE::CAT: - s = re_to_nfa(nfa, nrule, re->cat.re2, t); - s = re_to_nfa(nfa, nrule, re->cat.re1, s); - break; - case RE::ITER: { - const uint32_t - min = re->iter.min, - max = re->iter.max; - const RE *iter = re->iter.re; - // see note [counted repetition and iteration expansion] - if (max == AST::MANY) { - nfa_state_t *q = &nfa.states[nfa.size++]; - s = re_to_nfa(nfa, nrule, iter, q); - q->make_alt(nrule, s, t); - } else { - s = re_to_nfa(nfa, nrule, iter, t); - for (uint32_t i = min; i < max; ++i) { - nfa_state_t *q = &nfa.states[nfa.size++]; - q->make_alt(nrule, s, t); - s = re_to_nfa(nfa, nrule, iter, q); - } - } - for (uint32_t i = 1; i < min; ++i) { - s = re_to_nfa(nfa, nrule, iter, s); - } - break; - } - case RE::TAG: { - const Tag &tag = nfa.tags[re->tag.idx]; - if (fixed(tag) && !capture(tag)) { - s = t; - } else { - s = &nfa.states[nfa.size++]; - s->make_tag(nrule, t, re->tag); - } - break; - } - } - return s; + nfa_state_t *s = NULL; + switch (re->type) { + case RE::NIL: + s = t; + break; + case RE::SYM: + s = &nfa.states[nfa.size++]; + s->make_ran(nrule, t, re->sym); + break; + case RE::ALT: { + nfa_state_t + *s1 = re_to_nfa(nfa, nrule, re->alt.re1, t), + *s2 = re_to_nfa(nfa, nrule, re->alt.re2, t); + s = &nfa.states[nfa.size++]; + s->make_alt(nrule, s1, s2); + break; + } + case RE::CAT: + s = re_to_nfa(nfa, nrule, re->cat.re2, t); + s = re_to_nfa(nfa, nrule, re->cat.re1, s); + break; + case RE::ITER: { + const uint32_t + min = re->iter.min, + max = re->iter.max; + const RE *iter = re->iter.re; + // see note [counted repetition and iteration expansion] + if (max == AST::MANY) { + nfa_state_t *q = &nfa.states[nfa.size++]; + s = re_to_nfa(nfa, nrule, iter, q); + q->make_alt(nrule, s, t); + } else { + s = re_to_nfa(nfa, nrule, iter, t); + for (uint32_t i = min; i < max; ++i) { + nfa_state_t *q = &nfa.states[nfa.size++]; + q->make_alt(nrule, s, t); + s = re_to_nfa(nfa, nrule, iter, q); + } + } + for (uint32_t i = 1; i < min; ++i) { + s = re_to_nfa(nfa, nrule, iter, s); + } + break; + } + case RE::TAG: { + const Tag &tag = nfa.tags[re->tag.idx]; + if (fixed(tag) && !capture(tag)) { + s = t; + } else { + s = &nfa.states[nfa.size++]; + s->make_tag(nrule, t, re->tag); + } + break; + } + } + return s; } void calc_indegrees(nfa_state_t *n) { - ++n->indeg; - if (n->indeg > 1) return; + ++n->indeg; + if (n->indeg > 1) return; - switch (n->type) { - case nfa_state_t::NIL: - calc_indegrees(n->nil.out); - break; - case nfa_state_t::ALT: - calc_indegrees(n->alt.out1); - calc_indegrees(n->alt.out2); - break; - case nfa_state_t::TAG: - calc_indegrees(n->tag.out); - break; - case nfa_state_t::RAN: - calc_indegrees(n->ran.out); - case nfa_state_t::FIN: - break; - } + switch (n->type) { + case nfa_state_t::NIL: + calc_indegrees(n->nil.out); + break; + case nfa_state_t::ALT: + calc_indegrees(n->alt.out1); + calc_indegrees(n->alt.out2); + break; + case nfa_state_t::TAG: + calc_indegrees(n->tag.out); + break; + case nfa_state_t::RAN: + calc_indegrees(n->ran.out); + case nfa_state_t::FIN: + break; + } } nfa_t::nfa_t(const RESpec &spec) - : max_size(estimate_size(spec.res)) - , size(0) - , states(new nfa_state_t[max_size]) - , charset(spec.charset) - , rules(spec.rules) - , tags(spec.tags) - , root(NULL) + : max_size(estimate_size(spec.res)) + , size(0) + , states(new nfa_state_t[max_size]) + , charset(spec.charset) + , rules(spec.rules) + , tags(spec.tags) + , root(NULL) { - const size_t nre = spec.res.size(); + const size_t nre = spec.res.size(); - if (nre == 0) return; + if (nre == 0) return; - for (size_t i = 0; i < nre; ++i) { - nfa_state_t *s = &states[size++]; - s->make_fin(i); - s = re_to_nfa(*this, i, spec.res[i], s); + for (size_t i = 0; i < nre; ++i) { + nfa_state_t *s = &states[size++]; + s->make_fin(i); + s = re_to_nfa(*this, i, spec.res[i], s); - if (root) { - nfa_state_t *t = &states[size++]; - t->make_alt(i, root, s); - root = t; - } else { - root = s; - } - } + if (root) { + nfa_state_t *t = &states[size++]; + t->make_alt(i, root, s); + root = t; + } else { + root = s; + } + } - calc_indegrees(root); + calc_indegrees(root); } nfa_t::~nfa_t() { - delete[] states; + delete[] states; } } // namespace re2c diff --git a/re2c/src/re/ast_to_re.cc b/re2c/src/re/ast_to_re.cc index c2738649..d4acfb88 100644 --- a/re2c/src/re/ast_to_re.cc +++ b/re2c/src/re/ast_to_re.cc @@ -56,316 +56,316 @@ static void init_rule(Rule &, const Code *, const std::vector &, size_t, si RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) - : alc() - , res() - , charset(*new std::vector) - , tags(*new std::vector) - , rules(*new std::valarray(ast.size())) - , opts(o) - , warn(w) + : alc() + , res() + , charset(*new std::vector) + , tags(*new std::vector) + , rules(*new std::valarray(ast.size())) + , opts(o) + , warn(w) { - for (size_t i = 0; i < ast.size(); ++i) { - size_t ltag = tags.size(), ncap = 0; - res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0)); - init_rule(rules[i], ast[i].code, tags, ltag, ncap); - } + for (size_t i = 0; i < ast.size(); ++i) { + size_t ltag = tags.size(), ncap = 0; + res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0)); + init_rule(rules[i], ast[i].code, tags, ltag, ncap); + } } bool has_tags(const AST *ast) { - switch (ast->type) { - case AST::NIL: - case AST::STR: - case AST::CLS: - case AST::DOT: - case AST::DEFAULT: - case AST::DIFF: return false; - case AST::TAG: - case AST::CAP: return true; - case AST::ALT: return has_tags(ast->alt.ast1) || has_tags(ast->alt.ast2); - case AST::CAT: return has_tags(ast->cat.ast1) || has_tags(ast->cat.ast2); - case AST::REF: return has_tags(ast->ref.ast); - case AST::ITER: return has_tags(ast->iter.ast); - } - return false; /* unreachable */ + switch (ast->type) { + case AST::NIL: + case AST::STR: + case AST::CLS: + case AST::DOT: + case AST::DEFAULT: + case AST::DIFF: return false; + case AST::TAG: + case AST::CAP: return true; + case AST::ALT: return has_tags(ast->alt.ast1) || has_tags(ast->alt.ast2); + case AST::CAT: return has_tags(ast->cat.ast1) || has_tags(ast->cat.ast2); + case AST::REF: return has_tags(ast->ref.ast); + case AST::ITER: return has_tags(ast->iter.ast); + } + return false; /* unreachable */ } RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap, int32_t height) { - RE::alc_t &alc = spec.alc; - std::vector &tags = spec.tags; - const opt_t *opts = spec.opts; - Warn &warn = spec.warn; - - if (ast->type != AST::CAP && ast->type != AST::REF) ++height; - - switch (ast->type) { - case AST::NIL: - return re_nil(alc); - case AST::STR: { - const bool icase = opts->bCaseInsensitive - || (ast->str.icase != opts->bCaseInverted); - RE *x = NULL; - std::vector::const_iterator - i = ast->str.chars->begin(), - e = ast->str.chars->end(); - for (; i != e; ++i) { - x = re_cat(alc, x, icase - ? re_ichar(alc, ast->line, i->column, i->chr, opts) - : re_schar(alc, ast->line, i->column, i->chr, opts)); - } - return x ? x : re_nil(alc); - } - case AST::CLS: { - Range *r = NULL; - std::vector::const_iterator - i = ast->cls.ranges->begin(), - e = ast->cls.ranges->end(); - for (; i != e; ++i) { - Range *s = opts->encoding.encodeRange(i->lower, i->upper); - if (!s) fatal_lc(ast->line, i->column, - "bad code point range: '0x%X - 0x%X'", i->lower, i->upper); - r = Range::add(r, s); - } - if (ast->cls.negated) { - r = Range::sub(opts->encoding.fullRange(), r); - } - return re_class(alc, ast->line, ast->column, r, opts, warn); - } - case AST::DOT: { - uint32_t c = '\n'; - if (!opts->encoding.encode(c)) { - fatal_lc(ast->line, ast->column, "bad code point: '0x%X'", c); - } - return re_class(alc, ast->line, ast->column, - Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn); - } - case AST::DEFAULT: - // see note [default regexp] - return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits())); - case AST::ALT: { - RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; - if (opts->posix_captures && has_tags(ast)) { - // see note [POSIX subexpression hierarchy] - if (ast->cat.ast1->type != AST::CAP) { - t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); - t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height)); - } - if (ast->cat.ast2->type != AST::CAP) { - t3 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); - t4 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height)); - } - } - x = ast_to_re(spec, ast->alt.ast1, ncap, height); - x = re_cat(alc, t1, re_cat(alc, x, t2)); - y = ast_to_re(spec, ast->alt.ast2, ncap, height); - y = re_cat(alc, t3, re_cat(alc, y, t4)); - return re_alt(alc, x, y); - } - case AST::DIFF: { - RE *x = ast_to_re(spec, ast->diff.ast1, ncap, height); - RE *y = ast_to_re(spec, ast->diff.ast2, ncap, height); - if (x->type != RE::SYM || y->type != RE::SYM) { - fatal_lc(ast->line, ast->column, "can only difference char sets"); - } - return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn); - } - case AST::CAT: { - RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; - if (opts->posix_captures && has_tags(ast)) { - // see note [POSIX subexpression hierarchy] - if (ast->cat.ast1->type != AST::CAP) { - t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); - t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height)); - } - if (ast->cat.ast2->type != AST::CAP) { - t3 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); - t4 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false, height)); - } - } - x = ast_to_re(spec, ast->cat.ast1, ncap, height); - x = re_cat(alc, t1, re_cat(alc, x, t2)); - y = ast_to_re(spec, ast->cat.ast2, ncap, height); - y = re_cat(alc, t3, re_cat(alc, y, t4)); - return re_cat(alc, x, y); - } - case AST::TAG: { - if (ast->tag.name && !opts->tags) { - fatal_lc(ast->line, ast->column, - "tags are only allowed with '-T, --tags' option"); - } - if (opts->posix_captures) { - fatal_lc(ast->line, ast->column, - "simple tags are not allowed with '--posix-captures' option"); - } - RE *t = re_tag(alc, tags.size(), false); - tags.push_back(Tag(ast->tag.name, ast->tag.history, height)); - return t; - } - case AST::CAP: { - if (!opts->posix_captures) { - return ast_to_re(spec, ast->cap, ncap, height); - } - const AST *x = ast->cap; - if (x->type == AST::REF) x = x->ref.ast; - - RE *t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap, false, height + 1)); - - RE *t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap + 1, false, height)); - - ++ncap; - return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, height), t2)); - } - case AST::REF: - if (!opts->posix_captures) { - return ast_to_re(spec, ast->ref.ast, ncap, height); - } - fatal_l(ast->line, - "implicit grouping is forbidden with '--posix-captures'" - " option, please wrap '%s' in capturing parenthesis", - ast->ref.name->c_str()); - return NULL; - case AST::ITER: { - const uint32_t - n = ast->iter.min, - n1 = std::max(n, 1u), - m = std::max(n, ast->iter.max); - const AST *x = ast->iter.ast; - - RE *t1 = NULL, *t2 = NULL; - if (opts->posix_captures && x->type == AST::CAP) { - x = x->cap; - if (x->type == AST::REF) x = x->ref.ast; - - t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap, m > 1, height + 1)); - - t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap + 1, m > 1, height)); - - ++ncap; - } - - RE *y = NULL; - if (m == 0) { - y = re_cat(alc, t1, t2); - } else if (m == 1) { - y = ast_to_re(spec, x, ncap, height); - y = re_cat(alc, t1, re_cat(alc, y, t2)); - } else { - y = ast_to_re(spec, x, ncap, height); - y = re_cat(alc, t1, y); - y = re_cat(alc, y, t2); - y = re_iter(alc, y, n1, m); - } - if (n == 0) { - y = re_alt(alc, y, re_nil(alc)); - } - return y; - } - } - return NULL; /* unreachable */ + RE::alc_t &alc = spec.alc; + std::vector &tags = spec.tags; + const opt_t *opts = spec.opts; + Warn &warn = spec.warn; + + if (ast->type != AST::CAP && ast->type != AST::REF) ++height; + + switch (ast->type) { + case AST::NIL: + return re_nil(alc); + case AST::STR: { + const bool icase = opts->bCaseInsensitive + || (ast->str.icase != opts->bCaseInverted); + RE *x = NULL; + std::vector::const_iterator + i = ast->str.chars->begin(), + e = ast->str.chars->end(); + for (; i != e; ++i) { + x = re_cat(alc, x, icase + ? re_ichar(alc, ast->line, i->column, i->chr, opts) + : re_schar(alc, ast->line, i->column, i->chr, opts)); + } + return x ? x : re_nil(alc); + } + case AST::CLS: { + Range *r = NULL; + std::vector::const_iterator + i = ast->cls.ranges->begin(), + e = ast->cls.ranges->end(); + for (; i != e; ++i) { + Range *s = opts->encoding.encodeRange(i->lower, i->upper); + if (!s) fatal_lc(ast->line, i->column, + "bad code point range: '0x%X - 0x%X'", i->lower, i->upper); + r = Range::add(r, s); + } + if (ast->cls.negated) { + r = Range::sub(opts->encoding.fullRange(), r); + } + return re_class(alc, ast->line, ast->column, r, opts, warn); + } + case AST::DOT: { + uint32_t c = '\n'; + if (!opts->encoding.encode(c)) { + fatal_lc(ast->line, ast->column, "bad code point: '0x%X'", c); + } + return re_class(alc, ast->line, ast->column, + Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn); + } + case AST::DEFAULT: + // see note [default regexp] + return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits())); + case AST::ALT: { + RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; + if (opts->posix_captures && has_tags(ast)) { + // see note [POSIX subexpression hierarchy] + if (ast->cat.ast1->type != AST::CAP) { + t1 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); + t2 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height)); + } + if (ast->cat.ast2->type != AST::CAP) { + t3 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); + t4 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height)); + } + } + x = ast_to_re(spec, ast->alt.ast1, ncap, height); + x = re_cat(alc, t1, re_cat(alc, x, t2)); + y = ast_to_re(spec, ast->alt.ast2, ncap, height); + y = re_cat(alc, t3, re_cat(alc, y, t4)); + return re_alt(alc, x, y); + } + case AST::DIFF: { + RE *x = ast_to_re(spec, ast->diff.ast1, ncap, height); + RE *y = ast_to_re(spec, ast->diff.ast2, ncap, height); + if (x->type != RE::SYM || y->type != RE::SYM) { + fatal_lc(ast->line, ast->column, "can only difference char sets"); + } + return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn); + } + case AST::CAT: { + RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; + if (opts->posix_captures && has_tags(ast)) { + // see note [POSIX subexpression hierarchy] + if (ast->cat.ast1->type != AST::CAP) { + t1 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); + t2 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height)); + } + if (ast->cat.ast2->type != AST::CAP) { + t3 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height + 1)); + t4 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false, height)); + } + } + x = ast_to_re(spec, ast->cat.ast1, ncap, height); + x = re_cat(alc, t1, re_cat(alc, x, t2)); + y = ast_to_re(spec, ast->cat.ast2, ncap, height); + y = re_cat(alc, t3, re_cat(alc, y, t4)); + return re_cat(alc, x, y); + } + case AST::TAG: { + if (ast->tag.name && !opts->tags) { + fatal_lc(ast->line, ast->column, + "tags are only allowed with '-T, --tags' option"); + } + if (opts->posix_captures) { + fatal_lc(ast->line, ast->column, + "simple tags are not allowed with '--posix-captures' option"); + } + RE *t = re_tag(alc, tags.size(), false); + tags.push_back(Tag(ast->tag.name, ast->tag.history, height)); + return t; + } + case AST::CAP: { + if (!opts->posix_captures) { + return ast_to_re(spec, ast->cap, ncap, height); + } + const AST *x = ast->cap; + if (x->type == AST::REF) x = x->ref.ast; + + RE *t1 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(2 * ncap, false, height + 1)); + + RE *t2 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(2 * ncap + 1, false, height)); + + ++ncap; + return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, height), t2)); + } + case AST::REF: + if (!opts->posix_captures) { + return ast_to_re(spec, ast->ref.ast, ncap, height); + } + fatal_l(ast->line, + "implicit grouping is forbidden with '--posix-captures'" + " option, please wrap '%s' in capturing parenthesis", + ast->ref.name->c_str()); + return NULL; + case AST::ITER: { + const uint32_t + n = ast->iter.min, + n1 = std::max(n, 1u), + m = std::max(n, ast->iter.max); + const AST *x = ast->iter.ast; + + RE *t1 = NULL, *t2 = NULL; + if (opts->posix_captures && x->type == AST::CAP) { + x = x->cap; + if (x->type == AST::REF) x = x->ref.ast; + + t1 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(2 * ncap, m > 1, height + 1)); + + t2 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(2 * ncap + 1, m > 1, height)); + + ++ncap; + } + + RE *y = NULL; + if (m == 0) { + y = re_cat(alc, t1, t2); + } else if (m == 1) { + y = ast_to_re(spec, x, ncap, height); + y = re_cat(alc, t1, re_cat(alc, y, t2)); + } else { + y = ast_to_re(spec, x, ncap, height); + y = re_cat(alc, t1, y); + y = re_cat(alc, y, t2); + y = re_iter(alc, y, n1, m); + } + if (n == 0) { + y = re_alt(alc, y, re_nil(alc)); + } + return y; + } + } + return NULL; /* unreachable */ } RE *re_schar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts) { - if (!opts->encoding.encode(c)) { - fatal_lc(line, column, "bad code point: '0x%X'", c); - } - switch (opts->encoding.type()) { - case Enc::UTF16: - return UTF16Symbol(alc, c); - case Enc::UTF8: - return UTF8Symbol(alc, c); - case Enc::ASCII: - case Enc::EBCDIC: - case Enc::UTF32: - case Enc::UCS2: - return re_sym(alc, Range::sym(c)); - } - return NULL; /* unreachable */ + if (!opts->encoding.encode(c)) { + fatal_lc(line, column, "bad code point: '0x%X'", c); + } + switch (opts->encoding.type()) { + case Enc::UTF16: + return UTF16Symbol(alc, c); + case Enc::UTF8: + return UTF8Symbol(alc, c); + case Enc::ASCII: + case Enc::EBCDIC: + case Enc::UTF32: + case Enc::UCS2: + return re_sym(alc, Range::sym(c)); + } + return NULL; /* unreachable */ } RE *re_ichar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts) { - if (is_alpha(c)) { - return re_alt(alc, - re_schar(alc, line, column, to_lower_unsafe(c), opts), - re_schar(alc, line, column, to_upper_unsafe(c), opts)); - } else { - return re_schar(alc, line, column, c, opts); - } + if (is_alpha(c)) { + return re_alt(alc, + re_schar(alc, line, column, to_lower_unsafe(c), opts), + re_schar(alc, line, column, to_upper_unsafe(c), opts)); + } else { + return re_schar(alc, line, column, c, opts); + } } RE *re_class(RE::alc_t &alc, uint32_t line, uint32_t column, const Range *r, const opt_t *opts, Warn &warn) { - if (!r) { - switch (opts->empty_class_policy) { - case EMPTY_CLASS_MATCH_EMPTY: - warn.empty_class(line); - return re_nil(alc); - case EMPTY_CLASS_MATCH_NONE: - warn.empty_class(line); - break; - case EMPTY_CLASS_ERROR: - fatal_lc(line, column, "empty character class"); - } - } - switch (opts->encoding.type()) { - case Enc::UTF16: - return UTF16Range(alc, r); - case Enc::UTF8: - return UTF8Range(alc, r); - case Enc::ASCII: - case Enc::EBCDIC: - case Enc::UTF32: - case Enc::UCS2: - return re_sym(alc, r); - } - return NULL; /* unreachable */ + if (!r) { + switch (opts->empty_class_policy) { + case EMPTY_CLASS_MATCH_EMPTY: + warn.empty_class(line); + return re_nil(alc); + case EMPTY_CLASS_MATCH_NONE: + warn.empty_class(line); + break; + case EMPTY_CLASS_ERROR: + fatal_lc(line, column, "empty character class"); + } + } + switch (opts->encoding.type()) { + case Enc::UTF16: + return UTF16Range(alc, r); + case Enc::UTF8: + return UTF8Range(alc, r); + case Enc::ASCII: + case Enc::EBCDIC: + case Enc::UTF32: + case Enc::UCS2: + return re_sym(alc, r); + } + return NULL; /* unreachable */ } void assert_tags_used_once(const Rule &rule, const std::vector &tags) { - std::set names; - const std::string *name = NULL; - - for (size_t t = rule.ltag; t < rule.htag; ++t) { - name = tags[t].name; - if (name && !names.insert(*name).second) { - fatal_l(rule.code->fline, - "tag '%s' is used multiple times in the same rule", - name->c_str()); - } - } + std::set names; + const std::string *name = NULL; + + for (size_t t = rule.ltag; t < rule.htag; ++t) { + name = tags[t].name; + if (name && !names.insert(*name).second) { + fatal_l(rule.code->fline, + "tag '%s' is used multiple times in the same rule", + name->c_str()); + } + } } void init_rule(Rule &rule, const Code *code, const std::vector &tags, - size_t ltag, size_t ncap) + size_t ltag, size_t ncap) { - rule.code = code; - rule.ltag = ltag; - rule.htag = tags.size(); - for (rule.ttag = ltag; rule.ttag < rule.htag && !trailing(tags[rule.ttag]); ++rule.ttag); - rule.ncap = ncap; - assert_tags_used_once(rule, tags); + rule.code = code; + rule.ltag = ltag; + rule.htag = tags.size(); + for (rule.ttag = ltag; rule.ttag < rule.htag && !trailing(tags[rule.ttag]); ++rule.ttag); + rule.ncap = ncap; + assert_tags_used_once(rule, tags); } } // namespace re2c diff --git a/re2c/src/re/default_tags.cc b/re2c/src/re/default_tags.cc index 26ceb7e4..0a6a7b41 100644 --- a/re2c/src/re/default_tags.cc +++ b/re2c/src/re/default_tags.cc @@ -11,50 +11,50 @@ namespace re2c { // in future it might change. static void insert_default_tags(RESpec &spec, RE *re, size_t *&tidx) { - RE::alc_t &alc = spec.alc; - switch (re->type) { - case RE::NIL: break; - case RE::SYM: break; - case RE::ALT: { - size_t *i = tidx; - RE *x = NULL, *y = NULL; - insert_default_tags(spec, re->alt.re1, tidx); - for (; i < tidx; ++i) { - x = re_cat(alc, x, re_tag(alc, *i, true)); - } - insert_default_tags(spec, re->alt.re2, tidx); - for (; i < tidx; ++i) { - y = re_cat(alc, y, re_tag(alc, *i, true)); - } - re->alt.re1 = re_cat(alc, re->alt.re1, y); - re->alt.re2 = spec.opts->posix_captures - ? re_cat(alc, x, re->alt.re2) - : re_cat(alc, re->alt.re2, x); - break; - } - case RE::CAT: - insert_default_tags(spec, re->cat.re1, tidx); - insert_default_tags(spec, re->cat.re2, tidx); - break; - case RE::ITER: - insert_default_tags(spec, re->iter.re, tidx); - break; - case RE::TAG: - *tidx++ = re->tag.idx; - break; - } + RE::alc_t &alc = spec.alc; + switch (re->type) { + case RE::NIL: break; + case RE::SYM: break; + case RE::ALT: { + size_t *i = tidx; + RE *x = NULL, *y = NULL; + insert_default_tags(spec, re->alt.re1, tidx); + for (; i < tidx; ++i) { + x = re_cat(alc, x, re_tag(alc, *i, true)); + } + insert_default_tags(spec, re->alt.re2, tidx); + for (; i < tidx; ++i) { + y = re_cat(alc, y, re_tag(alc, *i, true)); + } + re->alt.re1 = re_cat(alc, re->alt.re1, y); + re->alt.re2 = spec.opts->posix_captures + ? re_cat(alc, x, re->alt.re2) + : re_cat(alc, re->alt.re2, x); + break; + } + case RE::CAT: + insert_default_tags(spec, re->cat.re1, tidx); + insert_default_tags(spec, re->cat.re2, tidx); + break; + case RE::ITER: + insert_default_tags(spec, re->iter.re, tidx); + break; + case RE::TAG: + *tidx++ = re->tag.idx; + break; + } } void insert_default_tags(RESpec &spec) { - size_t *tidx0 = new size_t[spec.tags.size()], *tidx = tidx0; - std::vector::iterator - i = spec.res.begin(), - e = spec.res.end(); - for (; i != e; ++i) { - insert_default_tags(spec, *i, tidx); - } - delete[] tidx0; + size_t *tidx0 = new size_t[spec.tags.size()], *tidx = tidx0; + std::vector::iterator + i = spec.res.begin(), + e = spec.res.end(); + for (; i != e; ++i) { + insert_default_tags(spec, *i, tidx); + } + delete[] tidx0; } } // namespace re2c diff --git a/re2c/src/re/empty_class_policy.h b/re2c/src/re/empty_class_policy.h index caf60a41..14ef8ade 100644 --- a/re2c/src/re/empty_class_policy.h +++ b/re2c/src/re/empty_class_policy.h @@ -5,9 +5,9 @@ namespace re2c { enum empty_class_policy_t { - EMPTY_CLASS_MATCH_EMPTY, // match on empty input - EMPTY_CLASS_MATCH_NONE, // fail to match on any input - EMPTY_CLASS_ERROR // compilation error + EMPTY_CLASS_MATCH_EMPTY, // match on empty input + EMPTY_CLASS_MATCH_NONE, // fail to match on any input + EMPTY_CLASS_ERROR // compilation error }; } // namespace re2c diff --git a/re2c/src/re/encoding/case.h b/re2c/src/re/encoding/case.h index dc793608..e33ababd 100644 --- a/re2c/src/re/encoding/case.h +++ b/re2c/src/re/encoding/case.h @@ -12,18 +12,18 @@ uint32_t to_upper_unsafe (uint32_t c); inline bool is_alpha (uint32_t c) { - return (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z'); + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z'); } inline uint32_t to_lower_unsafe (uint32_t c) { - return c | 0x20u; + return c | 0x20u; } inline uint32_t to_upper_unsafe (uint32_t c) { - return c & ~0x20u; + return c & ~0x20u; } } diff --git a/re2c/src/re/encoding/enc.cc b/re2c/src/re/encoding/enc.cc index 5f65e821..411f0804 100644 --- a/re2c/src/re/encoding/enc.cc +++ b/re2c/src/re/encoding/enc.cc @@ -64,39 +64,39 @@ const uint32_t Enc::ebc2asc[256] = */ bool Enc::encode(uint32_t & c) const { - if (c >= nCodePoints ()) - { - return false; - } + if (c >= nCodePoints ()) + { + return false; + } - switch (type_) - { - case ASCII: - return true; - case EBCDIC: - c = asc2ebc[c]; - return true; - case UCS2: - case UTF16: - case UTF32: - case UTF8: - if (c < SURR_MIN || c > SURR_MAX) - return true; - else - { - switch (policy_) - { - case POLICY_FAIL: - return false; - case POLICY_SUBSTITUTE: - c = UNICODE_ERROR; - return true; - case POLICY_IGNORE: - return true; - } - } - } - return false; // to silence gcc warning + switch (type_) + { + case ASCII: + return true; + case EBCDIC: + c = asc2ebc[c]; + return true; + case UCS2: + case UTF16: + case UTF32: + case UTF8: + if (c < SURR_MIN || c > SURR_MAX) + return true; + else + { + switch (policy_) + { + case POLICY_FAIL: + return false; + case POLICY_SUBSTITUTE: + c = UNICODE_ERROR; + return true; + case POLICY_IGNORE: + return true; + } + } + } + return false; // to silence gcc warning } /* @@ -105,19 +105,19 @@ bool Enc::encode(uint32_t & c) const */ uint32_t Enc::decodeUnsafe(uint32_t c) const { - switch (type_) - { - case EBCDIC: - c = ebc2asc[c & 0xFF]; - break; - case ASCII: - case UCS2: - case UTF16: - case UTF32: - case UTF8: - break; - } - return c; + switch (type_) + { + case EBCDIC: + c = ebc2asc[c & 0xFF]; + break; + case ASCII: + case UCS2: + case UTF16: + case UTF32: + case UTF8: + break; + } + return c; } /* @@ -134,55 +134,55 @@ uint32_t Enc::decodeUnsafe(uint32_t c) const */ Range * Enc::encodeRange(uint32_t l, uint32_t h) const { - if (l >= nCodePoints () || h >= nCodePoints ()) - { - return NULL; - } + if (l >= nCodePoints () || h >= nCodePoints ()) + { + return NULL; + } - Range * r = NULL; - switch (type_) - { - case ASCII: - r = Range::ran (l, h + 1); - break; - case EBCDIC: - { - const uint32_t el = asc2ebc[l]; - r = Range::sym (el); - for (uint32_t c = l + 1; c <= h; ++c) - { - const uint32_t ec = asc2ebc[c]; - r = Range::add (r, Range::sym (ec)); - } - break; - } - case UCS2: - case UTF16: - case UTF32: - case UTF8: - r = Range::ran (l, h + 1); - if (l <= SURR_MAX && h >= SURR_MIN) - { - switch (policy_) - { - case POLICY_FAIL: - r = NULL; - break; - case POLICY_SUBSTITUTE: - { - Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); - Range * error = Range::sym (UNICODE_ERROR); - r = Range::sub (r, surrs); - r = Range::add (r, error); - break; - } - case POLICY_IGNORE: - break; - } - } - break; - } - return r; + Range * r = NULL; + switch (type_) + { + case ASCII: + r = Range::ran (l, h + 1); + break; + case EBCDIC: + { + const uint32_t el = asc2ebc[l]; + r = Range::sym (el); + for (uint32_t c = l + 1; c <= h; ++c) + { + const uint32_t ec = asc2ebc[c]; + r = Range::add (r, Range::sym (ec)); + } + break; + } + case UCS2: + case UTF16: + case UTF32: + case UTF8: + r = Range::ran (l, h + 1); + if (l <= SURR_MAX && h >= SURR_MIN) + { + switch (policy_) + { + case POLICY_FAIL: + r = NULL; + break; + case POLICY_SUBSTITUTE: + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + Range * error = Range::sym (UNICODE_ERROR); + r = Range::sub (r, surrs); + r = Range::add (r, error); + break; + } + case POLICY_IGNORE: + break; + } + } + break; + } + return r; } /* @@ -197,13 +197,13 @@ Range * Enc::encodeRange(uint32_t l, uint32_t h) const */ Range * Enc::fullRange() const { - Range * r = Range::ran (0, nCodePoints()); - if (policy_ != POLICY_IGNORE) - { - Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); - r = Range::sub (r, surrs); - } - return r; + Range * r = Range::ran (0, nCodePoints()); + if (policy_ != POLICY_IGNORE) + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + r = Range::sub (r, surrs); + } + return r; } } // namespace re2c diff --git a/re2c/src/re/encoding/enc.h b/re2c/src/re/encoding/enc.h index b4afa835..2dbb1d2d 100644 --- a/re2c/src/re/encoding/enc.h +++ b/re2c/src/re/encoding/enc.h @@ -38,145 +38,145 @@ class Range; class Enc { public: - // Supported encodings. - enum type_t - { ASCII - , EBCDIC - , UCS2 - , UTF16 - , UTF32 - , UTF8 - }; - - // What to do with invalid code points - enum policy_t - { POLICY_FAIL - , POLICY_SUBSTITUTE - , POLICY_IGNORE - }; + // Supported encodings. + enum type_t + { ASCII + , EBCDIC + , UCS2 + , UTF16 + , UTF32 + , UTF8 + }; + + // What to do with invalid code points + enum policy_t + { POLICY_FAIL + , POLICY_SUBSTITUTE + , POLICY_IGNORE + }; private: - static const uint32_t asc2ebc[256]; - static const uint32_t ebc2asc[256]; - static const uint32_t SURR_MIN; - static const uint32_t SURR_MAX; - static const uint32_t UNICODE_ERROR; + static const uint32_t asc2ebc[256]; + static const uint32_t ebc2asc[256]; + static const uint32_t SURR_MIN; + static const uint32_t SURR_MAX; + static const uint32_t UNICODE_ERROR; - type_t type_; - policy_t policy_; + type_t type_; + policy_t policy_; public: - Enc() - : type_ (ASCII) - , policy_ (POLICY_IGNORE) - { } + Enc() + : type_ (ASCII) + , policy_ (POLICY_IGNORE) + { } - static const char * name (type_t t); + static const char * name (type_t t); - bool operator != (const Enc & e) const { return type_ != e.type_; } + bool operator != (const Enc & e) const { return type_ != e.type_; } - inline uint32_t nCodePoints() const; - inline uint32_t nCodeUnits() const; - inline uint32_t szCodePoint() const; - inline uint32_t szCodeUnit() const; + inline uint32_t nCodePoints() const; + inline uint32_t nCodeUnits() const; + inline uint32_t szCodePoint() const; + inline uint32_t szCodeUnit() const; - inline void set(type_t t); - inline void unset(type_t); - inline type_t type () const; + inline void set(type_t t); + inline void unset(type_t); + inline type_t type () const; - inline void setPolicy(policy_t t); + inline void setPolicy(policy_t t); - bool encode(uint32_t & c) const; - uint32_t decodeUnsafe(uint32_t c) const; - Range * encodeRange(uint32_t l, uint32_t h) const; - Range * fullRange() const; + bool encode(uint32_t & c) const; + uint32_t decodeUnsafe(uint32_t c) const; + Range * encodeRange(uint32_t l, uint32_t h) const; + Range * fullRange() const; }; inline const char * Enc::name (type_t t) { - switch (t) { - case ASCII: return "ASCII"; - case EBCDIC: return "EBCDIC"; - case UTF8: return "UTF8"; - case UCS2: return "USC2"; - case UTF16: return "UTF16"; - case UTF32: return "UTF32"; - } - return ""; /* error */ + switch (t) { + case ASCII: return "ASCII"; + case EBCDIC: return "EBCDIC"; + case UTF8: return "UTF8"; + case UCS2: return "USC2"; + case UTF16: return "UTF16"; + case UTF32: return "UTF32"; + } + return ""; /* error */ } inline uint32_t Enc::nCodePoints() const { - switch (type_) { - case ASCII: - case EBCDIC: return 0x100; - case UCS2: return 0x10000; - case UTF16: - case UTF32: - case UTF8: return 0x110000; - } - return 0; /* error */ + switch (type_) { + case ASCII: + case EBCDIC: return 0x100; + case UCS2: return 0x10000; + case UTF16: + case UTF32: + case UTF8: return 0x110000; + } + return 0; /* error */ } inline uint32_t Enc::nCodeUnits() const { - switch (type_) { - case ASCII: - case EBCDIC: - case UTF8: return 0x100; - case UCS2: - case UTF16: return 0x10000; - case UTF32: return 0x110000; - } - return 0; /* error */ + switch (type_) { + case ASCII: + case EBCDIC: + case UTF8: return 0x100; + case UCS2: + case UTF16: return 0x10000; + case UTF32: return 0x110000; + } + return 0; /* error */ } // returns *maximal* code point size for encoding inline uint32_t Enc::szCodePoint() const { - switch (type_) { - case ASCII: - case EBCDIC: return 1; - case UCS2: return 2; - case UTF16: - case UTF32: - case UTF8: return 4; - } - return 0; /* error */ + switch (type_) { + case ASCII: + case EBCDIC: return 1; + case UCS2: return 2; + case UTF16: + case UTF32: + case UTF8: return 4; + } + return 0; /* error */ } inline uint32_t Enc::szCodeUnit() const { - switch (type_) { - case ASCII: - case EBCDIC: - case UTF8: return 1; - case UCS2: - case UTF16: return 2; - case UTF32: return 4; - } - return 0; /* error */ + switch (type_) { + case ASCII: + case EBCDIC: + case UTF8: return 1; + case UCS2: + case UTF16: return 2; + case UTF32: return 4; + } + return 0; /* error */ } inline void Enc::set(type_t t) { - type_ = t; + type_ = t; } inline void Enc::unset(type_t t) { - if (type_ == t) - type_ = ASCII; + if (type_ == t) + type_ = ASCII; } inline Enc::type_t Enc::type () const { - return type_; + return type_; } inline void Enc::setPolicy(policy_t t) { - policy_ = t; + policy_ = t; } } // namespace re2c diff --git a/re2c/src/re/encoding/range_suffix.cc b/re2c/src/re/encoding/range_suffix.cc index 99826fe1..6527e778 100644 --- a/re2c/src/re/encoding/range_suffix.cc +++ b/re2c/src/re/encoding/range_suffix.cc @@ -9,7 +9,7 @@ free_list RangeSuffix::freeList; RE *to_regexp(RE::alc_t &alc, RangeSuffix *p) { - return p ? emit(alc, p, NULL) : re_sym(alc, NULL); + return p ? emit(alc, p, NULL) : re_sym(alc, NULL); } /* @@ -17,16 +17,16 @@ RE *to_regexp(RE::alc_t &alc, RangeSuffix *p) */ RE *emit(RE::alc_t &alc, RangeSuffix *p, RE *re) { - if (p == NULL) { - return re; - } else { - RE *regexp = NULL; - for (; p != NULL; p = p->next) { - RE *re1 = re_cat(alc, re_sym(alc, Range::ran(p->l, p->h + 1)), re); - regexp = re_alt(alc, regexp, emit(alc, p->child, re1)); - } - return regexp; - } + if (p == NULL) { + return re; + } else { + RE *regexp = NULL; + for (; p != NULL; p = p->next) { + RE *re1 = re_cat(alc, re_sym(alc, Range::ran(p->l, p->h + 1)), re); + regexp = re_alt(alc, regexp, emit(alc, p->child, re1)); + } + return regexp; + } } } // namespace re2c diff --git a/re2c/src/re/encoding/range_suffix.h b/re2c/src/re/encoding/range_suffix.h index ae7a6a59..d3077b85 100644 --- a/re2c/src/re/encoding/range_suffix.h +++ b/re2c/src/re/encoding/range_suffix.h @@ -12,23 +12,23 @@ namespace re2c { struct RangeSuffix { - static free_list freeList; - - uint32_t l; - uint32_t h; - RangeSuffix * next; - RangeSuffix * child; - - RangeSuffix (uint32_t lo, uint32_t hi) - : l (lo) - , h (hi) - , next (NULL) - , child (NULL) - { - freeList.insert(this); - } - - FORBID_COPY (RangeSuffix); + static free_list freeList; + + uint32_t l; + uint32_t h; + RangeSuffix * next; + RangeSuffix * child; + + RangeSuffix (uint32_t lo, uint32_t hi) + : l (lo) + , h (hi) + , next (NULL) + , child (NULL) + { + freeList.insert(this); + } + + FORBID_COPY (RangeSuffix); }; RE *to_regexp(RE::alc_t &alc, RangeSuffix *p); diff --git a/re2c/src/re/encoding/utf16/utf16.cc b/re2c/src/re/encoding/utf16/utf16.cc index 7fad3c9c..b99e9718 100644 --- a/re2c/src/re/encoding/utf16/utf16.cc +++ b/re2c/src/re/encoding/utf16/utf16.cc @@ -2,9 +2,9 @@ namespace re2c { -const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu; -const uint32_t utf16::MIN_LEAD_SURR = 0xD800u; -const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u; -const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu; +const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu; +const uint32_t utf16::MIN_LEAD_SURR = 0xD800u; +const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u; +const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu; } // namespace re2c diff --git a/re2c/src/re/encoding/utf16/utf16.h b/re2c/src/re/encoding/utf16/utf16.h index 81dc71e0..b1238c05 100644 --- a/re2c/src/re/encoding/utf16/utf16.h +++ b/re2c/src/re/encoding/utf16/utf16.h @@ -8,28 +8,28 @@ namespace re2c { class utf16 { public: - typedef uint32_t rune; + typedef uint32_t rune; - static const uint32_t MAX_1WORD_RUNE; - static const uint32_t MIN_LEAD_SURR; - static const uint32_t MIN_TRAIL_SURR; - static const uint32_t MAX_TRAIL_SURR; + static const uint32_t MAX_1WORD_RUNE; + static const uint32_t MIN_LEAD_SURR; + static const uint32_t MIN_TRAIL_SURR; + static const uint32_t MAX_TRAIL_SURR; - /* leading surrogate of UTF-16 symbol */ - static inline uint32_t lead_surr(rune r); + /* leading surrogate of UTF-16 symbol */ + static inline uint32_t lead_surr(rune r); - /* trailing surrogate of UTF-16 symbol */ - static inline uint32_t trail_surr(rune r); + /* trailing surrogate of UTF-16 symbol */ + static inline uint32_t trail_surr(rune r); }; inline uint32_t utf16::lead_surr(rune r) { - return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR; + return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR; } inline uint32_t utf16::trail_surr(rune r) { - return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR; + return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR; } } // namespace re2c diff --git a/re2c/src/re/encoding/utf16/utf16_range.cc b/re2c/src/re/encoding/utf16/utf16_range.cc index f0164d12..0e162018 100644 --- a/re2c/src/re/encoding/utf16/utf16_range.cc +++ b/re2c/src/re/encoding/utf16/utf16_range.cc @@ -11,21 +11,21 @@ namespace re2c { */ void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h) { - RangeSuffix ** p = &root; - for (;;) - { - if (*p == NULL) - { - *p = new RangeSuffix(l, h); - break; - } - else if ((*p)->l == l && (*p)->h == h) - { - break; - } - else - p = &(*p)->next; - } + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l, h); + break; + } + else if ((*p)->l == l && (*p)->h == h) + { + break; + } + else + p = &(*p)->next; + } } /* @@ -34,37 +34,37 @@ void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h) */ void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) { - RangeSuffix ** p = &root; - for (;;) - { - if (*p == NULL) - { - *p = new RangeSuffix(l_tr, h_tr); - p = &(*p)->child; - break; - } - else if ((*p)->l == l_tr && (*p)->h == h_tr) - { - p = &(*p)->child; - break; - } - else - p = &(*p)->next; - } - for (;;) - { - if (*p == NULL) - { - *p = new RangeSuffix(l_ld, h_ld); - break; - } - else if ((*p)->l == l_ld && (*p)->h == h_ld) - { - break; - } - else - p = &(*p)->next; - } + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_tr, h_tr); + p = &(*p)->child; + break; + } + else if ((*p)->l == l_tr && (*p)->h == h_tr) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_ld, h_ld); + break; + } + else if ((*p)->l == l_ld && (*p)->h == h_ld) + { + break; + } + else + p = &(*p)->next; + } } /* @@ -78,8 +78,8 @@ void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uin * This is only possible if the following condition holds: * if L1 /= H1, then L2 == 0xdc00 and H2 == 0xdfff. * This condition ensures that: - * 1) all possible UTF-16 sequences between L and H are allowed - * 2) no word ranges [w1 - w2] appear, such that w1 > w2 + * 1) all possible UTF-16 sequences between L and H are allowed + * 2) no word ranges [w1 - w2] appear, such that w1 > w2 * * E.g.: * [\U00010001-\U00010400] => [d800-d801],[dc01-dc00]. @@ -95,22 +95,22 @@ void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uin */ void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) { - if (l_ld != h_ld) - { - if (l_tr > utf16::MIN_TRAIL_SURR) - { - UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR); - UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr); - return; - } - if (h_tr < utf16::MAX_TRAIL_SURR) - { - UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR); - UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr); - return; - } - } - UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr); + if (l_ld != h_ld) + { + if (l_tr > utf16::MIN_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + if (h_tr < utf16::MAX_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + } + UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr); } /* @@ -122,28 +122,28 @@ void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, */ void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h) { - if (l <= utf16::MAX_1WORD_RUNE) - { - if (h <= utf16::MAX_1WORD_RUNE) - { - UTF16addContinuous1(root, l, h); - } - else - { - UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE); - const uint32_t h_ld = utf16::lead_surr(h); - const uint32_t h_tr = utf16::trail_surr(h); - UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr); - } - } - else - { - const uint32_t l_ld = utf16::lead_surr(l); - const uint32_t l_tr = utf16::trail_surr(l); - const uint32_t h_ld = utf16::lead_surr(h); - const uint32_t h_tr = utf16::trail_surr(h); - UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr); - } + if (l <= utf16::MAX_1WORD_RUNE) + { + if (h <= utf16::MAX_1WORD_RUNE) + { + UTF16addContinuous1(root, l, h); + } + else + { + UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + } + } + else + { + const uint32_t l_ld = utf16::lead_surr(l); + const uint32_t l_tr = utf16::trail_surr(l); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr); + } } } // namespace re2c diff --git a/re2c/src/re/encoding/utf16/utf16_regexp.cc b/re2c/src/re/encoding/utf16/utf16_regexp.cc index 43fed267..e014d32f 100644 --- a/re2c/src/re/encoding/utf16/utf16_regexp.cc +++ b/re2c/src/re/encoding/utf16/utf16_regexp.cc @@ -10,15 +10,15 @@ namespace re2c { RE *UTF16Symbol(RE::alc_t &alc, utf16::rune r) { - if (r <= utf16::MAX_1WORD_RUNE) { - return re_sym(alc, Range::sym(r)); - } else { - const uint32_t ld = utf16::lead_surr(r); - const uint32_t tr = utf16::trail_surr(r); - return re_cat(alc, - re_sym(alc, Range::sym(ld)), - re_sym(alc, Range::sym(tr))); - } + if (r <= utf16::MAX_1WORD_RUNE) { + return re_sym(alc, Range::sym(r)); + } else { + const uint32_t ld = utf16::lead_surr(r); + const uint32_t tr = utf16::trail_surr(r); + return re_cat(alc, + re_sym(alc, Range::sym(ld)), + re_sym(alc, Range::sym(tr))); + } } /* @@ -29,10 +29,10 @@ RE *UTF16Symbol(RE::alc_t &alc, utf16::rune r) */ RE *UTF16Range(RE::alc_t &alc, const Range *r) { - RangeSuffix * root = NULL; - for (; r != NULL; r = r->next ()) - UTF16splitByRuneLength(root, r->lower (), r->upper () - 1); - return to_regexp(alc, root); + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF16splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp(alc, root); } } // namespace re2c diff --git a/re2c/src/re/encoding/utf8/utf8.cc b/re2c/src/re/encoding/utf8/utf8.cc index 9255565c..051e09f5 100644 --- a/re2c/src/re/encoding/utf8/utf8.cc +++ b/re2c/src/re/encoding/utf8/utf8.cc @@ -21,64 +21,64 @@ const uint32_t utf8::MASK = 0x3Fu; // 0011 1111 uint32_t utf8::rune_to_bytes(uint32_t *str, rune c) { - // one byte sequence: 0-0x7F => 0xxxxxxx - if (c <= MAX_1BYTE_RUNE) - { - str[0] = PREFIX_1BYTE | c; - return 1; - } + // one byte sequence: 0-0x7F => 0xxxxxxx + if (c <= MAX_1BYTE_RUNE) + { + str[0] = PREFIX_1BYTE | c; + return 1; + } - // two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx - if (c <= MAX_2BYTE_RUNE) - { - str[0] = PREFIX_2BYTE | (c >> 1*SHIFT); - str[1] = INFIX | (c & MASK); - return 2; - } + // two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx + if (c <= MAX_2BYTE_RUNE) + { + str[0] = PREFIX_2BYTE | (c >> 1*SHIFT); + str[1] = INFIX | (c & MASK); + return 2; + } - // If the Rune is out of range, convert it to the error rune. - // Do this test here because the error rune encodes to three bytes. - // Doing it earlier would duplicate work, since an out of range - // Rune wouldn't have fit in one or two bytes. - if (c > MAX_RUNE) - c = ERROR; + // If the Rune is out of range, convert it to the error rune. + // Do this test here because the error rune encodes to three bytes. + // Doing it earlier would duplicate work, since an out of range + // Rune wouldn't have fit in one or two bytes. + if (c > MAX_RUNE) + c = ERROR; - // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx - if (c <= MAX_3BYTE_RUNE) - { - str[0] = PREFIX_3BYTE | (c >> 2*SHIFT); - str[1] = INFIX | ((c >> 1*SHIFT) & MASK); - str[2] = INFIX | (c & MASK); - return 3; - } + // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx + if (c <= MAX_3BYTE_RUNE) + { + str[0] = PREFIX_3BYTE | (c >> 2*SHIFT); + str[1] = INFIX | ((c >> 1*SHIFT) & MASK); + str[2] = INFIX | (c & MASK); + return 3; + } - // four byte sequence (21-bit value): - // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - str[0] = PREFIX_4BYTE | (c >> 3*SHIFT); - str[1] = INFIX | ((c >> 2*SHIFT) & MASK); - str[2] = INFIX | ((c >> 1*SHIFT) & MASK); - str[3] = INFIX | (c & MASK); - return 4; + // four byte sequence (21-bit value): + // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + str[0] = PREFIX_4BYTE | (c >> 3*SHIFT); + str[1] = INFIX | ((c >> 2*SHIFT) & MASK); + str[2] = INFIX | ((c >> 1*SHIFT) & MASK); + str[3] = INFIX | (c & MASK); + return 4; } uint32_t utf8::rune_length(rune r) { - if (r <= MAX_2BYTE_RUNE) - return r <= MAX_1BYTE_RUNE ? 1 : 2; - else - return r <= MAX_3BYTE_RUNE ? 3 : 4; + if (r <= MAX_2BYTE_RUNE) + return r <= MAX_1BYTE_RUNE ? 1 : 2; + else + return r <= MAX_3BYTE_RUNE ? 3 : 4; } utf8::rune utf8::max_rune(uint32_t i) { - switch (i) - { - case 1: return MAX_1BYTE_RUNE; - case 2: return MAX_2BYTE_RUNE; - case 3: return MAX_3BYTE_RUNE; - case 4: return MAX_4BYTE_RUNE; - default: return ERROR; - } + switch (i) + { + case 1: return MAX_1BYTE_RUNE; + case 2: return MAX_2BYTE_RUNE; + case 3: return MAX_3BYTE_RUNE; + case 4: return MAX_4BYTE_RUNE; + default: return ERROR; + } } } // namespace re2c diff --git a/re2c/src/re/encoding/utf8/utf8.h b/re2c/src/re/encoding/utf8/utf8.h index 50327766..efb0a309 100644 --- a/re2c/src/re/encoding/utf8/utf8.h +++ b/re2c/src/re/encoding/utf8/utf8.h @@ -8,39 +8,39 @@ namespace re2c { class utf8 { public: - typedef uint32_t rune; + typedef uint32_t rune; - // maximum characters per rune - // enum instead of static const member because of [-Wvla] - enum { MAX_RUNE_LENGTH = 4u }; + // maximum characters per rune + // enum instead of static const member because of [-Wvla] + enum { MAX_RUNE_LENGTH = 4u }; - // decoding error - static const uint32_t ERROR; + // decoding error + static const uint32_t ERROR; - // maximal runes for each rune length - static const rune MAX_1BYTE_RUNE; - static const rune MAX_2BYTE_RUNE; - static const rune MAX_3BYTE_RUNE; - static const rune MAX_4BYTE_RUNE; - static const rune MAX_RUNE; + // maximal runes for each rune length + static const rune MAX_1BYTE_RUNE; + static const rune MAX_2BYTE_RUNE; + static const rune MAX_3BYTE_RUNE; + static const rune MAX_4BYTE_RUNE; + static const rune MAX_RUNE; - static const uint32_t PREFIX_1BYTE; - static const uint32_t INFIX; - static const uint32_t PREFIX_2BYTE; - static const uint32_t PREFIX_3BYTE; - static const uint32_t PREFIX_4BYTE; + static const uint32_t PREFIX_1BYTE; + static const uint32_t INFIX; + static const uint32_t PREFIX_2BYTE; + static const uint32_t PREFIX_3BYTE; + static const uint32_t PREFIX_4BYTE; - static const uint32_t SHIFT; - static const uint32_t MASK; + static const uint32_t SHIFT; + static const uint32_t MASK; - // UTF-8 bytestring for given Unicode rune - static uint32_t rune_to_bytes(uint32_t * s, rune r); + // UTF-8 bytestring for given Unicode rune + static uint32_t rune_to_bytes(uint32_t * s, rune r); - // length of UTF-8 bytestring for given Unicode rune - static uint32_t rune_length(rune r); + // length of UTF-8 bytestring for given Unicode rune + static uint32_t rune_length(rune r); - // maximal Unicode rune with given length of UTF-8 bytestring - static rune max_rune(uint32_t i); + // maximal Unicode rune with given length of UTF-8 bytestring + static rune max_rune(uint32_t i); }; } // namespace re2c diff --git a/re2c/src/re/encoding/utf8/utf8_range.cc b/re2c/src/re/encoding/utf8/utf8_range.cc index 18ef7565..75fdf66d 100644 --- a/re2c/src/re/encoding/utf8/utf8_range.cc +++ b/re2c/src/re/encoding/utf8/utf8_range.cc @@ -12,33 +12,33 @@ namespace re2c { */ void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) { - uint32_t lcs[utf8::MAX_RUNE_LENGTH]; - uint32_t hcs[utf8::MAX_RUNE_LENGTH]; - utf8::rune_to_bytes(lcs, l); - utf8::rune_to_bytes(hcs, h); + uint32_t lcs[utf8::MAX_RUNE_LENGTH]; + uint32_t hcs[utf8::MAX_RUNE_LENGTH]; + utf8::rune_to_bytes(lcs, l); + utf8::rune_to_bytes(hcs, h); - RangeSuffix ** p = &root; - for (uint32_t i = 1; i <= n; ++i) - { - const uint32_t lc = lcs[n - i]; - const uint32_t hc = hcs[n - i]; - for (;;) - { - if (*p == NULL) - { - *p = new RangeSuffix(lc, hc); - p = &(*p)->child; - break; - } - else if ((*p)->l == lc && (*p)->h == hc) - { - p = &(*p)->child; - break; - } - else - p = &(*p)->next; - } - } + RangeSuffix ** p = &root; + for (uint32_t i = 1; i <= n; ++i) + { + const uint32_t lc = lcs[n - i]; + const uint32_t hc = hcs[n - i]; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(lc, hc); + p = &(*p)->child; + break; + } + else if ((*p)->l == lc && (*p)->h == hc) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + } } /* @@ -52,8 +52,8 @@ void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_ * This is only possible if for all i > 1: * if L_i /= H_i, then L_(i+1) == 0x80 and H_(i+1) == 0xbf. * This condition ensures that: - * 1) all possible UTF-8 sequences between L and H are allowed - * 2) no byte ranges [b1 - b2] appear, such that b1 > b2 + * 1) all possible UTF-8 sequences between L and H are allowed + * 2) no byte ranges [b1 - b2] appear, such that b1 > b2 * * E.g.: * [\U000e0031-\U000e0043] => [f3-f3],[a0-a0],[80-81],[b1-83]. @@ -69,26 +69,26 @@ void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_ */ void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) { - for (uint32_t i = 1; i < n; ++i) - { - uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence - if ((l & ~m) != (h & ~m)) - { - if ((l & m) != 0) - { - UTF8splitByContinuity(root, l, l | m, n); - UTF8splitByContinuity(root, (l | m) + 1, h, n); - return; - } - if ((h & m) != m) - { - UTF8splitByContinuity(root, l, (h & ~m) - 1, n); - UTF8splitByContinuity(root, h & ~m, h, n); - return; - } - } - } - UTF8addContinuous(root, l, h, n); + for (uint32_t i = 1; i < n; ++i) + { + uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence + if ((l & ~m) != (h & ~m)) + { + if ((l & m) != 0) + { + UTF8splitByContinuity(root, l, l | m, n); + UTF8splitByContinuity(root, (l | m) + 1, h, n); + return; + } + if ((h & m) != m) + { + UTF8splitByContinuity(root, l, (h & ~m) - 1, n); + UTF8splitByContinuity(root, h & ~m, h, n); + return; + } + } + } + UTF8addContinuous(root, l, h, n); } /* @@ -102,14 +102,14 @@ void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uin */ void UTF8splitByRuneLength(RangeSuffix * & root, utf8::rune l, utf8::rune h) { - const uint32_t nh = utf8::rune_length(h); - for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl) - { - utf8::rune r = utf8::max_rune(nl); - UTF8splitByContinuity(root, l, r, nl); - l = r + 1; - } - UTF8splitByContinuity(root, l, h, nh); + const uint32_t nh = utf8::rune_length(h); + for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl) + { + utf8::rune r = utf8::max_rune(nl); + UTF8splitByContinuity(root, l, r, nl); + l = r + 1; + } + UTF8splitByContinuity(root, l, h, nh); } } // namespace re2c diff --git a/re2c/src/re/encoding/utf8/utf8_regexp.cc b/re2c/src/re/encoding/utf8/utf8_regexp.cc index 370663dd..62b94723 100644 --- a/re2c/src/re/encoding/utf8/utf8_regexp.cc +++ b/re2c/src/re/encoding/utf8/utf8_regexp.cc @@ -10,13 +10,13 @@ namespace re2c { RE *UTF8Symbol(RE::alc_t &alc, utf8::rune r) { - uint32_t chars[utf8::MAX_RUNE_LENGTH]; - const uint32_t chars_count = utf8::rune_to_bytes(chars, r); - RE *re = re_sym(alc, Range::sym(chars[0])); - for (uint32_t i = 1; i < chars_count; ++i) { - re = re_cat(alc, re, re_sym(alc, Range::sym(chars[i]))); - } - return re; + uint32_t chars[utf8::MAX_RUNE_LENGTH]; + const uint32_t chars_count = utf8::rune_to_bytes(chars, r); + RE *re = re_sym(alc, Range::sym(chars[0])); + for (uint32_t i = 1; i < chars_count; ++i) { + re = re_cat(alc, re, re_sym(alc, Range::sym(chars[i]))); + } + return re; } /* @@ -27,10 +27,10 @@ RE *UTF8Symbol(RE::alc_t &alc, utf8::rune r) */ RE *UTF8Range(RE::alc_t &alc, const Range *r) { - RangeSuffix * root = NULL; - for (; r != NULL; r = r->next ()) - UTF8splitByRuneLength(root, r->lower (), r->upper () - 1); - return to_regexp(alc, root); + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF8splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp(alc, root); } } // namespace re2c diff --git a/re2c/src/re/fixed_tags.cc b/re2c/src/re/fixed_tags.cc index 03d5f495..27330d2e 100644 --- a/re2c/src/re/fixed_tags.cc +++ b/re2c/src/re/fixed_tags.cc @@ -29,56 +29,56 @@ namespace re2c { */ static void find_fixed_tags(RE *re, std::vector &tags, - size_t &dist, size_t &base, bool toplevel) + size_t &dist, size_t &base, bool toplevel) { - switch (re->type) { - case RE::NIL: break; - case RE::SYM: - if (dist != Tag::VARDIST) ++dist; - break; - case RE::ALT: { - size_t d1 = dist, d2 = dist; - find_fixed_tags(re->alt.re1, tags, d1, base, false); - find_fixed_tags(re->alt.re2, tags, d2, base, false); - dist = (d1 == d2) ? d1 : Tag::VARDIST; - break; - } - case RE::CAT: - find_fixed_tags(re->cat.re2, tags, dist, base, toplevel); - find_fixed_tags(re->cat.re1, tags, dist, base, toplevel); - break; - case RE::ITER: - find_fixed_tags(re->iter.re, tags, dist, base, false); - dist = Tag::VARDIST; - break; - case RE::TAG: { - // see note [fixed and variable tags] - Tag &tag = tags[re->tag.idx]; - if (fictive(tag)) { - tag.base = tag.dist = 0; - } else if (toplevel && dist != Tag::VARDIST && !history(tag)) { - tag.base = base; - tag.dist = dist; - } else if (toplevel) { - base = re->tag.idx; - dist = 0; - } - if (trailing(tag)) dist = 0; - break; - } - } + switch (re->type) { + case RE::NIL: break; + case RE::SYM: + if (dist != Tag::VARDIST) ++dist; + break; + case RE::ALT: { + size_t d1 = dist, d2 = dist; + find_fixed_tags(re->alt.re1, tags, d1, base, false); + find_fixed_tags(re->alt.re2, tags, d2, base, false); + dist = (d1 == d2) ? d1 : Tag::VARDIST; + break; + } + case RE::CAT: + find_fixed_tags(re->cat.re2, tags, dist, base, toplevel); + find_fixed_tags(re->cat.re1, tags, dist, base, toplevel); + break; + case RE::ITER: + find_fixed_tags(re->iter.re, tags, dist, base, false); + dist = Tag::VARDIST; + break; + case RE::TAG: { + // see note [fixed and variable tags] + Tag &tag = tags[re->tag.idx]; + if (fictive(tag)) { + tag.base = tag.dist = 0; + } else if (toplevel && dist != Tag::VARDIST && !history(tag)) { + tag.base = base; + tag.dist = dist; + } else if (toplevel) { + base = re->tag.idx; + dist = 0; + } + if (trailing(tag)) dist = 0; + break; + } + } } void find_fixed_tags(RESpec &spec) { - const bool generic = spec.opts->input_api == INPUT_CUSTOM; - std::vector::iterator - i = spec.res.begin(), - e = spec.res.end(); - for (; i != e; ++i) { - size_t base = Tag::RIGHTMOST, dist = 0; - find_fixed_tags(*i, spec.tags, dist, base, !generic); - } + const bool generic = spec.opts->input_api == INPUT_CUSTOM; + std::vector::iterator + i = spec.res.begin(), + e = spec.res.end(); + for (; i != e; ++i) { + size_t base = Tag::RIGHTMOST, dist = 0; + find_fixed_tags(*i, spec.tags, dist, base, !generic); + } } } // namespace re2c diff --git a/re2c/src/re/nullable.cc b/re2c/src/re/nullable.cc index 40cb7921..f4e2d323 100644 --- a/re2c/src/re/nullable.cc +++ b/re2c/src/re/nullable.cc @@ -12,24 +12,24 @@ namespace re2c { static bool nullable(const RESpec &spec, const RE *re, bool &trail) { - if (trail) return true; + if (trail) return true; - switch (re->type) { - case RE::NIL: return true; - case RE::SYM: return false; - case RE::ITER: - return nullable(spec, re->iter.re, trail); - case RE::TAG: - trail |= trailing(spec.tags[re->tag.idx]); - return true; - case RE::ALT: - return nullable(spec, re->alt.re1, trail) - || nullable(spec, re->alt.re2, trail); - case RE::CAT: - return nullable(spec, re->cat.re1, trail) - && nullable(spec, re->cat.re2, trail); - } - return false; /* unreachable */ + switch (re->type) { + case RE::NIL: return true; + case RE::SYM: return false; + case RE::ITER: + return nullable(spec, re->iter.re, trail); + case RE::TAG: + trail |= trailing(spec.tags[re->tag.idx]); + return true; + case RE::ALT: + return nullable(spec, re->alt.re1, trail) + || nullable(spec, re->alt.re2, trail); + case RE::CAT: + return nullable(spec, re->cat.re1, trail) + && nullable(spec, re->cat.re2, trail); + } + return false; /* unreachable */ } /* @@ -39,13 +39,13 @@ static bool nullable(const RESpec &spec, const RE *re, bool &trail) */ void warn_nullable(const RESpec &spec, const std::string &cond) { - const size_t nre = spec.res.size(); - for (size_t i = 0; i < nre; ++i) { - bool trail = false; - if (nullable(spec, spec.res[i], trail)) { - spec.warn.match_empty_string(spec.rules[i].code->fline, cond); - } - } + const size_t nre = spec.res.size(); + for (size_t i = 0; i < nre; ++i) { + bool trail = false; + if (nullable(spec, spec.res[i], trail)) { + spec.warn.match_empty_string(spec.rules[i].code->fline, cond); + } + } } } // namespace re2c diff --git a/re2c/src/re/re.h b/re2c/src/re/re.h index c30cd0ff..e7137d9e 100644 --- a/re2c/src/re/re.h +++ b/re2c/src/re/re.h @@ -15,39 +15,39 @@ namespace re2c struct RE { - typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t; - enum type_t {NIL, SYM, ALT, CAT, ITER, TAG} type; - union { - const Range *sym; - struct { - RE *re1; - RE *re2; - } alt; - struct { - RE *re1; - RE *re2; - } cat; - struct { - RE *re; - uint32_t min; - uint32_t max; - } iter; - tag_info_t tag; - }; + typedef slab_allocator_t<~0u, 4096, sizeof(void*)> alc_t; + enum type_t {NIL, SYM, ALT, CAT, ITER, TAG} type; + union { + const Range *sym; + struct { + RE *re1; + RE *re2; + } alt; + struct { + RE *re1; + RE *re2; + } cat; + struct { + RE *re; + uint32_t min; + uint32_t max; + } iter; + tag_info_t tag; + }; }; struct RESpec { - RE::alc_t alc; - std::vector res; - std::vector &charset; - std::vector &tags; - std::valarray &rules; - const opt_t *opts; - Warn &warn; - - explicit RESpec(const std::vector &ast, const opt_t *o, Warn &w); - FORBID_COPY(RESpec); + RE::alc_t alc; + std::vector res; + std::vector &charset; + std::vector &tags; + std::valarray &rules; + const opt_t *opts; + Warn &warn; + + explicit RESpec(const std::vector &ast, const opt_t *o, Warn &w); + FORBID_COPY(RESpec); }; void split_charset(RESpec &spec); @@ -57,64 +57,64 @@ void warn_nullable(const RESpec &spec, const std::string &cond); inline RE *re_nil(RE::alc_t &alc) { - RE *x = alc.alloct(1); - x->type = RE::NIL; - return x; + RE *x = alc.alloct(1); + x->type = RE::NIL; + return x; } inline RE *re_sym(RE::alc_t &alc, const Range *r) { - RE *x = alc.alloct(1); - x->type = RE::SYM; - x->sym = r; - return x; + RE *x = alc.alloct(1); + x->type = RE::SYM; + x->sym = r; + return x; } inline RE *re_alt(RE::alc_t &alc, RE *x, RE *y) { - if (!x) return y; - if (!y) return x; - if (x->type == RE::SYM && y->type == RE::SYM) { - return re_sym(alc, Range::add(x->sym, y->sym)); - } - - RE *z = alc.alloct(1); - z->type = RE::ALT; - z->alt.re1 = x; - z->alt.re2 = y; - return z; + if (!x) return y; + if (!y) return x; + if (x->type == RE::SYM && y->type == RE::SYM) { + return re_sym(alc, Range::add(x->sym, y->sym)); + } + + RE *z = alc.alloct(1); + z->type = RE::ALT; + z->alt.re1 = x; + z->alt.re2 = y; + return z; } inline RE *re_cat(RE::alc_t &alc, RE *x, RE *y) { - if (!x) return y; - if (!y) return x; - - RE *z = alc.alloct(1); - z->type = RE::CAT; - z->cat.re1 = x; - z->cat.re2 = y; - return z; + if (!x) return y; + if (!y) return x; + + RE *z = alc.alloct(1); + z->type = RE::CAT; + z->cat.re1 = x; + z->cat.re2 = y; + return z; } inline RE *re_iter(RE::alc_t &alc, RE *x, uint32_t n, uint32_t m) { - RE *y = alc.alloct(1); - y->type = RE::ITER; - y->iter.re = x; - y->iter.min = n; - y->iter.max = m; - return y; + RE *y = alc.alloct(1); + y->type = RE::ITER; + y->iter.re = x; + y->iter.min = n; + y->iter.max = m; + return y; } inline RE *re_tag(RE::alc_t &alc, size_t idx, bool neg) { - RE *x = alc.alloct(1); - x->type = RE::TAG; - x->tag.idx = idx & 0x7FFFffff; - assert(idx == x->tag.idx); - x->tag.neg = neg; - return x; + RE *x = alc.alloct(1); + x->type = RE::TAG; + x->tag.idx = idx & 0x7FFFffff; + assert(idx == x->tag.idx); + x->tag.neg = neg; + return x; } } // namespace re2c diff --git a/re2c/src/re/rule.h b/re2c/src/re/rule.h index 803281cc..e53f2d84 100644 --- a/re2c/src/re/rule.h +++ b/re2c/src/re/rule.h @@ -14,74 +14,74 @@ namespace re2c struct Code { - static free_list flist; + static free_list flist; - std::string fname; - uint32_t fline; - bool autogen; - const std::string text; - std::string cond; + std::string fname; + uint32_t fline; + bool autogen; + const std::string text; + std::string cond; - Code(const std::string &file, uint32_t line) - : fname(file) - , fline(line) - , autogen(true) - , text("") - , cond("") - { - flist.insert(this); - } - Code(const std::string &file, uint32_t line, const char *s, size_t slen) - : fname(file) - , fline(line) - , autogen(false) - , text(s, slen) - , cond("") - { - flist.insert(this); - } - ~Code() - { - flist.erase(this); - } + Code(const std::string &file, uint32_t line) + : fname(file) + , fline(line) + , autogen(true) + , text("") + , cond("") + { + flist.insert(this); + } + Code(const std::string &file, uint32_t line, const char *s, size_t slen) + : fname(file) + , fline(line) + , autogen(false) + , text(s, slen) + , cond("") + { + flist.insert(this); + } + ~Code() + { + flist.erase(this); + } }; struct Rule { - static const size_t NONE; + static const size_t NONE; - const Code *code; - std::set shadow; + const Code *code; + std::set shadow; - // tags - size_t ltag; // first - size_t htag; // next to last - size_t ttag; // trailing context + // tags + size_t ltag; // first + size_t htag; // next to last + size_t ttag; // trailing context - size_t ncap; // number of POSIX captures + size_t ncap; // number of POSIX captures - Rule(): code(NULL), shadow(), - ltag(0), htag(0), ttag(0), ncap(0) {} + Rule(): code(NULL), shadow(), + ltag(0), htag(0), ttag(0), ncap(0) {} - // copy ctor and assignment are required for containers on macOS - Rule(const Rule &r) - : code(r.code) - , shadow(r.shadow) - , ltag(r.ltag) - , htag(r.htag) - , ttag(r.ttag) - , ncap(r.ncap) - {} - Rule& operator= (const Rule &r) - { - code = r.code; - shadow = r.shadow; - ltag = r.ltag; - htag = r.htag; - ttag = r.ttag; - ncap = r.ncap; - return *this; - } + // copy ctor and assignment are required for containers on macOS + Rule(const Rule &r) + : code(r.code) + , shadow(r.shadow) + , ltag(r.ltag) + , htag(r.htag) + , ttag(r.ttag) + , ncap(r.ncap) + {} + Rule& operator= (const Rule &r) + { + code = r.code; + shadow = r.shadow; + ltag = r.ltag; + htag = r.htag; + ttag = r.ttag; + ncap = r.ncap; + return *this; + } }; } // namespace re2c diff --git a/re2c/src/re/split_charset.cc b/re2c/src/re/split_charset.cc index d2bef13a..fda1ee57 100644 --- a/re2c/src/re/split_charset.cc +++ b/re2c/src/re/split_charset.cc @@ -19,42 +19,42 @@ namespace re2c { */ void split_charset(RESpec &spec) { - std::set cs; - std::stack todo; + std::set cs; + std::stack todo; - std::vector::const_iterator - i = spec.res.begin(), - e = spec.res.end(); - for (; i != e; ++i) todo.push(*i); - while (!todo.empty()) { - const RE *re = todo.top(); - todo.pop(); - switch (re->type) { - case RE::NIL: break; - case RE::TAG: break; - case RE::SYM: - for (const Range *r = re->sym; r; r = r->next()) { - cs.insert(r->lower()); - cs.insert(r->upper()); - } - break; - case RE::ALT: - todo.push(re->alt.re2); - todo.push(re->alt.re1); - break; - case RE::CAT: - todo.push(re->cat.re2); - todo.push(re->cat.re1); - break; - case RE::ITER: - todo.push(re->iter.re); - break; - } - } - cs.insert(0); - cs.insert(spec.opts->encoding.nCodeUnits()); + std::vector::const_iterator + i = spec.res.begin(), + e = spec.res.end(); + for (; i != e; ++i) todo.push(*i); + while (!todo.empty()) { + const RE *re = todo.top(); + todo.pop(); + switch (re->type) { + case RE::NIL: break; + case RE::TAG: break; + case RE::SYM: + for (const Range *r = re->sym; r; r = r->next()) { + cs.insert(r->lower()); + cs.insert(r->upper()); + } + break; + case RE::ALT: + todo.push(re->alt.re2); + todo.push(re->alt.re1); + break; + case RE::CAT: + todo.push(re->cat.re2); + todo.push(re->cat.re1); + break; + case RE::ITER: + todo.push(re->iter.re); + break; + } + } + cs.insert(0); + cs.insert(spec.opts->encoding.nCodeUnits()); - spec.charset.insert(spec.charset.end(), cs.begin(), cs.end()); + spec.charset.insert(spec.charset.end(), cs.begin(), cs.end()); } } // namespace re2c diff --git a/re2c/src/re/tag.cc b/re2c/src/re/tag.cc index 7c2b4d13..11da36a3 100644 --- a/re2c/src/re/tag.cc +++ b/re2c/src/re/tag.cc @@ -11,24 +11,24 @@ const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1; Tag::Tag(const std::string *nm, bool hi, int32_t ht) - : name(nm) - , ncap(Tag::RIGHTMOST) - , base(Tag::RIGHTMOST) - , dist(Tag::VARDIST) - , history(hi) - , orbit(false) - , height(ht) + : name(nm) + , ncap(Tag::RIGHTMOST) + , base(Tag::RIGHTMOST) + , dist(Tag::VARDIST) + , history(hi) + , orbit(false) + , height(ht) {} Tag::Tag(size_t nc, bool ob, int32_t ht) - : name(NULL) - , ncap(nc) - , base(Tag::RIGHTMOST) - , dist(Tag::VARDIST) - , history(false) - , orbit(ob) - , height(ht) + : name(NULL) + , ncap(nc) + , base(Tag::RIGHTMOST) + , dist(Tag::VARDIST) + , history(false) + , orbit(ob) + , height(ht) {} } // namespace re2c diff --git a/re2c/src/re/tag.h b/re2c/src/re/tag.h index e44295b4..342bf982 100644 --- a/re2c/src/re/tag.h +++ b/re2c/src/re/tag.h @@ -18,64 +18,64 @@ static const tagver_t TAGVER_CURSOR = std::numeric_limits::max(); // c struct tag_info_t { - uint32_t idx : 31; - uint32_t neg : 1; + uint32_t idx : 31; + uint32_t neg : 1; }; struct Tag { - static const size_t RIGHTMOST; - static const size_t VARDIST; - static const size_t FICTIVE; - - const std::string *name; - size_t ncap; - size_t base; - size_t dist; - bool history; - bool orbit; - int32_t height; - - Tag(const std::string *nm, bool hi, int32_t ht); - Tag(size_t nc, bool ob, int32_t ht); + static const size_t RIGHTMOST; + static const size_t VARDIST; + static const size_t FICTIVE; + + const std::string *name; + size_t ncap; + size_t base; + size_t dist; + bool history; + bool orbit; + int32_t height; + + Tag(const std::string *nm, bool hi, int32_t ht); + Tag(size_t nc, bool ob, int32_t ht); }; inline bool operator == (const tag_info_t &x, const tag_info_t &y) { - // per-component comparison is slower - RE2C_STATIC_ASSERT(sizeof(tag_info_t) == sizeof(uint32_t)); - return *reinterpret_cast(&x) - == *reinterpret_cast(&y); + // per-component comparison is slower + RE2C_STATIC_ASSERT(sizeof(tag_info_t) == sizeof(uint32_t)); + return *reinterpret_cast(&x) + == *reinterpret_cast(&y); } inline bool fixed(const Tag &tag) { - return tag.dist != Tag::VARDIST; + return tag.dist != Tag::VARDIST; } inline bool fictive(const Tag &tag) { - return tag.ncap == Tag::FICTIVE; + return tag.ncap == Tag::FICTIVE; } inline bool capture(const Tag &tag) { - return tag.ncap != Tag::RIGHTMOST; + return tag.ncap != Tag::RIGHTMOST; } inline bool orbit(const Tag &tag) { - return tag.orbit; + return tag.orbit; } inline bool trailing(const Tag &tag) { - return !capture(tag) && tag.name == NULL; + return !capture(tag) && tag.name == NULL; } inline bool history(const Tag &tag) { - return tag.history; + return tag.history; } } // namespace re2c diff --git a/re2c/src/skeleton/control_flow.cc b/re2c/src/skeleton/control_flow.cc index 138def51..89bf7b9c 100644 --- a/re2c/src/skeleton/control_flow.cc +++ b/re2c/src/skeleton/control_flow.cc @@ -24,93 +24,93 @@ typedef u32lim_t<1024> ucf_size_t; // ~1Kb // UCF stands for 'undefined control flow' struct ucf_t { - std::valarray loops; - std::vector paths; - path_t prefix; - ucf_size_t size; + std::valarray loops; + std::vector paths; + path_t prefix; + ucf_size_t size; - explicit ucf_t(size_t nnodes): loops(nnodes), paths(), - prefix(0), size(ucf_size_t::from32(0u)) {} + explicit ucf_t(size_t nnodes): loops(nnodes), paths(), + prefix(0), size(ucf_size_t::from32(0u)) {} }; // We don't need all patterns that cause undefined behaviour. // We only need some examples, the shorter the better. static void naked_paths( - const Skeleton &skel, - ucf_t &ucf, - size_t i) + const Skeleton &skel, + ucf_t &ucf, + size_t i) { - const Node &node = skel.nodes[i]; - bool &loop = ucf.loops[i]; - path_t &prefix = ucf.prefix; - ucf_size_t &size = ucf.size; + const Node &node = skel.nodes[i]; + bool &loop = ucf.loops[i]; + path_t &prefix = ucf.prefix; + ucf_size_t &size = ucf.size; - if (node.rule != Rule::NONE) { - return; - } else if (node.end()) { - ucf.paths.push_back(prefix); - size = size + ucf_size_t::from64(prefix.len()); - } else if (!loop) { - loop = true; - Node::arcs_t::const_iterator - arc = node.arcs.begin(), - end = node.arcs.end(); - for (; arc != end && !size.overflow(); ++arc) { - const size_t j = arc->first; - prefix.push(j); - naked_paths(skel, ucf, j); - prefix.pop(); - } - } + if (node.rule != Rule::NONE) { + return; + } else if (node.end()) { + ucf.paths.push_back(prefix); + size = size + ucf_size_t::from64(prefix.len()); + } else if (!loop) { + loop = true; + Node::arcs_t::const_iterator + arc = node.arcs.begin(), + end = node.arcs.end(); + for (; arc != end && !size.overflow(); ++arc) { + const size_t j = arc->first; + prefix.push(j); + naked_paths(skel, ucf, j); + prefix.pop(); + } + } } void warn_undefined_control_flow(const Skeleton &skel, Warn &warn) { - ucf_t ucf(skel.nodes_count); - naked_paths(skel, ucf, 0); - if (!ucf.paths.empty()) { - warn.undefined_control_flow(skel, ucf.paths, ucf.size.overflow()); - } else if (ucf.size.overflow()) { - warn.fail(Warn::UNDEFINED_CONTROL_FLOW, skel.line, - "DFA is too large to check undefined control flow"); - } + ucf_t ucf(skel.nodes_count); + naked_paths(skel, ucf, 0); + if (!ucf.paths.empty()) { + warn.undefined_control_flow(skel, ucf.paths, ucf.size.overflow()); + } else if (ucf.size.overflow()) { + warn.fail(Warn::UNDEFINED_CONTROL_FLOW, skel.line, + "DFA is too large to check undefined control flow"); + } } static void fprint_default_arc(FILE *f, const Node::arc_t &arc) { - const size_t ranges = arc.size(); - if (ranges == 1 && arc[0].lower == arc[0].upper) { - fprintf(f, "\\x%X", arc[0].lower); - } else { - fprintf(f, "["); - for (size_t i = 0; i < ranges; ++i) { - const uint32_t - l = arc[i].lower, - u = arc[i].upper; - fprintf(f, "\\x%X", l); - if (l != u) { - fprintf(f, "-\\x%X", u); - } - } - fprintf(f, "]"); - } + const size_t ranges = arc.size(); + if (ranges == 1 && arc[0].lower == arc[0].upper) { + fprintf(f, "\\x%X", arc[0].lower); + } else { + fprintf(f, "["); + for (size_t i = 0; i < ranges; ++i) { + const uint32_t + l = arc[i].lower, + u = arc[i].upper; + fprintf(f, "\\x%X", l); + if (l != u) { + fprintf(f, "-\\x%X", u); + } + } + fprintf(f, "]"); + } } void fprint_default_path( - FILE *f, - const Skeleton &skel, - const path_t &p) + FILE *f, + const Skeleton &skel, + const path_t &p) { - fprintf(f, "'"); - const size_t len = p.len(); - for (size_t i = 0; i < len; ++i) { - if (i > 0) { - fprintf(f, " "); - } - const Node::arc_t &arc = p.arc(skel, i); - fprint_default_arc(stderr, arc); - } - fprintf(f, "'"); + fprintf(f, "'"); + const size_t len = p.len(); + for (size_t i = 0; i < len; ++i) { + if (i > 0) { + fprintf(f, " "); + } + const Node::arc_t &arc = p.arc(skel, i); + fprint_default_arc(stderr, arc); + } + fprintf(f, "'"); } } // namespace re2c diff --git a/re2c/src/skeleton/generate_code.cc b/re2c/src/skeleton/generate_code.cc index ca85d585..475d9bf5 100644 --- a/re2c/src/skeleton/generate_code.cc +++ b/re2c/src/skeleton/generate_code.cc @@ -20,502 +20,502 @@ namespace re2c static void exact_uint(OutputFile &o, size_t width) { - if (width == sizeof(char)) { - o.ws("unsigned char"); - } else if (width == sizeof(short)) { - o.ws("unsigned short"); - } else if (width == sizeof(int)) { - o.ws("unsigned int"); - } else if (width == sizeof(long)) { - o.ws("unsigned long"); - } else { - o.ws("uint").wu64(width * 8).ws("_t"); - } + if (width == sizeof(char)) { + o.ws("unsigned char"); + } else if (width == sizeof(short)) { + o.ws("unsigned short"); + } else if (width == sizeof(int)) { + o.ws("unsigned int"); + } else if (width == sizeof(long)) { + o.ws("unsigned long"); + } else { + o.ws("uint").wu64(width * 8).ws("_t"); + } } static void from_le(OutputFile &o, uint32_t ind, size_t size, const char *expr) { - o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */"); - o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";"); - o.ws("\n").wind(ind).ws(expr).ws(" = p[0]"); - for (uint32_t i = 1; i < size; ++i) { - o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)"); - } - o.ws(";"); + o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */"); + o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";"); + o.ws("\n").wind(ind).ws(expr).ws(" = p[0]"); + for (uint32_t i = 1; i < size; ++i) { + o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)"); + } + o.ws(";"); } void emit_prolog(OutputFile &o) { - o.ws("\n#include /* size_t */"); - o.ws("\n#include "); - o.ws("\n#include /* malloc, free */"); - o.ws("\n#include /* memcpy */"); - o.ws("\n"); - o.ws("\nstatic void *read_file"); - o.ws("\n").wind(1).ws("( const char *fname"); - o.ws("\n").wind(1).ws(", size_t unit"); - o.ws("\n").wind(1).ws(", size_t padding"); - o.ws("\n").wind(1).ws(", size_t *pfsize"); - o.ws("\n").wind(1).ws(")"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("void *buffer = NULL;"); - o.ws("\n").wind(1).ws("size_t fsize = 0;"); - o.ws("\n"); - o.ws("\n").wind(1).ws("/* open file */"); - o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");"); - o.ws("\n").wind(1).ws("if(f == NULL) {"); - o.ws("\n").wind(2).ws("goto error;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - o.ws("\n").wind(1).ws("/* get file size */"); - o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);"); - o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;"); - o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);"); - o.ws("\n"); - o.ws("\n").wind(1).ws("/* allocate memory for file and padding */"); - o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));"); - o.ws("\n").wind(1).ws("if (buffer == NULL) {"); - o.ws("\n").wind(2).ws("goto error;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - o.ws("\n").wind(1).ws("/* read the whole file in memory */"); - o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {"); - o.ws("\n").wind(2).ws("goto error;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - o.ws("\n").wind(1).ws("fclose(f);"); - o.ws("\n").wind(1).ws("*pfsize = fsize;"); - o.ws("\n").wind(1).ws("return buffer;"); - o.ws("\n"); - o.ws("\nerror:"); - o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);"); - o.ws("\n").wind(1).ws("free(buffer);"); - o.ws("\n").wind(1).ws("if (f != NULL) {"); - o.ws("\n").wind(2).ws("fclose(f);"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("return NULL;"); - o.ws("\n}"); - o.ws("\n"); + o.ws("\n#include /* size_t */"); + o.ws("\n#include "); + o.ws("\n#include /* malloc, free */"); + o.ws("\n#include /* memcpy */"); + o.ws("\n"); + o.ws("\nstatic void *read_file"); + o.ws("\n").wind(1).ws("( const char *fname"); + o.ws("\n").wind(1).ws(", size_t unit"); + o.ws("\n").wind(1).ws(", size_t padding"); + o.ws("\n").wind(1).ws(", size_t *pfsize"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("void *buffer = NULL;"); + o.ws("\n").wind(1).ws("size_t fsize = 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* open file */"); + o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");"); + o.ws("\n").wind(1).ws("if(f == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* get file size */"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);"); + o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* allocate memory for file and padding */"); + o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));"); + o.ws("\n").wind(1).ws("if (buffer == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* read the whole file in memory */"); + o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("fclose(f);"); + o.ws("\n").wind(1).ws("*pfsize = fsize;"); + o.ws("\n").wind(1).ws("return buffer;"); + o.ws("\n"); + o.ws("\nerror:"); + o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);"); + o.ws("\n").wind(1).ws("free(buffer);"); + o.ws("\n").wind(1).ws("if (f != NULL) {"); + o.ws("\n").wind(2).ws("fclose(f);"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("return NULL;"); + o.ws("\n}"); + o.ws("\n"); } void emit_start(OutputFile &o, size_t maxfill, size_t maxnmatch, const std::string &name, - size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker, - const std::set &stagnames, const std::set &stagvars, - const std::set &mtagnames, const std::set &mtagvars, - bitmaps_t &bitmaps) + size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker, + const std::set &stagnames, const std::set &stagvars, + const std::set &mtagnames, const std::set &mtagvars, + bitmaps_t &bitmaps) { - const opt_t *opts = o.block().opts; - const size_t sizeof_cunit = opts->encoding.szCodeUnit(); - const uint64_t norule = rule2key(Rule::NONE, sizeof_key, def); - std::string filename = opts->output_file; - if (filename.empty()) { - filename = ""; - } + const opt_t *opts = o.block().opts; + const size_t sizeof_cunit = opts->encoding.szCodeUnit(); + const uint64_t norule = rule2key(Rule::NONE, sizeof_key, def); + std::string filename = opts->output_file; + if (filename.empty()) { + filename = ""; + } - o.ws("\n#define YYCTYPE "); - exact_uint (o, sizeof_cunit); - o.ws("\n#define YYKEYTYPE "); - exact_uint (o, sizeof_key); - o.ws("\n#define YYPEEK() *cursor"); - o.ws("\n#define YYSKIP() ++cursor"); - if (backup) { - o.ws("\n#define YYBACKUP() marker = cursor"); - o.ws("\n#define YYRESTORE() cursor = marker"); - } - if (oldstyle_ctxmarker) { - o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor"); - o.ws("\n#define YYRESTORECTX() cursor = ctxmarker"); - } - if (opts->tags) { - o.ws("\n#define YYSTAGP(t) t = cursor"); - o.ws("\n#define YYSTAGN(t) t = NULL"); - o.ws("\n#define YYMTAGP(t) yymtag(&t, cursor, &yytp)"); - o.ws("\n#define YYMTAGN(t) yymtag(&t, NULL, &yytp)"); - o.ws("\n#define YYRESTORETAG(t) cursor = t"); - } - o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n"); - o.ws("\n#define YYFILL(n) { break; }"); - o.ws("\n"); + o.ws("\n#define YYCTYPE "); + exact_uint (o, sizeof_cunit); + o.ws("\n#define YYKEYTYPE "); + exact_uint (o, sizeof_key); + o.ws("\n#define YYPEEK() *cursor"); + o.ws("\n#define YYSKIP() ++cursor"); + if (backup) { + o.ws("\n#define YYBACKUP() marker = cursor"); + o.ws("\n#define YYRESTORE() cursor = marker"); + } + if (oldstyle_ctxmarker) { + o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor"); + o.ws("\n#define YYRESTORECTX() cursor = ctxmarker"); + } + if (opts->tags) { + o.ws("\n#define YYSTAGP(t) t = cursor"); + o.ws("\n#define YYSTAGN(t) t = NULL"); + o.ws("\n#define YYMTAGP(t) yymtag(&t, cursor, &yytp)"); + o.ws("\n#define YYMTAGN(t) yymtag(&t, NULL, &yytp)"); + o.ws("\n#define YYRESTORETAG(t) cursor = t"); + } + o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n"); + o.ws("\n#define YYFILL(n) { break; }"); + o.ws("\n"); - o.ws("\nstatic int action_").wstring(name); - o.ws("\n").wind(1).ws("( unsigned *pkix"); - o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys"); - o.ws("\n").wind(1).ws(", const YYCTYPE *start"); - o.ws("\n").wind(1).ws(", const YYCTYPE *token"); - o.ws("\n").wind(1).ws(", const YYCTYPE **cursor"); - o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act"); - o.ws("\n").wind(1).ws(")"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); - o.ws("\n").wind(1).ws("const long pos = token - start;"); - o.ws("\n").wind(1).ws("const long len_act = *cursor - token;"); - o.ws("\n").wind(1).ws("const long len_exp = (long) keys[kix + 1];"); - o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys[kix + 2];"); - o.ws("\n").wind(1).ws("*pkix = kix + 3;"); - o.ws("\n").wind(1).ws("if (rule_exp == ").wu64(norule).ws(") {"); - o.ws("\n").wind(2).ws("fprintf"); - o.ws("\n").wind(3).ws("( stderr"); - o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\""); - o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\""); - o.ws("\n").wind(3).ws(", pos"); - o.ws("\n").wind(3).ws(");"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {"); - o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[kix];"); - o.ws("\n").wind(2).ws("*cursor = token + offset;"); - o.ws("\n").wind(2).ws("return 0;"); - o.ws("\n").wind(1).ws("} else {"); - o.ws("\n").wind(2).ws("fprintf"); - o.ws("\n").wind(3).ws("( stderr"); - o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (key %u):\\n\""); - o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\""); - o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\""); - o.ws("\n").wind(3).ws(", pos"); - o.ws("\n").wind(3).ws(", kix"); - o.ws("\n").wind(3).ws(", len_exp"); - o.ws("\n").wind(3).ws(", rule_exp"); - o.ws("\n").wind(3).ws(", len_act"); - o.ws("\n").wind(3).ws(", rule_act"); - o.ws("\n").wind(3).ws(");"); - o.ws("\n").wind(2).ws("return 1;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n}"); + o.ws("\nstatic int action_").wstring(name); + o.ws("\n").wind(1).ws("( unsigned *pkix"); + o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys"); + o.ws("\n").wind(1).ws(", const YYCTYPE *start"); + o.ws("\n").wind(1).ws(", const YYCTYPE *token"); + o.ws("\n").wind(1).ws(", const YYCTYPE **cursor"); + o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); + o.ws("\n").wind(1).ws("const long pos = token - start;"); + o.ws("\n").wind(1).ws("const long len_act = *cursor - token;"); + o.ws("\n").wind(1).ws("const long len_exp = (long) keys[kix + 1];"); + o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys[kix + 2];"); + o.ws("\n").wind(1).ws("*pkix = kix + 3;"); + o.ws("\n").wind(1).ws("if (rule_exp == ").wu64(norule).ws(") {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\""); + o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {"); + o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[kix];"); + o.ws("\n").wind(2).ws("*cursor = token + offset;"); + o.ws("\n").wind(2).ws("return 0;"); + o.ws("\n").wind(1).ws("} else {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (key %u):\\n\""); + o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\""); + o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(", kix"); + o.ws("\n").wind(3).ws(", len_exp"); + o.ws("\n").wind(3).ws(", rule_exp"); + o.ws("\n").wind(3).ws(", len_act"); + o.ws("\n").wind(3).ws(", rule_act"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n}"); - if (!stagnames.empty()) { - o.ws("\n"); - o.ws("\nstatic int check_stag_").wstring(name) - .ws("(unsigned *pkix, YYKEYTYPE *keys, const YYCTYPE *tag,\n") - .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); - o.ws("\n").wind(1).ws("const YYKEYTYPE\n") - .wind(2).ws("exp = keys[kix],\n") - .wind(2).ws("act = (YYKEYTYPE)(tag - token),\n") - .wind(2).ws("NIL = (YYKEYTYPE)~0u;"); - o.ws("\n").wind(1).ws("*pkix = kix + 1;"); - o.ws("\n"); - o.ws("\n").wind(1).ws("if (exp == act || (exp == NIL && tag == NULL)) return 0;"); - o.ws("\n"); - o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") - .ws("\n").wind(2).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",") - .ws("\n").wind(2).ws("token - input, kix, name, exp, act);"); - o.ws("\n").wind(1).ws("return 1;"); - o.ws("\n}"); - } + if (!stagnames.empty()) { + o.ws("\n"); + o.ws("\nstatic int check_stag_").wstring(name) + .ws("(unsigned *pkix, YYKEYTYPE *keys, const YYCTYPE *tag,\n") + .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); + o.ws("\n").wind(1).ws("const YYKEYTYPE\n") + .wind(2).ws("exp = keys[kix],\n") + .wind(2).ws("act = (YYKEYTYPE)(tag - token),\n") + .wind(2).ws("NIL = (YYKEYTYPE)~0u;"); + o.ws("\n").wind(1).ws("*pkix = kix + 1;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("if (exp == act || (exp == NIL && tag == NULL)) return 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") + .ws("\n").wind(2).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",") + .ws("\n").wind(2).ws("token - input, kix, name, exp, act);"); + o.ws("\n").wind(1).ws("return 1;"); + o.ws("\n}"); + } - if (!mtagnames.empty()) { - o.ws("\n"); - o.ws("\ntypedef struct yymtag_t {"); - o.ws("\n").wind(1).ws("struct yymtag_t *pred;"); - o.ws("\n").wind(1).ws("const YYCTYPE *elem;"); - o.ws("\n} yymtag_t;"); + if (!mtagnames.empty()) { + o.ws("\n"); + o.ws("\ntypedef struct yymtag_t {"); + o.ws("\n").wind(1).ws("struct yymtag_t *pred;"); + o.ws("\n").wind(1).ws("const YYCTYPE *elem;"); + o.ws("\n} yymtag_t;"); - o.ws("\n"); - o.ws("\ntypedef struct yymtagpool_t {"); - o.ws("\n").wind(1).ws("yymtag_t *head;"); - o.ws("\n").wind(1).ws("yymtag_t *next;"); - o.ws("\n").wind(1).ws("yymtag_t *last;"); - o.ws("\n} yymtagpool_t;"); + o.ws("\n"); + o.ws("\ntypedef struct yymtagpool_t {"); + o.ws("\n").wind(1).ws("yymtag_t *head;"); + o.ws("\n").wind(1).ws("yymtag_t *next;"); + o.ws("\n").wind(1).ws("yymtag_t *last;"); + o.ws("\n} yymtagpool_t;"); - o.ws("\n"); - o.ws("\nstatic void yymtagpool_clear(yymtagpool_t *tp)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("tp->next = tp->head;"); - o.ws("\n}"); + o.ws("\n"); + o.ws("\nstatic void yymtagpool_clear(yymtagpool_t *tp)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("tp->next = tp->head;"); + o.ws("\n}"); - o.ws("\n"); - o.ws("\nstatic void yymtagpool_init(yymtagpool_t *tp)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("static const unsigned size = 256;"); - o.ws("\n").wind(1).ws("tp->head = (yymtag_t*)malloc(size * sizeof(yymtag_t));"); - o.ws("\n").wind(1).ws("tp->next = tp->head;"); - o.ws("\n").wind(1).ws("tp->last = tp->head + size;"); - o.ws("\n}"); + o.ws("\n"); + o.ws("\nstatic void yymtagpool_init(yymtagpool_t *tp)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("static const unsigned size = 256;"); + o.ws("\n").wind(1).ws("tp->head = (yymtag_t*)malloc(size * sizeof(yymtag_t));"); + o.ws("\n").wind(1).ws("tp->next = tp->head;"); + o.ws("\n").wind(1).ws("tp->last = tp->head + size;"); + o.ws("\n}"); - o.ws("\n"); - o.ws("\nstatic void yymtagpool_free(yymtagpool_t *tp)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("free(tp->head);"); - o.ws("\n").wind(1).ws("tp->head = tp->next = tp->last = NULL;"); - o.ws("\n}"); + o.ws("\n"); + o.ws("\nstatic void yymtagpool_free(yymtagpool_t *tp)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("free(tp->head);"); + o.ws("\n").wind(1).ws("tp->head = tp->next = tp->last = NULL;"); + o.ws("\n}"); - o.ws("\n"); - o.ws("\nstatic yymtag_t *yymtagpool_next(yymtagpool_t *tp)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("if (tp->next == tp->last) {"); - o.ws("\n").wind(2).ws("const unsigned size = tp->last - tp->head;"); - o.ws("\n").wind(2).ws("yymtag_t *head = (yymtag_t*)malloc(2 * size * sizeof(yymtag_t));"); - o.ws("\n").wind(2).ws("memcpy(head, tp->head, size * sizeof(yymtag_t));"); - o.ws("\n").wind(2).ws("free(tp->head);"); - o.ws("\n").wind(2).ws("tp->head = head;"); - o.ws("\n").wind(2).ws("tp->next = head + size;"); - o.ws("\n").wind(2).ws("tp->last = head + size * 2;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("return tp->next++;"); - o.ws("\n}"); + o.ws("\n"); + o.ws("\nstatic yymtag_t *yymtagpool_next(yymtagpool_t *tp)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("if (tp->next == tp->last) {"); + o.ws("\n").wind(2).ws("const unsigned size = tp->last - tp->head;"); + o.ws("\n").wind(2).ws("yymtag_t *head = (yymtag_t*)malloc(2 * size * sizeof(yymtag_t));"); + o.ws("\n").wind(2).ws("memcpy(head, tp->head, size * sizeof(yymtag_t));"); + o.ws("\n").wind(2).ws("free(tp->head);"); + o.ws("\n").wind(2).ws("tp->head = head;"); + o.ws("\n").wind(2).ws("tp->next = head + size;"); + o.ws("\n").wind(2).ws("tp->last = head + size * 2;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("return tp->next++;"); + o.ws("\n}"); - o.ws("\n"); - o.ws("\nstatic void yymtag(yymtag_t **pt, const YYCTYPE *t, yymtagpool_t *tp)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("yymtag_t *n = yymtagpool_next(tp);"); - o.ws("\n").wind(1).ws("n->pred = *pt;"); - o.ws("\n").wind(1).ws("n->elem = t;"); - o.ws("\n").wind(1).ws("*pt = n;"); - o.ws("\n}"); + o.ws("\n"); + o.ws("\nstatic void yymtag(yymtag_t **pt, const YYCTYPE *t, yymtagpool_t *tp)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("yymtag_t *n = yymtagpool_next(tp);"); + o.ws("\n").wind(1).ws("n->pred = *pt;"); + o.ws("\n").wind(1).ws("n->elem = t;"); + o.ws("\n").wind(1).ws("*pt = n;"); + o.ws("\n}"); - o.ws("\n"); - o.ws("\nstatic int check_mtag_").wstring(name) - .ws("(unsigned *pkix, YYKEYTYPE *keys, const yymtag_t *mtag,\n") - .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)"); - o.ws("\n{"); -// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(1) && return 1;"); - o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); - o.ws("\n").wind(1).ws("YYKEYTYPE n = keys[kix];"); - o.ws("\n").wind(1).ws("*pkix = kix + n + 1;"); -// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(n) && return 1;"); - o.ws("\n").wind(1).ws("for (; n > 0; --n) {"); - o.ws("\n").wind(2).ws("if (mtag == NULL) {"); - o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") - .ws("\n").wind(4).ws("\"history for tag '%s' is too short\\n\",") - .ws("\n").wind(4).ws("token - input, kix + n, name);"); - o.ws("\n").wind(3).ws("return 1;"); - o.ws("\n").wind(2).ws("}"); - o.ws("\n").wind(2).ws("const YYCTYPE *tag = mtag->elem;"); - o.ws("\n").wind(2).ws("mtag = mtag->pred;"); - o.ws("\n").wind(2).ws("const YYKEYTYPE\n") - .wind(3).ws("exp = keys[kix + n],\n") - .wind(3).ws("act = (YYKEYTYPE)(tag - token),\n") - .wind(3).ws("NIL = (YYKEYTYPE)~0u;"); - o.ws("\n").wind(2).ws("if (!(exp == act || (exp == NIL && tag == NULL))) {"); - o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") - .ws("\n").wind(4).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",") - .ws("\n").wind(4).ws("token - input, kix + n, name, exp, act);"); - o.ws("\n").wind(3).ws("return 1;"); - o.ws("\n").wind(2).ws("}"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("if (mtag != NULL) {"); - o.ws("\n").wind(2).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") - .ws("\n").wind(3).ws("\"history for tag '%s' is too long\\n\",") - .ws("\n").wind(3).ws("token - input, kix, name);"); - o.ws("\n").wind(2).ws("return 1;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("return 0;"); - o.ws("\n}"); - } + o.ws("\n"); + o.ws("\nstatic int check_mtag_").wstring(name) + .ws("(unsigned *pkix, YYKEYTYPE *keys, const yymtag_t *mtag,\n") + .wind(1).ws("const YYCTYPE *input, const YYCTYPE *token, const char *name)"); + o.ws("\n{"); +// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(1) && return 1;"); + o.ws("\n").wind(1).ws("const unsigned kix = *pkix;"); + o.ws("\n").wind(1).ws("YYKEYTYPE n = keys[kix];"); + o.ws("\n").wind(1).ws("*pkix = kix + n + 1;"); +// o.ws("\n").wind(1).ws("check_key_count_").wstring(name).ws("(n) && return 1;"); + o.ws("\n").wind(1).ws("for (; n > 0; --n) {"); + o.ws("\n").wind(2).ws("if (mtag == NULL) {"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") + .ws("\n").wind(4).ws("\"history for tag '%s' is too short\\n\",") + .ws("\n").wind(4).ws("token - input, kix + n, name);"); + o.ws("\n").wind(3).ws("return 1;"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(2).ws("const YYCTYPE *tag = mtag->elem;"); + o.ws("\n").wind(2).ws("mtag = mtag->pred;"); + o.ws("\n").wind(2).ws("const YYKEYTYPE\n") + .wind(3).ws("exp = keys[kix + n],\n") + .wind(3).ws("act = (YYKEYTYPE)(tag - token),\n") + .wind(3).ws("NIL = (YYKEYTYPE)~0u;"); + o.ws("\n").wind(2).ws("if (!(exp == act || (exp == NIL && tag == NULL))) {"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") + .ws("\n").wind(4).ws("\"wrong value for tag '%s': expected %u, actual %u\\n\",") + .ws("\n").wind(4).ws("token - input, kix + n, name, exp, act);"); + o.ws("\n").wind(3).ws("return 1;"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (mtag != NULL) {"); + o.ws("\n").wind(2).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": at position %ld, key %u: \"") + .ws("\n").wind(3).ws("\"history for tag '%s' is too long\\n\",") + .ws("\n").wind(3).ws("token - input, kix, name);"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("return 0;"); + o.ws("\n}"); + } - o.ws("\n"); - o.ws("\nstatic int check_key_count_").wstring(name).ws("(unsigned have, unsigned used, unsigned need)"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("if (used + need <= have) return 0;"); - o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": not enough keys\\n\");"); - o.ws("\n").wind(1).ws("return 1;"); - o.ws("\n}"); - o.ws("\n"); + o.ws("\n"); + o.ws("\nstatic int check_key_count_").wstring(name).ws("(unsigned have, unsigned used, unsigned need)"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("if (used + need <= have) return 0;"); + o.ws("\n").wind(1).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": not enough keys\\n\");"); + o.ws("\n").wind(1).ws("return 1;"); + o.ws("\n}"); + o.ws("\n"); - o.ws("\nint lex_").wstring(name).ws("()"); - o.ws("\n{"); - o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */"); - o.ws("\n").wind(1).ws("int status = 0;"); - o.ws("\n").wind(1).ws("size_t input_len = 0;"); - o.ws("\n").wind(1).ws("size_t keys_count = 0;"); - o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;"); - o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;"); - o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;"); - o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;"); - o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;"); - o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;"); - if (opts->posix_captures) { - o.ws("\n").wind(1).ws("size_t yynmatch;"); - o.ws("\n").wind(1).ws("const YYCTYPE *yypmatch[").wu64(maxnmatch).ws(" * 2];"); - } - o.ws("\n").wind(1).ws("unsigned int i = 0;"); - if (!mtagnames.empty()) { - o.ws("\n"); - o.ws("\n").wind(1).ws("yymtagpool_t yytp;"); - o.ws("\n").wind(1).ws("yymtagpool_init(&yytp);"); - } - o.ws("\n"); - o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file"); - o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".input\""); - o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)"); - o.ws("\n").wind(2).ws(", padding"); - o.ws("\n").wind(2).ws(", &input_len"); - o.ws("\n").wind(2).ws(");"); - o.ws("\n").wind(1).ws("if (input == NULL) {"); - o.ws("\n").wind(2).ws("status = 1;"); - o.ws("\n").wind(2).ws("goto end;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - if (sizeof_cunit > 1) { - o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {"); - from_le(o, 2, sizeof_cunit, "input[i]"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - } - o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file"); - o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".keys\""); - o.ws("\n").wind(2).ws(", sizeof (YYKEYTYPE)"); - o.ws("\n").wind(2).ws(", 0"); - o.ws("\n").wind(2).ws(", &keys_count"); - o.ws("\n").wind(2).ws(");"); - o.ws("\n").wind(1).ws("if (keys == NULL) {"); - o.ws("\n").wind(2).ws("status = 1;"); - o.ws("\n").wind(2).ws("goto end;"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - if (sizeof_key > 1) - { - o.ws("\n").wind(1).ws("for (i = 0; i < keys_count; ++i) {"); - from_le(o, 2, sizeof_key, "keys[i]"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - } - o.ws("\n").wind(1).ws("cursor = input;"); - o.ws("\n").wind(1).ws("limit = input + input_len + padding;"); - o.ws("\n").wind(1).ws("eof = input + input_len;"); - o.ws("\n"); - o.ws("\n").wind(1).ws("for (i = 0; status == 0 && cursor < eof && i < keys_count;) {"); - o.ws("\n").wind(2).ws("token = cursor;"); - if (backup) { - o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;"); - } - if (oldstyle_ctxmarker) { - o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;"); - } - o.ws("\n").wind(2).ws("YYCTYPE yych;"); - if (accept) { - o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;"); - } + o.ws("\nint lex_").wstring(name).ws("()"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */"); + o.ws("\n").wind(1).ws("int status = 0;"); + o.ws("\n").wind(1).ws("size_t input_len = 0;"); + o.ws("\n").wind(1).ws("size_t keys_count = 0;"); + o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;"); + o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;"); + if (opts->posix_captures) { + o.ws("\n").wind(1).ws("size_t yynmatch;"); + o.ws("\n").wind(1).ws("const YYCTYPE *yypmatch[").wu64(maxnmatch).ws(" * 2];"); + } + o.ws("\n").wind(1).ws("unsigned int i = 0;"); + if (!mtagnames.empty()) { + o.ws("\n"); + o.ws("\n").wind(1).ws("yymtagpool_t yytp;"); + o.ws("\n").wind(1).ws("yymtagpool_init(&yytp);"); + } + o.ws("\n"); + o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".input\""); + o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)"); + o.ws("\n").wind(2).ws(", padding"); + o.ws("\n").wind(2).ws(", &input_len"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (input == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_cunit > 1) { + o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {"); + from_le(o, 2, sizeof_cunit, "input[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(filename).ws(".").wstring(name).ws(".keys\""); + o.ws("\n").wind(2).ws(", sizeof (YYKEYTYPE)"); + o.ws("\n").wind(2).ws(", 0"); + o.ws("\n").wind(2).ws(", &keys_count"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (keys == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_key > 1) + { + o.ws("\n").wind(1).ws("for (i = 0; i < keys_count; ++i) {"); + from_le(o, 2, sizeof_key, "keys[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("cursor = input;"); + o.ws("\n").wind(1).ws("limit = input + input_len + padding;"); + o.ws("\n").wind(1).ws("eof = input + input_len;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("for (i = 0; status == 0 && cursor < eof && i < keys_count;) {"); + o.ws("\n").wind(2).ws("token = cursor;"); + if (backup) { + o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;"); + } + if (oldstyle_ctxmarker) { + o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;"); + } + o.ws("\n").wind(2).ws("YYCTYPE yych;"); + if (accept) { + o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;"); + } - // autogenerated stag variables - ConfTags conf("\n" + indent(2, opts->indString) + "const YYCTYPE *@@ = NULL;", ""); - output_tags(o.stream(), 0, conf, stagnames, opts); - // user-defined stag variables - std::set::const_iterator - var1 = stagvars.begin(), - var2 = stagvars.end(); - if (var1 != var2) { - o.ws("\n").wind(2).ws("const YYCTYPE *").wstring(*var1); - for (++var1; var1 != var2; ++var1) { - o.ws(", *").wstring(*var1); - } - o.ws(";"); - } - if (!mtagnames.empty()) { - o.ws("\n").wind(2).ws("yymtagpool_clear(&yytp);"); - // autogenerated mtag variables - conf.format = "yymtag_t *@@ = NULL;"; - output_tags(o.stream(), 0, conf, mtagnames, opts); - // user-defined mtag variables - var1 = mtagvars.begin(); - var2 = mtagvars.end(); - if (var1 != var2) { - o.ws("\n").wind(2).ws("yymtag_t *").wstring(*var1); - for (++var1; var1 != var2; ++var1) { - o.ws(", *").wstring(*var1); - } - o.ws(";"); - } - } + // autogenerated stag variables + ConfTags conf("\n" + indent(2, opts->indString) + "const YYCTYPE *@@ = NULL;", ""); + output_tags(o.stream(), 0, conf, stagnames, opts); + // user-defined stag variables + std::set::const_iterator + var1 = stagvars.begin(), + var2 = stagvars.end(); + if (var1 != var2) { + o.ws("\n").wind(2).ws("const YYCTYPE *").wstring(*var1); + for (++var1; var1 != var2; ++var1) { + o.ws(", *").wstring(*var1); + } + o.ws(";"); + } + if (!mtagnames.empty()) { + o.ws("\n").wind(2).ws("yymtagpool_clear(&yytp);"); + // autogenerated mtag variables + conf.format = "yymtag_t *@@ = NULL;"; + output_tags(o.stream(), 0, conf, mtagnames, opts); + // user-defined mtag variables + var1 = mtagvars.begin(); + var2 = mtagvars.end(); + if (var1 != var2) { + o.ws("\n").wind(2).ws("yymtag_t *").wstring(*var1); + for (++var1; var1 != var2; ++var1) { + o.ws(", *").wstring(*var1); + } + o.ws(";"); + } + } - o.ws("\n"); - if (opts->bFlag) { - bitmaps.gen(o, 2); - } - o.ws("\n"); + o.ws("\n"); + if (opts->bFlag) { + bitmaps.gen(o, 2); + } + o.ws("\n"); } void emit_end(OutputFile &o, const std::string &name, bool backup, bool oldstyle_ctxmarker, - const std::set &mtagnames) + const std::set &mtagnames) { - o.ws("\n").wind(1).ws("}"); - o.ws("\n").wind(1).ws("if (status == 0) {"); - o.ws("\n").wind(2).ws("if (cursor != eof) {"); - o.ws("\n").wind(3).ws("status = 1;"); - o.ws("\n").wind(3).ws("const long pos = token - input;"); - o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);"); - o.ws("\n").wind(2).ws("}"); - o.ws("\n").wind(2).ws("if (i != keys_count) {"); - o.ws("\n").wind(3).ws("status = 1;"); - o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u keys\\n\", i);"); - o.ws("\n").wind(2).ws("}"); - o.ws("\n").wind(1).ws("}"); - o.ws("\n"); - o.ws("\nend:"); - o.ws("\n").wind(1).ws("free(input);"); - o.ws("\n").wind(1).ws("free(keys);"); - if (!mtagnames.empty()) { - o.ws("\n").wind(1).ws("yymtagpool_free(&yytp);"); - } - o.ws("\n"); - o.ws("\n").wind(1).ws("return status;"); - o.ws("\n}"); - o.ws("\n"); - o.ws("\n#undef YYCTYPE"); - o.ws("\n#undef YYKEYTYPE"); - o.ws("\n#undef YYPEEK"); - o.ws("\n#undef YYSKIP"); - if (backup) { - o.ws("\n#undef YYBACKUP"); - o.ws("\n#undef YYRESTORE"); - } - if (oldstyle_ctxmarker) { - o.ws("\n#undef YYBACKUPCTX"); - o.ws("\n#undef YYRESTORECTX"); - } - if (o.block().opts->tags) { - o.ws("\n#undef YYBACKUPTAG"); - o.ws("\n#undef YYRESTORETAG"); - o.ws("\n#undef YYCOPYTAG"); - } - o.ws("\n#undef YYLESSTHAN"); - o.ws("\n#undef YYFILL"); - o.ws("\n"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (status == 0) {"); + o.ws("\n").wind(2).ws("if (cursor != eof) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("const long pos = token - input;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(2).ws("if (i != keys_count) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u keys\\n\", i);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\nend:"); + o.ws("\n").wind(1).ws("free(input);"); + o.ws("\n").wind(1).ws("free(keys);"); + if (!mtagnames.empty()) { + o.ws("\n").wind(1).ws("yymtagpool_free(&yytp);"); + } + o.ws("\n"); + o.ws("\n").wind(1).ws("return status;"); + o.ws("\n}"); + o.ws("\n"); + o.ws("\n#undef YYCTYPE"); + o.ws("\n#undef YYKEYTYPE"); + o.ws("\n#undef YYPEEK"); + o.ws("\n#undef YYSKIP"); + if (backup) { + o.ws("\n#undef YYBACKUP"); + o.ws("\n#undef YYRESTORE"); + } + if (oldstyle_ctxmarker) { + o.ws("\n#undef YYBACKUPCTX"); + o.ws("\n#undef YYRESTORECTX"); + } + if (o.block().opts->tags) { + o.ws("\n#undef YYBACKUPTAG"); + o.ws("\n#undef YYRESTORETAG"); + o.ws("\n#undef YYCOPYTAG"); + } + o.ws("\n#undef YYLESSTHAN"); + o.ws("\n#undef YYFILL"); + o.ws("\n"); } void emit_epilog(OutputFile &o, const std::set &names) { - o.ws("\n").ws("int main()"); - o.ws("\n").ws("{"); + o.ws("\n").ws("int main()"); + o.ws("\n").ws("{"); - for (std::set::const_iterator i = names.begin(); i != names.end(); ++i) { - o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {"); - o.ws("\n").wind(2).ws("return 1;"); - o.ws("\n").wind(1).ws("}"); - } + for (std::set::const_iterator i = names.begin(); i != names.end(); ++i) { + o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + } - o.ws("\n").wind(1).ws("return 0;"); - o.ws("\n}"); - o.ws("\n"); + o.ws("\n").wind(1).ws("return 0;"); + o.ws("\n}"); + o.ws("\n"); } void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid) { - const std::string &name = dfa.name; - const Rule &r = dfa.rules[rid]; - const uint64_t rkey = rule2key(rid, dfa.key_size, dfa.def_rule); - size_t ntag = 3; - for (size_t t = r.ltag; t < r.htag; ++t) { - const Tag &tag = dfa.tags[t]; - if (t != r.ttag && !fictive(tag)) ++ntag; - } + const std::string &name = dfa.name; + const Rule &r = dfa.rules[rid]; + const uint64_t rkey = rule2key(rid, dfa.key_size, dfa.def_rule); + size_t ntag = 3; + for (size_t t = r.ltag; t < r.htag; ++t) { + const Tag &tag = dfa.tags[t]; + if (t != r.ttag && !fictive(tag)) ++ntag; + } - o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ") - .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name) - .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")"); + o.wind(ind).ws("status = check_key_count_").wstring(name).ws("(keys_count, i, ") + .wu64(ntag).ws(")\n").wind(ind + 1).ws(" || action_").wstring(name) + .ws("(&i, keys, input, token, &cursor, ").wu64(rkey).ws(")"); - for (size_t t = r.ltag; t < r.htag; ++t) { - const Tag &tag = dfa.tags[t]; - if (t == r.ttag || fictive(tag)) continue; - const std::string tname = tagname(tag), - prefix = history(tag) ? "m" : "s"; - o.ws("\n").wind(ind + 1).ws(" || check_").wstring(prefix).ws("tag").ws("_").wstring(name) - .ws("(&i, keys, ").wstring(tname).ws(", input, token, \"") - .wstring(tname).ws("\")"); - } + for (size_t t = r.ltag; t < r.htag; ++t) { + const Tag &tag = dfa.tags[t]; + if (t == r.ttag || fictive(tag)) continue; + const std::string tname = tagname(tag), + prefix = history(tag) ? "m" : "s"; + o.ws("\n").wind(ind + 1).ws(" || check_").wstring(prefix).ws("tag").ws("_").wstring(name) + .ws("(&i, keys, ").wstring(tname).ws(", input, token, \"") + .wstring(tname).ws("\")"); + } - o.ws(";\n"); - o.wind(ind).ws("continue;\n"); + o.ws(";\n"); + o.wind(ind).ws("continue;\n"); } } // namespace re2c diff --git a/re2c/src/skeleton/generate_data.cc b/re2c/src/skeleton/generate_data.cc index cf4ff067..df3fe397 100644 --- a/re2c/src/skeleton/generate_data.cc +++ b/re2c/src/skeleton/generate_data.cc @@ -54,29 +54,29 @@ typedef u32lim_t<1024 * 1024 * 1024> cover_size_t; // ~1Gb struct cover_t { - FILE *input; - FILE *keys; - std::vector loops; - std::vector suffixes; - path_t prefix; - cover_size_t size; - - cover_t(FILE *fi, FILE *fk, size_t nnodes): - input(fi), keys(fk), loops(nnodes), - suffixes(nnodes), prefix(0), - size(cover_size_t::from32(0u)) {} - - FORBID_COPY(cover_t); + FILE *input; + FILE *keys; + std::vector loops; + std::vector suffixes; + path_t prefix; + cover_size_t size; + + cover_t(FILE *fi, FILE *fk, size_t nnodes): + input(fi), keys(fk), loops(nnodes), + suffixes(nnodes), prefix(0), + size(cover_size_t::from32(0u)) {} + + FORBID_COPY(cover_t); }; template static uintn_t to_le(uintn_t n) { - uintn_t m; - uint8_t *p = reinterpret_cast(&m); - for (size_t i = 0; i < sizeof(uintn_t); ++i) { - p[i] = static_cast(n >> (i * 8)); - } - return m; + uintn_t m; + uint8_t *p = reinterpret_cast(&m); + for (size_t i = 0; i < sizeof(uintn_t); ++i) { + p[i] = static_cast(n >> (i * 8)); + } + return m; } // pick at most 0x100 unique edges from this range @@ -86,197 +86,197 @@ template static uintn_t to_le(uintn_t n) // - values should be deterministic static uint32_t step(uint32_t lower, uint32_t upper) { - return 1 + (upper - lower) / 0x100; + return 1 + (upper - lower) / 0x100; } static uint32_t nsteps(uint32_t lower, uint32_t upper) { - return 2 + (upper - lower - 1) / step(lower, upper); + return 2 + (upper - lower - 1) / step(lower, upper); } static void apply(std::vector *tags, const tcmd_t *cmd, size_t pos) { - for (const tcmd_t *p = cmd; p; p = p->next) { - const tagver_t l = p->lhs, r = p->rhs, *h = p->history; - std::vector &t = tags[l]; - if (tcmd_t::iscopy(p)) { - t = tags[r]; - } else if (tcmd_t::isset(p)) { - t.clear(); - t.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos); - } else { - if (l != r) t = tags[r]; - std::vector x; - for (; *h != TAGVER_ZERO; ++h) { - x.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos); - } - t.insert(t.end(), x.rbegin(), x.rend()); - } - } + for (const tcmd_t *p = cmd; p; p = p->next) { + const tagver_t l = p->lhs, r = p->rhs, *h = p->history; + std::vector &t = tags[l]; + if (tcmd_t::iscopy(p)) { + t = tags[r]; + } else if (tcmd_t::isset(p)) { + t.clear(); + t.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos); + } else { + if (l != r) t = tags[r]; + std::vector x; + for (; *h != TAGVER_ZERO; ++h) { + x.push_back(*h == TAGVER_BOTTOM ? Skeleton::DEFTAG : pos); + } + t.insert(t.end(), x.rbegin(), x.rend()); + } + } } static size_t path_width(const path_t &path, const Skeleton &skel) { - size_t width = 0; - for (size_t i = 0; i < path.len(); ++i) { - - // width of multiarc: total number of characters picked from all ranges - size_t w = 0; - const Node::arc_t &arc = path.arc(skel, i); - for (Node::citer_t a = arc.begin(); a != arc.end(); ++a) { - w += nsteps(a->lower, a->upper); - } - - // width of multipath: maximal width of multiarc - width = std::max(width, w); - } - return width; + size_t width = 0; + for (size_t i = 0; i < path.len(); ++i) { + + // width of multiarc: total number of characters picked from all ranges + size_t w = 0; + const Node::arc_t &arc = path.arc(skel, i); + for (Node::citer_t a = arc.begin(); a != arc.end(); ++a) { + w += nsteps(a->lower, a->upper); + } + + // width of multipath: maximal width of multiarc + width = std::max(width, w); + } + return width; } template static void write_input(const path_t &path, const Skeleton &skel, - size_t width, FILE *file) + size_t width, FILE *file) { - const size_t - len = path.len(), - size = len * width; - cunit_t *buffer = new cunit_t[size]; - - // pick characters from ranges - for (size_t i = 0; i < len; ++i) { - Node::wciter_t a(path.arc(skel, i)); - for (size_t w = 0; w < width; ++a) { - const uint32_t - l = a->lower, - u = a->upper, - d = step(l, u); - for (uint32_t m = l; m < u + d && w < width; m += d, ++w) { - buffer[w * len + i] = to_le(static_cast(std::min(m, u))); - } - } - } - - fwrite(buffer, sizeof(cunit_t), size, file); - - delete[] buffer; + const size_t + len = path.len(), + size = len * width; + cunit_t *buffer = new cunit_t[size]; + + // pick characters from ranges + for (size_t i = 0; i < len; ++i) { + Node::wciter_t a(path.arc(skel, i)); + for (size_t w = 0; w < width; ++a) { + const uint32_t + l = a->lower, + u = a->upper, + d = step(l, u); + for (uint32_t m = l; m < u + d && w < width; m += d, ++w) { + buffer[w * len + i] = to_le(static_cast(std::min(m, u))); + } + } + } + + fwrite(buffer, sizeof(cunit_t), size, file); + + delete[] buffer; } template static void write_keys(const path_t &path, const Skeleton &skel, - size_t width, FILE *file) + size_t width, FILE *file) { - // find last accepting node - size_t f; - for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f); - - // calculate tags: start with default and apply commands step by step - const size_t - nver = skel.ntagver, - ntag = width * nver, - offby = skel.opts->lookahead ? 0 : 1; - std::vector *tags = new std::vector[ntag]; - for (size_t w = 0; w < width; ++w) { - apply(&tags[w * nver], skel.cmd0, 0); // absent in LATDFA - } - for (size_t i = 0; i < f; ++i) { - Node::wciter_t a(path.arc(skel, i)); - for (size_t w = 0; w < width; ++a) { - uint32_t n = nsteps(a->lower, a->upper); - for (; n --> 0 && w < width; ++w) { - apply(&tags[w * nver], a->cmd, i + offby); - } - } - } - const tcmd_t *fcmd = path.node(skel, f).cmd; - for (size_t w = 0; w < width; ++w) { - apply(&tags[w * nver], fcmd, f); // only present in LATDFA - } - - const size_t rule = path.node(skel, f).rule; - size_t matched = 0, ltag = 0, htag = 0, trail = 0; - if (rule != Rule::NONE) { - - const Rule &r = skel.rules[rule]; - ltag = r.ltag; - htag = r.htag; - trail = r.ttag; - - // matched length might depend on tag values - if (trail == htag) { - matched = f; - } else { - assert(!fixed(skel.tags[trail])); // no fixed trailing context - matched = tags[skel.finvers[trail]].back(); - assert(matched != Skeleton::DEFTAG); - } - } - - // count keys - size_t nkey = 0; - for (size_t w = 0; w < width; ++w) { - nkey += 3; - for (size_t t = ltag; t < htag; ++t) { - const Tag &tag = skel.tags[t]; - if (t == trail || fictive(tag)) continue; - const size_t - base = fixed(tag) ? tag.base : t, - bver = static_cast(skel.finvers[base]); - if (history(tag)) nkey += tags[w * nver + bver].size(); - ++nkey; - } - } - - // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags - key_t *keys = new key_t[nkey], *k = keys; - for (size_t w = 0; w < width; ++w) { - *k++ = to_le(static_cast(path.len())); - *k++ = to_le(static_cast(matched)); - *k++ = to_le(rule2key(rule, skel.defrule)); - - for (size_t t = ltag; t < htag; ++t) { - const Tag &tag = skel.tags[t]; - if (t == trail || fictive(tag)) continue; - - const size_t - base = fixed(tag) ? tag.base : t, - bver = static_cast(skel.finvers[base]); - const std::vector &h = tags[w * nver + bver]; - if (history(tag)) { - const size_t hlen = h.size(); - *k++ = to_le(static_cast(hlen)); - for (size_t i = 0; i < hlen; ++i) { - *k++ = to_le(static_cast(h[i])); - } - } else { - *k++ = to_le(static_cast(h.back())); - } - } - } - - // dump to file - fwrite(keys, sizeof(key_t), nkey, file); - - delete[] tags; - delete[] keys; + // find last accepting node + size_t f; + for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f); + + // calculate tags: start with default and apply commands step by step + const size_t + nver = skel.ntagver, + ntag = width * nver, + offby = skel.opts->lookahead ? 0 : 1; + std::vector *tags = new std::vector[ntag]; + for (size_t w = 0; w < width; ++w) { + apply(&tags[w * nver], skel.cmd0, 0); // absent in LATDFA + } + for (size_t i = 0; i < f; ++i) { + Node::wciter_t a(path.arc(skel, i)); + for (size_t w = 0; w < width; ++a) { + uint32_t n = nsteps(a->lower, a->upper); + for (; n --> 0 && w < width; ++w) { + apply(&tags[w * nver], a->cmd, i + offby); + } + } + } + const tcmd_t *fcmd = path.node(skel, f).cmd; + for (size_t w = 0; w < width; ++w) { + apply(&tags[w * nver], fcmd, f); // only present in LATDFA + } + + const size_t rule = path.node(skel, f).rule; + size_t matched = 0, ltag = 0, htag = 0, trail = 0; + if (rule != Rule::NONE) { + + const Rule &r = skel.rules[rule]; + ltag = r.ltag; + htag = r.htag; + trail = r.ttag; + + // matched length might depend on tag values + if (trail == htag) { + matched = f; + } else { + assert(!fixed(skel.tags[trail])); // no fixed trailing context + matched = tags[skel.finvers[trail]].back(); + assert(matched != Skeleton::DEFTAG); + } + } + + // count keys + size_t nkey = 0; + for (size_t w = 0; w < width; ++w) { + nkey += 3; + for (size_t t = ltag; t < htag; ++t) { + const Tag &tag = skel.tags[t]; + if (t == trail || fictive(tag)) continue; + const size_t + base = fixed(tag) ? tag.base : t, + bver = static_cast(skel.finvers[base]); + if (history(tag)) nkey += tags[w * nver + bver].size(); + ++nkey; + } + } + + // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags + key_t *keys = new key_t[nkey], *k = keys; + for (size_t w = 0; w < width; ++w) { + *k++ = to_le(static_cast(path.len())); + *k++ = to_le(static_cast(matched)); + *k++ = to_le(rule2key(rule, skel.defrule)); + + for (size_t t = ltag; t < htag; ++t) { + const Tag &tag = skel.tags[t]; + if (t == trail || fictive(tag)) continue; + + const size_t + base = fixed(tag) ? tag.base : t, + bver = static_cast(skel.finvers[base]); + const std::vector &h = tags[w * nver + bver]; + if (history(tag)) { + const size_t hlen = h.size(); + *k++ = to_le(static_cast(hlen)); + for (size_t i = 0; i < hlen; ++i) { + *k++ = to_le(static_cast(h[i])); + } + } else { + *k++ = to_le(static_cast(h.back())); + } + } + } + + // dump to file + fwrite(keys, sizeof(key_t), nkey, file); + + delete[] tags; + delete[] keys; } template static cover_size_t cover_one(const Skeleton &skel, cover_t &cover) { - const path_t &path = cover.prefix; + const path_t &path = cover.prefix; - const size_t width = path_width(path, skel); + const size_t width = path_width(path, skel); - const cover_size_t size - = cover_size_t::from64(path.len()) - * cover_size_t::from64(width); + const cover_size_t size + = cover_size_t::from64(path.len()) + * cover_size_t::from64(width); - if (!size.overflow()) { - write_input(path, skel, width, cover.input); - write_keys(path, skel, width, cover.keys); - } + if (!size.overflow()) { + write_input(path, skel, width, cover.input); + write_keys(path, skel, width, cover.keys); + } - return size; + return size; } /* @@ -309,118 +309,118 @@ static cover_size_t cover_one(const Skeleton &skel, cover_t &cover) * */ template static void gencover( - const Skeleton &skel, - cover_t &cover, - size_t i) + const Skeleton &skel, + cover_t &cover, + size_t i) { - const Node &node = skel.nodes[i]; - uint8_t &loop = cover.loops[i]; - suffix_t &suffix = cover.suffixes[i]; - path_t &prefix = cover.prefix; - cover_size_t &size = cover.size; - - if (node.end()) { - suffix.init = true; - } - - if (suffix.init) - { - prefix.push_sfx(suffix); - size = size + cover_one(skel, cover); - prefix.pop_sfx(suffix); - } - - // unroll one iteration of the loop - else if (loop < 2) { - local_inc _(loop); - - Node::arcs_t::const_iterator - arc = node.arcs.begin(), - end = node.arcs.end(); - const suffix_t *min_sfx = NULL; - size_t min_idx; - - // pick the shortest suffix to minimize cover size - // handle all child states before setting this state's suffix - for (; arc != end && !size.overflow(); ++arc) { - const size_t j = arc->first; - - prefix.push(j); - gencover(skel, cover, j); - prefix.pop(); - - const suffix_t &sfx = cover.suffixes[j]; - if (sfx.init && (!min_sfx || sfx.length() < min_sfx->length())) { - min_sfx = &sfx; - min_idx = j; - } - } - - if (min_sfx == NULL) { - // all outgoing paths loop back into this node - // this can happen in cases like [^]* - } - else { - suffix = *min_sfx; - suffix.push(min_idx); - } - } + const Node &node = skel.nodes[i]; + uint8_t &loop = cover.loops[i]; + suffix_t &suffix = cover.suffixes[i]; + path_t &prefix = cover.prefix; + cover_size_t &size = cover.size; + + if (node.end()) { + suffix.init = true; + } + + if (suffix.init) + { + prefix.push_sfx(suffix); + size = size + cover_one(skel, cover); + prefix.pop_sfx(suffix); + } + + // unroll one iteration of the loop + else if (loop < 2) { + local_inc _(loop); + + Node::arcs_t::const_iterator + arc = node.arcs.begin(), + end = node.arcs.end(); + const suffix_t *min_sfx = NULL; + size_t min_idx; + + // pick the shortest suffix to minimize cover size + // handle all child states before setting this state's suffix + for (; arc != end && !size.overflow(); ++arc) { + const size_t j = arc->first; + + prefix.push(j); + gencover(skel, cover, j); + prefix.pop(); + + const suffix_t &sfx = cover.suffixes[j]; + if (sfx.init && (!min_sfx || sfx.length() < min_sfx->length())) { + min_sfx = &sfx; + min_idx = j; + } + } + + if (min_sfx == NULL) { + // all outgoing paths loop back into this node + // this can happen in cases like [^]* + } + else { + suffix = *min_sfx; + suffix.push(min_idx); + } + } } template - static void generate_paths_cunit_key(const Skeleton &skel, cover_t &cover) + static void generate_paths_cunit_key(const Skeleton &skel, cover_t &cover) { - gencover(skel, cover, 0); - if (cover.size.overflow()) { - warning(NULL, skel.line, false, - "DFA %sis too large: can only generate partial path cover", - incond(skel.cond).c_str()); - } + gencover(skel, cover, 0); + if (cover.size.overflow()) { + warning(NULL, skel.line, false, + "DFA %sis too large: can only generate partial path cover", + incond(skel.cond).c_str()); + } } template - static void generate_paths_cunit(const Skeleton &skel, cover_t &cover) + static void generate_paths_cunit(const Skeleton &skel, cover_t &cover) { - switch (skel.sizeof_key) { - case 8: generate_paths_cunit_key(skel, cover); break; - case 4: generate_paths_cunit_key(skel, cover); break; - case 2: generate_paths_cunit_key(skel, cover); break; - case 1: generate_paths_cunit_key(skel, cover); break; - } + switch (skel.sizeof_key) { + case 8: generate_paths_cunit_key(skel, cover); break; + case 4: generate_paths_cunit_key(skel, cover); break; + case 2: generate_paths_cunit_key(skel, cover); break; + case 1: generate_paths_cunit_key(skel, cover); break; + } } static void generate_paths(const Skeleton &skel, cover_t &cover) { - switch (skel.opts->encoding.szCodeUnit()) { - case 4: generate_paths_cunit(skel, cover); break; - case 2: generate_paths_cunit(skel, cover); break; - case 1: generate_paths_cunit(skel, cover); break; - } + switch (skel.opts->encoding.szCodeUnit()) { + case 4: generate_paths_cunit(skel, cover); break; + case 2: generate_paths_cunit(skel, cover); break; + case 1: generate_paths_cunit(skel, cover); break; + } } void emit_data(const Skeleton &skel) { - std::string fname = skel.opts->output_file; - if (fname.empty()) { - fname = ""; - } - - const std::string input_name = fname + "." + skel.name + ".input"; - FILE *input = fopen(input_name.c_str(), "wb"); - if (!input) { - fatal("cannot open file: %s", input_name.c_str()); - } - const std::string keys_name = std::string(fname) + "." + skel.name + ".keys"; - FILE *keys = fopen (keys_name.c_str(), "wb"); - if (!keys) { - fatal("cannot open file: %s", keys_name.c_str()); - } - - cover_t cover(input, keys, skel.nodes_count); - generate_paths(skel, cover); - - fclose(input); - fclose(keys); + std::string fname = skel.opts->output_file; + if (fname.empty()) { + fname = ""; + } + + const std::string input_name = fname + "." + skel.name + ".input"; + FILE *input = fopen(input_name.c_str(), "wb"); + if (!input) { + fatal("cannot open file: %s", input_name.c_str()); + } + const std::string keys_name = std::string(fname) + "." + skel.name + ".keys"; + FILE *keys = fopen (keys_name.c_str(), "wb"); + if (!keys) { + fatal("cannot open file: %s", keys_name.c_str()); + } + + cover_t cover(input, keys, skel.nodes_count); + generate_paths(skel, cover); + + fclose(input); + fclose(keys); } } // namespace re2c diff --git a/re2c/src/skeleton/maxpath.cc b/re2c/src/skeleton/maxpath.cc index 8d3c90c1..5a88fc54 100644 --- a/re2c/src/skeleton/maxpath.cc +++ b/re2c/src/skeleton/maxpath.cc @@ -22,61 +22,61 @@ static const uint32_t DIST_MAX = DIST_ERROR - 1; // different from YYMAXFILL calculation // in the way it handles loops and empty regexp static uint32_t calc_dist(const Skeleton &skel - , std::vector &loops - , std::vector &dists - , size_t i) + , std::vector &loops + , std::vector &dists + , size_t i) { - const Node &node = skel.nodes[i]; - uint32_t dist = dists[i]; + const Node &node = skel.nodes[i]; + uint32_t dist = dists[i]; - if (dist != DIST_ERROR) { - return dist; - } + if (dist != DIST_ERROR) { + return dist; + } - else if (node.end()) { - return dists[i] = 0; - } + else if (node.end()) { + return dists[i] = 0; + } - // we cut the looping path, so the current node is like - // the "end" node; but the actual value for this node - // is yet to be calculated on the recursive return - else if (loops[i] > 1) { - return 0; - } + // we cut the looping path, so the current node is like + // the "end" node; but the actual value for this node + // is yet to be calculated on the recursive return + else if (loops[i] > 1) { + return 0; + } - // unroll one iteration of loops - // (must be consistent with skeleton data generation) - else { - local_inc _(loops[i]); + // unroll one iteration of loops + // (must be consistent with skeleton data generation) + else { + local_inc _(loops[i]); - Node::arcs_t::const_iterator - arc = node.arcs.begin(), - end = node.arcs.end(); + Node::arcs_t::const_iterator + arc = node.arcs.begin(), + end = node.arcs.end(); - // handle all child states before setting this state's suffix - for (; arc != end; ++arc) { - const uint32_t d = calc_dist(skel, loops, dists, arc->first); + // handle all child states before setting this state's suffix + for (; arc != end; ++arc) { + const uint32_t d = calc_dist(skel, loops, dists, arc->first); - // not necessarily true for dists[arc->first] - assert (d != DIST_ERROR); + // not necessarily true for dists[arc->first] + assert (d != DIST_ERROR); - dist = (dist == DIST_ERROR) ? d : std::max(dist, d); - } + dist = (dist == DIST_ERROR) ? d : std::max(dist, d); + } - return dists[i] = std::min(dist + 1, DIST_MAX); - } + return dists[i] = std::min(dist + 1, DIST_MAX); + } } // calculate maximal path length, check overflow uint32_t maxpath(const Skeleton &skel) { - std::vector loops(skel.nodes_count); - std::vector dists(skel.nodes_count, DIST_ERROR); - const uint32_t maxlen = calc_dist(skel, loops, dists, 0); - if (maxlen == DIST_MAX) { - fatal("DFA path %sis too long", incond(skel.cond).c_str()); - } - return maxlen; + std::vector loops(skel.nodes_count); + std::vector dists(skel.nodes_count, DIST_ERROR); + const uint32_t maxlen = calc_dist(skel, loops, dists, 0); + if (maxlen == DIST_MAX) { + fatal("DFA path %sis too long", incond(skel.cond).c_str()); + } + return maxlen; } } // namespace re2c diff --git a/re2c/src/skeleton/path.h b/re2c/src/skeleton/path.h index 7b6899f3..681e0e28 100644 --- a/re2c/src/skeleton/path.h +++ b/re2c/src/skeleton/path.h @@ -11,70 +11,70 @@ namespace re2c struct suffix_t { - bool init; + bool init; private: - std::vector arcs; + std::vector arcs; public: - suffix_t(): init(false), arcs() {} - size_t length () const - { - return arcs.size (); - } - void push(size_t i) - { - arcs.push_back(i); - } - friend class path_t; + suffix_t(): init(false), arcs() {} + size_t length () const + { + return arcs.size (); + } + void push(size_t i) + { + arcs.push_back(i); + } + friend class path_t; }; class path_t { - std::vector arcs; + std::vector arcs; public: - explicit path_t(size_t i) : arcs() - { - arcs.push_back(i); - } - size_t len() const - { - return arcs.size() - 1; - } - const Node& node(const Skeleton &skel, size_t i) const - { - return skel.nodes[arcs[i]]; - } - const Node::arc_t& arc(const Skeleton &skel, size_t i) const - { - return skel.nodes[arcs[i]].arcs.find(arcs[i + 1])->second; - } - void push(size_t n) - { - arcs.push_back(n); - } - void pop() - { - arcs.pop_back(); - } - void push_sfx(const suffix_t &suffix) - { - arcs.insert(arcs.end(), suffix.arcs.rbegin(), suffix.arcs.rend()); - } - void pop_sfx(const suffix_t &suffix) - { - arcs.resize(arcs.size() - suffix.arcs.size()); - } - bool operator<(const path_t &p) const - { - const size_t - s1 = arcs.size(), - s2 = p.arcs.size(); - return (s1 == s2 && arcs < p.arcs) - || s1 < s2; - } + explicit path_t(size_t i) : arcs() + { + arcs.push_back(i); + } + size_t len() const + { + return arcs.size() - 1; + } + const Node& node(const Skeleton &skel, size_t i) const + { + return skel.nodes[arcs[i]]; + } + const Node::arc_t& arc(const Skeleton &skel, size_t i) const + { + return skel.nodes[arcs[i]].arcs.find(arcs[i + 1])->second; + } + void push(size_t n) + { + arcs.push_back(n); + } + void pop() + { + arcs.pop_back(); + } + void push_sfx(const suffix_t &suffix) + { + arcs.insert(arcs.end(), suffix.arcs.rbegin(), suffix.arcs.rend()); + } + void pop_sfx(const suffix_t &suffix) + { + arcs.resize(arcs.size() - suffix.arcs.size()); + } + bool operator<(const path_t &p) const + { + const size_t + s1 = arcs.size(), + s2 = p.arcs.size(); + return (s1 == s2 && arcs < p.arcs) + || s1 < s2; + } }; } // namespace re2c diff --git a/re2c/src/skeleton/skeleton.cc b/re2c/src/skeleton/skeleton.cc index 5f551ab5..cc27f752 100644 --- a/re2c/src/skeleton/skeleton.cc +++ b/re2c/src/skeleton/skeleton.cc @@ -11,97 +11,97 @@ struct opt_t; struct tcmd_t; Node::Node() - : arcs() - , rule(Rule::NONE) - , cmd(NULL) + : arcs() + , rule(Rule::NONE) + , cmd(NULL) {} void Node::init(const dfa_state_t *s, - const std::vector &charset, size_t nil) + const std::vector &charset, size_t nil) { - const size_t nc = charset.size() - 1; - for (uint32_t c = 0, l = 0; c < nc;) { + const size_t nc = charset.size() - 1; + for (uint32_t c = 0, l = 0; c < nc;) { - size_t j = s->arcs[c]; - const tcmd_t *t = s->tcmd[c]; - for (; ++c < nc && s->arcs[c] == j && s->tcmd[c] == t;); - if (j == dfa_t::NIL) j = nil; + size_t j = s->arcs[c]; + const tcmd_t *t = s->tcmd[c]; + for (; ++c < nc && s->arcs[c] == j && s->tcmd[c] == t;); + if (j == dfa_t::NIL) j = nil; - // all arcs go to default node => this node is final - if (l == 0 && c == nc && j == nil) break; + // all arcs go to default node => this node is final + if (l == 0 && c == nc && j == nil) break; - const uint32_t u = charset[c]; - arcs[j].push_back(Node::range_t(l, u - 1, t)); + const uint32_t u = charset[c]; + arcs[j].push_back(Node::range_t(l, u - 1, t)); - l = u; - } + l = u; + } - rule = s->rule; - cmd = s->tcmd[nc]; + rule = s->rule; + cmd = s->tcmd[nc]; } bool Node::end() const { - return arcs.size() == 0; + return arcs.size() == 0; } const size_t Skeleton::DEFTAG = std::numeric_limits::max(); Skeleton::Skeleton( - const dfa_t &dfa, - const opt_t *op, - size_t def, - const std::string &dfa_name, - const std::string &dfa_cond, - uint32_t dfa_line) - : opts(op) - , name(dfa_name) - , cond(dfa_cond) - , line(dfa_line) - , nodes_count(dfa.states.size() + 1) // +1 for default state - , nodes(new Node[nodes_count]) - , cmd0(dfa.tcmd0) - , sizeof_key(8) - , defrule(def) - , ntagver(static_cast(dfa.maxtagver) + 1) - , charset(dfa.charset) - , rules(dfa.rules) - , tags(dfa.tags) - , finvers(dfa.finvers) + const dfa_t &dfa, + const opt_t *op, + size_t def, + const std::string &dfa_name, + const std::string &dfa_cond, + uint32_t dfa_line) + : opts(op) + , name(dfa_name) + , cond(dfa_cond) + , line(dfa_line) + , nodes_count(dfa.states.size() + 1) // +1 for default state + , nodes(new Node[nodes_count]) + , cmd0(dfa.tcmd0) + , sizeof_key(8) + , defrule(def) + , ntagver(static_cast(dfa.maxtagver) + 1) + , charset(dfa.charset) + , rules(dfa.rules) + , tags(dfa.tags) + , finvers(dfa.finvers) { - // initialize nodes - const size_t nil = nodes_count - 1; - for (size_t i = 0; i < nil; ++i) { - nodes[i].init(dfa.states[i], charset, nil); - } - - // initialize size of key - const size_t maxlen = maxpath(*this); - const size_t maxrule = dfa.rules.size() + 1; // +1 for none-rule - const size_t max = std::max(maxlen, maxrule); - if (max <= std::numeric_limits::max()) { - sizeof_key = 1; - } else if (max <= std::numeric_limits::max()) { - sizeof_key = 2; - } else if (max <= std::numeric_limits::max()) { - sizeof_key = 4; - } + // initialize nodes + const size_t nil = nodes_count - 1; + for (size_t i = 0; i < nil; ++i) { + nodes[i].init(dfa.states[i], charset, nil); + } + + // initialize size of key + const size_t maxlen = maxpath(*this); + const size_t maxrule = dfa.rules.size() + 1; // +1 for none-rule + const size_t max = std::max(maxlen, maxrule); + if (max <= std::numeric_limits::max()) { + sizeof_key = 1; + } else if (max <= std::numeric_limits::max()) { + sizeof_key = 2; + } else if (max <= std::numeric_limits::max()) { + sizeof_key = 4; + } } Skeleton::~Skeleton() { - delete[] nodes; + delete[] nodes; } uint64_t rule2key(size_t rule, size_t key, size_t def) { - switch (key) { - default: assert(false); // shouldn't happen - case 8: return rule2key(rule, def); - case 4: return rule2key(rule, def); - case 2: return rule2key(rule, def); - case 1: return rule2key(rule, def); - } + switch (key) { + default: assert(false); // shouldn't happen + case 8: return rule2key(rule, def); + case 4: return rule2key(rule, def); + case 2: return rule2key(rule, def); + case 1: return rule2key(rule, def); + } } } // namespace re2c diff --git a/re2c/src/skeleton/skeleton.h b/re2c/src/skeleton/skeleton.h index d565ccba..03468622 100644 --- a/re2c/src/skeleton/skeleton.h +++ b/re2c/src/skeleton/skeleton.h @@ -37,71 +37,71 @@ typedef local_increment_t local_inc; struct Node { - struct range_t { - uint32_t lower; - uint32_t upper; - const tcmd_t *cmd; - - range_t(): lower(0), upper(0), cmd(NULL) {} - range_t(uint32_t l, uint32_t u, const tcmd_t *c) - : lower(l), upper(u), cmd(c) {} - }; - - typedef std::vector arc_t; - typedef std::map arcs_t; - typedef arc_t::const_iterator citer_t; - typedef wrap_citer_t wciter_t; - - arcs_t arcs; - size_t rule; - const tcmd_t *cmd; - - Node(); - void init(const dfa_state_t *s, - const std::vector &charset, size_t nil); - bool end() const; - - FORBID_COPY(Node); + struct range_t { + uint32_t lower; + uint32_t upper; + const tcmd_t *cmd; + + range_t(): lower(0), upper(0), cmd(NULL) {} + range_t(uint32_t l, uint32_t u, const tcmd_t *c) + : lower(l), upper(u), cmd(c) {} + }; + + typedef std::vector arc_t; + typedef std::map arcs_t; + typedef arc_t::const_iterator citer_t; + typedef wrap_citer_t wciter_t; + + arcs_t arcs; + size_t rule; + const tcmd_t *cmd; + + Node(); + void init(const dfa_state_t *s, + const std::vector &charset, size_t nil); + bool end() const; + + FORBID_COPY(Node); }; struct Skeleton { - static const size_t DEFTAG; - - const opt_t *opts; - const std::string name; - const std::string cond; - const uint32_t line; - - const size_t nodes_count; - Node *nodes; - const tcmd_t *cmd0; - - size_t sizeof_key; - size_t defrule; - size_t ntagver; - const std::vector &charset; - const std::valarray &rules; - const std::vector &tags; - const tagver_t *finvers; - - Skeleton(const dfa_t &dfa, const opt_t *op, size_t def, - const std::string &dfa_name, const std::string &dfa_cond, - uint32_t dfa_line); - ~Skeleton (); - FORBID_COPY(Skeleton); + static const size_t DEFTAG; + + const opt_t *opts; + const std::string name; + const std::string cond; + const uint32_t line; + + const size_t nodes_count; + Node *nodes; + const tcmd_t *cmd0; + + size_t sizeof_key; + size_t defrule; + size_t ntagver; + const std::vector &charset; + const std::valarray &rules; + const std::vector &tags; + const tagver_t *finvers; + + Skeleton(const dfa_t &dfa, const opt_t *op, size_t def, + const std::string &dfa_name, const std::string &dfa_cond, + uint32_t dfa_line); + ~Skeleton (); + FORBID_COPY(Skeleton); }; template key_t rule2key(size_t r, size_t def) { - if (r == Rule::NONE) { - return std::numeric_limits::max(); - } else if (r == def) { - key_t k = std::numeric_limits::max(); - return --k; - } else { - return static_cast(r); - } + if (r == Rule::NONE) { + return std::numeric_limits::max(); + } else if (r == def) { + key_t k = std::numeric_limits::max(); + return --k; + } else { + return static_cast(r); + } } uint64_t rule2key(size_t rule, size_t key, size_t def); @@ -111,12 +111,12 @@ void fprint_default_path(FILE *f, const Skeleton &skel, const path_t &p); void emit_data(const Skeleton &skel); void emit_prolog(OutputFile & o); void emit_start(OutputFile &o, size_t maxfill, size_t maxnmatch, const std::string &name, - size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker, - const std::set &stagnames, const std::set &stagvars, - const std::set &mtagnames, const std::set &mtagvars, - bitmaps_t &bitmaps); + size_t sizeof_key, size_t def, bool backup, bool accept, bool oldstyle_ctxmarker, + const std::set &stagnames, const std::set &stagvars, + const std::set &mtagnames, const std::set &mtagvars, + bitmaps_t &bitmaps); void emit_end(OutputFile &o, const std::string &name, bool backup, bool oldstyle_ctxmarker, - const std::set &mtagnames); + const std::set &mtagnames); void emit_epilog(OutputFile &o, const std::set &names); void emit_action(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rid); diff --git a/re2c/src/test/range/test-impl.h b/re2c/src/test/range/test-impl.h index 3f1d7840..0a16f0b5 100644 --- a/re2c/src/test/range/test-impl.h +++ b/re2c/src/test/range/test-impl.h @@ -9,40 +9,40 @@ namespace re2c_test { static inline bool bit_set (uint32_t n, uint32_t bit) { - return n & (1u << bit); + return n & (1u << bit); } template re2c::Range * range (uint32_t n) { - RE2C_STATIC_ASSERT (BITS <= 31); - - re2c::Range * r = NULL; - re2c::Range ** p = &r; - for (uint32_t i = 0; i < BITS; ++i) - { - for (; i < BITS && !bit_set (n, i); ++i); - if (i == BITS && !bit_set (n, BITS - 1)) - { - break; - } - const uint32_t lb = i; - for (; i < BITS && bit_set (n, i); ++i); - re2c::Range::append (p, lb, i); - } - return r; + RE2C_STATIC_ASSERT (BITS <= 31); + + re2c::Range * r = NULL; + re2c::Range ** p = &r; + for (uint32_t i = 0; i < BITS; ++i) + { + for (; i < BITS && !bit_set (n, i); ++i); + if (i == BITS && !bit_set (n, BITS - 1)) + { + break; + } + const uint32_t lb = i; + for (; i < BITS && bit_set (n, i); ++i); + re2c::Range::append (p, lb, i); + } + return r; } template re2c::Range * add (uint32_t n1, uint32_t n2) { - return range (n1 | n2); + return range (n1 | n2); } template re2c::Range * sub (uint32_t n1, uint32_t n2) { - return range (n1 & ~n2); + return range (n1 & ~n2); } } // namespace re2c_test diff --git a/re2c/src/test/range/test.cc b/re2c/src/test/range/test.cc index a602f79d..18181b7a 100644 --- a/re2c/src/test/range/test.cc +++ b/re2c/src/test/range/test.cc @@ -7,88 +7,88 @@ namespace re2c_test { static bool equal (const re2c::Range * r1, const re2c::Range * r2) { - for (; r1 && r2; r1 = r1->next (), r2 = r2->next ()) - { - if (r1->lower () != r2->lower () - || r1->upper () != r2->upper ()) - { - return false; - } - } - return !r1 && !r2; + for (; r1 && r2; r1 = r1->next (), r2 = r2->next ()) + { + if (r1->lower () != r2->lower () + || r1->upper () != r2->upper ()) + { + return false; + } + } + return !r1 && !r2; } static void show (const re2c::Range * r) { - if (!r) - { - fprintf (stderr, "[]"); - } - for (; r; r = r->next ()) - { - const uint32_t l = r->lower (); - const uint32_t u = r->upper () - 1; - if (l < u) - { - fprintf (stderr, "[%X-%X]", l, u); - } - else - { - fprintf (stderr, "[%X]", l); - } - } + if (!r) + { + fprintf (stderr, "[]"); + } + for (; r; r = r->next ()) + { + const uint32_t l = r->lower (); + const uint32_t u = r->upper () - 1; + if (l < u) + { + fprintf (stderr, "[%X-%X]", l, u); + } + else + { + fprintf (stderr, "[%X]", l); + } + } } static int32_t diff - ( const re2c::Range * r1 - , const re2c::Range * r2 - , const re2c::Range * op1 - , const re2c::Range * op2 - , const char * op) + ( const re2c::Range * r1 + , const re2c::Range * r2 + , const re2c::Range * op1 + , const re2c::Range * op2 + , const char * op) { - if (equal (op1, op2)) - { - return 0; - } - else - { - fprintf (stderr, "%s error: ", op); - show (r1); - fprintf (stderr, " %s ", op); - show (r2); - fprintf (stderr, " ====> "); - show (op2); - fprintf (stderr, " =/= "); - show (op1); - fprintf (stderr, "\n"); - return 1; - } + if (equal (op1, op2)) + { + return 0; + } + else + { + fprintf (stderr, "%s error: ", op); + show (r1); + fprintf (stderr, " %s ", op); + show (r2); + fprintf (stderr, " ====> "); + show (op2); + fprintf (stderr, " =/= "); + show (op1); + fprintf (stderr, "\n"); + return 1; + } } static int32_t test () { - int32_t ok = 0; + int32_t ok = 0; - static const uint32_t BITS = 8; - static const uint32_t N = 1u << BITS; - for (uint32_t i = 0; i <= N; ++i) - { - for (uint32_t j = 0; j <= N; ++j) - { - re2c::Range * r1 = range (i); - re2c::Range * r2 = range (j); - ok |= diff (r1, r2, add (i, j), re2c::Range::add (r1, r2), "U"); - ok |= diff (r1, r2, sub (i, j), re2c::Range::sub (r1, r2), "D"); - re2c::Range::vFreeList.clear (); - } - } + static const uint32_t BITS = 8; + static const uint32_t N = 1u << BITS; + for (uint32_t i = 0; i <= N; ++i) + { + for (uint32_t j = 0; j <= N; ++j) + { + re2c::Range * r1 = range (i); + re2c::Range * r2 = range (j); + ok |= diff (r1, r2, add (i, j), re2c::Range::add (r1, r2), "U"); + ok |= diff (r1, r2, sub (i, j), re2c::Range::sub (r1, r2), "D"); + re2c::Range::vFreeList.clear (); + } + } - return ok; + return ok; } } // namespace re2c_test int main () { - return re2c_test::test (); + return re2c_test::test (); } diff --git a/re2c/src/test/s_to_n32_unsafe/test.cc b/re2c/src/test/s_to_n32_unsafe/test.cc index 47b3d47d..01743924 100644 --- a/re2c/src/test/s_to_n32_unsafe/test.cc +++ b/re2c/src/test/s_to_n32_unsafe/test.cc @@ -11,92 +11,92 @@ static const uint32_t DIGITS = 256; // no terminating null as we don't need it static char * u64_to_s_fastest_ever (uint64_t u, char * s) { - while (u > 0) - { - const uint64_t d = u % 10 + '0'; - *--s = static_cast (d); - u /= 10; - } - return s; + while (u > 0) + { + const uint64_t d = u % 10 + '0'; + *--s = static_cast (d); + u /= 10; + } + return s; } static int32_t test_u (uint64_t i) { - char s [DIGITS]; - char * const s_end = s + DIGITS; - char * const s_start = u64_to_s_fastest_ever (i, s_end); - uint32_t u = i == 0; // not equal to i - if (s_to_u32_unsafe (s_start, s_end, u) && u != i) - { - fprintf (stderr, "unsigned: expected: %lu, got: %u\n", i, u); - return 1; - } - return 0; + char s [DIGITS]; + char * const s_end = s + DIGITS; + char * const s_start = u64_to_s_fastest_ever (i, s_end); + uint32_t u = i == 0; // not equal to i + if (s_to_u32_unsafe (s_start, s_end, u) && u != i) + { + fprintf (stderr, "unsigned: expected: %lu, got: %u\n", i, u); + return 1; + } + return 0; } static int32_t test_i (int64_t i) { - char s [DIGITS]; - char * const s_end = s + DIGITS; - const uint64_t i_abs = i < 0 - ? static_cast (-i) - : static_cast (i); - char * s_start = u64_to_s_fastest_ever (i_abs, s_end); - if (i < 0) - { - *--s_start = '-'; - } - int32_t j = i == 0; // not equal to i - if (s_to_i32_unsafe (s_start, s_end, j) && j != i) - { - fprintf (stderr, "signed: expected: %ld, got: %d\n", i, j); - return 1; - } - return 0; + char s [DIGITS]; + char * const s_end = s + DIGITS; + const uint64_t i_abs = i < 0 + ? static_cast (-i) + : static_cast (i); + char * s_start = u64_to_s_fastest_ever (i_abs, s_end); + if (i < 0) + { + *--s_start = '-'; + } + int32_t j = i == 0; // not equal to i + if (s_to_i32_unsafe (s_start, s_end, j) && j != i) + { + fprintf (stderr, "signed: expected: %ld, got: %d\n", i, j); + return 1; + } + return 0; } static int32_t test () { - int32_t ok = 0; + int32_t ok = 0; - static const uint64_t UDELTA = 0xFFFF; - // zero neighbourhood - for (uint64_t i = 0; i <= UDELTA; ++i) - { - ok |= test_u (i); - } - // u32_max neighbourhood - static const uint64_t u32_max = std::numeric_limits::max(); - for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i) - { - ok |= test_u (i); - } + static const uint64_t UDELTA = 0xFFFF; + // zero neighbourhood + for (uint64_t i = 0; i <= UDELTA; ++i) + { + ok |= test_u (i); + } + // u32_max neighbourhood + static const uint64_t u32_max = std::numeric_limits::max(); + for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i) + { + ok |= test_u (i); + } - static const int64_t IDELTA = 0xFFFF; - // i32_min neighbourhood - static const int64_t i32_min = std::numeric_limits::min(); - for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i) - { - ok |= test_i (i); - } - // zero neighbourhood - for (int64_t i = -IDELTA; i <= IDELTA; ++i) - { - ok |= test_i (i); - } - // i32_max neighbourhood - static const int64_t i32_max = std::numeric_limits::max(); - for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i) - { - ok |= test_i (i); - } + static const int64_t IDELTA = 0xFFFF; + // i32_min neighbourhood + static const int64_t i32_min = std::numeric_limits::min(); + for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i) + { + ok |= test_i (i); + } + // zero neighbourhood + for (int64_t i = -IDELTA; i <= IDELTA; ++i) + { + ok |= test_i (i); + } + // i32_max neighbourhood + static const int64_t i32_max = std::numeric_limits::max(); + for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i) + { + ok |= test_i (i); + } - return ok; + return ok; } } // namespace re2c_test int main () { - return re2c_test::test (); + return re2c_test::test (); } diff --git a/re2c/src/util/allocate.h b/re2c/src/util/allocate.h index f664910c..47eaf887 100644 --- a/re2c/src/util/allocate.h +++ b/re2c/src/util/allocate.h @@ -10,8 +10,8 @@ namespace re2c { // this can be unacceptable for performance reasons template T * allocate (size_t n) { - void * p = operator new (n * sizeof (T)); - return static_cast (p); + void * p = operator new (n * sizeof (T)); + return static_cast (p); } } // namespace re2c diff --git a/re2c/src/util/counter.h b/re2c/src/util/counter.h index f4f58394..c8e83389 100644 --- a/re2c/src/util/counter.h +++ b/re2c/src/util/counter.h @@ -6,22 +6,22 @@ namespace re2c { template class counter_t { - num_t num; + num_t num; public: - counter_t () - : num () - {} - num_t next () - { - num_t n = num; - num.inc (); - return n; - } - void reset () - { - num = num_t (); - } + counter_t () + : num () + {} + num_t next () + { + num_t n = num; + num.inc (); + return n; + } + void reset () + { + num = num_t (); + } }; } // namespace re2c diff --git a/re2c/src/util/forbid_copy.h b/re2c/src/util/forbid_copy.h index 9c5701d7..df283639 100644 --- a/re2c/src/util/forbid_copy.h +++ b/re2c/src/util/forbid_copy.h @@ -4,8 +4,8 @@ // must be used at the end of class definition // (since this macro changes scope to private) #define FORBID_COPY(type) \ - private: \ - type (const type &); \ - type & operator = (const type &) + private: \ + type (const type &); \ + type & operator = (const type &) #endif // _RE2C_UTIL_FORBID_COPY_ diff --git a/re2c/src/util/free_list.h b/re2c/src/util/free_list.h index 8d3ac654..f2ad1eac 100644 --- a/re2c/src/util/free_list.h +++ b/re2c/src/util/free_list.h @@ -10,45 +10,45 @@ template class free_list: protected std::set<_Ty> { public: - typedef typename std::set<_Ty>::iterator iterator; - typedef typename std::set<_Ty>::size_type size_type; - typedef typename std::set<_Ty>::key_type key_type; - - free_list(): in_clear(false) - { - } - - using std::set<_Ty>::insert; - - size_type erase(const key_type& key) - { - if (!in_clear) - { - return std::set<_Ty>::erase(key); - } - return 0; - } - - void clear() - { - in_clear = true; - - for(iterator it = this->begin(); it != this->end(); ++it) - { - delete *it; - } - std::set<_Ty>::clear(); - - in_clear = false; - } - - ~free_list() - { - clear(); - } + typedef typename std::set<_Ty>::iterator iterator; + typedef typename std::set<_Ty>::size_type size_type; + typedef typename std::set<_Ty>::key_type key_type; + + free_list(): in_clear(false) + { + } + + using std::set<_Ty>::insert; + + size_type erase(const key_type& key) + { + if (!in_clear) + { + return std::set<_Ty>::erase(key); + } + return 0; + } + + void clear() + { + in_clear = true; + + for(iterator it = this->begin(); it != this->end(); ++it) + { + delete *it; + } + std::set<_Ty>::clear(); + + in_clear = false; + } + + ~free_list() + { + clear(); + } protected: - bool in_clear; + bool in_clear; }; } // end namespace re2c diff --git a/re2c/src/util/hash32.h b/re2c/src/util/hash32.h index 87d7cee0..9a031415 100644 --- a/re2c/src/util/hash32.h +++ b/re2c/src/util/hash32.h @@ -9,11 +9,11 @@ namespace re2c inline uint32_t hash32(uint32_t h, const void *data, size_t size) { - const uint8_t *bytes = static_cast(data); - for (size_t i = 0; i < size; ++i) { - h = h ^ ((h << 5) + (h >> 2) + bytes[i]); - } - return h; + const uint8_t *bytes = static_cast(data); + for (size_t i = 0; i < size; ++i) { + h = h ^ ((h << 5) + (h >> 2) + bytes[i]); + } + return h; } } // namespace re2c diff --git a/re2c/src/util/local_increment.h b/re2c/src/util/local_increment.h index 799ced43..e93a57be 100644 --- a/re2c/src/util/local_increment.h +++ b/re2c/src/util/local_increment.h @@ -7,14 +7,14 @@ namespace re2c template struct local_increment_t { - counter_t & counter; - inline explicit local_increment_t (counter_t & c) - : counter (++c) - {} - inline ~local_increment_t () - { - --counter; - } + counter_t & counter; + inline explicit local_increment_t (counter_t & c) + : counter (++c) + {} + inline ~local_increment_t () + { + --counter; + } }; } // namespace re2c diff --git a/re2c/src/util/lookup.h b/re2c/src/util/lookup.h index 7416a54f..9ae7d738 100644 --- a/re2c/src/util/lookup.h +++ b/re2c/src/util/lookup.h @@ -18,33 +18,33 @@ namespace re2c template struct lookup_t { - static const uint32_t NIL; + static const uint32_t NIL; private: - struct elem_t - { - uint32_t next; - data_t data; + struct elem_t + { + uint32_t next; + data_t data; - elem_t(uint32_t n, const data_t &d) - : next(n), data(d) {} - }; + elem_t(uint32_t n, const data_t &d) + : next(n), data(d) {} + }; - std::vector elems; - std::map lookup; + std::vector elems; + std::map lookup; public: - lookup_t(); - uint32_t size() const; - data_t& operator[](uint32_t idx); - const data_t& operator[](uint32_t idx) const; - uint32_t push(hash_t hash, const data_t &data); - template uint32_t find_with(hash_t hash, const data_t &data, pred_t &pred) const; - template uint32_t find_next_with(uint32_t prev, const data_t &data, pred_t &pred) const; + lookup_t(); + uint32_t size() const; + data_t& operator[](uint32_t idx); + const data_t& operator[](uint32_t idx) const; + uint32_t push(hash_t hash, const data_t &data); + template uint32_t find_with(hash_t hash, const data_t &data, pred_t &pred) const; + template uint32_t find_next_with(uint32_t prev, const data_t &data, pred_t &pred) const; private: - uint32_t head(hash_t) const; - template uint32_t find(uint32_t next, const data_t &data, pred_t &pred) const; + uint32_t head(hash_t) const; + template uint32_t find(uint32_t next, const data_t &data, pred_t &pred) const; }; template @@ -52,71 +52,71 @@ const uint32_t lookup_t::NIL = ~0u; template lookup_t::lookup_t() - : elems() - , lookup() + : elems() + , lookup() {} template uint32_t lookup_t::size() const { - return static_cast(elems.size()); + return static_cast(elems.size()); } template data_t& lookup_t::operator[](uint32_t idx) { - return elems[idx].data; + return elems[idx].data; } template const data_t& lookup_t::operator[](uint32_t idx) const { - return elems[idx].data; + return elems[idx].data; } template uint32_t lookup_t::head(hash_t h) const { - typename std::map::const_iterator x = lookup.find(h); - return x == lookup.end() ? NIL : x->second; + typename std::map::const_iterator x = lookup.find(h); + return x == lookup.end() ? NIL : x->second; } template uint32_t lookup_t::push(hash_t hash, const data_t &data) { - assert(elems.size() < NIL); - const uint32_t idx = static_cast(elems.size()); - elems.push_back(elem_t(head(hash), data)); - lookup[hash] = idx; - return idx; + assert(elems.size() < NIL); + const uint32_t idx = static_cast(elems.size()); + elems.push_back(elem_t(head(hash), data)); + lookup[hash] = idx; + return idx; } template template uint32_t lookup_t::find(uint32_t next, const data_t &data, pred_t &pred) const { - for (uint32_t i = next; i != NIL;) { - const elem_t &e = elems[i]; - if (pred(e.data, data)) { - return i; - } - i = e.next; - } - return NIL; + for (uint32_t i = next; i != NIL;) { + const elem_t &e = elems[i]; + if (pred(e.data, data)) { + return i; + } + i = e.next; + } + return NIL; } template template uint32_t lookup_t::find_with(hash_t hash, const data_t &data, pred_t &pred) const { - return find(head(hash), data, pred); + return find(head(hash), data, pred); } template template uint32_t lookup_t::find_next_with(uint32_t prev, const data_t &data, pred_t &pred) const { - return find(elems[prev].next, data, pred); + return find(elems[prev].next, data, pred); } } // namespace re2c diff --git a/re2c/src/util/range.cc b/re2c/src/util/range.cc index fa46ab33..d44edcc5 100644 --- a/re2c/src/util/range.cc +++ b/re2c/src/util/range.cc @@ -7,91 +7,91 @@ free_list Range::vFreeList; void Range::append_overlapping (Range * & head, Range * & tail, const Range * r) { - if (!head) - { - head = Range::ran (r->lb, r->ub); - tail = head; - } - else if (tail->ub < r->lb) - { - tail->nx = Range::ran (r->lb, r->ub); - tail = tail->nx; - } - else if (tail->ub < r->ub) - { - tail->ub = r->ub; - } + if (!head) + { + head = Range::ran (r->lb, r->ub); + tail = head; + } + else if (tail->ub < r->lb) + { + tail->nx = Range::ran (r->lb, r->ub); + tail = tail->nx; + } + else if (tail->ub < r->ub) + { + tail->ub = r->ub; + } } Range * Range::add (const Range * r1, const Range * r2) { - Range * head = NULL; - Range * tail = NULL; - for (; r1 && r2;) - { - if (r1->lb < r2->lb) - { - append_overlapping (head, tail, r1); - r1 = r1->nx; - } - else - { - append_overlapping (head, tail, r2); - r2 = r2->nx; - } - } - for (; r1; r1 = r1->nx) - { - append_overlapping (head, tail, r1); - } - for (; r2; r2 = r2->nx) - { - append_overlapping (head, tail, r2); - } - return head; + Range * head = NULL; + Range * tail = NULL; + for (; r1 && r2;) + { + if (r1->lb < r2->lb) + { + append_overlapping (head, tail, r1); + r1 = r1->nx; + } + else + { + append_overlapping (head, tail, r2); + r2 = r2->nx; + } + } + for (; r1; r1 = r1->nx) + { + append_overlapping (head, tail, r1); + } + for (; r2; r2 = r2->nx) + { + append_overlapping (head, tail, r2); + } + return head; } void Range::append (Range ** & ptail, uint32_t l, uint32_t u) { - Range * & tail = * ptail; - tail = Range::ran (l, u); - ptail = &tail->nx; + Range * & tail = * ptail; + tail = Range::ran (l, u); + ptail = &tail->nx; } Range * Range::sub (const Range * r1, const Range * r2) { - Range * head = NULL; - Range ** ptail = &head; - while (r1) - { - if (!r2 || r2->lb >= r1->ub) - { - append (ptail, r1->lb, r1->ub); - r1 = r1->nx; - } - else if (r2->ub <= r1->lb) - { - r2 = r2->nx; - } - else - { - if (r1->lb < r2->lb) - { - append (ptail, r1->lb, r2->lb); - } - while (r2 && r2->ub < r1->ub) - { - const uint32_t lb = r2->ub; - r2 = r2->nx; - const uint32_t ub = r2 && r2->lb < r1->ub - ? r2->lb - : r1->ub; - append (ptail, lb, ub); - } - r1 = r1->nx; - } - } - return head; + Range * head = NULL; + Range ** ptail = &head; + while (r1) + { + if (!r2 || r2->lb >= r1->ub) + { + append (ptail, r1->lb, r1->ub); + r1 = r1->nx; + } + else if (r2->ub <= r1->lb) + { + r2 = r2->nx; + } + else + { + if (r1->lb < r2->lb) + { + append (ptail, r1->lb, r2->lb); + } + while (r2 && r2->ub < r1->ub) + { + const uint32_t lb = r2->ub; + r2 = r2->nx; + const uint32_t ub = r2 && r2->lb < r1->ub + ? r2->lb + : r1->ub; + append (ptail, lb, ub); + } + r1 = r1->nx; + } + } + return head; } } // namespace re2c diff --git a/re2c/src/util/range.h b/re2c/src/util/range.h index afc142cc..2c509338 100644 --- a/re2c/src/util/range.h +++ b/re2c/src/util/range.h @@ -15,49 +15,49 @@ namespace re2c class Range { public: - static free_list vFreeList; + static free_list vFreeList; private: - Range * nx; - // [lb,ub) - uint32_t lb; - uint32_t ub; + Range * nx; + // [lb,ub) + uint32_t lb; + uint32_t ub; public: - static Range * sym (uint32_t c) - { - return new Range (NULL, c, c + 1); - } - static Range * ran (uint32_t l, uint32_t u) - { - return new Range (NULL, l, u); - } - ~Range () - { - vFreeList.erase (this); - } - Range * next () const { return nx; } - uint32_t lower () const { return lb; } - uint32_t upper () const { return ub; } - static Range * add (const Range * r1, const Range * r2); - static Range * sub (const Range * r1, const Range * r2); + static Range * sym (uint32_t c) + { + return new Range (NULL, c, c + 1); + } + static Range * ran (uint32_t l, uint32_t u) + { + return new Range (NULL, l, u); + } + ~Range () + { + vFreeList.erase (this); + } + Range * next () const { return nx; } + uint32_t lower () const { return lb; } + uint32_t upper () const { return ub; } + static Range * add (const Range * r1, const Range * r2); + static Range * sub (const Range * r1, const Range * r2); private: - Range (Range * n, uint32_t l, uint32_t u) - : nx (n) - , lb (l) - , ub (u) - { - assert (lb < ub); - vFreeList.insert (this); - } - static void append_overlapping (Range * & head, Range * & tail, const Range * r); - static void append (Range ** & ptail, uint32_t l, uint32_t u); + Range (Range * n, uint32_t l, uint32_t u) + : nx (n) + , lb (l) + , ub (u) + { + assert (lb < ub); + vFreeList.insert (this); + } + static void append_overlapping (Range * & head, Range * & tail, const Range * r); + static void append (Range ** & ptail, uint32_t l, uint32_t u); - // test addition and subtraction - template friend Range * re2c_test::range (uint32_t n); + // test addition and subtraction + template friend Range * re2c_test::range (uint32_t n); - FORBID_COPY (Range); + FORBID_COPY (Range); }; } // namespace re2c diff --git a/re2c/src/util/s_to_n32_unsafe.cc b/re2c/src/util/s_to_n32_unsafe.cc index fa7590b9..6aaabe37 100644 --- a/re2c/src/util/s_to_n32_unsafe.cc +++ b/re2c/src/util/s_to_n32_unsafe.cc @@ -6,50 +6,50 @@ // returns false on overflow bool s_to_u32_unsafe (const char * s, const char * s_end, uint32_t & number) { - uint64_t u = 0; - for (; s != s_end; ++s) - { - u *= 10; - u += static_cast (*s) - 0x30; - if (u >= std::numeric_limits::max()) - { - return false; - } - } - number = static_cast (u); - return true; + uint64_t u = 0; + for (; s != s_end; ++s) + { + u *= 10; + u += static_cast (*s) - 0x30; + if (u >= std::numeric_limits::max()) + { + return false; + } + } + number = static_cast (u); + return true; } // assumes that string matches regexp "-"? [0-9]+ // returns false on underflow/overflow bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number) { - int64_t i = 0; - if (*s == '-') - { - ++s; - for (; s != s_end; ++s) - { - i *= 10; - i -= *s - 0x30; - if (i < std::numeric_limits::min()) - { - return false; - } - } - } - else - { - for (; s != s_end; ++s) - { - i *= 10; - i += *s - 0x30; - if (i > std::numeric_limits::max()) - { - return false; - } - } - } - number = static_cast (i); - return true; + int64_t i = 0; + if (*s == '-') + { + ++s; + for (; s != s_end; ++s) + { + i *= 10; + i -= *s - 0x30; + if (i < std::numeric_limits::min()) + { + return false; + } + } + } + else + { + for (; s != s_end; ++s) + { + i *= 10; + i += *s - 0x30; + if (i > std::numeric_limits::max()) + { + return false; + } + } + } + number = static_cast (i); + return true; } diff --git a/re2c/src/util/slab_allocator.h b/re2c/src/util/slab_allocator.h index f34600d9..50ca3797 100644 --- a/re2c/src/util/slab_allocator.h +++ b/re2c/src/util/slab_allocator.h @@ -15,56 +15,56 @@ * Works ~20 times faster, than linux's glibc allocator :] */ template + uint32_t SLAB_SIZE = 1024 * 1024, + size_t ALIGN = 1> class slab_allocator_t { - typedef std::vector slabs_t; + typedef std::vector slabs_t; - slabs_t slabs_; /* quasilist of allocated slabs of 'SLAB_SIZE' bytes */ - char *current_slab_; - char *current_slab_end_; + slabs_t slabs_; /* quasilist of allocated slabs of 'SLAB_SIZE' bytes */ + char *current_slab_; + char *current_slab_end_; public: - slab_allocator_t(): slabs_(), current_slab_(0), current_slab_end_(0) {} + slab_allocator_t(): slabs_(), current_slab_(0), current_slab_end_(0) {} - ~slab_allocator_t() { std::for_each(slabs_.rbegin(), slabs_.rend(), free); } + ~slab_allocator_t() { std::for_each(slabs_.rbegin(), slabs_.rend(), free); } - void *alloc(size_t size) - { - char *result; + void *alloc(size_t size) + { + char *result; - /* alignment */ - size += ALIGN - size % ALIGN; + /* alignment */ + size += ALIGN - size % ALIGN; - /* very large objects */ - if (size > MAXIMUM_INLINE) { - result = static_cast(malloc(size)); - slabs_.push_back(result); - return result; - } + /* very large objects */ + if (size > MAXIMUM_INLINE) { + result = static_cast(malloc(size)); + slabs_.push_back(result); + return result; + } - /* no space in slab */ - const size_t yet_in_slab = static_cast(current_slab_end_ - current_slab_); - if (yet_in_slab < size) { - current_slab_ = static_cast(malloc(SLAB_SIZE)); - current_slab_end_ = current_slab_ + SLAB_SIZE; - slabs_.push_back(current_slab_); - } + /* no space in slab */ + const size_t yet_in_slab = static_cast(current_slab_end_ - current_slab_); + if (yet_in_slab < size) { + current_slab_ = static_cast(malloc(SLAB_SIZE)); + current_slab_end_ = current_slab_ + SLAB_SIZE; + slabs_.push_back(current_slab_); + } - result = current_slab_; - current_slab_ += size; + result = current_slab_; + current_slab_ += size; - return result; - } + return result; + } - template - inline data_t *alloct(size_t n) - { - return static_cast(alloc(n * sizeof(data_t))); - } + template + inline data_t *alloct(size_t n) + { + return static_cast(alloc(n * sizeof(data_t))); + } - FORBID_COPY(slab_allocator_t); + FORBID_COPY(slab_allocator_t); }; #endif // _RE2C_UTIL_SLAB_ALLOCATOR_ diff --git a/re2c/src/util/smart_ptr.h b/re2c/src/util/smart_ptr.h index c138cf55..b1b775b7 100644 --- a/re2c/src/util/smart_ptr.h +++ b/re2c/src/util/smart_ptr.h @@ -4,66 +4,66 @@ namespace re2c { - template - class smart_ptr - { - private: - T* ptr; - long* count; // shared number of owners + template + class smart_ptr + { + private: + T* ptr; + long* count; // shared number of owners - public: - explicit smart_ptr (T* p=0) - : ptr(p), count(new long(1)) {} + public: + explicit smart_ptr (T* p=0) + : ptr(p), count(new long(1)) {} - smart_ptr (const smart_ptr& p) throw() - : ptr(p.ptr), count(p.count) - { - ++*count; - } + smart_ptr (const smart_ptr& p) throw() + : ptr(p.ptr), count(p.count) + { + ++*count; + } - ~smart_ptr () - { - dispose(); - } + ~smart_ptr () + { + dispose(); + } - smart_ptr& operator= (const smart_ptr& p) - { - if (this != &p) - { - dispose(); - ptr = p.ptr; - count = p.count; - ++*count; - } - return *this; - } + smart_ptr& operator= (const smart_ptr& p) + { + if (this != &p) + { + dispose(); + ptr = p.ptr; + count = p.count; + ++*count; + } + return *this; + } - T& operator*() const - { - return *ptr; - } + T& operator*() const + { + return *ptr; + } - T* operator->() const - { - return ptr; - } + T* operator->() const + { + return ptr; + } - private: - void dispose() - { - if (--*count == 0) - { - delete count; - delete ptr; - } - } - }; + private: + void dispose() + { + if (--*count == 0) + { + delete count; + delete ptr; + } + } + }; - template - smart_ptr make_smart_ptr(T* p) - { - return smart_ptr(p); - } + template + smart_ptr make_smart_ptr(T* p) + { + return smart_ptr(p); + } } #endif // _RE2C_UTIL_SMART_PTR_ diff --git a/re2c/src/util/string_utils.h b/re2c/src/util/string_utils.h index bb7c0a81..53eceea6 100644 --- a/re2c/src/util/string_utils.h +++ b/re2c/src/util/string_utils.h @@ -8,29 +8,29 @@ namespace re2c { template void strrreplace( - std::string &s, - const std::string &s1, - const type_t &v) + std::string &s, + const std::string &s1, + const type_t &v) { - std::ostringstream sv; - sv << v; - const std::string &s2 = sv.str(); - const size_t l1 = s1.length(), l2 = s2.length(); - if (l1 != 0) { - std::string::size_type pos = s.find(s1); - while (pos != std::string::npos) { - s.replace(pos, l1, s2); - pos = s.find(s1, pos + l2); - } - } + std::ostringstream sv; + sv << v; + const std::string &s2 = sv.str(); + const size_t l1 = s1.length(), l2 = s2.length(); + if (l1 != 0) { + std::string::size_type pos = s.find(s1); + while (pos != std::string::npos) { + s.replace(pos, l1, s2); + pos = s.find(s1, pos + l2); + } + } } template static std::string to_string(const T &v) { - std::ostringstream s; - s << v; - return s.str(); + std::ostringstream s; + s << v; + return s.str(); } } // namespace re2c diff --git a/re2c/src/util/u32lim.h b/re2c/src/util/u32lim.h index d9c356cc..f9d03701 100644 --- a/re2c/src/util/u32lim.h +++ b/re2c/src/util/u32lim.h @@ -10,63 +10,63 @@ template class u32lim_t { - uint32_t value; - explicit u32lim_t (uint32_t x) - : value (x < LIMIT ? x : LIMIT) - {} - explicit u32lim_t (uint64_t x) - : value (x < LIMIT ? static_cast (x) : LIMIT) - {} + uint32_t value; + explicit u32lim_t (uint32_t x) + : value (x < LIMIT ? x : LIMIT) + {} + explicit u32lim_t (uint64_t x) + : value (x < LIMIT ? static_cast (x) : LIMIT) + {} public: - // implicit conversion is forbidden, because - // operands should be converted before operation: - // uint32_t x, y; ... u32lim_t z = x + y; - // will result in 32-bit addition and may overflow - // Don't export overloaded constructors: it breaks OS X builds - // ('size_t' causes resolution ambiguity) - static u32lim_t from32 (uint32_t x) { return u32lim_t(x); } - static u32lim_t from64 (uint64_t x) { return u32lim_t(x); } + // implicit conversion is forbidden, because + // operands should be converted before operation: + // uint32_t x, y; ... u32lim_t z = x + y; + // will result in 32-bit addition and may overflow + // Don't export overloaded constructors: it breaks OS X builds + // ('size_t' causes resolution ambiguity) + static u32lim_t from32 (uint32_t x) { return u32lim_t(x); } + static u32lim_t from64 (uint64_t x) { return u32lim_t(x); } - static u32lim_t limit () - { - return u32lim_t (LIMIT); - } + static u32lim_t limit () + { + return u32lim_t (LIMIT); + } - uint32_t uint32 () const - { - return value; - } + uint32_t uint32 () const + { + return value; + } - bool overflow () const - { - return value == LIMIT; - } + bool overflow () const + { + return value == LIMIT; + } - friend u32lim_t operator + (u32lim_t x, u32lim_t y) - { - const uint64_t z - = static_cast (x.value) - + static_cast (y.value); - return z < LIMIT - ? u32lim_t (z) - : u32lim_t (LIMIT); - } + friend u32lim_t operator + (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + + static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } - friend u32lim_t operator * (u32lim_t x, u32lim_t y) - { - const uint64_t z - = static_cast (x.value) - * static_cast (y.value); - return z < LIMIT - ? u32lim_t (z) - : u32lim_t (LIMIT); - } + friend u32lim_t operator * (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + * static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } - friend bool operator < (u32lim_t x, u32lim_t y) - { - return x.value < y.value; - } + friend bool operator < (u32lim_t x, u32lim_t y) + { + return x.value < y.value; + } }; #endif // _RE2C_UTIL_U32LIM_ diff --git a/re2c/src/util/uniq_vector.h b/re2c/src/util/uniq_vector.h index 76c0512f..3c4b27c0 100644 --- a/re2c/src/util/uniq_vector.h +++ b/re2c/src/util/uniq_vector.h @@ -12,33 +12,33 @@ namespace re2c template class uniq_vector_t { - typedef std::vector elems_t; - elems_t elems; + typedef std::vector elems_t; + elems_t elems; public: - uniq_vector_t () - : elems () - {} - size_t size () const - { - return elems.size (); - } - const value_t & operator [] (size_t i) const - { - return elems[i]; - } - size_t find_or_add (const value_t & v) - { - const size_t size = elems.size (); - for (size_t i = 0; i < size; ++i) - { - if (elems[i] == v) - { - return i; - } - } - elems.push_back (v); - return size; - } + uniq_vector_t () + : elems () + {} + size_t size () const + { + return elems.size (); + } + const value_t & operator [] (size_t i) const + { + return elems[i]; + } + size_t find_or_add (const value_t & v) + { + const size_t size = elems.size (); + for (size_t i = 0; i < size; ++i) + { + if (elems[i] == v) + { + return i; + } + } + elems.push_back (v); + return size; + } }; } // namespace re2c diff --git a/re2c/src/util/wrap_iter.h b/re2c/src/util/wrap_iter.h index 419c18f7..bf58a867 100644 --- a/re2c/src/util/wrap_iter.h +++ b/re2c/src/util/wrap_iter.h @@ -8,17 +8,17 @@ namespace re2c template class wrap_citer_t { - typedef typename container_t::const_iterator citer_t; - typedef const typename container_t::value_type* cpval_t; + typedef typename container_t::const_iterator citer_t; + typedef const typename container_t::value_type* cpval_t; - const citer_t beg; - const citer_t end; - citer_t cur; + const citer_t beg; + const citer_t end; + citer_t cur; public: - explicit wrap_citer_t(const container_t &c): beg(c.begin()), end(c.end()), cur(beg) {} - wrap_citer_t& operator++() { if (++cur == end) cur = beg; return *this; } - cpval_t operator->() const { return cur.operator->(); } + explicit wrap_citer_t(const container_t &c): beg(c.begin()), end(c.end()), cur(beg) {} + wrap_citer_t& operator++() { if (++cur == end) cur = beg; return *this; } + cpval_t operator->() const { return cur.operator->(); } }; } // namespace re2c