From 14bf192969a3256d93c185dd5b9f5a5f2d63d8de Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 12 May 2015 12:26:07 +0100 Subject: [PATCH] Split 'src/codegen/code.cc' into parts. First, DFA is built ('re2c::DFA::DFA'), then it must be prepared for code generation: some states must be split, backtracking points must be marked, etc. ('re2c::DFA::prepare'), then finally code can be generated ('re2c::DFA::genCode'). I haven't yet fully decided whether second stage (preparing) is closer to DFA construction in general (and thus should be moved to 'src/dfa') or to code generation (and should be moved to 'src/codegen'). Since it deals a lot with bitmaps, second variant will suffice for now. Perhaps later on I'll split preparation into general and codegen-related parts. --- re2c/Makefile.am | 5 +- re2c/src/codegen/{code.cc => dfa_emit.cc} | 454 ---------------------- re2c/src/codegen/dfa_prepare.cc | 296 ++++++++++++++ re2c/src/codegen/scc.cc | 134 +++++++ re2c/src/codegen/scc.h | 33 ++ 5 files changed, 467 insertions(+), 455 deletions(-) rename re2c/src/codegen/{code.cc => dfa_emit.cc} (74%) create mode 100644 re2c/src/codegen/dfa_prepare.cc create mode 100644 re2c/src/codegen/scc.cc create mode 100644 re2c/src/codegen/scc.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 382faa57..c3efd9d0 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -34,6 +34,7 @@ SRC_HDR = \ $(srcdir)/src/codegen/input_api.h \ $(srcdir)/src/codegen/output.h \ $(srcdir)/src/codegen/print.h \ + $(srcdir)/src/codegen/scc.h \ $(srcdir)/src/codegen/skeleton/skeleton.h \ $(srcdir)/src/dfa/encoding/enc.h \ $(srcdir)/src/dfa/encoding/range_suffix.h \ @@ -61,8 +62,9 @@ SRC_HDR = \ SRC = \ $(SRC_SCANNER) \ $(srcdir)/src/codegen/bitmap.cc \ - $(srcdir)/src/codegen/code.cc \ $(srcdir)/src/codegen/code_names.cc \ + $(srcdir)/src/codegen/dfa_emit.cc \ + $(srcdir)/src/codegen/dfa_prepare.cc \ $(srcdir)/src/codegen/go_construct.cc \ $(srcdir)/src/codegen/go_destruct.cc \ $(srcdir)/src/codegen/go_emit.cc \ @@ -70,6 +72,7 @@ SRC = \ $(srcdir)/src/codegen/input_api.cc \ $(srcdir)/src/codegen/output.cc \ $(srcdir)/src/codegen/print.cc \ + $(srcdir)/src/codegen/scc.cc \ $(srcdir)/src/codegen/skeleton/skeleton.cc \ $(srcdir)/src/codegen/translate.cc \ $(srcdir)/src/dfa/encoding/enc.cc \ diff --git a/re2c/src/codegen/code.cc b/re2c/src/codegen/dfa_emit.cc similarity index 74% rename from re2c/src/codegen/code.cc rename to re2c/src/codegen/dfa_emit.cc index 50ddcb3e..035566e3 100644 --- a/re2c/src/codegen/code.cc +++ b/re2c/src/codegen/dfa_emit.cc @@ -464,460 +464,6 @@ void State::emit(Output & output, uint32_t ind, bool &readCh, const std::string& action->emit(output, ind, readCh, condName); } -static uint32_t merge(Span *x0, State *fg, State *bg) -{ - Span *x = x0, *f = fg->go.span, *b = bg->go.span; - uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans; - State *prev = NULL, *to; - // NB: we assume both spans are for same range - - for (;;) - { - if (f->ub == b->ub) - { - to = f->to == b->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = f->ub; - ++x; - ++f; - --nf; - ++b; - --nb; - - if (nf == 0 && nb == 0) - { - return x - x0; - } - } - - while (f->ub < b->ub) - { - to = f->to == b->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = f->ub; - ++x; - ++f; - --nf; - } - - while (b->ub < f->ub) - { - to = b->to == f->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = b->ub; - ++x; - ++b; - --nb; - } - } -} - -static const uint32_t cInfinity = ~0u; - -class SCC -{ - -public: - State **top, **stk; - -public: - SCC(uint32_t); - ~SCC(); - void traverse(State*); - -#ifdef PEDANTIC -private: - SCC(const SCC& oth) - : top(oth.top) - , stk(oth.stk) - { - } - SCC& operator = (const SCC& oth) - { - new(this) SCC(oth); - return *this; - } -#endif -}; - -SCC::SCC(uint32_t size) - : top(new State * [size]) - , stk(top) -{ -} - -SCC::~SCC() -{ - delete [] stk; -} - -void SCC::traverse(State *x) -{ - *top = x; - uint32_t k = ++top - stk; - x->depth = k; - - for (uint32_t i = 0; i < x->go.nSpans; ++i) - { - State *y = x->go.span[i].to; - - if (y) - { - if (y->depth == 0) - { - traverse(y); - } - - if (y->depth < x->depth) - { - x->depth = y->depth; - } - } - } - - if (x->depth == k) - { - do - { - (*--top)->depth = cInfinity; - (*top)->link = x; - } - while (*top != x); - } -} - -static bool state_is_in_non_trivial_SCC(const State* s) -{ - - // does not link to self - if (s->link != s) - { - return true; - } - - // or exists i: (s->go.spans[i].to->link == s) - // - // Note: (s->go.spans[i].to == s) is allowed, corresponds to s - // looping back to itself. - // - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - const State* t = s->go.span[i].to; - - if (t && t->link == s) - { - return true; - } - } - // otherwise no - return false; -} - -static uint32_t maxDist(State *s) -{ - if (s->depth != cInfinity) - { - // Already calculated, just return result. - return s->depth; - } - uint32_t mm = 0; - - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - State *t = s->go.span[i].to; - - if (t) - { - uint32_t m = 1; - - if (!t->link) // marked as non-key state - { - if (t->depth == cInfinity) - { - t->depth = maxDist(t); - } - m += t->depth; - } - - if (m > mm) - { - mm = m; - } - } - } - - s->depth = mm; - return mm; -} - -static void calcDepth(State *head) -{ - State* s; - - // mark non-key states by s->link = NULL ; - for (s = head; s; s = s->next) - { - if (s != head && !state_is_in_non_trivial_SCC(s)) - { - s->link = NULL; - } - //else: key state, leave alone - } - - for (s = head; s; s = s->next) - { - s->depth = cInfinity; - } - - // calculate max number of transitions before guarantied to reach - // a key state. - for (s = head; s; s = s->next) - { - maxDist(s); - } -} - -void DFA::findSCCs() -{ - SCC scc(nStates); - State *s; - - for (s = head; s; s = s->next) - { - s->depth = 0; - s->link = NULL; - } - - for (s = head; s; s = s->next) - { - if (!s->depth) - { - scc.traverse(s); - } - } - - calcDepth(head); -} - -void DFA::split(State *s) -{ - State *move = new State; - (void) new Move(move); - addState(&s->next, move); - move->link = s->link; - move->rule = s->rule; - move->go = s->go; - s->rule = NULL; - s->go.nSpans = 1; - s->go.span = new Span[1]; - s->go.span[0].ub = ubChar; - s->go.span[0].to = move; -} - -void DFA::findBaseState() -{ - Span *span = new Span[ubChar - lbChar]; - - for (State *s = head; s; s = s->next) - { - if (!s->link) - { - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - State *to = s->go.span[i].to; - - if (to->isBase) - { - to = to->go.span[0].to; - uint32_t nSpans = merge(span, s, to); - - if (nSpans < s->go.nSpans) - { - delete [] s->go.span; - s->go.nSpans = nSpans; - s->go.span = new Span[nSpans]; - memcpy(s->go.span, span, nSpans*sizeof(Span)); - } - - break; - } - } - } - } - - delete [] span; -} - -void DFA::prepare(uint32_t & max_fill) -{ - State *s; - uint32_t i; - - bUsedYYBitmap = false; - - findSCCs(); - head->link = head; - - uint32_t nRules = 0; - - for (s = head; s; s = s->next) - { - s->depth = maxDist(s); - if (max_fill < s->depth) - { - max_fill = s->depth; - } - if (s->rule && s->rule->accept >= nRules) - { - nRules = s->rule->accept + 1; - } - } - - uint32_t nSaves = 0; - saves = new uint32_t[nRules]; - memset(saves, ~0, (nRules)*sizeof(*saves)); - - // mark backtracking points - bSaveOnHead = false; - - for (s = head; s; s = s->next) - { - if (s->rule) - { - for (i = 0; i < s->go.nSpans; ++i) - { - if (s->go.span[i].to && !s->go.span[i].to->rule) - { - delete s->action; - s->action = NULL; - - if (saves[s->rule->accept] == ~0u) - { - saves[s->rule->accept] = nSaves++; - } - - bSaveOnHead |= s == head; - (void) new Save(s, saves[s->rule->accept]); // sets s->action - } - } - } - } - - // insert actions - rules = new State * [nRules]; - - memset(rules, 0, (nRules)*sizeof(*rules)); - - State *accept = NULL; - Accept *accfixup = NULL; - - for (s = head; s; s = s->next) - { - State * ow; - - if (!s->rule) - { - ow = accept; - } - else - { - if (!rules[s->rule->accept]) - { - State *n = new State; - (void) new Rule(n, s->rule); - rules[s->rule->accept] = n; - addState(&s->next, n); - } - - ow = rules[s->rule->accept]; - } - - for (i = 0; i < s->go.nSpans; ++i) - { - if (!s->go.span[i].to) - { - if (!ow) - { - ow = accept = new State; - accfixup = new Accept(accept, nRules, saves, rules); - addState(&s->next, accept); - } - - s->go.span[i].to = ow; - } - } - } - - if (accfixup) - { - accfixup->genRuleMap(); - } - - // split ``base'' states into two parts - for (s = head; s; s = s->next) - { - s->isBase = false; - - if (s->link) - { - for (i = 0; i < s->go.nSpans; ++i) - { - if (s->go.span[i].to == s) - { - s->isBase = true; - split(s); - - if (bFlag) - { - BitMap::find(&s->next->go, s); - } - - s = s->next; - break; - } - } - } - } - - // find ``base'' state, if possible - findBaseState(); - - delete head->action; - head->action = NULL; - - for (s = head; s; s = s->next) - { - s->go.init (s); - } -} - void DFA::emit(Output & output, uint32_t& ind, const RegExpMap* specMap, const std::string& condName, bool isLastCond, bool& bPrologBrace) { OutputFile & o = output.source; diff --git a/re2c/src/codegen/dfa_prepare.cc b/re2c/src/codegen/dfa_prepare.cc new file mode 100644 index 00000000..2a703d37 --- /dev/null +++ b/re2c/src/codegen/dfa_prepare.cc @@ -0,0 +1,296 @@ +#include "src/codegen/bitmap.h" +#include "src/codegen/scc.h" +#include "src/dfa/dfa.h" + +namespace re2c { + +void DFA::findSCCs() +{ + SCC scc(nStates); + State *s; + + for (s = head; s; s = s->next) + { + s->depth = 0; + s->link = NULL; + } + + for (s = head; s; s = s->next) + { + if (!s->depth) + { + scc.traverse(s); + } + } + + calcDepth(head); +} + +void DFA::split(State *s) +{ + State *move = new State; + (void) new Move(move); + addState(&s->next, move); + move->link = s->link; + move->rule = s->rule; + move->go = s->go; + s->rule = NULL; + s->go.nSpans = 1; + s->go.span = new Span[1]; + s->go.span[0].ub = ubChar; + s->go.span[0].to = move; +} + +static uint32_t merge(Span *x0, State *fg, State *bg) +{ + Span *x = x0, *f = fg->go.span, *b = bg->go.span; + uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans; + State *prev = NULL, *to; + // NB: we assume both spans are for same range + + for (;;) + { + if (f->ub == b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + ++b; + --nb; + + if (nf == 0 && nb == 0) + { + return x - x0; + } + } + + while (f->ub < b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + } + + while (b->ub < f->ub) + { + to = b->to == f->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = b->ub; + ++x; + ++b; + --nb; + } + } +} + +void DFA::findBaseState() +{ + Span *span = new Span[ubChar - lbChar]; + + for (State *s = head; s; s = s->next) + { + if (!s->link) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *to = s->go.span[i].to; + + if (to->isBase) + { + to = to->go.span[0].to; + uint32_t nSpans = merge(span, s, to); + + if (nSpans < s->go.nSpans) + { + delete [] s->go.span; + s->go.nSpans = nSpans; + s->go.span = new Span[nSpans]; + memcpy(s->go.span, span, nSpans*sizeof(Span)); + } + + break; + } + } + } + } + + delete [] span; +} + +void DFA::prepare(uint32_t & max_fill) +{ + State *s; + uint32_t i; + + bUsedYYBitmap = false; + + findSCCs(); + head->link = head; + + uint32_t nRules = 0; + + for (s = head; s; s = s->next) + { + s->depth = maxDist(s); + if (max_fill < s->depth) + { + max_fill = s->depth; + } + if (s->rule && s->rule->accept >= nRules) + { + nRules = s->rule->accept + 1; + } + } + + uint32_t nSaves = 0; + saves = new uint32_t[nRules]; + memset(saves, ~0, (nRules)*sizeof(*saves)); + + // mark backtracking points + bSaveOnHead = false; + + for (s = head; s; s = s->next) + { + if (s->rule) + { + for (i = 0; i < s->go.nSpans; ++i) + { + if (s->go.span[i].to && !s->go.span[i].to->rule) + { + delete s->action; + s->action = NULL; + + if (saves[s->rule->accept] == ~0u) + { + saves[s->rule->accept] = nSaves++; + } + + bSaveOnHead |= s == head; + (void) new Save(s, saves[s->rule->accept]); // sets s->action + } + } + } + } + + // insert actions + rules = new State * [nRules]; + + memset(rules, 0, (nRules)*sizeof(*rules)); + + State *accept = NULL; + Accept *accfixup = NULL; + + for (s = head; s; s = s->next) + { + State * ow; + + if (!s->rule) + { + ow = accept; + } + else + { + if (!rules[s->rule->accept]) + { + State *n = new State; + (void) new Rule(n, s->rule); + rules[s->rule->accept] = n; + addState(&s->next, n); + } + + ow = rules[s->rule->accept]; + } + + for (i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + if (!ow) + { + ow = accept = new State; + accfixup = new Accept(accept, nRules, saves, rules); + addState(&s->next, accept); + } + + s->go.span[i].to = ow; + } + } + } + + if (accfixup) + { + accfixup->genRuleMap(); + } + + // split ``base'' states into two parts + for (s = head; s; s = s->next) + { + s->isBase = false; + + if (s->link) + { + for (i = 0; i < s->go.nSpans; ++i) + { + if (s->go.span[i].to == s) + { + s->isBase = true; + split(s); + + if (bFlag) + { + BitMap::find(&s->next->go, s); + } + + s = s->next; + break; + } + } + } + } + + // find ``base'' state, if possible + findBaseState(); + + delete head->action; + head->action = NULL; + + for (s = head; s; s = s->next) + { + s->go.init (s); + } +} + +} // namespace re2c diff --git a/re2c/src/codegen/scc.cc b/re2c/src/codegen/scc.cc new file mode 100644 index 00000000..ba79dbb0 --- /dev/null +++ b/re2c/src/codegen/scc.cc @@ -0,0 +1,134 @@ +#include "src/codegen/scc.h" +#include "src/dfa/dfa.h" + +namespace re2c { + +SCC::SCC (uint32_t size) + : top (new State * [size]) + , stk (top) +{} + +SCC::~SCC () +{ + delete [] stk; +} + +void SCC::traverse (State * x) +{ + *top = x; + uint32_t k = ++top - stk; + x->depth = k; + + for (uint32_t i = 0; i < x->go.nSpans; ++i) + { + State *y = x->go.span[i].to; + if (y) + { + if (y->depth == 0) + { + traverse(y); + } + if (y->depth < x->depth) + { + x->depth = y->depth; + } + } + } + + if (x->depth == k) + { + do + { + (*--top)->depth = cInfinity; + (*top)->link = x; + } + while (*top != x); + } +} + +bool state_is_in_non_trivial_SCC (const State * s) +{ + // does not link to self + if (s->link != s) + { + return true; + } + + // or exists i: (s->go.spans[i].to->link == s) + // + // Note: (s->go.spans[i].to == s) is allowed, corresponds to s + // looping back to itself. + // + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + const State* t = s->go.span[i].to; + if (t && t->link == s) + { + return true; + } + } + // otherwise no + return false; +} + +uint32_t maxDist (State * s) +{ + if (s->depth != cInfinity) + { + // Already calculated, just return result. + return s->depth; + } + uint32_t mm = 0; + + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *t = s->go.span[i].to; + if (t) + { + uint32_t m = 1; + if (!t->link) // marked as non-key state + { + if (t->depth == cInfinity) + { + t->depth = maxDist(t); + } + m += t->depth; + } + if (m > mm) + { + mm = m; + } + } + } + + s->depth = mm; + return mm; +} + +void calcDepth (State * head) +{ + State * s; + + // mark non-key states by s->link = NULL ; + for (s = head; s; s = s->next) + { + if (s != head && !state_is_in_non_trivial_SCC(s)) + { + s->link = NULL; + } + //else: key state, leave alone + } + for (s = head; s; s = s->next) + { + s->depth = cInfinity; + } + + // calculate max number of transitions before guarantied to reach + // a key state. + for (s = head; s; s = s->next) + { + maxDist(s); + } +} + +} // namespace re2c diff --git a/re2c/src/codegen/scc.h b/re2c/src/codegen/scc.h new file mode 100644 index 00000000..a08966f4 --- /dev/null +++ b/re2c/src/codegen/scc.h @@ -0,0 +1,33 @@ +#ifndef __SCC__ +#define __SCC__ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class State; + +static const uint32_t cInfinity = ~0u; + +class SCC +{ +public: + State ** top; + State ** stk; + + SCC (uint32_t); + ~SCC (); + void traverse (State *); + +private: + SCC (const SCC &); + SCC & operator = (const SCC &); +}; + +bool state_is_in_non_trivial_SCC (const State * s); +uint32_t maxDist (State * s); +void calcDepth (State * head); + +} // namespace re2c + +#endif // __SCC__ -- 2.40.0