]> granicus.if.org Git - re2c/commitdiff
Split 'src/codegen/code.cc' into parts.
authorUlya Trofimovich <skvadrik@gmail.com>
Tue, 12 May 2015 11:26:07 +0000 (12:26 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Tue, 12 May 2015 11:26:07 +0000 (12:26 +0100)
First, DFA is built ('re2c::DFA::DFA'), then it must be prepared
for code generation: some states must be split, backtracking points
must be marked, etc. ('re2c::DFA::prepare'), then finally code
can be generated ('re2c::DFA::genCode').

I haven't yet fully decided whether second stage (preparing) is
closer to DFA construction in general (and thus should be moved to
'src/dfa') or to code generation (and should be moved to 'src/codegen').
Since it deals a lot with bitmaps, second variant will suffice for
now. Perhaps later on I'll split preparation into general and
codegen-related parts.

re2c/Makefile.am
re2c/src/codegen/dfa_emit.cc [moved from re2c/src/codegen/code.cc with 74% similarity]
re2c/src/codegen/dfa_prepare.cc [new file with mode: 0644]
re2c/src/codegen/scc.cc [new file with mode: 0644]
re2c/src/codegen/scc.h [new file with mode: 0644]

index 382faa57879b84a2124d6c9d03435c3cb0daebcd..c3efd9d07c023a4270bfb8f572cc5f415d11c3d3 100644 (file)
@@ -34,6 +34,7 @@ SRC_HDR = \
        $(srcdir)/src/codegen/input_api.h \
        $(srcdir)/src/codegen/output.h \
        $(srcdir)/src/codegen/print.h \
+       $(srcdir)/src/codegen/scc.h \
        $(srcdir)/src/codegen/skeleton/skeleton.h \
        $(srcdir)/src/dfa/encoding/enc.h \
        $(srcdir)/src/dfa/encoding/range_suffix.h \
@@ -61,8 +62,9 @@ SRC_HDR = \
 SRC = \
        $(SRC_SCANNER) \
        $(srcdir)/src/codegen/bitmap.cc \
-       $(srcdir)/src/codegen/code.cc \
        $(srcdir)/src/codegen/code_names.cc \
+       $(srcdir)/src/codegen/dfa_emit.cc \
+       $(srcdir)/src/codegen/dfa_prepare.cc \
        $(srcdir)/src/codegen/go_construct.cc \
        $(srcdir)/src/codegen/go_destruct.cc \
        $(srcdir)/src/codegen/go_emit.cc \
@@ -70,6 +72,7 @@ SRC = \
        $(srcdir)/src/codegen/input_api.cc \
        $(srcdir)/src/codegen/output.cc \
        $(srcdir)/src/codegen/print.cc \
+       $(srcdir)/src/codegen/scc.cc \
        $(srcdir)/src/codegen/skeleton/skeleton.cc \
        $(srcdir)/src/codegen/translate.cc \
        $(srcdir)/src/dfa/encoding/enc.cc \
similarity index 74%
rename from re2c/src/codegen/code.cc
rename to re2c/src/codegen/dfa_emit.cc
index 50ddcb3e15347765effb27ae588d31b53ea3d94a..035566e3868d4d9d64e0c224ef645f293a109ce1 100644 (file)
@@ -464,460 +464,6 @@ void State::emit(Output & output, uint32_t ind, bool &readCh, const std::string&
        action->emit(output, ind, readCh, condName);
 }
 
-static uint32_t merge(Span *x0, State *fg, State *bg)
-{
-       Span *x = x0, *f = fg->go.span, *b = bg->go.span;
-       uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans;
-       State *prev = NULL, *to;
-       // NB: we assume both spans are for same range
-
-       for (;;)
-       {
-               if (f->ub == b->ub)
-               {
-                       to = f->to == b->to ? bg : f->to;
-
-                       if (to == prev)
-                       {
-                               --x;
-                       }
-                       else
-                       {
-                               x->to = prev = to;
-                       }
-
-                       x->ub = f->ub;
-                       ++x;
-                       ++f;
-                       --nf;
-                       ++b;
-                       --nb;
-
-                       if (nf == 0 && nb == 0)
-                       {
-                               return x - x0;
-                       }
-               }
-
-               while (f->ub < b->ub)
-               {
-                       to = f->to == b->to ? bg : f->to;
-
-                       if (to == prev)
-                       {
-                               --x;
-                       }
-                       else
-                       {
-                               x->to = prev = to;
-                       }
-
-                       x->ub = f->ub;
-                       ++x;
-                       ++f;
-                       --nf;
-               }
-
-               while (b->ub < f->ub)
-               {
-                       to = b->to == f->to ? bg : f->to;
-
-                       if (to == prev)
-                       {
-                               --x;
-                       }
-                       else
-                       {
-                               x->to = prev = to;
-                       }
-
-                       x->ub = b->ub;
-                       ++x;
-                       ++b;
-                       --nb;
-               }
-       }
-}
-
-static const uint32_t cInfinity = ~0u;
-
-class SCC
-{
-
-public:
-       State   **top, **stk;
-
-public:
-       SCC(uint32_t);
-       ~SCC();
-       void traverse(State*);
-
-#ifdef PEDANTIC
-private:
-       SCC(const SCC& oth)
-               : top(oth.top)
-               , stk(oth.stk)
-       {
-       }
-       SCC& operator = (const SCC& oth)
-       {
-               new(this) SCC(oth);
-               return *this;
-       }
-#endif
-};
-
-SCC::SCC(uint32_t size)
-       : top(new State * [size])
-       , stk(top)
-{
-}
-
-SCC::~SCC()
-{
-       delete [] stk;
-}
-
-void SCC::traverse(State *x)
-{
-       *top = x;
-       uint32_t k = ++top - stk;
-       x->depth = k;
-
-       for (uint32_t i = 0; i < x->go.nSpans; ++i)
-       {
-               State *y = x->go.span[i].to;
-
-               if (y)
-               {
-                       if (y->depth == 0)
-                       {
-                               traverse(y);
-                       }
-
-                       if (y->depth < x->depth)
-                       {
-                               x->depth = y->depth;
-                       }
-               }
-       }
-
-       if (x->depth == k)
-       {
-               do
-               {
-                       (*--top)->depth = cInfinity;
-                       (*top)->link = x;
-               }
-               while (*top != x);
-       }
-}
-
-static bool state_is_in_non_trivial_SCC(const State* s)
-{
-       
-       // does not link to self
-       if (s->link != s)
-       {
-               return true;
-       }
-       
-       // or exists i: (s->go.spans[i].to->link == s)
-       //
-       // Note: (s->go.spans[i].to == s) is allowed, corresponds to s
-       // looping back to itself.
-       //
-       for (uint32_t i = 0; i < s->go.nSpans; ++i)
-       {
-               const State* t = s->go.span[i].to;
-       
-               if (t && t->link == s)
-               {
-                       return true;
-               }
-       }
-       // otherwise no
-       return false;
-}
-
-static uint32_t maxDist(State *s)
-{
-       if (s->depth != cInfinity)
-       {
-               // Already calculated, just return result.
-       return s->depth;
-       }
-       uint32_t mm = 0;
-
-       for (uint32_t i = 0; i < s->go.nSpans; ++i)
-       {
-               State *t = s->go.span[i].to;
-
-               if (t)
-               {
-                       uint32_t m = 1;
-
-                       if (!t->link) // marked as non-key state
-                       {
-                               if (t->depth == cInfinity)
-                               {
-                                       t->depth = maxDist(t);
-                               }
-                               m += t->depth;
-                       }
-
-                       if (m > mm)
-                       {
-                               mm = m;
-                       }
-               }
-       }
-
-       s->depth = mm;
-       return mm;
-}
-
-static void calcDepth(State *head)
-{
-       State* s;
-
-       // mark non-key states by s->link = NULL ;
-       for (s = head; s; s = s->next)
-       {
-               if (s != head && !state_is_in_non_trivial_SCC(s))
-               {
-                       s->link = NULL;
-               }
-               //else: key state, leave alone
-       }
-       
-       for (s = head; s; s = s->next)
-       {
-               s->depth = cInfinity;
-       }
-
-       // calculate max number of transitions before guarantied to reach
-       // a key state.
-       for (s = head; s; s = s->next)
-       {
-               maxDist(s);
-       }
-}
-
-void DFA::findSCCs()
-{
-       SCC scc(nStates);
-       State *s;
-
-       for (s = head; s; s = s->next)
-       {
-               s->depth = 0;
-               s->link = NULL;
-       }
-
-       for (s = head; s; s = s->next)
-       {
-               if (!s->depth)
-               {
-                       scc.traverse(s);
-               }
-       }
-
-       calcDepth(head);
-}
-
-void DFA::split(State *s)
-{
-       State *move = new State;
-       (void) new Move(move);
-       addState(&s->next, move);
-       move->link = s->link;
-       move->rule = s->rule;
-       move->go = s->go;
-       s->rule = NULL;
-       s->go.nSpans = 1;
-       s->go.span = new Span[1];
-       s->go.span[0].ub = ubChar;
-       s->go.span[0].to = move;
-}
-
-void DFA::findBaseState()
-{
-       Span *span = new Span[ubChar - lbChar];
-
-       for (State *s = head; s; s = s->next)
-       {
-               if (!s->link)
-               {
-                       for (uint32_t i = 0; i < s->go.nSpans; ++i)
-                       {
-                               State *to = s->go.span[i].to;
-
-                               if (to->isBase)
-                               {
-                                       to = to->go.span[0].to;
-                                       uint32_t nSpans = merge(span, s, to);
-
-                                       if (nSpans < s->go.nSpans)
-                                       {
-                                               delete [] s->go.span;
-                                               s->go.nSpans = nSpans;
-                                               s->go.span = new Span[nSpans];
-                                               memcpy(s->go.span, span, nSpans*sizeof(Span));
-                                       }
-
-                                       break;
-                               }
-                       }
-               }
-       }
-
-       delete [] span;
-}
-
-void DFA::prepare(uint32_t & max_fill)
-{
-       State *s;
-       uint32_t i;
-
-       bUsedYYBitmap = false;
-
-       findSCCs();
-       head->link = head;
-
-       uint32_t nRules = 0;
-
-       for (s = head; s; s = s->next)
-       {
-               s->depth = maxDist(s);
-               if (max_fill < s->depth)
-               {
-                       max_fill = s->depth;
-               }
-               if (s->rule && s->rule->accept >= nRules)
-               {
-                       nRules = s->rule->accept + 1;
-               }
-       }
-
-       uint32_t nSaves = 0;
-       saves = new uint32_t[nRules];
-       memset(saves, ~0, (nRules)*sizeof(*saves));
-
-       // mark backtracking points
-       bSaveOnHead = false;
-
-       for (s = head; s; s = s->next)
-       {
-               if (s->rule)
-               {
-                       for (i = 0; i < s->go.nSpans; ++i)
-                       {
-                               if (s->go.span[i].to && !s->go.span[i].to->rule)
-                               {
-                                       delete s->action;
-                                       s->action = NULL;
-
-                                       if (saves[s->rule->accept] == ~0u)
-                                       {
-                                               saves[s->rule->accept] = nSaves++;
-                                       }
-
-                                       bSaveOnHead |= s == head;
-                                       (void) new Save(s, saves[s->rule->accept]); // sets s->action
-                               }
-                       }
-               }
-       }
-
-       // insert actions
-       rules = new State * [nRules];
-
-       memset(rules, 0, (nRules)*sizeof(*rules));
-
-       State *accept = NULL;
-       Accept *accfixup = NULL;
-
-       for (s = head; s; s = s->next)
-       {
-               State * ow;
-
-               if (!s->rule)
-               {
-                       ow = accept;
-               }
-               else
-               {
-                       if (!rules[s->rule->accept])
-                       {
-                               State *n = new State;
-                               (void) new Rule(n, s->rule);
-                               rules[s->rule->accept] = n;
-                               addState(&s->next, n);
-                       }
-
-                       ow = rules[s->rule->accept];
-               }
-
-               for (i = 0; i < s->go.nSpans; ++i)
-               {
-                       if (!s->go.span[i].to)
-                       {
-                               if (!ow)
-                               {
-                                       ow = accept = new State;
-                                       accfixup = new Accept(accept, nRules, saves, rules);
-                                       addState(&s->next, accept);
-                               }
-
-                               s->go.span[i].to = ow;
-                       }
-               }
-       }
-       
-       if (accfixup)
-       {
-               accfixup->genRuleMap();
-       }
-
-       // split ``base'' states into two parts
-       for (s = head; s; s = s->next)
-       {
-               s->isBase = false;
-
-               if (s->link)
-               {
-                       for (i = 0; i < s->go.nSpans; ++i)
-                       {
-                               if (s->go.span[i].to == s)
-                               {
-                                       s->isBase = true;
-                                       split(s);
-
-                                       if (bFlag)
-                                       {
-                                               BitMap::find(&s->next->go, s);
-                                       }
-
-                                       s = s->next;
-                                       break;
-                               }
-                       }
-               }
-       }
-
-       // find ``base'' state, if possible
-       findBaseState();
-
-       delete head->action;
-       head->action = NULL;
-
-       for (s = head; s; s = s->next)
-       {
-               s->go.init (s);
-       }
-}
-
 void DFA::emit(Output & output, uint32_t& ind, const RegExpMap* specMap, const std::string& condName, bool isLastCond, bool& bPrologBrace)
 {
        OutputFile & o = output.source;
diff --git a/re2c/src/codegen/dfa_prepare.cc b/re2c/src/codegen/dfa_prepare.cc
new file mode 100644 (file)
index 0000000..2a703d3
--- /dev/null
@@ -0,0 +1,296 @@
+#include "src/codegen/bitmap.h"
+#include "src/codegen/scc.h"
+#include "src/dfa/dfa.h"
+
+namespace re2c {
+
+void DFA::findSCCs()
+{
+       SCC scc(nStates);
+       State *s;
+
+       for (s = head; s; s = s->next)
+       {
+               s->depth = 0;
+               s->link = NULL;
+       }
+
+       for (s = head; s; s = s->next)
+       {
+               if (!s->depth)
+               {
+                       scc.traverse(s);
+               }
+       }
+
+       calcDepth(head);
+}
+
+void DFA::split(State *s)
+{
+       State *move = new State;
+       (void) new Move(move);
+       addState(&s->next, move);
+       move->link = s->link;
+       move->rule = s->rule;
+       move->go = s->go;
+       s->rule = NULL;
+       s->go.nSpans = 1;
+       s->go.span = new Span[1];
+       s->go.span[0].ub = ubChar;
+       s->go.span[0].to = move;
+}
+
+static uint32_t merge(Span *x0, State *fg, State *bg)
+{
+       Span *x = x0, *f = fg->go.span, *b = bg->go.span;
+       uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans;
+       State *prev = NULL, *to;
+       // NB: we assume both spans are for same range
+
+       for (;;)
+       {
+               if (f->ub == b->ub)
+               {
+                       to = f->to == b->to ? bg : f->to;
+
+                       if (to == prev)
+                       {
+                               --x;
+                       }
+                       else
+                       {
+                               x->to = prev = to;
+                       }
+
+                       x->ub = f->ub;
+                       ++x;
+                       ++f;
+                       --nf;
+                       ++b;
+                       --nb;
+
+                       if (nf == 0 && nb == 0)
+                       {
+                               return x - x0;
+                       }
+               }
+
+               while (f->ub < b->ub)
+               {
+                       to = f->to == b->to ? bg : f->to;
+
+                       if (to == prev)
+                       {
+                               --x;
+                       }
+                       else
+                       {
+                               x->to = prev = to;
+                       }
+
+                       x->ub = f->ub;
+                       ++x;
+                       ++f;
+                       --nf;
+               }
+
+               while (b->ub < f->ub)
+               {
+                       to = b->to == f->to ? bg : f->to;
+
+                       if (to == prev)
+                       {
+                               --x;
+                       }
+                       else
+                       {
+                               x->to = prev = to;
+                       }
+
+                       x->ub = b->ub;
+                       ++x;
+                       ++b;
+                       --nb;
+               }
+       }
+}
+
+void DFA::findBaseState()
+{
+       Span *span = new Span[ubChar - lbChar];
+
+       for (State *s = head; s; s = s->next)
+       {
+               if (!s->link)
+               {
+                       for (uint32_t i = 0; i < s->go.nSpans; ++i)
+                       {
+                               State *to = s->go.span[i].to;
+
+                               if (to->isBase)
+                               {
+                                       to = to->go.span[0].to;
+                                       uint32_t nSpans = merge(span, s, to);
+
+                                       if (nSpans < s->go.nSpans)
+                                       {
+                                               delete [] s->go.span;
+                                               s->go.nSpans = nSpans;
+                                               s->go.span = new Span[nSpans];
+                                               memcpy(s->go.span, span, nSpans*sizeof(Span));
+                                       }
+
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       delete [] span;
+}
+
+void DFA::prepare(uint32_t & max_fill)
+{
+       State *s;
+       uint32_t i;
+
+       bUsedYYBitmap = false;
+
+       findSCCs();
+       head->link = head;
+
+       uint32_t nRules = 0;
+
+       for (s = head; s; s = s->next)
+       {
+               s->depth = maxDist(s);
+               if (max_fill < s->depth)
+               {
+                       max_fill = s->depth;
+               }
+               if (s->rule && s->rule->accept >= nRules)
+               {
+                       nRules = s->rule->accept + 1;
+               }
+       }
+
+       uint32_t nSaves = 0;
+       saves = new uint32_t[nRules];
+       memset(saves, ~0, (nRules)*sizeof(*saves));
+
+       // mark backtracking points
+       bSaveOnHead = false;
+
+       for (s = head; s; s = s->next)
+       {
+               if (s->rule)
+               {
+                       for (i = 0; i < s->go.nSpans; ++i)
+                       {
+                               if (s->go.span[i].to && !s->go.span[i].to->rule)
+                               {
+                                       delete s->action;
+                                       s->action = NULL;
+
+                                       if (saves[s->rule->accept] == ~0u)
+                                       {
+                                               saves[s->rule->accept] = nSaves++;
+                                       }
+
+                                       bSaveOnHead |= s == head;
+                                       (void) new Save(s, saves[s->rule->accept]); // sets s->action
+                               }
+                       }
+               }
+       }
+
+       // insert actions
+       rules = new State * [nRules];
+
+       memset(rules, 0, (nRules)*sizeof(*rules));
+
+       State *accept = NULL;
+       Accept *accfixup = NULL;
+
+       for (s = head; s; s = s->next)
+       {
+               State * ow;
+
+               if (!s->rule)
+               {
+                       ow = accept;
+               }
+               else
+               {
+                       if (!rules[s->rule->accept])
+                       {
+                               State *n = new State;
+                               (void) new Rule(n, s->rule);
+                               rules[s->rule->accept] = n;
+                               addState(&s->next, n);
+                       }
+
+                       ow = rules[s->rule->accept];
+               }
+
+               for (i = 0; i < s->go.nSpans; ++i)
+               {
+                       if (!s->go.span[i].to)
+                       {
+                               if (!ow)
+                               {
+                                       ow = accept = new State;
+                                       accfixup = new Accept(accept, nRules, saves, rules);
+                                       addState(&s->next, accept);
+                               }
+
+                               s->go.span[i].to = ow;
+                       }
+               }
+       }
+       
+       if (accfixup)
+       {
+               accfixup->genRuleMap();
+       }
+
+       // split ``base'' states into two parts
+       for (s = head; s; s = s->next)
+       {
+               s->isBase = false;
+
+               if (s->link)
+               {
+                       for (i = 0; i < s->go.nSpans; ++i)
+                       {
+                               if (s->go.span[i].to == s)
+                               {
+                                       s->isBase = true;
+                                       split(s);
+
+                                       if (bFlag)
+                                       {
+                                               BitMap::find(&s->next->go, s);
+                                       }
+
+                                       s = s->next;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       // find ``base'' state, if possible
+       findBaseState();
+
+       delete head->action;
+       head->action = NULL;
+
+       for (s = head; s; s = s->next)
+       {
+               s->go.init (s);
+       }
+}
+
+} // namespace re2c
diff --git a/re2c/src/codegen/scc.cc b/re2c/src/codegen/scc.cc
new file mode 100644 (file)
index 0000000..ba79dbb
--- /dev/null
@@ -0,0 +1,134 @@
+#include "src/codegen/scc.h"
+#include "src/dfa/dfa.h"
+
+namespace re2c {
+
+SCC::SCC (uint32_t size)
+       : top (new State * [size])
+       , stk (top)
+{}
+
+SCC::~SCC ()
+{
+       delete [] stk;
+}
+
+void SCC::traverse (State * x)
+{
+       *top = x;
+       uint32_t k = ++top - stk;
+       x->depth = k;
+
+       for (uint32_t i = 0; i < x->go.nSpans; ++i)
+       {
+               State *y = x->go.span[i].to;
+               if (y)
+               {
+                       if (y->depth == 0)
+                       {
+                               traverse(y);
+                       }
+                       if (y->depth < x->depth)
+                       {
+                               x->depth = y->depth;
+                       }
+               }
+       }
+
+       if (x->depth == k)
+       {
+               do
+               {
+                       (*--top)->depth = cInfinity;
+                       (*top)->link = x;
+               }
+               while (*top != x);
+       }
+}
+
+bool state_is_in_non_trivial_SCC (const State * s)
+{
+       // does not link to self
+       if (s->link != s)
+       {
+               return true;
+       }
+
+       // or exists i: (s->go.spans[i].to->link == s)
+       //
+       // Note: (s->go.spans[i].to == s) is allowed, corresponds to s
+       // looping back to itself.
+       //
+       for (uint32_t i = 0; i < s->go.nSpans; ++i)
+       {
+               const State* t = s->go.span[i].to;
+               if (t && t->link == s)
+               {
+                       return true;
+               }
+       }
+       // otherwise no
+       return false;
+}
+
+uint32_t maxDist (State * s)
+{
+       if (s->depth != cInfinity)
+       {
+               // Already calculated, just return result.
+               return s->depth;
+       }
+       uint32_t mm = 0;
+
+       for (uint32_t i = 0; i < s->go.nSpans; ++i)
+       {
+               State *t = s->go.span[i].to;
+               if (t)
+               {
+                       uint32_t m = 1;
+                       if (!t->link) // marked as non-key state
+                       {
+                               if (t->depth == cInfinity)
+                               {
+                                       t->depth = maxDist(t);
+                               }
+                               m += t->depth;
+                       }
+                       if (m > mm)
+                       {
+                               mm = m;
+                       }
+               }
+       }
+
+       s->depth = mm;
+       return mm;
+}
+
+void calcDepth (State * head)
+{
+       State * s;
+
+       // mark non-key states by s->link = NULL ;
+       for (s = head; s; s = s->next)
+       {
+               if (s != head && !state_is_in_non_trivial_SCC(s))
+               {
+                       s->link = NULL;
+               }
+               //else: key state, leave alone
+       }
+       for (s = head; s; s = s->next)
+       {
+               s->depth = cInfinity;
+       }
+
+       // calculate max number of transitions before guarantied to reach
+       // a key state.
+       for (s = head; s; s = s->next)
+       {
+               maxDist(s);
+       }
+}
+
+} // namespace re2c
diff --git a/re2c/src/codegen/scc.h b/re2c/src/codegen/scc.h
new file mode 100644 (file)
index 0000000..a08966f
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef __SCC__
+#define __SCC__
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c {
+
+class State;
+
+static const uint32_t cInfinity = ~0u;
+
+class SCC
+{
+public:
+       State ** top;
+       State ** stk;
+
+       SCC (uint32_t);
+       ~SCC ();
+       void traverse (State *);
+
+private:
+       SCC (const SCC &);
+       SCC & operator = (const SCC &);
+};
+
+bool state_is_in_non_trivial_SCC (const State * s);
+uint32_t maxDist (State * s);
+void calcDepth (State * head);
+
+} // namespace re2c
+
+#endif // __SCC__