No serious changes intended (mostly cleanup and comments).
The underlying algorithm for finding strongly connected components
(SCC) remains the same: it's a slightly modified Tarjan's algorithm.
We now mark non-YYFILL states by setting the YYFILL argument to zero,
which is only logical: why would anyone call YYFILL to provide zero
characters? In fact, re2c didn't generate the 'YYFILL(0)' call itself,
but some remnants of YYFILL did remain (which caused changes in tests).
src/ir/bytecode/nfa.h \
src/ir/adfa/action.h \
src/ir/adfa/adfa.h \
- src/ir/adfa/scc.h \
src/ir/dfa/dfa.h \
src/ir/regexp/encoding/case.h \
src/ir/regexp/encoding/enc.h \
src/ir/nfa/split.cc \
src/ir/adfa/adfa.cc \
src/ir/adfa/prepare.cc \
- src/ir/adfa/scc.cc \
src/ir/dfa/determinization.cc \
+ src/ir/dfa/fillpoints.cc \
src/ir/dfa/minimization.cc \
src/ir/regexp/display.cc \
src/ir/regexp/encoding/enc.cc \
class label_t;
-static void need (OutputFile & o, uint32_t ind, bool & readCh, uint32_t n, bool bSetMarker);
+static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker);
static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s);
static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set<label_t> & used_labels);
static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept);
static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r);
static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept);
static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton);
-static void genYYFill (OutputFile & o, uint32_t need);
+static void genYYFill (OutputFile & o, size_t need);
static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond);
static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex);
const bool read_ahead = s
&& s->next
&& s->next->action.type != Action::RULE;
- if (s->link)
+ if (s->fill != 0)
{
o.wstring(opts->input_api.stmt_skip (ind));
}
readCh = false;
}
- if (s->link)
+ if (s->fill != 0)
{
- need(o, ind, readCh, s->depth, false);
+ need(o, ind, readCh, s->fill, false);
}
}
if (used_labels.count(s->label))
{
- if (s->link)
+ if (s->fill != 0)
{
o.wstring(opts->input_api.stmt_skip (ind));
}
o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n");
}
- if (s->link)
+ if (s->fill != 0)
{
- need(o, ind, readCh, s->depth, initial.setMarker);
+ need(o, ind, readCh, s->fill, initial.setMarker);
}
else
{
o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n");
}
- if (s->link)
+ if (s->fill != 0)
{
o.wstring(opts->input_api.stmt_skip_backup (ind));
- need(o, ind, readCh, s->depth, false);
+ need(o, ind, readCh, s->fill, false);
}
else
{
}
}
-void need (OutputFile & o, uint32_t ind, bool & readCh, uint32_t n, bool bSetMarker)
+void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker)
{
if (opts->target == opt_t::DOT)
{
}
}
-void genYYFill (OutputFile & o, uint32_t need)
+void genYYFill (OutputFile & o, size_t need)
{
o.wstring(replaceParam (opts->fill, opts->fill_arg, need));
if (!opts->fill_naked)
{
if (opts->fill_arg_use)
{
- o.ws("(").wu32(need).ws(")");
+ o.ws("(").wu64(need).ws(")");
}
o.ws(";");
}
: expr_lessthan (1);
}
-std::string InputAPI::expr_lessthan (uint32_t n) const
+std::string InputAPI::expr_lessthan (size_t n) const
{
std::ostringstream s;
switch (type_)
std::string stmt_backup_peek (uint32_t ind) const;
std::string stmt_skip_backup_peek (uint32_t ind) const;
std::string expr_lessthan_one () const;
- std::string expr_lessthan (uint32_t n) const;
+ std::string expr_lessthan (size_t n) const;
};
} // end namespace re2c
void OutputFile::emit
( const std::vector<std::string> & types
- , uint32_t max_fill
+ , size_t max_fill
)
{
if (file != NULL)
}
}
-void output_yymaxfill (std::ostream & o, uint32_t max_fill)
+void output_yymaxfill (std::ostream & o, size_t max_fill)
{
o << "#define YYMAXFILL " << max_fill << "\n";
}
void set_block_line (uint32_t l);
uint32_t get_block_line () const;
- void emit (const std::vector<std::string> & types, uint32_t max_fill);
+ void emit (const std::vector<std::string> & types, size_t max_fill);
FORBID_COPY (OutputFile);
};
HeaderFile header;
std::vector<std::string> types;
std::set<std::string> skeletons;
- uint32_t max_fill;
+ size_t max_fill;
Output (const char * source_name, const char * header_name);
~Output ();
void output_types (std::ostream &, uint32_t, const std::vector<std::string> &);
void output_version_time (std::ostream &);
void output_yyaccept_init (std::ostream &, uint32_t, bool);
-void output_yymaxfill (std::ostream &, uint32_t);
+void output_yymaxfill (std::ostream &, size_t);
// helpers
std::string output_get_state ();
DFA::DFA
( const dfa_t &dfa
+ , const std::vector<size_t> &fill
, Skeleton *skel
, const charset_t &charset
, const std::string &n
dfa_state_t *t = dfa.states[i];
s->isPreCtxt = t->ctx;
s->rule = t->rule;
+ s->fill = fill[i];
s->go.span = allocate<Span>(nchars);
uint32_t j = 0;
for (uint32_t c = 0; c < nchars; ++j)
label_t label;
RuleOp * rule;
State * next;
- State * link;
- uint32_t depth; // for finding SCCs
+ size_t fill;
bool isPreCtxt;
bool isBase;
: label (label_t::first ())
, rule (NULL)
, next (0)
- , link (NULL)
- , depth (0)
+ , fill (0)
, isPreCtxt (false)
, isBase (false)
, go ()
State * head;
// statistics
- uint32_t max_fill;
+ size_t max_fill;
bool need_backup;
bool need_backupctx;
bool need_accept;
public:
DFA ( const dfa_t &dfa
+ , const std::vector<size_t> &fill
, Skeleton *skel
, const charset_t &charset
, const std::string &n
private:
void addState(State*, State *);
void split (State *);
- void findSCCs ();
void findBaseState ();
void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start) const;
void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
#include "src/globals.h"
#include "src/ir/adfa/action.h"
#include "src/ir/adfa/adfa.h"
-#include "src/ir/adfa/scc.h"
#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/rule_rank.h"
#include "src/util/allocate.h"
namespace re2c {
-void DFA::findSCCs()
-{
- SCC scc(nStates);
- State *s;
-
- for (s = head; s; s = s->next)
- {
- s->depth = 0;
- s->link = NULL;
- }
-
- for (s = head; s; s = s->next)
- {
- if (!s->depth)
- {
- scc.traverse(s);
- }
- }
-
- calcDepth(head);
-}
-
void DFA::split(State *s)
{
State *move = new State;
addState(move, s);
move->action.set_move ();
- move->link = s->link;
move->rule = s->rule;
+ move->fill = s->fill;
move->go = s->go;
s->rule = NULL;
s->go.nSpans = 1;
for (State *s = head; s; s = s->next)
{
- if (!s->link)
+ if (s->fill == 0)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
{
bUsedYYBitmap = false;
- findSCCs();
- head->link = head;
-
// create rule states
std::map<rule_rank_t, State *> rules;
for (State * s = head; s; s = s->next)
{
s->isBase = false;
- if (s->link)
+ if (s->fill != 0)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
max_fill = 0;
for (State * s = head; s; s = s->next)
{
- s->depth = maxDist(s);
- if (max_fill < s->depth)
+ if (max_fill < s->fill)
{
- max_fill = s->depth;
+ max_fill = s->fill;
}
}
+++ /dev/null
-#include "src/codegen/go.h"
-#include "src/ir/adfa/adfa.h"
-#include "src/ir/adfa/scc.h"
-
-namespace re2c {
-
-SCC::SCC (uint32_t size)
- : top (new State * [size])
- , stk (top)
-{}
-
-SCC::~SCC ()
-{
- delete [] stk;
-}
-
-void SCC::traverse (State * x)
-{
- *top = x;
- const uint32_t k = static_cast<uint32_t> (++top - stk);
- x->depth = k;
-
- for (uint32_t i = 0; i < x->go.nSpans; ++i)
- {
- State *y = x->go.span[i].to;
- if (y)
- {
- if (y->depth == 0)
- {
- traverse(y);
- }
- if (y->depth < x->depth)
- {
- x->depth = y->depth;
- }
- }
- }
-
- if (x->depth == k)
- {
- do
- {
- (*--top)->depth = cInfinity;
- (*top)->link = x;
- }
- while (*top != x);
- }
-}
-
-bool state_is_in_non_trivial_SCC (const State * s)
-{
- // does not link to self
- if (s->link != s)
- {
- return true;
- }
-
- // or exists i: (s->go.spans[i].to->link == s)
- //
- // Note: (s->go.spans[i].to == s) is allowed, corresponds to s
- // looping back to itself.
- //
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- const State* t = s->go.span[i].to;
- if (t && t->link == s)
- {
- return true;
- }
- }
- // otherwise no
- return false;
-}
-
-uint32_t maxDist (State * s)
-{
- if (s->depth != cInfinity)
- {
- // Already calculated, just return result.
- return s->depth;
- }
- uint32_t mm = 0;
-
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- State *t = s->go.span[i].to;
- if (t)
- {
- uint32_t m = 1;
- if (!t->link) // marked as non-key state
- {
- if (t->depth == cInfinity)
- {
- t->depth = maxDist(t);
- }
- m += t->depth;
- }
- if (m > mm)
- {
- mm = m;
- }
- }
- }
-
- s->depth = mm;
- return mm;
-}
-
-void calcDepth (State * head)
-{
- State * s;
-
- // mark non-key states by s->link = NULL ;
- for (s = head; s; s = s->next)
- {
- if (s != head && !state_is_in_non_trivial_SCC(s))
- {
- s->link = NULL;
- }
- //else: key state, leave alone
- }
- for (s = head; s; s = s->next)
- {
- s->depth = cInfinity;
- }
-
- // calculate max number of transitions before guarantied to reach
- // a key state.
- for (s = head; s; s = s->next)
- {
- maxDist(s);
- }
-}
-
-} // namespace re2c
+++ /dev/null
-#ifndef _RE2C_IR_ADFA_SCC_
-#define _RE2C_IR_ADFA_SCC_
-
-#include "src/util/c99_stdint.h"
-#include "src/util/forbid_copy.h"
-
-namespace re2c {
-
-class State;
-
-static const uint32_t cInfinity = ~0u;
-
-class SCC
-{
-public:
- State ** top;
- State ** stk;
-
- SCC (uint32_t);
- ~SCC ();
- void traverse (State *);
-
- FORBID_COPY (SCC);
-};
-
-bool state_is_in_non_trivial_SCC (const State * s);
-uint32_t maxDist (State * s);
-void calcDepth (State * head);
-
-} // namespace re2c
-
-#endif // _RE2C_IR_ADFA_SCC_
dfa.minimization();
+ // find YYFILL states and calculate argument to YYFILL
+ std::vector<size_t> fill;
+ fillpoints(dfa, fill);
+
// ADFA stands for 'DFA with actions'
- DFA *adfa = new DFA(dfa, skeleton, cs, name, cond, line);
+ DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);
/*
* note [reordering DFA states]
void minimization_moore(size_t *part);
};
+void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
+
} // namespace re2c
#endif // _RE2C_IR_DFA_DFA_
--- /dev/null
+#include <limits>
+#include <stack>
+#include <string.h>
+
+#include "src/ir/dfa/dfa.h"
+
+namespace re2c
+{
+
+// Sentinel 'lowlink' values (see the note below).
+// The first one is deliberately not called INFINITY: that name is a macro
+// defined by <math.h> / <cmath>, and if either header gets included (even
+// transitively) a declaration using it would no longer compile.
+static const size_t SCC_INF = std::numeric_limits<size_t>::max();
+static const size_t UNDEFINED = SCC_INF - 1;
+
+// Returns true iff at least one of the first 'narcs' entries of 'arcs'
+// equals 'node'.
+static bool loopback(size_t node, size_t narcs, const size_t *arcs)
+{
+	for (size_t i = 0; i < narcs; ++i)
+	{
+		if (arcs[i] == node)
+		{
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * note [finding strongly connected components of DFA]
+ *
+ * A slight modification of Tarjan's algorithm.
+ *
+ * The algorithm walks graph in depth-first order. It maintains a stack
+ * of nodes that have already been visited but haven't been assigned to
+ * SCC yet. For each node the algorithm calculates 'lowlink': index of
+ * the highest ancestor node reachable in one step from a descendant of
+ * the node. Lowlink is used to determine when a set of nodes should be
+ * popped off the stack into a new SCC.
+ *
+ * We use lowlink to hold different kinds of information:
+ *   - values in range [0 .. stack size] mean that this node is on stack
+ *     (link to a node with the smallest index reachable from this one)
+ *   - UNDEFINED means that this node has not been visited yet
+ *   - SCC_INF means that this node has already been popped off stack
+ *
+ * We use stack size (rather than topological sort index) as unique index
+ * of a node on stack. This is safe because indices of nodes on stack are
+ * still unique and less than indices of nodes that have been popped off
+ * stack (SCC_INF).
+ *
+ * Parameters:
+ *   dfa     - automaton to walk; 'states[i]->arcs' holds 'nchars' target
+ *             state indices, dfa_t::NIL marking an absent transition
+ *   stack   - nodes visited so far that are not yet assigned to an SCC
+ *   lowlink - per-node value as described above, updated in place
+ *   trivial - trivial[i] is set to true only when 'i' turns out to be
+ *             an SCC of its own without an arc back to itself; entries
+ *             of all other nodes are left untouched
+ *   i       - node to visit
+ */
+static void scc(
+	const dfa_t &dfa,
+	std::stack<size_t> &stack,
+	std::vector<size_t> &lowlink,
+	std::vector<bool> &trivial,
+	size_t i)
+{
+	// index of this node on stack: stack size before the push
+	const size_t link = stack.size();
+	lowlink[i] = link;
+	stack.push(i);
+
+	const size_t *arcs = dfa.states[i]->arcs;
+	for (size_t c = 0; c < dfa.nchars; ++c)
+	{
+		const size_t j = arcs[c];
+		if (j != dfa_t::NIL)
+		{
+			// not visited yet: descend first, so that lowlink[j] is final
+			if (lowlink[j] == UNDEFINED)
+			{
+				scc(dfa, stack, lowlink, trivial, j);
+			}
+			// nodes already popped off stack hold SCC_INF and thus
+			// never lower lowlink[i]
+			if (lowlink[j] < lowlink[i])
+			{
+				lowlink[i] = lowlink[j];
+			}
+		}
+	}
+
+	// lowlink unchanged: this node is the root of an SCC
+	if (lowlink[i] == link)
+	{
+		// SCC is non-trivial (has loops) iff it either:
+		//   - consists of multiple nodes (they all must be interconnected)
+		//   - consists of single node which loops back to itself
+		trivial[i] = i == stack.top()
+			&& !loopback(i, dfa.nchars, arcs);
+
+		// pop the whole SCC (everything down to and including the root)
+		size_t j;
+		do
+		{
+			j = stack.top();
+			stack.pop();
+			lowlink[j] = SCC_INF;
+		}
+		while (j != i);
+	}
+}
+
+// Computes fill[i] for state 'i', first recursing into every arc target
+// whose entry is still UNDEFINED.
+//
+// The result is the largest, over all existing arcs of 'i', of:
+//   - 1 + fill[target]  if the target's 'trivial' flag is set
+//   - 1                 otherwise
+// and 0 if the state has no arcs at all.
+static void calc_fill(
+	const dfa_t &dfa,
+	const std::vector<bool> &trivial,
+	std::vector<size_t> &fill,
+	size_t i)
+{
+	// already calculated, or being calculated further up the call chain
+	if (fill[i] != UNDEFINED)
+	{
+		return;
+	}
+
+	// Provisional value; it also stops the recursion from re-entering
+	// this state when the walk comes back to it.
+	fill[i] = 0;
+
+	const size_t *targets = dfa.states[i]->arcs;
+	for (size_t sym = 0; sym < dfa.nchars; ++sym)
+	{
+		const size_t next = targets[sym];
+		if (next == dfa_t::NIL)
+		{
+			continue;
+		}
+
+		calc_fill(dfa, trivial, fill, next);
+
+		// one step to reach 'next', plus its own value when it is flagged
+		const size_t dist = trivial[next] ? fill[next] + 1 : 1;
+		if (fill[i] < dist)
+		{
+			fill[i] = dist;
+		}
+	}
+}
+
+// Calculates YYFILL argument for each state of 'dfa'.
+//
+// On return 'fill' has one entry per DFA state; any previous contents are
+// discarded. Entry 0 (initial state) and entries of states not flagged as
+// trivial by 'scc' hold the calculated value; entries of all other states
+// are zero.
+void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
+{
+	const size_t size = dfa.states.size();
+
+	// No states: nothing to calculate, and state 0 (the starting point of
+	// both walks below) does not exist.
+	if (size == 0)
+	{
+		fill.clear();
+		return;
+	}
+
+	// find DFA states that belong to non-trivial SCC
+	// NOTE(review): both walks start from state 0 only; this assumes that
+	// every state is reachable from state 0 - TODO confirm.
+	std::stack<size_t> stack;
+	std::vector<size_t> lowlink(size, UNDEFINED);
+	std::vector<bool> trivial(size, false);
+	scc(dfa, stack, lowlink, trivial, 0);
+
+	// for each DFA state, calculate YYFILL argument:
+	// maximal path length to the next YYFILL state
+	//
+	// 'assign' rather than 'resize': every entry must start as UNDEFINED,
+	// otherwise values left over in a non-empty 'fill' would be taken
+	// for already calculated ones.
+	fill.assign(size, UNDEFINED);
+	calc_fill(dfa, trivial, fill, 0);
+
+	// The following states must trigger YYFILL:
+	//   - initial state
+	//   - all states in non-trivial SCCs
+	// for other states, reset YYFILL argument to zero
+	for (size_t i = 1; i < size; ++i)
+	{
+		if (trivial[i])
+		{
+			fill[i] = 0;
+		}
+	}
+}
+
+} // namespace re2c
void Skeleton::emit_start
( OutputFile & o
- , uint32_t maxfill
+ , size_t maxfill
, bool backup
, bool backupctx
, bool accept
o.ws("\n");
o.ws("\nint lex_").wstring(name).ws("()");
o.ws("\n{");
- o.ws("\n").wind(1).ws("const size_t padding = ").wu32(maxfill).ws("; /* YYMAXFILL */");
+ o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */");
o.ws("\n").wind(1).ws("int status = 0;");
o.ws("\n").wind(1).ws("size_t input_len = 0;");
o.ws("\n").wind(1).ws("size_t keys_count = 0;");
static void emit_prolog (OutputFile & o);
void emit_start
( OutputFile & o
- , uint32_t maxfill
+ , size_t maxfill
, bool backup
, bool backupctx
, bool accept
switch (YYGETSTATE()) {
default: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string]
switch (YYGETSTATE()) {
default: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
}
switch (YYGETSTATE()) {
default: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string]
switch (YYGETSTATE()) {
default: abort();
case -1: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string]
switch (YYGETSTATE()) {
default: abort();
case -1: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
}
switch (YYGETSTATE()) {
default: abort();
case -1: goto yy0;
- case 0: goto yyFillLabel0;
}
yy0:
- YYSETSTATE(0);
-yyFillLabel0:
{}
re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string]