From: Ulya Trofimovich Date: Thu, 2 Apr 2015 14:31:14 +0000 (+0100) Subject: Continued adding "--skeleton" switch. X-Git-Tag: 0.15~322 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5818aee2c96705bc57d60c7ad3dc74f2fce80703;p=re2c Continued adding "--skeleton" switch. A naive attempt to generate input strings from DFA. The problem is, if the number of spans equals 1, it's hard to determine whether it's some kind of a 'transit' state or a normal state with just one span. All states have spans, but do they use them? The situation is further complicated by 'readCh', which makes it hard to trace how actions influence input operations. --- diff --git a/re2c/code.cc b/re2c/code.cc index 1ada05e5..9408f222 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -600,7 +600,7 @@ void Rule::emit(Output & output, uint ind, bool &, const std::string& condName) o << indent(ind); if (flag_skeleton) { - o << "{ continue; }"; + o << "{ printf (\"%u\\n\", cursor - data); continue; }"; } else if (rule->code->autogen) { @@ -1040,6 +1040,7 @@ void DFA::prepare(uint & max_fill) } s->go.span[i].to = ow; + s->go.span[i].is_default = true; } } } @@ -1087,8 +1088,10 @@ void DFA::prepare(uint & max_fill) } } -static void output_skeleton_prolog (OutputFile & o, uint ind) +void DFA::output_skeleton_prolog (Output & output, uint ind) { + OutputFile & o = output.source; + std::string yyctype; switch (encoding.szCodeUnit ()) { @@ -1103,6 +1106,7 @@ static void output_skeleton_prolog (OutputFile & o, uint ind) break; } + o << "#include <stdio.h>\n"; o << "int main () {\n"; o << "#define " << mapCodeName["YYCTYPE"] << yyctype << "\n"; @@ -1112,10 +1116,9 @@ static void output_skeleton_prolog (OutputFile & o, uint ind) o << "#define " << mapCodeName["YYBACKUPCTX"] << "() ctxmarker = cursor\n"; o << "#define " << mapCodeName["YYRESTORE"] << "() cursor = marker\n"; o << "#define " << mapCodeName["YYRESTORECTX"] << "() cursor = ctxmarker\n"; - o << "#define " << mapCodeName["YYLESSTHAN"] << "(n) 
cursor < limit\n"; + o << "#define " << mapCodeName["YYLESSTHAN"] << "(n) (limit - cursor) < n\n"; o << "#define " << mapCodeName["YYFILL"] << "(n) { break; }\n"; - o << indent (ind) << "const unsigned int data_size = 0xFF;\n"; - o << indent (ind) << "YYCTYPE * data = new YYCTYPE [data_size];\n"; + generate (output, ind); o << indent (ind) << "const YYCTYPE * cursor = data;\n"; o << indent (ind) << "const YYCTYPE * marker = data;\n"; o << indent (ind) << "const YYCTYPE * ctxmarker = data;\n"; @@ -1124,21 +1127,89 @@ static void output_skeleton_prolog (OutputFile & o, uint ind) o << indent (ind) << "{\n"; } -static void output_skeleton_epilog (OutputFile & o, uint ind) +void DFA::output_skeleton_epilog (OutputFile & o, uint ind) { o << indent (ind) << "}\n"; - o << indent (ind) << "delete [] data;\n"; o << "return 0; }\n"; } +static void generate_data (State * s, bool def, const std::vector<std::vector<uint> > & xs, std::vector<std::pair<std::vector<uint>, bool> > & ys) +{ + if (s->go.nSpans <= 1) + { + for (uint i = 0; i < xs.size (); ++i) + { + ys.push_back (std::make_pair (std::vector<uint> (xs[i]), def)); + } + } + else if (s->generated) + { + return; + } + else + { + s->generated = true; + for (uint i = 0; i < s->go.nSpans; ++i) + { + std::vector<std::vector<uint> > zs; + for (uint j = 0; j < xs.size (); ++j) + { + std::vector<uint> z (xs[j]); + z.push_back (s->go.span[i].ub - 1); + zs.push_back (z); + } + generate_data (s->go.span[i].to, s->go.span[i].is_default, zs, ys); + } + } +} + +void DFA::generate (Output & output, uint ind) +{ + OutputFile & o = output.source; + + std::vector<std::vector<uint> > xs; + std::vector<std::pair<std::vector<uint>, bool> > ys; + std::vector<uint> x; + xs.push_back (x); + generate_data (head, false, xs, ys); + ys.push_back (std::make_pair (std::vector<uint> (output.max_fill), false)); // pad with YYMAXFILL zeroes + + o << indent (ind) << "// These strings correspond to paths in DFA.\n"; + o << indent (ind) << "YYCTYPE data [] =\n"; + o << indent (ind) << "{\n"; + for (uint i = 0; i < ys.size (); ++i) + { + o << indent (ind + 1); + for (uint j = 0 ; j < 
ys[i].first.size (); ++j) + { + o << ys[i].first[j] << ","; + } + o << "\n"; + } + o << indent (ind) << "};\n"; + o << indent (ind) << "const unsigned int data_size = sizeof (data) / sizeof (YYCTYPE);\n"; + + uint pos = 0; + const uint pos_num = ys.size () - 1; // skip padding + o << indent (ind) << "unsigned int positions [] =\n"; + o << indent (ind) << "{\n"; + for (uint i = 0; i < pos_num; ++i) + { + pos += ys[i].first.size (); + o << indent (ind + 1) << pos << "," << ys[i].second << ",\n"; + } + o << indent (ind) << "};\n"; + o << indent (ind) << "const unsigned int positions_size = " << pos_num * 2 << ";\n"; +} + void DFA::emit(Output & output, uint& ind, const RegExpMap* specMap, const std::string& condName, bool isLastCond, bool& bPrologBrace) { OutputFile & o = output.source; if (flag_skeleton) { - output_skeleton_prolog (o, ind); + output_skeleton_prolog (output, ind); } bool bProlog = (!cFlag || !bWroteCondCheck); diff --git a/re2c/dfa.cc b/re2c/dfa.cc index d9abf63b..7032d5b6 100644 --- a/re2c/dfa.cc +++ b/re2c/dfa.cc @@ -50,6 +50,7 @@ State::State() , kernel(NULL) , isPreCtxt(false) , isBase(false) + , generated(false) , go() , action(NULL) { diff --git a/re2c/dfa.h b/re2c/dfa.h index a415ebd9..d528ec3d 100644 --- a/re2c/dfa.h +++ b/re2c/dfa.h @@ -168,6 +168,7 @@ public: bool isPreCtxt; bool isBase; + bool generated; Go go; Action *action; @@ -189,6 +190,7 @@ private: , kCount(oth.kCount) , kernel(oth.kernel) , isBase(oth.isBase) + , generated(oth.generated) , go(oth.go) , action(oth.action) { @@ -228,6 +230,9 @@ public: void findSCCs(); void findBaseState(); void prepare(uint &); + void generate (Output & o, uint ind); + void output_skeleton_prolog (Output & o, uint ind); + void output_skeleton_epilog (OutputFile & o, uint ind); void emit(Output &, uint&, const RegExpMap*, const std::string&, bool, bool&); friend std::ostream& operator<<(std::ostream&, const DFA&); diff --git a/re2c/go.h b/re2c/go.h index 15ae8431..2a99e1df 100644 --- a/re2c/go.h +++ 
b/re2c/go.h @@ -16,7 +16,14 @@ struct If; // forward struct Span { uint ub; + bool is_default; State * to; + + inline Span () + : ub (0) + , is_default (false) + , to (NULL) + {} uint show (std::ostream&, uint) const; };