From dddf15692a146a73da80f16bf81eb318c41a8222 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 7 Apr 2015 17:01:46 +0100 Subject: [PATCH] Continued adding "--skeleton" switch. Output input data to a separate file (otherwise we'll have to keep all generated data in memory, because output has a complex structure and cannot be written to file until it's fully generated) This reduces memory usage significantly (so that there remain no memory consumption problems with "--skeleton" switch). However, on some files re2c generated too much data, e.g. case-insensitive strings: /*!re2c 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' {} */ Exponential growth is a bad thing; must deal with it somehow. Time grows exponentially as well, of course. --- re2c/actions.cc | 3 +- re2c/code.cc | 95 ++++++++++++++++++++++++++----------------------- re2c/dfa.h | 3 +- re2c/main.cc | 5 +++ re2c/output.cc | 22 ++++++++++++ re2c/output.h | 12 +++++++ 6 files changed, 94 insertions(+), 46 deletions(-) diff --git a/re2c/actions.cc b/re2c/actions.cc index 46359003..627cbb17 100644 --- a/re2c/actions.cc +++ b/re2c/actions.cc @@ -1102,7 +1102,8 @@ smart_ptr genCode(RegExp *re, Output & output, uint ind) smart_ptr dfa = make_smart_ptr(new DFA(ins, size, 0, encoding.nCodeUnits(), rep)); if (flag_skeleton) { - dfa->output_skeleton_prolog (output.source, ind); + dfa->output_skeleton_data (output.data); + dfa->output_skeleton_prolog (output.source, ind, output.data.file_name.c_str ()); } dfa->prepare (output.max_fill); diff --git a/re2c/code.cc b/re2c/code.cc index a8d12bce..c2520797 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -1073,7 +1073,7 @@ struct Result {} }; -static void generate_data (OutputFile & o, uint ind, State * s, const std::vector & xs, std::vector & ys) +static void generate_data (DataFile & o, uint ind, State * s, const std::vector & xs, std::vector & ys) { const bool is_default = s == NULL; const bool is_final = !is_default && s->go.nSpans == 1 && s->go.span[0].to == NULL; @@ 
-1081,13 +1081,13 @@ static void generate_data (OutputFile & o, uint ind, State * s, const std::vecto { for (uint i = 0; i < xs.size (); ++i) { - o << indent (ind); + o.file << indent (ind); for (uint j = 0 ; j < xs[i].chars.size (); ++j) { - o.write_char_hex (xs[i].chars[j]); - o << ","; + prtChOrHex (o.file, xs[i].chars[j]); + o.file << ","; } - o << "\n"; + o.file << "\n"; const uint processed = xs[i].chars.size (); const uint consumed = is_final ? xs[i].chars.size () @@ -1132,8 +1132,10 @@ static void generate_data (OutputFile & o, uint ind, State * s, const std::vecto } } -void DFA::output_skeleton_prolog (OutputFile & o, uint ind) +void DFA::output_skeleton_data (DataFile & o) { + uint ind = 0; + std::string yyctype; switch (encoding.szCodeUnit ()) { @@ -1148,22 +1150,19 @@ void DFA::output_skeleton_prolog (OutputFile & o, uint ind) break; } - o << "#include \n"; - o << "int main () {\n"; + o.file << "#define " << mapCodeName["YYCTYPE"] << yyctype << "\n"; + o.file << "#define " << mapCodeName["YYPEEK"] << "() *cursor\n"; + o.file << "#define " << mapCodeName["YYSKIP"] << "() ++cursor\n"; + o.file << "#define " << mapCodeName["YYBACKUP"] << "() marker = cursor\n"; + o.file << "#define " << mapCodeName["YYBACKUPCTX"] << "() ctxmarker = cursor\n"; + o.file << "#define " << mapCodeName["YYRESTORE"] << "() cursor = marker\n"; + o.file << "#define " << mapCodeName["YYRESTORECTX"] << "() cursor = ctxmarker\n"; + o.file << "#define " << mapCodeName["YYLESSTHAN"] << "(n) (limit - cursor) < n\n"; + o.file << "#define " << mapCodeName["YYFILL"] << "(n) { break; }\n"; - o << "#define " << mapCodeName["YYCTYPE"] << yyctype << "\n"; - o << "#define " << mapCodeName["YYPEEK"] << "() *cursor\n"; - o << "#define " << mapCodeName["YYSKIP"] << "() ++cursor\n"; - o << "#define " << mapCodeName["YYBACKUP"] << "() marker = cursor\n"; - o << "#define " << mapCodeName["YYBACKUPCTX"] << "() ctxmarker = cursor\n"; - o << "#define " << mapCodeName["YYRESTORE"] << "() cursor = 
marker\n"; - o << "#define " << mapCodeName["YYRESTORECTX"] << "() cursor = ctxmarker\n"; - o << "#define " << mapCodeName["YYLESSTHAN"] << "(n) (limit - cursor) < n\n"; - o << "#define " << mapCodeName["YYFILL"] << "(n) { break; }\n"; - - o << indent (ind) << "// These strings correspond to paths in DFA.\n"; - o << indent (ind) << "YYCTYPE data [] =\n"; - o << indent (ind) << "{\n"; + o.file << indent (ind) << "// These strings correspond to paths in DFA.\n"; + o.file << indent (ind) << "YYCTYPE data [] =\n"; + o.file << indent (ind) << "{\n"; std::vector xs; std::vector ys; @@ -1181,46 +1180,54 @@ void DFA::output_skeleton_prolog (OutputFile & o, uint ind) max_len = ys[i].consumed; } } - o << indent (ind + 1); + o.file << indent (ind + 1); for (uint j = 0 ; j < max_len; ++j) // pad with YMAXFILL zeroes { - o << "0,"; + o.file << "0,"; } - o << "\n"; - o << indent (ind) << "};\n"; - o << indent (ind) << "const unsigned int data_size = sizeof (data) / sizeof (YYCTYPE);\n"; + o.file << "\n"; + o.file << indent (ind) << "};\n"; + o.file << indent (ind) << "const unsigned int data_size = sizeof (data) / sizeof (YYCTYPE);\n"; - o << indent (ind) << "const unsigned int count = " << count << ";\n"; + o.file << indent (ind) << "const unsigned int count = " << count << ";\n"; uint pos = 0; - o << indent (ind) << "struct Result {\n"; - o << indent (ind + 1) << "unsigned int endpos;\n"; - o << indent (ind + 1) << "unsigned int startpos;\n"; - o << indent (ind + 1) << "unsigned int rule;\n"; - o << indent (ind + 1) << "Result (unsigned int e, unsigned int s, unsigned int r) : endpos (e), startpos (s), rule (r) {}\n"; - o << indent (ind) << "};\n"; - o << indent (ind) << "Result result [] =\n"; - o << indent (ind) << "{\n"; + o.file << indent (ind) << "struct Result {\n"; + o.file << indent (ind + 1) << "unsigned int endpos;\n"; + o.file << indent (ind + 1) << "unsigned int startpos;\n"; + o.file << indent (ind + 1) << "unsigned int rule;\n"; + o.file << indent (ind + 1) << 
"Result (unsigned int e, unsigned int s, unsigned int r) : endpos (e), startpos (s), rule (r) {}\n"; + o.file << indent (ind) << "};\n"; + o.file << indent (ind) << "Result result [] =\n"; + o.file << indent (ind) << "{\n"; for (uint i = 0; i < count; ++i) { - o << indent (ind + 1) << "Result (" << pos + ys[i].consumed << "," << pos + ys[i].processed << "," << ys[i].rule << "),\n"; + o.file << indent (ind + 1) << "Result (" << pos + ys[i].consumed << "," << pos + ys[i].processed << "," << ys[i].rule << "),\n"; pos += ys[i].processed; } - o << indent (ind) << "};\n"; + o.file << indent (ind) << "};\n"; + + o.file << indent (ind) << "const YYCTYPE * cursor = data;\n"; + o.file << indent (ind) << "const YYCTYPE * marker = data;\n"; + o.file << indent (ind) << "const YYCTYPE * ctxmarker = data;\n"; + o.file << indent (ind) << "const YYCTYPE * const limit = &data[data_size - 1];\n"; +} - o << indent (ind) << "const YYCTYPE * cursor = data;\n"; - o << indent (ind) << "const YYCTYPE * marker = data;\n"; - o << indent (ind) << "const YYCTYPE * ctxmarker = data;\n"; - o << indent (ind) << "const YYCTYPE * const limit = &data[data_size - 1];\n"; - o << indent (ind) << "for (unsigned int i = 0; i < count; ++i)\n"; +void DFA::output_skeleton_prolog (OutputFile & o, uint ind, const char * data_name) +{ + o << indent (ind) << "#include \n"; + o << indent (ind) << "#include \"" << data_name << "\"\n"; + o << indent (ind) << "int main ()\n"; o << indent (ind) << "{\n"; + o << indent (ind + 1) << "for (unsigned int i = 0; i < count; ++i)\n"; + o << indent (ind + 1) << "{\n"; } void DFA::output_skeleton_epilog (OutputFile & o, uint ind) { + o << indent (ind + 1) << "}\n"; + o << indent (ind + 1) << "return 0;\n"; o << indent (ind) << "}\n"; - - o << "return 0; }\n"; } void DFA::emit(Output & output, uint& ind, const RegExpMap* specMap, const std::string& condName, bool isLastCond, bool& bPrologBrace) diff --git a/re2c/dfa.h b/re2c/dfa.h index a8a737c6..5dd3c5b1 100644 --- 
a/re2c/dfa.h +++ b/re2c/dfa.h @@ -226,7 +226,8 @@ public: void findSCCs(); void findBaseState(); void prepare(uint &); - void output_skeleton_prolog (OutputFile & o, uint ind); + void output_skeleton_data (DataFile & o); + void output_skeleton_prolog (OutputFile & o, uint ind, const char * data_name); void output_skeleton_epilog (OutputFile & o, uint ind); void emit(Output &, uint&, const RegExpMap*, const std::string&, bool, bool&); diff --git a/re2c/main.cc b/re2c/main.cc index 777006b7..6d7b2a5f 100644 --- a/re2c/main.cc +++ b/re2c/main.cc @@ -466,6 +466,11 @@ int main(int argc, char *argv[]) cerr << "re2c: error: cannot open " << headerFileName << "\n"; return 1; } + if (flag_skeleton && !output.data.open ()) + { + cerr << "re2c: error: cannot open " << output.data.file_name << "\n"; + return 1; + } Scanner scanner (input, output.source); parse (scanner, output); diff --git a/re2c/output.cc b/re2c/output.cc index 49ddf8dc..b2945d52 100644 --- a/re2c/output.cc +++ b/re2c/output.cc @@ -283,9 +283,31 @@ HeaderFile::~HeaderFile () } } +DataFile::DataFile (const char * fn) + : file_name (fn == NULL ? 
"" : fn) + , file () +{ + file_name += ".data"; +} + +bool DataFile::open () +{ + file.open (file_name.c_str (), std::ofstream::out | std::ofstream::binary); + return file.is_open (); +} + +DataFile::~DataFile () +{ + if (file.is_open ()) + { + file.close (); + } +} + Output::Output (const char * source_name, const char * header_name) : source (source_name) , header (header_name) + , data (source_name) , types () , max_fill (1) {} diff --git a/re2c/output.h b/re2c/output.h index 18dc7e35..a3d81d37 100644 --- a/re2c/output.h +++ b/re2c/output.h @@ -1,6 +1,7 @@ #ifndef _output_h #define _output_h +#include #include #include #include @@ -104,10 +105,21 @@ private: FILE * file; }; +struct DataFile +{ + DataFile (const char * fn); + ~DataFile (); + bool open (); + + std::string file_name; + std::ofstream file; +}; + struct Output { OutputFile source; HeaderFile header; + DataFile data; std::vector types; uint max_fill; -- 2.40.0