From 70f1cb9628c028cc03d3d6ba5146a33712e83774 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 17 Sep 2015 13:52:09 +0100 Subject: [PATCH] Gather some DFA statistics and use it to omit unused code with '--skeleton'. --- re2c/src/codegen/emit_dfa.cc | 4 +- re2c/src/codegen/prepare_dfa.cc | 44 +++++++++++------ re2c/src/codegen/skeleton/generate_code.cc | 56 ++++++++++++++++------ re2c/src/codegen/skeleton/skeleton.h | 14 +++++- re2c/src/ir/bytecode/bytecode.cc | 7 ++- re2c/src/ir/dfa/dfa.cc | 12 +++++ re2c/src/ir/dfa/dfa.h | 17 +++++-- 7 files changed, 115 insertions(+), 39 deletions(-) diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 603927b0..b3b0f390 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -131,10 +131,10 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra if (flag_skeleton) { skeleton->emit_data (o.file_name); - skeleton->emit_start (o, output.max_fill, accepts.size () > 1); + skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept); uint32_t i = 2; emit_body (o, i, used_labels); - skeleton->emit_end (o); + skeleton->emit_end (o, need_backup, need_backupctx); } else { diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index 397ab4bb..20972251 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -154,22 +154,13 @@ void DFA::findBaseState() operator delete (span); } -void DFA::prepare(OutputFile & o, uint32_t & max_fill) +void DFA::prepare () { bUsedYYBitmap = false; findSCCs(); head->link = head; - for (State * s = head; s; s = s->next) - { - s->depth = maxDist(s); - if (max_fill < s->depth) - { - max_fill = s->depth; - } - } - // create rule states std::map rules; for (State * s = head; s; s = s->next) @@ -228,10 +219,6 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) } } } - if (accepts.size () > 1) - { - o.set_used_yyaccept (); - } default_state->action.set_accept (&accepts); } @@ -289,4 +276,33 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) } } +void DFA::calc_stats () +{ + // calculate 'YYMAXFILL' + max_fill = 0; + for (State * s = head; s; s = s->next) + { + s->depth = maxDist(s); + if (max_fill < s->depth) + { + max_fill = s->depth; + } + } + + // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used + need_backup = accepts.size () > 0; + + // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used + for (State * s = head; s; s = s->next) + { + if (s->isPreCtxt) + { + need_backupctx = true; + } + } + + // determine if 'yyaccept' variable is used + need_accept = accepts.size () > 1; +} + } // namespace re2c diff --git a/re2c/src/codegen/skeleton/generate_code.cc b/re2c/src/codegen/skeleton/generate_code.cc index 21cfd621..4cf2498a 100644 --- a/re2c/src/codegen/skeleton/generate_code.cc +++ b/re2c/src/codegen/skeleton/generate_code.cc @@ -86,7 +86,13 @@ void Skeleton::emit_prolog (OutputFile & o) o << "\n"; } -void Skeleton::emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) const +void Skeleton::emit_start + ( OutputFile & o + , uint32_t maxfill + , bool backup + , bool backupctx + , bool accept + ) const { const uint32_t default_rule = maxkey (); @@ -96,10 +102,16 @@ void Skeleton::emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) cons exact_uint (o, sizeof_key); o << "\n" << "#define YYPEEK() *cursor"; o << "\n" << "#define YYSKIP() ++cursor"; - o << "\n" << "#define YYBACKUP() marker = cursor"; - o << "\n" << "#define YYBACKUPCTX() ctxmarker = cursor"; - o << "\n" << "#define YYRESTORE() cursor = marker"; - o << "\n" << "#define YYRESTORECTX() cursor = ctxmarker"; + if (backup) + { + o << "\n" << "#define YYBACKUP() marker = cursor"; + o << "\n" << "#define YYRESTORE() cursor = marker"; + } + if (backupctx) + { + o << "\n" << "#define YYBACKUPCTX() ctxmarker = cursor"; + o << "\n" << "#define YYRESTORECTX() cursor = ctxmarker"; + } o << "\n" << "#define YYLESSTHAN(n) (limit - cursor) < n"; o << "\n" << "#define YYFILL(n) { break; }"; o << "\n"; @@ -158,8 +170,6 @@ void Skeleton::emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) cons o << "\n" << indString << "YYCTYPE * input = NULL;"; o << "\n" << indString << "YYKEYTYPE * keys = NULL;"; o << "\n" << indString << "const YYCTYPE * cursor = NULL;"; - o << "\n" << indString << "const YYCTYPE * marker = NULL;"; - o << "\n" << indString << "const YYCTYPE * ctxmarker = NULL;"; o << "\n" << indString << "const YYCTYPE * limit = NULL;"; o << "\n" << indString << "const YYCTYPE * eof = NULL;"; o << "\n" << indString << "unsigned int i = 0;"; @@ -189,16 +199,22 @@ void Skeleton::emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) cons o << "\n" << indString << "}"; o << "\n"; o << "\n" << indString << "cursor = input;"; - o << "\n" << indString << "marker = input;"; - o << "\n" << indString << "ctxmarker = input;"; o << "\n" << indString << "limit = input + input_len + padding;"; o << "\n" << indString << "eof = input + input_len;"; o << "\n"; o << "\n" << indString << "for (i = 0; status == 0 && cursor < eof && i < keys_count; ++i)"; o << "\n" << indString << "{"; + if (backup) + { + o << "\n" << indString << indString << "const YYCTYPE * marker = NULL;"; + } + if (backupctx) + { + o << "\n" << indString << indString << "const YYCTYPE * ctxmarker = NULL;"; + } o << "\n" << indString << indString << "const YYCTYPE * token = cursor;"; o << "\n" << indString << indString << "YYCTYPE yych;"; - if (yyaccept) + if (accept) { o << "\n" << indString << indString << "unsigned int yyaccept = 0;"; } @@ -209,7 +225,11 @@ void Skeleton::emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) cons o << "\n"; } -void Skeleton::emit_end (OutputFile & o) const +void Skeleton::emit_end + ( OutputFile & o + , bool backup + , bool backupctx + ) const { o << "\n" << indString << "}"; o << "\n" << indString << "if (status == 0)"; @@ -237,10 +257,16 @@ void Skeleton::emit_end (OutputFile & o) const o << "\n" << "#undef YYKEYTYPE"; o << "\n" << "#undef YYPEEK"; o << "\n" << "#undef YYSKIP"; - o << "\n" << "#undef YYBACKUP"; - o << "\n" << "#undef YYBACKUPCTX"; - o << "\n" << "#undef YYRESTORE"; - o << "\n" << "#undef YYRESTORECTX"; + if (backup) + { + o << "\n" << "#undef YYBACKUP"; + o << "\n" << "#undef YYRESTORE"; + } + if (backupctx) + { + o << "\n" << "#undef YYBACKUPCTX"; + o << "\n" << "#undef YYRESTORECTX"; + } o << "\n" << "#undef YYLESSTHAN"; o << "\n" << "#undef YYFILL"; o << "\n"; diff --git a/re2c/src/codegen/skeleton/skeleton.h b/re2c/src/codegen/skeleton/skeleton.h index 147505c3..85835486 100644 --- a/re2c/src/codegen/skeleton/skeleton.h +++ b/re2c/src/codegen/skeleton/skeleton.h @@ -73,8 +73,18 @@ struct Skeleton void warn_undefined_control_flow (); void emit_data (const char * fname); static void emit_prolog (OutputFile & o); - void emit_start (OutputFile & o, uint32_t maxfill, bool yyaccept) const; - void emit_end (OutputFile & o) const; + void emit_start + ( OutputFile & o + , uint32_t maxfill + , bool backup + , bool backupctx + , bool accept + ) const; + void emit_end + ( OutputFile & o + , bool backup + , bool backupctx + ) const; static void emit_epilog (OutputFile & o, const std::vector & names); static void emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank, const std::string & name); diff --git a/re2c/src/ir/bytecode/bytecode.cc b/re2c/src/ir/bytecode/bytecode.cc index 8d2ed241..bd88ab54 100644 --- a/re2c/src/ir/bytecode/bytecode.cc +++ b/re2c/src/ir/bytecode/bytecode.cc @@ -64,8 +64,13 @@ smart_ptr genCode (RegExp *re, Output & output, const std::string & cond) , rep )); + // accumulate global statistics from this particular DFA output.names.push_back (dfa->name); - dfa->prepare (output.source, output.max_fill); + output.max_fill = std::max (output.max_fill, dfa->max_fill); + if (dfa->need_accept) + { + output.source.set_used_yyaccept (); + } return dfa; } diff --git a/re2c/src/ir/dfa/dfa.cc b/re2c/src/ir/dfa/dfa.cc index a58e4225..f0e45f18 100644 --- a/re2c/src/ir/dfa/dfa.cc +++ b/re2c/src/ir/dfa/dfa.cc @@ -59,6 +59,12 @@ DFA::DFA , toDo(NULL) , free_ins(ins) , free_rep(rep) + + // statistics + , max_fill (0) + , need_backup (false) + , need_backupctx (false) + , need_accept (false) { if (name.empty ()) { @@ -149,6 +155,12 @@ DFA::DFA // skeleton must be constructed after DFA construction // but prior to any other DFA transformations skeleton = new Skeleton (*this); + + // skeleton is constructed, do further DFA transformations + prepare (); + + // finally gather overall DFA statistics + calc_stats (); } DFA::~DFA() diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 9725ac99..66eec1c0 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -29,6 +29,12 @@ public: const Ins * free_ins; const Char * free_rep; + // statistics + uint32_t max_fill; + bool need_backup; + bool need_backupctx; + bool need_accept; + public: DFA ( const std::string & @@ -40,18 +46,19 @@ public: , const Char * ); ~DFA (); + void emit (Output &, uint32_t &, bool, bool &); + friend std::ostream & operator << (std::ostream &, const DFA &); + +private: void addState (State **, State *); State * findState (Ins **, Ins **); void split (State *); - void findSCCs (); void findBaseState (); - void prepare (OutputFile & o, uint32_t &); + void calc_stats (); + void prepare (); void count_used_labels (std::set & used, label_t prolog, label_t start, bool force_start) const; void emit_body (OutputFile &, uint32_t &, const std::set & used_labels) const; - void emit (Output &, uint32_t &, bool, bool &); - - friend std::ostream & operator << (std::ostream &, const DFA &); FORBID_COPY (DFA); }; -- 2.40.0