From: Ulya Trofimovich Date: Tue, 26 May 2015 13:00:01 +0000 (+0100) Subject: Don't hide the ugly fact that default state in '-f' mode is always state 0. X-Git-Tag: 0.15~246 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e315bb4526480d3bd6ffd04baa36144c6ce6de07;p=re2c Don't hide the ugly fact that default state in '-f' mode is always state 0. In '-f' mode, state dispatch generation can be triggered in two ways. Both ways use 're2c::OutputFile::insert_state_goto', which generates state dispatch only if it hasn't already been generated. The two ways are: 1. Explicitly, using '/*!getstate:re2c*/'. In this case default state must be state 0 because it's hardcoded in the invocation of 're2c::OutputFile::insert_state_goto' in 'src/parse/scanner_lex.re'. 2. Implicitly, in 're2c::DFA::emit'. In this case default state must be state 0 because if 'prolog_label' is not 0, it means that it's not the first time 're2c::DFA::emit' is called and state dispatch has already been generated. This commit makes it explicit that re2c always uses state 0. Note: Currently in '-f' mode re2c generates one global dispatch for the whole file (the enumeration of yyFillLabel's is also global). All re2c blocks share the same state dispatch, so in '-f' mode all re2c blocks must reside in the same function and must be parts of the same lexer (exception: in '-r' mode re2c generates one state dispatch per use block). This is clearly an ugly limitation: one is forced to put disconnected lexers in different files in '-f' mode. Now re2c provides conditions as a way to express related blocks, so if users used multiple blocks only for unrelated lexers, we could safely limit the scope of state dispatch to a single block. But conditions can conflict with other re2c features, they are a bit broken and I'm pretty sure some users use multiple blocks (e.g. I used to do it). 
Thus we cannot just make '-f' generate state dispatch on a per-block basis (there are some other obstacles: it's not quite clear which block the '/*!getstate:re2c*/' directive is related to, etc.). I leave the situation 'as is' until better times (when lexer-parser loop is fixed and the whole code generation model is more robust). --- diff --git a/re2c/bootstrap/scanner_lex.cc b/re2c/bootstrap/scanner_lex.cc index 311c78ed..469393ad 100644 --- a/re2c/bootstrap/scanner_lex.cc +++ b/re2c/bootstrap/scanner_lex.cc @@ -328,7 +328,7 @@ yy50: ++YYCURSOR; { tok = pos = cursor; - out.insert_state_goto (topIndent, 0); + out.insert_state_goto (topIndent); ignore_eoc = true; goto echo; } diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index a331cc28..cd3acaff 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -102,6 +102,10 @@ void DFA::emit(Output & output, uint32_t& ind, const RegExpMap* specMap, const s s->label = next_label++; } + if (fFlag) + { + vUsedLabels.insert(0); + } for (s = head; s; s = s->next) { s->go.used_labels (); @@ -157,7 +161,7 @@ void DFA::emit(Output & output, uint32_t& ind, const RegExpMap* specMap, const s if (bProlog) { genCondTable(o, ind, *specMap); - o.insert_state_goto (ind, prolog_label); + o.insert_state_goto (ind); if (cFlag && !DFlag) { if (vUsedLabels.count(prolog_label)) diff --git a/re2c/src/codegen/output.cc b/re2c/src/codegen/output.cc index 9d50ad8c..72a88c90 100644 --- a/re2c/src/codegen/output.cc +++ b/re2c/src/codegen/output.cc @@ -51,7 +51,6 @@ OutputFile::OutputFile (const char * fn) : file_name (fn) , file (NULL) , blocks () - , prolog_label (0) { new_block (); } @@ -150,12 +149,10 @@ void OutputFile::insert_line_info () insert_code (); } -void OutputFile::insert_state_goto (uint32_t ind, uint32_t start_label) +void OutputFile::insert_state_goto (uint32_t ind) { if (fFlag && !bWroteGetState) { - prolog_label = start_label; - vUsedLabels.insert (start_label); blocks.back 
()->fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind)); insert_code (); bWroteGetState = true; @@ -226,7 +223,7 @@ void OutputFile::emit output_line_info (f.stream, line_count + 1, file_name); break; case OutputFragment::STATE_GOTO: - output_state_goto (f.stream, f.indent, prolog_label); + output_state_goto (f.stream, f.indent, 0); break; case OutputFragment::TYPES: output_types (f.stream, f.indent, types); diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h index 860bae25..fae5dea4 100644 --- a/re2c/src/codegen/output.h +++ b/re2c/src/codegen/output.h @@ -69,7 +69,7 @@ struct OutputFile friend OutputFile & operator << (OutputFile & o, const char * s); void insert_line_info (); - void insert_state_goto (uint32_t ind, uint32_t start_label); + void insert_state_goto (uint32_t ind); void insert_types (); void insert_yyaccept_init (uint32_t ind); void insert_yyaccept_selector (uint32_t ind, uint32_t selector); @@ -84,7 +84,6 @@ private: const char * file_name; FILE * file; std::vector blocks; - uint32_t prolog_label; std::ostream & stream (); void insert_code (); diff --git a/re2c/src/parse/scanner_lex.re b/re2c/src/parse/scanner_lex.re index 8a5c24c1..6ddc5082 100644 --- a/re2c/src/parse/scanner_lex.re +++ b/re2c/src/parse/scanner_lex.re @@ -112,7 +112,7 @@ echo: } "/*!getstate:re2c" { tok = pos = cursor; - out.insert_state_goto (topIndent, 0); + out.insert_state_goto (topIndent); ignore_eoc = true; goto echo; }