From e705710cb038d5734e66546f7b304bed19b40a1f Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 13 May 2016 13:51:44 +0100 Subject: [PATCH] Fixed tag substitution. If one tag name, say 'p', is a prefix of another tag name, say 'p1', and we try to substitute all occurrences of '@p' in the given string, we might occasionally substitute '@p1'. I made a small re2c lexer that recognizes tag-like lexemes and tries to match them against rule's tags. If a tag's name matches the recognized substring, then it is the perfect match (name cannot be a prefix of a longer name because lexer recognizes the longest match). --- re2c/Makefile.am | 12 +- re2c/bootstrap/src/codegen/subst_tags.cc | 134 +++++++++++++++++++++++ re2c/src/codegen/emit.h | 2 + re2c/src/codegen/emit_action.cc | 15 +-- re2c/src/codegen/subst_tags.re | 59 ++++++++++ re2c/test/tags/subst.i--tags.c | 90 +++++++++++++++ re2c/test/tags/subst.i--tags.re | 8 ++ 7 files changed, 303 insertions(+), 17 deletions(-) create mode 100644 re2c/bootstrap/src/codegen/subst_tags.cc create mode 100644 re2c/src/codegen/subst_tags.re create mode 100644 re2c/test/tags/subst.i--tags.c create mode 100644 re2c/test/tags/subst.i--tags.re diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 9d652cb8..f4deee0e 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -130,6 +130,7 @@ re2c_SOURCES = \ AUTOGEN_LEX = src/parse/lex.cc AUTOGEN_LEX_CONF = src/parse/lex_conf.cc AUTOGEN_PARSEOPT = src/conf/parse_opts.cc +AUTOGEN_SUBSTTAGS = src/codegen/subst_tags.cc AUTOGEN_PARSER = src/parse/parser.cc AUTOGEN_PARSER_HDR = src/parse/y.tab.h AUTOGEN = \ @@ -137,13 +138,15 @@ AUTOGEN = \ $(AUTOGEN_LEX_CONF) \ $(AUTOGEN_PARSER) \ $(AUTOGEN_PARSER_HDR) \ - $(AUTOGEN_PARSEOPT) + $(AUTOGEN_PARSEOPT) \ + $(AUTOGEN_SUBSTTAGS) nodist_re2c_SOURCES = $(AUTOGEN) # bootstrap sources BOOTSTRAP_LEX = bootstrap/src/parse/lex.cc BOOTSTRAP_LEX_CONF = bootstrap/src/parse/lex_conf.cc BOOTSTRAP_PARSEOPT = bootstrap/src/conf/parse_opts.cc +BOOTSTRAP_SUBSTTAGS = 
bootstrap/src/codegen/subst_tags.cc BOOTSTRAP_PARSER = bootstrap/src/parse/parser.cc BOOTSTRAP_PARSER_HDR = bootstrap/src/parse/y.tab.h BOOTSTRAP_DOC = bootstrap/doc/re2c.1 @@ -153,18 +156,21 @@ BOOTSTRAP = \ $(BOOTSTRAP_DOC) \ $(BOOTSTRAP_PARSER) \ $(BOOTSTRAP_PARSER_HDR) \ - $(BOOTSTRAP_PARSEOPT) + $(BOOTSTRAP_PARSEOPT) \ + $(BOOTSTRAP_SUBSTTAGS) # custom sources CUSTOM_LEX = src/parse/lex.re CUSTOM_LEX_CONF = src/parse/lex_conf.re CUSTOM_PARSEOPT = src/conf/parse_opts.re +CUSTOM_SUBSTTAGS = src/codegen/subst_tags.re CUSTOM_PARSER = src/parse/parser.ypp CUSTOM = \ $(CUSTOM_LEX) \ $(CUSTOM_LEX_CONF) \ $(CUSTOM_PARSER) \ - $(CUSTOM_PARSEOPT) + $(CUSTOM_PARSEOPT) \ + $(CUSTOM_SUBSTTAGS) # docs SRC_DOC = doc/manpage.rst diff --git a/re2c/bootstrap/src/codegen/subst_tags.cc b/re2c/bootstrap/src/codegen/subst_tags.cc new file mode 100644 index 00000000..1cf27ee1 --- /dev/null +++ b/re2c/bootstrap/src/codegen/subst_tags.cc @@ -0,0 +1,134 @@ +/* Generated by re2c 0.16 on Fri May 13 13:49:35 2016 */ +#line 1 "../src/codegen/subst_tags.re" +#include "src/codegen/emit.h" +#include "src/codegen/input_api.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +std::string subst_tags(const std::string &action, + const std::valarray &tags, size_t ltag, size_t htag) +{ + if (ltag >= htag) { + return action; + } + + std::string result; + const char + *head = action.c_str(), + *last = head + action.length(), + *tail = head, + *base; + + loop: +#line 27 "src/codegen/subst_tags.cc" +{ + char yych; + long yytag1name; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 128, + 0, 128, 128, 128, 128, 
128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + base = head; + yych = *head; + if (yych <= 0x00) goto yy2; + if (yych == '@') goto yy6; + goto yy4; +yy2: + ++head; +#line 33 "../src/codegen/subst_tags.re" + { + if (base == last) { + result.append(tail, base); + return result; + } + goto loop; + } +#line 80 "src/codegen/subst_tags.cc" +yy4: + ++head; +yy5: +#line 31 "../src/codegen/subst_tags.re" + { goto loop; } +#line 86 "src/codegen/subst_tags.cc" +yy6: + yych = *++head; + if (yych <= 'Z') { + if (yych <= '/') goto yy5; + if (yych <= '9') { + yytag1name = (head - base); + goto yy7; + } + if (yych <= '@') goto yy5; + yytag1name = (head - base); + } else { + if (yych <= '_') { + if (yych <= '^') goto yy5; + yytag1name = (head - base); + } else { + if (yych <= '`') goto yy5; + if (yych >= '{') goto yy5; + yytag1name = (head - base); + } + } +yy7: + ++head; + yych = *head; + if (yybm[0+yych] & 128) { + goto yy7; + } +#line 41 "../src/codegen/subst_tags.re" + { + const std::string name((base + yytag1name), head); + for (size_t i = ltag; i < htag; ++i) { + const Tag &t = tags[i]; + if (name == *t.name) { + result.append(tail, base); + result.append(t.type == Tag::VAR + ? 
opts->input_api.expr_tag(vartag_expr(t.name, t.rule)) + : opts->input_api.expr_tag_fix(t, tags)); + tail = head; + break; + } + } + goto loop; + } +#line 129 "src/codegen/subst_tags.cc" +} +#line 56 "../src/codegen/subst_tags.re" + +} + +} // namespace re2c diff --git a/re2c/src/codegen/emit.h b/re2c/src/codegen/emit.h index c08af69f..07f06b96 100644 --- a/re2c/src/codegen/emit.h +++ b/re2c/src/codegen/emit.h @@ -17,6 +17,8 @@ void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh, void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags); std::string vartag_name(const std::string *name, size_t rule); std::string vartag_expr(const std::string *name, size_t rule); +std::string subst_tags(const std::string &action, + const std::valarray &tags, size_t ltag, size_t htag); } // namespace re2c diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 990bc18d..eb2efc2f 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -209,18 +209,6 @@ void emit_accept(OutputFile &o, uint32_t ind, bool &readCh, o.wind(ind).ws("}\n"); } -static void subst_tags(std::string &action, - const Rule &rule, const std::valarray &tags) -{ - for (size_t i = rule.ltag; i < rule.htag; ++i) { - const Tag &tag = tags[i]; - const std::string s = tag.type == Tag::VAR - ? 
opts->input_api.expr_tag(vartag_expr(tag.name, tag.rule)) - : opts->input_api.expr_tag_fix(tag, tags); - strrreplace(action, "@" + *tag.name, s); - } -} - void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) { const Rule &rule = dfa.rules[rule_idx]; @@ -252,8 +240,7 @@ void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) if (!yySetupRule.empty()) { o.wind(ind).wstring(yySetupRule).ws("\n"); } - std::string action = code->text; - subst_tags(action, rule, dfa.tags); + const std::string action = subst_tags(code->text, dfa.tags, rule.ltag, rule.htag); o.wline_info(code->loc.line, code->loc.filename.c_str()) .wind(ind).wstring(action).ws("\n") .wdelay_line_info(); diff --git a/re2c/src/codegen/subst_tags.re b/re2c/src/codegen/subst_tags.re new file mode 100644 index 00000000..940aa8a8 --- /dev/null +++ b/re2c/src/codegen/subst_tags.re @@ -0,0 +1,59 @@ +#include "src/codegen/emit.h" +#include "src/codegen/input_api.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +std::string subst_tags(const std::string &action, + const std::valarray &tags, size_t ltag, size_t htag) +{ + if (ltag >= htag) { + return action; + } + + std::string result; + const char + *head = action.c_str(), + *last = head + action.length(), + *tail = head, + *base; + + loop: /*!re2c + + re2c:define:YYCTYPE = char; + re2c:define:YYCURSOR = head; + re2c:define:YYCTXMARKER = base; + re2c:yyfill:enable = 0; + re2c:flags:tags = 1; + + * { goto loop; } + + "\x00" { + if (base == last) { + result.append(tail, base); + return result; + } + goto loop; + } + + "@" @name [a-zA-Z0-9_]+ { + const std::string name(@name, head); + for (size_t i = ltag; i < htag; ++i) { + const Tag &t = tags[i]; + if (name == *t.name) { + result.append(tail, base); + result.append(t.type == Tag::VAR + ? 
opts->input_api.expr_tag(vartag_expr(t.name, t.rule)) + : opts->input_api.expr_tag_fix(t, tags)); + tail = head; + break; + } + } + goto loop; + } + */ +} + +} // namespace re2c diff --git a/re2c/test/tags/subst.i--tags.c b/re2c/test/tags/subst.i--tags.c new file mode 100644 index 00000000..0a70510b --- /dev/null +++ b/re2c/test/tags/subst.i--tags.c @@ -0,0 +1,90 @@ +/* Generated by re2c */ +// Be careful with substitution of tag names: +// if one tag's name is a prefix of another tag's name, +// tag with longer name must be substituted first. + + +{ + YYCTYPE yych; + long yytag0p; + long yytag0p1; + long yytag0p12; + long yytag0p123; + YYCTXMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy4; + default: goto yy2; + } +yy2: + ++YYCURSOR; +yy3: + {} +yy4: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'a': + case 'b': goto yy6; + default: goto yy3; + } +yy5: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy6: + switch (yych) { + case 'a': goto yy5; + case 'b': + yytag0p12 = (YYCURSOR - YYCTXMARKER); + goto yy8; + default: goto yy7; + } +yy7: + YYCURSOR = YYMARKER; + goto yy3; +yy8: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy8; + case 'c': + yytag0p123 = (YYCURSOR - YYCTXMARKER); + goto yy10; + default: goto yy7; + } +yy10: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'c': goto yy10; + case 'd': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy12; + default: goto yy7; + } +yy12: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'd': goto yy12; + case 'e': + yytag0p1 = (YYCURSOR - YYCTXMARKER); + goto yy14; + default: goto yy7; + } +yy14: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'e': goto yy14; + default: goto yy16; + } +yy16: + { (YYCTXMARKER + 
yytag0p) (YYCTXMARKER + yytag0p1) (YYCTXMARKER + yytag0p12) (YYCTXMARKER + yytag0p123) } +} + diff --git a/re2c/test/tags/subst.i--tags.re b/re2c/test/tags/subst.i--tags.re new file mode 100644 index 00000000..efc3b7ca --- /dev/null +++ b/re2c/test/tags/subst.i--tags.re @@ -0,0 +1,8 @@ +// Be careful with substitution of tag names: +// if one tag's name is a prefix of another tag's name, +// tag with longer name must be substituted first. + +/*!re2c + "a"+ @p12 "b"+ @p123 "c"+ @p "d"+ @p1 "e"+ { @p @p1 @p12 @p123 } + * {} +*/ -- 2.40.0