From fc93270e94c8c411f36a42dbaae1f8fd65dcf78f Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sun, 5 Mar 2017 11:43:26 +0000 Subject: [PATCH] Renamed 'RegExp' to 'AST'. --- re2c/Makefile.am | 4 +- re2c/bootstrap/src/ast/lex.cc | 34 +++--- re2c/bootstrap/src/ast/parser.cc | 42 +++---- re2c/bootstrap/src/ast/y.tab.h | 2 +- re2c/src/ast/ast.cc | 145 ++++++++++++++++++++++++ re2c/src/ast/ast.h | 88 +++++++++++++++ re2c/src/ast/lex.re | 32 +++--- re2c/src/ast/parser.h | 6 +- re2c/src/ast/parser.ypp | 42 +++---- re2c/src/ast/regexp.cc | 34 ------ re2c/src/ast/regexp.h | 176 ----------------------------- re2c/src/ast/scanner.h | 6 +- re2c/src/code/emit_action.cc | 1 - re2c/src/compile.cc | 4 +- re2c/src/dfa/cfg/liveanal.cc | 1 + re2c/src/dfa/determinization.cc | 1 - re2c/src/dfa/dfa.h | 1 - re2c/src/nfa/estimate_size.cc | 2 +- re2c/src/nfa/re_to_nfa.cc | 2 +- re2c/src/re/ast_to_re.cc | 54 ++++----- re2c/src/re/re.h | 4 +- re2c/src/skeleton/generate_data.cc | 1 + re2c/src/skeleton/skeleton.cc | 1 + 23 files changed, 353 insertions(+), 330 deletions(-) create mode 100644 re2c/src/ast/ast.cc create mode 100644 re2c/src/ast/ast.h delete mode 100644 re2c/src/ast/regexp.cc delete mode 100644 re2c/src/ast/regexp.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index c5a3f1f9..b9860b8b 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -47,10 +47,10 @@ SRC_HDR = \ src/tcmd.h \ src/skeleton/path.h \ src/skeleton/skeleton.h \ + src/ast/ast.h \ src/ast/extop.h \ src/ast/input.h \ src/ast/parser.h \ - src/ast/regexp.h \ src/ast/rules.h \ src/ast/scanner.h \ src/ast/unescape.h \ @@ -135,8 +135,8 @@ SRC = \ src/tag.cc \ src/tcmd.cc \ src/main.cc \ + src/ast/ast.cc \ src/ast/input.cc \ - src/ast/regexp.cc \ src/ast/scanner.cc \ src/ast/unescape.cc \ src/util/s_to_n32_unsafe.cc \ diff --git a/re2c/bootstrap/src/ast/lex.cc b/re2c/bootstrap/src/ast/lex.cc index fc35a4c6..9807588f 100644 --- a/re2c/bootstrap/src/ast/lex.cc +++ b/re2c/bootstrap/src/ast/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Sun Mar 5 10:37:44 2017 */ +/* Generated by re2c 0.16 on Sun Mar 5 11:37:40 2017 */ #line 1 "../src/ast/lex.re" #include "src/util/c99_stdint.h" #include @@ -11,7 +11,7 @@ #include "src/re/encoding/enc.h" #include "src/ast/extop.h" #include "src/ast/input.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/ast/scanner.h" #include "src/ast/parser.h" // needed by "y.tab.h" #include "src/ast/unescape.h" @@ -1050,7 +1050,7 @@ yy175: ++YYCURSOR; #line 358 "../src/ast/lex.re" { - yylval.regexp = RegExp::make_dot(cline, get_column()); + yylval.regexp = ast_dot(cline, get_column()); return TOKEN_REGEXP; } #line 1057 "src/ast/lex.cc" @@ -1210,17 +1210,17 @@ yy198: yylval.str = new std::string (tok, tok_len()); return TOKEN_ID; } else { - const RegExp *r = NULL; + const AST *r = NULL; const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; for (char *s = tok; s < cur; ++s) { const uint32_t c = static_cast(*s), column = static_cast(s - pos); - r = RegExp::make_cat(r, casing - ? RegExp::make_ichar(cline, column, c) - : RegExp::make_schar(cline, column, c)); + r = ast_cat(r, casing + ? ast_ichar(cline, column, c) + : ast_schar(cline, column, c)); } - yylval.regexp = r ? r : RegExp::make_nil(cline, get_column()); + yylval.regexp = r ? r : ast_nil(cline, get_column()); return TOKEN_REGEXP; } } @@ -1284,7 +1284,7 @@ yy208: fatal("tags are only allowed with '-T, --tags' option"); } const std::string *name = new std::string(tok + 1, tok_len() - 1); - yylval.regexp = RegExp::make_tag(cline, get_column(), name); + yylval.regexp = ast_tag(cline, get_column(), name); return TOKEN_REGEXP; } #line 1291 "src/ast/lex.cc" @@ -2208,7 +2208,7 @@ static void escape (std::string & dest, const std::string & src) } } -const RegExp *Scanner::lex_cls(bool neg) +const AST *Scanner::lex_cls(bool neg) { const uint32_t column = get_column(); Range *r = NULL, *s; @@ -2275,7 +2275,7 @@ end: if (neg) { r = Range::sub(opts->encoding.fullRange(), r); } - return RegExp::make_class(cline, column, r); + return ast_class(cline, column, r); } uint32_t Scanner::lex_cls_chr() @@ -2853,17 +2853,17 @@ yy455: } -const RegExp *Scanner::lex_str(char quote, bool casing) +const AST *Scanner::lex_str(char quote, bool casing) { - const RegExp *r = NULL; + const AST *r = NULL; for (bool end;;) { const uint32_t c = lex_str_chr(quote, end); if (end) { - return r ? r : RegExp::make_nil(cline, get_column()); + return r ? r : ast_nil(cline, get_column()); } - r = RegExp::make_cat(r, casing - ? RegExp::make_ichar(cline, get_column(), c) - : RegExp::make_schar(cline, get_column(), c)); + r = ast_cat(r, casing + ? ast_ichar(cline, get_column(), c) + : ast_schar(cline, get_column(), c)); } } diff --git a/re2c/bootstrap/src/ast/parser.cc b/re2c/bootstrap/src/ast/parser.cc index b1ca22c5..4e80ea59 100644 --- a/re2c/bootstrap/src/ast/parser.cc +++ b/re2c/bootstrap/src/ast/parser.cc @@ -86,7 +86,7 @@ #include "src/skeleton/skeleton.h" #include "src/ast/extop.h" #include "src/ast/parser.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/ast/scanner.h" #include "src/util/free_list.h" #include "src/util/range.h" @@ -207,8 +207,8 @@ static void prepare(specs_t &specs) for (i = b; i != e; ++i) { if (!i->defs.empty()) { const Code *c = i->defs[0]; - const RegExp *r = RegExp::make_default(c->fline, 0); - i->rules.push_back(RegExpRule(r, c)); + const AST *r = ast_default(c->fline, 0); + i->rules.push_back(ASTRule(r, c)); } } @@ -283,7 +283,7 @@ union YYSTYPE { - const re2c::RegExp * regexp; + const re2c::AST * regexp; re2c::Code * code; char op; re2c::ExtOp extop; @@ -1463,7 +1463,7 @@ yyreduce: case 12: { - find(context.specs, "").rules.push_back(RegExpRule((yyvsp[-1].regexp), (yyvsp[0].code))); + find(context.specs, "").rules.push_back(ASTRule((yyvsp[-1].regexp), (yyvsp[0].code))); } break; @@ -1480,7 +1480,7 @@ yyreduce: { for(CondList::const_iterator i = (yyvsp[-3].clist)->begin(); i != (yyvsp[-3].clist)->end(); ++i) { - find(context.specs, *i).rules.push_back(RegExpRule((yyvsp[-1].regexp), (yyvsp[0].code))); + find(context.specs, *i).rules.push_back(ASTRule((yyvsp[-1].regexp), (yyvsp[0].code))); } delete (yyvsp[-3].clist); } @@ -1512,8 +1512,8 @@ yyreduce: case 17: { - const RegExp *r = RegExp::make_nil(context.input.get_cline(), 0); - find(context.specs, "0").rules.push_back(RegExpRule(r, (yyvsp[0].code))); + const AST *r = ast_nil(context.input.get_cline(), 0); + find(context.specs, "0").rules.push_back(ASTRule(r, (yyvsp[0].code))); } break; @@ -1570,7 +1570,7 @@ yyreduce: case 26: { - (yyval.regexp) = RegExp::make_cat((yyvsp[-2].regexp), RegExp::make_cat(RegExp::make_tag(context.input.get_cline(), 0, NULL), (yyvsp[0].regexp))); + (yyval.regexp) = ast_cat((yyvsp[-2].regexp), ast_cat(ast_tag(context.input.get_cline(), 0, NULL), (yyvsp[0].regexp))); } break; @@ -1579,7 +1579,7 @@ yyreduce: { if (context.input.opts->posix_captures) { - (yyvsp[0].regexp) = RegExp::make_cap((yyvsp[0].regexp)); + (yyvsp[0].regexp) = ast_cap((yyvsp[0].regexp)); } (yyval.regexp) = (yyvsp[0].regexp); } @@ -1597,7 +1597,7 @@ yyreduce: case 29: { - (yyval.regexp) = RegExp::make_alt((yyvsp[-2].regexp), (yyvsp[0].regexp)); + (yyval.regexp) = ast_alt((yyvsp[-2].regexp), (yyvsp[0].regexp)); } break; @@ -1613,7 +1613,7 @@ yyreduce: case 31: { - (yyval.regexp) = RegExp::make_diff((yyvsp[-2].regexp), (yyvsp[0].regexp)); + (yyval.regexp) = ast_diff((yyvsp[-2].regexp), (yyvsp[0].regexp)); } break; @@ -1629,7 +1629,7 @@ yyreduce: case 33: { - (yyval.regexp) = RegExp::make_cat((yyvsp[-1].regexp), (yyvsp[0].regexp)); + (yyval.regexp) = ast_cat((yyvsp[-1].regexp), (yyvsp[0].regexp)); } break; @@ -1638,9 +1638,9 @@ yyreduce: { switch((yyvsp[0].op)) { - case '*': (yyval.regexp) = RegExp::make_iter((yyvsp[-1].regexp), 0, RegExp::MANY); break; - case '+': (yyval.regexp) = RegExp::make_iter((yyvsp[-1].regexp), 1, RegExp::MANY); break; - case '?': (yyval.regexp) = RegExp::make_iter((yyvsp[-1].regexp), 0, 1); break; + case '*': (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, AST::MANY); break; + case '+': (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 1, AST::MANY); break; + case '?': (yyval.regexp) = ast_iter((yyvsp[-1].regexp), 0, 1); break; } } @@ -1649,7 +1649,7 @@ yyreduce: case 36: { - (yyval.regexp) = RegExp::make_iter((yyvsp[-1].regexp), (yyvsp[0].extop).min, (yyvsp[0].extop).max); + (yyval.regexp) = ast_iter((yyvsp[-1].regexp), (yyvsp[0].extop).min, (yyvsp[0].extop).max); } break; @@ -1686,8 +1686,8 @@ yyreduce: context.input.fatal("can't find symbol"); } (yyval.regexp) = i->second; - if (context.input.opts->posix_captures && RegExp::need_wrap((yyval.regexp))) { - (yyval.regexp) = RegExp::make_ref((yyval.regexp), *(yyvsp[0].str)); + if (context.input.opts->posix_captures && ast_need_wrap((yyval.regexp))) { + (yyval.regexp) = ast_ref((yyval.regexp), *(yyvsp[0].str)); } delete (yyvsp[0].str); } @@ -1699,7 +1699,7 @@ yyreduce: { (yyval.regexp) = (yyvsp[-1].regexp); if (context.input.opts->posix_captures) { - (yyval.regexp) = RegExp::make_cap((yyval.regexp)); + (yyval.regexp) = ast_cap((yyval.regexp)); } } @@ -2025,7 +2025,7 @@ void parse(Scanner &input, Output &output) emit_epilog (o, output.skeletons); } - RegExp::flist.clear(); + AST::flist.clear(); Code::flist.clear(); Range::vFreeList.clear(); RangeSuffix::freeList.clear(); diff --git a/re2c/bootstrap/src/ast/y.tab.h b/re2c/bootstrap/src/ast/y.tab.h index 4bccc679..318b8a28 100644 --- a/re2c/bootstrap/src/ast/y.tab.h +++ b/re2c/bootstrap/src/ast/y.tab.h @@ -62,7 +62,7 @@ union YYSTYPE { - const re2c::RegExp * regexp; + const re2c::AST * regexp; re2c::Code * code; char op; re2c::ExtOp extop; diff --git a/re2c/src/ast/ast.cc b/re2c/src/ast/ast.cc new file mode 100644 index 00000000..bf78d2b3 --- /dev/null +++ b/re2c/src/ast/ast.cc @@ -0,0 +1,145 @@ +#include + +#include "src/ast/ast.h" + +namespace re2c +{ + +free_list AST::flist; + +const uint32_t AST::MANY = std::numeric_limits::max(); + +AST::AST(uint32_t l, uint32_t c, type_t t) + : type(t), line(l), column(c) +{ + flist.insert(this); +} + +AST::~AST() +{ + flist.erase(this); + if (type == TAG) { + delete tag; + } else if (type == REF) { + delete ref.name; + } +} + +const AST *ast_nil(uint32_t l, uint32_t c) +{ + return new AST(l, c, AST::NIL); +} + +const AST *ast_schar(uint32_t l, uint32_t c, uint32_t x) +{ + AST *ast = new AST(l, c, AST::SCHAR); + ast->schar = x; + return ast; +} + +const AST *ast_ichar(uint32_t l, uint32_t c, uint32_t x) +{ + AST *ast = new AST(l, c, AST::ICHAR); + ast->ichar = x; + return ast; +} + +const AST *ast_class(uint32_t l, uint32_t c, const Range *r) +{ + AST *ast = new AST(l, c, AST::CLASS); + ast->cls = r; + return ast; +} + +const AST *ast_dot(uint32_t l, uint32_t c) +{ + return new AST(l, c, AST::DOT); +} + +const AST *ast_default(uint32_t l, uint32_t c) +{ + return new AST(l, c, AST::DEFAULT); +} + +const AST *ast_alt(const AST *a1, const AST *a2) +{ + if (!a1) return a2; + if (!a2) return a1; + AST *ast = new AST(a1->line, a1->column, AST::ALT); + ast->alt.ast1 = a1; + ast->alt.ast2 = a2; + return ast; +} + +const AST *ast_cat(const AST *a1, const AST *a2) +{ + if (!a1) return a2; + if (!a2) return a1; + AST *ast = new AST(a1->line, a1->column, AST::CAT); + ast->cat.ast1 = a1; + ast->cat.ast2 = a2; + return ast; +} + +const AST *ast_iter(const AST *r, uint32_t n, uint32_t m) +{ + AST *ast = new AST(r->line, r->column, AST::ITER); + ast->iter.ast = r; + ast->iter.min = n; + ast->iter.max = m; + return ast; +} + +const AST *ast_diff(const AST *a1, const AST *a2) +{ + AST *ast = new AST(a1->line, a1->column, AST::DIFF); + ast->cat.ast1 = a1; + ast->cat.ast2 = a2; + return ast; +} + +const AST *ast_tag(uint32_t l, uint32_t c, const std::string *t) +{ + AST *ast = new AST(l, c, AST::TAG); + ast->tag = t; + return ast; +} + +const AST *ast_cap(const AST *r) +{ + AST *ast = new AST(r->line, r->column, AST::CAP); + ast->cap = r; + return ast; +} + +const AST *ast_ref(const AST *r, const std::string &n) +{ + AST *ast = new AST(r->line, r->column, AST::REF); + ast->ref.ast = r; + ast->ref.name = new std::string(n); + return ast; +} + +bool ast_need_wrap(const AST *ast) +{ + switch (ast->type) { + case AST::ITER: + case AST::NIL: + case AST::SCHAR: + case AST::ICHAR: + case AST::CLASS: + case AST::DOT: + case AST::DEFAULT: + case AST::TAG: + case AST::CAP: + return false; + case AST::ALT: + case AST::CAT: + case AST::DIFF: + case AST::REF: + return true; + } + assert(false); +} + +} // namespace re2c diff --git a/re2c/src/ast/ast.h b/re2c/src/ast/ast.h new file mode 100644 index 00000000..42c32e89 --- /dev/null +++ b/re2c/src/ast/ast.h @@ -0,0 +1,88 @@ +#ifndef _RE2C_AST_AST_ +#define _RE2C_AST_AST_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/rule.h" +#include "src/util/free_list.h" +#include "src/util/range.h" + +namespace re2c +{ + +/* AST must be immutable and independent of options */ +struct AST +{ + static free_list flist; + static const uint32_t MANY; + + enum type_t + { NIL, SCHAR, ICHAR, CLASS, DOT, DEFAULT + , ALT, CAT, ITER, DIFF, TAG, CAP + , REF } type; + union { + uint32_t schar; + uint32_t ichar; + const Range *cls; + struct { + const AST *ast1; + const AST *ast2; + } alt; + struct { + const AST *ast1; + const AST *ast2; + } cat; + struct { + const AST *ast; + uint32_t min; + uint32_t max; + } iter; + struct { + const AST *ast1; + const AST *ast2; + } diff; + const std::string *tag; + const AST *cap; + struct { + const AST *ast; + const std::string *name; + } ref; + }; + uint32_t line; + uint32_t column; + + AST(uint32_t l, uint32_t c, type_t t); + ~AST(); +}; + +struct ASTRule +{ + const AST *ast; + const Code *code; + + ASTRule(const AST *r, const Code *c) + : ast(r) + , code(c) + {} +}; + +const AST *ast_nil(uint32_t l, uint32_t c); +const AST *ast_schar(uint32_t l, uint32_t c, uint32_t x); +const AST *ast_ichar(uint32_t l, uint32_t c, uint32_t x); +const AST *ast_class(uint32_t l, uint32_t c, const Range *r); +const AST *ast_dot(uint32_t l, uint32_t c); +const AST *ast_default(uint32_t l, uint32_t c); +const AST *ast_alt(const AST *r1, const AST *r2); +const AST *ast_cat(const AST *r1, const AST *r2); +const AST *ast_iter(const AST *r, uint32_t n, uint32_t m); +const AST *ast_diff(const AST *r1, const AST *r2); +const AST *ast_tag(uint32_t l, uint32_t c, const std::string *t); +const AST *ast_cap(const AST *r); +const AST *ast_ref(const AST *r, const std::string &n); +bool ast_need_wrap(const AST *ast); + +} // namespace re2c + +#endif // _RE2C_AST_AST_ diff --git a/re2c/src/ast/lex.re b/re2c/src/ast/lex.re index bfa6d6b3..f74c8bfe 100644 --- a/re2c/src/ast/lex.re +++ b/re2c/src/ast/lex.re @@ -9,7 +9,7 @@ #include "src/re/encoding/enc.h" #include "src/ast/extop.h" #include "src/ast/input.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/ast/scanner.h" #include "src/ast/parser.h" // needed by "y.tab.h" #include "src/ast/unescape.h" @@ -266,7 +266,7 @@ start: fatal("tags are only allowed with '-T, --tags' option"); } const std::string *name = new std::string(tok + 1, tok_len() - 1); - yylval.regexp = RegExp::make_tag(cline, get_column(), name); + yylval.regexp = ast_tag(cline, get_column(), name); return TOKEN_REGEXP; } @@ -340,23 +340,23 @@ start: yylval.str = new std::string (tok, tok_len()); return TOKEN_ID; } else { - const RegExp *r = NULL; + const AST *r = NULL; const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; for (char *s = tok; s < cur; ++s) { const uint32_t c = static_cast(*s), column = static_cast(s - pos); - r = RegExp::make_cat(r, casing - ? RegExp::make_ichar(cline, column, c) - : RegExp::make_schar(cline, column, c)); + r = ast_cat(r, casing + ? ast_ichar(cline, column, c) + : ast_schar(cline, column, c)); } - yylval.regexp = r ? r : RegExp::make_nil(cline, get_column()); + yylval.regexp = r ? r : ast_nil(cline, get_column()); return TOKEN_REGEXP; } } "." { - yylval.regexp = RegExp::make_dot(cline, get_column()); + yylval.regexp = ast_dot(cline, get_column()); return TOKEN_REGEXP; } @@ -547,7 +547,7 @@ static void escape (std::string & dest, const std::string & src) } } -const RegExp *Scanner::lex_cls(bool neg) +const AST *Scanner::lex_cls(bool neg) { const uint32_t column = get_column(); Range *r = NULL, *s; @@ -579,7 +579,7 @@ end: if (neg) { r = Range::sub(opts->encoding.fullRange(), r); } - return RegExp::make_class(cline, column, r); + return ast_class(cline, column, r); } uint32_t Scanner::lex_cls_chr() @@ -644,17 +644,17 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) */ } -const RegExp *Scanner::lex_str(char quote, bool casing) +const AST *Scanner::lex_str(char quote, bool casing) { - const RegExp *r = NULL; + const AST *r = NULL; for (bool end;;) { const uint32_t c = lex_str_chr(quote, end); if (end) { - return r ? r : RegExp::make_nil(cline, get_column()); + return r ? r : ast_nil(cline, get_column()); } - r = RegExp::make_cat(r, casing - ? RegExp::make_ichar(cline, get_column(), c) - : RegExp::make_schar(cline, get_column(), c)); + r = ast_cat(r, casing + ? ast_ichar(cline, get_column(), c) + : ast_schar(cline, get_column(), c)); } } diff --git a/re2c/src/ast/parser.h b/re2c/src/ast/parser.h index 4d988a71..543ad5de 100644 --- a/re2c/src/ast/parser.h +++ b/re2c/src/ast/parser.h @@ -5,7 +5,7 @@ #include #include "src/code/output.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/ast/scanner.h" #include "src/util/smart_ptr.h" @@ -19,7 +19,7 @@ void parse(Scanner &input, Output &output); struct spec_t { std::string name; - std::vector rules; + std::vector rules; std::vector defs; std::vector setup; @@ -29,7 +29,7 @@ struct spec_t typedef std::vector specs_t; typedef std::set CondList; -typedef std::map symtab_t; +typedef std::map symtab_t; typedef std::vector > dfas_t; struct context_t diff --git a/re2c/src/ast/parser.ypp b/re2c/src/ast/parser.ypp index 1d26f041..1d313198 100644 --- a/re2c/src/ast/parser.ypp +++ b/re2c/src/ast/parser.ypp @@ -21,7 +21,7 @@ #include "src/skeleton/skeleton.h" #include "src/ast/extop.h" #include "src/ast/parser.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/ast/scanner.h" #include "src/util/free_list.h" #include "src/util/range.h" @@ -142,8 +142,8 @@ static void prepare(specs_t &specs) for (i = b; i != e; ++i) { if (!i->defs.empty()) { const Code *c = i->defs[0]; - const RegExp *r = RegExp::make_default(c->fline, 0); - i->rules.push_back(RegExpRule(r, c)); + const AST *r = ast_default(c->fline, 0); + i->rules.push_back(ASTRule(r, c)); } } @@ -173,7 +173,7 @@ static spec_t &find(specs_t &specs, const std::string &name) %parse-param {re2c::context_t &context} %union { - const re2c::RegExp * regexp; + const re2c::AST * regexp; re2c::Code * code; char op; re2c::ExtOp extop; @@ -229,7 +229,7 @@ enddef: ';' | TOKEN_FID_END; rule : trailexpr TOKEN_CODE { - find(context.specs, "").rules.push_back(RegExpRule($1, $2)); + find(context.specs, "").rules.push_back(ASTRule($1, $2)); } | '*' TOKEN_CODE { @@ -238,7 +238,7 @@ rule | '<' clist '>' trailexpr ccode { for(CondList::const_iterator i = $2->begin(); i != $2->end(); ++i) { - find(context.specs, *i).rules.push_back(RegExpRule($4, $5)); + find(context.specs, *i).rules.push_back(ASTRule($4, $5)); } delete $2; } @@ -258,8 +258,8 @@ rule } | '<' '>' ccode { - const RegExp *r = RegExp::make_nil(context.input.get_cline(), 0); - find(context.specs, "0").rules.push_back(RegExpRule(r, $3)); + const AST *r = ast_nil(context.input.get_cline(), 0); + find(context.specs, "0").rules.push_back(ASTRule(r, $3)); }; ccode @@ -300,13 +300,13 @@ conds trailexpr : capexpr | capexpr '/' expr { - $$ = RegExp::make_cat($1, RegExp::make_cat(RegExp::make_tag(context.input.get_cline(), 0, NULL), $3)); + $$ = ast_cat($1, ast_cat(ast_tag(context.input.get_cline(), 0, NULL), $3)); }; capexpr : expr { if (context.input.opts->posix_captures) { - $1 = RegExp::make_cap($1); + $1 = ast_cap($1); } $$ = $1; }; @@ -318,7 +318,7 @@ expr: } | expr '|' diff { - $$ = RegExp::make_alt($1, $3); + $$ = ast_alt($1, $3); } ; @@ -329,7 +329,7 @@ diff: } | diff '\\' term { - $$ = RegExp::make_diff($1, $3); + $$ = ast_diff($1, $3); } ; @@ -340,7 +340,7 @@ term: } | term factor { - $$ = RegExp::make_cat($1, $2); + $$ = ast_cat($1, $2); } ; @@ -348,12 +348,12 @@ factor : primary | primary closes { switch($2) { - case '*': $$ = RegExp::make_iter($1, 0, RegExp::MANY); break; - case '+': $$ = RegExp::make_iter($1, 1, RegExp::MANY); break; - case '?': $$ = RegExp::make_iter($1, 0, 1); break; + case '*': $$ = ast_iter($1, 0, AST::MANY); break; + case '+': $$ = ast_iter($1, 1, AST::MANY); break; + case '?': $$ = ast_iter($1, 0, 1); break; } } | primary TOKEN_CLOSESIZE { - $$ = RegExp::make_iter($1, $2.min, $2.max); + $$ = ast_iter($1, $2.min, $2.max); }; closes @@ -375,14 +375,14 @@ primary context.input.fatal("can't find symbol"); } $$ = i->second; - if (context.input.opts->posix_captures && RegExp::need_wrap($$)) { - $$ = RegExp::make_ref($$, *$1); + if (context.input.opts->posix_captures && ast_need_wrap($$)) { + $$ = ast_ref($$, *$1); } delete $1; } | '(' expr ')' { $$ = $2; if (context.input.opts->posix_captures) { - $$ = RegExp::make_cap($$); + $$ = ast_cap($$); } }; @@ -476,7 +476,7 @@ void parse(Scanner &input, Output &output) emit_epilog (o, output.skeletons); } - RegExp::flist.clear(); + AST::flist.clear(); Code::flist.clear(); Range::vFreeList.clear(); RangeSuffix::freeList.clear(); diff --git a/re2c/src/ast/regexp.cc b/re2c/src/ast/regexp.cc deleted file mode 100644 index f516fbe0..00000000 --- a/re2c/src/ast/regexp.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include - -#include "src/ast/regexp.h" - -namespace re2c -{ - -free_list RegExp::flist; - -const uint32_t RegExp::MANY = std::numeric_limits::max(); - -bool RegExp::need_wrap(const RegExp *re) -{ - switch (re->type) { - case RegExp::ITER: - case RegExp::NIL: - case RegExp::SCHAR: - case RegExp::ICHAR: - case RegExp::CLASS: - case RegExp::DOT: - case RegExp::DEFAULT: - case RegExp::TAG: - case RegExp::CAP: - return false; - case RegExp::ALT: - case RegExp::CAT: - case RegExp::DIFF: - case RegExp::REF: - return true; - } - assert(false); -} - -} // namespace re2c diff --git a/re2c/src/ast/regexp.h b/re2c/src/ast/regexp.h deleted file mode 100644 index 9ea7297e..00000000 --- a/re2c/src/ast/regexp.h +++ /dev/null @@ -1,176 +0,0 @@ -#ifndef _RE2C_AST_REGEXP_ -#define _RE2C_AST_REGEXP_ - -#include "src/util/c99_stdint.h" -#include -#include -#include - -#include "src/conf/warn.h" -#include "src/rule.h" -#include "src/util/free_list.h" -#include "src/util/range.h" - -namespace re2c -{ - -struct Opt; - -struct RegExp -{ - static free_list flist; - static const uint32_t MANY; - - enum type_t {NIL, SCHAR, ICHAR, CLASS, DOT, DEFAULT, ALT, CAT, ITER, DIFF, TAG, CAP, REF} type; - union - { - uint32_t schar; - uint32_t ichar; - const Range *cls; - struct - { - const RegExp *re1; - const RegExp *re2; - } alt; - struct - { - const RegExp *re1; - const RegExp *re2; - } cat; - struct - { - const RegExp *re; - uint32_t min; - uint32_t max; - } iter; - struct - { - const RegExp *re1; - const RegExp *re2; - } diff; - const std::string *tag; - const RegExp *cap; - struct - { - const RegExp *re; - const std::string *name; - } ref; - }; - uint32_t line; - uint32_t column; - - static const RegExp *make_nil(uint32_t l, uint32_t c) - { - return new RegExp(l, c, NIL); - } - static const RegExp *make_schar(uint32_t l, uint32_t c, uint32_t x) - { - RegExp *re = new RegExp(l, c, SCHAR); - re->schar = x; - return re; - } - static const RegExp *make_ichar(uint32_t l, uint32_t c, uint32_t x) - { - RegExp *re = new RegExp(l, c, ICHAR); - re->ichar = x; - return re; - } - static const RegExp *make_class(uint32_t l, uint32_t c, const Range *r) - { - RegExp *re = new RegExp(l, c, CLASS); - re->cls = r; - return re; - } - static const RegExp *make_dot(uint32_t l, uint32_t c) - { - return new RegExp(l, c, DOT); - } - static const RegExp *make_default(uint32_t l, uint32_t c) - { - return new RegExp(l, c, DEFAULT); - } - static const RegExp *make_alt(const RegExp *r1, const RegExp *r2) - { - if (!r1) return r2; - if (!r2) return r1; - RegExp *re = new RegExp(r1->line, r1->column, ALT); - re->alt.re1 = r1; - re->alt.re2 = r2; - return re; - } - static const RegExp *make_cat(const RegExp *r1, const RegExp *r2) - { - if (!r1) return r2; - if (!r2) return r1; - RegExp *re = new RegExp(r1->line, r1->column, CAT); - re->cat.re1 = r1; - re->cat.re2 = r2; - return re; - } - static const RegExp *make_iter(const RegExp *r, uint32_t n, uint32_t m) - { - RegExp *re = new RegExp(r->line, r->column, ITER); - re->iter.re = r; - re->iter.min = n; - re->iter.max = m; - return re; - } - static const RegExp *make_diff(const RegExp *r1, const RegExp *r2) - { - RegExp *re = new RegExp(r1->line, r1->column, DIFF); - re->cat.re1 = r1; - re->cat.re2 = r2; - return re; - } - static const RegExp *make_tag(uint32_t l, uint32_t c, const std::string *t) - { - RegExp *re = new RegExp(l, c, TAG); - re->tag = t; - return re; - } - static const RegExp *make_cap(const RegExp *r) - { - RegExp *re = new RegExp(r->line, r->column, CAP); - re->cap = r; - return re; - } - static const RegExp *make_ref(const RegExp *r, const std::string &n) - { - RegExp *re = new RegExp(r->line, r->column, REF); - re->ref.re = r; - re->ref.name = new std::string(n); - return re; - } - inline ~RegExp() - { - flist.erase(this); - if (type == TAG) { - delete tag; - } else if (type == REF) { - delete ref.name; - } - } - static bool need_wrap(const RegExp *re); - -private: - inline RegExp(uint32_t l, uint32_t c, type_t t) - : type(t), line(l), column(c) - { - flist.insert(this); - } -}; - -struct RegExpRule -{ - const RegExp *re; - const Code *code; - - RegExpRule(const RegExp *r, const Code *c) - : re(r) - , code(c) - {} -}; - -} // end namespace re2c - -#endif // _RE2C_AST_REGEXP_ diff --git a/re2c/src/ast/scanner.h b/re2c/src/ast/scanner.h index 9262608d..e34e743e 100644 --- a/re2c/src/ast/scanner.h +++ b/re2c/src/ast/scanner.h @@ -15,7 +15,7 @@ namespace re2c { class Range; -struct RegExp; +struct AST; class OutputFile; struct ScannerState @@ -65,8 +65,8 @@ private: void set_sourceline (); uint32_t lex_cls_chr(); uint32_t lex_str_chr(char quote, bool &end); - const RegExp *lex_cls(bool neg); - const RegExp *lex_str(char quote, bool casing); + const AST *lex_cls(bool neg); + const AST *lex_str(char quote, bool casing); void lex_conf (); void lex_conf_encoding_policy(); void lex_conf_input(); diff --git a/re2c/src/code/emit_action.cc b/re2c/src/code/emit_action.cc index 19941cb2..2be491ad 100644 --- a/re2c/src/code/emit_action.cc +++ b/re2c/src/code/emit_action.cc @@ -9,7 +9,6 @@ #include "src/adfa/action.h" #include "src/adfa/adfa.h" #include "src/skeleton/skeleton.h" -#include "src/ast/regexp.h" #include "src/util/string_utils.h" namespace re2c diff --git a/re2c/src/compile.cc b/re2c/src/compile.cc index aaebe305..6e099e88 100644 --- a/re2c/src/compile.cc +++ b/re2c/src/compile.cc @@ -10,7 +10,7 @@ #include "src/dfa/dump.h" #include "src/nfa/nfa.h" #include "src/skeleton/skeleton.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" namespace re2c { @@ -31,7 +31,7 @@ smart_ptr compile(const spec_t &spec, Output &output) { const opt_t *opts = output.source.block().opts; Warn &warn = output.source.warn; - const std::vector &rules = spec.rules; + const std::vector &rules = spec.rules; const size_t defrule = spec.defs.empty() ? Rule::NONE : rules.size() - 1; diff --git a/re2c/src/dfa/cfg/liveanal.cc b/re2c/src/dfa/cfg/liveanal.cc index 11570446..e662edca 100644 --- a/re2c/src/dfa/cfg/liveanal.cc +++ b/re2c/src/dfa/cfg/liveanal.cc @@ -1,3 +1,4 @@ +#include #include #include "src/dfa/cfg/cfg.h" diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 2c20494e..1b9acd85 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -8,7 +8,6 @@ #include "src/dfa/dump.h" #include "src/dfa/find_state.h" #include "src/nfa/nfa.h" -#include "src/ast/regexp.h" #include "src/util/range.h" namespace re2c diff --git a/re2c/src/dfa/dfa.h b/re2c/src/dfa/dfa.h index c915b42d..403d2427 100644 --- a/re2c/src/dfa/dfa.h +++ b/re2c/src/dfa/dfa.h @@ -11,7 +11,6 @@ #include "src/tag.h" #include "src/tcmd.h" #include "src/dfa/tagpool.h" -#include "src/ast/regexp.h" #include "src/util/forbid_copy.h" namespace re2c diff --git a/re2c/src/nfa/estimate_size.cc b/re2c/src/nfa/estimate_size.cc index 5694f259..1930eceb 100644 --- a/re2c/src/nfa/estimate_size.cc +++ b/re2c/src/nfa/estimate_size.cc @@ -21,7 +21,7 @@ static size_t estimate(const RE *re) iter = estimate(re->iter.re), min = re->iter.min, max = re->iter.max; - return max == RegExp::MANY + return max == AST::MANY ? iter * min + 1 : iter * max + (max - min); } diff --git a/re2c/src/nfa/re_to_nfa.cc b/re2c/src/nfa/re_to_nfa.cc index 4dbd3be7..f80e8b1d 100644 --- a/re2c/src/nfa/re_to_nfa.cc +++ b/re2c/src/nfa/re_to_nfa.cc @@ -51,7 +51,7 @@ static nfa_state_t *re_to_nfa(nfa_t &nfa, size_t nrule, const RE *re, nfa_state_ max = re->iter.max; const RE *iter = re->iter.re; // see note [counted repetition and iteration expansion] - if (max == RegExp::MANY) { + if (max == AST::MANY) { nfa_state_t *q = &nfa.states[nfa.size++]; s = re_to_nfa(nfa, nrule, iter, q); q->make_alt(nrule, s, t); diff --git a/re2c/src/re/ast_to_re.cc b/re2c/src/re/ast_to_re.cc index 14fbd43f..2044c43f 100644 --- a/re2c/src/re/ast_to_re.cc +++ b/re2c/src/re/ast_to_re.cc @@ -20,7 +20,7 @@ namespace re2c { * (the way invalid code points are treated). */ -static RE *ast_to_re(RESpec &spec, const RegExp *ast, size_t &ncap) +static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) { RE::alc_t &alc = spec.alc; std::vector &tags = spec.tags; @@ -29,26 +29,26 @@ static RE *ast_to_re(RESpec &spec, const RegExp *ast, size_t &ncap) switch (ast->type) { default: assert(false); - case RegExp::NIL: + case AST::NIL: return re_nil(alc); - case RegExp::ALT: { - RE *x = ast_to_re(spec, ast->alt.re1, ncap); - RE *y = ast_to_re(spec, ast->alt.re2, ncap); + case AST::ALT: { + RE *x = ast_to_re(spec, ast->alt.ast1, ncap); + RE *y = ast_to_re(spec, ast->alt.ast2, ncap); return re_alt(alc, x, y); } - case RegExp::CAT: { - RE *x = ast_to_re(spec, ast->cat.re1, ncap); - RE *y = ast_to_re(spec, ast->cat.re2, ncap); + case AST::CAT: { + RE *x = ast_to_re(spec, ast->cat.ast1, ncap); + RE *y = ast_to_re(spec, ast->cat.ast2, ncap); return re_cat(alc, x, y); } - case RegExp::TAG: { + case AST::TAG: { RE *t = re_tag(alc, tags.size(), false); tags.push_back(Tag(ast->tag)); return t; } - case RegExp::CAP: { - const RegExp *x = ast->cap; - if (x->type == RegExp::REF) x = x->ref.re; + case AST::CAP: { + const AST *x = ast->cap; + if (x->type == AST::REF) x = x->ref.ast; RE *t1 = re_tag(alc, tags.size(), false); tags.push_back(Tag(3 * ncap)); @@ -59,22 +59,22 @@ static RE *ast_to_re(RESpec &spec, const RegExp *ast, size_t &ncap) ++ncap; return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap), t2)); } - case RegExp::REF: + case AST::REF: error("implicit grouping is forbidden with '--posix-captures'" " option, please wrap '%s' in capturing parenthesis", ast->ref.name->c_str()); exit(1); - case RegExp::ITER: { + case AST::ITER: { const uint32_t n = ast->iter.min, n1 = std::max(n, 1u), m = std::max(n, ast->iter.max); - const RegExp *x = ast->iter.re; + const AST *x = ast->iter.ast; RE *t1 = NULL, *t2 = NULL, *t3 = NULL; - if (x->type == RegExp::CAP) { + if (x->type == AST::CAP) { x = x->cap; - if (x->type == RegExp::REF) x = x->ref.re; + if (x->type == AST::REF) x = x->ref.ast; t1 = re_tag(alc, tags.size(), false); tags.push_back(Tag(3 * ncap)); @@ -102,21 +102,21 @@ static RE *ast_to_re(RESpec &spec, const RegExp *ast, size_t &ncap) } return y; } - case RegExp::SCHAR: + case AST::SCHAR: return re_schar(alc, ast->line, ast->column, ast->schar, opts); - case RegExp::ICHAR: + case AST::ICHAR: return re_ichar(alc, ast->line, ast->column, ast->ichar, opts); - case RegExp::CLASS: + case AST::CLASS: return re_class(alc, ast->line, ast->column, ast->cls, opts, warn); - case RegExp::DIFF: { - RE *x = ast_to_re(spec, ast->diff.re1, ncap); - RE *y = ast_to_re(spec, ast->diff.re2, ncap); + case AST::DIFF: { + RE *x = ast_to_re(spec, ast->diff.ast1, ncap); + RE *y = ast_to_re(spec, ast->diff.ast2, ncap); if (x->type != RE::SYM || y->type != RE::SYM) { fatal_error(ast->line, ast->column, "can only difference char sets"); } return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn); } - case RegExp::DOT: { + case AST::DOT: { uint32_t c = '\n'; if (!opts->encoding.encode(c)) { fatal_error(ast->line, ast->column, "bad code point: '0x%X'", c); @@ -124,7 +124,7 @@ static RE *ast_to_re(RESpec &spec, const RegExp *ast, size_t &ncap) return re_class(alc, ast->line, ast->column, Range::sub(opts->encoding.fullRange(), Range::sym(c)), opts, warn); } - case RegExp::DEFAULT: + case AST::DEFAULT: // see note [default regexp] return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits())); } @@ -201,7 +201,7 @@ static void init_rule(Rule &rule, const Code *code, const std::vector &tags assert_tags_used_once(rule, tags); } -RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) +RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) : alc() , res() , charset(*new std::vector) @@ -212,7 +212,7 @@ RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) { for (size_t i = 0; i < ast.size(); ++i) { size_t ltag = tags.size(), ncap = 0; - res.push_back(ast_to_re(*this, ast[i].re, ncap)); + res.push_back(ast_to_re(*this, ast[i].ast, ncap)); init_rule(rules[i], ast[i].code, tags, ltag, ncap); } } diff --git a/re2c/src/re/re.h b/re2c/src/re/re.h index 2d962fd3..146b42d2 100644 --- a/re2c/src/re/re.h +++ b/re2c/src/re/re.h @@ -5,7 +5,7 @@ #include "src/conf/opt.h" #include "src/rule.h" -#include "src/ast/regexp.h" +#include "src/ast/ast.h" #include "src/util/forbid_copy.h" #include "src/util/range.h" #include "src/util/slab_allocator.h" @@ -49,7 +49,7 @@ struct RESpec const opt_t *opts; Warn &warn; - explicit RESpec(const std::vector &ast, const opt_t *o, Warn &w); + explicit RESpec(const std::vector &ast, const opt_t *o, Warn &w); FORBID_COPY(RESpec); }; diff --git a/re2c/src/skeleton/generate_data.cc b/re2c/src/skeleton/generate_data.cc index b8932d2f..977deaaf 100644 --- a/re2c/src/skeleton/generate_data.cc +++ b/re2c/src/skeleton/generate_data.cc @@ -1,4 +1,5 @@ #include "src/util/c99_stdint.h" +#include #include #include #include diff --git a/re2c/src/skeleton/skeleton.cc b/re2c/src/skeleton/skeleton.cc index 8a93b009..098a4ed2 100644 --- a/re2c/src/skeleton/skeleton.cc +++ b/re2c/src/skeleton/skeleton.cc @@ -1,3 +1,4 @@ +#include #include #include -- 2.40.0