From: Ulya Trofimovich Date: Mon, 22 Feb 2016 13:03:38 +0000 (+0000) Subject: Code cleanup: replaced Regexp inheritance hierarchy with tagged union. X-Git-Tag: 1.0~39^2~367 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2f643f40faeab9ebf0d780e6d0c205b4949ffb7d;p=re2c Code cleanup: replaced Regexp inheritance hierarchy with tagged union. --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index c058e01a..563983b8 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -34,13 +34,7 @@ SRC_HDR = \ src/ir/regexp/encoding/utf16/utf16_regexp.h \ src/ir/regexp/encoding/utf16/utf16.h \ src/ir/regexp/empty_class_policy.h \ - src/ir/regexp/regexp_alt.h \ - src/ir/regexp/regexp_match.h \ - src/ir/regexp/regexp_rule.h \ - src/ir/regexp/regexp_cat.h \ - src/ir/regexp/regexp_null.h \ src/ir/regexp/regexp.h \ - src/ir/regexp/regexp_close.h \ src/ir/compile.h \ src/ir/rule_rank.h \ src/ir/skeleton/path.h \ @@ -85,15 +79,12 @@ SRC = \ src/conf/msg.cc \ src/conf/opt.cc \ src/conf/warn.cc \ - src/ir/nfa/calc_size.cc \ src/ir/nfa/nfa.cc \ - src/ir/nfa/split.cc \ src/ir/adfa/adfa.cc \ src/ir/adfa/prepare.cc \ src/ir/dfa/determinization.cc \ src/ir/dfa/fillpoints.cc \ src/ir/dfa/minimization.cc \ - src/ir/regexp/display.cc \ src/ir/regexp/encoding/enc.cc \ src/ir/regexp/encoding/range_suffix.cc \ src/ir/regexp/encoding/utf8/utf8_regexp.cc \ @@ -102,9 +93,9 @@ SRC = \ src/ir/regexp/encoding/utf16/utf16_regexp.cc \ src/ir/regexp/encoding/utf16/utf16.cc \ src/ir/regexp/encoding/utf16/utf16_range.cc \ - src/ir/regexp/fixed_length.cc \ src/ir/regexp/nullable.cc \ src/ir/regexp/regexp.cc \ + src/ir/regexp/split_charset.cc \ src/ir/compile.cc \ src/ir/rule_rank.cc \ src/ir/skeleton/control_flow.cc \ diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index 514e1f5c..c9ce0c2c 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Sat Feb 20 16:51:22 2016 */ +/* Generated by re2c 0.16 on Mon Feb 22 12:14:07 2016 */ #line 1 "../src/parse/lex.re" #include "src/util/c99_stdint.h" #include @@ -13,7 +13,6 @@ #include "src/globals.h" #include "src/ir/regexp/encoding/enc.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_null.h" #include "src/parse/code.h" #include "src/parse/extop.h" #include "src/parse/input.h" @@ -39,10 +38,10 @@ namespace re2c // source code is in ASCII: pointers have type 'char *' // but re2c makes an implicit assumption that YYCTYPE is unsigned // when it generates comparisons -#line 42 "../src/parse/lex.re" +#line 41 "../src/parse/lex.re" -#line 62 "../src/parse/lex.re" +#line 61 "../src/parse/lex.re" Scanner::ParseMode Scanner::echo() @@ -58,7 +57,7 @@ Scanner::ParseMode Scanner::echo() tok = cur; echo: -#line 62 "src/parse/lex.cc" +#line 61 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -116,7 +115,7 @@ echo: } } ++YYCURSOR; -#line 202 "../src/parse/lex.re" +#line 201 "../src/parse/lex.re" { if (!ignore_eoc && opts->target == opt_t::CODE) { @@ -128,15 +127,15 @@ echo: return Stop; } } -#line 132 "src/parse/lex.cc" +#line 131 "src/parse/lex.cc" yy4: ++YYCURSOR; yy5: -#line 213 "../src/parse/lex.re" +#line 212 "../src/parse/lex.re" { goto echo; } -#line 140 "src/parse/lex.cc" +#line 139 "src/parse/lex.cc" yy6: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -145,7 +144,7 @@ yy6: } if (yych == '#') goto yy14; yy7: -#line 189 "../src/parse/lex.re" +#line 188 "../src/parse/lex.re" { if (ignore_eoc) { @@ -159,7 +158,7 @@ yy7: cline++; goto echo; } -#line 163 "src/parse/lex.cc" +#line 162 "src/parse/lex.cc" yy8: yych = (YYCTYPE)*++YYCURSOR; if (yych == '{') goto yy16; @@ -206,7 +205,7 @@ yy14: } yy16: ++YYCURSOR; -#line 78 "../src/parse/lex.re" +#line 77 "../src/parse/lex.re" { if (opts->rFlag) { @@ -222,14 +221,14 @@ yy16: tok = cur; return Parse; } -#line 226 "src/parse/lex.cc" +#line 225 "src/parse/lex.cc" yy18: yyaccept = 2; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych == '\n') goto yy22; if (yych == '\r') goto yy24; yy19: -#line 168 "../src/parse/lex.re" +#line 167 "../src/parse/lex.re" { if (ignore_eoc) { @@ -247,7 +246,7 @@ yy19: tok = pos = cur; goto echo; } -#line 251 "src/parse/lex.cc" +#line 250 "src/parse/lex.cc" yy20: yych = (YYCTYPE)*++YYCURSOR; if (yych == '!') goto yy25; @@ -258,7 +257,7 @@ yy21: goto yy13; yy22: ++YYCURSOR; -#line 150 "../src/parse/lex.re" +#line 149 "../src/parse/lex.re" { cline++; if (ignore_eoc) @@ -277,7 +276,7 @@ yy22: tok = pos = cur; goto echo; } -#line 281 "src/parse/lex.cc" +#line 280 "src/parse/lex.cc" yy24: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy22; @@ -482,12 +481,12 @@ yy65: yy67: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 185 "../src/parse/lex.re" +#line 184 "../src/parse/lex.re" { set_sourceline (); goto echo; } -#line 491 "src/parse/lex.cc" +#line 490 "src/parse/lex.cc" yy69: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy67; @@ -571,7 +570,7 @@ yy87: goto yy13; yy88: ++YYCURSOR; -#line 119 "../src/parse/lex.re" +#line 118 "../src/parse/lex.re" { if (opts->target != opt_t::DOT) { @@ -581,7 +580,7 @@ yy88: ignore_eoc = true; goto echo; } -#line 585 "src/parse/lex.cc" +#line 584 "src/parse/lex.cc" yy90: yych = (YYCTYPE)*++YYCURSOR; if (yych == '2') goto yy96; @@ -592,7 +591,7 @@ yy91: goto yy13; yy92: ++YYCURSOR; -#line 105 "../src/parse/lex.re" +#line 104 "../src/parse/lex.re" { if (!opts->rFlag) { @@ -607,7 +606,7 @@ yy92: tok = cur; return Reuse; } -#line 611 "src/parse/lex.cc" +#line 610 "src/parse/lex.cc" yy94: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'r') goto yy98; @@ -634,7 +633,7 @@ yy99: goto yy13; yy100: ++YYCURSOR; -#line 93 "../src/parse/lex.re" +#line 92 "../src/parse/lex.re" { if (opts->rFlag) { @@ -647,10 +646,10 @@ yy100: tok = cur; return Rules; } -#line 651 "src/parse/lex.cc" +#line 650 "src/parse/lex.cc" yy102: ++YYCURSOR; -#line 139 "../src/parse/lex.re" +#line 138 "../src/parse/lex.re" { tok = pos = cur; ignore_eoc = true; @@ -662,34 +661,34 @@ yy102: } goto echo; } -#line 666 "src/parse/lex.cc" +#line 665 "src/parse/lex.cc" yy104: yych = (YYCTYPE)*++YYCURSOR; if (yych == '2') goto yy107; goto yy13; yy105: ++YYCURSOR; -#line 134 "../src/parse/lex.re" +#line 133 "../src/parse/lex.re" { tok = pos = cur; ignore_eoc = true; goto echo; } -#line 679 "src/parse/lex.cc" +#line 678 "src/parse/lex.cc" yy107: yych = (YYCTYPE)*++YYCURSOR; if (yych != 'c') goto yy13; ++YYCURSOR; -#line 128 "../src/parse/lex.re" +#line 127 "../src/parse/lex.re" { tok = pos = cur; out.wdelay_state_goto (opts->topIndent); ignore_eoc = true; goto echo; } -#line 691 "src/parse/lex.cc" +#line 690 "src/parse/lex.cc" } -#line 216 "../src/parse/lex.re" +#line 215 "../src/parse/lex.re" } @@ -709,7 +708,7 @@ scan: start: -#line 713 "src/parse/lex.cc" +#line 712 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -814,12 +813,12 @@ start: yy112: ++YYCURSOR; yy113: -#line 388 "../src/parse/lex.re" +#line 387 "../src/parse/lex.re" { fatalf("unexpected character: '%c'", *tok); goto scan; } -#line 823 "src/parse/lex.cc" +#line 822 "src/parse/lex.cc" yy114: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -827,11 +826,11 @@ yy114: if (yybm[0+yych] & 16) { goto yy114; } -#line 372 "../src/parse/lex.re" +#line 371 "../src/parse/lex.re" { goto scan; } -#line 835 "src/parse/lex.cc" +#line 834 "src/parse/lex.cc" yy117: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -842,65 +841,65 @@ yy117: if (yych == '#') goto yy145; } yy118: -#line 381 "../src/parse/lex.re" +#line 380 "../src/parse/lex.re" { if (cur == eof) return 0; pos = cur; cline++; goto scan; } -#line 853 "src/parse/lex.cc" +#line 852 "src/parse/lex.cc" yy119: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy117; goto yy113; yy120: ++YYCURSOR; -#line 265 "../src/parse/lex.re" +#line 264 "../src/parse/lex.re" { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } -#line 862 "src/parse/lex.cc" +#line 861 "src/parse/lex.cc" yy122: yych = (YYCTYPE)*++YYCURSOR; if (yych == '}') goto yy147; goto yy113; yy123: ++YYCURSOR; -#line 264 "../src/parse/lex.re" +#line 263 "../src/parse/lex.re" { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } -#line 871 "src/parse/lex.cc" +#line 870 "src/parse/lex.cc" yy125: ++YYCURSOR; yy126: -#line 275 "../src/parse/lex.re" +#line 274 "../src/parse/lex.re" { return *tok; } -#line 879 "src/parse/lex.cc" +#line 878 "src/parse/lex.cc" yy127: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy147; -#line 279 "../src/parse/lex.re" +#line 278 "../src/parse/lex.re" { yylval.op = *tok; return TOKEN_STAR; } -#line 888 "src/parse/lex.cc" +#line 887 "src/parse/lex.cc" yy129: ++YYCURSOR; -#line 283 "../src/parse/lex.re" +#line 282 "../src/parse/lex.re" { yylval.op = *tok; return TOKEN_CLOSE; } -#line 896 "src/parse/lex.cc" +#line 895 "src/parse/lex.cc" yy131: ++YYCURSOR; -#line 367 "../src/parse/lex.re" +#line 366 "../src/parse/lex.re" { yylval.regexp = mkDot(); return TOKEN_REGEXP; } -#line 904 "src/parse/lex.cc" +#line 903 "src/parse/lex.cc" yy133: yych = (YYCTYPE)*++YYCURSOR; if (yych == '*') goto yy149; @@ -923,9 +922,9 @@ yy136: yy137: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy165; -#line 266 "../src/parse/lex.re" +#line 265 "../src/parse/lex.re" { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } -#line 929 "src/parse/lex.cc" +#line 928 "src/parse/lex.cc" yy139: yych = (YYCTYPE)*++YYCURSOR; YYCTXMARKER = YYCURSOR; @@ -949,12 +948,12 @@ yy140: } } yy141: -#line 235 "../src/parse/lex.re" +#line 234 "../src/parse/lex.re" { depth = 1; goto code; } -#line 958 "src/parse/lex.cc" +#line 957 "src/parse/lex.cc" yy142: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -994,31 +993,31 @@ yy145: } yy147: ++YYCURSOR; -#line 259 "../src/parse/lex.re" +#line 258 "../src/parse/lex.re" { tok = cur; return 0; } -#line 1003 "src/parse/lex.cc" +#line 1002 "src/parse/lex.cc" yy149: ++YYCURSOR; -#line 253 "../src/parse/lex.re" +#line 252 "../src/parse/lex.re" { depth = 1; goto comment; } -#line 1011 "src/parse/lex.cc" +#line 1010 "src/parse/lex.cc" yy151: ++YYCURSOR; -#line 250 "../src/parse/lex.re" +#line 249 "../src/parse/lex.re" { goto nextLine; } -#line 1018 "src/parse/lex.cc" +#line 1017 "src/parse/lex.cc" yy153: ++YYCURSOR; YYCURSOR -= 1; -#line 351 "../src/parse/lex.re" +#line 350 "../src/parse/lex.re" { if (!opts->FFlag) { yylval.str = new std::string (tok, tok_len()); @@ -1030,23 +1029,23 @@ yy153: const uint32_t c = static_cast(*s); r = doCat(r, casing ? ichr(c) : schr(c)); } - yylval.regexp = r ? r : new NullOp; + yylval.regexp = r ? r : RegExp::nil(); return TOKEN_REGEXP; } } -#line 1038 "src/parse/lex.cc" +#line 1037 "src/parse/lex.cc" yy155: yych = (YYCTYPE)*++YYCURSOR; goto yy178; yy156: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 346 "../src/parse/lex.re" +#line 345 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); return TOKEN_ID; } -#line 1050 "src/parse/lex.cc" +#line 1049 "src/parse/lex.cc" yy158: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1073,29 +1072,29 @@ yy159: yy160: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '>') goto yy179; -#line 244 "../src/parse/lex.re" +#line 243 "../src/parse/lex.re" { tok += 2; /* skip ":=" */ depth = 0; goto code; } -#line 1083 "src/parse/lex.cc" +#line 1082 "src/parse/lex.cc" yy162: ++YYCURSOR; -#line 272 "../src/parse/lex.re" +#line 271 "../src/parse/lex.re" { return TOKEN_SETUP; } -#line 1090 "src/parse/lex.cc" +#line 1089 "src/parse/lex.cc" yy164: yych = (YYCTYPE)*++YYCURSOR; YYCTXMARKER = YYCURSOR; goto yy182; yy165: ++YYCURSOR; -#line 267 "../src/parse/lex.re" +#line 266 "../src/parse/lex.re" { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } -#line 1099 "src/parse/lex.cc" +#line 1098 "src/parse/lex.cc" yy167: yych = (YYCTYPE)*++YYCURSOR; YYCTXMARKER = YYCURSOR; @@ -1104,11 +1103,11 @@ yy167: yy168: ++YYCURSOR; yy169: -#line 319 "../src/parse/lex.re" +#line 318 "../src/parse/lex.re" { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } -#line 1112 "src/parse/lex.cc" +#line 1111 "src/parse/lex.cc" yy170: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1161,7 +1160,7 @@ yy174: yy175: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 333 "../src/parse/lex.re" +#line 332 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); if (opts->FFlag) @@ -1174,7 +1173,7 @@ yy175: return TOKEN_ID; } } -#line 1178 "src/parse/lex.cc" +#line 1177 "src/parse/lex.cc" yy177: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1197,11 +1196,11 @@ yy178: yy179: ++YYCURSOR; YYCURSOR -= 2; -#line 240 "../src/parse/lex.re" +#line 239 "../src/parse/lex.re" { return *tok; } -#line 1205 "src/parse/lex.cc" +#line 1204 "src/parse/lex.cc" yy181: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1235,11 +1234,11 @@ yy184: yy185: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 269 "../src/parse/lex.re" +#line 268 "../src/parse/lex.re" { return TOKEN_NOCOND; } -#line 1243 "src/parse/lex.cc" +#line 1242 "src/parse/lex.cc" yy187: yych = (YYCTYPE)*++YYCURSOR; YYCTXMARKER = YYCURSOR; @@ -1254,7 +1253,7 @@ yy188: goto yy169; yy189: ++YYCURSOR; -#line 288 "../src/parse/lex.re" +#line 287 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) { @@ -1263,10 +1262,10 @@ yy189: yylval.extop.max = yylval.extop.min; return TOKEN_CLOSESIZE; } -#line 1267 "src/parse/lex.cc" +#line 1266 "src/parse/lex.cc" yy191: ++YYCURSOR; -#line 323 "../src/parse/lex.re" +#line 322 "../src/parse/lex.re" { if (!opts->FFlag) { fatal("curly braces for names only allowed with -F switch"); @@ -1274,7 +1273,7 @@ yy191: yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces return TOKEN_ID; } -#line 1278 "src/parse/lex.cc" +#line 1277 "src/parse/lex.cc" yy193: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'n') goto yy199; @@ -1294,7 +1293,7 @@ yy195: goto yy144; yy197: ++YYCURSOR; -#line 310 "../src/parse/lex.re" +#line 309 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) { @@ -1303,19 +1302,19 @@ yy197: yylval.extop.max = std::numeric_limits::max(); return TOKEN_CLOSESIZE; } -#line 1307 "src/parse/lex.cc" +#line 1306 "src/parse/lex.cc" yy199: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'e') goto yy204; goto yy144; yy200: ++YYCURSOR; -#line 331 "../src/parse/lex.re" +#line 330 "../src/parse/lex.re" { lex_conf (); return TOKEN_CONF; } -#line 1316 "src/parse/lex.cc" +#line 1315 "src/parse/lex.cc" yy202: ++YYCURSOR; -#line 297 "../src/parse/lex.re" +#line 296 "../src/parse/lex.re" { const char * p = strchr (tok, ','); if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) @@ -1328,7 +1327,7 @@ yy202: } return TOKEN_CLOSESIZE; } -#line 1332 "src/parse/lex.cc" +#line 1331 "src/parse/lex.cc" yy204: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '0') goto yy206; @@ -1384,12 +1383,12 @@ yy209: yy211: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 376 "../src/parse/lex.re" +#line 375 "../src/parse/lex.re" { set_sourceline (); goto scan; } -#line 1393 "src/parse/lex.cc" +#line 1392 "src/parse/lex.cc" yy213: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy211; @@ -1414,12 +1413,12 @@ yy217: if (yych == '\n') goto yy144; goto yy214; } -#line 392 "../src/parse/lex.re" +#line 391 "../src/parse/lex.re" flex_name: -#line 1423 "src/parse/lex.cc" +#line 1422 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1428,32 +1427,32 @@ flex_name: if (yych == '\r') goto yy224; ++YYCURSOR; yy221: -#line 403 "../src/parse/lex.re" +#line 402 "../src/parse/lex.re" { YYCURSOR = tok; goto start; } -#line 1437 "src/parse/lex.cc" +#line 1436 "src/parse/lex.cc" yy222: ++YYCURSOR; -#line 397 "../src/parse/lex.re" +#line 396 "../src/parse/lex.re" { YYCURSOR = tok; lexer_state = LEX_NORMAL; return TOKEN_FID_END; } -#line 1446 "src/parse/lex.cc" +#line 1445 "src/parse/lex.cc" yy224: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '\n') goto yy222; goto yy221; } -#line 407 "../src/parse/lex.re" +#line 406 "../src/parse/lex.re" code: -#line 1457 "src/parse/lex.cc" +#line 1456 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1514,7 +1513,7 @@ code: } yy227: ++YYCURSOR; -#line 470 "../src/parse/lex.re" +#line 469 "../src/parse/lex.re" { if (cur == eof) { @@ -1526,15 +1525,15 @@ yy227: } goto code; } -#line 1530 "src/parse/lex.cc" +#line 1529 "src/parse/lex.cc" yy229: ++YYCURSOR; yy230: -#line 484 "../src/parse/lex.re" +#line 483 "../src/parse/lex.re" { goto code; } -#line 1538 "src/parse/lex.cc" +#line 1537 "src/parse/lex.cc" yy231: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1553,7 +1552,7 @@ yy231: } } yy232: -#line 451 "../src/parse/lex.re" +#line 450 "../src/parse/lex.re" { if (depth == 0) { @@ -1573,7 +1572,7 @@ yy232: cline++; goto code; } -#line 1577 "src/parse/lex.cc" +#line 1576 "src/parse/lex.cc" yy233: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1586,7 +1585,7 @@ yy234: goto yy251; yy235: ++YYCURSOR; -#line 423 "../src/parse/lex.re" +#line 422 "../src/parse/lex.re" { if (depth == 0) { @@ -1598,10 +1597,10 @@ yy235: } goto code; } -#line 1602 "src/parse/lex.cc" +#line 1601 "src/parse/lex.cc" yy237: ++YYCURSOR; -#line 411 "../src/parse/lex.re" +#line 410 "../src/parse/lex.re" { if (depth == 0) { @@ -1614,7 +1613,7 @@ yy237: } goto code; } -#line 1618 "src/parse/lex.cc" +#line 1617 "src/parse/lex.cc" yy239: yyaccept = 2; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1626,7 +1625,7 @@ yy239: } yy240: YYCURSOR -= 1; -#line 438 "../src/parse/lex.re" +#line 437 "../src/parse/lex.re" { if (depth == 0) { @@ -1640,7 +1639,7 @@ yy240: cline++; goto code; } -#line 1644 "src/parse/lex.cc" +#line 1643 "src/parse/lex.cc" yy241: yych = (YYCTYPE)*++YYCURSOR; goto yy240; @@ -1675,11 +1674,11 @@ yy246: if (yych >= '#') goto yy249; yy247: ++YYCURSOR; -#line 481 "../src/parse/lex.re" +#line 480 "../src/parse/lex.re" { goto code; } -#line 1683 "src/parse/lex.cc" +#line 1682 "src/parse/lex.cc" yy249: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1769,12 +1768,12 @@ yy263: yy265: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 434 "../src/parse/lex.re" +#line 433 "../src/parse/lex.re" { set_sourceline (); goto code; } -#line 1778 "src/parse/lex.cc" +#line 1777 "src/parse/lex.cc" yy267: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy265; @@ -1803,12 +1802,12 @@ yy271: if (yych == '\n') goto yy244; goto yy268; } -#line 487 "../src/parse/lex.re" +#line 486 "../src/parse/lex.re" comment: -#line 1812 "src/parse/lex.cc" +#line 1811 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -1855,7 +1854,7 @@ comment: } ++YYCURSOR; yy275: -#line 519 "../src/parse/lex.re" +#line 518 "../src/parse/lex.re" { if (cur == eof) { @@ -1863,7 +1862,7 @@ yy275: } goto comment; } -#line 1867 "src/parse/lex.cc" +#line 1866 "src/parse/lex.cc" yy276: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yybm[0+yych] & 32) { @@ -1871,7 +1870,7 @@ yy276: } if (yych == '#') goto yy283; yy277: -#line 510 "../src/parse/lex.re" +#line 509 "../src/parse/lex.re" { if (cur == eof) { @@ -1881,7 +1880,7 @@ yy277: cline++; goto comment; } -#line 1885 "src/parse/lex.cc" +#line 1884 "src/parse/lex.cc" yy278: yych = (YYCTYPE)*++YYCURSOR; if (yych == '/') goto yy285; @@ -1915,7 +1914,7 @@ yy283: } yy285: ++YYCURSOR; -#line 491 "../src/parse/lex.re" +#line 490 "../src/parse/lex.re" { if (--depth == 0) { @@ -1926,16 +1925,16 @@ yy285: goto comment; } } -#line 1930 "src/parse/lex.cc" +#line 1929 "src/parse/lex.cc" yy287: ++YYCURSOR; -#line 501 "../src/parse/lex.re" +#line 500 "../src/parse/lex.re" { ++depth; fatal("ambiguous /* found"); goto comment; } -#line 1939 "src/parse/lex.cc" +#line 1938 "src/parse/lex.cc" yy289: yych = (YYCTYPE)*++YYCURSOR; if (yych != 'i') goto yy282; @@ -1992,12 +1991,12 @@ yy297: yy299: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 506 "../src/parse/lex.re" +#line 505 "../src/parse/lex.re" { set_sourceline (); goto comment; } -#line 2001 "src/parse/lex.cc" +#line 2000 "src/parse/lex.cc" yy301: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy299; @@ -2022,28 +2021,28 @@ yy305: if (yych == '\n') goto yy282; goto yy302; } -#line 526 "../src/parse/lex.re" +#line 525 "../src/parse/lex.re" nextLine: -#line 2031 "src/parse/lex.cc" +#line 2030 "src/parse/lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == '\n') goto yy310; ++YYCURSOR; -#line 537 "../src/parse/lex.re" +#line 536 "../src/parse/lex.re" { if(cur == eof) { return 0; } goto nextLine; } -#line 2044 "src/parse/lex.cc" +#line 2043 "src/parse/lex.cc" yy310: ++YYCURSOR; -#line 530 "../src/parse/lex.re" +#line 529 "../src/parse/lex.re" { if(cur == eof) { return 0; } @@ -2051,9 +2050,9 @@ yy310: cline++; goto scan; } -#line 2055 "src/parse/lex.cc" +#line 2054 "src/parse/lex.cc" } -#line 542 "../src/parse/lex.re" +#line 541 "../src/parse/lex.re" } @@ -2077,35 +2076,35 @@ RegExp *Scanner::lex_cls(bool neg) uint32_t u, l; fst: -#line 2081 "src/parse/lex.cc" +#line 2080 "src/parse/lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == ']') goto yy315; -#line 566 "../src/parse/lex.re" +#line 565 "../src/parse/lex.re" { l = lex_cls_chr(); goto snd; } -#line 2089 "src/parse/lex.cc" +#line 2088 "src/parse/lex.cc" yy315: ++YYCURSOR; -#line 565 "../src/parse/lex.re" +#line 564 "../src/parse/lex.re" { goto end; } -#line 2094 "src/parse/lex.cc" +#line 2093 "src/parse/lex.cc" } -#line 567 "../src/parse/lex.re" +#line 566 "../src/parse/lex.re" snd: -#line 2100 "src/parse/lex.cc" +#line 2099 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*(YYMARKER = YYCURSOR); if (yych == '-') goto yy320; yy319: -#line 570 "../src/parse/lex.re" +#line 569 "../src/parse/lex.re" { u = l; goto add; } -#line 2109 "src/parse/lex.cc" +#line 2108 "src/parse/lex.cc" yy320: yych = (YYCTYPE)*++YYCURSOR; if (yych != ']') goto yy322; @@ -2114,7 +2113,7 @@ yy320: yy322: ++YYCURSOR; YYCURSOR -= 1; -#line 571 "../src/parse/lex.re" +#line 570 "../src/parse/lex.re" { u = lex_cls_chr(); if (l > u) { @@ -2123,9 +2122,9 @@ yy322: } goto add; } -#line 2127 "src/parse/lex.cc" +#line 2126 "src/parse/lex.cc" } -#line 579 "../src/parse/lex.re" +#line 578 "../src/parse/lex.re" add: if (!(s = opts->encoding.encodeRange(l, u))) { @@ -2144,7 +2143,7 @@ uint32_t Scanner::lex_cls_chr() { tok = cur; -#line 2148 "src/parse/lex.cc" +#line 2147 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -2153,14 +2152,14 @@ uint32_t Scanner::lex_cls_chr() if (yych == '\n') goto yy328; if (yych == '\\') goto yy330; ++YYCURSOR; -#line 602 "../src/parse/lex.re" +#line 601 "../src/parse/lex.re" { return static_cast(tok[0]); } -#line 2159 "src/parse/lex.cc" +#line 2158 "src/parse/lex.cc" yy328: ++YYCURSOR; -#line 597 "../src/parse/lex.re" +#line 596 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2164 "src/parse/lex.cc" +#line 2163 "src/parse/lex.cc" yy330: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { @@ -2209,31 +2208,31 @@ yy330: } } } -#line 600 "../src/parse/lex.re" +#line 599 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2215 "src/parse/lex.cc" +#line 2214 "src/parse/lex.cc" yy332: ++YYCURSOR; -#line 615 "../src/parse/lex.re" +#line 614 "../src/parse/lex.re" { warn.useless_escape(tline, tok - pos, tok[1]); return static_cast(tok[1]); } -#line 2223 "src/parse/lex.cc" +#line 2222 "src/parse/lex.cc" yy334: ++YYCURSOR; -#line 613 "../src/parse/lex.re" +#line 612 "../src/parse/lex.re" { return static_cast('-'); } -#line 2228 "src/parse/lex.cc" +#line 2227 "src/parse/lex.cc" yy336: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '/') goto yy337; if (yych <= '7') goto yy361; yy337: -#line 599 "../src/parse/lex.re" +#line 598 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2237 "src/parse/lex.cc" +#line 2236 "src/parse/lex.cc" yy338: yych = (YYCTYPE)*++YYCURSOR; goto yy337; @@ -2249,9 +2248,9 @@ yy339: if (yych <= 'f') goto yy363; } yy340: -#line 598 "../src/parse/lex.re" +#line 597 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2255 "src/parse/lex.cc" +#line 2254 "src/parse/lex.cc" yy341: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2267,49 +2266,49 @@ yy341: } yy342: ++YYCURSOR; -#line 612 "../src/parse/lex.re" +#line 611 "../src/parse/lex.re" { return static_cast('\\'); } -#line 2273 "src/parse/lex.cc" +#line 2272 "src/parse/lex.cc" yy344: ++YYCURSOR; -#line 614 "../src/parse/lex.re" +#line 613 "../src/parse/lex.re" { return static_cast(']'); } -#line 2278 "src/parse/lex.cc" +#line 2277 "src/parse/lex.cc" yy346: ++YYCURSOR; -#line 605 "../src/parse/lex.re" +#line 604 "../src/parse/lex.re" { return static_cast('\a'); } -#line 2283 "src/parse/lex.cc" +#line 2282 "src/parse/lex.cc" yy348: ++YYCURSOR; -#line 606 "../src/parse/lex.re" +#line 605 "../src/parse/lex.re" { return static_cast('\b'); } -#line 2288 "src/parse/lex.cc" +#line 2287 "src/parse/lex.cc" yy350: ++YYCURSOR; -#line 607 "../src/parse/lex.re" +#line 606 "../src/parse/lex.re" { return static_cast('\f'); } -#line 2293 "src/parse/lex.cc" +#line 2292 "src/parse/lex.cc" yy352: ++YYCURSOR; -#line 608 "../src/parse/lex.re" +#line 607 "../src/parse/lex.re" { return static_cast('\n'); } -#line 2298 "src/parse/lex.cc" +#line 2297 "src/parse/lex.cc" yy354: ++YYCURSOR; -#line 609 "../src/parse/lex.re" +#line 608 "../src/parse/lex.re" { return static_cast('\r'); } -#line 2303 "src/parse/lex.cc" +#line 2302 "src/parse/lex.cc" yy356: ++YYCURSOR; -#line 610 "../src/parse/lex.re" +#line 609 "../src/parse/lex.re" { return static_cast('\t'); } -#line 2308 "src/parse/lex.cc" +#line 2307 "src/parse/lex.cc" yy358: ++YYCURSOR; -#line 611 "../src/parse/lex.re" +#line 610 "../src/parse/lex.re" { return static_cast('\v'); } -#line 2313 "src/parse/lex.cc" +#line 2312 "src/parse/lex.cc" yy360: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2372,9 +2371,9 @@ yy365: } yy366: ++YYCURSOR; -#line 604 "../src/parse/lex.re" +#line 603 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2378 "src/parse/lex.cc" +#line 2377 "src/parse/lex.cc" yy368: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2401,9 +2400,9 @@ yy369: } yy370: ++YYCURSOR; -#line 603 "../src/parse/lex.re" +#line 602 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2407 "src/parse/lex.cc" +#line 2406 "src/parse/lex.cc" yy372: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2427,7 +2426,7 @@ yy373: goto yy362; } } -#line 619 "../src/parse/lex.re" +#line 618 "../src/parse/lex.re" } @@ -2436,7 +2435,7 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) end = false; tok = cur; -#line 2440 "src/parse/lex.cc" +#line 2439 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -2445,17 +2444,17 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) if (yych == '\n') goto yy378; if (yych == '\\') goto yy380; ++YYCURSOR; -#line 632 "../src/parse/lex.re" +#line 631 "../src/parse/lex.re" { end = tok[0] == quote; return static_cast(tok[0]); } -#line 2454 "src/parse/lex.cc" +#line 2453 "src/parse/lex.cc" yy378: ++YYCURSOR; -#line 627 "../src/parse/lex.re" +#line 626 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2459 "src/parse/lex.cc" +#line 2458 "src/parse/lex.cc" yy380: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { @@ -2501,28 +2500,28 @@ yy380: } } } -#line 630 "../src/parse/lex.re" +#line 629 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2507 "src/parse/lex.cc" +#line 2506 "src/parse/lex.cc" yy382: ++YYCURSOR; -#line 646 "../src/parse/lex.re" +#line 645 "../src/parse/lex.re" { if (tok[1] != quote) { warn.useless_escape(tline, tok - pos, tok[1]); } return static_cast(tok[1]); } -#line 2517 "src/parse/lex.cc" +#line 2516 "src/parse/lex.cc" yy384: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '/') goto yy385; if (yych <= '7') goto yy407; yy385: -#line 629 "../src/parse/lex.re" +#line 628 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2526 "src/parse/lex.cc" +#line 2525 "src/parse/lex.cc" yy386: yych = (YYCTYPE)*++YYCURSOR; goto yy385; @@ -2538,9 +2537,9 @@ yy387: if (yych <= 'f') goto yy409; } yy388: -#line 628 "../src/parse/lex.re" +#line 627 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2544 "src/parse/lex.cc" +#line 2543 "src/parse/lex.cc" yy389: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2556,44 +2555,44 @@ yy389: } yy390: ++YYCURSOR; -#line 645 "../src/parse/lex.re" +#line 644 "../src/parse/lex.re" { return static_cast('\\'); } -#line 2562 "src/parse/lex.cc" +#line 2561 "src/parse/lex.cc" yy392: ++YYCURSOR; -#line 638 "../src/parse/lex.re" +#line 637 "../src/parse/lex.re" { return static_cast('\a'); } -#line 2567 "src/parse/lex.cc" +#line 2566 "src/parse/lex.cc" yy394: ++YYCURSOR; -#line 639 "../src/parse/lex.re" +#line 638 "../src/parse/lex.re" { return static_cast('\b'); } -#line 2572 "src/parse/lex.cc" +#line 2571 "src/parse/lex.cc" yy396: ++YYCURSOR; -#line 640 "../src/parse/lex.re" +#line 639 "../src/parse/lex.re" { return static_cast('\f'); } -#line 2577 "src/parse/lex.cc" +#line 2576 "src/parse/lex.cc" yy398: ++YYCURSOR; -#line 641 "../src/parse/lex.re" +#line 640 "../src/parse/lex.re" { return static_cast('\n'); } -#line 2582 "src/parse/lex.cc" +#line 2581 "src/parse/lex.cc" yy400: ++YYCURSOR; -#line 642 "../src/parse/lex.re" +#line 641 "../src/parse/lex.re" { return static_cast('\r'); } -#line 2587 "src/parse/lex.cc" +#line 2586 "src/parse/lex.cc" yy402: ++YYCURSOR; -#line 643 "../src/parse/lex.re" +#line 642 "../src/parse/lex.re" { return static_cast('\t'); } -#line 2592 "src/parse/lex.cc" +#line 2591 "src/parse/lex.cc" yy404: ++YYCURSOR; -#line 644 "../src/parse/lex.re" +#line 643 "../src/parse/lex.re" { return static_cast('\v'); } -#line 2597 "src/parse/lex.cc" +#line 2596 "src/parse/lex.cc" yy406: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2656,9 +2655,9 @@ yy411: } yy412: ++YYCURSOR; -#line 637 "../src/parse/lex.re" +#line 636 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2662 "src/parse/lex.cc" +#line 2661 "src/parse/lex.cc" yy414: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2685,9 +2684,9 @@ yy415: } yy416: ++YYCURSOR; -#line 636 "../src/parse/lex.re" +#line 635 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2691 "src/parse/lex.cc" +#line 2690 "src/parse/lex.cc" yy418: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2711,7 +2710,7 @@ yy419: goto yy408; } } -#line 652 "../src/parse/lex.re" +#line 651 "../src/parse/lex.re" } @@ -2721,7 +2720,7 @@ RegExp *Scanner::lex_str(char quote, bool casing) for (bool end;;) { const uint32_t c = lex_str_chr(quote, end); if (end) { - return r ? r : new NullOp; + return r ? r : RegExp::nil(); } r = doCat(r, casing ? ichr(c) : schr(c)); } @@ -2732,7 +2731,7 @@ void Scanner::set_sourceline () sourceline: tok = cur; -#line 2736 "src/parse/lex.cc" +#line 2735 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2781,14 +2780,14 @@ sourceline: yy422: ++YYCURSOR; yy423: -#line 695 "../src/parse/lex.re" +#line 694 "../src/parse/lex.re" { goto sourceline; } -#line 2789 "src/parse/lex.cc" +#line 2788 "src/parse/lex.cc" yy424: ++YYCURSOR; -#line 683 "../src/parse/lex.re" +#line 682 "../src/parse/lex.re" { if (cur == eof) { @@ -2801,7 +2800,7 @@ yy424: tok = cur; return; } -#line 2805 "src/parse/lex.cc" +#line 2804 "src/parse/lex.cc" yy426: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych == '\n') goto yy423; @@ -2813,7 +2812,7 @@ yy427: if (yybm[0+yych] & 64) { goto yy427; } -#line 672 "../src/parse/lex.re" +#line 671 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok, cur, cline)) { @@ -2821,7 +2820,7 @@ yy427: } goto sourceline; } -#line 2825 "src/parse/lex.cc" +#line 2824 "src/parse/lex.cc" yy430: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -2838,12 +2837,12 @@ yy432: goto yy423; yy433: ++YYCURSOR; -#line 679 "../src/parse/lex.re" +#line 678 "../src/parse/lex.re" { escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } -#line 2847 "src/parse/lex.cc" +#line 2846 "src/parse/lex.cc" yy435: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -2851,7 +2850,7 @@ yy435: if (yych == '\n') goto yy432; goto yy430; } -#line 698 "../src/parse/lex.re" +#line 697 "../src/parse/lex.re" } diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index 831c74c1..3b8f7de1 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -88,10 +88,6 @@ #include "src/ir/regexp/encoding/enc.h" #include "src/ir/regexp/encoding/range_suffix.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" #include "src/ir/rule_rank.h" #include "src/ir/skeleton/skeleton.h" #include "src/parse/code.h" @@ -120,9 +116,9 @@ static counter_t rank_counter; static std::vector condnames; static re2c::SpecMap specMap; static Spec spec; -static RuleOp *specNone = NULL; -static RuleOpList specStar; -static RuleOp * star_default = NULL; +static RegExp *specNone = NULL; +static RuleList specStar; +static RegExp *star_default = NULL; static Scanner *in = NULL; static Scanner::ParseMode parseMode; static SetupMap ruleSetupMap; @@ -169,7 +165,7 @@ void context_rule condnames.push_back (*it); } - RuleOp * rule = new RuleOp + RegExp *rule = RegExp::rule ( loc , expr , look @@ -206,10 +202,10 @@ void default_rule(CondList *clist, const Code * code) context_check(clist); for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - RuleOp * def = new RuleOp + RegExp * def = RegExp::rule ( code->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , code , NULL @@ -593,11 +589,11 @@ static const yytype_int8 yyrhs[] = /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 193, 193, 195, 199, 203, 211, 219, 223, 227, - 231, 247, 264, 268, 274, 279, 285, 289, 303, 319, - 324, 330, 345, 362, 381, 387, 395, 398, 405, 411, - 421, 424, 432, 435, 442, 446, 453, 457, 464, 468, - 475, 479, 494, 513, 517, 521, 525, 532, 542, 546 + 0, 189, 189, 191, 195, 199, 207, 215, 219, 223, + 227, 243, 260, 264, 270, 275, 281, 285, 299, 315, + 320, 326, 341, 358, 377, 383, 391, 394, 401, 407, + 417, 420, 428, 431, 438, 442, 449, 453, 460, 464, + 471, 475, 490, 509, 513, 517, 521, 528, 538, 542 }; #endif @@ -1623,7 +1619,7 @@ yyreduce: { in->fatal("condition or '<*>' required when using -c switch"); } - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( (yyvsp[(3) - (3)].code)->loc , (yyvsp[(1) - (3)].regexp) , (yyvsp[(2) - (3)].regexp) @@ -1640,10 +1636,10 @@ yyreduce: { if (opts->cFlag) in->fatal("condition or '<*>' required when using -c switch"); - RuleOp * def = new RuleOp + RegExp * def = RegExp::rule ( (yyvsp[(2) - (2)].code)->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , (yyvsp[(2) - (2)].code) , NULL @@ -1699,7 +1695,7 @@ yyreduce: { context_check(NULL); - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( (yyvsp[(7) - (7)].code)->loc , (yyvsp[(4) - (7)].regexp) , (yyvsp[(5) - (7)].regexp) @@ -1718,7 +1714,7 @@ yyreduce: assert((yyvsp[(7) - (7)].str)); context_check(NULL); Loc loc (in->get_fname (), in->get_cline ()); - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( loc , (yyvsp[(4) - (7)].regexp) , (yyvsp[(5) - (7)].regexp) @@ -1755,10 +1751,10 @@ yyreduce: { in->fatal ("code to default rule '*' is already defined"); } - star_default = new RuleOp + star_default = RegExp::rule ( (yyvsp[(5) - (5)].code)->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , (yyvsp[(5) - (5)].code) , NULL @@ -1774,10 +1770,10 @@ yyreduce: { in->fatal("code to handle illegal condition already defined"); } - (yyval.regexp) = specNone = new RuleOp + (yyval.regexp) = specNone = RegExp::rule ( (yyvsp[(3) - (3)].code)->loc - , new NullOp - , new NullOp + , RegExp::nil() + , RegExp::nil() , rank_counter.next () , (yyvsp[(3) - (3)].code) , (yyvsp[(2) - (3)].str) @@ -1796,10 +1792,10 @@ yyreduce: in->fatal("code to handle illegal condition already defined"); } Loc loc (in->get_fname (), in->get_cline ()); - (yyval.regexp) = specNone = new RuleOp + (yyval.regexp) = specNone = RegExp::rule ( loc - , new NullOp - , new NullOp + , RegExp::nil() + , RegExp::nil() , rank_counter.next () , NULL , (yyvsp[(3) - (3)].str) @@ -1873,7 +1869,7 @@ yyreduce: case 32: { - (yyval.regexp) = new NullOp; + (yyval.regexp) = RegExp::nil(); ;} break; @@ -1922,7 +1918,7 @@ yyreduce: case 39: { - (yyval.regexp) = new CatOp((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp)); + (yyval.regexp) = RegExp::cat((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp)); ;} break; @@ -1939,13 +1935,13 @@ yyreduce: switch((yyvsp[(2) - (2)].op)) { case '*': - (yyval.regexp) = new CloseOp((yyvsp[(1) - (2)].regexp)); + (yyval.regexp) = RegExp::iter((yyvsp[(1) - (2)].regexp)); break; case '+': - (yyval.regexp) = new CatOp (new CloseOp((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp)); + (yyval.regexp) = RegExp::cat(RegExp::iter((yyvsp[(1) - (2)].regexp)), (yyvsp[(1) - (2)].regexp)); break; case '?': - (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), new NullOp()); + (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), RegExp::nil()); break; } ;} @@ -1966,7 +1962,7 @@ yyreduce: { (yyval.regexp) = repeat_from_to ((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].extop).min, (yyvsp[(2) - (2)].extop).max); } - (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : new NullOp; + (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : RegExp::nil(); ;} break; @@ -2339,9 +2335,9 @@ void parse(Scanner& i, Output & o) // merge <*> rules to all conditions with lowest priority for (it = specMap.begin(); it != specMap.end(); ++it) { - for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) { - RuleOp *r = new RuleOp(*itOp, rank_counter.next()); + RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next()); it->second.add (r); } if (star_default) @@ -2443,7 +2439,7 @@ void parse(Scanner& i, Output & o) void parse_cleanup() { - RegExp::vFreeList.clear(); + RegExp::flist.clear(); Range::vFreeList.clear(); RangeSuffix::freeList.clear(); Code::freelist.clear(); diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index 274be97e..6826cd8c 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -38,7 +38,7 @@ smart_ptr compile (Spec & spec, Output & output, const std::string & cond, // Don't forget to include zero and upper bound, even if they // do not explicitely apper in ranges. std::set bounds; - spec.re->split(bounds); + split(spec.re, bounds); bounds.insert(0); bounds.insert(cunits); charset_t cs; @@ -54,7 +54,7 @@ smart_ptr compile (Spec & spec, Output & output, const std::string & cond, // skeleton must be constructed after DFA construction // but prior to any other DFA transformations Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line); - spec.re->nullable_rules(skeleton->nullable_rules); + nullable_rules(spec.re, skeleton->nullable_rules); minimization(dfa); diff --git a/re2c/src/ir/nfa/calc_size.cc b/re2c/src/ir/nfa/calc_size.cc deleted file mode 100644 index 39f0b4e1..00000000 --- a/re2c/src/ir/nfa/calc_size.cc +++ /dev/null @@ -1,50 +0,0 @@ -#include "src/util/c99_stdint.h" - -#include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" - -namespace re2c -{ - -uint32_t AltOp::calc_size() const -{ - return exp1->calc_size() - + exp2->calc_size() - + 1; -} - -uint32_t CatOp::calc_size() const -{ - return exp1->calc_size() - + exp2->calc_size(); -} - -uint32_t CloseOp::calc_size() const -{ - return exp->calc_size() + 1; -} - -uint32_t MatchOp::calc_size() const -{ - return 1; -} - -uint32_t NullOp::calc_size() const -{ - return 0; -} - -uint32_t RuleOp::calc_size() const -{ - const uint32_t n = ctx->calc_size(); - return exp->calc_size() - + (n > 0 ? n + 1 : 0) - + 1; -} - -} // end namespace re2c diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index c1ed2409..5769aa7b 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -1,19 +1,16 @@ #include "src/ir/nfa/nfa.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" namespace re2c { +static uint32_t calc_size(const RegExp *re); +static nfa_state_t *compile(const RegExp *re, nfa_t &nfa, nfa_state_t *n); + nfa_t::nfa_t(RegExp *re) - : max_size(re->calc_size()) + : max_size(calc_size(re)) , size(0) , states(new nfa_state_t[max_size]) - , root(re->compile(*this, NULL)) + , root(compile(re, *this, NULL)) {} nfa_t::~nfa_t() @@ -21,58 +18,78 @@ nfa_t::~nfa_t() delete[] states; } -nfa_state_t *AltOp::compile(nfa_t &nfa, nfa_state_t *t) -{ - nfa_state_t *s = &nfa.states[nfa.size++]; - s->alt(exp1->compile(nfa, t), - exp2->compile(nfa, t)); - return s; -} - -nfa_state_t *CatOp::compile(nfa_t &nfa, nfa_state_t *t) +uint32_t calc_size(const RegExp *re) { - nfa_state_t *s2 = exp2->compile(nfa, t); - nfa_state_t *s1 = exp1->compile(nfa, s2); - return s1; -} - -nfa_state_t *CloseOp::compile(nfa_t &nfa, nfa_state_t *t) -{ - nfa_state_t *s = &nfa.states[nfa.size++]; - s->alt(t, exp->compile(nfa, s)); - return s; + switch (re->tag) { + default: + case RegExp::NIL: + return 0; + case RegExp::SYM: + return 1; + case RegExp::ALT: + return calc_size(re->pld.alt.re1) + + calc_size(re->pld.alt.re2) + + 1; + case RegExp::CAT: + return calc_size(re->pld.cat.re1) + + calc_size(re->pld.cat.re2); + case RegExp::ITER: + return calc_size(re->pld.iter.re) + + 1; + case RegExp::RULE: { + const uint32_t re_size = calc_size(re->pld.rule.re) + 1; + uint32_t ctx_size = calc_size(re->pld.rule.ctx); + if (ctx_size > 0) { + ctx_size += 1; + } + return re_size + ctx_size; + } + } } -nfa_state_t *MatchOp::compile(nfa_t &nfa, nfa_state_t *t) +nfa_state_t *compile(const RegExp *re, nfa_t &nfa, nfa_state_t *t) { - nfa_state_t *s = &nfa.states[nfa.size++]; - s->ran(t, match); + nfa_state_t *s = NULL; + switch (re->tag) { + case RegExp::NIL: + s = t; + break; + case RegExp::SYM: + s = &nfa.states[nfa.size++]; + s->ran(t, re->pld.sym.range); + break; + case RegExp::ALT: + s = &nfa.states[nfa.size++]; + s->alt(compile(re->pld.alt.re1, nfa, t), + compile(re->pld.alt.re2, nfa, t)); + break; + case RegExp::CAT: + s = compile(re->pld.cat.re2, nfa, t); + s = compile(re->pld.cat.re1, nfa, s); + break; + case RegExp::ITER: + s = &nfa.states[nfa.size++]; + s->alt(t, compile(re->pld.iter.re, nfa, s)); + break; + case RegExp::RULE: { + s = &nfa.states[nfa.size++]; + RuleInfo *info = re->pld.rule.info; + s->fin(info); + if (info->ctx_len == ~0u) { + // dynamic context + nfa_state_t *q = compile(re->pld.rule.ctx, nfa, s); + s = &nfa.states[nfa.size++]; + s->ctx(q); + } else if (info->ctx_len > 0) { + // static context + s = compile(re->pld.rule.ctx, nfa, s); + } + s = compile(re->pld.rule.re, nfa, s); + break; + } + } return s; } -nfa_state_t *NullOp::compile(nfa_t &, nfa_state_t *t) -{ - return t; -} - -nfa_state_t *RuleOp::compile(nfa_t &nfa, nfa_state_t *) -{ - nfa_state_t *s3 = &nfa.states[nfa.size++]; - s3->fin(info); - - if (info->ctx_len == ~0u) - { - nfa_state_t *s2 = &nfa.states[nfa.size++]; - s2->ctx(ctx->compile(nfa, s3)); - s3 = s2; - } - else if (info->ctx_len > 0) - { - s3 = ctx->compile(nfa, s3); - } - - nfa_state_t *s1 = exp->compile(nfa, s3); - return s1; -} } // namespace re2c diff --git a/re2c/src/ir/nfa/split.cc b/re2c/src/ir/nfa/split.cc deleted file mode 100644 index 73e63040..00000000 --- a/re2c/src/ir/nfa/split.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "src/util/c99_stdint.h" -#include - -#include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" -#include "src/util/range.h" - -namespace re2c { - -void AltOp::split (std::set & cs) -{ - exp1->split (cs); - exp2->split (cs); -} - -void CatOp::split (std::set & cs) -{ - exp1->split (cs); - exp2->split (cs); -} - -void CloseOp::split (std::set & cs) -{ - exp->split (cs); -} - -void MatchOp::split (std::set & cs) -{ - for (Range *r = match; r; r = r->next ()) - { - cs.insert (r->lower ()); - cs.insert (r->upper ()); - } -} - -void NullOp::split (std::set &) {} - -void RuleOp::split (std::set & cs) -{ - exp->split (cs); - ctx->split (cs); -} - -} // namespace re2c diff --git a/re2c/src/ir/regexp/display.cc b/re2c/src/ir/regexp/display.cc deleted file mode 100644 index d139dc53..00000000 --- a/re2c/src/ir/regexp/display.cc +++ /dev/null @@ -1,51 +0,0 @@ -#include - -#include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" - -namespace re2c -{ - -std::ostream & operator << (std::ostream & o, const RegExp & re) -{ - re.display (o); - return o; -} - -void AltOp::display (std::ostream & o) const -{ - o << exp1 << "|" << exp2; -} - -void CatOp::display (std::ostream & o) const -{ - o << exp1 << exp2; -} - -void CloseOp::display (std::ostream & o) const -{ - o << exp << "+"; -} - -void MatchOp::display (std::ostream & o) const -{ - o << match; -} - -void NullOp::display (std::ostream & o) const -{ - o << "_"; -} - -void RuleOp::display (std::ostream & o) const -{ - o << exp << "/" << ctx << ";"; -} - -} // end namespace re2c - diff --git a/re2c/src/ir/regexp/encoding/range_suffix.cc b/re2c/src/ir/regexp/encoding/range_suffix.cc index 486bd558..c5e519c4 100644 --- a/re2c/src/ir/regexp/encoding/range_suffix.cc +++ b/re2c/src/ir/regexp/encoding/range_suffix.cc @@ -1,6 +1,5 @@ #include "src/ir/regexp/encoding/range_suffix.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_match.h" #include "src/util/range.h" namespace re2c { @@ -13,7 +12,7 @@ RegExp * to_regexp (RangeSuffix * p) { return p ? emit (p, NULL) - : new MatchOp (NULL); + : RegExp::sym(NULL); } /* @@ -28,7 +27,7 @@ RegExp * emit(RangeSuffix * p, RegExp * re) RegExp * regexp = NULL; for (; p != NULL; p = p->next) { - RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re); + RegExp * re1 = doCat(RegExp::sym(Range::ran (p->l, p->h + 1)), re); regexp = doAlt(regexp, emit(p->child, re1)); } return regexp; diff --git a/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc index 3b244290..02b00a50 100644 --- a/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc +++ b/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc @@ -3,8 +3,7 @@ #include "src/ir/regexp/encoding/utf16/utf16_regexp.h" #include "src/ir/regexp/encoding/range_suffix.h" #include "src/ir/regexp/encoding/utf16/utf16_range.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp.h" #include "src/util/range.h" namespace re2c { @@ -12,12 +11,12 @@ namespace re2c { RegExp * UTF16Symbol(utf16::rune r) { if (r <= utf16::MAX_1WORD_RUNE) - return new MatchOp(Range::sym (r)); + return RegExp::sym(Range::sym (r)); else { const uint32_t ld = utf16::lead_surr(r); const uint32_t tr = utf16::trail_surr(r); - return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr))); + return RegExp::cat(RegExp::sym(Range::sym (ld)), RegExp::sym(Range::sym (tr))); } } diff --git a/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc index 54ef6f0e..70b864cd 100644 --- a/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc +++ b/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc @@ -3,8 +3,7 @@ #include "src/ir/regexp/encoding/utf8/utf8_regexp.h" #include "src/ir/regexp/encoding/range_suffix.h" #include "src/ir/regexp/encoding/utf8/utf8_range.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp.h" #include "src/util/range.h" namespace re2c { @@ -13,9 +12,9 @@ RegExp * UTF8Symbol(utf8::rune r) { uint32_t chars[utf8::MAX_RUNE_LENGTH]; const uint32_t chars_count = utf8::rune_to_bytes(chars, r); - RegExp * re = new MatchOp(Range::sym (chars[0])); + RegExp * re = RegExp::sym(Range::sym (chars[0])); for (uint32_t i = 1; i < chars_count; ++i) - re = new CatOp(re, new MatchOp(Range::sym (chars[i]))); + re = RegExp::cat(re, RegExp::sym(Range::sym (chars[i]))); return re; } diff --git a/re2c/src/ir/regexp/fixed_length.cc b/re2c/src/ir/regexp/fixed_length.cc deleted file mode 100644 index 264737be..00000000 --- a/re2c/src/ir/regexp/fixed_length.cc +++ /dev/null @@ -1,55 +0,0 @@ -#include "src/util/c99_stdint.h" - -#include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" - -namespace re2c -{ - -uint32_t RegExp::fixedLength () -{ - return ~0u; -} - -uint32_t AltOp::fixedLength () -{ - uint32_t l1 = exp1->fixedLength (); - uint32_t l2 = exp2->fixedLength (); - - if (l1 != l2 || l1 == ~0u) - { - return ~0u; - } - - return l1; -} - -uint32_t CatOp::fixedLength () -{ - const uint32_t l1 = exp1->fixedLength (); - if (l1 != ~0u) - { - const uint32_t l2 = exp2->fixedLength (); - if (l2 != ~0u) - { - return l1 + l2; - } - } - return ~0u; -} - -uint32_t MatchOp::fixedLength () -{ - return 1; -} - -uint32_t NullOp::fixedLength () -{ - return 0; -} - -} // end namespace re2c - diff --git a/re2c/src/ir/regexp/nullable.cc b/re2c/src/ir/regexp/nullable.cc index 727c47e8..e58096fe 100644 --- a/re2c/src/ir/regexp/nullable.cc +++ b/re2c/src/ir/regexp/nullable.cc @@ -1,59 +1,43 @@ #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" namespace re2c { -bool AltOp::nullable() const -{ - return exp1->nullable() - || exp2->nullable(); -} - -bool CatOp::nullable() const -{ - return exp1->nullable() - && exp2->nullable(); -} - -bool CloseOp::nullable() const -{ - return true; -} - -bool MatchOp::nullable() const -{ - return false; -} - -bool NullOp::nullable() const -{ - return true; -} - -bool RuleOp::nullable() const -{ - return exp->nullable(); -} - -void RegExp::nullable_rules(std::vector&) const {} - -void AltOp::nullable_rules(std::vector &rs) const -{ - exp1->nullable_rules(rs); - exp2->nullable_rules(rs); +static bool nullable(const RegExp *re) +{ + switch (re->tag) { + default: + case RegExp::NIL: + return true; + case RegExp::SYM: + return false; + case RegExp::ALT: + return nullable(re->pld.alt.re1) + || nullable(re->pld.alt.re2); + case RegExp::CAT: + return nullable(re->pld.cat.re1) + && nullable(re->pld.cat.re2); + case RegExp::ITER: + return true; + case RegExp::RULE: + return nullable(re->pld.rule.re); + } } -void RuleOp::nullable_rules(std::vector &rs) const -{ - if (exp->nullable()) - { - rs.push_back(info); +void nullable_rules(const RegExp *re, std::vector &rs) +{ + switch (re->tag) { + case RegExp::ALT: + nullable_rules(re->pld.alt.re1, rs); + nullable_rules(re->pld.alt.re2, rs); + break; + case RegExp::RULE: + if (nullable(re->pld.rule.re)) { + rs.push_back(re->pld.rule.info); + } + break; + default: + break; } } diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index e5a7d9bd..f362778f 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -9,103 +9,79 @@ #include "src/ir/regexp/encoding/utf16/utf16_regexp.h" #include "src/ir/regexp/encoding/utf8/utf8_regexp.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_alt.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_match.h" -#include "src/ir/regexp/regexp_null.h" #include "src/parse/scanner.h" #include "src/util/range.h" namespace re2c { -static MatchOp * merge (MatchOp * m1, MatchOp * m2); +static uint32_t fixlen(const RegExp *re); -free_list RegExp::vFreeList; +free_list RegExp::flist; -RegExp * doAlt (RegExp * e1, RegExp * e2) +RegExp *doAlt(RegExp *re1, RegExp *re2) { - if (!e1) - { - return e2; + if (!re1) { + return re2; } - if (!e2) - { - return e1; + if (!re2) { + return re1; } - return new AltOp (e1, e2); + return RegExp::alt(re1, re2); } -RegExp * mkAlt (RegExp * e1, RegExp * e2) +static RegExp *merge(RegExp *sym1, RegExp *sym2) { - AltOp * a; - MatchOp * m1; - MatchOp * m2; - - a = dynamic_cast (e1); - if (a != NULL) - { - m1 = dynamic_cast (a->exp1); - if (m1 != NULL) - { - e1 = a->exp2; - } + if (!sym1) { + return sym2; } - else - { - m1 = dynamic_cast (e1); - if (m1 != NULL) - { - e1 = NULL; - } + if (!sym2) { + return sym1; } - a = dynamic_cast (e2); - if (a != NULL) - { - m2 = dynamic_cast (a->exp1); - if (m2 != NULL) - { - e2 = a->exp2; - } + return RegExp::sym(Range::add( + sym1->pld.sym.range, + sym2->pld.sym.range)); +} + +static RegExp *lift_sym(RegExp *&re) +{ + if (!re) { + return NULL; } - else - { - m2 = dynamic_cast (e2); - if (m2 != NULL) - { - e2 = NULL; + if (re->tag == RegExp::SYM) { + RegExp *sym = re; + re = NULL; + return sym; + } + if (re->tag == RegExp::ALT) { + // second alternative cannot be SYM by construction + RegExp *alt1 = re->pld.alt.re1; + if (alt1 && alt1->tag == RegExp::SYM) { + re = re->pld.alt.re2; + return alt1; } } - - return doAlt (merge (m1, m2), doAlt (e1, e2)); + return NULL; } -MatchOp * merge (MatchOp * m1, MatchOp * m2) +RegExp *mkAlt(RegExp *re1, RegExp *re2) { - if (!m1) - { - return m2; - } - if (!m2) - { - return m1; - } - MatchOp * m = new MatchOp (Range::add (m1->match, m2->match)); - return m; + RegExp *sym1 = lift_sym(re1); + RegExp *sym2 = lift_sym(re2); + return doAlt( + merge(sym1, sym2), + doAlt(re1, re2)); } -RegExp * doCat (RegExp * e1, RegExp * e2) +RegExp *doCat(RegExp *re1, RegExp *re2) { - if (!e1) - { - return e2; + if (!re1) { + return re2; } - if (!e2) - { - return e1; + if (!re2) { + return re1; } - return new CatOp (e1, e2); + return RegExp::cat(re1, re2); } RegExp *Scanner::schr(uint32_t c) const @@ -116,7 +92,7 @@ RegExp *Scanner::schr(uint32_t c) const switch (opts->encoding.type ()) { case Enc::UTF16: return UTF16Symbol(c); case Enc::UTF8: return UTF8Symbol(c); - default: return new MatchOp(Range::sym(c)); + default: return RegExp::sym(Range::sym(c)); } } @@ -133,53 +109,49 @@ RegExp *Scanner::ichr(uint32_t c) const RegExp *Scanner::cls(Range *r) const { - if (!r) - { - switch (opts->empty_class_policy) - { + if (!r) { + switch (opts->empty_class_policy) { case EMPTY_CLASS_MATCH_EMPTY: - warn.empty_class (get_line ()); - return new NullOp; + warn.empty_class(get_line()); + return RegExp::nil(); case EMPTY_CLASS_MATCH_NONE: - warn.empty_class (get_line ()); + warn.empty_class(get_line()); break; case EMPTY_CLASS_ERROR: - fatal ("empty character class"); + fatal("empty character class"); break; } } - switch (opts->encoding.type ()) - { + switch (opts->encoding.type()) { case Enc::UTF16: return UTF16Range(r); case Enc::UTF8: return UTF8Range(r); - default: return new MatchOp(r); + default: return RegExp::sym(r); } } -RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const +RegExp *Scanner::mkDiff(RegExp *re1, RegExp *re2) const { - MatchOp * m1 = dynamic_cast (e1); - MatchOp * m2 = dynamic_cast (e2); - if (m1 == NULL || m2 == NULL) - { - fatal("can only difference char sets"); + if (re1 && re2 + && re1->tag == RegExp::SYM + && re2->tag == RegExp::SYM) { + return cls(Range::sub( + re1->pld.sym.range, + re2->pld.sym.range)); } - Range * r = Range::sub (m1->match, m2->match); - - return cls(r); + fatal("can only difference char sets"); + return NULL; } -RegExp * Scanner::mkDot() const +RegExp *Scanner::mkDot() const { - Range * full = opts->encoding.fullRange(); uint32_t c = '\n'; - if (!opts->encoding.encode(c)) + if (!opts->encoding.encode(c)) { fatalf("Bad code point: '0x%X'", c); - Range * ran = Range::sym (c); - Range * inv = Range::sub (full, ran); - - return cls(inv); + } + return cls(Range::sub( + opts->encoding.fullRange(), + Range::sym(c))); } /* @@ -192,10 +164,10 @@ RegExp * Scanner::mkDot() const * Also note that default range doesn't respect encoding policy * (the way invalid code points are treated). */ -RegExp * Scanner::mkDefault() const +RegExp *Scanner::mkDefault() const { - Range * def = Range::ran (0, opts->encoding.nCodeUnits()); - return new MatchOp(def); + return RegExp::sym(Range::ran(0, + opts->encoding.nCodeUnits())); } /* @@ -208,34 +180,98 @@ RegExp * Scanner::mkDefault() const */ // see note [counted repetition expansion] -RegExp * repeat (RegExp * e, uint32_t n) +RegExp *repeat(RegExp *re, uint32_t n) { - RegExp * r = NULL; - for (uint32_t i = 0; i < n; ++i) - { - r = doCat (r, e); + RegExp *r = NULL; + for (uint32_t i = 0; i < n; ++i) { + r = doCat(r, re); } return r; } // see note [counted repetition expansion] -RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m) +RegExp *repeat_from_to(RegExp *re, uint32_t n, uint32_t m) { - RegExp * r1 = repeat (e, n); - RegExp * r2 = NULL; - for (uint32_t i = n; i < m; ++i) - { - r2 = mkAlt (new NullOp, doCat (e, r2)); + RegExp *r1 = repeat(re, n); + RegExp *r2 = NULL; + for (uint32_t i = n; i < m; ++i) { + r2 = mkAlt( + RegExp::nil(), + doCat(re, r2)); } - return doCat (r1, r2); + return doCat(r1, r2); } // see note [counted repetition expansion] -RegExp * repeat_from (RegExp * e, uint32_t n) +RegExp *repeat_from(RegExp *re, uint32_t n) +{ + return doCat( + repeat(re, n), + RegExp::iter(re)); +} + +RegExp* RegExp::rule(const Loc &loc, RegExp *r1, RegExp *r2, + rule_rank_t rank, const Code *code, const std::string *newcond) +{ + RegExp *re = new RegExp(RULE); + re->pld.rule.re = r1; + re->pld.rule.ctx = r2; + + uint32_t ctx_len = fixlen(r2); + // cannot emulate 'YYCURSOR -= N' operation with generic API + if (ctx_len != 0 + && opts->input_api.type() == InputAPI::CUSTOM) + { + ctx_len = ~0u; + } + + re->pld.rule.info = new RuleInfo(loc, rank, code, newcond, ctx_len); + return re; +} + +// shallow-copies regexps, but deep-copies rule info +// used to duplicate <*> rules in conditions +RegExp* RegExp::rule_copy(const RegExp *rule, rule_rank_t rank) { - RegExp * r1 = repeat (e, n); - RegExp * r2 = new CloseOp (e); - return doCat (r1, r2); + RegExp *re = new RegExp(RULE); + re->pld.rule.re = rule->pld.rule.re; + re->pld.rule.ctx = rule->pld.rule.ctx; + const RuleInfo *info = rule->pld.rule.info; + re->pld.rule.info = new RuleInfo(info->loc, rank, + info->code, &info->newcond, info->ctx_len); + return re; +} + +uint32_t fixlen(const RegExp *re) +{ + switch (re->tag) { + case RegExp::NIL: + return 0; + case RegExp::SYM: + return 1; + case RegExp::ALT: + { + const uint32_t l1 = fixlen(re->pld.alt.re1); + const uint32_t l2 = fixlen(re->pld.alt.re2); + return l1 == l2 ? l1 : ~0u; + } + case RegExp::CAT: + { + const uint32_t l1 = fixlen(re->pld.cat.re1); + if (l1 == ~0u) { + return ~0u; + } + const uint32_t l2 = fixlen(re->pld.cat.re2); + if (l2 == ~0u) { + return ~0u; + } + return l1 + l2; + } + case RegExp::ITER: + case RegExp::RULE: + default: + return ~0u; + } } } // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h index 5d999c2f..39923c28 100644 --- a/re2c/src/ir/regexp/regexp.h +++ b/re2c/src/ir/regexp/regexp.h @@ -2,13 +2,12 @@ #define _RE2C_IR_REGEXP_REGEXP_ #include "src/util/c99_stdint.h" -#include #include #include #include "src/parse/rules.h" #include "src/util/free_list.h" -#include "src/util/forbid_copy.h" +#include "src/util/range.h" namespace re2c { @@ -18,37 +17,107 @@ struct nfa_t; typedef std::vector charset_t; -class RegExp +struct RegExp { -public: - static free_list vFreeList; + enum tag_t + { + NIL, + SYM, + ALT, + CAT, + ITER, + RULE + }; + union payload_t + { + struct + { + Range *range; + } sym; + struct + { + RegExp *re1; + RegExp *re2; + } alt; + struct + { + RegExp *re1; + RegExp *re2; + } cat; + struct + { + RegExp *re; + } iter; + struct + { + RegExp *re; + RegExp *ctx; + RuleInfo *info; + } rule; + }; + + static free_list flist; + + tag_t tag; + payload_t pld; - inline RegExp () + static RegExp *nil() + { + return new RegExp(NIL); + } + static RegExp *sym(Range *r) + { + RegExp *re = new RegExp(SYM); + re->pld.sym.range = r; + return re; + } + static RegExp *alt(RegExp *r1, RegExp *r2) { - vFreeList.insert (this); + RegExp *re = new RegExp(ALT); + re->pld.alt.re1 = r1; + re->pld.alt.re2 = r2; + return re; } - inline virtual ~RegExp () + static RegExp *cat(RegExp *r1, RegExp *r2) { - vFreeList.erase (this); + RegExp *re = new RegExp(CAT); + re->pld.cat.re1 = r1; + re->pld.cat.re2 = r2; + return re; + } + static RegExp *iter(RegExp *r) + { + RegExp *re = new RegExp(ITER); + re->pld.iter.re = r; + return re; + } + static RegExp *rule(const Loc &loc, RegExp *r1, RegExp *r2, + rule_rank_t rank, const Code *code, const std::string *newcond); + static RegExp *rule_copy(const RegExp *rule, rule_rank_t rank); + inline ~RegExp() + { + if (tag == RULE) { + delete pld.rule.info; + } + flist.erase(this); + } + +private: + inline RegExp(tag_t t) : tag(t), pld() + { + flist.insert(this); } - virtual void split (std::set &) = 0; - virtual uint32_t calc_size() const = 0; - virtual uint32_t fixedLength (); - virtual bool nullable() const = 0; - virtual void nullable_rules(std::vector&) const; - virtual nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n) = 0; - virtual void display (std::ostream &) const = 0; - friend std::ostream & operator << (std::ostream & o, const RegExp & re); - - FORBID_COPY (RegExp); }; -RegExp * doAlt (RegExp * e1, RegExp * e2); -RegExp * mkAlt (RegExp * e1, RegExp * e2); -RegExp * doCat (RegExp * e1, RegExp * e2); -RegExp * repeat (RegExp * e, uint32_t n); -RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m); -RegExp * repeat_from (RegExp * e, uint32_t n); +void split(const RegExp *re, std::set &cs); +void nullable_rules(const RegExp *re, std::vector &rs); + +RegExp *mkAlt(RegExp *re1, RegExp *re2); +RegExp *doAlt(RegExp *re1, RegExp *re2); +RegExp *doCat(RegExp *re1, RegExp *re2); +RegExp *repeat(RegExp *re, uint32_t n); +RegExp *repeat_from_to(RegExp *re, uint32_t n, uint32_t m); +RegExp *repeat_from(RegExp *re, uint32_t n); } // end namespace re2c diff --git a/re2c/src/ir/regexp/regexp_alt.h b/re2c/src/ir/regexp/regexp_alt.h deleted file mode 100644 index 267e469d..00000000 --- a/re2c/src/ir/regexp/regexp_alt.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_ -#define _RE2C_IR_REGEXP_REGEXP_ALT_ - -#include "src/ir/regexp/regexp.h" - -namespace re2c -{ - -class AltOp: public RegExp -{ - RegExp * exp1; - RegExp * exp2; - -public: - inline AltOp (RegExp * e1, RegExp * e2) - : exp1 (e1) - , exp2 (e2) - {} - void split (std::set &); - uint32_t calc_size() const; - uint32_t fixedLength (); - bool nullable() const; - void nullable_rules(std::vector&) const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - void display (std::ostream & o) const; - friend RegExp * mkAlt (RegExp *, RegExp *); - - FORBID_COPY (AltOp); -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_ALT_ diff --git a/re2c/src/ir/regexp/regexp_cat.h b/re2c/src/ir/regexp/regexp_cat.h deleted file mode 100644 index 93c13a11..00000000 --- a/re2c/src/ir/regexp/regexp_cat.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_ -#define _RE2C_IR_REGEXP_REGEXP_CAT_ - -#include "src/ir/regexp/regexp.h" - -namespace re2c -{ - -class CatOp: public RegExp -{ - RegExp * exp1; - RegExp * exp2; - -public: - inline CatOp (RegExp * e1, RegExp * e2) - : exp1 (e1) - , exp2 (e2) - {} - void split (std::set &); - uint32_t calc_size() const; - uint32_t fixedLength (); - bool nullable() const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - void display (std::ostream & o) const; - - FORBID_COPY (CatOp); -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_CAT_ diff --git a/re2c/src/ir/regexp/regexp_close.h b/re2c/src/ir/regexp/regexp_close.h deleted file mode 100644 index afd72373..00000000 --- a/re2c/src/ir/regexp/regexp_close.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_ -#define _RE2C_IR_REGEXP_REGEXP_CLOSE_ - -#include "src/ir/regexp/regexp.h" - -namespace re2c -{ - -class CloseOp: public RegExp -{ - RegExp * exp; - -public: - inline CloseOp (RegExp * e) - : exp (e) - {} - void split (std::set &); - uint32_t calc_size() const; - bool nullable() const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - void display (std::ostream & o) const; - - FORBID_COPY (CloseOp); -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_ diff --git a/re2c/src/ir/regexp/regexp_match.h b/re2c/src/ir/regexp/regexp_match.h deleted file mode 100644 index cccbf4aa..00000000 --- a/re2c/src/ir/regexp/regexp_match.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_ -#define _RE2C_IR_REGEXP_REGEXP_MATCH_ - -#include "src/ir/regexp/regexp.h" -#include "src/util/range.h" - -namespace re2c -{ - -class MatchOp: public RegExp -{ -public: - Range * match; - - inline MatchOp (Range * m) - : match (m) - {} - void split (std::set &); - uint32_t calc_size() const; - uint32_t fixedLength (); - bool nullable() const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - void display (std::ostream & o) const; - - FORBID_COPY (MatchOp); -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_ diff --git a/re2c/src/ir/regexp/regexp_null.h b/re2c/src/ir/regexp/regexp_null.h deleted file mode 100644 index ebefb24d..00000000 --- a/re2c/src/ir/regexp/regexp_null.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_ -#define _RE2C_IR_REGEXP_REGEXP_NULL_ - -#include "src/ir/regexp/regexp.h" - -namespace re2c -{ - -class NullOp: public RegExp -{ -public: - void split (std::set &); - uint32_t calc_size() const; - uint32_t fixedLength (); - bool nullable() const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - void display (std::ostream & o) const; -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_NULL_ diff --git a/re2c/src/ir/regexp/regexp_rule.h b/re2c/src/ir/regexp/regexp_rule.h deleted file mode 100644 index 65df447d..00000000 --- a/re2c/src/ir/regexp/regexp_rule.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _RE2C_IR_REGEXP_REGEXP_RULE_ -#define _RE2C_IR_REGEXP_REGEXP_RULE_ - -#include - -#include "src/ir/regexp/regexp.h" -#include "src/globals.h" - -namespace re2c -{ - -class RuleOp: public RegExp -{ - RegExp * exp; - -public: - RegExp * ctx; - RuleInfo *info; - - inline RuleOp - ( const Loc & l - , RegExp * r1 - , RegExp * r2 - , rule_rank_t r - , const Code * c - , const std::string * cond - ) - : exp (r1) - , ctx (r2) - , info (NULL) - - { - uint32_t ctx_len = r2->fixedLength(); - // cannot emulate 'YYCURSOR -= N' operation with generic API - if (ctx_len != 0 - && opts->input_api.type() == InputAPI::CUSTOM) - { - ctx_len = ~0u; - } - info = new RuleInfo(l, r, c, cond, ctx_len); - } - RuleOp(RuleOp *rule, rule_rank_t r) - : exp(rule->exp) - , ctx(rule->ctx) - , info(new RuleInfo(rule->info->loc, r, rule->info->code, - &rule->info->newcond, rule->info->ctx_len)) - {} - ~RuleOp() - { - delete info; - } - bool nullable() const; - void nullable_rules(std::vector&) const; - void display (std::ostream & o) const; - void split (std::set &); - uint32_t calc_size() const; - nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); - - FORBID_COPY (RuleOp); -}; - -} // end namespace re2c - -#endif // _RE2C_IR_REGEXP_REGEXP_RULE_ diff --git a/re2c/src/ir/regexp/split_charset.cc b/re2c/src/ir/regexp/split_charset.cc new file mode 100644 index 00000000..ce0dd759 --- /dev/null +++ b/re2c/src/ir/regexp/split_charset.cc @@ -0,0 +1,38 @@ +#include "src/util/c99_stdint.h" +#include + +#include "src/ir/regexp/regexp.h" +#include "src/util/range.h" + +namespace re2c { + +void split(const RegExp* re, std::set &cs) +{ + switch (re->tag) { + case RegExp::NIL: + break; + case RegExp::SYM: + for (Range *r = re->pld.sym.range; r; r = r->next()) { + cs.insert(r->lower()); + cs.insert(r->upper()); + } + break; + case RegExp::ALT: + split(re->pld.alt.re1, cs); + split(re->pld.alt.re2, cs); + break; + case RegExp::CAT: + split(re->pld.cat.re1, cs); + split(re->pld.cat.re2, cs); + break; + case RegExp::ITER: + split(re->pld.iter.re, cs); + break; + case RegExp::RULE: + split(re->pld.rule.re, cs); + split(re->pld.rule.ctx, cs); + break; + } +} + +} // namespace re2c diff --git a/re2c/src/parse/lex.re b/re2c/src/parse/lex.re index 2fd98fb9..cf1c2642 100644 --- a/re2c/src/parse/lex.re +++ b/re2c/src/parse/lex.re @@ -11,7 +11,6 @@ #include "src/globals.h" #include "src/ir/regexp/encoding/enc.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_null.h" #include "src/parse/code.h" #include "src/parse/extop.h" #include "src/parse/input.h" @@ -359,7 +358,7 @@ start: const uint32_t c = static_cast(*s); r = doCat(r, casing ? ichr(c) : schr(c)); } - yylval.regexp = r ? r : new NullOp; + yylval.regexp = r ? r : RegExp::nil(); return TOKEN_REGEXP; } } @@ -658,7 +657,7 @@ RegExp *Scanner::lex_str(char quote, bool casing) for (bool end;;) { const uint32_t c = lex_str_chr(quote, end); if (end) { - return r ? r : new NullOp; + return r ? r : RegExp::nil(); } r = doCat(r, casing ? ichr(c) : schr(c)); } diff --git a/re2c/src/parse/parser.h b/re2c/src/parse/parser.h index 965a8a52..53b7dd70 100644 --- a/re2c/src/parse/parser.h +++ b/re2c/src/parse/parser.h @@ -1,13 +1,12 @@ #ifndef _RE2C_PARSE_PARSER_ #define _RE2C_PARSE_PARSER_ -#include #include +#include #include #include "src/codegen/output.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_rule.h" #include "src/parse/scanner.h" #include "src/parse/spec.h" @@ -18,7 +17,7 @@ extern void parse(Scanner &, Output &); extern void parse_cleanup(); typedef std::set CondList; -typedef std::list RuleOpList; +typedef std::list RuleList; typedef std::map SpecMap; typedef std::map > SetupMap; typedef std::map DefaultMap; diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index 47a3f8a9..e959421b 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -20,10 +20,6 @@ #include "src/ir/regexp/encoding/enc.h" #include "src/ir/regexp/encoding/range_suffix.h" #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_cat.h" -#include "src/ir/regexp/regexp_close.h" -#include "src/ir/regexp/regexp_null.h" -#include "src/ir/regexp/regexp_rule.h" #include "src/ir/rule_rank.h" #include "src/ir/skeleton/skeleton.h" #include "src/parse/code.h" @@ -52,9 +48,9 @@ static counter_t rank_counter; static std::vector condnames; static re2c::SpecMap specMap; static Spec spec; -static RuleOp *specNone = NULL; -static RuleOpList specStar; -static RuleOp * star_default = NULL; +static RegExp *specNone = NULL; +static RuleList specStar; +static RegExp *star_default = NULL; static Scanner *in = NULL; static Scanner::ParseMode parseMode; static SetupMap ruleSetupMap; @@ -101,7 +97,7 @@ void context_rule condnames.push_back (*it); } - RuleOp * rule = new RuleOp + RegExp *rule = RegExp::rule ( loc , expr , look @@ -138,10 +134,10 @@ void default_rule(CondList *clist, const Code * code) context_check(clist); for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - RuleOp * def = new RuleOp + RegExp * def = RegExp::rule ( code->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , code , NULL @@ -234,7 +230,7 @@ rule: { in->fatal("condition or '<*>' required when using -c switch"); } - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( $3->loc , $1 , $2 @@ -248,10 +244,10 @@ rule: { if (opts->cFlag) in->fatal("condition or '<*>' required when using -c switch"); - RuleOp * def = new RuleOp + RegExp * def = RegExp::rule ( $2->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , $2 , NULL @@ -289,7 +285,7 @@ rule: | '<' TOKEN_STAR '>' expr look newcond TOKEN_CODE { context_check(NULL); - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( $7->loc , $4 , $5 @@ -305,7 +301,7 @@ rule: assert($7); context_check(NULL); Loc loc (in->get_fname (), in->get_cline ()); - RuleOp * rule = new RuleOp + RegExp * rule = RegExp::rule ( loc , $4 , $5 @@ -333,10 +329,10 @@ rule: { in->fatal ("code to default rule '*' is already defined"); } - star_default = new RuleOp + star_default = RegExp::rule ( $5->loc , in->mkDefault () - , new NullOp + , RegExp::nil() , rule_rank_t::def () , $5 , NULL @@ -349,10 +345,10 @@ rule: { in->fatal("code to handle illegal condition already defined"); } - $$ = specNone = new RuleOp + $$ = specNone = RegExp::rule ( $3->loc - , new NullOp - , new NullOp + , RegExp::nil() + , RegExp::nil() , rank_counter.next () , $3 , $2 @@ -368,10 +364,10 @@ rule: in->fatal("code to handle illegal condition already defined"); } Loc loc (in->get_fname (), in->get_cline ()); - $$ = specNone = new RuleOp + $$ = specNone = RegExp::rule ( loc - , new NullOp - , new NullOp + , RegExp::nil() + , RegExp::nil() , rank_counter.next () , NULL , $3 @@ -430,7 +426,7 @@ newcond: look: /* empty */ { - $$ = new NullOp; + $$ = RegExp::nil(); } | '/' expr { @@ -467,7 +463,7 @@ term: } | term factor { - $$ = new CatOp($1, $2); + $$ = RegExp::cat($1, $2); } ; @@ -481,13 +477,13 @@ factor: switch($2) { case '*': - $$ = new CloseOp($1); + $$ = RegExp::iter($1); break; case '+': - $$ = new CatOp (new CloseOp($1), $1); + $$ = RegExp::cat(RegExp::iter($1), $1); break; case '?': - $$ = mkAlt($1, new NullOp()); + $$ = mkAlt($1, RegExp::nil()); break; } } @@ -505,7 +501,7 @@ factor: { $$ = repeat_from_to ($1, $2.min, $2.max); } - $$ = $$ ? $$ : new NullOp; + $$ = $$ ? $$ : RegExp::nil(); } ; @@ -651,9 +647,9 @@ void parse(Scanner& i, Output & o) // merge <*> rules to all conditions with lowest priority for (it = specMap.begin(); it != specMap.end(); ++it) { - for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) { - RuleOp *r = new RuleOp(*itOp, rank_counter.next()); + RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next()); it->second.add (r); } if (star_default) @@ -755,7 +751,7 @@ void parse(Scanner& i, Output & o) void parse_cleanup() { - RegExp::vFreeList.clear(); + RegExp::flist.clear(); Range::vFreeList.clear(); RangeSuffix::freeList.clear(); Code::freelist.clear(); diff --git a/re2c/src/parse/spec.h b/re2c/src/parse/spec.h index 27ee264b..b34e7410 100644 --- a/re2c/src/parse/spec.h +++ b/re2c/src/parse/spec.h @@ -4,7 +4,6 @@ #include #include "src/ir/regexp/regexp.h" -#include "src/ir/regexp/regexp_rule.h" #include "src/parse/rules.h" namespace re2c @@ -35,7 +34,7 @@ struct Spec rules = spec.rules; return *this; } - bool add_def (RuleOp * r) + bool add_def (RegExp * r) { if (std::find_if(rules.begin(), rules.end(), is_def) != rules.end()) { @@ -47,9 +46,9 @@ struct Spec return true; } } - void add (RuleOp * r) + void add (RegExp * r) { - rules.push_back(r->info); + rules.push_back(r->pld.rule.info); re = mkAlt (re, r); } void clear ()