From: Ulya Trofimovich Date: Thu, 5 May 2016 16:17:06 +0000 (+0100) Subject: Allow tags in any part of regexp, not only on top-level concatenation. X-Git-Tag: 1.0~39^2~316 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=04b37b1bbcb290ff3a0f4e4ada395bbd6d2775c6;p=re2c Allow tags in any part of regexp, not only on top-level concatenation. Fixed-length tag optimization applies only to top-level concatenation: since we cannot predict which path lexer will choose, we cannot be sure that any tags except top-level concatenation will ever be initialized. If a tag may be uninitialized, we cannot fix other tags on this one (we cannot even fix same-level tags relative to each other, because fixed tags won't preserve the default value). --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 73559056..721202c9 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -82,7 +82,12 @@ SRC = \ src/conf/msg.cc \ src/conf/opt.cc \ src/conf/warn.cc \ + src/ir/nfa/init_rules.cc \ + src/ir/nfa/make_tags.cc \ src/ir/nfa/nfa.cc \ + src/ir/nfa/nullable.cc \ + src/ir/nfa/regexps2nfa.cc \ + src/ir/nfa/sizeof_regexps.cc \ src/ir/adfa/adfa.cc \ src/ir/adfa/prepare.cc \ src/ir/dfa/context_deduplication.cc \ diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index ced8ae14..eace75f8 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Tue Apr 5 23:00:02 2016 */ +/* Generated by re2c 0.16 on Thu May 5 17:05:17 2016 */ #line 1 "../src/parse/lex.re" #include "src/util/c99_stdint.h" #include @@ -328,7 +328,6 @@ yy47: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - YYCTXMARKER = YYCURSOR; yy48: if (yych <= 0x1F) { if (yych == '\t') goto yy47; @@ -336,7 +335,10 @@ yy48: } else { if (yych <= ' ') goto yy47; if (yych <= '0') goto yy13; - if (yych <= '9') goto yy56; + if (yych <= '9') { + YYCTXMARKER = YYCURSOR; + goto yy56; + } goto yy13; } yy49: @@ 
-432,7 +434,7 @@ yy67: set_sourceline(); goto echo; } -#line 436 "src/parse/lex.cc" +#line 438 "src/parse/lex.cc" yy69: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy67; @@ -537,7 +539,7 @@ yy91: lex_end_of_comment(); goto echo; } -#line 541 "src/parse/lex.cc" +#line 543 "src/parse/lex.cc" yy93: yych = (YYCTYPE)*++YYCURSOR; if (yych == '2') goto yy100; @@ -559,7 +561,7 @@ yy95: reuse(); return Reuse; } -#line 563 "src/parse/lex.cc" +#line 565 "src/parse/lex.cc" yy97: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'r') goto yy102; @@ -605,7 +607,7 @@ yy105: opts.reset_mapCodeName (); return Rules; } -#line 609 "src/parse/lex.cc" +#line 611 "src/parse/lex.cc" yy107: ++YYCURSOR; #line 134 "../src/parse/lex.re" @@ -619,7 +621,7 @@ yy107: lex_end_of_comment(); goto echo; } -#line 623 "src/parse/lex.cc" +#line 625 "src/parse/lex.cc" yy109: yych = (YYCTYPE)*++YYCURSOR; if (yych == '2') goto yy113; @@ -638,7 +640,7 @@ yy111: lex_end_of_comment(); goto echo; } -#line 642 "src/parse/lex.cc" +#line 644 "src/parse/lex.cc" yy113: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'c') goto yy115; @@ -657,7 +659,7 @@ yy115: lex_contexts(); goto echo; } -#line 661 "src/parse/lex.cc" +#line 663 "src/parse/lex.cc" yy117: ++YYCURSOR; #line 125 "../src/parse/lex.re" @@ -669,7 +671,7 @@ yy117: lex_end_of_comment(); goto echo; } -#line 673 "src/parse/lex.cc" +#line 675 "src/parse/lex.cc" } #line 174 "../src/parse/lex.re" @@ -679,7 +681,7 @@ void Scanner::lex_end_of_comment() { uint32_t ignored = 0; for (;;) { -#line 683 "src/parse/lex.cc" +#line 685 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -697,18 +699,18 @@ yy121: ++YYCURSOR; #line 181 "../src/parse/lex.re" { fatal("expected end of block"); } -#line 701 "src/parse/lex.cc" +#line 703 "src/parse/lex.cc" yy123: ++YYCURSOR; yy124: #line 183 "../src/parse/lex.re" { continue; } -#line 707 "src/parse/lex.cc" +#line 709 "src/parse/lex.cc" yy125: ++YYCURSOR; #line 184 "../src/parse/lex.re" { ++ignored; continue; } 
-#line 712 "src/parse/lex.cc" +#line 714 "src/parse/lex.cc" yy127: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy125; @@ -726,7 +728,7 @@ yy128: tok = pos = cur; return; } -#line 730 "src/parse/lex.cc" +#line 732 "src/parse/lex.cc" } #line 193 "../src/parse/lex.re" } @@ -736,7 +738,7 @@ void Scanner::lex_contexts() { ConfContexts conf; for (;;) { -#line 740 "src/parse/lex.cc" +#line 742 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -796,7 +798,7 @@ void Scanner::lex_contexts() yy134: #line 200 "../src/parse/lex.re" { fatal("unrecognized configuration"); } -#line 800 "src/parse/lex.cc" +#line 802 "src/parse/lex.cc" yy135: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -806,12 +808,12 @@ yy135: } #line 205 "../src/parse/lex.re" { continue; } -#line 810 "src/parse/lex.cc" +#line 812 "src/parse/lex.cc" yy138: ++YYCURSOR; #line 206 "../src/parse/lex.re" { ++cline; continue; } -#line 815 "src/parse/lex.cc" +#line 817 "src/parse/lex.cc" yy140: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy138; @@ -839,7 +841,7 @@ yy144: tok = pos = cur; return; } -#line 843 "src/parse/lex.cc" +#line 845 "src/parse/lex.cc" yy146: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'n') goto yy149; @@ -858,12 +860,12 @@ yy150: ++YYCURSOR; #line 203 "../src/parse/lex.re" { conf.sep = lex_conf_string(); continue; } -#line 862 "src/parse/lex.cc" +#line 864 "src/parse/lex.cc" yy152: ++YYCURSOR; #line 202 "../src/parse/lex.re" { conf.line = lex_conf_string(); continue; } -#line 867 "src/parse/lex.cc" +#line 869 "src/parse/lex.cc" } #line 215 "../src/parse/lex.re" } @@ -885,7 +887,7 @@ scan: start: -#line 889 "src/parse/lex.cc" +#line 891 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -991,12 +993,12 @@ start: yy156: ++YYCURSOR; yy157: -#line 393 "../src/parse/lex.re" +#line 397 "../src/parse/lex.re" { fatalf("unexpected character: '%c'", *tok); goto scan; } -#line 1000 "src/parse/lex.cc" +#line 1002 "src/parse/lex.cc" yy158: ++YYCURSOR; if 
(YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1004,11 +1006,11 @@ yy158: if (yybm[0+yych] & 16) { goto yy158; } -#line 377 "../src/parse/lex.re" +#line 381 "../src/parse/lex.re" { goto scan; } -#line 1012 "src/parse/lex.cc" +#line 1014 "src/parse/lex.cc" yy161: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1019,14 +1021,14 @@ yy161: if (yych == '#') goto yy190; } yy162: -#line 386 "../src/parse/lex.re" +#line 390 "../src/parse/lex.re" { if (cur == eof) return 0; pos = cur; cline++; goto scan; } -#line 1030 "src/parse/lex.cc" +#line 1032 "src/parse/lex.cc" yy163: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy161; @@ -1035,7 +1037,7 @@ yy164: ++YYCURSOR; #line 264 "../src/parse/lex.re" { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } -#line 1039 "src/parse/lex.cc" +#line 1041 "src/parse/lex.cc" yy166: yych = (YYCTYPE)*++YYCURSOR; if (yych == '}') goto yy192; @@ -1044,40 +1046,40 @@ yy167: ++YYCURSOR; #line 263 "../src/parse/lex.re" { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } -#line 1048 "src/parse/lex.cc" +#line 1050 "src/parse/lex.cc" yy169: ++YYCURSOR; yy170: -#line 280 "../src/parse/lex.re" +#line 284 "../src/parse/lex.re" { return *tok; } -#line 1056 "src/parse/lex.cc" +#line 1058 "src/parse/lex.cc" yy171: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy192; -#line 284 "../src/parse/lex.re" +#line 288 "../src/parse/lex.re" { yylval.op = *tok; return TOKEN_STAR; } -#line 1065 "src/parse/lex.cc" +#line 1067 "src/parse/lex.cc" yy173: ++YYCURSOR; -#line 288 "../src/parse/lex.re" +#line 292 "../src/parse/lex.re" { yylval.op = *tok; return TOKEN_CLOSE; } -#line 1073 "src/parse/lex.cc" +#line 1075 "src/parse/lex.cc" yy175: ++YYCURSOR; -#line 372 "../src/parse/lex.re" +#line 376 "../src/parse/lex.re" { yylval.regexp = mkDot(); return TOKEN_REGEXP; } -#line 1081 "src/parse/lex.cc" +#line 1083 "src/parse/lex.cc" yy177: yych = 
(YYCTYPE)*++YYCURSOR; if (yych == '*') goto yy194; @@ -1085,7 +1087,6 @@ yy177: goto yy170; yy178: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; goto yy204; yy179: yych = (YYCTYPE)*++YYCURSOR; @@ -1119,10 +1120,9 @@ yy182: if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy213; #line 265 "../src/parse/lex.re" { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } -#line 1123 "src/parse/lex.cc" +#line 1124 "src/parse/lex.cc" yy184: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; if (yych == 'e') goto yy215; goto yy204; yy185: @@ -1212,7 +1212,7 @@ yy196: yy198: ++YYCURSOR; YYCURSOR -= 1; -#line 356 "../src/parse/lex.re" +#line 360 "../src/parse/lex.re" { if (!opts->FFlag) { yylval.str = new std::string (tok, tok_len()); @@ -1235,7 +1235,7 @@ yy200: yy201: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 351 "../src/parse/lex.re" +#line 355 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); return TOKEN_ID; @@ -1245,22 +1245,29 @@ yy203: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*YYCURSOR; - YYCTXMARKER = YYCURSOR; yy204: if (yybm[0+yych] & 32) { goto yy203; } if (yych <= ' ') { - if (yych == '\t') goto yy200; + if (yych == '\t') { + YYCTXMARKER = YYCURSOR; + goto yy200; + } if (yych <= 0x1F) goto yy198; + YYCTXMARKER = YYCURSOR; goto yy200; } else { if (yych <= ',') { if (yych <= '+') goto yy198; + YYCTXMARKER = YYCURSOR; goto yy201; } else { if (yych <= '<') goto yy198; - if (yych <= '>') goto yy201; + if (yych <= '>') { + YYCTXMARKER = YYCURSOR; + goto yy201; + } goto yy198; } } @@ -1273,18 +1280,45 @@ yy205: depth = 0; goto code; } -#line 1277 "src/parse/lex.cc" +#line 1284 "src/parse/lex.cc" yy207: ++YYCURSOR; #line 271 "../src/parse/lex.re" { return TOKEN_SETUP; } -#line 1284 "src/parse/lex.cc" +#line 1291 "src/parse/lex.cc" yy209: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; - goto yy230; + if (yych <= '9') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy189; + YYCTXMARKER = YYCURSOR; + goto 
yy229; + } else { + if (yych == ' ') { + YYCTXMARKER = YYCURSOR; + goto yy229; + } + goto yy189; + } + } else { + if (yych <= '=') { + if (yych <= ':') { + YYCTXMARKER = YYCURSOR; + goto yy231; + } + if (yych <= '<') goto yy189; + YYCTXMARKER = YYCURSOR; + goto yy232; + } else { + if (yych == '{') { + YYCTXMARKER = YYCURSOR; + goto yy233; + } + goto yy189; + } + } yy210: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1304,28 +1338,31 @@ yy210: yy212: #line 275 "../src/parse/lex.re" { - yylval.str = new std::string(tok + 1, tok_len() - 1); - return TOKEN_CTX; + if (!opts->contexts) { + fatal("tags are only allowed with '-T, --tags' option"); + } + const std::string *name = new std::string(tok + 1, tok_len() - 1); + yylval.regexp = RegExp::ctx(name); + return TOKEN_REGEXP; } -#line 1311 "src/parse/lex.cc" +#line 1349 "src/parse/lex.cc" yy213: ++YYCURSOR; #line 266 "../src/parse/lex.re" { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } -#line 1316 "src/parse/lex.cc" +#line 1354 "src/parse/lex.cc" yy215: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; if (yych == '2') goto yy235; goto yy204; yy216: ++YYCURSOR; yy217: -#line 324 "../src/parse/lex.re" +#line 328 "../src/parse/lex.re" { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } -#line 1329 "src/parse/lex.cc" +#line 1366 "src/parse/lex.cc" yy218: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1378,7 +1415,7 @@ yy222: yy223: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 338 "../src/parse/lex.re" +#line 342 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); if (opts->FFlag) @@ -1391,7 +1428,7 @@ yy223: return TOKEN_ID; } } -#line 1395 "src/parse/lex.cc" +#line 1432 "src/parse/lex.cc" yy225: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1418,12 +1455,11 @@ yy227: { return *tok; } -#line 1422 "src/parse/lex.cc" +#line 1459 "src/parse/lex.cc" yy229: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*YYCURSOR; 
-yy230: if (yych <= '9') { if (yych <= '\t') { if (yych <= 0x08) goto yy189; @@ -1456,10 +1492,9 @@ yy233: { return TOKEN_NOCOND; } -#line 1460 "src/parse/lex.cc" +#line 1496 "src/parse/lex.cc" yy235: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; if (yych == 'c') goto yy242; goto yy204; yy236: @@ -1471,7 +1506,7 @@ yy236: goto yy217; yy237: ++YYCURSOR; -#line 293 "../src/parse/lex.re" +#line 297 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) { @@ -1480,10 +1515,10 @@ yy237: yylval.extop.max = yylval.extop.min; return TOKEN_CLOSESIZE; } -#line 1484 "src/parse/lex.cc" +#line 1519 "src/parse/lex.cc" yy239: ++YYCURSOR; -#line 328 "../src/parse/lex.re" +#line 332 "../src/parse/lex.re" { if (!opts->FFlag) { fatal("curly braces for names only allowed with -F switch"); @@ -1491,14 +1526,13 @@ yy239: yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces return TOKEN_ID; } -#line 1495 "src/parse/lex.cc" +#line 1530 "src/parse/lex.cc" yy241: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'n') goto yy247; goto yy189; yy242: yych = (YYCTYPE)*++YYCURSOR; - YYCTXMARKER = YYCURSOR; if (yych == ':') goto yy248; goto yy204; yy243: @@ -1511,7 +1545,7 @@ yy243: goto yy189; yy245: ++YYCURSOR; -#line 315 "../src/parse/lex.re" +#line 319 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) { @@ -1520,19 +1554,19 @@ yy245: yylval.extop.max = std::numeric_limits::max(); return TOKEN_CLOSESIZE; } -#line 1524 "src/parse/lex.cc" +#line 1558 "src/parse/lex.cc" yy247: yych = (YYCTYPE)*++YYCURSOR; if (yych == 'e') goto yy252; goto yy189; yy248: ++YYCURSOR; -#line 336 "../src/parse/lex.re" +#line 340 "../src/parse/lex.re" { lex_conf (); return TOKEN_CONF; } -#line 1533 "src/parse/lex.cc" +#line 1567 "src/parse/lex.cc" yy250: ++YYCURSOR; -#line 302 "../src/parse/lex.re" +#line 306 "../src/parse/lex.re" { const char * p = strchr (tok, ','); if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) @@ -1545,7 
+1579,7 @@ yy250: } return TOKEN_CLOSESIZE; } -#line 1549 "src/parse/lex.cc" +#line 1583 "src/parse/lex.cc" yy252: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '0') goto yy254; @@ -1555,7 +1589,6 @@ yy253: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - YYCTXMARKER = YYCURSOR; yy254: if (yych <= 0x1F) { if (yych == '\t') goto yy253; @@ -1564,6 +1597,7 @@ yy254: if (yych <= ' ') goto yy253; if (yych <= '0') goto yy189; if (yych >= ':') goto yy189; + YYCTXMARKER = YYCURSOR; } yy255: ++YYCURSOR; @@ -1601,12 +1635,12 @@ yy257: yy259: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 381 "../src/parse/lex.re" +#line 385 "../src/parse/lex.re" { set_sourceline (); goto scan; } -#line 1610 "src/parse/lex.cc" +#line 1644 "src/parse/lex.cc" yy261: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy259; @@ -1631,12 +1665,12 @@ yy265: if (yych == '\n') goto yy189; goto yy262; } -#line 397 "../src/parse/lex.re" +#line 401 "../src/parse/lex.re" flex_name: -#line 1640 "src/parse/lex.cc" +#line 1674 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1645,32 +1679,32 @@ flex_name: if (yych == '\r') goto yy272; ++YYCURSOR; yy269: -#line 408 "../src/parse/lex.re" +#line 412 "../src/parse/lex.re" { YYCURSOR = tok; goto start; } -#line 1654 "src/parse/lex.cc" +#line 1688 "src/parse/lex.cc" yy270: ++YYCURSOR; -#line 402 "../src/parse/lex.re" +#line 406 "../src/parse/lex.re" { YYCURSOR = tok; lexer_state = LEX_NORMAL; return TOKEN_FID_END; } -#line 1663 "src/parse/lex.cc" +#line 1697 "src/parse/lex.cc" yy272: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '\n') goto yy270; goto yy269; } -#line 412 "../src/parse/lex.re" +#line 416 "../src/parse/lex.re" code: -#line 1674 "src/parse/lex.cc" +#line 1708 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1731,7 +1765,7 @@ code: } yy275: ++YYCURSOR; -#line 475 "../src/parse/lex.re" +#line 479 "../src/parse/lex.re" { if (cur == eof) { @@ -1743,15 +1777,15 @@ yy275: } goto code; } 
-#line 1747 "src/parse/lex.cc" +#line 1781 "src/parse/lex.cc" yy277: ++YYCURSOR; yy278: -#line 489 "../src/parse/lex.re" +#line 493 "../src/parse/lex.re" { goto code; } -#line 1755 "src/parse/lex.cc" +#line 1789 "src/parse/lex.cc" yy279: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1770,7 +1804,7 @@ yy279: } } yy280: -#line 456 "../src/parse/lex.re" +#line 460 "../src/parse/lex.re" { if (depth == 0) { @@ -1790,7 +1824,7 @@ yy280: cline++; goto code; } -#line 1794 "src/parse/lex.cc" +#line 1828 "src/parse/lex.cc" yy281: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1803,7 +1837,7 @@ yy282: goto yy299; yy283: ++YYCURSOR; -#line 428 "../src/parse/lex.re" +#line 432 "../src/parse/lex.re" { if (depth == 0) { @@ -1815,10 +1849,10 @@ yy283: } goto code; } -#line 1819 "src/parse/lex.cc" +#line 1853 "src/parse/lex.cc" yy285: ++YYCURSOR; -#line 416 "../src/parse/lex.re" +#line 420 "../src/parse/lex.re" { if (depth == 0) { @@ -1831,7 +1865,7 @@ yy285: } goto code; } -#line 1835 "src/parse/lex.cc" +#line 1869 "src/parse/lex.cc" yy287: yyaccept = 2; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1843,7 +1877,7 @@ yy287: } yy288: YYCURSOR -= 1; -#line 443 "../src/parse/lex.re" +#line 447 "../src/parse/lex.re" { if (depth == 0) { @@ -1857,7 +1891,7 @@ yy288: cline++; goto code; } -#line 1861 "src/parse/lex.cc" +#line 1895 "src/parse/lex.cc" yy289: yych = (YYCTYPE)*++YYCURSOR; goto yy288; @@ -1892,11 +1926,11 @@ yy294: if (yych >= '#') goto yy297; yy295: ++YYCURSOR; -#line 486 "../src/parse/lex.re" +#line 490 "../src/parse/lex.re" { goto code; } -#line 1900 "src/parse/lex.cc" +#line 1934 "src/parse/lex.cc" yy297: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1945,7 +1979,6 @@ yy307: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - YYCTXMARKER = YYCURSOR; yy308: if (yych <= 0x1F) { if (yych == '\t') goto yy307; @@ -1954,6 +1987,7 @@ yy308: if (yych <= ' ') goto yy307; if (yych <= '0') goto yy292; if (yych >= ':') goto 
yy292; + YYCTXMARKER = YYCURSOR; } yy309: ++YYCURSOR; @@ -1986,12 +2020,12 @@ yy311: yy313: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 439 "../src/parse/lex.re" +#line 443 "../src/parse/lex.re" { set_sourceline (); goto code; } -#line 1995 "src/parse/lex.cc" +#line 2029 "src/parse/lex.cc" yy315: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy313; @@ -2020,12 +2054,12 @@ yy319: if (yych == '\n') goto yy292; goto yy316; } -#line 492 "../src/parse/lex.re" +#line 496 "../src/parse/lex.re" comment: -#line 2029 "src/parse/lex.cc" +#line 2063 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2072,7 +2106,7 @@ comment: } ++YYCURSOR; yy323: -#line 524 "../src/parse/lex.re" +#line 528 "../src/parse/lex.re" { if (cur == eof) { @@ -2080,7 +2114,7 @@ yy323: } goto comment; } -#line 2084 "src/parse/lex.cc" +#line 2118 "src/parse/lex.cc" yy324: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yybm[0+yych] & 32) { @@ -2088,7 +2122,7 @@ yy324: } if (yych == '#') goto yy331; yy325: -#line 515 "../src/parse/lex.re" +#line 519 "../src/parse/lex.re" { if (cur == eof) { @@ -2098,7 +2132,7 @@ yy325: cline++; goto comment; } -#line 2102 "src/parse/lex.cc" +#line 2136 "src/parse/lex.cc" yy326: yych = (YYCTYPE)*++YYCURSOR; if (yych == '/') goto yy333; @@ -2132,7 +2166,7 @@ yy331: } yy333: ++YYCURSOR; -#line 496 "../src/parse/lex.re" +#line 500 "../src/parse/lex.re" { if (--depth == 0) { @@ -2143,16 +2177,16 @@ yy333: goto comment; } } -#line 2147 "src/parse/lex.cc" +#line 2181 "src/parse/lex.cc" yy335: ++YYCURSOR; -#line 506 "../src/parse/lex.re" +#line 510 "../src/parse/lex.re" { ++depth; fatal("ambiguous /* found"); goto comment; } -#line 2156 "src/parse/lex.cc" +#line 2190 "src/parse/lex.cc" yy337: yych = (YYCTYPE)*++YYCURSOR; if (yych != 'i') goto yy330; @@ -2168,7 +2202,6 @@ yy341: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - YYCTXMARKER = YYCURSOR; yy342: if (yych <= 0x1F) { if (yych == '\t') goto yy341; @@ -2177,6 +2210,7 
@@ yy342: if (yych <= ' ') goto yy341; if (yych <= '0') goto yy330; if (yych >= ':') goto yy330; + YYCTXMARKER = YYCURSOR; } yy343: ++YYCURSOR; @@ -2209,12 +2243,12 @@ yy345: yy347: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 511 "../src/parse/lex.re" +#line 515 "../src/parse/lex.re" { set_sourceline (); goto comment; } -#line 2218 "src/parse/lex.cc" +#line 2252 "src/parse/lex.cc" yy349: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy347; @@ -2239,28 +2273,28 @@ yy353: if (yych == '\n') goto yy330; goto yy350; } -#line 531 "../src/parse/lex.re" +#line 535 "../src/parse/lex.re" nextLine: -#line 2248 "src/parse/lex.cc" +#line 2282 "src/parse/lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == '\n') goto yy358; ++YYCURSOR; -#line 542 "../src/parse/lex.re" +#line 546 "../src/parse/lex.re" { if(cur == eof) { return 0; } goto nextLine; } -#line 2261 "src/parse/lex.cc" +#line 2295 "src/parse/lex.cc" yy358: ++YYCURSOR; -#line 535 "../src/parse/lex.re" +#line 539 "../src/parse/lex.re" { if(cur == eof) { return 0; } @@ -2268,9 +2302,9 @@ yy358: cline++; goto scan; } -#line 2272 "src/parse/lex.cc" +#line 2306 "src/parse/lex.cc" } -#line 547 "../src/parse/lex.re" +#line 551 "../src/parse/lex.re" } @@ -2294,35 +2328,35 @@ const RegExp *Scanner::lex_cls(bool neg) uint32_t u, l; fst: -#line 2298 "src/parse/lex.cc" +#line 2332 "src/parse/lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == ']') goto yy363; -#line 571 "../src/parse/lex.re" +#line 575 "../src/parse/lex.re" { l = lex_cls_chr(); goto snd; } -#line 2306 "src/parse/lex.cc" +#line 2340 "src/parse/lex.cc" yy363: ++YYCURSOR; -#line 570 "../src/parse/lex.re" +#line 574 "../src/parse/lex.re" { goto end; } -#line 2311 "src/parse/lex.cc" +#line 2345 "src/parse/lex.cc" } -#line 572 "../src/parse/lex.re" +#line 576 "../src/parse/lex.re" snd: -#line 2317 "src/parse/lex.cc" +#line 2351 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT 
- YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*(YYMARKER = YYCURSOR); if (yych == '-') goto yy368; yy367: -#line 575 "../src/parse/lex.re" +#line 579 "../src/parse/lex.re" { u = l; goto add; } -#line 2326 "src/parse/lex.cc" +#line 2360 "src/parse/lex.cc" yy368: yych = (YYCTYPE)*++YYCURSOR; if (yych != ']') goto yy370; @@ -2331,7 +2365,7 @@ yy368: yy370: ++YYCURSOR; YYCURSOR -= 1; -#line 576 "../src/parse/lex.re" +#line 580 "../src/parse/lex.re" { u = lex_cls_chr(); if (l > u) { @@ -2340,9 +2374,9 @@ yy370: } goto add; } -#line 2344 "src/parse/lex.cc" +#line 2378 "src/parse/lex.cc" } -#line 584 "../src/parse/lex.re" +#line 588 "../src/parse/lex.re" add: if (!(s = opts->encoding.encodeRange(l, u))) { @@ -2361,7 +2395,7 @@ uint32_t Scanner::lex_cls_chr() { tok = cur; -#line 2365 "src/parse/lex.cc" +#line 2399 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -2370,14 +2404,14 @@ uint32_t Scanner::lex_cls_chr() if (yych == '\n') goto yy376; if (yych == '\\') goto yy378; ++YYCURSOR; -#line 607 "../src/parse/lex.re" +#line 611 "../src/parse/lex.re" { return static_cast(tok[0]); } -#line 2376 "src/parse/lex.cc" +#line 2410 "src/parse/lex.cc" yy376: ++YYCURSOR; -#line 602 "../src/parse/lex.re" +#line 606 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2381 "src/parse/lex.cc" +#line 2415 "src/parse/lex.cc" yy378: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { @@ -2426,31 +2460,31 @@ yy378: } } } -#line 605 "../src/parse/lex.re" +#line 609 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2432 "src/parse/lex.cc" +#line 2466 "src/parse/lex.cc" yy380: ++YYCURSOR; -#line 620 "../src/parse/lex.re" +#line 624 "../src/parse/lex.re" { warn.useless_escape(tline, tok - pos, tok[1]); return static_cast(tok[1]); } -#line 2440 "src/parse/lex.cc" +#line 2474 "src/parse/lex.cc" yy382: ++YYCURSOR; -#line 618 "../src/parse/lex.re" +#line 622 "../src/parse/lex.re" { return static_cast('-'); } 
-#line 2445 "src/parse/lex.cc" +#line 2479 "src/parse/lex.cc" yy384: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '/') goto yy385; if (yych <= '7') goto yy409; yy385: -#line 604 "../src/parse/lex.re" +#line 608 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2454 "src/parse/lex.cc" +#line 2488 "src/parse/lex.cc" yy386: yych = (YYCTYPE)*++YYCURSOR; goto yy385; @@ -2466,9 +2500,9 @@ yy387: if (yych <= 'f') goto yy411; } yy388: -#line 603 "../src/parse/lex.re" +#line 607 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2472 "src/parse/lex.cc" +#line 2506 "src/parse/lex.cc" yy389: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2484,49 +2518,49 @@ yy389: } yy390: ++YYCURSOR; -#line 617 "../src/parse/lex.re" +#line 621 "../src/parse/lex.re" { return static_cast('\\'); } -#line 2490 "src/parse/lex.cc" +#line 2524 "src/parse/lex.cc" yy392: ++YYCURSOR; -#line 619 "../src/parse/lex.re" +#line 623 "../src/parse/lex.re" { return static_cast(']'); } -#line 2495 "src/parse/lex.cc" +#line 2529 "src/parse/lex.cc" yy394: ++YYCURSOR; -#line 610 "../src/parse/lex.re" +#line 614 "../src/parse/lex.re" { return static_cast('\a'); } -#line 2500 "src/parse/lex.cc" +#line 2534 "src/parse/lex.cc" yy396: ++YYCURSOR; -#line 611 "../src/parse/lex.re" +#line 615 "../src/parse/lex.re" { return static_cast('\b'); } -#line 2505 "src/parse/lex.cc" +#line 2539 "src/parse/lex.cc" yy398: ++YYCURSOR; -#line 612 "../src/parse/lex.re" +#line 616 "../src/parse/lex.re" { return static_cast('\f'); } -#line 2510 "src/parse/lex.cc" +#line 2544 "src/parse/lex.cc" yy400: ++YYCURSOR; -#line 613 "../src/parse/lex.re" +#line 617 "../src/parse/lex.re" { return static_cast('\n'); } -#line 2515 "src/parse/lex.cc" +#line 2549 "src/parse/lex.cc" yy402: ++YYCURSOR; -#line 614 "../src/parse/lex.re" +#line 618 "../src/parse/lex.re" { return static_cast('\r'); } -#line 2520 
"src/parse/lex.cc" +#line 2554 "src/parse/lex.cc" yy404: ++YYCURSOR; -#line 615 "../src/parse/lex.re" +#line 619 "../src/parse/lex.re" { return static_cast('\t'); } -#line 2525 "src/parse/lex.cc" +#line 2559 "src/parse/lex.cc" yy406: ++YYCURSOR; -#line 616 "../src/parse/lex.re" +#line 620 "../src/parse/lex.re" { return static_cast('\v'); } -#line 2530 "src/parse/lex.cc" +#line 2564 "src/parse/lex.cc" yy408: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2589,9 +2623,9 @@ yy413: } yy414: ++YYCURSOR; -#line 609 "../src/parse/lex.re" +#line 613 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2595 "src/parse/lex.cc" +#line 2629 "src/parse/lex.cc" yy416: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2618,9 +2652,9 @@ yy417: } yy418: ++YYCURSOR; -#line 608 "../src/parse/lex.re" +#line 612 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2624 "src/parse/lex.cc" +#line 2658 "src/parse/lex.cc" yy420: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2644,7 +2678,7 @@ yy421: goto yy410; } } -#line 624 "../src/parse/lex.re" +#line 628 "../src/parse/lex.re" } @@ -2653,7 +2687,7 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) end = false; tok = cur; -#line 2657 "src/parse/lex.cc" +#line 2691 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -2662,17 +2696,17 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) if (yych == '\n') goto yy426; if (yych == '\\') goto yy428; ++YYCURSOR; -#line 637 "../src/parse/lex.re" +#line 641 "../src/parse/lex.re" { end = tok[0] == quote; return static_cast(tok[0]); } -#line 2671 "src/parse/lex.cc" +#line 2705 "src/parse/lex.cc" yy426: ++YYCURSOR; -#line 632 "../src/parse/lex.re" +#line 636 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2676 "src/parse/lex.cc" +#line 2710 "src/parse/lex.cc" yy428: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { @@ -2718,28 +2752,28 @@ yy428: } } } -#line 635 "../src/parse/lex.re" +#line 639 
"../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2724 "src/parse/lex.cc" +#line 2758 "src/parse/lex.cc" yy430: ++YYCURSOR; -#line 651 "../src/parse/lex.re" +#line 655 "../src/parse/lex.re" { if (tok[1] != quote) { warn.useless_escape(tline, tok - pos, tok[1]); } return static_cast(tok[1]); } -#line 2734 "src/parse/lex.cc" +#line 2768 "src/parse/lex.cc" yy432: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '/') goto yy433; if (yych <= '7') goto yy455; yy433: -#line 634 "../src/parse/lex.re" +#line 638 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2743 "src/parse/lex.cc" +#line 2777 "src/parse/lex.cc" yy434: yych = (YYCTYPE)*++YYCURSOR; goto yy433; @@ -2755,9 +2789,9 @@ yy435: if (yych <= 'f') goto yy457; } yy436: -#line 633 "../src/parse/lex.re" +#line 637 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2761 "src/parse/lex.cc" +#line 2795 "src/parse/lex.cc" yy437: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2773,44 +2807,44 @@ yy437: } yy438: ++YYCURSOR; -#line 650 "../src/parse/lex.re" +#line 654 "../src/parse/lex.re" { return static_cast('\\'); } -#line 2779 "src/parse/lex.cc" +#line 2813 "src/parse/lex.cc" yy440: ++YYCURSOR; -#line 643 "../src/parse/lex.re" +#line 647 "../src/parse/lex.re" { return static_cast('\a'); } -#line 2784 "src/parse/lex.cc" +#line 2818 "src/parse/lex.cc" yy442: ++YYCURSOR; -#line 644 "../src/parse/lex.re" +#line 648 "../src/parse/lex.re" { return static_cast('\b'); } -#line 2789 "src/parse/lex.cc" +#line 2823 "src/parse/lex.cc" yy444: ++YYCURSOR; -#line 645 "../src/parse/lex.re" +#line 649 "../src/parse/lex.re" { return static_cast('\f'); } -#line 2794 "src/parse/lex.cc" +#line 2828 "src/parse/lex.cc" yy446: ++YYCURSOR; -#line 646 "../src/parse/lex.re" +#line 650 "../src/parse/lex.re" { return static_cast('\n'); } -#line 2799 
"src/parse/lex.cc" +#line 2833 "src/parse/lex.cc" yy448: ++YYCURSOR; -#line 647 "../src/parse/lex.re" +#line 651 "../src/parse/lex.re" { return static_cast('\r'); } -#line 2804 "src/parse/lex.cc" +#line 2838 "src/parse/lex.cc" yy450: ++YYCURSOR; -#line 648 "../src/parse/lex.re" +#line 652 "../src/parse/lex.re" { return static_cast('\t'); } -#line 2809 "src/parse/lex.cc" +#line 2843 "src/parse/lex.cc" yy452: ++YYCURSOR; -#line 649 "../src/parse/lex.re" +#line 653 "../src/parse/lex.re" { return static_cast('\v'); } -#line 2814 "src/parse/lex.cc" +#line 2848 "src/parse/lex.cc" yy454: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -2873,9 +2907,9 @@ yy459: } yy460: ++YYCURSOR; -#line 642 "../src/parse/lex.re" +#line 646 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2879 "src/parse/lex.cc" +#line 2913 "src/parse/lex.cc" yy462: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2902,9 +2936,9 @@ yy463: } yy464: ++YYCURSOR; -#line 641 "../src/parse/lex.re" +#line 645 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2908 "src/parse/lex.cc" +#line 2942 "src/parse/lex.cc" yy466: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -2928,7 +2962,7 @@ yy467: goto yy456; } } -#line 657 "../src/parse/lex.re" +#line 661 "../src/parse/lex.re" } @@ -2949,7 +2983,7 @@ void Scanner::set_sourceline () sourceline: tok = cur; -#line 2953 "src/parse/lex.cc" +#line 2987 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2998,14 +3032,14 @@ sourceline: yy470: ++YYCURSOR; yy471: -#line 700 "../src/parse/lex.re" +#line 704 "../src/parse/lex.re" { goto sourceline; } -#line 3006 "src/parse/lex.cc" +#line 3040 "src/parse/lex.cc" yy472: ++YYCURSOR; -#line 688 "../src/parse/lex.re" +#line 692 "../src/parse/lex.re" { if (cur == eof) { @@ -3018,7 +3052,7 @@ yy472: tok = cur; return; } -#line 3022 "src/parse/lex.cc" +#line 3056 "src/parse/lex.cc" yy474: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych == '\n') goto yy471; @@ -3030,7 
+3064,7 @@ yy475: if (yybm[0+yych] & 64) { goto yy475; } -#line 677 "../src/parse/lex.re" +#line 681 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok, cur, cline)) { @@ -3038,7 +3072,7 @@ yy475: } goto sourceline; } -#line 3042 "src/parse/lex.cc" +#line 3076 "src/parse/lex.cc" yy478: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -3055,12 +3089,12 @@ yy480: goto yy471; yy481: ++YYCURSOR; -#line 684 "../src/parse/lex.re" +#line 688 "../src/parse/lex.re" { escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } -#line 3064 "src/parse/lex.cc" +#line 3098 "src/parse/lex.cc" yy483: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -3068,7 +3102,7 @@ yy483: if (yych == '\n') goto yy480; goto yy478; } -#line 703 "../src/parse/lex.re" +#line 707 "../src/parse/lex.re" } diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index 714eb1bf..ed9d06bc 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -218,14 +218,13 @@ void default_rule(CondList *clist, RegExpRule *rule) TOKEN_CLOSESIZE = 259, TOKEN_CODE = 260, TOKEN_CONF = 261, - TOKEN_CTX = 262, - TOKEN_ID = 263, - TOKEN_FID = 264, - TOKEN_FID_END = 265, - TOKEN_NOCOND = 266, - TOKEN_REGEXP = 267, - TOKEN_SETUP = 268, - TOKEN_STAR = 269 + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 }; #endif @@ -473,17 +472,17 @@ union yyalloc #define YYLAST 82 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 26 +#define YYNTOKENS 25 /* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 18 +#define YYNNTS 16 /* YYNRULES -- Number of rules. */ -#define YYNRULES 51 +#define YYNRULES 47 /* YYNRULES -- Number of states. */ -#define YYNSTATES 85 +#define YYNSTATES 80 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. 
*/ #define YYUNDEFTOK 2 -#define YYMAXUTOK 269 +#define YYMAXUTOK 268 #define YYTRANSLATE(YYX) \ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) @@ -495,15 +494,15 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 24, 25, 2, 2, 21, 2, 2, 17, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 20, 16, - 18, 15, 19, 2, 2, 2, 2, 2, 2, 2, + 23, 24, 2, 2, 20, 2, 2, 14, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 19, 16, + 17, 15, 18, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 23, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 22, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -517,7 +516,7 @@ static const yytype_uint8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + 5, 6, 7, 8, 9, 10, 11, 12, 13 }; #if YYDEBUG @@ -526,44 +525,41 @@ static const yytype_uint8 yytranslate[] = static const yytype_uint8 yyprhs[] = { 0, 0, 3, 4, 7, 10, 13, 17, 21, 24, - 26, 28, 30, 32, 34, 37, 40, 47, 54, 60, - 67, 74, 80, 84, 88, 93, 98, 99, 101, 103, - 107, 108, 112, 114, 118, 120, 124, 126, 130, 132, - 136, 138, 141, 143, 146, 149, 151, 153, 156, 159, - 161, 163 + 26, 28, 30, 33, 36, 43, 50, 56, 63, 70, + 76, 80, 84, 89, 94, 95, 97, 99, 103, 104, + 108, 110, 114, 116, 120, 122, 126, 128, 131, 133, + 136, 139, 141, 143, 146, 149, 151, 153 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ static const yytype_int8 yyrhs[] = { - 27, 0, -1, -1, 27, 6, -1, 27, 28, -1, - 27, 32, -1, 29, 38, 30, -1, 29, 38, 31, - -1, 8, 15, -1, 9, -1, 16, -1, 10, -1, - 17, -1, 7, -1, 36, 5, -1, 14, 5, -1, - 18, 33, 19, 36, 35, 5, -1, 18, 33, 19, - 36, 20, 35, -1, 18, 33, 19, 14, 5, -1, - 18, 14, 19, 36, 35, 5, -1, 18, 14, 19, - 36, 20, 35, -1, 18, 14, 19, 14, 5, -1, - 11, 35, 5, -1, 11, 20, 35, -1, 13, 14, - 19, 5, -1, 13, 33, 19, 5, -1, -1, 34, - -1, 8, -1, 34, 21, 8, -1, -1, 15, 19, - 8, -1, 37, -1, 37, 17, 38, -1, 38, -1, - 37, 7, 38, -1, 39, -1, 38, 22, 39, -1, - 40, -1, 39, 23, 40, -1, 41, -1, 40, 41, - -1, 43, -1, 43, 42, -1, 43, 4, -1, 3, - -1, 14, -1, 42, 3, -1, 42, 14, -1, 8, - -1, 12, -1, 24, 38, 25, -1 + 26, 0, -1, -1, 26, 6, -1, 26, 27, -1, + 26, 30, -1, 28, 35, 29, -1, 28, 35, 14, + -1, 7, 15, -1, 8, -1, 16, -1, 9, -1, + 34, 5, -1, 13, 5, -1, 17, 31, 18, 34, + 33, 5, -1, 17, 31, 18, 34, 19, 33, -1, + 17, 31, 18, 13, 5, -1, 17, 13, 18, 34, + 33, 5, -1, 17, 13, 18, 34, 19, 33, -1, + 17, 13, 18, 13, 5, -1, 10, 33, 5, -1, + 10, 19, 33, -1, 12, 13, 18, 5, -1, 12, + 31, 18, 5, -1, -1, 32, -1, 7, -1, 32, + 20, 7, -1, -1, 15, 18, 7, -1, 35, -1, + 35, 14, 35, -1, 36, -1, 35, 21, 36, -1, + 37, -1, 36, 22, 37, -1, 38, -1, 37, 38, + -1, 40, -1, 40, 39, -1, 40, 4, -1, 3, + -1, 13, -1, 39, 3, -1, 39, 13, -1, 7, + -1, 11, -1, 23, 35, 24, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. 
*/ static const yytype_uint16 yyrline[] = { - 0, 158, 158, 160, 161, 162, 167, 174, 179, 182, - 186, 186, 188, 188, 191, 200, 211, 215, 221, 227, - 234, 243, 251, 261, 272, 278, 284, 287, 294, 300, - 310, 313, 320, 324, 332, 336, 348, 352, 359, 363, - 370, 374, 381, 385, 400, 419, 423, 427, 431, 438, - 448, 452 + 0, 157, 157, 159, 160, 161, 166, 173, 178, 181, + 185, 185, 188, 197, 208, 212, 218, 224, 231, 240, + 248, 258, 269, 275, 281, 284, 291, 297, 307, 310, + 317, 321, 326, 330, 337, 341, 348, 352, 359, 363, + 378, 397, 401, 405, 409, 416, 426, 430 }; #endif @@ -573,12 +569,12 @@ static const yytype_uint16 yyrline[] = static const char *const yytname[] = { "$end", "error", "$undefined", "TOKEN_CLOSE", "TOKEN_CLOSESIZE", - "TOKEN_CODE", "TOKEN_CONF", "TOKEN_CTX", "TOKEN_ID", "TOKEN_FID", - "TOKEN_FID_END", "TOKEN_NOCOND", "TOKEN_REGEXP", "TOKEN_SETUP", - "TOKEN_STAR", "'='", "';'", "'/'", "'<'", "'>'", "':'", "','", "'|'", - "'\\\\'", "'('", "')'", "$accept", "spec", "def", "name", "enddef", - "ctx", "rule", "cond", "clist", "newcond", "trailexpr", "ctxexpr", - "expr", "diff", "term", "factor", "close", "primary", 0 + "TOKEN_CODE", "TOKEN_CONF", "TOKEN_ID", "TOKEN_FID", "TOKEN_FID_END", + "TOKEN_NOCOND", "TOKEN_REGEXP", "TOKEN_SETUP", "TOKEN_STAR", "'/'", + "'='", "';'", "'<'", "'>'", "':'", "','", "'|'", "'\\\\'", "'('", "')'", + "$accept", "spec", "def", "name", "enddef", "rule", "cond", "clist", + "newcond", "trailexpr", "expr", "diff", "term", "factor", "close", + "primary", 0 }; #endif @@ -588,31 +584,29 @@ static const char *const yytname[] = static const yytype_uint16 yytoknum[] = { 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 61, 59, 47, 60, 62, - 58, 44, 124, 92, 40, 41 + 265, 266, 267, 268, 47, 61, 59, 60, 62, 58, + 44, 124, 92, 40, 41 }; # endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ static const yytype_uint8 yyr1[] = { - 0, 26, 27, 27, 27, 27, 28, 28, 29, 29, - 30, 30, 31, 31, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, - 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, - 40, 40, 41, 41, 41, 42, 42, 42, 42, 43, - 43, 43 + 0, 25, 26, 26, 26, 26, 27, 27, 28, 28, + 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 31, 31, 32, 32, 33, 33, + 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, + 38, 39, 39, 39, 39, 40, 40, 40 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ static const yytype_uint8 yyr2[] = { 0, 2, 0, 2, 2, 2, 3, 3, 2, 1, - 1, 1, 1, 1, 2, 2, 6, 6, 5, 6, - 6, 5, 3, 3, 4, 4, 0, 1, 1, 3, - 0, 3, 1, 3, 1, 3, 1, 3, 1, 3, - 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, - 1, 3 + 1, 1, 2, 2, 6, 6, 5, 6, 6, 5, + 3, 3, 4, 4, 0, 1, 1, 3, 0, 3, + 1, 3, 1, 3, 1, 3, 1, 2, 1, 2, + 2, 1, 1, 2, 2, 1, 1, 3 }; /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state @@ -620,45 +614,43 @@ static const yytype_uint8 yyr2[] = means the default is an error. */ static const yytype_uint8 yydefact[] = { - 2, 0, 1, 3, 49, 9, 30, 50, 26, 0, - 26, 0, 4, 0, 5, 0, 32, 34, 36, 38, - 40, 42, 8, 0, 30, 0, 28, 0, 0, 27, - 15, 0, 0, 49, 0, 0, 14, 0, 0, 0, - 0, 41, 45, 44, 46, 43, 0, 23, 22, 0, - 0, 0, 0, 0, 51, 13, 11, 10, 12, 6, - 7, 35, 33, 37, 39, 47, 48, 31, 24, 25, - 29, 0, 30, 0, 30, 21, 30, 0, 18, 30, - 0, 20, 19, 17, 16 + 2, 0, 1, 3, 45, 9, 28, 46, 24, 0, + 24, 0, 4, 0, 5, 0, 30, 32, 34, 36, + 38, 8, 0, 28, 0, 26, 0, 0, 25, 13, + 0, 0, 45, 0, 0, 12, 0, 0, 0, 37, + 41, 40, 42, 39, 0, 21, 20, 0, 0, 0, + 0, 0, 47, 11, 7, 10, 6, 31, 33, 35, + 43, 44, 29, 22, 23, 27, 0, 28, 0, 28, + 19, 28, 0, 16, 28, 0, 18, 17, 15, 14 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int8 yydefgoto[] = { - -1, 1, 12, 13, 59, 60, 14, 28, 29, 25, - 15, 16, 17, 18, 19, 20, 45, 21 + -1, 1, 12, 13, 56, 14, 27, 28, 24, 15, + 16, 17, 18, 19, 43, 20 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. 
*/ -#define YYPACT_NINF -25 +#define YYPACT_NINF -45 static const yytype_int8 yypact[] = { - -25, 3, -25, -25, -7, -25, -10, -25, -1, 1, - 43, 18, -25, 18, -25, 51, 32, 38, 39, 18, - -25, 29, -25, 44, 49, 60, -25, 47, 48, 50, - -25, 53, 54, -25, 36, 37, -25, 18, 18, 18, - 18, -25, -25, -25, -25, 17, 61, -25, -25, 63, - 65, 66, 10, 11, -25, -25, -25, -25, -25, -25, - -25, 38, 38, 39, 18, -25, -25, -25, -25, -25, - -25, 70, 21, 71, 25, -25, 49, 72, -25, 49, - 73, -25, -25, -25, -25 + -45, 3, -45, -45, 15, -45, 13, -45, 40, 47, + 43, 18, -45, 18, -45, 50, -2, 37, 18, -45, + 36, -45, 19, 45, 57, -45, 46, 48, 49, -45, + 52, 53, -45, -16, 22, -45, 18, 18, 18, -45, + -45, -45, -45, 32, 56, -45, -45, 60, 62, 61, + 10, 11, -45, -45, -45, -45, -45, 51, 37, 18, + -45, -45, -45, -45, -45, -45, 68, 39, 69, 42, + -45, 45, 70, -45, 45, 71, -45, -45, -45, -45 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int8 yypgoto[] = { - -25, -25, -25, -25, -25, -25, -25, 69, -25, -24, - -15, -25, -9, 41, 42, -18, -25, -25 + -45, -45, -45, -45, -45, -45, 67, -45, -23, -44, + -9, 41, 44, -17, -45, -45 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. 
If @@ -668,43 +660,42 @@ static const yytype_int8 yypgoto[] = #define YYTABLE_NINF -1 static const yytype_uint8 yytable[] = { - 47, 41, 34, 2, 35, 23, 30, 26, 22, 3, - 24, 4, 5, 27, 6, 7, 8, 9, 33, 33, - 65, 10, 7, 7, 71, 73, 33, 11, 61, 62, - 7, 66, 42, 43, 11, 11, 23, 72, 74, 37, - 23, 76, 11, 44, 55, 79, 41, 56, 77, 38, - 80, 26, 81, 57, 58, 83, 36, 31, 39, 39, - 39, 54, 40, 46, 23, 48, 49, 50, 68, 67, - 69, 51, 52, 53, 70, 75, 78, 82, 84, 32, - 63, 0, 64 + 45, 39, 33, 2, 34, 37, 67, 69, 52, 3, + 4, 5, 36, 6, 7, 8, 9, 32, 32, 37, + 10, 7, 7, 66, 68, 32, 11, 57, 22, 7, + 21, 53, 23, 11, 11, 60, 54, 44, 55, 40, + 41, 11, 39, 37, 72, 61, 75, 25, 76, 42, + 25, 78, 29, 26, 22, 35, 30, 22, 71, 38, + 22, 74, 46, 62, 47, 63, 48, 64, 65, 49, + 50, 51, 37, 70, 73, 77, 79, 31, 58, 0, + 0, 0, 59 }; static const yytype_int8 yycheck[] = { - 24, 19, 11, 0, 13, 15, 5, 8, 15, 6, - 20, 8, 9, 14, 11, 12, 13, 14, 8, 8, - 3, 18, 12, 12, 14, 14, 8, 24, 37, 38, - 12, 14, 3, 4, 24, 24, 15, 52, 53, 7, - 15, 20, 24, 14, 7, 20, 64, 10, 72, 17, - 74, 8, 76, 16, 17, 79, 5, 14, 22, 22, - 22, 25, 23, 19, 15, 5, 19, 19, 5, 8, - 5, 21, 19, 19, 8, 5, 5, 5, 5, 10, - 39, -1, 40 + 23, 18, 11, 0, 13, 21, 50, 51, 24, 6, + 7, 8, 14, 10, 11, 12, 13, 7, 7, 21, + 17, 11, 11, 13, 13, 7, 23, 36, 15, 11, + 15, 9, 19, 23, 23, 3, 14, 18, 16, 3, + 4, 23, 59, 21, 67, 13, 69, 7, 71, 13, + 7, 74, 5, 13, 15, 5, 13, 15, 19, 22, + 15, 19, 5, 7, 18, 5, 18, 5, 7, 20, + 18, 18, 21, 5, 5, 5, 5, 10, 37, -1, + -1, -1, 38 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. 
*/ static const yytype_uint8 yystos[] = { - 0, 27, 0, 6, 8, 9, 11, 12, 13, 14, - 18, 24, 28, 29, 32, 36, 37, 38, 39, 40, - 41, 43, 15, 15, 20, 35, 8, 14, 33, 34, - 5, 14, 33, 8, 38, 38, 5, 7, 17, 22, - 23, 41, 3, 4, 14, 42, 19, 35, 5, 19, - 19, 21, 19, 19, 25, 7, 10, 16, 17, 30, - 31, 38, 38, 39, 40, 3, 14, 8, 5, 5, - 8, 14, 36, 14, 36, 5, 20, 35, 5, 20, - 35, 35, 5, 35, 5 + 0, 26, 0, 6, 7, 8, 10, 11, 12, 13, + 17, 23, 27, 28, 30, 34, 35, 36, 37, 38, + 40, 15, 15, 19, 33, 7, 13, 31, 32, 5, + 13, 31, 7, 35, 35, 5, 14, 21, 22, 38, + 3, 4, 13, 39, 18, 33, 5, 18, 18, 20, + 18, 18, 24, 9, 14, 16, 29, 35, 36, 37, + 3, 13, 7, 5, 5, 7, 13, 34, 13, 34, + 5, 19, 33, 5, 19, 33, 33, 5, 33, 5 }; #define yyerrok (yyerrstatus = 0) @@ -1544,7 +1535,7 @@ yyreduce: case 7: { - in->fatal("contexts are not allowed in named definitions"); + in->fatal("trailing contexts are not allowed in named definitions"); ;} break; @@ -1562,7 +1553,7 @@ yyreduce: ;} break; - case 14: + case 12: { if (opts->cFlag) { @@ -1573,7 +1564,7 @@ yyreduce: ;} break; - case 15: + case 13: { if (opts->cFlag) { @@ -1587,14 +1578,14 @@ yyreduce: ;} break; - case 16: + case 14: { context_rule((yyvsp[(2) - (6)].clist), (yyvsp[(6) - (6)].code)->loc, (yyvsp[(4) - (6)].rule), (yyvsp[(6) - (6)].code), (yyvsp[(5) - (6)].str)); ;} break; - case 17: + case 15: { Loc loc(in->get_fname(), in->get_cline()); @@ -1602,7 +1593,7 @@ yyreduce: ;} break; - case 18: + case 16: { RegExpRule *def = new RegExpRule(in->mkDefault()); @@ -1611,7 +1602,7 @@ yyreduce: ;} break; - case 19: + case 17: { context_check(NULL); @@ -1621,7 +1612,7 @@ yyreduce: ;} break; - case 20: + case 18: { context_check(NULL); @@ -1632,7 +1623,7 @@ yyreduce: ;} break; - case 21: + case 19: { RegExpRule *def = new RegExpRule(in->mkDefault()); @@ -1643,7 +1634,7 @@ yyreduce: ;} break; - case 22: + case 20: { context_check(NULL); @@ -1656,7 +1647,7 @@ yyreduce: ;} break; - case 23: + case 21: { context_check(NULL); @@ -1670,7 +1661,7 @@ yyreduce: ;} break; - 
case 24: + case 22: { CondList *clist = new CondList; @@ -1679,28 +1670,28 @@ yyreduce: ;} break; - case 25: + case 23: { setup_rule((yyvsp[(2) - (4)].clist), (yyvsp[(4) - (4)].code)); ;} break; - case 26: + case 24: { in->fatal("unnamed condition not supported"); ;} break; - case 27: + case 25: { (yyval.clist) = (yyvsp[(1) - (1)].clist); ;} break; - case 28: + case 26: { (yyval.clist) = new CondList(); @@ -1709,7 +1700,7 @@ yyreduce: ;} break; - case 29: + case 27: { (yyvsp[(1) - (3)].clist)->insert(* (yyvsp[(3) - (3)].str)); @@ -1718,108 +1709,84 @@ yyreduce: ;} break; - case 30: + case 28: { (yyval.str) = NULL; ;} break; - case 31: + case 29: { (yyval.str) = (yyvsp[(3) - (3)].str); ;} break; - case 32: - - { - (yyval.rule) = (yyvsp[(1) - (1)].rule); - ;} - break; - - case 33: - - { - // multiple trailing contexts on the same rule are not allowed - (yyval.rule) = (yyvsp[(1) - (3)].rule); - (yyval.rule)->regexps.push_back((yyvsp[(3) - (3)].regexp)); - (yyval.rule)->ctxnames.push_back(NULL); - ;} - break; - - case 34: + case 30: { (yyval.rule) = new RegExpRule((yyvsp[(1) - (1)].regexp)); ;} break; - case 35: + case 31: { - if (!opts->contexts) { - delete (yyvsp[(2) - (3)].str); - in->fatal("non-trailing contexts are only allowed" - " with '-C, --contexts' option"); - } - (yyval.rule) = (yyvsp[(1) - (3)].rule); - (yyval.rule)->regexps.push_back((yyvsp[(3) - (3)].regexp)); - (yyval.rule)->ctxnames.push_back((yyvsp[(2) - (3)].str)); + (yyval.rule) = new RegExpRule(RegExp::cat((yyvsp[(1) - (3)].regexp), RegExp::cat(RegExp::ctx(NULL), (yyvsp[(3) - (3)].regexp)))); ;} break; - case 36: + case 32: { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} break; - case 37: + case 33: { (yyval.regexp) = mkAlt((yyvsp[(1) - (3)].regexp), (yyvsp[(3) - (3)].regexp)); ;} break; - case 38: + case 34: { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} break; - case 39: + case 35: { (yyval.regexp) = in->mkDiff((yyvsp[(1) - (3)].regexp), (yyvsp[(3) - (3)].regexp)); ;} break; - case 40: + case 
36: { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} break; - case 41: + case 37: { (yyval.regexp) = RegExp::cat((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].regexp)); ;} break; - case 42: + case 38: { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} break; - case 43: + case 39: { switch((yyvsp[(2) - (2)].op)) @@ -1837,7 +1804,7 @@ yyreduce: ;} break; - case 44: + case 40: { if ((yyvsp[(2) - (2)].extop).max == std::numeric_limits::max()) @@ -1856,35 +1823,35 @@ yyreduce: ;} break; - case 45: + case 41: { (yyval.op) = (yyvsp[(1) - (1)].op); ;} break; - case 46: + case 42: { (yyval.op) = (yyvsp[(1) - (1)].op); ;} break; - case 47: + case 43: { (yyval.op) = ((yyvsp[(1) - (2)].op) == (yyvsp[(2) - (2)].op)) ? (yyvsp[(1) - (2)].op) : '*'; ;} break; - case 48: + case 44: { (yyval.op) = ((yyvsp[(1) - (2)].op) == (yyvsp[(2) - (2)].op)) ? (yyvsp[(1) - (2)].op) : '*'; ;} break; - case 49: + case 45: { symbol_table_t::iterator i = symbol_table.find (* (yyvsp[(1) - (1)].str)); @@ -1897,14 +1864,14 @@ yyreduce: ;} break; - case 50: + case 46: { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} break; - case 51: + case 47: { (yyval.regexp) = (yyvsp[(2) - (3)].regexp); diff --git a/re2c/bootstrap/src/parse/y.tab.h b/re2c/bootstrap/src/parse/y.tab.h index fc9758ab..ee2b08ed 100644 --- a/re2c/bootstrap/src/parse/y.tab.h +++ b/re2c/bootstrap/src/parse/y.tab.h @@ -42,14 +42,13 @@ TOKEN_CLOSESIZE = 259, TOKEN_CODE = 260, TOKEN_CONF = 261, - TOKEN_CTX = 262, - TOKEN_ID = 263, - TOKEN_FID = 264, - TOKEN_FID_END = 265, - TOKEN_NOCOND = 266, - TOKEN_REGEXP = 267, - TOKEN_SETUP = 268, - TOKEN_STAR = 269 + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 }; #endif diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index c331ab60..aae5bdf5 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -210,18 +210,19 @@ void emit_accept(OutputFile &o, 
uint32_t ind, bool &readCh, } static void subst_contexts(std::string &action, const Rule &rule, - const std::vector &contexts) + const std::vector &vartags, + const std::vector &fixtags) { - for (size_t i = rule.ltag; i < rule.htag; ++i) { - const CtxVar &ctx = contexts[i]; + for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { + const CtxVar &ctx = vartags[i]; strrreplace(action, "@" + *ctx.codename, opts->input_api.expr_ctx(ctx.expr())); } - for (size_t i = 0; i < rule.ctxfix.size(); ++i) { - const CtxFix &ctx = rule.ctxfix[i]; + for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { + const CtxFix &ctx = fixtags[i]; strrreplace(action, "@" + *ctx.codename, - opts->input_api.expr_ctx_fix(ctx, contexts)); + opts->input_api.expr_ctx_fix(ctx, vartags)); } } @@ -236,13 +237,14 @@ void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) case Trail::VAR: if (dfa.base_ctxmarker) { o.wstring(opts->input_api.stmt_restorectx_var_base(ind, - dfa.contexts[trail.pld.var].expr())); + dfa.vartags[trail.var].expr())); } else { o.wstring(opts->input_api.stmt_restorectx_var(ind)); } break; case Trail::FIX: - o.wstring(opts->input_api.stmt_restorectx_fix(ind, trail.pld.fix)); + o.wstring(opts->input_api.stmt_restorectx_fix(ind, + dfa.fixtags[trail.fix].dist)); break; } @@ -259,7 +261,7 @@ void emit_rule(OutputFile &o, uint32_t ind, const DFA &dfa, size_t rule_idx) o.wind(ind).wstring(yySetupRule).ws("\n"); } std::string action = code->text; - subst_contexts(action, rule, dfa.contexts); + subst_contexts(action, rule, dfa.vartags, dfa.fixtags); o.wline_info(code->loc.line, code->loc.filename.c_str()) .wind(ind).wstring(action).ws("\n") .wdelay_line_info(); @@ -396,7 +398,7 @@ void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags) if (tags != 0) { if (dfa.base_ctxmarker) { o.wstring(opts->input_api.stmt_dist(ind, - dfa.tagpool[tags], dfa.contexts)); + dfa.tagpool[tags], dfa.vartags)); } else { o.wstring(opts->input_api.stmt_backupctx(ind)); } 
diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 158e178b..66e3a150 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -145,8 +145,8 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra std::set ctxnames; if (base_ctxmarker) { - for (size_t i = 0; i < contexts.size(); ++i) { - ctxnames.insert(contexts[i].name()); + for (size_t i = 0; i < vartags.size(); ++i) { + ctxnames.insert(vartags[i].name()); } ob.contexts.insert(ctxnames.begin(), ctxnames.end()); } diff --git a/re2c/src/codegen/go_emit.cc b/re2c/src/codegen/go_emit.cc index 6b9cb3da..e86b70f2 100644 --- a/re2c/src/codegen/go_emit.cc +++ b/re2c/src/codegen/go_emit.cc @@ -221,7 +221,7 @@ void Dot::emit(OutputFile &o, const DFA &dfa) const bool *tags = dfa.tagpool[c.tags]; for (size_t j = 0; j < dfa.tagpool.ntags; ++j) { if (tags[j]) { - o.ws("<").wstring(dfa.contexts[j].name()).ws(">"); + o.ws("<").wstring(dfa.vartags[j].name()).ws(">"); } } o.ws("\"]\n"); diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index cce2aed0..2e4cb308 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -36,7 +36,8 @@ DFA::DFA , nStates(0) , head(NULL) , rules(dfa.rules) - , contexts(dfa.contexts) + , vartags(dfa.vartags) + , fixtags(dfa.fixtags) , tagpool(dfa.tagpool) // statistics @@ -116,7 +117,8 @@ DFA::~DFA() delete skeleton; delete &rules; - delete &contexts; + delete &vartags; + delete &fixtags; delete &tagpool; } diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 0c72b8b2..e19fefa8 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -67,7 +67,8 @@ struct DFA uint32_t nStates; State * head; std::valarray &rules; - std::vector &contexts; + std::vector &vartags; + std::vector &fixtags; Tagpool &tagpool; size_t max_fill; bool need_backup; diff --git a/re2c/src/ir/ctx.cc b/re2c/src/ir/ctx.cc index 51382181..c079f4b2 100644 --- a/re2c/src/ir/ctx.cc +++ b/re2c/src/ir/ctx.cc 
@@ -9,12 +9,13 @@ namespace re2c { -CtxVar::CtxVar(const std::string *n, size_t idx) - : codename(n) +CtxVar::CtxVar(const std::string *n, size_t r) + : rule(r) + , codename(n) , uniqname() { std::ostringstream s; - s << idx; + s << rule; if (codename != NULL) { s << *codename; } diff --git a/re2c/src/ir/ctx.h b/re2c/src/ir/ctx.h index 7aba08db..06b92bdb 100644 --- a/re2c/src/ir/ctx.h +++ b/re2c/src/ir/ctx.h @@ -8,16 +8,19 @@ namespace re2c struct CtxVar { + size_t rule; const std::string *codename; std::string uniqname; - CtxVar(const std::string *n, size_t idx); + CtxVar(const std::string *n, size_t r); CtxVar(const CtxVar &ctx) - : codename(ctx.codename) + : rule(ctx.rule) + , codename(ctx.codename) , uniqname(ctx.uniqname) {} CtxVar& operator=(const CtxVar &ctx) { + rule = ctx.rule; codename = ctx.codename; uniqname = ctx.uniqname; return *this; @@ -30,12 +33,14 @@ struct CtxFix { static const size_t RIGHTMOST; + size_t rule; const std::string *codename; size_t base; size_t dist; - CtxFix(const std::string *n, size_t b, size_t d) - : codename(n) + CtxFix(const std::string *n, size_t r, size_t b, size_t d) + : rule(r) + , codename(n) , base(b) , dist(d) {} @@ -47,19 +52,19 @@ struct Trail union { size_t var; - size_t fix; // trailing means rightmost; no need for base - } pld; + size_t fix; + }; - Trail(): type(NONE), pld() {} + Trail(): type(NONE) {} void make_var(size_t v) { type = VAR; - pld.var = v; + var = v; } void make_fix(size_t f) { type = FIX; - pld.fix = f; + fix = f; } }; diff --git a/re2c/src/ir/dfa/context_deduplication.cc b/re2c/src/ir/dfa/context_deduplication.cc index 45d9afa3..084cd950 100644 --- a/re2c/src/ir/dfa/context_deduplication.cc +++ b/re2c/src/ir/dfa/context_deduplication.cc @@ -53,7 +53,7 @@ static void calc_live(const dfa_t &dfa, visited[i] = true; dfa_state_t *s = dfa.states[i]; - const size_t ntags = dfa.contexts.size(); + const size_t ntags = dfa.vartags.size(); // add tags before recursing to child states, // so that tags 
propagate into loopbacks to this state @@ -89,7 +89,7 @@ static void mask_dead(dfa_t &dfa, const bool *livetags) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.contexts.size(); + const size_t ntags = dfa.vartags.size(); for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { @@ -130,7 +130,7 @@ static void incompatibility_table(const dfa_t &dfa, bool *incompattbl) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.contexts.size(); + const size_t ntags = dfa.vartags.size(); for (size_t i = 0; i < nstates; ++i) { const dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { @@ -237,16 +237,16 @@ static void patch_tags(dfa_t &dfa, const std::vector &represent) s->rule_tags = patch_tagset(dfa.tagpool, s->rule_tags, represent); } - const size_t ntags = dfa.contexts.size(); + const size_t ntags = dfa.vartags.size(); for (size_t i = 0; i < ntags; ++i) { - dfa.contexts[i].uniqname = dfa.contexts[represent[i]].uniqname; + dfa.vartags[i].uniqname = dfa.vartags[represent[i]].uniqname; } } size_t deduplicate_contexts(dfa_t &dfa, const std::vector &fallback) { - const size_t ntags = dfa.contexts.size(); + const size_t ntags = dfa.vartags.size(); if (ntags == 0) { return 0; } diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 21e0cb89..0f982050 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -136,10 +136,11 @@ dfa_t::dfa_t( : states() , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) - , contexts(nfa.contexts) - , tagpool(*new Tagpool(contexts.size())) + , vartags(nfa.vartags) + , fixtags(nfa.fixtags) + , tagpool(*new Tagpool(vartags.size())) { - const size_t ntags = contexts.size(); + const size_t ntags = vartags.size(); const size_t nrules = rules.size(); const size_t mask_size = (nchars + 1) * ntags; @@ -227,7 +228,7 @@ dfa_t::dfa_t( for (size_t i 
= 0; i < ntags; ++i) { if (badtags[i]) { // TODO: use rule line, add rule reference to context struct - warn.selfoverlapping_contexts(line, cond, contexts[i]); + warn.selfoverlapping_contexts(line, cond, vartags[i]); } } diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 4b0c86c4..f34b3028 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -46,7 +46,8 @@ struct dfa_t std::vector states; const size_t nchars; std::valarray &rules; - std::vector &contexts; + std::vector &vartags; + std::vector &fixtags; Tagpool &tagpool; dfa_t(const nfa_t &nfa, const charset_t &charset, diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc new file mode 100644 index 00000000..58fe0ccf --- /dev/null +++ b/re2c/src/ir/nfa/init_rules.cc @@ -0,0 +1,106 @@ +#include + +#include "src/conf/msg.h" +#include "src/ir/nfa/nfa.h" + +namespace re2c { + +static void fatal_tags_in_trail(uint32_t line) +{ + error("line %u: tags in trailing context", line); + exit(1); +} + +static void assert_no_tags_in_trailing_context(const Rule &rule, + const std::vector &vartags, + const std::vector &fixtags) +{ + const uint32_t line = rule.info->loc.line; + // rule tags should not contain other trailing contexts + for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { + if (fixtags[i].codename == NULL) { + fatal_tags_in_trail(line); + } + } + for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { + if (vartags[i].codename == NULL) { + fatal_tags_in_trail(line); + } + } + // fixed trailing context must be fixed on cursor + if (rule.trail.type == Trail::FIX + && fixtags[rule.trail.fix].base != CtxFix::RIGHTMOST) { + fatal_tags_in_trail(line); + } +} + +static void fatal_tag_reuse(uint32_t line, const char *tag) +{ + error("line %u: tag '%s' is used multiple times in the same rule", line, tag); + exit(1); +} + +static void assert_tags_used_once(const Rule &rule, + const std::vector &vartags, + const std::vector &fixtags) +{ + const uint32_t line = 
rule.info->loc.line; + std::set names; + for (size_t i = rule.lfixtag; i < rule.hfixtag; ++i) { + const std::string *name = fixtags[i].codename; + if (name && !names.insert(*name).second) { + fatal_tag_reuse(line, name->c_str()); + } + } + for (size_t i = rule.lvartag; i < rule.hvartag; ++i) { + const std::string *name = vartags[i].codename; + if (name && !names.insert(*name).second) { + fatal_tag_reuse(line, name->c_str()); + } + } +} + +void init_rules(std::valarray &rules, + const std::vector ®exps, + const std::vector &vartags, + const std::vector &fixtags) +{ + const size_t nf = fixtags.size(); + const size_t nv = vartags.size(); + const size_t nr = rules.size(); + + for (size_t r = 0, f = 0, v = 0; r < nr; ++r) { + Rule &rule = rules[r]; + rule.info = regexps[r]->info; + rule.nullable = nullable_rule(regexps[r]); + + rule.lfixtag = f; + for (; f < nf && fixtags[f].rule == r; ++f); + rule.hfixtag = f; + + rule.lvartag = v; + for (; v < nv && vartags[v].rule == r; ++v); + rule.hvartag = v; + + // mark *all* variable tags, including trailing context + rule.tags = new bool[nv](); + for (size_t t = rule.lvartag; t < rule.hvartag; ++t) { + rule.tags[t] = true; + } + + // tags in trailing context are forbidden (they make no sense), + // and since tags are constructed in reversed order, this implies + // that trailing context, if present, can only be the first tag + if (rule.lfixtag < rule.hfixtag && fixtags[rule.lfixtag].codename == NULL) { + rule.trail.make_fix(rule.lfixtag++); + } else if (rule.lvartag < rule.hvartag && vartags[rule.lvartag].codename == NULL) { + rule.trail.make_var(rule.lvartag++); + } + + // sanity checks + assert_no_tags_in_trailing_context(rule, vartags, fixtags); + assert_tags_used_once(rule, vartags, fixtags); + } +} + +} // namespace re2c diff --git a/re2c/src/ir/nfa/make_tags.cc b/re2c/src/ir/nfa/make_tags.cc new file mode 100644 index 00000000..b96be8e7 --- /dev/null +++ b/re2c/src/ir/nfa/make_tags.cc @@ -0,0 +1,94 @@ +#include + 
+#include "src/conf/opt.h" +#include "src/ir/nfa/nfa.h" +#include "src/globals.h" + +namespace re2c { + +static const size_t VARDIST = std::numeric_limits::max(); + +static void make_tags_var(size_t nrule, + std::vector &vartags, + const RegExp *re, size_t &dist) +{ + switch (re->tag) { + case RegExp::NIL: break; + case RegExp::SYM: + if (dist != VARDIST) { + ++dist; + } + break; + case RegExp::ALT: { + size_t d1 = dist, d2 = dist; + make_tags_var(nrule, vartags, re->pld.alt.re1, d1); + make_tags_var(nrule, vartags, re->pld.alt.re2, d2); + dist = (d1 == d2) ? d1 : VARDIST; + break; + } + case RegExp::CAT: + make_tags_var(nrule, vartags, re->pld.cat.re2, dist); + make_tags_var(nrule, vartags, re->pld.cat.re1, dist); + break; + case RegExp::ITER: + dist = VARDIST; + make_tags_var(nrule, vartags, re->pld.iter.re, dist); + break; + case RegExp::TAG: + (size_t&)re->pld.ctx.idx = vartags.size(); + vartags.push_back(CtxVar(re->pld.ctx.name, nrule)); + break; + } +} + +static void make_tags_var_fix(size_t nrule, + std::vector &vartags, std::vector &fixtags, + const RegExp *re, size_t &dist, size_t &base) +{ + switch (re->tag) { + case RegExp::NIL: + case RegExp::SYM: + case RegExp::ALT: + case RegExp::ITER: + make_tags_var(nrule, vartags, re, dist); + break; + case RegExp::CAT: + make_tags_var_fix(nrule, vartags, fixtags, re->pld.cat.re2, dist, base); + make_tags_var_fix(nrule, vartags, fixtags, re->pld.cat.re1, dist, base); + break; + case RegExp::TAG: { + const std::string *name = re->pld.ctx.name; + if (dist == VARDIST) { + base = (size_t&)re->pld.ctx.idx = vartags.size(); + vartags.push_back(CtxVar(name, nrule)); + dist = 0; + } else { + fixtags.push_back(CtxFix(name, nrule, base, dist)); + } + if (name == NULL) { + dist = 0; + } + break; + } + } +} + +void make_tags(const std::vector &rs, + std::vector &vartags, std::vector &fixtags) +{ + const size_t nrs = rs.size(); + for (size_t i = 0; i < nrs; ++i) { + size_t base = CtxFix::RIGHTMOST, dist = 0; + // don't optimize 
fixed-length trailing context with generic API + // unless tags are explicitly enabled: generic API needs base tag + // to restore fixed-length trailing context, and base existence + // is only guaranteed if tags are mandatory + if (!opts->contexts && opts->input_api.type() == InputAPI::CUSTOM) { + dist = VARDIST; + } + make_tags_var_fix(i, vartags, fixtags, rs[i]->re, dist, base); + } + +} + +} // namespace re2c diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index ad12dd08..85a92bc3 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -1,232 +1,19 @@ -#include "src/conf/opt.h" #include "src/ir/nfa/nfa.h" -#include "src/ir/regexp/regexp.h" -#include "src/globals.h" namespace re2c { -static size_t calc_size(const RegExp *re) -{ - switch (re->tag) { - case RegExp::NIL: - return 0; - case RegExp::SYM: - return 1; - case RegExp::ALT: - return calc_size(re->pld.alt.re1) - + calc_size(re->pld.alt.re2) - + 1; - case RegExp::CAT: - return calc_size(re->pld.cat.re1) - + calc_size(re->pld.cat.re2); - case RegExp::ITER: - return calc_size(re->pld.iter.re) - + 1; - default: - assert(false); - } -} - -static size_t calc_size_all(const std::vector &rs) -{ - size_t size = rs.size() - 1; - for (size_t i = 0; i < rs.size(); ++i) { - const std::vector ®exps = rs[i]->regexps; - for (size_t j = 0; j < regexps.size(); ++j) { - size += calc_size(regexps[j]); - } - size += regexps.size(); - } - return size; -} - -static nfa_state_t *compile(const RegExp *re, size_t rule, nfa_t &nfa, nfa_state_t *t) -{ - nfa_state_t *s = NULL; - switch (re->tag) { - case RegExp::NIL: - s = t; - break; - case RegExp::SYM: - s = &nfa.states[nfa.size++]; - s->ran(rule, t, re->pld.sym.range); - break; - case RegExp::ALT: - s = &nfa.states[nfa.size++]; - s->alt(rule, - compile(re->pld.alt.re1, rule, nfa, t), - compile(re->pld.alt.re2, rule, nfa, t)); - break; - case RegExp::CAT: - s = compile(re->pld.cat.re2, rule, nfa, t); - s = compile(re->pld.cat.re1, rule, nfa, s); - break; - 
case RegExp::ITER: - s = &nfa.states[nfa.size++]; - s->alt(rule, t, compile(re->pld.iter.re, rule, nfa, s)); - break; - } - return s; -} - -static uint32_t fixlen(const RegExp *re) -{ - switch (re->tag) { - case RegExp::NIL: - return 0; - case RegExp::SYM: - return 1; - case RegExp::ALT: { - const uint32_t l1 = fixlen(re->pld.alt.re1); - const uint32_t l2 = fixlen(re->pld.alt.re2); - return l1 == l2 ? l1 : ~0u; - } - case RegExp::CAT: { - const uint32_t l1 = fixlen(re->pld.cat.re1); - if (l1 == ~0u) { - return ~0u; - } - const uint32_t l2 = fixlen(re->pld.cat.re2); - if (l2 == ~0u) { - return ~0u; - } - return l1 + l2; - } - case RegExp::ITER: - return ~0u; - default: - assert(false); - } -} - -static bool nullable(const RegExp *re) -{ - switch (re->tag) { - case RegExp::NIL: - return true; - case RegExp::SYM: - return false; - case RegExp::ALT: - return nullable(re->pld.alt.re1) - || nullable(re->pld.alt.re2); - case RegExp::CAT: - return nullable(re->pld.cat.re1) - && nullable(re->pld.cat.re2); - case RegExp::ITER: - return true; - default: - assert(false); - } -} - -static nfa_state_t *compile_rule( - const RegExpRule *rule, - nfa_t &nfa, - size_t nrule) -{ - const std::vector &rs = rule->regexps; - const std::vector &ctxnames = rule->ctxnames; - const size_t nctxs = ctxnames.size(); - assert(rs.size() == nctxs + 1); - - std::vector dist(nctxs + 1, 0); - std::vector base(nctxs + 1, nctxs); - // right-fix all contexts - for (size_t i = nctxs; i > 0; --i) { - const size_t fl = fixlen(rs[i]); - - // generic API needs base to restore fixed-length trailing contexts - // and base is only guaranteed when non-trailing contexts are enabled - const bool lack_api = !opts->contexts // => every context is trailing - && opts->input_api.type() == InputAPI::CUSTOM; - - if (fl == ~0u || lack_api) { - base[i - 1] = i - 1; - } else { - base[i - 1] = base[i]; - dist[i - 1] = fl; - if (i < nctxs && ctxnames[i] != NULL) { - dist[i - 1] += dist[i]; - } - } - } - - Rule &r = 
nfa.rules[nrule]; - r.info = rule->info; - - nfa_state_t *t = &nfa.states[nfa.size++]; - t->fin(nrule); - - r.ltag = nfa.contexts.size(); - std::vector &ctxfix = r.ctxfix; - Trail &trail = r.trail; - // base2var is filled in right-to-left, this is crucial - std::vector base2var(nctxs + 1, CtxFix::RIGHTMOST); - for (size_t i = nctxs; i > 0; --i) { - t = compile(rs[i], nrule, nfa, t); - const std::string *name = ctxnames[i - 1]; - if (base[i - 1] == i - 1) { - const size_t idx = nfa.contexts.size(); - base2var[i - 1] = idx; - nfa.contexts.push_back(CtxVar(name, nrule)); - if (name == NULL) { - ++r.ltag; - trail.make_var(idx); - } - nfa_state_t *q = &nfa.states[nfa.size++]; - q->ctx(nrule, t, idx); - t = q; - } else { - if (name != NULL) { - CtxFix ctx(name, base2var[base[i - 1]], dist[i - 1]); - ctxfix.push_back(ctx); - } else { - trail.make_fix(dist[i - 1]); - } - } - } - r.htag = nfa.contexts.size(); - t = compile(rs[0], nrule, nfa, t); - - bool null = nullable(rs[0]); - for (size_t i = 0; i < nctxs && null && ctxnames[i] != NULL; ++i) { - null = nullable(rs[i + 1]); - } - r.nullable = null; - - return t; -} - -static nfa_state_t *compile_rules( - const std::vector &rs, - nfa_t &nfa) -{ - nfa_state_t *s = NULL; - const size_t nrs = rs.size(); - if (nrs > 0) { - s = compile_rule(rs[0], nfa, 0); - for (size_t i = 1; i < nrs; ++i) { - nfa_state_t *q = &nfa.states[nfa.size++]; - q->alt(i, s, compile_rule(rs[i], nfa, i)); - s = q; - } - } - return s; -} - -nfa_t::nfa_t(const std::vector &rs) - : max_size(calc_size_all(rs)) +nfa_t::nfa_t(const std::vector &regexps) + : max_size(sizeof_regexps(regexps)) , size(0) , states(new nfa_state_t[max_size]) - , rules(*new std::valarray(rs.size())) - , contexts(*new std::vector) - , root(compile_rules(rs, *this)) -{ - const size_t nrules = rules.size(); - const size_t ntags = contexts.size(); - for (size_t i = 0; i < nrules; ++i) { - init_tags(rules[i], ntags); - } + , rules(*new std::valarray(regexps.size())) + , vartags(*new 
std::vector) + , fixtags(*new std::vector) + , root(NULL) +{ + make_tags(regexps, vartags, fixtags); + regexps2nfa(regexps, *this); + init_rules(rules, regexps, vartags, fixtags); } nfa_t::~nfa_t() diff --git a/re2c/src/ir/nfa/nfa.h b/re2c/src/ir/nfa/nfa.h index 9d7d4fd2..ec247a27 100644 --- a/re2c/src/ir/nfa/nfa.h +++ b/re2c/src/ir/nfa/nfa.h @@ -6,16 +6,13 @@ #include #include +#include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" #include "src/util/forbid_copy.h" namespace re2c { -struct Range; -struct RegExp; -struct RuleInfo; - struct nfa_state_t { enum type_t @@ -84,7 +81,8 @@ struct nfa_t size_t size; nfa_state_t *states; std::valarray &rules; - std::vector &contexts; + std::vector &vartags; + std::vector &fixtags; nfa_state_t *root; nfa_t(const std::vector &rs); @@ -93,6 +91,16 @@ struct nfa_t FORBID_COPY(nfa_t); }; +size_t sizeof_regexps(const std::vector &regexps); +void make_tags(const std::vector &rs, + std::vector &vartags, std::vector &fixtags); +void regexps2nfa(const std::vector &rs, nfa_t &nfa); +bool nullable_rule(const RegExpRule *rule); +void init_rules(std::valarray &rules, + const std::vector &regexps, + const std::vector &vartags, + const std::vector &fixtags); + } // namespace re2c #endif // _RE2C_IR_NFA_NFA_ diff --git a/re2c/src/ir/nfa/nullable.cc b/re2c/src/ir/nfa/nullable.cc new file mode 100644 index 00000000..a1e88681 --- /dev/null +++ b/re2c/src/ir/nfa/nullable.cc @@ -0,0 +1,38 @@ +#include "src/ir/nfa/nfa.h" + +namespace re2c { + +static bool nullable(const RegExp *re, bool &trail) +{ + if (trail) { + return true; + } + switch (re->tag) { + case RegExp::NIL: + case RegExp::ITER: + return true; + case RegExp::TAG: + if (re->pld.ctx.name == NULL) { + trail = true; + } + return true; + case RegExp::SYM: + return false; + case RegExp::ALT: + return nullable(re->pld.alt.re1, trail) + || nullable(re->pld.alt.re2, trail); + case RegExp::CAT: + return nullable(re->pld.cat.re1, trail) + && nullable(re->pld.cat.re2, trail); + default: + 
assert(false); + } +} + +bool nullable_rule(const RegExpRule *rule) +{ + bool trail = false; + return nullable(rule->re, trail); +} + +} // namespace re2c diff --git a/re2c/src/ir/nfa/regexps2nfa.cc b/re2c/src/ir/nfa/regexps2nfa.cc new file mode 100644 index 00000000..e74f4a11 --- /dev/null +++ b/re2c/src/ir/nfa/regexps2nfa.cc @@ -0,0 +1,68 @@ +#include "src/ir/nfa/nfa.h" + +namespace re2c { + +static nfa_state_t *regexp2nfa(nfa_t &nfa, size_t nrule, const RegExp *re, nfa_state_t *t) +{ + nfa_state_t *s = NULL; + switch (re->tag) { + case RegExp::NIL: + s = t; + break; + case RegExp::SYM: + s = &nfa.states[nfa.size++]; + s->ran(nrule, t, re->pld.sym.range); + break; + case RegExp::ALT: + s = &nfa.states[nfa.size++]; + s->alt(nrule, + regexp2nfa(nfa, nrule, re->pld.alt.re1, t), + regexp2nfa(nfa, nrule, re->pld.alt.re2, t)); + break; + case RegExp::CAT: + s = regexp2nfa(nfa, nrule, re->pld.cat.re2, t); + s = regexp2nfa(nfa, nrule, re->pld.cat.re1, s); + break; + case RegExp::ITER: + s = &nfa.states[nfa.size++]; + s->alt(nrule, t, regexp2nfa(nfa, nrule, re->pld.iter.re, s)); + break; + case RegExp::TAG: { + const size_t idx = re->pld.ctx.idx; + if (idx != ~0u) { + s = &nfa.states[nfa.size++]; + s->ctx(nrule, t, idx); + } else { + s = t; + } + break; + } + } + return s; +} + +static nfa_state_t *regexp2nfa_rule(nfa_t &nfa, size_t nrule, const RegExpRule *rule) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->fin(nrule); + return regexp2nfa(nfa, nrule, rule->re, s); +} + +void regexps2nfa(const std::vector &rs, nfa_t &nfa) +{ + const size_t nrs = rs.size(); + + if (nrs == 0) { + return; + } + + nfa_state_t *s = regexp2nfa_rule(nfa, 0, rs[0]); + for (size_t i = 1; i < nrs; ++i) { + nfa_state_t *t = &nfa.states[nfa.size++]; + t->alt(i, s, regexp2nfa_rule(nfa, i, rs[i])); + s = t; + } + nfa.root = s; +} + +} // namespace re2c diff --git a/re2c/src/ir/nfa/sizeof_regexps.cc b/re2c/src/ir/nfa/sizeof_regexps.cc new file mode 100644 index 00000000..eba02b0e --- /dev/null +++ 
b/re2c/src/ir/nfa/sizeof_regexps.cc @@ -0,0 +1,39 @@ +#include "src/ir/nfa/nfa.h" + +namespace re2c { + +static size_t sizeof_regexp(const RegExp *re) +{ + switch (re->tag) { + case RegExp::NIL: + return 0; + case RegExp::SYM: + return 1; + case RegExp::ALT: + return sizeof_regexp(re->pld.alt.re1) + + sizeof_regexp(re->pld.alt.re2) + + 1; + case RegExp::CAT: + return sizeof_regexp(re->pld.cat.re1) + + sizeof_regexp(re->pld.cat.re2); + case RegExp::ITER: + return sizeof_regexp(re->pld.iter.re) + + 1; + case RegExp::TAG: + return 1; + default: + assert(false); + } +} + +size_t sizeof_regexps(const std::vector &regexps) +{ + const size_t nregexps = regexps.size(); + size_t size = nregexps - 1; + for (size_t i = 0; i < nregexps; ++i) { + size += sizeof_regexp(regexps[i]->re) + 1; + } + return size; +} + +} // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h index f88e329e..1ce22786 100644 --- a/re2c/src/ir/regexp/regexp.h +++ b/re2c/src/ir/regexp/regexp.h @@ -26,7 +26,8 @@ struct RegExp SYM, ALT, CAT, - ITER + ITER, + TAG }; union payload_t { @@ -48,6 +49,11 @@ struct RegExp { const RegExp *re; } iter; + struct + { + const std::string *name; + size_t idx; + } ctx; }; static free_list flist; @@ -85,6 +91,13 @@ struct RegExp re->pld.iter.re = r; return re; } + static const RegExp *ctx(const std::string *n) + { + RegExp *re = new RegExp(TAG); + re->pld.ctx.name = n; + re->pld.ctx.idx = ~0u; + return re; + } inline ~RegExp() { flist.erase(this); @@ -101,22 +114,17 @@ struct RegExpRule { static free_list flist; - std::vector regexps; - std::vector ctxnames; + const RegExp *re; RuleInfo *info; RegExpRule(const RegExp* r) - : regexps(1, r) - , ctxnames() + : re(r) , info(NULL) { flist.insert(this); } ~RegExpRule() { - for (size_t i = 0; i < ctxnames.size(); ++i) { - delete ctxnames[i]; - } delete info; flist.erase(this); } diff --git a/re2c/src/ir/regexp/split_charset.cc b/re2c/src/ir/regexp/split_charset.cc index 27619660..687c5033 
100644 
--- a/re2c/src/ir/regexp/split_charset.cc +++ b/re2c/src/ir/regexp/split_charset.cc @@ -10,6 +10,7 @@ static void split(const RegExp* re, std::set &cs) { switch (re->tag) { case RegExp::NIL: + case RegExp::TAG: break; case RegExp::SYM: for (const Range *r = re->pld.sym.range; r; r = r->next()) { @@ -33,11 +34,9 @@ static void split(const RegExp* re, std::set &cs) void split(const std::vector &rs, std::set &cs) { - for (size_t i = 0; i < rs.size(); ++i) { - const std::vector &regexps = rs[i]->regexps; - for (size_t j = 0; j < regexps.size(); ++j) { - split(regexps[j], cs); - } + const size_t nrs = rs.size(); + for (size_t i = 0; i < nrs; ++i) { + split(rs[i]->re, cs); } } diff --git a/re2c/src/ir/rule.cc b/re2c/src/ir/rule.cc index 6d962fe8..6b55421d 100644 --- a/re2c/src/ir/rule.cc +++ b/re2c/src/ir/rule.cc @@ -7,16 +7,4 @@ namespace re2c const size_t Rule::NONE = std::numeric_limits::max(); -void init_tags(Rule &rule, size_t ntags) -{ - bool *tags = new bool[ntags](); - for (size_t t = rule.ltag; t < rule.htag; ++t) { - tags[t] = true; - } - if (rule.trail.type == Trail::VAR) { - tags[rule.trail.pld.var] = true; - } - rule.tags = tags; -} - } // namespace re2c diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index 9b6a461a..ddab677c 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -35,9 +35,10 @@ struct Rule const RuleInfo *info; - size_t ltag; - size_t htag; - std::vector ctxfix; + size_t lvartag; + size_t hvartag; + size_t lfixtag; + size_t hfixtag; Trail trail; bool nullable; bool *tags; @@ -46,9 +47,10 @@ struct Rule Rule() : info(NULL) - , ltag(0) - , htag(0) - , ctxfix() + , lvartag(0) + , hvartag(0) + , lfixtag(0) + , hfixtag(0) , trail() , nullable(false) , tags(NULL) @@ -63,8 +65,6 @@ struct Rule FORBID_COPY(Rule); }; -void init_tags(Rule &rule, size_t ntags); - } // namespace re2c #endif // _RE2C_IR_RULE_ diff --git a/re2c/src/ir/skeleton/path.h b/re2c/src/ir/skeleton/path.h index 89185afb..418487c7 100644 --- a/re2c/src/ir/skeleton/path.h 
+++ b/re2c/src/ir/skeleton/path.h @@ -55,9 +55,9 @@ public: case Trail::NONE: return len; case Trail::FIX: - return len - trail.pld.fix; + return len - skel.fixtags[trail.fix].dist; case Trail::VAR: { - const size_t ctx = trail.pld.var; + const size_t ctx = trail.var; for (; tail != head; ++tail) { if (skel.nodes[*tail].tags[ctx]) { return static_cast(head - tail) - 1; diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index d6193831..6684fef5 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -69,7 +69,7 @@ Skeleton::Skeleton( , sizeof_key(8) , rules(dfa.rules) , defrule(def) - , contexts(dfa.contexts) + , fixtags(dfa.fixtags) { const size_t nc = cs.size() - 1; const size_t ntags = dfa.tagpool.ntags; diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index dd80c03e..90f0ea5c 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -59,7 +59,7 @@ struct Skeleton size_t sizeof_key; std::valarray &rules; const size_t defrule; - std::vector &contexts; + std::vector &fixtags; Skeleton(const dfa_t &dfa, const charset_t &cs, size_t def, const std::string &dfa_name, const std::string &dfa_cond, diff --git a/re2c/src/parse/lex.re b/re2c/src/parse/lex.re index 006c0b5a..0968c024 100644 --- a/re2c/src/parse/lex.re +++ b/re2c/src/parse/lex.re @@ -273,8 +273,12 @@ start: } "@" name { - yylval.str = new std::string(tok + 1, tok_len() - 1); - return TOKEN_CTX; + if (!opts->contexts) { + fatal("tags are only allowed with '-T, --tags' option"); + } + const std::string *name = new std::string(tok + 1, tok_len() - 1); + yylval.regexp = RegExp::ctx(name); + return TOKEN_REGEXP; } [<>,()|=;/\\] { diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index bfc4665b..3b8e7342 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -136,7 +136,6 @@ void default_rule(CondList *clist, RegExpRule *rule) %token TOKEN_CLOSESIZE %token 
TOKEN_CODE %token TOKEN_CONF -%token TOKEN_CTX %token TOKEN_ID %token TOKEN_FID %token TOKEN_FID_END @@ -149,9 +148,9 @@ void default_rule(CondList *clist, RegExpRule *rule) %type TOKEN_CLOSESIZE %type TOKEN_CODE %type TOKEN_REGEXP rule expr diff term factor primary -%type TOKEN_ID TOKEN_FID TOKEN_CTX newcond name +%type TOKEN_ID TOKEN_FID newcond name %type cond clist -%type ctxexpr trailexpr +%type trailexpr %% @@ -171,8 +170,8 @@ def delete $1; } /* errors */ - | name expr ctx { - in->fatal("contexts are not allowed in named definitions"); + | name expr '/' { + in->fatal("trailing contexts are not allowed in named definitions"); }; name @@ -185,8 +184,6 @@ name enddef: ';' | TOKEN_FID_END; -ctx: '/' | TOKEN_CTX; - rule : trailexpr TOKEN_CODE { if (opts->cFlag) { @@ -317,31 +314,12 @@ newcond: ; trailexpr - : ctxexpr { - $$ = $1; - } - - | ctxexpr '/' expr { - // multiple trailing contexts on the same rule are not allowed - $$ = $1; - $$->regexps.push_back($3); - $$->ctxnames.push_back(NULL); - }; - -ctxexpr : expr { $$ = new RegExpRule($1); } - | ctxexpr TOKEN_CTX expr { - if (!opts->contexts) { - delete $2; - in->fatal("non-trailing contexts are only allowed" - " with '-C, --contexts' option"); - } - $$ = $1; - $$->regexps.push_back($3); - $$->ctxnames.push_back($2); + | expr '/' expr { + $$ = new RegExpRule(RegExp::cat($1, RegExp::cat(RegExp::ctx(NULL), $3))); }; expr: diff --git a/re2c/test/contexts/syntax_error0.c b/re2c/test/contexts/syntax_error0.c index 76488666..2ee3bbf3 100644 --- a/re2c/test/contexts/syntax_error0.c +++ b/re2c/test/contexts/syntax_error0.c @@ -1 +1 @@ -re2c: error: line 2, column 13: contexts are not allowed in named definitions +re2c: error: line 2, column 13: tags are only allowed with '-T, --tags' option diff --git a/re2c/test/error12.c b/re2c/test/error12.c index 801aca9d..3ddbb09b 100644 --- a/re2c/test/error12.c +++ b/re2c/test/error12.c @@ -1 +1 @@ -re2c: error: line 3, column 17: contexts are not allowed in named definitions 
+re2c: error: line 3, column 17: trailing contexts are not allowed in named definitions