From cd4c7d830be98d7f957a084593bdeeac1b4f3496 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 19 Aug 2015 18:49:28 +0100 Subject: [PATCH] Parse unquoted flex-like strings in lexer. Such strings can only contain ASCII letters, digits and underscore: no escapes. So there's very little parsing to do: just map code units directly to code points. --- re2c/bootstrap/src/parse/scanner_lex.cc | 219 ++++++++++++------------ re2c/src/ir/regexp/regexp.cc | 33 ---- re2c/src/parse/scanner.h | 2 - re2c/src/parse/scanner_lex.re | 11 +- 4 files changed, 110 insertions(+), 155 deletions(-) diff --git a/re2c/bootstrap/src/parse/scanner_lex.cc b/re2c/bootstrap/src/parse/scanner_lex.cc index f907bfc0..88f14d5f 100644 --- a/re2c/bootstrap/src/parse/scanner_lex.cc +++ b/re2c/bootstrap/src/parse/scanner_lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Wed Aug 19 17:56:07 2015 */ +/* Generated by re2c 0.14.3 on Wed Aug 19 18:44:26 2015 */ #line 1 "../src/parse/scanner_lex.re" #include #include @@ -712,7 +712,7 @@ start: yy112: ++YYCURSOR; yy113: -#line 397 "../src/parse/scanner_lex.re" +#line 392 "../src/parse/scanner_lex.re" { fatalf("unexpected character: '%c'", *tok); goto scan; @@ -723,7 +723,7 @@ yy114: yych = (YYCTYPE)*YYCURSOR; goto yy224; yy115: -#line 381 "../src/parse/scanner_lex.re" +#line 376 "../src/parse/scanner_lex.re" { goto scan; } @@ -738,7 +738,7 @@ yy116: if (yych == '#') goto yy204; } yy117: -#line 390 "../src/parse/scanner_lex.re" +#line 385 "../src/parse/scanner_lex.re" { if (cur == eof) return 0; pos = cur; @@ -786,7 +786,7 @@ yy126: #line 787 "src/parse/scanner_lex.cc" yy128: ++YYCURSOR; -#line 376 "../src/parse/scanner_lex.re" +#line 371 "../src/parse/scanner_lex.re" { yylval.regexp = mkDot(); return RANGE; @@ -976,20 +976,15 @@ yy158: yylval.str = new std::string (tok, tok_len ()); return ID; } else { - /* Add one char in front and one behind instead of 's or "s */ - SubStr s (tok, tok_len ()); - if (bCaseInsensitive || bCaseInverted) - { - 
yylval.regexp = strToCaseInsensitiveRE (s); - } - else + for (char * p = tok; p < cur; ++p) { - yylval.regexp = strToRE (s); + cpoints.push_back (static_cast (*p)); } + yylval.regexp = cpoint_string (cpoints, bCaseInsensitive || bCaseInverted); return STRING; } } -#line 993 "src/parse/scanner_lex.cc" +#line 988 "src/parse/scanner_lex.cc" yy159: yych = (YYCTYPE)*++YYCURSOR; goto yy178; @@ -1002,7 +997,7 @@ yy161: yylval.str = new std::string (tok, tok_len ()); return ID; } -#line 1006 "src/parse/scanner_lex.cc" +#line 1001 "src/parse/scanner_lex.cc" yy162: YYCTXMARKER = YYCURSOR + 1; ++YYCURSOR; @@ -1088,7 +1083,7 @@ yy170: yylval.str = new std::string (tok, tok_len ()); return CONFIG; } -#line 1092 "src/parse/scanner_lex.cc" +#line 1087 "src/parse/scanner_lex.cc" yy171: yych = (YYCTYPE)*++YYCURSOR; if (yych <= 'Z') { @@ -1160,7 +1155,7 @@ yy175: return ID; } } -#line 1164 "src/parse/scanner_lex.cc" +#line 1159 "src/parse/scanner_lex.cc" yy177: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1179,14 +1174,14 @@ yy180: ++YYCURSOR; #line 268 "../src/parse/scanner_lex.re" { quote = ']'; negated_class = true; goto cpoints; } -#line 1183 "src/parse/scanner_lex.cc" +#line 1178 "src/parse/scanner_lex.cc" yy182: ++YYCURSOR; #line 273 "../src/parse/scanner_lex.re" { return SETUP; } -#line 1190 "src/parse/scanner_lex.cc" +#line 1185 "src/parse/scanner_lex.cc" yy184: YYCTXMARKER = YYCURSOR + 1; yych = (YYCTYPE)*++YYCURSOR; @@ -1220,7 +1215,7 @@ yy187: { return NOCOND; } -#line 1224 "src/parse/scanner_lex.cc" +#line 1219 "src/parse/scanner_lex.cc" yy189: yych = (YYCTYPE)*++YYCURSOR; if (yych == '>') goto yy187; @@ -1238,7 +1233,7 @@ yy191: depth = 0; goto code; } -#line 1242 "src/parse/scanner_lex.cc" +#line 1237 "src/parse/scanner_lex.cc" yy193: ++YYCURSOR; YYCURSOR = YYCTXMARKER; @@ -1246,7 +1241,7 @@ yy193: { return *tok; } -#line 1250 "src/parse/scanner_lex.cc" +#line 1245 "src/parse/scanner_lex.cc" yy195: ++YYCURSOR; #line 255 "../src/parse/scanner_lex.re" @@ -1254,14 
+1249,14 @@ yy195: depth = 1; goto comment; } -#line 1258 "src/parse/scanner_lex.cc" +#line 1253 "src/parse/scanner_lex.cc" yy197: ++YYCURSOR; #line 252 "../src/parse/scanner_lex.re" { goto nextLine; } -#line 1265 "src/parse/scanner_lex.cc" +#line 1260 "src/parse/scanner_lex.cc" yy199: ++YYCURSOR; #line 261 "../src/parse/scanner_lex.re" @@ -1269,7 +1264,7 @@ yy199: tok = cur; return 0; } -#line 1273 "src/parse/scanner_lex.cc" +#line 1268 "src/parse/scanner_lex.cc" yy201: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1362,12 +1357,12 @@ yy214: yy216: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 385 "../src/parse/scanner_lex.re" +#line 380 "../src/parse/scanner_lex.re" { set_sourceline (); goto scan; } -#line 1371 "src/parse/scanner_lex.cc" +#line 1366 "src/parse/scanner_lex.cc" yy218: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy216; @@ -1400,12 +1395,12 @@ yy224: if (yych == ' ') goto yy223; goto yy115; } -#line 401 "../src/parse/scanner_lex.re" +#line 396 "../src/parse/scanner_lex.re" flex_name: -#line 1409 "src/parse/scanner_lex.cc" +#line 1404 "src/parse/scanner_lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1414,22 +1409,22 @@ flex_name: if (yych == '\r') goto yy231; ++YYCURSOR; yy228: -#line 412 "../src/parse/scanner_lex.re" +#line 407 "../src/parse/scanner_lex.re" { YYCURSOR = tok; goto start; } -#line 1423 "src/parse/scanner_lex.cc" +#line 1418 "src/parse/scanner_lex.cc" yy229: ++YYCURSOR; yy230: -#line 406 "../src/parse/scanner_lex.re" +#line 401 "../src/parse/scanner_lex.re" { YYCURSOR = tok; lexer_state = LEX_NORMAL; return FID_END; } -#line 1433 "src/parse/scanner_lex.cc" +#line 1428 "src/parse/scanner_lex.cc" yy231: yych = (YYCTYPE)*++YYCURSOR; if (yych != '\n') goto yy228; @@ -1437,13 +1432,13 @@ yy231: yych = (YYCTYPE)*YYCURSOR; goto yy230; } -#line 416 "../src/parse/scanner_lex.re" +#line 411 "../src/parse/scanner_lex.re" cpoints: tok = cur; -#line 1447 "src/parse/scanner_lex.cc" +#line 1442 
"src/parse/scanner_lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1492,12 +1487,12 @@ cpoints: } } } -#line 423 "../src/parse/scanner_lex.re" +#line 418 "../src/parse/scanner_lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 1498 "src/parse/scanner_lex.cc" +#line 1493 "src/parse/scanner_lex.cc" yy237: ++YYCURSOR; -#line 439 "../src/parse/scanner_lex.re" +#line 434 "../src/parse/scanner_lex.re" { const char c = tok[0]; if (c == quote) @@ -1521,10 +1516,10 @@ yy237: goto cpoints; } } -#line 1525 "src/parse/scanner_lex.cc" +#line 1520 "src/parse/scanner_lex.cc" yy239: ++YYCURSOR; -#line 429 "../src/parse/scanner_lex.re" +#line 424 "../src/parse/scanner_lex.re" { const char c = tok[1]; if (c != quote) @@ -1534,16 +1529,16 @@ yy239: cpoints.push_back (static_cast (c)); goto cpoints; } -#line 1538 "src/parse/scanner_lex.cc" +#line 1533 "src/parse/scanner_lex.cc" yy241: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '/') goto yy242; if (yych <= '7') goto yy264; yy242: -#line 422 "../src/parse/scanner_lex.re" +#line 417 "../src/parse/scanner_lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 1547 "src/parse/scanner_lex.cc" +#line 1542 "src/parse/scanner_lex.cc" yy243: yych = (YYCTYPE)*++YYCURSOR; goto yy242; @@ -1559,9 +1554,9 @@ yy244: if (yych <= 'f') goto yy257; } yy245: -#line 421 "../src/parse/scanner_lex.re" +#line 416 "../src/parse/scanner_lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 1565 "src/parse/scanner_lex.cc" +#line 1560 "src/parse/scanner_lex.cc" yy246: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1577,9 +1572,9 @@ yy246: } yy247: ++YYCURSOR; -#line 427 "../src/parse/scanner_lex.re" +#line 422 "../src/parse/scanner_lex.re" { cpoints.push_back (unesc_escapable (tok)); goto cpoints; } -#line 1583 "src/parse/scanner_lex.cc" +#line 1578 "src/parse/scanner_lex.cc" yy249: yyaccept = 1; yych = 
(YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1610,9 +1605,9 @@ yy251: } yy252: ++YYCURSOR; -#line 425 "../src/parse/scanner_lex.re" +#line 420 "../src/parse/scanner_lex.re" { cpoints.push_back (unesc_hex (tok, cur)); goto cpoints; } -#line 1616 "src/parse/scanner_lex.cc" +#line 1611 "src/parse/scanner_lex.cc" yy254: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { @@ -1722,16 +1717,16 @@ yy264: if (yych <= '/') goto yy251; if (yych >= '8') goto yy251; ++YYCURSOR; -#line 426 "../src/parse/scanner_lex.re" +#line 421 "../src/parse/scanner_lex.re" { cpoints.push_back (unesc_oct (tok, cur)); goto cpoints; } -#line 1728 "src/parse/scanner_lex.cc" +#line 1723 "src/parse/scanner_lex.cc" } -#line 462 "../src/parse/scanner_lex.re" +#line 457 "../src/parse/scanner_lex.re" code: -#line 1735 "src/parse/scanner_lex.cc" +#line 1730 "src/parse/scanner_lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1792,7 +1787,7 @@ code: } yy269: ++YYCURSOR; -#line 525 "../src/parse/scanner_lex.re" +#line 520 "../src/parse/scanner_lex.re" { if (cur == eof) { @@ -1804,15 +1799,15 @@ yy269: } goto code; } -#line 1808 "src/parse/scanner_lex.cc" +#line 1803 "src/parse/scanner_lex.cc" yy271: ++YYCURSOR; yy272: -#line 539 "../src/parse/scanner_lex.re" +#line 534 "../src/parse/scanner_lex.re" { goto code; } -#line 1816 "src/parse/scanner_lex.cc" +#line 1811 "src/parse/scanner_lex.cc" yy273: YYCTXMARKER = YYCURSOR + 1; yyaccept = 0; @@ -1832,7 +1827,7 @@ yy273: } } yy274: -#line 506 "../src/parse/scanner_lex.re" +#line 501 "../src/parse/scanner_lex.re" { if (depth == 0) { @@ -1852,7 +1847,7 @@ yy274: cline++; goto code; } -#line 1856 "src/parse/scanner_lex.cc" +#line 1851 "src/parse/scanner_lex.cc" yy275: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1865,7 +1860,7 @@ yy276: goto yy282; yy277: ++YYCURSOR; -#line 478 "../src/parse/scanner_lex.re" +#line 473 "../src/parse/scanner_lex.re" { if (depth == 0) { @@ -1877,10 +1872,10 @@ yy277: } goto code; } -#line 1881 "src/parse/scanner_lex.cc" 
+#line 1876 "src/parse/scanner_lex.cc" yy279: ++YYCURSOR; -#line 466 "../src/parse/scanner_lex.re" +#line 461 "../src/parse/scanner_lex.re" { if (depth == 0) { @@ -1893,7 +1888,7 @@ yy279: } goto code; } -#line 1897 "src/parse/scanner_lex.cc" +#line 1892 "src/parse/scanner_lex.cc" yy281: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1924,11 +1919,11 @@ yy284: goto yy281; yy285: ++YYCURSOR; -#line 536 "../src/parse/scanner_lex.re" +#line 531 "../src/parse/scanner_lex.re" { goto code; } -#line 1932 "src/parse/scanner_lex.cc" +#line 1927 "src/parse/scanner_lex.cc" yy287: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1955,7 +1950,7 @@ yy290: } yy291: YYCURSOR = YYCTXMARKER; -#line 493 "../src/parse/scanner_lex.re" +#line 488 "../src/parse/scanner_lex.re" { if (depth == 0) { @@ -1969,7 +1964,7 @@ yy291: cline++; goto code; } -#line 1973 "src/parse/scanner_lex.cc" +#line 1968 "src/parse/scanner_lex.cc" yy292: yych = (YYCTYPE)*++YYCURSOR; goto yy291; @@ -2036,12 +2031,12 @@ yy303: yy305: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 489 "../src/parse/scanner_lex.re" +#line 484 "../src/parse/scanner_lex.re" { set_sourceline (); goto code; } -#line 2045 "src/parse/scanner_lex.cc" +#line 2040 "src/parse/scanner_lex.cc" yy307: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy305; @@ -2080,12 +2075,12 @@ yy312: goto yy283; } } -#line 542 "../src/parse/scanner_lex.re" +#line 537 "../src/parse/scanner_lex.re" comment: -#line 2089 "src/parse/scanner_lex.cc" +#line 2084 "src/parse/scanner_lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2132,7 +2127,7 @@ comment: } ++YYCURSOR; yy317: -#line 574 "../src/parse/scanner_lex.re" +#line 569 "../src/parse/scanner_lex.re" { if (cur == eof) { @@ -2140,7 +2135,7 @@ yy317: } goto comment; } -#line 2144 "src/parse/scanner_lex.cc" +#line 2139 "src/parse/scanner_lex.cc" yy318: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yybm[0+yych] & 32) { @@ -2148,7 +2143,7 @@ yy318: } if (yych == '#') goto yy329; yy319: 
-#line 565 "../src/parse/scanner_lex.re" +#line 560 "../src/parse/scanner_lex.re" { if (cur == eof) { @@ -2158,7 +2153,7 @@ yy319: cline++; goto comment; } -#line 2162 "src/parse/scanner_lex.cc" +#line 2157 "src/parse/scanner_lex.cc" yy320: yych = (YYCTYPE)*++YYCURSOR; if (yych == '/') goto yy324; @@ -2167,16 +2162,16 @@ yy321: yych = (YYCTYPE)*++YYCURSOR; if (yych != '*') goto yy317; ++YYCURSOR; -#line 556 "../src/parse/scanner_lex.re" +#line 551 "../src/parse/scanner_lex.re" { ++depth; fatal("ambiguous /* found"); goto comment; } -#line 2177 "src/parse/scanner_lex.cc" +#line 2172 "src/parse/scanner_lex.cc" yy324: ++YYCURSOR; -#line 546 "../src/parse/scanner_lex.re" +#line 541 "../src/parse/scanner_lex.re" { if (--depth == 0) { @@ -2187,7 +2182,7 @@ yy324: goto comment; } } -#line 2191 "src/parse/scanner_lex.cc" +#line 2186 "src/parse/scanner_lex.cc" yy326: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -2265,12 +2260,12 @@ yy339: yy341: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 561 "../src/parse/scanner_lex.re" +#line 556 "../src/parse/scanner_lex.re" { set_sourceline (); goto comment; } -#line 2274 "src/parse/scanner_lex.cc" +#line 2269 "src/parse/scanner_lex.cc" yy343: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy341; @@ -2295,28 +2290,28 @@ yy347: if (yych == '\r') goto yy343; goto yy328; } -#line 581 "../src/parse/scanner_lex.re" +#line 576 "../src/parse/scanner_lex.re" nextLine: -#line 2304 "src/parse/scanner_lex.cc" +#line 2299 "src/parse/scanner_lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == '\n') goto yy352; ++YYCURSOR; -#line 592 "../src/parse/scanner_lex.re" +#line 587 "../src/parse/scanner_lex.re" { if(cur == eof) { return 0; } goto nextLine; } -#line 2317 "src/parse/scanner_lex.cc" +#line 2312 "src/parse/scanner_lex.cc" yy352: ++YYCURSOR; -#line 585 "../src/parse/scanner_lex.re" +#line 580 "../src/parse/scanner_lex.re" { if(cur == eof) { return 0; } @@ -2324,14 +2319,14 @@ yy352: 
cline++; goto scan; } -#line 2328 "src/parse/scanner_lex.cc" +#line 2323 "src/parse/scanner_lex.cc" } -#line 597 "../src/parse/scanner_lex.re" +#line 592 "../src/parse/scanner_lex.re" config: -#line 2335 "src/parse/scanner_lex.cc" +#line 2330 "src/parse/scanner_lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2377,32 +2372,32 @@ config: if (yych == '=') goto yy360; } ++YYCURSOR; -#line 608 "../src/parse/scanner_lex.re" +#line 603 "../src/parse/scanner_lex.re" { fatal("missing '='"); } -#line 2385 "src/parse/scanner_lex.cc" +#line 2380 "src/parse/scanner_lex.cc" yy358: ++YYCURSOR; yych = (YYCTYPE)*YYCURSOR; goto yy365; yy359: -#line 601 "../src/parse/scanner_lex.re" +#line 596 "../src/parse/scanner_lex.re" { goto config; } -#line 2395 "src/parse/scanner_lex.cc" +#line 2390 "src/parse/scanner_lex.cc" yy360: ++YYCURSOR; yych = (YYCTYPE)*YYCURSOR; goto yy363; yy361: -#line 604 "../src/parse/scanner_lex.re" +#line 599 "../src/parse/scanner_lex.re" { lexer_state = LEX_CONFIG_VALUE; return '='; } -#line 2406 "src/parse/scanner_lex.cc" +#line 2401 "src/parse/scanner_lex.cc" yy362: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -2421,12 +2416,12 @@ yy365: if (yych == ' ') goto yy364; goto yy359; } -#line 611 "../src/parse/scanner_lex.re" +#line 606 "../src/parse/scanner_lex.re" value: -#line 2430 "src/parse/scanner_lex.cc" +#line 2425 "src/parse/scanner_lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2493,20 +2488,20 @@ value: } } yy368: -#line 623 "../src/parse/scanner_lex.re" +#line 618 "../src/parse/scanner_lex.re" { yylval.str = new std::string (tok, tok_len ()); lexer_state = LEX_NORMAL; return VALUE; } -#line 2503 "src/parse/scanner_lex.cc" +#line 2498 "src/parse/scanner_lex.cc" yy369: ++YYCURSOR; if (yybm[0+(yych = (YYCTYPE)*YYCURSOR)] & 8) { goto yy374; } yy370: -#line 615 "../src/parse/scanner_lex.re" +#line 610 "../src/parse/scanner_lex.re" { if (!s_to_i32_unsafe (tok, cur, yylval.number)) { @@ -2515,7 +2510,7 @@ yy370: 
lexer_state = LEX_NORMAL; return NUMBER; } -#line 2519 "src/parse/scanner_lex.cc" +#line 2514 "src/parse/scanner_lex.cc" yy371: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '0') goto yy375; @@ -2658,7 +2653,7 @@ yy389: if (yych == '\n') goto yy382; goto yy386; } -#line 628 "../src/parse/scanner_lex.re" +#line 623 "../src/parse/scanner_lex.re" } @@ -2681,7 +2676,7 @@ void Scanner::set_sourceline () sourceline: tok = cur; -#line 2685 "src/parse/scanner_lex.cc" +#line 2680 "src/parse/scanner_lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2730,14 +2725,14 @@ sourceline: yy392: ++YYCURSOR; yy393: -#line 673 "../src/parse/scanner_lex.re" +#line 668 "../src/parse/scanner_lex.re" { goto sourceline; } -#line 2738 "src/parse/scanner_lex.cc" +#line 2733 "src/parse/scanner_lex.cc" yy394: ++YYCURSOR; -#line 661 "../src/parse/scanner_lex.re" +#line 656 "../src/parse/scanner_lex.re" { if (cur == eof) { @@ -2750,7 +2745,7 @@ yy394: tok = cur; return; } -#line 2754 "src/parse/scanner_lex.cc" +#line 2749 "src/parse/scanner_lex.cc" yy396: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych == '\n') goto yy393; @@ -2760,7 +2755,7 @@ yy397: yych = (YYCTYPE)*YYCURSOR; goto yy400; yy398: -#line 650 "../src/parse/scanner_lex.re" +#line 645 "../src/parse/scanner_lex.re" { if (!s_to_u32_unsafe (tok, cur, cline)) { @@ -2768,7 +2763,7 @@ yy398: } goto sourceline; } -#line 2772 "src/parse/scanner_lex.cc" +#line 2767 "src/parse/scanner_lex.cc" yy399: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -2800,14 +2795,14 @@ yy404: goto yy401; yy405: ++YYCURSOR; -#line 657 "../src/parse/scanner_lex.re" +#line 652 "../src/parse/scanner_lex.re" { escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } -#line 2809 "src/parse/scanner_lex.cc" +#line 2804 "src/parse/scanner_lex.cc" } -#line 676 "../src/parse/scanner_lex.re" +#line 671 "../src/parse/scanner_lex.re" } diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index 
22e9ed97..fa2f46fd 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -123,39 +123,6 @@ RegExp * Scanner::matchSymbol(uint32_t c) const return new MatchOp (Range::sym (c)); } -RegExp * Scanner::strToRE (SubStr & s) const -{ - if (s.len == 0) - return new NullOp; - - RegExp *re = matchSymbol(unescape(s)); - - while (s.len > 0) - re = new CatOp(re, matchSymbol(unescape(s))); - - return re; -} - -RegExp * Scanner::strToCaseInsensitiveRE (SubStr & s) const -{ - RegExp * r = NULL; - while (s.len > 0) - { - const uint32_t c = unescape (s); - if (is_alpha (c)) - { - RegExp * rl = matchSymbol (to_lower_unsafe (c)); - RegExp * ru = matchSymbol (to_upper_unsafe (c)); - r = doCat (r, mkAlt (rl, ru)); - } - else - { - r = doCat (r, matchSymbol (c)); - } - } - return r ? r : new NullOp; -} - RegExp * Scanner::matchSymbolRange(Range * r) const { if (!r) diff --git a/re2c/src/parse/scanner.h b/re2c/src/parse/scanner.h index 7deba777..d7d8ba50 100644 --- a/re2c/src/parse/scanner.h +++ b/re2c/src/parse/scanner.h @@ -98,8 +98,6 @@ public: RegExp * matchSymbol (uint32_t c) const; RegExp * matchSymbolRange (Range * r) const; - RegExp * strToRE (SubStr & s) const; - RegExp * strToCaseInsensitiveRE (SubStr & s) const; RegExp * mkDiff (RegExp * e1, RegExp * e2) const; RegExp * mkDot () const; RegExp * mkDefault () const; diff --git a/re2c/src/parse/scanner_lex.re b/re2c/src/parse/scanner_lex.re index f1966730..e152ebc0 100644 --- a/re2c/src/parse/scanner_lex.re +++ b/re2c/src/parse/scanner_lex.re @@ -359,16 +359,11 @@ start: yylval.str = new std::string (tok, tok_len ()); return ID; } else { - /* Add one char in front and one behind instead of 's or "s */ - SubStr s (tok, tok_len ()); - if (bCaseInsensitive || bCaseInverted) + for (char * p = tok; p < cur; ++p) { - yylval.regexp = strToCaseInsensitiveRE (s); - } - else - { - yylval.regexp = strToRE (s); + cpoints.push_back (static_cast (*p)); } + yylval.regexp = cpoint_string (cpoints, bCaseInsensitive || 
bCaseInverted); return STRING; } } -- 2.40.0