From 26110738a05bae11661ea7ff0027a85087de6335 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 9 Nov 2015 16:06:40 +0000 Subject: [PATCH] Recognize escaped dash '\-' in character class. --- re2c/bootstrap/src/parse/lex.cc | 998 ++++++++++++++++++--------- re2c/bootstrap/src/parse/lex_conf.cc | 4 +- re2c/src/ir/regexp/regexp.cc | 29 - re2c/src/parse/lex.re | 113 +-- re2c/src/parse/lex_conf.re | 2 +- re2c/src/parse/scanner.h | 4 +- re2c/src/parse/unescape.cc | 17 - re2c/src/parse/unescape.h | 1 - re2c/test/bug1479044.b.c | 1 - re2c/test/bug1479044.c | 1 - re2c/test/bug1479044.s.c | 1 - re2c/test/cpoint_class_esc_dash.c | 24 + re2c/test/cpoint_class_esc_dash.re | 3 + 13 files changed, 792 insertions(+), 406 deletions(-) create mode 100644 re2c/test/cpoint_class_esc_dash.c create mode 100644 re2c/test/cpoint_class_esc_dash.re diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index 78d9212b..93e16cfc 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Mon Oct 12 15:10:45 2015 */ +/* Generated by re2c 0.14.3 on Mon Nov 9 16:04:50 2015 */ #line 1 "../src/parse/lex.re" #include #include @@ -707,7 +707,7 @@ start: yy112: ++YYCURSOR; yy113: -#line 404 "../src/parse/lex.re" +#line 393 "../src/parse/lex.re" { fatalf("unexpected character: '%c'", *tok); goto scan; @@ -718,7 +718,7 @@ yy114: yych = (YYCTYPE)*YYCURSOR; goto yy220; yy115: -#line 388 "../src/parse/lex.re" +#line 377 "../src/parse/lex.re" { goto scan; } @@ -733,7 +733,7 @@ yy116: if (yych == '#') goto yy200; } yy117: -#line 397 "../src/parse/lex.re" +#line 386 "../src/parse/lex.re" { if (cur == eof) return 0; pos = cur; @@ -750,7 +750,7 @@ yy119: #line 265 "../src/parse/lex.re" { std::vector cpoints; - lex_cpoints ('"', cpoints); + lex_str ('"', cpoints); yylval.regexp = cpoint_string (cpoints, opts->bCaseInsensitive || opts->bCaseInverted); return REGEXP; } @@ -764,7 +764,7 @@ yy122: #line 258 "../src/parse/lex.re" { std::vector cpoints; - lex_cpoints ('\'', cpoints); + lex_str ('\'', cpoints); yylval.regexp = cpoint_string (cpoints, opts->bCaseInsensitive || !opts->bCaseInverted); return REGEXP; } @@ -772,7 +772,7 @@ yy122: yy124: ++YYCURSOR; yy125: -#line 292 "../src/parse/lex.re" +#line 281 "../src/parse/lex.re" { return *tok; } @@ -780,7 +780,7 @@ yy125: yy126: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy195; -#line 296 "../src/parse/lex.re" +#line 285 "../src/parse/lex.re" { yylval.op = *tok; return STAR; @@ -788,7 +788,7 @@ yy126: #line 789 "src/parse/lex.cc" yy128: ++YYCURSOR; -#line 300 "../src/parse/lex.re" +#line 289 "../src/parse/lex.re" { yylval.op = *tok; return CLOSE; @@ -796,7 +796,7 @@ yy128: #line 797 "src/parse/lex.cc" yy130: ++YYCURSOR; -#line 383 "../src/parse/lex.re" +#line 372 "../src/parse/lex.re" { yylval.regexp = mkDot(); return REGEXP; @@ -826,13 +826,8 @@ yy136: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy176; #line 272 "../src/parse/lex.re" - { - std::vector cpoints; - lex_cpoints (']', cpoints); - yylval.regexp = cpoint_class (cpoints, false); - return REGEXP; - } -#line 836 "src/parse/lex.cc" + { yylval.regexp = lex_cls(false); return REGEXP; } +#line 831 "src/parse/lex.cc" yy138: YYCTXMARKER = YYCURSOR + 1; yych = (YYCTYPE)*++YYCURSOR; @@ -861,7 +856,7 @@ yy140: depth = 1; goto code; } -#line 865 "src/parse/lex.cc" +#line 860 "src/parse/lex.cc" yy141: ++YYCURSOR; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -910,14 +905,14 @@ yy144: yy146: ++YYCURSOR; yy147: -#line 336 "../src/parse/lex.re" +#line 325 "../src/parse/lex.re" { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } -#line 918 "src/parse/lex.cc" +#line 913 "src/parse/lex.cc" yy148: ++YYCURSOR; -#line 340 "../src/parse/lex.re" +#line 329 "../src/parse/lex.re" { if (!opts->FFlag) { fatal("curly braces for names only allowed with -F switch"); @@ -925,10 +920,10 @@ yy148: yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces return ID; } -#line 929 "src/parse/lex.cc" +#line 924 "src/parse/lex.cc" yy150: ++YYCURSOR; -#line 305 "../src/parse/lex.re" +#line 294 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) { @@ -937,7 +932,7 @@ yy150: yylval.extop.max = yylval.extop.min; return CLOSESIZE; } -#line 941 "src/parse/lex.cc" +#line 936 "src/parse/lex.cc" yy152: yyaccept = 3; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -945,7 +940,7 @@ yy152: if (yych <= '9') goto yy155; if (yych != '}') goto yy147; ++YYCURSOR; -#line 327 "../src/parse/lex.re" +#line 316 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) { @@ -954,7 +949,7 @@ yy152: yylval.extop.max = UINT32_MAX; return CLOSESIZE; } -#line 958 "src/parse/lex.cc" +#line 953 "src/parse/lex.cc" yy155: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -963,7 +958,7 @@ yy155: if (yych <= '9') goto yy155; if (yych != '}') goto yy143; ++YYCURSOR; -#line 314 "../src/parse/lex.re" +#line 303 "../src/parse/lex.re" { const char * p = strchr (tok, ','); if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) @@ -976,11 +971,11 @@ yy155: } return CLOSESIZE; } -#line 980 "src/parse/lex.cc" +#line 975 "src/parse/lex.cc" yy159: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 368 "../src/parse/lex.re" +#line 357 "../src/parse/lex.re" { if (!opts->FFlag) { yylval.str = new std::string (tok, tok_len ()); @@ -995,7 +990,7 @@ yy159: return REGEXP; } } -#line 999 "src/parse/lex.cc" +#line 994 "src/parse/lex.cc" yy161: yych = (YYCTYPE)*++YYCURSOR; goto yy174; @@ -1003,12 +998,12 @@ yy162: ++YYCURSOR; yy163: YYCURSOR = YYCTXMARKER; -#line 363 "../src/parse/lex.re" +#line 352 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); return ID; } -#line 1012 "src/parse/lex.cc" +#line 1007 "src/parse/lex.cc" yy164: YYCTXMARKER = YYCURSOR + 1; ++YYCURSOR; @@ -1053,13 +1048,13 @@ yy166: yych = (YYCTYPE)*++YYCURSOR; if (yych != ':') goto yy165; ++YYCURSOR; -#line 348 "../src/parse/lex.re" +#line 337 "../src/parse/lex.re" { lex_conf (); return CONF; } -#line 1059 "src/parse/lex.cc" +#line 1054 "src/parse/lex.cc" yy171: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 350 "../src/parse/lex.re" +#line 339 "../src/parse/lex.re" { yylval.str = new std::string (tok, tok_len ()); if (opts->FFlag) @@ -1072,7 +1067,7 @@ yy171: return ID; } } -#line 1076 "src/parse/lex.cc" +#line 1071 "src/parse/lex.cc" yy173: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1089,21 +1084,16 @@ yy175: goto yy163; yy176: ++YYCURSOR; -#line 279 "../src/parse/lex.re" - { - std::vector cpoints; - lex_cpoints (']', cpoints); - yylval.regexp = cpoint_class (cpoints, true); - return REGEXP; - } -#line 1100 "src/parse/lex.cc" +#line 273 "../src/parse/lex.re" + { yylval.regexp = lex_cls(true); return REGEXP; } +#line 1090 "src/parse/lex.cc" yy178: ++YYCURSOR; -#line 289 "../src/parse/lex.re" +#line 278 "../src/parse/lex.re" { return SETUP; } -#line 1107 "src/parse/lex.cc" +#line 1097 "src/parse/lex.cc" yy180: YYCTXMARKER = YYCURSOR + 1; yych = (YYCTYPE)*++YYCURSOR; @@ -1133,11 +1123,11 @@ yy182: yy183: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 286 "../src/parse/lex.re" +#line 275 "../src/parse/lex.re" { return NOCOND; } -#line 1141 "src/parse/lex.cc" +#line 1131 "src/parse/lex.cc" yy185: yych = (YYCTYPE)*++YYCURSOR; if (yych == '>') goto yy183; @@ -1155,7 +1145,7 @@ yy187: depth = 0; goto code; } -#line 1159 "src/parse/lex.cc" +#line 1149 "src/parse/lex.cc" yy189: ++YYCURSOR; YYCURSOR = YYCTXMARKER; @@ -1163,7 +1153,7 @@ yy189: { return *tok; } -#line 1167 "src/parse/lex.cc" +#line 1157 "src/parse/lex.cc" yy191: ++YYCURSOR; #line 246 "../src/parse/lex.re" @@ -1171,14 +1161,14 @@ yy191: depth = 1; goto comment; } -#line 1175 "src/parse/lex.cc" +#line 1165 "src/parse/lex.cc" yy193: ++YYCURSOR; #line 243 "../src/parse/lex.re" { goto nextLine; } -#line 1182 "src/parse/lex.cc" +#line 1172 "src/parse/lex.cc" yy195: ++YYCURSOR; #line 252 "../src/parse/lex.re" @@ -1186,7 +1176,7 @@ yy195: tok = cur; return 0; } -#line 1190 "src/parse/lex.cc" +#line 1180 "src/parse/lex.cc" yy197: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1279,12 +1269,12 @@ yy210: yy212: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 392 "../src/parse/lex.re" +#line 381 "../src/parse/lex.re" { set_sourceline (); goto scan; } -#line 1288 "src/parse/lex.cc" +#line 1278 "src/parse/lex.cc" yy214: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy212; @@ -1317,12 +1307,12 @@ yy220: if (yych == ' ') goto yy219; goto yy115; } -#line 408 "../src/parse/lex.re" +#line 397 "../src/parse/lex.re" flex_name: -#line 1326 "src/parse/lex.cc" +#line 1316 "src/parse/lex.cc" { YYCTYPE yych; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); @@ -1331,22 +1321,22 @@ flex_name: if (yych == '\r') goto yy227; ++YYCURSOR; yy224: -#line 419 "../src/parse/lex.re" +#line 408 "../src/parse/lex.re" { YYCURSOR = tok; goto start; } -#line 1340 "src/parse/lex.cc" +#line 1330 "src/parse/lex.cc" yy225: ++YYCURSOR; yy226: -#line 413 "../src/parse/lex.re" +#line 402 "../src/parse/lex.re" { YYCURSOR = tok; lexer_state = LEX_NORMAL; return FID_END; } -#line 1350 "src/parse/lex.cc" +#line 1340 "src/parse/lex.cc" yy227: yych = (YYCTYPE)*++YYCURSOR; if (yych != '\n') goto yy224; @@ -1354,12 +1344,12 @@ yy227: yych = (YYCTYPE)*YYCURSOR; goto yy226; } -#line 423 "../src/parse/lex.re" +#line 412 "../src/parse/lex.re" code: -#line 1363 "src/parse/lex.cc" +#line 1353 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1420,7 +1410,7 @@ code: } yy231: ++YYCURSOR; -#line 486 "../src/parse/lex.re" +#line 475 "../src/parse/lex.re" { if (cur == eof) { @@ -1432,15 +1422,15 @@ yy231: } goto code; } -#line 1436 "src/parse/lex.cc" +#line 1426 "src/parse/lex.cc" yy233: ++YYCURSOR; yy234: -#line 500 "../src/parse/lex.re" +#line 489 "../src/parse/lex.re" { goto code; } -#line 1444 "src/parse/lex.cc" +#line 1434 "src/parse/lex.cc" yy235: YYCTXMARKER = YYCURSOR + 1; yyaccept = 0; @@ -1460,7 +1450,7 @@ yy235: } } yy236: -#line 467 "../src/parse/lex.re" +#line 456 "../src/parse/lex.re" { if (depth == 0) { @@ -1480,7 +1470,7 @@ yy236: cline++; goto code; } -#line 1484 "src/parse/lex.cc" +#line 1474 "src/parse/lex.cc" yy237: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); @@ -1493,7 +1483,7 @@ yy238: goto yy244; yy239: ++YYCURSOR; -#line 439 "../src/parse/lex.re" +#line 428 "../src/parse/lex.re" { if (depth == 0) { @@ -1505,10 +1495,10 @@ yy239: } goto code; } -#line 1509 "src/parse/lex.cc" +#line 1499 "src/parse/lex.cc" yy241: ++YYCURSOR; -#line 427 "../src/parse/lex.re" +#line 416 "../src/parse/lex.re" { if (depth == 0) { @@ -1521,7 +1511,7 @@ yy241: } goto code; } -#line 1525 "src/parse/lex.cc" +#line 1515 "src/parse/lex.cc" yy243: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1552,11 +1542,11 @@ yy246: goto yy243; yy247: ++YYCURSOR; -#line 497 "../src/parse/lex.re" +#line 486 "../src/parse/lex.re" { goto code; } -#line 1560 "src/parse/lex.cc" +#line 1550 "src/parse/lex.cc" yy249: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1583,7 +1573,7 @@ yy252: } yy253: YYCURSOR = YYCTXMARKER; -#line 454 "../src/parse/lex.re" +#line 443 "../src/parse/lex.re" { if (depth == 0) { @@ -1597,7 +1587,7 @@ yy253: cline++; goto code; } -#line 1601 "src/parse/lex.cc" +#line 1591 "src/parse/lex.cc" yy254: yych = (YYCTYPE)*++YYCURSOR; goto yy253; @@ -1664,12 +1654,12 @@ yy265: yy267: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 450 "../src/parse/lex.re" +#line 439 "../src/parse/lex.re" { set_sourceline (); goto code; } -#line 1673 "src/parse/lex.cc" +#line 1663 "src/parse/lex.cc" yy269: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy267; @@ -1708,12 +1698,12 @@ yy274: goto yy245; } } -#line 503 "../src/parse/lex.re" +#line 492 "../src/parse/lex.re" comment: -#line 1717 "src/parse/lex.cc" +#line 1707 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -1760,7 +1750,7 @@ comment: } ++YYCURSOR; yy279: -#line 535 "../src/parse/lex.re" +#line 524 "../src/parse/lex.re" { if (cur == eof) { @@ -1768,7 +1758,7 @@ yy279: } goto comment; } -#line 1772 "src/parse/lex.cc" +#line 1762 "src/parse/lex.cc" yy280: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yybm[0+yych] & 32) { @@ -1776,7 +1766,7 @@ yy280: } if (yych == '#') goto yy291; yy281: -#line 526 "../src/parse/lex.re" +#line 515 "../src/parse/lex.re" { if (cur == eof) { @@ -1786,7 +1776,7 @@ yy281: cline++; goto comment; } -#line 1790 "src/parse/lex.cc" +#line 1780 "src/parse/lex.cc" yy282: yych = (YYCTYPE)*++YYCURSOR; if (yych == '/') goto yy286; @@ -1795,16 +1785,16 @@ yy283: yych = (YYCTYPE)*++YYCURSOR; if (yych != '*') goto yy279; ++YYCURSOR; -#line 517 "../src/parse/lex.re" +#line 506 "../src/parse/lex.re" { ++depth; fatal("ambiguous /* found"); goto comment; } -#line 1805 "src/parse/lex.cc" +#line 1795 "src/parse/lex.cc" yy286: ++YYCURSOR; -#line 507 "../src/parse/lex.re" +#line 496 "../src/parse/lex.re" { if (--depth == 0) { @@ -1815,7 +1805,7 @@ yy286: goto comment; } } -#line 1819 "src/parse/lex.cc" +#line 1809 "src/parse/lex.cc" yy288: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); @@ -1893,12 +1883,12 @@ yy301: yy303: ++YYCURSOR; YYCURSOR = YYCTXMARKER; -#line 522 "../src/parse/lex.re" +#line 511 "../src/parse/lex.re" { set_sourceline (); goto comment; } -#line 1902 "src/parse/lex.cc" +#line 1892 "src/parse/lex.cc" yy305: yych = (YYCTYPE)*++YYCURSOR; if (yych == '\n') goto yy303; @@ -1923,28 +1913,28 @@ yy309: if (yych == '\r') goto yy305; goto yy290; } -#line 542 "../src/parse/lex.re" +#line 531 "../src/parse/lex.re" nextLine: -#line 1932 "src/parse/lex.cc" +#line 1922 "src/parse/lex.cc" { YYCTYPE yych; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yych == '\n') goto yy314; ++YYCURSOR; -#line 553 "../src/parse/lex.re" +#line 542 "../src/parse/lex.re" { if(cur == eof) { return 0; } goto nextLine; } -#line 1945 "src/parse/lex.cc" +#line 1935 "src/parse/lex.cc" yy314: ++YYCURSOR; -#line 546 "../src/parse/lex.re" +#line 535 "../src/parse/lex.re" { if(cur == eof) { return 0; } @@ -1952,9 +1942,9 @@ yy314: cline++; goto scan; } -#line 1956 "src/parse/lex.cc" +#line 1946 "src/parse/lex.cc" } -#line 558 "../src/parse/lex.re" +#line 547 "../src/parse/lex.re" } @@ -1972,294 +1962,680 @@ static void escape (std::string & dest, const std::string & src) } } -void Scanner::lex_cpoints (char quote, std::vector & cs) +void Scanner::lex_str (char quote, std::vector & cs) { for (;;) { tok = cur; -#line 1982 "src/parse/lex.cc" +#line 1972 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); yych = (YYCTYPE)*YYCURSOR; - if (yych == '\n') goto yy320; - if (yych == '\\') goto yy322; - ++YYCURSOR; -#line 600 "../src/parse/lex.re" - { - const char c = tok[0]; - if (c == quote) - { - return; - } - else - { - cs.push_back (static_cast (c)); - continue; - } + if (yych <= '"') { + if (yych == '\n') goto yy320; + if (yych >= '"') goto yy322; + } else { + if (yych <= '\'') { + if (yych >= '\'') goto yy322; + } else { + if (yych == '\\') goto yy324; } -#line 2004 "src/parse/lex.cc" + } + ++YYCURSOR; +#line 587 "../src/parse/lex.re" + { cs.push_back(static_cast(tok[0])); continue; } +#line 1991 "src/parse/lex.cc" yy320: ++YYCURSOR; -#line 581 "../src/parse/lex.re" +#line 570 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2009 "src/parse/lex.cc" +#line 1996 "src/parse/lex.cc" yy322: ++YYCURSOR; - if ((yych = (YYCTYPE)*YYCURSOR) <= 'b') { +#line 575 "../src/parse/lex.re" + { if (quote == tok[0]) return; cs.push_back(static_cast(tok[0])); continue; } +#line 2001 "src/parse/lex.cc" +yy324: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { if (yych <= 'T') { if (yych <= '/') { - if (yych != '\n') goto yy324; + if (yych != '\n') goto yy326; } else { - if (yych <= '3') goto yy326; - if (yych <= '7') goto yy328; - goto yy324; + if (yych <= '3') goto yy328; + if (yych <= '7') goto yy330; + goto yy326; } } else { if (yych <= 'X') { - if (yych <= 'U') goto yy329; - if (yych <= 'W') goto yy324; - goto yy331; + if (yych <= 'U') goto yy331; + if (yych <= 'W') goto yy326; + goto yy333; } else { - if (yych == '\\') goto yy332; - if (yych <= '`') goto yy324; - goto yy332; + if (yych == '\\') goto yy334; + if (yych <= '`') goto yy326; + goto yy336; } } } else { if (yych <= 'r') { - if (yych <= 'm') { - if (yych == 'f') goto yy332; - goto yy324; + if (yych <= 'f') { + if (yych <= 'b') goto yy338; + if (yych <= 'e') goto yy326; + goto yy340; } else { - if (yych <= 'n') goto yy332; - if (yych <= 'q') goto yy324; - goto yy332; + if (yych == 'n') goto yy342; + if (yych <= 'q') goto yy326; + goto yy344; } } else { if (yych <= 'u') { - if (yych <= 's') goto yy324; - if (yych <= 't') goto yy332; - goto yy331; + if (yych <= 's') goto yy326; + if (yych <= 't') goto yy346; + goto yy333; } else { - if (yych <= 'v') goto yy332; - if (yych == 'x') goto yy334; - goto yy324; + if (yych <= 'v') goto yy348; + if (yych == 'x') goto yy350; + goto yy326; } } } -#line 584 "../src/parse/lex.re" +#line 573 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2056 "src/parse/lex.cc" -yy324: - ++YYCURSOR; -#line 590 "../src/parse/lex.re" - { - const char c = tok[1]; - if (c != quote) - { - warn.useless_escape (tline, tok - pos, c); - } - cs.push_back (static_cast (c)); - continue; - } -#line 2069 "src/parse/lex.cc" +#line 2049 "src/parse/lex.cc" yy326: + ++YYCURSOR; +#line 586 "../src/parse/lex.re" + { cs.push_back(static_cast(tok[1])); if (quote != tok[1]) warn.useless_escape(tline, tok - pos, tok[1]); continue; } +#line 2054 "src/parse/lex.cc" +yy328: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); - if (yych <= '/') goto yy327; - if (yych <= '7') goto yy349; -yy327: -#line 583 "../src/parse/lex.re" + if (yych <= '/') goto yy329; + if (yych <= '7') goto yy365; +yy329: +#line 572 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2078 "src/parse/lex.cc" -yy328: +#line 2063 "src/parse/lex.cc" +yy330: yych = (YYCTYPE)*++YYCURSOR; - goto yy327; -yy329: + goto yy329; +yy331: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy330; - if (yych <= '9') goto yy342; + if (yych <= '/') goto yy332; + if (yych <= '9') goto yy358; } else { - if (yych <= 'F') goto yy342; - if (yych <= '`') goto yy330; - if (yych <= 'f') goto yy342; + if (yych <= 'F') goto yy358; + if (yych <= '`') goto yy332; + if (yych <= 'f') goto yy358; } -yy330: -#line 582 "../src/parse/lex.re" +yy332: +#line 571 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2096 "src/parse/lex.cc" -yy331: +#line 2081 "src/parse/lex.cc" +yy333: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy330; - if (yych <= '9') goto yy339; - goto yy330; + if (yych <= '/') goto yy332; + if (yych <= '9') goto yy355; + goto yy332; } else { - if (yych <= 'F') goto yy339; - if (yych <= '`') goto yy330; - if (yych <= 'f') goto yy339; - goto yy330; + if (yych <= 'F') goto yy355; + if (yych <= '`') goto yy332; + if (yych <= 'f') goto yy355; + goto yy332; } -yy332: +yy334: ++YYCURSOR; -#line 588 "../src/parse/lex.re" - { cs.push_back (unesc_simple (tok)); continue; } +#line 585 "../src/parse/lex.re" + { cs.push_back(static_cast('\\')); continue; } +#line 2099 "src/parse/lex.cc" +yy336: + ++YYCURSOR; +#line 578 "../src/parse/lex.re" + { cs.push_back(static_cast('\a')); continue; } +#line 2104 "src/parse/lex.cc" +yy338: + ++YYCURSOR; +#line 579 "../src/parse/lex.re" + { cs.push_back(static_cast('\b')); continue; } +#line 2109 "src/parse/lex.cc" +yy340: + ++YYCURSOR; +#line 580 "../src/parse/lex.re" + { cs.push_back(static_cast('\f')); continue; } #line 2114 "src/parse/lex.cc" -yy334: +yy342: + ++YYCURSOR; +#line 581 "../src/parse/lex.re" + { cs.push_back(static_cast('\n')); continue; } +#line 2119 "src/parse/lex.cc" +yy344: + ++YYCURSOR; +#line 582 "../src/parse/lex.re" + { cs.push_back(static_cast('\r')); continue; } +#line 2124 "src/parse/lex.cc" +yy346: + ++YYCURSOR; +#line 583 "../src/parse/lex.re" + { cs.push_back(static_cast('\t')); continue; } +#line 2129 "src/parse/lex.cc" +yy348: + ++YYCURSOR; +#line 584 "../src/parse/lex.re" + { cs.push_back(static_cast('\v')); continue; } +#line 2134 "src/parse/lex.cc" +yy350: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy330; - if (yych >= ':') goto yy330; + if (yych <= '/') goto yy332; + if (yych >= ':') goto yy332; } else { - if (yych <= 'F') goto yy335; - if (yych <= '`') goto yy330; - if (yych >= 'g') goto yy330; + if (yych <= 'F') goto yy351; + if (yych <= '`') goto yy332; + if (yych >= 'g') goto yy332; } -yy335: +yy351: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych <= '9') goto yy337; + if (yych <= '/') goto yy352; + if (yych <= '9') goto yy353; } else { - if (yych <= 'F') goto yy337; - if (yych <= '`') goto yy336; - if (yych <= 'f') goto yy337; + if (yych <= 'F') goto yy353; + if (yych <= '`') goto yy352; + if (yych <= 'f') goto yy353; } -yy336: +yy352: YYCURSOR = YYMARKER; if (yyaccept == 0) { - goto yy327; + goto yy329; } else { - goto yy330; + goto yy332; } -yy337: +yy353: ++YYCURSOR; -#line 586 "../src/parse/lex.re" - { cs.push_back (unesc_hex (tok, cur)); continue; } -#line 2147 "src/parse/lex.cc" -yy339: +#line 576 "../src/parse/lex.re" + { cs.push_back(unesc_hex(tok, cur)); continue; } +#line 2167 "src/parse/lex.cc" +yy355: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy340; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy356; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy340: +yy356: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy341; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy357; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy341: +yy357: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych <= '9') goto yy337; - goto yy336; + if (yych <= '/') goto yy352; + if (yych <= '9') goto yy353; + goto yy352; } else { - if (yych <= 'F') goto yy337; - if (yych <= '`') goto yy336; - if (yych <= 'f') goto yy337; - goto yy336; + if (yych <= 'F') goto yy353; + if (yych <= '`') goto yy352; + if (yych <= 'f') goto yy353; + goto yy352; } -yy342: +yy358: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy343; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy359; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy343: +yy359: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy344; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy360; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy344: +yy360: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy345; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy361; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy345: +yy361: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy346; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy362; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy346: +yy362: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy347; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy363; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy347: +yy363: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych >= ':') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= ':') goto yy352; } else { - if (yych <= 'F') goto yy348; - if (yych <= '`') goto yy336; - if (yych >= 'g') goto yy336; + if (yych <= 'F') goto yy364; + if (yych <= '`') goto yy352; + if (yych >= 'g') goto yy352; } -yy348: +yy364: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy336; - if (yych <= '9') goto yy337; - goto yy336; + if (yych <= '/') goto yy352; + if (yych <= '9') goto yy353; + goto yy352; } else { - if (yych <= 'F') goto yy337; - if (yych <= '`') goto yy336; - if (yych <= 'f') goto yy337; - goto yy336; + if (yych <= 'F') goto yy353; + if (yych <= '`') goto yy352; + if (yych <= 'f') goto yy353; + goto yy352; } -yy349: +yy365: yych = (YYCTYPE)*++YYCURSOR; - if (yych <= '/') goto yy336; - if (yych >= '8') goto yy336; + if (yych <= '/') goto yy352; + if (yych >= '8') goto yy352; ++YYCURSOR; -#line 587 "../src/parse/lex.re" - { cs.push_back (unesc_oct (tok, cur)); continue; } -#line 2259 "src/parse/lex.cc" +#line 577 "../src/parse/lex.re" + { cs.push_back(unesc_oct(tok, cur)); continue; } +#line 2279 "src/parse/lex.cc" +} +#line 588 "../src/parse/lex.re" + + } } + +RegExp * Scanner::lex_cls (bool neg) +{ + std::vector cs; + std::set esc; + for (;;) + { + tok = cur; + +#line 2294 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '[') { + if (yych == '\n') goto yy372; + } else { + if (yych <= '\\') goto yy374; + if (yych <= ']') goto yy376; + } + ++YYCURSOR; +#line 619 "../src/parse/lex.re" + { cs.push_back(static_cast(tok[0])); continue; } +#line 2309 "src/parse/lex.cc" +yy372: + ++YYCURSOR; +#line 600 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error"); } +#line 2314 "src/parse/lex.cc" +yy374: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { + if (yych <= '7') { + if (yych <= ',') { + if (yych != '\n') goto yy378; + } else { + if (yych <= '-') goto yy380; + if (yych <= '/') goto yy378; + if (yych <= '3') goto yy382; + goto yy384; + } + } else { + if (yych <= 'X') { + if (yych == 'U') goto yy385; + if (yych <= 'W') goto yy378; + goto yy387; + } else { + if (yych <= '[') goto yy378; + if (yych <= '\\') goto yy388; + if (yych <= ']') goto yy390; + goto yy378; + } + } + } else { + if (yych <= 'q') { + if (yych <= 'e') { + if (yych <= 'a') goto yy392; + if (yych <= 'b') goto yy394; + goto yy378; + } else { + if (yych <= 'f') goto yy396; + if (yych == 'n') goto yy398; + goto yy378; + } + } else { + if (yych <= 'u') { + if (yych <= 'r') goto yy400; + if (yych <= 's') goto yy378; + if (yych <= 't') goto yy402; + goto yy387; + } else { + if (yych <= 'v') goto yy404; + if (yych == 'x') goto yy406; + goto yy378; + } + } + } +#line 603 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } +#line 2365 "src/parse/lex.cc" +yy376: + ++YYCURSOR; +#line 605 "../src/parse/lex.re" + { break; } +#line 2370 "src/parse/lex.cc" +yy378: + ++YYCURSOR; +#line 618 "../src/parse/lex.re" + { cs.push_back(static_cast(tok[1])); warn.useless_escape(tline, tok - pos, tok[1]); continue; } +#line 2375 "src/parse/lex.cc" +yy380: + ++YYCURSOR; +#line 617 "../src/parse/lex.re" + { cs.push_back(static_cast('-')); esc.insert(cs.size() - 1); continue; } +#line 2380 "src/parse/lex.cc" +yy382: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy383; + if (yych <= '7') goto yy421; +yy383: +#line 602 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } +#line 2389 "src/parse/lex.cc" +yy384: + yych = (YYCTYPE)*++YYCURSOR; + goto yy383; +yy385: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy386; + if (yych <= '9') goto yy414; + } else { + if (yych <= 'F') goto yy414; + if (yych <= '`') goto yy386; + if (yych <= 'f') goto yy414; + } +yy386: +#line 601 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } +#line 2407 "src/parse/lex.cc" +yy387: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy386; + if (yych <= '9') goto yy411; + goto yy386; + } else { + if (yych <= 'F') goto yy411; + if (yych <= '`') goto yy386; + if (yych <= 'f') goto yy411; + goto yy386; + } +yy388: + ++YYCURSOR; +#line 615 "../src/parse/lex.re" + { cs.push_back(static_cast('\\')); continue; } +#line 2425 "src/parse/lex.cc" +yy390: + ++YYCURSOR; +#line 616 "../src/parse/lex.re" + { cs.push_back(static_cast(']')); continue; } +#line 2430 "src/parse/lex.cc" +yy392: + ++YYCURSOR; +#line 608 "../src/parse/lex.re" + { cs.push_back(static_cast('\a')); continue; } +#line 2435 "src/parse/lex.cc" +yy394: + ++YYCURSOR; +#line 609 "../src/parse/lex.re" + { cs.push_back(static_cast('\b')); continue; } +#line 2440 "src/parse/lex.cc" +yy396: + ++YYCURSOR; +#line 610 "../src/parse/lex.re" + { cs.push_back(static_cast('\f')); continue; } +#line 2445 "src/parse/lex.cc" +yy398: + ++YYCURSOR; +#line 611 "../src/parse/lex.re" + { cs.push_back(static_cast('\n')); continue; } +#line 2450 "src/parse/lex.cc" +yy400: + ++YYCURSOR; #line 612 "../src/parse/lex.re" + { cs.push_back(static_cast('\r')); continue; } +#line 2455 "src/parse/lex.cc" +yy402: + ++YYCURSOR; +#line 613 "../src/parse/lex.re" + { cs.push_back(static_cast('\t')); continue; } +#line 2460 "src/parse/lex.cc" +yy404: + ++YYCURSOR; +#line 614 "../src/parse/lex.re" + { cs.push_back(static_cast('\v')); continue; } +#line 2465 "src/parse/lex.cc" +yy406: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy386; + if (yych >= ':') goto yy386; + } else { + if (yych <= 'F') goto yy407; + if (yych <= '`') goto yy386; + if (yych >= 'g') goto yy386; + } +yy407: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy409; + } else { + if (yych <= 'F') goto yy409; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy409; + } +yy408: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy383; + } else { + goto yy386; + } +yy409: + ++YYCURSOR; +#line 607 "../src/parse/lex.re" + { cs.push_back(unesc_hex(tok, cur)); continue; } +#line 2498 "src/parse/lex.cc" +yy411: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy412; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy412: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy413; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy413: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy409; + goto yy408; + } else { + if (yych <= 'F') goto yy409; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy409; + goto yy408; + } +yy414: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy415; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy415: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy416; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy416: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy417; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy417: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy418; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy418: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy419; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy419: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy420; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy420: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy409; + goto yy408; + } else { + if (yych <= 'F') goto yy409; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy409; + goto yy408; + } +yy421: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '/') goto yy408; + if (yych >= '8') goto yy408; + ++YYCURSOR; +#line 606 "../src/parse/lex.re" + { cs.push_back(unesc_oct(tok, cur)); continue; } +#line 2610 "src/parse/lex.cc" +} +#line 620 "../src/parse/lex.re" } + Range * r = NULL; + const size_t count = cs.size (); + for (size_t i = 0; i < count; ++i) + { + uint32_t l = cs[i]; + uint32_t u = count - i >= 3 && (cs[i + 1] == '-' && esc.find(i + 1) == esc.end()) + ? cs[i += 2] + : l; + if (l > u) + { + warn.swapped_range (get_line (), l, u); + std::swap (l, u); + } + Range * s = opts->encoding.encodeRange (l, u); + if (!s) + { + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); + } + r = Range::add (r, s); + } + if (neg) + { + r = Range::sub (opts->encoding.fullRange (), r); + } + return matchSymbolRange (r); } void Scanner::set_sourceline () @@ -2267,7 +2643,7 @@ void Scanner::set_sourceline () sourceline: tok = cur; -#line 2271 "src/parse/lex.cc" +#line 2647 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2307,23 +2683,23 @@ sourceline: if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*YYCURSOR; if (yych <= '!') { - if (yych == '\n') goto yy356; + if (yych == '\n') goto yy428; } else { - if (yych <= '"') goto yy358; - if (yych <= '0') goto yy354; - if (yych <= '9') goto yy359; + if (yych <= '"') goto yy430; + if (yych <= '0') goto yy426; + if (yych <= '9') goto yy431; } -yy354: +yy426: ++YYCURSOR; -yy355: -#line 644 "../src/parse/lex.re" +yy427: +#line 677 "../src/parse/lex.re" { goto sourceline; } -#line 2324 "src/parse/lex.cc" -yy356: +#line 2700 "src/parse/lex.cc" +yy428: ++YYCURSOR; -#line 632 "../src/parse/lex.re" +#line 665 "../src/parse/lex.re" { if (cur == eof) { @@ -2336,17 +2712,17 @@ yy356: tok = cur; return; } -#line 2340 "src/parse/lex.cc" -yy358: +#line 2716 "src/parse/lex.cc" +yy430: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); - if (yych == '\n') goto yy355; - goto yy364; -yy359: + if (yych == '\n') goto yy427; + goto yy436; +yy431: ++YYCURSOR; yych = (YYCTYPE)*YYCURSOR; - goto yy362; -yy360: -#line 621 "../src/parse/lex.re" + goto yy434; +yy432: +#line 654 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok, cur, cline)) { @@ -2354,46 +2730,46 @@ yy360: } goto sourceline; } -#line 2358 "src/parse/lex.cc" -yy361: +#line 2734 "src/parse/lex.cc" +yy433: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; -yy362: +yy434: if (yybm[0+yych] & 64) { - goto yy361; + goto yy433; } - goto yy360; -yy363: + goto yy432; +yy435: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; -yy364: +yy436: if (yybm[0+yych] & 128) { - goto yy363; + goto yy435; } - if (yych <= '\n') goto yy365; - if (yych <= '"') goto yy367; - goto yy366; -yy365: + if (yych <= '\n') goto yy437; + if (yych <= '"') goto yy439; + goto yy438; +yy437: YYCURSOR = YYMARKER; - goto yy355; -yy366: + goto yy427; +yy438: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - if (yych == '\n') goto yy365; - goto yy363; -yy367: + if (yych == '\n') goto yy437; + goto yy435; +yy439: ++YYCURSOR; -#line 628 "../src/parse/lex.re" +#line 661 "../src/parse/lex.re" { escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } -#line 2395 "src/parse/lex.cc" +#line 2771 "src/parse/lex.cc" } -#line 647 "../src/parse/lex.re" +#line 680 "../src/parse/lex.re" } diff --git a/re2c/bootstrap/src/parse/lex_conf.cc b/re2c/bootstrap/src/parse/lex_conf.cc index af6bdf23..7994c364 100644 --- a/re2c/bootstrap/src/parse/lex_conf.cc +++ b/re2c/bootstrap/src/parse/lex_conf.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.14.3 on Mon Oct 12 15:30:24 2015 */ +/* Generated by re2c 0.14.3 on Mon Nov 9 14:02:24 2015 */ #line 1 "../src/parse/lex_conf.re" #include "src/parse/scanner.h" #include "src/util/s_to_n32_unsafe.h" @@ -1700,7 +1700,7 @@ yy489: #line 192 "../src/parse/lex_conf.re" { std::vector cpoints; - lex_cpoints (tok[0], cpoints); + lex_str (tok[0], cpoints); s = cpoint_conf (cpoints); goto end; } diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index 6197f0cb..913696e2 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -170,35 +170,6 @@ RegExp * Scanner::cpoint_string (const std::vector & cs, bool case_ins return r ? r : new NullOp; } -RegExp * Scanner::cpoint_class (const std::vector & cs, bool neg) const -{ - Range * r = NULL; - const size_t count = cs.size (); - for (size_t i = 0; i < count; ++i) - { - uint32_t l = cs[i]; - uint32_t u = count - i >= 3 && cs[i + 1] == '-' - ? cs[i += 2] - : l; - if (l > u) - { - warn.swapped_range (get_line (), l, u); - std::swap (l, u); - } - Range * s = opts->encoding.encodeRange (l, u); - if (!s) - { - fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); - } - r = Range::add (r, s); - } - if (neg) - { - r = Range::sub (opts->encoding.fullRange (), r); - } - return matchSymbolRange (r); -} - RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const { MatchOp * m1 = dynamic_cast (e1); diff --git a/re2c/src/parse/lex.re b/re2c/src/parse/lex.re index 245dc65e..85f4e35b 100644 --- a/re2c/src/parse/lex.re +++ b/re2c/src/parse/lex.re @@ -257,31 +257,20 @@ start: "'" { std::vector cpoints; - lex_cpoints ('\'', cpoints); + lex_str ('\'', cpoints); yylval.regexp = cpoint_string (cpoints, opts->bCaseInsensitive || !opts->bCaseInverted); return REGEXP; } "\"" { std::vector cpoints; - lex_cpoints ('"', cpoints); + lex_str ('"', cpoints); yylval.regexp = cpoint_string (cpoints, opts->bCaseInsensitive || opts->bCaseInverted); return REGEXP; } - "[" - { - std::vector cpoints; - lex_cpoints (']', cpoints); - yylval.regexp = cpoint_class (cpoints, false); - return REGEXP; - } - "[^" - { - std::vector cpoints; - lex_cpoints (']', cpoints); - yylval.regexp = cpoint_class (cpoints, true); - return REGEXP; - } + + "[" { yylval.regexp = lex_cls(false); return REGEXP; } + "[^" { yylval.regexp = lex_cls(true); return REGEXP; } "<>" / (space* ("{" | "=>" | ":=")) { return NOCOND; @@ -572,8 +561,38 @@ static void escape (std::string & dest, const std::string & src) } } -void Scanner::lex_cpoints (char quote, std::vector & cs) +void Scanner::lex_str (char quote, std::vector & cs) +{ + for (;;) + { + tok = cur; + /*!re2c + * { fatal ((tok - pos) - tchar, "syntax error"); } + esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } + esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } + esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } + + ["'] { if (quote == tok[0]) return; cs.push_back(static_cast(tok[0])); continue; } + esc_hex { cs.push_back(unesc_hex(tok, cur)); continue; } + esc_oct { cs.push_back(unesc_oct(tok, cur)); continue; } + esc "a" { cs.push_back(static_cast('\a')); continue; } + esc "b" { cs.push_back(static_cast('\b')); continue; } + esc "f" { cs.push_back(static_cast('\f')); continue; } + esc "n" { cs.push_back(static_cast('\n')); continue; } + esc "r" { cs.push_back(static_cast('\r')); continue; } + esc "t" { cs.push_back(static_cast('\t')); continue; } + esc "v" { cs.push_back(static_cast('\v')); continue; } + esc "\\" { cs.push_back(static_cast('\\')); continue; } + esc . { cs.push_back(static_cast(tok[1])); if (quote != tok[1]) warn.useless_escape(tline, tok - pos, tok[1]); continue; } + . \ esc { cs.push_back(static_cast(tok[0])); continue; } + */ + } +} + +RegExp * Scanner::lex_cls (bool neg) { + std::vector cs; + std::set esc; for (;;) { tok = cur; @@ -583,34 +602,48 @@ void Scanner::lex_cpoints (char quote, std::vector & cs) esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } - esc_hex { cs.push_back (unesc_hex (tok, cur)); continue; } - esc_oct { cs.push_back (unesc_oct (tok, cur)); continue; } - esc_simple { cs.push_back (unesc_simple (tok)); continue; } - esc . + "]" { break; } + esc_oct { cs.push_back(unesc_oct(tok, cur)); continue; } + esc_hex { cs.push_back(unesc_hex(tok, cur)); continue; } + esc "a" { cs.push_back(static_cast('\a')); continue; } + esc "b" { cs.push_back(static_cast('\b')); continue; } + esc "f" { cs.push_back(static_cast('\f')); continue; } + esc "n" { cs.push_back(static_cast('\n')); continue; } + esc "r" { cs.push_back(static_cast('\r')); continue; } + esc "t" { cs.push_back(static_cast('\t')); continue; } + esc "v" { cs.push_back(static_cast('\v')); continue; } + esc "\\" { cs.push_back(static_cast('\\')); continue; } + esc "]" { cs.push_back(static_cast(']')); continue; } + esc "-" { cs.push_back(static_cast('-')); esc.insert(cs.size() - 1); continue; } + esc . { cs.push_back(static_cast(tok[1])); warn.useless_escape(tline, tok - pos, tok[1]); continue; } + . \ esc { cs.push_back(static_cast(tok[0])); continue; } + */ + } + Range * r = NULL; + const size_t count = cs.size (); + for (size_t i = 0; i < count; ++i) + { + uint32_t l = cs[i]; + uint32_t u = count - i >= 3 && (cs[i + 1] == '-' && esc.find(i + 1) == esc.end()) + ? cs[i += 2] + : l; + if (l > u) { - const char c = tok[1]; - if (c != quote) - { - warn.useless_escape (tline, tok - pos, c); - } - cs.push_back (static_cast (c)); - continue; + warn.swapped_range (get_line (), l, u); + std::swap (l, u); } - . \ esc + Range * s = opts->encoding.encodeRange (l, u); + if (!s) { - const char c = tok[0]; - if (c == quote) - { - return; - } - else - { - cs.push_back (static_cast (c)); - continue; - } + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); } - */ + r = Range::add (r, s); + } + if (neg) + { + r = Range::sub (opts->encoding.fullRange (), r); } + return matchSymbolRange (r); } void Scanner::set_sourceline () diff --git a/re2c/src/parse/lex_conf.re b/re2c/src/parse/lex_conf.re index ac8ff940..bc13b79c 100644 --- a/re2c/src/parse/lex_conf.re +++ b/re2c/src/parse/lex_conf.re @@ -191,7 +191,7 @@ std::string Scanner::lex_conf_string () ['"] { std::vector cpoints; - lex_cpoints (tok[0], cpoints); + lex_str (tok[0], cpoints); s = cpoint_conf (cpoints); goto end; } diff --git a/re2c/src/parse/scanner.h b/re2c/src/parse/scanner.h index 46bda44f..7375be17 100644 --- a/re2c/src/parse/scanner.h +++ b/re2c/src/parse/scanner.h @@ -58,7 +58,8 @@ public: private: void fill (uint32_t); void set_sourceline (); - void lex_cpoints (char quote, std::vector & cs); + void lex_str (char quote, std::vector & cs); + RegExp *lex_cls (bool neg); void lex_conf (); void lex_conf_assign (); @@ -103,7 +104,6 @@ public: RegExp * mkDot () const; RegExp * mkDefault () const; RegExp * cpoint_string (const std::vector & cs, bool case_sensitive) const; - RegExp * cpoint_class (const std::vector & cs, bool neg) const; FORBID_COPY (Scanner); }; diff --git a/re2c/src/parse/unescape.cc b/re2c/src/parse/unescape.cc index c2df9e0c..0a50cc47 100644 --- a/re2c/src/parse/unescape.cc +++ b/re2c/src/parse/unescape.cc @@ -36,23 +36,6 @@ static inline uint32_t hex_digit (const char c) } } -// expected string format: "\" [abfnrtv\\] -uint32_t unesc_simple (char * s) -{ - switch (s[1]) - { - case 'a': return '\a'; - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'v': return '\v'; - case '\\': return '\\'; - default: return ~0u; // unexpected - } -} - // expected string format: "\" [xXuU] [0-9a-zA-Z]* uint32_t unesc_hex (const char * s, const char * s_end) { diff --git a/re2c/src/parse/unescape.h b/re2c/src/parse/unescape.h index d3b789dc..000c378d 100644 --- a/re2c/src/parse/unescape.h +++ b/re2c/src/parse/unescape.h @@ -5,7 +5,6 @@ namespace re2c { -uint32_t unesc_simple (char * s); uint32_t unesc_hex (const char * s, const char * s_end); uint32_t unesc_oct (const char * s, const char * s_end); diff --git a/re2c/test/bug1479044.b.c b/re2c/test/bug1479044.b.c index 1f1ee517..e7968e31 100644 --- a/re2c/test/bug1479044.b.c +++ b/re2c/test/bug1479044.b.c @@ -1244,4 +1244,3 @@ int main(int argc, char **argv) } return 0; } -re2c: warning: line 16: column 156: escape has no effect: '\-' [-Wuseless-escape] diff --git a/re2c/test/bug1479044.c b/re2c/test/bug1479044.c index 967b5cc8..811f90a7 100644 --- a/re2c/test/bug1479044.c +++ b/re2c/test/bug1479044.c @@ -1476,4 +1476,3 @@ int main(int argc, char **argv) } return 0; } -re2c: warning: line 16: column 156: escape has no effect: '\-' [-Wuseless-escape] diff --git a/re2c/test/bug1479044.s.c b/re2c/test/bug1479044.s.c index 65e76b2f..f9766061 100644 --- a/re2c/test/bug1479044.s.c +++ b/re2c/test/bug1479044.s.c @@ -1213,4 +1213,3 @@ int main(int argc, char **argv) } return 0; } -re2c: warning: line 16: column 156: escape has no effect: '\-' [-Wuseless-escape] diff --git a/re2c/test/cpoint_class_esc_dash.c b/re2c/test/cpoint_class_esc_dash.c new file mode 100644 index 00000000..08577422 --- /dev/null +++ b/re2c/test/cpoint_class_esc_dash.c @@ -0,0 +1,24 @@ +/* Generated by re2c */ +#line 1 "cpoint_class_esc_dash.re" + +#line 5 "cpoint_class_esc_dash.c" +{ + YYCTYPE yych; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case '-': + case 'a': + case 'c': goto yy3; + default: goto yy2; + } +yy2: +yy3: + ++YYCURSOR; +#line 2 "cpoint_class_esc_dash.re" + {} +#line 21 "cpoint_class_esc_dash.c" +} +#line 3 "cpoint_class_esc_dash.re" + +re2c: warning: line 3: control flow is undefined for strings that match '[\x0-\x2C\x2E-\x60\x62\x64-\xFF]', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/cpoint_class_esc_dash.re b/re2c/test/cpoint_class_esc_dash.re new file mode 100644 index 00000000..f611bc61 --- /dev/null +++ b/re2c/test/cpoint_class_esc_dash.re @@ -0,0 +1,3 @@ +/*!re2c + [a\-c] {} +*/ -- 2.40.0