From: Ulya Trofimovich Date: Mon, 7 Nov 2016 09:46:18 +0000 (+0000) Subject: Recognize newlines in character strings and classes. X-Git-Tag: 1.0~51 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=54b50c0bbfe4a865bf004ef51fb639156a7657d7;p=re2c Recognize newlines in character strings and classes. As for now, newline inside of a character string or class is an error: re2c should emit clear error message. Different styles of newlines should be recognized ("\n", "\r\n"). This commit fixes bug #162 reported by pauloscustodio: Reading files with "rb" causes issues in Windows --- diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index 27a90ace..84ec9398 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Sat Jun 25 16:16:23 2016 */ +/* Generated by re2c 0.16 on Mon Nov 7 09:30:24 2016 */ #line 1 "../src/parse/lex.re" #include "src/util/c99_stdint.h" #include @@ -2153,284 +2153,300 @@ uint32_t Scanner::lex_cls_chr() unsigned int yyaccept = 0; if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); yych = (YYCTYPE)*YYCURSOR; - if (yych == '\n') goto yy328; - if (yych == '\\') goto yy330; + if (yych <= '\f') { + if (yych == '\n') goto yy328; + } else { + if (yych <= '\r') goto yy330; + if (yych == '\\') goto yy331; + } ++YYCURSOR; -#line 602 "../src/parse/lex.re" +yy327: +#line 603 "../src/parse/lex.re" { return static_cast(tok[0]); } -#line 2162 "src/parse/lex.cc" +#line 2167 "src/parse/lex.cc" yy328: ++YYCURSOR; -#line 597 "../src/parse/lex.re" - { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2167 "src/parse/lex.cc" +#line 598 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "newline in character class"); } +#line 2172 "src/parse/lex.cc" yy330: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy328; + goto yy327; +yy331: ++YYCURSOR; - if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { - if (yych <= '7') { - if (yych <= ',') { - if (yych != '\n') goto yy332; + if ((yych = (YYCTYPE)*YYCURSOR) <= ']') { + if (yych <= '3') { + if (yych <= '\r') { + if (yych == '\n') goto yy328; + if (yych <= '\f') goto yy333; + goto yy335; } else { - if (yych <= '-') goto yy334; - if (yych <= '/') goto yy332; - if (yych <= '3') goto yy336; + if (yych == '-') goto yy336; + if (yych <= '/') goto yy333; goto yy338; } } else { - if (yych <= 'X') { - if (yych == 'U') goto yy339; - if (yych <= 'W') goto yy332; - goto yy341; + if (yych <= 'W') { + if (yych <= '7') goto yy340; + if (yych == 'U') goto yy341; + goto yy333; } else { - if (yych <= '[') goto yy332; - if (yych <= '\\') goto yy342; - if (yych <= ']') goto yy344; - goto yy332; + if (yych <= 'X') goto yy343; + if (yych <= '[') goto yy333; + if (yych <= '\\') goto yy344; + goto yy346; } } } else { if (yych <= 'q') { if (yych <= 'e') { - if (yych <= 'a') goto yy346; - if (yych <= 'b') goto yy348; - goto yy332; + if (yych <= '`') goto yy333; + if (yych <= 'a') goto yy348; + if (yych <= 'b') goto yy350; + goto yy333; } else { - if (yych <= 'f') goto yy350; - if (yych == 'n') goto yy352; - goto yy332; + if (yych <= 'f') goto yy352; + if (yych == 'n') goto yy354; + goto yy333; } } else { if (yych <= 'u') { - if (yych <= 'r') goto yy354; - if (yych <= 's') goto yy332; - if (yych <= 't') goto yy356; - goto yy341; + if (yych <= 'r') goto yy356; + if (yych <= 's') goto yy333; + if (yych <= 't') goto yy358; + goto yy343; } else { - if (yych <= 'v') goto yy358; - if (yych == 'x') goto yy360; - goto yy332; + if (yych <= 'v') goto yy360; + if (yych == 'x') goto yy362; + goto yy333; } } } -#line 600 "../src/parse/lex.re" +#line 601 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2218 "src/parse/lex.cc" -yy332: +#line 2229 "src/parse/lex.cc" +yy333: ++YYCURSOR; -#line 615 "../src/parse/lex.re" +yy334: +#line 616 "../src/parse/lex.re" { warn.useless_escape(tline, tok - pos, tok[1]); return static_cast(tok[1]); } -#line 2226 "src/parse/lex.cc" -yy334: +#line 2238 "src/parse/lex.cc" +yy335: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy328; + goto yy334; +yy336: ++YYCURSOR; -#line 613 "../src/parse/lex.re" +#line 614 "../src/parse/lex.re" { return static_cast('-'); } -#line 2231 "src/parse/lex.cc" -yy336: +#line 2247 "src/parse/lex.cc" +yy338: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); - if (yych <= '/') goto yy337; - if (yych <= '7') goto yy361; -yy337: -#line 599 "../src/parse/lex.re" + if (yych <= '/') goto yy339; + if (yych <= '7') goto yy363; +yy339: +#line 600 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2240 "src/parse/lex.cc" -yy338: +#line 2256 "src/parse/lex.cc" +yy340: yych = (YYCTYPE)*++YYCURSOR; - goto yy337; -yy339: + goto yy339; +yy341: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy340; - if (yych <= '9') goto yy363; + if (yych <= '/') goto yy342; + if (yych <= '9') goto yy365; } else { - if (yych <= 'F') goto yy363; - if (yych <= '`') goto yy340; - if (yych <= 'f') goto yy363; + if (yych <= 'F') goto yy365; + if (yych <= '`') goto yy342; + if (yych <= 'f') goto yy365; } -yy340: -#line 598 "../src/parse/lex.re" +yy342: +#line 599 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2258 "src/parse/lex.cc" -yy341: +#line 2274 "src/parse/lex.cc" +yy343: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy340; - if (yych <= '9') goto yy364; - goto yy340; + if (yych <= '/') goto yy342; + if (yych <= '9') goto yy366; + goto yy342; } else { - if (yych <= 'F') goto yy364; - if (yych <= '`') goto yy340; - if (yych <= 'f') goto yy364; - goto yy340; + if (yych <= 'F') goto yy366; + if (yych <= '`') goto yy342; + if (yych <= 'f') goto yy366; + goto yy342; } -yy342: - ++YYCURSOR; -#line 612 "../src/parse/lex.re" - { return static_cast('\\'); } -#line 2276 "src/parse/lex.cc" yy344: ++YYCURSOR; -#line 614 "../src/parse/lex.re" - { return static_cast(']'); } -#line 2281 "src/parse/lex.cc" +#line 613 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2292 "src/parse/lex.cc" yy346: ++YYCURSOR; -#line 605 "../src/parse/lex.re" - { return static_cast('\a'); } -#line 2286 "src/parse/lex.cc" +#line 615 "../src/parse/lex.re" + { return static_cast(']'); } +#line 2297 "src/parse/lex.cc" yy348: ++YYCURSOR; #line 606 "../src/parse/lex.re" - { return static_cast('\b'); } -#line 2291 "src/parse/lex.cc" + { return static_cast('\a'); } +#line 2302 "src/parse/lex.cc" yy350: ++YYCURSOR; #line 607 "../src/parse/lex.re" - { return static_cast('\f'); } -#line 2296 "src/parse/lex.cc" + { return static_cast('\b'); } +#line 2307 "src/parse/lex.cc" yy352: ++YYCURSOR; #line 608 "../src/parse/lex.re" - { return static_cast('\n'); } -#line 2301 "src/parse/lex.cc" + { return static_cast('\f'); } +#line 2312 "src/parse/lex.cc" yy354: ++YYCURSOR; #line 609 "../src/parse/lex.re" - { return static_cast('\r'); } -#line 2306 "src/parse/lex.cc" + { return static_cast('\n'); } +#line 2317 "src/parse/lex.cc" yy356: ++YYCURSOR; #line 610 "../src/parse/lex.re" - { return static_cast('\t'); } -#line 2311 "src/parse/lex.cc" + { return static_cast('\r'); } +#line 2322 "src/parse/lex.cc" yy358: ++YYCURSOR; #line 611 "../src/parse/lex.re" - { return static_cast('\v'); } -#line 2316 "src/parse/lex.cc" + { return static_cast('\t'); } +#line 2327 "src/parse/lex.cc" yy360: + ++YYCURSOR; +#line 612 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2332 "src/parse/lex.cc" +yy362: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy340; - if (yych <= '9') goto yy365; - goto yy340; + if (yych <= '/') goto yy342; + if (yych <= '9') goto yy367; + goto yy342; } else { - if (yych <= 'F') goto yy365; - if (yych <= '`') goto yy340; - if (yych <= 'f') goto yy365; - goto yy340; + if (yych <= 'F') goto yy367; + if (yych <= '`') goto yy342; + if (yych <= 'f') goto yy367; + goto yy342; } -yy361: +yy363: yych = (YYCTYPE)*++YYCURSOR; - if (yych <= '/') goto yy362; - if (yych <= '7') goto yy366; -yy362: + if (yych <= '/') goto yy364; + if (yych <= '7') goto yy368; +yy364: YYCURSOR = YYMARKER; if (yyaccept == 0) { - goto yy337; + goto yy339; } else { - goto yy340; + goto yy342; } -yy363: +yy365: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy368; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy370; + goto yy364; } else { - if (yych <= 'F') goto yy368; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy368; - goto yy362; + if (yych <= 'F') goto yy370; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy370; + goto yy364; } -yy364: +yy366: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy369; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy371; + goto yy364; } else { - if (yych <= 'F') goto yy369; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy369; - goto yy362; + if (yych <= 'F') goto yy371; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy371; + goto yy364; } -yy365: +yy367: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy370; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy372; + goto yy364; } else { - if (yych <= 'F') goto yy370; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy370; - goto yy362; + if (yych <= 'F') goto yy372; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy372; + goto yy364; } -yy366: +yy368: ++YYCURSOR; -#line 604 "../src/parse/lex.re" +#line 605 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2381 "src/parse/lex.cc" -yy368: +#line 2397 "src/parse/lex.cc" +yy370: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy372; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy374; + goto yy364; } else { - if (yych <= 'F') goto yy372; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy372; - goto yy362; + if (yych <= 'F') goto yy374; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy374; + goto yy364; } -yy369: +yy371: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy365; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy367; + goto yy364; } else { - if (yych <= 'F') goto yy365; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy365; - goto yy362; + if (yych <= 'F') goto yy367; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy367; + goto yy364; } -yy370: +yy372: ++YYCURSOR; -#line 603 "../src/parse/lex.re" +#line 604 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2410 "src/parse/lex.cc" -yy372: +#line 2426 "src/parse/lex.cc" +yy374: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy362; - if (yych >= ':') goto yy362; + if (yych <= '/') goto yy364; + if (yych >= ':') goto yy364; } else { - if (yych <= 'F') goto yy373; - if (yych <= '`') goto yy362; - if (yych >= 'g') goto yy362; + if (yych <= 'F') goto yy375; + if (yych <= '`') goto yy364; + if (yych >= 'g') goto yy364; } -yy373: +yy375: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { - if (yych <= '/') goto yy362; - if (yych <= '9') goto yy364; - goto yy362; + if (yych <= '/') goto yy364; + if (yych <= '9') goto yy366; + goto yy364; } else { - if (yych <= 'F') goto yy364; - if (yych <= '`') goto yy362; - if (yych <= 'f') goto yy364; - goto yy362; + if (yych <= 'F') goto yy366; + if (yych <= '`') goto yy364; + if (yych <= 'f') goto yy366; + goto yy364; } } -#line 619 "../src/parse/lex.re" +#line 620 "../src/parse/lex.re" } @@ -2439,282 +2455,298 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) end = false; tok = cur; -#line 2443 "src/parse/lex.cc" +#line 2459 "src/parse/lex.cc" { YYCTYPE yych; unsigned int yyaccept = 0; if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); yych = (YYCTYPE)*YYCURSOR; - if (yych == '\n') goto yy378; - if (yych == '\\') goto yy380; + if (yych <= '\f') { + if (yych == '\n') goto yy380; + } else { + if (yych <= '\r') goto yy382; + if (yych == '\\') goto yy383; + } ++YYCURSOR; -#line 632 "../src/parse/lex.re" +yy379: +#line 634 "../src/parse/lex.re" { end = tok[0] == quote; return static_cast(tok[0]); } -#line 2457 "src/parse/lex.cc" -yy378: - ++YYCURSOR; -#line 627 "../src/parse/lex.re" - { fatal ((tok - pos) - tchar, "syntax error"); } -#line 2462 "src/parse/lex.cc" +#line 2478 "src/parse/lex.cc" yy380: ++YYCURSOR; - if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { - if (yych <= 'T') { - if (yych <= '/') { - if (yych != '\n') goto yy382; +#line 629 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "newline in character string"); } +#line 2483 "src/parse/lex.cc" +yy382: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy380; + goto yy379; +yy383: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { + if (yych <= '7') { + if (yych <= '\f') { + if (yych == '\n') goto yy380; + goto yy385; } else { - if (yych <= '3') goto yy384; - if (yych <= '7') goto yy386; - goto yy382; + if (yych <= '\r') goto yy387; + if (yych <= '/') goto yy385; + if (yych <= '3') goto yy388; + goto yy390; } } else { - if (yych <= 'X') { - if (yych <= 'U') goto yy387; - if (yych <= 'W') goto yy382; - goto yy389; + if (yych <= 'W') { + if (yych == 'U') goto yy391; + goto yy385; } else { - if (yych == '\\') goto yy390; - if (yych <= '`') goto yy382; - goto yy392; + if (yych <= 'X') goto yy393; + if (yych == '\\') goto yy394; + goto yy385; } } } else { - if (yych <= 'r') { - if (yych <= 'f') { - if (yych <= 'b') goto yy394; - if (yych <= 'e') goto yy382; - goto yy396; + if (yych <= 'q') { + if (yych <= 'e') { + if (yych <= 'a') goto yy396; + if (yych <= 'b') goto yy398; + goto yy385; } else { - if (yych == 'n') goto yy398; - if (yych <= 'q') goto yy382; - goto yy400; + if (yych <= 'f') goto yy400; + if (yych == 'n') goto yy402; + goto yy385; } } else { if (yych <= 'u') { - if (yych <= 's') goto yy382; - if (yych <= 't') goto yy402; - goto yy389; + if (yych <= 'r') goto yy404; + if (yych <= 's') goto yy385; + if (yych <= 't') goto yy406; + goto yy393; } else { - if (yych <= 'v') goto yy404; - if (yych == 'x') goto yy406; - goto yy382; + if (yych <= 'v') goto yy408; + if (yych == 'x') goto yy410; + goto yy385; } } } -#line 630 "../src/parse/lex.re" +#line 632 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } -#line 2510 "src/parse/lex.cc" -yy382: +#line 2537 "src/parse/lex.cc" +yy385: ++YYCURSOR; -#line 646 "../src/parse/lex.re" +yy386: +#line 648 "../src/parse/lex.re" { if (tok[1] != quote) { warn.useless_escape(tline, tok - pos, tok[1]); } return static_cast(tok[1]); } -#line 2520 "src/parse/lex.cc" -yy384: +#line 2548 "src/parse/lex.cc" +yy387: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy380; + goto yy386; +yy388: yyaccept = 0; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); - if (yych <= '/') goto yy385; - if (yych <= '7') goto yy407; -yy385: -#line 629 "../src/parse/lex.re" + if (yych <= '/') goto yy389; + if (yych <= '7') goto yy411; +yy389: +#line 631 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } -#line 2529 "src/parse/lex.cc" -yy386: +#line 2561 "src/parse/lex.cc" +yy390: yych = (YYCTYPE)*++YYCURSOR; - goto yy385; -yy387: + goto yy389; +yy391: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy388; - if (yych <= '9') goto yy409; + if (yych <= '/') goto yy392; + if (yych <= '9') goto yy413; } else { - if (yych <= 'F') goto yy409; - if (yych <= '`') goto yy388; - if (yych <= 'f') goto yy409; + if (yych <= 'F') goto yy413; + if (yych <= '`') goto yy392; + if (yych <= 'f') goto yy413; } -yy388: -#line 628 "../src/parse/lex.re" +yy392: +#line 630 "../src/parse/lex.re" { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } -#line 2547 "src/parse/lex.cc" -yy389: +#line 2579 "src/parse/lex.cc" +yy393: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy388; - if (yych <= '9') goto yy410; - goto yy388; + if (yych <= '/') goto yy392; + if (yych <= '9') goto yy414; + goto yy392; } else { - if (yych <= 'F') goto yy410; - if (yych <= '`') goto yy388; - if (yych <= 'f') goto yy410; - goto yy388; + if (yych <= 'F') goto yy414; + if (yych <= '`') goto yy392; + if (yych <= 'f') goto yy414; + goto yy392; } -yy390: - ++YYCURSOR; -#line 645 "../src/parse/lex.re" - { return static_cast('\\'); } -#line 2565 "src/parse/lex.cc" -yy392: - ++YYCURSOR; -#line 638 "../src/parse/lex.re" - { return static_cast('\a'); } -#line 2570 "src/parse/lex.cc" yy394: ++YYCURSOR; -#line 639 "../src/parse/lex.re" - { return static_cast('\b'); } -#line 2575 "src/parse/lex.cc" +#line 647 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2597 "src/parse/lex.cc" yy396: ++YYCURSOR; #line 640 "../src/parse/lex.re" - { return static_cast('\f'); } -#line 2580 "src/parse/lex.cc" + { return static_cast('\a'); } +#line 2602 "src/parse/lex.cc" yy398: ++YYCURSOR; #line 641 "../src/parse/lex.re" - { return static_cast('\n'); } -#line 2585 "src/parse/lex.cc" + { return static_cast('\b'); } +#line 2607 "src/parse/lex.cc" yy400: ++YYCURSOR; #line 642 "../src/parse/lex.re" - { return static_cast('\r'); } -#line 2590 "src/parse/lex.cc" + { return static_cast('\f'); } +#line 2612 "src/parse/lex.cc" yy402: ++YYCURSOR; #line 643 "../src/parse/lex.re" - { return static_cast('\t'); } -#line 2595 "src/parse/lex.cc" + { return static_cast('\n'); } +#line 2617 "src/parse/lex.cc" yy404: ++YYCURSOR; #line 644 "../src/parse/lex.re" - { return static_cast('\v'); } -#line 2600 "src/parse/lex.cc" + { return static_cast('\r'); } +#line 2622 "src/parse/lex.cc" yy406: + ++YYCURSOR; +#line 645 "../src/parse/lex.re" + { return static_cast('\t'); } +#line 2627 "src/parse/lex.cc" +yy408: + ++YYCURSOR; +#line 646 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2632 "src/parse/lex.cc" +yy410: yyaccept = 1; yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); if (yych <= '@') { - if (yych <= '/') goto yy388; - if (yych <= '9') goto yy411; - goto yy388; + if (yych <= '/') goto yy392; + if (yych <= '9') goto yy415; + goto yy392; } else { - if (yych <= 'F') goto yy411; - if (yych <= '`') goto yy388; - if (yych <= 'f') goto yy411; - goto yy388; + if (yych <= 'F') goto yy415; + if (yych <= '`') goto yy392; + if (yych <= 'f') goto yy415; + goto yy392; } -yy407: +yy411: yych = (YYCTYPE)*++YYCURSOR; - if (yych <= '/') goto yy408; - if (yych <= '7') goto yy412; -yy408: + if (yych <= '/') goto yy412; + if (yych <= '7') goto yy416; +yy412: YYCURSOR = YYMARKER; if (yyaccept == 0) { - goto yy385; + goto yy389; } else { - goto yy388; + goto yy392; } -yy409: +yy413: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy414; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy418; + goto yy412; } else { - if (yych <= 'F') goto yy414; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy414; - goto yy408; + if (yych <= 'F') goto yy418; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy418; + goto yy412; } -yy410: +yy414: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy415; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy419; + goto yy412; } else { - if (yych <= 'F') goto yy415; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy415; - goto yy408; + if (yych <= 'F') goto yy419; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy419; + goto yy412; } -yy411: +yy415: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy416; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy420; + goto yy412; } else { - if (yych <= 'F') goto yy416; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy416; - goto yy408; + if (yych <= 'F') goto yy420; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy420; + goto yy412; } -yy412: +yy416: ++YYCURSOR; -#line 637 "../src/parse/lex.re" +#line 639 "../src/parse/lex.re" { return unesc_oct(tok, cur); } -#line 2665 "src/parse/lex.cc" -yy414: +#line 2697 "src/parse/lex.cc" +yy418: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy418; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy422; + goto yy412; } else { - if (yych <= 'F') goto yy418; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy418; - goto yy408; + if (yych <= 'F') goto yy422; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy422; + goto yy412; } -yy415: +yy419: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy411; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy415; + goto yy412; } else { - if (yych <= 'F') goto yy411; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy411; - goto yy408; + if (yych <= 'F') goto yy415; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy415; + goto yy412; } -yy416: +yy420: ++YYCURSOR; -#line 636 "../src/parse/lex.re" +#line 638 "../src/parse/lex.re" { return unesc_hex(tok, cur); } -#line 2694 "src/parse/lex.cc" -yy418: +#line 2726 "src/parse/lex.cc" +yy422: yych = (YYCTYPE)*++YYCURSOR; if (yych <= '@') { - if (yych <= '/') goto yy408; - if (yych >= ':') goto yy408; + if (yych <= '/') goto yy412; + if (yych >= ':') goto yy412; } else { - if (yych <= 'F') goto yy419; - if (yych <= '`') goto yy408; - if (yych >= 'g') goto yy408; + if (yych <= 'F') goto yy423; + if (yych <= '`') goto yy412; + if (yych >= 'g') goto yy412; } -yy419: +yy423: ++YYCURSOR; if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { - if (yych <= '/') goto yy408; - if (yych <= '9') goto yy410; - goto yy408; + if (yych <= '/') goto yy412; + if (yych <= '9') goto yy414; + goto yy412; } else { - if (yych <= 'F') goto yy410; - if (yych <= '`') goto yy408; - if (yych <= 'f') goto yy410; - goto yy408; + if (yych <= 'F') goto yy414; + if (yych <= '`') goto yy412; + if (yych <= 'f') goto yy414; + goto yy412; } } -#line 652 "../src/parse/lex.re" +#line 654 "../src/parse/lex.re" } @@ -2735,7 +2767,7 @@ void Scanner::set_sourceline () sourceline: tok = cur; -#line 2739 "src/parse/lex.cc" +#line 2771 "src/parse/lex.cc" { YYCTYPE yych; static const unsigned char yybm[] = { @@ -2775,23 +2807,23 @@ sourceline: if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = (YYCTYPE)*YYCURSOR; if (yych <= '!') { - if (yych == '\n') goto yy424; + if (yych == '\n') goto yy428; } else { - if (yych <= '"') goto yy426; - if (yych <= '0') goto yy422; - if (yych <= '9') goto yy427; + if (yych <= '"') goto yy430; + if (yych <= '0') goto yy426; + if (yych <= '9') goto yy431; } -yy422: +yy426: ++YYCURSOR; -yy423: -#line 695 "../src/parse/lex.re" +yy427: +#line 697 "../src/parse/lex.re" { goto sourceline; } -#line 2792 "src/parse/lex.cc" -yy424: +#line 2824 "src/parse/lex.cc" +yy428: ++YYCURSOR; -#line 683 "../src/parse/lex.re" +#line 685 "../src/parse/lex.re" { if (cur == eof) { @@ -2804,19 +2836,19 @@ yy424: tok = cur; return; } -#line 2808 "src/parse/lex.cc" -yy426: +#line 2840 "src/parse/lex.cc" +yy430: yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); - if (yych == '\n') goto yy423; - goto yy431; -yy427: + if (yych == '\n') goto yy427; + goto yy435; +yy431: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; if (yybm[0+yych] & 64) { - goto yy427; + goto yy431; } -#line 672 "../src/parse/lex.re" +#line 674 "../src/parse/lex.re" { if (!s_to_u32_unsafe (tok, cur, cline)) { @@ -2824,37 +2856,37 @@ yy427: } goto sourceline; } -#line 2828 "src/parse/lex.cc" -yy430: +#line 2860 "src/parse/lex.cc" +yy434: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; -yy431: +yy435: if (yybm[0+yych] & 128) { - goto yy430; + goto yy434; } - if (yych <= '\n') goto yy432; - if (yych <= '"') goto yy433; - goto yy435; -yy432: + if (yych <= '\n') goto yy436; + if (yych <= '"') goto yy437; + goto yy439; +yy436: YYCURSOR = YYMARKER; - goto yy423; -yy433: + goto yy427; +yy437: ++YYCURSOR; -#line 679 "../src/parse/lex.re" +#line 681 "../src/parse/lex.re" { escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes goto sourceline; } -#line 2850 "src/parse/lex.cc" -yy435: +#line 2882 "src/parse/lex.cc" +yy439: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = (YYCTYPE)*YYCURSOR; - if (yych == '\n') goto yy432; - goto yy430; + if (yych == '\n') goto yy436; + goto yy434; } -#line 698 "../src/parse/lex.re" +#line 700 "../src/parse/lex.re" } diff --git a/re2c/src/parse/lex.re b/re2c/src/parse/lex.re index 707c72aa..053d0dc3 100644 --- a/re2c/src/parse/lex.re +++ b/re2c/src/parse/lex.re @@ -595,6 +595,7 @@ uint32_t Scanner::lex_cls_chr() tok = cur; /*!re2c * { fatal ((tok - pos) - tchar, "syntax error"); } + esc? eol { fatal ((tok - pos) - tchar, "newline in character class"); } esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } @@ -625,6 +626,7 @@ uint32_t Scanner::lex_str_chr(char quote, bool &end) tok = cur; /*!re2c * { fatal ((tok - pos) - tchar, "syntax error"); } + esc? eol { fatal ((tok - pos) - tchar, "newline in character string"); } esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } diff --git a/re2c/test/code_points_error_esc.c b/re2c/test/code_points_error_esc.c deleted file mode 100644 index 84c9cfd7..00000000 --- a/re2c/test/code_points_error_esc.c +++ /dev/null @@ -1 +0,0 @@ -re2c: error: line 2, column 6: syntax error in escape sequence diff --git a/re2c/test/code_points_error_lf.c b/re2c/test/code_points_error_lf.c deleted file mode 100644 index 5919bf6d..00000000 --- a/re2c/test/code_points_error_lf.c +++ /dev/null @@ -1 +0,0 @@ -re2c: error: line 2, column 6: syntax error diff --git a/re2c/test/cpoint_class_error_eol.c b/re2c/test/cpoint_class_error_eol.c new file mode 100644 index 00000000..c628c46c --- /dev/null +++ b/re2c/test/cpoint_class_error_eol.c @@ -0,0 +1 @@ +re2c: error: line 2, column 5: newline in character class diff --git a/re2c/test/cpoint_class_error_eol.re b/re2c/test/cpoint_class_error_eol.re new file mode 100644 index 00000000..4d362b06 --- /dev/null +++ b/re2c/test/cpoint_class_error_eol.re @@ -0,0 +1,4 @@ +/*!re2c +[abc +de] {} +*/ diff --git a/re2c/test/cpoint_class_error_esc_eol.c b/re2c/test/cpoint_class_error_esc_eol.c new file mode 100644 index 00000000..c628c46c --- /dev/null +++ b/re2c/test/cpoint_class_error_esc_eol.c @@ -0,0 +1 @@ +re2c: error: line 2, column 5: newline in character class diff --git a/re2c/test/cpoint_class_error_esc_eol.re b/re2c/test/cpoint_class_error_esc_eol.re new file mode 100644 index 00000000..facfeb0d --- /dev/null +++ b/re2c/test/cpoint_class_error_esc_eol.re @@ -0,0 +1,4 @@ +/*!re2c +[abc\ +de] {} +*/ diff --git a/re2c/test/cpoint_string_error_eol.c b/re2c/test/cpoint_string_error_eol.c new file mode 100644 index 00000000..10269571 --- /dev/null +++ b/re2c/test/cpoint_string_error_eol.c @@ -0,0 +1 @@ +re2c: error: line 2, column 6: newline in character string diff --git a/re2c/test/code_points_error_lf.re b/re2c/test/cpoint_string_error_eol.re similarity index 100% rename from re2c/test/code_points_error_lf.re rename to re2c/test/cpoint_string_error_eol.re diff --git a/re2c/test/cpoint_string_error_esc_eol.c b/re2c/test/cpoint_string_error_esc_eol.c new file mode 100644 index 00000000..10269571 --- /dev/null +++ b/re2c/test/cpoint_string_error_esc_eol.c @@ -0,0 +1 @@ +re2c: error: line 2, column 6: newline in character string diff --git a/re2c/test/code_points_error_esc.re b/re2c/test/cpoint_string_error_esc_eol.re similarity index 100% rename from re2c/test/code_points_error_esc.re rename to re2c/test/cpoint_string_error_esc_eol.re