From a900fc21435595cb6dc5aa271dae693f3d0a5acf Mon Sep 17 00:00:00 2001 From: helly Date: Sat, 8 Apr 2006 19:59:51 +0000 Subject: [PATCH] - Update scanner tests from current scanner --- test/scanner.c | 1352 +++++++++++++++++++++++++++++++++++------------ test/scanner.re | 245 ++++++--- 2 files changed, 1182 insertions(+), 415 deletions(-) diff --git a/test/scanner.c b/test/scanner.c index 790bf2ae..77cd4631 100644 --- a/test/scanner.c +++ b/test/scanner.c @@ -1,12 +1,15 @@ /* Generated by re2c */ #line 1 "scanner.re" - +/* $Id$ */ #include #include #include +#include #include "scanner.h" #include "parser.h" #include "y.tab.h" +#include "globals.h" +#include "dfa.h" extern YYSTYPE yylval; @@ -24,136 +27,240 @@ extern YYSTYPE yylval; #define RETURN(i) {cur = cursor; return i;} +namespace re2c +{ -Scanner::Scanner(std::istream& i) : in(i), - bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL), - top(NULL), eof(NULL), tchar(0), tline(0), cline(1) { +Scanner::Scanner(std::istream& i, std::ostream& o) + : in(i) + , out(o) + , bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL) + , top(NULL), eof(NULL), tchar(0), tline(0), cline(1), iscfg(0) +{ ; } -char *Scanner::fill(char *cursor){ - if(!eof){ - uint cnt = tok - bot; - if(cnt){ - memcpy(bot, tok, lim - tok); - tok = bot; - ptr -= cnt; - cursor -= cnt; - pos -= cnt; - lim -= cnt; - } - if((top - lim) < BSIZE){ - char *buf = new char[(lim - bot) + BSIZE]; - memcpy(buf, tok, lim - tok); - tok = buf; - ptr = &buf[ptr - bot]; - cursor = &buf[cursor - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[BSIZE]; - delete [] bot; - bot = buf; - } - if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE){ - eof = &lim[cnt]; *eof++ = '\n'; - } - lim += cnt; - } - return cursor; +char *Scanner::fill(char *cursor) +{ + if(!eof) + { + uint cnt = tok - bot; + if(cnt) + { + memcpy(bot, tok, lim - tok); + tok = bot; + ptr -= cnt; + cursor -= cnt; + pos -= cnt; + lim -= cnt; + } + if((top - lim) < BSIZE) + { + char *buf = new char[(lim - bot) + BSIZE]; + memcpy(buf, tok, lim - tok); + tok = buf; + ptr = &buf[ptr - bot]; + cursor = &buf[cursor - bot]; + pos = &buf[pos - bot]; + lim = &buf[lim - bot]; + top = &lim[BSIZE]; + delete [] bot; + bot = buf; + } + in.read(lim, BSIZE); + if ((cnt = in.gcount()) != BSIZE ) + { + eof = &lim[cnt]; *eof++ = '\0'; + } + lim += cnt; + } + return cursor; } -#line 72 "scanner.re" +#line 95 "scanner.re" -int Scanner::echo(std::ostream &out){ +int Scanner::echo() +{ char *cursor = cur; + bool ignore_eoc = false; - // Catch EOF - if (eof && cursor == eof) + if (eof && cursor == eof) // Catch EOF + { return 0; + } tok = cursor; echo: -#line 79 "" +#line 96 "" { YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 7) YYFILL(7); + if((YYLIMIT - YYCURSOR) < 11) YYFILL(11); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy4; + case 0x00: goto yy7; + case 0x0A: goto yy5; + case '*': goto yy4; case '/': goto yy2; - default: goto yy6; + default: goto yy9; } yy2: yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case '*': goto yy7; + case '*': goto yy12; default: goto yy3; } yy3: -#line 91 "scanner.re" - { goto echo; } -#line 99 "" +#line 141 "scanner.re" + { + goto echo; + } +#line 120 "" yy4: + yych = *++YYCURSOR; + switch(yych){ + case '/': goto yy10; + default: goto yy3; + } +yy5: ++YYCURSOR; -#line 87 "scanner.re" - { if(cursor == eof) RETURN(0); - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - tok = pos = cursor; cline++; - goto echo; } -#line 107 "" -yy6: +#line 130 "scanner.re" + { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); + tok = pos = cursor; cline++; + goto echo; + } +#line 135 "" +yy7: + ++YYCURSOR; +#line 135 "scanner.re" + { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0 + if(cursor == eof) { + RETURN(0); + } + } +#line 145 "" +yy9: yych = *++YYCURSOR; goto yy3; -yy7: +yy10: + ++YYCURSOR; +#line 121 "scanner.re" + { + if (ignore_eoc) { + ignore_eoc = false; + } else { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); + } + tok = pos = cursor; + goto echo; + } +#line 161 "" +yy12: yych = *++YYCURSOR; switch(yych){ - case '!': goto yy9; - default: goto yy8; + case '!': goto yy14; + default: goto yy13; } -yy8: +yy13: YYCURSOR = YYMARKER; goto yy3; -yy9: +yy14: yych = *++YYCURSOR; switch(yych){ - case 'r': goto yy10; - default: goto yy8; + case 'm': goto yy15; + case 'r': goto yy16; + default: goto yy13; } -yy10: +yy15: yych = *++YYCURSOR; switch(yych){ - case 'e': goto yy11; - default: goto yy8; + case 'a': goto yy21; + default: goto yy13; } -yy11: +yy16: yych = *++YYCURSOR; switch(yych){ - case '2': goto yy12; - default: goto yy8; + case 'e': goto yy17; + default: goto yy13; } -yy12: +yy17: yych = *++YYCURSOR; switch(yych){ - case 'c': goto yy13; - default: goto yy8; + case '2': goto yy18; + default: goto yy13; } -yy13: +yy18: + yych = *++YYCURSOR; + switch(yych){ + case 'c': goto yy19; + default: goto yy13; + } +yy19: + ++YYCURSOR; +#line 110 "scanner.re" + { + out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); + tok = cursor; + RETURN(1); + } +#line 210 "" +yy21: + yych = *++YYCURSOR; + switch(yych){ + case 'x': goto yy22; + default: goto yy13; + } +yy22: + yych = *++YYCURSOR; + switch(yych){ + case ':': goto yy23; + default: goto yy13; + } +yy23: + yych = *++YYCURSOR; + switch(yych){ + case 'r': goto yy24; + default: goto yy13; + } +yy24: + yych = *++YYCURSOR; + switch(yych){ + case 'e': goto yy25; + default: goto yy13; + } +yy25: + yych = *++YYCURSOR; + switch(yych){ + case '2': goto yy26; + default: goto yy13; + } +yy26: + yych = *++YYCURSOR; + switch(yych){ + case 'c': goto yy27; + default: goto yy13; + } +yy27: ++YYCURSOR; -#line 84 "scanner.re" - { out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); - tok = cursor; - RETURN(1); } -#line 150 "" +#line 115 "scanner.re" + { + out << "#define YYMAXFILL " << maxFill << std::endl; + tok = pos = cursor; + ignore_eoc = true; + goto echo; + } +#line 256 "" } -#line 92 "scanner.re" +#line 144 "scanner.re" } -int Scanner::scan(){ +int Scanner::scan() +{ char *cursor = cur; uint depth; @@ -161,29 +268,39 @@ scan: tchar = cursor - pos; tline = cline; tok = cursor; + if (iscfg == 1) + { + goto config; + } + else if (iscfg == 2) + { + goto value; + } -#line 166 "" +#line 281 "" { YYCTYPE yych; unsigned int yyaccept = 0; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + if((YYLIMIT - YYCURSOR) < 5) YYFILL(5); yych = *YYCURSOR; switch(yych){ case 0x09: - case ' ': goto yy33; - case 0x0A: goto yy35; - case '"': goto yy23; - case '\'': goto yy25; + case ' ': goto yy50; + case 0x0A: goto yy52; + case 0x0D: goto yy54; + case '"': goto yy37; + case '\'': goto yy39; case '(': case ')': case ';': case '=': case '\\': - case '|': goto yy29; - case '*': goto yy21; + case '|': goto yy43; + case '*': goto yy35; case '+': - case '?': goto yy30; - case '/': goto yy19; + case '?': goto yy44; + case '.': goto yy48; + case '/': goto yy33; case 'A': case 'B': case 'C': @@ -227,7 +344,6 @@ scan: case 'o': case 'p': case 'q': - case 'r': case 's': case 't': case 'u': @@ -235,16 +351,18 @@ scan: case 'w': case 'x': case 'y': - case 'z': goto yy31; - case '[': goto yy27; - case '{': goto yy17; - default: goto yy37; + case 'z': goto yy47; + case '[': goto yy41; + case 'r': goto yy45; + case '{': goto yy31; + default: goto yy56; } -yy17: +yy31: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case '0': + case ',': goto yy97; + case '0': goto yy94; case '1': case '2': case '3': @@ -253,123 +371,152 @@ yy17: case '6': case '7': case '8': - case '9': goto yy63; - default: goto yy18; + case '9': goto yy95; + default: goto yy32; } -yy18: -#line 105 "scanner.re" +yy32: +#line 166 "scanner.re" { depth = 1; goto code; } -#line 265 "" -yy19: +#line 383 "" +yy33: ++YYCURSOR; switch((yych = *YYCURSOR)) { - case '*': goto yy61; - default: goto yy20; + case '*': goto yy92; + default: goto yy34; } -yy20: -#line 131 "scanner.re" +yy34: +#line 196 "scanner.re" { RETURN(*tok); } -#line 275 "" -yy21: +#line 393 "" +yy35: ++YYCURSOR; switch((yych = *YYCURSOR)) { - case '/': goto yy59; - default: goto yy22; + case '/': goto yy90; + default: goto yy36; } -yy22: -#line 133 "scanner.re" +yy36: +#line 198 "scanner.re" { yylval.op = *tok; RETURN(CLOSE); } -#line 286 "" -yy23: +#line 404 "" +yy37: yyaccept = 1; yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy24; - default: goto yy55; + case 0x0A: goto yy38; + default: goto yy86; } -yy24: -#line 122 "scanner.re" +yy38: +#line 183 "scanner.re" { fatal("unterminated string constant (missing \")"); } -#line 297 "" -yy25: +#line 415 "" +yy39: yyaccept = 2; yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy26; - default: goto yy50; + case 0x0A: goto yy40; + default: goto yy81; } -yy26: -#line 123 "scanner.re" +yy40: +#line 184 "scanner.re" { fatal("unterminated string constant (missing ')"); } -#line 308 "" -yy27: +#line 426 "" +yy41: yyaccept = 3; yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy28; - default: goto yy44; + case 0x0A: goto yy42; + case '^': goto yy72; + default: goto yy71; } -yy28: -#line 129 "scanner.re" +yy42: +#line 194 "scanner.re" { fatal("unterminated range (missing ])"); } -#line 319 "" -yy29: +#line 438 "" +yy43: yych = *++YYCURSOR; - goto yy20; -yy30: + goto yy34; +yy44: yych = *++YYCURSOR; - goto yy22; -yy31: + goto yy36; +yy45: ++YYCURSOR; - yych = *YYCURSOR; - goto yy42; -yy32: -#line 148 "scanner.re" + switch((yych = *YYCURSOR)) { + case 'e': goto yy62; + default: goto yy61; + } +yy46: +#line 225 "scanner.re" { cur = cursor; yylval.symbol = Symbol::find(token()); return ID; } -#line 335 "" -yy33: +#line 456 "" +yy47: + yych = *++YYCURSOR; + goto yy61; +yy48: + ++YYCURSOR; +#line 229 "scanner.re" + { cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } +#line 467 "" +yy50: ++YYCURSOR; yych = *YYCURSOR; - goto yy40; -yy34: -#line 152 "scanner.re" + goto yy59; +yy51: +#line 234 "scanner.re" { goto scan; } -#line 343 "" -yy35: +#line 475 "" +yy52: ++YYCURSOR; -#line 154 "scanner.re" +yy53: +#line 236 "scanner.re" { if(cursor == eof) RETURN(0); pos = cursor; cline++; goto scan; } -#line 351 "" -yy37: +#line 484 "" +yy54: ++YYCURSOR; -#line 159 "scanner.re" - { std::cerr << "unexpected character: " << *tok << std::endl; + switch((yych = *YYCURSOR)) { + case 0x0A: goto yy57; + default: goto yy55; + } +yy55: +#line 241 "scanner.re" + { std::ostringstream msg; + msg << "unexpected character: "; + prtChOrHex(msg, *tok); + fatal(msg.str().c_str()); goto scan; } -#line 358 "" -yy39: +#line 499 "" +yy56: + yych = *++YYCURSOR; + goto yy55; +yy57: + yych = *++YYCURSOR; + goto yy53; +yy58: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy40: +yy59: switch(yych){ case 0x09: - case ' ': goto yy39; - default: goto yy34; + case ' ': goto yy58; + default: goto yy51; } -yy41: +yy60: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy42: +yy61: switch(yych){ case '0': case '1': @@ -432,113 +579,104 @@ yy42: case 'w': case 'x': case 'y': - case 'z': goto yy41; - default: goto yy32; + case 'z': goto yy60; + default: goto yy46; } -yy43: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy44: +yy62: + yych = *++YYCURSOR; switch(yych){ - case 0x0A: goto yy45; - case '\\': goto yy46; - case ']': goto yy47; - default: goto yy43; - } -yy45: - YYCURSOR = YYMARKER; - switch(yyaccept) { - case 0: goto yy18; - case 1: goto yy24; - case 2: goto yy26; - case 3: goto yy28; + case '2': goto yy63; + default: goto yy61; } -yy46: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; +yy63: + yych = *++YYCURSOR; switch(yych){ - case 0x0A: goto yy45; - default: goto yy43; + case 'c': goto yy64; + default: goto yy61; } -yy47: - ++YYCURSOR; -#line 125 "scanner.re" - { cur = cursor; - yylval.regexp = ranToRE(token()); - return RANGE; } -#line 472 "" -yy49: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy50: +yy64: + yyaccept = 4; + yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy45; - case '\'': goto yy52; - case '\\': goto yy51; - default: goto yy49; + case ':': goto yy65; + default: goto yy61; } -yy51: +yy65: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy45; - default: goto yy49; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy67; + default: goto yy66; } -yy52: - ++YYCURSOR; -#line 118 "scanner.re" - { cur = cursor; - yylval.regexp = strToCaseInsensitiveRE(token()); - return STRING; } -#line 498 "" -yy54: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy55: - switch(yych){ - case 0x0A: goto yy45; - case '"': goto yy57; - case '\\': goto yy56; - default: goto yy54; +yy66: + YYCURSOR = YYMARKER; + switch(yyaccept) { + case 0: goto yy32; + case 1: goto yy38; + case 2: goto yy40; + case 3: goto yy42; + case 6: goto yy98; + case 5: goto yy69; + case 4: goto yy46; } -yy56: - ++YYCURSOR; +yy67: + yyaccept = 5; + YYMARKER = ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy45; - default: goto yy54; - } -yy57: - ++YYCURSOR; -#line 114 "scanner.re" - { cur = cursor; - yylval.regexp = strToRE(token()); - return STRING; } -#line 524 "" -yy59: - ++YYCURSOR; -#line 111 "scanner.re" - { tok = cursor; - RETURN(0); } -#line 530 "" -yy61: - ++YYCURSOR; -#line 108 "scanner.re" - { depth = 1; - goto comment; } -#line 536 "" -yy63: - ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - switch(yych){ - case ',': goto yy67; case '0': case '1': case '2': @@ -548,35 +686,254 @@ yy63: case '6': case '7': case '8': - case '9': goto yy63; - case '}': goto yy65; - default: goto yy45; - } -yy65: - ++YYCURSOR; -#line 136 "scanner.re" - { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = atoi((char *)tok+1); - RETURN(CLOSESIZE); } -#line 562 "" -yy67: + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy67; + case ':': goto yy65; + default: goto yy69; + } +yy69: +#line 218 "scanner.re" + { cur = cursor; + tok+= 5; /* skip "re2c:" */ + iscfg = 1; + yylval.str = new Str(token()); + return CONFIG; + } +#line 755 "" +yy70: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy71: + switch(yych){ + case 0x0A: goto yy66; + case '\\': goto yy74; + case ']': goto yy75; + default: goto yy70; + } +yy72: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy66; + case '\\': goto yy77; + case ']': goto yy78; + default: goto yy72; + } +yy74: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy66; + default: goto yy70; + } +yy75: + ++YYCURSOR; +#line 190 "scanner.re" + { cur = cursor; + yylval.regexp = ranToRE(token()); + return RANGE; } +#line 791 "" +yy77: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy66; + default: goto yy72; + } +yy78: + ++YYCURSOR; +#line 186 "scanner.re" + { cur = cursor; + yylval.regexp = invToRE(token()); + return RANGE; } +#line 806 "" +yy80: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy81: + switch(yych){ + case 0x0A: goto yy66; + case '\'': goto yy83; + case '\\': goto yy82; + default: goto yy80; + } +yy82: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy66; + default: goto yy80; + } +yy83: + ++YYCURSOR; +#line 179 "scanner.re" + { cur = cursor; + yylval.regexp = strToCaseInsensitiveRE(token()); + return STRING; } +#line 832 "" +yy85: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy86: + switch(yych){ + case 0x0A: goto yy66; + case '"': goto yy88; + case '\\': goto yy87; + default: goto yy85; + } +yy87: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy66; + default: goto yy85; + } +yy88: + ++YYCURSOR; +#line 175 "scanner.re" + { cur = cursor; + yylval.regexp = strToRE(token()); + return STRING; } +#line 858 "" +yy90: + ++YYCURSOR; +#line 172 "scanner.re" + { tok = cursor; + RETURN(0); } +#line 864 "" +yy92: + ++YYCURSOR; +#line 169 "scanner.re" + { depth = 1; + goto comment; } +#line 870 "" +yy94: yych = *++YYCURSOR; switch(yych){ - case '}': goto yy68; - default: goto yy71; + case ',': goto yy108; + default: goto yy96; } -yy68: +yy95: ++YYCURSOR; -#line 144 "scanner.re" + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; +yy96: + switch(yych){ + case ',': goto yy101; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy95; + case '}': goto yy99; + default: goto yy66; + } +yy97: + ++YYCURSOR; +yy98: +#line 216 "scanner.re" + { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } +#line 902 "" +yy99: + ++YYCURSOR; +#line 204 "scanner.re" + { yylval.extop.minsize = atoi((char *)tok+1); + yylval.extop.maxsize = atoi((char *)tok+1); + RETURN(CLOSESIZE); } +#line 909 "" +yy101: + yyaccept = 6; + yych = *(YYMARKER = ++YYCURSOR); + switch(yych){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy104; + case '}': goto yy102; + default: goto yy98; + } +yy102: + ++YYCURSOR; +#line 212 "scanner.re" { yylval.extop.minsize = atoi((char *)tok+1); yylval.extop.maxsize = -1; RETURN(CLOSESIZE); } -#line 575 "" -yy70: +#line 933 "" +yy104: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy71: switch(yych){ case '0': case '1': @@ -587,184 +944,477 @@ yy71: case '6': case '7': case '8': - case '9': goto yy70; - case '}': goto yy72; - default: goto yy45; + case '9': goto yy104; + case '}': goto yy106; + default: goto yy66; } -yy72: +yy106: ++YYCURSOR; -#line 140 "scanner.re" +#line 208 "scanner.re" { yylval.extop.minsize = atoi((char *)tok+1); yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1)); RETURN(CLOSESIZE); } -#line 601 "" +#line 958 "" +yy108: + yyaccept = 6; + yych = *(YYMARKER = ++YYCURSOR); + switch(yych){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy104; + case '}': goto yy109; + default: goto yy98; + } +yy109: + ++YYCURSOR; +#line 201 "scanner.re" + { yylval.op = '*'; + RETURN(CLOSE); } +#line 981 "" } -#line 162 "scanner.re" +#line 247 "scanner.re" code: -#line 608 "" +#line 988 "" { YYCTYPE yych; if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy80; - case '"': goto yy84; - case '\'': goto yy85; - case '{': goto yy78; - case '}': goto yy76; - default: goto yy82; + case 0x0A: goto yy117; + case '"': goto yy121; + case '\'': goto yy122; + case '{': goto yy115; + case '}': goto yy113; + default: goto yy119; } -yy76: +yy113: ++YYCURSOR; -#line 166 "scanner.re" +#line 251 "scanner.re" { if(--depth == 0){ cur = cursor; yylval.token = new Token(token(), tline); return CODE; } goto code; } -#line 630 "" -yy78: +#line 1010 "" +yy115: ++YYCURSOR; -#line 172 "scanner.re" +#line 257 "scanner.re" { ++depth; goto code; } -#line 636 "" -yy80: +#line 1016 "" +yy117: ++YYCURSOR; -#line 174 "scanner.re" +#line 259 "scanner.re" { if(cursor == eof) fatal("missing '}'"); pos = cursor; cline++; goto code; } -#line 644 "" -yy82: +#line 1024 "" +yy119: ++YYCURSOR; -yy83: -#line 178 "scanner.re" +yy120: +#line 263 "scanner.re" { goto code; } -#line 650 "" -yy84: +#line 1030 "" +yy121: yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy83; - default: goto yy91; + case 0x0A: goto yy120; + default: goto yy128; } -yy85: +yy122: yych = *(YYMARKER = ++YYCURSOR); switch(yych){ - case 0x0A: goto yy83; - default: goto yy87; + case 0x0A: goto yy120; + default: goto yy124; } -yy86: +yy123: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy87: +yy124: switch(yych){ - case 0x0A: goto yy88; - case '\'': goto yy82; - case '\\': goto yy89; - default: goto yy86; + case 0x0A: goto yy125; + case '\'': goto yy119; + case '\\': goto yy126; + default: goto yy123; } -yy88: +yy125: YYCURSOR = YYMARKER; - goto yy83; -yy89: + goto yy120; +yy126: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy88; - default: goto yy86; + case 0x0A: goto yy125; + default: goto yy123; } -yy90: +yy127: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy91: +yy128: switch(yych){ - case 0x0A: goto yy88; - case '"': goto yy82; - case '\\': goto yy92; - default: goto yy90; + case 0x0A: goto yy125; + case '"': goto yy119; + case '\\': goto yy129; + default: goto yy127; } -yy92: +yy129: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy88; - default: goto yy90; + case 0x0A: goto yy125; + default: goto yy127; } } -#line 179 "scanner.re" +#line 264 "scanner.re" comment: -#line 710 "" +#line 1090 "" { YYCTYPE yych; if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; switch(yych){ - case 0x0A: goto yy98; - case '*': goto yy95; - case '/': goto yy97; - default: goto yy100; + case 0x0A: goto yy135; + case '*': goto yy132; + case '/': goto yy134; + default: goto yy137; } -yy95: +yy132: ++YYCURSOR; switch((yych = *YYCURSOR)) { - case '/': goto yy103; - default: goto yy96; + case '/': goto yy140; + default: goto yy133; } -yy96: -#line 193 "scanner.re" - { goto comment; } -#line 730 "" -yy97: +yy133: +#line 279 "scanner.re" + { if(cursor == eof) RETURN(0); + goto comment; } +#line 1111 "" +yy134: yych = *++YYCURSOR; switch(yych){ - case '*': goto yy101; - default: goto yy96; + case '*': goto yy138; + default: goto yy133; } -yy98: +yy135: ++YYCURSOR; -#line 189 "scanner.re" +#line 275 "scanner.re" { if(cursor == eof) RETURN(0); tok = pos = cursor; cline++; goto comment; } -#line 744 "" -yy100: +#line 1125 "" +yy137: yych = *++YYCURSOR; - goto yy96; -yy101: + goto yy133; +yy138: ++YYCURSOR; -#line 187 "scanner.re" +#line 272 "scanner.re" { ++depth; + fatal("ambiguous /* found"); goto comment; } -#line 753 "" -yy103: +#line 1135 "" +yy140: ++YYCURSOR; -#line 183 "scanner.re" +#line 268 "scanner.re" { if(--depth == 0) goto scan; else goto comment; } -#line 761 "" +#line 1143 "" } -#line 194 "scanner.re" +#line 281 "scanner.re" + + +config: + +#line 1150 "" +{ + YYCTYPE yych; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case ' ': goto yy144; + case '=': goto yy146; + default: goto yy148; + } +yy144: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy153; +yy145: +#line 285 "scanner.re" + { goto config; } +#line 1168 "" +yy146: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy151; +yy147: +#line 286 "scanner.re" + { iscfg = 2; + cur = cursor; + RETURN('='); + } +#line 1179 "" +yy148: + ++YYCURSOR; +#line 290 "scanner.re" + { fatal("missing '='"); } +#line 1184 "" +yy150: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy151: + switch(yych){ + case 0x09: + case ' ': goto yy150; + default: goto yy147; + } +yy152: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy153: + switch(yych){ + case 0x09: + case ' ': goto yy152; + default: goto yy145; + } +} +#line 291 "scanner.re" + + +value: + +#line 1211 "" +{ + YYCTYPE yych; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0A: + case 0x0D: + case ' ': + case ';': goto yy156; + case '"': goto yy164; + case '\'': goto yy166; + case '-': goto yy159; + case '0': goto yy157; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy160; + default: goto yy162; + } +yy156: +#line 300 "scanner.re" + { cur = cursor; + yylval.str = new Str(token()); + iscfg = 0; + return VALUE; + } +#line 1244 "" +yy157: + ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case 0x09: + case 0x0A: + case 0x0D: + case ' ': + case ';': goto yy158; + default: goto yy162; + } +yy158: +#line 295 "scanner.re" + { cur = cursor; + yylval.number = atoi(token().to_string().c_str()); + iscfg = 0; + return NUMBER; + } +#line 1262 "" +yy159: + yych = *++YYCURSOR; + switch(yych){ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy160; + default: goto yy163; + } +yy160: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0A: + case 0x0D: + case ' ': + case ';': goto yy158; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy160; + default: goto yy162; + } +yy162: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy163: + switch(yych){ + case 0x09: + case 0x0A: + case 0x0D: + case ' ': + case ';': goto yy156; + default: goto yy162; + } +yy164: + YYMARKER = ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0D: + case ' ': + case ';': goto yy174; + case 0x0A: goto yy156; + case '"': goto yy162; + case '\\': goto yy176; + default: goto yy164; + } +yy166: + YYMARKER = ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0D: + case ' ': + case ';': goto yy168; + case 0x0A: goto yy156; + case '\'': goto yy162; + case '\\': goto yy171; + default: goto yy166; + } +yy168: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy170; + case '\'': goto yy172; + case '\\': goto yy173; + default: goto yy168; + } +yy170: + YYCURSOR = YYMARKER; + goto yy156; +yy171: + YYMARKER = ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0D: + case ' ': + case ';': goto yy168; + case 0x0A: goto yy156; + default: goto yy166; + } +yy172: + yych = *++YYCURSOR; + goto yy156; +yy173: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy170; + default: goto yy168; + } +yy174: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy170; + case '"': goto yy172; + case '\\': goto yy177; + default: goto yy174; + } +yy176: + YYMARKER = ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x09: + case 0x0D: + case ' ': + case ';': goto yy174; + case 0x0A: goto yy156; + default: goto yy164; + } +yy177: + ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch(yych){ + case 0x0A: goto yy170; + default: goto yy174; + } +} +#line 305 "scanner.re" } -void Scanner::fatal(char *msg){ - std::cerr << "line " << tline << ", column " << (tchar + 1) << ": " - << msg << std::endl; - exit(1); +void Scanner::fatal(uint ofs, const char *msg) const +{ + out.flush(); + std::cerr << "re2c: error: " + << "line " << tline << ", column " << (tchar + ofs + 1) << ": " + << msg << std::endl; + exit(1); } + +} // end namespace re2c + diff --git a/test/scanner.re b/test/scanner.re index 93ca0128..aaf376ae 100644 --- a/test/scanner.re +++ b/test/scanner.re @@ -1,10 +1,13 @@ - +/* $Id$ */ #include #include #include +#include #include "scanner.h" #include "parser.h" #include "y.tab.h" +#include "globals.h" +#include "dfa.h" extern YYSTYPE yylval; @@ -22,78 +25,128 @@ extern YYSTYPE yylval; #define RETURN(i) {cur = cursor; return i;} +namespace re2c +{ -Scanner::Scanner(std::istream& i) : in(i), - bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL), - top(NULL), eof(NULL), tchar(0), tline(0), cline(1) { +Scanner::Scanner(std::istream& i, std::ostream& o) + : in(i) + , out(o) + , bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL) + , top(NULL), eof(NULL), tchar(0), tline(0), cline(1), iscfg(0) +{ ; } -char *Scanner::fill(char *cursor){ - if(!eof){ - uint cnt = tok - bot; - if(cnt){ - memcpy(bot, tok, lim - tok); - tok = bot; - ptr -= cnt; - cursor -= cnt; - pos -= cnt; - lim -= cnt; - } - if((top - lim) < BSIZE){ - char *buf = new char[(lim - bot) + BSIZE]; - memcpy(buf, tok, lim - tok); - tok = buf; - ptr = &buf[ptr - bot]; - cursor = &buf[cursor - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[BSIZE]; - delete [] bot; - bot = buf; - } - if((cnt = in.rdbuf()->sgetn((char*) lim, BSIZE)) != BSIZE){ - eof = &lim[cnt]; *eof++ = '\n'; +char *Scanner::fill(char *cursor) +{ + if(!eof) + { + uint cnt = tok - bot; + if(cnt) + { + memcpy(bot, tok, lim - tok); + tok = bot; + ptr -= cnt; + cursor -= cnt; + pos -= cnt; + lim -= cnt; + } + if((top - lim) < BSIZE) + { + char *buf = new char[(lim - bot) + BSIZE]; + memcpy(buf, tok, lim - tok); + tok = buf; + ptr = &buf[ptr - bot]; + cursor = &buf[cursor - bot]; + pos = &buf[pos - bot]; + lim = &buf[lim - bot]; + top = &lim[BSIZE]; + delete [] bot; + bot = buf; + } + in.read(lim, BSIZE); + if ((cnt = in.gcount()) != BSIZE ) + { + eof = &lim[cnt]; *eof++ = '\0'; + } + lim += cnt; } - lim += cnt; - } - return cursor; + return cursor; } /*!re2c -any = [\000-\377]; -dot = any \ [\n]; -esc = dot \ [\\]; -cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ; -dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\""; -sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ; -letter = [a-zA-Z]; -digit = [0-9]; +zero = "\000"; +any = [\000-\377]; +dot = any \ [\n]; +esc = dot \ [\\]; +istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ; +cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ; +dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\""; +sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ; +letter = [a-zA-Z]; +digit = [0-9]; +number = "0" | ("-"? [1-9] digit*); +name = letter (letter|digit)*; +cname = ":" letter (letter|digit|"_")*; +space = [ \t]; +eol = ("\r\n" | "\n"); +config = "re2c" cname+; +value = [^\r\n; \t]* | dstring | sstring; */ -int Scanner::echo(std::ostream &out){ +int Scanner::echo() +{ char *cursor = cur; + bool ignore_eoc = false; - // Catch EOF - if (eof && cursor == eof) + if (eof && cursor == eof) // Catch EOF + { return 0; + } tok = cursor; echo: /*!re2c - "/*!re2c" { out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); - tok = cursor; - RETURN(1); } - "\n" { if(cursor == eof) RETURN(0); - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - tok = pos = cursor; cline++; - goto echo; } - any { goto echo; } + "/*!re2c" { + out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); + tok = cursor; + RETURN(1); + } + "/*!max:re2c" { + out << "#define YYMAXFILL " << maxFill << std::endl; + tok = pos = cursor; + ignore_eoc = true; + goto echo; + } + "*" "/" { + if (ignore_eoc) { + ignore_eoc = false; + } else { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); + } + tok = pos = cursor; + goto echo; + } + "\n" { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); + tok = pos = cursor; cline++; + goto echo; + } + zero { + out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0 + if(cursor == eof) { + RETURN(0); + } + } + any { + goto echo; + } */ } -int Scanner::scan(){ +int Scanner::scan() +{ char *cursor = cur; uint depth; @@ -101,6 +154,14 @@ scan: tchar = cursor - pos; tline = cline; tok = cursor; + if (iscfg == 1) + { + goto config; + } + else if (iscfg == 2) + { + goto value; + } /*!re2c "{" { depth = 1; goto code; @@ -122,6 +183,10 @@ scan: "\"" { fatal("unterminated string constant (missing \")"); } "'" { fatal("unterminated string constant (missing ')"); } + istring { cur = cursor; + yylval.regexp = invToRE(token()); + return RANGE; } + cstring { cur = cursor; yylval.regexp = ranToRE(token()); return RANGE; } @@ -133,6 +198,9 @@ scan: [*+?] { yylval.op = *tok; RETURN(CLOSE); } + "{0,}" { yylval.op = '*'; + RETURN(CLOSE); } + "{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1); yylval.extop.maxsize = atoi((char *)tok+1); RETURN(CLOSESIZE); } @@ -145,18 +213,35 @@ scan: yylval.extop.maxsize = -1; RETURN(CLOSESIZE); } - letter (letter|digit)* { cur = cursor; + "{" [0-9]* "," { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } + + config { cur = cursor; + tok+= 5; /* skip "re2c:" */ + iscfg = 1; + yylval.str = new Str(token()); + return CONFIG; + } + + name { cur = cursor; yylval.symbol = Symbol::find(token()); return ID; } - [ \t]+ { goto scan; } + "." { cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } + + space+ { goto scan; } - "\n" { if(cursor == eof) RETURN(0); + eol { if(cursor == eof) RETURN(0); pos = cursor; cline++; goto scan; } - any { std::cerr << "unexpected character: " << *tok << std::endl; + any { std::ostringstream msg; + msg << "unexpected character: "; + prtChOrHex(msg, *tok); + fatal(msg.str().c_str()); goto scan; } */ @@ -180,22 +265,54 @@ code: comment: /*!re2c - "*/" { if(--depth == 0) + "*/" { if(--depth == 0) goto scan; else goto comment; } - "/*" { ++depth; + "/*" { ++depth; + fatal("ambiguous /* found"); goto comment; } - "\n" { if(cursor == eof) RETURN(0); + "\n" { if(cursor == eof) RETURN(0); tok = pos = cursor; cline++; goto comment; } - any { goto comment; } + any { if(cursor == eof) RETURN(0); + goto comment; } +*/ + +config: +/*!re2c + space+ { goto config; } + "=" space* { iscfg = 2; + cur = cursor; + RETURN('='); + } + any { fatal("missing '='"); } +*/ + +value: +/*!re2c + number { cur = cursor; + yylval.number = atoi(token().to_string().c_str()); + iscfg = 0; + return NUMBER; + } + value { cur = cursor; + yylval.str = new Str(token()); + iscfg = 0; + return VALUE; + } */ } -void Scanner::fatal(char *msg){ - std::cerr << "line " << tline << ", column " << (tchar + 1) << ": " - << msg << std::endl; - exit(1); +void Scanner::fatal(uint ofs, const char *msg) const +{ + out.flush(); + std::cerr << "re2c: error: " + << "line " << tline << ", column " << (tchar + ofs + 1) << ": " + << msg << std::endl; + exit(1); } + +} // end namespace re2c + -- 2.40.0