From: helly Date: Mon, 17 Apr 2006 15:03:53 +0000 (+0000) Subject: - Update lesson 2 and split YYMARKER/YYCTXMARKER stuff in two steps X-Git-Tag: 0.13.6~371 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7217047a24371cd483ad776a8e8cb6fcc9327e40;p=re2c - Update lesson 2 and split YYMARKER/YYCTXMARKER stuff in two steps --- diff --git a/lessons/002_strip_comments/.cvsignore b/lessons/002_strip_comments/.cvsignore index 1938665b..eb177879 100755 --- a/lessons/002_strip_comments/.cvsignore +++ b/lessons/002_strip_comments/.cvsignore @@ -2,3 +2,4 @@ a.out *.temp *.diff *.o +t.c diff --git a/lessons/002_strip_comments/readme.txt b/lessons/002_strip_comments/readme.txt index 9805d20b..353d6690 100755 --- a/lessons/002_strip_comments/readme.txt +++ b/lessons/002_strip_comments/readme.txt @@ -10,4 +10,12 @@ The first scanner can be generated with: re2c -s -o t.c strip_001.s.re In the second step we will learn about YYMARKER that stores backtracking -information and YYCTXMARKER that is used for trailing contexts. +information. + + re2c -s -o t.c strip_002.s.re + +The third step brings trailing contexts that are stored in YYCTXMARKER. We also +switch to the -b option instead of -s since the scanner gets more and more +complex. + + re2c -b -o t.c strip_003.b.re diff --git a/lessons/002_strip_comments/strip_002.s.c b/lessons/002_strip_comments/strip_002.s.c index b87912cf..be5cff40 100755 --- a/lessons/002_strip_comments/strip_002.s.c +++ b/lessons/002_strip_comments/strip_002.s.c @@ -1,13 +1,13 @@ /* Generated by re2c */ #line 1 "strip_002.s.re" /* re2c lesson 002_strip_comments, strip_002.s, (c) M. 
Boerger 2006 */ -#line 27 "strip_002.s.re" +#line 34 "strip_002.s.re" #include #include #include -#define YYMAXFILL 4 +#define YYMAXFILL 2 #define BSIZE 128 #if BSIZE < YYMAXFILL @@ -18,13 +18,12 @@ #define YYCURSOR s.cur #define YYLIMIT s.lim #define YYMARKER s.mrk -#define YYCTXMARKER s.ctx #define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } typedef struct Scanner { FILE *fp; - char *cur, *tok, *lim, *eof, *ctx, *mrk; + char *cur, *tok, *lim, *eof, *mrk; char buffer[BSIZE]; } Scanner; @@ -46,7 +45,6 @@ int fill(Scanner *s, int len) s->cur -= cnt; s->lim -= cnt; s->mrk -= cnt; - s->ctx -= cnt; } cnt = BSIZE - cnt; if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) @@ -86,130 +84,91 @@ int scan(FILE *fp) { s.tok = s.cur; -#line 90 "" +#line 88 "" { YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= 0x0C) { - if(yych == 0x0A) goto yy5; - goto yy6; - } else { - if(yych <= 0x0D) goto yy4; - if(yych != '/') goto yy6; - } + if(yych != '/') goto yy4; ++YYCURSOR; - if((yych = *YYCURSOR) == '*') goto yy12; - if(yych == '/') goto yy14; + if((yych = *YYCURSOR) == '*') goto yy5; + if(yych == '/') goto yy7; yy3: -#line 120 "strip_002.s.re" +#line 124 "strip_002.s.re" { fputc(*s.tok, stdout); continue; } -#line 109 "" +#line 101 "" yy4: - yych = *(YYMARKER = ++YYCURSOR); - if(yych == 0x0A) goto yy11; - goto yy3; -yy5: - YYCTXMARKER = YYCURSOR + 1; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '/') goto yy7; - goto yy3; -yy6: yych = *++YYCURSOR; goto yy3; -yy7: - yych = *++YYCURSOR; - if(yych == '*') goto yy9; -yy8: - YYCURSOR = YYMARKER; - goto yy3; -yy9: - ++YYCURSOR; - YYCURSOR = YYCTXMARKER; -#line 118 "strip_002.s.re" - { echo(&s); nlcomment = 1; continue; } -#line 133 "" -yy11: - YYCTXMARKER = YYCURSOR + 1; - yych = *++YYCURSOR; - if(yych == '/') goto yy7; - goto yy8; -yy12: +yy5: ++YYCURSOR; -#line 119 "strip_002.s.re" +#line 123 "strip_002.s.re" { goto comment; } -#line 143 "" -yy14: 
+#line 109 "" +yy7: ++YYCURSOR; -#line 117 "strip_002.s.re" +#line 122 "strip_002.s.re" { goto cppcomment; } -#line 148 "" +#line 114 "" } -#line 121 "strip_002.s.re" +#line 125 "strip_002.s.re" comment: s.tok = s.cur; -#line 155 "" +#line 121 "" { YYCTYPE yych; if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych != '*') goto yy20; + if(yych != '*') goto yy13; ++YYCURSOR; - if((yych = *YYCURSOR) == '/') goto yy21; -yy19: -#line 126 "strip_002.s.re" + if((yych = *YYCURSOR) == '/') goto yy14; +yy12: +#line 130 "strip_002.s.re" { goto comment; } -#line 166 "" -yy20: +#line 132 "" +yy13: yych = *++YYCURSOR; - goto yy19; -yy21: + goto yy12; +yy14: ++YYCURSOR; -#line 125 "strip_002.s.re" +#line 129 "strip_002.s.re" { goto commentws; } -#line 174 "" +#line 140 "" } -#line 127 "strip_002.s.re" +#line 131 "strip_002.s.re" commentws: s.tok = s.cur; -#line 181 "" +#line 147 "" { YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= 0x0D) { - if(yych <= 0x09) { - if(yych <= 0x08) goto yy32; - goto yy31; - } else { - if(yych <= 0x0A) goto yy27; - if(yych <= 0x0C) goto yy32; - } + if(yych <= 0x0C) { + if(yych <= 0x08) goto yy23; + if(yych <= 0x09) goto yy22; + if(yych <= 0x0A) goto yy20; + goto yy23; } else { - if(yych <= ' ') { - if(yych <= 0x1F) goto yy32; - goto yy31; - } else { - if(yych == '/') goto yy29; - goto yy32; - } + if(yych <= 0x0D) goto yy18; + if(yych == ' ') goto yy22; + goto yy23; } +yy18: ++YYCURSOR; - if((yych = *YYCURSOR) == 0x0A) goto yy37; -yy26: -#line 140 "strip_002.s.re" + if((yych = *YYCURSOR) == 0x0A) goto yy25; +yy19: +#line 143 "strip_002.s.re" { goto commentws; } -#line 208 "" -yy27: - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '/') goto yy35; -yy28: -#line 132 "strip_002.s.re" +#line 168 "" +yy20: + ++YYCURSOR; +yy21: +#line 135 "strip_002.s.re" { if (!nlcomment) { @@ -218,68 +177,53 @@ yy28: nlcomment = 0; continue; } -#line 222 "" -yy29: +#line 181 "" 
+yy22: + yych = *++YYCURSOR; + goto yy19; +yy23: ++YYCURSOR; - if((yych = *YYCURSOR) == '*') goto yy33; -yy30: -#line 141 "strip_002.s.re" +#line 144 "strip_002.s.re" { echo(&s); nlcomment = 0; continue; } -#line 229 "" -yy31: - yych = *++YYCURSOR; - goto yy26; -yy32: - yych = *++YYCURSOR; - goto yy30; -yy33: +#line 189 "" +yy25: ++YYCURSOR; -#line 131 "strip_002.s.re" - { goto comment; } -#line 240 "" -yy35: - yych = *++YYCURSOR; - if(yych == '*') goto yy33; - YYCURSOR = YYMARKER; - goto yy28; -yy37: - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '/') goto yy35; - goto yy28; + yych = *YYCURSOR; + goto yy21; } -#line 142 "strip_002.s.re" +#line 145 "strip_002.s.re" cppcomment: s.tok = s.cur; -#line 256 "" +#line 200 "" { YYCTYPE yych; if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych == 0x0A) goto yy42; - if(yych != 0x0D) goto yy44; + if(yych == 0x0A) goto yy30; + if(yych != 0x0D) goto yy32; ++YYCURSOR; - if((yych = *YYCURSOR) == 0x0A) goto yy45; -yy41: -#line 147 "strip_002.s.re" + if((yych = *YYCURSOR) == 0x0A) goto yy33; +yy29: +#line 150 "strip_002.s.re" { goto cppcomment; } -#line 268 "" -yy42: +#line 212 "" +yy30: ++YYCURSOR; -yy43: -#line 146 "strip_002.s.re" +yy31: +#line 149 "strip_002.s.re" { echo(&s); continue; } -#line 274 "" -yy44: +#line 218 "" +yy32: yych = *++YYCURSOR; - goto yy41; -yy45: + goto yy29; +yy33: ++YYCURSOR; yych = *YYCURSOR; - goto yy43; + goto yy31; } -#line 148 "strip_002.s.re" +#line 151 "strip_002.s.re" } diff --git a/lessons/002_strip_comments/strip_002.s.re b/lessons/002_strip_comments/strip_002.s.re index 9422005f..b440a6b4 100755 --- a/lessons/002_strip_comments/strip_002.s.re +++ b/lessons/002_strip_comments/strip_002.s.re @@ -2,7 +2,6 @@ /*!ignore:re2c - complexity - . When two comments are only separated by whitespace we want to drop both. . When a comemnt is preceeded by a new line and followed by whitespace and a new line then we can drop the trailing whitespace and new line. 
But we cannot simply use the following two rules: @@ -12,8 +11,16 @@ new scanner. . Meanwhile our scanner gets a bit more complex and we have to add two more things. First the scanner code now uses a YYMARKER to store backtracking - information. And second we have a new rule that utilizes trailing contexts. - Therefore we also need to add YYCTXMARKER. + information. + +- backtracking information + . When the scanner has two rules that can have the same beginning but a + different ending then it needs to store the position that identifies the + common part. This is called backtracking. As mentioned above re2c expects + you to provide compiler define YYMARKER and a pointer variable. + . When shifting buffer contents as done in our fill function the marker needs + to be corrected, too. + - formatting . Until now we only used single line expression code and we always had the opening { on the same line as the rule itself. If we have multiline rule @@ -40,13 +47,12 @@ #define YYCURSOR s.cur #define YYLIMIT s.lim #define YYMARKER s.mrk -#define YYCTXMARKER s.ctx #define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } typedef struct Scanner { FILE *fp; - char *cur, *tok, *lim, *eof, *ctx, *mrk; + char *cur, *tok, *lim, *eof, *mrk; char buffer[BSIZE]; } Scanner; @@ -68,7 +74,6 @@ int fill(Scanner *s, int len) s->cur -= cnt; s->lim -= cnt; s->mrk -= cnt; - s->ctx -= cnt; } cnt = BSIZE - cnt; if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) @@ -115,7 +120,6 @@ int scan(FILE *fp) ANY = [^] ; "/" "/" { goto cppcomment; } - NL / "/""*" { echo(&s); nlcomment = 1; continue; } "/" "*" { goto comment; } ANY { fputc(*s.tok, stdout); continue; } */ @@ -128,7 +132,6 @@ comment: commentws: s.tok = s.cur; /*!re2c - NL? 
"/" "*" { goto comment; } NL { if (!nlcomment) { diff --git a/lessons/002_strip_comments/strip_003.b.c b/lessons/002_strip_comments/strip_003.b.c new file mode 100755 index 00000000..8de0bf30 --- /dev/null +++ b/lessons/002_strip_comments/strip_003.b.c @@ -0,0 +1,312 @@ +/* Generated by re2c */ +#line 1 "strip_003.b.re" +/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 */ +#line 29 "strip_003.b.re" + +#include +#include +#include + +#define YYMAXFILL 4 +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYCTXMARKER s.ctx +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + char *cur, *tok, *lim, *eof, *ctx, *mrk; + char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->tok = s->cur = s->lim = s->mrk = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memcpy(s->buffer, s->tok, s->lim - s->tok); + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + s->ctx -= cnt; + } + cnt = BSIZE - cnt; + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + int nlcomment = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; + { + +#line 91 "" + { + YYCTYPE yych; + + if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + yych = *YYCURSOR; + if(yych <= 0x0C) { + if(yych == 0x0A) goto yy5; + goto yy6; + } else { + if(yych <= 0x0D) goto yy4; + if(yych != '/') goto yy6; + } + ++YYCURSOR; + if((yych = *YYCURSOR) == '*') goto yy12; + 
if(yych == '/') goto yy14; +yy3: +#line 122 "strip_003.b.re" + { fputc(*s.tok, stdout); continue; } +#line 110 "" +yy4: + yych = *(YYMARKER = ++YYCURSOR); + if(yych == 0x0A) goto yy11; + goto yy3; +yy5: + YYCTXMARKER = YYCURSOR + 1; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '/') goto yy7; + goto yy3; +yy6: + yych = *++YYCURSOR; + goto yy3; +yy7: + yych = *++YYCURSOR; + if(yych == '*') goto yy9; +yy8: + YYCURSOR = YYMARKER; + goto yy3; +yy9: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 120 "strip_003.b.re" + { echo(&s); nlcomment = 1; continue; } +#line 134 "" +yy11: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + if(yych == '/') goto yy7; + goto yy8; +yy12: + ++YYCURSOR; +#line 121 "strip_003.b.re" + { goto comment; } +#line 144 "" +yy14: + ++YYCURSOR; +#line 119 "strip_003.b.re" + { goto cppcomment; } +#line 149 "" + } + } +#line 123 "strip_003.b.re" + +comment: + s.tok = s.cur; + { + +#line 158 "" + { + YYCTYPE yych; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych != '*') goto yy20; + ++YYCURSOR; + if((yych = *YYCURSOR) == '/') goto yy21; +yy19: +#line 128 "strip_003.b.re" + { goto comment; } +#line 169 "" +yy20: + yych = *++YYCURSOR; + goto yy19; +yy21: + ++YYCURSOR; +#line 127 "strip_003.b.re" + { goto commentws; } +#line 177 "" + } + } +#line 129 "strip_003.b.re" + +commentws: + s.tok = s.cur; + { + +#line 186 "" + { + YYCTYPE yych; + if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + yych = *YYCURSOR; + if(yych <= 0x0D) { + if(yych <= 0x09) { + if(yych <= 0x08) goto yy32; + goto yy31; + } else { + if(yych <= 0x0A) goto yy27; + if(yych <= 0x0C) goto yy32; + } + } else { + if(yych <= ' ') { + if(yych <= 0x1F) goto yy32; + goto yy31; + } else { + if(yych == '/') goto yy29; + goto yy32; + } + } + ++YYCURSOR; + if((yych = *YYCURSOR) == 0x0A) goto yy37; +yy26: +#line 142 "strip_003.b.re" + { goto commentws; } +#line 213 "" +yy27: + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '/') goto yy35; +yy28: +#line 134 "strip_003.b.re" + { + if 
(!nlcomment) + { + echo(&s); + } + nlcomment = 0; + continue; + } +#line 227 "" +yy29: + ++YYCURSOR; + if((yych = *YYCURSOR) == '*') goto yy33; +yy30: +#line 143 "strip_003.b.re" + { echo(&s); nlcomment = 0; continue; } +#line 234 "" +yy31: + yych = *++YYCURSOR; + goto yy26; +yy32: + yych = *++YYCURSOR; + goto yy30; +yy33: + ++YYCURSOR; +#line 133 "strip_003.b.re" + { goto comment; } +#line 245 "" +yy35: + yych = *++YYCURSOR; + if(yych == '*') goto yy33; + YYCURSOR = YYMARKER; + goto yy28; +yy37: + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '/') goto yy35; + goto yy28; + } + } +#line 144 "strip_003.b.re" + +cppcomment: + s.tok = s.cur; + { + +#line 263 "" + { + YYCTYPE yych; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych == 0x0A) goto yy42; + if(yych != 0x0D) goto yy44; + ++YYCURSOR; + if((yych = *YYCURSOR) == 0x0A) goto yy45; +yy41: +#line 149 "strip_003.b.re" + { goto cppcomment; } +#line 275 "" +yy42: + ++YYCURSOR; +yy43: +#line 148 "strip_003.b.re" + { echo(&s); continue; } +#line 281 "" +yy44: + yych = *++YYCURSOR; + goto yy41; +yy45: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy43; + } + } +#line 150 "strip_003.b.re" + + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/lessons/002_strip_comments/strip_003.b.re b/lessons/002_strip_comments/strip_003.b.re new file mode 100755 index 00000000..41db8f20 --- /dev/null +++ b/lessons/002_strip_comments/strip_003.b.re @@ -0,0 +1,171 @@ +/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 */ +/*!ignore:re2c + +- more complexity + . Additional to what we strip out already what about two consequtive comment + blocks? When two comments are only separated by whitespace we want to drop + both. 
In other words, when detecting the end of a comment block we need to + check whether it is followed by only whitespace and then a new comment, in + which case we continue ignoring the input. If it is followed only by white + space and a new line we strip out the white space and the new line. In any + other case we start outputting all that follows. + . The solution to the above is to use trailing contexts. + +- trailing contexts + . Re2c allows you to check for a portion of input and only recognize it when it + is followed by another portion. This is called a trailing context. + . The trailing context is not part of the identified input. That means that + it follows exactly at the cursor. A consequence is that the scanner has + already read more input and on the next run you need to restore the beginning + of input, in our case s.tok, from the cursor, here s.cur, rather than + restoring to the beginning of the buffer. This way the scanner can reuse + the portion it has already read. + . The position of the trailing context is stored in YYCTXMARKER for which + a pointer variable needs to be provided. + . As with YYMARKER the corresponding variable needs to be corrected if we + shift the buffer contents. 
+ +*/ + +#include +#include +#include + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYCTXMARKER s.ctx +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + char *cur, *tok, *lim, *eof, *ctx, *mrk; + char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->tok = s->cur = s->lim = s->mrk = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memcpy(s->buffer, s->tok, s->lim - s->tok); + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + s->ctx -= cnt; + } + cnt = BSIZE - cnt; + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + int nlcomment = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + WS = [\r\n\t ] ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + NL / "/""*" { echo(&s); nlcomment = 1; continue; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { goto commentws; } + ANY { goto comment; } +*/ +commentws: + s.tok = s.cur; +/*!re2c + NL? 
"/" "*" { goto comment; } + NL { + if (!nlcomment) + { + echo(&s); + } + nlcomment = 0; + continue; + } + WS { goto commentws; } + ANY { echo(&s); nlcomment = 0; continue; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +}