From 35453721e670b66d8e6caa65660a39173f2b2667 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Sun, 9 Oct 2005 00:50:31 +0000 Subject: [PATCH] * re2c: Start syncing back up to re2c HEAD. This gets us up to 2004-03-29. svn path=/trunk/yasm/; revision=1270 --- tools/re2c/actions.c | 36 ++++ tools/re2c/code.c | 7 +- tools/re2c/examples/repeater.re | 35 ++++ tools/re2c/re.h | 26 ++- tools/re2c/re2c-parser.y | 8 +- tools/re2c/scanner.c | 289 +++++++++++++++++++------------- tools/re2c/scanner.re | 16 ++ 7 files changed, 291 insertions(+), 126 deletions(-) create mode 100644 tools/re2c/examples/repeater.re diff --git a/tools/re2c/actions.c b/tools/re2c/actions.c index aca8fa4b..f78726a8 100644 --- a/tools/re2c/actions.c +++ b/tools/re2c/actions.c @@ -129,6 +129,16 @@ RegExp_calcSize(RegExp *re, Char *rep) RegExp_calcSize(re->d.exp, rep); re->size = re->d.exp->size + 1; break; + case CLOSEVOP: + RegExp_calcSize(re->d.CloseVOp.exp, rep); + + if (re->d.CloseVOp.max >= 0) + re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) + + ((1 + re->d.CloseVOp.exp->size) * + (re->d.CloseVOp.max - re->d.CloseVOp.min)); + else + re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) + 1; + break; } } @@ -171,6 +181,9 @@ AltOp_compile(RegExp *re, Char *rep, Ins *i){ void RegExp_compile(RegExp *re, Char *rep, Ins *i) { + Ins *jumppoint; + int st = 0; + switch (re->type) { case NULLOP: break; @@ -200,6 +213,26 @@ RegExp_compile(RegExp *re, Char *rep, Ins *i) i->i.tag = FORK; i->i.link = i - re->d.exp->size; break; + case CLOSEVOP: + jumppoint = i + ((1 + re->d.CloseVOp.exp->size) * + (re->d.CloseVOp.max - re->d.CloseVOp.min)); + for(st = re->d.CloseVOp.min; st < re->d.CloseVOp.max; st++) { + i->i.tag = FORK; + i->i.link = jumppoint; + i+=1; + RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]); + i += re->d.CloseVOp.exp->size; + } + for(st = 0; st < re->d.CloseVOp.min; st++) { + RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]); + i += re->d.CloseVOp.exp->size; + if(re->d.CloseVOp.max 
< 0 && st == 0) { + i->i.tag = FORK; + i->i.link = i - re->d.CloseVOp.exp->size; + i++; + } + } + break; } } @@ -257,6 +290,9 @@ RegExp_split(RegExp *re, CharSet *s) case CLOSEOP: RegExp_split(re->d.exp, s); break; + case CLOSEVOP: + RegExp_split(re->d.CloseVOp.exp, s); + break; } } diff --git a/tools/re2c/code.c b/tools/re2c/code.c index c4192f27..36d02854 100644 --- a/tools/re2c/code.c +++ b/tools/re2c/code.c @@ -273,6 +273,7 @@ static void doLinear(FILE *o, unsigned int i, Span *s, unsigned int n, while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){ if(s[1].to == next && n == 3){ indent(o, i); genIf(o, "!=", s[0].ub); genGoTo(o, bg); + indent(o, i); genGoTo(o, next); return; } else { indent(o, i); genIf(o, "==", s[0].ub); genGoTo(o, s[1].to); @@ -280,18 +281,18 @@ static void doLinear(FILE *o, unsigned int i, Span *s, unsigned int n, n -= 2; s += 2; } if(n == 1){ - if(bg != next){ - indent(o, i); genGoTo(o, s[0].to); - } + indent(o, i); genGoTo(o, s[0].to); return; } else if(n == 2 && bg == next){ indent(o, i); genIf(o, ">=", s[0].ub); genGoTo(o, s[1].to); + indent(o, i); genGoTo(o, next); return; } else { indent(o, i); genIf(o, "<=", s[0].ub - 1); genGoTo(o, bg); n -= 1; s += 1; } } + indent(o, i); genGoTo(o, next); } void diff --git a/tools/re2c/examples/repeater.re b/tools/re2c/examples/repeater.re new file mode 100644 index 00000000..c8e20557 --- /dev/null +++ b/tools/re2c/examples/repeater.re @@ -0,0 +1,35 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define RET(n) printf("%d\n", n); return n + +int scan(char *s, int l){ +char *p = s; +char *q; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l) +#define YYMARKER q +#define YYFILL(n) +/*!re2c + "a"{1}"\n" {RET(1);} + "a"{2,3}"\n" {RET(2);} + "a"{4,}"\n" {RET(3);} + [^aq]|"\n" {RET(0);} +*/ +} + +#define do_scan(str) scan(str, strlen(str)) + +main() +{ + do_scan("a\n"); + do_scan("aa\n"); + do_scan("aaa\n"); + do_scan("aaaa\n"); + do_scan("q"); + do_scan("a"); + do_scan("\n"); + do_scan("0"); +} 
diff --git a/tools/re2c/re.h b/tools/re2c/re.h index 99052add..a07d567b 100644 --- a/tools/re2c/re.h +++ b/tools/re2c/re.h @@ -5,6 +5,12 @@ #include "tools/re2c/token.h" #include "tools/re2c/ins.h" +typedef struct extop { + char op; + int minsize; + int maxsize; +} ExtOp; + typedef struct CharPtn { unsigned int card; struct CharPtn *fix; @@ -67,7 +73,8 @@ typedef enum { RULEOP, ALTOP, CATOP, - CLOSEOP + CLOSEOP, + CLOSEVOP } RegExpType; typedef struct RegExp { @@ -91,6 +98,12 @@ typedef struct RegExp { } AltCatOp; /* for CloseOp */ struct RegExp *exp; + /* for CloseVOp*/ + struct { + struct RegExp *exp; + int min; + int max; + } CloseVOp; } d; } RegExp; @@ -154,6 +167,17 @@ RegExp_new_CloseOp(RegExp *e) return r; } +static RegExp * +RegExp_new_CloseVOp(RegExp *e, int lb, int ub) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CLOSEVOP; + r->d.CloseVOp.exp = e; + r->d.CloseVOp.min = lb; + r->d.CloseVOp.max = ub; + return r; +} + extern void genCode(FILE *, RegExp*); extern RegExp *mkDiff(RegExp*, RegExp*); extern RegExp *strToRE(SubStr); diff --git a/tools/re2c/re2c-parser.y b/tools/re2c/re2c-parser.y index 96ab6384..1aed95fc 100644 --- a/tools/re2c/re2c-parser.y +++ b/tools/re2c/re2c-parser.y @@ -23,12 +23,14 @@ static Scanner *in; RegExp *regexp; Token *token; char op; + ExtOp extop; } -%token CLOSE ID CODE RANGE STRING +%token CLOSESIZE CLOSE ID CODE RANGE STRING %type <op> CLOSE %type <op> close +%type <extop> CLOSESIZE %type <symbol> ID %type <token> CODE %type <regexp> RANGE STRING @@ -97,6 +99,10 @@ factor : primary break; } } + | primary CLOSESIZE + { + $$ = RegExp_new_CloseVOp($1, $2.minsize, $2.maxsize); + } ; close : CLOSE diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c index efb5f460..e94cc40a 100644 --- a/tools/re2c/scanner.c +++ b/tools/re2c/scanner.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.9.1-C on Thu Aug 14 20:37:33 2003 +/* Generated by re2c 0.9.1-C on Sat Oct 8 17:52:50 2005 */ #line 1 "scanner.re" #include @@ -8,6 +8,10 @@ #include "tools/re2c/globals.h" #include 
"re2c-parser.h" +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + #define BSIZE 8192 #define YYCTYPE unsigned char @@ -64,7 +68,7 @@ fill(Scanner *s, unsigned char *cursor) return cursor; } -#line 75 "scanner.re" +#line 77 "scanner.re" int @@ -79,7 +83,7 @@ Scanner_echo(Scanner *s, FILE *out) s->tok = cursor; echo: -#line 85 "scanner.c" +#line 87 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; @@ -94,17 +98,17 @@ yy0: yych = *(YYMARKER = ++YYCURSOR); if(yych == '*') goto yy7; yy3: -#line 96 "scanner.re" +#line 98 "scanner.re" { goto echo; } -#line 102 "scanner.c" +#line 104 "scanner.c" yy4: yych = *++YYCURSOR; -#line 92 "scanner.re" +#line 94 "scanner.re" { if(cursor == s->eof) RETURN(0); fwrite(s->tok, 1, cursor - s->tok, out); s->tok = s->pos = cursor; s->cline++; oline++; goto echo; } -#line 110 "scanner.c" +#line 112 "scanner.c" yy6: yych = *++YYCURSOR; goto yy3; yy7: yych = *++YYCURSOR; @@ -123,13 +127,13 @@ yy9: yych = *++YYCURSOR; if(yych != 'c') goto yy8; yych = *++YYCURSOR; -#line 89 "scanner.re" +#line 91 "scanner.re" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out); s->tok = cursor; RETURN(1); } -#line 133 "scanner.c" +#line 135 "scanner.c" } -#line 97 "scanner.re" +#line 99 "scanner.re" } @@ -145,7 +149,7 @@ scan: s->tline = s->cline; s->tok = cursor; -#line 151 "scanner.c" +#line 153 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; @@ -199,40 +203,43 @@ yy15: } } } -yy17: yych = *++YYCURSOR; - -#line 112 "scanner.re" +yy17: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych <= '/') goto yy18; + if(yych <= '9') goto yy56; +yy18: +#line 114 "scanner.re" { depth = 1; goto code; } -#line 211 "scanner.c" +#line 216 "scanner.c" yy19: yych = *++YYCURSOR; if(yych == '*') goto yy54; yy20: -#line 131 "scanner.re" +#line 133 "scanner.re" { RETURN(*s->tok); } -#line 217 "scanner.c" +#line 222 "scanner.c" yy21: yych = *++YYCURSOR; if(yych == '/') goto yy52; yy22: -#line 133 "scanner.re" +#line 135 "scanner.re" { yylval.op = *s->tok; 
RETURN(CLOSE); } -#line 224 "scanner.c" -yy23: yyaccept = 0; +#line 229 "scanner.c" +yy23: yyaccept = 1; yych = *(YYMARKER = ++YYCURSOR); if(yych != '\n') goto yy48; yy24: -#line 124 "scanner.re" +#line 126 "scanner.re" { Scanner_fatal(s, "bad string"); } -#line 231 "scanner.c" -yy25: yyaccept = 1; +#line 236 "scanner.c" +yy25: yyaccept = 2; yych = *(YYMARKER = ++YYCURSOR); if(yych != '\n') goto yy42; yy26: -#line 129 "scanner.re" +#line 131 "scanner.re" { Scanner_fatal(s, "bad character constant"); } -#line 238 "scanner.c" +#line 243 "scanner.c" yy27: yych = *++YYCURSOR; goto yy20; yy28: yych = *++YYCURSOR; @@ -240,34 +247,34 @@ yy28: yych = *++YYCURSOR; yy29: yych = *++YYCURSOR; goto yy40; yy30: -#line 136 "scanner.re" +#line 150 "scanner.re" { SubStr substr; s->cur = cursor; substr = Scanner_token(s); yylval.symbol = Symbol_find(&substr); return ID; } -#line 252 "scanner.c" +#line 257 "scanner.c" yy31: yych = *++YYCURSOR; goto yy38; yy32: -#line 142 "scanner.re" +#line 156 "scanner.re" { goto scan; } -#line 258 "scanner.c" +#line 263 "scanner.c" yy33: yych = *++YYCURSOR; -#line 144 "scanner.re" +#line 158 "scanner.re" { if(cursor == s->eof) RETURN(0); s->pos = cursor; s->cline++; goto scan; } -#line 266 "scanner.c" +#line 271 "scanner.c" yy35: yych = *++YYCURSOR; -#line 149 "scanner.re" +#line 163 "scanner.re" { fprintf(stderr, "unexpected character: '%c'\n", *s->tok); goto scan; } -#line 273 "scanner.c" +#line 278 "scanner.c" yy37: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; @@ -299,8 +306,9 @@ yy42: if(yych <= '['){ } yy43: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: goto yy24; - case 1: goto yy26; + case 0: goto yy18; + case 1: goto yy24; + case 2: goto yy26; } yy44: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); @@ -309,11 +317,11 @@ yy44: ++YYCURSOR; goto yy41; yy45: yych = *++YYCURSOR; -#line 126 "scanner.re" +#line 128 "scanner.re" { s->cur = cursor; yylval.regexp = ranToRE(Scanner_token(s)); return RANGE; } -#line 319 
"scanner.c" +#line 325 "scanner.c" yy47: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; @@ -331,184 +339,223 @@ yy48: if(yych <= '!'){ goto yy47; yy50: yych = *++YYCURSOR; -#line 121 "scanner.re" +#line 123 "scanner.re" { s->cur = cursor; yylval.regexp = strToRE(Scanner_token(s)); return STRING; } -#line 341 "scanner.c" +#line 347 "scanner.c" yy52: yych = *++YYCURSOR; -#line 118 "scanner.re" +#line 120 "scanner.re" { s->tok = cursor; RETURN(0); } -#line 347 "scanner.c" +#line 353 "scanner.c" yy54: yych = *++YYCURSOR; -#line 115 "scanner.re" +#line 117 "scanner.re" { depth = 1; goto comment; } -#line 353 "scanner.c" +#line 359 "scanner.c" +yy56: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych <= '/'){ + if(yych == ',') goto yy60; + goto yy43; + } else { + if(yych <= '9') goto yy56; + if(yych != '}') goto yy43; + } + yych = *++YYCURSOR; + +#line 138 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = atoi((char *)s->tok+1); + RETURN(CLOSESIZE); } +#line 376 "scanner.c" +yy60: yych = *++YYCURSOR; + if(yych != '}') goto yy64; + yych = *++YYCURSOR; + +#line 146 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = -1; + RETURN(CLOSESIZE); } +#line 385 "scanner.c" +yy63: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy64: if(yych <= '/') goto yy43; + if(yych <= '9') goto yy63; + if(yych != '}') goto yy43; + yych = *++YYCURSOR; + +#line 142 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1)); + RETURN(CLOSESIZE); } +#line 398 "scanner.c" } -#line 152 "scanner.re" +#line 166 "scanner.re" code: -#line 360 "scanner.c" +#line 405 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; - goto yy56; + goto yy67; ++YYCURSOR; -yy56: +yy67: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= '&'){ if(yych <= '\n'){ - if(yych <= 
'\t') goto yy64; - goto yy62; + if(yych <= '\t') goto yy75; + goto yy73; } else { - if(yych == '"') goto yy66; - goto yy64; + if(yych == '"') goto yy77; + goto yy75; } } else { if(yych <= '{'){ - if(yych <= '\'') goto yy67; - if(yych <= 'z') goto yy64; - goto yy60; + if(yych <= '\'') goto yy78; + if(yych <= 'z') goto yy75; + goto yy71; } else { - if(yych != '}') goto yy64; + if(yych != '}') goto yy75; } } yych = *++YYCURSOR; -#line 156 "scanner.re" +#line 170 "scanner.re" { if(--depth == 0){ s->cur = cursor; yylval.token = Token_new(Scanner_token(s), s->tline); return CODE; } goto code; } -#line 395 "scanner.c" -yy60: yych = *++YYCURSOR; +#line 440 "scanner.c" +yy71: yych = *++YYCURSOR; -#line 162 "scanner.re" +#line 176 "scanner.re" { ++depth; goto code; } -#line 401 "scanner.c" -yy62: yych = *++YYCURSOR; +#line 446 "scanner.c" +yy73: yych = *++YYCURSOR; -#line 164 "scanner.re" +#line 178 "scanner.re" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); s->pos = cursor; s->cline++; goto code; } -#line 409 "scanner.c" -yy64: yych = *++YYCURSOR; -yy65: -#line 168 "scanner.re" +#line 454 "scanner.c" +yy75: yych = *++YYCURSOR; +yy76: +#line 182 "scanner.re" { goto code; } -#line 414 "scanner.c" -yy66: yyaccept = 0; +#line 459 "scanner.c" +yy77: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '\n') goto yy65; - goto yy73; -yy67: yyaccept = 0; + if(yych == '\n') goto yy76; + goto yy84; +yy78: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '\n') goto yy65; - goto yy69; -yy68: ++YYCURSOR; + if(yych == '\n') goto yy76; + goto yy80; +yy79: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy69: if(yych <= '&'){ - if(yych != '\n') goto yy68; +yy80: if(yych <= '&'){ + if(yych != '\n') goto yy79; } else { - if(yych <= '\'') goto yy64; - if(yych == '\\') goto yy71; - goto yy68; + if(yych <= '\'') goto yy75; + if(yych == '\\') goto yy82; + goto yy79; } -yy70: YYCURSOR = YYMARKER; +yy81: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: 
goto yy65; + case 0: goto yy76; } -yy71: ++YYCURSOR; +yy82: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy70; - goto yy68; -yy72: ++YYCURSOR; + if(yych == '\n') goto yy81; + goto yy79; +yy83: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy73: if(yych <= '!'){ - if(yych == '\n') goto yy70; - goto yy72; +yy84: if(yych <= '!'){ + if(yych == '\n') goto yy81; + goto yy83; } else { - if(yych <= '"') goto yy64; - if(yych != '\\') goto yy72; + if(yych <= '"') goto yy75; + if(yych != '\\') goto yy83; } ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy70; - goto yy72; + if(yych == '\n') goto yy81; + goto yy83; } -#line 169 "scanner.re" +#line 183 "scanner.re" comment: -#line 463 "scanner.c" +#line 508 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; - goto yy75; + goto yy86; ++YYCURSOR; -yy75: +yy86: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= ')'){ - if(yych == '\n') goto yy80; - goto yy82; + if(yych == '\n') goto yy91; + goto yy93; } else { - if(yych <= '*') goto yy77; - if(yych == '/') goto yy79; - goto yy82; + if(yych <= '*') goto yy88; + if(yych == '/') goto yy90; + goto yy93; } -yy77: yych = *++YYCURSOR; - if(yych == '/') goto yy85; -yy78: -#line 183 "scanner.re" +yy88: yych = *++YYCURSOR; + if(yych == '/') goto yy96; +yy89: +#line 197 "scanner.re" { goto comment; } -#line 485 "scanner.c" -yy79: yych = *++YYCURSOR; - if(yych == '*') goto yy83; - goto yy78; -yy80: yych = *++YYCURSOR; +#line 530 "scanner.c" +yy90: yych = *++YYCURSOR; + if(yych == '*') goto yy94; + goto yy89; +yy91: yych = *++YYCURSOR; -#line 179 "scanner.re" +#line 193 "scanner.re" { if(cursor == s->eof) RETURN(0); s->tok = s->pos = cursor; s->cline++; goto comment; } -#line 496 "scanner.c" -yy82: yych = *++YYCURSOR; - goto yy78; -yy83: yych = *++YYCURSOR; +#line 541 "scanner.c" +yy93: yych = *++YYCURSOR; + goto yy89; +yy94: yych = *++YYCURSOR; -#line 177 "scanner.re" 
+#line 191 "scanner.re" { ++depth; goto comment; } -#line 504 "scanner.c" -yy85: yych = *++YYCURSOR; +#line 549 "scanner.c" +yy96: yych = *++YYCURSOR; -#line 173 "scanner.re" +#line 187 "scanner.re" { if(--depth == 0) goto scan; else goto comment; } -#line 512 "scanner.c" +#line 557 "scanner.c" } -#line 184 "scanner.re" +#line 198 "scanner.re" } diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re index de5172ca..ee504f4c 100644 --- a/tools/re2c/scanner.re +++ b/tools/re2c/scanner.re @@ -5,6 +5,10 @@ #include "tools/re2c/globals.h" #include "re2c-parser.h" +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + #define BSIZE 8192 #define YYCTYPE unsigned char @@ -131,6 +135,18 @@ scan: [*+?] { yylval.op = *s->tok; RETURN(CLOSE); } + "{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = atoi((char *)s->tok+1); + RETURN(CLOSESIZE); } + + "{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1)); + RETURN(CLOSESIZE); } + + "{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = -1; + RETURN(CLOSESIZE); } + letter (letter|digit)* { SubStr substr; s->cur = cursor; substr = Scanner_token(s); -- 2.40.0