From 90b7a5d9da922115ca50172e3b1da8303c862f44 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Sun, 9 Oct 2005 06:08:02 +0000 Subject: [PATCH] Update re2c to May 12, 2004 version. This adds an output file option, so also adjust cleanup program to take input/output file name, and update Makefiles to use it in this fashion. svn path=/trunk/yasm/; revision=1271 --- modules/arch/lc3b/Makefile.inc | 5 +- modules/arch/x86/Makefile.inc | 5 +- modules/parsers/gas/Makefile.inc | 5 +- modules/parsers/nasm/Makefile.inc | 5 +- tools/re2c/Makefile.inc | 7 + tools/re2c/actions.c | 27 ++ tools/re2c/cleanup.c | 30 +- tools/re2c/code.c | 153 ++++++--- tools/re2c/dfa.h | 33 +- tools/re2c/examples/repeater.re | 17 +- tools/re2c/globals.h | 1 + tools/re2c/main.c | 107 ++++-- tools/re2c/mbo_getopt.c | 194 +++++++++++ tools/re2c/mbo_getopt.h | 22 ++ tools/re2c/re.h | 1 + tools/re2c/scanner.c | 541 +++++++++++++++++------------- tools/re2c/scanner.re | 11 +- 17 files changed, 836 insertions(+), 328 deletions(-) create mode 100755 tools/re2c/mbo_getopt.c create mode 100755 tools/re2c/mbo_getopt.h diff --git a/modules/arch/lc3b/Makefile.inc b/modules/arch/lc3b/Makefile.inc index f9166081..85da54e3 100644 --- a/modules/arch/lc3b/Makefile.inc +++ b/modules/arch/lc3b/Makefile.inc @@ -8,9 +8,8 @@ libyasm_a_SOURCES += lc3bid.c YASM_MODULES += arch_lc3b lc3bid.c: $(srcdir)/modules/arch/lc3b/lc3bid.re re2c$(EXEEXT) cleanup$(EXEEXT) - $(top_builddir)/re2c$(EXEEXT) -s $(srcdir)/modules/arch/lc3b/lc3bid.re \ - | $(top_builddir)/cleanup$(EXEEXT) \ - | sed "/^#l/ s,re2c-out\.c,$@," > $@ + $(top_builddir)/re2c$(EXEEXT) -s -o $@ $(srcdir)/modules/arch/lc3b/lc3bid.re + $(top_builddir)/cleanup$(EXEEXT) $@ BUILT_SOURCES += lc3bid.c diff --git a/modules/arch/x86/Makefile.inc b/modules/arch/x86/Makefile.inc index 20aa6157..a2b6ad82 100644 --- a/modules/arch/x86/Makefile.inc +++ b/modules/arch/x86/Makefile.inc @@ -9,9 +9,8 @@ libyasm_a_SOURCES += x86id.c YASM_MODULES += arch_x86 x86id.c: $(srcdir)/modules/arch/x86/x86id.re re2c$(EXEEXT) cleanup$(EXEEXT) - $(top_builddir)/re2c$(EXEEXT) -s $(srcdir)/modules/arch/x86/x86id.re \ - | $(top_builddir)/cleanup$(EXEEXT) \ - | sed "/^#l/ s,re2c-out\.c,$@," > $@ + $(top_builddir)/re2c$(EXEEXT) -s -o $@ $(srcdir)/modules/arch/x86/x86id.re + $(top_builddir)/cleanup$(EXEEXT) $@ BUILT_SOURCES += x86id.c diff --git a/modules/parsers/gas/Makefile.inc b/modules/parsers/gas/Makefile.inc index 5b6e2f70..8ca77e4b 100644 --- a/modules/parsers/gas/Makefile.inc +++ b/modules/parsers/gas/Makefile.inc @@ -10,9 +10,8 @@ libyasm_a_SOURCES += gas-token.c YASM_MODULES += parser_gas gas-token.c: $(srcdir)/modules/parsers/gas/gas-token.re re2c$(EXEEXT) cleanup$(EXEEXT) - $(top_builddir)/re2c$(EXEEXT) -b $(srcdir)/modules/parsers/gas/gas-token.re \ - | $(top_builddir)/cleanup$(EXEEXT) \ - | sed "/^#l/ s,re2c-out\.c,$@," > $@ + $(top_builddir)/re2c$(EXEEXT) -b -o $@ $(srcdir)/modules/parsers/gas/gas-token.re + $(top_builddir)/cleanup$(EXEEXT) $@ BUILT_SOURCES += gas-bison.c BUILT_SOURCES += gas-bison.h diff --git a/modules/parsers/nasm/Makefile.inc b/modules/parsers/nasm/Makefile.inc index d7d533a7..e5eba58a 100644 --- a/modules/parsers/nasm/Makefile.inc +++ b/modules/parsers/nasm/Makefile.inc @@ -10,9 +10,8 @@ libyasm_a_SOURCES += nasm-token.c YASM_MODULES += parser_nasm nasm-token.c: $(srcdir)/modules/parsers/nasm/nasm-token.re re2c$(EXEEXT) cleanup$(EXEEXT) - $(top_builddir)/re2c$(EXEEXT) -b $(srcdir)/modules/parsers/nasm/nasm-token.re \ - | $(top_builddir)/cleanup$(EXEEXT) \ - | sed "/^#l/ s,re2c-out\.c,$@," > $@ + $(top_builddir)/re2c$(EXEEXT) -b -o $@ $(srcdir)/modules/parsers/nasm/nasm-token.re + $(top_builddir)/cleanup$(EXEEXT) $@ BUILT_SOURCES += nasm-bison.c BUILT_SOURCES += nasm-bison.h diff --git a/tools/re2c/Makefile.inc b/tools/re2c/Makefile.inc index d8b069dc..178643e9 100644 --- a/tools/re2c/Makefile.inc +++ b/tools/re2c/Makefile.inc @@ -20,6 +20,8 @@ EXTRA_DIST += tools/re2c/re2c-parser.y EXTRA_DIST += tools/re2c/actions.c EXTRA_DIST += tools/re2c/scanner.h EXTRA_DIST += tools/re2c/scanner.c +EXTRA_DIST += tools/re2c/mbo_getopt.h +EXTRA_DIST += tools/re2c/mbo_getopt.c EXTRA_DIST += tools/re2c/substr.h EXTRA_DIST += tools/re2c/substr.c EXTRA_DIST += tools/re2c/translate.c @@ -29,6 +31,7 @@ re2c_LDADD += re2c-dfa.$(OBJEXT) re2c_LDADD += re2c-parser.$(OBJEXT) re2c_LDADD += re2c-actions.$(OBJEXT) re2c_LDADD += re2c-scanner.$(OBJEXT) +re2c_LDADD += re2c-mbo_getopt.$(OBJEXT) re2c_LDADD += re2c-substr.$(OBJEXT) re2c_LDADD += re2c-translate.$(OBJEXT) re2c_LINK = $(CCLD_FOR_BUILD) -o $@ @@ -51,6 +54,9 @@ re2c-actions.$(OBJEXT): tools/re2c/actions.c re2c-scanner.$(OBJEXT): tools/re2c/scanner.c $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/scanner.c || echo '$(srcdir)/'`tools/re2c/scanner.c +re2c-mbo_getopt.$(OBJEXT): tools/re2c/mbo_getopt.c + $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/mbo_getopt.c || echo '$(srcdir)/'`tools/re2c/mbo_getopt.c + re2c-substr.$(OBJEXT): tools/re2c/substr.c $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/substr.c || echo '$(srcdir)/'`tools/re2c/substr.c @@ -115,6 +121,7 @@ EXTRA_DIST += tools/re2c/examples/cmmap.re EXTRA_DIST += tools/re2c/examples/cnokw.re EXTRA_DIST += tools/re2c/examples/cunroll.re EXTRA_DIST += tools/re2c/examples/modula.re +EXTRA_DIST += tools/re2c/examples/repeater.re EXTRA_DIST += tools/re2c/examples/sample.re EXTRA_DIST += tools/re2c/examples/simple.re EXTRA_DIST += tools/re2c/examples/rexx/README diff --git a/tools/re2c/actions.c b/tools/re2c/actions.c index f78726a8..64e4b52f 100644 --- a/tools/re2c/actions.c +++ b/tools/re2c/actions.c @@ -520,6 +520,33 @@ RegExp *strToRE(SubStr s){ return re; } +RegExp *strToCaseInsensitiveRE(SubStr s){ + unsigned char c; + RegExp *re, *reL, *reU; + s.len -= 2; s.str += 1; + if(s.len == 0) + return RegExp_new_NullOp(); + c = unescape(&s); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); + re = mkAlt(reL, reU); + } else { + re = matchChar(c); + } + while(s.len > 0) { + c = unescape(&s); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); + re = RegExp_new_CatOp(re, mkAlt(reL, reU)); + } else { + re = RegExp_new_CatOp(re, matchChar(c)); + } + } + return re; +} + RegExp *ranToRE(SubStr s){ Range *r; s.len -= 2; s.str += 1; diff --git a/tools/re2c/cleanup.c b/tools/re2c/cleanup.c index 63dc2468..bf5e1b05 100644 --- a/tools/re2c/cleanup.c +++ b/tools/re2c/cleanup.c @@ -32,7 +32,7 @@ #define MAXLINE 1024 int -main() +main(int argc, char *argv[]) { char str[MAXLINE]; size_t alloclines = 1000; @@ -48,6 +48,18 @@ main() unsigned long value; char *pos; size_t span1, span2; + FILE *f; + + if (argc < 2) { + fputs("Usage: cleanup \n", stderr); + return EXIT_FAILURE; + } + + f = fopen(argv[1], "rt"); + if (!f) { + fprintf(stderr, "Could not open %s for reading.\n", argv[1]); + return EXIT_FAILURE; + } inlines = malloc(alloclines * sizeof(char *)); if (!inlines) { @@ -55,7 +67,7 @@ main() return EXIT_FAILURE; } - while (fgets(str, MAXLINE, stdin)) { + while (fgets(str, MAXLINE, f)) { /* check array bounds */ if (numlines >= alloclines) { alloclines *= 2; @@ -136,6 +148,14 @@ main() usedvar[lastusedvarline] = 255; /* used */ } + fclose(f); + + f = fopen(argv[1], "wt"); + if (!f) { + fprintf(stderr, "Could not open %s for writing.\n", argv[1]); + return EXIT_FAILURE; + } + for (line = 1; line <= numlines; line++) { pos = inlines[line-1]; /* look for yy[0-9]+ labels */ @@ -148,9 +168,9 @@ main() pos = &pos[2+span1+1]; } if (line < allocvar && usedvar[line] != 0 && usedvar[line] != 255) - putc('\n', stdout); + putc('\n', f); else - fputs(pos, stdout); + fputs(pos, f); } free(usedvar); @@ -159,5 +179,7 @@ main() free(inlines[line]); free(inlines); + fclose(f); + return EXIT_SUCCESS; } diff --git a/tools/re2c/code.c b/tools/re2c/code.c index 36d02854..f0664351 100644 --- a/tools/re2c/code.c +++ b/tools/re2c/code.c @@ -163,12 +163,32 @@ void BitMap_stats(void){ } #endif -static void genGoTo(FILE *o, State *to){ +static void genGoTo(FILE *o, State *from, State *to, int *readCh) +{ +#if 0 + if (*readCh && from->label + 1 != to->label) + { + fputs("\tyych = *YYCURSOR;\n", o); oline++; + *readCh = 0; + } +#endif fprintf(o, "\tgoto yy%u;\n", to->label); oline++; } -static void genIf(FILE *o, const char *cmp, unsigned int v){ - fprintf(o, "\tif(yych %s '", cmp); +static void genIf(FILE *o, const char *cmp, unsigned int v, int *readCh) +{ +#if 0 + if (*readCh) + { + fputs("\tif((yych = *YYCURSOR) ", o); + *readCh = 0; + } else { +#endif + fputs("\tif(yych ", o); +#if 0 + } +#endif + fprintf(o, "%s '", cmp); prtCh(o, v); fputs("')", o); } @@ -178,7 +198,8 @@ static void indent(FILE *o, unsigned int i){ fputc('\t', o); } -static void need(FILE *o, unsigned int n){ +static void need(FILE *o, unsigned int n, int *readCh) +{ if(n == 1) { fputs("\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n", o); oline++; } else { @@ -186,10 +207,11 @@ static void need(FILE *o, unsigned int n){ oline++; } fputs("\tyych = *YYCURSOR;\n", o); oline++; + *readCh = 0; } void -Action_emit(Action *a, FILE *o) +Action_emit(Action *a, FILE *o, int *readCh) { int first = 1; unsigned int i; @@ -198,29 +220,40 @@ Action_emit(Action *a, FILE *o) switch (a->type) { case MATCHACT: if(a->state->link){ - fputs("\t++YYCURSOR;\n", o); oline++; - need(o, a->state->depth); + fputs("\t++YYCURSOR;\n", o); + need(o, a->state->depth, readCh); +#if 0 + } else if (!Action_readAhead(a)) { + /* do not read next char if match */ + fputs("\t++YYCURSOR;\n", o); + *readCh = 1; +#endif } else { - fputs("\tyych = *++YYCURSOR;\n", o); oline++; + fputs("\tyych = *++YYCURSOR;\n", o); + *readCh = 0; } + oline++; break; case ENTERACT: if(a->state->link){ fputs("\t++YYCURSOR;\n", o); fprintf(o, "yy%u:\n", a->d.label); oline+=2; - need(o, a->state->depth); + need(o, a->state->depth, readCh); } else { + /* we shouldn't need 'rule-following' protection here */ fputs("\tyych = *++YYCURSOR;\n", o); fprintf(o, "yy%u:\n", a->d.label); oline+=2; + *readCh = 0; } break; case SAVEMATCHACT: fprintf(o, "\tyyaccept = %u;\n", a->d.selector); oline++; if(a->state->link){ fputs("\tYYMARKER = ++YYCURSOR;\n", o); oline++; - need(o, a->state->depth); + need(o, a->state->depth, readCh); } else { fputs("\tyych = *(YYMARKER = ++YYCURSOR);\n", o); oline++; + *readCh = 0; } break; case MOVEACT: @@ -234,7 +267,7 @@ Action_emit(Action *a, FILE *o) fputs("\tswitch(yyaccept){\n", o); oline+=2; } fprintf(o, "\tcase %u:", a->d.Accept.saves[i]); - genGoTo(o, a->d.Accept.rules[i]); + genGoTo(o, a->state, a->d.Accept.rules[i], readCh); } if(!first) { fputs("\t}\n", o); oline++; @@ -248,7 +281,7 @@ Action_emit(Action *a, FILE *o) line_source(o, a->d.rule->d.RuleOp.code->line); SubStr_out(&a->d.rule->d.RuleOp.code->text, o); fprintf(o, "\n"); oline++; - fprintf(o, "#line %u \"re2c-out.c\"\n", ++oline); + fprintf(o, "#line %u \"%s\"\n", ++oline, outputFileName); break; } } @@ -267,37 +300,49 @@ Action_new_Accept(State *x, unsigned int n, unsigned int *s, State **r) } static void doLinear(FILE *o, unsigned int i, Span *s, unsigned int n, - State *next){ + State *from, State *next, int *readCh){ for(;;){ State *bg = s[0].to; while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){ if(s[1].to == next && n == 3){ - indent(o, i); genIf(o, "!=", s[0].ub); genGoTo(o, bg); - indent(o, i); genGoTo(o, next); + indent(o, i); + genIf(o, "!=", s[0].ub, readCh); + genGoTo(o, from, bg, readCh); + indent(o, i); + genGoTo(o, from, next, readCh); return; } else { - indent(o, i); genIf(o, "==", s[0].ub); genGoTo(o, s[1].to); + indent(o, i); + genIf(o, "==", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); } n -= 2; s += 2; } if(n == 1){ - indent(o, i); genGoTo(o, s[0].to); + indent(o, i); + genGoTo(o, from, s[0].to, readCh); return; } else if(n == 2 && bg == next){ - indent(o, i); genIf(o, ">=", s[0].ub); genGoTo(o, s[1].to); - indent(o, i); genGoTo(o, next); + indent(o, i); + genIf(o, ">=", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); + indent(o, i); + genGoTo(o, from, next, readCh); return; } else { - indent(o, i); genIf(o, "<=", s[0].ub - 1); genGoTo(o, bg); + indent(o, i); + genIf(o, "<=", s[0].ub - 1, readCh); + genGoTo(o, from, bg, readCh); n -= 1; s += 1; } } - indent(o, i); genGoTo(o, next); + indent(o, i); + genGoTo(o, from, next, readCh); } void -Go_genLinear(Go *g, FILE *o, State *next){ - doLinear(o, 0, g->span, g->nSpans, next); +Go_genLinear(Go *g, FILE *o, State *from, State *next, int *readCh){ + doLinear(o, 0, g->span, g->nSpans, from, next, readCh); } static void genCases(FILE *o, unsigned int lb, Span *s){ @@ -312,9 +357,9 @@ static void genCases(FILE *o, unsigned int lb, Span *s){ } void -Go_genSwitch(Go *g, FILE *o, State *next){ +Go_genSwitch(Go *g, FILE *o, State *from, State *next, int *readCh){ if(g->nSpans <= 2){ - Go_genLinear(g, o, next); + Go_genLinear(g, o, from, next, readCh); } else { State *def = g->span[g->nSpans-1].to; Span **sP = malloc(sizeof(Span*)*(g->nSpans-1)), **r, **s, **t; @@ -325,7 +370,14 @@ Go_genSwitch(Go *g, FILE *o, State *next){ if(g->span[i].to != def) *(t++) = &g->span[i]; - fputs("\tswitch(yych){\n", o); oline++; +#if 0 + if (*readCh) { + fputs("\tswitch((yych = *YYCURSOR)) {\n", o); + *readCh = 0; + } else +#endif + fputs("\tswitch(yych){\n", o); + oline++; while(t != &sP[0]){ State *to; r = s = &sP[0]; @@ -340,11 +392,11 @@ Go_genSwitch(Go *g, FILE *o, State *next){ else *(r++) = *s; } - genGoTo(o, to); + genGoTo(o, from, to, readCh); t = r; } fputs("\tdefault:", o); - genGoTo(o, def); + genGoTo(o, from, def, readCh); fputs("\t}\n", o); oline++; free(sP); @@ -352,30 +404,32 @@ Go_genSwitch(Go *g, FILE *o, State *next){ } static void doBinary(FILE *o, unsigned int i, Span *s, unsigned int n, - State *next){ + State *from, State *next, int *readCh){ if(n <= 4){ - doLinear(o, i, s, n, next); + doLinear(o, i, s, n, from, next, readCh); } else { unsigned int h = n/2; - indent(o, i); genIf(o, "<=", s[h-1].ub - 1); fputs("{\n", o); oline++; - doBinary(o, i+1, &s[0], h, next); + indent(o, i); + genIf(o, "<=", s[h-1].ub - 1, readCh); + fputs("{\n", o); oline++; + doBinary(o, i+1, &s[0], h, from, next, readCh); indent(o, i); fputs("\t} else {\n", o); oline++; - doBinary(o, i+1, &s[h], n - h, next); + doBinary(o, i+1, &s[h], n - h, from, next, readCh); indent(o, i); fputs("\t}\n", o); oline++; } } void -Go_genBinary(Go *g, FILE *o, State *next){ - doBinary(o, 0, g->span, g->nSpans, next); +Go_genBinary(Go *g, FILE *o, State *from, State *next, int *readCh){ + doBinary(o, 0, g->span, g->nSpans, from, next, readCh); } void -Go_genBase(Go *g, FILE *o, State *next){ +Go_genBase(Go *g, FILE *o, State *from, State *next, int *readCh){ if(g->nSpans == 0) return; if(!sFlag){ - Go_genSwitch(g, o, next); + Go_genSwitch(g, o, from, next, readCh); return; } if(g->nSpans > 8){ @@ -391,19 +445,19 @@ Go_genBase(Go *g, FILE *o, State *next){ } } if(util <= 2){ - Go_genSwitch(g, o, next); + Go_genSwitch(g, o, from, next, readCh); return; } } if(g->nSpans > 5){ - Go_genBinary(g, o, next); + Go_genBinary(g, o, from, next, readCh); } else { - Go_genLinear(g, o, next); + Go_genLinear(g, o, from, next, readCh); } } void -Go_genGoto(Go *g, FILE *o, State *next){ +Go_genGoto(Go *g, FILE *o, State *from, State *next, int *readCh){ unsigned int i; if(bFlag){ for(i = 0; i < g->nSpans; ++i){ @@ -416,20 +470,20 @@ Go_genGoto(Go *g, FILE *o, State *next){ Go_unmap(&go, g, to); fprintf(o, "\tif(yybm[%u+yych] & %u)", b->i, (unsigned int) b->m); - genGoTo(o, to); - Go_genBase(&go, o, next); + genGoTo(o, from, to, readCh); + Go_genBase(&go, o, from, next, readCh); free(go.span); return; } } } } - Go_genBase(g, o, next); + Go_genBase(g, o, from, next, readCh); } -void State_emit(State *s, FILE *o){ +void State_emit(State *s, FILE *o, int *readCh){ fprintf(o, "yy%u:", s->label); - Action_emit(s->action, o); + Action_emit(s->action, o, readCh); } static unsigned int merge(Span *x0, State *fg, State *bg){ @@ -705,7 +759,7 @@ void DFA_emit(DFA *d, FILE *o){ free(d->head->action); oline++; - fprintf(o, "\n#line %u \"re2c-out.c\"\n", ++oline); + fprintf(o, "\n#line %u \"%s\"\n", ++oline, outputFileName); fputs("{\n\tYYCTYPE yych;\n\tunsigned int yyaccept;\n", o); oline+=3; @@ -719,8 +773,9 @@ void DFA_emit(DFA *d, FILE *o){ s->label = label++; for(s = d->head; s; s = s->next){ - State_emit(s, o); - Go_genGoto(&s->go, o, s->next); + int readCh = 0; + State_emit(s, o, &readCh); + Go_genGoto(&s->go, o, s, s->next, &readCh); } fputs("}\n", o); oline++; diff --git a/tools/re2c/dfa.h b/tools/re2c/dfa.h index aaf3fe3d..da4d6736 100644 --- a/tools/re2c/dfa.h +++ b/tools/re2c/dfa.h @@ -38,7 +38,7 @@ typedef struct Action { } d; } Action; -void Action_emit(Action*, FILE *); +void Action_emit(Action*, FILE *, int *); typedef struct Span { unsigned int ub; @@ -65,17 +65,17 @@ typedef struct State { Action *action; } State; -void Go_genGoto(Go*, FILE *, State*); -void Go_genBase(Go*, FILE *, State*); -void Go_genLinear(Go*, FILE *, State*); -void Go_genBinary(Go*, FILE *, State*); -void Go_genSwitch(Go*, FILE *, State*); +void Go_genGoto(Go*, FILE *, State*, State*, int*); +void Go_genBase(Go*, FILE *, State*, State*, int*); +void Go_genLinear(Go*, FILE *, State*, State*, int*); +void Go_genBinary(Go*, FILE *, State*, State*, int*); +void Go_genSwitch(Go*, FILE *, State*, State*, int*); void Go_compact(Go*); void Go_unmap(Go*, Go*, State*); State *State_new(void); void State_delete(State*); -void State_emit(State*, FILE *); +void State_emit(State*, FILE *, int *); void State_out(FILE *, const State*); typedef struct DFA { @@ -151,4 +151,23 @@ Action_new_Rule(State *s, RegExp *r) /* RuleOp */ return a; } +static int +Action_isRule(Action *a) +{ + return a->type == RULEACT; +} + +static int +Action_isMatch(Action *a) +{ + return a->type == MATCHACT; +} + +static int +Action_readAhead(Action *a) +{ + return !Action_isMatch(a) || + (a->state && a->state->next && !Action_isRule(a->state->next->action)); +} + #endif diff --git a/tools/re2c/examples/repeater.re b/tools/re2c/examples/repeater.re index c8e20557..f84b5c7c 100644 --- a/tools/re2c/examples/repeater.re +++ b/tools/re2c/examples/repeater.re @@ -13,9 +13,10 @@ char *q; #define YYMARKER q #define YYFILL(n) /*!re2c - "a"{1}"\n" {RET(1);} - "a"{2,3}"\n" {RET(2);} - "a"{4,}"\n" {RET(3);} + 'a'{1}"\n" {RET(1);} + 'a'{2,3}"\n" {RET(2);} + 'a'{4,}"\n" {RET(3);} + 'a'{6}"\n" {RET(4);} [^aq]|"\n" {RET(0);} */ } @@ -28,8 +29,14 @@ main() do_scan("aa\n"); do_scan("aaa\n"); do_scan("aaaa\n"); - do_scan("q"); - do_scan("a"); + do_scan("A\n"); + do_scan("AA\n"); + do_scan("aAa\n"); + do_scan("AaaA\n"); + do_scan("Q"); + do_scan("AaaAa\n"); + do_scan("AaaAaA\n"); + do_scan("A"); do_scan("\n"); do_scan("0"); } diff --git a/tools/re2c/globals.h b/tools/re2c/globals.h index 9900da58..82f99375 100644 --- a/tools/re2c/globals.h +++ b/tools/re2c/globals.h @@ -4,6 +4,7 @@ #include "tools/re2c/basics.h" extern const char *fileName; +extern const char *outputFileName; extern int sFlag; extern int bFlag; extern unsigned int oline; diff --git a/tools/re2c/main.c b/tools/re2c/main.c index 0def4702..c5fa362a 100644 --- a/tools/re2c/main.c +++ b/tools/re2c/main.c @@ -4,22 +4,67 @@ #include "globals.h" #include "parse.h" #include "dfa.h" +#include "mbo_getopt.h" -const char *fileName; +const char *fileName = 0; +const char *outputFileName = 0; int sFlag = 0; int bFlag = 0; unsigned int oline = 1; -int main(int argc, char *argv[]){ - FILE *f; +static char *opt_arg = NULL; +static int opt_ind = 1; + +static const mbo_opt_struct OPTIONS[] = { + {'?', 0, "help"}, + {'b', 0, "bit-vectors"}, + {'e', 0, "ecb"}, + {'h', 0, "help"}, + {'s', 0, "nested-ifs"}, + {'o', 1, "output"}, + {'v', 0, "version"} +}; + +static void usage() +{ + fprintf(stderr, "usage: re2c [-esbvh] file\n" + "\n" + "-? -h --help Display this info.\n" + "\n" + "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" + " coax better code out of the compiler. Most useful for\n" + " specifications with more than a few keywords (e.g. for\n" + " most programming languages).\n" + "\n" + "-e --ecb Cross-compile from an ASCII platform to\n" + " an EBCDIC one.\n" + "\n" + "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" + " need this assist to generate better code.\n" + "\n" + "-o --output=output Specify the output file instead of stdout\n" + "\n" + "-v --version Show version information.\n"); +} + +int main(int argc, char *argv[]) +{ + int c; + FILE *f, *output; fileName = NULL; - if(argc == 1) - goto usage; - while(--argc > 1){ - char *p = *++argv; - while(*++p != '\0'){ - switch(*p){ + + if(argc == 1) { + usage(); + return 2; + } + + while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0))!=-1) { + switch (c) { + case 'b': + sFlag = 1; + bFlag = 1; + break; case 'e': xlat = asc2ebc; talx = ebc2asc; @@ -27,16 +72,28 @@ int main(int argc, char *argv[]){ case 's': sFlag = 1; break; - case 'b': - sFlag = 1; - bFlag = 1; + case 'o': + outputFileName = opt_arg; + break; + case 'v': + fputs("re2c\n", stderr); break; + case 'h': + case '?': default: - goto usage; - } - } + usage(); + return 2; + } } - fileName = *++argv; + + if (argc == opt_ind + 1) { + fileName = argv[opt_ind]; + } else { + usage(); + return 2; + } + + /* set up the input stream */ if(fileName[0] == '-' && fileName[1] == '\0'){ fileName = ""; f = stdin; @@ -46,9 +103,19 @@ int main(int argc, char *argv[]){ return 1; } } - parse(f, stdout); + + // set up the output stream + if (outputFileName == 0 || (fileName[0] == '-' && fileName[1] == '\0')) { + outputFileName = ""; + output = stdout; + } else { + output = fopen(outputFileName, "wt"); + if (!output) { + fprintf(stderr, "can't open %s\n", outputFileName); + return 1; + } + } + + parse(f, output); return 0; -usage: - fputs("usage: re2c [-esb] name\n", stderr); - return 2; } diff --git a/tools/re2c/mbo_getopt.c b/tools/re2c/mbo_getopt.c new file mode 100755 index 00000000..f4553dc6 --- /dev/null +++ b/tools/re2c/mbo_getopt.c @@ -0,0 +1,194 @@ +/* + Author: Marcus Boerger +*/ + +#include +#include +#include +#include +#include "mbo_getopt.h" +#define OPTERRCOLON (1) +#define OPTERRNF (2) +#define OPTERRARG (3) + +static int mbo_opt_error(int argc, char * const *argv, int oint, int optchr, int err, int show_err) +{ + if (show_err) + { + fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1); + + switch (err) + { + + case OPTERRCOLON: + fprintf(stderr, ": in flags\n"); + break; + + case OPTERRNF: + fprintf(stderr, "option not found %c\n", argv[oint][optchr]); + break; + + case OPTERRARG: + fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]); + break; + + default: + fprintf(stderr, "unknown\n"); + break; + } + } + + return ('?'); +} + +int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err) +{ + static int optchr = 0; + static int dash = 0; /* have already seen the - */ + int arg_start = 2; + + int opts_idx = -1; + + if (*optind >= argc) + { + return (EOF); + } + + if (!dash) + { + if ((argv[*optind][0] != '-')) + { + return (EOF); + } + else + { + if (!argv[*optind][1]) + { + /* + * use to specify stdin. Need to let pgm process this and + * the following args + */ + return (EOF); + } + } + } + + if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-')) + { + /* '--' indicates end of args if not followed by a known long option name */ + + while (1) + { + opts_idx++; + + if (opts[opts_idx].opt_char == '-') + { + (*optind)++; + return (EOF); + } + else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name)) + { + break; + } + } + + optchr = 0; + dash = 1; + arg_start = 2 + strlen(opts[opts_idx].opt_name); + } + + if (!dash) + { + dash = 1; + optchr = 1; + } + + /* Check if the guy tries to do a -: kind of flag */ + if (argv[*optind][optchr] == ':') + { + dash = 0; + (*optind)++; + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err)); + } + + if (opts_idx < 0) + { + while (1) + { + opts_idx++; + + if (opts[opts_idx].opt_char == '-') + { + int errind = *optind; + int errchr = optchr; + + if (!argv[*optind][optchr + 1]) + { + dash = 0; + (*optind)++; + } + else + { + optchr++; + } + + return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err)); + } + else if (argv[*optind][optchr] == opts[opts_idx].opt_char) + { + break; + } + } + } + + if (opts[opts_idx].need_param) + { + /* Check for cases where the value of the argument + is in the form - or in the form - */ + dash = 0; + + if (!argv[*optind][arg_start]) + { + (*optind)++; + + if (*optind == argc) + { + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err)); + } + + *optarg = argv[(*optind)++]; + } + else + { + *optarg = &argv[*optind][arg_start]; + (*optind)++; + } + + return opts[opts_idx].opt_char; + } + else + { + if (arg_start == 2) + { + if (!argv[*optind][optchr + 1]) + { + dash = 0; + (*optind)++; + } + else + { + optchr++; + } + } + else + { + (*optind)++; + } + + return opts[opts_idx].opt_char; + } + + assert(0); + return (0); /* never reached */ +} + diff --git a/tools/re2c/mbo_getopt.h b/tools/re2c/mbo_getopt.h new file mode 100755 index 00000000..8f962fd8 --- /dev/null +++ b/tools/re2c/mbo_getopt.h @@ -0,0 +1,22 @@ +/* + Author: Marcus Boerger +*/ + +/* Define structure for one recognized option (both single char and long name). + * If short_open is '-' this is the last option. + */ + +#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ +#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ + +typedef struct mbo_opt_struct +{ + const char opt_char; + const int need_param; + const char * opt_name; +} mbo_opt_struct; + +int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err); + +#endif + diff --git a/tools/re2c/re.h b/tools/re2c/re.h index a07d567b..cfbdf8ad 100644 --- a/tools/re2c/re.h +++ b/tools/re2c/re.h @@ -181,6 +181,7 @@ RegExp_new_CloseVOp(RegExp *e, int lb, int ub) extern void genCode(FILE *, RegExp*); extern RegExp *mkDiff(RegExp*, RegExp*); extern RegExp *strToRE(SubStr); +extern RegExp *strToCaseInsensitiveRE(SubStr); extern RegExp *ranToRE(SubStr); extern RegExp *mkAlt(RegExp*, RegExp*); diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c index e94cc40a..8bdb61f0 100644 --- a/tools/re2c/scanner.c +++ b/tools/re2c/scanner.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.9.1-C on Sat Oct 8 17:52:50 2005 +/* Generated by re2c 0.9.1-C on Sat Oct 8 22:50:57 2005 */ #line 1 "scanner.re" #include @@ -94,44 +94,53 @@ yy0: yych = *YYCURSOR; if(yych == '\n') goto yy4; if(yych != '/') goto yy6; - yyaccept = 0; + goto yy2; +yy2: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if(yych == '*') goto yy7; + goto yy3; yy3: #line 98 "scanner.re" { goto echo; } -#line 104 "scanner.c" +#line 106 "scanner.c" yy4: yych = *++YYCURSOR; - + goto yy5; +yy5: #line 94 "scanner.re" { if(cursor == s->eof) RETURN(0); fwrite(s->tok, 1, cursor - s->tok, out); s->tok = s->pos = cursor; s->cline++; oline++; goto echo; } -#line 112 "scanner.c" +#line 115 "scanner.c" yy6: yych = *++YYCURSOR; goto yy3; yy7: yych = *++YYCURSOR; if(yych == '!') goto yy9; + goto yy8; yy8: YYCURSOR = YYMARKER; switch(yyaccept){ case 0: goto yy3; } yy9: yych = *++YYCURSOR; if(yych != 'r') goto yy8; - yych = *++YYCURSOR; + goto yy10; +yy10: yych = *++YYCURSOR; if(yych != 'e') goto yy8; - yych = *++YYCURSOR; + goto yy11; +yy11: yych = *++YYCURSOR; if(yych != '2') goto yy8; - yych = *++YYCURSOR; + goto yy12; +yy12: yych = *++YYCURSOR; if(yych != 'c') goto yy8; - yych = *++YYCURSOR; - + goto yy13; +yy13: yych = *++YYCURSOR; + goto yy14; +yy14: #line 91 "scanner.re" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out); s->tok = cursor; RETURN(1); } -#line 135 "scanner.c" +#line 144 "scanner.c" } #line 99 "scanner.re" @@ -149,7 +158,7 @@ scan: s->tline = s->cline; s->tok = cursor; -#line 153 "scanner.c" +#line 162 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; @@ -158,404 +167,478 @@ scan: yy15: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= ':'){ + if(yych <= '/'){ if(yych <= '"'){ if(yych <= '\n'){ - if(yych <= '\b') goto yy35; - if(yych <= '\t') goto yy31; - goto yy33; + if(yych <= '\b') goto yy37; + if(yych <= '\t') goto yy33; + goto yy35; } else { - if(yych == ' ') goto yy31; - if(yych <= '!') goto yy35; + if(yych == ' ') goto yy33; + if(yych <= '!') goto yy37; goto yy23; } } else { - if(yych <= '*'){ - if(yych <= '\'') goto yy35; - if(yych <= ')') goto yy27; - goto yy21; + if(yych <= ')'){ + if(yych <= '&') goto yy37; + if(yych <= '\'') goto yy25; + goto yy29; } else { - if(yych <= '+') goto yy28; - if(yych == '/') goto yy19; - goto yy35; + if(yych <= '*') goto yy21; + if(yych <= '+') goto yy30; + if(yych <= '.') goto yy37; + goto yy19; } } } else { - if(yych <= 'Z'){ - if(yych <= '='){ - if(yych == '<') goto yy35; - goto yy27; + if(yych <= '@'){ + if(yych <= '<'){ + if(yych == ';') goto yy29; + goto yy37; } else { - if(yych == '?') goto yy28; - if(yych <= '@') goto yy35; - goto yy29; + if(yych <= '=') goto yy29; + if(yych == '?') goto yy30; + goto yy37; } } else { if(yych <= '`'){ - if(yych <= '[') goto yy25; - if(yych <= '\\') goto yy27; - goto yy35; + if(yych <= 'Z') goto yy31; + if(yych <= '[') goto yy27; + if(yych <= '\\') goto yy29; + goto yy37; } else { - if(yych <= 'z') goto yy29; + if(yych <= 'z') goto yy31; if(yych <= '{') goto yy17; - if(yych <= '|') goto yy27; - goto yy35; + if(yych <= '|') goto yy29; + goto yy37; } } } yy17: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if(yych <= '/') goto yy18; - if(yych <= '9') goto yy56; + if(yych <= '9') goto yy63; + goto yy18; yy18: #line 114 "scanner.re" { depth = 1; goto code; } -#line 216 "scanner.c" +#line 228 "scanner.c" yy19: yych = *++YYCURSOR; - if(yych == '*') goto yy54; + if(yych == '*') goto yy61; + goto yy20; yy20: -#line 133 "scanner.re" +#line 140 "scanner.re" { RETURN(*s->tok); } -#line 222 "scanner.c" +#line 235 "scanner.c" yy21: yych = *++YYCURSOR; - if(yych == '/') goto yy52; + if(yych == '/') goto yy59; + goto yy22; yy22: -#line 135 "scanner.re" +#line 142 "scanner.re" { yylval.op = *s->tok; RETURN(CLOSE); } -#line 229 "scanner.c" +#line 243 "scanner.c" yy23: yyaccept = 1; yych = *(YYMARKER = ++YYCURSOR); - if(yych != '\n') goto yy48; + if(yych != '\n') goto yy55; + goto yy24; yy24: -#line 126 "scanner.re" -{ Scanner_fatal(s, "bad string"); } -#line 236 "scanner.c" +#line 131 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing \")"); } +#line 251 "scanner.c" yy25: yyaccept = 2; yych = *(YYMARKER = ++YYCURSOR); - if(yych != '\n') goto yy42; + if(yych != '\n') goto yy50; + goto yy26; yy26: -#line 131 "scanner.re" -{ Scanner_fatal(s, "bad character constant"); } -#line 243 "scanner.c" -yy27: yych = *++YYCURSOR; +#line 132 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing ')"); } +#line 259 "scanner.c" +yy27: yyaccept = 3; + yych = *(YYMARKER = ++YYCURSOR); + if(yych != '\n') goto yy44; + goto yy28; +yy28: +#line 138 "scanner.re" +{ Scanner_fatal(s, "unterminated range (missing ])"); } +#line 267 "scanner.c" +yy29: yych = *++YYCURSOR; goto yy20; -yy28: yych = *++YYCURSOR; +yy30: yych = *++YYCURSOR; goto yy22; -yy29: yych = *++YYCURSOR; - goto yy40; -yy30: -#line 150 "scanner.re" +yy31: yych = *++YYCURSOR; + goto yy42; +yy32: +#line 157 "scanner.re" { SubStr substr; s->cur = cursor; substr = Scanner_token(s); yylval.symbol = Symbol_find(&substr); return ID; } -#line 257 "scanner.c" -yy31: yych = *++YYCURSOR; - goto yy38; -yy32: -#line 156 "scanner.re" -{ goto scan; } -#line 263 "scanner.c" +#line 281 "scanner.c" yy33: yych = *++YYCURSOR; - -#line 158 "scanner.re" + goto yy40; +yy34: +#line 163 "scanner.re" +{ goto scan; } +#line 287 "scanner.c" +yy35: yych = *++YYCURSOR; + goto yy36; +yy36: +#line 165 "scanner.re" { if(cursor == s->eof) RETURN(0); s->pos = cursor; s->cline++; goto scan; } -#line 271 "scanner.c" -yy35: yych = *++YYCURSOR; - -#line 163 "scanner.re" +#line 296 "scanner.c" +yy37: yych = *++YYCURSOR; + goto yy38; +yy38: +#line 170 "scanner.re" { fprintf(stderr, "unexpected character: '%c'\n", *s->tok); goto scan; } -#line 278 "scanner.c" -yy37: ++YYCURSOR; +#line 304 "scanner.c" +yy39: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy38: if(yych == '\t') goto yy37; - if(yych == ' ') goto yy37; - goto yy32; -yy39: ++YYCURSOR; + goto yy40; +yy40: if(yych == '\t') goto yy39; + if(yych == ' ') goto yy39; + goto yy34; +yy41: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy40: if(yych <= '@'){ - if(yych <= '/') goto yy30; - if(yych <= '9') goto yy39; - goto yy30; + goto yy42; +yy42: if(yych <= '@'){ + if(yych <= '/') goto yy32; + if(yych <= '9') goto yy41; + goto yy32; } else { - if(yych <= 'Z') goto yy39; - if(yych <= '`') goto yy30; - if(yych <= 'z') goto yy39; - goto yy30; + if(yych <= 'Z') goto yy41; + if(yych <= '`') goto yy32; + if(yych <= 'z') goto yy41; + goto yy32; } -yy41: ++YYCURSOR; +yy43: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy42: if(yych <= '['){ - if(yych != '\n') goto yy41; + goto yy44; +yy44: if(yych <= '['){ + if(yych != '\n') goto yy43; + goto yy45; } else { - if(yych <= '\\') goto yy44; - if(yych <= ']') goto yy45; - goto yy41; + if(yych <= '\\') goto yy46; + if(yych <= ']') goto yy47; + goto yy43; } -yy43: YYCURSOR = YYMARKER; +yy45: YYCURSOR = YYMARKER; switch(yyaccept){ case 0: goto yy18; case 1: goto yy24; case 2: goto yy26; + case 3: goto yy28; } -yy44: ++YYCURSOR; +yy46: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy43; - goto yy41; -yy45: yych = *++YYCURSOR; - -#line 128 "scanner.re" + if(yych == '\n') goto yy45; + goto yy43; +yy47: yych = *++YYCURSOR; + goto yy48; +yy48: +#line 134 "scanner.re" { s->cur = cursor; yylval.regexp = ranToRE(Scanner_token(s)); return RANGE; } -#line 325 "scanner.c" -yy47: ++YYCURSOR; +#line 357 "scanner.c" +yy49: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy48: if(yych <= '!'){ - if(yych == '\n') goto yy43; - goto yy47; + goto yy50; +yy50: if(yych <= '&'){ + if(yych == '\n') goto yy45; + goto yy49; } else { - if(yych <= '"') goto yy50; - if(yych != '\\') goto yy47; + if(yych <= '\'') goto yy52; + if(yych != '\\') goto yy49; + goto yy51; } - ++YYCURSOR; +yy51: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy43; - goto yy47; -yy50: yych = *++YYCURSOR; - + if(yych == '\n') goto yy45; + goto yy49; +yy52: yych = *++YYCURSOR; + goto yy53; +yy53: +#line 127 "scanner.re" +{ s->cur = cursor; + yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s)); + return STRING; } +#line 382 "scanner.c" +yy54: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy55; +yy55: if(yych <= '!'){ + if(yych == '\n') goto yy45; + goto yy54; + } else { + if(yych <= '"') goto yy57; + if(yych != '\\') goto yy54; + goto yy56; + } +yy56: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy45; + goto yy54; +yy57: yych = *++YYCURSOR; + goto yy58; +yy58: #line 123 "scanner.re" { s->cur = cursor; yylval.regexp = strToRE(Scanner_token(s)); return STRING; } -#line 347 "scanner.c" -yy52: yych = *++YYCURSOR; - +#line 407 "scanner.c" +yy59: yych = *++YYCURSOR; + goto yy60; +yy60: #line 120 "scanner.re" { s->tok = cursor; RETURN(0); } -#line 353 "scanner.c" -yy54: yych = *++YYCURSOR; - +#line 414 "scanner.c" +yy61: yych = *++YYCURSOR; + goto yy62; +yy62: #line 117 "scanner.re" { depth = 1; goto comment; } -#line 359 "scanner.c" -yy56: ++YYCURSOR; +#line 421 "scanner.c" +yy63: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych <= '/'){ - if(yych == ',') goto yy60; - goto yy43; + goto yy64; +yy64: if(yych <= '/'){ + if(yych == ',') goto yy67; + goto yy45; } else { - if(yych <= '9') goto yy56; - if(yych != '}') goto yy43; + if(yych <= '9') goto yy63; + if(yych != '}') goto yy45; + goto yy65; } - yych = *++YYCURSOR; - -#line 138 "scanner.re" +yy65: yych = *++YYCURSOR; + goto yy66; +yy66: +#line 145 "scanner.re" { yylval.extop.minsize = atoi((char *)s->tok+1); yylval.extop.maxsize = atoi((char *)s->tok+1); RETURN(CLOSESIZE); } -#line 376 "scanner.c" -yy60: yych = *++YYCURSOR; - if(yych != '}') goto yy64; - yych = *++YYCURSOR; - -#line 146 "scanner.re" +#line 441 "scanner.c" +yy67: yych = *++YYCURSOR; + if(yych != '}') goto yy71; + goto yy68; +yy68: yych = *++YYCURSOR; + goto yy69; +yy69: +#line 153 "scanner.re" { yylval.extop.minsize = atoi((char *)s->tok+1); yylval.extop.maxsize = -1; RETURN(CLOSESIZE); } -#line 385 "scanner.c" -yy63: ++YYCURSOR; +#line 452 "scanner.c" +yy70: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy64: if(yych <= '/') goto yy43; - if(yych <= '9') goto yy63; - if(yych != '}') goto yy43; - yych = *++YYCURSOR; - -#line 142 "scanner.re" + goto yy71; +yy71: if(yych <= '/') goto yy45; + if(yych <= '9') goto yy70; + if(yych != '}') goto yy45; + goto yy72; +yy72: yych = *++YYCURSOR; + goto yy73; +yy73: +#line 149 "scanner.re" { yylval.extop.minsize = atoi((char *)s->tok+1); yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1)); RETURN(CLOSESIZE); } -#line 398 "scanner.c" +#line 468 "scanner.c" } -#line 166 "scanner.re" +#line 173 "scanner.re" code: -#line 405 "scanner.c" +#line 475 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; - goto yy67; + goto yy74; ++YYCURSOR; -yy67: +yy74: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= '&'){ if(yych <= '\n'){ - if(yych <= '\t') goto yy75; - goto yy73; + if(yych <= '\t') goto yy82; + goto yy80; } else { - if(yych == '"') goto yy77; - goto yy75; + if(yych == '"') goto yy84; + goto yy82; } } else { if(yych <= '{'){ - if(yych <= '\'') goto yy78; - if(yych <= 'z') goto yy75; - goto yy71; + if(yych <= '\'') goto yy85; + if(yych <= 'z') goto yy82; + goto yy78; } else { - if(yych != '}') goto yy75; + if(yych != '}') goto yy82; + goto yy76; } } - yych = *++YYCURSOR; - -#line 170 "scanner.re" +yy76: yych = *++YYCURSOR; + goto yy77; +yy77: +#line 177 "scanner.re" { if(--depth == 0){ s->cur = cursor; yylval.token = Token_new(Scanner_token(s), s->tline); return CODE; } goto code; } -#line 440 "scanner.c" -yy71: yych = *++YYCURSOR; - -#line 176 "scanner.re" +#line 512 "scanner.c" +yy78: yych = *++YYCURSOR; + goto yy79; +yy79: +#line 183 "scanner.re" { ++depth; goto code; } -#line 446 "scanner.c" -yy73: yych = *++YYCURSOR; - -#line 178 "scanner.re" +#line 519 "scanner.c" +yy80: yych = *++YYCURSOR; + goto yy81; +yy81: +#line 185 "scanner.re" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); s->pos = cursor; s->cline++; goto code; } -#line 454 "scanner.c" -yy75: yych = *++YYCURSOR; -yy76: -#line 182 "scanner.re" +#line 528 "scanner.c" +yy82: yych = *++YYCURSOR; + goto yy83; +yy83: +#line 189 "scanner.re" { goto code; } -#line 459 "scanner.c" -yy77: yyaccept = 0; +#line 534 "scanner.c" +yy84: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '\n') goto yy76; - goto yy84; -yy78: yyaccept = 0; + if(yych == '\n') goto yy83; + goto yy91; +yy85: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '\n') goto yy76; - goto yy80; -yy79: ++YYCURSOR; + if(yych == '\n') goto yy83; + goto yy87; +yy86: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy80: if(yych <= '&'){ - if(yych != '\n') goto yy79; + goto yy87; +yy87: if(yych <= '&'){ + if(yych != '\n') goto yy86; + goto yy88; } else { - if(yych <= '\'') goto yy75; - if(yych == '\\') goto yy82; - goto yy79; + if(yych <= '\'') goto yy82; + if(yych == '\\') goto yy89; + goto yy86; } -yy81: YYCURSOR = YYMARKER; +yy88: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: goto yy76; + case 0: goto yy83; } -yy82: ++YYCURSOR; +yy89: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy81; - goto yy79; -yy83: ++YYCURSOR; + if(yych == '\n') goto yy88; + goto yy86; +yy90: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy84: if(yych <= '!'){ - if(yych == '\n') goto yy81; - goto yy83; + goto yy91; +yy91: if(yych <= '!'){ + if(yych == '\n') goto yy88; + goto yy90; } else { - if(yych <= '"') goto yy75; - if(yych != '\\') goto yy83; + if(yych <= '"') goto yy82; + if(yych != '\\') goto yy90; + goto yy92; } - ++YYCURSOR; +yy92: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yych == '\n') goto yy81; - goto yy83; + if(yych == '\n') goto yy88; + goto yy90; } -#line 183 "scanner.re" +#line 190 "scanner.re" comment: -#line 508 "scanner.c" +#line 587 "scanner.c" { YYCTYPE yych; unsigned int yyaccept; - goto yy86; + goto yy93; ++YYCURSOR; -yy86: +yy93: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; if(yych <= ')'){ - if(yych == '\n') goto yy91; - goto yy93; + if(yych == '\n') goto yy98; + goto yy100; } else { - if(yych <= '*') goto yy88; - if(yych == '/') goto yy90; - goto yy93; + if(yych <= '*') goto yy95; + if(yych == '/') goto yy97; + goto yy100; } -yy88: yych = *++YYCURSOR; - if(yych == '/') goto yy96; -yy89: -#line 197 "scanner.re" +yy95: yych = *++YYCURSOR; + if(yych == '/') goto yy103; + goto yy96; +yy96: +#line 204 "scanner.re" { goto comment; } -#line 530 "scanner.c" -yy90: yych = *++YYCURSOR; - if(yych == '*') goto yy94; - goto yy89; -yy91: yych = *++YYCURSOR; - -#line 193 "scanner.re" +#line 610 "scanner.c" +yy97: yych = *++YYCURSOR; + if(yych == '*') goto yy101; + goto yy96; +yy98: yych = *++YYCURSOR; + goto yy99; +yy99: +#line 200 "scanner.re" { if(cursor == s->eof) RETURN(0); s->tok = s->pos = cursor; s->cline++; goto comment; } -#line 541 "scanner.c" -yy93: yych = *++YYCURSOR; - goto yy89; -yy94: yych = *++YYCURSOR; - -#line 191 "scanner.re" +#line 622 "scanner.c" +yy100: yych = *++YYCURSOR; + goto yy96; +yy101: yych = *++YYCURSOR; + goto yy102; +yy102: +#line 198 "scanner.re" { ++depth; goto comment; } -#line 549 "scanner.c" -yy96: yych = *++YYCURSOR; - -#line 187 "scanner.re" +#line 631 "scanner.c" +yy103: yych = *++YYCURSOR; + goto yy104; +yy104: +#line 194 "scanner.re" { if(--depth == 0) goto scan; else goto comment; } -#line 557 "scanner.c" +#line 640 "scanner.c" } -#line 198 "scanner.re" +#line 205 "scanner.re" } diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re index ee504f4c..980c2685 100644 --- a/tools/re2c/scanner.re +++ b/tools/re2c/scanner.re @@ -123,12 +123,19 @@ scan: dstring { s->cur = cursor; yylval.regexp = strToRE(Scanner_token(s)); return STRING; } - "\"" { Scanner_fatal(s, "bad string"); } + + sstring { s->cur = cursor; + yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s)); + return STRING; } + + "\"" { Scanner_fatal(s, "unterminated string constant (missing \")"); } + "'" { Scanner_fatal(s, "unterminated string constant (missing ')"); } cstring { s->cur = cursor; yylval.regexp = ranToRE(Scanner_token(s)); return RANGE; } - "[" { Scanner_fatal(s, "bad character constant"); } + + "[" { Scanner_fatal(s, "unterminated range (missing ])"); } [()|=;/\\] { RETURN(*s->tok); } -- 2.40.0