From d10fb601f3ba257bae0845945daf1dfa754d6146 Mon Sep 17 00:00:00 2001 From: helly Date: Fri, 30 Dec 2005 16:24:07 +0000 Subject: [PATCH] - Allow to use -w with -b # Still quite some work to do but it is possible --- code.cc | 82 +++++++------ dfa.h | 8 +- main.cc | 4 +- re2c.1.in | 6 +- test/cvsignore.wb.c | 277 +++++++++++++++++++++++++++++++++++++++++++ test/cvsignore.wb.re | 63 ++++++++++ 6 files changed, 398 insertions(+), 42 deletions(-) create mode 100755 test/cvsignore.wb.c create mode 100755 test/cvsignore.wb.re diff --git a/code.cc b/code.cc index b8ca873d..83d8c00e 100644 --- a/code.cc +++ b/code.cc @@ -279,7 +279,7 @@ void indent(std::ostream &o, uint i) } } -static void need(std::ostream &o, uint n, bool & readCh) +static void need(std::ostream &o, uint n, bool & readCh, uint mask) { uint fillIndex; bool hasFillIndex = (0<=vFillIndexes); @@ -335,7 +335,7 @@ void Match::emit(std::ostream &o, bool &readCh) if (state->link) { - need(o, state->depth, readCh); + need(o, state->depth, readCh, 0); } } @@ -346,7 +346,7 @@ void Enter::emit(std::ostream &o, bool &readCh) o << "\t++YYCURSOR;\n"; o << "yy" << label << ":\n"; oline += 2; - need(o, state->depth, readCh); + need(o, state->depth, readCh, 0); } else { @@ -370,7 +370,7 @@ void Save::emit(std::ostream &o, bool &readCh) { o << "\tYYMARKER = ++YYCURSOR;\n"; ++oline; - need(o, state->depth, readCh); + need(o, state->depth, readCh, 0); } else { @@ -452,7 +452,7 @@ void Rule::emit(std::ostream &o, bool &readCh) } } -void doLinear(std::ostream &o, uint i, Span *s, uint n, const State *from, const State *next, bool &readCh) +void doLinear(std::ostream &o, uint i, Span *s, uint n, const State *from, const State *next, bool &readCh, uint mask) { for (;;) { @@ -462,18 +462,24 @@ void doLinear(std::ostream &o, uint i, Span *s, uint n, const State *from, const { if (s[1].to == next && n == 3) { - indent(o, i); - genIf(o, "!=", s[0].ub, readCh); - genGoTo(o, from, bg, readCh); + if (!mask || (s[0].ub > 0x00FF)) + { + 
indent(o, i); + genIf(o, "!=", s[0].ub, readCh); + genGoTo(o, from, bg, readCh); + } indent(o, i); genGoTo(o, from, next, readCh); return ; } else { - indent(o, i); - genIf(o, "==", s[0].ub, readCh); - genGoTo(o, from, s[1].to, readCh); + if (!mask || (s[0].ub > 0x00FF)) + { + indent(o, i); + genIf(o, "==", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); + } } n -= 2; @@ -490,18 +496,24 @@ void doLinear(std::ostream &o, uint i, Span *s, uint n, const State *from, const } else if (n == 2 && bg == next) { - indent(o, i); - genIf(o, ">=", s[0].ub, readCh); - genGoTo(o, from, s[1].to, readCh); + if (!mask || (s[0].ub > 0x00FF)) + { + indent(o, i); + genIf(o, ">=", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); + } indent(o, i); genGoTo(o, from, next, readCh); return ; } else { - indent(o, i); - genIf(o, "<=", s[0].ub - 1, readCh); - genGoTo(o, from, bg, readCh); + if (!mask || ((s[0].ub - 1) > 0x00FF)) + { + indent(o, i); + genIf(o, "<=", s[0].ub - 1, readCh); + genGoTo(o, from, bg, readCh); + } n -= 1; s += 1; } @@ -511,9 +523,9 @@ void doLinear(std::ostream &o, uint i, Span *s, uint n, const State *from, const genGoTo(o, from, next, readCh); } -void Go::genLinear(std::ostream &o, const State *from, const State *next, bool &readCh) const +void Go::genLinear(std::ostream &o, const State *from, const State *next, bool &readCh, uint mask) const { - doLinear(o, 0, span, nSpans, from, next, readCh); + doLinear(o, 0, span, nSpans, from, next, readCh, mask); } void genCases(std::ostream &o, uint lb, Span *s) @@ -537,11 +549,11 @@ void genCases(std::ostream &o, uint lb, Span *s) } } -void Go::genSwitch(std::ostream &o, const State *from, const State *next, bool &readCh) const +void Go::genSwitch(std::ostream &o, const State *from, const State *next, bool &readCh, uint mask) const { if (nSpans <= 2) { - genLinear(o, from, next, readCh); + genLinear(o, from, next, readCh, mask); } else { @@ -611,11 +623,11 @@ void Go::genSwitch(std::ostream &o, const State *from, 
const State *next, bool & } } -void doBinary(std::ostream &o, uint i, Span *s, uint n, const State *from, const State *next, bool &readCh) +void doBinary(std::ostream &o, uint i, Span *s, uint n, const State *from, const State *next, bool &readCh, uint mask) { if (n <= 4) { - doLinear(o, i, s, n, from, next, readCh); + doLinear(o, i, s, n, from, next, readCh, mask); } else { @@ -624,23 +636,23 @@ void doBinary(std::ostream &o, uint i, Span *s, uint n, const State *from, const genIf(o, "<=", s[h - 1].ub - 1, readCh); o << "{\n"; ++oline; - doBinary(o, i + 1, &s[0], h, from, next, readCh); + doBinary(o, i + 1, &s[0], h, from, next, readCh, mask); indent(o, i); o << "\t} else {\n"; ++oline; - doBinary(o, i + 1, &s[h], n - h, from, next, readCh); + doBinary(o, i + 1, &s[h], n - h, from, next, readCh, mask); indent(o, i); o << "\t}\n"; ++oline; } } -void Go::genBinary(std::ostream &o, const State *from, const State *next, bool &readCh) const +void Go::genBinary(std::ostream &o, const State *from, const State *next, bool &readCh, uint mask) const { - doBinary(o, 0, span, nSpans, from, next, readCh); + doBinary(o, 0, span, nSpans, from, next, readCh, mask); } -void Go::genBase(std::ostream &o, const State *from, const State *next, bool &readCh) const +void Go::genBase(std::ostream &o, const State *from, const State *next, bool &readCh, uint mask) const { if (nSpans == 0) { @@ -649,7 +661,7 @@ void Go::genBase(std::ostream &o, const State *from, const State *next, bool &re if (!sFlag) { - genSwitch(o, from, next, readCh); + genSwitch(o, from, next, readCh, mask); return ; } @@ -676,18 +688,18 @@ void Go::genBase(std::ostream &o, const State *from, const State *next, bool &re if (util <= 2) { - genSwitch(o, from, next, readCh); + genSwitch(o, from, next, readCh, mask); return ; } } if (nSpans > 5) { - genBinary(o, from, next, readCh); + genBinary(o, from, next, readCh, mask); } else { - genLinear(o, from, next, readCh); + genLinear(o, from, next, readCh, mask); } } @@ 
-721,7 +733,7 @@ void Go::genGoto(std::ostream &o, const State *from, const State *next, bool &re o << "\tif (yyh & 0xFF00) {\n"; oline++; /* here we need to reduce to those having high byte set */ - genBase(o, from, next, readCh); + genBase(o, from, next, readCh, 1); o << "\t} else "; } else if (readCh) @@ -740,7 +752,7 @@ void Go::genGoto(std::ostream &o, const State *from, const State *next, bool &re genGoTo(o, from, to, readCh, "\t\t"); o << "\t}\n"; oline++; - go.genBase(o, from, next, readCh); + go.genBase(o, from, next, readCh, 0); delete [] go.span; return ; } @@ -748,7 +760,7 @@ void Go::genGoto(std::ostream &o, const State *from, const State *next, bool &re } } - genBase(o, from, next, readCh); + genBase(o, from, next, readCh, 0); } void State::emit(std::ostream &o, bool &readCh) diff --git a/dfa.h b/dfa.h index db11d252..821bdab9 100644 --- a/dfa.h +++ b/dfa.h @@ -115,10 +115,10 @@ public: public: void genGoto( std::ostream&, const State *from, const State *next, bool &readCh); - void genBase( std::ostream&, const State *from, const State *next, bool &readCh) const; - void genLinear(std::ostream&, const State *from, const State *next, bool &readCh) const; - void genBinary(std::ostream&, const State *from, const State *next, bool &readCh) const; - void genSwitch(std::ostream&, const State *from, const State *next, bool &readCh) const; + void genBase( std::ostream&, const State *from, const State *next, bool &readCh, uint mask) const; + void genLinear(std::ostream&, const State *from, const State *next, bool &readCh, uint mask) const; + void genBinary(std::ostream&, const State *from, const State *next, bool &readCh, uint mask) const; + void genSwitch(std::ostream&, const State *from, const State *next, bool &readCh, uint mask) const; void compact(); void unmap(Go*, const State*); }; diff --git a/main.cc b/main.cc index 003ee03c..f459f375 100644 --- a/main.cc +++ b/main.cc @@ -79,7 +79,7 @@ static void usage() "-i --no-debug-info Do not generate '#line' 
info (usefull for versioning).\n" "\n" "-o --output=output Specify the output file instead of stdout\n" - " This cannot be used together with switches -b or -e.\n" + " This cannot be used together with -e switch.\n" "\n" "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" " need this assist to generate better code.\n" @@ -171,7 +171,7 @@ int main(int argc, char *argv[]) } } - if (wFlag && (bFlag || eFlag)) + if (wFlag && eFlag) { usage(); return 2; diff --git a/re2c.1.in b/re2c.1.in index c0e5e747..13211109 100644 --- a/re2c.1.in +++ b/re2c.1.in @@ -7,6 +7,10 @@ .ds rx regular expression .ds lx \fIl\fP-expression \"$Log$ +\"Revision 1.30 2005/12/30 16:24:07 helly +\"- Allow to use -w with -b +\"# Still quite some work to do but it is possible +\" \"Revision 1.29 2005/12/29 16:46:49 helly \"- Update docu \" @@ -237,7 +241,7 @@ Show the version as a number XXYYZZ. .TP \fB-w\fP Create a parser that supports wide chars (UCS-2). This implies \fB-s\fP and -cannot be used together with switches \fB-b\fP or \fB-e\fP. +cannot be used together with the \fB-e\fP switch. .SH "INTERFACE CODE" Unlike other scanner generators, \*(re does not generate complete scanners: the user must supply some interface code. 
diff --git a/test/cvsignore.wb.c b/test/cvsignore.wb.c new file mode 100755 index 00000000..9e61d803 --- /dev/null +++ b/test/cvsignore.wb.c @@ -0,0 +1,277 @@ +/* Generated by re2c */ +#line 1 "cvsignore.wb.re" + +#define YYFILL(n) if (cursor >= limit) break; +#define YYCTYPE unsigned short +#define YYCURSOR cursor +#define YYLIMIT limit +#define YYMARKER marker + +#line 16 "cvsignore.wb.re" + + +#define APPEND(text) \ + append(output, outsize, text, sizeof(text) - sizeof(YYCTYPE)) + +inline void append(YYCTYPE *output, size_t & outsize, const YYCTYPE * text, size_t len) +{ + memcpy(output + outsize, text, len); + outsize += (len / sizeof(YYCTYPE)); +} + +void scan(YYCTYPE *pText, size_t *pSize, int *pbChanged) +{ + // rule + // scan lines + // find $ in lines + // compact $: .. $ to $$ + + YYCTYPE *output; + const YYCTYPE *cursor, *limit, *marker; + + cursor = marker = output = *pText; + + size_t insize = *pSize; + size_t outsize = 0; + + limit = cursor + insize; + + while(1) { +loop: +{ + static unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 
+ 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + +#line 78 "" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + goto yy0; + ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 11) YYFILL(11); + yych = *YYCURSOR; + if(yych != '$') goto yy4; + goto yy2; +yy2: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch(yych){ + case 'D': goto yy10; + case 'I': goto yy9; + case 'L': goto yy8; + case 'R': goto yy7; + case 'S': goto yy5; + default: goto yy3; + } +yy3: +#line 53 "cvsignore.wb.re" +{ output[outsize++] = cursor[-1]; if (cursor >= limit) break; goto loop; } +#line 102 "" +yy4: yych = *++YYCURSOR; + goto yy3; +yy5: yych = *++YYCURSOR; + if(yych == 'o') goto yy44; + goto yy6; +yy6: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy3; + } +yy7: yych = *++YYCURSOR; + if(yych == 'e') goto yy32; + goto yy6; +yy8: yych = *++YYCURSOR; + if(yych == 'o') goto yy25; + goto yy6; +yy9: yych = *++YYCURSOR; + if(yych == 'd') goto yy19; + goto yy6; +yy10: yych = *++YYCURSOR; + if(yych != 'a') goto yy6; + goto yy11; +yy11: yych = *++YYCURSOR; + if(yych != 't') goto yy6; + goto yy12; +yy12: yych = *++YYCURSOR; + if(yych != 'e') goto yy6; + goto yy13; +yy13: yych = *++YYCURSOR; + if(yych == '$') goto yy15; + if(yych != ':') goto yy6; + goto yy14; +yy14: yych = *++YYCURSOR; + if(yych == '$') goto yy6; + goto yy18; +yy15: ++YYCURSOR; + goto yy16; +yy16: +#line 48 "cvsignore.wb.re" +{ APPEND(L"$" L"Date$"); goto loop; } +#line 142 "" +yy17: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy18; +yy18: if (yyh & 0xFF00) { + goto yy17; + } else if(yybm[0+yych] & 128) { + goto yy17; + } + if(yych <= '#') goto yy6; + goto yy15; 
+yy19: yych = *++YYCURSOR; + if(yych == '$') goto yy21; + if(yych != ':') goto yy6; + goto yy20; +yy20: yych = *++YYCURSOR; + if(yych == '$') goto yy6; + goto yy24; +yy21: ++YYCURSOR; + goto yy22; +yy22: +#line 49 "cvsignore.wb.re" +{ APPEND(L"$" L"Id$"); goto loop; } +#line 166 "" +yy23: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy24; +yy24: if(yych == 0x000A) goto yy6; + if(yych == '$') goto yy21; + goto yy23; +yy25: yych = *++YYCURSOR; + if(yych != 'g') goto yy6; + goto yy26; +yy26: yych = *++YYCURSOR; + if(yych == '$') goto yy28; + if(yych != ':') goto yy6; + goto yy27; +yy27: yych = *++YYCURSOR; + if(yych == '$') goto yy6; + goto yy31; +yy28: ++YYCURSOR; + goto yy29; +yy29: +#line 50 "cvsignore.wb.re" +{ APPEND(L"$" L"Log$"); goto loop; } +#line 189 "" +yy30: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy31; +yy31: if(yych == 0x000A) goto yy6; + if(yych == '$') goto yy28; + goto yy30; +yy32: yych = *++YYCURSOR; + if(yych != 'v') goto yy6; + goto yy33; +yy33: yych = *++YYCURSOR; + if(yych != 'i') goto yy6; + goto yy34; +yy34: yych = *++YYCURSOR; + if(yych != 's') goto yy6; + goto yy35; +yy35: yych = *++YYCURSOR; + if(yych != 'i') goto yy6; + goto yy36; +yy36: yych = *++YYCURSOR; + if(yych != 'o') goto yy6; + goto yy37; +yy37: yych = *++YYCURSOR; + if(yych != 'n') goto yy6; + goto yy38; +yy38: yych = *++YYCURSOR; + if(yych == '$') goto yy40; + if(yych != ':') goto yy6; + goto yy39; +yy39: yych = *++YYCURSOR; + if(yych == '$') goto yy6; + goto yy43; +yy40: ++YYCURSOR; + goto yy41; +yy41: +#line 51 "cvsignore.wb.re" +{ APPEND(L"$" L"Revision$"); goto loop; } +#line 227 "" +yy42: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy43; +yy43: if(yych == 0x000A) goto yy6; + if(yych == '$') goto yy40; + goto yy42; +yy44: yych = *++YYCURSOR; + if(yych != 'u') goto yy6; + goto yy45; +yy45: yych = *++YYCURSOR; + if(yych != 'r') goto yy6; + goto yy46; +yy46: yych = *++YYCURSOR; + 
if(yych != 'c') goto yy6; + goto yy47; +yy47: yych = *++YYCURSOR; + if(yych != 'e') goto yy6; + goto yy48; +yy48: yych = *++YYCURSOR; + if(yych == '$') goto yy50; + if(yych != ':') goto yy6; + goto yy49; +yy49: yych = *++YYCURSOR; + if(yych == '$') goto yy6; + goto yy53; +yy50: ++YYCURSOR; + goto yy51; +yy51: +#line 52 "cvsignore.wb.re" +{ APPEND(L"$" L"Source$"); goto loop; } +#line 259 "" +yy52: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy53; +yy53: if(yych == 0x000A) goto yy6; + if(yych == '$') goto yy50; + goto yy52; +} +} +#line 55 "cvsignore.wb.re" + + } + output[outsize] = '\0'; + + // set the new size + *pSize = outsize; + + *pbChanged = (insize == outsize) ? 0 : 1; +} diff --git a/test/cvsignore.wb.re b/test/cvsignore.wb.re new file mode 100755 index 00000000..442a45ed --- /dev/null +++ b/test/cvsignore.wb.re @@ -0,0 +1,63 @@ + +#define YYFILL(n) if (cursor >= limit) break; +#define YYCTYPE unsigned short +#define YYCURSOR cursor +#define YYLIMIT limit +#define YYMARKER marker + +/*!re2c +any = (.|"\n"); +value = (":" (.\"$")+)?; +cvsdat = "Date"; +cvsid = "Id"; +cvslog = "Log"; +cvsrev = "Revision"; +cvssrc = "Source"; +*/ + +#define APPEND(text) \ + append(output, outsize, text, sizeof(text) - sizeof(YYCTYPE)) + +inline void append(YYCTYPE *output, size_t & outsize, const YYCTYPE * text, size_t len) +{ + memcpy(output + outsize, text, len); + outsize += (len / sizeof(YYCTYPE)); +} + +void scan(YYCTYPE *pText, size_t *pSize, int *pbChanged) +{ + // rule + // scan lines + // find $ in lines + // compact $: .. 
$ to $$ + + YYCTYPE *output; + const YYCTYPE *cursor, *limit, *marker; + + cursor = marker = output = *pText; + + size_t insize = *pSize; + size_t outsize = 0; + + limit = cursor + insize; + + while(1) { +loop: +/*!re2c + +"$" cvsdat value "$" { APPEND(L"$" L"Date$"); goto loop; } +"$" cvsid value "$" { APPEND(L"$" L"Id$"); goto loop; } +"$" cvslog value "$" { APPEND(L"$" L"Log$"); goto loop; } +"$" cvsrev value "$" { APPEND(L"$" L"Revision$"); goto loop; } +"$" cvssrc value "$" { APPEND(L"$" L"Source$"); goto loop; } +any { output[outsize++] = cursor[-1]; if (cursor >= limit) break; goto loop; } + +*/ + } + output[outsize] = '\0'; + + // set the new size + *pSize = outsize; + + *pbChanged = (insize == outsize) ? 0 : 1; +} -- 2.40.0