From: helly Date: Sat, 16 Apr 2005 14:20:51 +0000 (+0000) Subject: - Apply #1181535 please integrate storable state patch X-Git-Tag: 0.13.6~656 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b164cc322d360af74893187bd1b9121ed2cf6962;p=re2c - Apply #1181535 please integrate storable state patch --- diff --git a/CHANGELOG b/CHANGELOG index b3889bc9..a73827bb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,6 @@ Version 0.9.7 (200?-??-??) -------------------------- +- Applied #1181535 storable state patch. Version 0.9.6 (2005-04-14) -------------------------- diff --git a/code.cc b/code.cc index 0020fed3..6a147aa1 100644 --- a/code.cc +++ b/code.cc @@ -290,6 +290,15 @@ void indent(std::ostream &o, uint i) static void need(std::ostream &o, uint n, bool & readCh) { + uint fillIndex; + bool hasFillIndex = (0<=vFillIndexes); + if ( hasFillIndex == true ) + { + fillIndex = vFillIndexes++; + o << "\tYYSETSTATE(" << fillIndex << ");\n"; + ++oline; + } + if (n == 1) { o << "\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n"; @@ -301,6 +310,12 @@ static void need(std::ostream &o, uint n, bool & readCh) ++oline; } + if ( hasFillIndex == true ) + { + o << "yyFillLabel" << fillIndex << ":\n"; + ++oline; + } + o << "\tyych = *YYCURSOR;\n"; readCh = false; ++oline; @@ -1149,18 +1164,33 @@ void DFA::emit(std::ostream &o) delete head->action; - ++oline; + bool hasFillLabels = (0<=vFillIndexes); + + oline++; o << "\n#line " << ++oline << " \"" << outputFileName << "\"\n"; - o << "{\n\tYYCTYPE yych;\n\tunsigned int yyaccept;\n"; - oline += 3; + + if ( hasFillLabels == false ) + { + o << "{\n\tYYCTYPE yych;\n\tunsigned int yyaccept;\n"; + oline += 3; + } + else + { + o << "{\n\n"; + oline += 2; + } if (bFlag) { BitMap::gen(o, lbChar, ubChar); } - o << "\tgoto yy" << label << ";\n"; - ++oline; + if ( hasFillLabels == false ) + { + o << "\tgoto yy" << label << ";\n"; + ++oline; + } + vUsedLabels.append(label); (void) new Enter(head, label++); @@ -1170,16 +1200,38 @@ void 
DFA::emit(std::ostream &o) } null_stream noWhere; - unsigned int nOrgOline = oline; + int maxFillIndexes = vFillIndexes; + int orgVFillIndexes = vFillIndexes; for (s = head; s; s = s->next) { bool readCh = false; s->emit(noWhere, readCh); s->go.genGoto(noWhere, s, s->next, readCh); } + maxFillIndexes = vFillIndexes; + vFillIndexes = orgVFillIndexes; oline = nOrgOline; + if (hasFillLabels == true ) + { + o << " switch(YYGETSTATE())\n"; + o << " {\n"; + o << " case -1: goto yy0;\n"; + + for (size_t i=0; inext) { bool readCh = false; diff --git a/examples/push.re b/examples/push.re new file mode 100644 index 00000000..a76b7aec --- /dev/null +++ b/examples/push.re @@ -0,0 +1,340 @@ +/* + * A push-model scanner example for re2c -f + * Written Mon Apr 11 2005 by mgix@mgix.com + * This file is in the public domain. + * + */ + +// ---------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#if defined(WIN32) + + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +#else + + #include + #include + + #ifndef O_BINARY + #define O_BINARY 0 + #endif + +#endif + +// ---------------------------------------------------------------------- +#define TOKENS \ + \ + TOK(kEOF) \ + TOK(kEOL) \ + TOK(kUnknown) \ + TOK(kIdentifier) \ + TOK(kDecimalConstant) \ + \ + TOK(kEqual) \ + TOK(kLeftParen) \ + TOK(kRightParen) \ + TOK(kMinus) \ + TOK(kPlus) \ + TOK(kStar) \ + TOK(kSlash) \ + \ + TOK(kIf) \ + TOK(kFor) \ + TOK(kElse) \ + TOK(kGoto) \ + TOK(kBreak) \ + TOK(kWhile) \ + TOK(kReturn) \ + + +// ---------------------------------------------------------------------- +static const char *tokenNames[] = +{ + #define TOK(x) #x, + TOKENS + #undef TOK +}; + +// ---------------------------------------------------------------------- +class PushScanner +{ +public: + + enum Token + { + #define 
TOK(x) x, + TOKENS + #undef TOK + }; + +private: + + bool eof; + int32_t state; + + uint8_t *limit; + uint8_t *start; + uint8_t *cursor; + uint8_t *marker; + + uint8_t *buffer; + uint8_t *bufferEnd; + + uint8_t yych; + uint32_t yyaccept; + +public: + + // ---------------------------------------------------------------------- + PushScanner() + { + limit = 0; + start = 0; + state = -1; + cursor = 0; + marker = 0; + buffer = 0; + eof = false; + bufferEnd = 0; + } + + // ---------------------------------------------------------------------- + ~PushScanner() + { + } + + // ---------------------------------------------------------------------- + void send( + Token token + ) + { + size_t tokenSize = cursor-start; + const char *tokenName = tokenNames[token]; + printf( + "scanner is pushing out a token of type %d (%s)", + token, + tokenName + ); + + if(token==kEOF) putchar('\n'); + else + { + size_t tokenNameSize = strlen(tokenNames[token]); + size_t padSize = 20-(20"); + + fwrite( + start, + tokenSize, + 1, + stdout + ); + + printf("<----\n"); + } + } + + // ---------------------------------------------------------------------- + uint32_t push( + const void *input, + ssize_t inputSize + ) + { + printf( + "scanner is receiving a new data batch of length %d\n" + "scanner continues with saved state = %d\n", + inputSize, + state + ); + + /* + * Data source is signaling end of file when batch size + * is less than maxFill. This is slightly annoying because + * maxFill is a value that can only be known after re2c does + * its thing. Practically though, maxFill is never bigger than + * the longest keyword, so given our grammar, 32 is a safe bet. 
+ */ + uint8_t null[64]; + const ssize_t maxFill = 32; + if(inputSize vUsedLabels; } // end namespace re2c diff --git a/main.cc b/main.cc index 42901c29..f110c44a 100644 --- a/main.cc +++ b/main.cc @@ -22,6 +22,8 @@ bool sFlag = false; bool bFlag = false; unsigned int oline = 1; uint maxFill = 1; + +int vFillIndexes = -1; label_list vUsedLabels; using namespace std; @@ -36,6 +38,7 @@ static const mbo_opt_struct OPTIONS[] = mbo_opt_struct('e', 0, "ecb"), mbo_opt_struct('h', 0, "help"), mbo_opt_struct('s', 0, "nested-ifs"), + mbo_opt_struct('f', 0, "storable-state"), mbo_opt_struct('o', 1, "output"), mbo_opt_struct('v', 0, "version"), mbo_opt_struct('V', 0, "vernum"), @@ -59,6 +62,8 @@ static void usage() "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" " need this assist to generate better code.\n" "\n" + "-f --storable-state Generate a scanner with support for storable state\n" + "\n" "-o --output=output Specify the output file instead of stdout\n" "\n" "-v --version Show version information.\n" @@ -100,6 +105,10 @@ int main(int argc, char *argv[]) sFlag = true; break; + case 'f': + vFillIndexes = 0; + break; + case 'o': outputFileName = opt_arg; break; diff --git a/re2c.1.in b/re2c.1.in index 0a44daf8..642f48bf 100644 --- a/re2c.1.in +++ b/re2c.1.in @@ -7,6 +7,9 @@ .ds rx regular expression .ds lx \fIl\fP-expression \"$Log$ +\"Revision 1.12 2005/04/16 14:20:51 helly +\"- Apply #1181535 please integrate storable state patch +\" \"Revision 1.11 2005/04/10 18:06:18 helly \"- Update \" @@ -50,7 +53,7 @@ re2c \- convert regular expressions to C/C++ .SH SYNOPSIS -\*(re [\fB-esbvh\fP] [\fB-o output\fP] file\fP +\*(re [\fB-efsbvh\fP] [\fB-o output\fP] file\fP .SH DESCRIPTION \*(re is a preprocessor that generates C-based recognizers from regular @@ -138,6 +141,9 @@ to parse the input. That is the maximum value \fCYYFILL()\fP will receive. \fB-e\fP Cross-compile from an ASCII platform to an EBCDIC one. 
.TP +\fB-f\fP +Generate a scanner with support for storable state. +.TP \fB-s\fP Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this assist to generate better code. @@ -192,6 +198,46 @@ The generated code "calls" \fCYYFILL\fP when the buffer needs be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and \fCYYMARKER\fP as needed. Note that for typical programming languages \fIn\fP will be the length of the longest keyword plus one. +.TP +\fCYYGETSTATE()\fP +The user only needs to define this macro if the \fB-f\fP flag was specified. +In that case, the generated code "calls" \fCYYGETSTATE\fP at the very beginning +of the scanner in order to obtain the saved state. \fCYYGETSTATE\fP must return a signed +integer. The value must be either -1, indicating that the scanner is entered for the +first time, or a value previously saved by \fCYYSETSTATE\fP. In the second case, the +scanner will resume operations right after where the last \fCYYFILL\fP was called. +.TP +\fCYYSETSTATE(\fP\fIn\fP\fC)\fP +The user only needs to define this macro if the \fB-f\fP flag was specified. +In that case, the generated code "calls" \fCYYSETSTATE\fP just before calling +\fCYYFILL\fP. The parameter to \fCYYSETSTATE\fP is a signed integer that uniquely +identifies the specific instance of \fCYYFILL\fP that is about to be called. +Should the user wish to save the state of the scanner and have \fCYYFILL\fP return +to the caller, all he has to do is store that unique identifier in a variable. +Later, when the scanner is called again, it will call \fCYYGETSTATE()\fP and +resume execution right where it left off. + +.SH "SCANNER WITH STORABLE STATES" +When the \fB-f\fP flag is specified, re2c generates a scanner that +can store its current state, return to the caller, and later resume +operations exactly where it left off. + +The default operation of re2c is a "pull" model, where the scanner asks +for extra input whenever it needs it. 
However, this mode of operation +assumes that the scanner is the "owner" of the parsing loop, and that may +not always be convenient. + +Typically, if there is a preprocessor ahead of the scanner in the stream, +or for that matter any other procedural source of data, the scanner cannot +"ask" for more data unless both scanner and source live in separate threads. + +The \fB-f\fP flag is useful for just this situation: it lets users design +scanners that work in a "push" model, i.e. where data is fed to the scanner +chunk by chunk. When the scanner runs out of data to consume, it just stores +its state, and returns to the caller. When more input data is fed to the scanner, +it resumes operations exactly where it left off. + +Please see examples/push.re for a push-model scanner. .SH "SCANNER SPECIFICATIONS" Each scanner specification consists of a set of \fIrules\fP and name @@ -606,6 +652,8 @@ Marcus Boerger .P Hartmut Kaiser .P +Emmanuel Mogenet added storable state +.P .PD 1 .SH VERSION INFORMATION diff --git a/test/push.c b/test/push.c new file mode 100755 index 00000000..95004d31 --- /dev/null +++ b/test/push.c @@ -0,0 +1,1145 @@ + +#line 1 "push.re" +/* + * A push-model scanner example for re2c -f + * Written Mon Apr 11 2005 by mgix@mgix.com + * This file is in the public domain. 
+ * + */ + +// ---------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#if defined(WIN32) + + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +#else + + #include + #include + + #ifndef O_BINARY + #define O_BINARY 0 + #endif + +#endif + +// ---------------------------------------------------------------------- +#define TOKENS \ + \ + TOK(kEOF) \ + TOK(kEOL) \ + TOK(kUnknown) \ + TOK(kIdentifier) \ + TOK(kDecimalConstant) \ + \ + TOK(kEqual) \ + TOK(kLeftParen) \ + TOK(kRightParen) \ + TOK(kMinus) \ + TOK(kPlus) \ + TOK(kStar) \ + TOK(kSlash) \ + \ + TOK(kIf) \ + TOK(kFor) \ + TOK(kElse) \ + TOK(kGoto) \ + TOK(kBreak) \ + TOK(kWhile) \ + TOK(kReturn) \ + + +// ---------------------------------------------------------------------- +static const char *tokenNames[] = +{ + #define TOK(x) #x, + TOKENS + #undef TOK +}; + +// ---------------------------------------------------------------------- +class PushScanner +{ +public: + + enum Token + { + #define TOK(x) x, + TOKENS + #undef TOK + }; + +private: + + bool eof; + int32_t state; + + uint8_t *limit; + uint8_t *start; + uint8_t *cursor; + uint8_t *marker; + + uint8_t *buffer; + uint8_t *bufferEnd; + + uint8_t yych; + uint32_t yyaccept; + +public: + + // ---------------------------------------------------------------------- + PushScanner() + { + limit = 0; + start = 0; + state = -1; + cursor = 0; + marker = 0; + buffer = 0; + eof = false; + bufferEnd = 0; + } + + // ---------------------------------------------------------------------- + ~PushScanner() + { + } + + // ---------------------------------------------------------------------- + void send( + Token token + ) + { + size_t tokenSize = cursor-start; + const char *tokenName = tokenNames[token]; + printf( + "scanner is pushing out a token of type %d (%s)", 
+ token, + tokenName + ); + + if(token==kEOF) putchar('\n'); + else + { + size_t tokenNameSize = strlen(tokenNames[token]); + size_t padSize = 20-(20"); + + fwrite( + start, + tokenSize, + 1, + stdout + ); + + printf("<----\n"); + } + } + + // ---------------------------------------------------------------------- + uint32_t push( + const void *input, + ssize_t inputSize + ) + { + printf( + "scanner is receiving a new data batch of length %d\n" + "scanner continues with saved state = %d\n", + inputSize, + state + ); + + /* + * Data source is signaling end of file when batch size + * is less than maxFill. This is slightly annoying because + * maxFill is a value that can only be known after re2c does + * its thing. Practically though, maxFill is never bigger than + * the longest keyword, so given our grammar, 32 is a safe bet. + */ + uint8_t null[64]; + const ssize_t maxFill = 32; + if(inputSize" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy0; + ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 7) YYFILL(7); + yych = *YYCURSOR; + switch(yych){ + case 0x00: goto yy32; + case 0x09: case 0x0B: + case 0x0C: + case 0x0D: case ' ': goto yy30; + case 0x0A: goto yy28; + case '(': goto yy16; + case ')': goto yy18; + case '*': goto yy24; + case '+': goto yy22; + case '-': goto yy20; + case '/': goto yy26; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy12; + case '=': goto yy14; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': case 'c': + case 'd': case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': case 's': + case 't': + case 'u': + case 'v': case 'x': + case 'y': + case 'z': 
goto yy10; + case 'b': goto yy7; + case 'e': goto yy5; + case 'f': goto yy4; + case 'g': goto yy6; + case 'h': goto yy11; + case 'i': goto yy2; + case 'r': goto yy9; + case 'w': goto yy8; + default: goto yy34; + } +yy2: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case 'f': goto yy67; + default: goto yy39; + } +yy3: +#line 246 "push.re" +{ SEND(kIdentifier); } +#line 95 "" +yy4: yych = *++YYCURSOR; + switch(yych){ + case 'o': goto yy64; + default: goto yy39; + } +yy5: yych = *++YYCURSOR; + switch(yych){ + case 'l': goto yy60; + default: goto yy39; + } +yy6: yych = *++YYCURSOR; + switch(yych){ + case 'o': goto yy56; + default: goto yy39; + } +yy7: yych = *++YYCURSOR; + switch(yych){ + case 'r': goto yy51; + default: goto yy39; + } +yy8: yych = *++YYCURSOR; + switch(yych){ + case 'h': goto yy46; + default: goto yy39; + } +yy9: yych = *++YYCURSOR; + switch(yych){ + case 'e': goto yy40; + default: goto yy39; + } +yy10: yych = *++YYCURSOR; + goto yy39; +yy11: yych = *++YYCURSOR; + goto yy39; +yy12: ++YYCURSOR; + yych = *YYCURSOR; + goto yy37; +yy13: +#line 247 "push.re" +{ SEND(kDecimalConstant);} +#line 135 "" +yy14: ++YYCURSOR; + goto yy15; +yy15: +#line 249 "push.re" +{ SEND(kEqual); } +#line 141 "" +yy16: ++YYCURSOR; + goto yy17; +yy17: +#line 250 "push.re" +{ SEND(kLeftParen); } +#line 147 "" +yy18: ++YYCURSOR; + goto yy19; +yy19: +#line 251 "push.re" +{ SEND(kRightParen); } +#line 153 "" +yy20: ++YYCURSOR; + goto yy21; +yy21: +#line 252 "push.re" +{ SEND(kMinus); } +#line 159 "" +yy22: ++YYCURSOR; + goto yy23; +yy23: +#line 253 "push.re" +{ SEND(kPlus); } +#line 165 "" +yy24: ++YYCURSOR; + goto yy25; +yy25: +#line 254 "push.re" +{ SEND(kStar); } +#line 171 "" +yy26: ++YYCURSOR; + goto yy27; +yy27: +#line 255 "push.re" +{ SEND(kSlash); } +#line 177 "" +yy28: ++YYCURSOR; + goto yy29; +yy29: +#line 257 "push.re" +{ SKIP(); } +#line 183 "" +yy30: ++YYCURSOR; + goto yy31; +yy31: +#line 258 "push.re" +{ SKIP(); } +#line 189 "" +yy32: ++YYCURSOR; + goto yy33; +yy33: 
+#line 259 "push.re" +{ send(kEOF); return 1; } +#line 195 "" +yy34: ++YYCURSOR; + goto yy35; +yy35: +#line 260 "push.re" +{ SEND(kUnknown); } +#line 201 "" +yy36: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy37; +yy37: switch(yych){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy36; + default: goto yy13; + } +yy38: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy39; +yy39: switch(yych){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy3; + } +yy40: yych = *++YYCURSOR; + switch(yych){ + case 't': goto yy41; + default: goto yy39; + } +yy41: yych = *++YYCURSOR; + switch(yych){ + case 'u': goto yy42; + default: goto yy39; + } +yy42: yych = *++YYCURSOR; + switch(yych){ + case 'r': goto yy43; + default: goto yy39; + } +yy43: yych = *++YYCURSOR; + switch(yych){ + case 'n': goto yy44; + default: goto yy39; + } +yy44: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': 
+ case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy45; + } +yy45: +#line 245 "push.re" +{ SEND(kReturn); } +#line 375 "" +yy46: yych = *++YYCURSOR; + switch(yych){ + case 'i': goto yy47; + default: goto yy39; + } +yy47: yych = *++YYCURSOR; + switch(yych){ + case 'l': goto yy48; + default: goto yy39; + } +yy48: yych = *++YYCURSOR; + switch(yych){ + case 'e': goto yy49; + default: goto yy39; + } +yy49: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy50; + } +yy50: +#line 244 "push.re" +{ SEND(kWhile); } +#line 458 "" +yy51: yych = *++YYCURSOR; + switch(yych){ + case 'e': goto yy52; + default: goto yy39; + } +yy52: yych = *++YYCURSOR; + switch(yych){ + case 'a': goto yy53; + default: goto yy39; + } +yy53: yych = *++YYCURSOR; + switch(yych){ + case 'k': 
goto yy54; + default: goto yy39; + } +yy54: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy55; + } +yy55: +#line 243 "push.re" +{ SEND(kBreak); } +#line 541 "" +yy56: yych = *++YYCURSOR; + switch(yych){ + case 't': goto yy57; + default: goto yy39; + } +yy57: yych = *++YYCURSOR; + switch(yych){ + case 'o': goto yy58; + default: goto yy39; + } +yy58: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy59; + } +yy59: +#line 242 "push.re" +{ SEND(kGoto); 
} +#line 619 "" +yy60: yych = *++YYCURSOR; + switch(yych){ + case 's': goto yy61; + default: goto yy39; + } +yy61: yych = *++YYCURSOR; + switch(yych){ + case 'e': goto yy62; + default: goto yy39; + } +yy62: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy63; + } +yy63: +#line 241 "push.re" +{ SEND(kElse); } +#line 697 "" +yy64: yych = *++YYCURSOR; + switch(yych){ + case 'r': goto yy65; + default: goto yy39; + } +yy65: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; 
+ default: goto yy66; + } +yy66: +#line 240 "push.re" +{ SEND(kFor); } +#line 770 "" +yy67: ++YYCURSOR; + switch((yych = *YYCURSOR)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': case '_': case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy38; + default: goto yy68; + } +yy68: +#line 239 "push.re" +{ SEND(kIf); } +#line 838 "" +} +#line 261 "push.re" + + + fill: + ssize_t unfinishedSize = cursor-start; + printf( + "scanner needs a refill. 
Exiting for now with:\n" + " saved fill state = %d\n" + " unfinished token size = %d\n", + state, + unfinishedSize + ); + + if(0 +#include +#include +#include +#include + +#if defined(WIN32) + + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +#else + + #include + #include + + #ifndef O_BINARY + #define O_BINARY 0 + #endif + +#endif + +// ---------------------------------------------------------------------- +#define TOKENS \ + \ + TOK(kEOF) \ + TOK(kEOL) \ + TOK(kUnknown) \ + TOK(kIdentifier) \ + TOK(kDecimalConstant) \ + \ + TOK(kEqual) \ + TOK(kLeftParen) \ + TOK(kRightParen) \ + TOK(kMinus) \ + TOK(kPlus) \ + TOK(kStar) \ + TOK(kSlash) \ + \ + TOK(kIf) \ + TOK(kFor) \ + TOK(kElse) \ + TOK(kGoto) \ + TOK(kBreak) \ + TOK(kWhile) \ + TOK(kReturn) \ + + +// ---------------------------------------------------------------------- +static const char *tokenNames[] = +{ + #define TOK(x) #x, + TOKENS + #undef TOK +}; + +// ---------------------------------------------------------------------- +class PushScanner +{ +public: + + enum Token + { + #define TOK(x) x, + TOKENS + #undef TOK + }; + +private: + + bool eof; + int32_t state; + + uint8_t *limit; + uint8_t *start; + uint8_t *cursor; + uint8_t *marker; + + uint8_t *buffer; + uint8_t *bufferEnd; + + uint8_t yych; + uint32_t yyaccept; + +public: + + // ---------------------------------------------------------------------- + PushScanner() + { + limit = 0; + start = 0; + state = -1; + cursor = 0; + marker = 0; + buffer = 0; + eof = false; + bufferEnd = 0; + } + + // ---------------------------------------------------------------------- + ~PushScanner() + { + } + + // ---------------------------------------------------------------------- + void send( + Token token + ) + { + size_t tokenSize = cursor-start; + const char *tokenName = tokenNames[token]; + printf( + "scanner is 
pushing out a token of type %d (%s)", + token, + tokenName + ); + + if(token==kEOF) putchar('\n'); + else + { + size_t tokenNameSize = strlen(tokenNames[token]); + size_t padSize = 20-(20"); + + fwrite( + start, + tokenSize, + 1, + stdout + ); + + printf("<----\n"); + } + } + + // ---------------------------------------------------------------------- + uint32_t push( + const void *input, + ssize_t inputSize + ) + { + printf( + "scanner is receiving a new data batch of length %d\n" + "scanner continues with saved state = %d\n", + inputSize, + state + ); + + /* + * Data source is signaling end of file when batch size + * is less than maxFill. This is slightly annoying because + * maxFill is a value that can only be known after re2c does + * its thing. Practically though, maxFill is never bigger than + * the longest keyword, so given our grammar, 32 is a safe bet. + */ + uint8_t null[64]; + const ssize_t maxFill = 32; + if(inputSize