--- /dev/null
+re2c
+----
+
+Version 0.9.1
+-------------
+
+- removed rcs comments in source files
+
+Version 0.9
+-----------
+
+- redistribution based on version 0.5
+- added parentheses to assignment expressions in 'if' statements
+- rearranged class members to match initialization order
+- substr fix
+- use array delete [] when necessary
+- other minor fixes for subduing compiler warnings
+
--- /dev/null
+# $Log$
+# Revision 1.1 2003/12/13 04:58:19 nuffer
+# Initial revision
+#
+#Revision 1.1 1994/04/08 16:30:37 peter
+#Initial revision
+#
+
+BIN = /usr/local/bin
+MAN = /usr/local/man
+
+%.o : %.cc ; $(CC) -o $@ $(CFLAGS) -c $<
+%.cc : %.y ; $(YACC) $(YFLAGS) $<; mv $(YTAB).c $@
+%.cc : %.l ; $(LEX) $(LFLAGS) $<; mv $(LEXYY).c $@
+
+%.cc: %.re
+ re2c -s $< >$@
+
+SOURCES = code.cc dfa.cc main.cc parser.y actions.cc scanner.re substr.cc\
+ translate.cc
+OBJS = code.o dfa.o main.o parser.o actions.o scanner.o substr.o\
+ translate.o
+
+CC = g++
+CFLAGS = -O2 -Wall -I. -Wno-unused -Wno-parentheses
+YFLAGS = -d
+LDFLAGS =
+
+# Neither goal names a file, so declare both phony: a stray file called
+# "default" or "clean" must not make the goal look up to date.
+.PHONY: default clean
+
+# Default goal: build the re2c executable.
+default: re2c
+
+# Remove everything the build generates, including the yacc/re2c outputs and
+# the derived version files.
+clean:
+	rm -f *.o *.s y.tab.c y.tab.h scanner.cc parser.cc .version version.h re2c
+
+# Generate the parser through $(YACC) $(YFLAGS) rather than a hard-coded
+# "yacc -d", so that "make YACC=byacc" (or a changed YFLAGS) is honoured.
+# YFLAGS is set to -d in this file, which also makes yacc write y.tab.h.
+parser.cc: parser.y
+	$(YACC) $(YFLAGS) parser.y
+	mv -f y.tab.c parser.cc
+
+re2c: $(OBJS)
+ $(CC) -o $@ $(OBJS) $(LDFLAGS) -lstdc++
+
+# Extract the release number from the README's "Version x.y.z" line.
+# One anchored sed both selects the line and strips the label; this replaces
+# the obsolescent egrep plus a second sed process.
+.version: README
+	sed -n 's/^Version //p' README > .version
+
+version.h: .version
+ echo "#define RE2C_VERSION" `cat .version` > version.h
+
+# "install" is a command, not a file.
+.PHONY: install
+
+# Install the stripped binary under $(BIN) and the man page under
+# $(MAN)/man1.  DESTDIR is empty by default; packagers can set it to stage
+# the installation into a scratch root.
+install: re2c
+	install -d $(DESTDIR)$(BIN)
+	install -s re2c $(DESTDIR)$(BIN)
+	install -d $(DESTDIR)$(MAN)/man1
+	install -m 0644 re2c.1 $(DESTDIR)$(MAN)/man1
+
+# Build a source tarball named re2c-<version>.tar.gz, where <version> is the
+# content of .version:
+#   1. create the staging directory re2c-<version>;
+#   2. copy into it every file reported by "p4 files ..." (Perforce), after
+#      the sed pipeline strips the //depot/home/re2c/ prefix, drops lines
+#      containing "- delete", and removes everything from '#' onwards;
+#   3. tar and gzip the staging directory;
+#   4. remove the staging directory.
+# The recipe needs a working p4 client; mkdir fails if the staging directory
+# is left over from an earlier aborted run.
+dist: re2c scanner.cc .version
+ mkdir re2c-`cat .version`
+ cp -P `p4 files ... | sed s/\\\\/\\\\/depot\\\\/home\\\\/re2c\\\\/// | sed '/- delete/d' | sed s/#.*$$//` re2c-`cat .version`/
+ tar zcf re2c-`cat .version`.tar.gz re2c-`cat .version`/
+ rm -rf re2c-`cat .version`
+
+#
+# generated with "gcc -I. -MM -x c++ *.cc *.y *.re"
+# and edited by hand
+#
+actions.o : actions.cc globals.h basics.h parser.h scanner.h \
+ token.h substr.h re.h ins.h dfa.h
+code.o : code.cc substr.h basics.h globals.h dfa.h re.h token.h \
+ ins.h
+dfa.o : dfa.cc globals.h basics.h substr.h dfa.h re.h token.h \
+ ins.h
+main.o : main.cc globals.h basics.h parser.h scanner.h token.h \
+ substr.h re.h ins.h dfa.h version.h
+substr.o : substr.cc substr.h basics.h
+translate.o : translate.cc globals.h basics.h
+scanner.o : scanner.re scanner.h token.h substr.h basics.h \
+ parser.h re.h ins.h ./parser.o
+parser.o : parser.y globals.h basics.h parser.h scanner.h token.h \
+ substr.h re.h ins.h
--- /dev/null
+re2c is distributed with no warranty whatever. The author and any other
+contributors take no responsibility for the consequences of its use.
--- /dev/null
+re2c
+----
+
+Version 0.9.1
+Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
+Currently maintained by Brian Young (bayoung@acm.org)
+
+The re2c distribution can be found at:
+
+ http://www.tildeslash.org/re2c/index.html
+
+The source distribution is available from:
+
+ http://www.tildeslash.org/re2c/re2c-0.9.1.tar.gz
+
+This distribution is a cleaned-up version of the 0.5 release,
+maintained by me (Brian Young). Several bugs were fixed, and the code
+was cleaned up so that it compiles without warnings. It has been developed
+and tested with egcs 1.0.2 and gcc 2.7.2.3 on Linux x86. Peter
+Bumbulis' original release can be found at:
+
+ ftp://csg.uwaterloo.ca/pub/peter/re2c.0.5.tar.gz
+
+re2c is a great tool for writing fast and flexible lexers. It has
+served many people well for many years and it deserves to be
+maintained more actively. re2c is on the order of 2-3 times faster
+than a flex based scanner, and its input model is much more
+flexible.
+
+Patches and requests for features will be entertained. Areas of
+particular interest to me are porting (a Solaris and an NT
+version will be forthcoming) and wide character support. Note
+that the code is already quite portable and should be buildable
+on any platform with minor makefile changes.
+
+Peter's original version 0.5 ANNOUNCE and README follow.
+
+Brian
+
+--
+
+re2c is a tool for generating C-based recognizers from regular
+expressions. re2c-based scanners are efficient: for programming
+languages, given similar specifications, an re2c-based scanner is
+typically almost twice as fast as a flex-based scanner with little or no
+increase in size (possibly a decrease on cisc architectures). Indeed,
+re2c-based scanners are quite competitive with hand-crafted ones.
+
+Unlike flex, re2c does not generate complete scanners: the user must
+supply some interface code. While this code is not bulky (about 50-100
+lines for a flex-like scanner; see the man page and examples in the
+distribution) careful coding is required for efficiency (and
+correctness). One advantage of this arrangement is that the generated
+code is not tied to any particular input model. For example, re2c
+generated code can be used to scan data from a null-byte terminated
+buffer as illustrated below.
+
+Given the following source
+
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ /*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+ */
+ }
+
+re2c will generate
+
+ /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+ #line 1 "simple.re"
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ {
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ yy1: ++YYCURSOR;
+ yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+ yy2: yych = *++YYCURSOR;
+ goto yy7;
+ yy3:
+ #line 10
+ {return YYCURSOR;}
+ yy4: yych = *++YYCURSOR;
+ yy5:
+ #line 11
+ {return NULL;}
+ yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+ }
+ #line 12
+
+ }
+
+Note that most compilers will perform dead-code elimination to remove
+all YYCURSOR, YYLIMIT comparisons.
+
+re2c was developed for a particular project (constructing a fast REXX
+scanner of all things!) and so while it has some rough edges, it should
+be quite usable. More information about re2c can be found in the
+(admittedly skimpy) man page; the algorithms and heuristics used are
+described in an upcoming LOPLAS article (included in the distribution).
+Probably the best way to find out more about re2c is to try the supplied
+examples. re2c is written in C++, and is currently being developed
+under Linux using gcc 2.5.8.
+
+Peter
+
+--
+
+re2c is distributed with no warranty whatever. The code is certain to
+contain errors. Neither the author nor any contributor takes
+responsibility for any consequences of its use.
+
+re2c is in the public domain. The data structures and algorithms used
+in re2c are all either taken from documents available to the general
+public or are inventions of the author. Programs generated by re2c may
+be distributed freely. re2c itself may be distributed freely, in source
+or binary, unchanged or modified. Distributors may charge whatever fees
+they can obtain for re2c.
+
+If you do make use of re2c, or incorporate it into a larger project, an
+acknowledgement somewhere (documentation, research report, etc.) would
+be appreciated.
+
+Please send bug reports and feedback (including suggestions for
+improving the distribution) to
+
+ peter@csg.uwaterloo.ca
+
+Include a small example and the banner from parser.y with bug reports.
+
--- /dev/null
+#include <time.h>
+#include <string.h>
+#include <iostream.h>
+#include <iomanip.h>
+
+#include "globals.h"
+#include "parser.h"
+#include "dfa.h"
+
+// Head of the singly linked list that holds every Symbol created so far.
+Symbol *Symbol::first = NULL;
+
+// Create a symbol named `str` with no regular expression attached yet and
+// push it onto the front of the global symbol list.
+Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) {
+	first = this;
+}
+
+// Return the symbol whose name equals `str`.  When the list holds no such
+// symbol, a new one is created (which also registers it) and returned.
+Symbol *Symbol::find(const SubStr &str){
+	Symbol *cur = first;
+	while(cur){
+		if(cur->name == str)
+			return cur;
+		cur = cur->next;
+	}
+	return new Symbol(str);
+}
+
+// Print a one-line description of instruction `i` to `o`, prefixed with its
+// index relative to `base` (the first instruction of the program).
+// NOTE(review): CTXT instructions are printed with the same "term" label as
+// TERM instructions -- confirm that this is intended.
+void showIns(ostream &o, const Ins &i, const Ins &base){
+	o.width(3);
+	o << &i - &base << ": ";
+	switch(i.i.tag){
+	case CHAR: {
+		o << "match ";
+		// the characters to match occupy the slots between this
+		// instruction and the one its link points at
+		const Ins *end = (Ins*) i.i.link;
+		for(const Ins *cur = &i + 1; cur < end; ++cur)
+			prtCh(o, cur->c.value);
+		break;
+	}
+	case GOTO:
+		o << "goto " << ((Ins*) i.i.link - &base);
+		break;
+	case FORK:
+		o << "fork " << ((Ins*) i.i.link - &base);
+		break;
+	case CTXT:
+	case TERM:
+		o << "term " << ((RuleOp*) i.i.link)->accept;
+		break;
+	}
+	o << "\n";
+}
+
+// Default answer for a regular expression: ~0, the value that the
+// fixedLength() implementations in this file treat as "no fixed length".
+uint RegExp::fixedLength(){
+	return ~0;
+}
+
+char *NullOp::type = "NullOp";
+
+// The empty expression needs no instructions.
+void NullOp::calcSize(Char*){
+	size = 0;
+}
+
+// The empty expression always matches exactly zero characters.
+uint NullOp::fixedLength(){
+	return 0;
+}
+
+// Nothing to emit for the empty expression.
+void NullOp::compile(Char*, Ins*){
+}
+
+// The empty expression mentions no characters, so the character set is
+// left untouched.
+void NullOp::split(CharSet&){
+}
+
+// Print the half-open range [lb, ub): a single character as "c", a wider
+// range as "c-d" where d is the last character included.  The rest of the
+// list (r.next) is then streamed as well.
+ostream& operator<<(ostream &o, const Range &r){
+	prtCh(o, r.lb);
+	if((r.ub - r.lb) != 1){
+		o << "-";
+		prtCh(o, r.ub-1);
+	}
+	return o << r.next;
+}
+
+// Build the union of two range lists and return the head of the result.
+//
+// Both r1 and r2 are dereferenced immediately, so neither may be NULL.
+// The walk always takes the list whose current range starts lower, which
+// presumably relies on both lists being sorted by lb -- TODO confirm.
+//
+// Ranges at the front of the result are fresh copies.  Once one input is
+// exhausted, the remaining tail of the other input is linked into the
+// result directly (not copied), so the result can share nodes with an input.
+Range *doUnion(Range *r1, Range *r2){
+ Range *r, **rP = &r;
+ for(;;){
+ Range *s;
+ // start a new output range from whichever input begins first
+ if(r1->lb <= r2->lb){
+ s = new Range(*r1);
+ } else {
+ s = new Range(*r2);
+ }
+ *rP = s;
+ rP = &s->next;
+ // absorb every input range that starts at or before the end of s
+ for(;;){
+ if(r1->lb <= r2->lb){
+ if(r1->lb > s->ub)
+ break;
+ if(r1->ub > s->ub)
+ s->ub = r1->ub;
+ if(!(r1 = r1->next)){
+ // r1 is exhausted: swallow the r2 ranges that still start
+ // within s, then append the rest of r2 as-is
+ uint ub = 0;
+ for(; r2 && r2->lb <= s->ub; r2 = r2->next)
+ ub = r2->ub;
+ if(ub > s->ub)
+ s->ub = ub;
+ *rP = r2;
+ return r;
+ }
+ } else {
+ if(r2->lb > s->ub)
+ break;
+ if(r2->ub > s->ub)
+ s->ub = r2->ub;
+ if(!(r2 = r2->next)){
+ // mirror image of the case above with the roles swapped
+ uint ub = 0;
+ for(; r1 && r1->lb <= s->ub; r1 = r1->next)
+ ub = r1->ub;
+ if(ub > s->ub)
+ s->ub = ub;
+ *rP = r1;
+ return r;
+ }
+ }
+ }
+ }
+ // not reached: the outer loop is only left through the returns above
+ *rP = NULL;
+ return r;
+}
+
+// Return a newly allocated range list holding the parts of the r1 ranges
+// that are not covered by any r2 range.  Ranges are half-open [lb, ub).
+// The result is NULL when nothing remains.
+//
+// r2 is advanced once across the whole walk and never rewound, which
+// presumably relies on both lists being sorted in ascending order -- TODO
+// confirm.  Neither input list is modified.
+Range *doDiff(Range *r1, Range *r2){
+ Range *r, *s, **rP = &r;
+ for(; r1; r1 = r1->next){
+ // lb: start of the part of *r1 that is not yet known to be covered
+ uint lb = r1->lb;
+ // skip r2 ranges that end before *r1 begins
+ for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
+ for(; r2 && r2->lb < r1->ub; r2 = r2->next){
+ if(lb < r2->lb){
+ // the gap in front of this r2 range survives
+ *rP = s = new Range(lb, r2->lb);
+ rP = &s->next;
+ }
+ // *r2 reaches the end of *r1: nothing of *r1 is left; r2 is kept
+ // because it may also overlap the next r1 range
+ if((lb = r2->ub) >= r1->ub)
+ goto noMore;
+ }
+ // whatever follows the last overlapping r2 range survives
+ *rP = s = new Range(lb, r1->ub);
+ rP = &s->next;
+ noMore:;
+ }
+ *rP = NULL;
+ return r;
+}
+
+// Combine two optional character-set matches.  With both present, a new
+// MatchOp over the union of their ranges is returned; otherwise whichever
+// one is present (possibly NULL) is returned unchanged.
+MatchOp *merge(MatchOp *m1, MatchOp *m2){
+	if(m1 && m2)
+		return new MatchOp(doUnion(m1->match, m2->match));
+	return m1 ? m1 : m2;
+}
+
+char *MatchOp::type = "MatchOp";
+
+// Print the range list this operator matches.
+void MatchOp::display(ostream &o) const{
+	o << match;
+}
+
+// One CHAR header instruction plus one slot for every matched character
+// that is its own representative in `rep`.
+void MatchOp::calcSize(Char *rep){
+	uint slots = 1;
+	for(Range *r = match; r; r = r->next){
+		for(uint c = r->lb; c < r->ub; ++c){
+			if(rep[c] == c)
+				++slots;
+		}
+	}
+	size = slots;
+}
+
+// A character-set match always consumes exactly one character.
+uint MatchOp::fixedLength(){
+	return 1;
+}
+
+// Emit the CHAR header at i[0], linked to the instruction after this block,
+// followed by one slot per matched character that is its own representative
+// in `rep`.  Each slot stores the character and a `bump` value that counts
+// down from size-1 over the slots.
+void MatchOp::compile(Char *rep, Ins *i){
+	i->i.tag = CHAR;
+	i->i.link = &i[size];
+	Ins *slot = i + 1;
+	uint remaining = size;
+	for(Range *r = match; r; r = r->next){
+		for(uint c = r->lb; c < r->ub; ++c){
+			if(rep[c] != c)
+				continue;
+			slot->c.value = c;
+			slot->c.bump = --remaining;
+			++slot;
+		}
+	}
+}
+
+// Refine the character partition in `s` against the characters matched by
+// this operator.  Every matched character is moved out of its current
+// pattern x into a partner pattern a (x->nxt); characters of x that this
+// operator does not match stay in x.
+void MatchOp::split(CharSet &s){
+ for(Range *r = match; r; r = r->next){
+ for(uint c = r->lb; c < r->ub; ++c){
+ CharPtn *x = s.rep[c], *a = x->nxt;
+ if(!a){
+ // x has no partner yet; a pattern holding only c needs no split
+ if(x->card == 1)
+ continue;
+ // take a partner from the head of the free list
+ x->nxt = a = s.freeHead;
+ if(!(s.freeHead = s.freeHead->nxt))
+ s.freeTail = &s.freeHead;
+ a->nxt = NULL;
+ // remember x on the fix list so its nxt can be reset below
+ x->fix = s.fix;
+ s.fix = x;
+ }
+ if(--(x->card) == 0){
+ // x is now empty: append it to the tail of the free list
+ *s.freeTail = x;
+ *(s.freeTail = &x->nxt) = NULL;
+ }
+ s.rep[c] = a;
+ ++(a->card);
+ }
+ }
+ // drop the temporary partner links of patterns that still have members
+ for(; s.fix; s.fix = s.fix->fix)
+ if(s.fix->card)
+ s.fix->nxt = NULL;
+}
+
+// Character-set difference e1 \ e2.  Returns NULL unless both operands are
+// MatchOps.  An empty difference is represented by a NullOp.
+RegExp *mkDiff(RegExp *e1, RegExp *e2){
+	MatchOp *lhs = (MatchOp*) e1->isA(MatchOp::type);
+	if(!lhs)
+		return NULL;
+	MatchOp *rhs = (MatchOp*) e2->isA(MatchOp::type);
+	if(!rhs)
+		return NULL;
+	Range *left = doDiff(lhs->match, rhs->match);
+	if(left)
+		return (RegExp*) new MatchOp(left);
+	return (RegExp*) new NullOp;
+}
+
+// Alternation of two optional expressions: an AltOp when both are present,
+// otherwise whichever one is present (possibly NULL).
+RegExp *doAlt(RegExp *e1, RegExp *e2){
+	if(e1 && e2)
+		return new AltOp(e1, e2);
+	return e1 ? e1 : e2;
+}
+
+// Build the alternation e1 | e2 while keeping character-set matches merged.
+// From each operand a MatchOp is pulled out when the operand is a MatchOp
+// itself or an AltOp whose first branch is one.  The two MatchOps are merged
+// into one via merge(), and the result is alternated with whatever remains
+// of the operands.
+RegExp *mkAlt(RegExp *e1, RegExp *e2){
+ AltOp *a;
+ MatchOp *m1, *m2;
+ if((a = (AltOp*) e1->isA(AltOp::type))){
+ // e1 is an alternation: peel off a leading MatchOp, keep the rest
+ if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type)))
+ e1 = a->exp2;
+ } else if((m1 = (MatchOp*) e1->isA(MatchOp::type))){
+ // e1 is nothing but a MatchOp: it is fully consumed by m1
+ e1 = NULL;
+ }
+ if((a = (AltOp*) e2->isA(AltOp::type))){
+ if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type)))
+ e2 = a->exp2;
+ } else if((m2 = (MatchOp*) e2->isA(MatchOp::type))){
+ e2 = NULL;
+ }
+ // m1/m2 are NULL where no MatchOp was found; merge() and doAlt() accept
+ // NULL operands
+ return doAlt(merge(m1, m2), doAlt(e1, e2));
+}
+
+char *AltOp::type = "AltOp";
+
+// Size of both branches plus two extra instructions: the FORK in front of
+// the first branch and the GOTO between the branches (see compile()).
+void AltOp::calcSize(Char *rep){
+	exp1->calcSize(rep);
+	exp2->calcSize(rep);
+	size = 2 + exp1->size + exp2->size;
+}
+
+// An alternation has a fixed length only when both branches have the same
+// fixed length; otherwise ~0 ("no fixed length") is returned.
+//
+// Fix: the second length used to be taken from exp1 again, so the branches
+// always compared equal and e.g. "a" | "bc" was reported as having fixed
+// length 1.
+uint AltOp::fixedLength(){
+	uint l1 = exp1->fixedLength();
+	uint l2 = exp2->fixedLength();
+	if(l1 != l2 || l1 == ~0u)
+		return ~0;
+	return l1;
+}
+
+// Emit the alternation into i[0 .. size).  Layout:
+//   i[0]                 FORK, linked to the start of the second branch
+//   i[1 ..]              code of exp1
+//   j = i[exp1->size+1]  GOTO, linked to the instruction after exp2
+//   j[1 ..]              code of exp2
+void AltOp::compile(Char *rep, Ins *i){
+ i->i.tag = FORK;
+ Ins *j = &i[exp1->size + 1];
+ i->i.link = &j[1];
+ exp1->compile(rep, &i[1]);
+ j->i.tag = GOTO;
+ j->i.link = &j[exp2->size + 1];
+ exp2->compile(rep, &j[1]);
+}
+
+// Refine the character partition against both branches, first exp1 and
+// then exp2.
+void AltOp::split(CharSet &s){
+ exp1->split(s);
+ exp2->split(s);
+}
+
+char *CatOp::type = "CatOp";
+
+// A concatenation needs exactly the instructions of its two parts.
+void CatOp::calcSize(Char *rep){
+	exp1->calcSize(rep);
+	exp2->calcSize(rep);
+	size = exp1->size + exp2->size;
+}
+
+// Sum of the two parts' fixed lengths, or ~0 as soon as one part has no
+// fixed length.  exp2 is only asked when exp1 has a fixed length.
+uint CatOp::fixedLength(){
+	uint l1 = exp1->fixedLength();
+	if(l1 == ~0u)
+		return ~0;
+	uint l2 = exp2->fixedLength();
+	if(l2 == ~0u)
+		return ~0;
+	return l1+l2;
+}
+
+// Emit exp1 at i[0], directly followed by exp2.
+void CatOp::compile(Char *rep, Ins *i){
+	exp1->compile(rep, i);
+	exp2->compile(rep, i + exp1->size);
+}
+
+// Refine the character partition against both parts, first exp1 and
+// then exp2.
+void CatOp::split(CharSet &s){
+	exp1->split(s);
+	exp2->split(s);
+}
+
+char *CloseOp::type = "CloseOp";
+
+// The body plus one trailing FORK instruction.
+void CloseOp::calcSize(Char *rep){
+	exp->calcSize(rep);
+	size = 1 + exp->size;
+}
+
+// Emit the body at i[0], followed by a FORK whose link points back to the
+// start of the body.
+void CloseOp::compile(Char *rep, Ins *i){
+	exp->compile(rep, i);
+	Ins *loop = i + exp->size;
+	loop->i.tag = FORK;
+	loop->i.link = i;
+}
+
+// Refine the character partition against the body.
+void CloseOp::split(CharSet &s){
+	exp->split(s);
+}
+
+RegExp *expr(Scanner &);
+
+// Consume one character, or one backslash escape, from the front of `s`
+// and return its value.  s.str is advanced and s.len reduced by the number
+// of characters consumed; the caller must ensure s.len > 0 on entry.
+//
+// Plain characters, the letter escapes (\n \t \v \b \r \f \a) and unknown
+// escapes are mapped through xlat[].  Octal escapes are returned as the raw
+// numeric value, without going through xlat[].
+uchar unescape(SubStr &s){
+ s.len--;
+ uchar c;
+ // not a backslash, or a backslash that is the very last character:
+ // the character stands for itself
+ if((c = *s.str++) != '\\' || s.len == 0)
+ return xlat[c];
+ s.len--;
+ switch(c = *s.str++){
+ case 'n':
+ return xlat['\n'];
+ case 't':
+ return xlat['\t'];
+ case 'v':
+ return xlat['\v'];
+ case 'b':
+ return xlat['\b'];
+ case 'r':
+ return xlat['\r'];
+ case 'f':
+ return xlat['\f'];
+ case 'a':
+ return xlat['\a'];
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': {
+ // octal escape: consumes every following octal digit (no limit on
+ // the count); v is a uchar, so larger values wrap around
+ uchar v = c - '0';
+ for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
+ v = v*8 + (c - '0');
+ return v;
+ } default:
+ // any other escaped character stands for itself
+ return xlat[c];
+ }
+}
+
+// Consume one element of a character class from the front of `s`: either a
+// single (possibly escaped) character or a "lo-hi" pair.  A '-' only forms
+// a pair when at least one more character follows it.  Reversed bounds are
+// swapped.  The result is the half-open range [lo, hi+1).
+Range *getRange(SubStr &s){
+	uchar lo = unescape(s);
+	uchar hi = lo;
+	if(s.len >= 2 && *s.str == '-'){
+		// step over the '-' and read the upper bound
+		s.len--; s.str++;
+		hi = unescape(s);
+		if(hi < lo){
+			uchar swapped = lo;
+			lo = hi;
+			hi = swapped;
+		}
+	}
+	return new Range(lo, hi+1);
+}
+
+// Expression matching exactly the single character `c`.
+RegExp *matchChar(uint c){
+	Range *only = new Range(c, c+1);
+	return new MatchOp(only);
+}
+
+// Turn a delimited string token into a concatenation of single-character
+// matches.  The first and last character of `s` (the delimiters) are
+// dropped; an empty string yields a NullOp.
+RegExp *strToRE(SubStr s){
+	s.str += 1;
+	s.len -= 2;
+	if(s.len == 0)
+		return new NullOp;
+	RegExp *seq = matchChar(unescape(s));
+	while(s.len > 0){
+		RegExp *next = matchChar(unescape(s));
+		seq = new CatOp(seq, next);
+	}
+	return seq;
+}
+
+// Turn a delimited character-class token into a MatchOp over the union of
+// its elements.  The first and last character of `s` (the delimiters) are
+// dropped; an empty class yields a NullOp.
+RegExp *ranToRE(SubStr s){
+	s.str += 1;
+	s.len -= 2;
+	if(s.len == 0)
+		return new NullOp;
+	Range *all = getRange(s);
+	while(s.len > 0){
+		Range *next = getRange(s);
+		all = doUnion(all, next);
+	}
+	return new MatchOp(all);
+}
+
+char *RuleOp::type = "RuleOp";
+
+// A rule: expression `e`, trailing context `c`, action code `t`, and the
+// accept number `a`.  The instruction pointer is filled in by compile().
+RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
+	: exp(e), ctx(c), ins(NULL), accept(a), code(t) {
+}
+
+// Expression plus context plus one closing TERM instruction.
+void RuleOp::calcSize(Char *rep){
+	exp->calcSize(rep);
+	ctx->calcSize(rep);
+	size = 1 + exp->size + ctx->size;
+}
+
+// Emit the expression, then the context, then a TERM instruction linked
+// back to this rule.  The first instruction is remembered in `ins`.
+void RuleOp::compile(Char *rep, Ins *i){
+	ins = i;
+	exp->compile(rep, i);
+	Ins *ctxStart = i + exp->size;
+	ctx->compile(rep, ctxStart);
+	Ins *term = ctxStart + ctx->size;
+	term->i.tag = TERM;
+	term->i.link = this;
+}
+
+// Refine the character partition against the expression and then the
+// context.
+void RuleOp::split(CharSet &s){
+	exp->split(s);
+	ctx->split(s);
+}
+
+extern void printSpan(ostream&, uint, uint);
+
+// Simplify the jump structure of the instruction sequence starting at `i`.
+// Instructions are marked as they are visited; the walk stops at the first
+// instruction that is already marked, so every instruction is processed at
+// most once.  The caller is expected to clear the marks afterwards.
+void optimize(Ins *i){
+ while(!isMarked(i)){
+ mark(i);
+ if(i->i.tag == CHAR){
+ // skip the character slots: continue after the CHAR block
+ i = (Ins*) i->i.link;
+ } else if(i->i.tag == GOTO || i->i.tag == FORK){
+ Ins *target = (Ins*) i->i.link;
+ optimize(target);
+ // a jump to a GOTO that loops on itself becomes a self-loop too.
+ // NOTE(review): the other arm stores `target`, the value the link
+ // already holds -- confirm whether target->i.link was intended.
+ if(target->i.tag == GOTO)
+ i->i.link = target->i.link == target? i : target;
+ if(i->i.tag == FORK){
+ // the second successor of a FORK is the next instruction
+ Ins *follow = (Ins*) &i[1];
+ optimize(follow);
+ if(follow->i.tag == GOTO && follow->i.link == follow){
+ // fall-through is a dead end: only the link is useful
+ i->i.tag = GOTO;
+ } else if(i->i.link == i){
+ // link is a dead end: only the fall-through is useful
+ i->i.tag = GOTO;
+ i->i.link = follow;
+ }
+ }
+ return;
+ } else {
+ ++i;
+ }
+ }
+}
+
+// Generate the scanner code for the specification `re` and write it to `o`.
+// Steps:
+//   1. start with all nChars characters in one pattern (cs.ptn[0]) and let
+//      re->split() refine that partition;
+//   2. derive rep[], the representative character of each character;
+//   3. size and compile `re` into an instruction array with one extra
+//      self-looping GOTO appended at the end (eoi);
+//   4. optimize the jumps, clear the marks optimize() left behind, and
+//      build and emit the DFA.
+void genCode(ostream& o, RegExp *re){
+ CharSet cs;
+ uint j;
+ memset(&cs, 0, sizeof(cs));
+ for(j = 0; j < nChars; ++j){
+ cs.rep[j] = &cs.ptn[0];
+ cs.ptn[j].nxt = &cs.ptn[j+1];
+ }
+ // ptn[1 .. nChars-1] form the free list; ptn[0] holds every character
+ cs.freeHead = &cs.ptn[1];
+ *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
+ cs.ptn[0].card = nChars;
+ cs.ptn[0].nxt = NULL;
+ re->split(cs);
+/*
+ for(uint k = 0; k < nChars;){
+ for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
+ printSpan(cerr, j, k);
+ cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
+ }
+*/
+ Char rep[nChars];
+ for(j = 0; j < nChars; ++j){
+ // the first character met for a pattern becomes its representative;
+ // nxt is reused here to remember that choice
+ if(!cs.rep[j]->nxt)
+ cs.rep[j]->nxt = &cs.ptn[j];
+ rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
+ }
+
+ re->calcSize(rep);
+ Ins *ins = new Ins[re->size+1];
+ memset(ins, 0, (re->size+1)*sizeof(Ins));
+ re->compile(rep, ins);
+ // sentinel after the program: a GOTO that jumps to itself
+ Ins *eoi = &ins[re->size];
+ eoi->i.tag = GOTO;
+ eoi->i.link = eoi;
+
+ optimize(ins);
+ // clear the marks, stepping over the character slots of CHAR blocks
+ for(j = 0; j < re->size;){
+ unmark(&ins[j]);
+ if(ins[j].i.tag == CHAR){
+ j = (Ins*) ins[j].i.link - ins;
+ } else {
+ j++;
+ }
+ }
+
+ // NOTE(review): the literal 256 is presumably meant to equal nChars --
+ // confirm against the DFA constructor.
+ DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
+ dfa->emit(o);
+ delete dfa;
+ delete [] ins;
+}
--- /dev/null
+#ifndef _basics_h
+#define _basics_h
+
+/* Short names for the unsigned integer types used throughout re2c. */
+typedef unsigned int   uint;
+
+typedef unsigned char  uchar;
+typedef unsigned char  byte;
+
+typedef unsigned short ushort;
+typedef unsigned short word;
+
+typedef unsigned long  ulong;
+typedef unsigned long  dword;
+
+#endif
--- /dev/null
+#ifndef lint
+static char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93";
+#endif
+#define YYBYACC 1
+#define YYMAJOR 1
+#define YYMINOR 9
+#define yyclearin (yychar=(-1))
+#define yyerrok (yyerrflag=0)
+#define YYRECOVERING (yyerrflag!=0)
+#define YYPREFIX "yy"
+#line 2 "parser.y"
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+#line 21 "parser.y"
+typedef union {
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+} YYSTYPE;
+#line 35 "y.tab.c"
+#define CLOSE 257
+#define ID 258
+#define CODE 259
+#define RANGE 260
+#define STRING 261
+#define YYERRCODE 256
+short yylhs[] = { -1,
+ 0, 0, 0, 9, 2, 3, 3, 4, 4, 5,
+ 5, 6, 6, 7, 7, 1, 1, 8, 8, 8,
+ 8,
+};
+short yylen[] = { 2,
+ 0, 2, 2, 4, 3, 0, 2, 1, 3, 1,
+ 3, 1, 2, 1, 2, 1, 2, 1, 1, 1,
+ 3,
+};
+short yydefred[] = { 1,
+ 0, 0, 19, 20, 0, 2, 0, 0, 0, 12,
+ 0, 3, 0, 18, 0, 0, 0, 0, 0, 13,
+ 16, 0, 0, 21, 0, 0, 5, 0, 17, 4,
+};
+short yydgoto[] = { 1,
+ 22, 6, 18, 7, 8, 9, 10, 11, 12,
+};
+short yysindex[] = { 0,
+ -27, -49, 0, 0, -23, 0, -44, -84, -23, 0,
+ -243, 0, -23, 0, -39, -23, -23, -244, -23, 0,
+ 0, -239, -53, 0, -104, -84, 0, -23, 0, 0,
+};
+short yyrindex[] = { 0,
+ 0, -31, 0, 0, 0, 0, -227, -17, -20, 0,
+ -40, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -36, 0, 0, -226, -16, 0, -19, 0, 0,
+};
+short yygindex[] = { 0,
+ 0, 0, 0, 21, 18, 17, 1, 0, 0,
+};
+#define YYTABLESIZE 243
+short yytable[] = { 14,
+ 14, 24, 16, 15, 15, 30, 14, 19, 18, 20,
+ 15, 13, 5, 21, 27, 18, 5, 29, 14, 17,
+ 10, 11, 15, 8, 9, 15, 10, 11, 20, 8,
+ 9, 6, 7, 23, 26, 28, 25, 0, 10, 11,
+ 0, 8, 9, 0, 0, 0, 0, 0, 0, 0,
+ 0, 14, 0, 0, 0, 15, 0, 0, 0, 0,
+ 18, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 17, 10, 11, 0, 0, 0, 0, 0, 0, 17,
+ 0, 0, 0, 14, 17, 0, 0, 15, 0, 0,
+ 0, 0, 18, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 10, 11, 0, 8, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 14, 14, 14,
+ 14, 15, 15, 15, 15, 18, 18, 18, 18, 18,
+ 2, 0, 3, 4, 14, 0, 3, 4, 10, 11,
+ 0, 8, 9,
+};
+short yycheck[] = { 40,
+ 41, 41, 47, 40, 41, 59, 47, 92, 40, 9,
+ 47, 61, 40, 257, 259, 47, 40, 257, 59, 124,
+ 41, 41, 59, 41, 41, 5, 47, 47, 28, 47,
+ 47, 259, 259, 13, 17, 19, 16, -1, 59, 59,
+ -1, 59, 59, -1, -1, -1, -1, -1, -1, -1,
+ -1, 92, -1, -1, -1, 92, -1, -1, -1, -1,
+ 92, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 124, 92, 92, -1, -1, -1, -1, -1, -1, 124,
+ -1, -1, -1, 124, 124, -1, -1, 124, -1, -1,
+ -1, -1, 124, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 124, 124, -1, 124, 124, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 258, 259, 260,
+ 261, 258, 259, 260, 261, 257, 258, 259, 260, 261,
+ 258, -1, 260, 261, 258, -1, 260, 261, 259, 259,
+ -1, 259, 259,
+};
+#define YYFINAL 1
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+#define YYMAXTOKEN 261
+#if YYDEBUG
+char *yyname[] = {
+"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,"'('","')'",0,0,0,0,0,"'/'",0,0,0,0,0,0,0,0,0,0,0,"';'",0,"'='",0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'\\\\'",0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+"CLOSE","ID","CODE","RANGE","STRING",
+};
+char *yyrule[] = {
+"$accept : spec",
+"spec :",
+"spec : spec rule",
+"spec : spec decl",
+"decl : ID '=' expr ';'",
+"rule : expr look CODE",
+"look :",
+"look : '/' expr",
+"expr : diff",
+"expr : expr '|' diff",
+"diff : term",
+"diff : diff '\\\\' term",
+"term : factor",
+"term : term factor",
+"factor : primary",
+"factor : primary close",
+"close : CLOSE",
+"close : close CLOSE",
+"primary : ID",
+"primary : RANGE",
+"primary : STRING",
+"primary : '(' expr ')'",
+};
+#endif
+#ifdef YYSTACKSIZE
+#undef YYMAXDEPTH
+#define YYMAXDEPTH YYSTACKSIZE
+#else
+#ifdef YYMAXDEPTH
+#define YYSTACKSIZE YYMAXDEPTH
+#else
+#define YYSTACKSIZE 500
+#define YYMAXDEPTH 500
+#endif
+#endif
+int yydebug;
+int yynerrs;
+int yyerrflag;
+int yychar;
+short *yyssp;
+YYSTYPE *yyvsp;
+YYSTYPE yyval;
+YYSTYPE yylval;
+short yyss[YYSTACKSIZE];
+YYSTYPE yyvs[YYSTACKSIZE];
+#define yystacksize YYSTACKSIZE
+#line 121 "parser.y"
+
+// Error hook called by yyparse(): forwards the message to the fatal()
+// method of the active scanner `in`.
+void yyerror(char* s){
+ in->fatal(s);
+}
+
+// Token source called by yyparse(): returns whatever scan() of the active
+// scanner `in` returns.
+int yylex(){
+ return in->scan();
+}
+
+// Translate the input read through descriptor `i` and write the result
+// to `o`.
+//
+// Output starts with a "Generated by re2c" banner carrying the current time
+// and a #line directive that names the input file ("<stdin>" when fileName
+// is NULL).  Backslashes in the file name are written doubled so the name
+// is a valid C string literal.  After that, for as long as in->echo(o)
+// returns true, one specification is parsed, code is generated for it when
+// the parse produced a spec, and a #line directive with the scanner's
+// current line is written.
+//
+// NOTE(review): the banner still says "re2c 0.5" -- confirm whether it
+// should follow the release version.  This file is generated from parser.y,
+// so the same change presumably belongs there as well.
+void parse(int i, ostream &o){
+	char * fnamebuf;
+	char * token;
+
+	o << "/* Generated by re2c 0.5 on ";
+	time_t now = time(&now);
+	// ctime() yields "Www Mmm dd hh:mm:ss yyyy\n"; write it without the
+	// trailing newline
+	o.write(ctime(&now), 24);
+	o << " */\n";
+
+	in = new Scanner(i);
+
+	o << "#line " << in->line() << " \"";
+	if( fileName != NULL ) {
+		fnamebuf = strdup( fileName );
+	} else {
+		fnamebuf = strdup( "<stdin>" );
+	}
+	// strdup() returns NULL when memory is exhausted, and strtok() returns
+	// NULL at once for a name that is empty or consists only of
+	// backslashes.  Streaming a NULL char* is undefined behaviour, so only
+	// real tokens are written.
+	token = ( fnamebuf != NULL ) ? strtok( fnamebuf, "\\" ) : NULL;
+	while( token != NULL ) {
+		o << token;
+		token = strtok( NULL, "\\" );
+		if( token != NULL )
+			o << "\\\\";
+	}
+	o << "\"\n";
+	free( fnamebuf );
+
+	while(in->echo(o)){
+		yyparse();
+		if(spec)
+			genCode(o, spec);
+		o << "#line " << in->line() << "\n";
+	}
+}
+#line 235 "y.tab.c"
+#define YYABORT goto yyabort
+#define YYREJECT goto yyabort
+#define YYACCEPT goto yyaccept
+#define YYERROR goto yyerrlab
+int
+yyparse()
+{
+ register int yym, yyn, yystate;
+#if YYDEBUG
+ register char *yys;
+ extern char *getenv();
+
+ if (yys = getenv("YYDEBUG"))
+ {
+ yyn = *yys;
+ if (yyn >= '0' && yyn <= '9')
+ yydebug = yyn - '0';
+ }
+#endif
+
+ yynerrs = 0;
+ yyerrflag = 0;
+ yychar = (-1);
+
+ yyssp = yyss;
+ yyvsp = yyvs;
+ *yyssp = yystate = 0;
+
+yyloop:
+ if (yyn = yydefred[yystate]) goto yyreduce;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ }
+ if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, shifting to state %d\n",
+ YYPREFIX, yystate, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ yychar = (-1);
+ if (yyerrflag > 0) --yyerrflag;
+ goto yyloop;
+ }
+ if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+ yyn = yytable[yyn];
+ goto yyreduce;
+ }
+ if (yyerrflag) goto yyinrecovery;
+#ifdef lint
+ goto yynewerror;
+#endif
+yynewerror:
+ yyerror("syntax error");
+#ifdef lint
+ goto yyerrlab;
+#endif
+yyerrlab:
+ ++yynerrs;
+yyinrecovery:
+ if (yyerrflag < 3)
+ {
+ yyerrflag = 3;
+ for (;;)
+ {
+ if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, error recovery shifting\
+ to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ goto yyloop;
+ }
+ else
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: error recovery discarding state %d\n",
+ YYPREFIX, *yyssp);
+#endif
+ if (yyssp <= yyss) goto yyabort;
+ --yyssp;
+ --yyvsp;
+ }
+ }
+ }
+ else
+ {
+ if (yychar == 0) goto yyabort;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ yychar = (-1);
+ goto yyloop;
+ }
+yyreduce:
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, reducing by rule %d (%s)\n",
+ YYPREFIX, yystate, yyn, yyrule[yyn]);
+#endif
+ yym = yylen[yyn];
+ yyval = yyvsp[1-yym];
+ switch (yyn)
+ {
+case 1:
+#line 40 "parser.y"
+{ accept = 0;
+ spec = NULL; }
+break;
+case 2:
+#line 43 "parser.y"
+{ spec = spec? mkAlt(spec, yyvsp[0].regexp) : yyvsp[0].regexp; }
+break;
+case 4:
+#line 48 "parser.y"
+{ if(yyvsp[-3].symbol->re)
+ in->fatal("sym already defined");
+ yyvsp[-3].symbol->re = yyvsp[-1].regexp; }
+break;
+case 5:
+#line 54 "parser.y"
+{ yyval.regexp = new RuleOp(yyvsp[-2].regexp, yyvsp[-1].regexp, yyvsp[0].token, accept++); }
+break;
+case 6:
+#line 58 "parser.y"
+{ yyval.regexp = new NullOp; }
+break;
+case 7:
+#line 60 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 8:
+#line 64 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 9:
+#line 66 "parser.y"
+{ yyval.regexp = mkAlt(yyvsp[-2].regexp, yyvsp[0].regexp); }
+break;
+case 10:
+#line 70 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 11:
+#line 72 "parser.y"
+{ yyval.regexp = mkDiff(yyvsp[-2].regexp, yyvsp[0].regexp);
+ if(!yyval.regexp)
+ in->fatal("can only difference char sets");
+ }
+break;
+case 12:
+#line 79 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 13:
+#line 81 "parser.y"
+{ yyval.regexp = new CatOp(yyvsp[-1].regexp, yyvsp[0].regexp); }
+break;
+case 14:
+#line 85 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 15:
+#line 87 "parser.y"
+{
+ switch(yyvsp[0].op){
+ case '*':
+ yyval.regexp = mkAlt(new CloseOp(yyvsp[-1].regexp), new NullOp());
+ break;
+ case '+':
+ yyval.regexp = new CloseOp(yyvsp[-1].regexp);
+ break;
+ case '?':
+ yyval.regexp = mkAlt(yyvsp[-1].regexp, new NullOp());
+ break;
+ }
+ }
+break;
+case 16:
+#line 103 "parser.y"
+{ yyval.op = yyvsp[0].op; }
+break;
+case 17:
+#line 105 "parser.y"
+{ yyval.op = (yyvsp[-1].op == yyvsp[0].op) ? yyvsp[-1].op : '*'; }
+break;
+case 18:
+#line 109 "parser.y"
+{ if(!yyvsp[0].symbol->re)
+ in->fatal("can't find symbol");
+ yyval.regexp = yyvsp[0].symbol->re; }
+break;
+case 19:
+#line 113 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 20:
+#line 115 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 21:
+#line 117 "parser.y"
+{ yyval.regexp = yyvsp[-1].regexp; }
+break;
+#line 476 "y.tab.c"
+ }
+ yyssp -= yym;
+ yystate = *yyssp;
+ yyvsp -= yym;
+ yym = yylhs[yyn];
+ if (yystate == 0 && yym == 0)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state 0 to\
+ state %d\n", YYPREFIX, YYFINAL);
+#endif
+ yystate = YYFINAL;
+ *++yyssp = YYFINAL;
+ *++yyvsp = yyval;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, YYFINAL, yychar, yys);
+ }
+#endif
+ }
+ if (yychar == 0) goto yyaccept;
+ goto yyloop;
+ }
+ if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
+ yystate = yytable[yyn];
+ else
+ yystate = yydgoto[yym];
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state %d \
+to state %d\n", YYPREFIX, *yyssp, yystate);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate;
+ *++yyvsp = yyval;
+ goto yyloop;
+yyoverflow:
+ yyerror("yacc stack overflow");
+yyabort:
+ return (1);
+yyaccept:
+ return (0);
+}
--- /dev/null
+
+
+
+RE2C(1) RE2C(1)
+
+
+N\bNA\bAM\bME\bE
+ re2c - convert regular expressions to C/C++
+
+
+S\bSY\bYN\bNO\bOP\bPS\bSI\bIS\bS
+ r\bre\be2\b2c\bc [-\b-e\bes\bsb\bb] _\bn_\ba_\bm_\be
+
+
+D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
+ r\bre\be2\b2c\bc is a preprocessor that generates C-based recognizers
+ from regular expressions. The input to r\bre\be2\b2c\bc consists of
+ C/C++ source interleaved with comments of the form /\b/*\b*!\b!r\bre\be2\b2c\bc
+ ... *\b*/\b/ which contain scanner specifications. In the out-
+ put these comments are replaced with code that, when exe-
+ cuted, will find the next input token and then execute
+ some user-supplied token-specific code.
+
+ For example, given the following code
+
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ /*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+ */
+ }
+
+ r\bre\be2\b2c\bc will generate
+
+ /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+ #line 1 "simple.re"
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ {
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ yy1: ++YYCURSOR;
+ yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+
+
+
+Version 0.5 8 April 1994 1
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ if(yych >= ':') goto yy4;
+ yy2: yych = *++YYCURSOR;
+ goto yy7;
+ yy3:
+ #line 10
+ {return YYCURSOR;}
+ yy4: yych = *++YYCURSOR;
+ yy5:
+ #line 11
+ {return NULL;}
+ yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+ }
+ #line 12
+
+ }
+
+
+O\bOP\bPT\bTI\bIO\bON\bNS\bS
+ r\bre\be2\b2c\bc provides the following options:
+
+ -\b-e\be Cross-compile from an ASCII platform to an EBCDIC
+ one.
+
+ -\b-s\bs Generate nested i\bif\bfs for some s\bsw\bwi\bit\btc\bch\bhes. Many com-
+ pilers need this assist to generate better code.
+
+ -\b-b\bb Implies -\b-s\bs. Use bit vectors as well in the attempt
+ to coax better code out of the compiler. Most use-
+ ful for specifications with more than a few key-
+ words (e.g. for most programming languages).
+
+
+I\bIN\bNT\bTE\bER\bRF\bFA\bAC\bCE\bE C\bCO\bOD\bDE\bE
+ Unlike other scanner generators, r\bre\be2\b2c\bc does not generate
+ complete scanners: the user must supply some interface
+ code. In particular, the user must define the following
+ macros:
+
+ Y\bYY\bYC\bCT\bTY\bYP\bPE\bE Type used to hold an input symbol. Usually c\bch\bha\bar\br or
+ u\bun\bns\bsi\big\bgn\bne\bed\bd c\bch\bha\bar\br.
+
+ Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR
+ _\bl-expression of type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE that points to the
+ current input symbol. The generated code advances
+ Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR as symbols are matched. On entry, Y\bYY\bYC\bCU\bUR\bR-\b-
+ S\bSO\bOR\bR is assumed to point to the first character of
+ the current token. On exit, Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR will point to
+ the first character of the following token.
+
+
+
+
+Version 0.5 8 April 1994 2
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ Y\bYY\bYL\bLI\bIM\bMI\bIT\bT Expression of type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE that marks the end of
+ the buffer (Y\bYY\bYL\bLI\bIM\bMI\bIT\bT[\b[-\b-1\b1]\b] is the last character in the
+ buffer). The generated code repeatedly compares
+ Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR to Y\bYY\bYL\bLI\bIM\bMI\bIT\bT to determine when the buffer
+ needs (re)filling.
+
+ Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR
+ _\bl-expression of type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE. The generated code
+ saves backtracking information in Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR.
+
+ Y\bYY\bYF\bFI\bIL\bLL\bL(\b(_\bn)\b)
+ The generated code "calls" Y\bYY\bYF\bFI\bIL\bLL\bL when the buffer
+ needs (re)filling: at least _\bn additional charac-
+ ters should be provided. Y\bYY\bYF\bFI\bIL\bLL\bL should adjust
+ Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR, Y\bYY\bYL\bLI\bIM\bMI\bIT\bT and Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR as needed. Note
+ that for typical programming languages _\bn will be
+ the length of the longest keyword plus one.
+
+
+S\bSC\bCA\bAN\bNN\bNE\bER\bR S\bSP\bPE\bEC\bCI\bIF\bFI\bIC\bCA\bAT\bTI\bIO\bON\bNS\bS
+ Each scanner specification consists of a set of _\br_\bu_\bl_\be_\bs and
+ name definitions. Rules consist of a regular expression
+ along with a block of C/C++ code that is to be executed
+ when the associated regular expression is matched. Name
+ definitions are of the form ``_\bn_\ba_\bm_\be =\b= _\br_\be_\bg_\bu_\bl_\ba_\br _\be_\bx_\bp_\br_\be_\bs_\b-
+ _\bs_\bi_\bo_\bn;\b;''.
+
+
+S\bSU\bUM\bMM\bMA\bAR\bRY\bY O\bOF\bF R\bRE\bE2\b2C\bC R\bRE\bEG\bGU\bUL\bLA\bAR\bR E\bEX\bXP\bPR\bRE\bES\bSS\bSI\bIO\bON\bNS\bS
+ "\b"f\bfo\boo\bo"\b" the literal string f\bfo\boo\bo. ANSI-C escape sequences
+ can be used.
+
+ [\b[x\bxy\byz\bz]\b] a "character class"; in this case, the regular
+ expression matches either an 'x\bx', a 'y\by', or a 'z\bz'.
+
+ [\b[a\bab\bbj\bj-\b-o\boZ\bZ]\b]
+ a "character class" with a range in it; matches an
+ 'a\ba', a 'b\bb', any letter from 'j\bj' through 'o\bo', or a
+ 'Z\bZ'.
+
+ _\br\\b\_\bs match any _\br which isn't an _\bs. _\br and _\bs must be regu-
+ lar expressions which can be expressed as character
+ classes.
+
+ _\br*\b* zero or more _\br's, where _\br is any regular expression
+
+ _\br+\b+ one or more _\br's
+
+ _\br?\b? zero or one _\br's (that is, "an optional _\br")
+
+ name the expansion of the "name" definition (see above)
+
+ (\b(_\br)\b) an _\br; parentheses are used to override precedence
+ (see below)
+
+
+
+Version 0.5 8 April 1994 3
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ _\br_\bs an _\br followed by an _\bs ("concatenation")
+
+ _\br|\b|_\bs either an _\br or an _\bs
+
+ _\br/\b/_\bs an _\br but only if it is followed by an _\bs. The s is
+ not part of the matched text. This type of regular
+ expression is called "trailing context".
+
+ The regular expressions listed above are grouped according
+ to precedence, from highest precedence at the top to low-
+ est at the bottom. Those grouped together have equal
+ precedence.
+
+
+A\bA L\bLA\bAR\bRG\bGE\bER\bR E\bEX\bXA\bAM\bMP\bPL\bLE\bE
+ #include <stdlib.h>
+ #include <stdio.h>
+ #include <fcntl.h>
+ #include <string.h>
+
+ #define ADDEQ 257
+ #define ANDAND 258
+ #define ANDEQ 259
+ #define ARRAY 260
+ #define ASM 261
+ #define AUTO 262
+ #define BREAK 263
+ #define CASE 264
+ #define CHAR 265
+ #define CONST 266
+ #define CONTINUE 267
+ #define DECR 268
+ #define DEFAULT 269
+ #define DEREF 270
+ #define DIVEQ 271
+ #define DO 272
+ #define DOUBLE 273
+ #define ELLIPSIS 274
+ #define ELSE 275
+ #define ENUM 276
+ #define EQL 277
+ #define EXTERN 278
+ #define FCON 279
+ #define FLOAT 280
+ #define FOR 281
+ #define FUNCTION 282
+ #define GEQ 283
+ #define GOTO 284
+ #define ICON 285
+ #define ID 286
+ #define IF 287
+ #define INCR 288
+ #define INT 289
+ #define LEQ 290
+
+
+
+Version 0.5 8 April 1994 4
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ #define LONG 291
+ #define LSHIFT 292
+ #define LSHIFTEQ 293
+ #define MODEQ 294
+ #define MULEQ 295
+ #define NEQ 296
+ #define OREQ 297
+ #define OROR 298
+ #define POINTER 299
+ #define REGISTER 300
+ #define RETURN 301
+ #define RSHIFT 302
+ #define RSHIFTEQ 303
+ #define SCON 304
+ #define SHORT 305
+ #define SIGNED 306
+ #define SIZEOF 307
+ #define STATIC 308
+ #define STRUCT 309
+ #define SUBEQ 310
+ #define SWITCH 311
+ #define TYPEDEF 312
+ #define UNION 313
+ #define UNSIGNED 314
+ #define VOID 315
+ #define VOLATILE 316
+ #define WHILE 317
+ #define XOREQ 318
+ #define EOI 319
+
+ typedef unsigned int uint;
+ typedef unsigned char uchar;
+
+ #define BSIZE 8192
+
+ #define YYCTYPE uchar
+ #define YYCURSOR cursor
+ #define YYLIMIT s->lim
+ #define YYMARKER s->ptr
+ #define YYFILL(n) {cursor = fill(s, cursor);}
+
+ #define RET(i) {s->cur = cursor; return i;}
+
+ typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+ } Scanner;
+
+ uchar *fill(Scanner *s, uchar *cursor){
+ if(!s->eof){
+ uint cnt = s->tok - s->bot;
+ if(cnt){
+ memcpy(s->bot, s->tok, s->lim - s->tok);
+
+
+
+Version 0.5 8 April 1994 5
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){
+ uchar *buf = (uchar*)
+ malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok);
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
+ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+ }
+ s->lim += cnt;
+ }
+ return cursor;
+ }
+
+ int scan(Scanner *s){
+ uchar *cursor = s->cur;
+ std:
+ s->tok = cursor;
+ /*!re2c
+ any = [\000-\377];
+ O = [0-7];
+ D = [0-9];
+ L = [a-zA-Z_];
+ H = [a-fA-F0-9];
+ E = [Ee] [+-]? D+;
+ FS = [fFlL];
+ IS = [uUlL]*;
+ ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+ */
+
+ /*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+
+
+
+Version 0.5 8 April 1994 6
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+
+
+
+Version 0.5 8 April 1994 7
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+ */
+
+ comment:
+ /*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+
+
+
+Version 0.5 8 April 1994 8
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ goto comment;
+ }
+ any { goto comment; }
+ */
+ }
+
+ main(){
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in));
+ in.fd = 0;
+ while((t = scan(&in)) != EOI){
+ /*
+ printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+ printf("%d\n", t);
+ */
+ }
+ close(in.fd);
+ }
+
+
+S\bSE\bEE\bE A\bAL\bLS\bSO\bO
+ flex(1), lex(1).
+
+
+F\bFE\bEA\bAT\bTU\bUR\bRE\bES\bS
+ r\bre\be2\b2c\bc does not provide a default action: the generated code
+ assumes that the input will consist of a sequence of
+ tokens. Typically this can be dealt with by adding a rule
+ such as the one for unexpected characters in the example
+ above.
+
+ The user must arrange for a sentinel token to appear at
+ the end of input (and provide a rule for matching it):
+ r\bre\be2\b2c\bc does not provide an <\b<<\b<E\bEO\bOF\bF>\b>>\b> expression. If the
+ source is from a null-byte terminated string, a rule
+ matching a null character will suffice. If the source is
+ from a file then the approach taken in the example can be
+ used: pad the input with a newline (or some other charac-
+ ter that can't appear within another token); upon recog-
+ nizing such a character check to see if it is the sentinel
+ and act accordingly.
+
+ r\bre\be2\b2c\bc does not provide start conditions: use a separate
+ scanner specification for each start condition (as illus-
+ trated in the above example).
+
+ Negated character classes ([^x]) are not supported. Use the
+ difference operator instead (for example, [\000-\377]\[x]).
+
+B\bBU\bUG\bGS\bS
+ Only fixed length trailing context can be handled.
+
+ The maximum value appearing as a parameter _\bn to Y\bYY\bYF\bFI\bIL\bLL\bL is
+ not provided to the generated code (this value is needed
+
+
+
+Version 0.5 8 April 1994 9
+
+
+
+
+
+RE2C(1) RE2C(1)
+
+
+ for constructing the interface code). Note that this
+ value is usually relatively small: for typical programming
+ languages _\bn will be the length of the longest keyword plus
+ one.
+
+ Difference only works for character sets.
+
+ The r\bre\be2\b2c\bc internal algorithms need documentation.
+
+
+A\bAU\bUT\bTH\bHO\bOR\bR
+ Please send bug reports, fixes and feedback to:
+
+ Peter Bumbulis
+ Computer Systems Group
+ University of Waterloo
+ Waterloo, Ontario
+ N2L 3G1
+ Internet: peter@csg.uwaterloo.ca
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Version 0.5 8 April 1994 10
+
+
--- /dev/null
+/* Generated by re2c 0.5 on Sat May 15 11:35:52 1999 */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+// Number of bytes requested from the input descriptor on every refill
+// (see Scanner::fill).
+#define BSIZE 8192
+
+// Interface macros consumed by the re2c-generated state machines below.
+// YYFILL ignores its argument n: fill() always tries to read BSIZE bytes.
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT lim
+#define YYMARKER ptr
+#define YYFILL(n) {cursor = fill(cursor);}
+
+// Save the local cursor back into the scanner (cur) and return code i.
+#define RETURN(i) {cur = cursor; return i;}
+
+
+// Create a scanner that reads from file descriptor i.  Every buffer
+// pointer starts out NULL (the buffer is allocated by the first fill()),
+// the token position counters start at 0 and the current line at 1.
+Scanner::Scanner(int i)
+    : in(i)
+    , bot(NULL)
+    , tok(NULL)
+    , ptr(NULL)
+    , cur(NULL)
+    , pos(NULL)
+    , lim(NULL)
+    , top(NULL)
+    , eof(NULL)
+    , tchar(0)
+    , tline(0)
+    , cline(1)
+{
+}
+
+// Make more input available to the generated scanner.
+//
+// cursor  the scanner's current read position inside the buffer.
+// returns the cursor, adjusted for any shifting or reallocation of the
+//         buffer performed here.
+//
+// Steps, skipped entirely once eof has been set:
+//   1. discard the bytes in front of the current token (bot..tok) by
+//      sliding the live data down to bot;
+//   2. grow the buffer if fewer than BSIZE bytes are free after lim;
+//   3. read up to BSIZE bytes; on a short read append a '\n' sentinel and
+//      remember the position just past it in eof.
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+        uint cnt = tok - bot;
+        if(cnt){
+            // Source and destination overlap whenever the live data is
+            // longer than the discarded prefix, so memcpy would be
+            // undefined behaviour here; memmove handles the overlap.
+            memmove(bot, tok, lim - tok);
+            tok = bot;
+            ptr -= cnt;
+            cursor -= cnt;
+            pos -= cnt;
+            lim -= cnt;
+        }
+        if((top - lim) < BSIZE){
+            uchar *buf = new uchar[(lim - bot) + BSIZE];
+            // buf is freshly allocated, so a plain copy is safe here.
+            memcpy(buf, tok, lim - tok);
+            tok = buf;
+            ptr = &buf[ptr - bot];
+            cursor = &buf[cursor - bot];
+            pos = &buf[pos - bot];
+            lim = &buf[lim - bot];
+            top = &lim[BSIZE];
+            delete [] bot;
+            bot = buf;
+        }
+        // read() reports failure as -1.  Storing that directly in the
+        // unsigned cnt would turn it into a huge index and put the
+        // sentinel far outside the buffer, so treat an error like an
+        // empty read, i.e. end of input.
+        int got = read(in, (char*) lim, BSIZE);
+        if(got < 0)
+            got = 0;
+        cnt = got;
+        if(cnt != BSIZE){
+            eof = &lim[cnt]; *eof++ = '\n';
+        }
+        lim += cnt;
+    }
+    return cursor;
+}
+
+#line 68
+
+
+// Copy input verbatim to out until the start of a scanner specification.
+//
+// The generated automaton looks for the seven characters "/*!re2c".
+// When they are found, everything from tok up to (but not including) that
+// marker is written to out and 1 is returned.  Each complete line is
+// written as soon as its '\n' is seen; when that '\n' is the sentinel
+// appended at end of input (cursor == eof) the function returns 0.
+int Scanner::echo(ostream &out){
+ uchar *cursor = cur;
+ tok = cursor;
+echo:
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+yy1: ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy4;
+ if(yych != '/') goto yy6;
+yy2: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '*') goto yy7;
+ // any other character: keep scanning, nothing is written yet
+yy3:
+#line 82
+ { goto echo; }
+yy4: yych = *++YYCURSOR;
+ // newline: flush the pending text, or stop at the end-of-input sentinel
+yy5:
+#line 78
+ { if(cursor == eof) RETURN(0);
+ out.write(tok, cursor - tok);
+ tok = pos = cursor; cline++;
+ goto echo; }
+yy6: yych = *++YYCURSOR;
+ goto yy3;
+yy7: yych = *++YYCURSOR;
+ if(yych == '!') goto yy9;
+ // partial match failed: back up to the position saved in YYMARKER
+yy8: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy3;
+ }
+yy9: yych = *++YYCURSOR;
+ if(yych != 'r') goto yy8;
+yy10: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy8;
+yy11: yych = *++YYCURSOR;
+ if(yych != '2') goto yy8;
+yy12: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy8;
+yy13: yych = *++YYCURSOR;
+ // full "/*!re2c" marker matched: write everything before it
+yy14:
+#line 75
+ { out.write(tok, &cursor[-7] - tok);
+ tok = cursor;
+ RETURN(1); }
+}
+#line 83
+
+}
+
+
+// Return the next token of a scanner specification.
+//
+// The body consists of three re2c-generated automata that jump to each
+// other:
+//   scan:    the main tokenizer.  Returns ID, RANGE, STRING, CLOSE, a
+//            single character (*tok), or 0 at "*/" and at end of input.
+//            Sets the matching member of yylval where applicable.
+//   code:    entered after '{'.  Tracks brace nesting in depth, skipping
+//            over quoted strings and character constants, and returns
+//            CODE when the matching '}' is reached.
+//   comment: entered after "/*".  Tracks nesting in depth and resumes
+//            scan: when the outermost comment is closed.
+// tchar and tline record the column offset and line of the token start
+// and are used by fatal() when reporting errors.
+int Scanner::scan(){
+ uchar *cursor = cur;
+ uint depth;
+
+scan:
+ tchar = cursor - pos;
+ tline = cline;
+ tok = cursor;
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy15;
+yy16: ++YYCURSOR;
+yy15:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ':'){
+ if(yych <= '"'){
+ if(yych <= '\n'){
+ if(yych <= '\b') goto yy35;
+ if(yych <= '\t') goto yy31;
+ goto yy33;
+ } else {
+ if(yych == ' ') goto yy31;
+ if(yych <= '!') goto yy35;
+ goto yy23;
+ }
+ } else {
+ if(yych <= '*'){
+ if(yych <= '\'') goto yy35;
+ if(yych <= ')') goto yy27;
+ goto yy21;
+ } else {
+ if(yych <= '+') goto yy28;
+ if(yych == '/') goto yy19;
+ goto yy35;
+ }
+ }
+ } else {
+ if(yych <= 'Z'){
+ if(yych <= '='){
+ if(yych == '<') goto yy35;
+ goto yy27;
+ } else {
+ if(yych == '?') goto yy28;
+ if(yych <= '@') goto yy35;
+ goto yy29;
+ }
+ } else {
+ if(yych <= '`'){
+ if(yych <= '[') goto yy25;
+ if(yych <= '\\') goto yy27;
+ goto yy35;
+ } else {
+ if(yych <= 'z') goto yy29;
+ if(yych <= '{') goto yy17;
+ if(yych <= '|') goto yy27;
+ goto yy35;
+ }
+ }
+ }
+ // '{' opens an embedded code block
+yy17: yych = *++YYCURSOR;
+yy18:
+#line 96
+ { depth = 1;
+ goto code;
+ }
+yy19: yych = *++YYCURSOR;
+ if(yych == '*') goto yy54;
+ // single-character token: the character itself is the token code
+yy20:
+#line 115
+ { RETURN(*tok); }
+yy21: yych = *++YYCURSOR;
+ if(yych == '/') goto yy52;
+ // closure operator ('*', '+' or '?'): the operator goes into yylval.op
+yy22:
+#line 117
+ { yylval.op = *tok;
+ RETURN(CLOSE); }
+ // '"' starts a string; reaching a newline first is an error
+yy23: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy48;
+yy24:
+#line 108
+ { fatal("bad string"); }
+ // '[' starts a character range; reaching a newline first is an error
+yy25: yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy42;
+yy26:
+#line 113
+ { fatal("bad character constant"); }
+yy27: yych = *++YYCURSOR;
+ goto yy20;
+yy28: yych = *++YYCURSOR;
+ goto yy22;
+yy29: yych = *++YYCURSOR;
+ goto yy40;
+ // identifier: letters followed by letters or digits
+yy30:
+#line 120
+ { cur = cursor;
+ yylval.symbol = Symbol::find(token());
+ return ID; }
+yy31: yych = *++YYCURSOR;
+ goto yy38;
+ // run of blanks and tabs: skipped
+yy32:
+#line 124
+ { goto scan; }
+yy33: yych = *++YYCURSOR;
+ // newline: end of input if it is the sentinel, otherwise count the line
+yy34:
+#line 126
+ { if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
+yy35: yych = *++YYCURSOR;
+yy36:
+#line 131
+ { cerr << "unexpected character: " << *tok << endl;
+ goto scan;
+ }
+yy37: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy38: if(yych == '\t') goto yy37;
+ if(yych == ' ') goto yy37;
+ goto yy32;
+yy39: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy40: if(yych <= '@'){
+ if(yych <= '/') goto yy30;
+ if(yych <= '9') goto yy39;
+ goto yy30;
+ } else {
+ if(yych <= 'Z') goto yy39;
+ if(yych <= '`') goto yy30;
+ if(yych <= 'z') goto yy39;
+ goto yy30;
+ }
+yy41: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy42: if(yych <= '['){
+ if(yych != '\n') goto yy41;
+ } else {
+ if(yych <= '\\') goto yy44;
+ if(yych <= ']') goto yy45;
+ goto yy41;
+ }
+ // match failed part-way: return to the saved position and take the
+ // error action selected by yyaccept
+yy43: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy24;
+ case 1: goto yy26;
+ }
+yy44: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy43;
+ goto yy41;
+yy45: yych = *++YYCURSOR;
+ // complete "[...]" range
+yy46:
+#line 110
+ { cur = cursor;
+ yylval.regexp = ranToRE(token());
+ return RANGE; }
+yy47: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy48: if(yych <= '!'){
+ if(yych == '\n') goto yy43;
+ goto yy47;
+ } else {
+ if(yych <= '"') goto yy50;
+ if(yych != '\\') goto yy47;
+ }
+yy49: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy43;
+ goto yy47;
+yy50: yych = *++YYCURSOR;
+ // complete quoted string
+yy51:
+#line 105
+ { cur = cursor;
+ yylval.regexp = strToRE(token());
+ return STRING; }
+yy52: yych = *++YYCURSOR;
+ // "*/" ends the specification
+yy53:
+#line 102
+ { tok = cursor;
+ RETURN(0); }
+yy54: yych = *++YYCURSOR;
+ // "/*" opens a comment
+yy55:
+#line 99
+ { depth = 1;
+ goto comment; }
+}
+#line 134
+
+
+code:
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy56;
+yy57: ++YYCURSOR;
+yy56:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '&'){
+ if(yych <= '\n'){
+ if(yych <= '\t') goto yy64;
+ goto yy62;
+ } else {
+ if(yych == '"') goto yy66;
+ goto yy64;
+ }
+ } else {
+ if(yych <= '{'){
+ if(yych <= '\'') goto yy67;
+ if(yych <= 'z') goto yy64;
+ goto yy60;
+ } else {
+ if(yych != '}') goto yy64;
+ }
+ }
+ // '}' closes one nesting level; the outermost one ends the CODE token
+yy58: yych = *++YYCURSOR;
+yy59:
+#line 138
+ { if(--depth == 0){
+ cur = cursor;
+ yylval.token = new Token(token(), tline);
+ return CODE;
+ }
+ goto code; }
+yy60: yych = *++YYCURSOR;
+yy61:
+#line 144
+ { ++depth;
+ goto code; }
+yy62: yych = *++YYCURSOR;
+yy63:
+#line 146
+ { if(cursor == eof) fatal("missing '}'");
+ pos = cursor; cline++;
+ goto code;
+ }
+yy64: yych = *++YYCURSOR;
+yy65:
+#line 150
+ { goto code; }
+yy66: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy65;
+ goto yy73;
+yy67: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy65;
+ goto yy69;
+yy68: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy69: if(yych <= '&'){
+ if(yych != '\n') goto yy68;
+ } else {
+ if(yych <= '\'') goto yy64;
+ if(yych == '\\') goto yy71;
+ goto yy68;
+ }
+yy70: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy65;
+ }
+yy71: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy70;
+ goto yy68;
+yy72: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy73: if(yych <= '!'){
+ if(yych == '\n') goto yy70;
+ goto yy72;
+ } else {
+ if(yych <= '"') goto yy64;
+ if(yych != '\\') goto yy72;
+ }
+yy74: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy70;
+ goto yy72;
+}
+#line 151
+
+
+comment:
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy75;
+yy76: ++YYCURSOR;
+yy75:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych == '\n') goto yy80;
+ goto yy82;
+ } else {
+ if(yych <= '*') goto yy77;
+ if(yych == '/') goto yy79;
+ goto yy82;
+ }
+yy77: yych = *++YYCURSOR;
+ if(yych == '/') goto yy85;
+yy78:
+#line 165
+ { goto comment; }
+yy79: yych = *++YYCURSOR;
+ if(yych == '*') goto yy83;
+ goto yy78;
+yy80: yych = *++YYCURSOR;
+yy81:
+#line 161
+ { if(cursor == eof) RETURN(0);
+ tok = pos = cursor; cline++;
+ goto comment;
+ }
+yy82: yych = *++YYCURSOR;
+ goto yy78;
+yy83: yych = *++YYCURSOR;
+ // nested "/*"
+yy84:
+#line 159
+ { ++depth;
+ goto comment; }
+yy85: yych = *++YYCURSOR;
+ // "*/": leave the comment once the outermost level is closed
+yy86:
+#line 155
+ { if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+}
+#line 166
+
+}
+
+// Report msg on cerr, prefixed with the line and 1-based column of the
+// token being scanned, then terminate the program with exit status 1.
+void Scanner::fatal(char *msg){
+    cerr << "line " << tline;
+    cerr << ", column " << (tchar + 1);
+    cerr << ": " << msg << endl;
+    exit(1);
+}
--- /dev/null
+/* Token codes for multi-character tokens.  Single-character tokens are */
+/* returned by the scanner as the character value itself.               */
+#define CLOSE 257
+#define ID 258
+#define CODE 259
+#define RANGE 260
+#define STRING 261
+/* Semantic value of a token.  Scanner::scan() fills in symbol for ID,  */
+/* regexp for RANGE and STRING, token for CODE, and op for CLOSE.       */
+typedef union {
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+} YYSTYPE;
+extern YYSTYPE yylval;
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <iomanip.h>
+#include "substr.h"
+#include "globals.h"
+#include "dfa.h"
+
+// there must be at least one span in list; all spans must cover
+// same range
+
+// Collapse runs of neighbouring spans that lead to the same state into a
+// single span, in place.  The surviving span takes the upper bound of the
+// last span of its run.  nSpans is updated to the new count.
+void Go::compact(){
+    uint kept = 0;
+    uint probe = 1;
+    while(probe < nSpans){
+        if(span[probe].to != span[kept].to){
+            // a different target starts a new output span
+            ++kept;
+            span[kept].to = span[probe].to;
+        }
+        span[kept].ub = span[probe].ub;
+        ++probe;
+    }
+    nSpans = kept + 1;
+}
+
+// Rebuild this transition table from base's spans while dropping the
+// transitions that lead to state x.
+//
+// base  table to copy from; must contain at least one span (e[-1] is read).
+// x     target state whose spans are to be removed.
+//
+// this->span must already point to storage with room for base->nSpans
+// entries; the only caller in this file (Go::genGoto) allocates exactly
+// that.  nSpans is set to the number of spans written.
+void Go::unmap(Go *base, State *x){
+ Span *s = span, *b = base->span, *e = &b[base->nSpans];
+ uint lb = 0;
+ s->ub = 0;
+ s->to = NULL;
+ for(; b != e; ++b){
+ if(b->to == x){
+ // a span going to x is absorbed into the span being built, but only
+ // when that span already covers more than one character
+ if((s->ub - lb) > 1)
+ s->ub = b->ub;
+ } else {
+ if(b->to != s->to){
+ // new target: finish the current output span (if it has been
+ // started) and begin the next one
+ if(s->ub){
+ lb = s->ub; ++s;
+ }
+ s->to = b->to;
+ }
+ s->ub = b->ub;
+ }
+ }
+ // the last output span always extends to the end of base's range
+ s->ub = e[-1].ub; ++s;
+ nSpans = s - span;
+}
+
+// Set mask bit(s) m in bm[c] for every character c whose transition in g
+// leads to state s.  bm is indexed directly by character value.
+void doGen(Go *g, State *s, uchar *bm, uchar m){
+    Span *const end = g->span + g->nSpans;
+    uint lo = 0;
+    for(Span *sp = g->span; sp < end; ++sp){
+        if(sp->to == s){
+            while(lo < sp->ub){
+                bm[lo] |= m;
+                ++lo;
+            }
+        }
+        // the next span starts where this one ends
+        lo = sp->ub;
+    }
+}
+
+// Print, via printSpan, the character range of every span in g that
+// leads to state s.
+void prt(ostream& o, Go *g, State *s){
+    Span *const end = g->span + g->nSpans;
+    uint lo = 0;
+    for(Span *sp = g->span; sp < end; ++sp){
+        if(sp->to == s){
+            printSpan(o, lo, sp->ub);
+        }
+        lo = sp->ub;
+    }
+}
+
+// Return true when the spans of g1 leading to s1 cover exactly the same
+// character ranges as the spans of g2 leading to s2.
+bool matches(Go *g1, State *s1, Go *g2, State *s2){
+    Span *b1 = g1->span, *e1 = &b1[g1->nSpans];
+    Span *b2 = g2->span, *e2 = &b2[g2->nSpans];
+    uint lb1 = 0;
+    uint lb2 = 0;
+    while(true){
+        // advance each side to its next span with the wanted target,
+        // remembering the lower bound of that span
+        while(b1 < e1 && b1->to != s1){
+            lb1 = b1->ub;
+            ++b1;
+        }
+        while(b2 < e2 && b2->to != s2){
+            lb2 = b2->ub;
+            ++b2;
+        }
+        if(b1 == e1)
+            return b2 == e2;
+        if(b2 == e2 || lb1 != lb2 || b1->ub != b2->ub)
+            return false;
+        ++b1;
+        ++b2;
+    }
+}
+
+// One entry in the list of bit maps used for the generated yybm[] table.
+// Each entry records the transition table (go) and target state (on) it
+// describes; gen() later assigns it a row offset (i) and a mask bit (m).
+class BitMap {
+public:
+    static BitMap *first;   // head of the list of all bit maps
+    Go *go;                 // transition table this map was built from
+    State *on;              // target state the map tests for
+    BitMap *next;           // next entry in the list
+    uint i;                 // offset into yybm[], assigned by gen()
+    uchar m;                // mask bit within a table byte, assigned by gen()
+
+    static BitMap *find(Go*, State*);
+    static BitMap *find(State*);
+    static void gen(ostream&, uint, uint);
+    static void stats();
+    BitMap(Go*, State*);
+};
+
+// Head of the list of all bit maps; new entries are pushed at the front.
+BitMap *BitMap::first = NULL;
+
+// Create a bit map for (g, x) and link it in as the new head of the list.
+BitMap::BitMap(Go *g, State *x)
+    : go(g)
+    , on(x)
+    , next(first)
+{
+    first = this;
+}
+
+// Return an existing bit map whose character ranges match those of
+// (g, x); when none does, create a new one and return it.
+BitMap *BitMap::find(Go *g, State *x){
+    BitMap *b = first;
+    while(b){
+        if(matches(b->go, b->on, g, x)){
+            return b;
+        }
+        b = b->next;
+    }
+    return new BitMap(g, x);
+}
+
+// Return the first bit map registered for state x, or NULL if none is.
+BitMap *BitMap::find(State *x){
+    BitMap *b = first;
+    while(b && b->on != x){
+        b = b->next;
+    }
+    return b;
+}
+
+// Emit the definition of the yybm[] lookup table to o.
+//
+// o       stream the generated C code is written to.
+// lb, ub  character range covered by each table row; a row has ub - lb
+//         entries.
+//
+// The bit maps are packed eight to a row: within a row each map owns one
+// mask bit (0x80 down to 0x01).  Every map's row offset and mask are
+// stored in its i and m members for later use by Go::genGoto.  Nothing
+// is written when no bit maps exist.
+void BitMap::gen(ostream &o, uint lb, uint ub){
+    BitMap *b = first;
+    if(b){
+        o << "\tstatic unsigned char yybm[] = {";
+        uint n = ub - lb;
+        uchar *bm = new uchar[n];
+        for(uint i = 0; b; i += n){
+            // Start every row from zero.  The mask bits are reused for
+            // each group of eight maps, so bits left over from the
+            // previous row would make the tests for this row succeed on
+            // characters that belong to the earlier maps.
+            memset(bm, 0, n);
+            for(uchar m = 0x80; b && m; b = b->next, m >>= 1){
+                b->i = i; b->m = m;
+                // doGen indexes by character value, hence the bias by lb
+                doGen(b->go, b->on, bm-lb, m);
+            }
+            for(uint j = 0; j < n; ++j){
+                if(j%8 == 0) o << "\n\t";
+                o << setw(3) << (uint) bm[j] << ", ";
+            }
+        }
+        o << "\n\t};\n";
+        // the scratch row is no longer needed once the table is written
+        delete [] bm;
+    }
+}
+
+// Dump every bit map's character ranges to cerr, one per line, followed
+// by the total count, then reset the list head to NULL.
+void BitMap::stats(){
+    uint total = 0;
+    BitMap *b = first;
+    while(b){
+        prt(cerr, b->go, b->on);
+        cerr << endl;
+        ++total;
+        b = b->next;
+    }
+    cerr << total << " bitmaps\n";
+    first = NULL;
+}
+
+// Emit "goto yy<label>;" for state to, preceded by a tab.
+void genGoTo(ostream &o, State *to){
+    o << "\tgoto yy";
+    o << to->label;
+    o << ";\n";
+}
+
+// Emit the head of a test "if(yych <cmp> '<v>')" with no trailing
+// newline; v is printed as a character literal by prtCh.
+void genIf(ostream &o, char *cmp, uint v){
+    o << "\tif(yych ";
+    o << cmp;
+    o << " '";
+    prtCh(o, v);
+    o << "')";
+}
+
+// Write i tab characters to o.
+void indent(ostream &o, uint i){
+    for(uint done = 0; done < i; ++done){
+        o << "\t";
+    }
+}
+
+// Emit the buffer check that makes sure n characters are available
+// (calling YYFILL(n) otherwise), followed by the load of yych from the
+// cursor.  The n == 1 case uses a simpler equality test.
+static void need(ostream &o, uint n){
+    if(n != 1){
+        o << "\tif((YYLIMIT - YYCURSOR) < " << n << ") YYFILL(" << n << ");\n";
+    } else {
+        o << "\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n";
+    }
+    o << "\tyych = *YYCURSOR;\n";
+}
+
+// Emit the code that advances to the next input character.  States with
+// a link get a separate increment followed by a buffer check; all others
+// advance and load yych in one statement.
+void Match::emit(ostream &o){
+    if(!state->link){
+        o << "\tyych = *++YYCURSOR;\n";
+        return;
+    }
+    o << "\t++YYCURSOR;\n";
+    need(o, state->depth);
+}
+
+// Emit the cursor advance followed by the entry label "yy<label>:".  For
+// states with a link the buffer check is placed after the label.
+void Enter::emit(ostream &o){
+    const bool refill = state->link != NULL;
+    o << (refill ? "\t++YYCURSOR;\n" : "\tyych = *++YYCURSOR;\n");
+    o << "yy" << label << ":\n";
+    if(refill){
+        need(o, state->depth);
+    }
+}
+
+// Emit the code that records a backtracking point: yyaccept is set to
+// this action's selector and YYMARKER to the advanced cursor.
+void Save::emit(ostream &o){
+    o << "\tyyaccept = " << selector << ";\n";
+    if(!state->link){
+        o << "\tyych = *(YYMARKER = ++YYCURSOR);\n";
+        return;
+    }
+    o << "\tYYMARKER = ++YYCURSOR;\n";
+    need(o, state->depth);
+}
+
+// A Move action carries no data beyond what the Action base keeps for s.
+Move::Move(State *s)
+    : Action(s)
+{
+}
+
+// A Move action generates no code of its own.
+void Move::emit(ostream &o){
+}
+
+// x  state this action belongs to
+// n  number of entries in s and r
+// s  per-rule yyaccept selector, ~0u for rules without one
+// r  per-rule state to jump to
+// The arrays are stored by pointer, not copied.
+Accept::Accept(State *x, uint n, uint *s, State **r)
+    : Action(x)
+    , nRules(n)
+    , saves(s)
+    , rules(r)
+{
+}
+
+// Emit the backtracking dispatch: restore YYCURSOR from YYMARKER and
+// switch on yyaccept to the state of the rule that was saved.  Nothing
+// is emitted when no rule has a selector.
+void Accept::emit(ostream &o){
+    bool opened = false;
+    for(uint r = 0; r < nRules; ++r){
+        if(saves[r] == ~0u)
+            continue;
+        if(!opened){
+            // the switch header is written once, before the first case
+            o << "\tYYCURSOR = YYMARKER;\n";
+            o << "\tswitch(yyaccept){\n";
+            opened = true;
+        }
+        o << "\tcase " << saves[r] << ":";
+        genGoTo(o, rules[r]);
+    }
+    if(opened){
+        o << "\t}\n";
+    }
+}
+
+// Action for state s that runs the user code attached to rule r.
+Rule::Rule(State *s, RuleOp *r)
+    : Action(s)
+    , rule(r)
+{
+}
+
+// Emit the user's action code for this rule, preceded by a #line
+// directive pointing at its source line.  When the rule's trailing
+// context has a known, non-zero fixed length, the cursor is first moved
+// back by that many characters.
+void Rule::emit(ostream &o){
+    const uint back = rule->ctx->fixedLength();
+    const bool rewind = (back != ~0u) && (back > 0u);
+    if(rewind){
+        o << "\tYYCURSOR -= " << back << ";";
+    }
+    o << "\n#line " << rule->code->line;
+    o << "\n\t" << rule->code->text << "\n";
+}
+
+// Emit a linear chain of "if(yych ...) goto ...;" tests for n spans.
+//
+// o     output stream
+// i     indentation level (number of extra tabs)
+// s, n  spans to dispatch on, in increasing order of upper bound
+// next  state for which no goto needs to be emitted; callers in this
+//       file pass the state that is emitted immediately afterwards
+void doLinear(ostream &o, uint i, Span *s, uint n, State *next){
+ for(;;){
+ State *bg = s[0].to;
+ // s[1] covers exactly one character (s[0].ub) and both of its
+ // neighbours go to bg: test for that single character directly
+ while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){
+ if(s[1].to == next && n == 3){
+ // last three spans and the odd one out is next: everything
+ // except that character goes to bg
+ indent(o, i); genIf(o, "!=", s[0].ub); genGoTo(o, bg);
+ return;
+ } else {
+ indent(o, i); genIf(o, "==", s[0].ub); genGoTo(o, s[1].to);
+ }
+ n -= 2; s += 2;
+ }
+ if(n == 1){
+ // a single remaining span needs a goto only if it is not next
+ if(bg != next){
+ indent(o, i); genGoTo(o, s[0].to);
+ }
+ return;
+ } else if(n == 2 && bg == next){
+ // lower span is next: only the upper span needs a jump
+ indent(o, i); genIf(o, ">=", s[0].ub); genGoTo(o, s[1].to);
+ return;
+ } else {
+ // peel off the lowest span and continue with the rest
+ indent(o, i); genIf(o, "<=", s[0].ub - 1); genGoTo(o, bg);
+ n -= 1; s += 1;
+ }
+ }
+}
+
+// Emit this table as a flat chain of if-tests (see doLinear).
+void Go::genLinear(ostream &o, State *next){
+    doLinear(o, 0, this->span, this->nSpans, next);
+}
+
+// Emit one "case '<c>':" label for every character c from lb up to, but
+// not including, s->ub.  Labels are separated by newlines; the last one
+// is left without a newline so the caller can append a statement.
+void genCases(ostream &o, uint lb, Span *s){
+    for(uint c = lb; c < s->ub; ++c){
+        if(c != lb){
+            o << "\n";
+        }
+        o << "\tcase '";
+        prtCh(o, c);
+        o << "':";
+    }
+}
+
+// Emit this table as a switch statement on yych.  Tables with at most
+// two spans are emitted as if-tests instead.
+//
+// The target of the last span becomes the "default:" branch; all other
+// spans are emitted as case labels, grouped by target state.
+void Go::genSwitch(ostream &o, State *next){
+ if(nSpans <= 2){
+ genLinear(o, next);
+ } else {
+ State *def = span[nSpans-1].to;
+ Span **sP = new Span*[nSpans-1], **r, **s, **t;
+
+ // collect the spans that do not go to the default target; the last
+ // span always does, so at most nSpans-1 pointers are stored
+ t = &sP[0];
+ for(uint i = 0; i < nSpans; ++i)
+ if(span[i].to != def)
+ *(t++) = &span[i];
+
+ o << "\tswitch(yych){\n";
+ // each pass emits the cases of the first remaining span and of all
+ // other spans with the same target, and compacts the rest to the
+ // front of sP for the next pass
+ while(t != &sP[0]){
+ r = s = &sP[0];
+ // a span's lower bound is its predecessor's upper bound (0 for
+ // the very first span)
+ if(*s == &span[0])
+ genCases(o, 0, *s);
+ else
+ genCases(o, (*s)[-1].ub, *s);
+ State *to = (*s)->to;
+ while(++s < t){
+ if((*s)->to == to)
+ genCases(o, (*s)[-1].ub, *s);
+ else
+ *(r++) = *s;
+ }
+ genGoTo(o, to);
+ t = r;
+ }
+ o << "\tdefault:";
+ genGoTo(o, def);
+ o << "\t}\n";
+
+ delete [] sP;
+ }
+}
+
+// Emit a binary decision tree of nested if/else blocks over n spans.
+// Groups of four or fewer spans are handed to doLinear.
+//
+// i is the current nesting depth, used for indentation.
+void doBinary(ostream &o, uint i, Span *s, uint n, State *next){
+    if(n <= 4){
+        doLinear(o, i, s, n, next);
+        return;
+    }
+    const uint half = n/2;
+
+    // lower half: characters up to the last one of span half-1
+    indent(o, i);
+    genIf(o, "<=", s[half-1].ub - 1);
+    o << "{\n";
+    doBinary(o, i+1, s, half, next);
+
+    // upper half: everything else
+    indent(o, i);
+    o << "\t} else {\n";
+    doBinary(o, i+1, s + half, n - half, next);
+
+    indent(o, i);
+    o << "\t}\n";
+}
+
+// Emit this table as a binary tree of if-tests (see doBinary).
+void Go::genBinary(ostream &o, State *next){
+    doBinary(o, 0, this->span, this->nSpans, next);
+}
+
+// Choose a code shape for this transition table and emit it.
+//
+//  - no spans: nothing is emitted;
+//  - sFlag off: always a switch;
+//  - more than 8 spans whose average width ("util") is at most 2
+//    characters: a switch;
+//  - otherwise: a binary if-tree for more than 5 spans, a linear chain
+//    of ifs for 5 or fewer.
+void Go::genBase(ostream &o, State *next){
+    if(nSpans == 0)
+        return;
+    if(!sFlag){
+        genSwitch(o, next);
+        return;
+    }
+    if(nSpans > 8){
+        Span *lo = &span[0];
+        Span *hi = &span[nSpans-1];
+        uint util;
+        if(lo->to == hi->to){
+            // first and last span share a target: leave both out
+            util = (hi[-1].ub - lo->ub)/(nSpans - 2);
+        } else if(lo->ub > (hi->ub - hi[-1].ub)){
+            // first span is the wider of the two end spans: leave it out
+            util = (hi->ub - lo->ub)/(nSpans - 1);
+        } else {
+            // otherwise leave the last span out
+            util = hi[-1].ub/(nSpans - 1);
+        }
+        if(util <= 2){
+            genSwitch(o, next);
+            return;
+        }
+    }
+    if(nSpans > 5)
+        genBinary(o, next);
+    else
+        genLinear(o, next);
+}
+
+// Emit the transition code for this table.
+//
+// With bFlag set, the first span whose target is a base state with a
+// matching bit map is handled by a single yybm[] test; the remaining
+// transitions are emitted from a copy of the table with that target
+// removed.  In every other case the table is emitted by genBase.
+void Go::genGoto(ostream &o, State *next){
+    if(bFlag){
+        for(uint i = 0; i < nSpans; ++i){
+            State *to = span[i].to;
+            if(!to || !to->isBase)
+                continue;
+            BitMap *b = BitMap::find(to);
+            if(!b || !matches(b->go, b->on, this, to))
+                continue;
+
+            // temporary table holding everything except the jumps to 'to'
+            Go go;
+            go.span = new Span[nSpans];
+            go.unmap(this, to);
+            o << "\tif(yybm[" << b->i << "+yych] & " << (uint) b->m << ")";
+            genGoTo(o, to);
+            go.genBase(o, next);
+            delete [] go.span;
+            return;
+        }
+    }
+    genBase(o, next);
+}
+
+// Emit this state's label "yy<label>:" followed by its action's code.
+void State::emit(ostream &o){
+    o << "yy";
+    o << label;
+    o << ":";
+    action->emit(o);
+}
+
+// Combine the transition tables of fg and bg into x0 and return the
+// number of spans written.
+//
+// Both tables are walked in parallel by upper bound.  Wherever fg and bg
+// lead to the same state the output span points at bg itself; elsewhere
+// it keeps fg's target.  Adjacent output spans with the same target are
+// coalesced.
+//
+// x0 must have room for the result; the caller in this file passes a
+// buffer of ubChar - lbChar spans.
+uint merge(Span *x0, State *fg, State *bg){
+ Span *x = x0, *f = fg->go.span, *b = bg->go.span;
+ uint nf = fg->go.nSpans, nb = bg->go.nSpans;
+ State *prev = NULL, *to;
+ // NB: we assume both spans are for same range
+ for(;;){
+ // both tables end a span at the same character: consume one of each
+ if(f->ub == b->ub){
+ to = f->to == b->to? bg : f->to;
+ if(to == prev){
+ // same target as the previous output span: step back so the
+ // write below extends that span instead of adding a new one
+ --x;
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = f->ub;
+ ++x; ++f; --nf; ++b; --nb;
+ // only exit point; relies on both tables ending at the same bound
+ if(nf == 0 && nb == 0)
+ return x - x0;
+ }
+ // fg's span ends first: emit up to its bound and advance fg only
+ while(f->ub < b->ub){
+ to = f->to == b->to? bg : f->to;
+ if(to == prev){
+ --x;
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = f->ub;
+ ++x; ++f; --nf;
+ }
+ // bg's span ends first: emit up to its bound and advance bg only
+ while(b->ub < f->ub){
+ to = b->to == f->to? bg : f->to;
+ if(to == prev){
+ --x;
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = b->ub;
+ ++x; ++b; --nb;
+ }
+ }
+}
+
+// Depth value given to states once traverse() has popped them from its
+// stack (all bits set).
+const uint cInfinity = ~0;
+
+// Helper for DFA::findSCCs: a stack of states plus the recursive
+// traversal that groups them.
+class SCC {
+public:
+    State **top, **stk;     // next free slot / base of the state stack
+
+    SCC(uint);
+    ~SCC();
+    void traverse(State*);
+};
+
+// Allocate a stack with room for size states; it starts out empty.
+SCC::SCC(uint size){
+    stk = new State*[size];
+    top = stk;
+}
+
+// Release the state stack.
+SCC::~SCC()
+{
+    delete [] stk;
+}
+
+// Depth-first walk from x over the transition graph.
+//
+// x is pushed on the stack and its depth set to its stack position.
+// After all successors have been visited, x's depth is the smallest
+// depth reachable from it.  If that is still x's own position, x and
+// every state above it on the stack are popped as one group: their depth
+// becomes cInfinity and their link points at x.
+void SCC::traverse(State *x){
+    *top++ = x;
+    const uint k = top - stk;
+    x->depth = k;
+
+    for(uint i = 0; i < x->go.nSpans; ++i){
+        State *y = x->go.span[i].to;
+        if(!y)
+            continue;
+        if(y->depth == 0)
+            traverse(y);        // depth 0 marks a state not visited yet
+        if(y->depth < x->depth)
+            x->depth = y->depth;
+    }
+
+    if(x->depth != k)
+        return;
+
+    State *popped;
+    do {
+        popped = *--top;
+        popped->depth = cInfinity;
+        popped->link = x;
+    } while(popped != x);
+}
+
+// Return the length of the longest chain of transitions starting at s,
+// where a chain stops as soon as it reaches a state that has a link.
+// Returns 0 when s has no transitions to a non-NULL state.
+uint maxDist(State *s){
+    uint best = 0;
+    const uint count = s->go.nSpans;
+    for(uint k = 0; k < count; ++k){
+        State *t = s->go.span[k].to;
+        if(!t)
+            continue;
+        // one step to reach t, plus whatever follows unless t has a link
+        uint dist = t->link ? 1 : 1 + maxDist(t);
+        if(dist > best)
+            best = dist;
+    }
+    return best;
+}
+
+// Finalise link and depth for every state in the list starting at head.
+//
+// A state that is its own link but has no transition to any state
+// linked to it gets its link cleared and keeps its depth.  Every other
+// state gets depth = maxDist(state).
+void calcDepth(State *head){
+    for(State *s = head; s; s = s->next){
+        bool keep = true;
+        if(s->link == s){
+            // look for a transition back into s's own group
+            keep = false;
+            for(uint i = 0; i < s->go.nSpans; ++i){
+                State *t = s->go.span[i].to;
+                if(t && t->link == s){
+                    keep = true;
+                    break;
+                }
+            }
+            if(!keep)
+                s->link = NULL;
+        }
+        if(keep)
+            s->depth = maxDist(s);
+    }
+}
+
+// Compute link and depth for all states: reset both fields, run
+// SCC::traverse from every state not visited yet, then let calcDepth
+// settle the final values.
+void DFA::findSCCs(){
+    SCC scc(nStates);
+    State *s;
+
+    s = head;
+    while(s){
+        s->depth = 0;
+        s->link = NULL;
+        s = s->next;
+    }
+
+    s = head;
+    while(s){
+        if(s->depth == 0)
+            scc.traverse(s);
+        s = s->next;
+    }
+
+    calcDepth(head);
+}
+
+// Split state s in two.  A new state, inserted right after s and given a
+// Move action, takes over s's link, rule and transition table.  s itself
+// keeps no rule and gets a single span covering everything up to ubChar
+// that leads to the new state.
+void DFA::split(State *s){
+    State *tail = new State;
+    (void) new Move(tail);
+    addState(&s->next, tail);
+
+    // hand s's properties over to the new state
+    tail->link = s->link;
+    tail->rule = s->rule;
+    tail->go = s->go;
+
+    // s now does nothing but pass control on to tail
+    s->rule = NULL;
+    s->go.nSpans = 1;
+    s->go.span = new Span[1];
+    Span &only = s->go.span[0];
+    only.ub = ubChar;
+    only.to = tail;
+}
+
+// Generate the C code for this automaton and write it to o.
+//
+// The automaton is modified in the process: helper states (rule, accept
+// and move states) are inserted into the state list, transition tables
+// are rewritten and every state is assigned a label.  Labels come from a
+// function-local static counter, so they keep increasing across calls.
+void DFA::emit(ostream &o){
+ static uint label = 0;
+ State *s;
+ uint i;
+
+ findSCCs();
+ // the start state is always treated as linked to itself
+ head->link = head;
+ head->depth = maxDist(head);
+
+ // number of rules = highest accept index in use, plus one
+ uint nRules = 0;
+ for(s = head; s; s = s->next)
+ if(s->rule && s->rule->accept >= nRules)
+ nRules = s->rule->accept + 1;
+
+ // saves[r] is the yyaccept selector of rule r; the memset sets every
+ // byte to 0xFF, i.e. every entry to ~0u ("no selector yet")
+ uint nSaves = 0;
+ uint *saves = new uint[nRules];
+ memset(saves, ~0, (nRules)*sizeof(*saves));
+
+ // mark backtracking points
+ for(s = head; s; s = s->next){
+ RuleOp *ignore = NULL;
+ if(s->rule){
+ // an accepting state with a transition to a non-accepting state
+ // gets a Save action.
+ // NOTE(review): 'continue' applies to this inner loop, so the
+ // action is deleted and re-created once per such transition;
+ // presumably the Action constructor installs the new action on
+ // the state - confirm.  'ignore' is assigned but never read.
+ for(i = 0; i < s->go.nSpans; ++i)
+ if(s->go.span[i].to && !s->go.span[i].to->rule){
+ delete s->action;
+ if(saves[s->rule->accept] == ~0u)
+ saves[s->rule->accept] = nSaves++;
+ (void) new Save(s, saves[s->rule->accept]);
+ continue;
+ }
+ ignore = s->rule;
+ }
+ }
+
+ // insert actions
+ State **rules = new State*[nRules];
+ memset(rules, 0, (nRules)*sizeof(*rules));
+ State *accept = NULL;
+ for(s = head; s; s = s->next){
+ // ow: where the NULL ("no transition") spans of s are redirected
+ State *ow;
+ if(!s->rule){
+ ow = accept;
+ } else {
+ // one Rule state per accept index, created on first use
+ if(!rules[s->rule->accept]){
+ State *n = new State;
+ (void) new Rule(n, s->rule);
+ rules[s->rule->accept] = n;
+ addState(&s->next, n);
+ }
+ ow = rules[s->rule->accept];
+ }
+ for(i = 0; i < s->go.nSpans; ++i)
+ if(!s->go.span[i].to){
+ if(!ow){
+ // the shared Accept state is created on first use
+ ow = accept = new State;
+ (void) new Accept(accept, nRules, saves, rules);
+ addState(&s->next, accept);
+ }
+ s->go.span[i].to = ow;
+ }
+ }
+
+ // split ``base'' states into two parts
+ for(s = head; s; s = s->next){
+ s->isBase = false;
+ if(s->link){
+ for(i = 0; i < s->go.nSpans; ++i){
+ // a linked state with a transition to itself is a base state
+ if(s->go.span[i].to == s){
+ s->isBase = true;
+ split(s);
+ if(bFlag)
+ BitMap::find(&s->next->go, s);
+ // skip the state that split() inserted after s
+ s = s->next;
+ break;
+ }
+ }
+ }
+ }
+
+ // find ``base'' state, if possible
+ Span *span = new Span[ubChar - lbChar];
+ for(s = head; s; s = s->next){
+ if(!s->link){
+ for(i = 0; i < s->go.nSpans; ++i){
+ State *to = s->go.span[i].to;
+ if(to && to->isBase){
+ // merge with the second half of the base state and keep the
+ // result only if it has fewer spans than the original table
+ to = to->go.span[0].to;
+ uint nSpans = merge(span, s, to);
+ if(nSpans < s->go.nSpans){
+ delete [] s->go.span;
+ s->go.nSpans = nSpans;
+ s->go.span = new Span[nSpans];
+ memcpy(s->go.span, span, nSpans*sizeof(Span));
+ }
+ break;
+ }
+ }
+ }
+ }
+ delete [] span;
+
+ // the start state's action is replaced by an Enter action below
+ delete head->action;
+
+ o << "{\n\tYYCTYPE yych;\n\tunsigned int yyaccept;\n";
+
+ if(bFlag)
+ BitMap::gen(o, lbChar, ubChar);
+
+ o << "\tgoto yy" << label << ";\n";
+ (void) new Enter(head, label++);
+
+ for(s = head; s; s = s->next)
+ s->label = label++;
+
+ // emit every state followed by its transitions; the next state in the
+ // list is passed along as the one that follows in the output
+ for(s = head; s; s = s->next){
+ s->emit(o);
+ s->go.genGoto(o, s->next);
+ }
+ o << "}\n";
+
+ BitMap::first = NULL;
+
+ delete [] saves;
+ delete [] rules;
+}
--- /dev/null
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "globals.h"
+#include "substr.h"
+#include "dfa.h"
+
+// Returns the octal digit character for the low three bits of c.
+inline char octCh(uint c){
+	const uint digit = c & 7u;
+	return '0' + digit;
+}
+
+// Prints character c in a form usable inside a C character literal.
+// The character looked up through talx[] selects between a named escape and
+// the printable character itself; anything else is written as a
+// three-digit octal escape of c.
+void prtCh(ostream &o, uchar c){
+	const uchar shown = talx[c];
+	const char *esc = NULL;
+	switch(shown){
+	case '\'': esc = "\\'"; break;
+	case '\n': esc = "\\n"; break;
+	case '\t': esc = "\\t"; break;
+	case '\v': esc = "\\v"; break;
+	case '\b': esc = "\\b"; break;
+	case '\r': esc = "\\r"; break;
+	case '\f': esc = "\\f"; break;
+	case '\a': esc = "\\a"; break;
+	case '\\': esc = "\\\\"; break;
+	default: break;
+	}
+	if(esc){
+		o << esc;
+		return;
+	}
+	if(isprint(shown)){
+		o << (char) shown;
+		return;
+	}
+	o << '\\' << octCh(c/64) << octCh(c/8) << octCh(c);
+}
+
+// Prints the half-open character range [lb, ub) as "[c]" or "[c-d]",
+// preceded by "*" when lb is greater than ub.
+void printSpan(ostream &o, uint lb, uint ub){
+	if(lb > ub)
+		o << "*";
+	o << "[";
+	prtCh(o, lb);
+	const bool single = (ub - lb) == 1;
+	if(!single){
+		o << "-";
+		prtCh(o, ub-1);
+	}
+	o << "]";
+}
+
+// Prints this span (range starting at lb, plus the target's label) when it
+// has a target; always returns ub so the caller can chain the next span.
+uint Span::show(ostream &o, uint lb){
+	if(!to)
+		return ub;
+	printSpan(o, lb, ub);
+	o << " " << to->label << "; ";
+	return ub;
+}
+
+// Dumps one state: its label, the accepted rule index (if any) and all of
+// its spans on the following line.
+ostream& operator<<(ostream &o, const State &s){
+	o << "state " << s.label;
+	if(s.rule)
+		o << " accepts " << s.rule->accept;
+	o << "\n";
+
+	uint lower = 0;
+	const uint count = s.go.nSpans;
+	uint idx = 0;
+	while(idx < count){
+		lower = s.go.span[idx].show(o, lower);
+		++idx;
+	}
+	return o;
+}
+
+// Dumps every state of the DFA in list order, separated by blank lines.
+ostream& operator<<(ostream &o, const DFA &dfa){
+	State *cur = dfa.head;
+	while(cur){
+		o << cur << "\n\n";
+		cur = cur->next;
+	}
+	return o;
+}
+
+// Creates an empty state: no rule, no kernel, no action, no transitions.
+// Every member is given a defined value (in declaration order) so that no
+// field is ever read while indeterminate.
+State::State()
+	: label(0), rule(NULL), next(NULL), link(NULL), depth(0),
+	  kCount(0), kernel(NULL), isBase(false), action(NULL) {
+	go.nSpans = 0;
+	go.span = NULL;
+}
+
+// Releases everything the state owns: its action object (each Action
+// registers itself with exactly one state in its constructor), the kernel
+// array and the span table.
+// NOTE(review): assumes no other code keeps deleting a state's action
+// without replacing it -- confirm against the rest of the project.
+State::~State(){
+	delete action;
+	delete [] kernel;
+	delete [] go.span;
+}
+
+// Appends to cP every not-yet-marked instruction reachable from i by
+// following FORK and GOTO instructions, marking each one as it is added.
+// For a FORK the branch at i + 1 is explored recursively and the walk then
+// continues at the FORK's link; any other tag ends the walk.
+// Returns the position just past the last entry written.
+static Ins **closure(Ins **cP, Ins *i){
+ while(!isMarked(i)){
+ mark(i);
+ *(cP++) = i;
+ if(i->i.tag == FORK){
+ cP = closure(cP, i + 1);
+ i = (Ins*) i->i.link;
+ } else if(i->i.tag == GOTO){
+ i = (Ins*) i->i.link;
+ } else
+ break;
+ }
+ return cP;
+}
+
+// Scratch table entry used by the DFA constructor.  The two fields are
+// indexed independently: `ch` lists the characters seen for the current
+// state, `to` holds (first) a chain of instructions and (later) the target
+// State for a character.
+struct GoTo {
+ Char ch;
+ void *to;
+};
+
+// Builds the DFA from the instruction array `ins` (ni entries) for the
+// character range [lb, ub).  States are created through findState() and
+// processed from the toDo list until it is empty.
+// `rep` is indexed by character position when the spans are built;
+// presumably it maps each character to a class representative -- confirm
+// against the caller.
+DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep)
+ : lbChar(lb), ubChar(ub) {
+ // scratch buffers: kernel under construction, per-character table, spans
+ Ins **work = new Ins*[ni+1];
+ uint nc = ub - lb;
+ GoTo *goTo = new GoTo[nc];
+ Span *span = new Span[nc];
+ memset((char*) goTo, 0, nc*sizeof(GoTo));
+ tail = &head;
+ head = NULL;
+ nStates = 0;
+ toDo = NULL;
+ // the start state is the closure of the first instruction
+ findState(work, closure(work, &ins[0]) - work);
+ while(toDo){
+ // pop the next unprocessed state (the list is chained through link)
+ State *s = toDo;
+ toDo = s->link;
+
+ Ins **cP, **iP, *i;
+ uint nGoTos = 0;
+ uint j;
+
+ s->rule = NULL;
+ for(iP = s->kernel; (i = *iP); ++iP){
+ if(i->i.tag == CHAR){
+ // chain every character instruction of this block into the slot
+ // for its character; a character seen for the first time is
+ // also recorded in goTo[nGoTos].ch
+ // (note: this `j` shadows the outer uint j)
+ for(Ins *j = i + 1; j < (Ins*) i->i.link; ++j){
+ if(!(j->c.link = goTo[j->c.value - lb].to))
+ goTo[nGoTos++].ch = j->c.value;
+ goTo[j->c.value - lb].to = j;
+ }
+ } else if(i->i.tag == TERM){
+ // keep the rule with the smallest accept value
+ if(!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept)
+ s->rule = (RuleOp*) i->i.link;
+ }
+ }
+
+ // turn each instruction chain into the target state for that character
+ for(j = 0; j < nGoTos; ++j){
+ GoTo *go = &goTo[goTo[j].ch - lb];
+ i = (Ins*) go->to;
+ for(cP = work; i; i = (Ins*) i->c.link)
+ cP = closure(cP, i + i->c.bump);
+ go->to = findState(work, cP - work);
+ }
+
+ // group consecutive characters with the same target into spans
+ s->go.nSpans = 0;
+ for(j = 0; j < nc;){
+ State *to = (State*) goTo[rep[j]].to;
+ while(++j < nc && goTo[rep[j]].to == to);
+ span[s->go.nSpans].ub = lb + j;
+ span[s->go.nSpans].to = to;
+ s->go.nSpans++;
+ }
+
+ // clear the slots used by this state so the table is empty again
+ for(j = nGoTos; j-- > 0;)
+ goTo[goTo[j].ch - lb].to = NULL;
+
+ s->go.span = new Span[s->go.nSpans];
+ memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
+
+ // the Action constructor stores the new object in s->action
+ (void) new Match(s);
+
+ }
+ delete [] work;
+ delete [] goTo;
+ delete [] span;
+}
+
+// Deletes every state on the list, unlinking each one from head first.
+DFA::~DFA(){
+	for(State *cur = head; cur; cur = head){
+		head = cur->next;
+		delete cur;
+	}
+}
+
+// Inserts s into the state list at the link slot *a, gives it the next
+// free label, and advances tail when the insertion happened at the end.
+void DFA::addState(State **a, State *s){
+	const bool atTail = (a == tail);
+	s->label = nStates++;
+	s->next = *a;
+	*a = s;
+	if(atTail)
+		tail = &s->next;
+}
+
+// Returns the state whose kernel matches the given (marked) instruction
+// set, creating it and queueing it on toDo when no such state exists.
+// On entry kernel[0..kCount) holds marked instructions; on return all of
+// them have been unmarked again.  kernel must have room for kCount + 1
+// entries because a NULL terminator is written.
+State *DFA::findState(Ins **kernel, uint kCount){
+ Ins **cP, **iP, *i;
+ State *s;
+
+ kernel[kCount] = NULL;
+
+ // keep only CHAR and TERM instructions (compacting in place); the
+ // others are unmarked and dropped
+ cP = kernel;
+ for(iP = kernel; (i = *iP); ++iP){
+ if(i->i.tag == CHAR || i->i.tag == TERM){
+ *cP++ = i;
+ } else {
+ unmark(i);
+ }
+ }
+ kCount = cP - kernel;
+ kernel[kCount] = NULL;
+
+ // an existing state matches when it has the same kernel size and every
+ // one of its kernel instructions is currently marked
+ for(s = head; s; s = s->next){
+ if(s->kCount == kCount){
+ for(iP = s->kernel; (i = *iP); ++iP)
+ if(!isMarked(i))
+ goto nextState;
+ goto unmarkAll;
+ }
+ nextState:;
+ }
+
+ // no match: append a new state and push it on the toDo list
+ s = new State;
+ addState(tail, s);
+ s->kCount = kCount;
+ s->kernel = new Ins*[kCount+1];
+ memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*));
+ s->link = toDo;
+ toDo = s;
+
+unmarkAll:
+ for(iP = kernel; (i = *iP); ++iP)
+ unmark(i);
+
+ return s;
+}
--- /dev/null
+#ifndef _dfa_h
+#define _dfa_h
+
+#include <iostream.h>
+#include "re.h"
+
+extern void prtCh(ostream&, uchar);
+extern void printSpan(ostream&, uint, uint);
+
+class DFA;
+class State;
+
+// Abstract base of the per-state actions.  The constructor registers the
+// new object as the action of the given state.  Actions are deleted
+// through Action pointers, hence the virtual destructor.
+class Action {
+public:
+	State *state;
+public:
+	Action(State*);
+	virtual ~Action() { }
+	virtual void emit(ostream&) = 0;
+};
+
+// Action attached to every state by the DFA constructor.
+class Match: public Action {
+public:
+ Match(State*);
+ void emit(ostream&);
+};
+
+// Action installed on the head state by DFA::emit; carries the entry label.
+class Enter: public Action {
+public:
+ uint label;
+public:
+ Enter(State*, uint);
+ void emit(ostream&);
+};
+
+// Match variant installed by DFA::emit on backtracking points; `selector`
+// is the per-rule save index assigned there.
+class Save: public Match {
+public:
+ uint selector;
+public:
+ Save(State*, uint);
+ void emit(ostream&);
+};
+
+// Action given to the second half of a state produced by DFA::split.
+class Move: public Action {
+public:
+ Move(State*);
+ void emit(ostream&);
+};
+
+// Action of the shared accept state created by DFA::emit.  It refers to the
+// saves/rules arrays built in DFA::emit, which deletes those arrays itself
+// before returning.
+class Accept: public Action {
+public:
+ uint nRules;
+ uint *saves;
+ State **rules;
+public:
+ Accept(State*, uint, uint*, State**);
+ void emit(ostream&);
+};
+
+// Action of the per-rule state created by DFA::emit for a RuleOp.
+class Rule: public Action {
+public:
+ RuleOp *rule;
+public:
+ Rule(State*, RuleOp*);
+ void emit(ostream&);
+};
+
+// One transition range: characters below `ub` (starting where the previous
+// span ended) lead to state `to`; `to` may be NULL.
+class Span {
+public:
+ uint ub;
+ State *to;
+public:
+ // prints the span (if it has a target) and returns ub
+ uint show(ostream&, uint);
+};
+
+// Transition table of a state: `span` points to `nSpans` Span entries.
+// The gen* members are defined elsewhere; DFA::emit calls genGoto with the
+// output stream and the state that follows in the list.
+class Go {
+public:
+ uint nSpans;
+ Span *span;
+public:
+ void genGoto(ostream&, State*);
+ void genBase(ostream&, State*);
+ void genLinear(ostream&, State*);
+ void genBinary(ostream&, State*);
+ void genSwitch(ostream&, State*);
+ void compact();
+ void unmap(Go*, State*);
+};
+
+// One DFA state.  States form a singly linked list through `next`.
+class State {
+public:
+ uint label; // assigned by DFA::addState, renumbered by DFA::emit
+ RuleOp *rule; // rule accepted in this state, or NULL
+ State *next; // next state in the DFA's list
+ State *link; // toDo chain during construction; reused by the SCC pass
+ uint depth; // for finding SCCs
+ uint kCount; // number of entries in kernel
+ Ins **kernel; // NULL-terminated instruction set identifying the state
+ bool isBase:1; // set by DFA::emit for states that loop to themselves
+ Go go; // outgoing transitions
+ Action *action; // set by the Action constructor
+public:
+ State();
+ ~State();
+ void emit(ostream&);
+ friend ostream& operator<<(ostream&, const State&);
+ friend ostream& operator<<(ostream&, const State*);
+};
+
+// Deterministic automaton built from an instruction array.  Owns the list
+// of states starting at `head`.
+class DFA {
+public:
+ uint lbChar; // lower bound of the character range (constructor's lb)
+ uint ubChar; // upper bound of the character range (constructor's ub)
+ uint nStates; // number of states added so far; source of new labels
+ State *head, **tail; // first state / link slot where the next state is appended
+ State *toDo; // states created but not yet processed by the constructor
+public:
+ DFA(Ins*, uint, uint, uint, Char*);
+ ~DFA();
+ void addState(State**, State*);
+ State *findState(Ins**, uint);
+ void split(State*);
+
+ void findSCCs();
+ void emit(ostream&);
+
+ friend ostream& operator<<(ostream&, const DFA&);
+ friend ostream& operator<<(ostream&, const DFA*);
+};
+
+// Remembers the owning state and installs this object as that state's
+// action (overwriting the previous pointer without deleting it).
+inline Action::Action(State *s) : state(s) {
+ s->action = this;
+}
+
+// Nothing beyond the base-class registration with state s.
+inline Match::Match(State *s) : Action(s)
+ { }
+
+// Registers with state s and records the entry label l.
+inline Enter::Enter(State *s, uint l) : Action(s), label(l)
+ { }
+
+// Registers with state s and records the save selector i.
+inline Save::Save(State *s, uint i) : Match(s), selector(i)
+ { }
+
+// Pointer convenience overload: prints the State that s points to.
+inline ostream& operator<<(ostream &o, const State *s)
+{
+	const State &ref = *s;
+	return o << ref;
+}
+
+// Pointer convenience overload: prints the DFA that dfa points to.
+inline ostream& operator<<(ostream &o, const DFA *dfa)
+{
+	const DFA &ref = *dfa;
+	return o << ref;
+}
+
+#endif
--- /dev/null
+@Article{Bumbulis94,
+ author = {Peter Bumbulis and Donald D. Cowan},
+ title = {RE2C -- A More Versatile Scanner Generator},
+ journal = "ACM Letters on Programming Languages and Systems",
+ volume = 2,
+ number = "1--4",
+ year = 1994,
+ abstract = {
+ It is usually claimed that lexical analysis routines are still coded by
+ hand, despite the widespread availability of scanner generators, for
+ efficiency reasons. While efficiency is a consideration, there exist
+ freely available scanner generators such as GLA \cite{Gray88} that can
+ generate scanners that are faster than most hand-coded ones. However,
+ most generated scanners are tailored for a particular environment, and
+ retargetting these scanners to other environments, if possible, is
+ usually complex enough to make a hand-coded scanner more appealing. In
+ this paper we describe RE2C, a scanner generator that not only generates
+ scanners which are faster (and usually smaller) than those produced by
+ any other scanner generator known to the authors, including GLA, but
+ also adapt easily to any environment.
+ }
+}
+@Article{Gray88,
+ author = {Robert W. Gray},
+ title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That
+ Programmers Can Use},
+ journal = {USENIX Conference Proceedings},
+ year = {1988},
+ month = {June},
+ pages = {147--160},
+ abstract = {Writing an efficient lexical analyzer for even a simple
+ language is not a trivial task, and should not be done by hand. We
+ describe GLA, a tool that generates very efficient scanners. These
+ scanners do not use the conventional transition matrix, but instead
+ use a few 128 element vectors. Scanning time is only slightly
+ greater than the absolute minimum --- the time it takes to look at
+ each character in a file. The GLA language allows simple, concise
+ specification of scanners. Augmenting regular expressions with
+ auxiliary scanners easily handles nasty problems such as C comments
+ and C literal constants. We formalize the connection between token
+ scanning and token processing by associating a processor with
+ appropriate patterns. A library of canned descriptions simplifies the
+ specification of commonly used language pieces --- such as,
+ C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
+ tuned lexical analysis support modules are provided for error
+ handling, input buffering, storing identifiers in hash tables and
+ manipulating denotations.}
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
+
+volatile char ch;
+
+/*
+ * Maps standard input and touches every byte once (storing it in the
+ * volatile `ch`).  Returns 1 when fstat() fails and 0 otherwise; a failed
+ * mmap() is silently skipped.
+ */
+int main(void){
+	struct stat statbuf;
+	unsigned char *buf;	/* plain type: this file declares no uchar typedef */
+	if(fstat(0, &statbuf) != 0)
+		return 1;
+	buf = (unsigned char*) mmap(NULL, statbuf.st_size, PROT_READ,
+		MAP_SHARED|MAP_NORESERVE, 0, 0);
+	if(buf != (unsigned char*) MAP_FAILED){
+		unsigned char *cur, *lim = &buf[statbuf.st_size];
+		/* the loop must test cur, not buf: buf never changes */
+		for(cur = buf; cur != lim; ++cur){
+			ch = *cur;
+		}
+		munmap(buf, statbuf.st_size);
+	}
+	return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+/*
+ * Scanner state.
+ *   fd   descriptor that fill() reads from
+ *   bot  start of the malloc'ed buffer      top  end of that buffer
+ *   tok  start of the current token         cur  cursor saved by RET()
+ *   ptr  backup position (YYMARKER)         lim  end of the data read (YYLIMIT)
+ *   pos  position after the last newline    line number of newlines seen
+ *   eof  NULL until a short read; then one past the appended '\n'
+ */
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+/*
+ * Refills the buffer of s and returns the (possibly relocated) cursor.
+ * Does nothing once end of input has been seen (s->eof set).  Otherwise it
+ * discards the bytes before the current token, grows the buffer when fewer
+ * than BSIZE bytes are free, and reads up to BSIZE more bytes.  A read
+ * shorter than BSIZE is taken as end of input: a '\n' sentinel is stored
+ * after the data and s->eof is set to point just past it.
+ * Exits the program when the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+	if(!s->eof){
+		uint cnt = s->tok - s->bot;
+		int got;
+		if(cnt){
+			/* the two regions overlap when the kept tail is longer
+			   than cnt, so memmove is required here */
+			memmove(s->bot, s->tok, s->lim - s->tok);
+			s->tok = s->bot;
+			s->ptr -= cnt;
+			cursor -= cnt;
+			s->pos -= cnt;
+			s->lim -= cnt;
+		}
+		if((s->top - s->lim) < BSIZE){
+			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+			if(!buf){
+				perror("fill: malloc");
+				exit(1);
+			}
+			if(s->lim != s->tok)
+				memcpy(buf, s->tok, s->lim - s->tok);
+			/* rebase every pointer into the new buffer */
+			s->tok = buf;
+			s->ptr = &buf[s->ptr - s->bot];
+			cursor = &buf[cursor - s->bot];
+			s->pos = &buf[s->pos - s->bot];
+			s->lim = &buf[s->lim - s->bot];
+			s->top = &s->lim[BSIZE];
+			free(s->bot);
+			s->bot = buf;
+		}
+		got = read(s->fd, (char*) s->lim, BSIZE);
+		if(got < 0)
+			got = 0;	/* read error: treat as end of input, never use -1 as a length */
+		if((cnt = got) != BSIZE){
+			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+		}
+		s->lim += cnt;
+	}
+	return cursor;
+}
+
+/*
+ * Returns the code of the next token found at s->cur, or EOI when the
+ * newline sentinel at s->eof is reached.  The matching code is generated by
+ * re2c from the specification comments below, which must stay unchanged.
+ * RET() stores the cursor back into s->cur before returning; white space
+ * and comments restart matching without returning.
+ */
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+ * Tokenizes standard input until EOI, discarding the tokens, then closes
+ * the descriptor, frees the scanner buffer and returns 0.
+ */
+int main(void){
+	Scanner in;
+	int t;
+	memset((char*) &in, 0, sizeof(in));
+	in.fd = 0;
+	while((t = scan(&in)) != EOI){
+/*
+	printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+	printf("%d\n", t);
+*/
+	}
+	close(in.fd);
+	free(in.bot);	/* buffer allocated by fill(); NULL if nothing was read */
+	return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int unint;
+typedef unsigned char uchar;
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+/*
+ * Scanner state for the memory-mapped variant.
+ *   tok  start of the current token         cur  cursor saved by RET()
+ *   ptr  backup position (YYMARKER)         lim  end of the input (YYLIMIT)
+ *   pos  position after the last newline    line number of newlines seen
+ *   eof  NULL until fill() ran; then one past the appended '\n'
+ */
+typedef struct Scanner {
+ uchar *tok, *ptr, *cur, *pos, *lim, *eof;
+ unint line;
+} Scanner;
+
+/*
+ * Called when the scanner reaches s->lim.  On the first call it copies the
+ * unfinished token into a fresh buffer, appends a '\n' sentinel, rebases
+ * all scanner pointers into that buffer and sets s->eof just past the
+ * sentinel; later calls do nothing.  Returns the (possibly relocated)
+ * cursor.  Exits the program when the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+	if(!s->eof){
+		unint cnt = s->lim - s->tok;
+		uchar *buf = malloc((cnt + 1)*sizeof(uchar));
+		if(!buf){
+			perror("fill: malloc");
+			exit(1);
+		}
+		memcpy(buf, s->tok, cnt);
+		/* offsets are taken relative to the old s->tok, so s->tok
+		   itself must be updated last */
+		cursor = &buf[cursor - s->tok];
+		s->pos = &buf[s->pos - s->tok];
+		s->ptr = &buf[s->ptr - s->tok];
+		s->lim = &buf[cnt];
+		s->eof = s->lim; *(s->eof)++ = '\n';
+		s->tok = buf;
+	}
+	return cursor;
+}
+
+/*
+ * Returns the code of the next token found at s->cur, or EOI when the
+ * newline sentinel at s->eof is reached.  The matching code is generated by
+ * re2c from the specification comments below, which must stay unchanged.
+ * RET() stores the cursor back into s->cur before returning; white space
+ * and comments restart matching without returning.
+ */
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
+
+/*
+ * Maps standard input and tokenizes it until EOI, discarding the tokens.
+ * Returns 1 when fstat() fails and 0 otherwise; a failed mmap() is
+ * silently skipped.
+ */
+int main(void){
+	Scanner in;
+	struct stat statbuf;
+	uchar *buf;
+	/* fill() reads ptr and tok, so no field may start out indeterminate */
+	memset((char*) &in, 0, sizeof(in));
+	if(fstat(0, &statbuf) != 0)
+		return 1;
+	buf = (uchar*) mmap(NULL, statbuf.st_size, PROT_READ,
+		MAP_SHARED|MAP_NORESERVE, 0, 0);
+	if(buf != (uchar*) MAP_FAILED){
+		int t;
+		in.lim = &(in.cur = buf)[statbuf.st_size];
+		in.pos = NULL;
+		in.eof = NULL;
+		while((t = scan(&in)) != EOI){
+/*
+	printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+	printf("%d\n", t);
+*/
+		}
+		munmap(buf, statbuf.st_size);
+	}
+	return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+/*
+ * Scanner state.
+ *   fd   descriptor that fill() reads from
+ *   bot  start of the malloc'ed buffer      top  end of that buffer
+ *   tok  start of the current token         cur  cursor saved by RET()
+ *   ptr  backup position (YYMARKER)         lim  end of the data read (YYLIMIT)
+ *   pos  position after the last newline    line number of newlines seen
+ *   eof  NULL until a short read; then one past the appended '\n'
+ */
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+/*
+ * Refills the buffer of s and returns the (possibly relocated) cursor.
+ * Does nothing once end of input has been seen (s->eof set).  Otherwise it
+ * discards the bytes before the current token, grows the buffer when fewer
+ * than BSIZE bytes are free, and reads up to BSIZE more bytes.  A read
+ * shorter than BSIZE is taken as end of input: a '\n' sentinel is stored
+ * after the data and s->eof is set to point just past it.
+ * Exits the program when the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+	if(!s->eof){
+		uint cnt = s->tok - s->bot;
+		int got;
+		if(cnt){
+			/* the two regions overlap when the kept tail is longer
+			   than cnt, so memmove is required here */
+			memmove(s->bot, s->tok, s->lim - s->tok);
+			s->tok = s->bot;
+			s->ptr -= cnt;
+			cursor -= cnt;
+			s->pos -= cnt;
+			s->lim -= cnt;
+		}
+		if((s->top - s->lim) < BSIZE){
+			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+			if(!buf){
+				perror("fill: malloc");
+				exit(1);
+			}
+			if(s->lim != s->tok)
+				memcpy(buf, s->tok, s->lim - s->tok);
+			/* rebase every pointer into the new buffer */
+			s->tok = buf;
+			s->ptr = &buf[s->ptr - s->bot];
+			cursor = &buf[cursor - s->bot];
+			s->pos = &buf[s->pos - s->bot];
+			s->lim = &buf[s->lim - s->bot];
+			s->top = &s->lim[BSIZE];
+			free(s->bot);
+			s->bot = buf;
+		}
+		got = read(s->fd, (char*) s->lim, BSIZE);
+		if(got < 0)
+			got = 0;	/* read error: treat as end of input, never use -1 as a length */
+		if((cnt = got) != BSIZE){
+			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+		}
+		s->lim += cnt;
+	}
+	return cursor;
+}
+
+/*
+ * Returns the code of the next token found at s->cur, or EOI when the
+ * newline sentinel at s->eof is reached.  This variant has no keyword
+ * rules: keywords are returned as ID.  The matching code is generated by
+ * re2c from the specification comments below, which must stay unchanged.
+ */
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+ * Tokenizes standard input until EOI, discarding the tokens, then closes
+ * the descriptor, frees the scanner buffer and returns 0.
+ */
+int main(void){
+	Scanner in;
+	int t;
+	memset((char*) &in, 0, sizeof(in));
+	in.fd = 0;
+	while((t = scan(&in)) != EOI){
+/*
+	printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+	printf("%d\n", t);
+*/
+	}
+	close(in.fd);
+	free(in.bot);	/* buffer allocated by fill(); NULL if nothing was read */
+	return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+/*
+ * Scanner state.
+ *   fd   descriptor that fill() reads from
+ *   bot  start of the malloc'ed buffer      top  end of that buffer
+ *   tok  start of the current token         cur  cursor saved by RET()
+ *   ptr  backup position (YYMARKER)         lim  end of the data read (YYLIMIT)
+ *   pos  position after the last newline    line number of newlines seen
+ *   eof  NULL until a short read; then one past the appended '\n'
+ */
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+/*
+ * Refills the buffer of s and returns the (possibly relocated) cursor.
+ * Does nothing once end of input has been seen (s->eof set).  Otherwise it
+ * discards the bytes before the current token, grows the buffer when fewer
+ * than BSIZE bytes are free, and reads up to BSIZE more bytes.  A read
+ * shorter than BSIZE is taken as end of input: a '\n' sentinel is stored
+ * after the data and s->eof is set to point just past it.
+ * Exits the program when the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+	if(!s->eof){
+		uint cnt = s->tok - s->bot;
+		int got;
+		if(cnt){
+			/* the two regions overlap when the kept tail is longer
+			   than cnt, so memmove is required here */
+			memmove(s->bot, s->tok, s->lim - s->tok);
+			s->tok = s->bot;
+			s->ptr -= cnt;
+			cursor -= cnt;
+			s->pos -= cnt;
+			s->lim -= cnt;
+		}
+		if((s->top - s->lim) < BSIZE){
+			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+			if(!buf){
+				perror("fill: malloc");
+				exit(1);
+			}
+			if(s->lim != s->tok)
+				memcpy(buf, s->tok, s->lim - s->tok);
+			/* rebase every pointer into the new buffer */
+			s->tok = buf;
+			s->ptr = &buf[s->ptr - s->bot];
+			cursor = &buf[cursor - s->bot];
+			s->pos = &buf[s->pos - s->bot];
+			s->lim = &buf[s->lim - s->bot];
+			s->top = &s->lim[BSIZE];
+			free(s->bot);
+			s->bot = buf;
+		}
+		got = read(s->fd, (char*) s->lim, BSIZE);
+		if(got < 0)
+			got = 0;	/* read error: treat as end of input, never use -1 as a length */
+		if((cnt = got) != BSIZE){
+			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+		}
+		s->lim += cnt;
+	}
+	return cursor;
+}
+
+/*
+ * Returns the code of the next token found at s->cur, or EOI when the
+ * newline sentinel at s->eof is reached.  This variant spells out
+ * identifier and comment rules of increasing fixed length in addition to
+ * the general ones.  The matching code is generated by re2c from the
+ * specification comments below, which must stay unchanged.
+ *
+ * NOTE(review): X excludes only '*' and '/', so it still matches '\n'.
+ * Inside a comment a run such as X X can therefore consume a newline
+ * (including the sentinel at s->eof) without going through the "\n" rule,
+ * skipping the line count and the EOI check.  Confirm whether this is
+ * intended for this benchmark.
+ */
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+I = L|D;
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+X = any\[*/];
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+
+ L { RET(ID); }
+ L I { RET(ID); }
+ L I I { RET(ID); }
+ L I I I { RET(ID); }
+ L I I I I { RET(ID); }
+ L I I I I I { RET(ID); }
+ L I I I I I I { RET(ID); }
+ L I I I I I I I { RET(ID); }
+ L I* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ X { goto comment; }
+ X X { goto comment; }
+ X X X { goto comment; }
+ X X X X { goto comment; }
+ X X X X X { goto comment; }
+ X X X X X X { goto comment; }
+ X X X X X X X { goto comment; }
+ X X X X X X X X { goto comment; }
+ any { goto comment; }
+*/
+}
+
+/*
+ * Tokenizes standard input until EOI, discarding the tokens, then closes
+ * the descriptor, frees the scanner buffer and returns 0.
+ */
+int main(void){
+	Scanner in;
+	int t;
+	memset((char*) &in, 0, sizeof(in));
+	in.fd = 0;
+	while((t = scan(&in)) != EOI){
+/*
+	printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+	printf("%d\n", t);
+*/
+	}
+	close(in.fd);
+	free(in.bot);	/* buffer allocated by fill(); NULL if nothing was read */
+	return 0;
+}
--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL {cursor = fill(s, cursor);}
+
+#define RETURN(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+/*
+ * Refill the scanner buffer and return the (possibly relocated) cursor.
+ *
+ * Does nothing once s->eof has been set.  Otherwise:
+ *   1. slides the bytes from s->tok onward down to the start of the buffer;
+ *   2. reallocates the buffer when fewer than BSIZE bytes are free after s->lim;
+ *   3. reads up to BSIZE bytes from s->fd; a short read marks end of input by
+ *      storing a '\n' sentinel after the data and pointing s->eof just past it.
+ *
+ * s->ptr, s->pos, s->lim and the cursor are rebased whenever data moves.
+ * A failed read is treated as end of input; a failed allocation is fatal.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+	if(!s->eof){
+		uint cnt = s->tok - s->bot;
+		int got;
+		if(cnt){
+			/* source and destination overlap, so memcpy would be undefined */
+			memmove(s->bot, s->tok, s->lim - s->tok);
+			s->tok = s->bot;
+			s->ptr -= cnt;
+			cursor -= cnt;
+			s->pos -= cnt;
+			s->lim -= cnt;
+		}
+		if((s->top - s->lim) < BSIZE){
+			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+			if(!buf){
+				fprintf(stderr, "out of memory\n");
+				exit(1);
+			}
+			memcpy(buf, s->tok, s->lim - s->tok);
+			s->tok = buf;
+			s->ptr = &buf[s->ptr - s->bot];
+			cursor = &buf[cursor - s->bot];
+			s->pos = &buf[s->pos - s->bot];
+			s->lim = &buf[s->lim - s->bot];
+			s->top = &s->lim[BSIZE];
+			free(s->bot);
+			s->bot = buf;
+		}
+		got = read(s->fd, (char*) s->lim, BSIZE);
+		if(got < 0)
+			got = 0;	/* read error: do not let -1 become a huge unsigned index */
+		cnt = got;
+		if(cnt != BSIZE){
+			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+		}
+		s->lim += cnt;
+	}
+	return cursor;
+}
+
+/*
+ * Return the numeric code of the next token read through s, or 0 when the
+ * newline sentinel at s->eof is reached.
+ *
+ * Blanks and tabs are skipped, (* ... *) comments are skipped with nesting
+ * tracked in `depth`, and any other unmatched character is reported with
+ * printf and skipped.  RETURN() stores the cursor back into s->cur.
+ */
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+ uint depth;
+/* every token (re)starts here; s->tok marks where it begins */
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+digit = [0-9];
+letter = [a-zA-Z];
+*/
+
+/*!re2c
+ "(*" { depth = 1; goto comment; }
+
+ digit + {RETURN(1);}
+ digit + / ".." {RETURN(1);}
+ [0-7] + "B" {RETURN(2);}
+ [0-7] + "C" {RETURN(3);}
+ digit [0-9A-F] * "H" {RETURN(4);}
+ digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);}
+ ['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);}
+
+ "#" {RETURN(7);}
+ "&" {RETURN(8);}
+ "(" {RETURN(9);}
+ ")" {RETURN(10);}
+ "*" {RETURN(11);}
+ "+" {RETURN(12);}
+ "," {RETURN(13);}
+ "-" {RETURN(14);}
+ "." {RETURN(15);}
+ ".." {RETURN(16);}
+ "/" {RETURN(17);}
+ ":" {RETURN(18);}
+ ":=" {RETURN(19);}
+ ";" {RETURN(20);}
+ "<" {RETURN(21);}
+ "<=" {RETURN(22);}
+ "<>" {RETURN(23);}
+ "=" {RETURN(24);}
+ ">" {RETURN(25);}
+ ">=" {RETURN(26);}
+ "[" {RETURN(27);}
+ "]" {RETURN(28);}
+ "^" {RETURN(29);}
+ "{" {RETURN(30);}
+ "|" {RETURN(31);}
+ "}" {RETURN(32);}
+ "~" {RETURN(33);}
+
+ "AND" {RETURN(34);}
+ "ARRAY" {RETURN(35);}
+ "BEGIN" {RETURN(36);}
+ "BY" {RETURN(37);}
+ "CASE" {RETURN(38);}
+ "CONST" {RETURN(39);}
+ "DEFINITION" {RETURN(40);}
+ "DIV" {RETURN(41);}
+ "DO" {RETURN(42);}
+ "ELSE" {RETURN(43);}
+ "ELSIF" {RETURN(44);}
+ "END" {RETURN(45);}
+ "EXIT" {RETURN(46);}
+ "EXPORT" {RETURN(47);}
+ "FOR" {RETURN(48);}
+ "FROM" {RETURN(49);}
+ "IF" {RETURN(50);}
+ "IMPLEMENTATION" {RETURN(51);}
+ "IMPORT" {RETURN(52);}
+ "IN" {RETURN(53);}
+ "LOOP" {RETURN(54);}
+ "MOD" {RETURN(55);}
+ "MODULE" {RETURN(56);}
+ "NOT" {RETURN(57);}
+ "OF" {RETURN(58);}
+ "OR" {RETURN(59);}
+ "POINTER" {RETURN(60);}
+ "PROCEDURE" {RETURN(61);}
+ "QUALIFIED" {RETURN(62);}
+ "RECORD" {RETURN(63);}
+ "REPEAT" {RETURN(64);}
+ "RETURN" {RETURN(65);}
+ "SET" {RETURN(66);}
+ "THEN" {RETURN(67);}
+ "TO" {RETURN(68);}
+ "TYPE" {RETURN(69);}
+ "UNTIL" {RETURN(70);}
+ "VAR" {RETURN(71);}
+ "WHILE" {RETURN(72);}
+ "WITH" {RETURN(73);}
+
+ letter (letter | digit) * {RETURN(74);}
+
+ [ \t]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RETURN(0);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+/* inside a comment: `depth` counts the comment openers still unclosed */
+comment:
+/*!re2c
+ "*)"
+ {
+ if(--depth == 0)
+ goto std;
+ else
+ goto comment;
+ }
+ "(*" { ++depth; goto comment; }
+ "\n"
+ {
+ if(cursor == s->eof) RETURN(0);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+void putStr(FILE *o, char *s, uint l){
+ while(l-- > 0)
+ putc(*s++, o);
+}
+*/
+
+/*
+ * Test driver: calls scan() on file descriptor 0 until it returns 0.
+ * Tokens are discarded; enable the block below (and putStr above) to echo them.
+ */
+int main(){
+	Scanner in;
+	memset((char*) &in, 0, sizeof(in));	/* zero the whole scanner state */
+	in.fd = 0;
+	while(scan(&in)){
+/*
+	putc('<', stdout);
+	putStr(stdout, (char*) in.tok, in.cur - in.tok);
+	putc('>', stdout);
+	putc('\n', stdout);
+*/
+	}
+	return 0;	/* explicit status instead of falling off the end */
+}
--- /dev/null
+Replacement modules for an existing REXX interpreter. Not standalone.
--- /dev/null
+#include "scanio.h"
+#include "scanner.h"
+
+#define CURSOR ch
+#define LOADCURSOR ch = *cursor;
+#define ADVANCE cursor++;
+#define BACK(n) cursor -= (n);
+#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);}
+#define MARK(n) ScanCB.ptr = cursor; sel = (n);
+#define REVERT cursor = ScanCB.ptr;
+#define MARKER sel
+
+#define RETURN(i) {ScanCB.cur = cursor; return i;}
+
+/*
+ * Scan one token starting at ScanCB.cur and return its token code.
+ *
+ * On entry the token start is recorded in ScanCB.tok and ScanCB.eot is
+ * cleared; RETURN() stores the advanced cursor back into ScanCB.cur.
+ * Keywords are spelled with the single-letter classes A..Z defined below,
+ * so they match in either case.  A newline also bumps ScanCB.lineNum and
+ * recomputes ScanCB.linePos.  A NUL byte yields SU_EOF; any other
+ * unmatched byte yields SU_ERROR.
+ */
+int ScanToken(){
+ uchar *cursor = ScanCB.cur;
+ unsigned sel;
+ uchar ch;
+ ScanCB.tok = cursor;
+ ScanCB.eot = NULL;
+/*!re2c
+all = [\000-\377];
+eof = [\000];
+any = all\eof;
+letter = [a-z]|[A-Z];
+digit = [0-9];
+symchr = letter|digit|[.!?_];
+const = (digit|[.])symchr*([eE][+-]?digit+)?;
+simple = (symchr\(digit|[.]))(symchr\[.])*;
+stem = simple [.];
+symbol = symchr*;
+sqstr = ['] ((any\['\n])|(['][']))* ['];
+dqstr = ["] ((any\["\n])|(["]["]))* ["];
+str = sqstr|dqstr;
+ob = [ \t]*;
+not = [\\~];
+A = [aA];
+B = [bB];
+C = [cC];
+D = [dD];
+E = [eE];
+F = [fF];
+G = [gG];
+H = [hH];
+I = [iI];
+J = [jJ];
+K = [kK];
+L = [lL];
+M = [mM];
+N = [nN];
+O = [oO];
+P = [pP];
+Q = [qQ];
+R = [rR];
+S = [sS];
+T = [tT];
+U = [uU];
+V = [vV];
+W = [wW];
+X = [xX];
+Y = [yY];
+Z = [zZ];
+*/
+
+/* token rules: operators, punctuation, keywords, then symbols and literals */
+scan:
+/*!re2c
+"\n"
+ {
+ ++(ScanCB.lineNum);
+ ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+ RETURN(SU_EOL);
+ }
+"|" ob "|"
+ { RETURN(OP_CONCAT); }
+"+"
+ { RETURN(OP_PLUS); }
+"-"
+ { RETURN(OP_MINUS); }
+"*"
+ { RETURN(OP_MULT); }
+"/"
+ { RETURN(OP_DIV); }
+"%"
+ { RETURN(OP_IDIV); }
+"/" ob "/"
+ { RETURN(OP_REMAIN); }
+"*" ob "*"
+ { RETURN(OP_POWER); }
+"="
+ { RETURN(OP_EQUAL); }
+not ob "=" | "<" ob ">" | ">" ob "<"
+ { RETURN(OP_EQUAL_N); }
+">"
+ { RETURN(OP_GT); }
+"<"
+ { RETURN(OP_LT); }
+">" ob "=" | not ob "<"
+ { RETURN(OP_GE); }
+"<" ob "=" | not ob ">"
+ { RETURN(OP_LE); }
+"=" ob "="
+ { RETURN(OP_EQUAL_EQ); }
+not ob "=" ob "="
+ { RETURN(OP_EQUAL_EQ_N); }
+">" ob ">"
+ { RETURN(OP_GT_STRICT); }
+"<" ob "<"
+ { RETURN(OP_LT_STRICT); }
+">" ob ">" ob "=" | not ob "<" ob "<"
+ { RETURN(OP_GE_STRICT); }
+"<" ob "<" ob "=" | not ob ">" ob ">"
+ { RETURN(OP_LE_STRICT); }
+"&"
+ { RETURN(OP_AND); }
+"|"
+ { RETURN(OP_OR); }
+"&" ob "&"
+ { RETURN(OP_XOR); }
+not
+ { RETURN(OP_NOT); }
+
+":"
+ { RETURN(SU_COLON); }
+","
+ { RETURN(SU_COMMA); }
+"("
+ { RETURN(SU_POPEN); }
+")"
+ { RETURN(SU_PCLOSE); }
+";"
+ { RETURN(SU_EOC); }
+
+A D D R E S S
+ { RETURN(RX_ADDRESS); }
+A R G
+ { RETURN(RX_ARG); }
+C A L L
+ { RETURN(RX_CALL); }
+D O
+ { RETURN(RX_DO); }
+D R O P
+ { RETURN(RX_DROP); }
+E L S E
+ { RETURN(RX_ELSE); }
+E N D
+ { RETURN(RX_END); }
+E X I T
+ { RETURN(RX_EXIT); }
+I F
+ { RETURN(RX_IF); }
+I N T E R P R E T
+ { RETURN(RX_INTERPRET); }
+I T E R A T E
+ { RETURN(RX_ITERATE); }
+L E A V E
+ { RETURN(RX_LEAVE); }
+N O P
+ { RETURN(RX_NOP); }
+N U M E R I C
+ { RETURN(RX_NUMERIC); }
+O P T I O N S
+ { RETURN(RX_OPTIONS); }
+O T H E R W I S E
+ { RETURN(RX_OTHERWISE); }
+P A R S E
+ { RETURN(RX_PARSE); }
+P R O C E D U R E
+ { RETURN(RX_PROCEDURE); }
+P U L L
+ { RETURN(RX_PULL); }
+P U S H
+ { RETURN(RX_PUSH); }
+Q U E U E
+ { RETURN(RX_QUEUE); }
+R E T U R N
+ { RETURN(RX_RETURN); }
+S A Y
+ { RETURN(RX_SAY); }
+S E L E C T
+ { RETURN(RX_SELECT); }
+S I G N A L
+ { RETURN(RX_SIGNAL); }
+T H E N
+ { RETURN(RX_THEN); }
+T R A C E
+ { RETURN(RX_TRACE); }
+W H E N
+ { RETURN(RX_WHEN); }
+O F F
+ { RETURN(RXS_OFF); }
+O N
+ { RETURN(RXS_ON); }
+B Y
+ { RETURN(RXS_BY); }
+D I G I T S
+ { RETURN(RXS_DIGITS); }
+E N G I N E E R I N G
+ { RETURN(RXS_ENGINEERING); }
+E R R O R
+ { RETURN(RXS_ERROR); }
+E X P O S E
+ { RETURN(RXS_EXPOSE); }
+F A I L U R E
+ { RETURN(RXS_FAILURE); }
+F O R
+ { RETURN(RXS_FOR); }
+F O R E V E R
+ { RETURN(RXS_FOREVER); }
+F O R M
+ { RETURN(RXS_FORM); }
+F U Z Z
+ { RETURN(RXS_FUZZ); }
+H A L T
+ { RETURN(RXS_HALT); }
+L I N E I N
+ { RETURN(RXS_LINEIN); }
+N A M E
+ { RETURN(RXS_NAME); }
+N O T R E A D Y
+ { RETURN(RXS_NOTREADY); }
+N O V A L U E
+ { RETURN(RXS_NOVALUE); }
+S C I E N T I F I C
+ { RETURN(RXS_SCIENTIFIC); }
+S O U R C E
+ { RETURN(RXS_SOURCE); }
+S Y N T A X
+ { RETURN(RXS_SYNTAX); }
+T O
+ { RETURN(RXS_TO); }
+U N T I L
+ { RETURN(RXS_UNTIL); }
+U P P E R
+ { RETURN(RXS_UPPER); }
+V A L U E
+ { RETURN(RXS_VALUE); }
+V A R
+ { RETURN(RXS_VAR); }
+V E R S I O N
+ { RETURN(RXS_VERSION); }
+W H I L E
+ { RETURN(RXS_WHILE); }
+W I T H
+ { RETURN(RXS_WITH); }
+
+const
+ { RETURN(SU_CONST); }
+simple
+ { RETURN(SU_SYMBOL); }
+stem
+ { RETURN(SU_SYMBOL_STEM); }
+symbol
+ { RETURN(SU_SYMBOL_COMPOUND); }
+str
+ { RETURN(SU_LITERAL); }
+str [bB] / (all\symchr)
+ { RETURN(SU_LITERAL_BIN); }
+str [xX] / (all\symchr)
+ { RETURN(SU_LITERAL_HEX); }
+
+eof
+ { RETURN(SU_EOF); }
+any
+ { RETURN(SU_ERROR); }
+*/
+}
+
+/*
+ * Skip the filler that follows a token: carriage returns, blanks, tabs and
+ * comments (nested, with the level tracked in `depth`).
+ *
+ * ScanCB.eot is set to the cursor position on entry.  Returns TRUE when at
+ * least one blank or tab was skipped outside a comment, FALSE otherwise.
+ * Scanning stops at the first position where none of the filler rules
+ * applies, or at a NUL byte found inside a comment.
+ */
+bool StripToken(){
+ uchar *cursor = ScanCB.cur;
+ unsigned depth;
+ uchar ch;
+ bool blanks = FALSE;
+ ScanCB.eot = cursor;
+strip:
+/*!re2c
+"/*"
+ {
+ depth = 1;
+ goto comment;
+ }
+"\r"
+ { goto strip; }
+[ \t]
+ {
+ blanks = TRUE;
+ goto strip;
+ }
+[] / all
+ { RETURN(blanks); }
+*/
+
+/* inside a comment: newlines still update the line bookkeeping in ScanCB */
+comment:
+/*!re2c
+"*/"
+ {
+ if(--depth == 0)
+ goto strip;
+ else
+ goto comment;
+ }
+"\n"
+ {
+ ++(ScanCB.lineNum);
+ ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+ goto comment;
+ }
+"/*"
+ {
+ ++depth;
+ goto comment;
+ }
+eof
+ { RETURN(blanks); }
+any
+ {
+ goto comment;
+ }
+*/
+}
--- /dev/null
+/*
+ * Make room in the scan buffer, optionally read more input, and return the
+ * (possibly relocated) cursor.
+ *
+ * - s->pos is advanced by the distance scanned since s->mrk.
+ * - Data before s->tok is discarded.  When s->eot is set, only [tok, eot) and
+ *   [cursor, lim) are kept and packed together; otherwise [tok, lim) is kept.
+ * - The buffer is reallocated when fewer than 512 bytes are free after s->lim.
+ * - When ScanCBIO.file is set, up to 512 bytes are read and any unread part
+ *   of the 512-byte window is zero-filled; s->lim always advances by 512.
+ *
+ * A failed read is treated as "0 bytes read", so the window is entirely zero.
+ */
+uchar *ScanFill(uchar *cursor){
+	unsigned cnt = s->tok - s->bot;
+	int got;
+	s->pos += cursor - s->mrk;
+	if(cnt){
+		/* all moves below are within one buffer and may overlap: use memmove */
+		if(s->eot){
+			unsigned len = s->eot - s->tok;
+			memmove(s->bot, s->tok, len);
+			s->eot = &s->bot[len];
+			if((len = s->lim - cursor) != 0)
+				memmove(s->eot, cursor, len);
+			cursor = s->eot;
+			s->lim = &cursor[len];
+		} else {
+			memmove(s->bot, s->tok, s->lim - s->tok);
+			cursor -= cnt;
+			s->lim -= cnt;
+		}
+		s->tok = s->bot;
+		s->ptr -= cnt;
+	}
+	if((s->top - s->lim) < 512){
+		uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar));
+		memcpy(buf, s->bot, s->lim - s->bot);
+		s->tok = buf;
+		s->ptr = &buf[s->ptr - s->bot];
+		if(s->eot)
+			s->eot = &buf[s->eot - s->bot];
+		cursor = &buf[cursor - s->bot];
+		s->lim = &buf[s->lim - s->bot];
+		s->top = &s->lim[512];
+		free(s->bot);
+		s->bot = buf;
+	}
+	s->mrk = cursor;
+	if(ScanCBIO.file){
+		got = read(ScanCBIO.u.f.fd, (char*) s->lim, 512);
+		if(got < 0)
+			got = 0;	/* read error: keep -1 from wrapping the unsigned count */
+		cnt = got;
+		if(cnt != 512)
+			memset(&s->lim[cnt], 0, 512 - cnt);
+		s->lim += 512;
+	}
+	return cursor;
+}
--- /dev/null
+/*!re2c
+ "print" {return PRINT;}
+ [a-z]+ {return ID;}
+ [0-9]+ {return DEC;}
+ "0x" [0-9a-f]+ {return HEX;}
+ [\000-\377] {return ERR;}
+*/
--- /dev/null
+#define NULL ((char*) 0)
+/*
+ * Return a pointer just past the run of decimal digits that starts at p,
+ * or NULL when the first character is not a digit.
+ */
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+/* YYFILL expands to nothing, so the buffer is never refilled.
+   NOTE(review): presumably the caller passes a complete, terminated string - confirm. */
+#define YYFILL(n)
+/*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+*/
+}
--- /dev/null
+#ifndef _globals_h
+#define _globals_h
+
+#include "basics.h"
+
+extern char *fileName;
+extern bool sFlag;
+extern bool bFlag;
+
+extern uchar asc2ebc[256];
+extern uchar ebc2asc[256];
+
+extern uchar *xlat, *talx;
+
+#endif
--- /dev/null
+#ifndef _ins_h
+#define _ins_h
+
+#include <iostream.h>
+#include "basics.h"
+
+const uint nChars = 256;
+typedef uchar Char;
+
+const uint CHAR = 0;
+const uint GOTO = 1;
+const uint FORK = 2;
+const uint TERM = 3;
+const uint CTXT = 4;
+
+// One instruction cell, viewed through two overlapping layouts:
+//   i - tag, marked flag, link
+//   c - value, bump, link
+// NOTE(review): `tag` presumably holds one of the CHAR/GOTO/FORK/TERM/CTXT
+// constants declared above, and the `c` view presumably describes CHAR
+// entries - confirm against the code that builds instructions.
+union Ins {
+ struct {
+ byte tag;
+ byte marked;
+ void *link;
+ } i;
+ struct {
+ ushort value;
+ ushort bump;
+ void *link;
+ } c;
+};
+
+// Report whether the instruction's `marked` flag is set (nonzero).
+inline bool isMarked(Ins *i){
+	return i->i.marked ? true : false;
+}
+
+// Set the instruction's `marked` flag.
+inline void mark(Ins *i){
+	i->i.marked = 1;
+}
+
+// Clear the instruction's `marked` flag.
+inline void unmark(Ins *i){
+	i->i.marked = 0;
+}
+
+#endif
--- /dev/null
+#include <fstream.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "globals.h"
+#include "parser.h"
+#include "dfa.h"
+
+char *fileName;
+bool sFlag = false;
+bool bFlag = false;
+
+// Program entry point.
+//
+// Usage: re2c [-esb] name      (a name of "-" reads the spec from descriptor 0)
+//   e  select the asc2ebc/ebc2asc translation tables
+//   s  set sFlag
+//   b  set sFlag and bFlag
+// Every argument except the last is treated as a word of option letters; its
+// first character is skipped without being checked.
+//
+// Returns 0 on success, 1 when the input file cannot be opened, and 2 after
+// printing the usage line.
+int main(int argc, char *argv[]){
+	fileName = NULL;
+	// argc is a plain int (the standard signature), so a degenerate argc of 0
+	// lands here instead of wrapping around in the --argc test below.
+	if(argc < 2)
+		goto usage;
+	while(--argc > 1){
+		char *p = *++argv;
+		while(*++p != '\0'){
+			switch(*p){
+			case 'e':
+				xlat = asc2ebc;
+				talx = ebc2asc;
+				break;
+			case 's':
+				sFlag = true;
+				break;
+			case 'b':
+				sFlag = true;
+				bFlag = true;
+				break;
+			default:
+				goto usage;
+			}
+		}
+	}
+	fileName = *++argv;
+	int fd;
+	if(fileName[0] == '-' && fileName[1] == '\0'){
+		fileName = "<stdin>";
+		fd = 0;
+	} else {
+		if((fd = open(fileName, O_RDONLY)) < 0){
+			cerr << "can't open " << fileName << "\n";
+			return 1;
+		}
+	}
+	parse(fd, cout);
+	return 0;
+usage:
+	cerr << "usage: re2c [-esb] name\n";
+	return 2;
+}
--- /dev/null
+#ifndef lint
+static char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93";
+#endif
+#define YYBYACC 1
+#define YYMAJOR 1
+#define YYMINOR 9
+#define yyclearin (yychar=(-1))
+#define yyerrok (yyerrflag=0)
+#define YYRECOVERING (yyerrflag!=0)
+#define YYPREFIX "yy"
+#line 2 "parser.y"
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+#line 21 "parser.y"
+typedef union {
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+} YYSTYPE;
+#line 35 "y.tab.c"
+#define CLOSE 257
+#define ID 258
+#define CODE 259
+#define RANGE 260
+#define STRING 261
+#define YYERRCODE 256
+short yylhs[] = { -1,
+ 0, 0, 0, 9, 2, 3, 3, 4, 4, 5,
+ 5, 6, 6, 7, 7, 1, 1, 8, 8, 8,
+ 8,
+};
+short yylen[] = { 2,
+ 0, 2, 2, 4, 3, 0, 2, 1, 3, 1,
+ 3, 1, 2, 1, 2, 1, 2, 1, 1, 1,
+ 3,
+};
+short yydefred[] = { 1,
+ 0, 0, 19, 20, 0, 2, 0, 0, 0, 12,
+ 0, 3, 0, 18, 0, 0, 0, 0, 0, 13,
+ 16, 0, 0, 21, 0, 0, 5, 0, 17, 4,
+};
+short yydgoto[] = { 1,
+ 22, 6, 18, 7, 8, 9, 10, 11, 12,
+};
+short yysindex[] = { 0,
+ -27, -49, 0, 0, -23, 0, -44, -84, -23, 0,
+ -243, 0, -23, 0, -39, -23, -23, -244, -23, 0,
+ 0, -239, -53, 0, -104, -84, 0, -23, 0, 0,
+};
+short yyrindex[] = { 0,
+ 0, -31, 0, 0, 0, 0, -227, -17, -20, 0,
+ -40, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -36, 0, 0, -226, -16, 0, -19, 0, 0,
+};
+short yygindex[] = { 0,
+ 0, 0, 0, 21, 18, 17, 1, 0, 0,
+};
+#define YYTABLESIZE 243
+short yytable[] = { 14,
+ 14, 24, 16, 15, 15, 30, 14, 19, 18, 20,
+ 15, 13, 5, 21, 27, 18, 5, 29, 14, 17,
+ 10, 11, 15, 8, 9, 15, 10, 11, 20, 8,
+ 9, 6, 7, 23, 26, 28, 25, 0, 10, 11,
+ 0, 8, 9, 0, 0, 0, 0, 0, 0, 0,
+ 0, 14, 0, 0, 0, 15, 0, 0, 0, 0,
+ 18, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 17, 10, 11, 0, 0, 0, 0, 0, 0, 17,
+ 0, 0, 0, 14, 17, 0, 0, 15, 0, 0,
+ 0, 0, 18, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 10, 11, 0, 8, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 14, 14, 14,
+ 14, 15, 15, 15, 15, 18, 18, 18, 18, 18,
+ 2, 0, 3, 4, 14, 0, 3, 4, 10, 11,
+ 0, 8, 9,
+};
+short yycheck[] = { 40,
+ 41, 41, 47, 40, 41, 59, 47, 92, 40, 9,
+ 47, 61, 40, 257, 259, 47, 40, 257, 59, 124,
+ 41, 41, 59, 41, 41, 5, 47, 47, 28, 47,
+ 47, 259, 259, 13, 17, 19, 16, -1, 59, 59,
+ -1, 59, 59, -1, -1, -1, -1, -1, -1, -1,
+ -1, 92, -1, -1, -1, 92, -1, -1, -1, -1,
+ 92, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 124, 92, 92, -1, -1, -1, -1, -1, -1, 124,
+ -1, -1, -1, 124, 124, -1, -1, 124, -1, -1,
+ -1, -1, 124, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 124, 124, -1, 124, 124, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, 258, 259, 260,
+ 261, 258, 259, 260, 261, 257, 258, 259, 260, 261,
+ 258, -1, 260, 261, 258, -1, 260, 261, 259, 259,
+ -1, 259, 259,
+};
+#define YYFINAL 1
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+#define YYMAXTOKEN 261
+#if YYDEBUG
+char *yyname[] = {
+"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,"'('","')'",0,0,0,0,0,"'/'",0,0,0,0,0,0,0,0,0,0,0,"';'",0,"'='",0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'\\\\'",0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+"CLOSE","ID","CODE","RANGE","STRING",
+};
+char *yyrule[] = {
+"$accept : spec",
+"spec :",
+"spec : spec rule",
+"spec : spec decl",
+"decl : ID '=' expr ';'",
+"rule : expr look CODE",
+"look :",
+"look : '/' expr",
+"expr : diff",
+"expr : expr '|' diff",
+"diff : term",
+"diff : diff '\\\\' term",
+"term : factor",
+"term : term factor",
+"factor : primary",
+"factor : primary close",
+"close : CLOSE",
+"close : close CLOSE",
+"primary : ID",
+"primary : RANGE",
+"primary : STRING",
+"primary : '(' expr ')'",
+};
+#endif
+#ifdef YYSTACKSIZE
+#undef YYMAXDEPTH
+#define YYMAXDEPTH YYSTACKSIZE
+#else
+#ifdef YYMAXDEPTH
+#define YYSTACKSIZE YYMAXDEPTH
+#else
+#define YYSTACKSIZE 500
+#define YYMAXDEPTH 500
+#endif
+#endif
+int yydebug;
+int yynerrs;
+int yyerrflag;
+int yychar;
+short *yyssp;
+YYSTYPE *yyvsp;
+YYSTYPE yyval;
+YYSTYPE yylval;
+short yyss[YYSTACKSIZE];
+YYSTYPE yyvs[YYSTACKSIZE];
+#define yystacksize YYSTACKSIZE
+#line 121 "parser.y"
+
+void yyerror(char* s){
+ in->fatal(s);
+}
+
+int yylex(){
+ return in->scan();
+}
+
+void parse(int i, ostream &o){
+ char * fnamebuf;
+ char * token;
+
+ o << "/* Generated by re2c 0.5 on ";
+ time_t now = time(&now);
+ o.write(ctime(&now), 24);
+ o << " */\n";
+
+ in = new Scanner(i);
+
+ o << "#line " << in->line() << " \"";
+ if( fileName != NULL ) {
+ fnamebuf = strdup( fileName );
+ } else {
+ fnamebuf = strdup( "<stdin>" );
+ }
+ token = strtok( fnamebuf, "\\" );
+ for(;;) {
+ o << token;
+ token = strtok( NULL, "\\" );
+ if( token == NULL ) break;
+ o << "\\\\";
+ }
+ o << "\"\n";
+ free( fnamebuf );
+
+ while(in->echo(o)){
+ yyparse();
+ if(spec)
+ genCode(o, spec);
+ o << "#line " << in->line() << "\n";
+ }
+}
+#line 235 "y.tab.c"
+#define YYABORT goto yyabort
+#define YYREJECT goto yyabort
+#define YYACCEPT goto yyaccept
+#define YYERROR goto yyerrlab
+int
+yyparse()
+{
+ register int yym, yyn, yystate;
+#if YYDEBUG
+ register char *yys;
+ extern char *getenv();
+
+ if (yys = getenv("YYDEBUG"))
+ {
+ yyn = *yys;
+ if (yyn >= '0' && yyn <= '9')
+ yydebug = yyn - '0';
+ }
+#endif
+
+ yynerrs = 0;
+ yyerrflag = 0;
+ yychar = (-1);
+
+ yyssp = yyss;
+ yyvsp = yyvs;
+ *yyssp = yystate = 0;
+
+yyloop:
+ if (yyn = yydefred[yystate]) goto yyreduce;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ }
+ if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, shifting to state %d\n",
+ YYPREFIX, yystate, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ yychar = (-1);
+ if (yyerrflag > 0) --yyerrflag;
+ goto yyloop;
+ }
+ if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+ {
+ yyn = yytable[yyn];
+ goto yyreduce;
+ }
+ if (yyerrflag) goto yyinrecovery;
+#ifdef lint
+ goto yynewerror;
+#endif
+yynewerror:
+ yyerror("syntax error");
+#ifdef lint
+ goto yyerrlab;
+#endif
+yyerrlab:
+ ++yynerrs;
+yyinrecovery:
+ if (yyerrflag < 3)
+ {
+ yyerrflag = 3;
+ for (;;)
+ {
+ if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, error recovery shifting\
+ to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate = yytable[yyn];
+ *++yyvsp = yylval;
+ goto yyloop;
+ }
+ else
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: error recovery discarding state %d\n",
+ YYPREFIX, *yyssp);
+#endif
+ if (yyssp <= yyss) goto yyabort;
+ --yyssp;
+ --yyvsp;
+ }
+ }
+ }
+ else
+ {
+ if (yychar == 0) goto yyabort;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
+ YYPREFIX, yystate, yychar, yys);
+ }
+#endif
+ yychar = (-1);
+ goto yyloop;
+ }
+yyreduce:
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: state %d, reducing by rule %d (%s)\n",
+ YYPREFIX, yystate, yyn, yyrule[yyn]);
+#endif
+ yym = yylen[yyn];
+ yyval = yyvsp[1-yym];
+ switch (yyn)
+ {
+case 1:
+#line 40 "parser.y"
+{ accept = 0;
+ spec = NULL; }
+break;
+case 2:
+#line 43 "parser.y"
+{ spec = spec? mkAlt(spec, yyvsp[0].regexp) : yyvsp[0].regexp; }
+break;
+case 4:
+#line 48 "parser.y"
+{ if(yyvsp[-3].symbol->re)
+ in->fatal("sym already defined");
+ yyvsp[-3].symbol->re = yyvsp[-1].regexp; }
+break;
+case 5:
+#line 54 "parser.y"
+{ yyval.regexp = new RuleOp(yyvsp[-2].regexp, yyvsp[-1].regexp, yyvsp[0].token, accept++); }
+break;
+case 6:
+#line 58 "parser.y"
+{ yyval.regexp = new NullOp; }
+break;
+case 7:
+#line 60 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 8:
+#line 64 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 9:
+#line 66 "parser.y"
+{ yyval.regexp = mkAlt(yyvsp[-2].regexp, yyvsp[0].regexp); }
+break;
+case 10:
+#line 70 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 11:
+#line 72 "parser.y"
+{ yyval.regexp = mkDiff(yyvsp[-2].regexp, yyvsp[0].regexp);
+ if(!yyval.regexp)
+ in->fatal("can only difference char sets");
+ }
+break;
+case 12:
+#line 79 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 13:
+#line 81 "parser.y"
+{ yyval.regexp = new CatOp(yyvsp[-1].regexp, yyvsp[0].regexp); }
+break;
+case 14:
+#line 85 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 15:
+#line 87 "parser.y"
+{
+ switch(yyvsp[0].op){
+ case '*':
+ yyval.regexp = mkAlt(new CloseOp(yyvsp[-1].regexp), new NullOp());
+ break;
+ case '+':
+ yyval.regexp = new CloseOp(yyvsp[-1].regexp);
+ break;
+ case '?':
+ yyval.regexp = mkAlt(yyvsp[-1].regexp, new NullOp());
+ break;
+ }
+ }
+break;
+case 16:
+#line 103 "parser.y"
+{ yyval.op = yyvsp[0].op; }
+break;
+case 17:
+#line 105 "parser.y"
+{ yyval.op = (yyvsp[-1].op == yyvsp[0].op) ? yyvsp[-1].op : '*'; }
+break;
+case 18:
+#line 109 "parser.y"
+{ if(!yyvsp[0].symbol->re)
+ in->fatal("can't find symbol");
+ yyval.regexp = yyvsp[0].symbol->re; }
+break;
+case 19:
+#line 113 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 20:
+#line 115 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 21:
+#line 117 "parser.y"
+{ yyval.regexp = yyvsp[-1].regexp; }
+break;
+#line 476 "y.tab.c"
+ }
+ yyssp -= yym;
+ yystate = *yyssp;
+ yyvsp -= yym;
+ yym = yylhs[yyn];
+ if (yystate == 0 && yym == 0)
+ {
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state 0 to\
+ state %d\n", YYPREFIX, YYFINAL);
+#endif
+ yystate = YYFINAL;
+ *++yyssp = YYFINAL;
+ *++yyvsp = yyval;
+ if (yychar < 0)
+ {
+ if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+ if (yydebug)
+ {
+ yys = 0;
+ if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+ if (!yys) yys = "illegal-symbol";
+ printf("%sdebug: state %d, reading %d (%s)\n",
+ YYPREFIX, YYFINAL, yychar, yys);
+ }
+#endif
+ }
+ if (yychar == 0) goto yyaccept;
+ goto yyloop;
+ }
+ if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
+ yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
+ yystate = yytable[yyn];
+ else
+ yystate = yydgoto[yym];
+#if YYDEBUG
+ if (yydebug)
+ printf("%sdebug: after reduction, shifting from state %d \
+to state %d\n", YYPREFIX, *yyssp, yystate);
+#endif
+ if (yyssp >= yyss + yystacksize - 1)
+ {
+ goto yyoverflow;
+ }
+ *++yyssp = yystate;
+ *++yyvsp = yyval;
+ goto yyloop;
+yyoverflow:
+ yyerror("yacc stack overflow");
+yyabort:
+ return (1);
+yyaccept:
+ return (0);
+}
--- /dev/null
+#ifndef _parser_h
+#define _parser_h
+
+#include "scanner.h"
+#include "re.h"
+
+// A named regular expression.  The parser stores the right-hand side of a
+// definition in `re` and treats a NULL `re` as "not defined yet".
+// NOTE(review): `first`/`next` look like an intrusive list of all symbols and
+// find() presumably looks a name up in it - confirm in the implementation.
+class Symbol {
+public:
+ static Symbol *first;
+ Symbol *next;
+ Str name;
+ RegExp *re;
+public:
+ Symbol(const SubStr&);
+ static Symbol *find(const SubStr&);
+};
+
+void parse(int, ostream&);
+
+#endif
--- /dev/null
+%{
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+%}
+
+%start spec
+
+%union {
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+}
+
+%token CLOSE ID CODE RANGE STRING
+
+%type <op> CLOSE
+%type <op> close
+%type <symbol> ID
+%type <token> CODE
+%type <regexp> RANGE STRING
+%type <regexp> rule look expr diff term factor primary
+
+%%
+
+/* spec: a sequence of rules and definitions.  The empty alternative resets
+   the rule counter and the accumulated expression; each rule is then joined
+   onto `spec` with mkAlt. */
+spec :
+ { accept = 0;
+ spec = NULL; }
+ | spec rule
+ { spec = spec? mkAlt(spec, $2) : $2; }
+ | spec decl
+ ;
+
+/* decl: bind a name to an expression; redefining a name is fatal. */
+decl : ID '=' expr ';'
+ { if($1->re)
+ in->fatal("sym already defined");
+ $1->re = $3; }
+ ;
+
+/* rule: expression, optional lookahead and action code; each rule takes
+   the next value of the `accept` counter. */
+rule : expr look CODE
+ { $$ = new RuleOp($1, $2, $3, accept++); }
+ ;
+
+/* look: optional '/' expr; a NullOp stands in when it is absent. */
+look :
+ { $$ = new NullOp; }
+ | '/' expr
+ { $$ = $2; }
+ ;
+
+/* expr: alternation with '|'. */
+expr : diff
+ { $$ = $1; }
+ | expr '|' diff
+ { $$ = mkAlt($1, $3); }
+ ;
+
+/* diff: difference with '\'; fatal when mkDiff returns no result. */
+diff : term
+ { $$ = $1; }
+ | diff '\\' term
+ { $$ = mkDiff($1, $3);
+ if(!$$)
+ in->fatal("can only difference char sets");
+ }
+ ;
+
+/* term: concatenation by juxtaposition. */
+term : factor
+ { $$ = $1; }
+ | term factor
+ { $$ = new CatOp($1, $2); }
+ ;
+
+/* factor: a primary with an optional closure operator.
+   '*' -> CloseOp | NullOp, '+' -> CloseOp, '?' -> primary | NullOp. */
+factor : primary
+ { $$ = $1; }
+ | primary close
+ {
+ switch($2){
+ case '*':
+ $$ = mkAlt(new CloseOp($1), new NullOp());
+ break;
+ case '+':
+ $$ = new CloseOp($1);
+ break;
+ case '?':
+ $$ = mkAlt($1, new NullOp());
+ break;
+ }
+ }
+ ;
+
+/* close: a run of closure operators folds into one; equal operators
+   collapse to themselves, a mixed pair becomes '*'. */
+close : CLOSE
+ { $$ = $1; }
+ | close CLOSE
+ { $$ = ($1 == $2) ? $1 : '*'; }
+ ;
+
+/* primary: a defined name, a character range, a string, or a
+   parenthesised expression; an undefined name is fatal. */
+primary : ID
+ { if(!$1->re)
+ in->fatal("can't find symbol");
+ $$ = $1->re; }
+ | RANGE
+ { $$ = $1; }
+ | STRING
+ { $$ = $1; }
+ | '(' expr ')'
+ { $$ = $2; }
+ ;
+
+%%
+
+// Error hook for the generated parser: hands the message to fatal() on the
+// current scanner `in`.
+void yyerror(char* s){
+ in->fatal(s);
+}
+
+// Token source for the generated parser: returns whatever scan() on the
+// current scanner `in` returns.
+int yylex(){
+ return in->scan();
+}
+
+// Drive one run of the generator.
+//
+//   i - file descriptor handed to the Scanner that reads the input
+//   o - stream that receives the generated source
+//
+// Writes the "Generated by" banner and an initial #line directive naming the
+// input file, then repeats while in->echo(o) succeeds: run the parser, emit
+// code for the collected spec (if any), and write a #line directive carrying
+// the scanner's current line.
+void parse(int i, ostream &o){
+	o << "/* Generated by re2c 0.5 on ";
+	time_t now = time(&now);
+	o.write(ctime(&now), 24);	// ctime() text without its trailing newline
+	o << " */\n";
+
+	in = new Scanner(i);
+
+	o << "#line " << in->line() << " \"";
+	// Double every backslash so the name is a valid string literal in the
+	// #line directive.  Done one character at a time: splitting on '\\' with
+	// strtok() dropped leading, trailing and repeated backslashes and
+	// returned NULL for an empty name.
+	for(const char *p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; ++p){
+		if(*p == '\\')
+			o << "\\\\";
+		else
+			o.put(*p);
+	}
+	o << "\"\n";
+
+	while(in->echo(o)){
+		yyparse();
+		if(spec)
+			genCode(o, spec);
+		o << "#line " << in->line() << "\n";
+	}
+}
--- /dev/null
+#ifndef _re_h
+#define _re_h
+
+#include <iostream.h>
+#include "token.h"
+#include "ins.h"
+
+// Node type used by CharSet (its rep[] and ptn[] arrays and list heads).
+// NOTE(review): field meanings are not visible in this header - `card` is
+// presumably a member count and `fix`/`nxt` list links; confirm in the code
+// that fills them in.
+struct CharPtn {
+ uint card;
+ CharPtn *fix;
+ CharPtn *nxt;
+};
+
+// Working set passed to RegExp::split(): one CharPtn slot and one
+// representative pointer per character code (nChars of each), plus list heads.
+// NOTE(review): `freeHead`/`freeTail` look like a free list of unused ptn[]
+// entries - confirm in the implementation.
+struct CharSet {
+ CharPtn *fix;
+ CharPtn *freeHead, **freeTail;
+ CharPtn *rep[nChars];
+ CharPtn ptn[nChars];
+};
+
+// Half-open interval [lb,ub) of character codes; `next` chains ranges together.
+class Range {
+public:
+	Range *next;
+	uint lb, ub;	// [lb,ub)
+public:
+	// Build an unlinked range covering [l,u).
+	Range(uint l, uint u) : next(NULL), lb(l), ub(u)
+		{ }
+	// Copy the bounds only; the copy starts out unlinked (next == NULL).
+	// Takes a const reference so const ranges and temporaries can be copied.
+	Range(const Range &r) : next(NULL), lb(r.lb), ub(r.ub)
+		{ }
+	friend ostream& operator<<(ostream&, const Range&);
+	friend ostream& operator<<(ostream&, const Range*);
+};
+
+// Print the range behind `r`; a NULL pointer prints nothing.
+inline ostream& operator<<(ostream &o, const Range *r){
+	if(!r)
+		return o;
+	return o << *r;
+}
+
+// Abstract base of every regular-expression node.
+// Each concrete node type returns its own static `type` pointer from
+// typeOf(); isA() compares that pointer, so it is an exact-type check.
+class RegExp {
+public:
+	uint size;
+public:
+	// Virtual so that a node deleted through a RegExp* runs the destructor
+	// of its concrete class.
+	virtual ~RegExp() { }
+	virtual char *typeOf() = 0;
+	// Returns this when the node's type tag is exactly `t`, NULL otherwise.
+	RegExp *isA(char *t)
+		{ return typeOf() == t? this : NULL; }
+	virtual void split(CharSet&) = 0;
+	virtual void calcSize(Char*) = 0;
+	virtual uint fixedLength();
+	virtual void compile(Char*, Ins*) = 0;
+	virtual void display(ostream&) const = 0;
+	friend ostream& operator<<(ostream&, const RegExp&);
+	friend ostream& operator<<(ostream&, const RegExp*);
+};
+
+// Print a node by delegating to its virtual display().
+inline ostream& operator<<(ostream &o, const RegExp &re){
+ re.display(o);
+ return o;
+}
+
+// Print the node behind `re`.  A NULL pointer prints nothing, matching the
+// Range* overload, instead of dereferencing NULL.
+inline ostream& operator<<(ostream &o, const RegExp *re){
+	return re ? o << *re : o;
+}
+
+// Placeholder node; the parser creates it for a missing lookahead and for
+// the empty alternative of the '*' and '?' operators.  Displays as "_".
+class NullOp: public RegExp {
+public:
+	static char *type;
+
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	uint fixedLength();
+	void compile(Char*, Ins*);
+	void display(ostream &o) const { o << "_"; }
+};
+
+// Node holding a list of character ranges in `match`.
+class MatchOp: public RegExp {
+public:
+	static char *type;
+	Range *match;
+
+	MatchOp(Range *m) : match(m) { }
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	uint fixedLength();
+	void compile(Char*, Ins*);
+	void display(ostream&) const;
+};
+
+// One scanner rule: expression `exp`, lookahead `ctx`, action `code` and the
+// rule's `accept` number.  Displays as "exp/ctx;".
+class RuleOp: public RegExp {
+private:
+	RegExp *exp;
+public:
+	RegExp *ctx;
+	static char *type;
+	Ins *ins;
+	uint accept;
+	Token *code;
+	uint line;
+
+	RuleOp(RegExp*, RegExp*, Token*, uint);
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	void compile(Char*, Ins*);
+	void display(ostream &o) const { o << exp << "/" << ctx << ";"; }
+};
+
+// Alternation of two sub-expressions; displays as "exp1|exp2".
+class AltOp: public RegExp {
+private:
+	RegExp *exp1, *exp2;
+public:
+	static char *type;
+
+	AltOp(RegExp *e1, RegExp *e2) : exp1(e1), exp2(e2) { }
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	uint fixedLength();
+	void compile(Char*, Ins*);
+	void display(ostream &o) const { o << exp1 << "|" << exp2; }
+	friend RegExp *mkAlt(RegExp*, RegExp*);
+};
+
+// Concatenation of two sub-expressions; displays as "exp1exp2".
+class CatOp: public RegExp {
+private:
+	RegExp *exp1, *exp2;
+public:
+	static char *type;
+
+	CatOp(RegExp *e1, RegExp *e2) : exp1(e1), exp2(e2) { }
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	uint fixedLength();
+	void compile(Char*, Ins*);
+	void display(ostream &o) const { o << exp1 << exp2; }
+};
+
+// Closure of one sub-expression; displays as "exp+".  The parser builds
+// '*' as a CloseOp alternated with a NullOp.
+class CloseOp: public RegExp {
+private:
+	RegExp *exp;
+public:
+	static char *type;
+
+	CloseOp(RegExp *e) : exp(e) { }
+	char *typeOf() { return type; }
+	void split(CharSet&);
+	void calcSize(Char*);
+	void compile(Char*, Ins*);
+	void display(ostream &o) const { o << exp << "+"; }
+};
+
+extern void genCode(ostream&, RegExp*);
+extern RegExp *mkDiff(RegExp*, RegExp*);
+extern RegExp *strToRE(SubStr);
+extern RegExp *ranToRE(SubStr);
+
+#endif
--- /dev/null
+.ds re \fBre2c\fP
+.ds le \fBlex\fP
+.ds rx regular expression
+.ds lx \fIl\fP-expression
+.TH RE2C 1 "8 April 1994" "Version 0.5"
+\"$Log$
+\"Revision 1.1 2003/12/13 04:58:20 nuffer
+\"Initial revision
+\"
+\"Revision 1.2 1994/04/16 15:50:32 peter
+\"Fix bug in simple example.
+\"
+\"Revision 1.1 1994/04/08 15:39:09 peter
+\"Initial revision
+\"
+.SH NAME
+re2c \- convert regular expressions to C/C++
+
+.SH SYNOPSIS
+\*(re [\fB-esb\fP] \fIname\fP
+
+.SH DESCRIPTION
+\*(re is a preprocessor that generates C-based recognizers from regular
+expressions.
+The input to \*(re consists of C/C++ source interleaved with
+comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
+scanner specifications.
+In the output these comments are replaced with code that, when
+executed, will find the next input token and then execute
+some user-supplied token-specific code.
+
+For example, given the following code
+
+.in +3
+.nf
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+/*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\\000-\\377] {return NULL;}
+*/
+}
+.fi
+.in -3
+
+\*(re will generate
+
+.in +3
+.nf
+/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+#line 1 "simple.re"
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+yy1: ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+yy2: yych = *++YYCURSOR;
+ goto yy7;
+yy3:
+#line 10
+ {return YYCURSOR;}
+yy4: yych = *++YYCURSOR;
+yy5:
+#line 11
+ {return NULL;}
+yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+}
+#line 12
+
+}
+.fi
+.in -3
+
+.SH OPTIONS
+\*(re provides the following options:
+.TP
+\fB-e\fP
+Cross-compile from an ASCII platform to an EBCDIC one.
+.TP
+\fB-s\fP
+Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
+assist to generate better code.
+.TP
+\fB-b\fP
+Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
+code out of the compiler. Most useful for specifications with more than a
+few keywords (e.g. for most programming languages).
+
+.SH "INTERFACE CODE"
+Unlike other scanner generators, \*(re does not generate complete scanners:
+the user must supply some interface code.
+In particular, the user must define the following macros:
+.TP
+\fCYYCTYPE\fP
+Type used to hold an input symbol.
+Usually \fCchar\fP or \fCunsigned char\fP.
+.TP
+\fCYYCURSOR\fP
+\*(lx of type \fCYYCTYPE*\fP that points to the current input symbol.
+The generated code advances \fCYYCURSOR\fP as symbols are matched.
+On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
+current token. On exit, \fCYYCURSOR\fP will point to the first character of
+the following token.
+.TP
+\fCYYLIMIT\fP
+Expression of type \fCYYCTYPE*\fP that marks the end of the buffer
+(\fCYYLIMIT[-1]\fP is the last character in the buffer).
+The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
+to determine when the buffer needs (re)filling.
+.TP
+\fCYYMARKER\fP
+\*(lx of type \fCYYCTYPE*\fP.
+The generated code saves backtracking information in \fCYYMARKER\fP.
+.TP
+\fCYYFILL(\fP\fIn\fP\fC)\fP
+The generated code "calls" \fCYYFILL\fP when the buffer needs
+(re)filling: at least \fIn\fP additional characters should
+be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and
+\fCYYMARKER\fP as needed. Note that for typical programming languages
+\fIn\fP will be the length of the longest keyword plus one.
+
+.SH "SCANNER SPECIFICATIONS"
+Each scanner specification consists of a set of \fIrules\fP and name
+definitions.
+Rules consist of a regular expression along with a block of C/C++ code that
+is to be executed when the associated regular expression is matched.
+Name definitions are of the form
+``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''.
+
+.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
+.TP
+\fC"foo"\fP
+the literal string \fCfoo\fP.
+ANSI-C escape sequences can be used.
+.TP
+\fC[xyz]\fP
+a "character class"; in this case,
+the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
+.TP
+\fC[abj-oZ]\fP
+a "character class" with a range in it;
+matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
+or a '\fCZ\fP'.
+.TP
+\fIr\fP\fC\e\fP\fIs\fP
+match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
+which can be expressed as character classes.
+.TP
+\fIr\fP\fC*\fP
+zero or more \fIr\fP's, where \fIr\fP is any regular expression
+.TP
+\fIr\fP\fC+\fP
+one or more \fIr\fP's
+.TP
+\fIr\fP\fC?\fP
+zero or one \fIr\fP's (that is, "an optional \fIr\fP")
+.TP
+name
+the expansion of the "name" definition (see above)
+.TP
+\fC(\fP\fIr\fP\fC)\fP
+an \fIr\fP; parentheses are used to override precedence
+(see below)
+.TP
+\fIrs\fP
+an \fIr\fP followed by an \fIs\fP ("concatenation")
+.TP
+\fIr\fP\fC|\fP\fIs\fP
+either an \fIr\fP or an \fIs\fP
+.TP
+\fIr\fP\fC/\fP\fIs\fP
+an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
+the matched text. This type of \*(rx is called "trailing context".
+.LP
+The regular expressions listed above are grouped according to
+precedence, from highest precedence at the top to lowest at the bottom.
+Those grouped together have equal precedence.
+
+.SH "A LARGER EXAMPLE"
+.LP
+.in +3
+.nf
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){
+ if(!s->eof){
+ uint cnt = s->tok - s->bot;
+ if(cnt){
+ memcpy(s->bot, s->tok, s->lim - s->tok);
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){
+ uchar *buf = (uchar*)
+ malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok);
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
+ s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
+ }
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\\000-\\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\\[\\n\\\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\\[\\n\\\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \\t\\v\\f]+ { goto std; }
+
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+main(){
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in));
+ in.fd = 0;
+ while((t = scan(&in)) != EOI){
+/*
+ printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
+ printf("%d\\n", t);
+*/
+ }
+ close(in.fd);
+}
+.fi
+.in -3
+
+.SH "SEE ALSO"
+.LP
+flex(1), lex(1).
+
+.SH FEATURES
+.LP
+\*(re does not provide a default action:
+the generated code assumes that the input
+will consist of a sequence of tokens.
+Typically this can be dealt with by adding a rule such as the one for
+unexpected characters in the example above.
+.LP
+The user must arrange for a sentinel token to appear at the end of input
+(and provide a rule for matching it):
+\*(re does not provide an \fC<<EOF>>\fP expression.
+If the source is from a null-byte terminated string, a
+rule matching a null character will suffice. If the source is from a
+file then the approach taken in the example can be used: pad the input with
+a newline (or some other character that can't appear within another token);
+upon recognizing such a character check to see if it is the sentinel
+and act accordingly.
+.LP
+\*(re does not provide start conditions: use a separate scanner
+specification for each start condition (as illustrated in the above example).
+.LP
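+\*(re does not provide negated character classes of the form \fC[^x]\fP:
+use the difference operator (\fIr\fP\fC\e\fP\fIs\fP) instead.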
+.SH BUGS
+.LP
+Only fixed length trailing context can be handled.
+.LP
+The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not
+provided to the generated code (this value is needed for constructing
+the interface code).
+Note that this value is usually relatively small: for
+typical programming languages \fIn\fP will be the length of the longest
+keyword plus one.
+.LP
+Difference only works for character sets.
+.LP
+The \*(re internal algorithms need documentation.
+
+.SH AUTHOR
+.LP
+Please send bug reports, fixes and feedback to:
+.LP
+.nf
+Peter Bumbulis
+Computer Systems Group
+University of Waterloo
+Waterloo, Ontario
+N2L 3G1
+Internet: peter@csg.uwaterloo.ca
+.fi
--- /dev/null
+/* Generated by re2c 0.5 on Sat May 15 11:35:52 1999 */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT lim
+#define YYMARKER ptr
+#define YYFILL(n) {cursor = fill(cursor);}
+
+#define RETURN(i) {cur = cursor; return i;}
+
+
+/*
+ * Set up a scanner that reads from descriptor i.  All buffer pointers
+ * start out NULL, so the first fill() call is what allocates the buffer.
+ * Line counting starts at 1.
+ */
+Scanner::Scanner(int i)
+    : in(i),
+      bot(NULL), tok(NULL), ptr(NULL), cur(NULL),
+      pos(NULL), lim(NULL), top(NULL), eof(NULL),
+      tchar(0), tline(0), cline(1)
+{
+}
+
+/*
+ * Make more input available to the scanner code and return the cursor,
+ * adjusted for any movement of the buffer.
+ *
+ * Once eof has been set the call does nothing.  Otherwise it
+ *   1. slides the text from tok onward down to the start of the buffer,
+ *   2. switches to a larger buffer when fewer than BSIZE bytes are free
+ *      after lim, and
+ *   3. reads up to BSIZE further bytes from the descriptor in.
+ * tok, ptr, pos, lim and the cursor are rebased in steps 1 and 2.  When
+ * the input is exhausted a sentinel '\n' is stored behind the last byte
+ * and eof is left pointing just past that sentinel.
+ *
+ * The slide uses memmove() because source and destination lie in the same
+ * buffer and may overlap.  read() is repeated after a short count, which
+ * on pipes and terminals does not mean end of file, and a failing read()
+ * is reported through fatal() rather than being used as a byte count.
+ */
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+        uint cnt = tok - bot;
+        if(cnt){
+            memmove(bot, tok, lim - tok);
+            tok = bot;
+            ptr -= cnt;
+            cursor -= cnt;
+            pos -= cnt;
+            lim -= cnt;
+        }
+        if((top - lim) < BSIZE){
+            uchar *buf = new uchar[(lim - bot) + BSIZE];
+            memcpy(buf, tok, lim - tok);
+            tok = buf;
+            ptr = &buf[ptr - bot];
+            cursor = &buf[cursor - bot];
+            pos = &buf[pos - bot];
+            lim = &buf[lim - bot];
+            top = &lim[BSIZE];
+            delete [] bot;
+            bot = buf;
+        }
+        /* keep reading until BSIZE bytes have arrived or the input ends */
+        cnt = 0;
+        while(cnt < BSIZE){
+            int got = read(in, (char*) lim + cnt, BSIZE - cnt);
+            if(got < 0)
+                fatal("read error");
+            if(got == 0)
+                break;
+            cnt += got;
+        }
+        if(cnt != BSIZE){
+            /* input exhausted: plant the sentinel newline */
+            eof = &lim[cnt]; *eof++ = '\n';
+        }
+        lim += cnt;
+    }
+    return cursor;
+}
+
+#line 68
+
+
+/*
+ * Copy input verbatim to out until the seven-character opener of a re2c
+ * block has been read.
+ *
+ * Returns 1 once that opener has been consumed; the opener itself is not
+ * written.  Returns 0 when the sentinel newline appended by fill() is
+ * reached, i.e. at end of input.  Complete lines are written as soon as
+ * their newline is seen.  Text still pending when the sentinel is reached
+ * (a last line without a terminating newline) is written, without the
+ * sentinel, just before returning 0 so that it is not lost.
+ *
+ * The state machine below was produced by re2c from the echo rules in
+ * scanner.re; keep the two in step when editing either.
+ */
+int Scanner::echo(ostream &out){
+ uchar *cursor = cur;
+ tok = cursor;
+echo:
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+yy1: ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy4;
+ if(yych != '/') goto yy6;
+yy2: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '*') goto yy7;
+yy3:
+#line 82
+ { goto echo; }
+yy4: yych = *++YYCURSOR;
+yy5:
+#line 78
+ { if(cursor == eof){
+ /* sentinel reached: flush the unterminated last line, minus the sentinel */
+ out.write(tok, &cursor[-1] - tok);
+ RETURN(0);
+ }
+ out.write(tok, cursor - tok);
+ tok = pos = cursor; cline++;
+ goto echo; }
+yy6: yych = *++YYCURSOR;
+ goto yy3;
+yy7: yych = *++YYCURSOR;
+ if(yych == '!') goto yy9;
+yy8: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy3;
+ }
+yy9: yych = *++YYCURSOR;
+ if(yych != 'r') goto yy8;
+yy10: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy8;
+yy11: yych = *++YYCURSOR;
+ if(yych != '2') goto yy8;
+yy12: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy8;
+yy13: yych = *++YYCURSOR;
+yy14:
+#line 75
+ { out.write(tok, &cursor[-7] - tok);
+ tok = cursor;
+ RETURN(1); }
+}
+#line 83
+
+}
+
+
+int Scanner::scan(){ /* Return the next token found inside a re2c block: CLOSE, ID, CODE, RANGE,
+ * STRING, the character itself for single-character tokens, or 0 at the end of the block or of
+ * the input.  Generated by re2c from the scan, code and comment rules in scanner.re. */
+ uchar *cursor = cur;
+ uint depth; /* nesting level while inside an action (code:) or a comment (comment:) */
+
+scan: /* start of a new token */
+ tchar = cursor - pos; /* offset of the token from the start of its line */
+ tline = cline; /* line on which the token starts */
+ tok = cursor;
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy15;
+yy16: ++YYCURSOR;
+yy15:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ':'){
+ if(yych <= '"'){
+ if(yych <= '\n'){
+ if(yych <= '\b') goto yy35;
+ if(yych <= '\t') goto yy31;
+ goto yy33;
+ } else {
+ if(yych == ' ') goto yy31;
+ if(yych <= '!') goto yy35;
+ goto yy23;
+ }
+ } else {
+ if(yych <= '*'){
+ if(yych <= '\'') goto yy35;
+ if(yych <= ')') goto yy27;
+ goto yy21;
+ } else {
+ if(yych <= '+') goto yy28;
+ if(yych == '/') goto yy19;
+ goto yy35;
+ }
+ }
+ } else {
+ if(yych <= 'Z'){
+ if(yych <= '='){
+ if(yych == '<') goto yy35;
+ goto yy27;
+ } else {
+ if(yych == '?') goto yy28;
+ if(yych <= '@') goto yy35;
+ goto yy29;
+ }
+ } else {
+ if(yych <= '`'){
+ if(yych <= '[') goto yy25;
+ if(yych <= '\\') goto yy27;
+ goto yy35;
+ } else {
+ if(yych <= 'z') goto yy29;
+ if(yych <= '{') goto yy17;
+ if(yych <= '|') goto yy27;
+ goto yy35;
+ }
+ }
+ }
+yy17: yych = *++YYCURSOR;
+yy18:
+#line 96
+ { depth = 1; /* opening brace seen: collect the action text */
+ goto code;
+ }
+yy19: yych = *++YYCURSOR;
+ if(yych == '*') goto yy54;
+yy20:
+#line 115
+ { RETURN(*tok); } /* the token code is the character itself */
+yy21: yych = *++YYCURSOR;
+ if(yych == '/') goto yy52;
+yy22:
+#line 117
+ { yylval.op = *tok;
+ RETURN(CLOSE); }
+yy23: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy48;
+yy24:
+#line 108
+ { fatal("bad string"); }
+yy25: yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy42;
+yy26:
+#line 113
+ { fatal("bad character constant"); }
+yy27: yych = *++YYCURSOR;
+ goto yy20;
+yy28: yych = *++YYCURSOR;
+ goto yy22;
+yy29: yych = *++YYCURSOR;
+ goto yy40;
+yy30:
+#line 120
+ { cur = cursor;
+ yylval.symbol = Symbol::find(token());
+ return ID; }
+yy31: yych = *++YYCURSOR;
+ goto yy38;
+yy32:
+#line 124
+ { goto scan; }
+yy33: yych = *++YYCURSOR;
+yy34:
+#line 126
+ { if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
+yy35: yych = *++YYCURSOR;
+yy36:
+#line 131
+ { cerr << "unexpected character: " << *tok << endl;
+ goto scan;
+ }
+yy37: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy38: if(yych == '\t') goto yy37;
+ if(yych == ' ') goto yy37;
+ goto yy32;
+yy39: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy40: if(yych <= '@'){
+ if(yych <= '/') goto yy30;
+ if(yych <= '9') goto yy39;
+ goto yy30;
+ } else {
+ if(yych <= 'Z') goto yy39;
+ if(yych <= '`') goto yy30;
+ if(yych <= 'z') goto yy39;
+ goto yy30;
+ }
+yy41: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy42: if(yych <= '['){
+ if(yych != '\n') goto yy41;
+ } else {
+ if(yych <= '\\') goto yy44;
+ if(yych <= ']') goto yy45;
+ goto yy41;
+ }
+yy43: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy24;
+ case 1: goto yy26;
+ }
+yy44: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy43;
+ goto yy41;
+yy45: yych = *++YYCURSOR;
+yy46:
+#line 110
+ { cur = cursor;
+ yylval.regexp = ranToRE(token());
+ return RANGE; }
+yy47: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy48: if(yych <= '!'){
+ if(yych == '\n') goto yy43;
+ goto yy47;
+ } else {
+ if(yych <= '"') goto yy50;
+ if(yych != '\\') goto yy47;
+ }
+yy49: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy43;
+ goto yy47;
+yy50: yych = *++YYCURSOR;
+yy51:
+#line 105
+ { cur = cursor;
+ yylval.regexp = strToRE(token());
+ return STRING; }
+yy52: yych = *++YYCURSOR;
+yy53:
+#line 102
+ { tok = cursor;
+ RETURN(0); } /* closing comment marker: the re2c block is finished */
+yy54: yych = *++YYCURSOR;
+yy55:
+#line 99
+ { depth = 1;
+ goto comment; }
+}
+#line 134
+
+
+code: /* inside a brace-delimited action; depth counts the open braces */
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy56;
+yy57: ++YYCURSOR;
+yy56:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '&'){
+ if(yych <= '\n'){
+ if(yych <= '\t') goto yy64;
+ goto yy62;
+ } else {
+ if(yych == '"') goto yy66;
+ goto yy64;
+ }
+ } else {
+ if(yych <= '{'){
+ if(yych <= '\'') goto yy67;
+ if(yych <= 'z') goto yy64;
+ goto yy60;
+ } else {
+ if(yych != '}') goto yy64;
+ }
+ }
+yy58: yych = *++YYCURSOR;
+yy59:
+#line 138
+ { if(--depth == 0){
+ cur = cursor;
+ yylval.token = new Token(token(), tline);
+ return CODE;
+ }
+ goto code; }
+yy60: yych = *++YYCURSOR;
+yy61:
+#line 144
+ { ++depth;
+ goto code; }
+yy62: yych = *++YYCURSOR;
+yy63:
+#line 146
+ { if(cursor == eof) fatal("missing '}'");
+ pos = cursor; cline++;
+ goto code;
+ }
+yy64: yych = *++YYCURSOR;
+yy65:
+#line 150
+ { goto code; }
+yy66: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy65;
+ goto yy73;
+yy67: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy65;
+ goto yy69;
+yy68: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy69: if(yych <= '&'){
+ if(yych != '\n') goto yy68;
+ } else {
+ if(yych <= '\'') goto yy64;
+ if(yych == '\\') goto yy71;
+ goto yy68;
+ }
+yy70: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy65;
+ }
+yy71: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy70;
+ goto yy68;
+yy72: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy73: if(yych <= '!'){
+ if(yych == '\n') goto yy70;
+ goto yy72;
+ } else {
+ if(yych <= '"') goto yy64;
+ if(yych != '\\') goto yy72;
+ }
+yy74: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy70;
+ goto yy72;
+}
+#line 151
+
+
+comment: /* inside a comment; comments nest and depth counts the open ones */
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy75;
+yy76: ++YYCURSOR;
+yy75:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych == '\n') goto yy80;
+ goto yy82;
+ } else {
+ if(yych <= '*') goto yy77;
+ if(yych == '/') goto yy79;
+ goto yy82;
+ }
+yy77: yych = *++YYCURSOR;
+ if(yych == '/') goto yy85;
+yy78:
+#line 165
+ { goto comment; }
+yy79: yych = *++YYCURSOR;
+ if(yych == '*') goto yy83;
+ goto yy78;
+yy80: yych = *++YYCURSOR;
+yy81:
+#line 161
+ { if(cursor == eof) RETURN(0);
+ tok = pos = cursor; cline++;
+ goto comment;
+ }
+yy82: yych = *++YYCURSOR;
+ goto yy78;
+yy83: yych = *++YYCURSOR;
+yy84:
+#line 159
+ { ++depth;
+ goto comment; }
+yy85: yych = *++YYCURSOR;
+yy86:
+#line 155
+ { if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+}
+#line 166
+
+}
+
+/*
+ * Print msg on cerr, prefixed with the line recorded for the current
+ * token and its offset within that line plus one, then end the program
+ * with exit status 1.
+ */
+void Scanner::fatal(char *msg){
+    const uint column = tchar + 1;
+    cerr << "line " << tline << ", column " << column << ": " << msg << endl;
+    exit(1);
+}
--- /dev/null
+#ifndef _scanner_h
+#define _scanner_h
+
+#include "token.h"
+
+/*
+ * Scanner reads a re2c source file through the descriptor passed to its
+ * constructor.  echo() copies the text outside re2c blocks to a stream;
+ * scan() returns the tokens found inside such a block.
+ */
+class Scanner {
+  private:
+    int in;         /* descriptor handed to read() by fill() */
+    uchar *bot;     /* start of the allocated buffer */
+    uchar *tok;     /* start of the token being scanned */
+    uchar *ptr;     /* backtracking marker (YYMARKER in the scanner sources) */
+    uchar *cur;     /* scan position saved between calls */
+    uchar *pos;     /* start of the current line; used to compute tchar */
+    uchar *lim;     /* end of the buffered input (YYLIMIT) */
+    uchar *top;     /* end of the allocated buffer */
+    uchar *eof;     /* NULL until the input is exhausted, then just past the sentinel newline */
+    uint tchar;     /* offset of the current token within its line */
+    uint tline;     /* line on which the current token starts */
+    uint cline;     /* line currently being scanned */
+
+    uchar *fill(uchar*);
+
+  public:
+    Scanner(int);
+    int echo(ostream&);
+    int scan();
+    void fatal(char*);
+    SubStr token();
+    uint line();
+};
+
+/* Text of the current token: the characters from tok up to cur. */
+inline SubStr Scanner::token(){
+    const uint length = cur - tok;
+    return SubStr(tok, length);
+}
+
+/* Number of the line currently being scanned. */
+inline uint Scanner::line(){
+    return this->cline;
+}
+
+#endif
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT lim
+#define YYMARKER ptr
+#define YYFILL(n) {cursor = fill(cursor);}
+
+#define RETURN(i) {cur = cursor; return i;}
+
+
+/*
+ * Set up a scanner that reads from descriptor i.  All buffer pointers
+ * start out NULL, so the first fill() call is what allocates the buffer.
+ * Line counting starts at 1.
+ */
+Scanner::Scanner(int i)
+    : in(i),
+      bot(NULL), tok(NULL), ptr(NULL), cur(NULL),
+      pos(NULL), lim(NULL), top(NULL), eof(NULL),
+      tchar(0), tline(0), cline(1)
+{
+}
+
+/*
+ * Make more input available to the scanner code and return the cursor,
+ * adjusted for any movement of the buffer.
+ *
+ * Once eof has been set the call does nothing.  Otherwise it
+ *   1. slides the text from tok onward down to the start of the buffer,
+ *   2. switches to a larger buffer when fewer than BSIZE bytes are free
+ *      after lim, and
+ *   3. reads up to BSIZE further bytes from the descriptor in.
+ * tok, ptr, pos, lim and the cursor are rebased in steps 1 and 2.  When
+ * the input is exhausted a sentinel '\n' is stored behind the last byte
+ * and eof is left pointing just past that sentinel.
+ *
+ * The slide uses memmove() because source and destination lie in the same
+ * buffer and may overlap.  read() is repeated after a short count, which
+ * on pipes and terminals does not mean end of file, and a failing read()
+ * is reported through fatal() rather than being used as a byte count.
+ */
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+        uint cnt = tok - bot;
+        if(cnt){
+            memmove(bot, tok, lim - tok);
+            tok = bot;
+            ptr -= cnt;
+            cursor -= cnt;
+            pos -= cnt;
+            lim -= cnt;
+        }
+        if((top - lim) < BSIZE){
+            uchar *buf = new uchar[(lim - bot) + BSIZE];
+            memcpy(buf, tok, lim - tok);
+            tok = buf;
+            ptr = &buf[ptr - bot];
+            cursor = &buf[cursor - bot];
+            pos = &buf[pos - bot];
+            lim = &buf[lim - bot];
+            top = &lim[BSIZE];
+            delete [] bot;
+            bot = buf;
+        }
+        /* keep reading until BSIZE bytes have arrived or the input ends */
+        cnt = 0;
+        while(cnt < BSIZE){
+            int got = read(in, (char*) lim + cnt, BSIZE - cnt);
+            if(got < 0)
+                fatal("read error");
+            if(got == 0)
+                break;
+            cnt += got;
+        }
+        if(cnt != BSIZE){
+            /* input exhausted: plant the sentinel newline */
+            eof = &lim[cnt]; *eof++ = '\n';
+        }
+        lim += cnt;
+    }
+    return cursor;
+}
+
+/*!re2c
+any = [\000-\377];
+dot = any \ [\n];
+esc = dot \ [\\];
+cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ;
+dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
+sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ;
+letter = [a-zA-Z];
+digit = [0-9];
+*/
+
+/*
+ * Copy input verbatim to out until the opener of a re2c block has been
+ * read.
+ *
+ * Returns 1 once that opener has been consumed; the opener itself is not
+ * written.  Returns 0 when the sentinel newline appended by fill() is
+ * reached, i.e. at end of input.  Complete lines are written as soon as
+ * their newline is seen.  Text still pending when the sentinel is reached
+ * (a last line without a terminating newline) is written, without the
+ * sentinel, just before returning 0 so that it is not lost.
+ */
+int Scanner::echo(ostream &out){
+    uchar *cursor = cur;
+    tok = cursor;
+echo:
+/*!re2c
+    "/*!re2c"   { out.write(tok, &cursor[-7] - tok);
+                  tok = cursor;
+                  RETURN(1); }
+    "\n"        { if(cursor == eof){
+                      out.write(tok, &cursor[-1] - tok);
+                      RETURN(0);
+                  }
+                  out.write(tok, cursor - tok);
+                  tok = pos = cursor; cline++;
+                  goto echo; }
+    any         { goto echo; }
+*/
+}
+
+
+int Scanner::scan(){ /* Return the next token found inside a re2c block: CLOSE, ID, CODE, RANGE,
+ * STRING, the character itself for single-character tokens, or 0 at the end of the block or of
+ * the input.  The three rule sets below handle ordinary tokens, brace-delimited actions and
+ * nested comments. */
+ uchar *cursor = cur;
+ uint depth; /* nesting level while inside an action (code:) or a comment (comment:) */
+
+scan: /* start of a new token */
+ tchar = cursor - pos; /* offset of the token from the start of its line */
+ tline = cline; /* line on which the token starts */
+ tok = cursor;
+/*!re2c
+ "{" { depth = 1;
+ goto code;
+ }
+ "/*" { depth = 1;
+ goto comment; }
+
+ "*/" { tok = cursor;
+ RETURN(0); }
+
+ dstring { cur = cursor;
+ yylval.regexp = strToRE(token());
+ return STRING; }
+ "\"" { fatal("bad string"); }
+
+ cstring { cur = cursor;
+ yylval.regexp = ranToRE(token());
+ return RANGE; }
+ "[" { fatal("bad character constant"); }
+
+ [()|=;/\\] { RETURN(*tok); }
+
+ [*+?] { yylval.op = *tok;
+ RETURN(CLOSE); }
+
+ letter (letter|digit)* { cur = cursor;
+ yylval.symbol = Symbol::find(token());
+ return ID; }
+
+ [ \t]+ { goto scan; }
+
+ "\n" { if(cursor == eof) RETURN(0);
+ pos = cursor; cline++;
+ goto scan;
+ }
+
+ any { cerr << "unexpected character: " << *tok << endl;
+ goto scan;
+ }
+*/
+
+code: /* inside a brace-delimited action; depth counts the open braces */
+/*!re2c
+ "}" { if(--depth == 0){
+ cur = cursor;
+ yylval.token = new Token(token(), tline);
+ return CODE;
+ }
+ goto code; }
+ "{" { ++depth;
+ goto code; }
+ "\n" { if(cursor == eof) fatal("missing '}'");
+ pos = cursor; cline++;
+ goto code;
+ }
+ dstring | sstring | any { goto code; }
+*/
+
+comment: /* inside a comment; comments nest and depth counts the open ones */
+/*!re2c
+ "*/" { if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+ "/*" { ++depth;
+ goto comment; }
+ "\n" { if(cursor == eof) RETURN(0);
+ tok = pos = cursor; cline++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+ * Print msg on cerr, prefixed with the line recorded for the current
+ * token and its offset within that line plus one, then end the program
+ * with exit status 1.
+ */
+void Scanner::fatal(char *msg){
+    const uint column = tchar + 1;
+    cerr << "line " << tline << ", column " << column << ": " << msg << endl;
+    exit(1);
+}
--- /dev/null
+#include <string.h>
+#include "substr.h"
+
+/* Write the len characters starting at str to the stream o. */
+void SubStr::out(ostream& o) const {
+    o.write(this->str, this->len);
+}
+
+/*
+ * Two SubStr values are equal when they have the same length and the
+ * same bytes.  The bytes are compared only when the lengths agree.
+ */
+bool operator==(const SubStr &s1, const SubStr &s2){
+    if(s1.len != s2.len)
+        return false;
+    return (bool) (memcmp(s1.str, s2.str, s1.len) == 0);
+}
+
+/* Create a Str holding a freshly allocated copy of the characters of s. */
+Str::Str(const SubStr& s)
+    : SubStr(new char[s.len], s.len)
+{
+    memcpy(str, s.str, len);
+}
+
+/*
+ * Take over the buffer of s: the new Str points at the characters s held,
+ * and s is left with a NULL pointer and a length of 0.
+ */
+Str::Str(Str& s)
+    : SubStr(s.str, s.len)
+{
+    s.len = 0;
+    s.str = NULL;
+}
+
+/* Create an empty Str: NULL pointer, length 0. */
+Str::Str()
+    : SubStr((char*) NULL, 0)
+{
+}
+
+
+/*
+ * Release the character array held by this Str.  The array is obtained
+ * with new char[] in Str(const SubStr&), so it has to be released with
+ * the array form of delete; a NULL pointer is harmless here.  Afterwards
+ * both fields are overwritten with -1 so that use of a destroyed Str
+ * stands out.
+ */
+Str::~Str() {
+    delete [] str;
+    str = (char*)-1;
+    len = (uint)-1;
+}
--- /dev/null
+#ifndef _substr_h
+#define _substr_h
+
+#include <iostream.h>
+#include "basics.h"
+
+/*
+ * SubStr: a pointer to a run of characters together with its length.
+ * The class declares no destructor, so it never frees what str points at.
+ */
+class SubStr {
+public:
+    char *str;      /* first character */
+    uint len;       /* number of characters */
+
+    SubStr(uchar*, uint);
+    SubStr(char*, uint);
+    SubStr(const SubStr&);
+
+    void out(ostream&) const;
+
+    friend bool operator==(const SubStr &, const SubStr &);
+};
+
+/*
+ * Str: a SubStr with constructors and a destructor of its own; their
+ * definitions live in substr.cc.
+ */
+class Str: public SubStr {
+public:
+    Str();
+    Str(const SubStr&);
+    Str(Str&);
+    ~Str();
+};
+
+/* Stream insertion for a SubStr: writes its len characters to o. */
+inline ostream& operator<<(ostream& o, const SubStr &s){
+    o.write(s.str, s.len);
+    return o;
+}
+
+/* Stream insertion through a pointer: writes the SubStr that s points at. */
+inline ostream& operator<<(ostream& o, const SubStr* s){
+    s->out(o);
+    return o;
+}
+
+/* View l characters starting at s; the unsigned pointer is stored as char*. */
+inline SubStr::SubStr(uchar *s, uint l)
+{
+    str = (char*) s;
+    len = l;
+}
+
+/* View l characters starting at s. */
+inline SubStr::SubStr(char *s, uint l)
+{
+    str = s;
+    len = l;
+}
+
+/* Copy pointer and length from s; the characters themselves are shared. */
+inline SubStr::SubStr(const SubStr &s)
+{
+    str = s.str;
+    len = s.len;
+}
+
+#endif
--- /dev/null
+#ifndef _token_h
+#define _token_h
+
+#include "substr.h"
+
+/*
+ * Token: a piece of source text held in a Str, plus a line number.
+ */
+class Token {
+  public:
+    Str text;       /* the token's characters */
+    uint line;      /* line number given to the constructor */
+
+    Token(SubStr, uint);
+};
+
+/* Build a token from the text t and the line number l. */
+inline Token::Token(SubStr t, uint l)
+    : text(t),
+      line(l)
+{
+}
+
+#endif
--- /dev/null
+#include "globals.h"
+
+uchar asc2asc[256] = { /* identity table: entry i holds the value i for every byte value */
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
+
+uchar *xlat = asc2asc; /* translation table in use; starts as the identity table.  NOTE(review): presumably repointed to asc2ebc for the -e option -- confirm in the option handling */
+uchar *talx = asc2asc; /* starts as the identity table.  NOTE(review): presumably the inverse of xlat (ebc2asc under -e) -- confirm in the option handling */
+
+uchar asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ /* indexed by the ISO 8859/1 byte, yields the Code Page 37 byte */
+0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f,
+0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f,
+0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
+0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xba,0xe0,0xbb,0xb0,0x6d,
+0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96,
+0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07,
+0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
+0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff,
+0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbd,0xb4,0x9a,0x8a,0x5f,0xca,0xaf,0xbc,
+0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab,
+0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77,
+0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xad,0x8e,0x59,
+0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57,
+0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0xae,0xdf
+};
+
+uchar ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ /* indexed by the Code Page 37 byte, yields the ISO 8859/1 byte */
+0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
+0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
+0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
+0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0xac,
+0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
+0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
+0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xde,0xb1,
+0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
+0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0xdd,0xfe,0xae,
+0x5e,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0x5b,0x5d,0xaf,0xa8,0xb4,0xd7,
+0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
+0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
+0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f
+};
--- /dev/null
+#define CLOSE 257 /* returned by the scanner for one of the characters * + ? */
+#define ID 258 /* returned for a letter followed by letters and digits */
+#define CODE 259 /* returned for a brace-delimited action */
+#define RANGE 260 /* returned for a bracketed character range */
+#define STRING 261 /* returned for a double-quoted string */
+typedef union { /* value that accompanies a token; the scanner stores it in yylval */
+ Symbol *symbol; /* set for ID */
+ RegExp *regexp; /* set for STRING and RANGE */
+ Token *token; /* set for CODE */
+ char op; /* set for CLOSE: the operator character */
+} YYSTYPE;
+extern YYSTYPE yylval;