]> granicus.if.org Git - re2c/commitdiff
Initial revision
authornuffer <nuffer@642ea486-5414-0410-9d7f-a0204ed87703>
Sat, 13 Dec 2003 04:58:19 +0000 (04:58 +0000)
committernuffer <nuffer@642ea486-5414-0410-9d7f-a0204ed87703>
Sat, 13 Dec 2003 04:58:19 +0000 (04:58 +0000)
42 files changed:
CHANGELOG [new file with mode: 0644]
Makefile [new file with mode: 0644]
NO_WARRANTY [new file with mode: 0644]
README [new file with mode: 0644]
actions.cc [new file with mode: 0644]
basics.h [new file with mode: 0644]
bootstrap/parser.cc [new file with mode: 0644]
bootstrap/re2c.man [new file with mode: 0644]
bootstrap/scanner.cc [new file with mode: 0644]
bootstrap/y.tab.h [new file with mode: 0644]
code.cc [new file with mode: 0644]
dfa.cc [new file with mode: 0644]
dfa.h [new file with mode: 0644]
doc/loplas.ps.gz [new file with mode: 0644]
doc/sample.bib [new file with mode: 0644]
examples/basemmap.c [new file with mode: 0644]
examples/c.re [new file with mode: 0644]
examples/cmmap.re [new file with mode: 0644]
examples/cnokw.re [new file with mode: 0644]
examples/cunroll.re [new file with mode: 0644]
examples/modula.re [new file with mode: 0644]
examples/rexx/README [new file with mode: 0644]
examples/rexx/rexx.l [new file with mode: 0644]
examples/rexx/scanio.c [new file with mode: 0644]
examples/sample.re [new file with mode: 0644]
examples/simple.re [new file with mode: 0644]
globals.h [new file with mode: 0644]
ins.h [new file with mode: 0644]
main.cc [new file with mode: 0644]
parser.cc [new file with mode: 0644]
parser.h [new file with mode: 0644]
parser.y [new file with mode: 0644]
re.h [new file with mode: 0644]
re2c.1 [new file with mode: 0644]
scanner.cc [new file with mode: 0644]
scanner.h [new file with mode: 0644]
scanner.re [new file with mode: 0644]
substr.cc [new file with mode: 0644]
substr.h [new file with mode: 0644]
token.h [new file with mode: 0644]
translate.cc [new file with mode: 0644]
y.tab.h [new file with mode: 0644]

diff --git a/CHANGELOG b/CHANGELOG
new file mode 100644 (file)
index 0000000..06090aa
--- /dev/null
+++ b/CHANGELOG
@@ -0,0 +1,18 @@
+re2c
+----
+
+Version 0.9.1
+-------------
+
+- removed rcs comments in source files
+
+Version 0.9
+-----------
+
+- redistribution based on version 0.5
+- added parentheses to assignment expressions in 'if' statements
+- rearranged class members to match initialization order
+- substr fix
+- use array delete [] when necessary
+- other minor fixes for subduing compiler warnings
+
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..b392b0f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,76 @@
+# $Log$
+# Revision 1.1  2003/12/13 04:58:19  nuffer
+# Initial revision
+#
+#Revision 1.1  1994/04/08  16:30:37  peter
+#Initial revision
+#
+
+BIN = /usr/local/bin
+MAN = /usr/local/man
+
+# Base names of the files yacc and lex write by default (y.tab.c, lex.yy.c).
+# They were referenced below but never defined, which made the mv commands
+# operate on a file literally called ".c".
+YTAB   ?= y.tab
+LEXYY  ?= lex.yy
+
+# Compile a C++ source file into an object file.
+%.o : %.cc ; $(CC) -o $@ $(CFLAGS) -c $<
+# Generate C++ from a yacc grammar / lex specification.  '&&' (not ';') so a
+# failed generator run does not go on to move a stale output into place.
+%.cc : %.y ; $(YACC)  $(YFLAGS) $< && mv $(YTAB).c $@
+%.cc : %.l ; $(LEX)   $(LFLAGS) $< && mv $(LEXYY).c $@
+
+# Generate C++ from an re2c specification.  Output goes to a temporary file
+# first so that a failing (or missing) re2c never leaves behind a truncated
+# target that looks up to date.
+%.cc:  %.re
+       re2c -s $< >$@.tmp && mv -f $@.tmp $@
+
+# Source inputs; the parser and scanner are listed by their .y / .re
+# sources rather than by the generated .cc files.
+SOURCES        = code.cc dfa.cc main.cc parser.y actions.cc scanner.re substr.cc\
+       translate.cc
+# Object files linked into the re2c executable.
+OBJS   = code.o dfa.o main.o parser.o actions.o scanner.o substr.o\
+       translate.o
+
+# CC is the C++ compiler driver here; CFLAGS is what the %.o pattern rule
+# passes to it.
+CC             = g++
+CFLAGS         = -O2 -Wall -I. -Wno-unused -Wno-parentheses
+# -d asks yacc to also write the token-definition header (y.tab.h).
+YFLAGS         = -d
+LDFLAGS                = 
+
+# These names are commands, not files the build produces.  Declaring them
+# phony keeps a stray file called e.g. "clean" from making the target look
+# up to date.
+.PHONY: default clean install dist
+
+# Default goal: build the re2c executable.
+default:       re2c
+
+# Remove objects, generated sources/headers and the executable.
+clean:
+       rm -f *.o *.s y.tab.c y.tab.h scanner.cc parser.cc .version version.h re2c
+
+# Explicit rule for the parser; it takes precedence over the generic
+# "%.cc : %.y" pattern rule.  $(YACC) and $(YFLAGS) are used instead of a
+# hard-coded "yacc -d" so the tool and its options can be overridden in one
+# place (YFLAGS carries -d, which also produces y.tab.h).
+parser.cc:     parser.y
+       $(YACC) $(YFLAGS) $<
+       mv -f y.tab.c $@
+
+# Link the executable from all object files; $(CC) is the C++ driver (see
+# the CC assignment) and libstdc++ is additionally named explicitly.
+re2c:  $(OBJS)
+       $(CC) -o $@ $(OBJS) $(LDFLAGS) -lstdc++
+
+# Extract the version number from the "Version x.y.z" line of README.
+# grep -E replaces the obsolescent egrep spelling.
+.version: README
+       grep -E "^Version" README | sed 's/Version //' > .version
+
+# Turn the version number into a preprocessor definition.  The value is
+# written as a bare token, exactly as it appears in .version.
+version.h: .version
+       echo "#define RE2C_VERSION" `cat .version` > version.h
+
+# Install the stripped executable into $(BIN) and the manual page into
+# $(MAN)/man1.  DESTDIR is empty by default, so behaviour is unchanged; when
+# set it is prepended to every destination for staged/package installs.
+install: re2c
+       install -d $(DESTDIR)$(BIN)
+       install -s re2c $(DESTDIR)$(BIN)
+       install -d $(DESTDIR)$(MAN)/man1
+       install -m 0644 re2c.1 $(DESTDIR)$(MAN)/man1
+
+# Build the source tarball re2c-<version>.tar.gz from a scratch directory.
+# The file list comes from Perforce: the output of "p4 files ..." has the
+# //depot/home/re2c/ prefix stripped, lines for deleted files dropped, and
+# the trailing "#revision ..." text removed, leaving workspace-relative paths.
+# NOTE(review): "cp -P" meant --parents in old GNU fileutils but means
+# --no-dereference in current coreutils; files in subdirectories would then
+# presumably be copied flat -- confirm on the release machine.
+dist: re2c scanner.cc .version
+       mkdir re2c-`cat .version`
+       cp -P `p4 files ... | sed s/\\\\/\\\\/depot\\\\/home\\\\/re2c\\\\/// | sed '/- delete/d' | sed s/#.*$$//` re2c-`cat .version`/
+       tar zcf re2c-`cat .version`.tar.gz re2c-`cat .version`/
+       rm -rf re2c-`cat .version`
+
+#
+# generated with "gcc -I. -MM -x c++ *.cc *.y *.re"
+# and edited by hand
+#
+actions.o : actions.cc globals.h basics.h parser.h scanner.h \
+  token.h substr.h re.h ins.h dfa.h 
+code.o : code.cc substr.h basics.h globals.h dfa.h re.h token.h \
+  ins.h 
+dfa.o : dfa.cc globals.h basics.h substr.h dfa.h re.h token.h \
+  ins.h 
+main.o : main.cc globals.h basics.h parser.h scanner.h token.h \
+  substr.h re.h ins.h dfa.h version.h
+substr.o : substr.cc substr.h basics.h 
+translate.o : translate.cc globals.h basics.h 
+scanner.o : scanner.re scanner.h token.h substr.h basics.h \
+  parser.h re.h ins.h ./parser.o
+parser.o : parser.y globals.h basics.h parser.h scanner.h token.h \
+  substr.h re.h ins.h 
diff --git a/NO_WARRANTY b/NO_WARRANTY
new file mode 100644 (file)
index 0000000..885a13d
--- /dev/null
@@ -0,0 +1,2 @@
+re2c is distributed with no warranty whatever.  The author and any other
+contributors take no responsibility for the consequences of its use.
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..a16c471
--- /dev/null
+++ b/README
@@ -0,0 +1,153 @@
+re2c
+----
+
+Version 0.9.1
+Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
+Currently maintained by Brian Young (bayoung@acm.org)
+
+The re2c distribution can be found at:
+
+    http://www.tildeslash.org/re2c/index.html
+
+The source distribution is available from:
+
+    http://www.tildeslash.org/re2c/re2c-0.9.1.tar.gz
+
+This distribution is a cleaned up version of the 0.5 release
+maintained by me (Brian Young).  Several bugs were fixed as well
+as code cleanup for warning free compilation.  It has been developed
+and tested with egcs 1.0.2 and gcc 2.7.2.3 on Linux x86.  Peter
+Bumbulis' original release can be found at:
+
+    ftp://csg.uwaterloo.ca/pub/peter/re2c.0.5.tar.gz
+
+re2c is a great tool for writing fast and flexible lexers.  It has
+served many people well for many years and it deserves to be
+maintained more actively.  re2c is on the order of 2-3 times faster
+than a flex based scanner, and its input model is much more
+flexible.
+
+Patches and requests for features will be entertained.  Areas of
+particular interest to me are porting (a Solaris and an NT
+version will be forthcoming) and wide character support.  Note
+that the code is already quite portable and should be buildable
+on any platform with minor makefile changes.
+
+Peter's original version 0.5 ANNOUNCE and README follow.
+
+Brian
+
+--
+
+re2c is a tool for generating C-based recognizers from regular
+expressions.  re2c-based scanners are efficient:  for programming
+languages, given similar specifications, an re2c-based scanner is
+typically almost twice as fast as a flex-based scanner with little or no
+increase in size (possibly a decrease on cisc architectures).  Indeed,
+re2c-based scanners are quite competitive with hand-crafted ones.
+
+Unlike flex, re2c does not generate complete scanners:  the user must
+supply some interface code.  While this code is not bulky (about 50-100
+lines for a flex-like scanner; see the man page and examples in the
+distribution) careful coding is required for efficiency (and
+correctness).  One advantage of this arrangement is that the generated
+code is not tied to any particular input model.  For example, re2c
+generated code can be used to scan data from a null-byte terminated
+buffer as illustrated below.
+
+Given the following source
+
+    #define NULL            ((char*) 0)
+    char *scan(char *p){
+    char *q;
+    #define YYCTYPE         char
+    #define YYCURSOR        p
+    #define YYLIMIT         p
+    #define YYMARKER        q
+    #define YYFILL(n)
+    /*!re2c
+           [0-9]+          {return YYCURSOR;}
+           [\000-\377]     {return NULL;}
+    */
+    }
+
+re2c will generate
+
+    /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+    #line 1 "simple.re"
+    #define NULL            ((char*) 0)
+    char *scan(char *p){
+    char *q;
+    #define YYCTYPE         char
+    #define YYCURSOR        p
+    #define YYLIMIT         p
+    #define YYMARKER        q
+    #define YYFILL(n)
+    {
+           YYCTYPE yych;
+           unsigned int yyaccept;
+           goto yy0;
+    yy1:    ++YYCURSOR;
+    yy0:
+           if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+           yych = *YYCURSOR;
+           if(yych <= '/') goto yy4;
+           if(yych >= ':') goto yy4;
+    yy2:    yych = *++YYCURSOR;
+           goto yy7;
+    yy3:
+    #line 10
+           {return YYCURSOR;}
+    yy4:    yych = *++YYCURSOR;
+    yy5:
+    #line 11
+           {return NULL;}
+    yy6:    ++YYCURSOR;
+           if(YYLIMIT == YYCURSOR) YYFILL(1);
+           yych = *YYCURSOR;
+    yy7:    if(yych <= '/') goto yy3;
+           if(yych <= '9') goto yy6;
+           goto yy3;
+    }
+    #line 12
+
+    }
+
+Note that most compilers will perform dead-code elimination to remove
+all YYCURSOR, YYLIMIT comparisons.
+
+re2c was developed for a particular project (constructing a fast REXX
+scanner of all things!) and so while it has some rough edges, it should
+be quite usable.  More information about re2c can be found in the
+(admittedly skimpy) man page; the algorithms and heuristics used are
+described in an upcoming LOPLAS article (included in the distribution).
+Probably the best way to find out more about re2c is to try the supplied
+examples.  re2c is written in C++, and is currently being developed
+under Linux using gcc 2.5.8.
+
+Peter
+
+--
+
+re2c is distributed with no warranty whatever.  The code is certain to
+contain errors.  Neither the author nor any contributor takes
+responsibility for any consequences of its use.
+
+re2c is in the public domain.  The data structures and algorithms used
+in re2c are all either taken from documents available to the general
+public or are inventions of the author.  Programs generated by re2c may
+be distributed freely.  re2c itself may be distributed freely, in source
+or binary, unchanged or modified.  Distributors may charge whatever fees
+they can obtain for re2c.
+
+If you do make use of re2c, or incorporate it into a larger project, an
+acknowledgement somewhere (documentation, research report, etc.) would
+be appreciated.
+
+Please send bug reports and feedback (including suggestions for
+improving the distribution) to
+
+                       peter@csg.uwaterloo.ca
+
+Include a small example and the banner from parser.y with bug reports.
+
diff --git a/actions.cc b/actions.cc
new file mode 100644 (file)
index 0000000..0260b5f
--- /dev/null
@@ -0,0 +1,505 @@
+#include <time.h>
+#include <string.h>
+#include <iostream.h>
+#include <iomanip.h>
+
+#include "globals.h"
+#include "parser.h"
+#include "dfa.h"
+
+Symbol *Symbol::first = NULL;
+
+// Create a symbol named `str` with no regular expression attached, and push
+// it onto the front of the global list headed by Symbol::first.
+Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) {
+    first = this;
+}
+
+// Look `str` up in the global symbol list; when no symbol has that name a
+// new one is created (which also links it into the list) and returned.
+Symbol *Symbol::find(const SubStr &str){
+    Symbol *cur = first;
+    while(cur != NULL){
+        if(cur->name == str)
+            return cur;
+        cur = cur->next;
+    }
+    return new Symbol(str);
+}
+
+void showIns(ostream &o, const Ins &i, const Ins &base){
+    o.width(3);
+    o << &i - &base << ": ";
+    switch(i.i.tag){
+    case CHAR: {
+       o << "match ";
+       for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j)
+           prtCh(o, j->c.value);
+       break;
+    } case GOTO:
+       o << "goto " << ((Ins*) i.i.link - &base);
+       break;
+    case FORK:
+       o << "fork " << ((Ins*) i.i.link - &base);
+       break;
+    case CTXT:
+       o << "term " << ((RuleOp*) i.i.link)->accept;
+       break;
+    case TERM:
+       o << "term " << ((RuleOp*) i.i.link)->accept;
+       break;
+    }
+    o << "\n";
+}
+
+// Default answer for expressions: ~0 (all bits set), the value the other
+// fixedLength() implementations in this file treat as "not fixed".
+uint RegExp::fixedLength(){
+    return ~0;
+}
+
+// Type tag string for NullOp (compared via isA() elsewhere in this file).
+char *NullOp::type = "NullOp";
+
+// The empty expression occupies no instructions.
+void NullOp::calcSize(Char*){
+    size = 0;
+}
+
+// The empty expression always matches exactly zero characters.
+uint NullOp::fixedLength(){
+    return 0;
+}
+
+// Nothing to emit.
+void NullOp::compile(Char*, Ins*){
+    ;
+}
+
+// No characters are referenced, so the character partition is untouched.
+void NullOp::split(CharSet&){
+    ;
+}
+
+// Print a range followed by the rest of its list.  Ranges are half-open
+// [lb, ub): a one-character range prints just that character, anything
+// wider prints "first-last" where last is ub-1.
+ostream& operator<<(ostream &o, const Range &r){
+    prtCh(o, r.lb);
+    if((r.ub - r.lb) != 1){
+        o << "-";
+        prtCh(o, r.ub-1);
+    }
+    return o << r.next;
+}
+
+// Merge two range lists into a new list covering every character covered by
+// either input.  Both arguments are dereferenced immediately, so neither may
+// be NULL.  The code relies on each list being ordered by lb.
+//
+// Each outer iteration starts a new output range `s` as a copy of whichever
+// input head has the smaller lb.  The inner loop then keeps absorbing input
+// ranges that begin at or before s->ub, widening s as needed.  As soon as
+// one input is exhausted, the remaining overlapping ranges of the other are
+// absorbed and the rest of that other list is linked onto the result
+// directly (shared with the input, not copied).
+Range *doUnion(Range *r1, Range *r2){
+    Range *r, **rP = &r;
+    for(;;){
+       Range *s;
+       if(r1->lb <= r2->lb){
+           s = new Range(*r1);
+       } else {
+           s = new Range(*r2);
+       }
+       *rP = s;
+       rP = &s->next;
+       for(;;){
+           if(r1->lb <= r2->lb){
+               // r1 starts past the end of s: s is complete, begin a new one.
+               if(r1->lb > s->ub)
+                   break;
+               if(r1->ub > s->ub)
+                   s->ub = r1->ub;
+               if(!(r1 = r1->next)){
+                   // r1 exhausted: fold in overlapping r2 ranges, then
+                   // attach whatever is left of r2 and finish.
+                   uint ub = 0;
+                   for(; r2 && r2->lb <= s->ub; r2 = r2->next)
+                       ub = r2->ub;
+                   if(ub > s->ub)
+                       s->ub = ub;
+                   *rP = r2;
+                   return r;
+               }
+           } else {
+               // Mirror image of the branch above with r1 and r2 swapped.
+               if(r2->lb > s->ub)
+                   break;
+               if(r2->ub > s->ub)
+                   s->ub = r2->ub;
+               if(!(r2 = r2->next)){
+                   uint ub = 0;
+                   for(; r1 && r1->lb <= s->ub; r1 = r1->next)
+                       ub = r1->ub;
+                   if(ub > s->ub)
+                       s->ub = ub;
+                   *rP = r1;
+                   return r;
+               }
+           }
+       }
+    }
+    // Not reached: the outer for(;;) is only left through the returns above.
+    *rP = NULL;
+    return r;
+}
+
+// Build a new range list containing the parts of r1 not covered by r2.
+// Returns NULL when nothing is left.  The single forward pass over r2
+// relies on both lists being ordered.
+Range *doDiff(Range *r1, Range *r2){
+    Range *r, *s, **rP = &r;
+    for(; r1; r1 = r1->next){
+       // lb is the start of the part of r1 not yet accounted for.
+       uint lb = r1->lb;
+       // Skip r2 ranges that end at or before the start of r1.
+       for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
+       for(; r2 && r2->lb <  r1->ub; r2 = r2->next){
+           if(lb < r2->lb){
+               // Gap in front of this r2 range survives.
+               *rP = s = new Range(lb, r2->lb);
+               rP = &s->next;
+           }
+           // r2 reaches to or past the end of r1: nothing of r1 remains.
+           // The goto skips the loop increment, so this r2 range is looked
+           // at again for the next r1.
+           if((lb = r2->ub) >= r1->ub)
+               goto noMore;
+       }
+       // Tail of r1 after the last overlapping r2 range.
+       *rP = s = new Range(lb, r1->ub);
+       rP = &s->next;
+    noMore:;
+    }
+    *rP = NULL;
+    return r;
+}
+
+// Combine two optional character classes: when both are present a new
+// MatchOp over the union of their ranges is returned, otherwise whichever
+// one is non-NULL (or NULL when both are).
+MatchOp *merge(MatchOp *m1, MatchOp *m2){
+    if(m1 && m2)
+        return new MatchOp(doUnion(m1->match, m2->match));
+    return m1 ? m1 : m2;
+}
+
+// Type tag string for MatchOp (compared via isA() elsewhere in this file).
+char *MatchOp::type = "MatchOp";
+
+// Print the character class by streaming its range list.
+void MatchOp::display(ostream &o) const{
+    o << match;
+}
+
+// A character class needs one header instruction plus one cell for every
+// matched character that is its own representative in `rep`.
+void MatchOp::calcSize(Char *rep){
+    uint cells = 1;
+    for(Range *r = match; r != NULL; r = r->next){
+        for(uint c = r->lb; c < r->ub; ++c){
+            if(rep[c] == c)
+                ++cells;
+        }
+    }
+    size = cells;
+}
+
+// A character class always consumes exactly one character.
+uint MatchOp::fixedLength(){
+    return 1;
+}
+
+void MatchOp::compile(Char *rep, Ins *i){
+    i->i.tag = CHAR;
+    i->i.link = &i[size];
+    Ins *j = &i[1];
+    uint bump = size;
+    for(Range *r = match; r; r = r->next){
+       for(uint c = r->lb; c < r->ub; ++c){
+           if(rep[c] == c){
+               j->c.value = c;
+               j->c.bump = --bump;
+               j++;
+           }
+       }
+    }
+}
+
+// Refine the character partition `s` so that no class contains both
+// characters matched by this MatchOp and characters that are not.
+// For every matched character c, c is moved out of its current class x into
+// a companion class a (x->nxt), which is taken from the free list the first
+// time x is touched during this call.
+void MatchOp::split(CharSet &s){
+    for(Range *r = match; r; r = r->next){
+       for(uint c = r->lb; c < r->ub; ++c){
+           CharPtn *x = s.rep[c], *a = x->nxt;
+           if(!a){
+               // A class with a single member cannot be split further.
+               if(x->card == 1)
+                   continue;
+               // Take a fresh class from the head of the free list; when
+               // that empties the list, point the tail back at the head.
+               x->nxt = a = s.freeHead;
+               if(!(s.freeHead = s.freeHead->nxt))
+                   s.freeTail = &s.freeHead;
+               a->nxt = NULL;
+               // Remember x so its nxt link can be reset after the pass.
+               x->fix = s.fix;
+               s.fix = x;
+           }
+           // x lost its last member: append it to the free list.
+           if(--(x->card) == 0){
+               *s.freeTail = x;
+               *(s.freeTail = &x->nxt) = NULL;
+           }
+           s.rep[c] = a;
+           ++(a->card);
+       }
+    }
+    // Clear the temporary companion links of classes that still have
+    // members; the fix list is consumed in the process.
+    for(; s.fix; s.fix = s.fix->fix)
+       if(s.fix->card)
+           s.fix->nxt = NULL;
+}
+
+// Character-class difference e1 \ e2.  Both operands must be MatchOps,
+// otherwise NULL is returned.  An empty result is represented by a NullOp.
+RegExp *mkDiff(RegExp *e1, RegExp *e2){
+    MatchOp *lhs = (MatchOp*) e1->isA(MatchOp::type);
+    if(!lhs)
+        return NULL;
+    MatchOp *rhs = (MatchOp*) e2->isA(MatchOp::type);
+    if(!rhs)
+        return NULL;
+    Range *r = doDiff(lhs->match, rhs->match);
+    if(r)
+        return (RegExp*) new MatchOp(r);
+    return (RegExp*) new NullOp;
+}
+
+// Alternation of two optional expressions: a new AltOp when both are
+// present, otherwise whichever one is non-NULL (or NULL when both are).
+RegExp *doAlt(RegExp *e1, RegExp *e2){
+    if(e1 && e2)
+        return new AltOp(e1, e2);
+    return e1 ? e1 : e2;
+}
+
+// Build e1 | e2 while folding character classes together.  From each
+// operand a leading MatchOp is peeled off: either the operand is itself a
+// MatchOp (nothing remains of it), or it is an AltOp whose first branch is
+// a MatchOp (its second branch remains).  The two peeled classes are merged
+// into one and placed in front of the alternation of the remainders.
+RegExp *mkAlt(RegExp *e1, RegExp *e2){
+    AltOp *a;
+    MatchOp *m1, *m2;
+    if((a = (AltOp*) e1->isA(AltOp::type))){
+       if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type)))
+           e1 = a->exp2;
+    } else if((m1 = (MatchOp*) e1->isA(MatchOp::type))){
+           e1 = NULL;
+    }
+    if((a = (AltOp*) e2->isA(AltOp::type))){
+       if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type)))
+           e2 = a->exp2;
+    } else if((m2 = (MatchOp*) e2->isA(MatchOp::type))){
+           e2 = NULL;
+    }
+    return doAlt(merge(m1, m2), doAlt(e1, e2));
+}
+
+char *AltOp::type = "AltOp";
+
+// Size of both branches plus two extra instructions (the FORK and GOTO
+// written by AltOp::compile).
+void AltOp::calcSize(Char *rep){
+    exp1->calcSize(rep);
+    exp2->calcSize(rep);
+    size = exp1->size + exp2->size + 2;
+}
+
+// Length matched by the alternation when it is the same for both branches,
+// or ~0 when either branch has no fixed length or the two lengths differ.
+// The second length is taken from exp2; it used to be read from exp1 again,
+// which made any alternation look fixed-length whenever its first branch
+// was.
+uint AltOp::fixedLength(){
+    uint l1 = exp1->fixedLength();
+    uint l2 = exp2->fixedLength();
+    if(l1 != l2 || l1 == ~0u)
+       return ~0;
+    return l1;
+}
+
+// Layout produced at i:
+//   i[0]                  FORK -> start of exp2's code
+//   i[1 .. exp1->size]    code for exp1
+//   j = i[exp1->size+1]   GOTO -> first instruction after exp2's code
+//   j[1 .. exp2->size]    code for exp2
+void AltOp::compile(Char *rep, Ins *i){
+    i->i.tag = FORK;
+    Ins *j = &i[exp1->size + 1];
+    i->i.link = &j[1];
+    exp1->compile(rep, &i[1]);
+    j->i.tag = GOTO;
+    j->i.link = &j[exp2->size + 1];
+    exp2->compile(rep, &j[1]);
+}
+
+// Refine the character partition by both branches in turn.
+void AltOp::split(CharSet &s){
+    exp1->split(s);
+    exp2->split(s);
+}
+
+char *CatOp::type = "CatOp";
+
+// A concatenation needs no instructions of its own: its size is the sum of
+// the sizes of its two parts.
+void CatOp::calcSize(Char *rep){
+    exp1->calcSize(rep);
+    exp2->calcSize(rep);
+    size = exp1->size + exp2->size;
+}
+
+// Sum of the two parts' fixed lengths, or ~0 as soon as one part has none.
+// The second part is only queried when the first has a fixed length.
+uint CatOp::fixedLength(){
+    uint l1 = exp1->fixedLength();
+    if(l1 == ~0u)
+        return ~0;
+    uint l2 = exp2->fixedLength();
+    if(l2 == ~0u)
+        return ~0;
+    return l1+l2;
+}
+
+// Emit exp1's code at i and exp2's code immediately after it.
+void CatOp::compile(Char *rep, Ins *i){
+    exp1->compile(rep, &i[0]);
+    exp2->compile(rep, &i[exp1->size]);
+}
+
+// Refine the character partition by both parts in turn.
+void CatOp::split(CharSet &s){
+    exp1->split(s);
+    exp2->split(s);
+}
+
+char *CloseOp::type = "CloseOp";
+
+// Size of the repeated expression plus one instruction (the trailing FORK
+// written by CloseOp::compile).
+void CloseOp::calcSize(Char *rep){
+    exp->calcSize(rep);
+    size = exp->size + 1;
+}
+
+// Emit the repeated expression at i, followed by a FORK whose link points
+// back to the first instruction of that expression.
+void CloseOp::compile(Char *rep, Ins *i){
+    exp->compile(rep, i);
+    Ins *back = &i[exp->size];
+    back->i.tag = FORK;
+    back->i.link = i;
+}
+
+// Refine the character partition by the repeated expression.
+void CloseOp::split(CharSet &s){
+    exp->split(s);
+}
+
+RegExp *expr(Scanner &);
+
+// Consume one (possibly escaped) character from the front of `s`, advancing
+// s.str and shrinking s.len, and return its value.
+//  - A plain character, or a backslash that is the last character left, is
+//    returned through the xlat translation table.
+//  - \n \t \v \b \r \f \a yield the corresponding control character, also
+//    through xlat.
+//  - A backslash followed by octal digits yields the octal value.  All
+//    consecutive octal digits are consumed, the value is accumulated in a
+//    uchar, and it is returned as is (not passed through xlat).
+//  - Any other escaped character stands for itself (through xlat).
+// The caller must ensure s.len is non-zero on entry.
+uchar unescape(SubStr &s){
+    s.len--;
+    uchar c;
+    if((c = *s.str++) != '\\' || s.len == 0)
+       return xlat[c];
+    s.len--;
+    switch(c = *s.str++){
+    case 'n':
+       return xlat['\n'];
+    case 't':
+       return xlat['\t'];
+    case 'v':
+       return xlat['\v'];
+    case 'b':
+       return xlat['\b'];
+    case 'r':
+       return xlat['\r'];
+    case 'f':
+       return xlat['\f'];
+    case 'a':
+       return xlat['\a'];
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7': {
+       uchar v = c - '0';
+       for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
+           v = v*8 + (c - '0');
+       return v;
+    } default:
+       return xlat[c];
+    }
+}
+
+Range *getRange(SubStr &s){
+    uchar lb = unescape(s), ub;
+    if(s.len < 2 || *s.str != '-'){
+       ub = lb;
+    } else {
+       s.len--; s.str++;
+       ub = unescape(s);
+       if(ub < lb){
+           uchar tmp;
+           tmp = lb; lb = ub; ub = tmp;
+       }
+    }
+    return new Range(lb, ub+1);
+}
+
+// Expression matching exactly the character c (the half-open range
+// [c, c+1)).
+RegExp *matchChar(uint c){
+    return new MatchOp(new Range(c, c+1));
+}
+
+// Convert a string literal token into an expression.  The first and last
+// characters of `s` (the delimiters) are dropped; an empty body gives a
+// NullOp, otherwise the characters are concatenated left to right.
+RegExp *strToRE(SubStr s){
+    s.str += 1;
+    s.len -= 2;
+    if(s.len == 0)
+        return new NullOp;
+    RegExp *re = NULL;
+    do {
+        RegExp *ch = matchChar(unescape(s));
+        re = re ? new CatOp(re, ch) : ch;
+    } while(s.len > 0);
+    return re;
+}
+
+// Convert a character-class token into an expression.  The first and last
+// characters of `s` (the delimiters) are dropped; an empty body gives a
+// NullOp, otherwise the union of all listed elements becomes one MatchOp.
+RegExp *ranToRE(SubStr s){
+    s.str += 1;
+    s.len -= 2;
+    if(s.len == 0)
+        return new NullOp;
+    Range *all = NULL;
+    do {
+        Range *piece = getRange(s);
+        all = all ? doUnion(all, piece) : piece;
+    } while(s.len > 0);
+    return new MatchOp(all);
+}
+
+char *RuleOp::type = "RuleOp";
+
+// A rule: expression e, trailing-context expression c, action code t and
+// accept number a.  `ins` is filled in later by compile().
+RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
+       : exp(e), ctx(c), ins(NULL), accept(a), code(t) {
+    ;
+}
+
+// Size of the expression and its context plus one instruction (the TERM
+// written by RuleOp::compile).
+void RuleOp::calcSize(Char *rep){
+    exp->calcSize(rep);
+    ctx->calcSize(rep);
+    size = exp->size + ctx->size + 1;
+}
+
+// Emit the rule at i: the expression, then the context, then a TERM
+// instruction whose link points back at this rule.  The start address is
+// remembered in `ins`.
+void RuleOp::compile(Char *rep, Ins *i){
+    ins = i;
+    exp->compile(rep, i);
+    Ins *ctxStart = i + exp->size;
+    ctx->compile(rep, ctxStart);
+    Ins *term = ctxStart + ctx->size;
+    term->i.tag = TERM;
+    term->i.link = this;
+}
+
+// Refine the character partition by the expression and by its context.
+void RuleOp::split(CharSet &s){
+    exp->split(s);
+    ctx->split(s);
+}
+
+extern void printSpan(ostream&, uint, uint);
+
+// Simplify jumps in the instruction sequence starting at i.  Instructions
+// are marked as they are visited, which both ends the walk and stops the
+// recursion when a cycle is re-entered.
+void optimize(Ins *i){
+    while(!isMarked(i)){
+       mark(i);
+       if(i->i.tag == CHAR){
+           // Skip over the character cells to the end of the block.
+           i = (Ins*) i->i.link;
+       } else if(i->i.tag == GOTO || i->i.tag == FORK){
+           Ins *target = (Ins*) i->i.link;
+           optimize(target);
+           // If the target is a GOTO that jumps to itself, make i jump to
+           // itself as well; otherwise the link keeps pointing at target.
+           if(target->i.tag == GOTO)
+               i->i.link = target->i.link == target? i : target;
+           if(i->i.tag == FORK){
+               Ins *follow = (Ins*) &i[1];
+               optimize(follow);
+               // Fall-through branch is a self-looping GOTO: only the
+               // linked branch is kept, so the FORK becomes a GOTO.
+               if(follow->i.tag == GOTO && follow->i.link == follow){
+                   i->i.tag = GOTO;
+               } else if(i->i.link == i){
+                   // Linked branch is a self-loop: keep only fall-through.
+                   i->i.tag = GOTO;
+                   i->i.link = follow;
+               }
+           }
+           return;
+       } else {
+           ++i;
+       }
+    }
+}
+
+// Generate scanner code for `re` on stream o:
+//  1. partition the character set into classes the expression cannot tell
+//     apart and pick a representative character for each class,
+//  2. compile the expression into an instruction array and optimize it,
+//  3. build a DFA from the instructions and emit it.
+void genCode(ostream& o, RegExp *re){
+    CharSet cs;
+    uint j;
+    memset(&cs, 0, sizeof(cs));
+    // Start with every character in class ptn[0]; chain all ptn entries
+    // together so that ptn[1..] can serve as the free list.
+    for(j = 0; j < nChars; ++j){
+       cs.rep[j] = &cs.ptn[0];
+       cs.ptn[j].nxt = &cs.ptn[j+1];
+    }
+    cs.freeHead = &cs.ptn[1];
+    *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
+    cs.ptn[0].card = nChars;
+    cs.ptn[0].nxt = NULL;
+    re->split(cs);
+/*
+    for(uint k = 0; k < nChars;){
+       for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
+       printSpan(cerr, j, k);
+       cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
+    }
+*/
+    // rep[j] becomes the index of the first character seen in j's class:
+    // the class's nxt pointer is set on first encounter and reused after.
+    Char rep[nChars];
+    for(j = 0; j < nChars; ++j){
+       if(!cs.rep[j]->nxt)
+           cs.rep[j]->nxt = &cs.ptn[j];
+       rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
+    }
+
+    re->calcSize(rep);
+    // One extra slot holds a self-looping GOTO placed after the program.
+    Ins *ins = new Ins[re->size+1];
+    memset(ins, 0, (re->size+1)*sizeof(Ins));
+    re->compile(rep, ins);
+    Ins *eoi = &ins[re->size];
+    eoi->i.tag = GOTO;
+    eoi->i.link = eoi;
+
+    optimize(ins);
+    // Clear the marks left by optimize(); the cells inside a CHAR block are
+    // stepped over via the block's link rather than visited one by one.
+    for(j = 0; j < re->size;){
+       unmark(&ins[j]);
+       if(ins[j].i.tag == CHAR){
+           j = (Ins*) ins[j].i.link - ins;
+       } else {
+           j++;
+       }
+    }
+
+    // NOTE(review): the bounds 0 and 256 are literals here while the rest of
+    // the function uses nChars -- presumably the same value; confirm.
+    DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
+    dfa->emit(o);
+    delete dfa;
+    delete [] ins;
+}
diff --git a/basics.h b/basics.h
new file mode 100644 (file)
index 0000000..2adaeb7
--- /dev/null
+++ b/basics.h
@@ -0,0 +1,9 @@
+/* Short names for the unsigned integer types used throughout the sources. */
+#ifndef _basics_h
+#define _basics_h
+
+typedef unsigned int   uint;
+typedef unsigned char  uchar, byte;
+typedef unsigned short         ushort, word;
+typedef unsigned long  ulong, dword;
+
+#endif
diff --git a/bootstrap/parser.cc b/bootstrap/parser.cc
new file mode 100644 (file)
index 0000000..6d66400
--- /dev/null
@@ -0,0 +1,531 @@
+#ifndef lint
+static char yysccsid[] = "@(#)yaccpar  1.9 (Berkeley) 02/21/93";
+#endif
+#define YYBYACC 1
+#define YYMAJOR 1
+#define YYMINOR 9
+#define yyclearin (yychar=(-1))
+#define yyerrok (yyerrflag=0)
+#define YYRECOVERING (yyerrflag!=0)
+#define YYPREFIX "yy"
+#line 2 "parser.y"
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+#line 21 "parser.y"
+typedef union {
+    Symbol     *symbol;
+    RegExp     *regexp;
+    Token      *token;
+    char       op;
+} YYSTYPE;
+#line 35 "y.tab.c"
+#define CLOSE 257
+#define ID 258
+#define CODE 259
+#define RANGE 260
+#define STRING 261
+#define YYERRCODE 256
+short yylhs[] = {                                        -1,
+    0,    0,    0,    9,    2,    3,    3,    4,    4,    5,
+    5,    6,    6,    7,    7,    1,    1,    8,    8,    8,
+    8,
+};
+short yylen[] = {                                         2,
+    0,    2,    2,    4,    3,    0,    2,    1,    3,    1,
+    3,    1,    2,    1,    2,    1,    2,    1,    1,    1,
+    3,
+};
+short yydefred[] = {                                      1,
+    0,    0,   19,   20,    0,    2,    0,    0,    0,   12,
+    0,    3,    0,   18,    0,    0,    0,    0,    0,   13,
+   16,    0,    0,   21,    0,    0,    5,    0,   17,    4,
+};
+short yydgoto[] = {                                       1,
+   22,    6,   18,    7,    8,    9,   10,   11,   12,
+};
+short yysindex[] = {                                      0,
+  -27,  -49,    0,    0,  -23,    0,  -44,  -84,  -23,    0,
+ -243,    0,  -23,    0,  -39,  -23,  -23, -244,  -23,    0,
+    0, -239,  -53,    0, -104,  -84,    0,  -23,    0,    0,
+};
+short yyrindex[] = {                                      0,
+    0,  -31,    0,    0,    0,    0, -227,  -17,  -20,    0,
+  -40,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,  -36,    0,    0, -226,  -16,    0,  -19,    0,    0,
+};
+short yygindex[] = {                                      0,
+    0,    0,    0,   21,   18,   17,    1,    0,    0,
+};
+#define YYTABLESIZE 243
+short yytable[] = {                                      14,
+   14,   24,   16,   15,   15,   30,   14,   19,   18,   20,
+   15,   13,    5,   21,   27,   18,    5,   29,   14,   17,
+   10,   11,   15,    8,    9,   15,   10,   11,   20,    8,
+    9,    6,    7,   23,   26,   28,   25,    0,   10,   11,
+    0,    8,    9,    0,    0,    0,    0,    0,    0,    0,
+    0,   14,    0,    0,    0,   15,    0,    0,    0,    0,
+   18,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+   17,   10,   11,    0,    0,    0,    0,    0,    0,   17,
+    0,    0,    0,   14,   17,    0,    0,   15,    0,    0,
+    0,    0,   18,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,   10,   11,    0,    8,    9,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,   14,   14,   14,
+   14,   15,   15,   15,   15,   18,   18,   18,   18,   18,
+    2,    0,    3,    4,   14,    0,    3,    4,   10,   11,
+    0,    8,    9,
+};
+short yycheck[] = {                                      40,
+   41,   41,   47,   40,   41,   59,   47,   92,   40,    9,
+   47,   61,   40,  257,  259,   47,   40,  257,   59,  124,
+   41,   41,   59,   41,   41,    5,   47,   47,   28,   47,
+   47,  259,  259,   13,   17,   19,   16,   -1,   59,   59,
+   -1,   59,   59,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   92,   -1,   -1,   -1,   92,   -1,   -1,   -1,   -1,
+   92,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+  124,   92,   92,   -1,   -1,   -1,   -1,   -1,   -1,  124,
+   -1,   -1,   -1,  124,  124,   -1,   -1,  124,   -1,   -1,
+   -1,   -1,  124,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,  124,  124,   -1,  124,  124,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,  258,  259,  260,
+  261,  258,  259,  260,  261,  257,  258,  259,  260,  261,
+  258,   -1,  260,  261,  258,   -1,  260,  261,  259,  259,
+   -1,  259,  259,
+};
+#define YYFINAL 1
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+#define YYMAXTOKEN 261
+#if YYDEBUG
+char *yyname[] = {
+"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,"'('","')'",0,0,0,0,0,"'/'",0,0,0,0,0,0,0,0,0,0,0,"';'",0,"'='",0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'\\\\'",0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+"CLOSE","ID","CODE","RANGE","STRING",
+};
+char *yyrule[] = {
+"$accept : spec",
+"spec :",
+"spec : spec rule",
+"spec : spec decl",
+"decl : ID '=' expr ';'",
+"rule : expr look CODE",
+"look :",
+"look : '/' expr",
+"expr : diff",
+"expr : expr '|' diff",
+"diff : term",
+"diff : diff '\\\\' term",
+"term : factor",
+"term : term factor",
+"factor : primary",
+"factor : primary close",
+"close : CLOSE",
+"close : close CLOSE",
+"primary : ID",
+"primary : RANGE",
+"primary : STRING",
+"primary : '(' expr ')'",
+};
+#endif
+#ifdef YYSTACKSIZE
+#undef YYMAXDEPTH
+#define YYMAXDEPTH YYSTACKSIZE
+#else
+#ifdef YYMAXDEPTH
+#define YYSTACKSIZE YYMAXDEPTH
+#else
+#define YYSTACKSIZE 500
+#define YYMAXDEPTH 500
+#endif
+#endif
+int yydebug;
+int yynerrs;
+int yyerrflag;
+int yychar;
+short *yyssp;
+YYSTYPE *yyvsp;
+YYSTYPE yyval;
+YYSTYPE yylval;
+short yyss[YYSTACKSIZE];
+YYSTYPE yyvs[YYSTACKSIZE];
+#define yystacksize YYSTACKSIZE
+#line 121 "parser.y"
+
+/* Parser error hook: forwards the message to the current scanner's fatal(). */
+void yyerror(char* s){
+    in->fatal(s);
+}
+
+/* Parser token hook: returns the next token from the current scanner. */
+int yylex(){
+    return in->scan();
+}
+
+/*
+ * Process the input identified by `i` (passed to the Scanner constructor)
+ * and write the generated output to `o`.
+ *
+ * Output starts with a "Generated by" comment carrying the current time
+ * and a #line directive naming the input file ("<stdin>" when fileName is
+ * NULL).  Then, for as long as in->echo(o) returns true, one specification
+ * is parsed, code is generated for it if the parser produced one, and a
+ * #line directive with the scanner's current line is written.
+ * NOTE(review): echo() presumably copies input through to the next re2c
+ * block -- confirm in scanner.re.
+ */
+void parse(int i, ostream &o){
+    char *     fnamebuf;
+    char *     token;
+
+    o << "/* Generated by re2c 0.5 on ";
+    time_t now = time(&now);
+    /* ctime() text is written without its trailing newline (24 chars). */
+    o.write(ctime(&now), 24);
+    o << " */\n";
+
+    in = new Scanner(i);
+
+    o << "#line " << in->line() << " \"";
+    if( fileName != NULL ) {
+       fnamebuf = strdup( fileName );
+    } else {
+       fnamebuf = strdup( "<stdin>" );
+    }
+    /* Write the name with every backslash-separated piece rejoined by an
+       escaped backslash, so the name is valid inside a string literal. */
+    token = strtok( fnamebuf, "\\" );
+    for(;;) {
+       o << token;
+       token = strtok( NULL, "\\" );
+       if( token == NULL ) break;
+       o << "\\\\";
+    }
+    o << "\"\n";
+    free( fnamebuf );
+
+    while(in->echo(o)){
+       yyparse();
+       if(spec)
+           genCode(o, spec);
+       o << "#line " << in->line() << "\n";
+    }
+}
+#line 235 "y.tab.c"
+/*
+ * Control-transfer helpers for use inside yyparse(): each expands to a goto
+ * to one of that function's labels.
+ */
+#define YYABORT goto yyabort
+#define YYREJECT goto yyabort
+#define YYACCEPT goto yyaccept
+#define YYERROR goto yyerrlab
+/*
+ * yyparse -- table-driven shift/reduce parser (yacc output).
+ *
+ * Pulls tokens from yylex() and drives the state machine encoded in the
+ * yydefred / yysindex / yyrindex / yygindex / yydgoto / yytable / yycheck
+ * tables, running the grammar's semantic actions on each reduction.
+ *
+ * Returns 0 when the input is accepted, 1 when parsing is aborted (error
+ * recovery failed, or the fixed-size stacks overflowed).
+ */
+int
+yyparse()
+{
+    register int yym, yyn, yystate;
+#if YYDEBUG
+    register char *yys;
+    extern char *getenv();
+
+    /* A leading digit in the YYDEBUG environment variable sets the trace level. */
+    if (yys = getenv("YYDEBUG"))
+    {
+        yyn = *yys;
+        if (yyn >= '0' && yyn <= '9')
+            yydebug = yyn - '0';
+    }
+#endif
+
+    yynerrs = 0;
+    yyerrflag = 0;
+    yychar = (-1);
+
+    /* Both stacks start with a single entry: state 0. */
+    yyssp = yyss;
+    yyvsp = yyvs;
+    *yyssp = yystate = 0;
+
+yyloop:
+    /* A nonzero yydefred entry means this state reduces without looking at the lookahead. */
+    if (yyn = yydefred[yystate]) goto yyreduce;
+    if (yychar < 0)
+    {
+        /* No lookahead yet: fetch one.  Negative scanner results are mapped to 0 (end of input). */
+        if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+        if (yydebug)
+        {
+            yys = 0;
+            if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+            if (!yys) yys = "illegal-symbol";
+            printf("%sdebug: state %d, reading %d (%s)\n",
+                    YYPREFIX, yystate, yychar, yys);
+        }
+#endif
+    }
+    /* Shift: yysindex gives the base into yytable; yycheck confirms the slot belongs to this token. */
+    if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+    {
+#if YYDEBUG
+        if (yydebug)
+            printf("%sdebug: state %d, shifting to state %d\n",
+                    YYPREFIX, yystate, yytable[yyn]);
+#endif
+        if (yyssp >= yyss + yystacksize - 1)
+        {
+            goto yyoverflow;
+        }
+        *++yyssp = yystate = yytable[yyn];
+        *++yyvsp = yylval;
+        yychar = (-1);          /* lookahead consumed */
+        if (yyerrflag > 0)  --yyerrflag;
+        goto yyloop;
+    }
+    /* Reduce on this lookahead: same lookup scheme, but yytable yields a rule number. */
+    if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+    {
+        yyn = yytable[yyn];
+        goto yyreduce;
+    }
+    /* Neither shift nor reduce applies: syntax error.  Report it only if not already recovering. */
+    if (yyerrflag) goto yyinrecovery;
+    /* The lint-only gotos below presumably exist just to reference the labels. */
+#ifdef lint
+    goto yynewerror;
+#endif
+yynewerror:
+    yyerror("syntax error");
+#ifdef lint
+    goto yyerrlab;
+#endif
+yyerrlab:
+    ++yynerrs;
+yyinrecovery:
+    if (yyerrflag < 3)
+    {
+        /* Start (or restart) recovery: pop states until one can shift the error token. */
+        yyerrflag = 3;
+        for (;;)
+        {
+            if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
+                    yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
+            {
+#if YYDEBUG
+                if (yydebug)
+                    printf("%sdebug: state %d, error recovery shifting\
+ to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
+#endif
+                if (yyssp >= yyss + yystacksize - 1)
+                {
+                    goto yyoverflow;
+                }
+                *++yyssp = yystate = yytable[yyn];
+                *++yyvsp = yylval;
+                goto yyloop;
+            }
+            else
+            {
+#if YYDEBUG
+                if (yydebug)
+                    printf("%sdebug: error recovery discarding state %d\n",
+                            YYPREFIX, *yyssp);
+#endif
+                /* Stack exhausted without finding a state that accepts the error token. */
+                if (yyssp <= yyss) goto yyabort;
+                --yyssp;
+                --yyvsp;
+            }
+        }
+    }
+    else
+    {
+        /* Error hit again with yyerrflag still at 3: drop the offending token (give up at end of input). */
+        if (yychar == 0) goto yyabort;
+#if YYDEBUG
+        if (yydebug)
+        {
+            yys = 0;
+            if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+            if (!yys) yys = "illegal-symbol";
+            printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
+                    YYPREFIX, yystate, yychar, yys);
+        }
+#endif
+        yychar = (-1);
+        goto yyloop;
+    }
+yyreduce:
+#if YYDEBUG
+    if (yydebug)
+        printf("%sdebug: state %d, reducing by rule %d (%s)\n",
+                YYPREFIX, yystate, yyn, yyrule[yyn]);
+#endif
+    /* yym = number of right-hand-side symbols; yyval defaults to the first of their values. */
+    yym = yylen[yyn];
+    yyval = yyvsp[1-yym];
+    /* Semantic actions.  yyvsp[0] is the last right-hand-side value, yyvsp[-1] the one before it, etc. */
+    switch (yyn)
+    {
+/* Reset the rule counter and the accumulated specification. */
+case 1:
+#line 40 "parser.y"
+{ accept = 0;
+                 spec = NULL; }
+break;
+/* Fold the new regexp into spec (alternation with what is already there, if anything). */
+case 2:
+#line 43 "parser.y"
+{ spec = spec? mkAlt(spec, yyvsp[0].regexp) : yyvsp[0].regexp; }
+break;
+/* Bind a regexp to a symbol; binding a symbol that already has one is fatal. */
+case 4:
+#line 48 "parser.y"
+{ if(yyvsp[-3].symbol->re)
+                     in->fatal("sym already defined");
+                 yyvsp[-3].symbol->re = yyvsp[-1].regexp; }
+break;
+/* Build a RuleOp from two regexps and a token, numbering it with the next accept value. */
+case 5:
+#line 54 "parser.y"
+{ yyval.regexp = new RuleOp(yyvsp[-2].regexp, yyvsp[-1].regexp, yyvsp[0].token, accept++); }
+break;
+/* No value on the stack to use: produce a NullOp. */
+case 6:
+#line 58 "parser.y"
+{ yyval.regexp = new NullOp; }
+break;
+/* Pass the regexp through unchanged. */
+case 7:
+#line 60 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Pass the regexp through unchanged. */
+case 8:
+#line 64 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Alternation of the first and third values. */
+case 9:
+#line 66 "parser.y"
+{ yyval.regexp =  mkAlt(yyvsp[-2].regexp, yyvsp[0].regexp); }
+break;
+/* Pass the regexp through unchanged. */
+case 10:
+#line 70 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Difference of the first and third values; a null result from mkDiff is fatal. */
+case 11:
+#line 72 "parser.y"
+{ yyval.regexp =  mkDiff(yyvsp[-2].regexp, yyvsp[0].regexp);
+                 if(!yyval.regexp)
+                      in->fatal("can only difference char sets");
+               }
+break;
+/* Pass the regexp through unchanged. */
+case 12:
+#line 79 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Concatenation of two adjacent regexps. */
+case 13:
+#line 81 "parser.y"
+{ yyval.regexp = new CatOp(yyvsp[-1].regexp, yyvsp[0].regexp); }
+break;
+/* Pass the regexp through unchanged. */
+case 14:
+#line 85 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/*
+ * Apply a postfix operator to a regexp: '*' is (CloseOp | NullOp), '+' is a
+ * bare CloseOp, '?' is (regexp | NullOp).  Any other operator value leaves
+ * yyval at its default (the first right-hand-side value).
+ */
+case 15:
+#line 87 "parser.y"
+{
+                   switch(yyvsp[0].op){
+                   case '*':
+                       yyval.regexp = mkAlt(new CloseOp(yyvsp[-1].regexp), new NullOp());
+                       break;
+                   case '+':
+                       yyval.regexp = new CloseOp(yyvsp[-1].regexp);
+                       break;
+                   case '?':
+                       yyval.regexp = mkAlt(yyvsp[-1].regexp, new NullOp());
+                       break;
+                   }
+               }
+break;
+/* Pass the operator through unchanged. */
+case 16:
+#line 103 "parser.y"
+{ yyval.op = yyvsp[0].op; }
+break;
+/* Two stacked operators: identical ones collapse to themselves, differing ones become '*'. */
+case 17:
+#line 105 "parser.y"
+{ yyval.op = (yyvsp[-1].op == yyvsp[0].op) ? yyvsp[-1].op : '*'; }
+break;
+/* Symbol reference: fatal if the symbol has no regexp bound, otherwise use it. */
+case 18:
+#line 109 "parser.y"
+{ if(!yyvsp[0].symbol->re)
+                     in->fatal("can't find symbol");
+                 yyval.regexp = yyvsp[0].symbol->re; }
+break;
+/* Pass the regexp through unchanged. */
+case 19:
+#line 113 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Pass the regexp through unchanged. */
+case 20:
+#line 115 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+/* Take the second-to-last value (presumably the regexp between a pair of delimiters -- confirm in parser.y). */
+case 21:
+#line 117 "parser.y"
+{ yyval.regexp = yyvsp[-1].regexp; }
+break;
+#line 476 "y.tab.c"
+    }
+    /* Pop the right-hand side off both stacks; yym now becomes the rule's left-hand-side symbol. */
+    yyssp -= yym;
+    yystate = *yyssp;
+    yyvsp -= yym;
+    yym = yylhs[yyn];
+    if (yystate == 0 && yym == 0)
+    {
+        /* Reduced to symbol 0 with state 0 exposed: move to YYFINAL and accept once end of input is seen. */
+#if YYDEBUG
+        if (yydebug)
+            printf("%sdebug: after reduction, shifting from state 0 to\
+ state %d\n", YYPREFIX, YYFINAL);
+#endif
+        yystate = YYFINAL;
+        *++yyssp = YYFINAL;
+        *++yyvsp = yyval;
+        if (yychar < 0)
+        {
+            if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+            if (yydebug)
+            {
+                yys = 0;
+                if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+                if (!yys) yys = "illegal-symbol";
+                printf("%sdebug: state %d, reading %d (%s)\n",
+                        YYPREFIX, YYFINAL, yychar, yys);
+            }
+#endif
+        }
+        if (yychar == 0) goto yyaccept;
+        goto yyloop;
+    }
+    /* Goto: look up the next state for (exposed state, left-hand side), falling back to yydgoto. */
+    if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
+        yystate = yytable[yyn];
+    else
+        yystate = yydgoto[yym];
+#if YYDEBUG
+    if (yydebug)
+        printf("%sdebug: after reduction, shifting from state %d \
+to state %d\n", YYPREFIX, *yyssp, yystate);
+#endif
+    if (yyssp >= yyss + yystacksize - 1)
+    {
+        goto yyoverflow;
+    }
+    *++yyssp = yystate;
+    *++yyvsp = yyval;
+    goto yyloop;
+    /* Overflow reports through yyerror() and then falls through into the abort path. */
+yyoverflow:
+    yyerror("yacc stack overflow");
+yyabort:
+    return (1);
+yyaccept:
+    return (0);
+}
diff --git a/bootstrap/re2c.man b/bootstrap/re2c.man
new file mode 100644 (file)
index 0000000..b74aaaf
--- /dev/null
@@ -0,0 +1,660 @@
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+N\bNA\bAM\bME\bE
+       re2c - convert regular expressions to C/C++
+
+
+S\bSY\bYN\bNO\bOP\bPS\bSI\bIS\bS
+       r\bre\be2\b2c\bc [-\b-e\bes\bsb\bb] _\bn_\ba_\bm_\be
+
+
+D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
+       r\bre\be2\b2c\bc  is a preprocessor that generates C-based recognizers
+       from regular expressions.  The input to r\bre\be2\b2c\bc  consists  of
+       C/C++ source interleaved with comments of the form /\b/*\b*!\b!r\bre\be2\b2c\bc
+       ... *\b*/\b/ which contain scanner specifications.  In the  out-
+       put  these comments are replaced with code that, when exe-
+       cuted, will find the next input  token  and  then  execute
+       some user-supplied token-specific code.
+
+       For example, given the following code
+
+          #define NULL            ((char*) 0)
+          char *scan(char *p){
+          char *q;
+          #define YYCTYPE         char
+          #define YYCURSOR        p
+          #define YYLIMIT         p
+          #define YYMARKER        q
+          #define YYFILL(n)
+          /*!re2c
+                  [0-9]+          {return YYCURSOR;}
+                  [\000-\377]     {return NULL;}
+          */
+          }
+
+       r\bre\be2\b2c\bc will generate
+
+          /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+          #line 1 "simple.re"
+          #define NULL            ((char*) 0)
+          char *scan(char *p){
+          char *q;
+          #define YYCTYPE         char
+          #define YYCURSOR        p
+          #define YYLIMIT         p
+          #define YYMARKER        q
+          #define YYFILL(n)
+          {
+                  YYCTYPE yych;
+                  unsigned int yyaccept;
+                  goto yy0;
+          yy1:    ++YYCURSOR;
+          yy0:
+                  if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                  yych = *YYCURSOR;
+                  if(yych <= '/') goto yy4;
+
+
+
+Version 0.5                8 April 1994                         1
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+                  if(yych >= ':') goto yy4;
+          yy2:    yych = *++YYCURSOR;
+                  goto yy7;
+          yy3:
+          #line 10
+                  {return YYCURSOR;}
+          yy4:    yych = *++YYCURSOR;
+          yy5:
+          #line 11
+                  {return NULL;}
+          yy6:    ++YYCURSOR;
+                  if(YYLIMIT == YYCURSOR) YYFILL(1);
+                  yych = *YYCURSOR;
+          yy7:    if(yych <= '/') goto yy3;
+                  if(yych <= '9') goto yy6;
+                  goto yy3;
+          }
+          #line 12
+
+          }
+
+
+O\bOP\bPT\bTI\bIO\bON\bNS\bS
+       r\bre\be2\b2c\bc provides the following options:
+
+       -\b-e\be     Cross-compile  from  an ASCII platform to an EBCDIC
+              one.
+
+       -\b-s\bs     Generate nested i\bif\bfs for some s\bsw\bwi\bit\btc\bch\bhes.   Many  com-
+              pilers need this assist to generate better code.
+
+       -\b-b\bb     Implies -\b-s\bs.  Use bit vectors as well in the attempt
+              to coax better code out of the compiler.  Most use-
+              ful  for  specifications  with more than a few key-
+              words (e.g. for most programming languages).
+
+
+I\bIN\bNT\bTE\bER\bRF\bFA\bAC\bCE\bE C\bCO\bOD\bDE\bE
+       Unlike other scanner generators, r\bre\be2\b2c\bc  does  not  generate
+       complete  scanners:  the  user  must supply some interface
+       code.  In particular, the user must define  the  following
+       macros:
+
+       Y\bYY\bYC\bCT\bTY\bYP\bPE\bE Type used to hold an input symbol.  Usually c\bch\bha\bar\br or
+              u\bun\bns\bsi\big\bgn\bne\bed\bd c\bch\bha\bar\br.
+
+       Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR
+              _\bl-expression of type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE that  points  to  the
+              current  input symbol.  The generated code advances
+              Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR as symbols are matched.  On entry,  Y\bYY\bYC\bCU\bUR\bR-\b-
+              S\bSO\bOR\bR  is  assumed to point to the first character of
+              the current token.  On exit, Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR will point to
+              the first character of the following token.
+
+
+
+
+Version 0.5                8 April 1994                         2
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+       Y\bYY\bYL\bLI\bIM\bMI\bIT\bT Expression  of type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE that marks the end of
+              the buffer (Y\bYY\bYL\bLI\bIM\bMI\bIT\bT[\b[-\b-1\b1]\b] is the last character in the
+              buffer).   The  generated  code repeatedly compares
+              Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR to Y\bYY\bYL\bLI\bIM\bMI\bIT\bT to determine  when  the  buffer
+              needs (re)filling.
+
+       Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR
+              _\bl-expression of  type *\b*Y\bYY\bYC\bCT\bTY\bYP\bPE\bE.  The generated code
+              saves backtracking information in Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR.
+
+       Y\bYY\bYF\bFI\bIL\bLL\bL(\b(_\bn)\b)
+              The generated code "calls" Y\bYY\bYF\bFI\bIL\bLL\bL when  the  buffer
+              needs  (re)filling:   at least _\bn additional charac-
+              ters should  be  provided.   Y\bYY\bYF\bFI\bIL\bLL\bL  should  adjust
+              Y\bYY\bYC\bCU\bUR\bRS\bSO\bOR\bR,  Y\bYY\bYL\bLI\bIM\bMI\bIT\bT  and  Y\bYY\bYM\bMA\bAR\bRK\bKE\bER\bR  as needed.  Note
+              that for typical programming languages  _\bn  will  be
+              the length of the longest keyword plus one.
+
+
+S\bSC\bCA\bAN\bNN\bNE\bER\bR S\bSP\bPE\bEC\bCI\bIF\bFI\bIC\bCA\bAT\bTI\bIO\bON\bNS\bS
+       Each  scanner specification consists of a set of _\br_\bu_\bl_\be_\bs and
+       name definitions.  Rules consist of a  regular  expression
+       along  with  a  block of C/C++ code that is to be executed
+       when the associated regular expression is  matched.   Name
+       definitions  are  of  the  form  ``_\bn_\ba_\bm_\be  =\b= _\br_\be_\bg_\bu_\bl_\ba_\br _\be_\bx_\bp_\br_\be_\bs_\b-
+       _\bs_\bi_\bo_\bn;\b;''.
+
+
+S\bSU\bUM\bMM\bMA\bAR\bRY\bY O\bOF\bF R\bRE\bE2\b2C\bC R\bRE\bEG\bGU\bUL\bLA\bAR\bR E\bEX\bXP\bPR\bRE\bES\bSS\bSI\bIO\bON\bNS\bS
+       "\b"f\bfo\boo\bo"\b"  the literal string f\bfo\boo\bo.   ANSI-C  escape  sequences
+              can be used.
+
+       [\b[x\bxy\byz\bz]\b]  a  "character  class";  in  this  case, the regular
+              expression matches either an 'x\bx', a 'y\by', or a  'z\bz'.
+
+       [\b[a\bab\bbj\bj-\b-o\boZ\bZ]\b]
+              a  "character class" with a range in it; matches an
+              'a\ba', a 'b\bb', any letter from 'j\bj' through 'o\bo',  or  a
+              'Z\bZ'.
+
+       _\br\\b\_\bs    match any _\br which isn't an _\bs. _\br and _\bs must be regu-
+              lar expressions which can be expressed as character
+              classes.
+
+       _\br*\b*     zero or more _\br's, where _\br is any regular expression
+
+       _\br+\b+     one or more _\br's
+
+       _\br?\b?     zero or one _\br's (that is, "an optional _\br")
+
+       name   the expansion of the "name" definition (see above)
+
+       (\b(_\br)\b)    an _\br; parentheses are used to  override  precedence
+              (see below)
+
+
+
+Version 0.5                8 April 1994                         3
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+       _\br_\bs     an _\br followed by an _\bs ("concatenation")
+
+       _\br|\b|_\bs    either an _\br or an _\bs
+
+       _\br/\b/_\bs    an  _\br  but only if it is followed by an _\bs. The s is
+              not part of the matched text. This type of  regular
+              expression is called "trailing context".
+
+       The regular expressions listed above are grouped according
+       to precedence, from highest precedence at the top to  low-
+       est  at  the  bottom.   Those  grouped together have equal
+       precedence.
+
+
+A\bA L\bLA\bAR\bRG\bGE\bER\bR E\bEX\bXA\bAM\bMP\bPL\bLE\bE
+          #include <stdlib.h>
+          #include <stdio.h>
+          #include <fcntl.h>
+          #include <string.h>
+
+          #define ADDEQ   257
+          #define ANDAND  258
+          #define ANDEQ   259
+          #define ARRAY   260
+          #define ASM     261
+          #define AUTO    262
+          #define BREAK   263
+          #define CASE    264
+          #define CHAR    265
+          #define CONST   266
+          #define CONTINUE        267
+          #define DECR    268
+          #define DEFAULT 269
+          #define DEREF   270
+          #define DIVEQ   271
+          #define DO      272
+          #define DOUBLE  273
+          #define ELLIPSIS        274
+          #define ELSE    275
+          #define ENUM    276
+          #define EQL     277
+          #define EXTERN  278
+          #define FCON    279
+          #define FLOAT   280
+          #define FOR     281
+          #define FUNCTION        282
+          #define GEQ     283
+          #define GOTO    284
+          #define ICON    285
+          #define ID      286
+          #define IF      287
+          #define INCR    288
+          #define INT     289
+          #define LEQ     290
+
+
+
+Version 0.5                8 April 1994                         4
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+          #define LONG    291
+          #define LSHIFT  292
+          #define LSHIFTEQ        293
+          #define MODEQ   294
+          #define MULEQ   295
+          #define NEQ     296
+          #define OREQ    297
+          #define OROR    298
+          #define POINTER 299
+          #define REGISTER        300
+          #define RETURN  301
+          #define RSHIFT  302
+          #define RSHIFTEQ        303
+          #define SCON    304
+          #define SHORT   305
+          #define SIGNED  306
+          #define SIZEOF  307
+          #define STATIC  308
+          #define STRUCT  309
+          #define SUBEQ   310
+          #define SWITCH  311
+          #define TYPEDEF 312
+          #define UNION   313
+          #define UNSIGNED        314
+          #define VOID    315
+          #define VOLATILE        316
+          #define WHILE   317
+          #define XOREQ   318
+          #define EOI     319
+
+          typedef unsigned int uint;
+          typedef unsigned char uchar;
+
+          #define BSIZE   8192
+
+          #define YYCTYPE         uchar
+          #define YYCURSOR        cursor
+          #define YYLIMIT         s->lim
+          #define YYMARKER        s->ptr
+          #define YYFILL(n)       {cursor = fill(s, cursor);}
+
+          #define RET(i)  {s->cur = cursor; return i;}
+
+          typedef struct Scanner {
+              int                 fd;
+              uchar               *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+              uint                line;
+          } Scanner;
+
+          uchar *fill(Scanner *s, uchar *cursor){
+              if(!s->eof){
+                  uint cnt = s->tok - s->bot;
+                  if(cnt){
+                      memcpy(s->bot, s->tok, s->lim - s->tok);
+
+
+
+Version 0.5                8 April 1994                         5
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+                      s->tok = s->bot;
+                      s->ptr -= cnt;
+                      cursor -= cnt;
+                      s->pos -= cnt;
+                      s->lim -= cnt;
+                  }
+                  if((s->top - s->lim) < BSIZE){
+                      uchar *buf = (uchar*)
+                          malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+                      memcpy(buf, s->tok, s->lim - s->tok);
+                      s->tok = buf;
+                      s->ptr = &buf[s->ptr - s->bot];
+                      cursor = &buf[cursor - s->bot];
+                      s->pos = &buf[s->pos - s->bot];
+                      s->lim = &buf[s->lim - s->bot];
+                      s->top = &s->lim[BSIZE];
+                      free(s->bot);
+                      s->bot = buf;
+                  }
+                  if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
+                      s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+                  }
+                  s->lim += cnt;
+              }
+              return cursor;
+          }
+
+          int scan(Scanner *s){
+                  uchar *cursor = s->cur;
+          std:
+                  s->tok = cursor;
+          /*!re2c
+          any     = [\000-\377];
+          O       = [0-7];
+          D       = [0-9];
+          L       = [a-zA-Z_];
+          H       = [a-fA-F0-9];
+          E       = [Ee] [+-]? D+;
+          FS      = [fFlL];
+          IS      = [uUlL]*;
+          ESC     = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+          */
+
+          /*!re2c
+                  "/*"                    { goto comment; }
+
+                  "auto"                  { RET(AUTO); }
+                  "break"                 { RET(BREAK); }
+                  "case"                  { RET(CASE); }
+                  "char"                  { RET(CHAR); }
+                  "const"                 { RET(CONST); }
+                  "continue"              { RET(CONTINUE); }
+                  "default"               { RET(DEFAULT); }
+                  "do"                    { RET(DO); }
+
+
+
+Version 0.5                8 April 1994                         6
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+                  "double"                { RET(DOUBLE); }
+                  "else"                  { RET(ELSE); }
+                  "enum"                  { RET(ENUM); }
+                  "extern"                { RET(EXTERN); }
+                  "float"                 { RET(FLOAT); }
+                  "for"                   { RET(FOR); }
+                  "goto"                  { RET(GOTO); }
+                  "if"                    { RET(IF); }
+                  "int"                   { RET(INT); }
+                  "long"                  { RET(LONG); }
+                  "register"              { RET(REGISTER); }
+                  "return"                { RET(RETURN); }
+                  "short"                 { RET(SHORT); }
+                  "signed"                { RET(SIGNED); }
+                  "sizeof"                { RET(SIZEOF); }
+                  "static"                { RET(STATIC); }
+                  "struct"                { RET(STRUCT); }
+                  "switch"                { RET(SWITCH); }
+                  "typedef"               { RET(TYPEDEF); }
+                  "union"                 { RET(UNION); }
+                  "unsigned"              { RET(UNSIGNED); }
+                  "void"                  { RET(VOID); }
+                  "volatile"              { RET(VOLATILE); }
+                  "while"                 { RET(WHILE); }
+
+                  L (L|D)*                { RET(ID); }
+
+                  ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+                  (['] (ESC|any\[\n\\'])* ['])
+                                          { RET(ICON); }
+
+                  (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                                          { RET(FCON); }
+
+                  (["] (ESC|any\[\n\\"])* ["])
+                                          { RET(SCON); }
+
+                  "..."                   { RET(ELLIPSIS); }
+                  ">>="                   { RET(RSHIFTEQ); }
+                  "<<="                   { RET(LSHIFTEQ); }
+                  "+="                    { RET(ADDEQ); }
+                  "-="                    { RET(SUBEQ); }
+                  "*="                    { RET(MULEQ); }
+                  "/="                    { RET(DIVEQ); }
+                  "%="                    { RET(MODEQ); }
+                  "&="                    { RET(ANDEQ); }
+                  "^="                    { RET(XOREQ); }
+                  "|="                    { RET(OREQ); }
+                  ">>"                    { RET(RSHIFT); }
+                  "<<"                    { RET(LSHIFT); }
+                  "++"                    { RET(INCR); }
+                  "--"                    { RET(DECR); }
+                  "->"                    { RET(DEREF); }
+                  "&&"                    { RET(ANDAND); }
+
+
+
+Version 0.5                8 April 1994                         7
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+                  "||"                    { RET(OROR); }
+                  "<="                    { RET(LEQ); }
+                  ">="                    { RET(GEQ); }
+                  "=="                    { RET(EQL); }
+                  "!="                    { RET(NEQ); }
+                  ";"                     { RET(';'); }
+                  "{"                     { RET('{'); }
+                  "}"                     { RET('}'); }
+                  ","                     { RET(','); }
+                  ":"                     { RET(':'); }
+                  "="                     { RET('='); }
+                  "("                     { RET('('); }
+                  ")"                     { RET(')'); }
+                  "["                     { RET('['); }
+                  "]"                     { RET(']'); }
+                  "."                     { RET('.'); }
+                  "&"                     { RET('&'); }
+                  "!"                     { RET('!'); }
+                  "~"                     { RET('~'); }
+                  "-"                     { RET('-'); }
+                  "+"                     { RET('+'); }
+                  "*"                     { RET('*'); }
+                  "/"                     { RET('/'); }
+                  "%"                     { RET('%'); }
+                  "<"                     { RET('<'); }
+                  ">"                     { RET('>'); }
+                  "^"                     { RET('^'); }
+                  "|"                     { RET('|'); }
+                  "?"                     { RET('?'); }
+
+
+                  [ \t\v\f]+           { goto std; }
+
+                  "\n"
+                      {
+                          if(cursor == s->eof) RET(EOI);
+                          s->pos = cursor; s->line++;
+                          goto std;
+                      }
+
+                  any
+                      {
+                          printf("unexpected character: %c\n", *s->tok);
+                          goto std;
+                      }
+          */
+
+          comment:
+          /*!re2c
+                  "*/"                    { goto std; }
+                  "\n"
+                      {
+                          if(cursor == s->eof) RET(EOI);
+                          s->tok = s->pos = cursor; s->line++;
+
+
+
+Version 0.5                8 April 1994                         8
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+                          goto comment;
+                      }
+                  any                     { goto comment; }
+          */
+          }
+
+          main(){
+              Scanner in;
+              int t;
+              memset((char*) &in, 0, sizeof(in));
+              in.fd = 0;
+              while((t = scan(&in)) != EOI){
+          /*
+                  printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+                  printf("%d\n", t);
+          */
+              }
+              close(in.fd);
+          }
+
+
+S\bSE\bEE\bE A\bAL\bLS\bSO\bO
+       flex(1), lex(1).
+
+
+F\bFE\bEA\bAT\bTU\bUR\bRE\bES\bS
+       r\bre\be2\b2c\bc does not provide a default action: the generated code
+       assumes  that  the  input  will  consist  of a sequence of
+       tokens.  Typically this can be dealt with by adding a rule
+       such  as  the one for unexpected characters in the example
+       above.
+
+       The user must arrange for a sentinel token  to  appear  at
+       the  end  of  input  (and provide a rule for matching it):
+       r\bre\be2\b2c\bc does not  provide  an  <\b<<\b<E\bEO\bOF\bF>\b>>\b>  expression.   If  the
+       source  is  from  a  null-byte  terminated  string, a rule
+       matching a null character will suffice.  If the source  is
+       from  a file then the approach taken in the example can be
+       used: pad the input with a newline (or some other  charac-
+       ter  that  can't appear within another token); upon recog-
+       nizing such a character check to see if it is the sentinel
+       and act accordingly.
+
+       r\bre\be2\b2c\bc  does  not  provide start conditions:  use a separate
+       scanner specification for each start condition (as  illus-
+       trated in the above example).
+
+       No [^x].  Use difference instead.
+
+B\bBU\bUG\bGS\bS
+       Only fixed length trailing context can be handled.
+
+       The  maximum value appearing as a parameter _\bn to Y\bYY\bYF\bFI\bIL\bLL\bL is
+       not provided to the generated code (this value  is  needed
+
+
+
+Version 0.5                8 April 1994                         9
+
+
+
+
+
+RE2C(1)                                                   RE2C(1)
+
+
+       for  constructing  the  interface  code).   Note that this
+       value is usually relatively small: for typical programming
+       languages _\bn will be the length of the longest keyword plus
+       one.
+
+       Difference only works for character sets.
+
+       The r\bre\be2\b2c\bc internal algorithms need documentation.
+
+
+A\bAU\bUT\bTH\bHO\bOR\bR
+       Please send bug reports, fixes and feedback to:
+
+       Peter Bumbulis
+       Computer Systems Group
+       University of Waterloo
+       Waterloo, Ontario
+       N2L 3G1
+       Internet:  peter@csg.uwaterloo.ca
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Version 0.5                8 April 1994                        10
+
+
diff --git a/bootstrap/scanner.cc b/bootstrap/scanner.cc
new file mode 100644 (file)
index 0000000..19b4259
--- /dev/null
@@ -0,0 +1,470 @@
+/* Generated by re2c 0.5 on Sat May 15 11:35:52 1999 */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+#define        BSIZE   8192    /* bytes requested from read() by each fill() call */
+
+#define        YYCTYPE         uchar   /* type of the yych variable in the generated code */
+#define        YYCURSOR        cursor  /* local scan position */
+#define        YYLIMIT         lim     /* Scanner::lim, advanced by fill() */
+#define        YYMARKER        ptr     /* Scanner::ptr, saved position used when backing up */
+#define        YYFILL(n)       {cursor = fill(cursor);}        /* n is ignored; fill() may relocate cursor */
+
+#define        RETURN(i)       {cur = cursor; return i;}       /* store the scan position in cur, then return i */
+
+
+// Build a scanner that reads from file descriptor i.  All buffer pointers
+// start out NULL (nothing is allocated here), the token column and line
+// start at 0 and the current-line counter starts at 1.
+Scanner::Scanner(int i)
+    : in(i),
+      bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL),
+      top(NULL), eof(NULL),
+      tchar(0), tline(0), cline(1)
+{
+}
+
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+       uint cnt = tok - bot;
+       if(cnt){
+           memcpy(bot, tok, lim - tok);
+           tok = bot;
+           ptr -= cnt;
+           cursor -= cnt;
+           pos -= cnt;
+           lim -= cnt;
+       }
+       if((top - lim) < BSIZE){
+           uchar *buf = new uchar[(lim - bot) + BSIZE];
+           memcpy(buf, tok, lim - tok);
+           tok = buf;
+           ptr = &buf[ptr - bot];
+           cursor = &buf[cursor - bot];
+           pos = &buf[pos - bot];
+           lim = &buf[lim - bot];
+           top = &lim[BSIZE];
+           delete [] bot;
+           bot = buf;
+       }
+       if((cnt = read(in, (char*) lim, BSIZE)) != BSIZE){
+           eof = &lim[cnt]; *eof++ = '\n';
+       }
+       lim += cnt;
+    }
+    return cursor;
+}
+
+#line 68
+
+
+int Scanner::echo(ostream &out){       // copy input to out up to the next "/*!re2c"; returns 1 there, 0 at end of input
+    uchar *cursor = cur;
+    tok = cursor;
+echo:
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy0;
+yy1:   ++YYCURSOR;
+yy0:
+       if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy4;
+       if(yych != '/') goto yy6;
+yy2:   yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '*') goto yy7;
+yy3:
+#line 82
+       { goto echo; }  // any other character: keep scanning, nothing is written yet
+yy4:   yych = *++YYCURSOR;
+yy5:
+#line 78
+       { if(cursor == eof) RETURN(0);  // newline that ends the input: stop
+                                 out.write(tok, cursor - tok); // flush the pending text through this newline
+                                 tok = pos = cursor; cline++;
+                                 goto echo; }
+yy6:   yych = *++YYCURSOR;
+       goto yy3;
+yy7:   yych = *++YYCURSOR;
+       if(yych == '!') goto yy9;
+yy8:   YYCURSOR = YYMARKER;    // not the marker after all: back up to just after the '/'
+       switch(yyaccept){
+       case 0: goto yy3;
+       }
+yy9:   yych = *++YYCURSOR;
+       if(yych != 'r') goto yy8;
+yy10:  yych = *++YYCURSOR;
+       if(yych != 'e') goto yy8;
+yy11:  yych = *++YYCURSOR;
+       if(yych != '2') goto yy8;
+yy12:  yych = *++YYCURSOR;
+       if(yych != 'c') goto yy8;
+yy13:  yych = *++YYCURSOR;
+yy14:
+#line 75
+       { out.write(tok, &cursor[-7] - tok);    // write the pending text, excluding the 7-character marker
+                                 tok = cursor;
+                                 RETURN(1); }
+}
+#line 83
+
+}
+
+
+int Scanner::scan(){   // return the next token code; 0 at end of input or at a closing "*/"
+    uchar *cursor = cur;
+    uint depth;
+
+scan:  // start of a new token: record its column, line and first character
+    tchar = cursor - pos;
+    tline = cline;
+    tok = cursor;
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy15;
+yy16:  ++YYCURSOR;
+yy15:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= ':'){
+               if(yych <= '"'){
+                       if(yych <= '\n'){
+                               if(yych <= '\b')        goto yy35;
+                               if(yych <= '\t')        goto yy31;
+                               goto yy33;
+                       } else {
+                               if(yych == ' ') goto yy31;
+                               if(yych <= '!') goto yy35;
+                               goto yy23;
+                       }
+               } else {
+                       if(yych <= '*'){
+                               if(yych <= '\'')        goto yy35;
+                               if(yych <= ')') goto yy27;
+                               goto yy21;
+                       } else {
+                               if(yych <= '+') goto yy28;
+                               if(yych == '/') goto yy19;
+                               goto yy35;
+                       }
+               }
+       } else {
+               if(yych <= 'Z'){
+                       if(yych <= '='){
+                               if(yych == '<') goto yy35;
+                               goto yy27;
+                       } else {
+                               if(yych == '?') goto yy28;
+                               if(yych <= '@') goto yy35;
+                               goto yy29;
+                       }
+               } else {
+                       if(yych <= '`'){
+                               if(yych <= '[') goto yy25;
+                               if(yych <= '\\')        goto yy27;
+                               goto yy35;
+                       } else {
+                               if(yych <= 'z') goto yy29;
+                               if(yych <= '{') goto yy17;
+                               if(yych <= '|') goto yy27;
+                               goto yy35;
+                       }
+               }
+       }
+yy17:  yych = *++YYCURSOR;
+yy18:
+#line 96
+       { depth = 1;    // '{' opens a code block; brace nesting starts at 1
+                                 goto code;
+                               }
+yy19:  yych = *++YYCURSOR;
+       if(yych == '*') goto yy54;
+yy20:
+#line 115
+       { RETURN(*tok); }       // single-character token: its own character is the token code
+yy21:  yych = *++YYCURSOR;
+       if(yych == '/') goto yy52;
+yy22:
+#line 117
+       { yylval.op = *tok;     // '*', '+' or '?': closure operator
+                                 RETURN(CLOSE); }
+yy23:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych != '\n')        goto yy48;
+yy24:
+#line 108
+       { fatal("bad string"); }        // '"' with no closing '"' before the end of the line
+yy25:  yyaccept = 1;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych != '\n')        goto yy42;
+yy26:
+#line 113
+       { fatal("bad character constant"); }    // '[' with no closing ']' before the end of the line
+yy27:  yych = *++YYCURSOR;
+       goto yy20;
+yy28:  yych = *++YYCURSOR;
+       goto yy22;
+yy29:  yych = *++YYCURSOR;
+       goto yy40;
+yy30:
+#line 120
+       { cur = cursor; // a letter followed by letters and digits: identifier
+                                 yylval.symbol = Symbol::find(token());
+                                 return ID; }
+yy31:  yych = *++YYCURSOR;
+       goto yy38;
+yy32:
+#line 124
+       { goto scan; }  // run of spaces and tabs: skipped
+yy33:  yych = *++YYCURSOR;
+yy34:
+#line 126
+       { if(cursor == eof) RETURN(0);  // newline that ends the input
+                                 pos = cursor; cline++;
+                                 goto scan;
+                               }
+yy35:  yych = *++YYCURSOR;
+yy36:
+#line 131
+       { cerr << "unexpected character: " << *tok << endl;     // reported, then scanning continues
+                                 goto scan;
+                               }
+yy37:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy38:  if(yych == '\t')        goto yy37;
+       if(yych == ' ') goto yy37;
+       goto yy32;
+yy39:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy40:  if(yych <= '@'){
+               if(yych <= '/') goto yy30;
+               if(yych <= '9') goto yy39;
+               goto yy30;
+       } else {
+               if(yych <= 'Z') goto yy39;
+               if(yych <= '`') goto yy30;
+               if(yych <= 'z') goto yy39;
+               goto yy30;
+       }
+yy41:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy42:  if(yych <= '['){
+               if(yych != '\n')        goto yy41;
+       } else {
+               if(yych <= '\\')        goto yy44;
+               if(yych <= ']') goto yy45;
+               goto yy41;
+       }
+yy43:  YYCURSOR = YYMARKER;    // unterminated string or range: back up and report it
+       switch(yyaccept){
+       case 0: goto yy24;
+       case 1: goto yy26;
+       }
+yy44:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy43;
+       goto yy41;
+yy45:  yych = *++YYCURSOR;
+yy46:
+#line 110
+       { cur = cursor; // "[...]" closed on the same line: character range
+                                 yylval.regexp = ranToRE(token());
+                                 return RANGE; }
+yy47:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy48:  if(yych <= '!'){
+               if(yych == '\n')        goto yy43;
+               goto yy47;
+       } else {
+               if(yych <= '"') goto yy50;
+               if(yych != '\\')        goto yy47;
+       }
+yy49:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy43;
+       goto yy47;
+yy50:  yych = *++YYCURSOR;
+yy51:
+#line 105
+       { cur = cursor; // double-quoted string closed on the same line
+                                 yylval.regexp = strToRE(token());
+                                 return STRING; }
+yy52:  yych = *++YYCURSOR;
+yy53:
+#line 102
+       { tok = cursor; // "*/" outside a comment: token code 0
+                                 RETURN(0); }
+yy54:  yych = *++YYCURSOR;
+yy55:
+#line 99
+       { depth = 1;    // "/*" opens a comment; comment nesting starts at 1
+                                 goto comment; }
+}
+#line 134
+
+
+code:  // inside a '{' ... '}' code block; depth counts the open braces
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy56;
+yy57:  ++YYCURSOR;
+yy56:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= '&'){
+               if(yych <= '\n'){
+                       if(yych <= '\t')        goto yy64;
+                       goto yy62;
+               } else {
+                       if(yych == '"') goto yy66;
+                       goto yy64;
+               }
+       } else {
+               if(yych <= '{'){
+                       if(yych <= '\'')        goto yy67;
+                       if(yych <= 'z') goto yy64;
+                       goto yy60;
+               } else {
+                       if(yych != '}') goto yy64;
+               }
+       }
+yy58:  yych = *++YYCURSOR;
+yy59:
+#line 138
+       { if(--depth == 0){     // '}' matching the opening brace: the whole block is one CODE token
+                                       cur = cursor;
+                                       yylval.token = new Token(token(), tline);
+                                       return CODE;
+                                 }
+                                 goto code; }
+yy60:  yych = *++YYCURSOR;
+yy61:
+#line 144
+       { ++depth;      // nested '{'
+                                 goto code; }
+yy62:  yych = *++YYCURSOR;
+yy63:
+#line 146
+       { if(cursor == eof) fatal("missing '}'");       // input ended inside the code block
+                                 pos = cursor; cline++;
+                                 goto code;
+                               }
+yy64:  yych = *++YYCURSOR;
+yy65:
+#line 150
+       { goto code; }  // anything else, including a complete string or character literal
+yy66:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '\n')        goto yy65;
+       goto yy73;
+yy67:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '\n')        goto yy65;
+       goto yy69;
+yy68:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy69:  if(yych <= '&'){
+               if(yych != '\n')        goto yy68;
+       } else {
+               if(yych <= '\'')        goto yy64;
+               if(yych == '\\')        goto yy71;
+               goto yy68;
+       }
+yy70:  YYCURSOR = YYMARKER;    // literal not closed on this line: only the quote itself is consumed
+       switch(yyaccept){
+       case 0: goto yy65;
+       }
+yy71:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy70;
+       goto yy68;
+yy72:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy73:  if(yych <= '!'){
+               if(yych == '\n')        goto yy70;
+               goto yy72;
+       } else {
+               if(yych <= '"') goto yy64;
+               if(yych != '\\')        goto yy72;
+       }
+yy74:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy70;
+       goto yy72;
+}
+#line 151
+
+
+comment:       // inside a "/*" ... "*/" comment; depth counts the nesting
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy75;
+yy76:  ++YYCURSOR;
+yy75:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= ')'){
+               if(yych == '\n')        goto yy80;
+               goto yy82;
+       } else {
+               if(yych <= '*') goto yy77;
+               if(yych == '/') goto yy79;
+               goto yy82;
+       }
+yy77:  yych = *++YYCURSOR;
+       if(yych == '/') goto yy85;
+yy78:
+#line 165
+       { goto comment; }       // ordinary comment text
+yy79:  yych = *++YYCURSOR;
+       if(yych == '*') goto yy83;
+       goto yy78;
+yy80:  yych = *++YYCURSOR;
+yy81:
+#line 161
+       { if(cursor == eof) RETURN(0);  // input ended inside the comment
+                                 tok = pos = cursor; cline++;
+                                 goto comment;
+                               }
+yy82:  yych = *++YYCURSOR;
+       goto yy78;
+yy83:  yych = *++YYCURSOR;
+yy84:
+#line 159
+       { ++depth;      // nested "/*"
+                                 goto comment; }
+yy85:  yych = *++YYCURSOR;
+yy86:
+#line 155
+       { if(--depth == 0)      // "*/": resume normal scanning once the outermost comment closes
+                                       goto scan;
+                                   else
+                                       goto comment; }
+}
+#line 166
+
+}
+
+// Report msg on cerr, prefixed with the line of the current token and its
+// column (stored 0-based, printed 1-based), then exit with status 1.
+void Scanner::fatal(char *msg){
+    cerr << "line " << tline;
+    cerr << ", column " << (tchar + 1);
+    cerr << ": " << msg << endl;
+    exit(1);
+}
diff --git a/bootstrap/y.tab.h b/bootstrap/y.tab.h
new file mode 100644 (file)
index 0000000..d7b3702
--- /dev/null
@@ -0,0 +1,12 @@
+#define CLOSE 257      /* closure operator; the operator character is in yylval.op */
+#define ID 258 /* identifier; yylval.symbol */
+#define CODE 259       /* brace-delimited code block; yylval.token */
+#define RANGE 260      /* bracketed character range; yylval.regexp */
+#define STRING 261     /* double-quoted string; yylval.regexp */
+typedef union {        /* semantic value that accompanies a token code */
+    Symbol     *symbol;
+    RegExp     *regexp;
+    Token      *token;
+    char       op;
+} YYSTYPE;
+extern YYSTYPE yylval; /* set by the scanner before it returns a token code */
diff --git a/code.cc b/code.cc
new file mode 100644 (file)
index 0000000..8aaf6a8
--- /dev/null
+++ b/code.cc
@@ -0,0 +1,665 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <iomanip.h>
+#include "substr.h"
+#include "globals.h"
+#include "dfa.h"
+
+// Merge neighbouring spans that lead to the same state, in place, so that
+// adjacent spans always have different targets afterwards.
+// Precondition: the list holds at least one span, and all spans cover the
+// same range.
+void Go::compact(){
+    uint last = 0;      // index of the most recently kept span
+    for(uint cur = 1; cur < nSpans; ++cur){
+        if(span[cur].to != span[last].to){
+            ++last;
+            span[last].to = span[cur].to;
+        }
+        // the kept span always extends to the end of the span just visited
+        span[last].ub = span[cur].ub;
+    }
+    nSpans = last + 1;
+}
+
+// Rebuild this span list from base's, leaving out the transitions to x.
+// The caller must already have pointed `span` at storage for at least
+// base->nSpans entries; nSpans is set to the number of spans written.
+// NOTE(review): ranges that led to x appear to be absorbed into a
+// neighbouring span, on the assumption that the caller has already tested
+// for x before using this table -- confirm against Go::genGoto.
+void Go::unmap(Go *base, State *x){
+    Span *s = span, *b = base->span, *e = &b[base->nSpans];
+    uint lb = 0;       // lower bound of the output span being built
+    s->ub = 0;
+    s->to = NULL;
+    for(; b != e; ++b){
+       if(b->to == x){
+           // stretch the current output span over x's range, but only if
+           // that span already covers more than one character
+           if((s->ub - lb) > 1)
+               s->ub = b->ub;
+       } else {
+           if(b->to != s->to){
+               // different target: begin a new output span, unless the
+               // current one is still empty (ub == 0)
+               if(s->ub){
+                   lb = s->ub; ++s;
+               }
+               s->to = b->to;
+           }
+           s->ub = b->ub;
+       }
+    }
+    // the last output span always runs to the end of base's range
+    s->ub = e[-1].ub; ++s;
+    nSpans = s - span;
+}
+
+// For every span of g that leads to s, OR mask m into bm[c] for each
+// character c of that span.  A span's lower bound is the upper bound of the
+// span before it (0 for the first span).
+void doGen(Go *g, State *s, uchar *bm, uchar m){
+    uint lo = 0;
+    for(uint k = 0; k < g->nSpans; ++k){
+        Span *sp = &g->span[k];
+        if(sp->to == s){
+            while(lo < sp->ub){
+                bm[lo] |= m;
+                ++lo;
+            }
+        }
+        lo = sp->ub;
+    }
+}
+
+// Print, via printSpan, every character range of g that leads to s.
+void prt(ostream& o, Go *g, State *s){
+    uint lo = 0;
+    for(uint k = 0; k < g->nSpans; ++k){
+        Span *sp = &g->span[k];
+        if(sp->to == s)
+            printSpan(o, lo, sp->ub);
+        lo = sp->ub;
+    }
+}
+
+bool matches(Go *g1, State *s1, Go *g2, State *s2){
+Span *b1 = g1->span, *e1 = &b1[g1->nSpans];
+uint lb1 = 0;
+Span *b2 = g2->span, *e2 = &b2[g2->nSpans];
+uint lb2 = 0;
+for(;;){
+    for(; b1 < e1 && b1->to != s1; ++b1) lb1 = b1->ub;
+    for(; b2 < e2 && b2->to != s2; ++b2) lb2 = b2->ub;
+    if(b1 == e1) return b2 == e2;
+    if(b2 == e2) return false;
+    if(lb1 != lb2 || b1->ub != b2->ub) return false;
+    ++b1; ++b2;
+}
+}
+
+// One yybm bit table entry: the set of characters on which `go` leads to
+// `on`.  All bitmaps are chained through `next`, starting at `first`.
+class BitMap {
+public:
+static BitMap  *first; // head of the list; new bitmaps are pushed at the front
+Go                     *go;    // transition table the character set is taken from
+State          *on;    // target state that selects the spans of go
+BitMap         *next;
+uint           i;      // offset of this bitmap's table inside yybm; assigned by gen()
+uchar          m;      // bit mask of this bitmap inside that table; assigned by gen()
+public:
+static BitMap *find(Go*, State*);      // matching bitmap, created if there is none
+static BitMap *find(State*);   // first bitmap whose `on` is the state, or NULL
+static void gen(ostream&, uint, uint); // emit the yybm array
+static void stats();   // debugging dump to cerr; also empties the list
+BitMap(Go*, State*);
+};
+
+BitMap *BitMap::first = NULL;
+
+// Record the (go, on) pair and push the new bitmap onto the front of the
+// global list.  i and m are left unset here; BitMap::gen assigns them.
+BitMap::BitMap(Go *g, State *x)
+    : go(g), on(x), next(first)
+{
+    first = this;
+}
+
+// Return the existing bitmap describing the same character set as (g, x),
+// or create a new one for (g, x) when there is none.
+BitMap *BitMap::find(Go *g, State *x){
+    BitMap *b = first;
+    while(b && !matches(b->go, b->on, g, x))
+        b = b->next;
+    if(b)
+        return b;
+    return new BitMap(g, x);
+}
+
+// Return the first bitmap in the list whose target state is x, or NULL.
+BitMap *BitMap::find(State *x){
+    BitMap *b = first;
+    while(b && b->on != x)
+        b = b->next;
+    return b;
+}
+
+void BitMap::gen(ostream &o, uint lb, uint ub){
+    BitMap *b = first;
+    if(b){
+       o << "\tstatic unsigned char yybm[] = {";
+       uint n = ub - lb;
+       uchar *bm = new uchar[n];
+       memset(bm, 0, n);
+       for(uint i = 0; b; i += n){
+           for(uchar m = 0x80; b && m; b = b->next, m >>= 1){
+               b->i = i; b->m = m;
+               doGen(b->go, b->on, bm-lb, m);
+           }
+           for(uint j = 0; j < n; ++j){
+               if(j%8 == 0) o << "\n\t";
+               o << setw(3) << (uint) bm[j] << ", ";
+           }
+       }
+       o << "\n\t};\n";
+    }
+}
+
+// Debugging aid: print the character set of every bitmap to cerr, one per
+// line, followed by the number of bitmaps, then reset the list head to NULL
+// (the bitmap objects themselves are not freed).
+void BitMap::stats(){
+    uint count = 0;
+    BitMap *b = first;
+    while(b){
+        prt(cerr, b->go, b->on);
+        cerr << endl;
+        ++count;
+        b = b->next;
+    }
+    cerr << count << " bitmaps\n";
+    first = NULL;
+}
+
+// Emit a jump to the label of state `to`.
+void genGoTo(ostream &o, State *to){
+    o << "\tgoto yy";
+    o << to->label;
+    o << ";\n";
+}
+
+// Emit the head of a test of yych against character v using the comparison
+// operator text cmp.  No statement and no newline follow; the caller
+// supplies what comes after the condition.
+void genIf(ostream &o, char *cmp, uint v){
+    o << "\tif(yych ";
+    o << cmp;
+    o << " '";
+    prtCh(o, v);
+    o << "')";
+}
+
+// Write i tab characters to o.
+void indent(ostream &o, uint i){
+    for(uint k = 0; k < i; ++k)
+        o << "\t";
+}
+
+// Emit the check that at least n input characters are available (calling
+// YYFILL(n) otherwise), followed by the load of the current character.
+static void need(ostream &o, uint n){
+    if(n != 1){
+        o << "\tif((YYLIMIT - YYCURSOR) < " << n << ") YYFILL(" << n << ");\n";
+    } else {
+        // a single character needs only an equality test
+        o << "\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n";
+    }
+    o << "\tyych = *YYCURSOR;\n";
+}
+
+// Emit the code that advances to the next input character.  A state with a
+// non-NULL link gets a fill check sized by its depth; any other state just
+// advances and loads.
+void Match::emit(ostream &o){
+    if(!state->link){
+        o << "\tyych = *++YYCURSOR;\n";
+        return;
+    }
+    o << "\t++YYCURSOR;\n";
+    need(o, state->depth);
+}
+
+// Like Match::emit, but also emits the extra entry label yy<label> after
+// the cursor increment, so that jumping to it skips the increment.
+void Enter::emit(ostream &o){
+    if(state->link)
+        o << "\t++YYCURSOR;\n";
+    else
+        o << "\tyych = *++YYCURSOR;\n";
+    o << "yy" << label << ":\n";
+    if(state->link)
+        need(o, state->depth);
+}
+
+// Emit the code that records `selector` in yyaccept and the advanced cursor
+// in YYMARKER, then loads the next character (with a fill check when the
+// state has a non-NULL link).
+void Save::emit(ostream &o){
+    o << "\tyyaccept = " << selector << ";\n";
+    if(!state->link){
+        o << "\tyych = *(YYMARKER = ++YYCURSOR);\n";
+        return;
+    }
+    o << "\tYYMARKER = ++YYCURSOR;\n";
+    need(o, state->depth);
+}
+
+// Action for state s that emits no code of its own.
+Move::Move(State *s)
+    : Action(s)
+{
+}
+
+// Intentionally emits nothing.
+void Move::emit(ostream &o){
+}
+
+// Action for state x that dispatches on yyaccept.  s and r are arrays of n
+// entries: s[k] is the yyaccept value saved for rule k (~0u when none) and
+// r[k] the state to jump to.  The arrays are referenced, not copied.
+Accept::Accept(State *x, uint n, uint *s, State **r)
+    : Action(x),
+      nRules(n), saves(s), rules(r)
+{
+}
+
+void Accept::emit(ostream &o){
+    bool first = true;
+    for(uint i = 0; i < nRules; ++i)
+       if(saves[i] != ~0u){
+           if(first){
+               first = false;
+               o << "\tYYCURSOR = YYMARKER;\n";
+               o << "\tswitch(yyaccept){\n";
+           }
+           o << "\tcase " << saves[i] << ":";
+           genGoTo(o, rules[i]);
+       }
+    if(!first)
+       o << "\t}\n";
+}
+
+// Action for state s that emits the user code of rule r.
+Rule::Rule(State *s, RuleOp *r)
+    : Action(s),
+      rule(r)
+{
+}
+
+// Emit the user action of the rule, preceded by a #line directive naming
+// its source line.  When the rule's context has a known, non-zero fixed
+// length, the cursor is first moved back by that many characters.
+void Rule::emit(ostream &o){
+    uint back = rule->ctx->fixedLength();
+    bool rewind = back != ~0u && back > 0u;
+    if(rewind)
+        o << "\tYYCURSOR -= " << back << ";";
+    o << "\n#line " << rule->code->line;
+    o << "\n\t" << rule->code->text << "\n";
+}
+
+// Emit a linear chain of `if(yych ...) goto ...;` tests for the n spans
+// starting at s, each line indented by i tabs.
+// `next` is the state whose code follows immediately: a final jump to it is
+// left out so that control simply falls through.
+void doLinear(ostream &o, uint i, Span *s, uint n, State *next){
+    for(;;){
+       State *bg = s[0].to;
+       // a one-character span sitting between two spans that share a
+       // target is handled with a single == (or !=) test
+       while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){
+           if(s[1].to == next && n == 3){
+               // the lone character falls through; everything else goes to bg
+               indent(o, i); genIf(o, "!=", s[0].ub); genGoTo(o, bg);
+               return;
+           } else {
+               indent(o, i); genIf(o, "==", s[0].ub); genGoTo(o, s[1].to);
+           }
+           n -= 2; s += 2;
+       }
+       if(n == 1){
+           // last span: jump unless it is the fall-through state
+           if(bg != next){
+               indent(o, i); genGoTo(o, s[0].to);
+           }
+           return;
+       } else if(n == 2 && bg == next){
+           // the lower span falls through, so only the upper one is tested
+           indent(o, i); genIf(o, ">=", s[0].ub); genGoTo(o, s[1].to);
+           return;
+       } else {
+           indent(o, i); genIf(o, "<=", s[0].ub - 1); genGoTo(o, bg);
+           n -= 1; s += 1;
+       }
+    }
+}
+
+// Emit this transition table as a linear if-chain with no extra indentation.
+void Go::genLinear(ostream &o, State *next){
+    const uint level = 0;
+    doLinear(o, level, span, nSpans, next);
+}
+
+void genCases(ostream &o, uint lb, Span *s){
+    if(lb < s->ub){
+       for(;;){
+           o << "\tcase '"; prtCh(o, lb); o << "':";
+           if(++lb == s->ub)
+               break;
+           o << "\n";
+       }
+    }
+}
+
+// Emit this transition table as a switch on yych.  Tables with at most two
+// spans are emitted as a linear if-chain instead.
+// The target of the last span becomes the `default:` branch; the case
+// labels of the remaining spans are grouped by target state.
+void Go::genSwitch(ostream &o, State *next){
+    if(nSpans <= 2){
+       genLinear(o, next);
+    } else {
+       State *def = span[nSpans-1].to;
+       // the last span always goes to def, so at most nSpans-1 spans remain
+       Span **sP = new Span*[nSpans-1], **r, **s, **t;
+
+       // collect pointers to the spans that do not go to the default state
+       t = &sP[0];
+       for(uint i = 0; i < nSpans; ++i)
+           if(span[i].to != def)
+               *(t++) = &span[i];
+
+       o << "\tswitch(yych){\n";
+       // each pass emits all cases for the target of the first remaining
+       // span and compacts the others to the front of sP
+       while(t != &sP[0]){
+           r = s = &sP[0];
+           // a span's lower bound is the previous span's ub (0 for the first)
+           if(*s == &span[0])
+               genCases(o, 0, *s);
+           else
+               genCases(o, (*s)[-1].ub, *s);
+           State *to = (*s)->to;
+           while(++s < t){
+               if((*s)->to == to)
+                   genCases(o, (*s)[-1].ub, *s);
+               else
+                   *(r++) = *s;
+           }
+           genGoTo(o, to);
+           t = r;
+       }
+       o << "\tdefault:";
+       genGoTo(o, def);
+       o << "\t}\n";
+
+       delete [] sP;
+    }
+}
+
+void doBinary(ostream &o, uint i, Span *s, uint n, State *next){
+    if(n <= 4){
+       doLinear(o, i, s, n, next);
+    } else {
+       uint h = n/2;
+       indent(o, i); genIf(o, "<=", s[h-1].ub - 1); o << "{\n";
+       doBinary(o, i+1, &s[0], h, next);
+       indent(o, i); o << "\t} else {\n";
+       doBinary(o, i+1, &s[h], n - h, next);
+       indent(o, i); o << "\t}\n";
+    }
+}
+
+// Emit this transition table as a binary decision tree with no extra
+// indentation.
+void Go::genBinary(ostream &o, State *next){
+    const uint level = 0;
+    doBinary(o, level, span, nSpans, next);
+}
+
+void Go::genBase(ostream &o, State *next){
+    if(nSpans == 0)
+       return;
+    if(!sFlag){
+       genSwitch(o, next);
+       return;
+    }
+    if(nSpans > 8){
+       Span *bot = &span[0], *top = &span[nSpans-1];
+       uint util;
+       if(bot[0].to == top[0].to){
+           util = (top[-1].ub - bot[0].ub)/(nSpans - 2);
+       } else {
+           if(bot[0].ub > (top[0].ub - top[-1].ub)){
+               util = (top[0].ub - bot[0].ub)/(nSpans - 1);
+           } else {
+               util = top[-1].ub/(nSpans - 1);
+           }
+       }
+       if(util <= 2){
+           genSwitch(o, next);
+           return;
+       }
+    }
+    if(nSpans > 5){
+       genBinary(o, next);
+    } else {
+       genLinear(o, next);
+    }
+}
+
+void Go::genGoto(ostream &o, State *next){
+    if(bFlag){
+       for(uint i = 0; i < nSpans; ++i){
+           State *to = span[i].to;
+           if(to && to->isBase){
+               BitMap *b = BitMap::find(to);
+               if(b && matches(b->go, b->on, this, to)){
+                   Go go;
+                   go.span = new Span[nSpans];
+                   go.unmap(this, to);
+                   o << "\tif(yybm[" << b->i << "+yych] & " << (uint) b->m << ")";
+                   genGoTo(o, to);
+                   go.genBase(o, next);
+                   delete [] go.span;
+                   return;
+               }
+           }
+       }
+    }
+    genBase(o, next);
+}
+
+// Emit this state's label followed by the code of its action.
+void State::emit(ostream &o){
+    o << "yy";
+    o << label;
+    o << ":";
+    action->emit(o);
+}
+
+// Write into x0 a span list for fg expressed relative to bg: wherever fg
+// and bg go to the same state the target becomes bg itself, everywhere else
+// fg's own target is kept.  Neighbouring spans that end up with the same
+// target are merged.  Returns the number of spans written.
+// x0 must be large enough for the result; the only visible caller passes an
+// array of ubChar - lbChar entries.
+uint merge(Span *x0, State *fg, State *bg){
+    Span *x = x0, *f = fg->go.span, *b = bg->go.span;
+    uint nf = fg->go.nSpans, nb = bg->go.nSpans;
+    State *prev = NULL, *to;
+    // NB: we assume both spans are for same range
+    for(;;){
+       if(f->ub == b->ub){
+           // both lists end a span here: emit it and advance both
+           to = f->to == b->to? bg : f->to;
+           if(to == prev){
+               --x;    // same target as the previous output span: extend it
+           } else {
+               x->to = prev = to;
+           }
+           x->ub = f->ub;
+           ++x; ++f; --nf; ++b; --nb;
+           // the only exit: both lists are used up at a common upper bound
+           if(nf == 0 && nb == 0)
+               return x - x0;
+       }
+       while(f->ub < b->ub){
+           // fg's span ends first: emit up to its bound, advance fg only
+           to = f->to == b->to? bg : f->to;
+           if(to == prev){
+               --x;
+           } else {
+               x->to = prev = to;
+           }
+           x->ub = f->ub;
+           ++x; ++f; --nf;
+       }
+       while(b->ub < f->ub){
+           // bg's span ends first: emit up to its bound, advance bg only
+           to = b->to == f->to? bg : f->to;
+           if(to == prev){
+               --x;
+           } else {
+               x->to = prev = to;
+           }
+           x->ub = b->ub;
+           ++x; ++b; --nb;
+       }
+    }
+}
+
+// Depth given to a state once SCC::traverse has popped it off the stack.
+const uint cInfinity = ~0;
+
+// Explicit stack of states used by traverse() while grouping states into
+// strongly connected components.
+class SCC {
+public:
+    State      **top, **stk;   // top: next free slot; stk: base of the array
+public:
+    SCC(uint); // argument is the capacity of the stack, in states
+    ~SCC();
+    void traverse(State*);
+};
+
+// Allocate an empty stack with room for `size` states.
+SCC::SCC(uint size){
+    stk = new State*[size];
+    top = stk;
+}
+
+// Free the stack array; the states it pointed to are not owned by SCC.
+SCC::~SCC(){
+    delete [] stk;
+}
+
+// Depth-first visit of x for strongly-connected-component detection.
+// x is pushed and given its 1-based stack position k as depth; the depth is
+// then lowered to the smallest depth found among its successors.  If it is
+// still k afterwards, x is the root of a component: every state from the
+// top of the stack down to x is popped, given depth cInfinity and linked
+// to x.
+// Expects depth == 0 on every state not yet visited.
+void SCC::traverse(State *x){
+    *top = x;
+    uint k = ++top - stk;
+    x->depth = k;
+    for(uint i = 0; i < x->go.nSpans; ++i){
+       State *y = x->go.span[i].to;
+       if(y){
+           if(y->depth == 0)
+               traverse(y);
+           // popped states carry cInfinity and so never lower x's depth
+           if(y->depth < x->depth)
+               x->depth = y->depth;
+       }
+    }
+    if(x->depth == k)
+       do {
+           (*--top)->depth = cInfinity;
+           (*top)->link = x;
+       } while(*top != x);
+}
+
+uint maxDist(State *s){
+    uint mm = 0;
+    for(uint i = 0; i < s->go.nSpans; ++i){
+       State *t = s->go.span[i].to;
+       if(t){
+           uint m = 1;
+           if(!t->link)
+               m += maxDist(t);
+           if(m > mm)
+               mm = m;
+       }
+    }
+    return mm;
+}
+
+// Second pass after SCC::traverse, over the state list starting at head.
+// A state that is the root of its own component (link == s) and has no
+// transition into that component is not part of a loop: its link is
+// cleared and its depth is left as traverse set it.  Every other state gets
+// depth = maxDist(s).
+void calcDepth(State *head){
+    State *t;
+    for(State *s = head; s; s = s->next){
+       if(s->link == s){
+           for(uint i = 0; i < s->go.nSpans; ++i){
+               t = s->go.span[i].to;
+               // a transition back into s's own component: s is in a loop
+               if(t && t->link == s)
+                   goto inSCC;
+           }
+           s->link = NULL;
+       } else {
+       inSCC:  // also entered by the goto above
+           s->depth = maxDist(s);
+       }
+    }
+}
+// Compute `link` and `depth` for every state: reset both, run the SCC
+// traversal from every state not reached yet, then let calcDepth finish.
+void DFA::findSCCs(){
+    SCC scc(nStates);
+    State *s;
+
+    // depth == 0 marks a state as not visited yet
+    for(s = head; s; s = s->next){
+        s->link = NULL;
+        s->depth = 0;
+    }
+
+    for(s = head; s; s = s->next){
+        if(s->depth == 0)
+            scc.traverse(s);
+    }
+
+    calcDepth(head);
+}
+
+// Split s in two.  A new state carrying a Move action is inserted after s
+// and takes over s's link, rule and transition table; s keeps a single
+// span covering everything up to ubChar that leads to the new state.
+void DFA::split(State *s){
+    State *move = new State;
+    (void) new Move(move);
+    addState(&s->next, move);
+
+    // hand s's behaviour over to the new state
+    move->link = s->link;
+    move->rule = s->rule;
+    move->go = s->go;
+
+    // s itself now only forwards to it
+    Span *only = new Span[1];
+    only[0].ub = ubChar;
+    only[0].to = move;
+    s->rule = NULL;
+    s->go.nSpans = 1;
+    s->go.span = only;
+}
+
+void DFA::emit(ostream &o){
+    static uint label = 0;
+    State *s;
+    uint i;
+
+    findSCCs();
+    head->link = head;
+    head->depth = maxDist(head);
+
+    uint nRules = 0;
+    for(s = head; s; s = s->next)
+       if(s->rule && s->rule->accept >= nRules)
+               nRules = s->rule->accept + 1;
+
+    uint nSaves = 0;
+    uint *saves = new uint[nRules];
+    memset(saves, ~0, (nRules)*sizeof(*saves));
+
+    // mark backtracking points
+    for(s = head; s; s = s->next){
+       RuleOp *ignore = NULL;
+       if(s->rule){
+           for(i = 0; i < s->go.nSpans; ++i)
+               if(s->go.span[i].to && !s->go.span[i].to->rule){
+                   delete s->action;
+                   if(saves[s->rule->accept] == ~0u)
+                       saves[s->rule->accept] = nSaves++;
+                   (void) new Save(s, saves[s->rule->accept]);
+                   continue;
+               }
+           ignore = s->rule;
+       }
+    }
+
+    // insert actions
+    State **rules = new State*[nRules];
+    memset(rules, 0, (nRules)*sizeof(*rules));
+    State *accept = NULL;
+    for(s = head; s; s = s->next){
+       State *ow;
+       if(!s->rule){
+           ow = accept;
+       } else {
+           if(!rules[s->rule->accept]){
+               State *n = new State;
+               (void) new Rule(n, s->rule);
+               rules[s->rule->accept] = n;
+               addState(&s->next, n);
+           }
+           ow = rules[s->rule->accept];
+       }
+       for(i = 0; i < s->go.nSpans; ++i)
+           if(!s->go.span[i].to){
+               if(!ow){
+                   ow = accept = new State;
+                   (void) new Accept(accept, nRules, saves, rules);
+                   addState(&s->next, accept);
+               }
+               s->go.span[i].to = ow;
+           }
+    }
+
+    // split ``base'' states into two parts
+    for(s = head; s; s = s->next){
+       s->isBase = false;
+       if(s->link){
+           for(i = 0; i < s->go.nSpans; ++i){
+               if(s->go.span[i].to == s){
+                   s->isBase = true;
+                   split(s);
+                   if(bFlag)
+                       BitMap::find(&s->next->go, s);
+                   s = s->next;
+                   break;
+               }
+           }
+       }
+    }
+
+    // find ``base'' state, if possible
+    Span *span = new Span[ubChar - lbChar];
+    for(s = head; s; s = s->next){
+       if(!s->link){
+           for(i = 0; i < s->go.nSpans; ++i){
+               State *to = s->go.span[i].to;
+               if(to && to->isBase){
+                   to = to->go.span[0].to;
+                   uint nSpans = merge(span, s, to);
+                   if(nSpans < s->go.nSpans){
+                       delete [] s->go.span;
+                       s->go.nSpans = nSpans;
+                       s->go.span = new Span[nSpans];
+                       memcpy(s->go.span, span, nSpans*sizeof(Span));
+                   }
+                   break;
+               }
+           }
+       }
+    }
+    delete [] span;
+
+    delete head->action;
+
+    o << "{\n\tYYCTYPE yych;\n\tunsigned int yyaccept;\n";
+
+    if(bFlag)
+       BitMap::gen(o, lbChar, ubChar);
+
+    o << "\tgoto yy" << label << ";\n";
+    (void) new Enter(head, label++);
+
+    for(s = head; s; s = s->next)
+       s->label = label++;
+
+    for(s = head; s; s = s->next){
+       s->emit(o);
+       s->go.genGoto(o, s->next);
+    }
+    o << "}\n";
+
+    BitMap::first = NULL;
+
+    delete [] saves;
+    delete [] rules;
+}
diff --git a/dfa.cc b/dfa.cc
new file mode 100644 (file)
index 0000000..c1ff054
--- /dev/null
+++ b/dfa.cc
@@ -0,0 +1,222 @@
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "globals.h"
+#include "substr.h"
+#include "dfa.h"
+
+// Return the octal digit character ('0'..'7') for the low three bits of c.
+inline char octCh(uint c){
+    return '0' + (c & 7);
+}
+
+void prtCh(ostream &o, uchar c){
+    uchar oc = talx[c];
+    switch(oc){
+    case '\'': o << "\\'"; break;
+    case '\n': o << "\\n"; break;
+    case '\t': o << "\\t"; break;
+    case '\v': o << "\\v"; break;
+    case '\b': o << "\\b"; break;
+    case '\r': o << "\\r"; break;
+    case '\f': o << "\\f"; break;
+    case '\a': o << "\\a"; break;
+    case '\\': o << "\\\\"; break;
+    default:
+       if(isprint(oc))
+           o << (char) oc;
+       else
+           o << '\\' << octCh(c/64) << octCh(c/8) << octCh(c);
+    }
+}
+
+// Print the character range [lb, ub) as "[x]" (single character) or "[x-y]".
+// A leading "*" is written when lb is greater than ub.
+void printSpan(ostream &o, uint lb, uint ub){
+    if(lb > ub)
+        o << "*";
+    o << "[";
+    prtCh(o, lb);
+    if((ub - lb) != 1){
+        // more than one character: also show the last one in the range
+        o << "-";
+        prtCh(o, ub-1);
+    }
+    o << "]";
+}
+
+// If this span has a target state, print the range [lb, ub) followed by the
+// target's label.  Always returns ub, which the caller uses as the lower
+// bound of the next span.
+uint Span::show(ostream &o, uint lb){
+    if(!to)
+        return ub;
+    printSpan(o, lb, ub);
+    o << " " << to->label << "; ";
+    return ub;
+}
+
+// Dump one state: its label, the accept number of its rule (if any), and
+// every span of its transition table in order.
+ostream& operator<<(ostream &o, const State &s){
+    o << "state " << s.label;
+    if(s.rule != NULL)
+        o << " accepts " << s.rule->accept;
+    o << "\n";
+    uint lower = 0;
+    uint k = 0;
+    while(k < s.go.nSpans){
+        // each span reports its upper bound, which is the next lower bound
+        lower = s.go.span[k].show(o, lower);
+        ++k;
+    }
+    return o;
+}
+
+// Dump every state of the automaton, in list order, separated by blank lines.
+ostream& operator<<(ostream &o, const DFA &dfa){
+    State *cur = dfa.head;
+    while(cur){
+        o << cur << "\n\n";
+        cur = cur->next;
+    }
+    return o;
+}
+
+// Create an empty state: no rule, no link, no kernel, no action and an
+// empty transition table.  label, next, depth and isBase are not set here.
+State::State(){
+    rule = NULL;
+    link = NULL;
+    kCount = 0;
+    kernel = NULL;
+    action = NULL;
+    go.span = NULL;
+    go.nSpans = 0;
+}
+
+// Release the arrays owned by the state.  The attached action is not
+// deleted here.
+State::~State(){
+    delete [] go.span;
+    delete [] kernel;
+}
+
+static Ins **closure(Ins **cP, Ins *i){
+    while(!isMarked(i)){
+       mark(i);
+       *(cP++) = i;
+       if(i->i.tag == FORK){
+           cP = closure(cP, i + 1);
+           i = (Ins*) i->i.link;
+       } else if(i->i.tag == GOTO){
+           i = (Ins*) i->i.link;
+       } else
+           break;
+    }
+    return cP;
+}
+
+// Scratch entry used by the DFA constructor.  The same array is used in two
+// ways: entry [k].ch records the k-th distinct character seen for the state
+// being processed, while entry [c - lb].to holds the target for character c
+// (first a chain of Ins, later the State returned by findState).
+struct GoTo {
+    Char       ch;
+    void       *to;
+};
+
+// Build the automaton from the instruction array ins (ni entries) for the
+// character range [lb, ub).  States are created on demand by findState()
+// and processed from the toDo worklist until it is empty.
+// rep is indexed by character offset when the spans are built
+// (presumably it maps each character to a representative -- TODO confirm).
+DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep)
+    : lbChar(lb), ubChar(ub) {
+    // work needs one extra slot: findState() NULL-terminates it
+    Ins **work = new Ins*[ni+1];
+    uint nc = ub - lb;
+    GoTo *goTo = new GoTo[nc];
+    Span *span = new Span[nc];
+    memset((char*) goTo, 0, nc*sizeof(GoTo));
+    tail = &head;
+    head = NULL;
+    nStates = 0;
+    toDo = NULL;
+    // the initial state is the closure of the first instruction
+    findState(work, closure(work, &ins[0]) - work);
+    while(toDo){
+       // pop the next unprocessed state
+       State *s = toDo;
+       toDo = s->link;
+
+       Ins **cP, **iP, *i;
+       uint nGoTos = 0;
+       uint j;
+
+       s->rule = NULL;
+       for(iP = s->kernel; (i = *iP); ++iP){
+           if(i->i.tag == CHAR){
+               // chain each instruction onto the list for its character;
+               // the first time a character is seen it is recorded in
+               // goTo[nGoTos].ch (note: this inner j shadows the uint j)
+               for(Ins *j = i + 1; j < (Ins*) i->i.link; ++j){
+                   if(!(j->c.link = goTo[j->c.value - lb].to))
+                       goTo[nGoTos++].ch = j->c.value;
+                   goTo[j->c.value - lb].to = j;
+               }
+           } else if(i->i.tag == TERM){
+               // keep the rule with the smallest accept value
+               if(!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept)
+                   s->rule = (RuleOp*) i->i.link;
+           }
+       }
+
+       // replace each character's instruction chain by its target state
+       for(j = 0; j < nGoTos; ++j){
+           GoTo *go = &goTo[goTo[j].ch - lb];
+           i = (Ins*) go->to;
+           for(cP = work; i; i = (Ins*) i->c.link)
+               cP = closure(cP, i + i->c.bump);
+           go->to = findState(work, cP - work);
+       }
+
+       // merge runs of characters with the same target into spans;
+       // each span stores its exclusive upper bound
+       s->go.nSpans = 0;
+       for(j = 0; j < nc;){
+           State *to = (State*) goTo[rep[j]].to;
+           while(++j < nc && goTo[rep[j]].to == to);
+           span[s->go.nSpans].ub = lb + j;
+           span[s->go.nSpans].to = to;
+           s->go.nSpans++;
+       }
+
+       // clear the targets used by this state so goTo is clean for the next
+       for(j = nGoTos; j-- > 0;)
+           goTo[goTo[j].ch - lb].to = NULL;
+
+       s->go.span = new Span[s->go.nSpans];
+       memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
+
+       // the Action constructor stores the new Match in s->action
+       (void) new Match(s);
+
+    }
+    delete [] work;
+    delete [] goTo;
+    delete [] span;
+}
+
+// Delete every state on the list, unlinking each one before it is destroyed.
+DFA::~DFA(){
+    for(State *cur = head; cur; cur = head){
+        head = cur->next;
+        delete cur;
+    }
+}
+
+// Insert s into the state list in front of whatever *a currently points to
+// and give it the next free label.  When the insertion point is the end of
+// the list, tail is advanced to the new state's next field.
+void DFA::addState(State **a, State *s){
+    s->label = nStates;
+    ++nStates;
+    s->next = *a;
+    *a = s;
+    if(tail == a){
+        tail = &s->next;
+    }
+}
+
+// Return the state whose kernel equals the given instruction set, creating
+// it (and queueing it on toDo) when no such state exists yet.
+// kernel must have room for kCount + 1 entries; it is NULL-terminated and
+// compacted in place.  The instructions are expected to be marked on entry
+// (closure() marks what it collects); all of them are unmarked on return.
+State *DFA::findState(Ins **kernel, uint kCount){
+    Ins **cP, **iP, *i;
+    State *s;
+
+    kernel[kCount] = NULL;
+
+    // keep only CHAR and TERM instructions; the others are unmarked and dropped
+    cP = kernel;
+    for(iP = kernel; (i = *iP); ++iP){
+        if(i->i.tag == CHAR || i->i.tag == TERM){
+            *cP++ = i;
+       } else {
+            unmark(i);
+       }
+    }
+    kCount = cP - kernel;
+    kernel[kCount] = NULL;
+
+    // an existing state matches when it has the same kernel size and every
+    // one of its kernel instructions is currently marked
+    for(s = head; s; s = s->next){
+        if(s->kCount == kCount){
+            for(iP = s->kernel; (i = *iP); ++iP)
+                if(!isMarked(i))
+                    goto nextState;
+            goto unmarkAll;
+        }
+        nextState:;
+    }
+
+    // no match: append a new state and push it on the toDo worklist
+    s = new State;
+    addState(tail, s);
+    s->kCount = kCount;
+    s->kernel = new Ins*[kCount+1];
+    memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*));
+    s->link = toDo;
+    toDo = s;
+
+unmarkAll:
+    for(iP = kernel; (i = *iP); ++iP)
+        unmark(i);
+
+    return s;
+}
diff --git a/dfa.h b/dfa.h
new file mode 100644 (file)
index 0000000..edd018c
--- /dev/null
+++ b/dfa.h
@@ -0,0 +1,149 @@
+#ifndef _dfa_h
+#define _dfa_h
+
+#include <iostream.h>
+#include "re.h"
+
+extern void prtCh(ostream&, uchar);
+extern void printSpan(ostream&, uint, uint);
+
+class DFA;
+class State;
+
+// Abstract base class for the action attached to a State.  Constructing an
+// Action stores it in the state's action field; emit() writes the action's
+// code to the given stream and is implemented by each subclass.
+class Action {
+public:
+    State              *state;         // the state this action is attached to
+public:
+    Action(State*);
+    // Actions are deleted through an Action* (e.g. "delete head->action"),
+    // so the destructor has to be virtual for that delete to be well defined.
+    virtual ~Action() { }
+    virtual void emit(ostream&) = 0;
+};
+
+// Action with no data of its own; the DFA constructor attaches one to every
+// state it builds.  emit() is defined outside this header.
+class Match: public Action {
+public:
+    Match(State*);
+    void emit(ostream&);
+};
+
+// Action carrying a label number, stored by the constructor.
+// emit() is defined outside this header.
+class Enter: public Action {
+public:
+    uint               label;
+public:
+    Enter(State*, uint);
+    void emit(ostream&);
+};
+
+// Match action that additionally carries a selector value, stored by the
+// constructor.  emit() is defined outside this header.
+class Save: public Match {
+public:
+    uint               selector;
+public:
+    Save(State*, uint);
+    void emit(ostream&);
+};
+
+// Action with no data of its own.  Constructor and emit() are defined
+// outside this header.
+class Move: public Action {
+public:
+    Move(State*);
+    void emit(ostream&);
+};
+
+// Action holding a rule count together with a saves array and a rules array.
+// Constructor and emit() are defined outside this header.
+// NOTE(review): the arrays appear to be owned by the caller, which deletes
+// them after emitting -- confirm before adding any cleanup here.
+class Accept: public Action {
+public:
+    uint               nRules;
+    uint               *saves;
+    State              **rules;
+public:
+    Accept(State*, uint, uint*, State**);
+    void emit(ostream&);
+};
+
+// Action referring to a RuleOp.  Constructor and emit() are defined outside
+// this header.
+class Rule: public Action {
+public:
+    RuleOp             *rule;
+public:
+    Rule(State*, RuleOp*);
+    void emit(ostream&);
+};
+
+// One entry of a state's transition table: all characters from the previous
+// span's ub up to, but not including, ub go to state `to` (NULL when there
+// is no target).  show() prints the span given its lower bound and returns ub.
+class Span {
+public:
+    uint               ub;
+    State              *to;
+public:
+    uint show(ostream&, uint);
+};
+
+// Transition table of a state: an array of nSpans spans ordered by upper
+// bound.  The member functions are defined outside this header.
+class Go {
+public:
+    uint               nSpans;
+    Span               *span;
+public:
+    void genGoto(ostream&, State*);
+    void genBase(ostream&, State*);
+    void genLinear(ostream&, State*);
+    void genBinary(ostream&, State*);
+    void genSwitch(ostream&, State*);
+    void compact();
+    void unmap(Go*, State*);
+};
+
+// One state of the automaton.
+//   label   number assigned when the state is added to the DFA's list
+//   rule    rule accepted in this state, or NULL
+//   next    next state in the DFA's state list
+//   link    next state on the DFA's toDo worklist while the DFA is built
+//   kCount  number of entries in kernel (excluding the NULL terminator)
+//   kernel  NULL-terminated array of instructions identifying the state;
+//           deleted by the destructor
+//   go      transition table; its span array is deleted by the destructor
+//   action  action attached to the state (set by the Action constructor)
+class State {
+public:
+    uint               label;
+    RuleOp             *rule;
+    State              *next;
+    State              *link;
+    uint               depth;          // for finding SCCs
+    uint               kCount;
+    Ins                        **kernel;
+    bool               isBase:1;
+    Go                 go;
+    Action             *action;
+public:
+    State();
+    ~State();
+    void emit(ostream&);
+    friend ostream& operator<<(ostream&, const State&);
+    friend ostream& operator<<(ostream&, const State*);
+};
+
+// The automaton: a singly linked list of states built from an instruction
+// array by the constructor.
+//   lbChar, ubChar  character range passed to the constructor
+//   nStates         number of states added so far; also the next label
+//   head, tail      first state and address of the last next pointer
+//   toDo            worklist of states still to be processed (via State::link)
+class DFA {
+public:
+    uint               lbChar;
+    uint               ubChar;
+    uint               nStates;
+    State              *head, **tail;
+    State              *toDo;
+public:
+    DFA(Ins*, uint, uint, uint, Char*);
+    ~DFA();
+    void addState(State**, State*);
+    State *findState(Ins**, uint);
+    void split(State*);
+
+    void findSCCs();
+    void emit(ostream&);
+
+    friend ostream& operator<<(ostream&, const DFA&);
+    friend ostream& operator<<(ostream&, const DFA*);
+};
+
+// Attach this action to s: remember the state and store the action in it.
+inline Action::Action(State *s){
+    state = s;
+    s->action = this;
+}
+
+// Nothing beyond the base-class attachment to s.
+inline Match::Match(State *s) : Action(s) {
+}
+
+// Attach to s and record the label number l.
+inline Enter::Enter(State *s, uint l) : Action(s) {
+    label = l;
+}
+
+// Attach to s and record the selector value i.
+inline Save::Save(State *s, uint i) : Match(s) {
+    selector = i;
+}
+
+// Pointer overload: prints the state s points to (s must not be NULL).
+inline ostream& operator<<(ostream &o, const State *s){
+    o << *s;
+    return o;
+}
+
+// Pointer overload: prints the automaton dfa points to (dfa must not be NULL).
+inline ostream& operator<<(ostream &o, const DFA *dfa){
+    o << *dfa;
+    return o;
+}
+
+#endif
diff --git a/doc/loplas.ps.gz b/doc/loplas.ps.gz
new file mode 100644 (file)
index 0000000..b0be76b
Binary files /dev/null and b/doc/loplas.ps.gz differ
diff --git a/doc/sample.bib b/doc/sample.bib
new file mode 100644 (file)
index 0000000..1f34ab1
--- /dev/null
@@ -0,0 +1,48 @@
+@Article{Bumbulis94,
+  author = {Peter Bumbulis and Donald D. Cowan},
+  title = {RE2C -- A More Versatile Scanner Generator},
+  journal = "ACM Letters on Programming Languages and Systems",
+  volume = 2,
+  number = "1--4",
+  year = 1994,
+  abstract = {
+  It is usually claimed that lexical analysis routines are still coded by
+  hand, despite the widespread availability of scanner generators, for
+  efficiency reasons.  While efficiency is a consideration, there exist
+  freely available scanner generators such as GLA \cite{Gray88} that can
+  generate scanners that are faster than most hand-coded ones.  However,
+  most generated scanners are tailored for a particular environment, and
+  retargetting these scanners to other environments, if possible, is
+  usually complex enough to make a hand-coded scanner more appealing.  In
+  this paper we describe RE2C, a scanner generator that not only generates
+  scanners which are faster (and usually smaller) than those produced by
+  any other scanner generator known to the authors, including GLA, but
+  also adapt easily to any environment.
+  }
+}
+@Article{Gray88,
+  author = {Robert W. Gray},
+  title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That
+  Programmers Can Use},
+  journal = {USENIX Conference Proceedings},
+  year = {1988},
+  month = {June},
+  pages = {147--160},
+  abstract = {Writing an efficient lexical analyzer for even a simple
+  language is not a trivial task, and should not be done by hand. We
+  describe GLA, a tool that generates very efficient scanners. These
+  scanners do not use the conventional transition matrix, but instead
+  use a few 128 element vectors.  Scanning time is only slightly
+  greater than the absolute minimum --- the time it takes to look at
+  each character in a file. The GLA language allows simple, concise
+  specification of scanners. Augmenting regular expressions with
+  auxiliary scanners easily handles nasty problems such as C comments
+  and C literal constants. We formalize the connection between token
+  scanning and token processing by associating a processor with
+  appropriate patterns. A library of canned descriptions simplifies the
+  specification of commonly used language pieces --- such as,
+  C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
+  tuned lexical analysis support modules are provided for error
+  handling, input buffering, storing identifiers in hash tables and
+  manipulating denotations.}
+}
diff --git a/examples/basemmap.c b/examples/basemmap.c
new file mode 100644 (file)
index 0000000..3e5b037
--- /dev/null
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#ifndef        MAP_NORESERVE
+#define        MAP_NORESERVE   0
+#endif
+
+volatile char ch;
+
+/*
+ * Map standard input and read every byte of it once, storing each byte in
+ * the volatile variable ch.
+ * (Presumably the baseline against which the mmap-based scanner example is
+ * timed -- confirm.)
+ * Returns 0, or 1 when the size of standard input cannot be determined.
+ */
+int main(){
+    struct stat statbuf;
+    unsigned char *buf;
+    if(fstat(0, &statbuf) != 0)
+       return 1;
+    buf = (unsigned char*) mmap(NULL, statbuf.st_size, PROT_READ,
+       MAP_SHARED|MAP_NORESERVE, 0, 0);
+    if(buf != (unsigned char*) MAP_FAILED){
+       unsigned char *cur, *lim = &buf[statbuf.st_size];
+       /* the loop must advance and test cur; testing buf never terminates */
+       for(cur = buf; cur != lim; ++cur){
+           ch = *cur;
+       }
+       munmap(buf, statbuf.st_size);
+    }
+    return 0;
+}
diff --git a/examples/c.re b/examples/c.re
new file mode 100644 (file)
index 0000000..419964f
--- /dev/null
@@ -0,0 +1,272 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define        ADDEQ   257
+#define        ANDAND  258
+#define        ANDEQ   259
+#define        ARRAY   260
+#define        ASM     261
+#define        AUTO    262
+#define        BREAK   263
+#define        CASE    264
+#define        CHAR    265
+#define        CONST   266
+#define        CONTINUE        267
+#define        DECR    268
+#define        DEFAULT 269
+#define        DEREF   270
+#define        DIVEQ   271
+#define        DO      272
+#define        DOUBLE  273
+#define        ELLIPSIS        274
+#define        ELSE    275
+#define        ENUM    276
+#define        EQL     277
+#define        EXTERN  278
+#define        FCON    279
+#define        FLOAT   280
+#define        FOR     281
+#define        FUNCTION        282
+#define        GEQ     283
+#define        GOTO    284
+#define        ICON    285
+#define        ID      286
+#define        IF      287
+#define        INCR    288
+#define        INT     289
+#define        LEQ     290
+#define        LONG    291
+#define        LSHIFT  292
+#define        LSHIFTEQ        293
+#define        MODEQ   294
+#define        MULEQ   295
+#define        NEQ     296
+#define        OREQ    297
+#define        OROR    298
+#define        POINTER 299
+#define        REGISTER        300
+#define        RETURN  301
+#define        RSHIFT  302
+#define        RSHIFTEQ        303
+#define        SCON    304
+#define        SHORT   305
+#define        SIGNED  306
+#define        SIZEOF  307
+#define        STATIC  308
+#define        STRUCT  309
+#define        SUBEQ   310
+#define        SWITCH  311
+#define        TYPEDEF 312
+#define        UNION   313
+#define        UNSIGNED        314
+#define        VOID    315
+#define        VOLATILE        316
+#define        WHILE   317
+#define        XOREQ   318
+#define        EOI     319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define        BSIZE   8192
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         s->lim
+#define        YYMARKER        s->ptr
+#define        YYFILL(n)       {cursor = fill(s, cursor);}
+
+#define        RET(i)  {s->cur = cursor; return i;}
+
+/*
+ * Scanner state shared by fill() and scan().
+ *   fd    descriptor fill() reads from
+ *   bot   start of the heap buffer (replaced when fill() reallocates)
+ *   tok   start of the token being scanned
+ *   ptr   backing store for YYMARKER
+ *   cur   scan position saved by RET() and reloaded by scan()
+ *   pos   position just after the last newline seen
+ *   lim   end of the data read so far (YYLIMIT)
+ *   top   end of the allocated buffer
+ *   eof   NULL until a short read; then one past the sentinel newline
+ *   line  incremented at each newline
+ */
+typedef struct Scanner {
+    int                        fd;
+    uchar              *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint               line;
+} Scanner;
+
+/*
+ * Refill the scanner buffer and return the (possibly relocated) cursor.
+ *
+ * Does nothing once end of input has been seen (s->eof set).  Otherwise the
+ * data from the current token onwards is moved to the start of the buffer,
+ * the buffer is reallocated when fewer than BSIZE bytes are free after
+ * s->lim, and up to BSIZE bytes are read from s->fd.  On a short read a
+ * sentinel newline is stored after the data and s->eof is set to point just
+ * past it.  A read error is treated like end of input.  All scanner
+ * pointers and the cursor are adjusted to follow the data.
+ * Exits the program if the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+       uint cnt = s->tok - s->bot;
+       int got;
+       if(cnt){
+           /* source and destination overlap, so memcpy is not allowed */
+           memmove(s->bot, s->tok, s->lim - s->tok);
+           s->tok = s->bot;
+           s->ptr -= cnt;
+           cursor -= cnt;
+           s->pos -= cnt;
+           s->lim -= cnt;
+       }
+       if((s->top - s->lim) < BSIZE){
+           uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+           if(!buf){
+               fprintf(stderr, "fill: out of memory\n");
+               exit(1);
+           }
+           /* s->tok equals s->bot here, so offsets from bot stay valid */
+           memcpy(buf, s->tok, s->lim - s->tok);
+           s->tok = buf;
+           s->ptr = &buf[s->ptr - s->bot];
+           cursor = &buf[cursor - s->bot];
+           s->pos = &buf[s->pos - s->bot];
+           s->lim = &buf[s->lim - s->bot];
+           s->top = &s->lim[BSIZE];
+           free(s->bot);
+           s->bot = buf;
+       }
+       /* keep the result signed: -1 must not turn into a huge count */
+       got = read(s->fd, (char*) s->lim, BSIZE);
+       cnt = (got < 0) ? 0 : (uint) got;
+       if(cnt != BSIZE){
+           s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+       }
+       s->lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Return the code of the next token read through scanner s.
+ *
+ * Scanning resumes at s->cur; RET() stores the position reached back into
+ * s->cur before returning.  s->tok is set to the start of each token.
+ * Blanks and comments are skipped, a newline updates s->pos and s->line,
+ * and EOI is returned when the newline just matched ends at s->eof.  Any
+ * other unmatched character is reported with printf and skipped.
+ * The bodies between the re2c markers are the scanner specification and
+ * are replaced by generated code when the file is processed by re2c.
+ */
+int scan(Scanner *s){
+       uchar *cursor = s->cur;
+std:
+       s->tok = cursor;
+/*!re2c
+any    = [\000-\377];
+O      = [0-7];
+D      = [0-9];
+L      = [a-zA-Z_];
+H      = [a-fA-F0-9];
+E      = [Ee] [+-]? D+;
+FS     = [fFlL];
+IS     = [uUlL]*;
+ESC    = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+       "/*"                    { goto comment; }
+       
+       "auto"                  { RET(AUTO); }
+       "break"                 { RET(BREAK); }
+       "case"                  { RET(CASE); }
+       "char"                  { RET(CHAR); }
+       "const"                 { RET(CONST); }
+       "continue"              { RET(CONTINUE); }
+       "default"               { RET(DEFAULT); }
+       "do"                    { RET(DO); }
+       "double"                { RET(DOUBLE); }
+       "else"                  { RET(ELSE); }
+       "enum"                  { RET(ENUM); }
+       "extern"                { RET(EXTERN); }
+       "float"                 { RET(FLOAT); }
+       "for"                   { RET(FOR); }
+       "goto"                  { RET(GOTO); }
+       "if"                    { RET(IF); }
+       "int"                   { RET(INT); }
+       "long"                  { RET(LONG); }
+       "register"              { RET(REGISTER); }
+       "return"                { RET(RETURN); }
+       "short"                 { RET(SHORT); }
+       "signed"                { RET(SIGNED); }
+       "sizeof"                { RET(SIZEOF); }
+       "static"                { RET(STATIC); }
+       "struct"                { RET(STRUCT); }
+       "switch"                { RET(SWITCH); }
+       "typedef"               { RET(TYPEDEF); }
+       "union"                 { RET(UNION); }
+       "unsigned"              { RET(UNSIGNED); }
+       "void"                  { RET(VOID); }
+       "volatile"              { RET(VOLATILE); }
+       "while"                 { RET(WHILE); }
+       
+       L (L|D)*                { RET(ID); }
+       
+       ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+       (['] (ESC|any\[\n\\'])* ['])
+                               { RET(ICON); }
+       
+       (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                               { RET(FCON); }
+       
+       (["] (ESC|any\[\n\\"])* ["])
+                               { RET(SCON); }
+       
+       "..."                   { RET(ELLIPSIS); }
+       ">>="                   { RET(RSHIFTEQ); }
+       "<<="                   { RET(LSHIFTEQ); }
+       "+="                    { RET(ADDEQ); }
+       "-="                    { RET(SUBEQ); }
+       "*="                    { RET(MULEQ); }
+       "/="                    { RET(DIVEQ); }
+       "%="                    { RET(MODEQ); }
+       "&="                    { RET(ANDEQ); }
+       "^="                    { RET(XOREQ); }
+       "|="                    { RET(OREQ); }
+       ">>"                    { RET(RSHIFT); }
+       "<<"                    { RET(LSHIFT); }
+       "++"                    { RET(INCR); }
+       "--"                    { RET(DECR); }
+       "->"                    { RET(DEREF); }
+       "&&"                    { RET(ANDAND); }
+       "||"                    { RET(OROR); }
+       "<="                    { RET(LEQ); }
+       ">="                    { RET(GEQ); }
+       "=="                    { RET(EQL); }
+       "!="                    { RET(NEQ); }
+       ";"                     { RET(';'); }
+       "{"                     { RET('{'); }
+       "}"                     { RET('}'); }
+       ","                     { RET(','); }
+       ":"                     { RET(':'); }
+       "="                     { RET('='); }
+       "("                     { RET('('); }
+       ")"                     { RET(')'); }
+       "["                     { RET('['); }
+       "]"                     { RET(']'); }
+       "."                     { RET('.'); }
+       "&"                     { RET('&'); }
+       "!"                     { RET('!'); }
+       "~"                     { RET('~'); }
+       "-"                     { RET('-'); }
+       "+"                     { RET('+'); }
+       "*"                     { RET('*'); }
+       "/"                     { RET('/'); }
+       "%"                     { RET('%'); }
+       "<"                     { RET('<'); }
+       ">"                     { RET('>'); }
+       "^"                     { RET('^'); }
+       "|"                     { RET('|'); }
+       "?"                     { RET('?'); }
+
+
+       [ \t\v\f]+              { goto std; }
+
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->pos = cursor; s->line++;
+               goto std;
+           }
+
+       any
+           {
+               printf("unexpected character: %c\n", *s->tok);
+               goto std;
+           }
+*/
+
+comment:
+/*!re2c
+       "*/"                    { goto std; }
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->tok = s->pos = cursor; s->line++;
+               goto comment;
+           }
+        any                    { goto comment; }
+*/
+}
+
+/*
+ * Scan standard input token by token until scan() returns EOI.  The tokens
+ * are discarded; the printf calls inside the loop are debugging output that
+ * is left disabled.  Returns 0.
+ */
+int main(){
+    Scanner in;
+    int t;
+    /* fill() relies on every pointer starting out as NULL */
+    memset((char*) &in, 0, sizeof(in));
+    in.fd = 0;
+    while((t = scan(&in)) != EOI){
+/*
+       printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+       printf("%d\n", t);
+*/
+    }
+    close(in.fd);
+    /* release the buffer allocated by fill(); free(NULL) is a no-op */
+    free(in.bot);
+    return 0;
+}
diff --git a/examples/cmmap.re b/examples/cmmap.re
new file mode 100644 (file)
index 0000000..bc4d498
--- /dev/null
@@ -0,0 +1,267 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define        ADDEQ   257
+#define        ANDAND  258
+#define        ANDEQ   259
+#define        ARRAY   260
+#define        ASM     261
+#define        AUTO    262
+#define        BREAK   263
+#define        CASE    264
+#define        CHAR    265
+#define        CONST   266
+#define        CONTINUE        267
+#define        DECR    268
+#define        DEFAULT 269
+#define        DEREF   270
+#define        DIVEQ   271
+#define        DO      272
+#define        DOUBLE  273
+#define        ELLIPSIS        274
+#define        ELSE    275
+#define        ENUM    276
+#define        EQL     277
+#define        EXTERN  278
+#define        FCON    279
+#define        FLOAT   280
+#define        FOR     281
+#define        FUNCTION        282
+#define        GEQ     283
+#define        GOTO    284
+#define        ICON    285
+#define        ID      286
+#define        IF      287
+#define        INCR    288
+#define        INT     289
+#define        LEQ     290
+#define        LONG    291
+#define        LSHIFT  292
+#define        LSHIFTEQ        293
+#define        MODEQ   294
+#define        MULEQ   295
+#define        NEQ     296
+#define        OREQ    297
+#define        OROR    298
+#define        POINTER 299
+#define        REGISTER        300
+#define        RETURN  301
+#define        RSHIFT  302
+#define        RSHIFTEQ        303
+#define        SCON    304
+#define        SHORT   305
+#define        SIGNED  306
+#define        SIZEOF  307
+#define        STATIC  308
+#define        STRUCT  309
+#define        SUBEQ   310
+#define        SWITCH  311
+#define        TYPEDEF 312
+#define        UNION   313
+#define        UNSIGNED        314
+#define        VOID    315
+#define        VOLATILE        316
+#define        WHILE   317
+#define        XOREQ   318
+#define        EOI     319
+
+typedef unsigned int unint;
+typedef unsigned char uchar;
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         s->lim
+#define        YYMARKER        s->ptr
+#define        YYFILL(n)       {cursor = fill(s, cursor);}
+
+#define        RET(i)  {s->cur = cursor; return i;}
+
+/*
+ * Scanner state shared by fill() and scan().
+ *   tok   start of the token being scanned
+ *   ptr   backing store for YYMARKER
+ *   cur   scan position saved by RET() and reloaded by scan()
+ *   pos   position just after the last newline seen
+ *   lim   end of the available input (YYLIMIT)
+ *   eof   NULL until fill() has run; then one past the sentinel newline
+ *   line  incremented at each newline
+ */
+typedef struct Scanner {
+    uchar              *tok, *ptr, *cur, *pos, *lim, *eof;
+    unint              line;
+} Scanner;
+
+/*
+ * Called when the scanner reaches the end of the input; returns the
+ * (relocated) cursor.
+ *
+ * On the first call the bytes from the current token up to s->lim are
+ * copied into a heap buffer that has room for one extra byte, a sentinel
+ * newline is stored after them, s->eof is set to point just past that
+ * newline, and every scanner pointer and the cursor are moved into the new
+ * buffer.  Later calls (s->eof already set) do nothing.
+ * Exits the program if the buffer cannot be allocated.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+       unint cnt = s->lim - s->tok;
+       uchar *buf = (uchar*) malloc((cnt + 1)*sizeof(uchar));
+       if(!buf){
+           fprintf(stderr, "fill: out of memory\n");
+           exit(1);
+       }
+       memcpy(buf, s->tok, cnt);
+       /* rebase every pointer on the copy before s->tok itself changes */
+       cursor = &buf[cursor - s->tok];
+       s->pos = &buf[s->pos - s->tok];
+       s->ptr = &buf[s->ptr - s->tok];
+       s->lim = &buf[cnt];
+       s->eof = s->lim; *(s->eof)++ = '\n';
+       s->tok = buf;
+    }
+    return cursor;
+}
+
+/*
+ * Return the code of the next token read through scanner s.
+ *
+ * Scanning resumes at s->cur; RET() stores the position reached back into
+ * s->cur before returning.  s->tok is set to the start of each token.
+ * Blanks and comments are skipped, a newline updates s->pos and s->line,
+ * and EOI is returned when the newline just matched ends at s->eof.  Any
+ * other unmatched character is reported with printf and skipped.
+ * The bodies between the re2c markers are the scanner specification and
+ * are replaced by generated code when the file is processed by re2c.
+ */
+int scan(Scanner *s){
+       uchar *cursor = s->cur;
+std:
+       s->tok = cursor;
+/*!re2c
+any    = [\000-\377];
+O      = [0-7];
+D      = [0-9];
+L      = [a-zA-Z_];
+H      = [a-fA-F0-9];
+E      = [Ee] [+-]? D+;
+FS     = [fFlL];
+IS     = [uUlL]*;
+ESC    = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+       "/*"                    { goto comment; }
+       
+       "auto"                  { RET(AUTO); }
+       "break"                 { RET(BREAK); }
+       "case"                  { RET(CASE); }
+       "char"                  { RET(CHAR); }
+       "const"                 { RET(CONST); }
+       "continue"              { RET(CONTINUE); }
+       "default"               { RET(DEFAULT); }
+       "do"                    { RET(DO); }
+       "double"                { RET(DOUBLE); }
+       "else"                  { RET(ELSE); }
+       "enum"                  { RET(ENUM); }
+       "extern"                { RET(EXTERN); }
+       "float"                 { RET(FLOAT); }
+       "for"                   { RET(FOR); }
+       "goto"                  { RET(GOTO); }
+       "if"                    { RET(IF); }
+       "int"                   { RET(INT); }
+       "long"                  { RET(LONG); }
+       "register"              { RET(REGISTER); }
+       "return"                { RET(RETURN); }
+       "short"                 { RET(SHORT); }
+       "signed"                { RET(SIGNED); }
+       "sizeof"                { RET(SIZEOF); }
+       "static"                { RET(STATIC); }
+       "struct"                { RET(STRUCT); }
+       "switch"                { RET(SWITCH); }
+       "typedef"               { RET(TYPEDEF); }
+       "union"                 { RET(UNION); }
+       "unsigned"              { RET(UNSIGNED); }
+       "void"                  { RET(VOID); }
+       "volatile"              { RET(VOLATILE); }
+       "while"                 { RET(WHILE); }
+       
+       L (L|D)*                { RET(ID); }
+       
+       ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+       (['] (ESC|any\[\n\\'])* ['])
+                               { RET(ICON); }
+       
+       (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                               { RET(FCON); }
+       
+       (["] (ESC|any\[\n\\"])* ["])
+                               { RET(SCON); }
+       
+       "..."                   { RET(ELLIPSIS); }
+       ">>="                   { RET(RSHIFTEQ); }
+       "<<="                   { RET(LSHIFTEQ); }
+       "+="                    { RET(ADDEQ); }
+       "-="                    { RET(SUBEQ); }
+       "*="                    { RET(MULEQ); }
+       "/="                    { RET(DIVEQ); }
+       "%="                    { RET(MODEQ); }
+       "&="                    { RET(ANDEQ); }
+       "^="                    { RET(XOREQ); }
+       "|="                    { RET(OREQ); }
+       ">>"                    { RET(RSHIFT); }
+       "<<"                    { RET(LSHIFT); }
+       "++"                    { RET(INCR); }
+       "--"                    { RET(DECR); }
+       "->"                    { RET(DEREF); }
+       "&&"                    { RET(ANDAND); }
+       "||"                    { RET(OROR); }
+       "<="                    { RET(LEQ); }
+       ">="                    { RET(GEQ); }
+       "=="                    { RET(EQL); }
+       "!="                    { RET(NEQ); }
+       ";"                     { RET(';'); }
+       "{"                     { RET('{'); }
+       "}"                     { RET('}'); }
+       ","                     { RET(','); }
+       ":"                     { RET(':'); }
+       "="                     { RET('='); }
+       "("                     { RET('('); }
+       ")"                     { RET(')'); }
+       "["                     { RET('['); }
+       "]"                     { RET(']'); }
+       "."                     { RET('.'); }
+       "&"                     { RET('&'); }
+       "!"                     { RET('!'); }
+       "~"                     { RET('~'); }
+       "-"                     { RET('-'); }
+       "+"                     { RET('+'); }
+       "*"                     { RET('*'); }
+       "/"                     { RET('/'); }
+       "%"                     { RET('%'); }
+       "<"                     { RET('<'); }
+       ">"                     { RET('>'); }
+       "^"                     { RET('^'); }
+       "|"                     { RET('|'); }
+       "?"                     { RET('?'); }
+
+
+       [ \t\v\f]+              { goto std; }
+
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->pos = cursor; s->line++;
+               goto std;
+           }
+
+       any
+           {
+               printf("unexpected character: %c\n", *s->tok);
+               goto std;
+           }
+*/
+
+comment:
+/*!re2c
+       "*/"                    { goto std; }
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->tok = s->pos = cursor; s->line++;
+               goto comment;
+           }
+        any                    { goto comment; }
+*/
+}
+
+#ifndef        MAP_NORESERVE
+#define        MAP_NORESERVE   0
+#endif
+
+/*
+ * Map standard input into memory and scan it token by token until scan()
+ * returns EOI.  The tokens are discarded; the printf calls inside the loop
+ * are debugging output that is left disabled.
+ * Returns 0, or 1 when the size of standard input cannot be determined.
+ */
+int main(){
+    Scanner in;
+    struct stat statbuf;
+    uchar *buf;
+    if(fstat(0, &statbuf) != 0)
+       return 1;
+    buf = (uchar*) mmap(NULL, statbuf.st_size, PROT_READ,
+       MAP_SHARED|MAP_NORESERVE, 0, 0);
+    if(buf != (uchar*) MAP_FAILED){
+       int t;
+       /* fill() reads s->ptr and s->tok, so no field may be left indeterminate */
+       memset((char*) &in, 0, sizeof(in));
+       in.lim = &(in.cur = buf)[statbuf.st_size];
+       in.pos = NULL;
+       in.eof = NULL;
+       while((t = scan(&in)) != EOI){
+/*
+           printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+           printf("%d\n", t);
+*/
+       }
+       munmap(buf, statbuf.st_size);
+    }
+    return 0;
+}
diff --git a/examples/cnokw.re b/examples/cnokw.re
new file mode 100644 (file)
index 0000000..bdc1279
--- /dev/null
@@ -0,0 +1,239 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define        ADDEQ   257
+#define        ANDAND  258
+#define        ANDEQ   259
+#define        ARRAY   260
+#define        ASM     261
+#define        AUTO    262
+#define        BREAK   263
+#define        CASE    264
+#define        CHAR    265
+#define        CONST   266
+#define        CONTINUE        267
+#define        DECR    268
+#define        DEFAULT 269
+#define        DEREF   270
+#define        DIVEQ   271
+#define        DO      272
+#define        DOUBLE  273
+#define        ELLIPSIS        274
+#define        ELSE    275
+#define        ENUM    276
+#define        EQL     277
+#define        EXTERN  278
+#define        FCON    279
+#define        FLOAT   280
+#define        FOR     281
+#define        FUNCTION        282
+#define        GEQ     283
+#define        GOTO    284
+#define        ICON    285
+#define        ID      286
+#define        IF      287
+#define        INCR    288
+#define        INT     289
+#define        LEQ     290
+#define        LONG    291
+#define        LSHIFT  292
+#define        LSHIFTEQ        293
+#define        MODEQ   294
+#define        MULEQ   295
+#define        NEQ     296
+#define        OREQ    297
+#define        OROR    298
+#define        POINTER 299
+#define        REGISTER        300
+#define        RETURN  301
+#define        RSHIFT  302
+#define        RSHIFTEQ        303
+#define        SCON    304
+#define        SHORT   305
+#define        SIGNED  306
+#define        SIZEOF  307
+#define        STATIC  308
+#define        STRUCT  309
+#define        SUBEQ   310
+#define        SWITCH  311
+#define        TYPEDEF 312
+#define        UNION   313
+#define        UNSIGNED        314
+#define        VOID    315
+#define        VOLATILE        316
+#define        WHILE   317
+#define        XOREQ   318
+#define        EOI     319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define        BSIZE   8192
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         s->lim
+#define        YYMARKER        s->ptr
+#define        YYFILL(n)       {cursor = fill(s, cursor);}
+
+#define        RET(i)  {s->cur = cursor; return i;}
+
+/*
+ * Scanner state shared by fill() and scan().
+ *   fd   - descriptor fill() reads input from
+ *   bot  - start of the malloc'ed buffer;  top - one past its end
+ *   tok  - start of the token being scanned
+ *   ptr  - backtrack position (YYMARKER)
+ *   cur  - where the next scan() call resumes (saved by RET)
+ *   pos  - position just after the most recent newline
+ *   lim  - end of the data read so far (YYLIMIT)
+ *   eof  - NULL until fill() sees a short read, then just past the
+ *          '\n' sentinel it appends
+ *   line - count of newlines scan() has consumed
+ */
+typedef struct Scanner {
+    int                        fd;
+    uchar              *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint               line;
+} Scanner;
+
+/*
+ * Top up the scanner buffer from s->fd and return the cursor, adjusted for
+ * any movement of the buffered data.  Once s->eof is set this is a no-op.
+ *
+ * Steps: slide the bytes from s->tok onward down to s->bot, grow the buffer
+ * if fewer than BSIZE bytes are free past s->lim, then read up to BSIZE
+ * bytes.  A short read is treated as end of input: a '\n' sentinel is
+ * stored after the data and s->eof is left pointing just past it.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+       uint cnt = s->tok - s->bot;
+       int got;
+       if(cnt){
+           /* the two ranges may overlap, which memcpy() does not permit */
+           memmove(s->bot, s->tok, s->lim - s->tok);
+           s->tok = s->bot;
+           s->ptr -= cnt;
+           cursor -= cnt;
+           s->pos -= cnt;
+           s->lim -= cnt;
+       }
+       if((s->top - s->lim) < BSIZE){
+           uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+           memcpy(buf, s->tok, s->lim - s->tok);
+           s->tok = buf;
+           s->ptr = &buf[s->ptr - s->bot];
+           cursor = &buf[cursor - s->bot];
+           s->pos = &buf[s->pos - s->bot];
+           s->lim = &buf[s->lim - s->bot];
+           s->top = &s->lim[BSIZE];
+           free(s->bot);
+           s->bot = buf;
+       }
+       /* read() returns -1 on error; kept in the unsigned cnt that value
+          would index far outside the buffer, so treat it as end of input */
+       got = read(s->fd, (char*) s->lim, BSIZE);
+       cnt = (got < 0) ? 0 : got;
+       if(cnt != BSIZE){
+           s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+       }
+       s->lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Return the code of the next C token read through s, resuming at s->cur.
+ * Single-character tokens are returned as their character value, the rest
+ * as one of the token codes defined above; EOI is returned when the
+ * newline just consumed ends at s->eof.  Blanks and block comments are
+ * skipped, and a character no rule accepts is reported on stdout and
+ * skipped.  Every newline advances s->line and records s->pos.
+ */
+int scan(Scanner *s){
+       uchar *cursor = s->cur;
+std:
+       s->tok = cursor;
+/*!re2c
+any    = [\000-\377];
+O      = [0-7];
+D      = [0-9];
+L      = [a-zA-Z_];
+H      = [a-fA-F0-9];
+E      = [Ee] [+-]? D+;
+FS     = [fFlL];
+IS     = [uUlL]*;
+ESC    = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+       "/*"                    { goto comment; }
+       
+       L (L|D)*                { RET(ID); }
+       
+       ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+       (['] (ESC|any\[\n\\'])* ['])
+                               { RET(ICON); }
+       
+       (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                               { RET(FCON); }
+       
+       (["] (ESC|any\[\n\\"])* ["])
+                               { RET(SCON); }
+       
+       "..."                   { RET(ELLIPSIS); }
+       ">>="                   { RET(RSHIFTEQ); }
+       "<<="                   { RET(LSHIFTEQ); }
+       "+="                    { RET(ADDEQ); }
+       "-="                    { RET(SUBEQ); }
+       "*="                    { RET(MULEQ); }
+       "/="                    { RET(DIVEQ); }
+       "%="                    { RET(MODEQ); }
+       "&="                    { RET(ANDEQ); }
+       "^="                    { RET(XOREQ); }
+       "|="                    { RET(OREQ); }
+       ">>"                    { RET(RSHIFT); }
+       "<<"                    { RET(LSHIFT); }
+       "++"                    { RET(INCR); }
+       "--"                    { RET(DECR); }
+       "->"                    { RET(DEREF); }
+       "&&"                    { RET(ANDAND); }
+       "||"                    { RET(OROR); }
+       "<="                    { RET(LEQ); }
+       ">="                    { RET(GEQ); }
+       "=="                    { RET(EQL); }
+       "!="                    { RET(NEQ); }
+       ";"                     { RET(';'); }
+       "{"                     { RET('{'); }
+       "}"                     { RET('}'); }
+       ","                     { RET(','); }
+       ":"                     { RET(':'); }
+       "="                     { RET('='); }
+       "("                     { RET('('); }
+       ")"                     { RET(')'); }
+       "["                     { RET('['); }
+       "]"                     { RET(']'); }
+       "."                     { RET('.'); }
+       "&"                     { RET('&'); }
+       "!"                     { RET('!'); }
+       "~"                     { RET('~'); }
+       "-"                     { RET('-'); }
+       "+"                     { RET('+'); }
+       "*"                     { RET('*'); }
+       "/"                     { RET('/'); }
+       "%"                     { RET('%'); }
+       "<"                     { RET('<'); }
+       ">"                     { RET('>'); }
+       "^"                     { RET('^'); }
+       "|"                     { RET('|'); }
+       "?"                     { RET('?'); }
+
+
+       [ \t\v\f]+              { goto std; }
+
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->pos = cursor; s->line++;
+               goto std;
+           }
+
+       any
+           {
+               printf("unexpected character: %c\n", *s->tok);
+               goto std;
+           }
+*/
+
+/* Inside a block comment: skip to its closing delimiter, still counting lines. */
+comment:
+/*!re2c
+       "*/"                    { goto std; }
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->tok = s->pos = cursor; s->line++;
+               goto comment;
+           }
+        any                    { goto comment; }
+*/
+}
+
+/*
+ * Tokenize standard input: call scan() until it reports EOI, then release
+ * the buffer fill() allocated.  Always returns 0.
+ */
+int main(void){
+    Scanner in;
+    int t;
+    memset((char*) &in, 0, sizeof(in));
+    in.fd = 0;
+    while((t = scan(&in)) != EOI){
+/*
+       printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+       printf("%d\n", t);
+*/
+    }
+    free(in.bot);      /* still NULL (a no-op) if fill() never ran */
+    close(in.fd);
+    return 0;
+}
diff --git a/examples/cunroll.re b/examples/cunroll.re
new file mode 100644 (file)
index 0000000..dd9d805
--- /dev/null
@@ -0,0 +1,258 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define        ADDEQ   257
+#define        ANDAND  258
+#define        ANDEQ   259
+#define        ARRAY   260
+#define        ASM     261
+#define        AUTO    262
+#define        BREAK   263
+#define        CASE    264
+#define        CHAR    265
+#define        CONST   266
+#define        CONTINUE        267
+#define        DECR    268
+#define        DEFAULT 269
+#define        DEREF   270
+#define        DIVEQ   271
+#define        DO      272
+#define        DOUBLE  273
+#define        ELLIPSIS        274
+#define        ELSE    275
+#define        ENUM    276
+#define        EQL     277
+#define        EXTERN  278
+#define        FCON    279
+#define        FLOAT   280
+#define        FOR     281
+#define        FUNCTION        282
+#define        GEQ     283
+#define        GOTO    284
+#define        ICON    285
+#define        ID      286
+#define        IF      287
+#define        INCR    288
+#define        INT     289
+#define        LEQ     290
+#define        LONG    291
+#define        LSHIFT  292
+#define        LSHIFTEQ        293
+#define        MODEQ   294
+#define        MULEQ   295
+#define        NEQ     296
+#define        OREQ    297
+#define        OROR    298
+#define        POINTER 299
+#define        REGISTER        300
+#define        RETURN  301
+#define        RSHIFT  302
+#define        RSHIFTEQ        303
+#define        SCON    304
+#define        SHORT   305
+#define        SIGNED  306
+#define        SIZEOF  307
+#define        STATIC  308
+#define        STRUCT  309
+#define        SUBEQ   310
+#define        SWITCH  311
+#define        TYPEDEF 312
+#define        UNION   313
+#define        UNSIGNED        314
+#define        VOID    315
+#define        VOLATILE        316
+#define        WHILE   317
+#define        XOREQ   318
+#define        EOI     319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define        BSIZE   8192
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         s->lim
+#define        YYMARKER        s->ptr
+#define        YYFILL(n)       {cursor = fill(s, cursor);}
+
+#define        RET(i)  {s->cur = cursor; return i;}
+
+/*
+ * Scanner state shared by fill() and scan().
+ *   fd   - descriptor fill() reads input from
+ *   bot  - start of the malloc'ed buffer;  top - one past its end
+ *   tok  - start of the token being scanned
+ *   ptr  - backtrack position (YYMARKER)
+ *   cur  - where the next scan() call resumes (saved by RET)
+ *   pos  - position just after the most recent newline
+ *   lim  - end of the data read so far (YYLIMIT)
+ *   eof  - NULL until fill() sees a short read, then just past the
+ *          '\n' sentinel it appends
+ *   line - count of newlines scan() has consumed
+ */
+typedef struct Scanner {
+    int                        fd;
+    uchar              *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint               line;
+} Scanner;
+
+/*
+ * Top up the scanner buffer from s->fd and return the cursor, adjusted for
+ * any movement of the buffered data.  Once s->eof is set this is a no-op.
+ *
+ * Steps: slide the bytes from s->tok onward down to s->bot, grow the buffer
+ * if fewer than BSIZE bytes are free past s->lim, then read up to BSIZE
+ * bytes.  A short read is treated as end of input: a '\n' sentinel is
+ * stored after the data and s->eof is left pointing just past it.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+       uint cnt = s->tok - s->bot;
+       int got;
+       if(cnt){
+           /* the two ranges may overlap, which memcpy() does not permit */
+           memmove(s->bot, s->tok, s->lim - s->tok);
+           s->tok = s->bot;
+           s->ptr -= cnt;
+           cursor -= cnt;
+           s->pos -= cnt;
+           s->lim -= cnt;
+       }
+       if((s->top - s->lim) < BSIZE){
+           uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+           memcpy(buf, s->tok, s->lim - s->tok);
+           s->tok = buf;
+           s->ptr = &buf[s->ptr - s->bot];
+           cursor = &buf[cursor - s->bot];
+           s->pos = &buf[s->pos - s->bot];
+           s->lim = &buf[s->lim - s->bot];
+           s->top = &s->lim[BSIZE];
+           free(s->bot);
+           s->bot = buf;
+       }
+       /* read() returns -1 on error; kept in the unsigned cnt that value
+          would index far outside the buffer, so treat it as end of input */
+       got = read(s->fd, (char*) s->lim, BSIZE);
+       cnt = (got < 0) ? 0 : got;
+       if(cnt != BSIZE){
+           s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+       }
+       s->lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Return the code of the next C token read through s, resuming at s->cur.
+ * Single-character tokens are returned as their character value, the rest
+ * as one of the token codes defined above; EOI is returned when the
+ * newline just consumed ends at s->eof.  Blanks and block comments are
+ * skipped, and a character no rule accepts is reported on stdout and
+ * skipped.  Every newline advances s->line and records s->pos.
+ *
+ * This variant spells out identifiers of up to eight characters and runs
+ * of up to eight comment characters as separate rules.  X, the class used
+ * for those comment runs, must not contain the newline: otherwise a run
+ * such as X X is a longer match than the "\n" rule, so the line is not
+ * counted and the end-of-input sentinel can be consumed without the
+ * s->eof test ever running.
+ */
+int scan(Scanner *s){
+       uchar *cursor = s->cur;
+std:
+       s->tok = cursor;
+/*!re2c
+any    = [\000-\377];
+O      = [0-7];
+D      = [0-9];
+L      = [a-zA-Z_];
+I      = L|D;
+H      = [a-fA-F0-9];
+E      = [Ee] [+-]? D+;
+FS     = [fFlL];
+IS     = [uUlL]*;
+ESC    = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+X      = any\[*/\n];
+*/
+
+/*!re2c
+       "/*"                    { goto comment; }
+       
+       
+       L                       { RET(ID); }
+       L I                     { RET(ID); }
+       L I I                   { RET(ID); }
+       L I I I                 { RET(ID); }
+       L I I I I               { RET(ID); }
+       L I I I I I             { RET(ID); }
+       L I I I I I I           { RET(ID); }
+       L I I I I I I I         { RET(ID); }
+       L I*                    { RET(ID); }
+       
+       ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+       (['] (ESC|any\[\n\\'])* ['])
+                               { RET(ICON); }
+       
+       (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                               { RET(FCON); }
+       
+       (["] (ESC|any\[\n\\"])* ["])
+                               { RET(SCON); }
+       
+       "..."                   { RET(ELLIPSIS); }
+       ">>="                   { RET(RSHIFTEQ); }
+       "<<="                   { RET(LSHIFTEQ); }
+       "+="                    { RET(ADDEQ); }
+       "-="                    { RET(SUBEQ); }
+       "*="                    { RET(MULEQ); }
+       "/="                    { RET(DIVEQ); }
+       "%="                    { RET(MODEQ); }
+       "&="                    { RET(ANDEQ); }
+       "^="                    { RET(XOREQ); }
+       "|="                    { RET(OREQ); }
+       ">>"                    { RET(RSHIFT); }
+       "<<"                    { RET(LSHIFT); }
+       "++"                    { RET(INCR); }
+       "--"                    { RET(DECR); }
+       "->"                    { RET(DEREF); }
+       "&&"                    { RET(ANDAND); }
+       "||"                    { RET(OROR); }
+       "<="                    { RET(LEQ); }
+       ">="                    { RET(GEQ); }
+       "=="                    { RET(EQL); }
+       "!="                    { RET(NEQ); }
+       ";"                     { RET(';'); }
+       "{"                     { RET('{'); }
+       "}"                     { RET('}'); }
+       ","                     { RET(','); }
+       ":"                     { RET(':'); }
+       "="                     { RET('='); }
+       "("                     { RET('('); }
+       ")"                     { RET(')'); }
+       "["                     { RET('['); }
+       "]"                     { RET(']'); }
+       "."                     { RET('.'); }
+       "&"                     { RET('&'); }
+       "!"                     { RET('!'); }
+       "~"                     { RET('~'); }
+       "-"                     { RET('-'); }
+       "+"                     { RET('+'); }
+       "*"                     { RET('*'); }
+       "/"                     { RET('/'); }
+       "%"                     { RET('%'); }
+       "<"                     { RET('<'); }
+       ">"                     { RET('>'); }
+       "^"                     { RET('^'); }
+       "|"                     { RET('|'); }
+       "?"                     { RET('?'); }
+
+
+       [ \t\v\f]+              { goto std; }
+
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->pos = cursor; s->line++;
+               goto std;
+           }
+
+       any
+           {
+               printf("unexpected character: %c\n", *s->tok);
+               goto std;
+           }
+*/
+
+/* Inside a block comment: skip to its closing delimiter, still counting lines. */
+comment:
+/*!re2c
+       "*/"                    { goto std; }
+       "\n"
+           {
+               if(cursor == s->eof) RET(EOI);
+               s->tok = s->pos = cursor; s->line++;
+               goto comment;
+           }
+        X                      { goto comment; }
+        X X                    { goto comment; }
+        X X X                  { goto comment; }
+        X X X X                        { goto comment; }
+        X X X X        X               { goto comment; }
+        X X X X        X X             { goto comment; }
+        X X X X        X X X           { goto comment; }
+        X X X X        X X X X         { goto comment; }
+        any                    { goto comment; }
+*/
+}
+
+/*
+ * Tokenize standard input: call scan() until it reports EOI, then release
+ * the buffer fill() allocated.  Always returns 0.
+ */
+int main(void){
+    Scanner in;
+    int t;
+    memset((char*) &in, 0, sizeof(in));
+    in.fd = 0;
+    while((t = scan(&in)) != EOI){
+/*
+       printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+       printf("%d\n", t);
+*/
+    }
+    free(in.bot);      /* still NULL (a no-op) if fill() never ran */
+    close(in.fd);
+    return 0;
+}
diff --git a/examples/modula.re b/examples/modula.re
new file mode 100644 (file)
index 0000000..0468ba4
--- /dev/null
@@ -0,0 +1,202 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define        BSIZE   8192
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         s->lim
+#define        YYMARKER        s->ptr
+#define        YYFILL          {cursor = fill(s, cursor);}
+
+#define        RETURN(i)       {s->cur = cursor; return i;}
+
+/*
+ * Scanner state shared by fill() and scan().
+ *   fd   - descriptor fill() reads input from
+ *   bot  - start of the malloc'ed buffer;  top - one past its end
+ *   tok  - start of the token being scanned
+ *   ptr  - backtrack position (YYMARKER)
+ *   cur  - where the next scan() call resumes (saved by RETURN)
+ *   pos  - position just after the most recent newline
+ *   lim  - end of the data read so far (YYLIMIT)
+ *   eof  - NULL until fill() sees a short read, then just past the
+ *          '\n' sentinel it appends
+ *   line - count of newlines scan() has consumed
+ */
+typedef struct Scanner {
+    int                        fd;
+    uchar              *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint               line;
+} Scanner;
+
+/*
+ * Top up the scanner buffer from s->fd and return the cursor, adjusted for
+ * any movement of the buffered data.  Once s->eof is set this is a no-op.
+ *
+ * Steps: slide the bytes from s->tok onward down to s->bot, grow the buffer
+ * if fewer than BSIZE bytes are free past s->lim, then read up to BSIZE
+ * bytes.  A short read is treated as end of input: a '\n' sentinel is
+ * stored after the data and s->eof is left pointing just past it.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+       uint cnt = s->tok - s->bot;
+       int got;
+       if(cnt){
+           /* the two ranges may overlap, which memcpy() does not permit */
+           memmove(s->bot, s->tok, s->lim - s->tok);
+           s->tok = s->bot;
+           s->ptr -= cnt;
+           cursor -= cnt;
+           s->pos -= cnt;
+           s->lim -= cnt;
+       }
+       if((s->top - s->lim) < BSIZE){
+           uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+           memcpy(buf, s->tok, s->lim - s->tok);
+           s->tok = buf;
+           s->ptr = &buf[s->ptr - s->bot];
+           cursor = &buf[cursor - s->bot];
+           s->pos = &buf[s->pos - s->bot];
+           s->lim = &buf[s->lim - s->bot];
+           s->top = &s->lim[BSIZE];
+           free(s->bot);
+           s->bot = buf;
+       }
+       /* read() returns -1 on error; kept in the unsigned cnt that value
+          would index far outside the buffer, so treat it as end of input */
+       got = read(s->fd, (char*) s->lim, BSIZE);
+       cnt = (got < 0) ? 0 : got;
+       if(cnt != BSIZE){
+           s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
+       }
+       s->lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Return the numeric code of the next Modula-2 token read through s,
+ * resuming at s->cur, or 0 when the newline just consumed ends at s->eof.
+ * Codes are the literal numbers in the rules below: 1-6 for numeric and
+ * string literals, 7-33 for punctuation, 34-73 for reserved words and 74
+ * for any other identifier.  Blanks, tabs and comments are skipped;
+ * comments nest, with `depth` tracking the current nesting level.  A
+ * character no rule accepts is reported on stdout and skipped.
+ */
+int scan(Scanner *s){
+       uchar *cursor = s->cur;
+       uint depth;
+std:
+       s->tok = cursor;
+/*!re2c
+any    = [\000-\377];
+digit  = [0-9];
+letter = [a-zA-Z];
+*/
+
+/*!re2c
+       "(*"                    { depth = 1; goto comment; }
+
+       digit +                 {RETURN(1);}
+       digit + / ".."          {RETURN(1);}
+       [0-7] + "B"             {RETURN(2);}
+       [0-7] + "C"             {RETURN(3);}
+       digit [0-9A-F] * "H"    {RETURN(4);}
+       digit + "." digit * ("E" ([+-]) ? digit +) ?    {RETURN(5);}
+       ['] (any\[\n']) * [']   | ["] (any\[\n"]) * ["] {RETURN(6);}
+
+       "#"                     {RETURN(7);}
+       "&"                     {RETURN(8);}
+       "("                     {RETURN(9);}
+       ")"                     {RETURN(10);}
+       "*"                     {RETURN(11);}
+       "+"                     {RETURN(12);}
+       ","                     {RETURN(13);}
+       "-"                     {RETURN(14);}
+       "."                     {RETURN(15);}
+       ".."                    {RETURN(16);}
+       "/"                     {RETURN(17);}
+       ":"                     {RETURN(18);}
+       ":="                    {RETURN(19);}
+       ";"                     {RETURN(20);}
+       "<"                     {RETURN(21);}
+       "<="                    {RETURN(22);}
+       "<>"                    {RETURN(23);}
+       "="                     {RETURN(24);}
+       ">"                     {RETURN(25);}
+       ">="                    {RETURN(26);}
+       "["                     {RETURN(27);}
+       "]"                     {RETURN(28);}
+       "^"                     {RETURN(29);}
+       "{"                     {RETURN(30);}
+       "|"                     {RETURN(31);}
+       "}"                     {RETURN(32);}
+       "~"                     {RETURN(33);}
+
+       "AND"                   {RETURN(34);}
+       "ARRAY"                 {RETURN(35);}
+       "BEGIN"                 {RETURN(36);}
+       "BY"                    {RETURN(37);}
+       "CASE"                  {RETURN(38);}
+       "CONST"                 {RETURN(39);}
+       "DEFINITION"            {RETURN(40);}
+       "DIV"                   {RETURN(41);}
+       "DO"                    {RETURN(42);}
+       "ELSE"                  {RETURN(43);}
+       "ELSIF"                 {RETURN(44);}
+       "END"                   {RETURN(45);}
+       "EXIT"                  {RETURN(46);}
+       "EXPORT"                {RETURN(47);}
+       "FOR"                   {RETURN(48);}
+       "FROM"                  {RETURN(49);}
+       "IF"                    {RETURN(50);}
+       "IMPLEMENTATION"        {RETURN(51);}
+       "IMPORT"                {RETURN(52);}
+       "IN"                    {RETURN(53);}
+       "LOOP"                  {RETURN(54);}
+       "MOD"                   {RETURN(55);}
+       "MODULE"                {RETURN(56);}
+       "NOT"                   {RETURN(57);}
+       "OF"                    {RETURN(58);}
+       "OR"                    {RETURN(59);}
+       "POINTER"               {RETURN(60);}
+       "PROCEDURE"             {RETURN(61);}
+       "QUALIFIED"             {RETURN(62);}
+       "RECORD"                {RETURN(63);}
+       "REPEAT"                {RETURN(64);}
+       "RETURN"                {RETURN(65);}
+       "SET"                   {RETURN(66);}
+       "THEN"                  {RETURN(67);}
+       "TO"                    {RETURN(68);}
+       "TYPE"                  {RETURN(69);}
+       "UNTIL"                 {RETURN(70);}
+       "VAR"                   {RETURN(71);}
+       "WHILE"                 {RETURN(72);}
+       "WITH"                  {RETURN(73);}
+
+       letter (letter | digit) *       {RETURN(74);}
+
+       [ \t]+                  { goto std; }
+
+       "\n"
+           {
+               if(cursor == s->eof) RETURN(0);
+               s->pos = cursor; s->line++;
+               goto std;
+           }
+
+       any
+           {
+               printf("unexpected character: %c\n", *s->tok);
+               goto std;
+           }
+*/
+/* Inside a comment: track nesting until depth returns to zero, still counting lines. */
+comment:
+/*!re2c
+       "*)"
+           {
+               if(--depth == 0)
+                   goto std;
+               else
+                   goto comment;
+           }
+       "(*"                    { ++depth; goto comment; }
+       "\n"
+           {
+               if(cursor == s->eof) RETURN(0);
+               s->tok = s->pos = cursor; s->line++;
+               goto comment;
+           }
+        any                    { goto comment; }
+*/
+}
+
+/*
+void putStr(FILE *o, char *s, uint l){
+    while(l-- > 0)
+       putc(*s++, o);
+}
+*/
+
+/*
+ * Tokenize standard input: call scan() until it returns 0, then release
+ * the buffer fill() allocated.  Always returns 0.
+ */
+int main(void){
+    Scanner in;
+    memset((char*) &in, 0, sizeof(in));
+    in.fd = 0;
+    while(scan(&in)){
+/*
+       putc('<', stdout);
+       putStr(stdout, (char*) in.tok, in.cur - in.tok);
+       putc('>', stdout);
+       putc('\n', stdout);
+*/
+    }
+    free(in.bot);      /* still NULL (a no-op) if fill() never ran */
+    return 0;
+}
diff --git a/examples/rexx/README b/examples/rexx/README
new file mode 100644 (file)
index 0000000..2af0178
--- /dev/null
@@ -0,0 +1 @@
+Replacement modules for an existing REXX interpreter.  Not standalone.
diff --git a/examples/rexx/rexx.l b/examples/rexx/rexx.l
new file mode 100644 (file)
index 0000000..b74741d
--- /dev/null
@@ -0,0 +1,319 @@
+#include "scanio.h"
+#include "scanner.h"
+
+#define        CURSOR          ch
+#define        LOADCURSOR      ch = *cursor;
+#define        ADVANCE         cursor++;
+#define        BACK(n)         cursor -= (n);
+#define        CHECK(n)        if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);}
+#define        MARK(n)         ScanCB.ptr = cursor; sel = (n);
+#define        REVERT          cursor = ScanCB.ptr;
+#define        MARKER          sel
+
+#define        RETURN(i)       {ScanCB.cur = cursor; return i;}
+
+/*
+ * Scan one REXX token beginning at ScanCB.cur and return its code; the
+ * RETURN macro stores the end of the token back into ScanCB.cur.  On entry
+ * ScanCB.tok is set to the token start and ScanCB.eot is cleared.
+ *
+ * Keywords are matched case-insensitively through the single-letter
+ * classes A..Z defined in the first re2c block.  A newline also bumps
+ * ScanCB.lineNum and recomputes ScanCB.linePos.  A NUL byte yields SU_EOF
+ * and any other unmatched character SU_ERROR.
+ */
+int ScanToken(){
+       uchar *cursor = ScanCB.cur;
+       unsigned sel;
+       uchar ch;
+       ScanCB.tok = cursor;
+       ScanCB.eot = NULL;
+/*!re2c
+all    = [\000-\377];
+eof    = [\000];
+any    = all\eof;
+letter = [a-z]|[A-Z];
+digit  = [0-9];
+symchr = letter|digit|[.!?_];
+const  = (digit|[.])symchr*([eE][+-]?digit+)?;
+simple = (symchr\(digit|[.]))(symchr\[.])*;
+stem   = simple [.];
+symbol = symchr*;
+sqstr  = ['] ((any\['\n])|(['][']))* ['];
+dqstr  = ["] ((any\["\n])|(["]["]))* ["];
+str    = sqstr|dqstr;
+ob     = [ \t]*;
+not    = [\\~];
+A      = [aA];
+B      = [bB];
+C      = [cC];
+D      = [dD];
+E      = [eE];
+F      = [fF];
+G      = [gG];
+H      = [hH];
+I      = [iI];
+J      = [jJ];
+K      = [kK];
+L      = [lL];
+M      = [mM];
+N      = [nN];
+O      = [oO];
+P      = [pP];
+Q      = [qQ];
+R      = [rR];
+S      = [sS];
+T      = [tT];
+U      = [uU];
+V      = [vV];
+W      = [wW];
+X      = [xX];
+Y      = [yY];
+Z      = [zZ];
+*/
+
+scan:
+/*!re2c
+"\n"
+           {
+               ++(ScanCB.lineNum);
+               ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+               RETURN(SU_EOL);
+           }
+"|" ob "|"
+           { RETURN(OP_CONCAT); }
+"+"
+           { RETURN(OP_PLUS); }
+"-"
+           { RETURN(OP_MINUS); }
+"*"
+           { RETURN(OP_MULT); }
+"/"
+           { RETURN(OP_DIV); }
+"%"
+           { RETURN(OP_IDIV); }
+"/" ob "/"
+           { RETURN(OP_REMAIN); }
+"*" ob "*"
+           { RETURN(OP_POWER); }
+"="
+           { RETURN(OP_EQUAL); }
+not ob "=" | "<" ob ">" | ">" ob "<"
+           { RETURN(OP_EQUAL_N); }
+">"
+           { RETURN(OP_GT); }
+"<"
+           { RETURN(OP_LT); }
+">" ob "=" | not ob "<"
+           { RETURN(OP_GE); }
+"<" ob "=" | not ob ">"
+           { RETURN(OP_LE); }
+"=" ob "="
+           { RETURN(OP_EQUAL_EQ); }
+not ob "=" ob "="
+           { RETURN(OP_EQUAL_EQ_N); }
+">" ob ">"
+           { RETURN(OP_GT_STRICT); }
+"<" ob "<"
+           { RETURN(OP_LT_STRICT); }
+">" ob ">" ob "=" | not ob "<" ob "<"
+           { RETURN(OP_GE_STRICT); }
+"<" ob "<" ob "=" | not ob ">" ob ">"
+           { RETURN(OP_LE_STRICT); }
+"&"
+           { RETURN(OP_AND); }
+"|"
+           { RETURN(OP_OR); }
+"&" ob "&"
+           { RETURN(OP_XOR); }
+not
+           { RETURN(OP_NOT); }
+
+":"
+           { RETURN(SU_COLON); }
+","
+           { RETURN(SU_COMMA); }
+"("
+           { RETURN(SU_POPEN); }
+")"
+           { RETURN(SU_PCLOSE); }
+";"
+           { RETURN(SU_EOC); }
+
+A D D R E S S
+           { RETURN(RX_ADDRESS); }
+A R G
+           { RETURN(RX_ARG); }
+C A L L
+           { RETURN(RX_CALL); }
+D O
+           { RETURN(RX_DO); }
+D R O P
+           { RETURN(RX_DROP); }
+E L S E
+           { RETURN(RX_ELSE); }
+E N D
+           { RETURN(RX_END); }
+E X I T
+           { RETURN(RX_EXIT); }
+I F
+           { RETURN(RX_IF); }
+I N T E R P R E T
+           { RETURN(RX_INTERPRET); }
+I T E R A T E
+           { RETURN(RX_ITERATE); }
+L E A V E
+           { RETURN(RX_LEAVE); }
+N O P
+           { RETURN(RX_NOP); }
+N U M E R I C
+           { RETURN(RX_NUMERIC); }
+O P T I O N S
+           { RETURN(RX_OPTIONS); }
+O T H E R W I S E
+           { RETURN(RX_OTHERWISE); }
+P A R S E
+           { RETURN(RX_PARSE); }
+P R O C E D U R E
+           { RETURN(RX_PROCEDURE); }
+P U L L
+           { RETURN(RX_PULL); }
+P U S H
+           { RETURN(RX_PUSH); }
+Q U E U E
+           { RETURN(RX_QUEUE); }
+R E T U R N
+           { RETURN(RX_RETURN); }
+S A Y
+           { RETURN(RX_SAY); }
+S E L E C T
+           { RETURN(RX_SELECT); }
+S I G N A L
+           { RETURN(RX_SIGNAL); }
+T H E N
+           { RETURN(RX_THEN); }
+T R A C E
+           { RETURN(RX_TRACE); }
+W H E N
+           { RETURN(RX_WHEN); }
+O F F
+           { RETURN(RXS_OFF); }
+O N
+           { RETURN(RXS_ON); }
+B Y
+           { RETURN(RXS_BY); }
+D I G I T S
+           { RETURN(RXS_DIGITS); }
+E N G I N E E R I N G
+           { RETURN(RXS_ENGINEERING); }
+E R R O R
+           { RETURN(RXS_ERROR); }
+E X P O S E
+           { RETURN(RXS_EXPOSE); }
+F A I L U R E
+           { RETURN(RXS_FAILURE); }
+F O R
+           { RETURN(RXS_FOR); }
+F O R E V E R
+           { RETURN(RXS_FOREVER); }
+F O R M
+           { RETURN(RXS_FORM); }
+F U Z Z
+           { RETURN(RXS_FUZZ); }
+H A L T
+           { RETURN(RXS_HALT); }
+L I N E I N
+           { RETURN(RXS_LINEIN); }
+N A M E
+           { RETURN(RXS_NAME); }
+N O T R E A D Y
+           { RETURN(RXS_NOTREADY); }
+N O V A L U E
+           { RETURN(RXS_NOVALUE); }
+S C I E N T I F I C
+           { RETURN(RXS_SCIENTIFIC); }
+S O U R C E
+           { RETURN(RXS_SOURCE); }
+S Y N T A X
+           { RETURN(RXS_SYNTAX); }
+T O
+           { RETURN(RXS_TO); }
+U N T I L
+           { RETURN(RXS_UNTIL); }
+U P P E R
+           { RETURN(RXS_UPPER); }
+V A L U E
+           { RETURN(RXS_VALUE); }
+V A R
+           { RETURN(RXS_VAR); }
+V E R S I O N
+           { RETURN(RXS_VERSION); }
+W H I L E
+           { RETURN(RXS_WHILE); }
+W I T H
+           { RETURN(RXS_WITH); }
+
+const
+           { RETURN(SU_CONST); }
+simple
+           { RETURN(SU_SYMBOL); }
+stem
+           { RETURN(SU_SYMBOL_STEM); }
+symbol
+           { RETURN(SU_SYMBOL_COMPOUND); }
+str
+           { RETURN(SU_LITERAL); }
+str [bB] / (all\symchr)
+           { RETURN(SU_LITERAL_BIN); }
+str [xX] / (all\symchr)
+           { RETURN(SU_LITERAL_HEX); }
+
+eof
+           { RETURN(SU_EOF); }
+any
+           { RETURN(SU_ERROR); }
+*/
+}
+
+/*
+ * Skip blanks, tabs, carriage returns and (nested) block comments starting
+ * at ScanCB.cur.  Returns TRUE if at least one blank or tab was skipped
+ * outside a comment, FALSE otherwise.  ScanCB.eot is set to the starting
+ * position and the RETURN macro leaves ScanCB.cur where skipping stopped.
+ * A NUL byte inside a comment also ends the scan; newlines inside a
+ * comment still update ScanCB.lineNum and ScanCB.linePos.
+ */
+bool StripToken(){
+       uchar *cursor = ScanCB.cur;
+       unsigned depth;
+       uchar ch;
+       bool blanks = FALSE;
+       ScanCB.eot = cursor;
+strip:
+/*!re2c
+"/*"
+           {
+               depth = 1;
+               goto comment;
+           }
+"\r"
+           { goto strip; }
+[ \t]
+           {
+               blanks = TRUE;
+               goto strip;
+           }
+[] / all
+           { RETURN(blanks); }
+*/
+
+comment:
+/*!re2c
+"*/"
+           {
+               if(--depth == 0)
+                   goto strip;
+               else
+                   goto comment;
+           }
+"\n"
+           {
+               ++(ScanCB.lineNum);
+               ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+               goto comment;
+           }
+"/*"
+           {
+               ++depth;
+               goto comment;
+           }
+eof
+           { RETURN(blanks); }
+any
+           {
+               goto comment;
+           }
+*/
+}
diff --git a/examples/rexx/scanio.c b/examples/rexx/scanio.c
new file mode 100644 (file)
index 0000000..de6898d
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Make room in the scan buffer, read the next 512-byte chunk and return the
+ * cursor adjusted for any movement of the buffered data.
+ *
+ * The bytes before s->tok are discarded first.  When s->eot is set, only
+ * the token text [tok, eot) and the unread data [cursor, lim) are kept and
+ * packed together; otherwise everything from s->tok on is slid down.  The
+ * buffer is then regrown if fewer than 512 bytes are free past s->lim.
+ * When ScanCBIO.file is set a chunk is read, and a short chunk is padded
+ * with NUL bytes up to 512.
+ * NOTE(review): `s` is not declared in this file; presumably it is supplied
+ * by a header of the host interpreter -- confirm.
+ */
+uchar *ScanFill(uchar *cursor){
+    unsigned cnt = s->tok - s->bot;
+    s->pos += cursor - s->mrk;
+    if(cnt){
+        /* every copy below moves data downward inside one buffer, so the
+           ranges can overlap and memmove() is required */
+        if(s->eot){
+            unsigned len = s->eot - s->tok;
+            memmove(s->bot, s->tok, len);
+            s->eot = &s->bot[len];
+            if((len = s->lim - cursor) != 0)
+                memmove(s->eot, cursor, len);
+            cursor = s->eot;
+            s->lim = &cursor[len];
+        } else {
+            memmove(s->bot, s->tok, s->lim - s->tok);
+            cursor -= cnt;
+            s->lim -= cnt;
+        }
+        s->tok = s->bot;
+        s->ptr -= cnt;
+    }
+    if((s->top - s->lim) < 512){
+        uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar));
+        memcpy(buf, s->bot, s->lim - s->bot);
+        s->tok = buf;
+        s->ptr = &buf[s->ptr - s->bot];
+        if(s->eot)
+            s->eot = &buf[s->eot - s->bot];
+        cursor = &buf[cursor - s->bot];
+        s->lim = &buf[s->lim - s->bot];
+        s->top = &s->lim[512];
+        free(s->bot);
+        s->bot = buf;
+    }
+    s->mrk = cursor;
+    if(ScanCBIO.file){
+        /* read() returns -1 on error; as an unsigned count that would index
+           and size the memset far outside the buffer, so pad the whole chunk */
+        int got = read(ScanCBIO.u.f.fd, (char*) s->lim, 512);
+        if(got < 0)
+            got = 0;
+        if(got != 512)
+            memset(&s->lim[got], 0, 512 - got);
+        s->lim += 512;
+    }
+    return cursor;
+}
diff --git a/examples/sample.re b/examples/sample.re
new file mode 100644 (file)
index 0000000..2f497a3
--- /dev/null
@@ -0,0 +1,7 @@
+/*!re2c
+       "print"         {return PRINT;}
+       [a-z]+          {return ID;}
+       [0-9]+          {return DEC;}
+       "0x" [0-9a-f]+  {return HEX;}
+       [\000-\377]     {return ERR;}
+*/
diff --git a/examples/simple.re b/examples/simple.re
new file mode 100644 (file)
index 0000000..5fd8891
--- /dev/null
@@ -0,0 +1,13 @@
+#define        NULL            ((char*) 0)
+/*
+ * Return a pointer just past the run of decimal digits at the start of p,
+ * or NULL when p does not begin with a digit.  YYFILL is empty, so nothing
+ * is ever refilled; presumably the caller must end the digits with a
+ * non-digit byte (e.g. the string's NUL) -- confirm before reuse.
+ */
+char *scan(char *p){
+char *q;
+#define        YYCTYPE         char
+#define        YYCURSOR        p
+#define        YYLIMIT         p
+#define        YYMARKER        q
+#define        YYFILL(n)
+/*!re2c
+       [0-9]+          {return YYCURSOR;}
+       [\000-\377]     {return NULL;}
+*/
+}
diff --git a/globals.h b/globals.h
new file mode 100644 (file)
index 0000000..79edbff
--- /dev/null
+++ b/globals.h
@@ -0,0 +1,15 @@
+// Program-wide settings shared between main() and the rest of re2c.
+#ifndef        _globals_h
+#define        _globals_h
+
+#include "basics.h"
+
+// Name of the input; main() sets it to the last command-line argument,
+// or to "<stdin>" when that argument is "-".
+extern char *fileName;
+// Set by the -s option, and also by -b.
+extern bool sFlag;
+// Set by the -b option.
+extern bool bFlag;
+
+// 256-entry translation tables.
+// NOTE(review): names suggest ASCII->EBCDIC and EBCDIC->ASCII -- confirm
+// where the tables are defined.
+extern uchar asc2ebc[256];
+extern uchar ebc2asc[256];
+
+// Active translation tables; the -e option points them at asc2ebc and
+// ebc2asc respectively.
+extern uchar *xlat, *talx;
+
+#endif
diff --git a/ins.h b/ins.h
new file mode 100644 (file)
index 0000000..5d08cca
--- /dev/null
+++ b/ins.h
@@ -0,0 +1,41 @@
+#ifndef _ins_h
+#define _ins_h
+
+#include <iostream.h>
+#include "basics.h"
+
+const uint nChars = 256;
+typedef uchar Char;
+
+const uint CHAR = 0;
+const uint GOTO = 1;
+const uint FORK = 2;
+const uint TERM = 3;
+const uint CTXT = 4;
+
+// One instruction cell with two overlaid layouts (the members share
+// storage because this is a union):
+//   i -- tag, mark flag and link; isMarked()/mark()/unmark() use i.marked
+//   c -- value, bump and link
+// NOTE(review): i.tag presumably holds one of the CHAR/GOTO/FORK/TERM/CTXT
+// constants above, and c is presumably the layout for character entries --
+// confirm against the code that builds instructions.
+union Ins {
+    struct {
+       byte    tag;
+       byte    marked;
+       void    *link;
+    }                  i;
+    struct {
+       ushort  value;
+       ushort  bump;
+       void    *link;
+    }                  c;
+};
+
+// Reports whether the instruction's mark byte is set (non-zero).
+inline bool isMarked(Ins *ins)
+{
+    const bool flagged = (ins->i.marked != 0);
+    return flagged;
+}
+
+// Sets the instruction's mark byte.
+inline void mark(Ins *ins)
+{
+    ins->i.marked = true;
+}
+
+// Clears the instruction's mark byte.
+inline void unmark(Ins *ins)
+{
+    ins->i.marked = false;
+}
+
+#endif
diff --git a/main.cc b/main.cc
new file mode 100644 (file)
index 0000000..9e22c23
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,54 @@
+#include <fstream.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "globals.h"
+#include "parser.h"
+#include "dfa.h"
+
+char *fileName;
+bool sFlag = false;
+bool bFlag = false;
+
+int main(unsigned argc, char *argv[]){
+    fileName = NULL;
+    if(argc == 1)
+       goto usage;
+    while(--argc > 1){
+       char *p = *++argv;
+       while(*++p != '\0'){
+           switch(*p){
+           case 'e':
+               xlat = asc2ebc;
+               talx = ebc2asc;
+               break;
+           case 's':
+               sFlag = true;
+               break;
+           case 'b':
+               sFlag = true;
+               bFlag = true;
+               break;
+           default:
+               goto usage;
+           }
+       }
+    }
+    fileName = *++argv;
+    int fd;
+    if(fileName[0] == '-' && fileName[1] == '\0'){
+       fileName = "<stdin>";
+       fd = 0;
+    } else {
+       if((fd = open(fileName, O_RDONLY)) < 0){
+           cerr << "can't open " << fileName << "\n";
+           return 1;
+       }
+    }
+    parse(fd, cout);
+    return 0;
+usage:
+    cerr << "usage: re2c [-esb] name\n";
+    return 2;
+}
diff --git a/parser.cc b/parser.cc
new file mode 100644 (file)
index 0000000..6d66400
--- /dev/null
+++ b/parser.cc
@@ -0,0 +1,531 @@
+#ifndef lint
+static char yysccsid[] = "@(#)yaccpar  1.9 (Berkeley) 02/21/93";
+#endif
+#define YYBYACC 1
+#define YYMAJOR 1
+#define YYMINOR 9
+#define yyclearin (yychar=(-1))
+#define yyerrok (yyerrflag=0)
+#define YYRECOVERING (yyerrflag!=0)
+#define YYPREFIX "yy"
+#line 2 "parser.y"
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+#line 21 "parser.y"
+typedef union {
+    Symbol     *symbol;
+    RegExp     *regexp;
+    Token      *token;
+    char       op;
+} YYSTYPE;
+#line 35 "y.tab.c"
+#define CLOSE 257
+#define ID 258
+#define CODE 259
+#define RANGE 260
+#define STRING 261
+#define YYERRCODE 256
+short yylhs[] = {                                        -1,
+    0,    0,    0,    9,    2,    3,    3,    4,    4,    5,
+    5,    6,    6,    7,    7,    1,    1,    8,    8,    8,
+    8,
+};
+short yylen[] = {                                         2,
+    0,    2,    2,    4,    3,    0,    2,    1,    3,    1,
+    3,    1,    2,    1,    2,    1,    2,    1,    1,    1,
+    3,
+};
+short yydefred[] = {                                      1,
+    0,    0,   19,   20,    0,    2,    0,    0,    0,   12,
+    0,    3,    0,   18,    0,    0,    0,    0,    0,   13,
+   16,    0,    0,   21,    0,    0,    5,    0,   17,    4,
+};
+short yydgoto[] = {                                       1,
+   22,    6,   18,    7,    8,    9,   10,   11,   12,
+};
+short yysindex[] = {                                      0,
+  -27,  -49,    0,    0,  -23,    0,  -44,  -84,  -23,    0,
+ -243,    0,  -23,    0,  -39,  -23,  -23, -244,  -23,    0,
+    0, -239,  -53,    0, -104,  -84,    0,  -23,    0,    0,
+};
+short yyrindex[] = {                                      0,
+    0,  -31,    0,    0,    0,    0, -227,  -17,  -20,    0,
+  -40,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,  -36,    0,    0, -226,  -16,    0,  -19,    0,    0,
+};
+short yygindex[] = {                                      0,
+    0,    0,    0,   21,   18,   17,    1,    0,    0,
+};
+#define YYTABLESIZE 243
+short yytable[] = {                                      14,
+   14,   24,   16,   15,   15,   30,   14,   19,   18,   20,
+   15,   13,    5,   21,   27,   18,    5,   29,   14,   17,
+   10,   11,   15,    8,    9,   15,   10,   11,   20,    8,
+    9,    6,    7,   23,   26,   28,   25,    0,   10,   11,
+    0,    8,    9,    0,    0,    0,    0,    0,    0,    0,
+    0,   14,    0,    0,    0,   15,    0,    0,    0,    0,
+   18,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+   17,   10,   11,    0,    0,    0,    0,    0,    0,   17,
+    0,    0,    0,   14,   17,    0,    0,   15,    0,    0,
+    0,    0,   18,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,   10,   11,    0,    8,    9,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,    0,    0,    0,    0,    0,    0,   14,   14,   14,
+   14,   15,   15,   15,   15,   18,   18,   18,   18,   18,
+    2,    0,    3,    4,   14,    0,    3,    4,   10,   11,
+    0,    8,    9,
+};
+short yycheck[] = {                                      40,
+   41,   41,   47,   40,   41,   59,   47,   92,   40,    9,
+   47,   61,   40,  257,  259,   47,   40,  257,   59,  124,
+   41,   41,   59,   41,   41,    5,   47,   47,   28,   47,
+   47,  259,  259,   13,   17,   19,   16,   -1,   59,   59,
+   -1,   59,   59,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   92,   -1,   -1,   -1,   92,   -1,   -1,   -1,   -1,
+   92,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+  124,   92,   92,   -1,   -1,   -1,   -1,   -1,   -1,  124,
+   -1,   -1,   -1,  124,  124,   -1,   -1,  124,   -1,   -1,
+   -1,   -1,  124,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,  124,  124,   -1,  124,  124,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+   -1,   -1,   -1,   -1,   -1,   -1,   -1,  258,  259,  260,
+  261,  258,  259,  260,  261,  257,  258,  259,  260,  261,
+  258,   -1,  260,  261,  258,   -1,  260,  261,  259,  259,
+   -1,  259,  259,
+};
+#define YYFINAL 1
+#ifndef YYDEBUG
+#define YYDEBUG 0
+#endif
+#define YYMAXTOKEN 261
+#if YYDEBUG
+char *yyname[] = {
+"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,"'('","')'",0,0,0,0,0,"'/'",0,0,0,0,0,0,0,0,0,0,0,"';'",0,"'='",0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'\\\\'",0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+"CLOSE","ID","CODE","RANGE","STRING",
+};
+char *yyrule[] = {
+"$accept : spec",
+"spec :",
+"spec : spec rule",
+"spec : spec decl",
+"decl : ID '=' expr ';'",
+"rule : expr look CODE",
+"look :",
+"look : '/' expr",
+"expr : diff",
+"expr : expr '|' diff",
+"diff : term",
+"diff : diff '\\\\' term",
+"term : factor",
+"term : term factor",
+"factor : primary",
+"factor : primary close",
+"close : CLOSE",
+"close : close CLOSE",
+"primary : ID",
+"primary : RANGE",
+"primary : STRING",
+"primary : '(' expr ')'",
+};
+#endif
+#ifdef YYSTACKSIZE
+#undef YYMAXDEPTH
+#define YYMAXDEPTH YYSTACKSIZE
+#else
+#ifdef YYMAXDEPTH
+#define YYSTACKSIZE YYMAXDEPTH
+#else
+#define YYSTACKSIZE 500
+#define YYMAXDEPTH 500
+#endif
+#endif
+int yydebug;
+int yynerrs;
+int yyerrflag;
+int yychar;
+short *yyssp;
+YYSTYPE *yyvsp;
+YYSTYPE yyval;
+YYSTYPE yylval;
+short yyss[YYSTACKSIZE];
+YYSTYPE yyvs[YYSTACKSIZE];
+#define yystacksize YYSTACKSIZE
+#line 121 "parser.y"
+
+void yyerror(char* s){
+    in->fatal(s);
+}
+
+// Token source for yyparse(): returns whatever the scanner's scan() yields.
+int yylex()
+{
+    return in->scan();
+}
+
+void parse(int i, ostream &o){
+    char *     fnamebuf;
+    char *     token;
+
+    o << "/* Generated by re2c 0.5 on ";
+    time_t now = time(&now);
+    o.write(ctime(&now), 24);
+    o << " */\n";
+
+    in = new Scanner(i);
+
+    o << "#line " << in->line() << " \"";
+    if( fileName != NULL ) {
+       fnamebuf = strdup( fileName );
+    } else {
+       fnamebuf = strdup( "<stdin>" );
+    }
+    token = strtok( fnamebuf, "\\" );
+    for(;;) {
+       o << token;
+       token = strtok( NULL, "\\" );
+       if( token == NULL ) break;
+       o << "\\\\";
+    }
+    o << "\"\n";
+    free( fnamebuf );
+
+    while(in->echo(o)){
+       yyparse();
+       if(spec)
+           genCode(o, spec);
+       o << "#line " << in->line() << "\n";
+    }
+}
+#line 235 "y.tab.c"
+#define YYABORT goto yyabort
+#define YYREJECT goto yyabort
+#define YYACCEPT goto yyaccept
+#define YYERROR goto yyerrlab
+int
+yyparse()
+{
+    register int yym, yyn, yystate;
+#if YYDEBUG
+    register char *yys;
+    extern char *getenv();
+
+    if (yys = getenv("YYDEBUG"))
+    {
+        yyn = *yys;
+        if (yyn >= '0' && yyn <= '9')
+            yydebug = yyn - '0';
+    }
+#endif
+
+    yynerrs = 0;
+    yyerrflag = 0;
+    yychar = (-1);
+
+    yyssp = yyss;
+    yyvsp = yyvs;
+    *yyssp = yystate = 0;
+
+yyloop:
+    if (yyn = yydefred[yystate]) goto yyreduce;
+    if (yychar < 0)
+    {
+        if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+        if (yydebug)
+        {
+            yys = 0;
+            if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+            if (!yys) yys = "illegal-symbol";
+            printf("%sdebug: state %d, reading %d (%s)\n",
+                    YYPREFIX, yystate, yychar, yys);
+        }
+#endif
+    }
+    if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+    {
+#if YYDEBUG
+        if (yydebug)
+            printf("%sdebug: state %d, shifting to state %d\n",
+                    YYPREFIX, yystate, yytable[yyn]);
+#endif
+        if (yyssp >= yyss + yystacksize - 1)
+        {
+            goto yyoverflow;
+        }
+        *++yyssp = yystate = yytable[yyn];
+        *++yyvsp = yylval;
+        yychar = (-1);
+        if (yyerrflag > 0)  --yyerrflag;
+        goto yyloop;
+    }
+    if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
+    {
+        yyn = yytable[yyn];
+        goto yyreduce;
+    }
+    if (yyerrflag) goto yyinrecovery;
+#ifdef lint
+    goto yynewerror;
+#endif
+yynewerror:
+    yyerror("syntax error");
+#ifdef lint
+    goto yyerrlab;
+#endif
+yyerrlab:
+    ++yynerrs;
+yyinrecovery:
+    if (yyerrflag < 3)
+    {
+        yyerrflag = 3;
+        for (;;)
+        {
+            if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
+                    yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
+            {
+#if YYDEBUG
+                if (yydebug)
+                    printf("%sdebug: state %d, error recovery shifting\
+ to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
+#endif
+                if (yyssp >= yyss + yystacksize - 1)
+                {
+                    goto yyoverflow;
+                }
+                *++yyssp = yystate = yytable[yyn];
+                *++yyvsp = yylval;
+                goto yyloop;
+            }
+            else
+            {
+#if YYDEBUG
+                if (yydebug)
+                    printf("%sdebug: error recovery discarding state %d\n",
+                            YYPREFIX, *yyssp);
+#endif
+                if (yyssp <= yyss) goto yyabort;
+                --yyssp;
+                --yyvsp;
+            }
+        }
+    }
+    else
+    {
+        if (yychar == 0) goto yyabort;
+#if YYDEBUG
+        if (yydebug)
+        {
+            yys = 0;
+            if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+            if (!yys) yys = "illegal-symbol";
+            printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
+                    YYPREFIX, yystate, yychar, yys);
+        }
+#endif
+        yychar = (-1);
+        goto yyloop;
+    }
+yyreduce:
+#if YYDEBUG
+    if (yydebug)
+        printf("%sdebug: state %d, reducing by rule %d (%s)\n",
+                YYPREFIX, yystate, yyn, yyrule[yyn]);
+#endif
+    yym = yylen[yyn];
+    yyval = yyvsp[1-yym];
+    switch (yyn)
+    {
+case 1:
+#line 40 "parser.y"
+{ accept = 0;
+                 spec = NULL; }
+break;
+case 2:
+#line 43 "parser.y"
+{ spec = spec? mkAlt(spec, yyvsp[0].regexp) : yyvsp[0].regexp; }
+break;
+case 4:
+#line 48 "parser.y"
+{ if(yyvsp[-3].symbol->re)
+                     in->fatal("sym already defined");
+                 yyvsp[-3].symbol->re = yyvsp[-1].regexp; }
+break;
+case 5:
+#line 54 "parser.y"
+{ yyval.regexp = new RuleOp(yyvsp[-2].regexp, yyvsp[-1].regexp, yyvsp[0].token, accept++); }
+break;
+case 6:
+#line 58 "parser.y"
+{ yyval.regexp = new NullOp; }
+break;
+case 7:
+#line 60 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 8:
+#line 64 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 9:
+#line 66 "parser.y"
+{ yyval.regexp =  mkAlt(yyvsp[-2].regexp, yyvsp[0].regexp); }
+break;
+case 10:
+#line 70 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 11:
+#line 72 "parser.y"
+{ yyval.regexp =  mkDiff(yyvsp[-2].regexp, yyvsp[0].regexp);
+                 if(!yyval.regexp)
+                      in->fatal("can only difference char sets");
+               }
+break;
+case 12:
+#line 79 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 13:
+#line 81 "parser.y"
+{ yyval.regexp = new CatOp(yyvsp[-1].regexp, yyvsp[0].regexp); }
+break;
+case 14:
+#line 85 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 15:
+#line 87 "parser.y"
+{
+                   switch(yyvsp[0].op){
+                   case '*':
+                       yyval.regexp = mkAlt(new CloseOp(yyvsp[-1].regexp), new NullOp());
+                       break;
+                   case '+':
+                       yyval.regexp = new CloseOp(yyvsp[-1].regexp);
+                       break;
+                   case '?':
+                       yyval.regexp = mkAlt(yyvsp[-1].regexp, new NullOp());
+                       break;
+                   }
+               }
+break;
+case 16:
+#line 103 "parser.y"
+{ yyval.op = yyvsp[0].op; }
+break;
+case 17:
+#line 105 "parser.y"
+{ yyval.op = (yyvsp[-1].op == yyvsp[0].op) ? yyvsp[-1].op : '*'; }
+break;
+case 18:
+#line 109 "parser.y"
+{ if(!yyvsp[0].symbol->re)
+                     in->fatal("can't find symbol");
+                 yyval.regexp = yyvsp[0].symbol->re; }
+break;
+case 19:
+#line 113 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 20:
+#line 115 "parser.y"
+{ yyval.regexp = yyvsp[0].regexp; }
+break;
+case 21:
+#line 117 "parser.y"
+{ yyval.regexp = yyvsp[-1].regexp; }
+break;
+#line 476 "y.tab.c"
+    }
+    yyssp -= yym;
+    yystate = *yyssp;
+    yyvsp -= yym;
+    yym = yylhs[yyn];
+    if (yystate == 0 && yym == 0)
+    {
+#if YYDEBUG
+        if (yydebug)
+            printf("%sdebug: after reduction, shifting from state 0 to\
+ state %d\n", YYPREFIX, YYFINAL);
+#endif
+        yystate = YYFINAL;
+        *++yyssp = YYFINAL;
+        *++yyvsp = yyval;
+        if (yychar < 0)
+        {
+            if ((yychar = yylex()) < 0) yychar = 0;
+#if YYDEBUG
+            if (yydebug)
+            {
+                yys = 0;
+                if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
+                if (!yys) yys = "illegal-symbol";
+                printf("%sdebug: state %d, reading %d (%s)\n",
+                        YYPREFIX, YYFINAL, yychar, yys);
+            }
+#endif
+        }
+        if (yychar == 0) goto yyaccept;
+        goto yyloop;
+    }
+    if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
+            yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
+        yystate = yytable[yyn];
+    else
+        yystate = yydgoto[yym];
+#if YYDEBUG
+    if (yydebug)
+        printf("%sdebug: after reduction, shifting from state %d \
+to state %d\n", YYPREFIX, *yyssp, yystate);
+#endif
+    if (yyssp >= yyss + yystacksize - 1)
+    {
+        goto yyoverflow;
+    }
+    *++yyssp = yystate;
+    *++yyvsp = yyval;
+    goto yyloop;
+yyoverflow:
+    yyerror("yacc stack overflow");
+yyabort:
+    return (1);
+yyaccept:
+    return (0);
+}
diff --git a/parser.h b/parser.h
new file mode 100644 (file)
index 0000000..56178a8
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,20 @@
+#ifndef _parser_h
+#define _parser_h
+
+#include "scanner.h"
+#include "re.h"
+
+// A named definition.  The grammar in parser.y stores the expression of
+// an `ID '=' expr ';'` declaration in `re`, and treats a symbol whose
+// `re` is still null as undefined.
+class Symbol {
+public:
+    // NOTE(review): first/next presumably form a singly linked list of all
+    // symbols -- confirm in the definitions.
+    static Symbol      *first;
+    Symbol             *next;
+    Str                        name;
+    // Expression bound to the name; null until a declaration assigns it.
+    RegExp             *re;
+public:
+    Symbol(const SubStr&);
+    // NOTE(review): presumably looks the symbol up by name (and may create
+    // it) -- confirm in the definition.
+    static Symbol *find(const SubStr&);
+};
+
+void parse(int, ostream&);
+
+#endif
diff --git a/parser.y b/parser.y
new file mode 100644 (file)
index 0000000..8f2a7dc
--- /dev/null
+++ b/parser.y
@@ -0,0 +1,163 @@
+%{
+
+#include <time.h>
+#include <iostream.h>
+#include <string.h>
+#include <malloc.h>
+#include "globals.h"
+#include "parser.h"
+int yyparse();
+int yylex();
+void yyerror(char*);
+
+static uint accept;
+static RegExp *spec;
+static Scanner *in;
+
+%}
+
+%start spec
+
+%union {
+    Symbol     *symbol;
+    RegExp     *regexp;
+    Token      *token;
+    char       op;
+}
+
+%token         CLOSE   ID      CODE    RANGE   STRING
+
+%type  <op>            CLOSE
+%type  <op>            close
+%type  <symbol>        ID
+%type  <token>         CODE
+%type  <regexp>        RANGE   STRING
+%type  <regexp>        rule    look    expr    diff    term    factor  primary
+
+%%
+
+/* A specification is a possibly empty sequence of rules and named
+ * definitions.  The empty production resets the rule counter and the
+ * accumulated expression; every rule is merged into `spec` with mkAlt(). */
+spec   :
+               { accept = 0;
+                 spec = NULL; }
+       |       spec rule
+               { spec = spec? mkAlt(spec, $2) : $2; }
+       |       spec decl
+       ;
+
+/* Named definition: binds an expression to a symbol.  A symbol that
+ * already has an expression is reported through fatal(). */
+decl   :       ID '=' expr ';'
+               { if($1->re)
+                     in->fatal("sym already defined");
+                 $1->re = $3; }
+       ;
+
+/* One rule: expression, optional '/' part and the action code.  Each rule
+ * is numbered with the next value of `accept`. */
+rule   :       expr look CODE
+               { $$ = new RuleOp($1, $2, $3, accept++); }
+       ;
+
+/* Optional '/' part of a rule; a NullOp stands in when it is absent. */
+look   :
+               { $$ = new NullOp; }
+       |       '/' expr
+               { $$ = $2; }
+       ;
+
+/* Alternation with '|'. */
+expr   :       diff
+               { $$ = $1; }
+       |       expr '|' diff
+               { $$ =  mkAlt($1, $3); }
+       ;
+
+/* Difference with '\'.  A null result from mkDiff() is reported with the
+ * message that only char sets can be differenced. */
+diff   :       term
+               { $$ = $1; }
+       |       diff '\\' term
+               { $$ =  mkDiff($1, $3);
+                 if(!$$)
+                      in->fatal("can only difference char sets");
+               }
+       ;
+
+/* Concatenation by juxtaposition. */
+term   :       factor
+               { $$ = $1; }
+       |       term factor
+               { $$ = new CatOp($1, $2); }
+       ;
+
+/* A primary with an optional closure operator: '*' is built as
+ * (CloseOp | NullOp), '+' as CloseOp and '?' as (primary | NullOp). */
+factor :       primary
+               { $$ = $1; }
+       |       primary close
+               {
+                   switch($2){
+                   case '*':
+                       $$ = mkAlt(new CloseOp($1), new NullOp());
+                       break;
+                   case '+':
+                       $$ = new CloseOp($1);
+                       break;
+                   case '?':
+                       $$ = mkAlt($1, new NullOp());
+                       break;
+                   }
+               }
+       ;
+
+/* A run of closure operators collapsed to one: a run of identical
+ * operators keeps that operator, any mix becomes '*'. */
+close  :       CLOSE
+               { $$ = $1; }
+       |       close CLOSE
+               { $$ = ($1 == $2) ? $1 : '*'; }
+       ;
+
+/* Operands: a symbol (reported through fatal() when it has no expression
+ * yet), the expression carried by a RANGE or STRING token, or a
+ * parenthesised expression. */
+primary        :       ID
+               { if(!$1->re)
+                     in->fatal("can't find symbol");
+                 $$ = $1->re; }
+       |       RANGE
+               { $$ = $1; }
+       |       STRING
+               { $$ = $1; }
+       |       '(' expr ')'
+               { $$ = $2; }
+       ;
+
+%%
+
+// Parser error hook: hands the message to the scanner's fatal().
+void yyerror(char *msg)
+{
+    in->fatal(msg);
+}
+
+// Token source for the parser: returns whatever the scanner's scan() yields.
+int yylex()
+{
+    return in->scan();
+}
+
+// Runs one complete translation of the input on descriptor `i`, writing
+// the result to `o`.
+//
+// The output starts with a "Generated by re2c" banner carrying the
+// current time and a #line directive naming the input file.  After that
+// the scanner echoes input to `o`; each time echo() returns non-zero,
+// yyparse() is run, code is generated for the collected spec (if any) and
+// a #line directive with the scanner's current line is emitted.
+void parse(int i, ostream &o){
+    o << "/* Generated by re2c 0.5 on ";
+    time_t now = time(&now);
+    // ctime() ends its 26-byte result with "\n\0"; writing 24 bytes keeps
+    // the date and drops the newline.
+    o.write(ctime(&now), 24);
+    o << " */\n";
+
+    in = new Scanner(i);
+
+    // The file name is emitted as a C string literal, so every backslash
+    // is doubled.  Copying character by character (rather than splitting
+    // with strtok) keeps leading, trailing and consecutive backslashes
+    // and never hands a null token to the stream.
+    o << "#line " << in->line() << " \"";
+    for(const char *p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; ++p){
+       if(*p == '\\')
+           o << "\\\\";
+       else
+           o.put(*p);
+    }
+    o << "\"\n";
+
+    while(in->echo(o)){
+       yyparse();
+       if(spec)
+           genCode(o, spec);
+       o << "#line " << in->line() << "\n";
+    }
+}
diff --git a/re.h b/re.h
new file mode 100644 (file)
index 0000000..2ea6e63
--- /dev/null
+++ b/re.h
@@ -0,0 +1,178 @@
+#ifndef _re_h
+#define _re_h
+
+#include <iostream.h>
+#include "token.h"
+#include "ins.h"
+
+// Node type used by CharSet below.
+// NOTE(review): the field roles are not visible in this header; `card` is
+// presumably a member count and fix/nxt links to other nodes -- confirm
+// in the split() implementations.
+struct CharPtn {
+    uint       card;
+    CharPtn    *fix;
+    CharPtn    *nxt;
+};
+
+// State passed to RegExp::split().  `rep` and `ptn` each have one slot
+// per character value (nChars entries).
+// NOTE(review): presumably rep[c] is the CharPtn currently representing
+// character c and freeHead/freeTail a free list over ptn -- confirm in
+// the split() implementations.
+struct CharSet {
+    CharPtn    *fix;
+    CharPtn    *freeHead, **freeTail;
+    CharPtn    *rep[nChars];
+    CharPtn    ptn[nChars];
+};
+
+class Range {
+public:
+    Range      *next;
+    uint       lb, ub;         // [lb,ub)
+public:
+    Range(uint l, uint u) : next(NULL), lb(l), ub(u)
+       { }
+    Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub)
+       { }
+    friend ostream& operator<<(ostream&, const Range&);
+    friend ostream& operator<<(ostream&, const Range*);
+};
+
+// Streams the range behind `r`; a null pointer prints nothing.
+inline ostream& operator<<(ostream &out, const Range *r)
+{
+    if(!r)
+       return out;
+    return out << *r;
+}
+
+class RegExp {
+public:
+    uint       size;
+public:
+    virtual char *typeOf() = 0;
+    RegExp *isA(char *t)
+       { return typeOf() == t? this : NULL; }
+    virtual void split(CharSet&) = 0;
+    virtual void calcSize(Char*) = 0;
+    virtual uint fixedLength();
+    virtual void compile(Char*, Ins*) = 0;
+    virtual void display(ostream&) const = 0;
+    friend ostream& operator<<(ostream&, const RegExp&);
+    friend ostream& operator<<(ostream&, const RegExp*);
+};
+
+// Streams a regular expression by delegating to its virtual display();
+// returns the stream so that insertions can be chained.
+inline ostream& operator<<(ostream &o, const RegExp &re){
+    re.display(o);
+    return o;
+}
+
+// Streams the expression behind `re` via display().  A null pointer
+// prints nothing, matching the Range* overload, instead of being
+// dereferenced.
+inline ostream& operator<<(ostream &o, const RegExp *re){
+    return re? o << *re : o;
+}
+
+// Node displayed as "_".  The grammar creates it for a rule without a
+// '/' part and as the second alternative of the '*' and '?' closures.
+class NullOp : public RegExp
+{
+public:
+    static char *type;
+
+    char *typeOf() { return type; }
+    void split(CharSet &);
+    void calcSize(Char *);
+    uint fixedLength();
+    void compile(Char *, Ins *);
+    void display(ostream &out) const { out << "_"; }
+};
+
+// Node that carries a pointer to a Range in `match`.
+class MatchOp : public RegExp
+{
+public:
+    static char *type;
+    Range      *match;
+
+    MatchOp(Range *m) : match(m) { }
+
+    char *typeOf() { return type; }
+    void split(CharSet &);
+    void calcSize(Char *);
+    uint fixedLength();
+    void compile(Char *, Ins *);
+    void display(ostream &) const;
+};
+
+class RuleOp: public RegExp {
+private:
+    RegExp     *exp;
+public:
+    RegExp     *ctx;
+    static char *type;
+    Ins                *ins;
+    uint       accept;
+    Token      *code;
+    uint       line;
+public:
+    RuleOp(RegExp*, RegExp*, Token*, uint);
+    char *typeOf()
+       { return type; }
+    void split(CharSet&);
+    void calcSize(Char*);
+    void compile(Char*, Ins*);
+    void display(ostream &o) const {
+       o << exp << "/" << ctx << ";";
+    }
+};
+
+class AltOp: public RegExp {
+private:
+    RegExp     *exp1, *exp2;
+public:
+    static char *type;
+public:
+    AltOp(RegExp *e1, RegExp *e2)
+       { exp1 = e1;  exp2 = e2; }
+    char *typeOf()
+       { return type; }
+    void split(CharSet&);
+    void calcSize(Char*);
+    uint fixedLength();
+    void compile(Char*, Ins*);
+    void display(ostream &o) const {
+       o << exp1 << "|" << exp2;
+    }
+    friend RegExp *mkAlt(RegExp*, RegExp*);
+};
+
+class CatOp: public RegExp {
+private:
+    RegExp     *exp1, *exp2;
+public:
+    static char *type;
+public:
+    CatOp(RegExp *e1, RegExp *e2)
+       { exp1 = e1;  exp2 = e2; }
+    char *typeOf()
+       { return type; }
+    void split(CharSet&);
+    void calcSize(Char*);
+    uint fixedLength();
+    void compile(Char*, Ins*);
+    void display(ostream &o) const {
+       o << exp1 << exp2;
+    }
+};
+
+// Node with one operand; displayed as "<exp>+".
+class CloseOp : public RegExp
+{
+private:
+    RegExp     *exp;
+
+public:
+    static char *type;
+
+    CloseOp(RegExp *e) : exp(e) { }
+
+    char *typeOf() { return type; }
+    void split(CharSet &);
+    void calcSize(Char *);
+    void compile(Char *, Ins *);
+    void display(ostream &out) const { out << exp << "+"; }
+};
+
+extern void genCode(ostream&, RegExp*);
+extern RegExp *mkDiff(RegExp*, RegExp*);
+extern RegExp *strToRE(SubStr);
+extern RegExp *ranToRE(SubStr);
+
+#endif
diff --git a/re2c.1 b/re2c.1
new file mode 100644 (file)
index 0000000..a2a580c
--- /dev/null
+++ b/re2c.1
@@ -0,0 +1,536 @@
+.ds re \fBre2c\fP
+.ds le \fBlex\fP
+.ds rx regular expression
+.ds lx \fIl\fP-expression
+.TH RE2C 1 "8 April 1994" "Version 0.5"
+\"$Log$
+\"Revision 1.1  2003/12/13 04:58:20  nuffer
+\"Initial revision
+\"
+\"Revision 1.2  1994/04/16  15:50:32  peter
+\"Fix bug in simple example.
+\"
+\"Revision 1.1  1994/04/08  15:39:09  peter
+\"Initial revision
+\"
+.SH NAME
+re2c \- convert regular expressions to C/C++
+
+.SH SYNOPSIS
+\*(re [\fB-esb\fP] \fIname\fP
+
+.SH DESCRIPTION
+\*(re is a preprocessor that generates C-based recognizers from regular
+expressions.
+The input to \*(re consists of C/C++ source interleaved with
+comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
+scanner specifications.
+In the output these comments are replaced with code that, when
+executed, will find the next input token and then execute
+some user-supplied token-specific code.
+
+For example, given the following code
+
+.in +3
+.nf
+#define NULL            ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE         char
+#define YYCURSOR        p
+#define YYLIMIT         p
+#define YYMARKER        q
+#define YYFILL(n)
+/*!re2c
+        [0-9]+          {return YYCURSOR;}
+        [\\000-\\377]     {return NULL;}
+*/
+}
+.fi
+.in -3
+
+\*(re will generate
+
+.in +3
+.nf
+/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+#line 1 "simple.re"
+#define NULL            ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE         char
+#define YYCURSOR        p
+#define YYLIMIT         p
+#define YYMARKER        q
+#define YYFILL(n)
+{
+        YYCTYPE yych;
+        unsigned int yyaccept;
+        goto yy0;
+yy1:    ++YYCURSOR;
+yy0:
+        if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+        yych = *YYCURSOR;
+        if(yych <= '/') goto yy4;
+        if(yych >= ':') goto yy4;
+yy2:    yych = *++YYCURSOR;
+        goto yy7;
+yy3:
+#line 10
+        {return YYCURSOR;}
+yy4:    yych = *++YYCURSOR;
+yy5:
+#line 11
+        {return NULL;}
+yy6:    ++YYCURSOR;
+        if(YYLIMIT == YYCURSOR) YYFILL(1);
+        yych = *YYCURSOR;
+yy7:    if(yych <= '/') goto yy3;
+        if(yych <= '9') goto yy6;
+        goto yy3;
+}
+#line 12
+
+}
+.fi
+.in -3
+
+.SH OPTIONS
+\*(re provides the following options:
+.TP
+\fB-e\fP
+Cross-compile from an ASCII platform to an EBCDIC one. 
+.TP
+\fB-s\fP
+Generate nested \fCif\fPs for some \fCswitch\fPes.  Many compilers need this
+assist to generate better code.
+.TP
+\fB-b\fP
+Implies \fB-s\fP.  Use bit vectors as well in the attempt to coax better
+code out of the compiler.  Most useful for specifications with more than a
+few keywords (e.g. for most programming languages).
+
+.SH "INTERFACE CODE"
+Unlike other scanner generators, \*(re does not generate complete scanners:
+the user must supply some interface code.
+In particular, the user must define the following macros:
+.TP
+\fCYYCTYPE\fP
+Type used to hold an input symbol.
+Usually \fCchar\fP or \fCunsigned char\fP.
+.TP
+\fCYYCURSOR\fP
+\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol.
+The generated code advances \fCYYCURSOR\fP as symbols are matched.
+On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
+current token.  On exit, \fCYYCURSOR\fP will point to the first character of
+the following token.
+.TP
+\fCYYLIMIT\fP
+Expression of type \fC*YYCTYPE\fP that marks the end of the buffer
+(\fCYYLIMIT[-1]\fP is the last character in the buffer).
+The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
+to determine when the buffer needs (re)filling.
+.TP
+\fCYYMARKER\fP
+\*(lx of type \fC*YYCTYPE\fP.
+The generated code saves backtracking information in \fCYYMARKER\fP.
+.TP
+\fCYYFILL(\fP\fIn\fP\fC)\fP
+The generated code "calls" \fCYYFILL\fP when the buffer needs
+(re)filling:  at least \fIn\fP additional characters should
+be provided.  \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and
+\fCYYMARKER\fP as needed.  Note that for typical programming languages
+\fIn\fP will be the length of the longest keyword plus one.
+
+.SH "SCANNER SPECIFICATIONS"
+Each scanner specification consists of a set of \fIrules\fP and name
+definitions.
+Rules consist of a regular expression along with a block of C/C++ code that
+is to be executed when the associated regular expression is matched.
+Name definitions are of the form
+``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''.
+
+.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
+.TP
+\fC"foo"\fP
+the literal string \fCfoo\fP.
+ANSI-C escape sequences can be used.
+.TP
+\fC[xyz]\fP
+a "character class"; in this case,
+the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
+.TP
+\fC[abj-oZ]\fP
+a "character class" with a range in it;
+matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
+or a '\fCZ\fP'.
+.TP
+\fIr\fP\fC\e\fP\fIs\fP
+match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
+which can be expressed as character classes.
+.TP
+\fIr\fP\fC*\fP
+zero or more \fIr\fP's, where \fIr\fP is any regular expression
+.TP
+\fIr\fP\fC+\fP
+one or more \fIr\fP's
+.TP
+\fIr\fP\fC?\fP
+zero or one \fIr\fP's (that is, "an optional \fIr\fP")
+.TP
+name
+the expansion of the "name" definition (see above)
+.TP
+\fC(\fP\fIr\fP\fC)\fP
+an \fIr\fP; parentheses are used to override precedence
+(see below)
+.TP
+\fIrs\fP
+an \fIr\fP followed by an \fIs\fP ("concatenation")
+.TP
+\fIr\fP\fC|\fP\fIs\fP
+either an \fIr\fP or an \fIs\fP
+.TP
+\fIr\fP\fC/\fP\fIs\fP
+an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
+the matched text. This type of \*(rx is called "trailing context".
+.LP
+The regular expressions listed above are grouped according to
+precedence, from highest precedence at the top to lowest at the bottom.
+Those grouped together have equal precedence.
+
+.SH "A LARGER EXAMPLE"
+.LP
+.in +3
+.nf
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define ADDEQ   257
+#define ANDAND  258
+#define ANDEQ   259
+#define ARRAY   260
+#define ASM     261
+#define AUTO    262
+#define BREAK   263
+#define CASE    264
+#define CHAR    265
+#define CONST   266
+#define CONTINUE        267
+#define DECR    268
+#define DEFAULT 269
+#define DEREF   270
+#define DIVEQ   271
+#define DO      272
+#define DOUBLE  273
+#define ELLIPSIS        274
+#define ELSE    275
+#define ENUM    276
+#define EQL     277
+#define EXTERN  278
+#define FCON    279
+#define FLOAT   280
+#define FOR     281
+#define FUNCTION        282
+#define GEQ     283
+#define GOTO    284
+#define ICON    285
+#define ID      286
+#define IF      287
+#define INCR    288
+#define INT     289
+#define LEQ     290
+#define LONG    291
+#define LSHIFT  292
+#define LSHIFTEQ        293
+#define MODEQ   294
+#define MULEQ   295
+#define NEQ     296
+#define OREQ    297
+#define OROR    298
+#define POINTER 299
+#define REGISTER        300
+#define RETURN  301
+#define RSHIFT  302
+#define RSHIFTEQ        303
+#define SCON    304
+#define SHORT   305
+#define SIGNED  306
+#define SIZEOF  307
+#define STATIC  308
+#define STRUCT  309
+#define SUBEQ   310
+#define SWITCH  311
+#define TYPEDEF 312
+#define UNION   313
+#define UNSIGNED        314
+#define VOID    315
+#define VOLATILE        316
+#define WHILE   317
+#define XOREQ   318
+#define EOI     319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE   8192
+
+#define YYCTYPE         uchar
+#define YYCURSOR        cursor
+#define YYLIMIT         s->lim
+#define YYMARKER        s->ptr
+#define YYFILL(n)       {cursor = fill(s, cursor);}
+
+#define RET(i)  {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+    int                 fd;
+    uchar               *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint                line;
+} Scanner;
+
+/*
+ * Refill the input buffer of s and return the (possibly relocated) cursor.
+ * Text before s->tok is discarded, the buffer is grown when fewer than
+ * BSIZE bytes are free, and up to BSIZE bytes are read from s->fd.
+ * A short or failed read is taken as end of input: a newline sentinel is
+ * appended and s->eof is left pointing just past it.
+ */
+uchar *fill(Scanner *s, uchar *cursor){
+    if(!s->eof){
+        uint cnt = s->tok - s->bot;
+        int got;
+        if(cnt){
+            /* the two regions overlap whenever the kept text is longer
+               than the discarded prefix, so memcpy is not safe here */
+            memmove(s->bot, s->tok, s->lim - s->tok);
+            s->tok = s->bot;
+            s->ptr -= cnt;
+            cursor -= cnt;
+            s->pos -= cnt;
+            s->lim -= cnt;
+        }
+        if((s->top - s->lim) < BSIZE){
+            uchar *buf = (uchar*)
+                malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+            memcpy(buf, s->tok, s->lim - s->tok);
+            s->tok = buf;
+            s->ptr = &buf[s->ptr - s->bot];
+            cursor = &buf[cursor - s->bot];
+            s->pos = &buf[s->pos - s->bot];
+            s->lim = &buf[s->lim - s->bot];
+            s->top = &s->lim[BSIZE];
+            free(s->bot);
+            s->bot = buf;
+        }
+        /* read() returns -1 on error; treat that as zero bytes read */
+        got = read(s->fd, (char*) s->lim, BSIZE);
+        cnt = (got < 0) ? 0 : (uint) got;
+        if(cnt != BSIZE){
+            s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
+        }
+        s->lim += cnt;
+    }
+    return cursor;
+}
+
+int scan(Scanner *s){
+        uchar *cursor = s->cur;
+std:
+        s->tok = cursor;
+/*!re2c
+any     = [\\000-\\377];
+O       = [0-7];
+D       = [0-9];
+L       = [a-zA-Z_];
+H       = [a-fA-F0-9];
+E       = [Ee] [+-]? D+;
+FS      = [fFlL];
+IS      = [uUlL]*;
+ESC     = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+        "/*"                    { goto comment; }
+        
+        "auto"                  { RET(AUTO); }
+        "break"                 { RET(BREAK); }
+        "case"                  { RET(CASE); }
+        "char"                  { RET(CHAR); }
+        "const"                 { RET(CONST); }
+        "continue"              { RET(CONTINUE); }
+        "default"               { RET(DEFAULT); }
+        "do"                    { RET(DO); }
+        "double"                { RET(DOUBLE); }
+        "else"                  { RET(ELSE); }
+        "enum"                  { RET(ENUM); }
+        "extern"                { RET(EXTERN); }
+        "float"                 { RET(FLOAT); }
+        "for"                   { RET(FOR); }
+        "goto"                  { RET(GOTO); }
+        "if"                    { RET(IF); }
+        "int"                   { RET(INT); }
+        "long"                  { RET(LONG); }
+        "register"              { RET(REGISTER); }
+        "return"                { RET(RETURN); }
+        "short"                 { RET(SHORT); }
+        "signed"                { RET(SIGNED); }
+        "sizeof"                { RET(SIZEOF); }
+        "static"                { RET(STATIC); }
+        "struct"                { RET(STRUCT); }
+        "switch"                { RET(SWITCH); }
+        "typedef"               { RET(TYPEDEF); }
+        "union"                 { RET(UNION); }
+        "unsigned"              { RET(UNSIGNED); }
+        "void"                  { RET(VOID); }
+        "volatile"              { RET(VOLATILE); }
+        "while"                 { RET(WHILE); }
+        
+        L (L|D)*                { RET(ID); }
+        
+        ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+        (['] (ESC|any\\[\\n\\\\'])* ['])
+                                { RET(ICON); }
+        
+        (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+                                { RET(FCON); }
+        
+        (["] (ESC|any\\[\\n\\\\"])* ["])
+                                { RET(SCON); }
+        
+        "..."                   { RET(ELLIPSIS); }
+        ">>="                   { RET(RSHIFTEQ); }
+        "<<="                   { RET(LSHIFTEQ); }
+        "+="                    { RET(ADDEQ); }
+        "-="                    { RET(SUBEQ); }
+        "*="                    { RET(MULEQ); }
+        "/="                    { RET(DIVEQ); }
+        "%="                    { RET(MODEQ); }
+        "&="                    { RET(ANDEQ); }
+        "^="                    { RET(XOREQ); }
+        "|="                    { RET(OREQ); }
+        ">>"                    { RET(RSHIFT); }
+        "<<"                    { RET(LSHIFT); }
+        "++"                    { RET(INCR); }
+        "--"                    { RET(DECR); }
+        "->"                    { RET(DEREF); }
+        "&&"                    { RET(ANDAND); }
+        "||"                    { RET(OROR); }
+        "<="                    { RET(LEQ); }
+        ">="                    { RET(GEQ); }
+        "=="                    { RET(EQL); }
+        "!="                    { RET(NEQ); }
+        ";"                     { RET(';'); }
+        "{"                     { RET('{'); }
+        "}"                     { RET('}'); }
+        ","                     { RET(','); }
+        ":"                     { RET(':'); }
+        "="                     { RET('='); }
+        "("                     { RET('('); }
+        ")"                     { RET(')'); }
+        "["                     { RET('['); }
+        "]"                     { RET(']'); }
+        "."                     { RET('.'); }
+        "&"                     { RET('&'); }
+        "!"                     { RET('!'); }
+        "~"                     { RET('~'); }
+        "-"                     { RET('-'); }
+        "+"                     { RET('+'); }
+        "*"                     { RET('*'); }
+        "/"                     { RET('/'); }
+        "%"                     { RET('%'); }
+        "<"                     { RET('<'); }
+        ">"                     { RET('>'); }
+        "^"                     { RET('^'); }
+        "|"                     { RET('|'); }
+        "?"                     { RET('?'); }
+
+
+        [ \\t\\v\\f]+           { goto std; }
+
+        "\\n"
+            {
+                if(cursor == s->eof) RET(EOI);
+                s->pos = cursor; s->line++;
+                goto std;
+            }
+
+        any
+            {
+                printf("unexpected character: %c\\n", *s->tok);
+                goto std;
+            }
+*/
+
+comment:
+/*!re2c
+        "*/"                    { goto std; }
+        "\\n"
+            {
+                if(cursor == s->eof) RET(EOI);
+                s->tok = s->pos = cursor; s->line++;
+                goto comment;
+            }
+        any                     { goto comment; }
+*/
+}
+
+main(){
+    Scanner in;
+    int t;
+    memset((char*) &in, 0, sizeof(in));
+    in.fd = 0;
+    while((t = scan(&in)) != EOI){
+/*
+        printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
+        printf("%d\\n", t);
+*/
+    }
+    close(in.fd);
+}
+.fi
+.in -3
+
+.SH "SEE ALSO"
+.LP
+flex(1), lex(1).
+
+.SH FEATURES
+.LP
+\*(re does not provide a default action:
+the generated code assumes that the input
+will consist of a sequence of tokens.
+Typically this can be dealt with by adding a rule such as the one for
+unexpected characters in the example above.
+.LP
+The user must arrange for a sentinel token to appear at the end of input
+(and provide a rule for matching it):
+\*(re does not provide an \fC<<EOF>>\fP expression.
+If the source is from a null-byte terminated string, a
+rule matching a null character will suffice.  If the source is from a
+file then the approach taken in the example can be used: pad the input with
+a newline (or some other character that can't appear within another token);
+upon recognizing such a character check to see if it is the sentinel
+and act accordingly.
+.LP
+\*(re does not provide start conditions:  use a separate scanner
+specification for each start condition (as illustrated in the above example).
+.LP
+There is no negated character class (\fC[^x]\fP); use difference (\fIr\fP\fC\e\fP\fIs\fP) instead.
+.SH BUGS
+.LP
+Only fixed length trailing context can be handled.
+.LP
+The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not
+provided to the generated code (this value is needed for constructing
+the interface code).
+Note that this value is usually relatively small: for
+typical programming languages \fIn\fP will be the length of the longest
+keyword plus one.
+.LP
+Difference only works for character sets.
+.LP
+The \*(re internal algorithms need documentation.
+
+.SH AUTHOR
+.LP
+Please send bug reports, fixes and feedback to:
+.LP
+.nf
+Peter Bumbulis
+Computer Systems Group
+University of Waterloo
+Waterloo, Ontario
+N2L 3G1
+Internet:  peter@csg.uwaterloo.ca
+.fi
diff --git a/scanner.cc b/scanner.cc
new file mode 100644 (file)
index 0000000..19b4259
--- /dev/null
@@ -0,0 +1,470 @@
+/* Generated by re2c 0.5 on Sat May 15 11:35:52 1999 */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+/* Bytes requested per read(), and the minimum free space fill() keeps. */
+#define        BSIZE   8192
+
+/* Interface macros used by the generated scanner code below. */
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         lim
+#define        YYMARKER        ptr
+/* The requested count n is ignored: fill() always asks for BSIZE bytes. */
+#define        YYFILL(n)       {cursor = fill(cursor);}
+
+/* Save the scan position in cur, then return i from the enclosing function. */
+#define        RETURN(i)       {cur = cursor; return i;}
+
+
+/*
+ * Create a scanner that reads from descriptor i.  All buffer pointers start
+ * out NULL (the first fill() allocates the buffer) and line counting
+ * starts at 1.
+ */
+Scanner::Scanner(int i)
+    : in(i),
+      bot(NULL), tok(NULL), ptr(NULL), cur(NULL),
+      pos(NULL), lim(NULL), top(NULL), eof(NULL),
+      tchar(0), tline(0), cline(1)
+{
+}
+
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+       uint cnt = tok - bot;
+       if(cnt){
+           memcpy(bot, tok, lim - tok);
+           tok = bot;
+           ptr -= cnt;
+           cursor -= cnt;
+           pos -= cnt;
+           lim -= cnt;
+       }
+       if((top - lim) < BSIZE){
+           uchar *buf = new uchar[(lim - bot) + BSIZE];
+           memcpy(buf, tok, lim - tok);
+           tok = buf;
+           ptr = &buf[ptr - bot];
+           cursor = &buf[cursor - bot];
+           pos = &buf[pos - bot];
+           lim = &buf[lim - bot];
+           top = &lim[BSIZE];
+           delete [] bot;
+           bot = buf;
+       }
+       if((cnt = read(in, (char*) lim, BSIZE)) != BSIZE){
+           eof = &lim[cnt]; *eof++ = '\n';
+       }
+       lim += cnt;
+    }
+    return cursor;
+}
+
+#line 68
+
+
+/*
+ * Copy input text to out until a scanner specification begins
+ * (re2c-generated code; the actions come from scanner.re).
+ *
+ * Text is accumulated from tok.  At each newline the pending text is
+ * written and pos/cline are updated, unless the newline is the
+ * end-of-input sentinel (cursor == eof), in which case 0 is returned.
+ * When the 7-character opening marker of a specification (slash, star,
+ * "!re2c") has been matched, the text before the marker is written, tok is
+ * moved past it and 1 is returned.
+ */
+int Scanner::echo(ostream &out){
+    uchar *cursor = cur;
+    tok = cursor;
+echo:
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy0;
+yy1:   ++YYCURSOR;
+yy0:
+       if((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy4;
+       if(yych != '/') goto yy6;
+yy2:   yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '*') goto yy7;
+yy3:
+#line 82
+       { goto echo; }
+yy4:   yych = *++YYCURSOR;
+yy5:
+#line 78
+       { if(cursor == eof) RETURN(0);
+                                 out.write(tok, cursor - tok);
+                                 tok = pos = cursor; cline++;
+                                 goto echo; }
+yy6:   yych = *++YYCURSOR;
+       goto yy3;
+yy7:   yych = *++YYCURSOR;
+       if(yych == '!') goto yy9;
+yy8:   YYCURSOR = YYMARKER;
+       switch(yyaccept){
+       case 0: goto yy3;
+       }
+yy9:   yych = *++YYCURSOR;
+       if(yych != 'r') goto yy8;
+yy10:  yych = *++YYCURSOR;
+       if(yych != 'e') goto yy8;
+yy11:  yych = *++YYCURSOR;
+       if(yych != '2') goto yy8;
+yy12:  yych = *++YYCURSOR;
+       if(yych != 'c') goto yy8;
+yy13:  yych = *++YYCURSOR;
+yy14:
+#line 75
+       { out.write(tok, &cursor[-7] - tok);
+                                 tok = cursor;
+                                 RETURN(1); }
+}
+#line 83
+
+}
+
+
+/*
+ * Return the next token of a scanner specification (re2c-generated code;
+ * the actions come from scanner.re).
+ *
+ * Before each token, tchar/tline/tok record its column offset, line and
+ * start.  Results:
+ *   - CODE    a brace-balanced block, with yylval.token set;
+ *   - STRING  a double-quoted string, converted with strToRE;
+ *   - RANGE   a bracketed range, converted with ranToRE;
+ *   - ID      a letter followed by letters/digits, looked up via Symbol::find;
+ *   - CLOSE   one of '*', '+', '?', with yylval.op set to the character;
+ *   - other operator characters are returned as their own character code;
+ *   - 0       at the star-slash that closes the specification, or when the
+ *             end-of-input sentinel newline is reached.
+ * Blanks and tabs are skipped, newlines advance cline, comments nest (see
+ * depth), and an unexpected character is reported on cerr and skipped.
+ * fatal() is called for an unterminated string or range and for a code
+ * block that is still open at end of input.
+ */
+int Scanner::scan(){
+    uchar *cursor = cur;
+    uint depth;
+
+scan:
+    tchar = cursor - pos;
+    tline = cline;
+    tok = cursor;
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy15;
+yy16:  ++YYCURSOR;
+yy15:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= ':'){
+               if(yych <= '"'){
+                       if(yych <= '\n'){
+                               if(yych <= '\b')        goto yy35;
+                               if(yych <= '\t')        goto yy31;
+                               goto yy33;
+                       } else {
+                               if(yych == ' ') goto yy31;
+                               if(yych <= '!') goto yy35;
+                               goto yy23;
+                       }
+               } else {
+                       if(yych <= '*'){
+                               if(yych <= '\'')        goto yy35;
+                               if(yych <= ')') goto yy27;
+                               goto yy21;
+                       } else {
+                               if(yych <= '+') goto yy28;
+                               if(yych == '/') goto yy19;
+                               goto yy35;
+                       }
+               }
+       } else {
+               if(yych <= 'Z'){
+                       if(yych <= '='){
+                               if(yych == '<') goto yy35;
+                               goto yy27;
+                       } else {
+                               if(yych == '?') goto yy28;
+                               if(yych <= '@') goto yy35;
+                               goto yy29;
+                       }
+               } else {
+                       if(yych <= '`'){
+                               if(yych <= '[') goto yy25;
+                               if(yych <= '\\')        goto yy27;
+                               goto yy35;
+                       } else {
+                               if(yych <= 'z') goto yy29;
+                               if(yych <= '{') goto yy17;
+                               if(yych <= '|') goto yy27;
+                               goto yy35;
+                       }
+               }
+       }
+yy17:  yych = *++YYCURSOR;
+yy18:
+#line 96
+       { depth = 1;
+                                 goto code;
+                               }
+yy19:  yych = *++YYCURSOR;
+       if(yych == '*') goto yy54;
+yy20:
+#line 115
+       { RETURN(*tok); }
+yy21:  yych = *++YYCURSOR;
+       if(yych == '/') goto yy52;
+yy22:
+#line 117
+       { yylval.op = *tok;
+                                 RETURN(CLOSE); }
+yy23:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych != '\n')        goto yy48;
+yy24:
+#line 108
+       { fatal("bad string"); }
+yy25:  yyaccept = 1;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych != '\n')        goto yy42;
+yy26:
+#line 113
+       { fatal("bad character constant"); }
+yy27:  yych = *++YYCURSOR;
+       goto yy20;
+yy28:  yych = *++YYCURSOR;
+       goto yy22;
+yy29:  yych = *++YYCURSOR;
+       goto yy40;
+yy30:
+#line 120
+       { cur = cursor;
+                                 yylval.symbol = Symbol::find(token());
+                                 return ID; }
+yy31:  yych = *++YYCURSOR;
+       goto yy38;
+yy32:
+#line 124
+       { goto scan; }
+yy33:  yych = *++YYCURSOR;
+yy34:
+#line 126
+       { if(cursor == eof) RETURN(0);
+                                 pos = cursor; cline++;
+                                 goto scan;
+                               }
+yy35:  yych = *++YYCURSOR;
+yy36:
+#line 131
+       { cerr << "unexpected character: " << *tok << endl;
+                                 goto scan;
+                               }
+yy37:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy38:  if(yych == '\t')        goto yy37;
+       if(yych == ' ') goto yy37;
+       goto yy32;
+yy39:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy40:  if(yych <= '@'){
+               if(yych <= '/') goto yy30;
+               if(yych <= '9') goto yy39;
+               goto yy30;
+       } else {
+               if(yych <= 'Z') goto yy39;
+               if(yych <= '`') goto yy30;
+               if(yych <= 'z') goto yy39;
+               goto yy30;
+       }
+yy41:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy42:  if(yych <= '['){
+               if(yych != '\n')        goto yy41;
+       } else {
+               if(yych <= '\\')        goto yy44;
+               if(yych <= ']') goto yy45;
+               goto yy41;
+       }
+yy43:  YYCURSOR = YYMARKER;
+       switch(yyaccept){
+       case 0: goto yy24;
+       case 1: goto yy26;
+       }
+yy44:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy43;
+       goto yy41;
+yy45:  yych = *++YYCURSOR;
+yy46:
+#line 110
+       { cur = cursor;
+                                 yylval.regexp = ranToRE(token());
+                                 return RANGE; }
+yy47:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy48:  if(yych <= '!'){
+               if(yych == '\n')        goto yy43;
+               goto yy47;
+       } else {
+               if(yych <= '"') goto yy50;
+               if(yych != '\\')        goto yy47;
+       }
+yy49:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy43;
+       goto yy47;
+yy50:  yych = *++YYCURSOR;
+yy51:
+#line 105
+       { cur = cursor;
+                                 yylval.regexp = strToRE(token());
+                                 return STRING; }
+yy52:  yych = *++YYCURSOR;
+yy53:
+#line 102
+       { tok = cursor;
+                                 RETURN(0); }
+yy54:  yych = *++YYCURSOR;
+yy55:
+#line 99
+       { depth = 1;
+                                 goto comment; }
+}
+#line 134
+
+
+/* Inside a code block: count braces with depth, stepping over quoted
+   strings so that braces inside them are not counted. */
+code:
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy56;
+yy57:  ++YYCURSOR;
+yy56:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= '&'){
+               if(yych <= '\n'){
+                       if(yych <= '\t')        goto yy64;
+                       goto yy62;
+               } else {
+                       if(yych == '"') goto yy66;
+                       goto yy64;
+               }
+       } else {
+               if(yych <= '{'){
+                       if(yych <= '\'')        goto yy67;
+                       if(yych <= 'z') goto yy64;
+                       goto yy60;
+               } else {
+                       if(yych != '}') goto yy64;
+               }
+       }
+yy58:  yych = *++YYCURSOR;
+yy59:
+#line 138
+       { if(--depth == 0){
+                                       cur = cursor;
+                                       yylval.token = new Token(token(), tline);
+                                       return CODE;
+                                 }
+                                 goto code; }
+yy60:  yych = *++YYCURSOR;
+yy61:
+#line 144
+       { ++depth;
+                                 goto code; }
+yy62:  yych = *++YYCURSOR;
+yy63:
+#line 146
+       { if(cursor == eof) fatal("missing '}'");
+                                 pos = cursor; cline++;
+                                 goto code;
+                               }
+yy64:  yych = *++YYCURSOR;
+yy65:
+#line 150
+       { goto code; }
+yy66:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '\n')        goto yy65;
+       goto yy73;
+yy67:  yyaccept = 0;
+       yych = *(YYMARKER = ++YYCURSOR);
+       if(yych == '\n')        goto yy65;
+       goto yy69;
+yy68:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy69:  if(yych <= '&'){
+               if(yych != '\n')        goto yy68;
+       } else {
+               if(yych <= '\'')        goto yy64;
+               if(yych == '\\')        goto yy71;
+               goto yy68;
+       }
+yy70:  YYCURSOR = YYMARKER;
+       switch(yyaccept){
+       case 0: goto yy65;
+       }
+yy71:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy70;
+       goto yy68;
+yy72:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy73:  if(yych <= '!'){
+               if(yych == '\n')        goto yy70;
+               goto yy72;
+       } else {
+               if(yych <= '"') goto yy64;
+               if(yych != '\\')        goto yy72;
+       }
+yy74:  ++YYCURSOR;
+       if(YYLIMIT == YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       if(yych == '\n')        goto yy70;
+       goto yy72;
+}
+#line 151
+
+
+/* Inside a comment: comments nest, depth counts the open levels. */
+comment:
+{
+       YYCTYPE yych;
+       unsigned int yyaccept;
+       goto yy75;
+yy76:  ++YYCURSOR;
+yy75:
+       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       if(yych <= ')'){
+               if(yych == '\n')        goto yy80;
+               goto yy82;
+       } else {
+               if(yych <= '*') goto yy77;
+               if(yych == '/') goto yy79;
+               goto yy82;
+       }
+yy77:  yych = *++YYCURSOR;
+       if(yych == '/') goto yy85;
+yy78:
+#line 165
+       { goto comment; }
+yy79:  yych = *++YYCURSOR;
+       if(yych == '*') goto yy83;
+       goto yy78;
+yy80:  yych = *++YYCURSOR;
+yy81:
+#line 161
+       { if(cursor == eof) RETURN(0);
+                                 tok = pos = cursor; cline++;
+                                 goto comment;
+                               }
+yy82:  yych = *++YYCURSOR;
+       goto yy78;
+yy83:  yych = *++YYCURSOR;
+yy84:
+#line 159
+       { ++depth;
+                                 goto comment; }
+yy85:  yych = *++YYCURSOR;
+yy86:
+#line 155
+       { if(--depth == 0)
+                                       goto scan;
+                                   else
+                                       goto comment; }
+}
+#line 166
+
+}
+
+/*
+ * Print msg on cerr, prefixed with the line and 1-based column recorded for
+ * the current token (tline, tchar + 1), then exit with status 1.
+ */
+void Scanner::fatal(char *msg)
+{
+    cerr << "line " << tline;
+    cerr << ", column " << (tchar + 1);
+    cerr << ": " << msg << endl;
+    exit(1);
+}
diff --git a/scanner.h b/scanner.h
new file mode 100644 (file)
index 0000000..cf5bb1f
--- /dev/null
+++ b/scanner.h
@@ -0,0 +1,30 @@
+#ifndef _scanner_h
+#define        _scanner_h
+
+#include "token.h"
+
+class Scanner {
+  private:
+    int                        in;
+    uchar              *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    uint               tchar, tline, cline;
+  private:
+    uchar *fill(uchar*);
+  public:
+    Scanner(int);
+    int echo(ostream&);
+    int scan();
+    void fatal(char*);
+    SubStr token();
+    uint line();
+};
+
+// Text of the current token: a SubStr built from tok and the length
+// cur - tok.
+inline SubStr Scanner::token(){
+    return SubStr(tok, cur - tok);
+}
+
+// Current line number (cline).
+inline uint Scanner::line(){
+    return cline;
+}
+
+#endif
diff --git a/scanner.re b/scanner.re
new file mode 100644 (file)
index 0000000..f7b48cb
--- /dev/null
@@ -0,0 +1,173 @@
+#include <stdlib.h>
+#include <string.h>
+#include <iostream.h>
+#include <unistd.h>
+#include "scanner.h"
+#include "parser.h"
+#include "y.tab.h"
+
+extern YYSTYPE yylval;
+
+#define        BSIZE   8192
+
+#define        YYCTYPE         uchar
+#define        YYCURSOR        cursor
+#define        YYLIMIT         lim
+#define        YYMARKER        ptr
+#define        YYFILL(n)       {cursor = fill(cursor);}
+
+#define        RETURN(i)       {cur = cursor; return i;}
+
+
+/* Create a scanner reading from file descriptor `i`.  No buffer exists yet
+ * (every buffer pointer starts out NULL); line counting starts at 1. */
+Scanner::Scanner(int i)
+    : in(i),
+      bot(NULL),
+      tok(NULL),
+      ptr(NULL),
+      cur(NULL),
+      pos(NULL),
+      lim(NULL),
+      top(NULL),
+      eof(NULL),
+      tchar(0),
+      tline(0),
+      cline(1)
+{
+}
+
+uchar *Scanner::fill(uchar *cursor){
+    if(!eof){
+       uint cnt = tok - bot;
+       if(cnt){
+           memcpy(bot, tok, lim - tok);
+           tok = bot;
+           ptr -= cnt;
+           cursor -= cnt;
+           pos -= cnt;
+           lim -= cnt;
+       }
+       if((top - lim) < BSIZE){
+           uchar *buf = new uchar[(lim - bot) + BSIZE];
+           memcpy(buf, tok, lim - tok);
+           tok = buf;
+           ptr = &buf[ptr - bot];
+           cursor = &buf[cursor - bot];
+           pos = &buf[pos - bot];
+           lim = &buf[lim - bot];
+           top = &lim[BSIZE];
+           delete [] bot;
+           bot = buf;
+       }
+       if((cnt = read(in, (char*) lim, BSIZE)) != BSIZE){
+           eof = &lim[cnt]; *eof++ = '\n';
+       }
+       lim += cnt;
+    }
+    return cursor;
+}
+/* Named sub-patterns shared by the scanner rules that follow.  The
+ * backslash operator between two patterns is re2c set difference.
+ *   any      every byte value (octal 000-377)
+ *   dot      any, except newline
+ *   esc      dot, except backslash
+ *   cstring  bracketed character class; backslash escapes the next byte
+ *   dstring  double-quoted string; backslash escapes the next byte
+ *   sstring  single-quoted string; backslash escapes the next byte
+ *   letter   a-z and A-Z
+ *   digit    0-9
+ */
+/*!re2c
+any            = [\000-\377];
+dot            = any \ [\n];
+esc            = dot \ [\\];
+cstring                = "["  ((esc \ [\]]) | "\\" dot)* "]" ;
+dstring                = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
+sstring                = "'"  ((esc \ ['] ) | "\\" dot)* "'" ;
+letter         = [a-zA-Z];
+digit          = [0-9];
+*/
+
+/*
+ * Copy input to `out` unchanged, one line at a time, until the opening
+ * marker of a re2c block is found.
+ *
+ * Returns 1 when the marker was found: the text in front of it has been
+ * written, the 7-character marker itself is dropped, and `cur` is left
+ * just past it.
+ * Returns 0 at end of input.  Any text still pending at that point (a last
+ * line without a terminating newline) is written first; the newline
+ * sentinel that fill() appends to the input is never written.
+ */
+int Scanner::echo(ostream &out){
+    uchar *cursor = cur;
+    tok = cursor;
+echo:
+/*!re2c
+       "/*!re2c"               { out.write(tok, &cursor[-7] - tok);
+                                 tok = cursor;
+                                 RETURN(1); }
+       "\n"                    { if(cursor == eof){
+                                       out.write(tok, &cursor[-1] - tok);
+                                       RETURN(0);
+                                 }
+                                 out.write(tok, cursor - tok);
+                                 tok = pos = cursor; cline++;
+                                 goto echo; }
+        any                    { goto echo; }
+*/
+}
+
+/*
+ * Tokenizer for the text of a re2c block.  Scanning resumes at `cur`.  At
+ * the start of every token tchar and tline record its column offset and
+ * line, and tok is set to its first byte.
+ *
+ * Return value, with the yylval member that is filled in:
+ *   CODE    brace-balanced action text, braces included (yylval.token)
+ *   STRING  double-quoted string (yylval.regexp, built by strToRE)
+ *   RANGE   bracketed character class (yylval.regexp, built by ranToRE)
+ *   ID      letter followed by letters or digits (yylval.symbol)
+ *   CLOSE   one of * + ? (yylval.op holds the character)
+ *   one of ( ) | = ; / and backslash: the character itself
+ *   0       at the comment-close marker that ends the block, or at end
+ *           of input
+ *
+ * Blanks and tabs are skipped.  Comments are skipped and may nest.  Any
+ * other character is reported on cerr and skipped.  An unterminated string
+ * or character class, or end of input inside an action, ends the program
+ * through fatal().
+ *
+ * `depth` counts open braces while in the code state and open comments
+ * while in the comment state.
+ */
+int Scanner::scan(){
+    uchar *cursor = cur;
+    uint depth;
+
+scan:
+    tchar = cursor - pos;
+    tline = cline;
+    tok = cursor;
+/*!re2c
+       "{"                     { depth = 1;
+                                 goto code;
+                               }
+       "/*"                    { depth = 1;
+                                 goto comment; }
+
+       "*/"                    { tok = cursor;
+                                 RETURN(0); }
+
+       dstring                 { cur = cursor;
+                                 yylval.regexp = strToRE(token());
+                                 return STRING; }
+       "\""                    { fatal("bad string"); }
+
+       cstring                 { cur = cursor;
+                                 yylval.regexp = ranToRE(token());
+                                 return RANGE; }
+       "["                     { fatal("bad character constant"); }
+
+       [()|=;/\\]              { RETURN(*tok); }
+
+       [*+?]                   { yylval.op = *tok;
+                                 RETURN(CLOSE); }
+
+       letter (letter|digit)*  { cur = cursor;
+                                 yylval.symbol = Symbol::find(token());
+                                 return ID; }
+
+       [ \t]+                  { goto scan; }
+
+       "\n"                    { if(cursor == eof) RETURN(0);
+                                 pos = cursor; cline++;
+                                 goto scan;
+                               }
+
+       any                     { cerr << "unexpected character: " << *tok << endl;
+                                 goto scan;
+                               }
+*/
+
+code:
+/*!re2c
+       "}"                     { if(--depth == 0){
+                                       cur = cursor;
+                                       yylval.token = new Token(token(), tline);
+                                       return CODE;
+                                 }
+                                 goto code; }
+       "{"                     { ++depth;
+                                 goto code; }
+       "\n"                    { if(cursor == eof) fatal("missing '}'");
+                                 pos = cursor; cline++;
+                                 goto code;
+                               }
+       dstring | sstring | any { goto code; }
+*/
+
+comment:
+/*!re2c
+       "*/"                    { if(--depth == 0)
+                                       goto scan;
+                                   else
+                                       goto comment; }
+       "/*"                    { ++depth;
+                                 goto comment; }
+       "\n"                    { if(cursor == eof) RETURN(0);
+                                 tok = pos = cursor; cline++;
+                                 goto comment;
+                               }
+        any                    { goto comment; }
+*/
+}
+
+/* Print `msg` on cerr, prefixed with the line (tline) and column
+ * (tchar + 1) recorded for the current token, then exit with status 1. */
+void Scanner::fatal(char *msg){
+    const uint column = tchar + 1;
+    cerr << "line " << tline;
+    cerr << ", column " << column;
+    cerr << ": " << msg << endl;
+    exit(1);
+}
diff --git a/substr.cc b/substr.cc
new file mode 100644 (file)
index 0000000..3275660
--- /dev/null
+++ b/substr.cc
@@ -0,0 +1,30 @@
+#include <string.h>
+#include "substr.h"
+
+/* Write the len bytes starting at str to `o`, with no terminator added. */
+void SubStr::out(ostream& o) const {
+    o.write(this->str, this->len);
+}
+
+/* Two substrings are equal when they have the same length and the same
+ * bytes. */
+bool operator==(const SubStr &s1, const SubStr &s2){
+    if(s1.len != s2.len)
+       return false;
+    return memcmp(s1.str, s2.str, s1.len) == 0;
+}
+
+/* Build a Str holding its own freshly allocated copy of the bytes of `s`. */
+Str::Str(const SubStr& s)
+    : SubStr(new char[s.len], s.len)
+{
+    /* len was just initialized from s.len, so either names the byte count */
+    memcpy(this->str, s.str, this->len);
+}
+
+/* Take over the buffer of `s`: this object adopts s.str and s.len, and `s`
+ * is left empty (NULL, 0) so the buffer has a single owner. */
+Str::Str(Str& s)
+    : SubStr(s.str, s.len)
+{
+    s.len = 0;
+    s.str = NULL;
+}
+
+/* Empty string: NULL buffer, zero length. */
+Str::Str()
+    : SubStr((char*) NULL, 0)
+{
+}
+
+
+/*
+ * Release the character buffer owned by this Str.
+ *
+ * The buffer is obtained with new char[] in Str(const SubStr&), so it has
+ * to be released with the array form of delete; the scalar form is
+ * undefined behaviour for array allocations.  A NULL str (default
+ * constructed, or emptied by Str(Str&)) is a harmless no-op.
+ *
+ * str and len are then overwritten with -1, presumably so that use of a
+ * destroyed Str stands out.
+ */
+Str::~Str() {
+    delete [] str;
+    str = (char*)-1;
+    len = (uint)-1;
+}
diff --git a/substr.h b/substr.h
new file mode 100644 (file)
index 0000000..fb5e2cc
--- /dev/null
+++ b/substr.h
@@ -0,0 +1,45 @@
+#ifndef _substr_h
+#define _substr_h
+
+#include <iostream.h>
+#include "basics.h"
+
+/*
+ * A (pointer, length) view of a run of bytes.  SubStr itself declares no
+ * destructor and never frees `str`; the Str subclass is the owning
+ * variant.
+ */
+class SubStr {
+public:
+    char               *str;   /* first byte; no terminator is implied */
+    uint               len;    /* number of bytes */
+
+    SubStr(uchar*, uint);
+    SubStr(char*, uint);
+    SubStr(const SubStr&);
+
+    /* Writes the bytes to the given stream. */
+    void out(ostream&) const;
+
+    friend bool operator==(const SubStr &, const SubStr &);
+};
+
+/*
+ * A SubStr that owns its buffer: the destructor frees `str`.
+ */
+class Str: public SubStr {
+public:
+    Str();                     /* empty string */
+    Str(const SubStr&);                /* copies the bytes into a new buffer */
+    Str(Str&);                 /* takes the buffer over from its argument */
+    ~Str();
+};
+
+/* Stream insertion for a substring: writes its bytes through SubStr::out
+ * and hands the stream back for chaining. */
+inline ostream& operator<<(ostream& o, const SubStr &s)
+{
+    s.out(o);
+    return o;
+}
+
+/* Stream insertion through a pointer: writes the bytes of the pointed-to
+ * substring.  `s` is dereferenced without a NULL check. */
+inline ostream& operator<<(ostream& o, const SubStr* s){
+    s->out(o);
+    return o;
+}
+
+/* View `l` bytes starting at `s`; the unsigned pointer is only cast. */
+inline SubStr::SubStr(uchar *s, uint l){
+    str = (char*) s;
+    len = l;
+}
+
+/* View `l` bytes starting at `s`. */
+inline SubStr::SubStr(char *s, uint l){
+    str = s;
+    len = l;
+}
+
+/* Copy the view only: the new object points at the same bytes as `s`. */
+inline SubStr::SubStr(const SubStr &s){
+    str = s.str;
+    len = s.len;
+}
+
+#endif
diff --git a/token.h b/token.h
new file mode 100644 (file)
index 0000000..de51eb4
--- /dev/null
+++ b/token.h
@@ -0,0 +1,18 @@
+#ifndef _token_h
+#define        _token_h
+
+#include "substr.h"
+
+/*
+ * A piece of source text together with the line number it was given at
+ * construction.
+ */
+class Token {
+  public:
+    Str                        text;   /* owned copy of the token text */
+    uint               line;   /* line number passed to the constructor */
+
+    Token(SubStr, uint);
+};
+
+/* Store a copy of the text `t` and the line number `l`. */
+inline Token::Token(SubStr t, uint l)
+    : text(t),
+      line(l)
+{
+}
+
+#endif
diff --git a/translate.cc b/translate.cc
new file mode 100644 (file)
index 0000000..2eeaabf
--- /dev/null
@@ -0,0 +1,61 @@
+#include "globals.h"
+/* Identity table: entry i holds the value i for every byte value.  It is
+ * the initial target of both xlat and talx. */
+uchar asc2asc[256] = {
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
+/* Active translation tables; both start out as the identity mapping.
+ * NOTE(review): presumably xlat is the forward table and talx its inverse,
+ * repointed at asc2ebc / ebc2asc for EBCDIC - confirm against the users. */
+uchar *xlat = asc2asc;
+uchar *talx = asc2asc;
+/* Translation table indexed by an ISO 8859/1 byte; each entry is the
+ * corresponding Code Page 37 (EBCDIC) byte. */
+uchar asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */
+0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f,
+0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f,
+0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
+0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xba,0xe0,0xbb,0xb0,0x6d,
+0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96,
+0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07,
+0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
+0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff,
+0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbd,0xb4,0x9a,0x8a,0x5f,0xca,0xaf,0xbc,
+0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab,
+0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77,
+0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xad,0x8e,0x59,
+0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57,
+0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0xae,0xdf
+};
+/* Translation table indexed by a Code Page 37 (EBCDIC) byte; each entry is
+ * the corresponding ISO 8859/1 byte.  The reverse direction of asc2ebc. */
+uchar ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */
+0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
+0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
+0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
+0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0xac,
+0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
+0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
+0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xde,0xb1,
+0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
+0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0xdd,0xfe,0xae,
+0x5e,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0x5b,0x5d,0xaf,0xa8,0xb4,0xd7,
+0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
+0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
+0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f
+};
diff --git a/y.tab.h b/y.tab.h
new file mode 100644 (file)
index 0000000..d7b3702
--- /dev/null
+++ b/y.tab.h
@@ -0,0 +1,12 @@
+#define CLOSE 257 /* postfix operator, one of * + ? */
+#define ID 258 /* identifier: a letter followed by letters or digits */
+#define CODE 259 /* brace-delimited action text */
+#define RANGE 260 /* bracketed character class */
+#define STRING 261 /* double-quoted string */
+typedef union { /* value the scanner stores in yylval alongside a token code */
+    Symbol     *symbol; /* set for ID */
+    RegExp     *regexp; /* set for STRING and RANGE */
+    Token      *token; /* set for CODE */
+    char       op; /* set for CLOSE: the operator character */
+} YYSTYPE;
+extern YYSTYPE yylval;