-# $Log$
-# Revision 1.2 2004/01/12 04:36:00 nuffer
-# Integrated in bug fixes from yasm. 1. Fix detection of EOF in the re2c scanner. 2. Correctly output #line directives so that debugging the generated output is possible.
-#
-# Revision 1.1.1.1 2003/12/13 04:58:19 nuffer
-# Initial import
-#
-#Revision 1.1 1994/04/08 16:30:37 peter
-#Initial revision
-#
+# $Id$
BIN = /usr/local/bin
MAN = /usr/local/man
+RE2C_VERSION = 0.9.2
%.o : %.cc ; $(CC) -o $@ $(CFLAGS) -c $<
%.cc : %.y ; $(YACC) $(YFLAGS) $<; mv $(YTAB).c $@
-%.cc : %.l ; $(LEX) $(LFLAGS) $<; mv $(LEXYY).c $@
+# Generate a scanner source from its .re specification with the re2c binary
+# built in this directory. When that binary does not exist yet (first build),
+# fall back to the pre-generated copy shipped in bootstrap/.
+# The existence test and the invocation must name the same file, hence the
+# explicit ./ prefix: a bare "re2c" would be looked up in $PATH instead.
%.cc: %.re
- ./re2c -s $< >$@
+ -@if test -x re2c; then \
+ echo "./re2c -s $< >$@"; \
+ ./re2c -s $< >$@; \
+ else \
+ echo "cp -f bootstrap/$@ $@"; \
+ cp -f bootstrap/$@ $@; \
+ fi
SOURCES = code.cc dfa.cc main.cc parser.y actions.cc scanner.re substr.cc\
translate.cc
OBJS = code.o dfa.o main.o parser.o actions.o scanner.o substr.o\
translate.o
-CC = g++
-CFLAGS = -O2 -Wall -I. -Wno-unused -Wno-parentheses
+CC = g++
+CFLAGS = -O2 -Wall -I. -Wno-unused -Wno-parentheses -Wno-deprecated
+YACC = bison -y
YFLAGS = -d
LDFLAGS =
default: re2c
-clean:
- rm -f *.o *.s y.tab.c y.tab.h scanner.cc parser.cc .version version.h re2c
+dist-clean:
+ rm -f *.o *.s y.tab.c y.tab.h parser.cc .version version.h
+
+clean: dist-clean
+ rm -f re2c scanner.cc README re2c.1 re2c.ps re2c*.spec makerpm
parser.cc: parser.y
- bison -d parser.y
- mv -f parser.tab.c parser.cc
+ $(YACC) $(YFLAGS) parser.y
+ mv -f y.tab.c parser.cc
-re2c: $(OBJS)
+re2c: README re2c.1 $(OBJS)
$(CC) -o $@ $(OBJS) $(LDFLAGS) -lstdc++
-.version: README
- egrep "^Version" README | sed 's/Version //' > .version
+re2c.ps:
+ gunzip -c doc/loplas.ps.gz > re2c.ps
+
+.version:
+ echo $(RE2C_VERSION) > .version
-version.h: .version
- echo "#define RE2C_VERSION" `cat .version` > version.h
+version.h:
+ echo "#define RE2C_VERSION \"$(RE2C_VERSION)\"" > version.h
-install: re2c
+# README, makerpm and re2c.1 are generated from their .in templates by
+# replacing the RE2C_VERSION placeholder with the version set in this file.
+# Each template is listed as a prerequisite so that editing it regenerates
+# the corresponding output.
+README: README.in
+ sed 's/RE2C_VERSION/$(RE2C_VERSION)/g' README.in > README
+
+makerpm: makerpm.in
+ sed 's/RE2C_VERSION/$(RE2C_VERSION)/g' makerpm.in > makerpm
+ chmod +x makerpm
+
+re2c.1: re2c.1.in
+ sed 's/RE2C_VERSION/$(RE2C_VERSION)/g' re2c.1.in > re2c.1
+
+install: re2c re2c.1
install -d $(BIN)
install -s re2c $(BIN)
install -d $(MAN)/man1
install -m 0644 re2c.1 $(MAN)/man1
-dist: re2c scanner.cc .version
- mkdir re2c-`cat .version`
- cp -P `p4 files ... | sed s/\\\\/\\\\/depot\\\\/home\\\\/re2c\\\\/// | sed '/- delete/d' | sed s/#.*$$//` re2c-`cat .version`/
- tar zcf re2c-`cat .version`.tar.gz re2c-`cat .version`/
- rm -rf re2c-`cat .version`
+uninstall:
+ rm -f $(BIN)/re2c
+ rm -f $(MAN)/man1/re2c.1*
+
+dist: re2c scanner.cc
+ mkdir re2c-$(RE2C_VERSION)
+ cp -P `p4 files ... | sed s/\\\\/\\\\/depot\\\\/home\\\\/re2c\\\\/// | sed '/- delete/d' | sed s/#.*$$//` re2c-$(RE2C_VERSION)/
+ tar zcf re2c-$(RE2C_VERSION).tar.gz re2c-$(RE2C_VERSION)/
+ rm -rf re2c-$(RE2C_VERSION)
#
# generated with "gcc -I. -MM -x c++ *.cc *.y *.re"
scanner.o : scanner.re scanner.h token.h substr.h basics.h \
parser.h re.h ins.h ./parser.o
parser.o : parser.y globals.h basics.h parser.h scanner.h token.h \
- substr.h re.h ins.h
+ substr.h re.h ins.h version.h
--- /dev/null
+re2c Version RE2C_VERSION
+------------------
+
+Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
+Currently maintained by Brian Young (bayoung@acm.org)
+
+The re2c distribution can be found at:
+
+ http://sourceforge.net/projects/re2c/
+
+This distribution is a cleaned up version of the 0.5 release
+maintained by me (Brian Young). Several bugs were fixed as well
+as code cleanup for warning free compilation. It has been developed
+and tested with egcs 1.0.2 and gcc 2.7.2.3, 2.96 and 3.3.1 on Linux
+x86. You can compile your own version with other gcc versions if you
+have yacc or any working bison version (tested up to bison 1.875).
+
+You can install this software by simply typing the following commands:
+ make
+ make install
+
+Or you can create a rpm package and install it by the following commands:
+ make makerpm
+ ./makerpm <release>
+ rpm -Uhv <packagedir>/re2c-RE2C_VERSION-<release>.rpm
+
+Here <release> should be a number like 1. And <packagedir> must equal
+the directory where the makerpm step has written the generated rpm to.
+
+Peter Bumbulis' original release can be found at:
+
+ ftp://csg.uwaterloo.ca/pub/peter/re2c.0.5.tar.gz
+
+re2c is a great tool for writing fast and flexible lexers. It has
+served many people well for many years and it deserves to be
+maintained more actively. re2c is on the order of 2-3 times faster
+than a flex based scanner, and its input model is much more
+flexible.
+
+Patches and requests for features will be entertained. Areas of
+particular interest to me are porting (a Solaris and an NT
+version will be forthcoming) and wide character support. Note
+that the code is already quite portable and should be buildable
+on any platform with minor makefile changes.
+
+Peter's original version 0.5 ANNOUNCE and README follows.
+
+Brian
+
+--
+
+re2c is a tool for generating C-based recognizers from regular
+expressions. re2c-based scanners are efficient: for programming
+languages, given similar specifications, an re2c-based scanner is
+typically almost twice as fast as a flex-based scanner with little or no
+increase in size (possibly a decrease on cisc architectures). Indeed,
+re2c-based scanners are quite competitive with hand-crafted ones.
+
+Unlike flex, re2c does not generate complete scanners: the user must
+supply some interface code. While this code is not bulky (about 50-100
+lines for a flex-like scanner; see the man page and examples in the
+distribution) careful coding is required for efficiency (and
+correctness). One advantage of this arrangement is that the generated
+code is not tied to any particular input model. For example, re2c
+generated code can be used to scan data from a null-byte terminated
+buffer as illustrated below.
+
+Given the following source
+
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ /*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+ */
+ }
+
+re2c will generate
+
+ /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+ #line 1 "simple.re"
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ {
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ yy1: ++YYCURSOR;
+ yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+ yy2: yych = *++YYCURSOR;
+ goto yy7;
+ yy3:
+ #line 10
+ {return YYCURSOR;}
+ yy4: yych = *++YYCURSOR;
+ yy5:
+ #line 11
+ {return NULL;}
+ yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+ }
+ #line 12
+
+ }
+
+Note that most compilers will perform dead-code elimination to remove
+all YYCURSOR, YYLIMIT comparisons.
+
+re2c was developed for a particular project (constructing a fast REXX
+scanner of all things!) and so while it has some rough edges, it should
+be quite usable. More information about re2c can be found in the
+(admittedly skimpy) man page; the algorithms and heuristics used are
+described in an upcoming LOPLAS article (included in the distribution).
+Probably the best way to find out more about re2c is to try the supplied
+examples. re2c is written in C++, and is currently being developed
+under Linux using gcc 2.5.8.
+
+Peter
+
+--
+
+re2c is distributed with no warranty whatever. The code is certain to
+contain errors. Neither the author nor any contributor takes
+responsibility for any consequences of its use.
+
+re2c is in the public domain. The data structures and algorithms used
+in re2c are all either taken from documents available to the general
+public or are inventions of the author. Programs generated by re2c may
+be distributed freely. re2c itself may be distributed freely, in source
+or binary, unchanged or modified. Distributors may charge whatever fees
+they can obtain for re2c.
+
+If you do make use of re2c, or incorporate it into a larger project, an
+acknowledgement somewhere (documentation, research report, etc.) would
+be appreciated.
+
+Please send bug reports and feedback (including suggestions for
+improving the distribution) to
+
+ peter@csg.uwaterloo.ca
+
+Include a small example and the banner from parser.y with bug reports.
+
+/* $Id$ */
#include <time.h>
#include <string.h>
#include <iostream.h>
+/* $Id$ */
#ifndef _basics_h
#define _basics_h
+/* $Id$ */
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+/* $Id$ */
#ifndef _dfa_h
#define _dfa_h
+/* $Id$ */
#ifndef _globals_h
#define _globals_h
+/* $Id$ */
#ifndef _ins_h
#define _ins_h
+/* $Id$ */
#include <fstream>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
+#include <string.h>
#include "globals.h"
#include "parser.h"
#include "dfa.h"
+#include "version.h"
char *fileName;
bool sFlag = false;
using namespace std;
-int main(unsigned argc, char *argv[]){
+int main(unsigned argc, char *argv[])
+{
fileName = NULL;
- if(argc == 1)
- goto usage;
- while(--argc > 1){
- char *p = *++argv;
- while(*++p != '\0'){
- switch(*p){
- case 'e':
- xlat = asc2ebc;
- talx = ebc2asc;
- break;
- case 's':
- sFlag = true;
- break;
- case 'b':
- sFlag = true;
- bFlag = true;
- break;
- default:
+ if (argc == 1) {
goto usage;
- }
}
+ while(--argc > 1) {
+ char *p = *++argv;
+ if (*p != '-' || *(p+1) == '\0') {
+ goto usage;
+ }
+ while (*++p != '\0') {
+ switch(*p){
+ case 'e':
+ xlat = asc2ebc;
+ talx = ebc2asc;
+ break;
+ case 's':
+ sFlag = true;
+ break;
+ case 'b':
+ sFlag = true;
+ bFlag = true;
+ break;
+ case '-':
+ if (!strcmp(p, "-version")) {
+ goto version;
+ }
+ goto usage;
+ default:
+ goto usage;
+ }
+ }
}
fileName = *++argv;
+ if (!strcmp(fileName, "--version")) {
+ goto version;
+ }
int fd;
- if(fileName[0] == '-' && fileName[1] == '\0'){
- fileName = "<stdin>";
- fd = 0;
+ if (fileName[0] == '-' && fileName[1] == '\0') {
+ fileName = "<stdin>";
+ fd = 0;
} else {
- if((fd = open(fileName, O_RDONLY)) < 0){
- cerr << "can't open " << fileName << "\n";
- return 1;
- }
+ if ((fd = open(fileName, O_RDONLY)) < 0) {
+ cerr << "can't open " << fileName << "\n";
+ return 1;
+ }
}
parse(fd, cout);
return 0;
+
usage:
cerr << "usage: re2c [-esb] name\n";
return 2;
+
+version:
+ cerr << "re2c " << RE2C_VERSION << "\n";
+ return 2;
}
--- /dev/null
+#! /bin/sh
+
+# $Id$
+# M.Boerger <re2c@somabo.de>
+
+PREFIX="re2c"
+#VERSION=RE2C_VERSION
+TARDIR="`basename \`pwd\``"
+RELEASE=${1:-1}
+VERSION=${2:-`echo $TARDIR | sed "s/$PREFIX-//g"`}
+
+echo "Usage:"
+echo "$0 <release>"
+echo
+echo "e.g.:"
+echo "$0"
+echo -n "Building RPM version $VERSION, release: $RELEASE "
+sleep 1 ; echo -n . ; sleep 1 ; echo -n . ; sleep 1 ; echo -n .
+echo
+
+TAR=re2c-$VERSION.tar.gz
+SPEC=re2c-$VERSION.spec
+
+# write out the .spec file
+sed -e "s/RPMVERSION/$VERSION/g" \
+ -e "s/RPMRELEASE/$RELEASE/g" \
+ -e "s/RPMTARDIR/$TARDIR/g" \
+ > $SPEC <<'EOF'
+Summary: re2c - A tool for generating C-based recognizers from regular expressions
+Name: re2c
+Version: RPMVERSION
+Release: RPMRELEASE
+Group: Development
+License: public domain
+URL: http://sourceforge.net/projects/re2c/
+Source: %{name}-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-root
+
+%description
+re2c is a great tool for writing fast and flexible lexers. It has
+served many people well for many years and it deserves to be
+maintained more actively. re2c is on the order of 2-3 times faster
+than a flex based scanner, and its input model is much more
+flexible.
+
+%prep
+%setup -q -n RPMTARDIR
+
+%build
+make clean
+make re2c
+#regenerate file scanner.cc
+rm -f scanner.cc
+make scanner.cc
+#regenerate re2c itself
+rm -f re2c
+make re2c
+make dist-clean
+
+%install
+rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT%{_bindir}
+install -m 0755 re2c $RPM_BUILD_ROOT%{_bindir}
+
+# The manual page is documentation, not a program: install it without the
+# executable bits.
+mkdir -p $RPM_BUILD_ROOT%{_mandir}/man1
+install -m 0644 re2c.1 $RPM_BUILD_ROOT%{_mandir}/man1
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%changelog
+* Sun Jan 04 2004 Marcus Boerger <re2c@somabo.de>
+- Initial version.
+
+%files
+%defattr(-,root,root)
+%{_bindir}/re2c
+%{_mandir}/man1/re2c.1*
+
+%doc README examples doc/*
+EOF
+
+RPMBASE=/usr/src/redhat
+for i in /usr/src/redhat /usr/src/packages /usr/src/RPM; do
+ if test -d $i; then
+ RPMBASE=$i
+ break
+ fi
+done
+
+RPMDIR=${RPMBASE}/RPMS
+SPECDIR=${RPMBASE}/SPECS
+SRCDIR=${RPMBASE}/SOURCES
+
+# Copy the spec file into the RPM SPECS directory first: "make clean" below
+# removes re2c*.spec from the source directory, so the freshly written spec
+# would no longer exist once the source tarball has been created.
+echo "CP: (`pwd`) cp -a $SPEC $SPECDIR/${SPEC}"
+cp -a $SPEC $SPECDIR/${SPEC}
+#cp -a *.patch $SRCDIR
+
+# Create the source tarball from a cleaned tree. The subshell keeps the
+# "cd .." from affecting the rest of the script.
+(
+make clean
+cd ..
+tar czvf ${SRCDIR}/${TAR} $TARDIR )
+
+# Build binary and source packages from the copied spec file.
+cd $SPECDIR
+echo "RPM: rpm -ba -vv ${SPEC}"
+rpm -ba ${SPEC}
+/* $Id$ */
#ifndef _parser_h
#define _parser_h
%{
+/* $Id$ */
+
#include <time.h>
#include <iostream.h>
#include <string.h>
static RegExp *spec;
static Scanner *in;
+/* Bison version 1.875 emits a definition that is not working
+ * with several g++ versions. Hence we disable it here.
+ */
+#if defined(__GNUC__)
+#define __attribute__(x)
+#endif
+
%}
%start spec
+/* $Id$ */
#ifndef _re_h
#define _re_h
--- /dev/null
+.\"
+.\" $Id$
+.\"
+.TH RE2C 1 "8 April 1994" "Version RE2C_VERSION"
+.ds re \fBre2c\fP
+.ds le \fBlex\fP
+.ds rx regular expression
+.ds lx \fIl\fP-expression
+\"$Log$
+\"Revision 1.1 2004/01/31 15:44:39 nuffer
+\"Applied patch from Marcus Boerger
+\"
+\"Revision 1.2 1994/04/16 15:50:32 peter
+\"Fix bug in simple example.
+\"
+\"Revision 1.1 1994/04/08 15:39:09 peter
+\"Initial revision
+\"
+.SH NAME
+re2c \- convert regular expressions to C/C++
+
+.SH SYNOPSIS
+\*(re [\fB-esb\fP] \fIname\fP
+
+.SH DESCRIPTION
+\*(re is a preprocessor that generates C-based recognizers from regular
+expressions.
+The input to \*(re consists of C/C++ source interleaved with
+comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
+scanner specifications.
+In the output these comments are replaced with code that, when
+executed, will find the next input token and then execute
+some user-supplied token-specific code.
+
+For example, given the following code
+
+.in +3
+.nf
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+/*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\\000-\\377] {return NULL;}
+*/
+}
+.fi
+.in -3
+
+\*(re will generate
+
+.in +3
+.nf
+/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+#line 1 "simple.re"
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+yy1: ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+yy2: yych = *++YYCURSOR;
+ goto yy7;
+yy3:
+#line 10
+ {return YYCURSOR;}
+yy4: yych = *++YYCURSOR;
+yy5:
+#line 11
+ {return NULL;}
+yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+}
+#line 12
+
+}
+.fi
+.in -3
+
+.SH OPTIONS
+\*(re provides the following options:
+.TP
+\fB-e\fP
+Cross-compile from an ASCII platform to an EBCDIC one.
+.TP
+\fB-s\fP
+Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
+assist to generate better code.
+.TP
+\fB-b\fP
+Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
+code out of the compiler. Most useful for specifications with more than a
+few keywords (e.g. for most programming languages).
+
+.SH "INTERFACE CODE"
+Unlike other scanner generators, \*(re does not generate complete scanners:
+the user must supply some interface code.
+In particular, the user must define the following macros:
+.TP
+\fCYYCTYPE\fP
+Type used to hold an input symbol.
+Usually \fCchar\fP or \fCunsigned char\fP.
+.TP
+\fCYYCURSOR\fP
+\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol.
+The generated code advances \fCYYCURSOR\fP as symbols are matched.
+On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
+current token. On exit, \fCYYCURSOR\fP will point to the first character of
+the following token.
+.TP
+\fCYYLIMIT\fP
+Expression of type \fC*YYCTYPE\fP that marks the end of the buffer
+(\fCYYLIMIT[-1]\fP is the last character in the buffer).
+The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
+to determine when the buffer needs (re)filling.
+.TP
+\fCYYMARKER\fP
+\*(lx of type \fC*YYCTYPE\fP.
+The generated code saves backtracking information in \fCYYMARKER\fP.
+.TP
+\fCYYFILL(\fP\fIn\fP\fC)\fP
+The generated code "calls" \fCYYFILL\fP when the buffer needs
+(re)filling: at least \fIn\fP additional characters should
+be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and
+\fCYYMARKER\fP as needed. Note that for typical programming languages
+\fIn\fP will be the length of the longest keyword plus one.
+
+.SH "SCANNER SPECIFICATIONS"
+Each scanner specification consists of a set of \fIrules\fP and name
+definitions.
+Rules consist of a regular expression along with a block of C/C++ code that
+is to be executed when the associated regular expression is matched.
+Name definitions are of the form
+``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''.
+
+.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
+.TP
+\fC"foo"\fP
+the literal string \fCfoo\fP.
+ANSI-C escape sequences can be used.
+.TP
+\fC[xyz]\fP
+a "character class"; in this case,
+the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
+.TP
+\fC[abj-oZ]\fP
+a "character class" with a range in it;
+matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
+or a '\fCZ\fP'.
+.TP
+\fIr\fP\fC\e\fP\fIs\fP
+match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
+which can be expressed as character classes.
+.TP
+\fIr\fP\fC*\fP
+zero or more \fIr\fP's, where \fIr\fP is any regular expression
+.TP
+\fC\fIr\fP\fC+\fP
+one or more \fIr\fP's
+.TP
+\fC\fIr\fP\fC?\fP
+zero or one \fIr\fP's (that is, "an optional \fIr\fP")
+.TP
+name
+the expansion of the "name" definition (see above)
+.TP
+\fC(\fP\fIr\fP\fC)\fP
+an \fIr\fP; parentheses are used to override precedence
+(see below)
+.TP
+\fIrs\fP
+an \fIr\fP followed by an \fIs\fP ("concatenation")
+.TP
+\fIr\fP\fC|\fP\fIs\fP
+either an \fIr\fP or an \fIs\fP
+.TP
+\fIr\fP\fC/\fP\fIs\fP
+an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
+the matched text. This type of \*(rx is called "trailing context".
+.LP
+The regular expressions listed above are grouped according to
+precedence, from highest precedence at the top to lowest at the bottom.
+Those grouped together have equal precedence.
+
+.SH "A LARGER EXAMPLE"
+.LP
+.in +3
+.nf
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){
+ if(!s->eof){
+ uint cnt = s->tok - s->bot;
+ if(cnt){
+ memcpy(s->bot, s->tok, s->lim - s->tok);
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){
+ uchar *buf = (uchar*)
+ malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok);
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
+ s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
+ }
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\\000-\\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\\[\\n\\\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\\[\\n\\\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \\t\\v\\f]+ { goto std; }
+
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+main(){
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in));
+ in.fd = 0;
+ while((t = scan(&in)) != EOI){
+/*
+ printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
+ printf("%d\\n", t);
+*/
+ }
+ close(in.fd);
+}
+.fi
+.in -3
+
+.SH "SEE ALSO"
+.LP
+flex(1), lex(1).
+
+.SH FEATURES
+.LP
+\*(re does not provide a default action:
+the generated code assumes that the input
+will consist of a sequence of tokens.
+Typically this can be dealt with by adding a rule such as the one for
+unexpected characters in the example above.
+.LP
+The user must arrange for a sentinel token to appear at the end of input
+(and provide a rule for matching it):
+\*(re does not provide an \fC<<EOF>>\fP expression.
+If the source is from a null-byte terminated string, a
+rule matching a null character will suffice. If the source is from a
+file then the approach taken in the example can be used: pad the input with
+a newline (or some other character that can't appear within another token);
+upon recognizing such a character check to see if it is the sentinel
+and act accordingly.
+.LP
+\*(re does not provide start conditions: use a separate scanner
+specification for each start condition (as illustrated in the above example).
+.LP
+No [^x]. Use difference instead.
+.SH BUGS
+.LP
+Only fixed length trailing context can be handled.
+.LP
+The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not
+provided to the generated code (this value is needed for constructing
+the interface code).
+Note that this value is usually relatively small: for
+typical programming languages \fIn\fP will be the length of the longest
+keyword plus one.
+.LP
+Difference only works for character sets.
+.LP
+The \*(re internal algorithms need documentation.
+
+.SH AUTHOR
+.LP
+Please send bug reports, fixes and feedback to:
+.LP
+.nf
+Peter Bumbulis
+Computer Systems Group
+University of Waterloo
+Waterloo, Ontario
+N2L 3G1
+Internet: peter@csg.uwaterloo.ca
+.fi
+/* $Id$ */
#ifndef _scanner_h
#define _scanner_h
class Scanner {
private:
int in;
- char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
uint tchar, tline, cline;
private:
- char *fill(char*);
+ uchar *fill(uchar*);
public:
Scanner(int);
int echo(ostream&);
-#include <stdlib.h> #include <string.h>
+/* $Id$ */
+#include <stdlib.h>
+#include <string.h>
#include <iostream.h>
#include <unistd.h>
#include "scanner.h"
#define BSIZE 8192
-#define YYCTYPE char
+#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT lim
#define YYMARKER ptr
;
}
-char *Scanner::fill(char *cursor){
+uchar *Scanner::fill(uchar *cursor){
if(!eof){
uint cnt = tok - bot;
if(cnt){
lim -= cnt;
}
if((top - lim) < BSIZE){
- char *buf = new char[(lim - bot) + BSIZE];
+ uchar *buf = new uchar[(lim - bot) + BSIZE];
memcpy(buf, tok, lim - tok);
tok = buf;
ptr = &buf[ptr - bot];
*/
int Scanner::echo(ostream &out){
- char *cursor = cur;
+ uchar *cursor = cur;
// Catch EOF
if (eof && cursor == eof)
tok = cursor;
echo:
/*!re2c
- "/*!re2c" { out.write((char *)tok, &cursor[-7] - tok);
+ "/*!re2c" { out.write((const char*)(tok), &cursor[-7] - tok);
tok = cursor;
RETURN(1); }
"\n" { if(cursor == eof) RETURN(0);
- out.write((char *)tok, cursor - tok);
+ out.write((const char*)(tok), cursor - tok);
tok = pos = cursor; cline++;
goto echo; }
any { goto echo; }
int Scanner::scan(){
- char *cursor = cur;
+ uchar *cursor = cur;
uint depth;
scan:
+/* $Id$ */
#include <string.h>
#include "substr.h"
#include "globals.h"
void SubStr::out(ostream& o) const {
o.write(str, len);
- for (int i = 0; i < len; ++i)
+ for (size_t i = 0; i < (size_t)len; ++i)
{
if (str[i] == '\n')
++oline;
+/* $Id$ */
#ifndef _substr_h
#define _substr_h
+/* $Id$ */
#ifndef _token_h
#define _token_h
+/* $Id$ */
#include "globals.h"
uchar asc2asc[256] = {