]> granicus.if.org Git - yasm/commitdiff
Port re2c parser from YACC to recursive descent.
author Peter Johnson <peter@tortall.net>
Sun, 24 Dec 2006 00:13:19 +0000 (00:13 -0000)
committer Peter Johnson <peter@tortall.net>
Sun, 24 Dec 2006 00:13:19 +0000 (00:13 -0000)
svn path=/trunk/yasm/; revision=1718

Mkfiles/Makefile.dj
Mkfiles/Makefile.flat
Mkfiles/vc/re2c/re2c.vcproj
Mkfiles/vc8/re2c/re2c.vcproj
tools/re2c/Makefile.inc
tools/re2c/parser.c [new file with mode: 0644]
tools/re2c/parser.h [new file with mode: 0644]
tools/re2c/re2c-parser.y [deleted file]
tools/re2c/scanner.c
tools/re2c/scanner.re

index cb37cac3d98f11f69dbb69e7ecc1ac9176b22dcf..dd520685c4ce6b27529d03e30da3cf302cab3498 100644 (file)
@@ -185,7 +185,7 @@ RE2C_SRCS= \
        tools/re2c/main.c \
        tools/re2c/code.c \
        tools/re2c/dfa.c \
-       re2c-parser.c \
+       tools/re2c/parser.c \
        tools/re2c/actions.c \
        tools/re2c/scanner.c \
        tools/re2c/mbo_getopt.c \
index f164984427205d4a1b8642e78ed15440e141c9f4..cb30bddee63e9090d855623af598c61f8dfc0c9c 100644 (file)
@@ -188,7 +188,7 @@ RE2C_SRCS= \
        tools/re2c/main.c \
        tools/re2c/code.c \
        tools/re2c/dfa.c \
-       re2c-parser.c \
+       tools/re2c/parser.c \
        tools/re2c/actions.c \
        tools/re2c/scanner.c \
        tools/re2c/mbo_getopt.c \
index e52d1ee98118330e5f8107b5616c656f9d2f0ee6..13579b6e3cdf9991e94f4fdee6d4241846e16a78 100644 (file)
                                RelativePath="..\..\..\tools\re2c\mbo_getopt.c">\r
                        </File>\r
                        <File\r
-                               RelativePath="..\..\..\re2c-parser.c">\r
-                       </File>\r
-                       <File\r
-                               RelativePath="..\..\..\re2c-parser.h">\r
+                               RelativePath="..\..\..\tools\re2c\parser.c">\r
                        </File>\r
                        <File\r
                                RelativePath="..\..\..\tools\re2c\scanner.c">\r
                        <File\r
                                RelativePath="..\..\..\tools\re2c\parse.h">\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\..\..\tools\re2c\parser.h">\r
+                       </File>\r
                        <File\r
                                RelativePath="..\..\..\tools\re2c\re.h">\r
                        </File>\r
index 4e689057a0f62a60614a9b555144c0b543bafe47..daa8ff55aaf59de984491d64f695863f8be93b29 100644 (file)
                                >\r
                        </File>\r
                        <File\r
-                               RelativePath="..\..\..\re2c-parser.c"\r
+                               RelativePath="..\..\..\tools\re2c\parser.c"\r
                                >\r
                        </File>\r
-                       <File\r
-                               RelativePath="..\..\..\re2c-parser.h"\r
-                               >\r
-                       </File>\r
-                       <File\r
-                               RelativePath="..\..\..\tools\re2c\re2c-parser.y"\r
-                               >\r
-                               <FileConfiguration\r
-                                       Name="Debug|Win32"\r
-                                       ExcludedFromBuild="true"\r
-                                       >\r
-                                       <Tool\r
-                                               Name="Bison"\r
-                                               OutputStem="../../../re2c-parser"\r
-                                       />\r
-                               </FileConfiguration>\r
-                               <FileConfiguration\r
-                                       Name="Release|Win32"\r
-                                       ExcludedFromBuild="true"\r
-                                       >\r
-                                       <Tool\r
-                                               Name="Bison"\r
-                                               OutputStem="../../../re2c-parser"\r
-                                       />\r
-                               </FileConfiguration>\r
-                       </File>\r
                        <File\r
                                RelativePath="..\..\..\tools\re2c\scanner.c"\r
                                >\r
                                RelativePath="..\..\..\tools\re2c\parse.h"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\..\..\tools\re2c\parser.h"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath="..\..\..\tools\re2c\re.h"\r
                                >\r
index 06f471341865586e73e12cf29eb4436e8323ff14..abd2db8743dc6fcd22e83d20b1a481fc18153f39 100644 (file)
@@ -16,7 +16,8 @@ EXTRA_DIST += tools/re2c/code.c
 EXTRA_DIST += tools/re2c/dfa.h
 EXTRA_DIST += tools/re2c/dfa.c
 EXTRA_DIST += tools/re2c/parse.h
-EXTRA_DIST += tools/re2c/re2c-parser.y
+EXTRA_DIST += tools/re2c/parser.h
+EXTRA_DIST += tools/re2c/parser.c
 EXTRA_DIST += tools/re2c/actions.c
 EXTRA_DIST += tools/re2c/scanner.h
 EXTRA_DIST += tools/re2c/scanner.c
@@ -45,8 +46,8 @@ re2c-code.$(OBJEXT): tools/re2c/code.c
 re2c-dfa.$(OBJEXT): tools/re2c/dfa.c
        $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c
 
-re2c-parser.$(OBJEXT): re2c-parser.c
-       $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f re2c-parser.c || echo '$(srcdir)/'`re2c-parser.c
+re2c-parser.$(OBJEXT): tools/re2c/parser.c
+       $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c
 
 re2c-actions.$(OBJEXT): tools/re2c/actions.c
        $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c
@@ -63,41 +64,6 @@ re2c-substr.$(OBJEXT): tools/re2c/substr.c
 re2c-translate.$(OBJEXT): tools/re2c/translate.c
        $(CC_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c
 
-re2c-parser.c: tools/re2c/re2c-parser.y
-       $(YACC) $(YFLAGS) $(AM_YFLAGS) `test -f 'tools/re2c/re2c-parser.y' || echo '$(srcdir)/'`tools/re2c/re2c-parser.y
-       if test -f y.tab.h; then \
-         to=`echo "re2c-parser_H" | sed \
-                -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-                -e 's/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g'`; \
-         sed "/^#/ s/Y_TAB_H/$$to/g" y.tab.h >re2c-parser.ht; \
-         rm -f y.tab.h; \
-         if cmp -s re2c-parser.ht re2c-parser.h; then \
-           rm -f re2c-parser.ht ;\
-         else \
-           mv re2c-parser.ht re2c-parser.h; \
-         fi; \
-       fi
-       if test -f y.output; then \
-         mv y.output re2c-parser.output; \
-       fi
-       sed '/^#/ s|y\.tab\.c|re2c-parser.c|' y.tab.c >re2c-parser.ct && mv re2c-parser.ct re2c-parser.c
-       rm -f y.tab.c
-
-re2c-parser.h: re2c-parser.c
-       @if test ! -f $@; then \
-         rm -f re2c-parser.c; \
-         $(MAKE) re2c-parser.c; \
-       else :; fi
-
-BUILT_SOURCES += re2c-parser.c
-BUILT_SOURCES += re2c-parser.h
-
-CLEANFILES += re2c-parser.c
-CLEANFILES += re2c-parser.h
-
-EXTRA_DIST += re2c-parser.c
-EXTRA_DIST += re2c-parser.h
-
 EXTRA_DIST += tools/re2c/CHANGELOG
 EXTRA_DIST += tools/re2c/NO_WARRANTY
 EXTRA_DIST += tools/re2c/README
diff --git a/tools/re2c/parser.c b/tools/re2c/parser.c
new file mode 100644 (file)
index 0000000..02d5c66
--- /dev/null
@@ -0,0 +1,249 @@
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/parser.h"
+
+/* Token source used by the parser; defined further down in this file. */
+int yylex(void);
+/*
+ * Recursive-descent routines, loosest binding first:
+ *   parse_expr    alternation   ('|')
+ *   parse_diff    difference    ('\')
+ *   parse_term    concatenation (juxtaposition)
+ *   parse_factor  closures      (CLOSE, CLOSESIZE)
+ *   parse_primary ID, RANGE, STRING, parenthesised expression
+ */
+static RegExp *parse_expr(void);
+static RegExp *parse_diff(void);
+static RegExp *parse_term(void);
+static RegExp *parse_factor(void);
+static RegExp *parse_primary(void);
+
+/* Rule counter: reset to 0 by yyparse() and post-incremented for every rule
+ * handed to RegExp_new_RuleOp(). */
+static unsigned int accept;
+/* Result of yyparse(): every rule parsed so far joined with mkAlt(), or NULL
+ * when no rule has been seen. */
+static RegExp *spec;
+/* Scanner being read; assigned in parse() from Scanner_new(). */
+static Scanner *in;
+
+/* curtok is the token most recently fetched by get_next_token(); peektok is a
+ * single buffered lookahead token, or NONE when the buffer is empty. */
+static int curtok, peektok;
+/* Semantic value of the current token (declared extern in parser.h).
+ * NOTE(review): presumably filled in by Scanner_scan() -- confirm in
+ * scanner.re. */
+yystype yylval;
+/* Semantic value saved together with peektok. */
+static yystype peekval;
+
+/* Fetch the next token into curtok; the expression evaluates to that token. */
+#define get_next_token()    (curtok = yylex())
+
+/*
+ * Read one token ahead without disturbing the current one.
+ *
+ * The token is stored in peektok and its semantic value in peekval; yylval
+ * is put back to the value it had on entry.  Only a single token may be
+ * buffered: calling this while peektok is already occupied is reported
+ * through Scanner_fatal().
+ */
+static void
+get_peek_token(void)
+{
+    yystype current_val;
+
+    if (peektok != NONE)
+       Scanner_fatal(in, "more than one token of lookahead?");
+
+    current_val = yylval;      /* structure copy: keep the current value */
+    peektok = yylex();
+    peekval = yylval;          /* structure copy: value of the peeked token */
+    yylval = current_val;
+}
+
+/*
+ * Parse one specification: a sequence of definitions and rules, ending when
+ * yylex() returns token 0.
+ *
+ *   definition:  ID '=' expr ';'
+ *   rule:        expr [ '/' expr ] CODE
+ *
+ * Resets accept and spec on entry.  Each definition stores its expression in
+ * the symbol's re field; each rule is wrapped with RegExp_new_RuleOp() and
+ * merged into spec with mkAlt().  Every syntax error is reported through
+ * Scanner_fatal().
+ *
+ * The removed yacc grammar required a non-empty expr in all three positions,
+ * so a NULL from parse_expr() is rejected for definitions and for the
+ * lookahead expression as well as for the rule itself.
+ */
+static void
+yyparse(void)
+{
+    RegExp *re, *look;
+
+    accept = 0;
+    spec = NULL;
+    get_next_token();
+    while (curtok != 0) {
+       switch (curtok) {
+           case ID:
+               /* One token of lookahead tells a definition ("ID = ...")
+                * apart from a rule that merely starts with an ID. */
+               get_peek_token();
+               if (peektok == '=') {
+                   /* ID = expr; */
+                   Symbol *sym = yylval.symbol;
+                   get_next_token(); /* id */
+                   get_next_token(); /* = */
+                   re = parse_expr();
+                   if (!re)
+                       Scanner_fatal(in, "expression syntax error");
+                   if (curtok != ';')
+                       Scanner_fatal(in, "missing `;' after regexp");
+                   get_next_token(); /* ; */
+                   if (sym->re)
+                       Scanner_fatal(in, "sym already defined");
+                   sym->re = re;
+                   break;
+               }
+               /*@fallthrough@*/
+           default:
+               /* rule: expr [/ expr] CODE */
+               re = parse_expr();
+               if (!re)
+                   Scanner_fatal(in, "expression syntax error");
+
+               if (curtok == '/') {
+                   get_next_token(); /* / */
+                   look = parse_expr();
+                   if (!look)
+                       Scanner_fatal(in, "expression syntax error");
+               } else
+                   look = RegExp_new_NullOp();
+
+               if (curtok != CODE)
+                   Scanner_fatal(in, "missing code after regexp");
+               /* yylval still belongs to the CODE token here; it must be
+                * read before advancing. */
+               re = RegExp_new_RuleOp(re, look, yylval.token, accept++);
+               get_next_token(); /* CODE */
+               spec = spec ? mkAlt(spec, re) : re;
+       }
+    }
+}
+
+/*
+ * expr: diff { '|' diff }
+ *
+ * Returns the alternation of all operands (built left to right with
+ * mkAlt()), or NULL when no expression starts at the current token.
+ *
+ * Both operands of '|' were mandatory in the removed yacc grammar, so a NULL
+ * is never handed to mkAlt(): a missing left operand makes the whole
+ * expression NULL (the caller reports it), and a missing right operand is
+ * reported here through Scanner_fatal().
+ */
+static RegExp *
+parse_expr(void)
+{
+    RegExp *e, *f;
+    e = parse_diff();
+    if (!e)
+       return NULL;
+    while (curtok == '|') {
+       get_next_token(); /* | */
+       f = parse_diff();
+       if (!f)
+           Scanner_fatal(in, "missing expression after `|'");
+       e = mkAlt(e, f);
+    }
+    return e;
+}
+
+/*
+ * diff: term { '\' term }
+ *
+ * Returns the left-to-right difference of all operands (built with
+ * mkDiff()), or NULL when no expression starts at the current token.
+ *
+ * A NULL result from mkDiff() is reported as "can only difference char
+ * sets".  Missing operands are handled before mkDiff() is called, so that
+ * message is not confused with an absent operand: a missing left operand
+ * makes the whole expression NULL (the caller reports it), and a missing
+ * right operand is reported here through Scanner_fatal().
+ */
+static RegExp *
+parse_diff(void)
+{
+    RegExp *e, *f;
+    e = parse_term();
+    if (!e)
+       return NULL;
+    while (curtok == '\\') {
+       get_next_token(); /* \ */
+       f = parse_term();
+       if (!f)
+           Scanner_fatal(in, "missing expression after `\\'");
+       e = mkDiff(e, f);
+       if(!e)
+           Scanner_fatal(in, "can only difference char sets");
+    }
+    return e;
+}
+
+/*
+ * term: factor { factor }
+ *
+ * Concatenates consecutive factors with RegExp_new_CatOp(), left to right,
+ * and stops at the first parse_factor() call that returns NULL.  The result
+ * of the first parse_factor() call is not tested before the loop, so the
+ * loop asks for a further factor even when the first one was NULL.
+ */
+static RegExp *
+parse_term(void)
+{
+    RegExp *cat = parse_factor();
+
+    for (;;) {
+       RegExp *next = parse_factor();
+
+       if (next == NULL)
+           break;
+       cat = RegExp_new_CatOp(cat, next);
+    }
+    return cat;
+}
+
+/*
+ * factor: primary { CLOSE+ | CLOSESIZE }
+ *
+ * Returns the primary with every following closure operator applied, or
+ * NULL when no primary starts at the current token.
+ *
+ * A closure operator with nothing in front of it is not a factor (the
+ * removed yacc grammar had only "primary close" and "primary CLOSESIZE"),
+ * so NULL is returned before any operator is consumed and the caller
+ * reports the syntax error.
+ */
+static RegExp *
+parse_factor(void)
+{
+    RegExp *e;
+    char ch;
+    e = parse_primary();
+    if (!e)
+       return NULL;
+    while (curtok == CLOSE || curtok == CLOSESIZE) {
+       switch (curtok) {
+           case CLOSE:
+               /* A run of CLOSE tokens collapses to one operator: the
+                * operator itself if they are all the same, '*' otherwise. */
+               ch = yylval.op;
+               while (get_next_token() == CLOSE) {
+                   if (ch != yylval.op)
+                       ch = '*';
+               }
+               switch (ch) {
+                   case '*':
+                       e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp());
+                       break;
+                   case '+':
+                       e = RegExp_new_CloseOp(e);
+                       break;
+                   case '?':
+                       e = mkAlt(e, RegExp_new_NullOp());
+                       break;
+               }
+               break;
+           case CLOSESIZE:
+               /* yylval belongs to the CLOSESIZE token; read it before
+                * advancing. */
+               e = RegExp_new_CloseVOp(e, yylval.extop.minsize,
+                                       yylval.extop.maxsize);
+               get_next_token();       /* CLOSESIZE */
+               break;
+           default:
+               /* Not reachable while the loop condition above holds. */
+               Scanner_fatal(in, "parse error");
+               break;
+       }
+    }
+    return e;
+}
+
+/*
+ * primary: ID | RANGE | STRING | '(' expr ')'
+ *
+ * Returns the regexp for the primary and advances past it, or returns NULL
+ * without consuming anything when the current token cannot start a primary.
+ *
+ * An ID must already have a definition (symbol->re non-NULL).  Empty
+ * parentheses are reported as an error here: returning NULL after consuming
+ * "()" would be read by the callers as "no primary at this position".
+ */
+static RegExp *
+parse_primary(void)
+{
+    RegExp *e;
+    switch (curtok) {
+       case ID:
+           if (!yylval.symbol->re)
+               Scanner_fatal(in, "can't find symbol");
+           e = yylval.symbol->re;
+           get_next_token();
+           break;
+       case RANGE:
+       case STRING:
+           e = yylval.regexp;
+           get_next_token();
+           break;
+       case '(':
+           get_next_token();
+           e = parse_expr();
+           if (!e)
+               Scanner_fatal(in, "expression syntax error");
+           if (curtok != ')')
+               Scanner_fatal(in, "missing closing parenthesis");
+           get_next_token();
+           break;
+       default:
+           return NULL;
+    }
+    return e;
+}
+
+/*
+ * Return the next token.
+ *
+ * If get_peek_token() has buffered a token, that token is handed out first:
+ * its saved value is copied into yylval and the buffer is marked empty
+ * (NONE).  Otherwise the token comes straight from Scanner_scan().
+ */
+int
+yylex(void)
+{
+    int tok;
+
+    if (peektok == NONE)
+       return Scanner_scan(in);
+
+    tok = peektok;
+    yylval = peekval;          /* structure copy */
+    peektok = NONE;
+    return tok;
+}
+
+/*
+ * Write a "#line <line> "<file>"" directive to o and count the emitted line
+ * in oline.  Does nothing when iFlag is set.
+ *
+ * The file name is fileName, or "<stdin>" when fileName is NULL.  Every
+ * backslash in the name is written as two backslashes so that the name
+ * survives being read back as a C string literal.
+ *
+ * The name is copied character by character rather than split with
+ * strtok(): strtok() skips leading, trailing and repeated delimiters, which
+ * silently changed names such as "\dir\file.re" or "\\server\share\file.re",
+ * and it returns NULL for an empty or all-backslash name.
+ */
+void line_source(FILE *o, unsigned int line)
+{
+    const char *p;
+
+    if (iFlag)
+       return;
+    fprintf(o, "#line %u \"", line);
+    for (p = (fileName != NULL) ? fileName : "<stdin>"; *p != '\0'; p++) {
+       if (*p == '\\')
+           fputs("\\\\", o);
+       else
+           fputc(*p, o);
+    }
+    fputs("\"\n", o); oline++;
+}
+
+/*
+ * Top-level driver: read re2c input from i and write the generated output
+ * to o.
+ *
+ * Writes a "Generated by" banner with the current time, creates the scanner,
+ * and then, for as long as Scanner_echo() returns non-zero, parses a
+ * specification with yyparse() and passes a non-NULL spec to genCode().
+ * A #line directive is written before the first Scanner_echo() call and
+ * after every parsed specification.
+ *
+ * NOTE(review): Scanner_echo() presumably copies plain input through to o
+ * and reports whether another re2c block follows -- confirm in scanner.re.
+ */
+void parse(FILE *i, FILE *o){
+    time_t stamp = time(NULL);
+
+    peektok = NONE;            /* start with an empty lookahead buffer */
+
+    /* The string from ctime() ends in a newline, so the banner takes up two
+     * output lines. */
+    fputs("/* Generated by re2c 0.9.1-C on ", o);
+    fprintf(o, "%-24s", ctime(&stamp));
+    fputs(" */\n", o);
+    oline += 2;
+
+    in = Scanner_new(i);
+    line_source(o, Scanner_line(in));
+
+    for (;;) {
+       if (!Scanner_echo(in, o))
+           break;
+       yyparse();
+       if (spec != NULL)
+           genCode(o, spec);
+       line_source(o, Scanner_line(in));
+    }
+}
diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h
new file mode 100644 (file)
index 0000000..c433a99
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Token codes and semantic-value type for the re2c parser.
+ *
+ * This header uses Symbol, RegExp, Token and ExtOp without declaring them;
+ * the files that include it (parser.c, scanner.c) include
+ * "tools/re2c/parse.h" and "tools/re2c/globals.h" first.
+ */
+#ifndef RE2C_PARSER_H
+#define RE2C_PARSER_H
+
+/* Tokens */
+/* Multi-character tokens are numbered from 258 up; the parser compares
+ * single-character tokens ('=', ';', '|', ...) against their character
+ * constants.  NONE is not produced as input: parser.c uses it to mark its
+ * one-token lookahead buffer as empty. */
+enum yytokentype {
+    CLOSESIZE = 258,
+    CLOSE = 259,
+    ID = 260,
+    CODE = 261,
+    RANGE = 262,
+    STRING = 263,
+    NONE = 264
+};
+
+/* Macro spellings of the same values as the enumerators above. */
+#define CLOSESIZE 258
+#define CLOSE 259
+#define ID 260
+#define CODE 261
+#define RANGE 262
+#define STRING 263
+#define NONE 264
+
+/* Semantic value of a token; the member in use depends on the token:
+ * symbol for ID, regexp for RANGE and STRING, token for CODE, op for CLOSE,
+ * extop for CLOSESIZE. */
+typedef union {
+    Symbol     *symbol;
+    RegExp     *regexp;
+    Token      *token;
+    char       op;
+    ExtOp      extop;
+} yystype;
+
+/* Value of the current token; defined in parser.c. */
+extern yystype yylval;
+
+#endif
diff --git a/tools/re2c/re2c-parser.y b/tools/re2c/re2c-parser.y
deleted file mode 100644 (file)
index 998cdb0..0000000
+++ /dev/null
@@ -1,181 +0,0 @@
-%{
-#include <time.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include "tools/re2c/globals.h"
-#include "tools/re2c/parse.h"
-
-/* Work around bug in Bison 2.1 */
-#define YYPARSE_PARAM  unused
-
-int yylex(void);
-void yyerror(const char*);
-
-static unsigned int accept;
-static RegExp *spec;
-static Scanner *in;
-
-%}
-
-%start spec
-
-%union {
-    Symbol     *symbol;
-    RegExp     *regexp;
-    Token      *token;
-    char       op;
-    ExtOp      extop;
-}
-
-%token         CLOSESIZE   CLOSE       ID      CODE    RANGE   STRING
-
-%type  <op>            CLOSE
-%type  <op>            close
-%type  <extop>         CLOSESIZE
-%type  <symbol>        ID
-%type  <token>         CODE
-%type  <regexp>        RANGE   STRING
-%type  <regexp>        rule    look    expr    diff    term    factor  primary
-
-%%
-
-spec   :
-               { accept = 0;
-                 spec = NULL; }
-       |       spec rule
-               { spec = spec? mkAlt(spec, $2) : $2; }
-       |       spec decl
-       ;
-
-decl   :       ID '=' expr ';'
-               { if($1->re)
-                     Scanner_fatal(in, "sym already defined");
-                 $1->re = $3; }
-       ;
-
-rule   :       expr look CODE
-               { $$ = RegExp_new_RuleOp($1, $2, $3, accept++); }
-       ;
-
-look   :
-               { $$ = RegExp_new_NullOp(); }
-       |       '/' expr
-               { $$ = $2; }
-       ;
-
-expr   :       diff
-               { $$ = $1; }
-       |       expr '|' diff
-               { $$ =  mkAlt($1, $3); }
-       ;
-
-diff   :       term
-               { $$ = $1; }
-       |       diff '\\' term
-               { $$ =  mkDiff($1, $3);
-                 if(!$$)
-                      Scanner_fatal(in, "can only difference char sets");
-               }
-       ;
-
-term   :       factor
-               { $$ = $1; }
-       |       term factor
-               { $$ = RegExp_new_CatOp($1, $2); }
-       ;
-
-factor :       primary
-               { $$ = $1; }
-       |       primary close
-               {
-                   switch($2){
-                   case '*':
-                       $$ = mkAlt(RegExp_new_CloseOp($1), RegExp_new_NullOp());
-                       break;
-                   case '+':
-                       $$ = RegExp_new_CloseOp($1);
-                       break;
-                   case '?':
-                       $$ = mkAlt($1, RegExp_new_NullOp());
-                       break;
-                   }
-               }
-       |       primary CLOSESIZE
-               {
-                       $$ = RegExp_new_CloseVOp($1, $2.minsize, $2.maxsize);
-               }
-       ;
-
-close  :       CLOSE
-               { $$ = $1; }
-       |       close CLOSE
-               { $$ = ($1 == $2) ? $1 : '*'; }
-       ;
-
-primary        :       ID
-               { if(!$1->re)
-                     Scanner_fatal(in, "can't find symbol");
-                 $$ = $1->re; }
-       |       RANGE
-               { $$ = $1; }
-       |       STRING
-               { $$ = $1; }
-       |       '(' expr ')'
-               { $$ = $2; }
-       ;
-
-%%
-
-void yyerror(const char* s){
-    Scanner_fatal(in, s);
-}
-
-int yylex(){
-    return Scanner_scan(in);
-}
-
-void line_source(FILE *o, unsigned int line)
-{
-    char *     fnamebuf;
-    char *     token;
-
-    if (iFlag)
-       return;
-    fprintf(o, "#line %u \"", line);
-    if( fileName != NULL ) {
-       fnamebuf = mystrdup( fileName );
-    } else {
-       fnamebuf = mystrdup( "<stdin>" );
-    }
-    token = strtok( fnamebuf, "\\" );
-    for(;;) {
-       fprintf(o, "%s", token);
-       token = strtok( NULL, "\\" );
-       if( token == NULL ) break;
-       fputs("\\\\", o);
-    }
-    fputs("\"\n", o); oline++;
-    free( fnamebuf );
-}
-
-void parse(FILE *i, FILE *o){
-    time_t now;
-
-    time(&now);
-
-    fputs("/* Generated by re2c 0.9.1-C on ", o);
-    fprintf(o, "%-24s", ctime(&now));
-    fputs(" */\n", o); oline+=2;
-
-    in = Scanner_new(i);
-
-    line_source(o, Scanner_line(in));
-
-    while(Scanner_echo(in, o)){
-       yyparse(NULL);
-       if(spec)
-           genCode(o, spec);
-       line_source(o, Scanner_line(in));
-    }
-}
index fd0ca93ee0576190374a166461f9f6c57b32a0b6..4640ee4ad02e72e4a12ec4debec6c551f5966223 100644 (file)
@@ -6,7 +6,7 @@
 #include "tools/re2c/scanner.h"
 #include "tools/re2c/parse.h"
 #include "tools/re2c/globals.h"
-#include "re2c-parser.h"
+#include "tools/re2c/parser.h"
 
 #ifndef MAX
 #define MAX(a,b) (((a)>(b))?(a):(b))
index 81dcf48b2bfdf41eec885f84e1536fd22d57e9f5..10af0883ed288f0e00f75531dffc61afcce07328 100644 (file)
@@ -3,7 +3,7 @@
 #include "tools/re2c/scanner.h"
 #include "tools/re2c/parse.h"
 #include "tools/re2c/globals.h"
-#include "re2c-parser.h"
+#include "tools/re2c/parser.h"
 
 #ifndef MAX
 #define MAX(a,b) (((a)>(b))?(a):(b))