]> granicus.if.org Git - re2c/commitdiff
- Added tutorial like lessons to re2c.
authorhelly <helly@642ea486-5414-0410-9d7f-a0204ed87703>
Sat, 15 Apr 2006 16:03:36 +0000 (16:03 +0000)
committerhelly <helly@642ea486-5414-0410-9d7f-a0204ed87703>
Sat, 15 Apr 2006 16:03:36 +0000 (16:03 +0000)
18 files changed:
CHANGELOG
README.in
htdocs/index.html
htdocs/manual.html
lessons/.cvsignore [new file with mode: 0755]
lessons/001_upn_calculator/calc_001.c [new file with mode: 0755]
lessons/001_upn_calculator/calc_001.re [new file with mode: 0755]
lessons/001_upn_calculator/calc_002.c [new file with mode: 0755]
lessons/001_upn_calculator/calc_002.re [new file with mode: 0755]
lessons/001_upn_calculator/calc_003.c [new file with mode: 0755]
lessons/001_upn_calculator/calc_003.re [new file with mode: 0755]
lessons/001_upn_calculator/calc_004.c [new file with mode: 0755]
lessons/001_upn_calculator/calc_004.re [new file with mode: 0755]
lessons/001_upn_calculator/calc_005.c [new file with mode: 0755]
lessons/001_upn_calculator/calc_005.re [new file with mode: 0755]
lessons/001_upn_calculator/readme.txt [new file with mode: 0755]
re2c.1.in
run_tests.sh.in

index a004b0dcf5c3e84910225ce41705bf8ad7805757..a01a7355e54e590f9f3ec4cbfae3968e97943fc0 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,6 @@
 Version 0.10.2 (2006-04-13)
 ---------------------------
+- Added tutorial like lessons to re2c.
 - Added /*!ignore!re2c */ to support documenting of re2c source.
 - Fixed issue with multiline re2c comments (/*!max:re2c ... */ and alike).
 - Fixed generation of YYDEBUG() when using -d switch.
index 05ae0a5fa1edc79355d2eae4cc154ab389fbe2af..84867278451920255a24fd5ba0069e0dca26329b 100644 (file)
--- a/README.in
+++ b/README.in
@@ -63,6 +63,8 @@ served many people well for many years. re2c is on the order of 2-3
 times faster than a flex based scanner, and its input model is much 
 more flexible.
 
+For an introduction to re2c refer to the lessons sub directory.
+
 Peter's original version 0.5 ANNOUNCE and README follows.
 
 --
index b8c1daf30fc9cbae3dce971d7a8204a189c1f801..cb6388974f7287c73510b96148deb092418831da 100755 (executable)
@@ -78,6 +78,7 @@ provide re2c packages.</li>
 <h1>Changelog</h1>
 <h2>2006-04-13: 0.10.2</h2>
 <ul>
+<li>Added tutorial like lessons to re2c.</li>
 <li>Added /*!ignore!re2c */ to support documenting of re2c source.</li>
 <li>Fixed issue with multiline re2c comments (/*!max:re2c ... */ and alike).</li>
 <li>Fixed generation of YYDEBUG() when using -d switch.</li>
index 4936018497e1acf92356575414e68ab8c3d7e4e5..10af793237a54085947cbb8dceb6d2c89ee4d2c6 100755 (executable)
@@ -363,286 +363,10 @@ placing a "/*!getstate:re2c */" comment.
 </dd>
 </dl>
 <a name="lbAK" id="lbAK"> </a>
-<h2>A LARGER EXAMPLE</h2>
-<pre>
-#include &lt;stdlib.h&gt;
-#include &lt;stdio.h&gt;
-#include &lt;fcntl.h&gt;
-#include &lt;string.h&gt;
-
-#define ADDEQ   257
-#define ANDAND  258
-#define ANDEQ   259
-#define ARRAY   260
-#define ASM     261
-#define AUTO    262
-#define BREAK   263
-#define CASE    264
-#define CHAR    265
-#define CONST   266
-#define CONTINUE        267
-#define DECR    268
-#define DEFAULT 269
-#define DEREF   270
-#define DIVEQ   271
-#define DO      272
-#define DOUBLE  273
-#define ELLIPSIS        274
-#define ELSE    275
-#define ENUM    276
-#define EQL     277
-#define EXTERN  278
-#define FCON    279
-#define FLOAT   280
-#define FOR     281
-#define FUNCTION        282
-#define GEQ     283
-#define GOTO    284
-#define ICON    285
-#define ID      286
-#define IF      287
-#define INCR    288
-#define INT     289
-#define LEQ     290
-#define LONG    291
-#define LSHIFT  292
-#define LSHIFTEQ        293
-#define MODEQ   294
-#define MULEQ   295
-#define NEQ     296
-#define OREQ    297
-#define OROR    298
-#define POINTER 299
-#define REGISTER        300
-#define RETURN  301
-#define RSHIFT  302
-#define RSHIFTEQ        303
-#define SCON    304
-#define SHORT   305
-#define SIGNED  306
-#define SIZEOF  307
-#define STATIC  308
-#define STRUCT  309
-#define SUBEQ   310
-#define SWITCH  311
-#define TYPEDEF 312
-#define UNION   313
-#define UNSIGNED        314
-#define VOID    315
-#define VOLATILE        316
-#define WHILE   317
-#define XOREQ   318
-#define EOI     319
-
-typedef unsigned int uint;
-typedef unsigned char uchar;
-
-#define BSIZE   8192
-
-#define YYCTYPE         uchar
-#define YYCURSOR        cursor
-#define YYLIMIT         s-&gt;lim
-#define YYMARKER        s-&gt;ptr
-#define YYFILL(n)       {cursor = fill(s, cursor);}
-
-#define RET(i)  {s-&gt;cur = cursor; return i;}
-
-typedef struct Scanner {
-    int                 fd;
-    uchar               *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
-    uint                line;
-} Scanner;
-
-uchar *fill(Scanner *s, uchar *cursor){
-    if(!s-&gt;eof){
-        uint cnt = s-&gt;tok - s-&gt;bot;
-        if(cnt){
-            memcpy(s-&gt;bot, s-&gt;tok, s-&gt;lim - s-&gt;tok);
-            s-&gt;tok = s-&gt;bot;
-            s-&gt;ptr -= cnt;
-            cursor -= cnt;
-            s-&gt;pos -= cnt;
-            s-&gt;lim -= cnt;
-        }
-        if((s-&gt;top - s-&gt;lim) &lt; BSIZE){
-            uchar *buf = (uchar*)
-                malloc(((s-&gt;lim - s-&gt;bot) + BSIZE)*sizeof(uchar));
-            memcpy(buf, s-&gt;tok, s-&gt;lim - s-&gt;tok);
-            s-&gt;tok = buf;
-            s-&gt;ptr = &amp;buf[s-&gt;ptr - s-&gt;bot];
-            cursor = &amp;buf[cursor - s-&gt;bot];
-            s-&gt;pos = &amp;buf[s-&gt;pos - s-&gt;bot];
-            s-&gt;lim = &amp;buf[s-&gt;lim - s-&gt;bot];
-            s-&gt;top = &amp;s-&gt;lim[BSIZE];
-            free(s-&gt;bot);
-            s-&gt;bot = buf;
-        }
-        if((cnt = read(s-&gt;fd, (char*) s-&gt;lim, BSIZE)) != BSIZE){
-            s-&gt;eof = &amp;s-&gt;lim[cnt]; *(s-&gt;eof)++ = '\n';
-        }
-        s-&gt;lim += cnt;
-    }
-    s-&gt;cur = cursor;
-    return cursor;
-}
-
-int scan(Scanner *s){
-        uchar *cursor = s-&gt;cur;
-std:
-        s-&gt;tok = cursor;
-/*!re2c
-any     = [\000-\377];
-O       = [0-7];
-D       = [0-9];
-L       = [a-zA-Z_];
-H       = [a-fA-F0-9];
-E       = [Ee] [+-]? D+;
-FS      = [fFlL];
-IS      = [uUlL]*;
-ESC     = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
-*/
-
-/*!re2c
-        "/*"                    { goto comment; }
-        
-        "auto"                  { RET(AUTO); }
-        "break"                 { RET(BREAK); }
-        "case"                  { RET(CASE); }
-        "char"                  { RET(CHAR); }
-        "const"                 { RET(CONST); }
-        "continue"              { RET(CONTINUE); }
-        "default"               { RET(DEFAULT); }
-        "do"                    { RET(DO); }
-        "double"                { RET(DOUBLE); }
-        "else"                  { RET(ELSE); }
-        "enum"                  { RET(ENUM); }
-        "extern"                { RET(EXTERN); }
-        "float"                 { RET(FLOAT); }
-        "for"                   { RET(FOR); }
-        "goto"                  { RET(GOTO); }
-        "if"                    { RET(IF); }
-        "int"                   { RET(INT); }
-        "long"                  { RET(LONG); }
-        "register"              { RET(REGISTER); }
-        "return"                { RET(RETURN); }
-        "short"                 { RET(SHORT); }
-        "signed"                { RET(SIGNED); }
-        "sizeof"                { RET(SIZEOF); }
-        "static"                { RET(STATIC); }
-        "struct"                { RET(STRUCT); }
-        "switch"                { RET(SWITCH); }
-        "typedef"               { RET(TYPEDEF); }
-        "union"                 { RET(UNION); }
-        "unsigned"              { RET(UNSIGNED); }
-        "void"                  { RET(VOID); }
-        "volatile"              { RET(VOLATILE); }
-        "while"                 { RET(WHILE); }
-        
-        L (L|D)*                { RET(ID); }
-        
-        ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
-        (['] (ESC|any\[\n\\'])* ['])
-                                { RET(ICON); }
-        
-        (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
-                                { RET(FCON); }
-        
-        (["] (ESC|any\[\n\\"])* ["])
-                                { RET(SCON); }
-        
-        "..."                   { RET(ELLIPSIS); }
-        "&gt;&gt;="                   { RET(RSHIFTEQ); }
-        "&lt;&lt;="                   { RET(LSHIFTEQ); }
-        "+="                    { RET(ADDEQ); }
-        "-="                    { RET(SUBEQ); }
-        "*="                    { RET(MULEQ); }
-        "/="                    { RET(DIVEQ); }
-        "%="                    { RET(MODEQ); }
-        "&amp;="                    { RET(ANDEQ); }
-        "^="                    { RET(XOREQ); }
-        "|="                    { RET(OREQ); }
-        "&gt;&gt;"                    { RET(RSHIFT); }
-        "&lt;&lt;"                    { RET(LSHIFT); }
-        "++"                    { RET(INCR); }
-        "--"                    { RET(DECR); }
-        "-&gt;"                    { RET(DEREF); }
-        "&amp;&amp;"                    { RET(ANDAND); }
-        "||"                    { RET(OROR); }
-        "&lt;="                    { RET(LEQ); }
-        "&gt;="                    { RET(GEQ); }
-        "=="                    { RET(EQL); }
-        "!="                    { RET(NEQ); }
-        ";"                     { RET(';'); }
-        "{"                     { RET('{'); }
-        "}"                     { RET('}'); }
-        ","                     { RET(','); }
-        ":"                     { RET(':'); }
-        "="                     { RET('='); }
-        "("                     { RET('('); }
-        ")"                     { RET(')'); }
-        "["                     { RET('['); }
-        "]"                     { RET(']'); }
-        "."                     { RET('.'); }
-        "&amp;"                     { RET('&amp;'); }
-        "!"                     { RET('!'); }
-        "~"                     { RET('~'); }
-        "-"                     { RET('-'); }
-        "+"                     { RET('+'); }
-        "*"                     { RET('*'); }
-        "/"                     { RET('/'); }
-        "%"                     { RET('%'); }
-        "&lt;"                     { RET('&lt;'); }
-        "&gt;"                     { RET('&gt;'); }
-        "^"                     { RET('^'); }
-        "|"                     { RET('|'); }
-        "?"                     { RET('?'); }
-
-
-        [ \t\v\f]+           { goto std; }
-
-        "\n"
-            {
-                if(cursor == s-&gt;eof) RET(EOI);
-                s-&gt;pos = cursor; s-&gt;line++;
-                goto std;
-            }
-
-        any
-            {
-                printf("unexpected character: %c\n", *s-&gt;tok);
-                goto std;
-            }
-*/
-
-comment:
-/*!re2c
-        "*/"                    { goto std; }
-        "\n"
-            {
-                if(cursor == s-&gt;eof) RET(EOI);
-                s-&gt;tok = s-&gt;pos = cursor; s-&gt;line++;
-                goto comment;
-            }
-        any                     { goto comment; }
-*/
-}
-
-main(){
-    Scanner in;
-    int t;
-    memset((char*) &amp;in, 0, sizeof(in));
-    in.fd = 0;
-    while((t = scan(&amp;in)) != EOI){
-/*
-        printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
-        printf("%d\n", t);
-*/
-    }
-    close(in.fd);
-}
-</pre>
-<br />
-<br />
+<h2>UNDERSTANDING RE2C</h2>
+<p>The subdirectory lessons of the re2c distribution contains a few step by step
+lessons to get you started with re2c. All examples in the lessons subdirectory
+can be compiled and actually work.</p>
 <a name="lbAK" id="lbAK"> </a>
 <h2>FEATURES</h2>
 <p><b>re2c</b> does not provide a default action: the generated code assumes
diff --git a/lessons/.cvsignore b/lessons/.cvsignore
new file mode 100755 (executable)
index 0000000..1938665
--- /dev/null
@@ -0,0 +1,4 @@
+a.out
+*.temp
+*.diff
+*.o
diff --git a/lessons/001_upn_calculator/calc_001.c b/lessons/001_upn_calculator/calc_001.c
new file mode 100755 (executable)
index 0000000..b758124
--- /dev/null
@@ -0,0 +1,147 @@
+/* Generated by re2c */
+#line 1 "calc_001.re"
+/* re2c lesson_001, calc_001, (c) M. Boerger 2006 */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int scan(char *s, int l)       /* tokenizes s (l chars long); returns 0 on EOF, 1 on ERR */
+{
+       char *p = s;    /* cursor (YYCURSOR); starts at the first input character */
+       char *q = 0;    /* backs YYMARKER, which none of the states below reference */
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l)
+       #define YYMARKER        q
+       #define YYFILL(n)       /* expands to nothing, so the limit checks below do no work */
+       
+       for(;;) /* one pass per token; every action leaves via continue or return */
+       {
+
+#line 23 "<stdout>"
+               {
+                       YYCTYPE yych;   /* character currently being examined */
+
+                       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                       yych = *YYCURSOR;
+                       switch(yych){   /* dispatch on the first character of the token */
+                       case 0x00:      goto yy10;
+                       case '+':       goto yy6;
+                       case '-':       goto yy8;
+                       case '0':       goto yy2;
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy4;
+                       default:        goto yy12;
+                       }
+yy2:   /* saw a leading '0': a following digit makes it "Oct", otherwise a lone "0" */
+                       ++YYCURSOR;
+                       switch((yych = *YYCURSOR)) {
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy3;
+                       }
+yy3:   /* accept: single "0" */
+#line 64 "calc_001.re"
+                       { printf("Num\n");      continue; }
+#line 63 "<stdout>"
+yy4:   /* saw a first digit 1-9; remaining digits are consumed via yy15/yy14 */
+                       ++YYCURSOR;
+                       yych = *YYCURSOR;
+                       goto yy15;
+yy5:   /* accept: number starting with 1-9 */
+#line 63 "calc_001.re"
+                       { printf("Num\n");      continue; }
+#line 71 "<stdout>"
+yy6:   /* accept: '+' */
+                       ++YYCURSOR;
+#line 65 "calc_001.re"
+                       { printf("+\n");        continue; }
+#line 76 "<stdout>"
+yy8:   /* accept: '-' */
+                       ++YYCURSOR;
+#line 66 "calc_001.re"
+                       { printf("-\n");        continue; }
+#line 81 "<stdout>"
+yy10:  /* accept: NUL byte, treated as end of input */
+                       ++YYCURSOR;
+#line 67 "calc_001.re"
+                       { printf("EOF\n");      return 0; }
+#line 86 "<stdout>"
+yy12:  /* accept: any other character is an error */
+                       ++YYCURSOR;
+#line 68 "calc_001.re"
+                       { printf("ERR\n");      return 1; }
+#line 91 "<stdout>"
+yy14:  /* digit loop for numbers that started with 1-9 */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy15:
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy14;
+                       default:        goto yy5;
+                       }
+yy16:  /* digit loop for numbers that started with '0' */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy18;
+                       }
+yy18:  /* accept: '0' followed by one or more digits */
+#line 62 "calc_001.re"
+                       { printf("Oct\n");      continue; }
+#line 130 "<stdout>"
+               }
+#line 69 "calc_001.re"
+
+       }
+}
+
+int main(int argc, char **argv)        /* scans argv[1]; prints usage and returns 1 without it */
+{
+       if (argc > 1)   /* argv[1] only holds an expression when an argument was passed */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]);        /* argv[0]: program name */
+               return 1;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_001.re b/lessons/001_upn_calculator/calc_001.re
new file mode 100755 (executable)
index 0000000..b603564
--- /dev/null
@@ -0,0 +1,84 @@
+/* re2c lesson_001, calc_001, (c) M. Boerger 2006 */
+/*!ignore:re2c
+
+- basic interface for string reading
+
+  . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
+  . YYCTYPE is the type re2c operates on or in other words the type that 
+    it generates code for. It would make little difference if we used
+    'unsigned char' here, but we would need to run re2c with option -w
+    to fully support types with sizeof() > 1.
+  . YYCURSOR is used internally and holds the current scanner position. In
+    expression handlers, the code blocks after re2c expressions, this can be 
+    used to identify the end of the token.
+  . YYMARKER is not always being used so we set an initial value to avoid
+    a compiler warning.
+  . YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
+    in this lesson. In the next example we see one way to get rid of it.
+  . We use a 'for(;;)'-loop around the scanner block. We could have used a
+    'while(1)'-loop instead but some compilers generate a warning for it.
+  . To make the output more readable we use 're2c:indent:top' scanner
+    configuration that configures re2c to prepend a single tab (the default)
+    to the beginning of each output line.
+  . The following lines are expressions and for each expression we output the 
+    token name and continue the scanner loop.
+  . The second to last rule detects the end of our input, the terminating zero
+    in our input string. In other scanners detecting the end of input may vary.
+    For example binary code may contain \0 as valid input.
+  . The last expression accepts any input character. It tells re2c to accept
+    the opposite of the empty range. This includes numbers and our tokens but
+    as re2c goes from top to bottom when evaluating the expressions this is no
+    problem.
+  . The first three rules show that re2c actually prioritizes the expressions
+    from top to bottom. Octal numbers require a starting "0" and the actual
+    number. Normal numbers start with a digit greater than 0. And zero is finally
+    a special case. A single "0" is detected by the last rule of this set. And
+    a valid octal number is already being detected by the first rule. This even
+    includes multi "0" sequences, which in octal notation also mean zero.
+    Another way would be to only use two rules:
+    "0" [0-9]+
+    "0" | ( [1-9] [0-9]* )
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int scan(char *s, int l)       /* tokenizes s (l chars long); returns 0 on EOF, 1 on ERR */
+{
+       char *p = s;    /* cursor (YYCURSOR); starts at the first input character */
+       char *q = 0;    /* backs YYMARKER; given a value so it is never uninitialized */
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l)
+       #define YYMARKER        q
+       #define YYFILL(n)       /* expands to nothing: no refill is performed */
+       
+       for(;;) /* one pass per token; every rule action leaves via continue or return */
+       {
+/*!re2c
+       re2c:indent:top = 2;
+       "0"[0-9]+       { printf("Oct\n");      continue; }
+       [1-9][0-9]*     { printf("Num\n");      continue; }
+       "0"                     { printf("Num\n");      continue; }
+       "+"                     { printf("+\n");        continue; }
+       "-"                     { printf("-\n");        continue; }
+       "\000"          { printf("EOF\n");      return 0; }
+       [^]                     { printf("ERR\n");      return 1; }
+*/
+       }
+}
+
+int main(int argc, char **argv)        /* scans argv[1]; prints usage and returns 1 without it */
+{
+       if (argc > 1)   /* argv[1] only holds an expression when an argument was passed */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]);        /* argv[0]: program name */
+               return 1;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_002.c b/lessons/001_upn_calculator/calc_002.c
new file mode 100755 (executable)
index 0000000..09f2eb0
--- /dev/null
@@ -0,0 +1,156 @@
+/* Generated by re2c */
+#line 1 "calc_002.re"
+/* re2c lesson_001, calc_002, (c) M. Boerger 2006 */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int fill(char *p, int n, char **l)     /* p: cursor, n: chars requested, l: receives the new limit */
+{
+       while (*++p && n--) ;   /* pre-increments, so only bytes after p are tested; stops at a NUL or once n hits 0 */
+       * l = p;        /* the position reached becomes the caller's limit */
+       return n <= 0;  /* 1 when n has dropped to zero or below, else 0 (YYFILL breaks out on 0) */
+}
+
+int scan(char *s)      /* tokenizes s; returns 0 on EOF, 1 on ERR, 2 on OOD */
+{
+       char *p = s;    /* cursor (YYCURSOR) */
+       char *l = s;    /* limit (YYLIMIT); starts at s and is moved forward by fill() */
+       char *q = 0;    /* backs YYMARKER, which none of the states below reference */
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         l
+       #define YYMARKER        q
+       #define YYFILL(n)               { if (!fill(p, n, &l)) break; }
+       
+       for(;;) /* one pass per token; left by return, or by the break inside YYFILL */
+       {
+
+#line 31 "<stdout>"
+               {
+                       YYCTYPE yych;   /* character currently being examined */
+
+                       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                       yych = *YYCURSOR;
+                       switch(yych){   /* dispatch on the first character of the token */
+                       case 0x00:      goto yy10;
+                       case '+':       goto yy6;
+                       case '-':       goto yy8;
+                       case '0':       goto yy2;
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy4;
+                       default:        goto yy12;
+                       }
+yy2:   /* saw a leading '0': a following digit makes it "Oct", otherwise a lone "0" */
+                       ++YYCURSOR;
+                       switch((yych = *YYCURSOR)) {
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy3;
+                       }
+yy3:   /* accept: single "0" */
+#line 48 "calc_002.re"
+                       { printf("Num\n");      continue; }
+#line 71 "<stdout>"
+yy4:   /* saw a first digit 1-9; remaining digits are consumed via yy15/yy14 */
+                       ++YYCURSOR;
+                       yych = *YYCURSOR;
+                       goto yy15;
+yy5:   /* accept: number starting with 1-9 */
+#line 47 "calc_002.re"
+                       { printf("Num\n");      continue; }
+#line 79 "<stdout>"
+yy6:   /* accept: '+' */
+                       ++YYCURSOR;
+#line 49 "calc_002.re"
+                       { printf("+\n");        continue; }
+#line 84 "<stdout>"
+yy8:   /* accept: '-' (reported as "-", not "+") */
+                       ++YYCURSOR;
+#line 50 "calc_002.re"
+                       { printf("-\n");        continue; }
+#line 89 "<stdout>"
+yy10:  /* accept: NUL byte, treated as end of input */
+                       ++YYCURSOR;
+#line 51 "calc_002.re"
+                       { printf("EOF\n");      return 0; }
+#line 94 "<stdout>"
+yy12:  /* accept: any other character is an error */
+                       ++YYCURSOR;
+#line 52 "calc_002.re"
+                       { printf("ERR\n");      return 1; }
+#line 99 "<stdout>"
+yy14:  /* digit loop for numbers that started with 1-9 */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy15:
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy14;
+                       default:        goto yy5;
+                       }
+yy16:  /* digit loop for numbers that started with '0' */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy18;
+                       }
+yy18:  /* accept: '0' followed by one or more digits */
+#line 46 "calc_002.re"
+                       { printf("Oct\n");      continue; }
+#line 138 "<stdout>"
+               }
+#line 53 "calc_002.re"
+
+       }
+       printf("OOD\n"); return 2;      /* reached only through the break in YYFILL */
+}
+
+int main(int argc, char **argv)        /* scans argv[1]; prints usage and returns 1 without it */
+{
+       if (argc > 1)   /* argv[1] only holds an expression when an argument was passed */
+       {
+               return scan(argv[1]);
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]);        /* argv[0]: program name */
+               return 1;       /* a usage error must not look like a successful scan (0) */
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_002.re b/lessons/001_upn_calculator/calc_002.re
new file mode 100755 (executable)
index 0000000..b805d84
--- /dev/null
@@ -0,0 +1,69 @@
+/* re2c lesson_001, calc_002, (c) M. Boerger 2006 */
+/*!ignore:re2c
+
+- making use of YYFILL
+
+  . Here we modified the scanner to not require strlen() on the call. Instead
+    we compute limit on the fly. That is whenever more input is needed we 
+    search for the terminating \0 in the next n chars the scanner needs.
+  . If there is not enough input we quit the scanner.
+  . Note that in lesson_001 YYLIMIT was a character pointer computed only once.
+    Here it is of course still a pointer to YYCTYPE, but a variable that gets
+    reevaluated by YYFILL().
+  . To make the code smaller we take advantage of the fact that our loop has no
+    break so far. This allows us to use break here and have the code that is 
+    used for YYFILL() not contain the printf in every occurrence. That way the
+    generated code gets smaller.
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int fill(char *p, int n, char **l)     /* p: cursor, n: chars requested, l: receives the new limit */
+{
+       while (*++p && n--) ;   /* pre-increments, so only bytes after p are tested; stops at a NUL or once n hits 0 */
+       * l = p;        /* the position reached becomes the caller's limit */
+       return n <= 0;  /* 1 when n has dropped to zero or below, else 0 (YYFILL breaks out on 0) */
+}
+
+int scan(char *s)      /* tokenizes s; returns 0 on EOF, 1 on ERR, 2 on OOD */
+{
+       char *p = s;    /* cursor (YYCURSOR) */
+       char *l = s;    /* limit (YYLIMIT); starts at s and is moved forward by fill() */
+       char *q = 0;    /* backs YYMARKER; given a value so it is never uninitialized */
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         l
+       #define YYMARKER        q
+       #define YYFILL(n)               { if (!fill(p, n, &l)) break; }
+       
+       for(;;) /* one pass per token; left by return, or by the break inside YYFILL */
+       {
+/*!re2c
+       re2c:indent:top = 2;
+       "0"[0-9]+       { printf("Oct\n");      continue; }
+       [1-9][0-9]*     { printf("Num\n");      continue; }
+       "0"                     { printf("Num\n");      continue; }
+       "+"                     { printf("+\n");        continue; }
+       "-"                     { printf("-\n");        continue; }
+       "\000"          { printf("EOF\n");      return 0; }
+       [^]                     { printf("ERR\n");      return 1; }
+*/
+       }
+       printf("OOD\n"); return 2;      /* reached only through the break in YYFILL */
+}
+
+int main(int argc, char **argv)        /* scans argv[1]; prints usage and returns 1 without it */
+{
+       if (argc > 1)   /* argv[1] only holds an expression when an argument was passed */
+       {
+               return scan(argv[1]);
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]);        /* argv[0]: program name */
+               return 1;       /* a usage error must not look like a successful scan (0) */
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_003.c b/lessons/001_upn_calculator/calc_003.c
new file mode 100755 (executable)
index 0000000..58f1318
--- /dev/null
@@ -0,0 +1,148 @@
+/* Generated by re2c */
+#line 1 "calc_003.re"
+/* re2c lesson_001, calc_003, (c) M. Boerger 2006 */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int scan(char *s, int l)       /* tokenizes s (l chars long); returns 0 on EOF, 1 on ERR, 2 on OOD */
+{
+       char *p = s;    /* cursor (YYCURSOR); starts at the first input character */
+       char *q = 0;    /* backs YYMARKER, which none of the states below reference */
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+2)
+       #define YYMARKER        q
+       #define YYFILL(n)               { printf("OOD\n"); return 2; }
+       
+       for(;;) /* one pass per token; left only by return (rule actions or YYFILL) */
+       {
+
+#line 23 "<stdout>"
+               {
+                       YYCTYPE yych;   /* character currently being examined */
+
+                       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                       yych = *YYCURSOR;
+                       switch(yych){   /* dispatch on the first character of the token */
+                       case 0x00:      goto yy10;
+                       case '+':       goto yy6;
+                       case '-':       goto yy8;
+                       case '0':       goto yy2;
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy4;
+                       default:        goto yy12;
+                       }
+yy2:   /* saw a leading '0': a following digit makes it "Oct", otherwise a lone "0" */
+                       ++YYCURSOR;
+                       switch((yych = *YYCURSOR)) {
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy3;
+                       }
+yy3:   /* accept: single "0" */
+#line 40 "calc_003.re"
+                       { printf("Num\n");      continue; }
+#line 63 "<stdout>"
+yy4:   /* saw a first digit 1-9; remaining digits are consumed via yy15/yy14 */
+                       ++YYCURSOR;
+                       yych = *YYCURSOR;
+                       goto yy15;
+yy5:   /* accept: number starting with 1-9 */
+#line 39 "calc_003.re"
+                       { printf("Num\n");      continue; }
+#line 71 "<stdout>"
+yy6:   /* accept: '+' */
+                       ++YYCURSOR;
+#line 41 "calc_003.re"
+                       { printf("+\n");        continue; }
+#line 76 "<stdout>"
+yy8:   /* accept: '-' (reported as "-", not "+") */
+                       ++YYCURSOR;
+#line 42 "calc_003.re"
+                       { printf("-\n");        continue; }
+#line 81 "<stdout>"
+yy10:  /* accept: NUL byte, treated as end of input */
+                       ++YYCURSOR;
+#line 43 "calc_003.re"
+                       { printf("EOF\n");      return 0; }
+#line 86 "<stdout>"
+yy12:  /* accept: any other character is an error */
+                       ++YYCURSOR;
+#line 44 "calc_003.re"
+                       { printf("ERR\n");      return 1; }
+#line 91 "<stdout>"
+yy14:  /* digit loop for numbers that started with 1-9 */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy15:
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy14;
+                       default:        goto yy5;
+                       }
+yy16:  /* digit loop for numbers that started with '0' */
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy16;
+                       default:        goto yy18;
+                       }
+yy18:  /* accept: '0' followed by one or more digits */
+#line 38 "calc_003.re"
+                       { printf("Oct\n");      continue; }
+#line 130 "<stdout>"
+               }
+#line 45 "calc_003.re"
+
+       }
+       return 0;       /* not reached: the loop above has no break */
+}
+
+int main(int argc, char **argv)        /* scans argv[1]; prints usage and returns 1 without it */
+{
+       if (argc > 1)   /* argv[1] only holds an expression when an argument was passed */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]);        /* argv[0]: program name */
+               return 1;       /* a usage error must not look like a successful scan (0) */
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_003.re b/lessons/001_upn_calculator/calc_003.re
new file mode 100755 (executable)
index 0000000..c4cd00f
--- /dev/null
@@ -0,0 +1,61 @@
+/* re2c lesson_001, calc_003, (c) M. Boerger 2006 */
+/*!ignore:re2c
+
+- making use of YYFILL
+
+  . Again provide the length of the input to generate the limit only once. Now
+    we can use YYFILL() to detect the end and simply return since YYFILL() is 
+    only being used if the next scanner run might use more chars than YYLIMIT
+    allows.
+  . Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In 
+    the first lesson we did not quit from YYFILL() and used a special rule to
+    detect the end of input. Here we use the fact that we know the exact end
+    of input and that this length does not include the terminating zero. Since
+    YYLIMIT points to the first character behind the used buffer we use "+ 2".
+    If we would use "+1" we could drop the "\000" rule but could no longer
+    distinguish between end of input and out of data.
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+int scan(char *s, int l) /* print one line per token of s; returns 0 at EOF, 1 on a bad character, 2 when out of data */
+{
+       char *p = s;
+       char *q = 0;
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+2) /* two past the last character, so the terminating zero is still scanned */
+       #define YYMARKER        q
+       #define YYFILL(n)               { printf("OOD\n"); return 2; }
+       
+       for(;;)
+       {
+/*!re2c
+       re2c:indent:top = 2;
+       "0"[0-9]+       { printf("Oct\n");      continue; }
+       [1-9][0-9]*     { printf("Num\n");      continue; }
+       "0"                     { printf("Num\n");      continue; }
+       "+"                     { printf("+\n");        continue; }
+       "-"                     { printf("-\n");        continue; }
+       "\000"          { printf("EOF\n");      return 0; }
+       [^]                     { printf("ERR\n");      return 1; }
+*/
+       }
+       return 0; /* not reached: every exit from the loop above is a return */
+}
+
+int main(int argc, char **argv) /* scan the expression passed as the first command line argument */
+{
+       if (argc > 1) /* argv[1] is only a string when an argument was actually given */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]); /* usage line shows the program name */
+               return 0;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_004.c b/lessons/001_upn_calculator/calc_004.c
new file mode 100755 (executable)
index 0000000..d0dec85
--- /dev/null
@@ -0,0 +1,155 @@
+/* Generated by re2c */
+#line 1 "calc_004.re"
+/* re2c lesson_001, calc_004, (c) M. Boerger 2006 */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+char * tokendup(const char *t, const char *l) /* malloc'ed, zero terminated copy of [t, l); the caller frees it */
+{
+       size_t n = l -t + 1; /* token length plus one byte for the terminator */
+       char *r = (char*)malloc(n);
+       
+       memmove(r, t, n-1);
+       r[n-1] = '\0'; /* n-1 is the last valid index; r[n] would lie past the allocation */
+       return r;
+}
+
+int scan(char *s, int l) /* print one line per token of s; returns 0 at EOF, 1 on a bad character, 2 when out of data */
+{
+       char *p = s;
+       char *q = 0;
+       char *t;
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+2) /* two past the last character, so the terminating zero is still scanned */
+       #define YYMARKER        q
+       #define YYFILL(n)               { printf("OOD\n"); return 2; }
+       
+       for(;;)
+       {
+               t = p; /* remember where the current token starts */
+
+#line 35 "<stdout>"
+               {
+                       YYCTYPE yych;
+
+                       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case 0x00:      goto yy9;
+                       case '+':       goto yy5;
+                       case '-':       goto yy7;
+                       case '0':       goto yy2;
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy4;
+                       default:        goto yy11;
+                       }
+yy2:
+                       ++YYCURSOR;
+                       switch((yych = *YYCURSOR)) {
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy15;
+                       default:        goto yy3;
+                       }
+yy3:
+#line 57 "calc_004.re"
+                       { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; }
+#line 75 "<stdout>"
+yy4:
+                       yych = *++YYCURSOR;
+                       goto yy14;
+yy5:
+                       ++YYCURSOR;
+#line 58 "calc_004.re"
+                       { printf("+\n");        continue; }
+#line 83 "<stdout>"
+yy7:
+                       ++YYCURSOR;
+#line 59 "calc_004.re"
+                       { printf("-\n");        continue; }
+#line 88 "<stdout>"
+yy9:
+                       ++YYCURSOR;
+#line 60 "calc_004.re"
+                       { printf("EOF\n");      return 0; }
+#line 93 "<stdout>"
+yy11:
+                       ++YYCURSOR;
+#line 61 "calc_004.re"
+                       { printf("ERR\n");      return 1; }
+#line 98 "<stdout>"
+yy13:
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy14:
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy13;
+                       default:        goto yy3;
+                       }
+yy15:
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy15;
+                       default:        goto yy17;
+                       }
+yy17:
+#line 56 "calc_004.re"
+                       { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; }
+#line 137 "<stdout>"
+               }
+#line 62 "calc_004.re"
+
+       }
+       return 0; /* not reached: every exit from the loop above is a return */
+}
+
+int main(int argc, char **argv) /* scan the expression passed as the first command line argument */
+{
+       if (argc > 1) /* argv[1] is only a string when an argument was actually given */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]); /* usage line shows the program name */
+               return 0;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_004.re b/lessons/001_upn_calculator/calc_004.re
new file mode 100755 (executable)
index 0000000..fd6d88a
--- /dev/null
@@ -0,0 +1,78 @@
+/* re2c lesson_001, calc_004, (c) M. Boerger 2006 */
+/*!ignore:re2c
+
+- making use of definitions
+  . We provide complex rules as definitions. We can even have definitions made
+    up from other definitions. And we could also use definitions as part of 
+    rules and not only as full rules as shown in this lesson.
+
+- showing the tokens
+  . re2c does not store the beginning of a token on its own but we can easily 
+    do this by providing a variable, in our case t, that is set to YYCURSOR on
+    every loop. If we were not using a loop here, we could have used s instead
+    of a new variable.
+  . As we use the token for an output function that requires a terminating zero
+    we copy the token. Alternatively we could store the end of the token, then
+    replace it with a zero character and replace it after the token has been 
+    used. However that approach is not always acceptable.
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+char * tokendup(const char *t, const char *l) /* malloc'ed, zero terminated copy of [t, l); the caller frees it */
+{
+       size_t n = l -t + 1; /* token length plus one byte for the terminator */
+       char *r = (char*)malloc(n);
+       
+       memmove(r, t, n-1);
+       r[n-1] = '\0'; /* n-1 is the last valid index; r[n] would lie past the allocation */
+       return r;
+}
+
+int scan(char *s, int l) /* print one line per token of s; returns 0 at EOF, 1 on a bad character, 2 when out of data */
+{
+       char *p = s;
+       char *q = 0;
+       char *t;
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+2) /* two past the last character, so the terminating zero is still scanned */
+       #define YYMARKER        q
+       #define YYFILL(n)               { printf("OOD\n"); return 2; }
+       
+       for(;;)
+       {
+               t = p; /* remember where the current token starts */
+/*!re2c
+       re2c:indent:top = 2;
+
+       DIGIT   = [0-9] ;
+       OCT             = "0" DIGIT+ ;
+       INT             = "0" | ( [1-9] DIGIT* ) ;
+
+       OCT                     { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; }
+       INT                     { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; }
+       "+"                     { printf("+\n");        continue; }
+       "-"                     { printf("-\n");        continue; }
+       "\000"          { printf("EOF\n");      return 0; }
+       [^]                     { printf("ERR\n");      return 1; }
+*/
+       }
+       return 0; /* not reached: every exit from the loop above is a return */
+}
+
+int main(int argc, char **argv) /* scan the expression passed as the first command line argument */
+{
+       if (argc > 1) /* argv[1] is only a string when an argument was actually given */
+       {
+               return scan(argv[1], strlen(argv[1]));
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[0]); /* usage line shows the program name */
+               return 0;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_005.c b/lessons/001_upn_calculator/calc_005.c
new file mode 100755 (executable)
index 0000000..b24a5c9
--- /dev/null
@@ -0,0 +1,235 @@
+/* Generated by re2c */
+#line 1 "calc_005.re"
+/* re2c lesson_001, calc_005, (c) M. Boerger 2006 */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define DEBUG(stmt) stmt
+
+int  stack[4];
+int  depth = 0;
+
+int push_num(const char *t, const char *l, int radix) /* convert the digits in [t, l) using radix and push the value; returns 0, or 3 when the stack is full */
+{
+       int num = 0;
+       
+       if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* compare against the element count, not the byte size */
+       {
+               return 3;
+       }
+
+       /* accumulate the digits from left to right */
+       for(; t < l; ++t)
+       {
+               num = num * radix + (*t - '0');
+       }
+       DEBUG(printf("Num: %d\n", num));
+
+       stack[depth++] = num;
+       return 0;
+}
+
+int stack_add() /* replace the two topmost values by their sum; returns 0, or 4 on stack underflow */
+{
+       if (depth <= 1) return 4; /* an addition needs two operands */
+       
+       depth -= 1;
+       stack[depth-1] += stack[depth]; /* fold the popped operand into the new top */
+       return 0;
+}
+
+int stack_sub() /* replace the two topmost values by their difference; returns 0, or 4 on stack underflow */
+{
+       if (depth <= 1) return 4; /* a subtraction needs two operands */
+
+       depth -= 1;
+       stack[depth-1] -= stack[depth]; /* lower operand minus the popped one */
+       return 0;
+}
+
+int scan(char *s, int l) /* evaluate the tokens of s on the global stack; returns 0 on success or a non zero error code (1 bad character, 2 from YYFILL, 3/4 from the stack helpers) */
+{
+       char *p = s;
+       char *q = 0;
+       char *t;
+       int res = 0;
+
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+1)
+       #define YYMARKER        q
+       #define YYFILL(n)               { return depth == 1 ? 0 : 2; } /* NOTE(review): reports 2 whenever a chunk ends with depth != 1, so an expression split over several arguments (1 2 +) stops after the second one - confirm this is intended */
+       
+       while(!res)
+       {
+               t = p; /* remember where the current token starts */
+
+#line 70 "<stdout>"
+               {
+                       YYCTYPE yych;
+
+                       if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case 0x09:
+                       case ' ':       goto yy2;
+                       case '+':       goto yy7;
+                       case '-':       goto yy9;
+                       case '0':       goto yy4;
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy6;
+                       default:        goto yy11;
+                       }
+yy2:
+                       ++YYCURSOR;
+                       yych = *YYCURSOR;
+                       goto yy19;
+yy3:
+#line 91 "calc_005.re"
+                       { continue; }
+#line 100 "<stdout>"
+yy4:
+                       ++YYCURSOR;
+                       switch((yych = *YYCURSOR)) {
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy15;
+                       default:        goto yy5;
+                       }
+yy5:
+#line 93 "calc_005.re"
+                       { res = push_num(t, p, 10); continue; }
+#line 119 "<stdout>"
+yy6:
+                       yych = *++YYCURSOR;
+                       goto yy14;
+yy7:
+                       ++YYCURSOR;
+#line 94 "calc_005.re"
+                       { res = stack_add();            continue; }
+#line 127 "<stdout>"
+yy9:
+                       ++YYCURSOR;
+#line 95 "calc_005.re"
+                       { res = stack_sub();            continue; }
+#line 132 "<stdout>"
+yy11:
+                       ++YYCURSOR;
+#line 96 "calc_005.re"
+                       { res = 1;                                      continue; }
+#line 137 "<stdout>"
+yy13:
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy14:
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy13;
+                       default:        goto yy5;
+                       }
+yy15:
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+                       switch(yych){
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':       goto yy15;
+                       default:        goto yy17;
+                       }
+yy17:
+#line 92 "calc_005.re"
+                       { res = push_num(t, p, 8);      continue; }
+#line 176 "<stdout>"
+yy18:
+                       ++YYCURSOR;
+                       if(YYLIMIT == YYCURSOR) YYFILL(1);
+                       yych = *YYCURSOR;
+yy19:
+                       switch(yych){
+                       case 0x09:
+                       case ' ':       goto yy18;
+                       default:        goto yy3;
+                       }
+               }
+#line 97 "calc_005.re"
+
+       }
+       return res; /* reached once an action stored a non zero code in res */
+}
+
+int main(int argc, char **argv)
+{
+       if (argc > 0)
+       {
+               char *inp;
+               int res = 0, argp = 0, len;
+               
+               while(!res && ++argp < argc)
+               {
+                       inp = argv[argp];
+                       len = strlen(inp);
+                       if (inp[0] == '\"' && inp[len-1] == '\"')
+                       {
+                               ++inp;
+                               len -=2;
+                       }
+                       res = scan(inp, len);
+               }
+               switch(res)
+               {
+               case 0:
+                       printf("Result: %d\n", stack[0]);
+                       return 0;
+               case 1:
+                       fprintf(stderr, "Illegal character in input.\n");
+                       return 1;
+               case 2:
+                       fprintf(stderr, "Premature end of input.\n");
+                       return 2;
+               case 3:
+                       fprintf(stderr, "Stack overflow.\n");
+                       return 3;
+               case 4:
+                       fprintf(stderr, "Stack underflow.\n");
+                       return 4;
+               }
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[1]);
+               return 0;
+       }
+}
diff --git a/lessons/001_upn_calculator/calc_005.re b/lessons/001_upn_calculator/calc_005.re
new file mode 100755 (executable)
index 0000000..c19d38b
--- /dev/null
@@ -0,0 +1,144 @@
+/* re2c lesson_001, calc_005, (c) M. Boerger 2006 */
+/*!ignore:re2c
+
+- turning this lesson into an easy calculator
+  . We are going to write an UPN calculator so we need an additional rule to
+    ignore white space.
+  . Then we need to store the scanned input somewhere and do our math on it.
+  . Also we need to scan all arguments since the main c code gets the input
+    split up into chunks.
+  . In contrast to what we did before we now add a variable res that holds the 
+    scanner state. We initialize that variable to 0 and quit the loop when it
+    is non zero. This will also be our return value so that we can use it in
+    function main to generate error information.
+  . To support operating systems where ' and " get passed in program arguments
+    we check for them being first and last input character. If so we correct
+    input pointer and input length. Since now our scanner might not see a 
+    terminating zero we change YYLIMIT again and drop the special zero rule.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define DEBUG(stmt) stmt
+
+int  stack[4];
+int  depth = 0;
+
+int push_num(const char *t, const char *l, int radix) /* convert the digits in [t, l) using radix and push the value; returns 0, or 3 when the stack is full */
+{
+       int num = 0;
+       
+       if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* compare against the element count, not the byte size */
+       {
+               return 3;
+       }
+
+       /* accumulate the digits from left to right */
+       for(; t < l; ++t)
+       {
+               num = num * radix + (*t - '0');
+       }
+       DEBUG(printf("Num: %d\n", num));
+
+       stack[depth++] = num;
+       return 0;
+}
+
+int stack_add() /* replace the two topmost values by their sum; returns 0, or 4 on stack underflow */
+{
+       if (depth <= 1) return 4; /* an addition needs two operands */
+       
+       depth -= 1;
+       stack[depth-1] += stack[depth]; /* fold the popped operand into the new top */
+       return 0;
+}
+
+int stack_sub() /* replace the two topmost values by their difference; returns 0, or 4 on stack underflow */
+{
+       if (depth <= 1) return 4; /* a subtraction needs two operands */
+
+       depth -= 1;
+       stack[depth-1] -= stack[depth]; /* lower operand minus the popped one */
+       return 0;
+}
+
+int scan(char *s, int l) /* evaluate the tokens of s on the global stack; returns 0 on success or a non zero error code (1 bad character, 2 from YYFILL, 3/4 from the stack helpers) */
+{
+       char *p = s;
+       char *q = 0;
+       char *t;
+       int res = 0;
+
+       #define YYCTYPE         char
+       #define YYCURSOR        p
+       #define YYLIMIT         (s+l+1)
+       #define YYMARKER        q
+       #define YYFILL(n)               { return depth == 1 ? 0 : 2; } /* NOTE(review): reports 2 whenever a chunk ends with depth != 1, so an expression split over several arguments (1 2 +) stops after the second one - confirm this is intended */
+       
+       while(!res)
+       {
+               t = p; /* remember where the current token starts */
+/*!re2c
+       re2c:indent:top = 2;
+
+       DIGIT   = [0-9] ;
+       OCT             = "0" DIGIT+ ;
+       INT             = "0" | ( [1-9] DIGIT* ) ;
+       WS              = [ \t]+ ;
+
+       WS              { continue; }
+       OCT             { res = push_num(t, p, 8);      continue; }
+       INT             { res = push_num(t, p, 10); continue; }
+       "+"             { res = stack_add();            continue; }
+       "-"             { res = stack_sub();            continue; }
+       [^]             { res = 1;                                      continue; }
+*/
+       }
+       return res; /* reached once an action stored a non zero code in res */
+}
+
+int main(int argc, char **argv)
+{
+       if (argc > 0)
+       {
+               char *inp;
+               int res = 0, argp = 0, len;
+               
+               while(!res && ++argp < argc)
+               {
+                       inp = argv[argp];
+                       len = strlen(inp);
+                       if (inp[0] == '\"' && inp[len-1] == '\"')
+                       {
+                               ++inp;
+                               len -=2;
+                       }
+                       res = scan(inp, len);
+               }
+               switch(res)
+               {
+               case 0:
+                       printf("Result: %d\n", stack[0]);
+                       return 0;
+               case 1:
+                       fprintf(stderr, "Illegal character in input.\n");
+                       return 1;
+               case 2:
+                       fprintf(stderr, "Premature end of input.\n");
+                       return 2;
+               case 3:
+                       fprintf(stderr, "Stack overflow.\n");
+                       return 3;
+               case 4:
+                       fprintf(stderr, "Stack underflow.\n");
+                       return 4;
+               }
+       }
+       else
+       {
+               fprintf(stderr, "%s <expr>\n", argv[1]);
+               return 0;
+       }
+}
diff --git a/lessons/001_upn_calculator/readme.txt b/lessons/001_upn_calculator/readme.txt
new file mode 100755 (executable)
index 0000000..9905762
--- /dev/null
@@ -0,0 +1,27 @@
+re2c lesson_001, (c) M. Boerger 2006
+
+This lesson gets you started with re2c. In the end you will have an easy UPN
+calculator for use at command line.
+
+You will learn about the basic interface of re2c when scanning input strings,
+and how to detect the end of the input and use that to stop scanning in order
+to avoid problems.
+
+Once you have successfully installed re2c you can use it to generate *.c files
+from the *.re files presented in this lesson. Actually the expected *.c files 
+are already present. So you should name them *.cc or something alike or just 
+give them a different name like test.c. To do so you simply change into the 
+directory and execute the following command:
+
+  re2c calc_001.re > test.c
+
+Then use your compiler to compile that code and run it.
+
+When you want to debug the code it helps to make re2c generate working #line
+information. To do so you simply specify the output file using the -o switch 
+followed by the output filename:
+
+  re2c -o test.c calc_001.re
+
+The input files *.re each contain basic step by step comments that explain
+what is going on and what you can see in the examples.
index 3fe264b4da6a8dab20ac2f1e3c71791fe50958bd..81921946d1bb3656df9a42636b0bfd601736d9e3 100644 (file)
--- a/re2c.1.in
+++ b/re2c.1.in
@@ -7,6 +7,9 @@
 .ds rx regular expression
 .ds lx \fIl\fP-expression
 \"$Log$
+\"Revision 1.50  2006/04/15 16:03:35  helly
+\"- Added tutorial like lessons to re2c.
+\"
 \"Revision 1.49  2006/04/15 13:00:04  helly
 \"- Added /*!ignore!re2c */ to support documenting of re2c source.
 \"- Fixed issue with multiline re2c comments (/*!max:re2c ... */ and alike).
@@ -594,285 +597,9 @@ placing a "\fC/*!getstate:re2c */\fP" comment.
 
 .SH "A LARGER EXAMPLE"
 .LP
-.in +3
-.nf
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <string.h>
-
-#define ADDEQ   257
-#define ANDAND  258
-#define ANDEQ   259
-#define ARRAY   260
-#define ASM     261
-#define AUTO    262
-#define BREAK   263
-#define CASE    264
-#define CHAR    265
-#define CONST   266
-#define CONTINUE        267
-#define DECR    268
-#define DEFAULT 269
-#define DEREF   270
-#define DIVEQ   271
-#define DO      272
-#define DOUBLE  273
-#define ELLIPSIS        274
-#define ELSE    275
-#define ENUM    276
-#define EQL     277
-#define EXTERN  278
-#define FCON    279
-#define FLOAT   280
-#define FOR     281
-#define FUNCTION        282
-#define GEQ     283
-#define GOTO    284
-#define ICON    285
-#define ID      286
-#define IF      287
-#define INCR    288
-#define INT     289
-#define LEQ     290
-#define LONG    291
-#define LSHIFT  292
-#define LSHIFTEQ        293
-#define MODEQ   294
-#define MULEQ   295
-#define NEQ     296
-#define OREQ    297
-#define OROR    298
-#define POINTER 299
-#define REGISTER        300
-#define RETURN  301
-#define RSHIFT  302
-#define RSHIFTEQ        303
-#define SCON    304
-#define SHORT   305
-#define SIGNED  306
-#define SIZEOF  307
-#define STATIC  308
-#define STRUCT  309
-#define SUBEQ   310
-#define SWITCH  311
-#define TYPEDEF 312
-#define UNION   313
-#define UNSIGNED        314
-#define VOID    315
-#define VOLATILE        316
-#define WHILE   317
-#define XOREQ   318
-#define EOI     319
-
-typedef unsigned int uint;
-typedef unsigned char uchar;
-
-#define BSIZE   8192
-
-#define YYCTYPE         uchar
-#define YYCURSOR        cursor
-#define YYLIMIT         s->lim
-#define YYMARKER        s->ptr
-#define YYFILL(n)       {cursor = fill(s, cursor);}
-
-#define RET(i)  {s->cur = cursor; return i;}
-
-typedef struct Scanner {
-    int                 fd;
-    uchar               *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
-    uint                line;
-} Scanner;
-
-uchar *fill(Scanner *s, uchar *cursor){
-    if(!s->eof){
-        uint cnt = s->tok - s->bot;
-        if(cnt){
-            memcpy(s->bot, s->tok, s->lim - s->tok);
-            s->tok = s->bot;
-            s->ptr -= cnt;
-            cursor -= cnt;
-            s->pos -= cnt;
-            s->lim -= cnt;
-        }
-        if((s->top - s->lim) < BSIZE){
-            uchar *buf = (uchar*)
-                malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
-            memcpy(buf, s->tok, s->lim - s->tok);
-            s->tok = buf;
-            s->ptr = &buf[s->ptr - s->bot];
-            cursor = &buf[cursor - s->bot];
-            s->pos = &buf[s->pos - s->bot];
-            s->lim = &buf[s->lim - s->bot];
-            s->top = &s->lim[BSIZE];
-            free(s->bot);
-            s->bot = buf;
-        }
-        if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
-            s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
-        }
-        s->lim += cnt;
-    }
-    s->cur = cursor;
-    return cursor;
-}
-
-int scan(Scanner *s){
-        uchar *cursor = s->cur;
-std:
-        s->tok = cursor;
-/*!re2c
-any     = [\\000-\\377];
-O       = [0-7];
-D       = [0-9];
-L       = [a-zA-Z_];
-H       = [a-fA-F0-9];
-E       = [Ee] [+-]? D+;
-FS      = [fFlL];
-IS      = [uUlL]*;
-ESC     = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
-*/
-
-/*!re2c
-        "/*"                    { goto comment; }
-        
-        "auto"                  { RET(AUTO); }
-        "break"                 { RET(BREAK); }
-        "case"                  { RET(CASE); }
-        "char"                  { RET(CHAR); }
-        "const"                 { RET(CONST); }
-        "continue"              { RET(CONTINUE); }
-        "default"               { RET(DEFAULT); }
-        "do"                    { RET(DO); }
-        "double"                { RET(DOUBLE); }
-        "else"                  { RET(ELSE); }
-        "enum"                  { RET(ENUM); }
-        "extern"                { RET(EXTERN); }
-        "float"                 { RET(FLOAT); }
-        "for"                   { RET(FOR); }
-        "goto"                  { RET(GOTO); }
-        "if"                    { RET(IF); }
-        "int"                   { RET(INT); }
-        "long"                  { RET(LONG); }
-        "register"              { RET(REGISTER); }
-        "return"                { RET(RETURN); }
-        "short"                 { RET(SHORT); }
-        "signed"                { RET(SIGNED); }
-        "sizeof"                { RET(SIZEOF); }
-        "static"                { RET(STATIC); }
-        "struct"                { RET(STRUCT); }
-        "switch"                { RET(SWITCH); }
-        "typedef"               { RET(TYPEDEF); }
-        "union"                 { RET(UNION); }
-        "unsigned"              { RET(UNSIGNED); }
-        "void"                  { RET(VOID); }
-        "volatile"              { RET(VOLATILE); }
-        "while"                 { RET(WHILE); }
-        
-        L (L|D)*                { RET(ID); }
-        
-        ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
-        (['] (ESC|any\\[\\n\\\\'])* ['])
-                                { RET(ICON); }
-        
-        (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
-                                { RET(FCON); }
-        
-        (["] (ESC|any\\[\\n\\\\"])* ["])
-                                { RET(SCON); }
-        
-        "..."                   { RET(ELLIPSIS); }
-        ">>="                   { RET(RSHIFTEQ); }
-        "<<="                   { RET(LSHIFTEQ); }
-        "+="                    { RET(ADDEQ); }
-        "-="                    { RET(SUBEQ); }
-        "*="                    { RET(MULEQ); }
-        "/="                    { RET(DIVEQ); }
-        "%="                    { RET(MODEQ); }
-        "&="                    { RET(ANDEQ); }
-        "^="                    { RET(XOREQ); }
-        "|="                    { RET(OREQ); }
-        ">>"                    { RET(RSHIFT); }
-        "<<"                    { RET(LSHIFT); }
-        "++"                    { RET(INCR); }
-        "--"                    { RET(DECR); }
-        "->"                    { RET(DEREF); }
-        "&&"                    { RET(ANDAND); }
-        "||"                    { RET(OROR); }
-        "<="                    { RET(LEQ); }
-        ">="                    { RET(GEQ); }
-        "=="                    { RET(EQL); }
-        "!="                    { RET(NEQ); }
-        ";"                     { RET(';'); }
-        "{"                     { RET('{'); }
-        "}"                     { RET('}'); }
-        ","                     { RET(','); }
-        ":"                     { RET(':'); }
-        "="                     { RET('='); }
-        "("                     { RET('('); }
-        ")"                     { RET(')'); }
-        "["                     { RET('['); }
-        "]"                     { RET(']'); }
-        "."                     { RET('.'); }
-        "&"                     { RET('&'); }
-        "!"                     { RET('!'); }
-        "~"                     { RET('~'); }
-        "-"                     { RET('-'); }
-        "+"                     { RET('+'); }
-        "*"                     { RET('*'); }
-        "/"                     { RET('/'); }
-        "%"                     { RET('%'); }
-        "<"                     { RET('<'); }
-        ">"                     { RET('>'); }
-        "^"                     { RET('^'); }
-        "|"                     { RET('|'); }
-        "?"                     { RET('?'); }
-
-
-        [ \\t\\v\\f]+           { goto std; }
-
-        "\\n"
-            {
-                if(cursor == s->eof) RET(EOI);
-                s->pos = cursor; s->line++;
-                goto std;
-            }
-
-        any
-            {
-                printf("unexpected character: %c\\n", *s->tok);
-                goto std;
-            }
-*/
-
-comment:
-/*!re2c
-        "*/"                    { goto std; }
-        "\\n"
-            {
-                if(cursor == s->eof) RET(EOI);
-                s->tok = s->pos = cursor; s->line++;
-                goto comment;
-            }
-        any                     { goto comment; }
-*/
-}
-
-main(){
-    Scanner in;
-    int t;
-    memset((char*) &in, 0, sizeof(in));
-    in.fd = 0;
-    while((t = scan(&in)) != EOI){
-/*
-        printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
-        printf("%d\\n", t);
-*/
-    }
-    close(in.fd);
-}
-.fi
-.in -3
+The subdirectory lessons of the re2c distribution contains a few step by step
+lessons to get you started with re2c. All examples in the lessons subdirectory
+can be compiled and actually work.
 
 .SH FEATURES
 .LP
@@ -894,6 +621,7 @@ and act accordingly.
 .LP
 \*(re does not provide start conditions:  use a separate scanner
 specification for each start condition (as illustrated in the above example).
+
 .SH BUGS
 .LP
 Difference only works for character sets.
index bee44c2cce6acf1290121cd6932b738163cf78c4..de033b28c7317719cbf9db1b2a0f80c31ae37db4 100644 (file)
@@ -3,7 +3,7 @@ _XPG=1
 result=0
 errcnt=0
 tstcnt=0;
-for x in @top_srcdir@/test/*.re; do
+for x in @top_srcdir@/test/*.re `find @top_srcdir@/lessons -name '*.re'`; do
        tstcnt=$(($tstcnt+1))
        switches=`basename $x|sed -e 's/^[^.]*\.\(.*\)\.re$/-\1/g' -e 's/^[^-].*//g'`
        genname=`echo $switches|sed -e 's,^.[^o].*$,,g' -e 's,^-o\([ -]*\),@builddir@/test/\1,g'`
@@ -26,7 +26,7 @@ for x in @top_srcdir@/test/*.re; do
                result=1
                errcnt=$(($errcnt+1))
        fi
-       test -f ${x%.re}.diff -a ! -s $difname && rm -f $difname
+       test -f $difname -a ! -s $difname && rm -f $difname
 done
 if test $result = 0; then
        echo "All $tstcnt tests passed successfully."