]> granicus.if.org Git - flex/commitdiff
re-add these files
authorWill Estes <wlestes@users.sourceforge.net>
Tue, 24 Jul 2001 20:23:36 +0000 (20:23 +0000)
committerWill Estes <wlestes@users.sourceforge.net>
Tue, 24 Jul 2001 20:23:36 +0000 (20:23 +0000)
examples/fastwc/README [new file with mode: 0644]
examples/fastwc/mywc.c [new file with mode: 0644]
examples/fastwc/wc1.l [new file with mode: 0644]
examples/fastwc/wc2.l [new file with mode: 0644]
examples/fastwc/wc3.l [new file with mode: 0644]
examples/fastwc/wc4.l [new file with mode: 0644]
examples/fastwc/wc5.l [new file with mode: 0644]

diff --git a/examples/fastwc/README b/examples/fastwc/README
new file mode 100644 (file)
index 0000000..0dd3afe
--- /dev/null
@@ -0,0 +1,56 @@
+This directory contains some examples illustrating techniques for extracting
+high-performance from flex scanners.  Each program implements a simplified
+version of the Unix "wc" tool: read text from stdin and print the number of
+characters, words, and lines present in the text.  All programs were compiled
+using gcc (version unavailable, sorry) with the -O flag, and run on a
+SPARCstation 1+.  The input used was a PostScript file, mainly containing
+figures, with the following "wc" counts:
+
+       lines  words  characters
+       214217 635954 2592172
+
+
+The basic principles illustrated by these programs are:
+
+       - match as much text with each rule as possible
+       - adding rules does not slow you down!
+       - avoid backing up
+
+and the big caveat that comes with them is:
+
+       - you buy performance with decreased maintainability; make
+         sure you really need it before applying the above techniques.
+
+See the "Performance Considerations" section of flexdoc for more
+details regarding these principles.
+
+
+The different versions of "wc":
+
+       mywc.c
+               a simple but fairly efficient C version
+
+       wc1.l   a naive flex "wc" implementation
+
+       wc2.l   somewhat faster; adds rules to match multiple tokens at once
+
+       wc3.l   faster still; adds more rules to match longer runs of tokens
+
+       wc4.l   fastest; still more rules added; hard to do much better
+               using flex (or, I suspect, hand-coding)
+
+       wc5.l   identical to wc3.l except one rule has been slightly
+               shortened, introducing backing-up
+
+Timing results (all times in user CPU seconds):
+
+       program   time   notes
+       -------   ----   -----
+       wc1       16.4   default flex table compression (= -Cem)
+       wc1        6.7   -Cf compression option
+       /bin/wc    5.8   Sun's standard "wc" tool
+       mywc       4.6   simple but better C implementation!
+       wc2        4.6   as good as C implementation; built using -Cf
+       wc3        3.8   -Cf
+       wc4        3.3   -Cf
+       wc5        5.7   -Cf; ouch, backing up is expensive
diff --git a/examples/fastwc/mywc.c b/examples/fastwc/mywc.c
new file mode 100644 (file)
index 0000000..92e5a36
--- /dev/null
@@ -0,0 +1,26 @@
+/* A simple but fairly efficient C version of the Unix "wc" tool */
+
+#include <stdio.h>
+#include <ctype.h>
+
+main()
+{
+       register int c, cc = 0, wc = 0, lc = 0;
+       FILE *f = stdin;
+
+       while ((c = getc(f)) != EOF) {
+               ++cc;
+               if (isgraph(c)) {
+                       ++wc;
+                       do {
+                               c = getc(f);
+                               if (c == EOF)
+                                       goto done;
+                               ++cc;
+                       } while (isgraph(c));
+               }
+               if (c == '\n')
+                       ++lc;
+       }
+done:  printf( "%8d%8d%8d\n", lc, wc, cc );
+}
diff --git a/examples/fastwc/wc1.l b/examples/fastwc/wc1.l
new file mode 100644 (file)
index 0000000..d6696bc
--- /dev/null
@@ -0,0 +1,18 @@
+/* First cut at a flex-based "wc" tool. */
+
+ws    [ \t]
+nonws [^ \t\n]
+
+%%
+       int cc = 0, wc = 0, lc = 0;
+
+{nonws}+       cc += yyleng; ++wc;
+
+{ws}+          cc += yyleng;
+
+\n             ++lc; ++cc;
+
+<<EOF>>                {
+               printf( "%8d %8d %8d\n", lc, wc, cc );
+               yyterminate();
+               }
diff --git a/examples/fastwc/wc2.l b/examples/fastwc/wc2.l
new file mode 100644 (file)
index 0000000..bd63cd4
--- /dev/null
@@ -0,0 +1,20 @@
+/* Somewhat faster "wc" tool: match more text with each rule */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+
+%%
+       int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*    cc += yyleng; ++wc;
+{word}{ws}*\n  cc += yyleng; ++wc; ++lc;
+
+{ws}+          cc += yyleng;
+
+\n+            cc += yyleng; lc += yyleng;
+
+<<EOF>>                {
+               printf( "%8d %8d %8d\n", lc, wc, cc );
+               yyterminate();
+               }
diff --git a/examples/fastwc/wc3.l b/examples/fastwc/wc3.l
new file mode 100644 (file)
index 0000000..7c5f2e2
--- /dev/null
@@ -0,0 +1,24 @@
+/* Somewhat faster still: potentially match a lot of text with each rule */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+       int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*            cc += yyleng; ++wc;
+{word}{ws}*\n          cc += yyleng; ++wc; ++lc;
+{words}{word}{ws}*     cc += yyleng; wc += 2;
+{words}{2}{word}{ws}*  cc += yyleng; wc += 3;
+{words}{3}{word}{ws}*  cc += yyleng; wc += 4;
+
+{ws}+                  cc += yyleng;
+
+\n+                    cc += yyleng; lc += yyleng;
+
+<<EOF>>                {
+               printf( "%8d %8d %8d\n", lc, wc, cc );
+               yyterminate();
+               }
diff --git a/examples/fastwc/wc4.l b/examples/fastwc/wc4.l
new file mode 100644 (file)
index 0000000..cbe56f6
--- /dev/null
@@ -0,0 +1,27 @@
+/* Fastest version of wc: add rules to pick up newlines, too */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+       int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*            ++wc; cc += yyleng;
+{word}{ws}*\n          ++wc; cc += yyleng; ++lc;
+{words}{word}{ws}*     wc += 2; cc += yyleng;
+{words}{word}{ws}*\n   wc += 2; cc += yyleng; ++lc;
+{words}{2}{word}{ws}*  wc += 3; cc += yyleng;
+{words}{2}{word}{ws}*\n        wc += 3; cc += yyleng; ++lc;
+{words}{3}{word}{ws}*  wc += 4; cc += yyleng;
+{words}{3}{word}{ws}*\n        wc += 4; cc += yyleng; ++lc;
+
+{ws}+                  cc += yyleng;
+
+\n+                    cc += yyleng; lc += yyleng;
+
+<<EOF>>                {
+               printf( "%8d %8d %8d\n", lc, wc, cc );
+               yyterminate();
+               }
diff --git a/examples/fastwc/wc5.l b/examples/fastwc/wc5.l
new file mode 100644 (file)
index 0000000..8fe17b6
--- /dev/null
@@ -0,0 +1,24 @@
+/* Oops; slight change from wc3.l introduces backtracking */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+       int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*            cc += yyleng; ++wc;
+{word}{ws}*\n          cc += yyleng; ++wc; ++lc;
+{words}{word}          cc += yyleng; wc += 2;  /* oops */
+{words}{2}{word}{ws}*  cc += yyleng; wc += 3;
+{words}{3}{word}{ws}*  cc += yyleng; wc += 4;
+
+{ws}+                  cc += yyleng;
+
+\n+                    cc += yyleng; lc += yyleng;
+
+<<EOF>>                {
+               printf( "%8d %8d %8d\n", lc, wc, cc );
+               yyterminate();
+               }