Prepare release 0.13.7.

author Ulya Fokanova <skvadrik@gmail.com>

Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)

committer Ulya Fokanova <skvadrik@gmail.com>

Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)
author Ulya Fokanova <skvadrik@gmail.com>
Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)
committer Ulya Fokanova <skvadrik@gmail.com>
Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)
diff --git a/add-release.txt b/add-release.txt

index 6290979be47d3d32a858f2771af104807770d5e1..cf73d68ae6a475fadfd17ca49e17ca0e9135770b 100755 (executable)
--- a/add-release.txt
+++ b/add-release.txt
@@ -1,59 +1,14 @@
-1. For a BRANCH first create a new version directory
-   and apply fixes in the new directory if not yet done:
-   > svn cp tags/RELEASE_<old> tags/RELEASE_<new>
+1. Upload new and delete old documentation
  
-2. Edit dates in the following files:
+2. Test windows build using cygwin
+
+3. Edit the following files:
     - CHANGELOG
     - htdocs/index.html
  
-3. For a BRANCH edit version in:
-   - configure.in
-
-4. Keep CHANGELOG, htdocs/index.html in synch
-
-5. Commit preparation phase:
-   > svn ci -m "- Prepare new release <new>"
-
-6. For HEAD:
-   > svn mkdir tags/RELEASE_<new>
-   > svn cp trunk/re2c tags/RELEASE_<new>
-
-7. For HEAD edit version in:
-   - tags/RELEASE_<new>/re2c/configure.in   (del ".dev")
-   - trunk/re2c/configure.in                (inc ver, add ".dev")
-
-8. Compile and update version files:
-   > ./configure && make clean && make && ./re2c -v
-
-9. Check version in config_w32.h
-
-10. Build windows versions
-
-11. Test windows build using cygwin
-
-12. Make release (also tests the new *nix release):
-    > make release
-
-13. Add release packages to subversion
-    > svn add re2c-<new>*
-
-13. Upload release files and add releases
-
-14. Write NEWS file
-
-15. Add release notes on freshmeat
-
-16. Upload new and delete old documentation
-
-17. Login to "re2c.sourceforge.net" and update web site
-
-18. Add new version lines in these files:
-    - CHANGELOG
-    - htdocs/index.html
+4. Run script release.sh:
+    ./release.sh <version>
  
-19. Update to new version in HEAD
-    > make clean && make && ./re2c -v
+5. Write NEWS file
  
-20. Commit releases and changes in HEAD
-    > svn ci -m "- Add new release <new>" tags/RELEASE_<new>
-    > scn ci -m "- This is <new+1> now" trunk/re2c
+6. Add release notes on freshmeat
diff --git a/re2c/.gitignore b/re2c/.gitignore

index daa5bca0da9b71331243f2cc81ae393235bc73c7..a961d65d7c442c6843c7791d1eb50c5806da19c3 100644 (file)
--- a/re2c/.gitignore
+++ b/re2c/.gitignore
@@ -1,5 +1,6 @@
  re2c
  re2c.1
+re2c.ad
  .deps
  autoscan.log
  autom4te.cache
diff --git a/re2c/CHANGELOG b/re2c/CHANGELOG

index 96d9566b4b561c9521fae27abd376df23409c9e3..c172d75df0732af82aa3957258c6b1a9ded888d9 100644 (file)
--- a/re2c/CHANGELOG
+++ b/re2c/CHANGELOG
@@ -1,5 +1,9 @@
-Version 0.13.7 (20??-??-??)
+Version 0.13.7 (2014-07-25)
  ---------------------------
+- Added UTF-8 support
+- Added UTF-16 support
+- Added default rule
+- Added option to control ill-formed Unicode
  
  Version 0.13.6 (2013-07-04)
  ---------------------------
diff --git a/re2c/Makefile.am b/re2c/Makefile.am

index 74fdce0e58d6035354eb3f7c61b24267fd0f2689..6c0afadf610f8033c9a0b42e5875a9bf7e3d0fa4 100755 (executable)
--- a/re2c/Makefile.am
+++ b/re2c/Makefile.am
@@ -9,8 +9,6 @@ re2c_SOURCES = code.cc dfa.cc main.cc parser.cc actions.cc scanner.re substr.cc
         substr.h token.h mbo_getopt.h code.h stream_lc.h code_names.h
  BUILT_SOURCES = parser.cc scanner.cc
  
-man_MANS     = re2c.1
-
  #CXXFLAGS     = -O2 -Wall -Wno-unused -Wno-parentheses -Wno-deprecated
  #CXXFLAGS     = -ggdb -fno-inline -O2 -Wall -Wextra -pedantic -Wconversion -Wpointer-arith -Wwrite-strings -Wredundant-decls -Werror -Wunused-function -DPEDANTIC
  CXXFLAGS     = -W -Wall -Wextra -pedantic -Wredundant-decls -DPEDANTIC -O2 -g
@@ -23,18 +21,18 @@ CLEANFILES   = parser.cc y.tab.c y.tab.h scanner.cc re2c.1 .version htdocs/manua
  
  DISTCLEANFILES = makerpm re2c.spec README scanner.cc re2c$(EXEEXT)
  
-EXTRA_SRC    = $(man_MANS) README parser.y scanner.re y.tab.h CHANGELOG \
+EXTRA_SRC    = README parser.y scanner.re y.tab.h CHANGELOG \
                 doc examples test bootstrap/*.cc bootstrap/*.h lessons
-EXTRA_DIST   = $(EXTRA_SRC) makerpm.in re2c.spec.in re2c.spec README.in config_w32.h.in htdocs/manual.html.in
+EXTRA_DIST   = $(EXTRA_SRC) makerpm.in re2c.spec.in re2c.spec README.in config_w32.h.in
  EXTRA_ZIP    = $(EXTRA_SRC) config_w32.h *.sln *.vcproj re2c.rules
  
-dist-hook: vtest re2c.spec
+dist-hook: re2c.spec
         rm -rf `find $(distdir)/doc -name .git`
         rm -rf `find $(distdir)/examples -name .git`
         rm -rf `find $(distdir)/test -name .git -o -name .gitignore`
         rm -rf `find $(distdir)/lessons -name .git -o -name .gitignore`
  
-rpm-files:     $(bin_PROGRAMS) $(EXTRA_DIST)
+rpm-files: $(bin_PROGRAMS) $(EXTRA_DIST)
  
  rpm: dist
         cp -f re2c-$(PACKAGE_VERSION).tar.gz `rpm --eval "%{_sourcedir}"`
@@ -83,10 +81,30 @@ scanner.cc: $(top_srcdir)/scanner.re
  
  TESTS = run_tests.sh
  
-test: all $(TESTS)
+tests: all $(TESTS)
         test -x $(TESTS) || chmod +x $(TESTS)
         ./$(TESTS)
  
-vtest: all $(TESTS)
+vtests: all $(TESTS)
         test -x $(TESTS) || chmod +x $(TESTS)
         ./$(TESTS) --valgrind
+
+DOCS_IN  = re2c.ad
+DOCS = re2c.1 htdocs/manual.html
+DOCS_GEN = ./re2c_docs.sh
+
+# Command-style targets: never satisfied by a file or directory of that name.
+.PHONY: tests vtests docs
+
+docs: $(DOCS)
+
+# $(DOCS_GEN) is run once, without arguments, for every file in $(DOCS)
+# (presumably it regenerates all of them in one go -- confirm against the
+# script).  Hang the recipe on re2c.1 only and chain the other output to it,
+# so that a parallel make cannot start the generator twice concurrently.
+re2c.1: $(DOCS_GEN) $(DOCS_IN)
+       $(DOCS_GEN)
+
+htdocs/manual.html: re2c.1
+       @test -f $@ || rm -f re2c.1
+       @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) re2c.1
diff --git a/re2c/htdocs/index.html b/re2c/htdocs/index.html

index 0e0aff7643a8fc50cb8f089ebe178771fe88dfbb..0f2d7b74d91e2b2088b51ac0ebdb3772ee67581c 100755 (executable)
--- a/re2c/htdocs/index.html
+++ b/re2c/htdocs/index.html
@@ -68,8 +68,12 @@
      </ul>
      <hr />
      <h1>Changelog</h1>
-    <h2>20??-??-??: 0.13.7</h2>
+    <h2>2014-07-25: 0.13.7</h2>
      <ul>
+      <li>Added UTF-8 support</li>
+      <li>Added UTF-16 support</li>
+      <li>Added default rule</li>
+      <li>Added option to control ill-formed Unicode</li>
      </ul>
      <h2>2013-07-04: 0.13.6</h2>
      <ul>
diff --git a/re2c/htdocs/manual.html.in b/re2c/htdocs/manual.html.in

deleted file mode 100755 (executable)

index add8fa4..0000000
--- a/re2c/htdocs/manual.html.in
+++ /dev/null
@@ -1,816 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>Manpage of RE2C</title>
-</head>
-<body>
-<h1>RE2C</h1>
-Section: User Commands (1)<br />
-Updated: @PACKAGE_DATE@<br />
-<a href="#index">Index</a>
-<hr />
-<a name="lbAB" id="lbAB">&nbsp;</a>
-<h2>NAME</h2>
-<p>re2c - convert regular expressions to C/C++</p>
-<a name="lbAC" id="lbAC">&nbsp;</a>
-<h2>SYNOPSIS</h2>
-<p><b>re2c</b> [<b>-bdefFghisuvVwx18</b>] [<b>-o output</b>] [<b>-c</b> [<b>-t header</b>]] file</p>
-<a name="lbAD" id="lbAD">&nbsp;</a>
-<h2>DESCRIPTION</h2>
-<p><b>re2c</b> is a preprocessor that generates C-based recognizers from
-regular expressions. The input to <b>re2c</b> consists of C/C++ source
-interleaved with comments of the form /*!re2c ... */ which contain scanner
-specifications. In the output these comments are replaced with code that, when
-executed, will find the next input token and then execute some user-supplied
-token-specific code.</p>
-<p>For example, given the following code</p>
-<pre>
-char *scan(char *p)
-{
-/*!re2c
-        re2c:define:YYCTYPE  = "unsigned char";
-        re2c:define:YYCURSOR = p;
-        re2c:yyfill:enable   = 0;
-        re2c:yych:conversion = 1;
-        re2c:indent:top      = 1;
-        [0-9]+          {return p;}
-        [^]             {return (char*)0;}
-*/
-}
-</pre>
-<br />
-<br />
-<p><b>re2c -is</b> will generate</p>
-<pre>
-/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
-char *scan(char *p)
-{
-    {
-        unsigned char yych;
-
-        yych = (unsigned char)*p;
-        if(yych &lt;= '/') goto yy4;
-        if(yych &gt;= ':') goto yy4;
-        ++p;
-        yych = (unsigned char)*p;
-        goto yy7;
-yy3:
-        {return p;}
-yy4:
-        ++p;
-        yych = (unsigned char)*p;
-        {return char*)0;}
-yy6:
-        ++p;
-        yych = (unsigned char)*p;
-yy7:
-        if(yych &lt;= '/') goto yy3;
-        if(yych &lt;= '9') goto yy6;
-        goto yy3;
-    }
-
-}
-</pre>
-<br />
-<br />
-<p>You can place one /*!max:re2c */ comment that will output a "#define YYMAXFILL
-&lt;n&gt;" line that holds the maximum number of characters 
-required to parse the input. That is the maximum value YYFILL(n)
-will receive. If -1 is in effect then YYMAXFILL can be triggered once 
-after the last /*!re2c */.</p>
-<p>You can also use /*!ignore:re2c */ blocks that allows to document the
-scanner code and will not be part of the output.</p>
-<a name="lbAE" id="lbAE">&nbsp;</a>
-<h2>OPTIONS</h2>
-<p><b>re2c</b> provides the following options:</p>
-<dl compact="compact">
-<dt><b>-?</b></dt>
-<dd><b>-h</b> Invoke a short help.<br /><br /></dd>
-<dt><b>-b</b></dt>
-<dd>Implies <b>-s</b>. Use bit vectors as well in the attempt to coax better
-code out of the compiler. Most useful for specifications with more than a few
-keywords (e.g. for most programming languages).<br /><br /></dd>
-<dt><b>-c</b></dt>
-<dd>Used to support (f)lex-like condition support.<br /><br /></dd>
-<dt><b>-d</b></dt>
-<dd>Creates a parser that dumps information about the current position and in
-which state the parser is while parsing the input. This is useful to debug
-parser issues and states. If you use this switch you need to define a macro
-<i>YYDEBUG</i> that is called like a function with two parameters: <i>void
-YYDEBUG(int state, char current)</i>. The first parameter receives the state or
--1 and the second parameter receives the input at the current cursor.<br /><br /></dd>
-<dt><b>-D</b></dt>
-<dd>Emit Graphviz dot data. It can then be processed with e.g.
-"dot -Tpng input.dot > output.png". Please note that scanners with many states
-may crash dot.<br /><br /></dd>
-<dt><b>-e</b></dt>
-<dd>Generate a parser that supports EBCDIC. The generated code can deal with any 
-character up to 0xFF. In this mode re2c assumes that input character size is 
-1 byte. This switch is incompatible with <b>-w</b>, <b>-x</b>, <b>-u</b> and <b>-8</b>.<br /><br /></dd>
-<dt><b>-f</b></dt>
-<dd>Generate a scanner with support for storable state. For details see below
-at <b>SCANNER WITH STORABLE STATES</b>.<br /><br /></dd>
-<dt><b>-F</b><dt>
-<dd>Partial support for flex syntax. When this flag is active then named 
-definitions must be surrounded by curly braces and can be defined without an
-equal sign and the terminating semi colon. Instead names are treated as direct
-double quoted strings.<br /><br /></dd>
-<dt><b>-g</b></dt>
-<dd>Generate a scanner that utilizes GCC's computed goto feature. That is re2c
-generates jump tables whenever a decision is of a certain complexity (e.g. a 
-lot of if conditions are otherwise necessary). This is only useable with GCC 
-and produces output that cannot be compiled with any other compiler. Note that
-this implies -b and that the complexity threshold can be configured using the
-inplace configuration "cgoto:threshold".</dd>
-<dt><b>-i</b></dt>
-<dd>Do not output #line information. This is usefull when you want use a CMS
-tool with the re2c output which you might want if you do not require your users
-to have re2c themselves when building from your source. <b>-o output</b>
-Specify the output file.<br /><br /></dd>
-<dt><b>-r</b></dt>
-<dd>Allows reuse of scanner definitions with '<b>/*!use:re2c</b>' after
-'<b>/*!rules:re2c</b>'. In this mode no '<b>/*!re2c</b>' block and exactly one
-'<b>/*!rules:re2c</b>' must be present. The rules are being saved and used by
-every '<b>/*!use:re2c</b>' block that follows. These blocks can contain
-inplace configurations, especially '<b>re2c:flags:e</b>', '<b>re2c:flags:w</b>', 
-'<b>re2c:flags:x</b>', '<b>re2c:flags:u</b>' and '<b>re2c:flags:8</b>'.
-That way it is possible to create the same scanner multiple times for different
-character types, different input mechanisms or different output mechanisms.
-The '<b>/*!use:re2c</b>' blocks can also contain additional rules that will be
-appended to the set of rules in '<b>/*!rules:re2c</b>'.
-<br /><br /></dd>
-<dt><b>-s</b></dt>
-<dd>Generate nested ifs for some switches. Many compilers need this assist to
-generate better code.<br /><br /></dd>
-<dt><b>-t</b></dt>
-<dd>Create a header file that contains types for the (f)lex-like condition support.
-This can only be activated when <b>-c</b> is in use.<br /><br /></dd>
-<dt><b>-u</b></dt>
-<dd>Generate a parser that supports UTF-32. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode re2c assumes that input 
-character size is 4 bytes. This switch is incompatible with <b>-e</b>, <b>-w</b>, <b>-x</b> and <b>-8</b>.
-This implies <b>-s</b>.<br /><br /></dd>
-<dt><b>-v</b></dt>
-<dd>Show version information.<br /><br /></dd>
-<dt><b>-V</b></dt>
-<dd>Show the version as a number XXYYZZ.<br /><br /></dd>
-<dt><b>-w</b></dt>
-<dd>Generate a parser that supports UCS-2. The generated code can deal with any 
-valid Unicode character up to 0xFFFF. In this mode re2c assumes that input 
-character size is 2 bytes. This switch is incompatible with <b>-e</b>, <b>-x</b>, <b>-u</b> and <b>-8</b>.
-This implies <b>-s</b>.<br /><br /></dd>
-<dt><b>-x</b></dt>
-<dd>Generate a parser that supports UF16-2. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode re2c assumes that input 
-character size is 2 bytes. This switch is incompatible with <b>-e</b>, <b>-w</b>, <b>-u</b> and <b>-8</b>.
-This implies <b>-s</b>.<br /><br /></dd>
-<dt><b>-1</b></dt>
-<dd>Force single pass generation, this cannot be combined with -f and disables 
-YYMAXFILL generation prior to last re2c block.<br /><br /></dd>
-<dt><b>-8</b></dt>
-<dd>Generate a parser that supports UTF-8. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode re2c assumes that input 
-character size is 1 byte. This switch is incompatible with <b>-e</b>, <b>-w</b>, <b>-x</b> and <b>-u</b>.<br /><br /></dd>
-<dt><b>--no-generation-date</b></dt>
-<dd>Suppress date output in the generated output so that it only shows the re2c
-version.<br /><br /></dd>
-<dt><b>--case-insensitive</b></dt>
-<dd>All strings are case insensitive, so all "-expressions are treated
-in the same way '-expressions are.<br /><br /></dd>
-<dt><b>--case-inverted</b></dt>
-<dd>Invert the meaning of single and double quoted strings.
-With this switch single quotes are case sensitive and
-double quotes are case insensitive.<br /><br /></dd>
-</dl>
-<br />
-<br />
-<a name="lbAF" id="lbAF">&nbsp;</a>
-<h2>INTERFACE CODE</h2>
-<p>Unlike other scanner generators, <b>re2c</b> does not generate complete
-scanners: the user must supply some interface code. In particular, the user
-must define the following macros or use the 
-corresponding inplace configurations:</p>
-<dl compact="compact">
-<dt>YYCONDTYPE</dt>
-<dd>In <b>-c</b> mode you can use <b>-t</b> to generate a file that contains the 
-enumeration used as conditions. Each of the values refers to a condition of
-a rule set.</dd>
-<dt>YYCTXMARKER</dt>
-<dd><i>l</i>-expression of type *YYCTYPE. The generated code saves context 
-backtracking information in YYCTXMARKER. The user only  needs to define this 
-macro if a scanner specification uses trailing context in one or more of its 
-regular expressions.<br /><br /></dd>
-<dt>YYCTYPE</dt>
-<dd>Type used to hold an input symbol. Usually char or unsigned char.<br /><br /></dd>
-<dt>YYCURSOR</dt>
-<dd><i>l</i>-expression of type *YYCTYPE that points to the current input
-symbol. The generated code advances YYCURSOR as symbols are matched. On entry,
-YYCURSOR is assumed to point to the first character of the current token. On
-exit, YYCURSOR will point to the first character of the following token.<br /><br /></dd>
-<dt>YYDEBUG(<i>state</i>,<i>current</i>)</dt>
-<dd>This is only needed if the <b>-d</b> flag was specified. It allows to
-easily debug the generated parser by calling a user defined function for every
-state. The function should have the following signature: <i>void YYDEBUG(int
-state, char current)</i>. The first parameter receives the state or -1 and the
-second parameter receives the input at the current cursor.<br /><br /></dd>
-<dt>YYFILL(<i>n</i>)</dt>
-<dd>The generated code "calls" YYFILL(n) when the buffer needs (re)filling: at
-least <i>n</i> additional characters should be provided. YYFILL(n) should adjust
-YYCURSOR, YYLIMIT, YYMARKER and YYCTXMARKER as needed. Note that for typical 
-programming languages <i>n</i> will be the length of the longest keyword plus 
-one. The user can place a comment of the form /*!max:re2c */ once to insert 
-a YYMAXFILL definition that is set to the maximum length value. If -1 
-switch is used then YYMAXFILL can be triggered only once after the 
-last /*!re2c */ block.<br /><br /></dd>
-<dt>YYGETCONDITION()</dt>
-<dd>This define is used to get the condition prior to entering the scanner code
-when using <b>-c</b> switch. The value must be initialized with a value from 
-the enumeration YYCONDTYPE type.</dd>
-<dt>YYGETSTATE()</dt>
-<dd>The user only needs to define this macro if the <b>-f</b> flag was
-specified. In that case, the generated code "calls" YYGETSTATE() at the very
-beginning of the scanner in order to obtain the saved state. YYGETSTATE() must
-return a signed integer. The value must be either -1, indicating that the
-scanner is entered for the first time, or a value previously saved by
-YYSETSTATE(s). In the second case, the scanner will resume operations right after
-where the last YYFILL(n) was called.<br /><br /></dd>
-<dt>YYLIMIT</dt>
-<dd>Expression of type *YYCTYPE that marks the end of the buffer (YYLIMIT[-1] is
-the last character in the buffer). The generated code repeatedly compares
-YYCURSOR to YYLIMIT to determine when the buffer needs (re)filling.<br /><br /></dd>
-<dt>YYMARKER</dt>
-<dd><i>l</i>-expression of type *YYCTYPE. The generated code saves backtracking
-information in YYMARKER. Some easy scanners might not use this.<br /><br /></dd>
-<dt>YYMAXFILL</dt>
-<dd>This will be automatically defined by /*!max:re2c */ blocks as explained
-above.<br /><br /></dd>
-<dt>YYSETCONDITION(<i>c</i>)</dt>
-<dd>This define is used to set the condition in transition rules. This is only
-being used when <b>-c</b> is active and transition rules are being used.<br /><br /></dd>
-<dt>YYSETSTATE(<i>s</i>)</dt>
-<dd>The user only needs to define this macro if the <b>-f</b> flag was
-specified. In that case, the generated code "calls" YYSETSTATE just before
-calling YYFILL(n). The parameter to YYSETSTATE(s) is a signed integer that uniquely
-identifies the specific instance of YYFILL(n) that is about to be called. Should
-the user wish to save the state of the scanner and have YYFILL(n) return to the
-caller, all he has to do is store that unique identifer in a variable. Later,
-when the scanner is called again, it will call YYGETSTATE() and resume
-execution right where it left off. The generated code will contain 
-both YYSETSTATE(s) and YYGETSTATE() even if YYFILL(n) is being
-disabled.<br /><br /></dd>
-</dl>
-<br />
-<br />
-<a name="lbAG" id="lbAG">&nbsp;</a>
-<h2>SCANNER WITH STORABLE STATES</h2>
-<p>When the <b>-f</b> flag is specified, re2c generates a scanner that can
-store its current state, return to the caller, and later resume operations
-exactly where it left off.</p>
-<p>The default operation of re2c is a "pull" model, where the scanner asks for
-extra input whenever it needs it. However, this mode of operation assumes that
-the scanner is the "owner" the parsing loop, and that may not always be
-convenient.</p>
-<p>Typically, if there is a preprocessor ahead of the scanner in the stream, or
-for that matter any other procedural source of data, the scanner cannot "ask"
-for more data unless both scanner and source live in a separate threads.</p>
-<p>The <b>-f</b> flag is useful for just this situation : it lets users design
-scanners that work in a "push" model, i.e. where data is fed to the scanner
-chunk by chunk. When the scanner runs out of data to consume, it just stores
-its state, and return to the caller. When more input data is fed to the
-scanner, it resumes operations exactly where it left off.</p>
-<p>When using the -f option re2c does not accept stdin because it has to do the 
-full generation process twice which means it has to read the input twice. That
-means re2c would fail in case it cannot open the input twice or reading the
-input for the first time influences the second read attempt.</p>
-<p>Changes needed compared to the "pull" model.</p>
-<p>1. User has to supply macros YYSETSTATE() and YYGETSTATE(state)</p>
-<p>2. The <b>-f</b> option inhibits declaration of <i>yych</i> and
-<i>yyaccept</i>. So the user has to declare these. Also the user has to save
-and restore these. In the example <i>examples/push.re</i> these are declared as
-fields of the (C++) class of which the scanner is a method, so they do not need
-to be saved/restored explicitly. For C they could e.g. be made macros that
-select fields from a structure passed in as parameter. Alternatively, they
-could be declared as local variables, saved with YYFILL(n) when it decides
-to return and restored at entry to the function. Also, it could be more
-efficient to save the state from YYFILL(n) because
-YYSETSTATE(state) is called unconditionally. YYFILL(n) however does not
-get <i>state</i> as parameter, so we would have to store state in a local
-variable by YYSETSTATE(state).</p>
-<p>3. Modify YYFILL(n) to return (from
-the function calling it) if more input is needed.</p>
-<p>4. Modify caller to recognise "more input is needed" and respond
-appropriately.</p>
-<p>5.The generated code will contain a switch block that is used to restores 
-the last state by jumping behind the corrspoding YYFILL(n) call. This code is
-automatically generated in the epilog of the first "/*!re2c */" block. 
-It is possible to trigger generation of the YYGETSTATE() block earlier by 
-placing a "/*!getstate:re2c */" comment. This is especially useful when
-the scanner code should be wrapped inside a loop.</p>
-<p>Please see examples/push.re for push-model scanner. The generated code can be
-tweaked using inplace configurations "state:abort" and "state:nextlabel".</p>
-<a name="lbAH" id="lbAH">&nbsp;</a>
-<h2>SCANNER WITH CONDITION SUPPORT</h2>
-<p>
-You can preceed regular-expressions with a list of condition names when using the <b>-c</b>
-switch. In this case <b>re2c</b> generates scanner blocks for each conditon. Where each of the
-generated scanners has its own precondition. The precondition is given by the 
-interface define <b>YYGETCONDITON</b> and must be of type <b>YYCONDTYPE</b>.
-</p><p>
-There are two special rule types. First, the rules of the condition '*' are 
-merged to all  conditions. And second the empty condition list allows to 
-provide a code block that does not have a scanner part. Meaning it does not 
-allow any regular expression. The condition value referring to this special 
-block is always the one with the enumeration value 0. This way the code of this
-special rule can be used to initialize a scanner. It is in no way necessary to
-have these rules: but sometimes it is helpful to have a dedicated uninitialized
-condition state.
-</p><p>
-Non empty rules allow to specify the new condition, which makes them
-transition rules. Besides generating calls for the define <b>YYSETCONDTITION</b>
-no other special code is generated.
-</p>
-<p>
-There is another kind of special rules that allow to prepend code to any code
-block of all rules of a certain set of conditions or to all code blocks to all
-rules. This can be helpful when some operation is common among rules. For
-instance this can be used to store the length of the scanned string. These
-special setup rules start with an exclamation mark followed by either a list
-of conditions <b>&lt;! condition, ... &gt;</b> or a star <b>&lt;!*&gt;</b>.
-When <b>re2c</b> generates the code for a rule whose state does not have a
-setup rule and a star'd setup rule is present, than that code will be used
-as setup code.
-</p>
-<a name="lbAH2" id="lbAH2">&nbsp;</a>
-<h2>ENCODINGS</h2>
-<p>
-<b>re2c</b> supports the following encodings: ASCII, EBCDIC (<b>-e</b>), UCS-2 (<b>-w</b>), 
-UTF-16 (<b>-x</b>), UTF-32 (<b>-u</b>) and UTF-8 (<b>-8</b>). ASCII is default. You can 
-either pass cmd flag or use inplace configuration.
-</p>
-<p>
-The following concepts should be clarified when talking about encoding. <b>Code point</b> 
-is an abstract number, which represents single encoding symbol. <b>Code unit</b> is the 
-smallest unit of memory, which is used in the encoded text (it corresponds to one 
-character in the input stream). One or more code units can be needed to represent 
-a single code point, depending on the encoding. In <b>fixed-length</b> encoding, each 
-code point is represented with equal number of code units. In <b>variable-length</b> 
-encoding, different code points can be represented with different number of code units.
-</p>
-<p>
-<b>ASCII</b>
-is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
-to 0xFF (note that this is <b>re2c</b>-specific understanding of ASCII). One code point 
-is represented with exactly one 1-byte code unit, which has the same value as the 
-code point. Size of YYCTYPE must be 1 byte.
-</p>
-<p>
-<b>EBCDIC</b>
-is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
-to 0xFF. One code point is represented with exactly one 1-byte code unit, which has 
-the same value as the code point. Size of YYCTYPE must be 1 byte.
-</p>
-<p>
-<b>UCS-2</b>
-is a fixed-length encoding. Its code space includes 0x10000 code points, from 0 
-to 0xFFFF. One code point is represented with exactly one 2-byte code unit, which has 
-the same value as the code point. Size of YYCTYPE must be 2 bytes.
-</p>
-<p>
-<b>UTF-16</b>
-is a variable-length encoding. Its code space includes all Unicode code points, 
-from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with one or 
-two 2-byte code units. Size of YYCTYPE must be 2 bytes.
-</p>
-<p>
-<b>UTF-32</b>
-is a fixed-length encoding. Its code space includes all Unicode code points, 
-from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with exactly 
-one 4-byte code unit. Size of YYCTYPE must be 4 bytes.
-</p>
-<p>
-<b>UTF-8</b>
-is a variable-length encoding. Its code space includes all 
-Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point 
-is represented with sequence of one, two, three or four 1-byte code units. Size of 
-YYCTYPE must be 1 bytes.
-</p>
-<p>
-In Unicode, values from range 0xD800 to 0xDFFF (surrogates) are not valid Unicode 
-code points, any encoded sequence of code units, that would map to Unicode code points 
-in the range 0xD800-0xDFFF, is ill-formed.
-</p>
-<p>
-For some encodings, there are code units, that never occur in valid encoded stream 
-(e.g. 0xFF byte in UTF-8). If the generated scanner must check for such input symbols, 
-the only way to do so is to use default rule <b>*</b>. Note, that full range rule <b>[^]</b> 
-means "all valid code points", while default rule <b>*</b> means "all possible code units".
-</p>
-<a name="lbAI" id="lbAI">&nbsp;</a>
-<h2>SCANNER SPECIFICATIONS</h2>
-<p>Each scanner specification consists of a set of <i>rules</i>, <i>named
-definitions</i> and <i>configurations</i>.</p>
-<p><i>Rules</i> consist of a regular expression along with a block of C/C++
-code that is to be executed when the associated <i>regular expression</i> is
-matched. You can either
-start the code with an opening curly brace or the sequence '<b>:=</b>'. When
-the code with a curly brace then <b>re2c</b> counts the brace depth and stops looking
-for code automatically. Otherwise curly braces are not allowed and <b>re2c</b> stops
-looking for code at the first line that does not begin with whitespace. If two
-or more rules overlap, the first rule is preferred.</p>
-<dl compact="compact">
-<dd><i>regular-expression</i> { <i>C/C++ code</i> }</dd>
-<dd><i>regular-expression</i> := <i>C/C++ code</i></dd>
-</dl>
-<p>
-There is one special rule: default rule <b>*</b>.
-</p>
-<dl compact="compact">
-<dd><b>*</b> { <i>C/C++ code</i> }</dd>
-<dd><b>*</b> := <i>C/C++ code</i></dd>
-</dl>
-<p>
-The former "default" rule <b>[^]</b> differs from <b>*</b>:
-</p>
-<dl compact="compact">
-<dd>- <b>*</b> can occur anywhere a normal rule can occur, but regardless to its place, 
-<b>*</b> has the lowest priority.
-<dd>- <b>[^]</b> matches all valid symbols in current encoding, while <b>*</b> matches 
-any input character, either valid or invalid.
-<dd>- <b>[^]</b> can consume multiple input characters, while <b>*</b> always consumes 
-one input character.
-</dl>
-<p>
-In fact, when variable-length encoding is used, <b>*</b> is the only possible way 
-to match invalid input character.
-</p>
-<p>
-If <b>-c</b> is active then each regular expression is preceeded by a list of 
-comma separated condition names. Besides normal naming rules there are two 
-special cases. A rule may contain the single condition name '*' and no contition 
-name at all. In the latter case the rule cannot have a regular expression. Non 
-empty rules may further more specify the new condition. In that case re2c will
-generated the necessary code to chnage the condition automatically. Just as above
-code can be started with a curly brace of the sequence '<b>:=</b>'. Further more
-rules can use ':=>' as a shortcut to automatically generate code that not only
-sets the new condition state but also continues execution with the new state. A
-shortcut rule should not be used in a loop where there is code between the start
-of the loop and the <b>re2c</b> block unless <i>re2c:cond:goto</i> is changed
-to '<i>continue;</i>'. If code is necessary before all rule (though not simple
-jumps) you can doso by using &lt;! pseudo-rules.
-</p>
-<dl compact="compact">
-<dd>&lt;<i>condition-list</i>&gt; <i>regular-expression</i> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>condition-list</i>&gt; <i>regular-expression</i> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>condition-list</i>&gt; <b>*</b> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>condition-list</i>&gt; <b>*</b> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>condition-list</i>&gt; <i>regular-expression</i> =&gt; <i>condition</i> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>condition-list</i>&gt; <i>regular-expression</i> =&gt; <i>condition</i> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>condition-list</i>&gt; <i>regular-expression</i> :=&gt; <i>condition</i></dd>
-<dd>&lt;<i>*</i>&gt; <i>regular-expression</i> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>*</i>&gt; <i>regular-expression</i> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>*</i>&gt; <b>*</b> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>*</i>&gt; <b>*</b> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>*</i>&gt; <i>regular-expression</i> =&gt; <i>condition</i> { <i>C/C++ code</i> }</dd>
-<dd>&lt;<i>*</i>&gt; <i>regular-expression</i> =&gt; <i>condition</i> := <i>C/C++ code</i></dd>
-<dd>&lt;<i>*</i>&gt; <i>regular-expression</i> :=&gt; <i>condition</i></dd>
-<dd>&lt;&gt; { <i>C/C++ code</i> }</dd>
-<dd>&lt;&gt; := <i>C/C++ code</i></dd>
-<dd>&lt;&gt; =&gt; <i>condition</i> { <i>C/C++ code</i> }</dd>
-<dd>&lt;&gt; =&gt; <i>condition</i> := <i>C/C++ code</i></dd>
-<dd>&lt;&gt; :=&gt; <i>condition</i></dd>
-<dd>&lt;!<i>condition-list</i>&gt; { <i>C/C++ code</i> }</dd>
-<dd>&lt;!<i>condition-list</i>&gt; := <i>C/C++ code</i></dd>
-<dd>&lt;!*&gt; { <i>C/C++ code</i> }</dd>
-<dd>&lt;!*&gt; := <i>C/C++ code</i></dd>
-</dl>
-<p>Named definitions are of the form:</p>
-<dl compact="compact">
-<dd><i>name</i> = <i>regular expression</i>;</dd>
-</dl>
-<p>When <b>-F</b> is active, then named definitions are also of the form:</p>
-<dl compact="compact">
-<dd><i>name</i> <i>regular expression</i></dd>
-</dl>
-<p>Configurations look like named definitions whose names start with
-"<b>re2c:</b>":</p>
-<dl compact="compact">
-<dd>re2c:<i>name</i> = <i>value</i>;</dd>
-<dd>re2c:<i>name</i> = <b>"</b><i>value</i><b>"</b>;</dd>
-</dl>
-<a name="lbAJ" id="lbAJ">&nbsp;</a>
-<h2>SUMMARY OF RE2C REGULAR EXPRESSIONS</h2>
-<dl compact="compact">
-<dt>"foo"</dt>
-<dd>the literal string foo. ANSI-C escape sequences can be used.</dd>
-<dt>'foo'</dt>
-<dd>the literal string foo (characters [a-zA-Z] treated case-insensitive).
-ANSI-C escape sequences can be used.</dd>
-<dt>[xyz]</dt>
-<dd>a "character class"; in this case, the regular expression matches either an
-'x', a 'y', or a 'z'.</dd>
-<dt>[abj-oZ]</dt>
-<dd>a "character class" with a range in it; matches an 'a', a 'b', any letter
-from 'j' through 'o', or a 'Z'.</dd>
-<dt>[^<i>class</i>]</dt>
-<dd>an inverted "character class".</dd>
-<dt><i>r</i>\<i>s</i></dt>
-<dd>match any <i>r</i> which isn't an <i>s</i>. <i>r</i> and <i>s</i> must be
-regular expressions which can be expressed as character classes.</dd>
-<dt><i>r</i>*</dt>
-<dd>zero or more <i>r</i>'s, where <i>r</i> is any regular expression</dd>
-<dt><i>r</i>+</dt>
-<dd>one or more <i>r</i>'s</dd>
-<dt><i>r</i>?</dt>
-<dd>zero or one <i>r</i>'s (that is, "an optional <i>r</i>")</dd>
-<dt>name</dt>
-<dd>the expansion of the "named definition" (see above)</dd>
-<dt>(<i>r</i>)</dt>
-<dd>an <i>r</i>; parentheses are used to override precedence (see below)</dd>
-<dt><i>rs</i></dt>
-<dd>an <i>r</i> followed by an <i>s</i> ("concatenation")</dd>
-<dt><i>r</i>|<i>s</i></dt>
-<dd>either an <i>r</i> or an <i>s</i></dd>
-<dt><i>r</i>/<i>s</i></dt>
-<dd>an <i>r</i> but only if it is followed by an <i>s</i>. The <i>s</i> is not part of
-the matched text. This type of regular expression is called "trailing context". A trailing 
-context can only be the end of a rule and not part of a named definition.</dd>
-<dt><i>r</i>{<i>n</i>}</dt>
-<dd>matches <i>r</i> exactly <i>n</i> times.</dd>
-<dt><i>r</i>{<i>n</i>,}</dt>
-<dd>matches <i>r</i> at least <i>n</i> times.</dd>
-<dt><i>r</i>{<i>n</i>,<i>m</i>}</dt>
-<dd>matches <i>r</i> at least <i>n</i> but not more than <i>m</i> times.</dd>
-<dt>.</dt>
-<dd>match any character except newline (\n).</dd>
-<dt><i>def</i></dt>
-<dd>matches named definition as specified by <i>def</i> only if <b>-F</b> is
-off. If the switch <b>-F</b> is active then this behaves like it was enclosed
-in double quotes and matches the string <i>def</i>.</dd>
-</dl>
-<br />
-<br />
-<p>Character classes and string literals may contain octal or hexadecimal
-character definitions and the following set of escape sequences
-(<b>\n</b>,<br />
- <b>\t</b>, <b>\v</b>, <b>\b</b>, <b>\r</b>, <b>\f</b>, <b>\a</b>, <b>\\</b>).
-An octal character is defined by a backslash followed by its three octal digits
-and a hexadecimal character is defined by backslash, a lower cased '<b>x</b>'
-and its two hexadecimal digits or a backslash, an upper cased <b>X</b> and its
-four hexadecimal digits.</p>
-<p>re2c furthermore supports the c/c++ unicode notation. That is a backslash
-followed by either a lowercased <b>u</b> and its four hexadecimal digits or an
-uppercased <b>U</b> and its eight hexadecimal digits. However only in <b>-u</b> 
-mode the generated code can deal with any valid Unicode character up to 
-0x10FFFF.</p>
-<p>The only portable "<b>any</b>" rule is the default rule <b>*</b>.</p>
-<p>The regular expressions listed above are grouped according to precedence,
-from highest precedence at the top to lowest at the bottom. Those grouped
-together have equal precedence.</p>
-<a name="lbAK" id="lbAK">&nbsp;</a>
-<h2>INPLACE CONFIGURATION</h2>
-<p>It is possible to configure code generation inside re2c blocks. The
-following lists the available configurations:</p>
-<dl compact="compact">
-<dt><i>re2c:cond:divider</i> <b>=</b> "/* *********************************** */" <b>;</b></dt>
-<dd>Allows to customize the divider for condition blocks. You can use '@@' to 
-put the name of the condition or customize the placeholder
-using <i>re2c:cond:divider@cond</i>.</dd>
-<dt><i>re2c:cond:divider@cond</i> <b>=</b> @@ <b>;</b></dt>
-<dd>Specify the placeholder that will be replaced with the condition name
-in <i>re2c:cond:divider</i>.</dd>
-<dt><i>re2c:cond:goto</i> <b>=</b> "goto @@;" <b>;</b></dt>
-<dd>Allows to customize the condition goto statements used with ':=>' style rules.
-You can use '@@' to put the name of the condition or customize the placeholder
-using <i>re2c:cond:goto@cond</i>. You can also change this to 'continue;',
-which would allow you to continue with the next loop cycle including any code
-between loop start and re2c block.</dd>
-<dt><i>re2c:cond:goto@cond</i> <b>=</b> @@ <b>;</b></dt>
-<dd>Specifies the placeholder that will be replaced with the condition label
-in <i>re2c:cond:goto</i>.</dd>
-<dt><i>re2c:indent:top</i> <b>=</b> 0 <b>;</b></dt>
-<dd>Specifies the minimum number of indentation to use. Requires a numeric
-value greater than or equal zero.</dd>
-<dt><i>re2c:condprefix</i> <b>=</b> yyc_ <b>;</b></dt>
-<dd>Allows to specify the prefix used for condition labels. That is this text is 
-prepended to any condition label in the generated output file.</dd>
-<dt><i>re2c:condenumprefix</i> <b>=</b> yyc <b>;</b></dt>
-<dd>Allows to specify the prefix used for condition values. That is this text is 
-prepended to any condition enum value in the generated output file.</dd>
-<dt><i>re2c:indent:string</i> <b>=</b> "\t" <b>;</b></dt>
-<dd>Specifies the string to use for indentation. Requires a string that should
-contain only whitespace unless you need this for external tools. The easiest
-way to specify spaces is to enclose them in single or double quotes. If you do
-not want any indentation at all you can simply set this to <b>""</b>.</dd>
-<dt><i>re2c:yych:conversion</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When this setting is non zero, then <b>re2c</b> automatically generates conversion 
-code whenever yych gets read. In this case the type must be defined using
-<b>re2c:define:YYCTYPE</b>.</dd>
-<dt><i>re2c:yych:emit</i> <b>=</b> 1 <b>;</b></dt>
-<dd>Generation of <b>yych</b> can be suppressed by setting this to 0.</dd>
-<dt><i>re2c:yybm:hex</i> <b>=</b> 0 <b>;</b></dt>
-<dd>If set to zero then a decimal table is being used else a hexadecimal table
-will be generated.</dd>
-<dt><i>re2c:yyfill:enable</i> <b>=</b> 1 <b>;</b></dt>
-<dd>Set this to zero to suppress generation of YYFILL(n). When using this be sure
-to verify that the generated scanner does not read behind input. Allowing
-this behavior might introduce severe security issues to your programs.</dd>
-<dt><i>re2c:yyfill:check</i> <b>=</b> 1 <b>;</b></dt>
-<dd>This can be set 0 to suppress output of the pre condition using YYCURSOR and
-YYLIMIT which becomes useful when YYLIMIT + max(YYFILL) is always accessible.</dd>
-<dt><i>re2c:yyfill:parameter</i> <b>=</b> 1 <b>;</b></dt>
-<dd>Allows to suppress parameter passing to <b>YYFILL</b> calls. If set to zero 
-then no parameter is passed to <b>YYFILL</b>. However <b>define:YYFILL@LEN</b>
-allows to specify a replacement string for the actual length value. If set to
-a non zero value then <b>YYFILL</b> usage will be followed by the number of 
-requested characters in braces unless <b>re2c:define:YYFILL:naked</b> is set. 
-Also look at <b>re2c:define:YYFILL:naked</b> and <b>re2c:define:YYFILL@LEN</b>.</dd>
-<dt><i>re2c:startlabel</i> <b>=</b> 0 <b>;</b></dt>
-<dd>If set to a non zero integer then the start label of the next scanner
-blocks will be generated even if not used by the scanner itself. Otherwise the
-normal <b>yy0</b> like start label is only being generated if needed. If set to
-a text value then a label with that text will be generated regardless of
-whether the normal start label is being used or not. This setting is being
-reset to <b>0</b> after a start label has been generated.</dd>
-<dt><i>re2c:labelprefix</i> <b>=</b> yy <b>;</b></dt>
-<dd>Allows to change the prefix of numbered labels. The default is <b>yy</b> and
-can be set to any string that is a valid label.</dd>
-<dt><i>re2c:state:abort</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When not zero and switch -f is active then the YYGETSTATE block will 
-contain a default case that aborts and a -1 case is used for initialization.</dd>
-<dt><i>re2c:state:nextlabel</i> <b>=</b> 0 <b>;</b></dt>
-<dd>Used when -f is active to control whether the YYGETSTATE block is 
-followed by a yyNext: label line. Instead of using yyNext you can 
-usually also use configuration <i>startlabel</i> to force a specific start label
-or default to yy0 as start label. Instead of using a dedicated label it 
-is often better to separate the YYGETSTATE code from the actual scanner code by
-placing a "/*!getstate:re2c */" comment.</dd>
-<dt><i>re2c:cgoto:threshold</i> <b>=</b> 9 <b>;</b></dt>
-<dd>When -g is active this value specifies the complexity threshold that triggers
-generation of jump tables rather than using nested if's and decision bitfields.
-The threshold is compared against a calculated estimation of if-s needed where 
-every used bitmap divides the threshold by 2.</dd>
-<dt><i>re2c:yych:conversion</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When the input uses signed characters and <b>-s</b> or <b>-b</b> switches are
-in effect re2c allows to automatically convert to the unsigned character type
-that is then necessary for its internal single character. When this setting
-is zero or an empty string the conversion is disabled. Using a non zero number
-the conversion is taken from <b>YYCTYPE</b>. If that is given by an inplace
-configuration that value is being used. Otherwise it will be <b>(YYCTYPE)</b>
-and changes to that configuration are  no longer possible. When this setting is
-a string the braces must be specified. Now assuming your input is a <b>char*</b>
-buffer and you are using above mentioned switches you can set <b>YYCTYPE</b> to
-<b>unsigned char</b> and this setting to either <b>1</b> or <b>"(unsigned char)"</b>.</dd>
-<dt><i>re2c:define:YYCONDTYPE</i> <b>=</b> YYCONDTYPE <b>;</b></dt>
-<dd>Enumeration used for condition support with <b>-c</b> mode.</dd>
-<dt><i>re2c:define:YYCTXMARKER</i> <b>=</b> YYCTXMARKER <b>;</b></dt>
-<dd>Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYCTYPE</i> <b>=</b> YYCTYPE <b>;</b></dt>
-<dd>Allows to overwrite the define YYCTYPE and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYCURSOR</i> <b>=</b> YYCURSOR <b>;</b></dt>
-<dd>Allows to overwrite the define YYCURSOR and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYDEBUG</i> <b>=</b> YYDEBUG <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYDEBUG</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYFILL</i> <b>=</b> YYFILL <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYFILL</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYFILL:naked</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When set to 1 neither braces, parameter nor semicolon gets emitted.</dd>
-<dt><i>re2c:define:YYFILL@LEN</i> <b>=</b> @@ <b>;</b></dt>
-<dd>When using <b>re2c:define:YYFILL</b> and <b>re2c:yyfill:parameter</b> is 0 then
-any occurrence of this text inside <b>YYFILL</b> will be replaced with the actual
-length value.</dd>
-<dt><i>re2c:define:YYGETCONDITION</i> <b>=</b> YYGETCONDITION <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYGETCONDITION</b>.</dd>
-<dt><i>re2c:define:YYGETCONDITION:naked</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When set to 1 neither braces, parameter nor semicolon gets emitted.</dd>
-<dt><i>re2c:define:YYGETSTATE</i> <b>=</b> YYGETSTATE <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYGETSTATE</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYGETSTATE:naked</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When set to 1 neither braces, parameter nor semicolon gets emitted.</dd>
-<dt><i>re2c:define:YYLIMIT</i> <b>=</b> YYLIMIT <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYLIMIT</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYMARKER</i> <b>=</b> YYMARKER <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYMARKER</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYSETCONDITION</i> <b>=</b> YYSETCONDITION <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYSETCONDITION</b>.</dd>
-<dt><i>re2c:define:YYSETCONDITION@cond</i> <b>=</b> @@ <b>;</b></dt>
-<dd>When using <b>re2c:define:YYSETCONDITION</b> then any occurrence of this text 
-inside <b>YYSETCONDITION</b> will be replaced with the actual new condition value.</dd>
-<dt><i>re2c:define:YYSETSTATE</i> <b>=</b> YYSETSTATE <b>;</b></dt>
-<dd>Allows to overwrite the define <b>YYSETSTATE</b> and thus avoiding it by setting the
-value to the actual code needed.</dd>
-<dt><i>re2c:define:YYSETSTATE:naked</i> <b>=</b> 0 <b>;</b></dt>
-<dd>When set to 1 neither braces, parameter nor semicolon gets emitted.</dd>
-<dt><i>re2c:define:YYSETSTATE@state</i> <b>=</b> @@ <b>;</b></dt>
-<dd>When using <b>re2c:define:YYSETSTATE</b> then any occurrence of this text 
-inside <b>YYSETSTATE</b> will be replaced with the actual new state value.</dd>
-<dt><i>re2c:label:yyFillLabel</i> <b>=</b> yyFillLabel <b>;</b></dt>
-<dd>Allows to overwrite the name of the label yyFillLabel.</dd>
-<dt><i>re2c:label:yyNext</i> <b>=</b> yyNext <b>;</b></dt>
-<dd>Allows to overwrite the name of the label yyNext.</dd>
-<dt><i>re2c:variable:yyaccept</i> <b>=</b> yyaccept <b>;</b></dt>
-<dd>Allows to overwrite the name of the variable yyaccept.</dd>
-<dt><i>re2c:variable:yybm</i> <b>=</b> yybm <b>;</b></dt>
-<dd>Allows to overwrite the name of the variable yybm.</dd>
-<dt><i>re2c:variable:yych</i> <b>=</b> yych <b>;</b></dt>
-<dd>Allows to overwrite the name of the variable yych.</dd>
-<dt><i>re2c:variable:yyctable</i> <b>=</b> yyctable <b>;</b></dt>
-<dd>When both <b>-c</b> and <b>-g</b> are active then <b>re2c</b> uses this variable to 
-generate a static jump table for YYGETCONDITION.</dd>
-<dt><i>re2c:variable:yystable</i> <b>=</b> yystable <b>;</b></dt>
-<dd>When both <b>-f</b> and <b>-g</b> are active then <b>re2c</b> uses this variable to 
-generate a static jump table for YYGETSTATE.</dd>
-<dt><i>re2c:variable:yytarget</i> <b>=</b> yytarget <b>;</b></dt>
-<dd>Allows to overwrite the name of the variable yytarget.</dd>
-</dl>
-<a name="lbAL" id="lbAL">&nbsp;</a>
-<h2>UNDERSTANDING RE2C</h2>
-<p>The subdirectory lessons of the re2c distribution contains a few step by step
-lessons to get you started with re2c. All examples in the lessons subdirectory
-can be compiled and actually work.</p>
-<a name="lbAM" id="lbAM">&nbsp;</a>
-<h2>FEATURES</h2>
-<p><b>re2c</b> provides default action: <b>*</b>. When the default rule matches, 
-exactly one input character is consumed.</p>
-<p>The user must arrange for a sentinel token to appear at the end of input
-(and provide a rule for matching it): <b>re2c</b> does not provide an
-&lt;&lt;EOF&gt;&gt; expression. If the source is from a null-byte terminated
-string, a rule matching a null character will suffice. If the source is from a
-file then you could pad the input with a newline (or some other character that 
-cannot appear within another token); upon recognizing such a character check 
-to see if it is the sentinel and act accordingly. And you can also use YYFILL(n)
-to end the scanner in case not enough characters are available which is nothing
-else than a detection of end of data/file.</p>
-<a name="lbAN" id="lbAN">&nbsp;</a>
-<h2>BUGS</h2>
-<p>Difference only works for character sets.</p>
-<p>The generated DFA is not minimal.</p>
-<p>Features, that are naturally orthogonal (such as reusable rules, conditions, 
-setup rules and default rules), cannot always be combined. E.g., one cannot set 
-setup/default rule for condition in scanner with reusable rules.</p>
-<p><b>re2c</b> does too much unnecessary work: e.g., if /*!use:re2c ... */ block has 
-additional rules, these rules are parsed 4 times, while they should be parsed 
-only once.</p>
-<p>The <b>re2c</b> internal algorithms need documentation.</p>
-<a name="lbAO" id="lbAO">&nbsp;</a>
-<h2>SEE ALSO</h2>
-<p>flex(1), lex(1), quex(<b><a href="http://quex.sourceforge.net/">http://quex.sourceforge.net/</a></b>). More information on <b>re2c</b> can be found here:
-<b><a href=
-"http://re2c.org/">http://re2c.org/</a></b></p>
-<a name="lbAP" id="lbAP">&nbsp;</a>
-<h2>AUTHORS</h2>
-<ul>
-<li>Peter Bumbulis &lt;<a href=
-"mailto:peter@csg.uwaterloo.ca">peter@csg.uwaterloo.ca</a>&gt;</li>
-<li>Brian Young &lt;<a href=
-"mailto:bayoung@acm.org">bayoung@acm.org</a>&gt;</li>
-<li>Dan Nuffer &lt;<a href=
-"mailto:nuffer@users.sourceforge.net">nuffer@users.sourceforge.net</a>&gt;</li>
-<li>Marcus Boerger &lt;<a href=
-"mailto:helly@users.sourceforge.net">helly@users.sourceforge.net</a>&gt;</li>
-<li>Hartmut Kaiser &lt;<a href=
-"mailto:hkaiser@users.sourceforge.net">hkaiser@users.sourceforge.net</a>&gt;</li>
-<li>Emmanuel Mogenet &lt;<a href="mailto:mgix@mgix.com">mgix@mgix.com</a>&gt;
-(added storable state)</li>
-</ul>
-<br />
-<br />
-<a name="lbAQ" id="lbAQ">&nbsp;</a>
-<h2>VERSION INFORMATION</h2>
-<p>This manpage describes <b>re2c</b>, version @PACKAGE_VERSION@.</p>
-<hr />
-<a name="index" id="index">&nbsp;</a>
-<h2>Index</h2>
-<dl>
-<dt><a href="#lbAB">NAME</a><br /></dt>
-<dt><a href="#lbAC">SYNOPSIS</a><br /></dt>
-<dt><a href="#lbAD">DESCRIPTION</a><br /></dt>
-<dt><a href="#lbAE">OPTIONS</a><br /></dt>
-<dt><a href="#lbAF">INTERFACE CODE</a><br /></dt>
-<dt><a href="#lbAG">SCANNER WITH STORABLE STATES</a><br /></dt>
-<dt><a href="#lbAH">SCANNER WITH CONDITION SUPPORT</a><br /></dt>
-<dt><a href="#lbAI">SCANNER SPECIFICATIONS</a><br /></dt>
-<dt><a href="#lbAJ">SUMMARY OF RE2C REGULAR EXPRESSIONS</a><br /></dt>
-<dt><a href="#lbAK">INPLACE CONFIGURATION</a><br /></dt>
-<dt><a href="#lbAL">UNDERSTANDING RE2C</a><br /></dt>
-<dt><a href="#lbAM">FEATURES</a><br /></dt>
-<dt><a href="#lbAN">BUGS</a><br /></dt>
-<dt><a href="#lbAO">SEE ALSO</a><br /></dt>
-<dt><a href="#lbAP">AUTHORS</a><br /></dt>
-<dt><a href="#lbAQ">VERSION INFORMATION</a><br /></dt>
-</dl>
-<br />
-<br />
-<hr />
-<p>This document was created by man2html, using the manual pages.<br />
-Date: @PACKAGE_DATE@</p>
-</body>
-</html>
diff --git a/re2c/re2c.1.in b/re2c/re2c.1.in

deleted file mode 100644 (file)

index d3d8c9e..0000000
--- a/re2c/re2c.1.in
+++ /dev/null
@@ -1,944 +0,0 @@
-./" 
-./" $Id$
-./"
-.TH RE2C 1 "@PACKAGE_DATE@" "Version @PACKAGE_VERSION@"
-.ds re \fBre2c\fP
-.ds le \fBlex\fP
-.ds rx regular-expression
-.ds rxs regular-expressions
-.ds lx \fIl\fP-expression
-.SH NAME
-\*(re \- convert \*(rxs to C/C++
-
-.SH SYNOPSIS
-\*(re [\fB-bdDefFghisuvVwx18\fP] [\fB-o output\fP] [\fB-c\fP [\fB-t header\fP]] \fBfile\fP
-
-.SH DESCRIPTION
-\*(re is a preprocessor that generates C-based recognizers from regular
-expressions.
-The input to \*(re consists of C/C++ source interleaved with
-comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
-scanner specifications.
-In the output these comments are replaced with code that, when
-executed, will find the next input token and then execute
-some user-supplied token-specific code.
-
-For example, given the following code
-
-.in +3
-.nf
-char *scan(char *p)
-{
-/*!re2c
-        re2c:define:YYCTYPE  = "unsigned char";
-        re2c:define:YYCURSOR = p;
-        re2c:yyfill:enable   = 0;
-        re2c:yych:conversion = 1;
-        re2c:indent:top      = 1;
-        [0-9]+          {return p;}
-        [^]             {return (char*)0;}
-*/
-}
-.fi
-.in -3
-
-\*(re -is will generate
-
-.in +3
-.nf
-/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
-char *scan(char *p)
-{
-    {
-        unsigned char yych;
-
-        yych = (unsigned char)*p;
-        if(yych <= '/') goto yy4;
-        if(yych >= ':') goto yy4;
-        ++p;
-        yych = (unsigned char)*p;
-        goto yy7;
-yy3:
-        {return p;}
-yy4:
-        ++p;
-        yych = (unsigned char)*p;
-        {return (char*)0;}
-yy6:
-        ++p;
-        yych = (unsigned char)*p;
-yy7:
-        if(yych <= '/') goto yy3;
-        if(yych <= '9') goto yy6;
-        goto yy3;
-    }
-
-}
-.fi
-.in -3
-
-You can place one \fC/*!max:re2c */\fP comment that will output a "#define 
-\fCYYMAXFILL\fP <n>" line that holds the maximum number of characters 
-required to parse the input. That is the maximum value \fCYYFILL\fP(n)
-will receive. If -1 is in effect then YYMAXFILL can only be triggered once
-after the last \fC/*!re2c */\fP.
-
-You can also use \fC/*!ignore:re2c */\fP blocks that allow you to document the
-scanner code and will not be part of the output.
-
-.SH OPTIONS
-\*(re provides the following options:
-.TP
-\fB-?\fP
-\fB-h\fP
-Invoke a short help.
-.TP
-\fB-b\fP
-Implies \fB-s\fP.  Use bit vectors as well in the attempt to coax better
-code out of the compiler.  Most useful for specifications with more than a
-few keywords (e.g. for most programming languages).
-.TP
-\fB-c\fP
-Used to support (f)lex-like condition support.
-.TP
-\fB-d\fP
-Creates a parser that dumps information about the current position and in 
-which state the parser is while parsing the input. This is useful to debug 
-parser issues and states. If you use this switch you need to define a macro
-\fIYYDEBUG\fP that is called like a function with two parameters:
-\fIvoid YYDEBUG(int state, char current)\fP. The first parameter receives the 
-state or -1 and the second parameter receives the input at the current cursor.
-.TP
-\fB-D\fP
-Emit Graphviz dot data. It can then be processed with e.g.
-"dot -Tpng input.dot > output.png". Please note that scanners with many states
-may crash dot.
-.TP
-\fB-e\fP
-Generate a parser that supports EBCDIC. The generated code can deal with any 
-character up to 0xFF. In this mode \*(re assumes that input character size is 
-1 byte. This switch is incompatible with \fB-w\fP, \fB-x\fP, \fB-u\fP and \fB-8\fP.
-.TP
-\fB-f\fP
-Generate a scanner with support for storable state.
-For details see below at \fBSCANNER WITH STORABLE STATES\fP.
-.TP
-\fB-F\fP
-Partial support for flex syntax. When this flag is active then named
-definitions must be surrounded by curly braces and can be defined without an
-equal sign and the terminating semi colon. Instead names are treated as direct
-double quoted strings.
-.TP
-\fB-g\fP
-Generate a scanner that utilizes GCC's computed goto feature. That is \*(re
-generates jump tables whenever a decision is of a certain complexity (e.g. a 
-lot of if conditions are otherwise necessary). This is only useable with GCC 
-and produces output that cannot be compiled with any other compiler. Note that
-this implies -b and that the complexity threshold can be configured using the
-inplace configuration "cgoto:threshold".
-.TP
-\fB-i\fP
-Do not output #line information. This is useful when you want to use a CMS tool
-with the \*(re output which you might want if you do not require your users to 
-have \*(re themselves when building from your source.
-.TP
-\fB-o output\fP
-Specify the output file.
-.TP
-\fB-r\fP
-Allows reuse of scanner definitions with '\fB/*!use:re2c\fP' after
-'\fB/*!rules:re2c\fP'. In this mode no '\fB/*!re2c\fP' block and exactly one
-'\fB/*!rules:re2c\fP' must be present. The rules are being saved and used by
-every '\fB/*!use:re2c\fP' block that follows. These blocks can contain
-inplace configurations, especially '\fBre2c:flags:e\fP', '\fBre2c:flags:w\fP', 
-'\fBre2c:flags:x\fP', '\fBre2c:flags:u\fP' and '\fBre2c:flags:8\fP'.
-That way it is possible to create the same scanner multiple times for different
-character types, different input mechanisms or different output mechanisms.
-The '\fB/*!use:re2c\fP' blocks can also contain additional rules that will be
-appended to the set of rules in '\fB/*!rules:re2c\fP'.
-.TP
-\fB-s\fP
-Generate nested \fCif\fPs for some \fCswitch\fPes.  Many compilers need this
-assist to generate better code.
-.TP
-\fB-t\fP
-Create a header file that contains types for the (f)lex-like condition support.
-This can only be activated when \fB-c\fP is in use.
-.TP
-\fB-u\fP
-Generate a parser that supports UTF-32. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode \*(re assumes that input 
-character size is 4 bytes. This switch is incompatible with \fB-e\fP, \fB-w\fP, \fB-x\fP 
-and \fB-8\fP. This implies \fB-s\fP.
-.TP
-\fB-v\fP
-Show version information.
-.TP
-\fB-V\fP
-Show the version as a number XXYYZZ.
-.TP
-\fB-w\fP
-Generate a parser that supports UCS-2. The generated code can deal with any 
-valid Unicode character up to 0xFFFF. In this mode \*(re assumes that input 
-character size is 2 bytes. This switch is incompatible with \fB-e\fP, \fB-x\fP, \fB-u\fP 
-and \fB-8\fP. This implies \fB-s\fP.
-.TP
-\fB-x\fP
-Generate a parser that supports UTF-16. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode \*(re assumes that input 
-character size is 2 bytes. This switch is incompatible with \fB-e\fP, \fB-w\fP, \fB-u\fP 
-and \fB-8\fP. This implies \fB-s\fP.
-.TP
-\fB-1\fP
-Force single pass generation, this cannot be combined with -f and disables 
-YYMAXFILL generation prior to last \*(re block.
-.TP
-\fB-8\fP
-Generate a parser that supports UTF-8. The generated code can deal with any 
-valid Unicode character up to 0x10FFFF. In this mode \*(re assumes that input 
-character size is 1 byte. This switch is incompatible with \fB-e\fP, \fB-w\fP, \fB-x\fP 
-and \fB-u\fP.
-.TP
-\fB--no-generation-date\fP
-Suppress date output in the generated output so that it only shows the re2c
-version.
-.TP
-\fB--case-insensitive\fP
-All strings are case insensitive, so all "-expressions are treated
-in the same way '-expressions are.
-.TP
-\fB--case-inverted\fP
-Invert the meaning of single and double quoted strings.
-With this switch single quotes are case sensitive and
-double quotes are case insensitive.
-
-.SH "INTERFACE CODE"
-Unlike other scanner generators, \*(re does not generate complete scanners:
-the user must supply some interface code.
-In particular, the user must define the following macros or use the 
-corresponding inplace configurations:
-.TP
-\fCYYCONDTYPE\fP
-In \fB-c\fP mode you can use \fB-t\fP to generate a file that contains the 
-enumeration used as conditions. Each of the values refers to a condition of
-a rule set.
-.TP
-\fCYYCTXMARKER\fP
-\*(lx of type \fC*YYCTYPE\fP.
-The generated code saves trailing context backtracking information in \fCYYCTXMARKER\fP.
-The user only needs to define this macro if a scanner specification uses trailing
-context in one or more of its \*(rxs.
-.TP
-\fCYYCTYPE\fP
-Type used to hold an input symbol.
-Usually \fCchar\fP or \fCunsigned char\fP.
-.TP
-\fCYYCURSOR\fP
-\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol.
-The generated code advances \fCYYCURSOR\fP as symbols are matched.
-On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
-current token.  On exit, \fCYYCURSOR\fP will point to the first character of
-the following token.
-.TP
-\fCYYDEBUG(\fP\fIstate\fP,\fIcurrent\fC)\fP
-This is only needed if the \fB-d\fP flag was specified. It allows to easily debug
-the generated parser by calling a user defined function for every state. The function
-should have the following signature: \fIvoid YYDEBUG(int state, char current)\fP. 
-The first parameter receives the state or -1 and the second parameter receives the 
-input at the current cursor.
-.TP
-\fCYYFILL\fP(\fIn\fP\fC\fP)
-The generated code "calls" \fCYYFILL\fP(n) when the buffer needs
-(re)filling:  at least \fIn\fP additional characters should
-be provided. \fCYYFILL\fP(n) should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP,
-\fCYYMARKER\fP and \fCYYCTXMARKER\fP as needed.  Note that for typical 
-programming languages \fIn\fP will be the length of the longest keyword plus one.
-The user can place a comment of the form \fC/*!max:re2c */\fP once to insert 
-a \fCYYMAXFILL\fP(n) definition that is set to the maximum length value. If -1 
-switch is used then \fCYYMAXFILL\fP can be triggered only once after the 
-last \fC/*!re2c */\fP
-block.
-.TP
-\fCYYGETCONDITION\fP()
-This define is used to get the condition prior to entering the scanner code
-when using \fB-c\fP switch. The value must be initialized with a value from
-the enumeration \fCYYCONDTYPE\fP type.
-.TP
-\fCYYGETSTATE\fP()
-The user only needs to define this macro if the \fB-f\fP flag was specified.
-In that case, the generated code "calls" \fCYYGETSTATE\fP() at the very beginning
-of the scanner in order to obtain the saved state. \fCYYGETSTATE\fP() must return a signed
-integer. The value must be either -1, indicating that the scanner is entered for the
-first time, or a value previously saved by \fCYYSETSTATE\fP(s).  In the second case, the
-scanner will resume operations right after where the last \fCYYFILL\fP(n) was called.
-.TP
-\fCYYLIMIT\fP
-Expression of type \fC*YYCTYPE\fP that marks the end of the buffer
-(\fCYYLIMIT[-1]\fP is the last character in the buffer).
-The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
-to determine when the buffer needs (re)filling.
-.TP
-\fCYYMARKER\fP
-\*(lx of type \fC*YYCTYPE\fP.
-The generated code saves backtracking information in \fCYYMARKER\fP. Some easy
-scanners might not use this.
-.TP
-\fCYYMAXFILL\fP
-This will be automatically defined by \fC/*!max:re2c */\fP blocks as explained above.
-.TP
-\fCYYSETCONDITION(\fP\fIc\fP\fC)\fP
-This define is used to set the condition in transition rules.  This is only
-being used when \fB-c\fP is active and transition rules are being used.
-.TP
-\fCYYSETSTATE(\fP\fIs\fP\fC)\fP
-The user only needs to define this macro if the \fB-f\fP flag was specified.
-In that case, the generated code "calls" \fCYYSETSTATE\fP just before calling
-\fCYYFILL\fP(n).  The parameter to \fCYYSETSTATE\fP is a signed integer that uniquely
-identifies the specific instance of \fCYYFILL\fP(n) that is about to be called.
-Should the user wish to save the state of the scanner and have \fCYYFILL\fP(n) return
-to the caller, all he has to do is store that unique identifier in a variable.
-Later, when the scanner is called again, it will call \fCYYGETSTATE()\fP and
-resume execution right where it left off. The generated code will contain 
-both \fCYYSETSTATE\fP(s) and \fCYYGETSTATE\fP even if \fCYYFILL\fP(n) is being
-disabled.
-
-.SH "SCANNER WITH STORABLE STATES"
-When the \fB-f\fP flag is specified, \*(re generates a scanner that
-can store its current state, return to the caller, and later resume
-operations exactly where it left off.
-
-The default operation of \*(re is a "pull" model, where the scanner asks
-for extra input whenever it needs it. However, this mode of operation
-assumes that the scanner is the "owner" of the parsing loop, and that may
-not always be convenient.
-
-Typically, if there is a preprocessor ahead of the scanner in the stream,
-or for that matter any other procedural source of data, the scanner cannot
-"ask" for more data unless both scanner and source live in separate threads.
-
-The \fB-f\fP flag is useful for just this situation : it lets users design
-scanners that work in a "push" model, i.e. where data is fed to the scanner
-chunk by chunk. When the scanner runs out of data to consume, it just stores
-its state, and returns to the caller. When more input data is fed to the scanner,
-it resumes operations exactly where it left off.
-
-When using the -f option \*(re does not accept stdin because it has to do the 
-full generation process twice which means it has to read the input twice. That
-means \*(re would fail in case it cannot open the input twice or reading the
-input for the first time influences the second read attempt.
-
-Changes needed compared to the "pull" model.
-
-1. User has to supply macros YYSETSTATE(state) and YYGETSTATE()
-
-2. The \fB-f\fP option inhibits declaration of \fIyych\fP and
-\fIyyaccept\fP. So the user has to declare these. Also the user has
-to save and restore these. In the example \fIexamples/push.re\fP these
-are declared as fields of the (C++) class of which the scanner is a
-method, so they do not need to be saved/restored explicitly. For C
-they could e.g. be made macros that select fields from a structure
-passed in as parameter. Alternatively, they could be declared as local
-variables, saved with YYFILL(n) when it decides to return and restored
-at entry to the function. Also, it could be more efficient to save the
-state from YYFILL(n) because YYSETSTATE(state) is called
-unconditionally. YYFILL(n) however does not get \fIstate\fP as
-parameter, so we would have to store state in a local variable by
-YYSETSTATE(state).
-
-3. Modify YYFILL(n) to return (from the function calling it) if more
-input is needed.
-
-4. Modify caller to recognise "more input is needed" and respond
-appropriately.
-
-5. The generated code will contain a switch block that is used to restore 
-the last state by jumping behind the corresponding YYFILL(n) call. This code is
-automatically generated in the epilog of the first "\fC/*!re2c */\fP" block. 
-It is possible to trigger generation of the YYGETSTATE() block earlier by 
-placing a "\fC/*!getstate:re2c */\fP" comment. This is especially useful when
-the scanner code should be wrapped inside a loop.
-
-Please see examples/push.re for push-model scanner. The generated code can be
-tweaked using inplace configurations "\fBstate:abort\fP" and "\fBstate:nextlabel\fP".
-
-.SH "SCANNER WITH CONDITION SUPPORT"
-You can precede \*(rxs with a list of condition names when using the \fB-c\fP 
-switch. In this case \*(re generates scanner blocks for each condition. Each of the
-generated blocks has its own precondition. The precondition is given by the 
-interface define \fBYYGETCONDITION\fP and must be of type \fBYYCONDTYPE\fP.
-.LP
-There are two special rule types. First, the rules of the condition '*' are 
-merged to all  conditions. And second the empty condition list allows to 
-provide a code block that does not have a scanner part. Meaning it does not 
-allow any regular expression. The condition value referring to this special 
-block is always the one with the enumeration value 0. This way the code of this
-special rule can be used to initialize a scanner. It is in no way necessary to
-have these rules: but sometimes it is helpful to have a dedicated uninitialized
-condition state.
-.LP
-Non empty rules allow to specify the new condition, which makes them
-transition rules. Besides generating calls for the define \fBYYSETCONDITION\fP
-no other special code is generated.
-.LP
-There is another kind of special rules that allow to prepend code to any code
-block of all rules of a certain set of conditions or to all code blocks to all
-rules. This can be helpful when some operation is common among rules. For
-instance this can be used to store the length of the scanned string. These
-special setup rules start with an exclamation mark followed by either a list
-of conditions \fB<! condition, ... >\fP or a star \fB<!*>\fP.
-When \*(re generates the code for a rule whose state does not have a
-setup rule and a star'd setup rule is present, then that code will be used
-as setup code.
-
-.SH "ENCODINGS"
-\*(re supports the following encodings: ASCII, EBCDIC (\fB-e\fP), UCS-2 (\fB-w\fP), 
-UTF-16 (\fB-x\fP), UTF-32 (\fB-u\fP) and UTF-8 (\fB-8\fP). ASCII is default. You can 
-either pass cmd flag or use inplace configuration.
-.LP
-The following concepts should be clarified when talking about encoding. \fBCode point\fP 
-is an abstract number, which represents single encoding symbol. \fBCode unit\fP is the 
-smallest unit of memory, which is used in the encoded text (it corresponds to one 
-character in the input stream). One or more code units can be needed to represent 
-a single code point, depending on the encoding. In \fBfixed-length\fP encoding, each 
-code point is represented with equal number of code units. In \fBvariable-length\fP 
-encoding, different code points can be represented with different number of code units.
-.TP
-\fC\fBASCII\fP
-is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
-to 0xFF (note that this is \*(re-specific understanding of ASCII). One code point 
-is represented with exactly one 1-byte code unit, which has the same value as the 
-code point. Size of YYCTYPE must be 1 byte.
-.TP
-\fC\fBEBCDIC\fP
-is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
-to 0xFF. One code point is represented with exactly one 1-byte code unit, which has 
-the same value as the code point. Size of YYCTYPE must be 1 byte.
-.TP
-\fC\fBUCS-2\fP
-is a fixed-length encoding. Its code space includes 0x10000 code points, from 0 
-to 0xFFFF. One code point is represented with exactly one 2-byte code unit, which has 
-the same value as the code point. Size of YYCTYPE must be 2 bytes.
-.TP
-\fC\fBUTF-16\fP
-is a variable-length encoding. Its code space includes all Unicode code points, 
-from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with one or 
-two 2-byte code units. Size of YYCTYPE must be 2 bytes.
-.TP
-\fC\fBUTF-32\fP
-is a fixed-length encoding. Its code space includes all Unicode code points, 
-from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with exactly 
-one 4-byte code unit. Size of YYCTYPE must be 4 bytes.
-.TP
-\fC\fBUTF-8\fP
-is a variable-length encoding. Its code space includes all 
-Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point 
-is represented with sequence of one, two, three or four 1-byte code units. Size of 
-YYCTYPE must be 1 byte.
-.LP
-In Unicode, values from range 0xD800 to 0xDFFF (surrogates) are not valid Unicode 
-code points, any encoded sequence of code units, that would map to Unicode code points 
-in the range 0xD800-0xDFFF, is ill-formed.
-.LP
-For some encodings, there are code units, that never occur in valid encoded stream 
-(e.g. 0xFF byte in UTF-8). If the generated scanner must check for such input symbols, 
-the only way to do so is to use default rule \fB*\fP. Note, that full range rule \fB[^]\fP 
-means "all valid code points", while default rule \fB*\fP means "all possible code units".
-
-.SH "SCANNER SPECIFICATIONS"
-Each scanner specification consists of a set of \fIrules\fP, \fInamed
-definitions\fP and \fIconfigurations\fP.
-.LP
-\fIRules\fP consist of a \*(rx along with a block of C/C++ code that
-is to be executed when the associated \fI\*(rx\fP is matched. You can either
-start the code with an opening curly brace or the sequence '\fB:=\fP'. When
-the code starts with a curly brace then \*(re counts the brace depth and stops looking
-for code automatically. Otherwise curly braces are not allowed and \*(re stops
-looking for code at the first line that does not begin with whitespace. If two
-or more rules overlap, the first rule is preferred.
-.P
-.RS
-\fI\*(rx\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fI\*(rx\fP \fC:=\fP \fIC/C++ code\fP
-.RE
-.P
-There is one special rule: default rule \fB*\fP.
-.P
-.RS
-\fI*\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fI*\fP \fC:=\fP \fIC/C++ code\fP
-.RE
-.P
-The former "default" rule \fB[^]\fP differs from \fB*\fP:
-.P
-.RS
-- \fB*\fP can occur anywhere a normal rule can occur, but regardless to its place, 
-\fB*\fP has the lowest priority.
-.P
-- \fB[^]\fP matches all valid symbols in current encoding, while \fB*\fP matches 
-any input character, either valid or invalid.
-.P
-- \fB[^]\fP can consume multiple input characters, while \fB*\fP always consumes 
-one input character.
-.RE
-.P
-In fact, when variable-length encoding is used, \fB*\fP is the only possible way 
-to match invalid input character.
-.LP
-If \fB-c\fP is active then each \*(rx is preceded by a list of 
-comma separated condition names. Besides normal naming rules there are two 
-special cases. A rule may contain the single condition name '*' or no condition 
-name at all. In the latter case the rule cannot have a \*(rx. Non 
-empty rules may furthermore specify the new condition. In that case \*(re will
-generate the necessary code to change the condition automatically. Just as above
-code can be started with a curly brace or the sequence '\fB:=\fP'. Furthermore
-rules can use ':=>' as a shortcut to automatically generate code that not only
-sets the new condition state but also continues execution with the new state. A
-shortcut rule should not be used in a loop where there is code between the start
-of the loop and the \*(re block unless \fIre2c:cond:goto\fP is changed
-to '\fIcontinue;\fP'. If code is necessary before all rules (though not simple
-jumps) you can do so by using <! pseudo-rules.
-.P
-.RS
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI\*(rx\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI\*(rx\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI*\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI*\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI\*(rx\fP \fC=>\fP \fP\fIcondition\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI\*(rx\fP \fC=>\fP \fP\fIcondition\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fIcondition-list\fP\fC>\fP \fI\*(rx\fP \fC:=>\fP \fP\fIcondition\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI\*(rx\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI\*(rx\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI*\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI*\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI\*(rx\fP \fC=>\fP \fP\fIcondition\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI\*(rx\fP \fC=>\fP \fP\fIcondition\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<\fP\fI*\fP\fC>\fP \fI\*(rx\fP \fC:=>\fP \fP\fIcondition\fP
-.P
-\fC<>\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<>\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<>\fP \fC=>\fP \fP\fIcondition\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<>\fP \fC=>\fP \fP\fIcondition\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<>\fP \fC:=>\fP \fP\fIcondition\fP
-.P
-\fC<!\fIcondition-list\fP\fC>\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<!\fIcondition-list\fP\fC>\fP \fC:=\fP \fIC/C++ code\fP
-.P
-\fC<!*>\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
-.P
-\fC<!*>\fP \fC:=\fP \fIC/C++ code\fP
-.RE
-.LP
-Named definitions are of the form:
-.P
-.RS
-\fIname\fP \fC=\fP \fI\*(rx\fP\fC;\fP
-.RE
-.LP
-If \fB-F\fP is active, then named definitions are also of the form:
-.P
-.RS
-\fIname\fP \fI\*(rx\fP
-.RE
-.LP
-Configurations look like named definitions whose names start 
-with "\fBre2c:\fP":
-.P
-.RS
-\fCre2c:\fP\fIname\fP \fC=\fP \fIvalue\fP\fC;\fP
-.RE
-.RS
-\fCre2c:\fP\fIname\fP \fC=\fP \fB"\fP\fIvalue\fP\fB"\fP\fC;\fP
-.RE
-
-.SH "SUMMARY OF RE2C REGULAR-EXPRESSIONS"
-.TP
-\fC"foo"\fP
-the literal string \fCfoo\fP.
-ANSI-C escape sequences can be used.
-.TP
-\fC'foo'\fP
-the literal string \fCfoo\fP (characters [a-zA-Z] treated case-insensitive).
-ANSI-C escape sequences can be used.
-.TP
-\fC[xyz]\fP
-a "character class"; in this case,
-the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
-.TP
-\fC[abj-oZ]\fP
-a "character class" with a range in it;
-matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
-or a '\fCZ\fP'.
-.TP
-\fC[^\fIclass\fP\fC]\fP
-an inverted "character class".
-.TP
-\fIr\fP\fC\e\fP\fIs\fP
-match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be \*(rxs
-which can be expressed as character classes.
-.TP
-\fIr\fP\fC*\fP
-zero or more \fIr\fP's, where \fIr\fP is any \*(rx
-.TP
-\fC\fIr\fP\fC+\fP
-one or more \fIr\fP's
-.TP
-\fC\fIr\fP\fC?\fP
-zero or one \fIr\fP's (that is, "an optional \fIr\fP")
-.TP
-name
-the expansion of the "named definition" (see above)
-.TP
-\fC(\fP\fIr\fP\fC)\fP
-an \fIr\fP; parentheses are used to override precedence
-(see below)
-.TP
-\fIrs\fP
-an \fIr\fP followed by an \fIs\fP ("concatenation")
-.TP
-\fIr\fP\fC|\fP\fIs\fP
-either an \fIr\fP or an \fIs\fP
-.TP
-\fIr\fP\fC/\fP\fIs\fP
-an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
-the matched text. This type of \*(rx is called "trailing context". A trailing
-context can only be the end of a rule and not part of a named definition.
-.TP
-\fIr\fP\fC{\fP\fIn\fP\fC}\fP
-matches \fIr\fP exactly \fIn\fP times.
-.TP
-\fIr\fP\fC{\fP\fIn\fP\fC,}\fP
-matches \fIr\fP at least \fIn\fP times.
-.TP
-\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP
-matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times.
-.TP
-\fC.\fP
-match any character except newline (\\n).
-.TP
-\fIdef\fP
-matches named definition as specified by \fIdef\fP only if \fB-F\fP is
-off. If the switch \fB-F\fP is active then this behaves like it was enclosed
-in double quotes and matches the string \fIdef\fP.
-.LP
-Character classes and string literals may contain octal or hexadecimal 
-character definitions and the following set of escape sequences (\fB\\n\fP,
- \fB\\t\fP, \fB\\v\fP, \fB\\b\fP, \fB\\r\fP, \fB\\f\fP, \fB\\a\fP, \fB\\\\\fP).
-An octal character is defined by a backslash followed by its three octal digits
-and a hexadecimal character is defined by backslash, a lower cased '\fBx\fP' 
-and its two hexadecimal digits or a backslash, an upper cased \fBX\fP and its 
-four hexadecimal digits.
-.LP
-\*(re further more supports the c/c++ unicode notation. That is a backslash followed
-by either a lowercased \fBu\fP and its four hexadecimal digits or an uppercased 
-\fBU\fP and its eight hexadecimal digits. However only in \fB-u\fP mode the
-generated code can deal with any valid Unicode character up to 0x10FFFF.
-.LP
-The only portable "\fBany\fP" rule is the default rule \fB*\fP.
-.LP
-The \*(rxs listed above are grouped according to
-precedence, from highest precedence at the top to lowest at the bottom.
-Those grouped together have equal precedence.
-
-.SH "INPLACE CONFIGURATION"
-.LP
-It is possible to configure code generation inside \*(re blocks. The following
-lists the available configurations:
-.TP
-\fIre2c:condprefix\fP \fB=\fP yyc_ \fB;\fP
-Allows to specify the prefix used for condition labels. That is this text is 
-prepended to any condition label in the generated output file.
-.TP
-\fIre2c:condenumprefix\fP \fB=\fP yyc \fB;\fP
-Allows to specify the prefix used for condition values. That is this text is 
-prepended to any condition enum value in the generated output file. 
-.TP
-\fIre2c:cond:divider\fP \fB=\fP "/* *********************************** */" \fB;\fP
-Allows to customize the divider for condition blocks. You can use '@@' to 
-put the name of the condition or customize the placeholder
-using \fIre2c:cond:divider@cond\fP.
-.TP
-\fIre2c:cond:divider@cond\fP \fB=\fP @@ \fB;\fP
-Specifies the placeholder that will be replaced with the condition name
-in \fIre2c:cond:divider\fP.
-.TP
-\fIre2c:cond:goto\fP \fB=\fP "goto @@;" \fB;\fP
-Allows to customize the condition goto statements used with ':=>' style rules.
-You can use '@@' to put the name of the condition or customize the placeholder
-using \fIre2c:cond:goto@cond\fP. You can also change this to 'continue;',
-which would allow you to continue with the next loop cycle including any code
-between loop start and re2c block.
-.TP
-\fIre2c:cond:goto@cond\fP \fB=\fP @@ \fB;\fP
-Specifies the placeholder that will be replaced with the condition label
-in \fIre2c:cond:goto\fP.
-.TP
-\fIre2c:indent:top\fP \fB=\fP 0 \fB;\fP
-Specifies the minimum number of indentation to use. Requires a numeric value 
-greater than or equal to zero.
-.TP
-\fIre2c:indent:string\fP \fB=\fP "\\t" \fB;\fP
-Specifies the string to use for indentation. Requires a string that should 
-contain only whitespace unless you need this for external tools. The easiest 
-way to specify spaces is to enclose them in single or double quotes. If you do 
-not want any indentation at all you can simply set this to \fB""\fP.
-.TP
-\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP
-When this setting is non zero, then \*(re automatically generates conversion 
-code whenever yych gets read. In this case the type must be defined using
-\fBre2c:define:YYCTYPE\fP.
-.TP
-\fIre2c:yych:emit\fP \fB=\fP 1 \fB;\fP
-Generation of \fByych\fP can be suppressed by setting this to 0.
-.TP
-\fIre2c:yybm:hex\fP \fB=\fP 0 \fB;\fP
-If set to zero then a decimal table is being used else a hexadecimal table 
-will be generated.
-.TP
-\fIre2c:yyfill:enable\fP \fB=\fP 1 \fB;\fP
-Set this to zero to suppress generation of YYFILL(n). When using this be sure
-to verify that the generated scanner does not read behind input. Allowing
-this behavior might introduce severe security issues to your programs.
-.TP
-\fIre2c:yyfill:check\fP \fB=\fP 1 \fB;\fP
-This can be set 0 to suppress output of the pre condition using YYCURSOR and
-YYLIMIT which becomes useful when YYLIMIT + max(YYFILL) is always accessible.
-.TP
-\fIre2c:yyfill:parameter\fP \fB=\fP 1 \fB;\fP
-Allows to suppress parameter passing to \fBYYFILL\fP calls. If set to zero 
-then no parameter is passed to \fBYYFILL\fP. However \fBre2c:define:YYFILL@len\fP
-allows to specify a replacement string for the actual length value. If set to
-a non zero value then \fBYYFILL\fP usage will be followed by the number of 
-requested characters in braces unless \fBre2c:define:YYFILL:naked\fP is set. 
-Also look at \fBre2c:define:YYFILL:naked\fP and \fBre2c:define:YYFILL@len\fP.
-.TP
-\fIre2c:startlabel\fP \fB=\fP 0 \fB;\fP
-If set to a non zero integer then the start label of the next scanner blocks 
-will be generated even if not used by the scanner itself. Otherwise the normal 
-\fByy0\fP like start label is only being generated if needed. If set to a text 
-value then a label with that text will be generated regardless of whether the 
-normal start label is being used or not. This setting is being reset to \fB0\fP
-after a start label has been generated.
-.TP
-\fIre2c:labelprefix\fP \fB=\fP yy \fB;\fP
-Allows to change the prefix of numbered labels. The default is \fByy\fP and
-can be set any string that is a valid label.
-.TP
-\fIre2c:state:abort\fP \fB=\fP 0 \fB;\fP
-When not zero and switch -f is active then the \fCYYGETSTATE\fP block will 
-contain a default case that aborts and a -1 case is used for initialization.
-.TP
-\fIre2c:state:nextlabel\fP \fB=\fP 0 \fB;\fP
-Used when -f is active to control whether the \fCYYGETSTATE\fP block is 
-followed by a \fCyyNext:\fP label line. Instead of using \fCyyNext\fP you can 
-usually also use configuration \fIstartlabel\fP to force a specific start label
-or default to \fCyy0\fP as start label. Instead of using a dedicated label it 
-is often better to separate the YYGETSTATE code from the actual scanner code by
-placing a "\fC/*!getstate:re2c */\fP" comment.
-.TP
-\fIre2c:cgoto:threshold\fP \fB=\fP 9 \fB;\fP
-When -g is active this value specifies the complexity threshold that triggers
-generation of jump tables rather than using nested if's and decision bitfields.
-The threshold is compared against a calculated estimation of if-s needed where 
-every used bitmap divides the threshold by 2.
-.TP
-\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP
-When the input uses signed characters and \fB-s\fP or \fB-b\fP switches are 
-in effect re2c allows to automatically convert to the unsigned character type 
-that is then necessary for its internal single character. When this setting 
-is zero or an empty string the conversion is disabled. Using a non zero number
-the conversion is taken from \fBYYCTYPE\fP. If that is given by an inplace 
-configuration that value is being used. Otherwise it will be \fB(YYCTYPE)\fP 
-and changes to that configuration are  no longer possible. When this setting is
-a string the braces must be specified. Now assuming your input is a \fBchar*\fP
-buffer and you are using above mentioned switches you can set \fBYYCTYPE\fP to
-\fBunsigned char\fP and this setting to either \fB1\fP or \fB"(unsigned char)"\fP.
-.TP
-\fIre2c:define:YYCONDTYPE\fP \fB=\fP YYCONDTYPE \fB;\fP
-Enumeration used for condition support with \fB-c\fP mode.
-.TP
-\fIre2c:define:YYCTXMARKER\fP \fB=\fP YYCTXMARKER \fB;\fP
-Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYCTYPE\fP \fB=\fP YYCTYPE \fB;\fP
-Allows to overwrite the define YYCTYPE and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYCURSOR\fP \fB=\fP YYCURSOR \fB;\fP
-Allows to overwrite the define YYCURSOR and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYDEBUG\fP \fB=\fP YYDEBUG \fB;\fP
-Allows to overwrite the define \fBYYDEBUG\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYFILL\fP \fB=\fP YYFILL \fB;\fP
-Allows to overwrite the define \fBYYFILL\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYFILL:naked\fP \fB=\fP 0 \fB;\fP
-When set to 1 neither braces, parameter nor semicolon gets emitted.
-.TP
-\fIre2c:define:YYFILL@len\fP \fB=\fP @@ \fB;\fP
-When using \fIre2c:define:YYFILL\fP and \fIre2c:yyfill:parameter\fP is 0 then
-any occurrence of this text inside \fBYYFILL\fP will be replaced with the actual
-length value.
-.TP
-\fIre2c:define:YYGETCONDITION\fP \fB=\fP YYGETCONDITION \fB;\fP
-Allows to overwrite the define \fBYYGETCONDITION\fP.
-.TP
-\fIre2c:define:YYGETCONDITION:naked\fP \fB=\fP  \fB;\fP
-When set to 1 neither braces, parameter nor semicolon gets emitted.
-.TP
-\fIre2c:define:YYGETSTATE\fP \fB=\fP YYGETSTATE \fB;\fP
-Allows to overwrite the define \fBYYGETSTATE\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYGETSTATE:naked\fP \fB=\fP 0 \fB;\fP
-When set to 1 neither braces, parameter nor semicolon gets emitted.
-.TP
-\fIre2c:define:YYLIMIT\fP \fB=\fP YYLIMIT \fB;\fP
-Allows to overwrite the define \fBYYLIMIT\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYMARKER\fP \fB=\fP YYMARKER \fB;\fP
-Allows to overwrite the define \fBYYMARKER\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYSETCONDITION\fP \fB=\fP YYSETCONDITION \fB;\fP
-Allows to overwrite the define \fBYYSETCONDITION\fP.
-.TP
-\fIre2c:define:YYSETCONDITION@cond\fP \fB=\fP @@ \fB;\fP
-When using \fIre2c:define:YYSETCONDITION\fP then any occurrence of this text 
-inside \fBYYSETCONDITION\fP will be replaced with the actual new condition value.
-.TP
-\fIre2c:define:YYSETSTATE\fP \fB=\fP YYSETSTATE \fB;\fP
-Allows to overwrite the define \fBYYSETSTATE\fP and thus avoiding it by setting the
-value to the actual code needed.
-.TP
-\fIre2c:define:YYSETSTATE:naked\fP \fB=\fP 0 \fB;\fP
-When set to 1 neither braces, parameter nor semicolon gets emitted.
-.TP
-\fIre2c:define:YYSETSTATE@state\fP \fB=\fP @@ \fB;\fP
-When using \fIre2c:define:YYSETSTATE\fP then any occurrence of this text 
-inside \fBYYSETSTATE\fP will be replaced with the actual new state value.
-.TP
-\fIre2c:label:yyFillLabel\fP \fB=\fP yyFillLabel \fB;\fP
-Allows to overwrite the name of the label yyFillLabel.
-.TP
-\fIre2c:label:yyNext\fP \fB=\fP yyNext \fB;\fP
-Allows to overwrite the name of the label yyNext.
-.TP
-\fIre2c:variable:yyaccept\fP \fB=\fP yyaccept \fB;\fP
-Allows to overwrite the name of the variable yyaccept.
-.TP
-\fIre2c:variable:yybm\fP \fB=\fP yybm \fB;\fP
-Allows to overwrite the name of the variable yybm.
-.TP
-\fIre2c:variable:yych\fP \fB=\fP yych \fB;\fP
-Allows to overwrite the name of the variable yych.
-.TP
-\fIre2c:variable:yyctable\fP \fB=\fP yyctable \fB;\fP
-When both \fB-c\fP and \fB-g\fP are active then \*(re uses this variable to 
-generate a static jump table for YYGETCONDITION.
-.TP
-\fIre2c:variable:yystable\fP \fB=\fP yystable \fB;\fP
-When both \fB-f\fP and \fB-g\fP are active then \*(re uses this variable to 
-generate a static jump table for YYGETSTATE.
-.TP
-\fIre2c:variable:yytarget\fP \fB=\fP yytarget \fB;\fP
-Allows to overwrite the name of the variable yytarget.
-
-.SH "UNDERSTANDING RE2C"
-.LP
-The subdirectory lessons of the \*(re distribution contains a few step by step
-lessons to get you started with \*(re. All examples in the lessons subdirectory
-can be compiled and actually work.
-
-.SH FEATURES
-.LP
-\*(re provides default action: \fB*\fP. When the default rule matches, 
-exactly one input character is consumed.
-.LP
-The user must arrange for a sentinel token to appear at the end of input
-(and provide a rule for matching it):
-\*(re does not provide an \fC<<EOF>>\fP expression.
-If the source is from a null-byte terminated string, a
-rule matching a null character will suffice.  If the source is from a
-file then you could pad the input with a newline (or some other character that 
-cannot appear within another token); upon recognizing such a character check 
-to see if it is the sentinel and act accordingly. And you can also use YYFILL(n)
-to end the scanner in case not enough characters are available which is nothing
-else than a detection of end of data/file.
-
-.SH BUGS
-.LP
-Difference only works for character sets, and not in UTF-8 mode.
-.LP
-The generated DFA is not minimal.
-.LP
-Features, that are naturally orthogonal (such as reusable rules, conditions, 
-setup rules and default rules), cannot always be combined. E.g., one cannot set 
-setup/default rule for condition in scanner with reusable rules.
-.LP
-\*(re does too much unnecessary work: e.g., if /*!use:re2c ... */ block has 
-additional rules, these rules are parsed 4 times, while they should be parsed 
-only once.
-.LP
-The \*(re internal algorithms need documentation.
-
-.SH "SEE ALSO"
-.LP
-flex(1), lex(1), quex(
-.PD 0
-.B http://quex.sourceforge.net
-.PD 1
-).
-.P
-More information on \*(re can be found here:
-.PD 0
-.P
-.B http://re2c.org/
-.PD 1
-
-.SH AUTHORS
-.PD 0
-.P
-Peter Bumbulis <peter@csg.uwaterloo.ca>
-.P
-Brian Young <bayoung@acm.org>
-.P
-Dan Nuffer <nuffer@users.sourceforge.net>
-.P
-Marcus Boerger <helly@users.sourceforge.net>
-.P
-Hartmut Kaiser <hkaiser@users.sourceforge.net>
-.P
-Emmanuel Mogenet <mgix@mgix.com> added storable state
-.P
-.PD 1
-
-.SH VERSION INFORMATION
-This manpage describes \*(re, version @PACKAGE_VERSION@.
-
-.fi
diff --git a/re2c/re2c.ad.in b/re2c/re2c.ad.in

new file mode 100644 (file)

index 0000000..45446f4
--- /dev/null
+++ b/re2c/re2c.ad.in
@@ -0,0 +1,895 @@
+RE2C (1)
+========
+:doctype: manpage
+
+
+NAME
+----
+re2c - convert regular expressions to C/C++
+
+
+SYNOPSIS
+--------
+*re2c* [_OPTIONS_] _FILE_
+
+
+DESCRIPTION
+-----------
+*re2c* is a lexer generator for C/C\+\+.
+It finds regular expression specifications inside of C/C++ comments and replaces them with a hard-coded DFA.
+The user must supply some interface code in order to control and customize the generated DFA.
+
+
+EXAMPLE
+-------
+Given the following code:
+
+[source,C]
+----
+unsigned int stou (const char * s)
+{
+#   define YYCTYPE char
+    const YYCTYPE * YYCURSOR = s;
+    unsigned int result = 0;
+
+    for (;;)
+    {
+        /*!re2c
+            re2c:yyfill:enable = 0;
+
+            "\x00" { return result; }
+            [0-9]  { result = result * 10 + c; continue; }
+        */
+    }
+}
+----
+
+`re2c -is` will generate:
+
+[source,C]
+----
+/* Generated by re2c 0.13.7.dev on Mon Jul 14 13:37:46 2014 */
+unsigned int stou (const char * s)
+{
+#   define YYCTYPE char
+    const YYCTYPE * YYCURSOR = s;
+    unsigned int result = 0;
+
+    for (;;)
+    {
+
+{
+        YYCTYPE yych;
+
+        yych = *YYCURSOR;
+        if (yych <= 0x00) goto yy3;
+        if (yych <= '/') goto yy2;
+        if (yych <= '9') goto yy5;
+yy2:
+yy3:
+        ++YYCURSOR;
+        { return result; }
+yy5:
+        ++YYCURSOR;
+        { result = result * 10 + c; continue; }
+}
+
+    }
+}
+----
+
+
+OPTIONS
+-------
+*-?*, *-h*::
+    Invoke a short help.
+
+
+*-b*::
+    Implies *-s*.  Use bit vectors as well in the attempt to coax better
+    code out of the compiler.  Most useful for specifications with more than a
+    few keywords (e.g. for most programming languages).
+
+*-c*::
+    Enable (f)lex-like condition support.
+
+*-d*::
+    Creates a parser that dumps information about the current position and in 
+    which state the parser is while parsing the input. This is useful to debug 
+    parser issues and states. If you use this switch you need to define a macro
+    *YYDEBUG* that is called like a function with two parameters:
+    *void YYDEBUG (int state, char current)*. The first parameter receives the 
+    state or -1 and the second parameter receives the input at the current cursor.
+
+*-D*::
+    Emit Graphviz dot data. It can then be processed with e.g.
+    `dot -Tpng input.dot > output.png`. Please note that scanners with many states
+    may crash dot.
+
+*-e*::
+    Generate a parser that supports EBCDIC. The generated code can deal with any 
+    character up to 0xFF. In this mode *re2c* assumes that input character size is 
+    1 byte. This switch is incompatible with *-w*, *-x*, *-u* and *-8*.
+
+*-f*::
+    Generate a scanner with support for storable state.
+    For details see below at *SCANNER WITH STORABLE STATES*.
+
+*-F*::
+    Partial support for flex syntax. When this flag is active then named
+    definitions must be surrounded by curly braces and can be defined without an
+    equal sign and the terminating semicolon. Instead names are treated as direct
+    double quoted strings.
+
+*-g*::
+    Generate a scanner that utilizes GCC's computed goto feature. That is *re2c*
+    generates jump tables whenever a decision is of a certain complexity (e.g. a 
+    lot of if conditions are otherwise necessary). This is only useable with GCC 
+    and produces output that cannot be compiled with any other compiler. Note that
+    this implies *-b* and that the complexity threshold can be configured using the
+    inplace configuration *$$cgoto:threshold$$*.
+
+*-i*::
+    Do not output *#line* information. This is useful when you want to use a CMS tool
+    with the *re2c* output which you might want if you do not require your users to 
+    have *re2c* themselves when building from your source.
+
+*-o OUTPUT*::
+    Specify the output file.
+
+*-r*::
+    Allows reuse of scanner definitions with *$$/*!use:re2c$$* after
+    *$$/*!rules:re2c$$*. In this mode no *$$/*!re2c$$* block and exactly one
+    *$$/*!rules:re2c$$* must be present. The rules are being saved and used by
+    every *$$/*!use:re2c$$* block that follows. These blocks can contain
+    inplace configurations, especially *$$re2c:flags:e$$*, *$$re2c:flags:w$$*, 
+    *$$re2c:flags:x$$*, *$$re2c:flags:u$$* and *$$re2c:flags:8$$*.
+    That way it is possible to create the same scanner multiple times for different
+    character types, different input mechanisms or different output mechanisms.
+    The *$$/*!use:re2c$$* blocks can also contain additional rules that will be
+    appended to the set of rules in *$$/*!rules:re2c$$*.
+
+*-s*::
+    Generate nested ifs for some switches.  Many compilers need this
+    assist to generate better code.
+
+*-t*::
+    Create a header file that contains types for the (f)lex-like condition support.
+    This can only be activated when *-c* is in use.
+
+*-u*::
+    Generate a parser that supports UTF-32. The generated code can deal with any 
+    valid Unicode character up to 0x10FFFF. In this mode *re2c* assumes that input 
+    character size is 4 bytes. This switch is incompatible with *-e*, *-w*, *-x* 
+    and *-8*. This implies *-s*.
+
+*-v*::
+    Show version information.
+
+*-V*::
+    Show the version as a number XXYYZZ.
+
+*-w*::
+    Generate a parser that supports UCS-2. The generated code can deal with any 
+    valid Unicode character up to 0xFFFF. In this mode *re2c* assumes that input 
+    character size is 2 bytes. This switch is incompatible with *-e*, *-x*, *-u* 
+    and *-8*. This implies *-s*.
+
+*-x*::
+    Generate a parser that supports UTF-16. The generated code can deal with any 
+    valid Unicode character up to 0x10FFFF. In this mode *re2c* assumes that input 
+    character size is 2 bytes. This switch is incompatible with *-e*, *-w*, *-u* 
+    and *-8*. This implies *-s*.
+
+*-1*::
+    Force single pass generation, this cannot be combined with -f and disables 
+    *YYMAXFILL* generation prior to last *re2c* block.
+
+*-8*::
+    Generate a parser that supports UTF-8. The generated code can deal with any 
+    valid Unicode character up to 0x10FFFF. In this mode *re2c* assumes that input 
+    character size is 1 byte. This switch is incompatible with *-e*, *-w*, *-x* 
+    and *-u*.
+
+*--case-insensitive*::
+    All strings are case insensitive, so all "-expressions are treated
+    in the same way '-expressions are.
+
+*--case-inverted*::
+    Invert the meaning of single and double quoted strings.
+    With this switch single quotes are case sensitive and
+    double quotes are case insensitive.
+
+*--no-generation-date*::
+    Suppress date output in the generated output so that it only shows the re2c
+    version.
+
+*--encoding-policy POLICY*::
+    Specify how *re2c* must treat Unicode surrogates. *POLICY* can be one of the following:
+    *fail* (abort with error when surrogate encountered),
+    *substitute* (silently substitute surrogate with error code point 0xFFFD),
+    *ignore* (treat surrogates as normal code points).
+    By default *re2c* ignores surrogates (for backward compatibility).
+    Unicode standard says that standalone surrogates are invalid code points,
+    but different libraries and programs treat them differently.
+
+
+INTERFACE CODE
+--------------
+The user must supply interface code either in the form of C/C++ code
+(macros, functions, variables, etc.) or in the form of _inplace configurations_.
+Which symbols must be defined and which are optional depends on a particular use case.
+
+*YYCONDTYPE*::
+    In *-c* mode you can use *-t* to generate a file that contains the 
+    enumeration used as conditions. Each of the values refers to a condition of
+    a rule set.
+
+*YYCTXMARKER*::
+    l-value of type *$$* YYCTYPE$$*.
+    The generated code saves trailing context backtracking information in *YYCTXMARKER*.
+    The user only needs to define this macro if a scanner specification uses trailing
+    context in one or more of its regular expressions.
+
+*YYCTYPE*::
+    Type used to hold an input symbol (code unit).
+    Usually *char* or *unsigned char* for ASCII, EBCDIC and UTF-8, *unsigned short* for UTF-16 or UCS-2 and *unsigned int* for UTF-32.
+
+*YYCURSOR*::
+    l-value of type *$$* YYCTYPE$$* that points to the current input symbol.
+    The generated code advances *YYCURSOR* as symbols are matched.
+    On entry, *YYCURSOR* is assumed to point to the first character of the
+    current token.  On exit, *YYCURSOR* will point to the first character of
+    the following token.
+
+*YYDEBUG (state, current)*::
+    This is only needed if the *-d* flag was specified. It allows to easily debug
+    the generated parser by calling a user defined function for every state. The function
+    should have the following signature: *void YYDEBUG (int state, char current)*. 
+    The first parameter receives the state or -1 and the second parameter receives the 
+    input at the current cursor.
+
+*YYFILL (n)*::
+    The generated code ``calls'' *YYFILL (n)* when the buffer needs
+    (re)filling: at least *n* additional characters should
+    be provided. *YYFILL (n)* should adjust *YYCURSOR*, *YYLIMIT*,
+    *YYMARKER* and *YYCTXMARKER* as needed.  Note that for typical 
+    programming languages *n* will be the length of the longest keyword plus one.
+    The user can place a comment of the form *$$/*!max:re2c*/$$* once to insert 
+    a *YYMAXFILL (n)* definition that is set to the maximum length value. If -1 
+    switch is used then *YYMAXFILL* can be triggered only once after the 
+    last *$$/*!re2c ... */$$* block.
+
+*YYGETCONDITION ()*::
+    This define is used to get the condition prior to entering the scanner code
+    when using *-c* switch. The value must be initialized with a value from
+    the enumeration *YYCONDTYPE* type.
+
+*YYGETSTATE ()*::
+    The user only needs to define this macro if the *-f* flag was specified.
+    In that case, the generated code ``calls'' *YYGETSTATE ()* at the very beginning
+    of the scanner in order to obtain the saved state. *YYGETSTATE ()* must return a signed
+    integer. The value must be either -1, indicating that the scanner is entered for the
+    first time, or a value previously saved by *YYSETSTATE (s)*.  In the second case, the
+    scanner will resume operations right after where the last *YYFILL (n)* was called.
+
+*YYLIMIT*::
+    Expression of type *$$* YYCTYPE$$* that marks the end of the buffer
+    (*$$YYLIMIT[-1]$$* is the last character in the buffer).
+    The generated code repeatedly compares *YYCURSOR* to *YYLIMIT*
+    to determine when the buffer needs (re)filling.
+
+*YYMARKER*::
+    l-value of type *$$* YYCTYPE$$*.
+    The generated code saves backtracking information in *YYMARKER*. Some easy
+    scanners might not use this.
+
+*YYMAXFILL*::
+    This will be automatically defined by *$$/*!max:re2c*/$$* blocks as explained above.
+
+*YYSETCONDITION (c)*::
+    This define is used to set the condition in transition rules.  This is only
+    being used when *-c* is active and transition rules are being used.
+
+*YYSETSTATE (s)*::
+    The user only needs to define this macro if the *-f* flag was specified.
+    In that case, the generated code ``calls'' *YYSETSTATE* just before calling
+    *YYFILL (n)*.  The parameter to *YYSETSTATE* is a signed integer that uniquely
+    identifies the specific instance of *YYFILL (n)* that is about to be called.
+    Should the user wish to save the state of the scanner and have *YYFILL (n)* return
+    to the caller, all he has to do is store that unique identifier in a variable.
+    Later, when the scanner is called again, it will call *YYGETSTATE ()* and
+    resume execution right where it left off. The generated code will contain 
+    both *YYSETSTATE (s)* and *YYGETSTATE* even if *YYFILL (n)* is being disabled.
+
+
+SYNTAX
+------
+Code for *re2c* consists of a set of _rules_, _named definitions_ and _inplace configurations_.
+
+_rules_ consist of a _regular-expressions_ along with a block of _$$C/C++ code$$_ that
+is to be executed when the associated _regular-expression_ is matched. You can either
+start the code with an opening curly brace or the sequence *$$:=$$*. When
+the code starts with a curly brace then *re2c* counts the brace depth and stops looking
+for code automatically. Otherwise curly braces are not allowed and *re2c* stops
+looking for code at the first line that does not begin with whitespace. If two
+or more rules overlap, the first rule is preferred.
+
+****
+_regular-expression_ { _$$C/C++ code$$_ }
+
+_regular-expression_ := _$$C/C++ code$$_
+****
+
+There is one special rule: default rule *$$*$$*:
+
+****
+$$*$$ { _$$C/C++ code$$_ }
+
+$$*$$ := _$$C/C++ code$$_
+****
+
+[NOTE]
+====
+*$$[^]$$* differs from *$$*$$*:
+*$$*$$* has the lowest priority, matches any code unit (either valid or invalid) and always consumes one character;
+*$$[^]$$* matches any valid code point (not code unit) and can consume multiple characters.
+In fact, when variable-length encoding is used, *$$*$$* is the only possible way to match invalid input character.
+====
+
+If *-c* is active then each _regular-expression_ is preceded by a list of 
+comma separated condition names. Besides normal naming rules there are two 
+special cases. A rule may contain the single condition name *$$*$$* or no condition 
+name at all. In the latter case the rule cannot have a _regular-expression_. Non 
+empty rules may furthermore specify the new condition. In that case *re2c* will
+generate the necessary code to change the condition automatically. Just as above
+code can be started with a curly brace or the sequence *$$:=$$*. Furthermore
+rules can use *$$:=>$$* as a shortcut to automatically generate code that not only
+sets the new condition state but also continues execution with the new state. A
+shortcut rule should not be used in a loop where there is code between the start
+of the loop and the *re2c* block unless *$$re2c:cond:goto$$* is changed
+to *$$continue$$*. If code is necessary before all rules (though not simple
+jumps) you can do so by using *$$<!$$* pseudo-rules.
+
+****
+$$<$$_condition-list_> _regular-expression_ { _$$C/C++ code$$_ }
+
+$$<$$_condition-list_> _regular-expression_ := _$$C/C++ code$$_
+
+$$<$$_condition-list_> * { _$$C/C++ code$$_ }
+
+$$<$$_condition-list_> * := _$$C/C++ code$$_
+
+$$<$$_condition-list_> _regular-expression_ $$=>$$ _condition_ { _$$C/C++ code$$_ }
+
+$$<$$_condition-list_> _regular-expression_ $$=>$$ _condition_ := _$$C/C++ code$$_
+
+$$<$$_condition-list_> _regular-expression_ :$$=>$$ _condition_
+
+<$$*$$> _regular-expression_ { _$$C/C++ code$$_ }
+
+<$$*$$> _regular-expression_ := _$$C/C++ code$$_
+
+<$$*$$> * { _$$C/C++ code$$_ }
+
+<$$*$$> * := _$$C/C++ code$$_
+
+<$$*$$> _regular-expression_ $$=>$$ _condition_ { _$$C/C++ code$$_ }
+
+<$$*$$> _regular-expression_ $$=>$$ _condition_ := _$$C/C++ code$$_
+
+<$$*$$> _regular-expression_ :$$=>$$ _condition_
+
+$$<>$$ { _$$C/C++ code$$_ }
+
+$$<>$$ := _$$C/C++ code$$_
+
+$$<>$$ $$=>$$ _condition_ { _$$C/C++ code$$_ }
+
+$$<>$$ $$=>$$ _condition_ := _$$C/C++ code$$_
+
+$$<>$$ :$$=>$$ _condition_
+
+<!_condition-list_> { _$$C/C++ code$$_ }
+
+<!_condition-list_> := _$$C/C++ code$$_
+
+<!$$*$$> { _$$C/C++ code$$_ }
+
+<!$$*$$> := _$$C/C++ code$$_
+****
+
+_named definitions_ are of the form:
+
+****
+_name_ = _regular-expression_;
+****
+
+If *-F* is active, then named definitions are also of the form:
+
+****
+_name_ _regular-expression_
+****
+
+_inplace configurations_ are of the form:
+
+****
+re2c$$:$$_name_ = _value_;
+
+re2c$$:$$_name_ = ``_value_'';
+****
+
+
+REGULAR EXPRESSIONS
+-------------------
+``foo'' ::
+    literal string ``foo''. ANSI-C escape sequences can be used.
+
+`foo' ::
+    literal string ``foo'' (characters [a-zA-Z] treated case-insensitive). ANSI-C escape sequences can be used.
+
+[xyz] ::
+    character class; in this case, _regular-expression_ matches either `x', `y', or `z'.
+
+[abj-oZ] ::
+    character class with a range in it; matches `a', `b', any letter from `j' through `o' or `Z'.
+
+[^_class_] ::
+    inverted character class.
+
+_r_ \ _s_ ::
+    match any _r_ which isn't _s_. _r_ and _s_ must be _regular-expression_$$s$$ which can be expressed as character classes.
+
+_r_ * ::
+    zero or more _r_'s, where _r_ is any _regular-expression_.
+
+_r_ + ::
+    one or more _r_'s.
+
+_r_ ? ::
+    zero or one _r_'s (that is, an optional _r_).
+
+_name_ ::
+    the expansion of the _named definition_.
+
+( _r_ ) ::
+    _r_; parentheses are used to override precedence.
+
+_r_ _s_ ::
+    _r_ followed by _s_ (concatenation).
+
+_r_ | _s_ ::
+    either _r_ or _s_ (alternative).
+
+_r_ / _s_ ::
+    _r_ but only if it is followed by _s_. Note that _s_ is not part of the matched text.
+    This type of _regular-expression_ is called ``trailing context''.
+    Trailing context can only be the end of a rule and not part of a named definition.
+
+_r_ { _n_ } ::
+    matches _r_ exactly _n_ times.
+
+_r_ { _n_ , } ::
+    matches _r_ at least _n_ times.
+
+_r_ { _n_ , _m_ } ::
+    matches _r_ at least _n_ times, but not more than _m_ times.
+
+$$.$$ ::
+    match any character except newline.
+
+_def_ ::
+    matches named definition as specified by _def_ only if *-F* is off.
+    If *-F* is active then this behaves like it was enclosed in double quotes and matches the string ``def''.
+
+Character classes and string literals may contain octal or hexadecimal 
+character definitions and the following set of escape sequences:
+*\a*, *\b*, *\f*, *\n*, *\r*, *\t*, *\v*, *\\*.
+An octal character is defined by a backslash followed by its three octal digits (e.g. *\377*).
+Hexadecimal characters from 0 to 0xFF are defined by backslash, a lower cased `x' and two hexadecimal digits (e.g. *\x12*).
+Hexadecimal characters from 0x100 to 0xFFFF are defined by backslash, a lower cased `u' (or an upper cased `X') and four hexadecimal digits (e.g. *\u1234*).
+Hexadecimal characters from 0x10000 to 0xFFFFffff are defined by backslash, an upper cased `U' and eight hexadecimal digits (e.g. *\U12345678*).
+
+The only portable ``any'' rule is the default rule *$$*$$*.
+
+
+INPLACE CONFIGURATIONS
+----------------------
+It is possible to configure code generation inside *re2c* blocks. The following lists the available configurations:
+
+*re2c:condprefix* = yyc_; ::
+Allows to specify the prefix used for condition labels. That is this text is 
+prepended to any condition label in the generated output file.
+
+*re2c:condenumprefix* = yyc; ::
+Allows to specify the prefix used for condition values. That is this text is 
+prepended to any condition enum value in the generated output file. 
+
+*re2c:cond:divider* = ``$$/* *********************************** */$$''; ::
+Allows to customize the divider for condition blocks. You can use `@@' to 
+put the name of the condition or customize the placeholder
+using *re2c:cond:divider@cond*.
+
+*re2c:cond:divider@cond* = @@; ::
+Specifies the placeholder that will be replaced with the condition name
+in *re2c:cond:divider*.
+
+*re2c:cond:goto* = ``goto @@;''; ::
+Allows to customize the condition goto statements used with *$$:=>$$* style rules.
+You can use `@@' to put the name of the condition or customize the placeholder
+using *re2c:cond:goto@cond*. You can also change this to `continue;',
+which would allow you to continue with the next loop cycle including any code
+between loop start and re2c block.
+
+*re2c:cond:goto@cond* = @@; ::
+Specifies the placeholder that will be replaced with the condition label
+in *re2c:cond:goto*.
+
+*re2c:indent:top* = 0; ::
+Specifies the minimum amount of indentation to use. Requires a numeric value 
+greater than or equal to zero.
+
+*re2c:indent:string* = ``\t''; ::
+Specifies the string to use for indentation. Requires a string that should 
+contain only whitespace unless you need this for external tools. The easiest 
+way to specify spaces is to enclose them in single or double quotes. If you do 
+not want any indentation at all you can simply set this to ``$$$$''.
+
+*re2c:yych:conversion* = 0; ::
+When this setting is non zero, then *re2c* automatically generates conversion 
+code whenever yych gets read. In this case the type must be defined using
+*re2c:define:YYCTYPE*.
+
+*re2c:yych:emit* = 1; ::
+Generation of *yych* can be suppressed by setting this to 0.
+
+*re2c:yybm:hex* = 0; ::
+If set to zero then a decimal table is being used else a hexadecimal table 
+will be generated.
+
+*re2c:yyfill:enable* = 1; ::
+Set this to zero to suppress generation of *YYFILL (n)*. When using this be sure
+to verify that the generated scanner does not read behind input. Allowing
+this behavior might introduce severe security issues to your programs.
+
+*re2c:yyfill:check* = 1; ::
+This can be set 0 to suppress output of the pre condition using *YYCURSOR* and
+*YYLIMIT* which becomes useful when *$$YYLIMIT + max (YYFILL)$$* is always accessible.
+
+*re2c:yyfill:parameter* = 1; ::
+Allows to suppress parameter passing to *YYFILL* calls. If set to zero 
+then no parameter is passed to *YYFILL*. However *re2c:define:YYFILL@len*
+allows to specify a replacement string for the actual length value. If set to
+a non zero value then *YYFILL* usage will be followed by the number of 
+requested characters in braces unless *re2c:define:YYFILL:naked* is set. 
+Also look at *re2c:define:YYFILL:naked* and *re2c:define:YYFILL@len*.
+
+*re2c:startlabel* = 0; ::
+If set to a non zero integer then the start label of the next scanner blocks 
+will be generated even if not used by the scanner itself. Otherwise the normal 
+*yy0* like start label is only being generated if needed. If set to a text 
+value then a label with that text will be generated regardless of whether the 
+normal start label is being used or not. This setting is being reset to *0*
+after a start label has been generated.
+
+*re2c:labelprefix* = yy; ::
+Allows to change the prefix of numbered labels. The default is *yy* and
+can be set to any string that is a valid label.
+
+*re2c:state:abort* = 0; ::
+When not zero and switch *-f* is active then the *YYGETSTATE* block will 
+contain a default case that aborts and a -1 case is used for initialization.
+
+*re2c:state:nextlabel* = 0; ::
+Used when *-f* is active to control whether the *YYGETSTATE* block is 
+followed by a *yyNext:* label line. Instead of using *yyNext* you can 
+usually also use configuration *startlabel* to force a specific start label
+or default to *yy0* as start label. Instead of using a dedicated label it 
+is often better to separate the *YYGETSTATE* code from the actual scanner code by
+placing a *$$/*!getstate:re2c*/$$* comment.
+
+*re2c:cgoto:threshold* = 9; ::
+When *-g* is active this value specifies the complexity threshold that triggers
+generation of jump tables rather than using nested if's and decision bitfields.
+The threshold is compared against a calculated estimation of if-s needed where 
+every used bitmap divides the threshold by 2.
+
+*re2c:yych:conversion* = 0; ::
+When the input uses signed characters and *-s* or *-b* switches are 
+in effect re2c allows to automatically convert to the unsigned character type 
+that is then necessary for its internal single character. When this setting 
+is zero or an empty string the conversion is disabled. Using a non zero number
+the conversion is taken from *YYCTYPE*. If that is given by an inplace 
+configuration that value is being used. Otherwise it will be *(YYCTYPE)* 
+and changes to that configuration are  no longer possible. When this setting is
+a string the braces must be specified. Now assuming your input is a *$$char *$$*
+buffer and you are using above mentioned switches you can set *YYCTYPE* to
+*unsigned char* and this setting to either *1* or *$$(unsigned char)$$*.
+
+*re2c:define:YYCONDTYPE* = *YYCONDTYPE*; ::
+Enumeration used for condition support with *-c* mode.
+
+*re2c:define:YYCTXMARKER* = *YYCTXMARKER*; ::
+Allows to overwrite the define *YYCTXMARKER* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYCTYPE* = *YYCTYPE*; ::
+Allows to overwrite the define *YYCTYPE* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYCURSOR* = *YYCURSOR*; ::
+Allows to overwrite the define *YYCURSOR* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYDEBUG* = *YYDEBUG*; ::
+Allows to overwrite the define *YYDEBUG* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYFILL* = *YYFILL*; ::
+Allows to overwrite the define *YYFILL* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYFILL:naked* = *0*; ::
+When set to 1 neither braces, parameter nor semicolon gets emitted.
+
+*re2c:define:YYFILL@len* = @@; ::
+When using *re2c:define:YYFILL* and *re2c:yyfill:parameter* is 0 then
+any occurrence of this text inside *YYFILL* will be replaced with the actual
+length value.
+
+*re2c:define:YYGETCONDITION* = *YYGETCONDITION*; ::
+Allows to overwrite the define *YYGETCONDITION*.
+
+*re2c:define:YYGETCONDITION:naked* = *0*; ::
+When set to 1 neither braces, parameter nor semicolon gets emitted.
+
+*re2c:define:YYGETSTATE* = *YYGETSTATE*; ::
+Allows to overwrite the define *YYGETSTATE* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYGETSTATE:naked* = *0*; ::
+When set to 1 neither braces, parameter nor semicolon gets emitted.
+
+*re2c:define:YYLIMIT* = *YYLIMIT*; ::
+Allows to overwrite the define *YYLIMIT* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYMARKER* = *YYMARKER*; ::
+Allows to overwrite the define *YYMARKER* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYSETCONDITION* = *YYSETCONDITION*; ::
+Allows to overwrite the define *YYSETCONDITION*.
+
+*re2c:define:YYSETCONDITION@cond* = @@; ::
+When using *re2c:define:YYSETCONDITION* then any occurrence of this text 
+inside *YYSETCONDITION* will be replaced with the actual new condition value.
+
+*re2c:define:YYSETSTATE* = *YYSETSTATE*; ::
+Allows to overwrite the define *YYSETSTATE* and thus avoiding it by setting the
+value to the actual code needed.
+
+*re2c:define:YYSETSTATE:naked* = *0*; ::
+When set to 1 neither braces, parameter nor semicolon gets emitted.
+
+*re2c:define:YYSETSTATE@state* = @@; ::
+When using *re2c:define:YYSETSTATE* then any occurrence of this text 
+inside *YYSETSTATE* will be replaced with the actual new state value.
+
+*re2c:label:yyFillLabel* = *yyFillLabel*; ::
+Allows to overwrite the name of the label *yyFillLabel*.
+
+*re2c:label:yyNext* = *yyNext*; ::
+Allows to overwrite the name of the label *yyNext*.
+
+*re2c:variable:yyaccept* = *yyaccept*; ::
+Allows to overwrite the name of the variable *yyaccept*.
+
+*re2c:variable:yybm* = *yybm*; ::
+Allows to overwrite the name of the variable *yybm*.
+
+*re2c:variable:yych* = *yych*; ::
+Allows to overwrite the name of the variable *yych*.
+
+*re2c:variable:yyctable* = *yyctable*; ::
+When both *-c* and *-g* are active then *re2c* uses this variable to 
+generate a static jump table for *YYGETCONDITION*.
+
+*re2c:variable:yystable* = *yystable*; ::
+When both *-f* and *-g* are active then *re2c* uses this variable to 
+generate a static jump table for *YYGETSTATE*.
+
+*re2c:variable:yytarget* = *yytarget*; ::
+Allows to overwrite the name of the variable *yytarget*.
+
+
+SCANNER WITH STORABLE STATES
+----------------------------
+When the *-f* flag is specified, *re2c* generates a scanner that
+can store its current state, return to the caller, and later resume
+operations exactly where it left off.
+
+The default operation of *re2c* is a ``pull'' model, where the scanner asks
+for extra input whenever it needs it. However, this mode of operation
+assumes that the scanner is the ``owner'' of the parsing loop, and that may
+not always be convenient.
+
+Typically, if there is a preprocessor ahead of the scanner in the stream,
+or for that matter any other procedural source of data, the scanner cannot
+``ask'' for more data unless both scanner and source live in separate threads.
+
+The *-f* flag is useful for just this situation: it lets users design
+scanners that work in a ``push'' model, i.e. where data is fed to the scanner
+chunk by chunk. When the scanner runs out of data to consume, it just stores
+its state, and returns to the caller. When more input data is fed to the scanner,
+it resumes operations exactly where it left off.
+
+When using the *-f* option *re2c* does not accept stdin because it has to do the 
+full generation process twice which means it has to read the input twice. That
+means *re2c* would fail in case it cannot open the input twice or reading the
+input for the first time influences the second read attempt.
+
+Changes needed compared to the ``pull'' model:
+
+. User has to supply macros *YYSETSTATE (state)* and *YYGETSTATE ()*.
+
+. The *-f* option inhibits declaration of *yych* and
+*yyaccept*. So the user has to declare these. Also the user has
+to save and restore these. In the example *examples/push.re* these
+are declared as fields of the (C\++) class of which the scanner is a
+method, so they do not need to be saved/restored explicitly. For C
+they could e.g. be made macros that select fields from a structure
+passed in as parameter. Alternatively, they could be declared as local
+variables, saved with *YYFILL (n)* when it decides to return and restored
+at entry to the function. Also, it could be more efficient to save the
+state from *YYFILL (n)* because *YYSETSTATE (state)* is called
+unconditionally. *YYFILL (n)* however does not get *state* as
+parameter, so we would have to store state in a local variable by
+*YYSETSTATE (state)*.
+
+. Modify *YYFILL (n)* to return (from the function calling it) if more input is needed.
+
+. Modify caller to recognise ``more input is needed'' and respond appropriately.
+
+. The generated code will contain a switch block that is used to restore 
+the last state by jumping behind the corresponding *YYFILL (n)* call. This code is
+automatically generated in the epilog of the first *$$/*!re2c */$$* block. 
+It is possible to trigger generation of the *YYGETSTATE ()* block earlier by 
+placing a *$$/*!getstate:re2c*/$$* comment. This is especially useful when
+the scanner code should be wrapped inside a loop.
+
+Please see *examples/push.re* for push-model scanner. The generated code can be
+tweaked using inplace configurations *$$state:abort$$* and *$$state:nextlabel$$*.
+
+
+SCANNER WITH CONDITION SUPPORT
+------------------------------
+You can precede regular expressions with a list of condition names when using the *-c*
+switch. In this case *re2c* generates scanner blocks for each condition, where each of the
+generated blocks has its own precondition. The precondition is given by the 
+interface define *YYGETCONDITION ()* and must be of type *YYCONDTYPE*.
+
+There are two special rule types. First, the rules of the condition *$$*$$* are 
+merged to all  conditions. And second the empty condition list allows to 
+provide a code block that does not have a scanner part. Meaning it does not 
+allow any regular expression. The condition value referring to this special 
+block is always the one with the enumeration value 0. This way the code of this
+special rule can be used to initialize a scanner. It is in no way necessary to
+have these rules: but sometimes it is helpful to have a dedicated uninitialized
+condition state.
+
+Non empty rules allow to specify the new condition, which makes them
+transition rules. Besides generating calls for the define *YYSETCONDITION*
+no other special code is generated.
+
+There is another kind of special rules that allow to prepend code to any code
+block of all rules of a certain set of conditions or to all code blocks to all
+rules. This can be helpful when some operation is common among rules. For
+instance this can be used to store the length of the scanned string. These
+special setup rules start with an exclamation mark followed by either a list
+of conditions *$$<! condition, ... >$$* or a star *$$<!*>$$*.
+When *re2c* generates the code for a rule whose state does not have a
+setup rule and a star'd setup rule is present, then that code will be used
+as setup code.
+
+
+ENCODINGS
+---------
+*re2c* supports the following encodings: ASCII (default), EBCDIC (*-e*), UCS-2 (*-w*), 
+UTF-16 (*-x*), UTF-32 (*-u*) and UTF-8 (*-8*). ASCII is default. You can 
+either pass cmd flag or use _inplace configuration_ in the form *$$re2c:flags$$*.
+
+The following concepts should be clarified when talking about encoding. _Code point_ 
+is an abstract number, which represents single encoding symbol. _Code unit_ is the 
+smallest unit of memory, which is used in the encoded text (it corresponds to one 
+character in the input stream). One or more code units can be needed to represent 
+a single code point, depending on the encoding. In _fixed-length_ encoding, each 
+code point is represented with equal number of code units. In _variable-length_ 
+encoding, different code points can be represented with different number of code units.
+
+*ASCII* ::
+is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
+to 0xFF (note that this is *re2c*-specific understanding of ASCII). One code point 
+is represented with exactly one 1-byte code unit, which has the same value as the 
+code point. Size of *YYCTYPE* must be 1 byte.
+
+*EBCDIC* ::
+is a fixed-length encoding. Its code space includes 0x100 code points, from 0 
+to 0xFF. One code point is represented with exactly one 1-byte code unit, which has 
+the same value as the code point. Size of *YYCTYPE* must be 1 byte.
+
+*UCS-2* ::
+is a fixed-length encoding. Its code space includes 0x10000 code points, from 0 
+to 0xFFFF. One code point is represented with exactly one 2-byte code unit, which has 
+the same value as the code point. Size of *YYCTYPE* must be 2 bytes.
+
+*UTF-16* ::
+is a variable-length encoding. Its code space includes all Unicode code points, 
+from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with one or 
+two 2-byte code units. Size of *YYCTYPE* must be 2 bytes.
+
+*UTF-32* ::
+is a fixed-length encoding. Its code space includes all Unicode code points, 
+from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point is represented with exactly 
+one 4-byte code unit. Size of *YYCTYPE* must be 4 bytes.
+
+*UTF-8* ::
+is a variable-length encoding. Its code space includes all 
+Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One code point 
+is represented with sequence of one, two, three or four 1-byte code units. Size of 
+*YYCTYPE* must be 1 byte.
+
+In Unicode, values from range 0xD800 to 0xDFFF (surrogates) are not valid Unicode 
+code points, any encoded sequence of code units, that would map to Unicode code points 
+in the range 0xD800-0xDFFF, is ill-formed.
+The user can control how *re2c* treats such ill-formed sequences with *--encoding-policy* _policy_
+flag (see *OPTIONS* section for full explanation).
+
+For some encodings, there are code units, that never occur in valid encoded stream 
+(e.g. 0xFF byte in UTF-8). If the generated scanner must check for invalid input, 
+the only true way to do so is to use default rule *$$*$$*.
+Note, that full range rule *$$[^]$$* won't catch invalid code units when variable-length encoding is used
+(*$$[^]$$* means ``all valid code points'', while default rule *$$*$$* means ``all possible code units'':
+see *Note* about default rule in *SYNTAX* section).
+
+
+UNDERSTANDING RE2C
+------------------
+The subdirectory lessons of the *re2c* distribution contains a few step by step
+lessons to get you started with *re2c*. All examples in the lessons subdirectory
+can be compiled and actually work.
+
+
+BUGS
+----
+. Difference only works for character sets, and not in UTF-8 mode.
+. The generated DFA is not minimal.
+. Features, that are naturally orthogonal (such as reusable rules, conditions, 
+setup rules and default rules), cannot always be combined. E.g., one cannot set 
+setup/default rule for condition in scanner with reusable rules.
+. *re2c* does too much unnecessary work: e.g., if *$$/*!use:re2c ... */$$* block has 
+additional rules, these rules are parsed 4 times, while they should be parsed only once.
+. The *re2c* internal algorithms need documentation.
+
+
+SEE ALSO
+--------
+flex(1), lex(1), quex (http://quex.sourceforge.net)
+
+More information on *re2c* can be found here: http://re2c.org/.
+
+
+AUTHORS
+-------
+. Peter Bumbulis peter@csg.uwaterloo.ca
+. Brian Young bayoung@acm.org
+. Dan Nuffer nuffer@users.sourceforge.net
+. Marcus Boerger helly@users.sourceforge.net
+. Hartmut Kaiser hkaiser@users.sourceforge.net
+. Emmanuel Mogenet mgix@mgix.com (added storable state)
+. Ulya Trofimovich skvadrik@gmail.com (added UTF-8 and UTF-16 support)
+
+
+VERSION INFORMATION
+-------------------
+This manpage describes *re2c*, version @PACKAGE_VERSION@, package date @PACKAGE_DATE@.
diff --git a/re2c/re2c_docs.sh b/re2c/re2c_docs.sh

new file mode 100755 (executable)

index 0000000..f7f8592
--- /dev/null
+++ b/re2c/re2c_docs.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# generate re2c.1 from re2c.ad
+a2x -f manpage re2c.ad
+
+# generate htdocs/manual.html from re2c.ad
+asciidoc -o htdocs/manual.html re2c.ad
diff --git a/re2c/release.sh b/re2c/release.sh

new file mode 100755 (executable)

index 0000000..46a6205
--- /dev/null
+++ b/re2c/release.sh
@@ -0,0 +1,35 @@
+#!/bin/sh -e
+
+if [ $# -ne 1 ]
+then
+    echo "usage: ./release.sh <version>"
+    exit
+fi
+
+version="$1"
+
+# edit version in configure.in
+lcontext="AC_INIT\(re2c, "
+rcontext=", re2c-general@lists\.sourceforge\.net\)"
+old="[0-9]+\.[0-9]+\.[0-9]+"
+new=$version
+sed -i -E "s/$lcontext$old$rcontext/$lcontext$new$rcontext/" configure.in
+
+./autogen.sh
+./configure
+make clean
+make tests -j5
+make zip dist docs
+
+# commit release
+git commit -a -m "Release $version."
+
+# upload files on sourceforge
+src=release
+src_tarballs=$src/frs/project/re2c/re2c/$version
+src_docs=$src/project-web/re2c/htdocs
+mkdir -p $src_tarballs
+mkdir -p $src_docs
+cp re2c-$version-src.zip re2c-$version.tar.gz $src_tarballs
+cp htdocs/index.html htdocs/manual.html $src_docs
+rsync -rK $src/ skvadrik@web.sourceforge.net:/home
author	Ulya Fokanova <skvadrik@gmail.com>
	Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)
committer	Ulya Fokanova <skvadrik@gmail.com>
	Fri, 25 Jul 2014 15:41:53 +0000 (18:41 +0300)
add-release.txt		patch \| blob \| history
re2c/.gitignore		patch \| blob \| history
re2c/CHANGELOG		patch \| blob \| history
re2c/Makefile.am		patch \| blob \| history
re2c/htdocs/index.html		patch \| blob \| history
re2c/htdocs/manual.html.in	[deleted file]	patch \| blob \| history
re2c/re2c.1.in	[deleted file]	patch \| blob \| history
re2c/re2c.ad.in	[new file with mode: 0644]	patch \| blob
re2c/re2c_docs.sh	[new file with mode: 0755]	patch \| blob
re2c/release.sh	[new file with mode: 0755]	patch \| blob