From 739d2b7df8ebccf8ee8558702a923b01a6f268b6 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sun, 9 Apr 2017 19:36:28 +0100 Subject: [PATCH] Manpage: include paths must be relative to 'top_srcdir'. Otherwise out-of-source builds won't work. --- re2c/Makefile.am | 13 + re2c/bootstrap/doc/re2c.1 | 661 +++++++++++++++++++------------------- re2c/doc/manpage.rst.in | 24 +- 3 files changed, 360 insertions(+), 338 deletions(-) diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 560f715d..9cfb91de 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -189,12 +189,25 @@ CUSTOM = \ # docs SRC_DOC = doc/manpage.rst +SRC_DOC_EXT = \ + doc/manual/syntax/rules.rst_ \ + doc/manual/syntax/interface.rst_ \ + doc/manual/syntax/configurations.rst_ \ + doc/manual/syntax/regular_expressions.rst_ \ + doc/manual/warnings/warnings_list.rst \ + doc/manual/warnings/warnings_general.rst \ + doc/manual/features/generic_api/generic_api.rst_ \ + doc/manual/features/conditions/conditions.rst_ \ + doc/manual/features/state/state.rst_ \ + doc/manual/features/encodings/encodings.rst_ \ + doc/manual/options/options_list.rst DOC = doc/re2c.1 man_MANS = $(DOC) EXTRA_DIST = \ $(BOOTSTRAP) \ $(CUSTOM) \ + $(SRC_DOC_EXT) \ CHANGELOG \ NO_WARRANTY \ README \ diff --git a/re2c/bootstrap/doc/re2c.1 b/re2c/bootstrap/doc/re2c.1 index b97bab18..baaa8e17 100644 --- a/re2c/bootstrap/doc/re2c.1 +++ b/re2c/bootstrap/doc/re2c.1 @@ -43,72 +43,71 @@ control and customize the generated DFA. .INDENT 0.0 .TP .B \fB\-? \-h \-\-help\fP -Invoke a short help. +Show a short help screen: .TP .B \fB\-b \-\-bit\-vectors\fP -Implies \fB\-s\fP\&. Use bit vectors as well in the -attempt to coax better code out of the compiler. Most useful for -specifications with more than a few keywords (e.g. for most programming +Implies \fB\-s\fP\&. Use bit vectors as well to try to +coax better code out of the compiler. Most useful for +specifications with more than a few keywords (e.g., for most programming languages). 
.TP .B \fB\-c \-\-conditions\fP -Used to support (f)lex\-like condition support. +Used for (f)lex\-like condition support. .TP .B \fB\-d \-\-debug\-output\fP Creates a parser that dumps information about -the current position and in which state the parser is while parsing the -input. This is useful to debug parser issues and states. If you use this -switch you need to define a macro \fBYYDEBUG\fP that is called like a +the current position and the state the parser is in. +This is useful for debugging parser issues and states. If you use this +switch, you need to define a \fBYYDEBUG\fP macro, which will be called like a function with two parameters: \fBvoid YYDEBUG (int state, char current)\fP\&. The first parameter receives the state or \fB\-1\fP and the second parameter receives the input at the current cursor. .TP .B \fB\-D \-\-emit\-dot\fP -Emit Graphviz dot data. It can then be processed -with e.g. \fBdot \-Tpng input.dot > output.png\fP\&. Please note that +Emit Graphviz dot data, which can then be processed +with e.g., \fBdot \-Tpng input.dot > output.png\fP\&. Please note that scanners with many states may crash dot. .TP .B \fB\-e \-\-ecb\fP Generate a parser that supports EBCDIC. The generated -code can deal with any character up to 0xFF. In this mode \fBre2c\fP assumes -that input character size is 1 byte. This switch is incompatible with -\fB\-w\fP, \fB\-x\fP, \fB\-u\fP and \fB\-8\fP\&. +code can deal with any character up to 0xFF. In this mode, \fBre2c\fP assumes +an input character size of 1 byte. This switch is incompatible with +\fB\-w\fP, \fB\-x\fP, \fB\-u\fP, and \fB\-8\fP\&. .TP .B \fB\-f \-\-storable\-state\fP Generate a scanner with support for storable state. .TP .B \fB\-F \-\-flex\-syntax\fP Partial support for flex syntax. When this flag -is active then named definitions must be surrounded by curly braces and -can be defined without an equal sign and the terminating semi colon. -Instead names are treated as direct double quoted strings. 
+is active, named definitions must be surrounded by curly braces and +can be defined without an equal sign and the terminating semicolon. +Instead, names are treated as direct double quoted strings. .TP .B \fB\-g \-\-computed\-gotos\fP Generate a scanner that utilizes GCC\(aqs -computed goto feature. That is \fBre2c\fP generates jump tables whenever a -decision is of a certain complexity (e.g. a lot of if conditions are -otherwise necessary). This is only useable with GCC and produces output -that cannot be compiled with any other compiler. Note that this implies -\fB\-b\fP and that the complexity threshold can be configured using the -inplace configuration \fBcgoto:threshold\fP\&. +computed\-goto feature. That is, \fBre2c\fP generates jump tables whenever a +decision is of certain complexity (e.g., a lot of if conditions would be +otherwise necessary). This is only usable with compilers that support this feature. +Note that this implies \fB\-b\fP and that the complexity threshold can be configured +using the \fBcgoto:threshold\fP inplace configuration. .TP .B \fB\-i \-\-no\-debug\-info\fP Do not output \fB#line\fP information. This is -useful when you want use a CMS tool with the \fBre2c\fP output which you -might want if you do not require your users to have \fBre2c\fP themselves -when building from your source. +useful when you want to use a CMS tool with \fBre2c\fP\(aqs output. You might +want to do this if you do not want to impose re2c as a build requirement +for your source. .TP .B \fB\-o OUTPUT \-\-output=OUTPUT\fP Specify the \fBOUTPUT\fP file. .TP .B \fB\-r \-\-reusable\fP Allows reuse of scanner definitions with \fB/*!use:re2c */\fP after \fB/*!rules:re2c */\fP\&. -In this mode no \fB/*!re2c */\fP block and exactly one \fB/*!rules:re2c */\fP must be present. -The rules are being saved and used by every \fB/*!use:re2c */\fP block that follows. +In this mode, no \fB/*!re2c */\fP block and exactly one \fB/*!rules:re2c */\fP must be present. 
+The rules are saved and used by every \fB/*!use:re2c */\fP block that follows. These blocks can contain inplace configurations, especially \fBre2c:flags:e\fP, -\fBre2c:flags:w\fP, \fBre2c:flags:x\fP, \fBre2c:flags:u\fP and \fBre2c:flags:8\fP\&. +\fBre2c:flags:w\fP, \fBre2c:flags:x\fP, \fBre2c:flags:u\fP, and \fBre2c:flags:8\fP\&. That way it is possible to create the same scanner multiple times for -different character types, different input mechanisms or different output mechanisms. +different character types, different input mechanisms, or different output mechanisms. The \fB/*!use:re2c */\fP blocks can also contain additional rules that will be appended to the set of rules in \fB/*!rules:re2c */\fP\&. .TP @@ -124,62 +123,64 @@ activated when \fB\-c\fP is in use. .B \fB\-u \-\-unicode\fP Generate a parser that supports UTF\-32. The generated code can deal with any valid Unicode character up to 0x10FFFF. In this -mode \fBre2c\fP assumes that input character size is 4 bytes. This switch is -incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-x\fP and \fB\-8\fP\&. This implies \fB\-s\fP\&. +mode, \fBre2c\fP assumes an input character size of 4 bytes. This switch is +incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-x\fP, and \fB\-8\fP\&. This implies \fB\-s\fP\&. .TP .B \fB\-v \-\-version\fP Show version information. .TP .B \fB\-V \-\-vernum\fP -Show the version as a number XXYYZZ. +Show the version as a number in the MMmmpp (Major, minor, patch) format. .TP .B \fB\-w \-\-wide\-chars\fP Generate a parser that supports UCS\-2. The generated code can deal with any valid Unicode character up to 0xFFFF. -In this mode \fBre2c\fP assumes that input character size is 2 bytes. This -switch is incompatible with \fB\-e\fP, \fB\-x\fP, \fB\-u\fP and \fB\-8\fP\&. This implies +In this mode, \fBre2c\fP assumes an input character size of 2 bytes. This +switch is incompatible with \fB\-e\fP, \fB\-x\fP, \fB\-u\fP, and \fB\-8\fP\&. This implies \fB\-s\fP\&. 
.TP .B \fB\-x \-\-utf\-16\fP Generate a parser that supports UTF\-16. The generated code can deal with any valid Unicode character up to 0x10FFFF. In this -mode \fBre2c\fP assumes that input character size is 2 bytes. This switch is -incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-u\fP and \fB\-8\fP\&. This implies \fB\-s\fP\&. +mode, \fBre2c\fP assumes an input character size of 2 bytes. This switch is +incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-u\fP, and \fB\-8\fP\&. This implies \fB\-s\fP\&. .TP .B \fB\-8 \-\-utf\-8\fP Generate a parser that supports UTF\-8. The generated code can deal with any valid Unicode character up to 0x10FFFF. In this -mode \fBre2c\fP assumes that input character size is 1 byte. This switch is -incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-x\fP and \fB\-u\fP\&. +mode, \fBre2c\fP assumes an input character size of 1 byte. This switch is +incompatible with \fB\-e\fP, \fB\-w\fP, \fB\-x\fP, and \fB\-u\fP\&. .TP .B \fB\-\-case\-insensitive\fP -All strings are case insensitive, so all -"\-expressions are treated in the same way \(aq\-expressions are. +Makes all strings case insensitive. This makes +"\-quoted expressions behave as \(aq\-quoted expressions. .TP .B \fB\-\-case\-inverted\fP Invert the meaning of single and double quoted -strings. With this switch single quotes are case sensitive and double +strings. With this switch, single quotes are case sensitive and double quotes are case insensitive. .TP .B \fB\-\-no\-generation\-date\fP Suppress date output in the generated file. .TP +.B \fB\-\-no\-version\fP +Suppress version output in the generated file. +.TP .B \fB\-\-no\-generation\-date\fP Suppress version output in the generated file. .TP .B \fB\-\-encoding\-policy POLICY\fP Specify how \fBre2c\fP must treat Unicode surrogates. 
\fBPOLICY\fP can be one of the following: \fBfail\fP (abort with -error when surrogate encountered), \fBsubstitute\fP (silently substitute -surrogate with error code point 0xFFFD), \fBignore\fP (treat surrogates as -normal code points). By default \fBre2c\fP ignores surrogates (for backward -compatibility). Unicode standard says that standalone surrogates are +an error when a surrogate is encountered), \fBsubstitute\fP (silently replace +surrogates with the error code point 0xFFFD), \fBignore\fP (treat surrogates as +normal code points). By default, \fBre2c\fP ignores surrogates (for backward +compatibility). The Unicode standard says that standalone surrogates are invalid code points, but different libraries and programs treat them differently. .TP .B \fB\-\-input INPUT\fP -Specify re2c input API. \fBINPUT\fP can be one of the -following: \fBdefault\fP, \fBcustom\fP\&. +Specify re2c\(aqs input API. \fBINPUT\fP can be either \fBdefault\fP or \fBcustom\fP\&. .TP .B \fB\-S \-\-skeleton\fP Instead of embedding re2c\-generated code into C/C++ @@ -187,89 +188,119 @@ source, generate a self\-contained program for the same DFA. Most useful for correctness and performance testing. .TP .B \fB\-\-empty\-class POLICY\fP -What to do if user inputs empty character +What to do if the user uses an empty character class. \fBPOLICY\fP can be one of the following: \fBmatch\-empty\fP (match empty input: pretty illogical, but this is the default for backwards -compatibility reason), \fBmatch\-none\fP (fail to match on any input), +compatibility reasons), \fBmatch\-none\fP (fail to match on any input), \fBerror\fP (compilation error). Note that there are various ways to -construct empty class, e.g: [], [^\ex00\-\exFF], +construct an empty class, e.g., [], [^\ex00\-\exFF], [\ex00\-\exFF][\ex00\-\exFF]. .TP .B \fB\-\-dfa\-minimization \fP -Internal algorithm used by re2c to minimize DFA (defaults to \fBmoore\fP). 
-Both table filling and Moore\(aqs algorithms should produce identical DFA (up to states relabelling). -Table filling algorithm is much simpler and slower; it serves as a reference implementation. +The internal algorithm used by re2c to minimize the DFA (defaults to \fBmoore\fP). +Both the table filling algorithm and the Moore algorithm should produce the same DFA (up to states relabeling). +The table filling algorithm is much simpler and slower; it serves as a reference implementation. .TP .B \fB\-1 \-\-single\-pass\fP -Deprecated and does nothing (single pass is by default now). +Deprecated. Does nothing (single pass is the default now). +.UNINDENT +.INDENT 0.0 .TP .B \fB\-W\fP Turn on all warnings. .TP .B \fB\-Werror\fP -Turn warnings into errors. Note that this option along -doesn\(aqt turn on any warnings, it only affects those warnings that have +Turn warnings into errors. Note that this option alone +doesn\(aqt turn on any warnings; it only affects those warnings that have been turned on so far or will be turned on later. .TP .B \fB\-W\fP -Turn on individual \fBwarning\fP\&. +Turn on a \fBwarning\fP\&. .TP .B \fB\-Wno\-\fP -Turn off individual \fBwarning\fP\&. +Turn off a \fBwarning\fP\&. .TP .B \fB\-Werror\-\fP -Turn on individual \fBwarning\fP and treat it as error (this implies \fB\-W\fP). +Turn on a \fBwarning\fP and treat it as an error (this implies \fB\-W\fP). .TP .B \fB\-Wno\-error\-\fP -Don\(aqt treat this particular \fBwarning\fP as error. This doesn\(aqt turn off +Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off the warning itself. +.UNINDENT +.INDENT 0.0 +.TP +.B \fB\-W\fP +Turn on all warnings. +.TP +.B \fB\-Werror\fP +Turn warnings into errors. Note that this option alone +doesn\(aqt turn on any warnings; it only affects those warnings that have +been turned on so far or will be turned on later. +.TP +.B \fB\-W\fP +Turn on a \fBwarning\fP\&. +.TP +.B \fB\-Wno\-\fP +Turn off a \fBwarning\fP\&. 
+.TP +.B \fB\-Werror\-\fP +Turn on a \fBwarning\fP and treat it as an error (this implies \fB\-W\fP). +.TP +.B \fB\-Wno\-error\-\fP +Don\(aqt treat this particular \fBwarning\fP as an error. This doesn\(aqt turn off +the warning itself. +.UNINDENT +.INDENT 0.0 .TP .B \fB\-Wcondition\-order\fP Warn if the generated program makes implicit -assumptions about condition numbering. One should use either \fB\-t, \-\-type\-header\fP option or -\fB/*!types:re2c*/\fP directive to generate mapping of condition names to numbers and use -autogenerated condition names. +assumptions about condition numbering. You should use either the \fB\-t, \-\-type\-header\fP option or +the \fB/*!types:re2c*/\fP directive to generate a mapping of condition names to numbers and then use +the autogenerated condition names. .TP .B \fB\-Wempty\-character\-class\fP -Warn if regular expression contains empty -character class. From the rational point of view trying to match empty +Warn if a regular expression contains an empty +character class. Rationally, trying to match an empty character class makes no sense: it should always fail. However, for -backwards compatibility reasons \fBre2c\fP allows empty character class and -treats it as empty string. Use \fB\-\-empty\-class\fP option to change default -behaviour. +backwards compatibility reasons, \fBre2c\fP allows empty character classes and +treats them as empty strings. Use the \fB\-\-empty\-class\fP option to change the default +behavior. .TP .B \fB\-Wmatch\-empty\-string\fP -Warn if regular expression in a rule is -nullable (matches empty string). If DFA runs in a loop and empty match -is unintentional (input position in not advanced manually), lexer may -get stuck in eternal loop. +Warn if a regular expression in a rule is +nullable (matches an empty string). If the DFA runs in a loop and an empty match +is unintentional (the input position is not advanced manually), the lexer may +get stuck in an infinite loop. 
.TP .B \fB\-Wswapped\-range\fP -Warn if range lower bound is greater that upper -bound. Default \fBre2c\fP behaviour is to silently swap range bounds. +Warn if the lower bound of a range is greater than its upper +bound. The default behavior is to silently swap the range bounds. .TP .B \fB\-Wundefined\-control\-flow\fP Warn if some input strings cause undefined -control flow in lexer (the faulty patterns are reported). This is the -most dangerous and common mistake. It can be easily fixed by adding -default rule \fB*\fP (this rule has the lowest priority, matches any code unit and consumes +control flow in the lexer (the faulty patterns are reported). This is the +most dangerous and most common mistake. It can be easily fixed by adding +the default rule (\fB*\fP) (this rule has the lowest priority, matches any code unit, and consumes exactly one code unit). .TP +.B \fB\-Wunreachable\-rules\fP +Warn about rules that are shadowed by other rules and will never match. +.TP .B \fB\-Wuseless\-escape\fP Warn if a symbol is escaped when it shouldn\(aqt be. -By default re2c silently ignores escape, but this may as well indicate a -typo or an error in escape sequence. +By default, re2c silently ignores such escapes, but this may as well indicate a +typo or error in the escape sequence. .UNINDENT .SH INTERFACE CODE .sp The user must supply interface code either in the form of C/C++ code (macros, functions, variables, etc.) or in the form of \fBINPLACE CONFIGURATIONS\fP\&. Which symbols must be defined and which are optional -depends on a particular use case. +depends on the particular use case. .INDENT 0.0 .TP .B \fBYYCONDTYPE\fP -In \fB\-c\fP mode you can use \fB\-t\fP to generate a file that +In \fB\-c\fP mode, you can use \fB\-t\fP to generate a file that contains the enumeration used as conditions. Each of the values refers to a condition of a rule set. .TP @@ -282,8 +313,8 @@ expressions. .TP .B \fBYYCTYPE\fP Type used to hold an input symbol (code unit). 
Usually -\fBchar\fP or \fBunsigned char\fP for ASCII, EBCDIC and UTF\-8, \fBunsigned short\fP -for UTF\-16 or UCS\-2 and \fBunsigned int\fP for UTF\-32. +\fBchar\fP or \fBunsigned char\fP for ASCII, EBCDIC or UTF\-8, or \fIunsigned short\fP +for UTF\-16 or UCS\-2, or \fBunsigned int\fP for UTF\-32. .TP .B \fBYYCURSOR\fP l\-value of type \fBYYCTYPE *\fP that points to the current input symbol. The generated code advances @@ -293,7 +324,7 @@ will point to the first character of the following token. .TP .B \fBYYDEBUG (state, current)\fP This is only needed if the \fB\-d\fP flag was -specified. It allows one to easily debug the generated parser by calling a +specified. It allows easy debugging of the generated parser by calling a user defined function for every state. The function should have the following signature: \fBvoid YYDEBUG (int state, char current)\fP\&. The first parameter receives the state or \-1 and the second parameter receives the @@ -302,16 +333,16 @@ input at the current cursor. .B \fBYYFILL (n)\fP The generated code "calls"" \fBYYFILL (n)\fP when the buffer needs (re)filling: at least \fBn\fP additional characters should be -provided. \fBYYFILL (n)\fP should adjust \fBYYCURSOR\fP, \fBYYLIMIT\fP, \fBYYMARKER\fP +provided. \fBYYFILL (n)\fP should adjust \fBYYCURSOR\fP, \fBYYLIMIT\fP, \fBYYMARKER\fP, and \fBYYCTXMARKER\fP as needed. Note that for typical programming languages \fBn\fP will be the length of the longest keyword plus one. The user can -place a comment of the form \fB/*!max:re2c*/\fP to insert \fBYYMAXFILL\fP definition that is set to the maximum +place a comment of the form \fB/*!max:re2c*/\fP to insert a \fBYYMAXFILL\fP define set to the maximum length value. .TP .B \fBYYGETCONDITION ()\fP This define is used to get the condition prior to -entering the scanner code when using \fB\-c\fP switch. The value must be -initialized with a value from the enumeration \fBYYCONDTYPE\fP type. +entering the scanner code when using the \fB\-c\fP switch. 
The value must be +initialized with a value from the \fBYYCONDTYPE\fP enumeration type. .TP .B \fBYYGETSTATE ()\fP The user only needs to define this macro if the \fB\-f\fP @@ -324,15 +355,15 @@ case, the scanner will resume operations right after where the last \fBYYFILL (n)\fP was called. .TP .B \fBYYLIMIT\fP -Expression of type \fBYYCTYPE *\fP that marks the end of the buffer \fBYYLIMIT[\-1]\fP +An expression of type \fBYYCTYPE *\fP that marks the end of the buffer \fBYYLIMIT[\-1]\fP is the last character in the buffer). The generated code repeatedly compares \fBYYCURSOR\fP to \fBYYLIMIT\fP to determine when the buffer needs (re)filling. .TP .B \fBYYMARKER\fP -l\-value of type \fBYYCTYPE *\fP\&. +An l\-value of type \fBYYCTYPE *\fP\&. The generated code saves backtracking information in \fBYYMARKER\fP\&. Some -easy scanners might not use this. +simple scanners might not use this. .TP .B \fBYYMAXFILL\fP This will be automatically defined by \fB/*!max:re2c*/\fP blocks as explained above. @@ -349,23 +380,23 @@ flag was specified. In that case, the generated code "calls" \fBYYSETSTATE\fP is a signed integer that uniquely identifies the specific instance of \fBYYFILL (n)\fP that is about to be called. Should the user wish to save the state of the scanner and have \fBYYFILL (n)\fP return to -the caller, all he has to do is store that unique identifer in a -variable. Later, when the scannered is called again, it will call +the caller, all he has to do is store that unique identifier in a +variable. Later, when the scanner is called again, it will call \fBYYGETSTATE ()\fP and resume execution right where it left off. The generated code will contain both \fBYYSETSTATE (s)\fP and \fBYYGETSTATE\fP even -if \fBYYFILL (n)\fP is being disabled. +if \fBYYFILL (n)\fP is disabled. 
.UNINDENT .SH SYNTAX .sp -Code for \fBre2c\fP consists of a set of \fBRULES\fP, \fBNAMED DEFINITIONS\fP and +Code for \fBre2c\fP consists of a set of \fBRULES\fP, \fBNAMED DEFINITIONS\fP, and \fBINPLACE CONFIGURATIONS\fP\&. .SS RULES .sp -Rules consist of a regular expression (see \fBREGULAR EXPRESSIONS\fP) along with a block of C/C++ code -that is to be executed when the associated regular expression is +Each rule consists of a regular expression (see \fBREGULAR EXPRESSIONS\fP) accompanied by a block of C/C++ code +which is to be executed when the associated regular expression is matched. You can either start the code with an opening curly brace or -the sequence \fB:=\fP\&. When the code with a curly brace then \fBre2c\fP counts the brace depth -and stops looking for code automatically. Otherwise curly braces are not +the sequence \fB:=\fP\&. If you use an opening curly brace, \fBre2c\fP will count brace depth +and stop looking for code automatically. Otherwise, curly braces are not allowed and \fBre2c\fP stops looking for code at the first line that does not begin with whitespace. If two or more rules overlap, the first rule is preferred. @@ -377,7 +408,7 @@ is preferred. .UNINDENT .UNINDENT .sp -There is one special rule: default rule \fB*\fP +There is one special rule: the default rule (\fB*\fP) .INDENT 0.0 .INDENT 3.5 \fB* { C/C++ code }\fP @@ -386,25 +417,25 @@ There is one special rule: default rule \fB*\fP .UNINDENT .UNINDENT .sp -Note that default rule \fB*\fP differs from \fB[^]\fP: default rule has the lowest priority, -matches any code unit (either valid or invalid) and always consumes one character; -while \fB[^]\fP matches any valid code point (not code unit) and can consume multiple -code units. In fact, when variable\-length encoding is used, \fB*\fP -is the only possible way to match invalid input character (see \fBENCODINGS\fP for details). 
-.sp -If \fB\-c\fP is active then each regular expression is preceded by a list -of comma separated condition names. Besides normal naming rules there -are two special cases: \fB<*>\fP (such rules are merged to all conditions) -and \fB<>\fP (such the rule cannot have an associated regular expression, -its code is merged to all actions). Non empty rules may further more specify the new -condition. In that case \fBre2c\fP will generate the necessary code to +Note that the default rule (\fB*\fP) differs from \fB[^]\fP: the default rule has the lowest priority, +matches any code unit (either valid or invalid) and always consumes exactly one character. +\fB[^]\fP, on the other hand, matches any valid code point (not the same as a code unit) and can consume multiple +code units. In fact, when a variable\-length encoding is used, \fB*\fP +is the only possible way to match an invalid input character. +.sp +If \fB\-c\fP is active, then each regular expression is preceded by a list +of comma\-separated condition names. Besides the normal naming rules, there +are two special cases: \fB<*>\fP (these rules are merged to all conditions) +and \fB<>\fP (these rules cannot have an associated regular expression; +their code is merged to all actions). Non\-empty rules may furthermore specify the new +condition. In that case, \fBre2c\fP will generate the necessary code to change the condition automatically. Rules can use \fB:=>\fP as a shortcut to automatically generate code that not only sets the new condition state but also continues execution with the new state. A shortcut rule should not be used in a loop where there is code between the start of the loop and the \fBre2c\fP block unless \fBre2c:cond:goto\fP -is changed to \fBcontinue\fP\&. If code is necessary before all rules (though not simple jumps) you -can doso by using \fB\fP pseudo\-rules. +is changed to \fBcontinue\fP\&. 
If some code is needed before all rules (though not before simple jumps), you +can insert it with \fB\fP pseudo\-rules. .INDENT 0.0 .INDENT 3.5 \fB regular\-expression { C/C++ code }\fP @@ -483,17 +514,17 @@ If \fB\-F\fP is active, then named definitions are also of the form: .INDENT 0.0 .TP .B \fBre2c:condprefix = yyc;\fP -Allows one to specify the prefix used for -condition labels. That is this text is prepended to any condition label +Allows to specify the prefix used for +condition labels. That is, the text to be prepended to condition labels in the generated output file. .TP .B \fBre2c:condenumprefix = yyc;\fP -Allows one to specify the prefix used for -condition values. That is this text is prepended to any condition enum -value in the generated output file. +Allows to specify the prefix used for +condition values. That is, the text to be prepended to condition enum +values in the generated output file. .TP .B \fBre2c:cond:divider = "/* *********************************** */";\fP -Allows one to customize the devider for condition blocks. You can use \fB@@\fP +Allows to customize the divider for condition blocks. You can use \fB@@\fP to put the name of the condition or customize the placeholder using \fBre2c:cond:divider@cond\fP\&. .TP @@ -502,159 +533,148 @@ Specifies the placeholder that will be replaced with the condition name in \fBre2c:cond:divider\fP\&. .TP .B \fBre2c:cond:goto = "goto @@;";\fP -Allows one to customize the condition goto statements used with \fB:=>\fP style rules. You can use \fB@@\fP -to put the name of the condition or ustomize the placeholder using +Allows to customize the condition goto statements used with \fB:=>\fP style rules. You can use \fB@@\fP +to put the name of the condition or customize the placeholder using \fBre2c:cond:goto@cond\fP\&. You can also change this to \fBcontinue;\fP, which would allow you to continue with the next loop cycle including any code -between loop start and re2c block. 
+between your loop start and your re2c block. .TP .B \fBre2c:cond:goto@cond = @@;\fP -Spcifies the placeholder that will be replaced with the condition label in \fBre2c:cond:goto\fP\&. +Specifies the placeholder that will be replaced with the condition label in \fBre2c:cond:goto\fP\&. .TP .B \fBre2c:indent:top = 0;\fP -Specifies the minimum number of indentation to -use. Requires a numeric value greater than or equal zero. +Specifies the minimum amount of indentation to +use. Requires a numeric value greater than or equal to zero. .TP .B \fBre2c:indent:string = "\et";\fP Specifies the string to use for indentation. Requires a string that should -contain only whitespace unless you need this for external tools. The easiest -way to specify spaces is to enclude them in single or double quotes. -If you do not want any indentation at all you can simply set this to "". +contain only whitespace unless you need something else for external tools. The easiest +way to specify spaces is to enclose them in single or double quotes. +If you do not want any indentation at all, you can simply set this to "". .TP .B \fBre2c:yych:conversion = 0;\fP -When this setting is non zero, then \fBre2c\fP automatically generates -conversion code whenever yych gets read. In this case the type must be +When this setting is non zero, \fBre2c\fP automatically generates +conversion code whenever yych gets read. In this case, the type must be defined using \fBre2c:define:YYCTYPE\fP\&. .TP .B \fBre2c:yych:emit = 1;\fP -Generation of \fByych\fP can be suppressed by setting this to 0. +Set this to zero to suppress the generation of \fIyych\fP\&. .TP .B \fBre2c:yybm:hex = 0;\fP -If set to zero then a decimal table is being used else a hexadecimal table will be generated. +If set to zero, a decimal table will be used. Otherwise, a hexadecimal table will be generated. .TP .B \fBre2c:yyfill:enable = 1;\fP -Set this to zero to suppress generation of \fBYYFILL (n)\fP\&. 
When using this be sure to verify that the generated -scanner does not read behind input. Allowing this behavior might -introduce sever security issues to you programs. +Set this to zero to suppress the generation of \fBYYFILL (n)\fP\&. When using this, be sure to verify that the generated +scanner does not read behind the end of your input, allowing such behavior might +introduce several security issues to your program. .TP .B \fBre2c:yyfill:check = 1;\fP -This can be set 0 to suppress output of the -pre condition using \fBYYCURSOR\fP and \fBYYLIMIT\fP which becomes useful when +This can be set to 0 to suppress the generations of +\fBYYCURSOR\fP and \fBYYLIMIT\fP based precondition checks. This option is useful when \fBYYLIMIT + YYMAXFILL\fP is always accessible. .TP .B \fBre2c:define:YYFILL = "YYFILL";\fP -Substitution for \fBYYFILL\fP\&. Note -that by default \fBre2c\fP generates argument in braces and semicolon after +Define a substitution for \fBYYFILL\fP\&. Note that by default, +\fBre2c\fP generates an argument in parentheses and a semicolon after \fBYYFILL\fP\&. If you need to make \fBYYFILL\fP an arbitrary statement rather -than a call, set \fBre2c:define:YYFILL:naked\fP to non\-zero and use -\fBre2c:define:YYFILL@len\fP to denote formal parameter inside of \fBYYFILL\fP +than a call, set \fBre2c:define:YYFILL:naked\fP to a non\-zero value and use +\fBre2c:define:YYFILL@len\fP to set a placeholder for the formal parameter inside of your \fBYYFILL\fP body. .TP .B \fBre2c:define:YYFILL@len = "@@";\fP Any occurrence of this text -inside of \fBYYFILL\fP will be replaced with the actual argument. +inside of a \fBYYFILL\fP call will be replaced with the actual argument. .TP .B \fBre2c:yyfill:parameter = 1;\fP -Controls argument in braces after -\fBYYFILL\fP\&. If zero, agrument is omitted. If non\-zero, argument is -generated unless \fBre2c:define:YYFILL:naked\fP is set to non\-zero. +Controls the argument in the parentheses that follow \fBYYFILL\fP\&. 
If zero, the argument is omitted. +If non\-zero, the argument is generated unless \fBre2c:define:YYFILL:naked\fP is set to non\-zero. .TP .B \fBre2c:define:YYFILL:naked = 0;\fP -Controls argument in braces and -semicolon after \fBYYFILL\fP\&. If zero, both agrument and semicolon are -omitted. If non\-zero, argument is generated unless -\fBre2c:yyfill:parameter\fP is set to zero and semicolon is generated +Controls the argument in the parentheses after \fBYYFILL\fP and +the following semicolon. If zero, both the argument and the semicolon are +omitted. If non\-zero, the argument is generated unless +\fBre2c:yyfill:parameter\fP is set to zero; the semicolon is generated unconditionally. .TP .B \fBre2c:startlabel = 0;\fP -If set to a non zero integer then the start -label of the next scanner blocks will be generated even if not used by -the scanner itself. Otherwise the normal \fByy0\fP like start label is only -being generated if needed. If set to a text value then a label with that +If set to a non zero integer, then the start +label of the next scanner block will be generated even if it isn\(aqt used by +the scanner itself. Otherwise, the normal \fByy0\fP\-like start label is only +generated if needed. If set to a text value, then a label with that text will be generated regardless of whether the normal start label is -being used or not. This setting is being reset to 0 after a start -label has been generated. +used or not. This setting is reset to 0 after a start label has been generated. .TP .B \fBre2c:labelprefix = "yy";\fP -Allows one to change the prefix of numbered -labels. The default is \fByy\fP and can be set any string that is a valid -label. +Allows to change the prefix of numbered +labels. The default is \fByy\fP\&. Can be set any string that is valid in +a label name. 
.TP .B \fBre2c:state:abort = 0;\fP -When not zero and switch \fB\-f\fP is active then +When not zero and the \fB\-f\fP switch is active, then the \fBYYGETSTATE\fP block will contain a default case that aborts and a \-1 -case is used for initialization. +case will be used for initialization. .TP .B \fBre2c:state:nextlabel = 0;\fP Used when \fB\-f\fP is active to control whether the \fBYYGETSTATE\fP block is followed by a \fByyNext:\fP label line. -Instead of using \fByyNext\fP you can usually also use configuration +Instead of using \fByyNext\fP, you can usually also use configuration \fBstartlabel\fP to force a specific start label or default to \fByy0\fP as -start label. Instead of using a dedicated label it is often better to +a start label. Instead of using a dedicated label, it is often better to separate the \fBYYGETSTATE\fP code from the actual scanner code by placing a \fB/*!getstate:re2c*/\fP comment. .TP .B \fBre2c:cgoto:threshold = 9;\fP -When \fB\-g\fP is active this value specifies -the complexity threshold that triggers generation of jump tables rather -than using nested if\(aqs and decision bitfields. The threshold is compared -against a calculated estimation of if\-s needed where every used bitmap +When \fB\-g\fP is active, this value specifies +the complexity threshold that triggers the generation of jump tables rather +than nested ifs and decision bitfields. The threshold is compared +against a calculated estimation of ifs needed where every used bitmap divides the threshold by 2. .TP .B \fBre2c:yych:conversion = 0;\fP -When the input uses signed characters and -\fB\-s\fP or \fB\-b\fP switches are in effect re2c allows one to automatically convert +When input uses signed characters and the +\fB\-s\fP or \fB\-b\fP switches are in effect, re2c allows automatic conversion to the unsigned character type that is then necessary for its internal -single character. When this setting is zero or an empty string the -conversion is disabled. 
Using a non zero number the conversion is taken -from \fBYYCTYPE\fP\&. If that is given by an inplace configuration that value -is being used. Otherwise it will be \fB(YYCTYPE)\fP and changes to that -configuration are no longer possible. When this setting is a string the -braces must be specified. Now assuming your input is a \fBchar *\fP -buffer and you are using above mentioned switches you can set +single character. When this setting is zero or an empty string, the +conversion is disabled. If a non zero number is used, the conversion is taken +from \fBYYCTYPE\fP\&. If \fBYYCTYPE\fP is overridden by an inplace configuration setting, that setting +is used instead of a \fBYYCTYPE\fP cast. Otherwise, it will be \fB(YYCTYPE)\fP and changes to that +configuration are no longer possible. When this setting is a string, it must contain the casting +parentheses. Now assuming your input is a \fBchar *\fP buffer and you are using the above mentioned switches, you can set \fBYYCTYPE\fP to \fBunsigned char\fP and this setting to either 1 or \fB(unsigned char)\fP\&. .TP .B \fBre2c:define:YYCONDTYPE = "YYCONDTYPE";\fP Enumeration used for condition support with \fB\-c\fP mode. .TP .B \fBre2c:define:YYCTXMARKER = "YYCTXMARKER";\fP -Allows one to overwrite the -define \fBYYCTXMARKER\fP and thus avoiding it by setting the value to the -actual code needed. +Replaces the \fBYYCTXMARKER\fP placeholder with the specified identifier. .TP .B \fBre2c:define:YYCTYPE = "YYCTYPE";\fP -Allows one to overwrite the define -\fBYYCTYPE\fP and thus avoiding it by setting the value to the actual code -needed. +Replaces the \fBYYCTYPE\fP placeholder with the specified type. .TP .B \fBre2c:define:YYCURSOR = "YYCURSOR";\fP -Allows one to overwrite the define -\fBYYCURSOR\fP and thus avoiding it by setting the value to the actual code -needed. +Replaces the \fBYYCURSOR\fP placeholder with the specified identifier.
.TP .B \fBre2c:define:YYDEBUG = "YYDEBUG";\fP -Allows one to overwrite the define -\fBYYDEBUG\fP and thus avoiding it by setting the value to the actual code -needed. +Replaces the \fBYYDEBUG\fP placeholder with the specified identifier. .TP .B \fBre2c:define:YYGETCONDITION = "YYGETCONDITION";\fP Substitution for -\fBYYGETCONDITION\fP\&. Note that by default \fBre2c\fP generates braces after +\fBYYGETCONDITION\fP\&. Note that by default, \fBre2c\fP generates parentheses after \fBYYGETCONDITION\fP\&. Set \fBre2c:define:YYGETCONDITION:naked\fP to non\-zero to -omit braces. +omit the parentheses. .TP .B \fBre2c:define:YYGETCONDITION:naked = 0;\fP -Controls braces after -\fBYYGETCONDITION\fP\&. If zero, braces are omitted. If non\-zero, braces are +Controls the parentheses after +\fBYYGETCONDITION\fP\&. If zero, the parentheses are omitted. If non\-zero, the parentheses are generated. .TP .B \fBre2c:define:YYSETCONDITION = "YYSETCONDITION";\fP Substitution for -\fBYYSETCONDITION\fP\&. Note that by default \fBre2c\fP generates argument in -braces and semicolon after \fBYYSETCONDITION\fP\&. If you need to make +\fBYYSETCONDITION\fP\&. Note that by default, \fBre2c\fP generates an argument in +parentheses followed by semicolon after \fBYYSETCONDITION\fP\&. If you need to make \fBYYSETCONDITION\fP an arbitrary statement rather than a call, set \fBre2c:define:YYSETCONDITION:naked\fP to non\-zero and use -\fBre2c:define:YYSETCONDITION@cond\fP to denote formal parameter inside of +\fBre2c:define:YYSETCONDITION@cond\fP to denote the formal parameter inside of the \fBYYSETCONDITION\fP body. .TP .B \fBre2c:define:YYSETCONDITION@cond = "@@";\fP @@ -663,74 +683,71 @@ text inside of \fBYYSETCONDITION\fP will be replaced with the actual argument. .TP .B \fBre2c:define:YYSETCONDITION:naked = 0;\fP -Controls argument in braces -and semicolon after \fBYYSETCONDITION\fP\&. If zero, both agrument and -semicolon are omitted. 
If non\-zero, both argument and semicolon are +Controls the argument in parentheses +and the semicolon after \fBYYSETCONDITION\fP\&. If zero, both the argument and +the semicolon are omitted. If non\-zero, both the argument and the semicolon are generated. .TP .B \fBre2c:define:YYGETSTATE = "YYGETSTATE";\fP Substitution for -\fBYYGETSTATE\fP\&. Note that by default \fBre2c\fP generates braces after +\fBYYGETSTATE\fP\&. Note that by default, \fBre2c\fP generates parentheses after \fBYYGETSTATE\fP\&. Set \fBre2c:define:YYGETSTATE:naked\fP to non\-zero to omit -braces. +the parentheses. .TP .B \fBre2c:define:YYGETSTATE:naked = 0;\fP -Controls braces after -\fBYYGETSTATE\fP\&. If zero, braces are omitted. If non\-zero, braces are +Controls the parentheses that follow +\fBYYGETSTATE\fP\&. If zero, the parentheses are omitted. If non\-zero, they are generated. .TP .B \fBre2c:define:YYSETSTATE = "YYSETSTATE";\fP Substitution for -\fBYYSETSTATE\fP\&. Note that by default \fBre2c\fP generates argument in braces -and semicolon after \fBYYSETSTATE\fP\&. If you need to make \fBYYSETSTATE\fP an +\fBYYSETSTATE\fP\&. Note that by default, \fBre2c\fP generates an argument in parentheses +followed by a semicolon after \fBYYSETSTATE\fP\&. If you need to make \fBYYSETSTATE\fP an arbitrary statement rather than a call, set \fBre2c:define:YYSETSTATE:naked\fP to non\-zero and use \fBre2c:define:YYSETSTATE@cond\fP to denote formal parameter inside of -\fBYYSETSTATE\fP body. +your \fBYYSETSTATE\fP body. .TP .B \fBre2c:define:YYSETSTATE@state = "@@";\fP Any occurrence of this text inside of \fBYYSETSTATE\fP will be replaced with the actual argument. .TP .B \fBre2c:define:YYSETSTATE:naked = 0;\fP -Controls argument in braces and -semicolon after \fBYYSETSTATE\fP\&. If zero, both agrument and semicolon are -omitted. If non\-zero, both argument and semicolon are generated. +Controls the argument in parentheses and the +semicolon after \fBYYSETSTATE\fP\&. 
If zero, both argument and the semicolon are +omitted. If non\-zero, both the argument and the semicolon are generated. .TP .B \fBre2c:define:YYLIMIT = "YYLIMIT";\fP -Allows one to overwrite the define -\fBYYLIMIT\fP and thus avoiding it by setting the value to the actual code +Replaces the \fBYYLIMIT\fP placeholder with the specified identifier. needed. .TP .B \fBre2c:define:YYMARKER = "YYMARKER";\fP -Allows one to overwrite the define -\fBYYMARKER\fP and thus avoiding it by setting the value to the actual code -needed. +Replaces the \fBYYMARKER\fP placeholder with the specified identifier. .TP .B \fBre2c:label:yyFillLabel = "yyFillLabel";\fP -Allows one to overwrite the name of the label \fByyFillLabel\fP\&. +Overrides the name of the \fByyFillLabel\fP label. .TP .B \fBre2c:label:yyNext = "yyNext";\fP -Allows one to overwrite the name of the label \fByyNext\fP\&. +Overrides the name of the \fByyNext\fP label. .TP .B \fBre2c:variable:yyaccept = yyaccept;\fP -Allows one to overwrite the name of the variable \fByyaccept\fP\&. +Overrides the name of the \fByyaccept\fP variable. .TP .B \fBre2c:variable:yybm = "yybm";\fP -Allows one to overwrite the name of the variable \fByybm\fP\&. +Overrides the name of the \fByybm\fP variable. .TP .B \fBre2c:variable:yych = "yych";\fP -Allows one to overwrite the name of the variable \fByych\fP\&. +Overrides the name of the \fByych\fP variable. .TP .B \fBre2c:variable:yyctable = "yyctable";\fP -When both \fB\-c\fP and \fB\-g\fP are active then \fBre2c\fP uses this variable to generate a static jump table +When both \fB\-c\fP and \fB\-g\fP are active, \fBre2c\fP will use this variable to generate a static jump table for \fBYYGETCONDITION\fP\&. .TP .B \fBre2c:variable:yystable = "yystable";\fP Deprecated. .TP .B \fBre2c:variable:yytarget = "yytarget";\fP -Allows one to overwrite the name of the variable \fByytarget\fP\&. +Overrides the name of the \fByytarget\fP variable. 
.UNINDENT .SS REGULAR EXPRESSIONS .INDENT 0.0 @@ -739,14 +756,14 @@ Allows one to overwrite the name of the variable \fByytarget\fP\&. literal string \fB"foo"\fP\&. ANSI\-C escape sequences can be used. .TP .B \fB\(aqfoo\(aq\fP -literal string \fB"foo"\fP (characters [a\-zA\-Z] treated -case\-insensitive). ANSI\-C escape sequences can be used. +literal string \fB"foo"\fP (case insensitive for characters [a\-zA\-Z]). +ANSI\-C escape sequences can be used. .TP .B \fB[xyz]\fP -character class; in this case, regular expression matches either \fBx\fP, \fBy\fP, or \fBz\fP\&. +character class; in this case, the regular expression matches \fBx\fP, \fBy\fP, or \fBz\fP\&. .TP .B \fB[abj\-oZ]\fP -character class with a range in it; matches \fBa\fP, \fBb\fP, any letter from \fBj\fP through \fBo\fP or \fBZ\fP\&. +character class with a range in it; matches \fBa\fP, \fBb\fP, any letter from \fBj\fP through \fBo\fP, or \fBZ\fP\&. .TP .B \fB[^class]\fP inverted character class. @@ -771,13 +788,13 @@ optional \fBr\fP\&. \fBr\fP followed by \fBs\fP (concatenation). .TP .B \fBr | s\fP -either \fBr\fP or \fBs\fP (alternative). +\fBr\fP or \fBs\fP (alternative). .TP .B \fBr\fP / \fBs\fP \fBr\fP but only if it is followed by \fBs\fP\&. Note that \fBs\fP is not part of the matched text. This type of regular expression is called -"trailing context". Trailing context can only be the end of a rule -and not part of a named definition. +"trailing context". Trailing context can only be at the end of a rule +and cannot be part of a named definition. .TP .B \fBr{n}\fP matches \fBr\fP exactly \fBn\fP times. @@ -792,7 +809,7 @@ matches \fBr\fP at least \fBn\fP times, but not more than \fBm\fP times. match any character except newline. .TP .B \fBname\fP -matches named definition as specified by \fBname\fP only if \fB\-F\fP is +matches a named definition as specified by \fBname\fP only if \fB\-F\fP is off. 
If \fB\-F\fP is active then this behaves like it was enclosed in double quotes and matches the string "name". .UNINDENT @@ -800,99 +817,98 @@ quotes and matches the string "name". Character classes and string literals may contain octal or hexadecimal character definitions and the following set of escape sequences: \fB\ea\fP, \fB\eb\fP, \fB\ef\fP, \fB\en\fP, \fB\er\fP, \fB\et\fP, \fB\ev\fP, \fB\e\e\fP\&. An octal character is defined by a backslash -followed by its three octal digits (e.g. \fB\e377\fP). -Hexadecimal characters from 0 to 0xFF are defined by backslash, a lower -cased \fBx\fP and two hexadecimal digits (e.g. \fB\ex12\fP). Hexadecimal characters from 0x100 to 0xFFFF are defined by backslash, a lower cased -\fB\eu\fP or an upper cased \fB\eX\fP and four hexadecimal digits (e.g. \fB\eu1234\fP). -Hexadecimal characters from 0x10000 to 0xFFFFffff are defined by backslash, an upper cased \fB\eU\fP -and eight hexadecimal digits (e.g. \fB\eU12345678\fP). -.sp -The only portable "any" rule is the default rule \fB*\fP\&. +followed by its three octal digits (e.g., \fB\e377\fP). +Hexadecimal characters from 0 to 0xFF are defined by a backslash, a lower +case \fBx\fP and two hexadecimal digits (e.g., \fB\ex12\fP). Hexadecimal characters from 0x100 to 0xFFFF are defined by a backslash, a lower case +\fB\eu\fP or an upper case \fB\eX\fP, and four hexadecimal digits (e.g., \fB\eu1234\fP). +Hexadecimal characters from 0x10000 to 0xFFFFffff are defined by a backslash, an upper case \fB\eU\fP, +and eight hexadecimal digits (e.g., \fB\eU12345678\fP). +.sp +The only portable "any" rule is the default rule, \fB*\fP\&. .SH SCANNER WITH STORABLE STATES .sp When the \fB\-f\fP flag is specified, \fBre2c\fP generates a scanner that can -store its current state, return to the caller, and later resume +store its current state, return to its caller, and later resume operations exactly where it left off.
.sp -The default operation of \fBre2c\fP is a -"pull" model, where the scanner asks for extra input whenever it needs it. However, this mode of operation assumes that the scanner is the "owner" -the parsing loop, and that may not always be convenient. +The default mode of operation in \fBre2c\fP is a +"pull" model, where the scanner asks for extra input whenever it needs it. However, this mode of operation assumes that the scanner is the "owner" of the parsing loop, and that may not always be convenient. .sp Typically, if there is a preprocessor ahead of the scanner in the -stream, or for that matter any other procedural source of data, the -scanner cannot "ask" for more data unless both scanner and source -live in a separate threads. +stream, or for that matter, any other procedural source of data, the +scanner cannot "ask" for more data unless both the scanner and the source +live in separate threads. .sp -The \fB\-f\fP flag is useful for just this situation: it lets users design -scanners that work in a "push" model, i.e. where data is fed to the +The \fB\-f\fP flag is useful exactly for situations like that: it lets users design +scanners that work in a "push" model, i.e., a model where data is fed to the scanner chunk by chunk. When the scanner runs out of data to consume, it -just stores its state, and return to the caller. When more input data is +stores its state and returns to the caller. When more input data is fed to the scanner, it resumes operations exactly where it left off. .sp Changes needed compared to the "pull" model: .INDENT 0.0 .IP \(bu 2 -User has to supply macros \fBYYSETSTATE ()\fP and \fBYYGETSTATE (state)\fP\&. +The user has to supply macros named \fBYYSETSTATE ()\fP and \fBYYGETSTATE (state)\fP\&. .IP \(bu 2 -The \fB\-f\fP option inhibits declaration of \fByych\fP and \fByyaccept\fP\&. So the -user has to declare these. Also the user has to save and restore these. 
-In the example \fBexamples/push_model/push.re\fP these are declared as -fields of the (C++) class of which the scanner is a method, so they do -not need to be saved/restored explicitly. For C they could e.g. be made -macros that select fields from a structure passed in as parameter. +The \fB\-f\fP option inhibits declaration of \fByych\fP and \fByyaccept\fP, so the +user has to declare them and save and restore them where required. +In the \fBexamples/push_model/push.re\fP example, these are declared as +fields of a (C++) class of which the scanner is a method, so they do +not need to be saved/restored explicitly. For C, they could, e.g., be made +macros that select fields from a structure passed in as a parameter. Alternatively, they could be declared as local variables, saved with -\fBYYFILL (n)\fP when it decides to return and restored at entry to the +\fBYYFILL (n)\fP when it decides to return and restored upon entering the function. Also, it could be more efficient to save the state from \fBYYFILL (n)\fP because \fBYYSETSTATE (state)\fP is called unconditionally. -\fBYYFILL (n)\fP however does not get \fBstate\fP as parameter, so we would have +\fBYYFILL (n)\fP however does not get \fBstate\fP as a parameter, so we would have to store state in a local variable by \fBYYSETSTATE (state)\fP\&. .IP \(bu 2 Modify \fBYYFILL (n)\fP to return (from the function calling it) if more input is needed. .IP \(bu 2 -Modify caller to recognise if more input is needed and respond appropriately. +Modify the caller to recognize if more input is needed and respond appropriately. .IP \(bu 2 The generated code will contain a switch block that is used to -restores the last state by jumping behind the corrspoding \fBYYFILL (n)\fP -call. This code is automatically generated in the epilog of the first \fB/*!re2c */\fP +restore the last state by jumping behind the corresponding \fBYYFILL (n)\fP +call. 
This code is automatically generated in the epilogue of the first \fB/*!re2c */\fP block. It is possible to trigger generation of the \fBYYGETSTATE ()\fP block earlier by placing a \fB/*!getstate:re2c*/\fP comment. This is especially useful when the scanner code should be wrapped inside a loop. .UNINDENT .sp -Please see \fBexamples/push_model/push.re\fP for "push" model scanner. The -generated code can be tweaked using inplace configurations \fBstate:abort\fP +Please see \fBexamples/push_model/push.re\fP for an example of a "push" model scanner. The +generated code can be tweaked with inplace configurations \fBstate:abort\fP and \fBstate:nextlabel\fP\&. .SH SCANNER WITH CONDITION SUPPORT .sp -You can preceed regular expressions with a list of condition names when -using the \fB\-c\fP switch. In this case \fBre2c\fP generates scanner blocks for -each conditon. Where each of the generated blocks has its own +You can precede regular expressions with a list of condition names when +using the \fB\-c\fP switch. \fBre2c\fP will then generate a scanner block for +each condition, and each of the generated blocks will have its own precondition. The precondition is given by the interface define \fBYYGETCONDITON()\fP and must be of type \fBYYCONDTYPE\fP\&. .sp There are two special rule types. First, the rules of the condition \fB<*>\fP -are merged to all conditions (note that they have lower priority than -other rules of that condition). And second the empty condition list -allows one to provide a code block that does not have a scanner part. -Meaning it does not allow any regular expression. The condition value +are merged to all conditions (note that they have a lower priority than +other rules of that condition). And second, the empty condition list +allows to provide a code block that does not have a scanner part, +meaning it does not allow any regular expressions. The condition value referring to this special block is always the one with the enumeration value 0. 
This way the code of this special rule can be used to initialize a scanner. It is in no way necessary to have these rules: but sometimes it is helpful to have a dedicated uninitialized condition state. .sp -Non empty rules allow one to specify the new condition, which makes them -transition rules. Besides generating calls for the define -\fBYYSETCONDTITION\fP no other special code is generated. +Non empty rules allow one to specify the new condition, which makes them +transition rules. Besides generating calls for the +\fBYYSETCONDITION\fP define, no other special code is generated. .sp -There is another kind of special rules that allow one to prepend code to any +There is another kind of special rule that allows one to prepend code to any code block of all rules of a certain set of conditions or to all code -blocks to all rules. This can be helpful when some operation is common -among rules. For instance this can be used to store the length of the +blocks of all rules. This can be helpful when some operation is common +among rules. For instance, this can be used to store the length of the scanned string. These special setup rules start with an exclamation mark followed by either a list of conditions \fB<! condition, ... >\fP or a star \fB<!*>\fP\&. When \fBre2c\fP generates the code for a rule whose state does not have a -setup rule and a star\(aqd setup rule is present, than that code will be +setup rule and a starred setup rule is present, the starred setup code will be used as setup code. .SH ENCODINGS .sp @@ -900,73 +916,66 @@ used as setup code. UCS\-2 (\fB\-w\fP), UTF\-16 (\fB\-x\fP), UTF\-32 (\fB\-u\fP) and UTF\-8 (\fB\-8\fP). See also inplace configuration \fBre2c:flags\fP\&. .sp -The following concepts should be clarified when talking about encoding. -Code point is an abstract number, which represents single encoding -symbol. Code unit is the smallest unit of memory, which is used in the +The following concepts should be clarified when talking about encodings.
+A \fIcode point\fP is an abstract number that represents a single symbol. +A \fIcode unit\fP is the smallest unit of memory, which is used in the encoded text (it corresponds to one character in the input stream). One -or more code units can be needed to represent a single code point, -depending on the encoding. In fixed\-length encoding, each code point -is represented with equal number of code units. In variable\-length -encoding, different code points can be represented with different number +or more code units may be needed to represent a single code point, +depending on the encoding. In a \fIfixed\-length\fP encoding, each code point +is represented with an equal number of code units. In \fIvariable\-length\fP +encodings, different code points can be represented with different number of code units. .INDENT 0.0 -.TP -.B ASCII -is a fixed\-length encoding. Its code space includes 0x100 -code points, from 0 to 0xFF. One code point is represented with exactly one -1\-byte code unit, which has the same value as the code point. Size of +.IP \(bu 2 +ASCII is a fixed\-length encoding. Its code space includes 0x100 +code points, from 0 to 0xFF. A code point is represented with exactly one +1\-byte code unit, which has the same value as the code point. The size of \fBYYCTYPE\fP must be 1 byte. -.TP -.B EBCDIC -is a fixed\-length encoding. Its code space includes 0x100 -code points, from 0 to 0xFF. One code point is represented with exactly -one 1\-byte code unit, which has the same value as the code point. Size +.IP \(bu 2 +EBCDIC is a fixed\-length encoding. Its code space includes 0x100 +code points, from 0 to 0xFF. A code point is represented with exactly +one 1\-byte code unit, which has the same value as the code point. The size of \fBYYCTYPE\fP must be 1 byte. -.TP -.B UCS\-2 -is a fixed\-length encoding. Its code space includes 0x10000 +.IP \(bu 2 +UCS\-2 is a fixed\-length encoding. Its code space includes 0x10000 code points, from 0 to 0xFFFF. 
One code point is represented with exactly one 2\-byte code unit, which has the same value as the code -point. Size of \fBYYCTYPE\fP must be 2 bytes. -.TP -.B UTF\-16 -is a variable\-length encoding. Its code space includes all +point. The size of \fBYYCTYPE\fP must be 2 bytes. +.IP \(bu 2 +UTF\-16 is a variable\-length encoding. Its code space includes all Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One -code point is represented with one or two 2\-byte code units. Size of +code point is represented with one or two 2\-byte code units. The size of \fBYYCTYPE\fP must be 2 bytes. -.TP -.B UTF\-32 -is a fixed\-length encoding. Its code space includes all +.IP \(bu 2 +UTF\-32 is a fixed\-length encoding. Its code space includes all Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One -code point is represented with exactly one 4\-byte code unit. Size of +code point is represented with exactly one 4\-byte code unit. The size of \fBYYCTYPE\fP must be 4 bytes. -.TP -.B UTF\-8 -is a variable\-length encoding. Its code space includes all +.IP \(bu 2 +UTF\-8 is a variable\-length encoding. Its code space includes all Unicode code points, from 0 to 0xD7FF and from 0xE000 to 0x10FFFF. One -code point is represented with sequence of one, two, three or four -1\-byte code units. Size of \fBYYCTYPE\fP must be 1 byte. +code point is represented with a sequence of one, two, three, or four +1\-byte code units. The size of \fBYYCTYPE\fP must be 1 byte. .UNINDENT .sp In Unicode, values from range 0xD800 to 0xDFFF (surrogates) are not -valid Unicode code points, any encoded sequence of code units, that +valid Unicode code points. Any encoded sequence of code units that would map to Unicode code points in the range 0xD800\-0xDFFF, is ill\-formed. The user can control how \fBre2c\fP treats such ill\-formed -sequences with \fB\-\-encoding\-policy \fP flag (see \fBOPTIONS\fP -for full explanation). 
-.sp -For some encodings, there are code units, that never occur in valid -encoded stream (e.g. 0xFF byte in UTF\-8). If the generated scanner must -check for invalid input, the only true way to do so is to use default -rule \fB*\fP\&. Note, that full range rule \fB[^]\fP won\(aqt catch invalid code units when variable\-length encoding is used -(\fB[^]\fP means "all valid code points", while default rule \fB*\fP means "all possible code units"). +sequences with the \fB\-\-encoding\-policy \fP switch. +.sp +For some encodings, there are code units that never occur in a valid +encoded stream (e.g., 0xFF byte in UTF\-8). If the generated scanner must +check for invalid input, the only correct way to do so is to use the default +rule (\fB*\fP). Note that the full range rule (\fB[^]\fP) won\(aqt catch invalid code units when a variable\-length encoding is used +(\fB[^]\fP means "any valid code point", whereas the default rule (\fB*\fP) means "any possible code unit"). .SH GENERIC INPUT API .sp -\fBre2c\fP usually operates on input using pointer\-like primitives -\fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP and \fBYYLIMIT\fP\&. +\fBre2c\fP usually operates on input with pointer\-like primitives +\fBYYCURSOR\fP, \fBYYMARKER\fP, \fBYYCTXMARKER\fP, and \fBYYLIMIT\fP\&. .sp -Generic input API (enabled with \fB\-\-input custom\fP switch) allows one to -customize input operations. In this mode, \fBre2c\fP will express all +The generic input API (enabled with the \fB\-\-input custom\fP switch) allows +customizing input operations. 
In this mode, \fBre2c\fP will express all operations on input in terms of the following primitives: .INDENT 0.0 .INDENT 3.5 @@ -983,19 +992,19 @@ _ T{ \fBYYSKIP ()\fP T} T{ -advance to the next character +advance to next character T} _ T{ \fBYYBACKUP ()\fP T} T{ -backup current input position +back up current input position T} _ T{ \fBYYBACKUPCTX ()\fP T} T{ -backup current input position for trailing context +back up current input position for trailing context T} _ T{ @@ -1029,7 +1038,7 @@ A couple of useful links that provide some examples: .UNINDENT .SH SEE ALSO .sp -You can find more information about \fBre2c\fP on the website: \fI\%http://re2c.org\fP\&. +You can find more information about \fBre2c\fP at: \fI\%http://re2c.org\fP\&. See also: flex(1), lex(1), quex (\fI\%http://quex.sourceforge.net\fP). .SH AUTHORS .sp @@ -1048,6 +1057,6 @@ Emmanuel Mogenet \fI\%mgix@mgix.com\fP Ulya Trofimovich \fI\%skvadrik@gmail.com\fP .SH VERSION INFORMATION .sp -This manpage describes \fBre2c\fP version 0.16, package date 21 Jan 2016. +This manpage describes \fBre2c\fP version 0.16, package date 09 Apr 2017. .\" Generated by docutils manpage writer. . diff --git a/re2c/doc/manpage.rst.in b/re2c/doc/manpage.rst.in index 1021b22e..521f229d 100644 --- a/re2c/doc/manpage.rst.in +++ b/re2c/doc/manpage.rst.in @@ -25,13 +25,13 @@ control and customize the generated DFA. OPTIONS ------- -.. include:: manual/options/options_list.rst +.. include:: @top_srcdir@/doc/manual/options/options_list.rst -.. include:: manual/warnings/warnings_general.rst +.. include:: @top_srcdir@/doc/manual/warnings/warnings_general.rst -.. include:: manual/warnings/warnings_general.rst +.. include:: @top_srcdir@/doc/manual/warnings/warnings_general.rst -.. include:: manual/warnings/warnings_list.rst +.. 
include:: @top_srcdir@/doc/manual/warnings/warnings_list.rst INTERFACE CODE @@ -42,7 +42,7 @@ The user must supply interface code either in the form of C/C++ code Which symbols must be defined and which are optional depends on the particular use case. -.. include:: manual/syntax/interface.rst_ +.. include:: @top_srcdir@/doc/manual/syntax/interface.rst_ SYNTAX @@ -66,7 +66,7 @@ allowed and ``re2c`` stops looking for code at the first line that does not begin with whitespace. If two or more rules overlap, the first rule is preferred. -.. include:: manual/syntax/rules.rst_ +.. include:: @top_srcdir@/doc/manual/syntax/rules.rst_ NAMED DEFINITIONS @@ -85,38 +85,38 @@ If ``-F`` is active, then named definitions are also of the form: INPLACE CONFIGURATIONS ~~~~~~~~~~~~~~~~~~~~~~ -.. include:: manual/syntax/configurations.rst_ +.. include:: @top_srcdir@/doc/manual/syntax/configurations.rst_ REGULAR EXPRESSIONS ~~~~~~~~~~~~~~~~~~~ -.. include:: manual/syntax/regular_expressions.rst_ +.. include:: @top_srcdir@/doc/manual/syntax/regular_expressions.rst_ SCANNER WITH STORABLE STATES ---------------------------- -.. include:: manual/features/state/state.rst_ +.. include:: @top_srcdir@/doc/manual/features/state/state.rst_ SCANNER WITH CONDITION SUPPORT ------------------------------ -.. include:: manual/features/conditions/conditions.rst_ +.. include:: @top_srcdir@/doc/manual/features/conditions/conditions.rst_ ENCODINGS --------- -.. include:: manual/features/encodings/encodings.rst_ +.. include:: @top_srcdir@/doc/manual/features/encodings/encodings.rst_ GENERIC INPUT API ----------------- -.. include:: manual/features/generic_api/generic_api.rst_ +.. include:: @top_srcdir@/doc/manual/features/generic_api/generic_api.rst_ A couple of useful links that provide some examples: -- 2.40.0