See the file COPYING for copying conditions
+* version 2.5.8
+
+** a new -X, --posix-compat option generates scanners with POSIX-style
+ abc{1,3} compatible parsing; see the manual for the screwy details
+
* version 2.5.7
+** configure.in now includes a call to AC_PREREQ to enforce the
+ requirement for autoconf at least 2.50 (This only affects you if
+ you're doing flex development.)
+
** configure now uses autoconf's versioning information and configure
-** --help reports the bug-reporting address for flex
+ --help reports the bug-reporting address for flex
** test suite now only reports success versus failure; reporting
skipped is problematic under the current setup
* the manual:
** integrate the items in the faqs/ directory into the manual or code
- or similar
+ or similar (millaway is working on this; much of the FAQS directory
+ is old or uninformative)
*** create a section on flex design, features, etc.
-* repackage the distribution
+* address lex-replacement: document or provide an option through
+ configure for creating lex and libl.a files (but remember this has
+ posix implications)
-** address lex-replacement: document or provide an option through
- configure for creating lex and libl.a files
-
-*** decide what to do about abc{1,3} being broken in the posix standard
+* gettext
** update gettext support from gettext-0.11 to gettext-0.11.2
** make sure all flex modules use gettext translation facilities
-** subdirectories
+* subdirectories
-*** in examples/manual, integrate the Makefile.examples into the
+** in examples/manual, integrate the Makefile.examples into the
Makefile.am
* test suite
tests/. millaway has assigned the rights to the test suite to me
and so the test suite will be under the flex license.
-* generic coding issues
+* generic coding
** move as much skeleton code as possible out of gen.c and into
flex.skl
@cindex patterns, precedence of operators
The regular expressions listed above are grouped according to
precedence, from highest precedence at the top to lowest at the bottom.
-Those grouped together have equal precedence. For example,
+Those grouped together have equal precedence (see the special note on the
+precedence of the repeat operator, @samp{@{@}}, under the documentation
+for the @samp{--posix-compat} POSIX compliance option). For example,
@exindex patterns, grouping and precedence
@example
@item -V, --version
prints the version number to @file{stdout} and exits.
+@item -X, --posix-compat
+turns on maximum compatibility with the POSIX 1003.2-1992 definition of
+@code{lex}. Since @code{flex} was originally designed to implement the
+POSIX definition of @code{lex}, this generally involves very few changes
+in behavior. At the current writing the known differences between
+@code{flex} and the POSIX standard are:
+
+@itemize
+@item
+In POSIX and AT&T @code{lex}, the repeat operator, @samp{@{@}}, has lower
+precedence than concatenation (thus @samp{ab@{3@}} yields @samp{ababab}).
+Most POSIX utilities use an Extended Regular Expression (ERE) precedence
+that has the precedence of the repeat operator higher than concatenation
+(which causes @samp{ab@{3@}} to yield @samp{abbb}). By default, @code{flex}
+places the precedence of the repeat operator higher than concatenation,
+which matches the ERE processing of other POSIX utilities. When either
+@samp{--posix-compat} or @samp{-l} is specified, @code{flex} will use the
+traditional AT&T and POSIX-compliant precedence for the repeat operator
+where concatenation has higher precedence than the repeat operator.
+@end itemize
+
@item -7, --7bit
instructs @code{flex} to generate a 7-bit scanner, i.e., one which can
only recognize 7-bit characters in its input. The advantage of using
@item
The precedence of the @samp{@{,@}} (numeric range) operator is
-different. @code{lex} interprets @samp{abc@{1,3@}} as match one, two,
+different. The AT&T and POSIX specifications of @code{lex}
+interpret @samp{abc@{1,3@}} as ``match one, two,
or three occurrences of @samp{abc}'', whereas @code{flex} interprets it
as ``match @samp{ab} followed by one, two, or three occurrences of
-@samp{c}''. The former is in agreement with the POSIX specification.
+@samp{c}''. The @samp{-l} and @samp{--posix-compat} options do away with this
+incompatibility.
@item
The precedence of the @samp{^} operator is different. @code{lex}
* interactive - if true (-I), generate an interactive scanner
* caseins - if true (-i), generate a case-insensitive scanner
* lex_compat - if true (-l), maximize compatibility with AT&T lex
+ * posix_compat - if true (-X), maximize compatibility with POSIX lex
* do_yylineno - if true, generate code to maintain yylineno
* useecs - if true (-Ce flag), use equivalence classes
* fulltbl - if true (-Cf flag), don't compress the DFA state table
*/
extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-extern int interactive, caseins, lex_compat, do_yylineno;
+extern int interactive, caseins, lex_compat, posix_compat, do_yylineno;
extern int useecs, fulltbl, usemecs, fullspd;
extern int gen_line_dirs, performance_report, backing_up_report;
extern int reentrant, reentrant_bison_pure;
/* these globals are all defined and commented in flexdef.h */
int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-int interactive, caseins, lex_compat, do_yylineno, useecs, fulltbl, usemecs;
+int interactive, caseins, lex_compat, posix_compat, do_yylineno, useecs, fulltbl, usemecs;
int fullspd, gen_line_dirs, performance_report, backing_up_report;
int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap, csize;
int reentrant, reentrant_bison_pure;
putc( 'i', stderr );
if ( lex_compat )
putc( 'l', stderr );
+ if ( posix_compat )
+ putc( 'X', stderr );
if ( performance_report > 0 )
putc( 'p', stderr );
if ( performance_report > 1 )
scanopt_t sopt;
printstats = syntaxerror = trace = spprdflt = caseins = false;
- lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
+ lex_compat = posix_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
fullspd = long_align = nowarn = yymore_used = continued_action = false;
do_yylineno = yytext_is_array = in_rule = reject = do_stdinit = false;
yymore_really_used = reject_really_used = unspecified;
lex_compat = true;
break;
+ case OPT_POSIX_COMPAT:
+ posix_compat = true;
+ break;
+
case OPT_MAIN:
buf_strdefine(&userdef_buf, "YY_MAIN", "1");
do_yywrap = false;
" -B, --batch generate batch scanner (opposite of -I)\n"
" -i, --case-insensitive ignore case in patterns\n"
" -l, --lex-compat maximal compatibility with original lex\n"
+" -X, --posix-compat maximal compatibility with POSIX lex\n"
" -I, --interactive generate interactive scanner (opposite of -B)\n"
" --yylineno track line count in yylineno\n"
{"--interactive", OPT_INTERACTIVE,0},/* Generate interactive scanner (opposite of -B). */
{"-l", OPT_LEX_COMPAT,0},
{"--lex-compat", OPT_LEX_COMPAT,0},/* Maximal compatibility with original lex. */
+{"-X", OPT_POSIX_COMPAT,0},
+{"--posix-compat", OPT_POSIX_COMPAT,0},/* Maximal compatibility with POSIX lex. */
{"-L", OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
{"--noline", OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
{"--main", OPT_MAIN,0}, /* use built-in main() function. */
OPT_HELP,
OPT_INTERACTIVE,
OPT_LEX_COMPAT,
+ OPT_POSIX_COMPAT,
OPT_MAIN,
OPT_META_ECS,
OPT_NEVER_INTERACTIVE,
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+/*
+ * POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation. This causes ab{3} to yield abbb.
+ *
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'. The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * is specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
%{
/* Copyright (c) 1990 The Regents of the University of California. */
/* All rights reserved. */
| sect1 options
|
| error
- { synerr( "unknown error processing section 1" ); }
+ { synerr( _("unknown error processing section 1") ); }
;
sect1end : SECTEND
{ scinstal( nmstr, xcluflg ); }
| error
- { synerr( "bad start condition list" ); }
+ { synerr( _("bad start condition list") ); }
;
options : OPTION_OP optionlist
}
| error
- { synerr( "unrecognized rule" ); }
+ { synerr( _("unrecognized rule") ); }
;
scon_stk_ptr :
| sconname
| error
- { synerr( "bad start condition list" ); }
+ { synerr( _("bad start condition list") ); }
;
sconname : NAME
}
| re2 re '$'
- { synerr( "trailing context used twice" ); }
+ { synerr( _("trailing context used twice") ); }
| re '$'
{
if ( trlcontxt )
{
- synerr( "trailing context used twice" );
+ synerr( _("trailing context used twice") );
$$ = mkstate( SYM_EPSILON );
}
*/
if ( trlcontxt )
- synerr( "trailing context used twice" );
+ synerr( _("trailing context used twice") );
else
trlcontxt = true;
| singleton
{ $$ = $1; }
+
+ | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+ {
+ varlength = true;
+
+ if ( $3 > $5 || $3 < 0 )
+ {
+ synerr( _("bad iteration values") );
+ $$ = $1;
+ }
+ else
+ {
+ if ( $3 == 0 )
+ {
+ if ( $5 <= 0 )
+ {
+ synerr(
+ _("bad iteration values") );
+ $$ = $1;
+ }
+ else
+ $$ = mkopt(
+ mkrep( $1, 1, $5 ) );
+ }
+ else
+ $$ = mkrep( $1, $3, $5 );
+ }
+ }
+
+ | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+ {
+ varlength = true;
+
+ if ( $3 <= 0 )
+ {
+ synerr( _("iteration value must be positive") );
+ $$ = $1;
+ }
+
+ else
+ $$ = mkrep( $1, $3, INFINITY );
+ }
+
+ | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+ {
+ /* The series could be something like "(foo)",
+ * in which case we have no idea what its length
+ * is, so we punt here.
+ */
+ varlength = true;
+
+ if ( $3 <= 0 )
+ {
+ synerr( _("iteration value must be positive")
+ );
+ $$ = $1;
+ }
+
+ else
+ $$ = link_machines( $1,
+ copysingl( $1, $3 - 1 ) );
+ }
+
;
singleton : singleton '*'
$$ = mkopt( $1 );
}
- | singleton '{' NUMBER ',' NUMBER '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
- synerr( "bad iteration values" );
+ synerr( _("bad iteration values") );
$$ = $1;
}
else
if ( $5 <= 0 )
{
synerr(
- "bad iteration values" );
+ _("bad iteration values") );
$$ = $1;
}
else
}
}
- | singleton '{' NUMBER ',' '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
{
varlength = true;
if ( $3 <= 0 )
{
- synerr( "iteration value must be positive" );
+ synerr( _("iteration value must be positive") );
$$ = $1;
}
$$ = mkrep( $1, $3, INFINITY );
}
- | singleton '{' NUMBER '}'
+ | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
{
/* The singleton could be something like "(foo)",
* in which case we have no idea what its length
if ( $3 <= 0 )
{
- synerr( "iteration value must be positive" );
+ synerr( _("iteration value must be positive") );
$$ = $1;
}
}
if ( $2 > $4 )
- synerr( "negative range in character class" );
+ synerr( _("negative range in character class") );
else
{
input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
interactive interactive = option_sense;
lex-compat lex_compat = option_sense;
+ posix-compat posix_compat = option_sense;
main {
action_define( "YY_MAIN", option_sense );
/* Override yywrap */
^{OPTWS}"<" BEGIN(SC); return '<';
^{OPTWS}"^" return '^';
\" BEGIN(QUOTE); return '"';
- "{"/[[:digit:]] BEGIN(NUM); return '{';
+ "{"/[[:digit:]] {
+ BEGIN(NUM);
+ if ( lex_compat || posix_compat )
+ return BEGIN_REPEAT_POSIX;
+ else
+ return BEGIN_REPEAT_FLEX;
+ }
"$"/([[:blank:]]|{NL}) return '$';
{WS}"%{" {
}
"," return ',';
- "}" BEGIN(SECT2); return '}';
+ "}" {
+ BEGIN(SECT2);
+ if ( lex_compat || posix_compat )
+ return END_REPEAT_POSIX;
+ else
+ return END_REPEAT_FLEX;
+ }
. {
synerr( _( "bad character inside {}'s" ) );