From: Will Estes <wlestes@users.sourceforge.net>
Date: Wed, 19 Jun 2002 13:26:43 +0000 (+0000)
Subject: address typos in NEWS; add --posix option for ERE parsing the way posix wants it... 
X-Git-Tag: flex-2-5-10~57
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5916310e885f01716646b84bd6eecdb9a7d502ae;p=flex

address typos in NEWS; add --posix option for ERE parsing the way posix wants it; update the TODO file
---

diff --git a/NEWS b/NEWS
index 4eb8cf4..bc92806 100644
--- a/NEWS
+++ b/NEWS
@@ -3,10 +3,19 @@ changes between releases of flex
 
 See the file COPYING for copying conditions
 
+* version 2.5.8
+
+** a new --posix-compat option generates scanners with posix-style
+   abc{1,3} compatible parsing, see manual for the screwy details
+
 * version 2.5.7
 
+** configure.in now includes a call to AC_PREREQ to enforce the
+   requirement for autoconf at least 2.50 (This only affects you if
+   you're doing flex development.)
+
 ** configure now uses autoconf's versioning information and configure
-** --help reports the bug-reporting address for flex
+   --help reports the bug-reporting address for flex
 
 ** test suite now only reports success versus failure; reporting
    skipped is problematic under the current setup
diff --git a/TODO b/TODO
index 878f93f..55d8ebe 100644
--- a/TODO
+++ b/TODO
@@ -5,24 +5,24 @@
 * the manual:
 
 ** integrate the items in the faqs/ directory into the manual or code
-  or similar
+  or similar (millaway is working on this; much of the FAQS directory
+  is old or uninformative)
 
 *** create a section on flex design, features, etc.
 
-* repackage the distribution
+* address lex-replacement: document or provide an option through
+   configure for creating lex and libl.a files (but remember this has
+   posix implications)
 
-** address lex-replacement: document or provide an option through
-   configure for creating lex and libl.a files
-
-*** decide what to do about abc{1,3} being broken in the posix standard
+* gettext
 
 ** update gettext support from gettext-0.11 to gettext-0.11.2
 
 ** make sure all flex modules use gettext translation facilities
 
-** subdirectories
+* subdirectories
 
-*** in examples/manual, integrate the Makefile.examples into the
+** in examples/manual, integrate the Makefile.examples into the
     Makefile.am
 
 * test suite
@@ -33,7 +33,7 @@
    tests/. millaway has assigned the rights to the test suite to me
    and so the test suite will be under the flex license.
 
-* generic coding issues
+* generic coding
 
 ** move as much skeleton code as possible out of gen.c and into
   flex.skl
diff --git a/flex.texi b/flex.texi
index 5fa04a3..21ff20c 100644
--- a/flex.texi
+++ b/flex.texi
@@ -610,7 +610,9 @@ operators, @samp{-}, @samp{]]}, and, at the beginning of the class, @samp{^}.
 @cindex patterns, precedence of operators
 The regular expressions listed above are grouped according to
 precedence, from highest precedence at the top to lowest at the bottom.
-Those grouped together have equal precedence.  For example,
+Those grouped together have equal precedence (see special note on the
+precedence of the repeat operator, @samp{@{@}}, under the documentation
+for the @samp{--posix-compat} POSIX compliance option).  For example,
 
 @exindex patterns, grouping and precedence
 @example
@@ -2376,6 +2378,27 @@ option is mostly for use in maintaining @code{flex}.
 @item -V, --version
 prints the version number to @file{stdout} and exits. 
 
+@item -X, --posix-compat
+turns on maximum compatibility with the POSIX 1003.2-1992 definition of
+@code{lex}.  Since @code{flex} was originally designed to implement the
+POSIX definition of @code{lex} this generally involves very few changes
+in behavior.  At the current writing the known differences between
+@code{flex} and the POSIX standard are:
+
+@itemize
+@item 
+In POSIX and AT&T @code{lex}, the repeat operator, @samp{@{@}}, has lower
+precedence than concatenation (thus @samp{ab@{3@}} yields @samp{ababab}).
+Most POSIX utilities use an Extended Regular Expression (ERE) precedence
+that has the precedence of the repeat operator higher than concatenation
+(which causes @samp{ab@{3@}} to yield @samp{abbb}).  By default, @code{flex}
+places the precedence of the repeat operator higher than concatenation
+which matches the ERE processing of other POSIX utilities.  When either
+@samp{--posix-compat} or @samp{-l} is specified, @code{flex} will use the
+traditional AT&T and POSIX-compliant precedence for the repeat operator
+where concatenation has higher precedence than the repeat operator.
+@end itemize
+
 @item -7, --7bit
 instructs @code{flex} to generate a 7-bit scanner, i.e., one which can
 only recognize 7-bit characters in its input.  The advantage of using
@@ -4045,10 +4068,12 @@ This is not the case with @code{lex} or the POSIX specification.  The
 
 @item 
 The precedence of the @samp{@{,@}} (numeric range) operator is
-different.  @code{lex} interprets @samp{abc@{1,3@}} as match one, two,
+different.  The AT&T and POSIX specifications of @code{lex}
+interpret @samp{abc@{1,3@}} as ``match one, two,
 or three occurrences of @samp{abc}'', whereas @code{flex} interprets it
 as ``match @samp{ab} followed by one, two, or three occurrences of
-@samp{c}''.  The former is in agreement with the POSIX specification.
+@samp{c}''.  The @samp{-l} and @samp{--posix-compat} options do away with this
+incompatibility.
 
 @item 
 The precedence of the @samp{^} operator is different.  @code{lex}
diff --git a/flexdef.h b/flexdef.h
index 83843a1..93f7e10 100644
--- a/flexdef.h
+++ b/flexdef.h
@@ -333,6 +333,7 @@
  * interactive - if true (-I), generate an interactive scanner
  * caseins - if true (-i), generate a case-insensitive scanner
  * lex_compat - if true (-l), maximize compatibility with AT&T lex
+ * posix_compat - if true (-X), maximize compatibility with POSIX lex
  * do_yylineno - if true, generate code to maintain yylineno
  * useecs - if true (-Ce flag), use equivalence classes
  * fulltbl - if true (-Cf flag), don't compress the DFA state table
@@ -370,7 +371,7 @@
  */
 
 extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-extern int interactive, caseins, lex_compat, do_yylineno;
+extern int interactive, caseins, lex_compat, posix_compat, do_yylineno;
 extern int useecs, fulltbl, usemecs, fullspd;
 extern int gen_line_dirs, performance_report, backing_up_report;
 extern int reentrant, reentrant_bison_pure;
diff --git a/main.c b/main.c
index b82aee4..275b1e0 100644
--- a/main.c
+++ b/main.c
@@ -54,7 +54,7 @@ static char * basename2 PROTO((char* path, int should_strip_ext));
 
 /* these globals are all defined and commented in flexdef.h */
 int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-int interactive, caseins, lex_compat, do_yylineno, useecs, fulltbl, usemecs;
+int interactive, caseins, lex_compat, posix_compat, do_yylineno, useecs, fulltbl, usemecs;
 int fullspd, gen_line_dirs, performance_report, backing_up_report;
 int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap, csize;
 int reentrant, reentrant_bison_pure;
@@ -746,6 +746,8 @@ int exit_status;
 			putc( 'i', stderr );
 		if ( lex_compat )
 			putc( 'l', stderr );
+		if ( posix_compat )
+			putc( 'X', stderr );
 		if ( performance_report > 0 )
 			putc( 'p', stderr );
 		if ( performance_report > 1 )
@@ -923,7 +925,7 @@ char **argv;
         scanopt_t sopt;
 
 	printstats = syntaxerror = trace = spprdflt = caseins = false;
-	lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
+	lex_compat = posix_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
 	fullspd = long_align = nowarn = yymore_used = continued_action = false;
 	do_yylineno = yytext_is_array = in_rule = reject = do_stdinit = false;
 	yymore_really_used = reject_really_used = unspecified;
@@ -1068,6 +1070,10 @@ char **argv;
                     lex_compat = true;
                     break;
 
+            case OPT_POSIX_COMPAT:
+                    posix_compat = true;
+                    break;
+
             case OPT_MAIN:
                     buf_strdefine(&userdef_buf, "YY_MAIN", "1");
                     do_yywrap = false;
@@ -1743,6 +1749,7 @@ _(
 "  -B, --batch             generate batch scanner (opposite of -I)\n"
 "  -i, --case-insensitive  ignore case in patterns\n"
 "  -l, --lex-compat        maximal compatibility with original lex\n"
+"  -X, --posix-compat      maximal compatibility with POSIX lex\n"
 "  -I, --interactive       generate interactive scanner (opposite of -B)\n"
 "      --yylineno          track line count in yylineno\n"
 
diff --git a/options.c b/options.c
index 2d4ba3a..2091dba 100644
--- a/options.c
+++ b/options.c
@@ -81,6 +81,8 @@ optspec_t  flexopts[] = {
 {"--interactive",       OPT_INTERACTIVE,0},/* Generate interactive scanner (opposite of -B). */
 {"-l",                  OPT_LEX_COMPAT,0},
 {"--lex-compat",        OPT_LEX_COMPAT,0},/* Maximal compatibility with original lex. */
+{"-X",                  OPT_POSIX_COMPAT,0},
+{"--posix-compat",      OPT_POSIX_COMPAT,0},/* Maximal compatibility with POSIX lex. */
 {"-L",                  OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
 {"--noline",            OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
 {"--main",              OPT_MAIN,0}, /* use built-in main() function. */
diff --git a/options.h b/options.h
index 3488c68..afe0e4a 100644
--- a/options.h
+++ b/options.h
@@ -60,6 +60,7 @@ enum flexopt_flag_t {
     OPT_HELP,
     OPT_INTERACTIVE,
     OPT_LEX_COMPAT,
+    OPT_POSIX_COMPAT,
     OPT_MAIN,
     OPT_META_ECS,
     OPT_NEVER_INTERACTIVE,
diff --git a/parse.y b/parse.y
index 0645b7a..849536b 100644
--- a/parse.y
+++ b/parse.y
@@ -6,6 +6,25 @@
 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
 
+/*
+ * POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation.  This causes ab{3} to yield abbb.
+ *
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'.  The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * is specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
 %{
 /*  Copyright (c) 1990 The Regents of the University of California. */
 /*  All rights reserved. */
@@ -152,7 +171,7 @@ sect1		:  sect1 startconddecl namelist1
 		|  sect1 options
 		|
 		|  error
-			{ synerr( "unknown error processing section 1" ); }
+			{ synerr( _("unknown error processing section 1") ); }
 		;
 
 sect1end	:  SECTEND
@@ -177,7 +196,7 @@ namelist1	:  namelist1 NAME
 			{ scinstal( nmstr, xcluflg ); }
 
 		|  error
-			{ synerr( "bad start condition list" ); }
+			{ synerr( _("bad start condition list") ); }
 		;
 
 options		:  OPTION_OP optionlist
@@ -304,7 +323,7 @@ flexrule	:  '^' rule
 			}
 
 		|  error
-			{ synerr( "unrecognized rule" ); }
+			{ synerr( _("unrecognized rule") ); }
 		;
 
 scon_stk_ptr	:
@@ -340,7 +359,7 @@ namelist2	:  namelist2 ',' sconname
 		|  sconname
 
 		|  error
-			{ synerr( "bad start condition list" ); }
+			{ synerr( _("bad start condition list") ); }
 		;
 
 sconname	:  NAME
@@ -422,7 +441,7 @@ rule		:  re2 re
 			}
 
 		|  re2 re '$'
-			{ synerr( "trailing context used twice" ); }
+			{ synerr( _("trailing context used twice") ); }
 
 		|  re '$'
 			{
@@ -435,7 +454,7 @@ rule		:  re2 re
 
 			if ( trlcontxt )
 				{
-				synerr( "trailing context used twice" );
+				synerr( _("trailing context used twice") );
 				$$ = mkstate( SYM_EPSILON );
 				}
 
@@ -504,7 +523,7 @@ re2		:  re '/'
 			 */
 
 			if ( trlcontxt )
-				synerr( "trailing context used twice" );
+				synerr( _("trailing context used twice") );
 			else
 				trlcontxt = true;
 
@@ -533,6 +552,69 @@ series		:  series singleton
 
 		|  singleton
 			{ $$ = $1; }
+
+		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+			{
+			varlength = true;
+
+			if ( $3 > $5 || $3 < 0 )
+				{
+				synerr( _("bad iteration values") );
+				$$ = $1;
+				}
+			else
+				{
+				if ( $3 == 0 )
+					{
+					if ( $5 <= 0 )
+						{
+						synerr(
+						_("bad iteration values") );
+						$$ = $1;
+						}
+					else
+						$$ = mkopt(
+							mkrep( $1, 1, $5 ) );
+					}
+				else
+					$$ = mkrep( $1, $3, $5 );
+				}
+			}
+
+		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+			{
+			varlength = true;
+
+			if ( $3 <= 0 )
+				{
+				synerr( _("iteration value must be positive") );
+				$$ = $1;
+				}
+
+			else
+				$$ = mkrep( $1, $3, INFINITY );
+			}
+
+		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+			{
+			/* The series could be something like "(foo)",
+			 * in which case we have no idea what its length
+			 * is, so we punt here.
+			 */
+			varlength = true;
+
+			if ( $3 <= 0 )
+				{
+				  synerr( _("iteration value must be positive")
+					  );
+				$$ = $1;
+				}
+
+			else
+				$$ = link_machines( $1,
+						copysingl( $1, $3 - 1 ) );
+			}
+
 		;
 
 singleton	:  singleton '*'
@@ -554,13 +636,13 @@ singleton	:  singleton '*'
 			$$ = mkopt( $1 );
 			}
 
-		|  singleton '{' NUMBER ',' NUMBER '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
 			{
 			varlength = true;
 
 			if ( $3 > $5 || $3 < 0 )
 				{
-				synerr( "bad iteration values" );
+				synerr( _("bad iteration values") );
 				$$ = $1;
 				}
 			else
@@ -570,7 +652,7 @@ singleton	:  singleton '*'
 					if ( $5 <= 0 )
 						{
 						synerr(
-						"bad iteration values" );
+						_("bad iteration values") );
 						$$ = $1;
 						}
 					else
@@ -582,13 +664,13 @@ singleton	:  singleton '*'
 				}
 			}
 
-		|  singleton '{' NUMBER ',' '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
 			{
 			varlength = true;
 
 			if ( $3 <= 0 )
 				{
-				synerr( "iteration value must be positive" );
+				synerr( _("iteration value must be positive") );
 				$$ = $1;
 				}
 
@@ -596,7 +678,7 @@ singleton	:  singleton '*'
 				$$ = mkrep( $1, $3, INFINITY );
 			}
 
-		|  singleton '{' NUMBER '}'
+		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
 			{
 			/* The singleton could be something like "(foo)",
 			 * in which case we have no idea what its length
@@ -606,7 +688,7 @@ singleton	:  singleton '*'
 
 			if ( $3 <= 0 )
 				{
-				synerr( "iteration value must be positive" );
+				synerr( _("iteration value must be positive") );
 				$$ = $1;
 				}
 
@@ -700,7 +782,7 @@ ccl		:  ccl CHAR '-' CHAR
 				}
 
 			if ( $2 > $4 )
-				synerr( "negative range in character class" );
+				synerr( _("negative range in character class") );
 
 			else
 				{
diff --git a/scan.l b/scan.l
index e86132f..723704c 100644
--- a/scan.l
+++ b/scan.l
@@ -246,6 +246,7 @@ LEXOPT		[aceknopr]
 	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
 	interactive	interactive = option_sense;
 	lex-compat	lex_compat = option_sense;
+	posix-compat	posix_compat = option_sense;
 	main		{
 			action_define( "YY_MAIN", option_sense );
             /* Override yywrap */
@@ -362,7 +363,13 @@ LEXOPT		[aceknopr]
 	^{OPTWS}"<"	BEGIN(SC); return '<';
 	^{OPTWS}"^"	return '^';
 	\"		BEGIN(QUOTE); return '"';
-	"{"/[[:digit:]]	BEGIN(NUM); return '{';
+	"{"/[[:digit:]]	{
+			BEGIN(NUM);
+			if ( lex_compat || posix_compat )
+				return BEGIN_REPEAT_POSIX;
+			else
+				return BEGIN_REPEAT_FLEX;
+			}
 	"$"/([[:blank:]]|{NL})	return '$';
 
 	{WS}"%{"		{
@@ -573,7 +580,13 @@ LEXOPT		[aceknopr]
 			}
 
 	","		return ',';
-	"}"		BEGIN(SECT2); return '}';
+	"}"		{
+			BEGIN(SECT2);
+			if ( lex_compat || posix_compat )
+				return END_REPEAT_POSIX;
+			else
+				return END_REPEAT_FLEX;
+			}
 
 	.		{
 			synerr( _( "bad character inside {}'s" ) );