]> granicus.if.org Git - flex/commitdiff
address typos in NEWS; add --posix option for ERE parsing the way posix wants it...
author Will Estes <wlestes@users.sourceforge.net>
Wed, 19 Jun 2002 13:26:43 +0000 (13:26 +0000)
committer Will Estes <wlestes@users.sourceforge.net>
Wed, 19 Jun 2002 13:26:43 +0000 (13:26 +0000)
NEWS
TODO
flex.texi
flexdef.h
main.c
options.c
options.h
parse.y
scan.l

diff --git a/NEWS b/NEWS
index 4eb8cf4f5ff76cbb8956ddc4a0e320b9eaf077c9..bc9280610cb9f86bd2da06b21ef0ab3bef75affc 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,10 +3,19 @@ changes between releases of flex
 
 See the file COPYING for copying conditions
 
+* version 2.5.8
+
+** a new --posix option generates scanners with posix-style abc{1,3}
+   compatible parsing, see manual for the screwy details
+
 * version 2.5.7
 
+** configure.in now includes a call to AC_PREREQ to enforce the
+   requirement for autoconf at least 2.50 (This only affects you if
+   you're doing flex development.)
+
 ** configure now uses autoconf's versioning information and configure
-** --help reports the bug-reporting address for flex
+   --help reports the bug-reporting address for flex
 
 ** test suite now only reports success versus failure; reporting
    skipped is problematic under the current setup
diff --git a/TODO b/TODO
index 878f93f5c71fad49c926a462e9c5ae25b4e2842d..55d8ebef0ee4e46b38f698ecb69817d563075c94 100644 (file)
--- a/TODO
+++ b/TODO
@@ -5,24 +5,24 @@
 * the manual:
 
 ** integrate the items in the faqs/ directory into the manual or code
-  or similar
+  or similar (millaway is working on this; much of the FAQS directory
+  is old or uninformative)
 
 *** create a section on flex design, features, etc.
 
-* repackage the distribution
+* address lex-replacement: document or provide an option through
+   configure for creating lex and libl.a files (but remember this has
+   posix implications)
 
-** address lex-replacement: document or provide an option through
-   configure for creating lex and libl.a files
-
-*** decide what to do about abc{1,3} being broken in the posix standard
+* gettext
 
 ** update gettext support from gettext-0.11 to gettext-0.11.2
 
 ** make sure all flex modules use gettext translation facilities
 
-*subdirectories
+*subdirectories
 
-*** in examples/manual, integrate the Makefile.examples into the
+** in examples/manual, integrate the Makefile.examples into the
     Makefile.am
 
 * test suite
@@ -33,7 +33,7 @@
    tests/. millaway has assigned the rights to the test suite to me
    and so the test suite will be under the flex license.
 
-* generic coding issues
+* generic coding
 
 ** move as much skeleton code as possible out of gen.c and into
   flex.skl
index 5fa04a30fa120d739be37f0c082503e9c69bc6e1..21ff20caef8e86d06865f6b179f0f3b72300342d 100644 (file)
--- a/flex.texi
+++ b/flex.texi
@@ -610,7 +610,9 @@ operators, @samp{-}, @samp{]]}, and, at the beginning of the class, @samp{^}.
 @cindex patterns, precedence of operators
 The regular expressions listed above are grouped according to
 precedence, from highest precedence at the top to lowest at the bottom.
-Those grouped together have equal precedence.  For example,
+Those grouped together have equal precedence (see special note on the
+precedence of the repeat operator, @samp{@{@}}, under the documentation
+for the @samp{--posix} POSIX compliance option).  For example,
 
 @exindex patterns, grouping and precedence
 @example
@@ -2376,6 +2378,27 @@ option is mostly for use in maintaining @code{flex}.
 @item -V, --version
 prints the version number to @file{stdout} and exits. 
 
+@item -X, --posix-compat
+turns on maximum compatibility with the POSIX 1003.2-1992 definition of
+@code{lex}.  Since @code{flex} was originally designed to implement the
+POSIX definition of @code{lex} this generally involves very few changes
+in behavior.  At the current writing the known differences between
+@code{flex} and the POSIX standard are:
+
+@itemize
+@item 
+In POSIX and AT&T @code{lex}, the repeat operator, @samp{@{@}}, has lower
+precedence than concatenation (thus @samp{ab@{3@}} yields @samp{ababab}).
+Most POSIX utilities use an Extended Regular Expression (ERE) precedence
+that has the precedence of the repeat operator higher than concatenation
+(which causes @samp{ab@{3@}} to yield @samp{abbb}).  By default, @code{flex}
+places the precedence of the repeat operator higher than concatenation
+which matches the ERE processing of other POSIX utilities.  When either
+@samp{--posix} or @samp{-l} is specified, @code{flex} will use the
+traditional AT&T and POSIX-compliant precedence for the repeat operator
+where concatenation has higher precedence than the repeat operator.
+@end itemize
+
 @item -7, --7bit
 instructs @code{flex} to generate a 7-bit scanner, i.e., one which can
 only recognize 7-bit characters in its input.  The advantage of using
@@ -4045,10 +4068,12 @@ This is not the case with @code{lex} or the POSIX specification.  The
 
 @item 
 The precedence of the @samp{@{,@}} (numeric range) operator is
-different.  @code{lex} interprets @samp{abc@{1,3@}} as match one, two,
+different.  The AT&T and POSIX specifications of @code{lex}
+interpret @samp{abc@{1,3@}} as ``match one, two,
 or three occurrences of @samp{abc}'', whereas @code{flex} interprets it
 as ``match @samp{ab} followed by one, two, or three occurrences of
-@samp{c}''.  The former is in agreement with the POSIX specification.
+@samp{c}''.  The @samp{-l} and @samp{--posix} options do away with this
+incompatibility.
 
 @item 
 The precedence of the @samp{^} operator is different.  @code{lex}
index 83843a1013a1c16f8f1064a187cf341fe375d174..93f7e1013b7fcf74ca62f29e4b5ffc2a76e177cb 100644 (file)
--- a/flexdef.h
+++ b/flexdef.h
  * interactive - if true (-I), generate an interactive scanner
  * caseins - if true (-i), generate a case-insensitive scanner
  * lex_compat - if true (-l), maximize compatibility with AT&T lex
+ * posix_compat - if true (-X), maximize compatibility with POSIX lex
  * do_yylineno - if true, generate code to maintain yylineno
  * useecs - if true (-Ce flag), use equivalence classes
  * fulltbl - if true (-Cf flag), don't compress the DFA state table
  */
 
 extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-extern int interactive, caseins, lex_compat, do_yylineno;
+extern int interactive, caseins, lex_compat, posix_compat, do_yylineno;
 extern int useecs, fulltbl, usemecs, fullspd;
 extern int gen_line_dirs, performance_report, backing_up_report;
 extern int reentrant, reentrant_bison_pure;
diff --git a/main.c b/main.c
index b82aee4bbca8fcc7a06bed16c3782aa0d5023a01..275b1e065eb2f25607ae39b987355104c63bee49 100644 (file)
--- a/main.c
+++ b/main.c
@@ -54,7 +54,7 @@ static char * basename2 PROTO((char* path, int should_strip_ext));
 
 /* these globals are all defined and commented in flexdef.h */
 int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt;
-int interactive, caseins, lex_compat, do_yylineno, useecs, fulltbl, usemecs;
+int interactive, caseins, lex_compat, posix_compat, do_yylineno, useecs, fulltbl, usemecs;
 int fullspd, gen_line_dirs, performance_report, backing_up_report;
 int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap, csize;
 int reentrant, reentrant_bison_pure;
@@ -746,6 +746,8 @@ int exit_status;
                        putc( 'i', stderr );
                if ( lex_compat )
                        putc( 'l', stderr );
+               if ( posix_compat )
+                       putc( 'X', stderr );
                if ( performance_report > 0 )
                        putc( 'p', stderr );
                if ( performance_report > 1 )
@@ -923,7 +925,7 @@ char **argv;
         scanopt_t sopt;
 
        printstats = syntaxerror = trace = spprdflt = caseins = false;
-       lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
+       lex_compat = posix_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false;
        fullspd = long_align = nowarn = yymore_used = continued_action = false;
        do_yylineno = yytext_is_array = in_rule = reject = do_stdinit = false;
        yymore_really_used = reject_really_used = unspecified;
@@ -1068,6 +1070,10 @@ char **argv;
                     lex_compat = true;
                     break;
 
+            case OPT_POSIX_COMPAT:
+                    posix_compat = true;
+                    break;
+
             case OPT_MAIN:
                     buf_strdefine(&userdef_buf, "YY_MAIN", "1");
                     do_yywrap = false;
@@ -1743,6 +1749,7 @@ _(
 "  -B, --batch             generate batch scanner (opposite of -I)\n"
 "  -i, --case-insensitive  ignore case in patterns\n"
 "  -l, --lex-compat        maximal compatibility with original lex\n"
+"  -X, --posix-compat      maximal compatibility with POSIX lex\n"
 "  -I, --interactive       generate interactive scanner (opposite of -B)\n"
 "      --yylineno          track line count in yylineno\n"
 
index 2d4ba3a69d7a02c9f1037b8ff79be6c9f5135520..2091dba8ee7e834649a46b72b902300d90d8e584 100644 (file)
--- a/options.c
+++ b/options.c
@@ -81,6 +81,8 @@ optspec_t  flexopts[] = {
 {"--interactive",       OPT_INTERACTIVE,0},/* Generate interactive scanner (opposite of -B). */
 {"-l",                  OPT_LEX_COMPAT,0},
 {"--lex-compat",        OPT_LEX_COMPAT,0},/* Maximal compatibility with original lex. */
+{"-X",                  OPT_POSIX_COMPAT,0},
+{"--posix-compat",      OPT_POSIX_COMPAT,0},/* Maximal compatibility with POSIX lex. */
 {"-L",                  OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
 {"--noline",            OPT_NO_LINE,0},/* Suppress #line directives in scanner. */
 {"--main",              OPT_MAIN,0}, /* use built-in main() function. */
index 3488c68d0dfd5843b585e33ca38fccb7dd6f5088..afe0e4a4d1c65fc1181a3ca2dfd4be9829ff9f24 100644 (file)
--- a/options.h
+++ b/options.h
@@ -60,6 +60,7 @@ enum flexopt_flag_t {
     OPT_HELP,
     OPT_INTERACTIVE,
     OPT_LEX_COMPAT,
+    OPT_POSIX_COMPAT,
     OPT_MAIN,
     OPT_META_ECS,
     OPT_NEVER_INTERACTIVE,
diff --git a/parse.y b/parse.y
index 0645b7a03ccdeec972281b5eca45fd0d9867828f..849536b88b0027a9f859d19219d92e6aa4fd86d9 100644 (file)
--- a/parse.y
+++ b/parse.y
@@ -6,6 +6,25 @@
 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
 
+/*
+ * POSIX and AT&T lex place the precedence of the repeat operator, {},
+ * below that of concatenation.  Thus, ab{3} is ababab.  Most other
+ * POSIX utilities use an Extended Regular Expression (ERE) precedence
+ * that has the repeat operator higher than concatenation.  This causes
+ * ab{3} to yield abbb.
+ *
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'.  The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * is specified.  Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
 %{
 /*  Copyright (c) 1990 The Regents of the University of California. */
 /*  All rights reserved. */
@@ -152,7 +171,7 @@ sect1               :  sect1 startconddecl namelist1
                |  sect1 options
                |
                |  error
-                       { synerr( "unknown error processing section 1" ); }
+                       { synerr( _("unknown error processing section 1") ); }
                ;
 
 sect1end       :  SECTEND
@@ -177,7 +196,7 @@ namelist1   :  namelist1 NAME
                        { scinstal( nmstr, xcluflg ); }
 
                |  error
-                       { synerr( "bad start condition list" ); }
+                       { synerr( _("bad start condition list") ); }
                ;
 
 options                :  OPTION_OP optionlist
@@ -304,7 +323,7 @@ flexrule    :  '^' rule
                        }
 
                |  error
-                       { synerr( "unrecognized rule" ); }
+                       { synerr( _("unrecognized rule") ); }
                ;
 
 scon_stk_ptr   :
@@ -340,7 +359,7 @@ namelist2   :  namelist2 ',' sconname
                |  sconname
 
                |  error
-                       { synerr( "bad start condition list" ); }
+                       { synerr( _("bad start condition list") ); }
                ;
 
 sconname       :  NAME
@@ -422,7 +441,7 @@ rule                :  re2 re
                        }
 
                |  re2 re '$'
-                       { synerr( "trailing context used twice" ); }
+                       { synerr( _("trailing context used twice") ); }
 
                |  re '$'
                        {
@@ -435,7 +454,7 @@ rule                :  re2 re
 
                        if ( trlcontxt )
                                {
-                               synerr( "trailing context used twice" );
+                               synerr( _("trailing context used twice") );
                                $$ = mkstate( SYM_EPSILON );
                                }
 
@@ -504,7 +523,7 @@ re2         :  re '/'
                         */
 
                        if ( trlcontxt )
-                               synerr( "trailing context used twice" );
+                               synerr( _("trailing context used twice") );
                        else
                                trlcontxt = true;
 
@@ -533,6 +552,69 @@ series             :  series singleton
 
                |  singleton
                        { $$ = $1; }
+
+               |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+                       {
+                       varlength = true;
+
+                       if ( $3 > $5 || $3 < 0 )
+                               {
+                               synerr( _("bad iteration values") );
+                               $$ = $1;
+                               }
+                       else
+                               {
+                               if ( $3 == 0 )
+                                       {
+                                       if ( $5 <= 0 )
+                                               {
+                                               synerr(
+                                               _("bad iteration values") );
+                                               $$ = $1;
+                                               }
+                                       else
+                                               $$ = mkopt(
+                                                       mkrep( $1, 1, $5 ) );
+                                       }
+                               else
+                                       $$ = mkrep( $1, $3, $5 );
+                               }
+                       }
+
+               |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+                       {
+                       varlength = true;
+
+                       if ( $3 <= 0 )
+                               {
+                               synerr( _("iteration value must be positive") );
+                               $$ = $1;
+                               }
+
+                       else
+                               $$ = mkrep( $1, $3, INFINITY );
+                       }
+
+               |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+                       {
+                       /* The series could be something like "(foo)",
+                        * in which case we have no idea what its length
+                        * is, so we punt here.
+                        */
+                       varlength = true;
+
+                       if ( $3 <= 0 )
+                               {
+                                 synerr( _("iteration value must be positive")
+                                         );
+                               $$ = $1;
+                               }
+
+                       else
+                               $$ = link_machines( $1,
+                                               copysingl( $1, $3 - 1 ) );
+                       }
+
                ;
 
 singleton      :  singleton '*'
@@ -554,13 +636,13 @@ singleton :  singleton '*'
                        $$ = mkopt( $1 );
                        }
 
-               |  singleton '{' NUMBER ',' NUMBER '}'
+               |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
                        {
                        varlength = true;
 
                        if ( $3 > $5 || $3 < 0 )
                                {
-                               synerr( "bad iteration values" );
+                               synerr( _("bad iteration values") );
                                $$ = $1;
                                }
                        else
@@ -570,7 +652,7 @@ singleton   :  singleton '*'
                                        if ( $5 <= 0 )
                                                {
                                                synerr(
-                                               "bad iteration values" );
+                                               _("bad iteration values") );
                                                $$ = $1;
                                                }
                                        else
@@ -582,13 +664,13 @@ singleton :  singleton '*'
                                }
                        }
 
-               |  singleton '{' NUMBER ',' '}'
+               |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
                        {
                        varlength = true;
 
                        if ( $3 <= 0 )
                                {
-                               synerr( "iteration value must be positive" );
+                               synerr( _("iteration value must be positive") );
                                $$ = $1;
                                }
 
@@ -596,7 +678,7 @@ singleton   :  singleton '*'
                                $$ = mkrep( $1, $3, INFINITY );
                        }
 
-               |  singleton '{' NUMBER '}'
+               |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
                        {
                        /* The singleton could be something like "(foo)",
                         * in which case we have no idea what its length
@@ -606,7 +688,7 @@ singleton   :  singleton '*'
 
                        if ( $3 <= 0 )
                                {
-                               synerr( "iteration value must be positive" );
+                               synerr( _("iteration value must be positive") );
                                $$ = $1;
                                }
 
@@ -700,7 +782,7 @@ ccl         :  ccl CHAR '-' CHAR
                                }
 
                        if ( $2 > $4 )
-                               synerr( "negative range in character class" );
+                               synerr( _("negative range in character class") );
 
                        else
                                {
diff --git a/scan.l b/scan.l
index e86132f544f997892b10bcc00f3996e32bfb5462..723704c40825e85c159a9a6943636c0fc0bb72e8 100644 (file)
--- a/scan.l
+++ b/scan.l
@@ -246,6 +246,7 @@ LEXOPT              [aceknopr]
        input           ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
        interactive     interactive = option_sense;
        lex-compat      lex_compat = option_sense;
+       posix-compat    posix_compat = option_sense;
        main            {
                        action_define( "YY_MAIN", option_sense );
             /* Override yywrap */
@@ -362,7 +363,13 @@ LEXOPT             [aceknopr]
        ^{OPTWS}"<"     BEGIN(SC); return '<';
        ^{OPTWS}"^"     return '^';
        \"              BEGIN(QUOTE); return '"';
-       "{"/[[:digit:]] BEGIN(NUM); return '{';
+       "{"/[[:digit:]] {
+                       BEGIN(NUM);
+                       if ( lex_compat || posix_compat )
+                               return BEGIN_REPEAT_POSIX;
+                       else
+                               return BEGIN_REPEAT_FLEX;
+                       }
        "$"/([[:blank:]]|{NL})  return '$';
 
        {WS}"%{"                {
@@ -573,7 +580,13 @@ LEXOPT             [aceknopr]
                        }
 
        ","             return ',';
-       "}"             BEGIN(SECT2); return '}';
+       "}"             {
+                       BEGIN(SECT2);
+                       if ( lex_compat || posix_compat )
+                               return END_REPEAT_POSIX;
+                       else
+                               return END_REPEAT_FLEX;
+                       }
 
        .               {
                        synerr( _( "bad character inside {}'s" ) );