From: Will Estes Date: Wed, 19 Jun 2002 13:26:43 +0000 (+0000) Subject: address typos in NEWS; add --posix option for ERE parsing the way posix wants it... X-Git-Tag: flex-2-5-10~57 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5916310e885f01716646b84bd6eecdb9a7d502ae;p=flex address typos in NEWS; add --posix option for ERE parsing the way posix wants it; update the TODO file --- diff --git a/NEWS b/NEWS index 4eb8cf4..bc92806 100644 --- a/NEWS +++ b/NEWS @@ -3,10 +3,19 @@ changes between releases of flex See the file COPYING for copying conditions +* version 2.5.8 + +** a new --posix option generates scanners with posix-style abc{1,3} + compatible parsing, see manual for the screwy details + * version 2.5.7 +** configure.in now includes a call to AC_PREREQ to enforce the + requirement for autoconf at least 2.50 (This only affects you if + you're doing flex development.) + ** configure now uses autoconf's versioning information and configure -** --help reports the bug-reporting address for flex + --help reports the bug-reporting address for flex ** test suite now only reports success versus failure; reporting skipped is problematic under the current setup diff --git a/TODO b/TODO index 878f93f..55d8ebe 100644 --- a/TODO +++ b/TODO @@ -5,24 +5,24 @@ * the manual: ** integrate the items in the faqs/ directory into the manual or code - or similar + or similar (millaway is working on this; much of the FAQS directory + is old or uninformative) *** create a section on flex design, features, etc. 
-* repackage the distribution +* address lex-replacement: document or provide an option through + configure for creating lex and libl.a files (but remember this has + posix implications) -** address lex-replacement: document or provide an option through - configure for creating lex and libl.a files - -*** decide what to do about abc{1,3} being broken in the posix standard +* gettext ** update gettext support from gettext-0.11 to gettext-0.11.2 ** make sure all flex modules use gettext translation facilities -** subdirectories +* subdirectories -*** in examples/manual, integrate the Makefile.examples into the +** in examples/manual, integrate the Makefile.examples into the Makefile.am * test suite @@ -33,7 +33,7 @@ tests/. millaway has assigned the rights to the test suite to me and so the test suite will be under the flex license. -* generic coding issues +* generic coding ** move as much skeleton code as possible out of gen.c and into flex.skl diff --git a/flex.texi b/flex.texi index 5fa04a3..21ff20c 100644 --- a/flex.texi +++ b/flex.texi @@ -610,7 +610,9 @@ operators, @samp{-}, @samp{]]}, and, at the beginning of the class, @samp{^}. @cindex patterns, precedence of operators The regular expressions listed above are grouped according to precedence, from highest precedence at the top to lowest at the bottom. -Those grouped together have equal precedence. For example, +Those grouped together have equal precedence (see special note on the +precedence of the repeat operator, @samp{@{@}}, under the documentation +for the @samp{--posix-compat} POSIX compliance option). For example, @exindex patterns, grouping and precedence @example @@ -2376,6 +2378,27 @@ option is mostly for use in maintaining @code{flex}. @item -V, --version prints the version number to @file{stdout} and exits. +@item -X, --posix-compat +turns on maximum compatibility with the POSIX 1003.2-1992 definition of +@code{lex}. 
Since @code{flex} was originally designed to implement the +POSIX definition of @code{lex}, this generally involves very few changes +in behavior. At the time of this writing the known differences between +@code{flex} and the POSIX standard are: + +@itemize +@item +In POSIX and AT&T @code{lex}, the repeat operator, @samp{@{@}}, has lower +precedence than concatenation (thus @samp{ab@{3@}} yields @samp{ababab}). +Most POSIX utilities use an Extended Regular Expression (ERE) precedence +that has the precedence of the repeat operator higher than concatenation +(which causes @samp{ab@{3@}} to yield @samp{abbb}). By default, @code{flex} +places the precedence of the repeat operator higher than concatenation +which matches the ERE processing of other POSIX utilities. When either +@samp{--posix-compat} or @samp{-l} is specified, @code{flex} will use the +traditional AT&T and POSIX-compliant precedence for the repeat operator +where concatenation has higher precedence than the repeat operator. +@end itemize + @item -7, --7bit instructs @code{flex} to generate a 7-bit scanner, i.e., one which can only recognize 7-bit characters in its input. The advantage of using @@ -4045,10 +4068,12 @@ This is not the case with @code{lex} or the POSIX specification. The @item The precedence of the @samp{@{,@}} (numeric range) operator is -different. @code{lex} interprets @samp{abc@{1,3@}} as match one, two, +different. The AT&T and POSIX specifications of @code{lex} +interpret @samp{abc@{1,3@}} as ``match one, two, or three occurrences of @samp{abc}'', whereas @code{flex} interprets it as ``match @samp{ab} followed by one, two, or three occurrences of -@samp{c}''. The former is in agreement with the POSIX specification. +@samp{c}''. The @samp{-l} and @samp{--posix-compat} options do away with this +incompatibility. @item The precedence of the @samp{^} operator is different. 
@code{lex} diff --git a/flexdef.h b/flexdef.h index 83843a1..93f7e10 100644 --- a/flexdef.h +++ b/flexdef.h @@ -333,6 +333,7 @@ * interactive - if true (-I), generate an interactive scanner * caseins - if true (-i), generate a case-insensitive scanner * lex_compat - if true (-l), maximize compatibility with AT&T lex + * posix_compat - if true (-X), maximize compatibility with POSIX lex * do_yylineno - if true, generate code to maintain yylineno * useecs - if true (-Ce flag), use equivalence classes * fulltbl - if true (-Cf flag), don't compress the DFA state table @@ -370,7 +371,7 @@ */ extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; -extern int interactive, caseins, lex_compat, do_yylineno; +extern int interactive, caseins, lex_compat, posix_compat, do_yylineno; extern int useecs, fulltbl, usemecs, fullspd; extern int gen_line_dirs, performance_report, backing_up_report; extern int reentrant, reentrant_bison_pure; diff --git a/main.c b/main.c index b82aee4..275b1e0 100644 --- a/main.c +++ b/main.c @@ -54,7 +54,7 @@ static char * basename2 PROTO((char* path, int should_strip_ext)); /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; -int interactive, caseins, lex_compat, do_yylineno, useecs, fulltbl, usemecs; +int interactive, caseins, lex_compat, posix_compat, do_yylineno, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap, csize; int reentrant, reentrant_bison_pure; @@ -746,6 +746,8 @@ int exit_status; putc( 'i', stderr ); if ( lex_compat ) putc( 'l', stderr ); + if ( posix_compat ) + putc( 'X', stderr ); if ( performance_report > 0 ) putc( 'p', stderr ); if ( performance_report > 1 ) @@ -923,7 +925,7 @@ char **argv; scanopt_t sopt; printstats = syntaxerror = trace = spprdflt = caseins = false; - lex_compat = C_plus_plus = backing_up_report = 
ddebug = fulltbl = false; + lex_compat = posix_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false; fullspd = long_align = nowarn = yymore_used = continued_action = false; do_yylineno = yytext_is_array = in_rule = reject = do_stdinit = false; yymore_really_used = reject_really_used = unspecified; @@ -1068,6 +1070,10 @@ char **argv; lex_compat = true; break; + case OPT_POSIX_COMPAT: + posix_compat = true; + break; + case OPT_MAIN: buf_strdefine(&userdef_buf, "YY_MAIN", "1"); do_yywrap = false; @@ -1743,6 +1749,7 @@ _( " -B, --batch generate batch scanner (opposite of -I)\n" " -i, --case-insensitive ignore case in patterns\n" " -l, --lex-compat maximal compatibility with original lex\n" +" -X, --posix-compat maximal compatibility with POSIX lex\n" " -I, --interactive generate interactive scanner (opposite of -B)\n" " --yylineno track line count in yylineno\n" diff --git a/options.c b/options.c index 2d4ba3a..2091dba 100644 --- a/options.c +++ b/options.c @@ -81,6 +81,8 @@ optspec_t flexopts[] = { {"--interactive", OPT_INTERACTIVE,0},/* Generate interactive scanner (opposite of -B). */ {"-l", OPT_LEX_COMPAT,0}, {"--lex-compat", OPT_LEX_COMPAT,0},/* Maximal compatibility with original lex. */ +{"-X", OPT_POSIX_COMPAT,0}, +{"--posix-compat", OPT_POSIX_COMPAT,0},/* Maximal compatibility with POSIX lex. */ {"-L", OPT_NO_LINE,0},/* Suppress #line directives in scanner. */ {"--noline", OPT_NO_LINE,0},/* Suppress #line directives in scanner. */ {"--main", OPT_MAIN,0}, /* use built-in main() function. 
*/ diff --git a/options.h b/options.h index 3488c68..afe0e4a 100644 --- a/options.h +++ b/options.h @@ -60,6 +60,7 @@ enum flexopt_flag_t { OPT_HELP, OPT_INTERACTIVE, OPT_LEX_COMPAT, + OPT_POSIX_COMPAT, OPT_MAIN, OPT_META_ECS, OPT_NEVER_INTERACTIVE, diff --git a/parse.y b/parse.y index 0645b7a..849536b 100644 --- a/parse.y +++ b/parse.y @@ -6,6 +6,25 @@ %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT +/* + *POSIX and AT&T lex place the + * precedence of the repeat operator, {}, below that of concatenation. + * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended + * Regular Expression (ERE) precedence that has the repeat operator + * higher than concatenation. This causes ab{3} to yield abbb. + * + * In order to support the POSIX and AT&T precedence and the flex + * precedence we define two token sets for the begin and end tokens of + * the repeat operator, '{' and '}'. The lexical scanner chooses + * which tokens to return based on whether posix_compat or lex_compat + * are specified. Specifying either posix_compat or lex_compat will + * cause flex to parse scanner files as per the AT&T and + * POSIX-mandated behavior. + */ + +%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX + + %{ /* Copyright (c) 1990 The Regents of the University of California. */ /* All rights reserved. 
*/ @@ -152,7 +171,7 @@ sect1 : sect1 startconddecl namelist1 | sect1 options | | error - { synerr( "unknown error processing section 1" ); } + { synerr( _("unknown error processing section 1") ); } ; sect1end : SECTEND @@ -177,7 +196,7 @@ namelist1 : namelist1 NAME { scinstal( nmstr, xcluflg ); } | error - { synerr( "bad start condition list" ); } + { synerr( _("bad start condition list") ); } ; options : OPTION_OP optionlist @@ -304,7 +323,7 @@ flexrule : '^' rule } | error - { synerr( "unrecognized rule" ); } + { synerr( _("unrecognized rule") ); } ; scon_stk_ptr : @@ -340,7 +359,7 @@ namelist2 : namelist2 ',' sconname | sconname | error - { synerr( "bad start condition list" ); } + { synerr( _("bad start condition list") ); } ; sconname : NAME @@ -422,7 +441,7 @@ rule : re2 re } | re2 re '$' - { synerr( "trailing context used twice" ); } + { synerr( _("trailing context used twice") ); } | re '$' { @@ -435,7 +454,7 @@ rule : re2 re if ( trlcontxt ) { - synerr( "trailing context used twice" ); + synerr( _("trailing context used twice") ); $$ = mkstate( SYM_EPSILON ); } @@ -504,7 +523,7 @@ re2 : re '/' */ if ( trlcontxt ) - synerr( "trailing context used twice" ); + synerr( _("trailing context used twice") ); else trlcontxt = true; @@ -533,6 +552,69 @@ series : series singleton | singleton { $$ = $1; } + + | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX + { + varlength = true; + + if ( $3 > $5 || $3 < 0 ) + { + synerr( _("bad iteration values") ); + $$ = $1; + } + else + { + if ( $3 == 0 ) + { + if ( $5 <= 0 ) + { + synerr( + _("bad iteration values") ); + $$ = $1; + } + else + $$ = mkopt( + mkrep( $1, 1, $5 ) ); + } + else + $$ = mkrep( $1, $3, $5 ); + } + } + + | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX + { + varlength = true; + + if ( $3 <= 0 ) + { + synerr( _("iteration value must be positive") ); + $$ = $1; + } + + else + $$ = mkrep( $1, $3, INFINITY ); + } + + | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX + { + /* The series 
could be something like "(foo)", + * in which case we have no idea what its length + * is, so we punt here. + */ + varlength = true; + + if ( $3 <= 0 ) + { + synerr( _("iteration value must be positive") + ); + $$ = $1; + } + + else + $$ = link_machines( $1, + copysingl( $1, $3 - 1 ) ); + } + ; singleton : singleton '*' @@ -554,13 +636,13 @@ singleton : singleton '*' $$ = mkopt( $1 ); } - | singleton '{' NUMBER ',' NUMBER '}' + | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX { varlength = true; if ( $3 > $5 || $3 < 0 ) { - synerr( "bad iteration values" ); + synerr( _("bad iteration values") ); $$ = $1; } else @@ -570,7 +652,7 @@ singleton : singleton '*' if ( $5 <= 0 ) { synerr( - "bad iteration values" ); + _("bad iteration values") ); $$ = $1; } else @@ -582,13 +664,13 @@ singleton : singleton '*' } } - | singleton '{' NUMBER ',' '}' + | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX { varlength = true; if ( $3 <= 0 ) { - synerr( "iteration value must be positive" ); + synerr( _("iteration value must be positive") ); $$ = $1; } @@ -596,7 +678,7 @@ singleton : singleton '*' $$ = mkrep( $1, $3, INFINITY ); } - | singleton '{' NUMBER '}' + | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX { /* The singleton could be something like "(foo)", * in which case we have no idea what its length @@ -606,7 +688,7 @@ singleton : singleton '*' if ( $3 <= 0 ) { - synerr( "iteration value must be positive" ); + synerr( _("iteration value must be positive") ); $$ = $1; } @@ -700,7 +782,7 @@ ccl : ccl CHAR '-' CHAR } if ( $2 > $4 ) - synerr( "negative range in character class" ); + synerr( _("negative range in character class") ); else { diff --git a/scan.l b/scan.l index e86132f..723704c 100644 --- a/scan.l +++ b/scan.l @@ -246,6 +246,7 @@ LEXOPT [aceknopr] input ACTION_IFDEF("YY_NO_INPUT", ! 
option_sense); interactive interactive = option_sense; lex-compat lex_compat = option_sense; + posix-compat posix_compat = option_sense; main { action_define( "YY_MAIN", option_sense ); /* Override yywrap */ @@ -362,7 +363,13 @@ LEXOPT [aceknopr] ^{OPTWS}"<" BEGIN(SC); return '<'; ^{OPTWS}"^" return '^'; \" BEGIN(QUOTE); return '"'; - "{"/[[:digit:]] BEGIN(NUM); return '{'; + "{"/[[:digit:]] { + BEGIN(NUM); + if ( lex_compat || posix_compat ) + return BEGIN_REPEAT_POSIX; + else + return BEGIN_REPEAT_FLEX; + } "$"/([[:blank:]]|{NL}) return '$'; {WS}"%{" { @@ -573,7 +580,13 @@ LEXOPT [aceknopr] } "," return ','; - "}" BEGIN(SECT2); return '}'; + "}" { + BEGIN(SECT2); + if ( lex_compat || posix_compat ) + return END_REPEAT_POSIX; + else + return END_REPEAT_FLEX; + } . { synerr( _( "bad character inside {}'s" ) );