From: John Millaway Date: Mon, 16 Dec 2002 23:33:08 +0000 (+0000) Subject: Fixed bug submitted by Bruce Lilly where character X-Git-Tag: flex-2-5-26~17 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=604e6483d56d1da0a19491c2a5a35f7bf048afc9;p=flex Fixed bug submitted by Bruce Lilly where character ranges would yield unexpected behavior in a caseless scanner. Also, flex now emits a warning if the range looks like trouble. --- diff --git a/parse.y b/parse.y index 5e6167c..4cde223 100644 --- a/parse.y +++ b/parse.y @@ -109,6 +109,13 @@ int scon_stk_ptr; static int madeany = false; /* whether we've made the '.' character class */ int previous_continued_action; /* whether the previous rule's action was '|' */ +#define format_warn3(fmt, a1, a2) \ + do{ \ + char fw3_msg[MAXLINE];\ + snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\ + warn( fw3_msg );\ + }while(0) + /* Expand a POSIX character class expression. */ #define CCL_EXPR(func) \ do{ \ @@ -787,13 +794,39 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { - if ( caseins ) - { - if ( $2 >= 'A' && $2 <= 'Z' ) - $2 = clower( $2 ); - if ( $4 >= 'A' && $4 <= 'Z' ) - $4 = clower( $4 ); - } + + if (caseins) + { + /* Squish the character range to lowercase only if BOTH + * ends of the range are uppercase. + */ + if (isupper ($2) && isupper ($4)) + { + $2 = tolower ($2); + $4 = tolower ($4); + } + + /* If one end of the range has case and the other + * does not, or the cases are different, then we're not + * sure what range the user is trying to express. + * Examples: [@-z] or [S-t] + */ + else if (has_case ($2) != has_case ($4) + || (has_case ($2) && (b_islower ($2) != b_islower ($4)))) + format_warn3 ( + _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), + $2, $4); + + /* If the range spans uppercase characters but not + * lowercase (or vice-versa), then should we automatically + * include lowercase characters in the range? 
+ * Example: [@-_] spans [A-Z] but not [a-z] + */ + else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4)) + format_warn3 ( + _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"), + $2, $4); + } if ( $2 > $4 ) synerr( _("negative range in character class") );