Documented negated character class expressions.
Added regression test for negated character class expressions.
tests/test-rescan-nr/Makefile
tests/test-rescan-r/Makefile
tests/test-quotes/Makefile
+tests/test-ccl/Makefile
dnl --new-test-here-- This line is processed by tests/create-test.
)
@end verbatim
@end example
-Some notes on patterns are in order.
+A word of caution. Character classes are expanded immediately when seen in the @code{flex} input.
+This means the character classes are sensitive to the locale in which @code{flex}
+is executed, and the resulting scanner will not be sensitive to the runtime locale.
+This may or may not be desirable.
@itemize
newlines means that a pattern like @samp{[^"]*} can match the entire
input unless there's another quote in the input.
+Flex allows negation of character class expressions by prepending @samp{^} to
+the POSIX character class name.
+
+@example
+@verbatim
+ [:^alnum:] [:^alpha:] [:^blank:]
+ [:^cntrl:] [:^digit:] [:^graph:]
+ [:^lower:] [:^print:] [:^punct:]
+ [:^space:] [:^upper:] [:^xdigit:]
+@end verbatim
+@end example
+
+Flex will issue a warning if the expressions @samp{[:^upper:]} and
+@samp{[:^lower:]} appear in a case-insensitive scanner, since their meaning is
+unclear. The current behavior is to skip them entirely, but this may change
+without notice in future revisions of flex.
+
@cindex trailing context, limits of
@cindex ^ as non-special character in patterns
@cindex $ as normal character in patterns
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
/*
*POSIX and AT&T lex place the
* precedence of the repeat operator, {}, below that of concatenation.
ccladd( currccl, c ); \
}while(0)
+/* negated class */
+#define CCL_NEG_EXPR(func) \
+ do{ \
+ int c; \
+ for ( c = 0; c < csize; ++c ) \
+ if ( !func(c) ) \
+ ccladd( currccl, c ); \
+ }while(0)
+
/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
}
;
-ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum); }
+ccl_expr:
+ CCE_ALNUM { CCL_EXPR(isalnum); }
| CCE_ALPHA { CCL_EXPR(isalpha); }
| CCE_BLANK { CCL_EXPR(IS_BLANK); }
| CCE_CNTRL { CCL_EXPR(iscntrl); }
| CCE_PRINT { CCL_EXPR(isprint); }
| CCE_PUNCT { CCL_EXPR(ispunct); }
| CCE_SPACE { CCL_EXPR(isspace); }
+ | CCE_XDIGIT { CCL_EXPR(isxdigit); }
| CCE_UPPER {
if ( caseins )
CCL_EXPR(islower);
else
CCL_EXPR(isupper);
}
- | CCE_XDIGIT { CCL_EXPR(isxdigit); }
+
+ | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); }
+ | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); }
+ | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); }
+ | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); }
+ | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); }
+ | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); }
+ | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); }
+ | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); }
+ | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); }
+ | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); }
+ | CCE_NEG_LOWER {
+ if ( caseins )
+ warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
+ else
+ CCL_NEG_EXPR(islower);
+ }
+ | CCE_NEG_UPPER {
+ if ( caseins )
+ warn(_("[:^upper:] ambiguous in case insensitive scanner"));
+ else
+ CCL_NEG_EXPR(isupper);
+ }
;
string : string CHAR
FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
CCL_CHAR ([^\\\n\]]|{ESCSEQ})
-CCL_EXPR ("[:"[[:alpha:]]+":]")
+CCL_EXPR ("[:"^?[[:alpha:]]+":]")
LEXOPT [aceknopr]
"[:space:]" BEGIN(CCL); return CCE_SPACE;
"[:upper:]" BEGIN(CCL); return CCE_UPPER;
"[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
+
+ "[:^alnum:]" BEGIN(CCL); return CCE_NEG_ALNUM;
+ "[:^alpha:]" BEGIN(CCL); return CCE_NEG_ALPHA;
+ "[:^blank:]" BEGIN(CCL); return CCE_NEG_BLANK;
+ "[:^cntrl:]" BEGIN(CCL); return CCE_NEG_CNTRL;
+ "[:^digit:]" BEGIN(CCL); return CCE_NEG_DIGIT;
+ "[:^graph:]" BEGIN(CCL); return CCE_NEG_GRAPH;
+ "[:^lower:]" BEGIN(CCL); return CCE_NEG_LOWER;
+ "[:^print:]" BEGIN(CCL); return CCE_NEG_PRINT;
+ "[:^punct:]" BEGIN(CCL); return CCE_NEG_PUNCT;
+ "[:^space:]" BEGIN(CCL); return CCE_NEG_SPACE;
+ "[:^upper:]" BEGIN(CCL); return CCE_NEG_UPPER;
+ "[:^xdigit:]" BEGIN(CCL); return CCE_NEG_XDIGIT;
{CCL_EXPR} {
format_synerr(
_( "bad character class expression: %s" ),
create-test
DIST_SUBDIRS = \
+ test-ccl \
test-quotes \
test-rescan-r \
test-rescan-nr \
test-table-opts
SUBDIRS = \
+ test-ccl \
test-quotes \
test-rescan-r \
test-rescan-nr \
bison-nr - Ordinary bison-bridge.
bison-yylloc - Reentrant scanner + pure parser. Requires bison.
bison-yylval - Reentrant scanner + pure parser. Requires bison.
+ccl - Character classes.
c-cpp-nr - Compile a C scanner with C++ compiler, nonreentrant.
c-cpp-r - Compile a C scanner with C++ compiler, reentrant.
c++-basic - The C++ scanner.
--- /dev/null
+Makefile
+Makefile.in
+parser.c
+parser.h
+scanner.c
+TEMPLATE
+OUTPUT
+.deps
+test-ccl
--- /dev/null
+# This file is part of flex.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+
+# Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE.
+
+FLEX = $(top_builddir)/flex
+
+builddir = @builddir@
+
+EXTRA_DIST = scanner.l test.input
+CLEANFILES = scanner.c scanner.h test-ccl OUTPUT $(OBJS)
+OBJS = scanner.o
+
+AM_CPPFLAGS = -I$(srcdir) -I$(builddir) -I$(top_srcdir) -I$(top_builddir)
+
+testname = test-ccl
+
+scanner.c: $(srcdir)/scanner.l
+ $(FLEX) $(LFLAGS) $<
+
+$(testname)$(EXEEXT): $(OBJS)
+ $(CC) -o $@ $(LDFLAGS) $(OBJS) $(LOADLIBES)
+
+test: $(testname)$(EXEEXT)
+ ./$(testname)$(EXEEXT) < $(srcdir)/test.input
+
+.c.o:
+ $(CC) -c -o $@ $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $<
--- /dev/null
+/*
+ * This file is part of flex.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+%{
+/* A template scanner file to build "scanner.c". */
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+/*#include "parser.h" */
+
+%}
+
+%option 8bit outfile="scanner.c" prefix="test"
+%option nounput nomain noyywrap
+%option warn
+
+
+%%
+
+"^alpha:"[[:^alpha:]]+@alpha@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^digit:"[[:^digit:]]+@digit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^alnum:"[[:^alnum:]]+@alnum@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^upper:"[[:^upper:]]+@upper@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^lower:"[[:^lower:]]+@lower@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^space:"[[:^space:]]+@space@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^blank:"[[:^blank:]]+@blank@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^punct:"[[:^punct:]]+@punct@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^cntrl:"[[:^cntrl:]]+@cntrl@\n printf("OK: %s", yytext); ++yylineno; return 1;
+"^xdigit:"[[:^xdigit:]]+@xdigit@\n printf("OK: %s", yytext); ++yylineno; return 1;
+
+.|\n {
+ printf("ERROR: at line %d\n", yylineno);
+ abort();
+ }
+%%
+
+int main(void);
+
+int
+main ()
+{
+ yyin = stdin;
+ yyout = stdout;
+ while (yylex())
+ ;
+ printf("TEST RETURNING OK.\n");
+ return 0;
+}
--- /dev/null
+^alpha:0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alpha@
+^digit:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@digit@
+^alnum:~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alnum@
+^upper:abcdefghijklmnopqrstuvwxyz0123456789 ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@upper@
+^lower:ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@lower@
+^space:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@space@
+^blank:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@blank@
+^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF Z@punct@
+^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@
+^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@