]> granicus.if.org Git - flex/commitdiff
Added negated character class expressions.
authorJohn Millaway <john43@users.sourceforge.net>
Wed, 22 Mar 2006 16:04:19 +0000 (16:04 +0000)
committerJohn Millaway <john43@users.sourceforge.net>
Wed, 22 Mar 2006 16:04:19 +0000 (16:04 +0000)
Documented negated character class expressions.
Added regression test for negated character class expressions.

configure.in
doc/flex.texi
parse.y
scan.l
tests/Makefile.am
tests/descriptions
tests/test-ccl/.cvsignore [new file with mode: 0644]
tests/test-ccl/Makefile.am [new file with mode: 0644]
tests/test-ccl/scanner.l [new file with mode: 0644]
tests/test-ccl/test.input [new file with mode: 0644]

index 27d5223f344336683eb3c1a411ac12383a2d06dd..dd6abed3b2c4a5bb73c44a52a8d3a00c242b6f31 100644 (file)
@@ -146,6 +146,7 @@ tests/test-top/Makefile
 tests/test-rescan-nr/Makefile
 tests/test-rescan-r/Makefile
 tests/test-quotes/Makefile
+tests/test-ccl/Makefile
 dnl --new-test-here-- This line is processed by tests/create-test.
 )
 
index 130cf096468a5844cc38ac5ff05dfdcdb8fad559..c43d3ee8984c88387cbe3ef972fc18b8a1862819 100644 (file)
@@ -886,7 +886,10 @@ For example, the following character classes are all equivalent:
 @end verbatim
 @end example
 
-Some notes on patterns are in order.
+A word of caution: character class expressions are expanded immediately when seen in the @code{flex} input.
+This means the character class expressions are sensitive to the locale in which @code{flex}
+is executed, and the resulting scanner will not be sensitive to the runtime locale.
+This may or may not be desirable.
 
 
 @itemize
@@ -927,6 +930,23 @@ unfortunately the inconsistency is historically entrenched.  Matching
 newlines means that a pattern like @samp{[^"]*} can match the entire
 input unless there's another quote in the input.
 
+Flex allows negation of character class expressions by prepending @samp{^} to
+the POSIX character class name.
+
+@example
+@verbatim
+    [:^alnum:] [:^alpha:] [:^blank:]
+    [:^cntrl:] [:^digit:] [:^graph:]
+    [:^lower:] [:^print:] [:^punct:]
+    [:^space:] [:^upper:] [:^xdigit:]
+@end verbatim
+@end example
+
+Flex will issue a warning if either of the expressions @samp{[:^upper:]} or
+@samp{[:^lower:]} appears in a case-insensitive scanner, since their meaning is
+unclear. The current behavior is to skip them entirely, but this may change
+without notice in future revisions of @code{flex}.
+
 @cindex trailing context, limits of
 @cindex ^ as non-special character in patterns
 @cindex $ as normal character in patterns
diff --git a/parse.y b/parse.y
index 0f56e0479c3aba80f09e3d997594ebb254d4aec6..e7c79cfc496c331ba7b3fc1223f5cee99a3f58be 100644 (file)
--- a/parse.y
+++ b/parse.y
@@ -7,6 +7,8 @@
 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
 
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
 /*
  *POSIX and AT&T lex place the
  * precedence of the repeat operator, {}, below that of concatenation.
@@ -125,6 +127,15 @@ int previous_continued_action;     /* whether the previous rule's action was '|' */
                        ccladd( currccl, c ); \
        }while(0)
 
+/* Negated class: add to currccl every c in [0, csize) for which func(c) is false. */
+#define CCL_NEG_EXPR(func) \
+       do{ \
+       int c; \
+       for ( c = 0; c < csize; ++c ) \
+               if ( !func(c) ) \
+                       ccladd( currccl, c ); \
+       }while(0)
+
 /* While POSIX defines isblank(), it's not ANSI C. */
 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
 
@@ -872,7 +883,8 @@ ccl         :  ccl CHAR '-' CHAR
                        }
                ;
 
-ccl_expr:         CCE_ALNUM    { CCL_EXPR(isalnum); }
+ccl_expr:         
+           CCE_ALNUM   { CCL_EXPR(isalnum); }
                |  CCE_ALPHA    { CCL_EXPR(isalpha); }
                |  CCE_BLANK    { CCL_EXPR(IS_BLANK); }
                |  CCE_CNTRL    { CCL_EXPR(iscntrl); }
@@ -882,13 +894,36 @@ ccl_expr:    CCE_ALNUM    { CCL_EXPR(isalnum); }
                |  CCE_PRINT    { CCL_EXPR(isprint); }
                |  CCE_PUNCT    { CCL_EXPR(ispunct); }
                |  CCE_SPACE    { CCL_EXPR(isspace); }
+               |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
                |  CCE_UPPER    {
                                if ( caseins )
                                        CCL_EXPR(islower);
                                else
                                        CCL_EXPR(isupper);
                                }
-               |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
+
+        |  CCE_NEG_ALNUM       { CCL_NEG_EXPR(isalnum); }
+               |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
+               |  CCE_NEG_BLANK        { CCL_NEG_EXPR(IS_BLANK); }
+               |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
+               |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
+               |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
+               |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
+               |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
+               |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
+               |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
+               |  CCE_NEG_LOWER        { 
+                               if ( caseins )
+                                       warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
+                               else
+                                       CCL_NEG_EXPR(islower);
+                               }
+               |  CCE_NEG_UPPER        { /* case-insensitive: only warn, add nothing to the class */
+                               if ( caseins )
+                                       warn(_("[:^upper:] is ambiguous in case insensitive scanner"));
+                               else
+                                       CCL_NEG_EXPR(isupper);
+                               }
                ;
                
 string         :  string CHAR
diff --git a/scan.l b/scan.l
index cdbd3e83b4362bb31baac3aa1c0054a1e12d7958..cff53868ca52d8fedd55e21f53d278ba02fefc1a 100644 (file)
--- a/scan.l
+++ b/scan.l
@@ -117,7 +117,7 @@ ESCSEQ              (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
 
 FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
 CCL_CHAR       ([^\\\n\]]|{ESCSEQ})
-CCL_EXPR       ("[:"[[:alpha:]]+":]")
+CCL_EXPR       ("[:"^?[[:alpha:]]+":]")
 
 LEXOPT         [aceknopr]
 
@@ -708,6 +708,19 @@ nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
        "[:space:]"     BEGIN(CCL); return CCE_SPACE;
        "[:upper:]"     BEGIN(CCL); return CCE_UPPER;
        "[:xdigit:]"    BEGIN(CCL); return CCE_XDIGIT;
+
+       "[:^alnum:]"    BEGIN(CCL); return CCE_NEG_ALNUM;
+       "[:^alpha:]"    BEGIN(CCL); return CCE_NEG_ALPHA;
+       "[:^blank:]"    BEGIN(CCL); return CCE_NEG_BLANK;
+       "[:^cntrl:]"    BEGIN(CCL); return CCE_NEG_CNTRL;
+       "[:^digit:]"    BEGIN(CCL); return CCE_NEG_DIGIT;
+       "[:^graph:]"    BEGIN(CCL); return CCE_NEG_GRAPH;
+       "[:^lower:]"    BEGIN(CCL); return CCE_NEG_LOWER;
+       "[:^print:]"    BEGIN(CCL); return CCE_NEG_PRINT;
+       "[:^punct:]"    BEGIN(CCL); return CCE_NEG_PUNCT;
+       "[:^space:]"    BEGIN(CCL); return CCE_NEG_SPACE;
+       "[:^upper:]"    BEGIN(CCL); return CCE_NEG_UPPER;
+       "[:^xdigit:]"   BEGIN(CCL); return CCE_NEG_XDIGIT;
        {CCL_EXPR}      {
                        format_synerr(
                                _( "bad character class expression: %s" ),
index c1f636627a35f9e051ecb07f5d721da90d60fa64..064467493c70f58c9e5e45d13bca5d3555fc1b7f 100644 (file)
@@ -26,6 +26,7 @@ dist_noinst_SCRIPTS = \
        create-test
 
 DIST_SUBDIRS = \
+       test-ccl \
        test-quotes \
        test-rescan-r \
        test-rescan-nr \
@@ -70,6 +71,7 @@ DIST_SUBDIRS = \
        test-table-opts
 
 SUBDIRS = \
+       test-ccl \
        test-quotes \
        test-rescan-r \
        test-rescan-nr \
index a8c3ebf1f17fc00df202fb4ac4261ec88ff93244..c8f3aaf10cd1495cf3545df080011a361c69314c 100644 (file)
@@ -8,6 +8,7 @@ basic-r               - Simple scanner, reentrant.
 bison-nr              - Ordinary bison-bridge.
 bison-yylloc          - Reentrant scanner + pure parser. Requires bison.
 bison-yylval          - Reentrant scanner + pure parser. Requires bison.
+ccl                   - Character classes.
 c-cpp-nr              - Compile a C scanner with C++ compiler, nonreentrant.
 c-cpp-r               - Compile a C scanner with C++ compiler, reentrant.
 c++-basic             - The C++ scanner.
diff --git a/tests/test-ccl/.cvsignore b/tests/test-ccl/.cvsignore
new file mode 100644 (file)
index 0000000..2f65350
--- /dev/null
@@ -0,0 +1,9 @@
+Makefile
+Makefile.in
+parser.c
+parser.h
+scanner.c
+TEMPLATE
+OUTPUT
+.deps
+test-ccl
diff --git a/tests/test-ccl/Makefile.am b/tests/test-ccl/Makefile.am
new file mode 100644 (file)
index 0000000..0a5fdc0
--- /dev/null
@@ -0,0 +1,44 @@
+# This file is part of flex.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+
+# Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE.
+
+FLEX = $(top_builddir)/flex
+
+builddir = @builddir@
+
+EXTRA_DIST = scanner.l test.input
+CLEANFILES = scanner.c scanner.h  test-ccl OUTPUT $(OBJS)
+OBJS = scanner.o
+
+AM_CPPFLAGS = -I$(srcdir) -I$(builddir) -I$(top_srcdir) -I$(top_builddir)
+
+testname = test-ccl
+
+scanner.c: $(srcdir)/scanner.l
+       $(FLEX) $(LFLAGS) $<
+
+$(testname)$(EXEEXT): $(OBJS)
+       $(CC) -o $@ $(LDFLAGS) $(OBJS) $(LOADLIBES)
+
+test: $(testname)$(EXEEXT)
+       ./$(testname)$(EXEEXT) < $(srcdir)/test.input
+
+.c.o:
+       $(CC) -c -o $@ $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $<
diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l
new file mode 100644 (file)
index 0000000..749ac71
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * This file is part of flex.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+%{
+/* A template scanner file to build "scanner.c". */
+#include <stdio.h>
+#include <stdlib.h>
+#include "config.h"
+/*#include "parser.h" */
+
+%}
+
+%option 8bit outfile="scanner.c" prefix="test"
+%option nounput nomain noyywrap
+%option warn
+
+
+%%
+
+"^alpha:"[[:^alpha:]]+@alpha@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^digit:"[[:^digit:]]+@digit@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^alnum:"[[:^alnum:]]+@alnum@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^upper:"[[:^upper:]]+@upper@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^lower:"[[:^lower:]]+@lower@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^space:"[[:^space:]]+@space@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^blank:"[[:^blank:]]+@blank@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^punct:"[[:^punct:]]+@punct@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^cntrl:"[[:^cntrl:]]+@cntrl@\n        printf("OK: %s", yytext); ++yylineno; return 1;
+"^xdigit:"[[:^xdigit:]]+@xdigit@\n      printf("OK: %s", yytext); ++yylineno; return 1;
+
+.|\n                       { /* catch-all for any single character, newline included: report it and abort the test */
+    printf("ERROR: at line %d\n", yylineno);
+    abort();
+    }
+%%
+
+int main(void);
+/* Test driver: scan stdin until yylex() returns 0, then print the success banner and exit 0. */
+int
+main ()
+{
+    yyin = stdin;
+    yyout = stdout;
+    while (yylex())   /* all work happens in the rule actions; loop ends when yylex() returns 0 */
+        ;
+    printf("TEST RETURNING OK.\n");
+    return 0;
+}
diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input
new file mode 100644 (file)
index 0000000..c8c005a
--- /dev/null
@@ -0,0 +1,10 @@
+^alpha:0123456789      ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alpha@
+^digit:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ    ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@digit@
+^alnum:~!@#$%^&*(){}[]':;"<>,./?\+=_-`@alnum@
+^upper:abcdefghijklmnopqrstuvwxyz0123456789    ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@upper@
+^lower:ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF      ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@lower@
+^space:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@space@
+^blank:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@blank@
+^punct:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF    Z@punct@
+^cntrl:abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEF~!@#$%^&*(){}[]':;"<>,./?\+=_-`@cntrl@
+^xdigit:ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ       ~!@#$%^&*(){}[]':;"<>,./?\+=_-`@xdigit@