]> granicus.if.org Git - flex/commitdiff
8-bit char support.
author Vern Paxson <vern@ee.lbl.gov>
Mon, 15 Jan 1990 18:02:29 +0000 (18:02 +0000)
committer Vern Paxson <vern@ee.lbl.gov>
Mon, 15 Jan 1990 18:02:29 +0000 (18:02 +0000)
Arbitrary indented/%{} code allowed in section 2.
\x escapes.
%t support.
Minor POSIX-compliance changes.
BEGIN(0) -> BEGIN(INITIAL).
yywrap() and set_input_file() for multiple input files.
C_COMMENT_2 removed.
2.2 Release.

scan.l

diff --git a/scan.l b/scan.l
index 8c4017413edc7373d8904b71cece67cf60f95aa5..dfc1d4734a9b521f73af3e582f288744d9376a7c 100644 (file)
--- a/scan.l
+++ b/scan.l
@@ -49,12 +49,12 @@ static char rcsid[] =
        return ( CHAR );
 
 #define RETURNNAME \
-       (void) strcpy( nmstr, yytext ); \
+       (void) strcpy( nmstr, (char *) yytext ); \
        return ( NAME );
 
 #define PUT_BACK_STRING(str, start) \
-       for ( i = strlen( str ) - 1; i >= start; --i ) \
-           unput(str[i])
+       for ( i = strlen( (char *) str ) - 1; i >= start; --i ) \
+           unput((str)[i])
 
 #define CHECK_REJECT(str) \
        if ( all_upper( str ) ) \
@@ -63,11 +63,13 @@ static char rcsid[] =
 #define CHECK_YYMORE(str) \
        if ( all_lower( str ) ) \
            yymore_used = true;
+
+#undef yywrap
 %}
 
 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
-%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT C_COMMENT_2 ACTION_COMMENT
-%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST
+%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
+%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION
 
 WS             [ \t\f]+
 OPTWS          [ \t\f]*
@@ -78,18 +80,19 @@ NOT_NAME    [^a-z_\n]+
 
 SCNAME         {NAME}
 
-ESCSEQ         \\([^\n]|[0-9]{1,3})
+ESCSEQ         \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2})
 
 %%
     static int bracelevel, didadef;
-    int i, indented_code, checking_used;
-    char nmdef[MAXLINE], myesc();
+    int i, indented_code, checking_used, new_xlation;
+    int doing_codeblock = false;
+    Char nmdef[MAXLINE], myesc();
 
 ^{WS}                  indented_code = true; BEGIN(CODEBLOCK);
 ^#.*\n                 ++linenum; ECHO; /* treat as a comment */
 ^"/*"                  ECHO; BEGIN(C_COMMENT);
-^"%s"(tart)?           return ( SCDECL );
-^"%x"                  return ( XSCDECL );
+^"%s"{NAME}?           return ( SCDECL );
+^"%x"{NAME}?           return ( XSCDECL );
 ^"%{".*\n              {
                        ++linenum;
                        line_directive_out( stdout );
@@ -106,37 +109,68 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
                        return ( SECTEND );
                        }
 
-^"%used"               checking_used = REALLY_USED; BEGIN(USED_LIST);
-^"%unused"             checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
+^"%used"               {
+           pinpoint_message( "Warning, %%used/%%unused have been deprecated" );
+                       checking_used = REALLY_USED; BEGIN(USED_LIST);
+                       }
+^"%unused"             {
+                       /* deprecated directive: warn, then scan its list */
+           pinpoint_message( "Warning, %%used/%%unused have been deprecated" );
+                       checking_used = REALLY_NOT_USED; BEGIN(USED_LIST);
+                       }
 
 
-^"%"[^sx]" ".*\n               {
+^"%"[aeknopt]" ".*\n   {
+#ifdef NOTDEF
                        fprintf( stderr,
                             "old-style lex command at line %d ignored:\n\t%s",
                                 linenum, yytext );
+#endif
                        ++linenum;
                        }
 
+^"%"[cr]{OPTWS}                /* ignore old lex directive */
+
+%t{OPTWS}\n            {
+                       char *malloc();
+
+                       /* start of a %t table: allocate and clear xlation[] */
+                       ++linenum;
+                       xlation = (int *) malloc( sizeof( int ) * (csize + 1) );
+                       /* check the allocation before writing to the table */
+                       if ( ! xlation )
+                           flexfatal(
+                               "dynamic memory failure building %t table" );
+
+                       for ( i = 1; i <= csize; ++i )
+                           xlation[i] = 0;
+
+                       num_xlations = 0;
+                       BEGIN(XLATION);
+                       }
+
+^"%"[^sxanpekotcru{}]{OPTWS}   synerr( "unrecognized '%' directive" );
+
 ^{NAME}                        {
-                       (void) strcpy( nmstr, yytext );
+                       (void) strcpy( nmstr, (char *) yytext );
                        didadef = false;
                        BEGIN(PICKUPDEF);
                        }
 
 {SCNAME}               RETURNNAME;
 ^{OPTWS}\n             ++linenum; /* allows blank lines in section 1 */
-\n                     ++linenum; return ( '\n' );
+{OPTWS}\n              ++linenum; return ( '\n' );
 .                      synerr( "illegal character" ); BEGIN(RECOVER);
 
 
-<C_COMMENT>"*/"                ECHO; BEGIN(0);
-<C_COMMENT>"*/".*\n    ++linenum; ECHO; BEGIN(0);
+<C_COMMENT>"*/"                ECHO; BEGIN(INITIAL);
+<C_COMMENT>"*/".*\n    ++linenum; ECHO; BEGIN(INITIAL);
 <C_COMMENT>[^*\n]+     ECHO;
 <C_COMMENT>"*"         ECHO;
 <C_COMMENT>\n          ++linenum; ECHO;
 
 
-<CODEBLOCK>^"%}".*\n   ++linenum; BEGIN(0);
+<CODEBLOCK>^"%}".*\n   ++linenum; BEGIN(INITIAL);
 <CODEBLOCK>"reject"    ECHO; CHECK_REJECT(yytext);
 <CODEBLOCK>"yymore"    ECHO; CHECK_YYMORE(yytext);
 <CODEBLOCK>{NAME}|{NOT_NAME}|. ECHO;
@@ -144,16 +178,16 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
                        ++linenum;
                        ECHO;
                        if ( indented_code )
-                           BEGIN(0);
+                           BEGIN(INITIAL);
                        }
 
 
 <PICKUPDEF>{WS}                /* separates name and definition */
 
 <PICKUPDEF>{NOT_WS}.*  {
-                       (void) strcpy( nmdef, yytext );
+                       (void) strcpy( (char *) nmdef, (char *) yytext );
 
-                       for ( i = strlen( nmdef ) - 1;
+                       for ( i = strlen( (char *) nmdef ) - 1;
                              i >= 0 &&
                              nmdef[i] == ' ' || nmdef[i] == '\t';
                              --i )
@@ -168,14 +202,14 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
 <PICKUPDEF>\n          {
                        if ( ! didadef )
                            synerr( "incomplete name definition" );
-                       BEGIN(0);
+                       BEGIN(INITIAL);
                        ++linenum;
                        }
 
-<RECOVER>.*\n          ++linenum; BEGIN(0); RETURNNAME;
+<RECOVER>.*\n          ++linenum; BEGIN(INITIAL); RETURNNAME;
 
 
-<USED_LIST>\n          ++linenum; BEGIN(0);
+<USED_LIST>\n          ++linenum; BEGIN(INITIAL);
 <USED_LIST>{WS}
 <USED_LIST>"reject"    {
                        if ( all_upper( yytext ) )
@@ -192,6 +226,25 @@ ESCSEQ             \\([^\n]|[0-9]{1,3})
 <USED_LIST>{NOT_WS}+   synerr( "unrecognized %used/%unused construct" );
 
 
+<XLATION>"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL);
+<XLATION>^{OPTWS}[0-9]+        ++num_xlations; new_xlation = true;
+<XLATION>^.            synerr( "bad row in translation table" );
+<XLATION>{WS}          /* ignore whitespace */
+
+<XLATION>{ESCSEQ}      {
+                       xlation[myesc( yytext )] =
+                               (new_xlation ? num_xlations : -num_xlations);
+                       new_xlation = false;
+                       }
+<XLATION>.             {
+                       xlation[yytext[0]] =
+                               (new_xlation ? num_xlations : -num_xlations);
+                       new_xlation = false;
+                       }
+
+<XLATION>\n            ++linenum;
+
+
 <SECT2PROLOG>.*\n/{NOT_WS}     {
                        ++linenum;
                        ACTION_ECHO;
@@ -205,14 +258,15 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
 
 <SECT2>^{OPTWS}\n      ++linenum; /* allow blank lines in section 2 */
 
-       /* this horrible mess of a rule matches indented lines which
-        * do not contain "/*".  We need to make the distinction because
-        * otherwise this rule will be taken instead of the rule which
-        * matches the beginning of comments like this one
-        */
-<SECT2>^{WS}([^/\n]|"/"[^*\n])*("/"?)\n        {
-                       synerr( "indented code found outside of action" );
-                       ++linenum;
+<SECT2>^({WS}|"%{")    {
+                       indented_code = (yytext[0] != '%');
+                       doing_codeblock = true;
+                       bracelevel = 1;
+
+                       if ( indented_code )
+                           ACTION_ECHO;
+
+                       BEGIN(CODEBLOCK_2);
                        }
 
 <SECT2>"<"             BEGIN(SC); return ( '<' );
@@ -229,8 +283,6 @@ ESCSEQ              \\([^\n]|[0-9]{1,3})
                        }
 <SECT2>{WS}"|".*\n     continued_action = true; ++linenum; return ( '\n' );
 
-<SECT2>^{OPTWS}"/*"    ACTION_ECHO; BEGIN(C_COMMENT_2);
-
 <SECT2>{WS}            {
                        /* this rule is separate from the one below because
                         * otherwise we get variable trailing context, so
@@ -262,10 +314,10 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
 <SECT2>"["([^\\\]\n]|{ESCSEQ})+"]"     {
                        int cclval;
 
-                       (void) strcpy( nmstr, yytext );
+                       (void) strcpy( nmstr, (char *) yytext );
 
                        /* check to see if we've already encountered this ccl */
-                       if ( (cclval = ccllookup( nmstr )) )
+                       if ( (cclval = ccllookup( (Char *) nmstr )) )
                            {
                            yylval = cclval;
                            ++cclreuse;
@@ -276,12 +328,12 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
                            /* we fudge a bit.  We know that this ccl will
                             * soon be numbered as lastccl + 1 by cclinit
                             */
-                           cclinstal( nmstr, lastccl + 1 );
+                           cclinstal( (Char *) nmstr, lastccl + 1 );
 
                            /* push back everything but the leading bracket
                             * so the ccl can be rescanned
                             */
-                           PUT_BACK_STRING(nmstr, 1);
+                           PUT_BACK_STRING((char *) nmstr, 1);
 
                            BEGIN(FIRSTCCL);
                            return ( '[' );
@@ -289,10 +341,10 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
                        }
 
 <SECT2>"{"{NAME}"}"    {
-                       register char *nmdefptr;
-                       char *ndlookup();
+                       register Char *nmdefptr;
+                       Char *ndlookup();
 
-                       (void) strcpy( nmstr, yytext );
+                       (void) strcpy( nmstr, (char *) yytext );
                        nmstr[yyleng - 1] = '\0';  /* chop trailing brace */
 
                        /* lookup from "nmstr + 1" to chop leading brace */
@@ -368,21 +420,32 @@ ESCSEQ            \\([^\n]|[0-9]{1,3})
 <BRACEERROR>\n         synerr( "missing }" ); ++linenum; BEGIN(SECT2);
 
 
-<PERCENT_BRACE_ACTION>{OPTWS}"%}".*    bracelevel = 0;
-<PERCENT_BRACE_ACTION,ACTION>"reject"  ACTION_ECHO; CHECK_REJECT(yytext);
-<PERCENT_BRACE_ACTION,ACTION>"yymore"  ACTION_ECHO; CHECK_YYMORE(yytext);
-<PERCENT_BRACE_ACTION>{NAME}|{NOT_NAME}|.      ACTION_ECHO;
-<PERCENT_BRACE_ACTION>\n               {
+<PERCENT_BRACE_ACTION,CODEBLOCK_2>{OPTWS}"%}".*                bracelevel = 0;
+<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"reject"      {
+                       ACTION_ECHO;
+                       CHECK_REJECT(yytext);
+                       }
+<PERCENT_BRACE_ACTION,CODEBLOCK_2,ACTION>"yymore"      {
+                       ACTION_ECHO;
+                       CHECK_YYMORE(yytext);
+                       }
+<PERCENT_BRACE_ACTION,CODEBLOCK_2>{NAME}|{NOT_NAME}|.  ACTION_ECHO;
+<PERCENT_BRACE_ACTION,CODEBLOCK_2>\n                   {
                        ++linenum;
                        ACTION_ECHO;
-                       if ( bracelevel == 0 )
+                       if ( bracelevel == 0 ||
+                            (doing_codeblock && indented_code) )
                            {
-                           fputs( "\tYY_BREAK\n", temp_action_file );
+                           if ( ! doing_codeblock )
+                               fputs( "\tYY_BREAK\n", temp_action_file );
+                           
+                           doing_codeblock = false;
                            BEGIN(SECT2);
                            }
                        }
 
-       /* REJECT and yymore() are checked for above, in PERCENT_BRACE_ACTION */
+
+       /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
 <ACTION>"{"            ACTION_ECHO; ++bracelevel;
 <ACTION>"}"            ACTION_ECHO; --bracelevel;
 <ACTION>[^a-z_{}"'/\n]+        ACTION_ECHO;
@@ -407,12 +470,6 @@ ESCSEQ             \\([^\n]|[0-9]{1,3})
 <ACTION_COMMENT>\n     ++linenum; ACTION_ECHO;
 <ACTION_COMMENT>.      ACTION_ECHO;
 
-<C_COMMENT_2>"*/"      ACTION_ECHO; BEGIN(SECT2);
-<C_COMMENT_2>"*/".*\n  ++linenum; ACTION_ECHO; BEGIN(SECT2);
-<C_COMMENT_2>[^*\n]+   ACTION_ECHO;
-<C_COMMENT_2>"*"       ACTION_ECHO;
-<C_COMMENT_2>\n                ++linenum; ACTION_ECHO;
-
 <ACTION_STRING>[^"\\\n]+       ACTION_ECHO;
 <ACTION_STRING>\\.     ACTION_ECHO;
 <ACTION_STRING>\n      ++linenum; ACTION_ECHO;
@@ -434,3 +491,40 @@ ESCSEQ             \\([^\n]|[0-9]{1,3})
 
 <SECT3>.*(\n?)         ECHO;
 %%
+
+
+int yywrap()
+
+    {
+    /* no input files left: return 1 so the caller sees end of input */
+    if ( --num_input_files <= 0 )
+       return ( 1 );
+
+    /* otherwise open the next file in the list and keep going */
+    set_input_file( *++input_files );
+
+    return ( 0 );
+    }
+
+
+/* set_input_file - open the given file (if NULL, stdin) for scanning */
+
+set_input_file( file )
+char *file;
+
+    {
+    if ( file == NULL )
+       {
+       /* no name given: read from standard input */
+       yyin = stdin;
+       infilename = "<stdin>";
+       }
+
+    else
+       {
+       infilename = file;
+
+       if ( (yyin = fopen( infilename, "r" )) == NULL )
+           lerrsf( "can't open %s", file );
+       }
+    }