granicus.if.org Git - flex/commitdiff
changes for variable trailing context
author Vern Paxson <vern@ee.lbl.gov>
Fri, 19 May 1989 14:08:47 +0000 (14:08 +0000)
committer Vern Paxson <vern@ee.lbl.gov>
Fri, 19 May 1989 14:08:47 +0000 (14:08 +0000)
nfa.c
parse.y

diff --git a/nfa.c b/nfa.c
index 641a182f8791055fe027c5e04e925e7f34a6af24..90d7471b8facf62e2e756169c347c12bb8096847 100644 (file)
--- a/nfa.c
+++ b/nfa.c
@@ -23,62 +23,15 @@ static char rcsid[] =
  *
  * synopsis
  *
- *   add_accept( mach, headcnt, trailcnt );
- *
- * the global ACCNUM is incremented and the new value becomes mach's
- * accepting number.  if headcnt or trailcnt is non-zero then the machine
- * recognizes a pattern with trailing context.  headcnt is the number of
- * characters in the matched part of the pattern, or zero if the matched
- * part has variable length.  trailcnt is the number of trailing context
- * characters in the pattern, or zero if the trailing context has variable
- * length.
+ *   add_accept( mach, accepting_number );
+ *
+ * accepting_number becomes mach's accepting number.
  */
 
-add_accept( mach, headcnt, trailcnt )
-int mach, headcnt, trailcnt;
+add_accept( mach, accepting_number )
+int mach;
 
     {
-    int astate;
-
-    fprintf( temp_action_file, "case %d:\n", ++accnum );
-
-    if ( headcnt > 0 || trailcnt > 0 )
-       { /* do trailing context magic to not match the trailing characters */
-       char *scanner_cp =
-           (fulltbl || fullspd) ? "yy_c_buf_p = yy_cp" : "yy_c_buf_p";
-       char *scanner_bp = (fulltbl || fullspd) ? "yy_bp" : "yy_b_buf_p";
-
-       fprintf( temp_action_file,
-           "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" );
-
-       if ( headcnt > 0 )
-           {
-           int head_offset = headcnt - 1;
-
-           if ( fullspd || fulltbl )
-               /* with the fast skeleton, the character pointer points
-                * to the *next* character to scan, rather than the one
-                * that was last scanned
-                */
-               ++head_offset;
-
-           if ( head_offset > 0 )
-               fprintf( temp_action_file, "%s = %s + %d;\n",
-                        scanner_cp, scanner_bp, head_offset );
-
-           else
-               fprintf( temp_action_file, "%s = %s;\n",
-                        scanner_cp, scanner_bp );
-           }
-
-       else
-           fprintf( temp_action_file, "%s -= %d;\n", scanner_cp, trailcnt );
-    
-       fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
-       }
-
-    line_directive_out( temp_action_file );
-
     /* hang the accepting number off an epsilon state.  if it is associated
      * with a state that has a non-epsilon out-transition, then the state
      * will accept BEFORE it makes that transition, i.e., one character
@@ -86,12 +39,12 @@ int mach, headcnt, trailcnt;
      */
 
     if ( transchar[finalst[mach]] == SYM_EPSILON )
-       accptnum[finalst[mach]] = accnum;
+       accptnum[finalst[mach]] = accepting_number;
 
     else
        {
-       astate = mkstate( SYM_EPSILON );
-       accptnum[astate] = accnum;
+       int astate = mkstate( SYM_EPSILON );
+       accptnum[astate] = accepting_number;
        mach = link_machines( mach, astate );
        }
     }
@@ -215,6 +168,82 @@ int mach;
     return ( init );
     }
 
+/* finish_rule - finish up the processing for a rule
+ *
+ * synopsis
+ *
+ *   finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
+ *
+ * num_rules becomes mach's accepting number, the rule's line number is
+ * re-recorded, and a "case" label for the rule is written to the action
+ * file.  If variable_trail_rule is true then the rule has trailing context
+ * and both the head and trail are variable size.  Otherwise if headcnt or
+ * trailcnt is non-zero then the machine recognizes a pattern with trailing
+ * context: headcnt is the number of characters in the matched part of the
+ * pattern, or zero if it has variable length, and trailcnt is the number of
+ * trailing context characters, or zero if the trail has variable length.
+ */
+
+finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
+int mach, variable_trail_rule, headcnt, trailcnt;
+
+    {
+    add_accept( mach, num_rules );
+
+    /* new_rule() already recorded a line number, but it often gets the
+     * wrong one because it runs before the current rule has been parsed
+     */
+    rule_linenum[num_rules] = linenum;
+
+    fprintf( temp_action_file, "case %d:\n", num_rules );
+
+    if ( variable_trail_rule )
+       {
+       rule_type[num_rules] = RULE_VARIABLE;
+
+       if ( performance_report )
+           fprintf( stderr, "Variable trailing context rule at line %d\n",
+                    rule_linenum[num_rules] );
+
+       variable_trailing_context_rules = true;
+       }
+
+    else
+       {
+       rule_type[num_rules] = RULE_NORMAL;
+
+       if ( headcnt > 0 || trailcnt > 0 )
+           {
+           /* do trailing context magic to not match the trailing characters */
+           char *scanner_cp = "yy_c_buf_p = yy_cp";
+           char *scanner_bp = "yy_bp";
+
+           fprintf( temp_action_file,
+       "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
+
+           if ( headcnt > 0 )
+               {
+               /* fixed-length head: restart scanning just past it.
+                * headcnt is known to be positive here, so the
+                * zero-offset form ("cp = bp") that an inner test
+                * used to select could never be emitted
+                */
+               fprintf( temp_action_file, "%s = %s + %d;\n",
+                        scanner_cp, scanner_bp, headcnt );
+               }
+
+           else
+               fprintf( temp_action_file,
+                        "%s -= %d;\n", scanner_cp, trailcnt );
+
+           fprintf( temp_action_file,
+                    "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
+           }
+       }
+
+    line_directive_out( temp_action_file );
+    }
+
 
 /* link_machines - connect two machines together
  *
@@ -254,6 +283,49 @@ int first, last;
     }
 
 
+/* mark_beginning_as_normal - mark each "beginning" state in a machine
+ *                            as being a "normal" (i.e., not trailing context-
+ *                            associated) state
+ *
+ * synopsis
+ *
+ *   mark_beginning_as_normal( mach )
+ *
+ *     mach - machine to mark
+ *
+ * The "beginning" states are the epsilon closure of the first state
+ */
+
+mark_beginning_as_normal( mach )
+register int mach;
+
+    {
+    switch ( state_type[mach] )
+       {
+       case STATE_NORMAL:
+           /* oh, we've already visited here */
+           return;
+
+       case STATE_TRAILING_CONTEXT:
+           state_type[mach] = STATE_NORMAL; /* set first: stops revisits */
+
+           if ( transchar[mach] == SYM_EPSILON ) /* follow epsilons only */
+               {
+               if ( trans1[mach] != NO_TRANSITION )
+                   mark_beginning_as_normal( trans1[mach] );
+
+               if ( trans2[mach] != NO_TRANSITION )
+                   mark_beginning_as_normal( trans2[mach] );
+               }
+           break;
+
+       default:
+           flexerror( "bad state type in mark_beginning_as_normal()" );
+           break;
+       }
+    }
+
+
 /* mkbranch - make a machine that branches to two machines
  *
  * synopsis
@@ -456,14 +528,15 @@ int mkrep( mach, lb, ub )
 int mach, lb, ub;
 
     {
-    int base, tail, copy, i;
+    int base_mach, tail, copy, i;
 
-    base = copysingl( mach, lb - 1 );
+    base_mach = copysingl( mach, lb - 1 );
 
     if ( ub == INFINITY )
        {
        copy = dupmachine( mach );
-       mach = link_machines( mach, link_machines( base, mkclos( copy ) ) );
+       mach = link_machines( mach,
+                             link_machines( base_mach, mkclos( copy ) ) );
        }
 
     else
@@ -476,7 +549,7 @@ int mach, lb, ub;
            tail = mkopt( link_machines( copy, tail ) );
            }
 
-       mach = link_machines( mach, link_machines( base, tail ) );
+       mach = link_machines( mach, link_machines( base_mach, tail ) );
        }
 
     return ( mach );
@@ -519,6 +592,7 @@ int sym;
        trans2 = reallocate_integer_array( trans2, current_mns );
        accptnum = reallocate_integer_array( accptnum, current_mns );
        assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
+       state_type = reallocate_integer_array( state_type, current_mns );
        }
 
     firstst[lastnfa] = lastnfa;
@@ -528,7 +602,8 @@ int sym;
     trans1[lastnfa] = NO_TRANSITION;
     trans2[lastnfa] = NO_TRANSITION;
     accptnum[lastnfa] = NIL;
-    assoc_rule[lastnfa] = linenum; /* identify rules by line number in input */
+    assoc_rule[lastnfa] = num_rules;
+    state_type[lastnfa] = current_state_type;
 
     /* fix up equivalence classes base on this transition.  Note that any
      * character which has its own transition gets its own equivalence class.
@@ -585,3 +660,31 @@ int statefrom, stateto;
        trans2[statefrom] = stateto;
        }
     }
+
+/* new_rule - initialize for a new rule
+ *
+ * synopsis
+ *
+ *   new_rule();
+ *
+ * the global num_rules is incremented, any corresponding dynamic arrays
+ * (such as rule_type[]) are grown as needed, and linenum is recorded.
+ */
+
+new_rule()
+
+    {
+    if ( ++num_rules >= current_max_rules )
+       {
+       ++num_reallocs;
+       current_max_rules += MAX_RULES_INCREMENT;
+       rule_type = reallocate_integer_array( rule_type, current_max_rules );
+       rule_linenum =
+           reallocate_integer_array( rule_linenum, current_max_rules );
+       }
+
+    if ( num_rules > MAX_RULE )
+       lerrif( "too many rules (> %d)!", MAX_RULE );
+
+    rule_linenum[num_rules] = linenum; /* line number current at this call */
+    }
diff --git a/parse.y b/parse.y
index d9cf7beca26cc5e72c030a25928a487f4ff13a9f..55cb3c5c6ee0ba2ff468b7761dee52b2d1320d3b 100644 (file)
--- a/parse.y
+++ b/parse.y
@@ -24,7 +24,7 @@ static char rcsid[] =
 #endif
 
 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
-int trlcontxt, xcluflg, cclsorted, varlength;
+int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
 char clower();
 
 static int madeany = false;  /* whether we've made the '.' character class */
@@ -32,7 +32,7 @@ static int madeany = false;  /* whether we've made the '.' character class */
 %}
 
 %%
-goal            :  initlex sect1 sect1end sect2
+goal            :  initlex sect1 sect1end sect2 initforrule
                        { /* add default rule */
                        int def_rule;
 
@@ -41,13 +41,18 @@ goal            :  initlex sect1 sect1end sect2
 
                        def_rule = mkstate( -pat );
 
-                       add_accept( def_rule, 0, 0 );
+                       finish_rule( def_rule, variable_trail_rule, 0, 0 );
 
                        for ( i = 1; i <= lastsc; ++i )
                            scset[i] = mkbranch( scset[i], def_rule );
 
-                       fputs( "YY_DEFAULT_ACTION;\n\tYY_BREAK\n",
-                              temp_action_file );
+                       if ( spprdflt )
+                           fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
+                                  temp_action_file );
+                       else
+                           fputs( "ECHO", temp_action_file );
+
+                       fputs( ";\n\tYY_BREAK\n", temp_action_file );
                        }
                ;
 
@@ -100,25 +105,38 @@ sect2           :  sect2 initforrule flexrule '\n'
 initforrule     :
                        {
                        /* initialize for a parse of one rule */
-                       trlcontxt = varlength = false;
+                       trlcontxt = variable_trail_rule = varlength = false;
                        trailcnt = headcnt = rulelen = 0;
+                       current_state_type = STATE_NORMAL;
+                       new_rule();
                        }
                ;
 
 flexrule        :  scon '^' re eol 
                         {
                        pat = link_machines( $3, $4 );
-                       add_accept( pat, headcnt, trailcnt );
+                       finish_rule( pat, variable_trail_rule,
+                                    headcnt, trailcnt );
 
                        for ( i = 1; i <= actvp; ++i )
                            scbol[actvsc[i]] =
                                mkbranch( scbol[actvsc[i]], pat );
+
+                       if ( ! bol_needed )
+                           {
+                           bol_needed = true;
+
+                           if ( performance_report )
+                               fprintf( stderr,
+                       "'^' operator results in sub-optimal performance\n" );
+                           }
                        }
 
                |  scon re eol 
                         {
                        pat = link_machines( $2, $3 );
-                       add_accept( pat, headcnt, trailcnt );
+                       finish_rule( pat, variable_trail_rule,
+                                    headcnt, trailcnt );
 
                        for ( i = 1; i <= actvp; ++i )
                            scset[actvsc[i]] = 
@@ -128,7 +146,8 @@ flexrule        :  scon '^' re eol
                 |  '^' re eol 
                        {
                        pat = link_machines( $2, $3 );
-                       add_accept( pat, headcnt, trailcnt );
+                       finish_rule( pat, variable_trail_rule,
+                                    headcnt, trailcnt );
 
                        /* add to all non-exclusive start conditions,
                         * including the default (0) start condition
@@ -137,12 +156,22 @@ flexrule        :  scon '^' re eol
                        for ( i = 1; i <= lastsc; ++i )
                            if ( ! scxclu[i] )
                                scbol[i] = mkbranch( scbol[i], pat );
+
+                       if ( ! bol_needed )
+                           {
+                           bol_needed = true;
+
+                           if ( performance_report )
+                               fprintf( stderr,
+                       "'^' operator results in sub-optimal performance\n" );
+                           }
                        }
 
                 |  re eol 
                        {
                        pat = link_machines( $1, $2 );
-                       add_accept( pat, headcnt, trailcnt );
+                       finish_rule( pat, variable_trail_rule,
+                                    headcnt, trailcnt );
 
                        for ( i = 1; i <= lastsc; ++i )
                            if ( ! scxclu[i] )
@@ -207,8 +236,7 @@ eol             :  '$'
                            {
                            if ( varlength && headcnt == 0 )
                                /* both head and trail are variable-length */
-                               synerr( "illegal trailing context" );
-
+                               variable_trail_rule = true;
                            else
                                trailcnt = rulelen;
                            }
@@ -223,7 +251,33 @@ re              :  re '|' series
                        }
 
                |  re2 series
-                       { $$ = link_machines( $1, $2 ); }
+                       {
+                       if ( transchar[lastst[$2]] != SYM_EPSILON )
+                           /* provide final transition \now/ so it
+                            * will be marked as a trailing context
+                            * state
+                            */
+                           $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
+
+                       mark_beginning_as_normal( $2 );
+                       current_state_type = STATE_NORMAL;
+
+                       if ( varlength && headcnt == 0 )
+                           { /* variable trailing context rule */
+                           /* mark the first part of the rule as the accepting
+                            * "head" part of a trailing context rule
+                            */
+                           /* by the way, we didn't do this at the beginning
+                            * of this production because back then
+                            * current_state_type was set up for a trail
+                            * rule, and add_accept() can create a new
+                            * state ...
+                            */
+                           add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
+                           }
+
+                       $$ = link_machines( $1, $2 );
+                       }
 
                |  series
                        { $$ = $1; }
@@ -243,12 +297,14 @@ re2               :  re '/'
                            trlcontxt = true;
 
                        if ( varlength )
-                           /* the trailing context had better be fixed-length */
+                           /* we hope the trailing context is fixed-length */
                            varlength = false;
                        else
                            headcnt = rulelen;
 
                        rulelen = 0;
+
+                       current_state_type = STATE_TRAILING_CONTEXT;
                        $$ = $1;
                        }
                ;
@@ -290,13 +346,18 @@ singleton       :  singleton '*'
                        {
                        varlength = true;
 
-                       if ( $3 > $5 || $3 <= 0 )
+                       if ( $3 > $5 || $3 < 0 )
                            {
                            synerr( "bad iteration values" );
                            $$ = $1;
                            }
                        else
-                           $$ = mkrep( $1, $3, $5 );
+                           {
+                           if ( $3 == 0 )
+                               $$ = mkopt( mkrep( $1, $3, $5 ) );
+                           else
+                               $$ = mkrep( $1, $3, $5 );
+                           }
                        }
                                
                |  singleton '{' NUMBER ',' '}'
@@ -491,7 +552,7 @@ char str[];
 
     {
     syntaxerror = true;
-    fprintf( stderr, "Syntax error at line %d:  %s\n", linenum, str );
+    fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str );
     }