From: Vern Paxson Date: Fri, 19 May 1989 14:08:47 +0000 (+0000) Subject: changes for variable trailing context X-Git-Tag: flex-2-5-5b~615 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=52f3adc1d688e32a6299dd7f52a70d4bac455560;p=flex changes for variable trailing context --- diff --git a/nfa.c b/nfa.c index 641a182..90d7471 100644 --- a/nfa.c +++ b/nfa.c @@ -23,62 +23,15 @@ static char rcsid[] = * * synopsis * - * add_accept( mach, headcnt, trailcnt ); - * - * the global ACCNUM is incremented and the new value becomes mach's - * accepting number. if headcnt or trailcnt is non-zero then the machine - * recognizes a pattern with trailing context. headcnt is the number of - * characters in the matched part of the pattern, or zero if the matched - * part has variable length. trailcnt is the number of trailing context - * characters in the pattern, or zero if the trailing context has variable - * length. + * add_accept( mach, accepting_number ); + * + * accepting_number becomes mach's accepting number. */ -add_accept( mach, headcnt, trailcnt ) -int mach, headcnt, trailcnt; +add_accept( mach, accepting_number ) +int mach; { - int astate; - - fprintf( temp_action_file, "case %d:\n", ++accnum ); - - if ( headcnt > 0 || trailcnt > 0 ) - { /* do trailing context magic to not match the trailing characters */ - char *scanner_cp = - (fulltbl || fullspd) ? "yy_c_buf_p = yy_cp" : "yy_c_buf_p"; - char *scanner_bp = (fulltbl || fullspd) ? 
"yy_bp" : "yy_b_buf_p"; - - fprintf( temp_action_file, - "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" ); - - if ( headcnt > 0 ) - { - int head_offset = headcnt - 1; - - if ( fullspd || fulltbl ) - /* with the fast skeleton, the character pointer points - * to the *next* character to scan, rather than the one - * that was last scanned - */ - ++head_offset; - - if ( head_offset > 0 ) - fprintf( temp_action_file, "%s = %s + %d;\n", - scanner_cp, scanner_bp, head_offset ); - - else - fprintf( temp_action_file, "%s = %s;\n", - scanner_cp, scanner_bp ); - } - - else - fprintf( temp_action_file, "%s -= %d;\n", scanner_cp, trailcnt ); - - fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); - } - - line_directive_out( temp_action_file ); - /* hang the accepting number off an epsilon state. if it is associated * with a state that has a non-epsilon out-transition, then the state * will accept BEFORE it makes that transition, i.e., one character @@ -86,12 +39,12 @@ int mach, headcnt, trailcnt; */ if ( transchar[finalst[mach]] == SYM_EPSILON ) - accptnum[finalst[mach]] = accnum; + accptnum[finalst[mach]] = accepting_number; else { - astate = mkstate( SYM_EPSILON ); - accptnum[astate] = accnum; + int astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accepting_number; mach = link_machines( mach, astate ); } } @@ -215,6 +168,82 @@ int mach; return ( init ); } +/* finish_rule - finish up the processing for a rule + * + * synopsis + * + * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); + * + * An accepting number is added to the given machine. If variable_trail_rule + * is true then the rule has trailing context and both the head and trail + * are variable size. Otherwise if headcnt or trailcnt is non-zero then + * the machine recognizes a pattern with trailing context and headcnt is + * the number of characters in the matched part of the pattern, or zero + * if the matched part has variable length. 
trailcnt is the number of + * trailing context characters in the pattern, or zero if the trailing + * context has variable length. + */ + +finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) +int mach, variable_trail_rule, headcnt, trailcnt; + + { + add_accept( mach, num_rules ); + + /* we did this in new_rule(), but it often gets the wrong + * number because we do it before we start parsing the current rule + */ + rule_linenum[num_rules] = linenum; + + fprintf( temp_action_file, "case %d:\n", num_rules ); + + if ( variable_trail_rule ) + { + rule_type[num_rules] = RULE_VARIABLE; + + if ( performance_report ) + fprintf( stderr, "Variable trailing context rule at line %d\n", + rule_linenum[num_rules] ); + + variable_trailing_context_rules = true; + } + + else + { + rule_type[num_rules] = RULE_NORMAL; + + if ( headcnt > 0 || trailcnt > 0 ) + { + /* do trailing context magic to not match the trailing characters */ + char *scanner_cp = "yy_c_buf_p = yy_cp"; + char *scanner_bp = "yy_bp"; + + fprintf( temp_action_file, + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); + + if ( headcnt > 0 ) + { + if ( headcnt > 0 ) + fprintf( temp_action_file, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); + + else + fprintf( temp_action_file, "%s = %s;\n", + scanner_cp, scanner_bp ); + } + + else + fprintf( temp_action_file, + "%s -= %d;\n", scanner_cp, trailcnt ); + + fprintf( temp_action_file, + "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + } + } + + line_directive_out( temp_action_file ); + } + /* link_machines - connect two machines together * @@ -254,6 +283,49 @@ int first, last; } +/* mark_beginning_as_normal - mark each "beginning" state in a machine + * as being a "normal" (i.e., not trailing context- + * associated) state + * + * synopsis + * + * mark_beginning_as_normal( mach ) + * + * mach - machine to mark + * + * The "beginning" states are the epsilon closure of the first state + */ + +mark_beginning_as_normal( mach ) +register 
int mach; + + { + switch ( state_type[mach] ) + { + case STATE_NORMAL: + /* oh, we've already visited here */ + return; + + case STATE_TRAILING_CONTEXT: + state_type[mach] = STATE_NORMAL; + + if ( transchar[mach] == SYM_EPSILON ) + { + if ( trans1[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans1[mach] ); + + if ( trans2[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans2[mach] ); + } + break; + + default: + flexerror( "bad state type in mark_beginning_as_normal()" ); + break; + } + } + + /* mkbranch - make a machine that branches to two machines * * synopsis @@ -456,14 +528,15 @@ int mkrep( mach, lb, ub ) int mach, lb, ub; { - int base, tail, copy, i; + int base_mach, tail, copy, i; - base = copysingl( mach, lb - 1 ); + base_mach = copysingl( mach, lb - 1 ); if ( ub == INFINITY ) { copy = dupmachine( mach ); - mach = link_machines( mach, link_machines( base, mkclos( copy ) ) ); + mach = link_machines( mach, + link_machines( base_mach, mkclos( copy ) ) ); } else @@ -476,7 +549,7 @@ int mach, lb, ub; tail = mkopt( link_machines( copy, tail ) ); } - mach = link_machines( mach, link_machines( base, tail ) ); + mach = link_machines( mach, link_machines( base_mach, tail ) ); } return ( mach ); @@ -519,6 +592,7 @@ int sym; trans2 = reallocate_integer_array( trans2, current_mns ); accptnum = reallocate_integer_array( accptnum, current_mns ); assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); + state_type = reallocate_integer_array( state_type, current_mns ); } firstst[lastnfa] = lastnfa; @@ -528,7 +602,8 @@ int sym; trans1[lastnfa] = NO_TRANSITION; trans2[lastnfa] = NO_TRANSITION; accptnum[lastnfa] = NIL; - assoc_rule[lastnfa] = linenum; /* identify rules by line number in input */ + assoc_rule[lastnfa] = num_rules; + state_type[lastnfa] = current_state_type; /* fix up equivalence classes base on this transition. Note that any * character which has its own transition gets its own equivalence class. 
@@ -585,3 +660,31 @@ int statefrom, stateto; trans2[statefrom] = stateto; } } + +/* new_rule - initialize for a new rule + * + * synopsis + * + * new_rule(); + * + * the global num_rules is incremented and any corresponding dynamic + * arrays (such as rule_type[]) are grown as needed. + */ + +new_rule() + + { + if ( ++num_rules >= current_max_rules ) + { + ++num_reallocs; + current_max_rules += MAX_RULES_INCREMENT; + rule_type = reallocate_integer_array( rule_type, current_max_rules ); + rule_linenum = + reallocate_integer_array( rule_linenum, current_max_rules ); + } + + if ( num_rules > MAX_RULE ) + lerrif( "too many rules (> %d)!", MAX_RULE ); + + rule_linenum[num_rules] = linenum; + } diff --git a/parse.y b/parse.y index d9cf7be..55cb3c5 100644 --- a/parse.y +++ b/parse.y @@ -24,7 +24,7 @@ static char rcsid[] = #endif int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; -int trlcontxt, xcluflg, cclsorted, varlength; +int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; char clower(); static int madeany = false; /* whether we've made the '.' 
character class */ %} %% -goal : initlex sect1 sect1end sect2 +goal : initlex sect1 sect1end sect2 initforrule { /* add default rule */ int def_rule; @@ -41,13 +41,18 @@ goal : initlex sect1 sect1end sect2 def_rule = mkstate( -pat ); - add_accept( def_rule, 0, 0 ); + finish_rule( def_rule, variable_trail_rule, 0, 0 ); for ( i = 1; i <= lastsc; ++i ) scset[i] = mkbranch( scset[i], def_rule ); - fputs( "YY_DEFAULT_ACTION;\n\tYY_BREAK\n", - temp_action_file ); + if ( spprdflt ) + fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", + temp_action_file ); + else + fputs( "ECHO", temp_action_file ); + + fputs( ";\n\tYY_BREAK\n", temp_action_file ); } ; @@ -100,25 +105,38 @@ sect2 : sect2 initforrule flexrule '\n' initforrule : { /* initialize for a parse of one rule */ - trlcontxt = varlength = false; + trlcontxt = variable_trail_rule = varlength = false; trailcnt = headcnt = rulelen = 0; + current_state_type = STATE_NORMAL; + new_rule(); } ; flexrule : scon '^' re eol { pat = link_machines( $3, $4 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat ); + + if ( ! bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + fprintf( stderr, + "'^' operator results in sub-optimal performance\n" ); + } } | scon re eol { pat = link_machines( $2, $3 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) scset[actvsc[i]] = @@ -128,7 +146,8 @@ flexrule : scon '^' re eol | '^' re eol { pat = link_machines( $2, $3 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); /* add to all non-exclusive start conditions, * including the default (0) start condition @@ -137,12 +156,22 @@ flexrule : scon '^' re eol for ( i = 1; i <= lastsc; ++i ) if ( ! scxclu[i] ) scbol[i] = mkbranch( scbol[i], pat ); + + if ( ! 
bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + fprintf( stderr, + "'^' operator results in sub-optimal performance\n" ); + } } | re eol { pat = link_machines( $1, $2 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= lastsc; ++i ) if ( ! scxclu[i] ) @@ -207,8 +236,7 @@ eol : '$' { if ( varlength && headcnt == 0 ) /* both head and trail are variable-length */ - synerr( "illegal trailing context" ); - + variable_trail_rule = true; else trailcnt = rulelen; } @@ -223,7 +251,33 @@ re : re '|' series } | re2 series - { $$ = link_machines( $1, $2 ); } + { + if ( transchar[lastst[$2]] != SYM_EPSILON ) + /* provide final transition \now/ so it + * will be marked as a trailing context + * state + */ + $2 = link_machines( $2, mkstate( SYM_EPSILON ) ); + + mark_beginning_as_normal( $2 ); + current_state_type = STATE_NORMAL; + + if ( varlength && headcnt == 0 ) + { /* variable trailing context rule */ + /* mark the first part of the rule as the accepting + * "head" part of a trailing context rule + */ + /* by the way, we didn't do this at the beginning + * of this production because back then + * current_state_type was set up for a trail + * rule, and add_accept() can create a new + * state ... 
+ */ + add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); + } + + $$ = link_machines( $1, $2 ); + } | series { $$ = $1; } @@ -243,12 +297,14 @@ re2 : re '/' trlcontxt = true; if ( varlength ) - /* the trailing context had better be fixed-length */ + /* we hope the trailing context is fixed-length */ varlength = false; else headcnt = rulelen; rulelen = 0; + + current_state_type = STATE_TRAILING_CONTEXT; $$ = $1; } ; @@ -290,13 +346,18 @@ singleton : singleton '*' { varlength = true; - if ( $3 > $5 || $3 <= 0 ) + if ( $3 > $5 || $3 < 0 ) { synerr( "bad iteration values" ); $$ = $1; } else - $$ = mkrep( $1, $3, $5 ); + { + if ( $3 == 0 ) + $$ = mkopt( mkrep( $1, $3, $5 ) ); + else + $$ = mkrep( $1, $3, $5 ); + } } | singleton '{' NUMBER ',' '}' @@ -491,7 +552,7 @@ char str[]; { syntaxerror = true; - fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); + fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); }