*
* synopsis
*
- * add_accept( mach, headcnt, trailcnt );
- *
- * the global ACCNUM is incremented and the new value becomes mach's
- * accepting number. if headcnt or trailcnt is non-zero then the machine
- * recognizes a pattern with trailing context. headcnt is the number of
- * characters in the matched part of the pattern, or zero if the matched
- * part has variable length. trailcnt is the number of trailing context
- * characters in the pattern, or zero if the trailing context has variable
- * length.
+ * add_accept( mach, accepting_number );
+ *
+ * accepting_number becomes mach's accepting number.
*/
-add_accept( mach, headcnt, trailcnt )
-int mach, headcnt, trailcnt;
+add_accept( mach, accepting_number )
+int mach;
{
- int astate;
-
- fprintf( temp_action_file, "case %d:\n", ++accnum );
-
- if ( headcnt > 0 || trailcnt > 0 )
- { /* do trailing context magic to not match the trailing characters */
- char *scanner_cp =
- (fulltbl || fullspd) ? "yy_c_buf_p = yy_cp" : "yy_c_buf_p";
- char *scanner_bp = (fulltbl || fullspd) ? "yy_bp" : "yy_b_buf_p";
-
- fprintf( temp_action_file,
- "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" );
-
- if ( headcnt > 0 )
- {
- int head_offset = headcnt - 1;
-
- if ( fullspd || fulltbl )
- /* with the fast skeleton, the character pointer points
- * to the *next* character to scan, rather than the one
- * that was last scanned
- */
- ++head_offset;
-
- if ( head_offset > 0 )
- fprintf( temp_action_file, "%s = %s + %d;\n",
- scanner_cp, scanner_bp, head_offset );
-
- else
- fprintf( temp_action_file, "%s = %s;\n",
- scanner_cp, scanner_bp );
- }
-
- else
- fprintf( temp_action_file, "%s -= %d;\n", scanner_cp, trailcnt );
-
- fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
- }
-
- line_directive_out( temp_action_file );
-
/* hang the accepting number off an epsilon state. if it is associated
* with a state that has a non-epsilon out-transition, then the state
* will accept BEFORE it makes that transition, i.e., one character
*/
if ( transchar[finalst[mach]] == SYM_EPSILON )
- accptnum[finalst[mach]] = accnum;
+ accptnum[finalst[mach]] = accepting_number;
else
{
- astate = mkstate( SYM_EPSILON );
- accptnum[astate] = accnum;
+ int astate = mkstate( SYM_EPSILON );
+ accptnum[astate] = accepting_number;
mach = link_machines( mach, astate );
}
}
return ( init );
}
+/* finish_rule - finish up the processing for a rule
+ *
+ * synopsis
+ *
+ *   finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
+ *
+ * An accepting number (the current value of num_rules) is added to the
+ * given machine, the rule's line number and type are recorded, and the
+ * "case N:" label that introduces the rule's action is written to
+ * temp_action_file, followed by a line directive.
+ *
+ * If variable_trail_rule is true then the rule has trailing context and
+ * both the head and trail are variable size.  Otherwise if headcnt or
+ * trailcnt is non-zero then the machine recognizes a pattern with trailing
+ * context and headcnt is the number of characters in the matched part of
+ * the pattern, or zero if the matched part has variable length.  trailcnt
+ * is the number of trailing context characters in the pattern, or zero if
+ * the trailing context has variable length.
+ */
+
+finish_rule( mach, variable_trail_rule, headcnt, trailcnt )
+int mach, variable_trail_rule, headcnt, trailcnt;
+
+    {
+    add_accept( mach, num_rules );
+
+    /* we did this in new_rule(), but it often gets the wrong
+     * number because we do it before we start parsing the current rule.
+     * Note that it is rule_linenum[] which holds line numbers;
+     * rule_type[] is set separately below.
+     */
+    rule_linenum[num_rules] = linenum;
+
+    fprintf( temp_action_file, "case %d:\n", num_rules );
+
+    if ( variable_trail_rule )
+        {
+        rule_type[num_rules] = RULE_VARIABLE;
+
+        if ( performance_report )
+            fprintf( stderr, "Variable trailing context rule at line %d\n",
+                     rule_linenum[num_rules] );
+
+        variable_trailing_context_rules = true;
+        }
+
+    else
+        {
+        rule_type[num_rules] = RULE_NORMAL;
+
+        if ( headcnt > 0 || trailcnt > 0 )
+            {
+            /* do trailing context magic to not match the trailing characters */
+            char *scanner_cp = "yy_c_buf_p = yy_cp";
+            char *scanner_bp = "yy_bp";
+
+            fprintf( temp_action_file,
+    "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
+
+            if ( headcnt > 0 )
+                /* fixed-length head: position relative to the start
+                 * of the match
+                 */
+                fprintf( temp_action_file, "%s = %s + %d;\n",
+                         scanner_cp, scanner_bp, headcnt );
+
+            else
+                /* variable-length head, fixed-length trail: back up
+                 * over the trailing context
+                 */
+                fprintf( temp_action_file,
+                         "%s -= %d;\n", scanner_cp, trailcnt );
+
+            fprintf( temp_action_file,
+                     "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
+            }
+        }
+
+    line_directive_out( temp_action_file );
+    }
+
/* link_machines - connect two machines together
*
}
+/* mark_beginning_as_normal - mark each "beginning" state in a machine
+ *                            as being a "normal" (i.e., not trailing
+ *                            context-associated) state
+ *
+ * synopsis
+ *
+ *   mark_beginning_as_normal( mach )
+ *
+ *     mach - machine to mark
+ *
+ * The "beginning" states are the epsilon closure of the first state.
+ * A state already marked STATE_NORMAL ends the walk along that path; a
+ * state whose type is neither STATE_NORMAL nor STATE_TRAILING_CONTEXT is
+ * reported through flexerror().
+ */
+
+mark_beginning_as_normal( mach )
+register int mach;
+
+    {
+    int kind = state_type[mach];
+
+    if ( kind == STATE_NORMAL )
+        /* oh, we've already visited here */
+        return;
+
+    if ( kind != STATE_TRAILING_CONTEXT )
+        {
+        flexerror( "bad state type in mark_beginning_as_normal()" );
+        return;
+        }
+
+    state_type[mach] = STATE_NORMAL;
+
+    /* only epsilon transitions extend the closure */
+    if ( transchar[mach] != SYM_EPSILON )
+        return;
+
+    if ( trans1[mach] != NO_TRANSITION )
+        mark_beginning_as_normal( trans1[mach] );
+
+    if ( trans2[mach] != NO_TRANSITION )
+        mark_beginning_as_normal( trans2[mach] );
+    }
+
+
/* mkbranch - make a machine that branches to two machines
*
* synopsis
int mach, lb, ub;
{
- int base, tail, copy, i;
+ int base_mach, tail, copy, i;
- base = copysingl( mach, lb - 1 );
+ base_mach = copysingl( mach, lb - 1 );
if ( ub == INFINITY )
{
copy = dupmachine( mach );
- mach = link_machines( mach, link_machines( base, mkclos( copy ) ) );
+ mach = link_machines( mach,
+ link_machines( base_mach, mkclos( copy ) ) );
}
else
tail = mkopt( link_machines( copy, tail ) );
}
- mach = link_machines( mach, link_machines( base, tail ) );
+ mach = link_machines( mach, link_machines( base_mach, tail ) );
}
return ( mach );
trans2 = reallocate_integer_array( trans2, current_mns );
accptnum = reallocate_integer_array( accptnum, current_mns );
assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
+ state_type = reallocate_integer_array( state_type, current_mns );
}
firstst[lastnfa] = lastnfa;
trans1[lastnfa] = NO_TRANSITION;
trans2[lastnfa] = NO_TRANSITION;
accptnum[lastnfa] = NIL;
- assoc_rule[lastnfa] = linenum; /* identify rules by line number in input */
+ assoc_rule[lastnfa] = num_rules;
+ state_type[lastnfa] = current_state_type;
/* fix up equivalence classes base on this transition. Note that any
* character which has its own transition gets its own equivalence class.
trans2[statefrom] = stateto;
}
}
+
+/* new_rule - initialize for a new rule
+ *
+ * synopsis
+ *
+ *   new_rule();
+ *
+ * The global num_rules is incremented and the per-rule dynamic arrays
+ * (rule_type[] and rule_linenum[]) are grown as needed.  The current
+ * input line number is recorded as the new rule's line number.
+ */
+
+new_rule()
+
+    {
+    ++num_rules;
+
+    if ( num_rules >= current_max_rules )
+        { /* out of room - enlarge both per-rule arrays together */
+        ++num_reallocs;
+        current_max_rules = current_max_rules + MAX_RULES_INCREMENT;
+
+        rule_type =
+            reallocate_integer_array( rule_type, current_max_rules );
+        rule_linenum =
+            reallocate_integer_array( rule_linenum, current_max_rules );
+        }
+
+    if ( num_rules > MAX_RULE )
+        lerrif( "too many rules (> %d)!", MAX_RULE );
+
+    rule_linenum[num_rules] = linenum;
+    }
#endif
int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
-int trlcontxt, xcluflg, cclsorted, varlength;
+int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
char clower();
static int madeany = false; /* whether we've made the '.' character class */
%}
%%
-goal : initlex sect1 sect1end sect2
+goal : initlex sect1 sect1end sect2 initforrule
{ /* add default rule */
int def_rule;
def_rule = mkstate( -pat );
- add_accept( def_rule, 0, 0 );
+ finish_rule( def_rule, variable_trail_rule, 0, 0 );
for ( i = 1; i <= lastsc; ++i )
scset[i] = mkbranch( scset[i], def_rule );
- fputs( "YY_DEFAULT_ACTION;\n\tYY_BREAK\n",
- temp_action_file );
+ if ( spprdflt )
+ fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
+ temp_action_file );
+ else
+ fputs( "ECHO", temp_action_file );
+
+ fputs( ";\n\tYY_BREAK\n", temp_action_file );
}
;
initforrule :
{
/* initialize for a parse of one rule */
- trlcontxt = varlength = false;
+ trlcontxt = variable_trail_rule = varlength = false;
trailcnt = headcnt = rulelen = 0;
+ current_state_type = STATE_NORMAL;
+ new_rule();
}
;
flexrule : scon '^' re eol
{
pat = link_machines( $3, $4 );
- add_accept( pat, headcnt, trailcnt );
+ finish_rule( pat, variable_trail_rule,
+ headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scbol[actvsc[i]] =
mkbranch( scbol[actvsc[i]], pat );
+
+ if ( ! bol_needed )
+ {
+ bol_needed = true;
+
+ if ( performance_report )
+ fprintf( stderr,
+ "'^' operator results in sub-optimal performance\n" );
+ }
}
| scon re eol
{
pat = link_machines( $2, $3 );
- add_accept( pat, headcnt, trailcnt );
+ finish_rule( pat, variable_trail_rule,
+ headcnt, trailcnt );
for ( i = 1; i <= actvp; ++i )
scset[actvsc[i]] =
| '^' re eol
{
pat = link_machines( $2, $3 );
- add_accept( pat, headcnt, trailcnt );
+ finish_rule( pat, variable_trail_rule,
+ headcnt, trailcnt );
/* add to all non-exclusive start conditions,
* including the default (0) start condition
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scbol[i] = mkbranch( scbol[i], pat );
+
+ if ( ! bol_needed )
+ {
+ bol_needed = true;
+
+ if ( performance_report )
+ fprintf( stderr,
+ "'^' operator results in sub-optimal performance\n" );
+ }
}
| re eol
{
pat = link_machines( $1, $2 );
- add_accept( pat, headcnt, trailcnt );
+ finish_rule( pat, variable_trail_rule,
+ headcnt, trailcnt );
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
{
if ( varlength && headcnt == 0 )
/* both head and trail are variable-length */
- synerr( "illegal trailing context" );
-
+ variable_trail_rule = true;
else
trailcnt = rulelen;
}
}
| re2 series
- { $$ = link_machines( $1, $2 ); }
+ {
+ if ( transchar[lastst[$2]] != SYM_EPSILON )
+ /* provide final transition \now/ so it
+ * will be marked as a trailing context
+ * state
+ */
+ $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
+
+ mark_beginning_as_normal( $2 );
+ current_state_type = STATE_NORMAL;
+
+ if ( varlength && headcnt == 0 )
+ { /* variable trailing context rule */
+ /* mark the first part of the rule as the accepting
+ * "head" part of a trailing context rule
+ */
+ /* by the way, we didn't do this at the beginning
+ * of this production because back then
+ * current_state_type was set up for a trail
+ * rule, and add_accept() can create a new
+ * state ...
+ */
+ add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
+ }
+
+ $$ = link_machines( $1, $2 );
+ }
| series
{ $$ = $1; }
trlcontxt = true;
if ( varlength )
- /* the trailing context had better be fixed-length */
+ /* we hope the trailing context is fixed-length */
varlength = false;
else
headcnt = rulelen;
rulelen = 0;
+
+ current_state_type = STATE_TRAILING_CONTEXT;
$$ = $1;
}
;
{
varlength = true;
- if ( $3 > $5 || $3 <= 0 )
+ if ( $3 > $5 || $3 < 0 )
{
synerr( "bad iteration values" );
$$ = $1;
}
else
- $$ = mkrep( $1, $3, $5 );
+ {
+ if ( $3 == 0 )
+ $$ = mkopt( mkrep( $1, $3, $5 ) );
+ else
+ $$ = mkrep( $1, $3, $5 );
+ }
}
| singleton '{' NUMBER ',' '}'
{
syntaxerror = true;
- fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str );
+ fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str );
}