From: Vern Paxson Date: Wed, 14 Mar 1990 13:39:21 +0000 (+0000) Subject: Tweaks for NUL chars. X-Git-Tag: flex-2-5-5b~556 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f248f2d111793b35a158d9c52e5c9b892ce5a5bc;p=flex Tweaks for NUL chars. --- diff --git a/dfa.c b/dfa.c index f1be016..b8250ba 100644 --- a/dfa.c +++ b/dfa.c @@ -209,9 +209,9 @@ int state[]; { register int i, ec; - int out_char_set[CSIZE + 1]; + int out_char_set[CSIZE]; - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) { ec = abs( ecgroup[i] ); out_char_set[i] = state[ec]; @@ -222,7 +222,7 @@ int state[]; list_character_set( file, out_char_set ); /* now invert the members of the set to get the jam transitions */ - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) out_char_set[i] = ! out_char_set[i]; fprintf( file, "\n jam-transitions: EOF " ); @@ -406,14 +406,23 @@ ntod() { int *accset, ds, nacc, newds; - int duplist[CSIZE + 1], sym, hashval, numstates, dsize; - int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; + int sym, hashval, numstates, dsize; int *nset, *dset; int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; int num_start_states; int todo_head, todo_next; + /* note that the following are indexed by *equivalence classes* + * and not by characters. Since equivalence classes are indexed + * beginning with 1, even if the scanner accepts NUL's, this + * means that (since every character is potentially in its own + * equivalence class) these arrays must have room for indices + * from 1 to CSIZE, so their size must be CSIZE + 1. + */ + int duplist[CSIZE + 1], state[CSIZE + 1]; + int targfreq[CSIZE + 1], targstate[CSIZE + 1]; + /* this is so find_table_space(...) will know where to start looking in * chk/nxt for unused records for space to put in the state */ @@ -919,7 +928,8 @@ int symlist[]; if ( tch != SYM_EPSILON ) { if ( tch < -lastccl || tch > csize ) - flexfatal( "bad transition character detected in sympartition()" ); + flexfatal( + "bad transition character detected in sympartition()" ); if ( tch > 0 ) { /* character transition */ diff --git a/gen.c b/gen.c index fdc4feb..8bba86c 100644 --- a/gen.c +++ b/gen.c @@ -214,13 +214,14 @@ genecs() { register int i, j; - static char C_char_decl[] = "static const YY_CHAR %s[%d] =\n { 0,\n"; + static char C_char_decl[] = + "static const YY_CHAR %s[%d] =\n { %d,\n"; int numrows; Char clower(); - printf( C_char_decl, "yy_ec", csize + 1 ); + printf( C_char_decl, "yy_ec", csize, uses_NUL ? abs( ecgroup[0] ) : 0 ); - for ( i = 1; i <= csize; ++i ) + for ( i = 1; i < csize; ++i ) { if ( caseins && (i >= 'A') && (i <= 'Z') ) ecgroup[i] = ecgroup[clower( i )]; @@ -233,16 +234,22 @@ genecs() if ( trace ) { + char *readable_form(); + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + if ( uses_NUL ) + { + fprintf( stderr, "%4s = %-2d\n", + readable_form( 0 ), ecgroup[0] ); + } + numrows = (csize + 1) / 8; for ( j = 1; j <= numrows; ++j ) { - for ( i = j; i <= csize; i = i + numrows ) + for ( i = j; i < csize; i = i + numrows ) { - char *readable_form(); - fprintf( stderr, "%4s = %-2d", readable_form( i ), ecgroup[i] ); diff --git a/main.c b/main.c index 81842f8..95a1751 100644 --- a/main.c +++ b/main.c @@ -60,7 +60,7 @@ int current_state_type; int variable_trailing_context_rules; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; -int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int numecs, nextecm[CSIZE], ecgroup[CSIZE], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; int *xlation = (int *) 0; int num_xlations; @@ -79,7 +79,7 @@ Char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -int num_backtracking, bol_needed; +int num_backtracking, bol_needed, uses_NUL; FILE *temp_action_file; FILE *backtrack_file; int end_of_buffer_state; @@ -96,12 +96,6 @@ static char *outfile = "lexyy.c"; static int outfile_created = 0; -/* flex - main program - * - * synopsis (from the shell) - * flex [-v] [file ...] - */ - main( argc, argv ) int argc; char **argv; @@ -206,7 +200,7 @@ int status; (void) unlink( outfile ); } - if ( backtrack_report ) + if ( backtrack_report && backtrack_file ) { if ( num_backtracking == 0 ) fprintf( backtrack_file, "No backtracking.\n" ); @@ -550,7 +544,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; - variable_trailing_context_rules = bol_needed = false; + variable_trailing_context_rules = bol_needed = uses_NUL = false; linenum = sectnum = 1; firstprot = NIL; @@ -565,9 +559,9 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ if ( useecs ) { /* set up doubly-linked equivalence classes */ - ecgroup[1] = NIL; + ecgroup[0] = NIL; - for ( i = 2; i <= csize; ++i ) + for ( i = 1; i < csize; ++i ) { ecgroup[i] = i - 1; nextecm[i - 1] = i; @@ -578,7 +572,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ else { /* put everything in its own equivalence class */ - for ( i = 1; i <= csize; ++i ) + for ( i = 0; i < csize; ++i ) { ecgroup[i] = i; nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ @@ -627,7 +621,11 @@ readin() else if ( useecs ) { - numecs = cre8ecs( nextecm, ecgroup, csize ); + if ( uses_NUL ) + numecs = cre8ecs( nextecm, ecgroup, csize, 0 ); + else + numecs = cre8ecs( nextecm, ecgroup, csize - 1, 1 ); + ccl2ecl(); } diff --git a/misc.c b/misc.c index 119658a..e7d8398 100644 --- a/misc.c +++ b/misc.c @@ -610,12 +610,6 @@ Char array[]; array[sptr] = c; - if ( esc_char == '\0' ) - { - synerr( "escape sequence for null not allowed" ); - return ( 1 ); - } - return ( esc_char ); } diff --git a/parse.y b/parse.y index 07ce77f..12479a8 100644 --- a/parse.y +++ b/parse.y @@ -503,7 +503,7 @@ singleton : singleton '*' ++rulelen; if ( $1 == '\0' ) - synerr( "null in rule" ); + uses_NUL = true; if ( caseins && $1 >= 'A' && $1 <= 'Z' ) $1 = clower( $1 ); @@ -531,6 +531,9 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { + if ( $2 == '\0' || $4 == '\0' ) + uses_NUL = true; + if ( $2 > $4 ) synerr( "negative range in character class" ); @@ -559,6 +562,9 @@ ccl : ccl CHAR '-' CHAR | ccl CHAR { + if ( $2 == '\0' ) + uses_NUL = true; + if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); @@ -579,6 +585,9 @@ ccl : ccl CHAR '-' CHAR string : string CHAR { + if ( $2 == '\0' ) + uses_NUL = true; + if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); diff --git a/scan.l b/scan.l index dfc1d47..ee9a2be 100644 --- a/scan.l +++ b/scan.l @@ -135,15 +135,15 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) char *malloc(); ++linenum; - xlation = (int *) malloc( sizeof( int ) * (csize + 1) ); - - for ( i = 1; i <= csize; ++i ) - xlation[i] = 0; + xlation = (int *) malloc( sizeof( int ) * csize ); if ( ! xlation ) flexfatal( "dynamic memory failure building %t table" ); + for ( i = 0; i < csize; ++i ) + xlation[i] = 0; + num_xlations = 0; BEGIN(XLATION); diff --git a/tblcmp.c b/tblcmp.c index 840736d..ac822eb 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -233,7 +233,7 @@ cmptmps() * transitions */ - nummecs = cre8ecs( tecfwd, tecbck, numecs ); + nummecs = cre8ecs( tecfwd, tecbck, numecs, 1 ); } else