From: Peter Johnson Date: Wed, 12 Apr 2006 04:06:44 +0000 (-0000) Subject: Correctly handle input characters >127 by using unsigned char in the re2c X-Git-Tag: v0.5.0~17^2~20 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e542de015a60678a22119449cf76ab96f6c0cb4c;p=yasm Correctly handle input characters >127 by using unsigned char in the re2c tokenizers. When plain char is signed, input bytes >127 are read as negative values, and thus aren't caught by the [\000-\377] range. * gas-parser.h (YYCTYPE): Change to unsigned char. * gas-bison.y, gas-token.re: Cast as necessary to char. * nasm-parser.h, nasm-bison.y, nasm-token.re: Likewise. * lc3bid.re: Likewise. svn path=/trunk/yasm/; revision=1481 --- diff --git a/modules/arch/lc3b/lc3bid.re b/modules/arch/lc3b/lc3bid.re index 9b0dcee4..f4f16fea 100644 --- a/modules/arch/lc3b/lc3bid.re +++ b/modules/arch/lc3b/lc3bid.re @@ -313,7 +313,7 @@ yasm_lc3b__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, } -#define YYCTYPE char +#define YYCTYPE unsigned char #define YYCURSOR id #define YYLIMIT id #define YYMARKER marker @@ -327,10 +327,10 @@ yasm_lc3b__parse_cpu(yasm_arch *arch, const char *cpuid, size_t cpuid_len, yasm_arch_regtmod yasm_lc3b__parse_check_regtmod(yasm_arch *arch, unsigned long *data, - const char *id, size_t id_len, + const char *oid, size_t id_len, unsigned long line) { - const char *oid = id; + const YYCTYPE *id = (const YYCTYPE *)oid; /*const char *marker;*/ /*!re2c /* integer registers */ @@ -351,10 +351,10 @@ yasm_lc3b__parse_check_regtmod(yasm_arch *arch, unsigned long *data, yasm_arch_insnprefix yasm_lc3b__parse_check_insnprefix(yasm_arch *arch, unsigned long data[4], - const char *id, size_t id_len, + const char *oid, size_t id_len, unsigned long line) { - /*const char *oid = id;*/ + const YYCTYPE *id = (const YYCTYPE *)oid; /*const char *marker;*/ /*!re2c /* instructions */ diff --git a/modules/parsers/gas/gas-bison.y b/modules/parsers/gas/gas-bison.y index a77c4c52..4498d57b 100644 --- a/modules/parsers/gas/gas-bison.y 
+++ b/modules/parsers/gas/gas-bison.y @@ -144,7 +144,7 @@ input: /* empty */ if (parser_gas->save_input) yasm_linemap_add_source(parser_gas->linemap, parser_gas->prev_bc, - parser_gas->save_line[parser_gas->save_last ^ 1]); + (char *)parser_gas->save_line[parser_gas->save_last ^ 1]); yasm_linemap_goto_next(parser_gas->linemap); } ; diff --git a/modules/parsers/gas/gas-parser.h b/modules/parsers/gas/gas-parser.h index e0f712e9..2d3e0a2b 100644 --- a/modules/parsers/gas/gas-parser.h +++ b/modules/parsers/gas/gas-parser.h @@ -32,7 +32,7 @@ #include "gas-bison.h" -#define YYCTYPE char +#define YYCTYPE unsigned char typedef struct Scanner { YYCTYPE *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; unsigned int tchar, tline, cline; @@ -55,7 +55,7 @@ typedef struct gas_rept { size_t linepos; /* position to start pulling chars from line */ int ended; /* seen endr directive yet? */ - char *oldbuf; /* saved previous fill buffer */ + YYCTYPE *oldbuf; /* saved previous fill buffer */ size_t oldbuflen; /* previous fill buffer length */ size_t oldbufpos; /* position in previous fill buffer */ } gas_rept; diff --git a/modules/parsers/gas/gas-token.re b/modules/parsers/gas/gas-token.re index 71f95585..35c6f86d 100644 --- a/modules/parsers/gas/gas-token.re +++ b/modules/parsers/gas/gas-token.re @@ -52,14 +52,16 @@ RCSID("$Id$"); s->tok = cursor; \ } +#define TOK ((char *)s->tok) #define TOKLEN (size_t)(cursor-s->tok) static size_t -rept_input(yasm_parser_gas *parser_gas, /*@out@*/ char *buf, size_t max_size) +rept_input(yasm_parser_gas *parser_gas, /*@out@*/ YYCTYPE *buf, + size_t max_size) { gas_rept *rept = parser_gas->rept; size_t numleft = max_size; - char *bufp = buf; + YYCTYPE *bufp = buf; /* If numrept is 0, copy out just the line end characters */ if (rept->numrept == 0) { @@ -147,7 +149,7 @@ fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor) if (!s->bot) first = 1; if((s->top - s->lim) < BSIZE){ - char *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); + YYCTYPE *buf = 
yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); memcpy(buf, s->tok, (size_t)(s->lim - s->tok)); s->tok = buf; s->ptr = &buf[s->ptr - s->bot]; @@ -162,14 +164,14 @@ fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor) if (parser_gas->rept && parser_gas->rept->ended) { /* Pull from rept lines instead of preproc */ cnt = rept_input(parser_gas, s->lim, BSIZE); - } else if((cnt = yasm_preproc_input(parser_gas->preproc, s->lim, - BSIZE)) == 0) { + } else if((cnt = yasm_preproc_input(parser_gas->preproc, + (char *)s->lim, BSIZE)) == 0) { s->eof = &s->lim[cnt]; *s->eof++ = '\n'; } s->lim += cnt; if (first && parser_gas->save_input) { int i; - char *saveline; + YYCTYPE *saveline; parser_gas->save_last ^= 1; saveline = parser_gas->save_line[parser_gas->save_last]; /* save next line into cur_line */ @@ -186,7 +188,7 @@ save_line(yasm_parser_gas *parser_gas, YYCTYPE *cursor) { Scanner *s = &parser_gas->s; int i = 0; - char *saveline; + YYCTYPE *saveline; parser_gas->save_last ^= 1; saveline = parser_gas->save_line[parser_gas->save_last]; @@ -211,7 +213,7 @@ gas_parser_cleanup(yasm_parser_gas *parser_gas) #define STRBUF_ALLOC_SIZE 128 /* string buffer used when parsing strings/character constants */ -static char *strbuf = (char *)NULL; +static YYCTYPE *strbuf = NULL; /* length of strbuf (including terminating NULL character) */ static size_t strbuf_size = 0; @@ -277,7 +279,7 @@ scan: ([1-9] digit*) | "0" { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_dec(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_dec(TOK, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -286,7 +288,7 @@ scan: '0b' bindigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_bin(s->tok+2, cur_line); + lvalp->intn = yasm_intnum_create_bin(TOK+2, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -295,7 +297,7 @@ scan: "0" octdigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = 
yasm_intnum_create_oct(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_oct(TOK, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -305,7 +307,7 @@ scan: savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; /* skip 0 and x */ - lvalp->intn = yasm_intnum_create_hex(s->tok+2, cur_line); + lvalp->intn = yasm_intnum_create_hex(TOK+2, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -314,7 +316,7 @@ scan: "0" [DdEeFfTt] [-+]? (digit+)? ("." digit*)? ('e' [-+]? digit+)? { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->flt = yasm_floatnum_create(s->tok+2); + lvalp->flt = yasm_floatnum_create(TOK+2); s->tok[TOKLEN] = savech; RETURN(FLTNUM); } @@ -397,13 +399,13 @@ scan: /* label or maybe directive */ [.][a-zA-Z0-9_$.]* { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(DIR_ID); } /* label */ [_][a-zA-Z0-9_$.]* { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } @@ -412,7 +414,7 @@ scan: savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_regtmod - (parser_gas->arch, lvalp->arch_data, s->tok+1, TOKLEN-1, + (parser_gas->arch, lvalp->arch_data, TOK+1, TOKLEN-1, cur_line)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; @@ -444,7 +446,7 @@ scan: || s->tok[count] == '\r') count--; /* Just an identifier, return as such. */ - lvalp->str_val = yasm__xstrndup(s->tok, count); + lvalp->str_val = yasm__xstrndup(TOK, count); RETURN(LABEL); } @@ -457,7 +459,7 @@ scan: savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_insnprefix - (parser_gas->arch, lvalp->arch_data, s->tok, TOKLEN, + (parser_gas->arch, lvalp->arch_data, TOK, TOKLEN, cur_line)) { case YASM_ARCH_INSN: s->tok[TOKLEN] = savech; @@ -471,7 +473,7 @@ scan: } } /* Just an identifier, return as such. 
*/ - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } @@ -500,7 +502,7 @@ section_directive: /*!re2c [a-zA-Z0-9_$.-]+ { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); parser_gas->state = INITIAL; RETURN(ID); } @@ -567,7 +569,7 @@ stringconst_scan: "\\" digit digit digit { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_oct(s->tok+1, cur_line); + lvalp->intn = yasm_intnum_create_oct(TOK+1, cur_line); s->tok[TOKLEN] = savech; strbuf_append(count++, cursor, s, cur_line, @@ -578,7 +580,7 @@ stringconst_scan: '\\x' hexdigit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_hex(s->tok+2, cur_line); + lvalp->intn = yasm_intnum_create_hex(TOK+2, cur_line); s->tok[TOKLEN] = savech; strbuf_append(count++, cursor, s, cur_line, @@ -601,7 +603,7 @@ stringconst_scan: dquot { strbuf_append(count, cursor, s, cur_line, '\0'); - lvalp->str.contents = strbuf; + lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; RETURN(STRING); } @@ -669,8 +671,9 @@ rept_scan: /* Add .line as first line to get line numbers correct */ new_line = yasm_xmalloc(sizeof(gas_rept_line)); new_line->data = yasm_xmalloc(40); - sprintf(new_line->data, ".line %lu;", rept->startline+1); - new_line->len = strlen(new_line->data); + sprintf((char *)new_line->data, ".line %lu;", + rept->startline+1); + new_line->len = strlen((char *)new_line->data); STAILQ_INSERT_HEAD(&rept->lines, new_line, link); /* Save previous fill buffer */ diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y index fbcf645c..dd8fea7f 100644 --- a/modules/parsers/nasm/nasm-bison.y +++ b/modules/parsers/nasm/nasm-bison.y @@ -136,7 +136,7 @@ input: /* empty */ if (parser_nasm->save_input) yasm_linemap_add_source(parser_nasm->linemap, parser_nasm->temp_bc, - parser_nasm->save_line[parser_nasm->save_last ^ 1]); + (char 
*)parser_nasm->save_line[parser_nasm->save_last ^ 1]); yasm_linemap_goto_next(parser_nasm->linemap); } ; diff --git a/modules/parsers/nasm/nasm-parser.h b/modules/parsers/nasm/nasm-parser.h index c10d0016..d3ae1c2c 100644 --- a/modules/parsers/nasm/nasm-parser.h +++ b/modules/parsers/nasm/nasm-parser.h @@ -29,7 +29,7 @@ #include "nasm-bison.h" -#define YYCTYPE char +#define YYCTYPE unsigned char typedef struct Scanner { YYCTYPE *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; unsigned int tchar, tline, cline; diff --git a/modules/parsers/nasm/nasm-token.re b/modules/parsers/nasm/nasm-token.re index e8e9fa43..4ece0095 100644 --- a/modules/parsers/nasm/nasm-token.re +++ b/modules/parsers/nasm/nasm-token.re @@ -51,6 +51,7 @@ RCSID("$Id$"); s->tok = cursor; \ } +#define TOK ((char *)s->tok) #define TOKLEN (size_t)(cursor-s->tok) @@ -72,7 +73,7 @@ fill(yasm_parser_nasm *parser_nasm, YYCTYPE *cursor) if (!s->bot) first = 1; if((s->top - s->lim) < BSIZE){ - char *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); + YYCTYPE *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); memcpy(buf, s->tok, (size_t)(s->lim - s->tok)); s->tok = buf; s->ptr = &buf[s->ptr - s->bot]; @@ -84,14 +85,14 @@ fill(yasm_parser_nasm *parser_nasm, YYCTYPE *cursor) yasm_xfree(s->bot); s->bot = buf; } - if((cnt = yasm_preproc_input(parser_nasm->preproc, s->lim, + if((cnt = yasm_preproc_input(parser_nasm->preproc, (char *)s->lim, BSIZE)) == 0) { s->eof = &s->lim[cnt]; *s->eof++ = '\n'; } s->lim += cnt; if (first && parser_nasm->save_input) { int i; - char *saveline; + YYCTYPE *saveline; parser_nasm->save_last ^= 1; saveline = parser_nasm->save_line[parser_nasm->save_last]; /* save next line into cur_line */ @@ -108,7 +109,7 @@ save_line(yasm_parser_nasm *parser_nasm, YYCTYPE *cursor) { Scanner *s = &parser_nasm->s; int i = 0; - char *saveline; + YYCTYPE *saveline; parser_nasm->save_last ^= 1; saveline = parser_nasm->save_line[parser_nasm->save_last]; @@ -133,7 +134,7 @@ 
nasm_parser_cleanup(yasm_parser_nasm *parser_nasm) #define STRBUF_ALLOC_SIZE 128 /* string buffer used when parsing strings/character constants */ -static char *strbuf = (char *)NULL; +static YYCTYPE *strbuf = NULL; /* length of strbuf (including terminating NULL character) */ static size_t strbuf_size = 0; @@ -185,7 +186,7 @@ scan: digit+ { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_dec(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_dec(TOK, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -193,21 +194,21 @@ scan: bindigit+ 'b' { s->tok[TOKLEN-1] = '\0'; /* strip off 'b' */ - lvalp->intn = yasm_intnum_create_bin(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_bin(TOK, cur_line); RETURN(INTNUM); } /* 777q or 777o - octal number */ octdigit+ [qQoO] { s->tok[TOKLEN-1] = '\0'; /* strip off 'q' or 'o' */ - lvalp->intn = yasm_intnum_create_oct(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_oct(TOK, cur_line); RETURN(INTNUM); } /* 0AAh form of hexidecimal number */ digit hexdigit* 'h' { s->tok[TOKLEN-1] = '\0'; /* strip off 'h' */ - lvalp->intn = yasm_intnum_create_hex(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_hex(TOK, cur_line); RETURN(INTNUM); } @@ -217,10 +218,10 @@ scan: s->tok[TOKLEN] = '\0'; if (s->tok[1] == 'x') /* skip 0 and x */ - lvalp->intn = yasm_intnum_create_hex(s->tok+2, cur_line); + lvalp->intn = yasm_intnum_create_hex(TOK+2, cur_line); else /* don't skip 0 */ - lvalp->intn = yasm_intnum_create_hex(s->tok+1, cur_line); + lvalp->intn = yasm_intnum_create_hex(TOK+1, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -229,7 +230,7 @@ scan: digit+ "." digit* ('e' [-+]? digit+)? { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->flt = yasm_floatnum_create(s->tok); + lvalp->flt = yasm_floatnum_create(TOK); s->tok[TOKLEN] = savech; RETURN(FLTNUM); } @@ -346,7 +347,7 @@ scan: /* special non-local ..@label and labels like ..start */ ".." 
[a-zA-Z0-9_$#@~.?]+ { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(SPECIAL_ID); } @@ -354,10 +355,10 @@ scan: "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* { /* override local labels in directive state */ if (parser_nasm->state == DIRECTIVE2) { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } else if (!parser_nasm->locallabel_base) { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); yasm__warning(YASM_WARN_GENERAL, cur_line, N_("no non-local label before `%s'"), lvalp->str_val); @@ -365,7 +366,7 @@ scan: len = TOKLEN + parser_nasm->locallabel_base_len; lvalp->str_val = yasm_xmalloc(len + 1); strcpy(lvalp->str_val, parser_nasm->locallabel_base); - strncat(lvalp->str_val, s->tok, TOKLEN); + strncat(lvalp->str_val, TOK, TOKLEN); lvalp->str_val[len] = '\0'; } @@ -374,7 +375,7 @@ scan: /* forced identifier */ "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* { - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } @@ -384,7 +385,7 @@ scan: s->tok[TOKLEN] = '\0'; if (parser_nasm->state != INSTRUCTION) switch (yasm_arch_parse_check_insnprefix - (parser_nasm->arch, lvalp->arch_data, s->tok, TOKLEN, + (parser_nasm->arch, lvalp->arch_data, TOK, TOKLEN, cur_line)) { case YASM_ARCH_INSN: parser_nasm->state = INSTRUCTION; @@ -397,7 +398,7 @@ scan: break; } switch (yasm_arch_parse_check_regtmod - (parser_nasm->arch, lvalp->arch_data, s->tok, TOKLEN, + (parser_nasm->arch, lvalp->arch_data, TOK, TOKLEN, cur_line)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; @@ -412,7 +413,7 @@ scan: s->tok[TOKLEN] = savech; } /* Just an identifier, return as such. 
*/ - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(ID); } @@ -444,7 +445,7 @@ linechg: linechg_numcount++; savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - lvalp->intn = yasm_intnum_create_dec(s->tok, cur_line); + lvalp->intn = yasm_intnum_create_dec(TOK, cur_line); s->tok[TOKLEN] = savech; RETURN(INTNUM); } @@ -491,7 +492,7 @@ linechg2: (any \ [\r\n])+ { parser_nasm->state = LINECHG; - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(FILENAME); } */ @@ -510,7 +511,7 @@ directive: iletter+ { parser_nasm->state = DIRECTIVE2; - lvalp->str_val = yasm__xstrndup(s->tok, TOKLEN); + lvalp->str_val = yasm__xstrndup(TOK, TOKLEN); RETURN(DIRECTIVE_NAME); } @@ -539,7 +540,7 @@ stringconst_scan: else yasm__error(cur_line, N_("unterminated string")); strbuf[count] = '\0'; - lvalp->str.contents = strbuf; + lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; if (parser_nasm->save_input && cursor != s->eof) cursor = save_line(parser_nasm, cursor); @@ -549,7 +550,7 @@ stringconst_scan: any { if (s->tok[0] == endch) { strbuf[count] = '\0'; - lvalp->str.contents = strbuf; + lvalp->str.contents = (char *)strbuf; lvalp->str.len = count; RETURN(STRING); }