From d85e7014a8456e02d5920e1a10cef50c02aa653e Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Sun, 17 Mar 2002 21:58:36 +0000 Subject: [PATCH] Binary object format output! Lots of structural changes needed, particularly with regards to resolve_label(). MAJOR FIXME: The tobytes() pass acts like a final optimization pass. This causes internal errors because of length mismatches (optimized to shorter len). svn path=/trunk/yasm/; revision=521 --- libyasm/arch.h | 12 +- libyasm/bytecode.c | 181 ++++++++++-- libyasm/bytecode.h | 41 ++- libyasm/coretype.h | 4 + libyasm/expr.c | 47 ++- libyasm/expr.h | 6 +- libyasm/section.c | 9 + libyasm/section.h | 2 + modules/arch/x86/x86-int.h | 7 +- modules/arch/x86/x86arch.c | 2 +- modules/arch/x86/x86bc.c | 268 ++++++++++++++--- modules/objfmts/bin/bin-objfmt.c | 316 ++++++++++++++++++++- modules/optimizers/basic/basic-optimizer.c | 46 +-- src/arch.h | 12 +- src/arch/x86/x86-int.h | 7 +- src/arch/x86/x86arch.c | 2 +- src/arch/x86/x86bc.c | 268 ++++++++++++++--- src/bytecode.c | 181 ++++++++++-- src/bytecode.h | 41 ++- src/coretype.h | 4 + src/expr.c | 47 ++- src/expr.h | 6 +- src/objfmts/bin/bin-objfmt.c | 316 ++++++++++++++++++++- src/optimizers/basic/basic-optimizer.c | 46 +-- src/section.c | 9 + src/section.h | 2 + 26 files changed, 1650 insertions(+), 232 deletions(-) diff --git a/libyasm/arch.h b/libyasm/arch.h index 81805e90..4fb5aa58 100644 --- a/libyasm/arch.h +++ b/libyasm/arch.h @@ -40,11 +40,13 @@ struct arch { void (*bc_print) (FILE *f, const bytecode *bc); /* See bytecode.h comments on bc_calc_len() */ - int (*bc_calc_len) (bytecode *bc, - intnum *(*resolve_label) (symrec *sym)); - /* See bytecode.h comments on bc_resolve() */ - void (*bc_resolve) (bytecode *bc, - intnum *(*resolve_label) (symrec *sym)); + int (*bc_calc_len) (bytecode *bc, const section *sect, + resolve_label_func resolve_label); + /* See bytecode.h comments on bc_tobytes() */ + int (*bc_tobytes) (bytecode *bc, unsigned char **bufp, + const section 
*sect, void *d, + output_expr_func output_expr, + resolve_label_func resolve_label); } bc; }; diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index e816e219..6b78ebe5 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -22,6 +22,8 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" @@ -318,7 +320,7 @@ bc_calc_len_data(bytecode_data *bc_data, unsigned long *len) static int bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, - intnum *(*resolve_label) (symrec *sym)) + const section *sect, resolve_label_func resolve_label) { int retval = 1; /*@null@*/ expr *temp; @@ -326,7 +328,7 @@ bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, temp = expr_copy(reserve->numitems); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (!num) retval = -1; @@ -338,7 +340,8 @@ bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, static int bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, - unsigned long line, intnum *(*resolve_label) (symrec *sym)) + unsigned long line, const section *sect, + resolve_label_func resolve_label) { FILE *f; /*@null@*/ expr *temp; @@ -349,7 +352,7 @@ bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, if (incbin->start) { temp = expr_copy(incbin->start); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (num) start = intnum_get_uint(num); @@ -362,7 +365,7 @@ bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, if (incbin->maxlen) { temp = expr_copy(incbin->maxlen); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (num) maxlen = intnum_get_uint(num); @@ -403,7 +406,8 @@ 
bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, } int -bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label) { int retval = 1; bytecode_data *bc_data; @@ -423,16 +427,17 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) break; case BC_RESERVE: reserve = bc_get_data(bc); - retval = bc_calc_len_reserve(reserve, &bc->len, resolve_label); + retval = bc_calc_len_reserve(reserve, &bc->len, sect, + resolve_label); break; case BC_INCBIN: incbin = bc_get_data(bc); - retval = bc_calc_len_incbin(incbin, &bc->len, bc->line, + retval = bc_calc_len_incbin(incbin, &bc->len, bc->line, sect, resolve_label); break; default: if (bc->type < cur_arch->bc.type_max) - retval = cur_arch->bc.bc_calc_len(bc, resolve_label); + retval = cur_arch->bc.bc_calc_len(bc, sect, resolve_label); else InternalError(_("Unknown bytecode type")); } @@ -441,7 +446,7 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) if (bc->multiple) { temp = expr_copy(bc->multiple); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (!num) retval = -1; @@ -453,41 +458,173 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) return retval; } -void -bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +static int +bc_tobytes_data(bytecode_data *bc_data, unsigned char **bufp, + const section *sect, const bytecode *bc, void *d, + output_expr_func output_expr) +{ + dataval *dv; + size_t slen; + size_t i; + + STAILQ_FOREACH(dv, &bc_data->datahead, link) { + switch (dv->type) { + case DV_EMPTY: + break; + case DV_EXPR: + if (output_expr(&dv->data.expn, bufp, bc_data->size, sect, bc, + 0, d)) + return 1; + break; + case DV_STRING: + slen = strlen(dv->data.str_val); + strncpy((char *)*bufp, dv->data.str_val, slen); + *bufp += slen; + /* pad with 0's to 
nearest multiple of size */ + slen %= bc_data->size; + if (slen > 0) { + slen = bc_data->size-slen; + for (i=0; inumitems, sect, 1, resolve_label); + num = expr_get_intnum(&reserve->numitems); + if (!num) + InternalError(_("could not determine number of items in bc_tobytes_reserve")); + numitems = intnum_get_uint(num)*reserve->itemsize; + + /* Go ahead and zero the bytes. Probably most objfmts will want it + * zero'd if they're actually going to output it. + */ + for (i=0; istart) { + expr_expand_labelequ(incbin->start, sect, 1, resolve_label); + num = expr_get_intnum(&incbin->start); + if (!num) + InternalError(_("could not determine start in bc_tobytes_incbin")); + start = intnum_get_uint(num); + } + + /* FIXME: Search include path for filename */ + + /* Open file */ + f = fopen(incbin->filename, "rb"); + if (!f) { + ErrorAt(line, _("`incbin': unable to open file `%s'"), + incbin->filename); + return 1; + } + + /* Seek to start of data */ + if (fseek(f, start, SEEK_SET) < 0) { + ErrorAt(line, _("`incbin': unable to seek on file `%s'"), + incbin->filename); + fclose(f); + return 1; + } + + /* Read buflen bytes */ + if (fread(*bufp, buflen, 1, f) < buflen) { + ErrorAt(line, _("`incbin': unable to read %lu bytes from file `%s'"), + buflen, incbin->filename); + fclose(f); + return 1; + } + + *bufp += buflen; + fclose(f); + return 0; +} + +/*@null@*/ /*@only@*/ unsigned char * +bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize, + /*@out@*/ unsigned long *multiple, /*@out@*/ int *gap, + const section *sect, void *d, output_expr_func output_expr, + resolve_label_func resolve_label) +{ + /*@only@*/ /*@null@*/ unsigned char *mybuf = NULL; + unsigned char *destbuf; + /*@dependent@*/ /*@null@*/ const intnum *num; bytecode_data *bc_data; bytecode_reserve *reserve; bytecode_incbin *incbin; + int error = 0; + + if (*bufsize < bc->len) { + mybuf = xmalloc(sizeof(bc->len)); + destbuf = mybuf; + } else + destbuf = buf; + + *gap = 0; switch (bc->type) { case 
BC_EMPTY: - InternalError(_("got empty bytecode in bc_resolve")); + InternalError(_("got empty bytecode in bc_tobytes")); case BC_DATA: bc_data = bc_get_data(bc); - bc_resolve_data(bc_data, &bc->len); + error = bc_tobytes_data(bc_data, &destbuf, sect, bc, d, + output_expr); break; case BC_RESERVE: reserve = bc_get_data(bc); - bc_resolve_reserve(reserve, &bc->len, resolve_label); + error = bc_tobytes_reserve(reserve, &destbuf, sect, resolve_label); + *gap = 1; break; case BC_INCBIN: incbin = bc_get_data(bc); - bc_resolve_incbin(incbin, &bc->len, bc->line, resolve_label); + error = bc_tobytes_incbin(incbin, &destbuf, bc->len, bc->line, + sect, resolve_label); break; default: if (bc->type < cur_arch->bc.type_max) - cur_arch->bc.bc_resolve(bc, resolve_label); + error = cur_arch->bc.bc_tobytes(bc, &destbuf, sect, d, + output_expr, resolve_label); else InternalError(_("Unknown bytecode type")); } if (bc->multiple) { - expr_expand_labelequ(bc->multiple, resolve_label); - bc->multiple = expr_simplify(bc->multiple); - } -#endif + expr_expand_labelequ(bc->multiple, sect, 1, resolve_label); + num = expr_get_intnum(&bc->multiple); + if (!num) + InternalError(_("could not determine multiple in bc_tobytes")); + *multiple = intnum_get_uint(num); + } else + *multiple = 1; + if (!error && ((destbuf - buf) != bc->len)) + InternalError(_("written length does not match optimized length")); + *bufsize = bc->len; + return mybuf; } void diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index eb8ec1c5..f011b6ec 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -66,14 +66,43 @@ void bc_print(FILE *f, const bytecode *bc); * This function does *not* modify bc other than the length/size values (eg * it doesn't keep the values returned by resolve_label except temporarily to * try to minimize the length). + * sect is passed along to resolve_label. */ -int bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); - -/* Resolves all labels in bytecode. 
It does essentially the opposite of - * the above bc_calc_len(): it doesn't modify the length/size values, instead - * it saves the values returned by resolve_label to simplify expressions. +int bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label); + +/* Converts the bytecode bc into its byte representation. + * Inputs: + * bc - the bytecode to convert + * buf - where to put the byte representation + * bufsize - the size of buf + * d - the data to pass to each call to output_expr() + * output_expr - the function to call to convert expressions to byte rep + * output_expr inputs: + * bc - the bytecode containing the expr that is being output + * ep - a pointer to the expression to output + * bufp - pointer to pointer to buffer to contain byte representation + * valsize - the size (in bytes) to be used for the byte rep + * d - the data passed into bc_tobytes + * output_expr returns nonzero if an error occurred, 0 otherwise + * resolve_label - the function to call to determine the values of + * expressions that are *not* output to the file + * resolve_label inputs: + * sym - the symbol to resolve + * Outputs: + * bufsize - the size of the generated data. + * multiple - the number of times the data should be dup'ed when output + * gap - indicates the data does not really need to exist in the + * object file (eg res*-generated). buf is filled with + * bufsize 0 bytes. + * Returns either NULL (if buf was big enough to hold the entire byte + * representation), or a newly allocated buffer that should be used instead + * of buf for reading the byte representation. 
*/ -void bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); +/*@null@*/ /*@only@*/ unsigned char *bc_tobytes(bytecode *bc, + unsigned char *buf, unsigned long *bufsize, + /*@out@*/ unsigned long *multiple, /*@out@*/ int *gap, const section *sect, + void *d, output_expr_func output_expr, resolve_label_func resolve_label); /* void bcs_initialize(bytecodehead *headp); */ #define bcs_initialize(headp) STAILQ_INIT(headp) diff --git a/libyasm/coretype.h b/libyasm/coretype.h index 0389d7ee..afad9d93 100644 --- a/libyasm/coretype.h +++ b/libyasm/coretype.h @@ -76,4 +76,8 @@ typedef enum { SYM_EXTERN = 1 << 2 /* if it's declared EXTERN */ } SymVisibility; +typedef intnum *(*resolve_label_func) (symrec *sym, int withstart); +typedef int (*output_expr_func) (expr **ep, unsigned char **bufp, + unsigned long valsize, const section *sect, + const bytecode *bc, int rel, void *d); #endif diff --git a/libyasm/expr.c b/libyasm/expr.c index 31c32f3b..119e36b1 100644 --- a/libyasm/expr.c +++ b/libyasm/expr.c @@ -31,6 +31,8 @@ #include "expr.h" #include "symrec.h" +#include "section.h" + #include "expr-int.h" @@ -716,26 +718,41 @@ expr_contains(expr *e, ExprType t) return expr_traverse_leaves_in(e, &t, expr_contains_callback); } -/* NOTE: This can't be passed through *d because of data/function pointer - * portability issues. +/* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like + * diffsectsymbol - diffsectsymbol (where the diffsect's are the same). + * Currently symbols in different non-absolute sections are NOT expanded. + * This will NOT be easy to fix. 
*/ -static intnum *(*labelequ_resolve_label) (symrec *sym); + +typedef struct labelequ_data { + resolve_label_func resolve_label; + const section *sect; + int withstart; +} labelequ_data; static int -expr_expand_labelequ_callback(ExprItem *ei, /*@unused@*/ void *d) +expr_expand_labelequ_callback(ExprItem *ei, void *d) { + labelequ_data *data = (labelequ_data *)d; const expr *equ_expr; - intnum *intn; + if (ei->type == EXPR_SYM) { equ_expr = symrec_get_equ(ei->data.sym); if (equ_expr) { ei->type = EXPR_EXPR; ei->data.expn = expr_copy(equ_expr); } else { - intn = labelequ_resolve_label(ei->data.sym); - if (intn) { - ei->type = EXPR_INT; - ei->data.intn = intn; + /*@dependent@*/ section *sect; + /*@dependent@*/ /*@null@*/ bytecode *precbc; + intnum *intn; + + if (symrec_get_label(ei->data.sym, &sect, &precbc) && + (sect == data->sect || section_is_absolute(sect))) { + intn = data->resolve_label(ei->data.sym, data->withstart); + if (intn) { + ei->type = EXPR_INT; + ei->data.intn = intn; + } } } } @@ -743,10 +760,14 @@ expr_expand_labelequ_callback(ExprItem *ei, /*@unused@*/ void *d) } void -expr_expand_labelequ(expr *e, intnum *(*resolve_label) (symrec *sym)) -{ - labelequ_resolve_label = resolve_label; - expr_traverse_leaves_in(e, NULL, expr_expand_labelequ_callback); +expr_expand_labelequ(expr *e, const section *sect, int withstart, + resolve_label_func resolve_label) +{ + labelequ_data data; + data.resolve_label = resolve_label; + data.sect = sect; + data.withstart = withstart; + expr_traverse_leaves_in(e, &data, expr_expand_labelequ_callback); } /* Traverse over expression tree, calling func for each operation AFTER the diff --git a/libyasm/expr.h b/libyasm/expr.h index b8a696ae..76476866 100644 --- a/libyasm/expr.h +++ b/libyasm/expr.h @@ -47,8 +47,12 @@ void expr_delete(/*@only@*/ /*@null@*/ expr *e); /* Expands all (symrec) equ's in the expression into full expression * instances. Also resolves labels, if possible.
+ * Srcsect and withstart are passed along to resolve_label and specify the + * referencing section and whether the section start should be included in + * the resolved address, respectively. */ -void expr_expand_labelequ(expr *e, intnum *(*resolve_label) (symrec *sym)); +void expr_expand_labelequ(expr *e, const section *srcsect, int withstart, + resolve_label_func resolve_label); /* Simplifies the expression e as much as possible, eliminating extraneous * branches and simplifying integer-only subexpressions. diff --git a/libyasm/section.c b/libyasm/section.c index 1deca903..d5b60a93 100644 --- a/libyasm/section.c +++ b/libyasm/section.c @@ -161,6 +161,15 @@ section_set_opt_flags(section *sect, unsigned long opt_flags) sect->opt_flags = opt_flags; } +void * +section_get_of_data(section *sect) +{ + if (sect->type == SECTION_GENERAL) + return sect->data.general.of_data; + else + return NULL; +} + void sections_delete(sectionhead *headp) { diff --git a/libyasm/section.h b/libyasm/section.h index ccea9f88..07165815 100644 --- a/libyasm/section.h +++ b/libyasm/section.h @@ -42,6 +42,8 @@ int section_is_absolute(section *sect); unsigned long section_get_opt_flags(const section *sect); void section_set_opt_flags(section *sect, unsigned long opt_flags); +/*@dependent@*/ /*@null@*/ void *section_get_of_data(section *sect); + void sections_delete(sectionhead *headp); void sections_print(FILE *f, const sectionhead *headp); diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h index d4cb2bb7..ce867271 100644 --- a/modules/arch/x86/x86-int.h +++ b/modules/arch/x86/x86-int.h @@ -96,8 +96,11 @@ typedef struct x86_jmprel { void x86_bc_delete(bytecode *bc); void x86_bc_print(FILE *f, const bytecode *bc); -int x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); -void x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); +int x86_bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label); +int 
x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr, + resolve_label_func resolve_label); int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, unsigned char nosplit, unsigned char *displen, diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c index d5ddf8da..3d8bec49 100644 --- a/modules/arch/x86/x86arch.c +++ b/modules/arch/x86/x86arch.c @@ -39,6 +39,6 @@ arch x86_arch = { x86_bc_delete, x86_bc_print, x86_bc_calc_len, - x86_bc_resolve + x86_bc_tobytes } }; diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index 06998050..939b8999 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -22,6 +22,8 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" @@ -461,8 +463,8 @@ x86_bc_print(FILE *f, const bytecode *bc) } static int -x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, - intnum *(*resolve_label) (symrec *sym)) +x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, const section *sect, + resolve_label_func resolve_label) { /*@null@*/ expr *temp; effaddr *ea = insn->ea; @@ -481,7 +483,7 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, assert(temp != NULL); /* Expand equ's and labels */ - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); /* Check validity of effective address and calc R/M bits of * Mod/RM byte and SIB byte. We won't know the Mod field @@ -539,7 +541,7 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, if (imm->val) { temp = expr_copy(imm->val); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); /* TODO: check imm->len vs. sized len from expr? 
*/ @@ -570,8 +572,8 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, static int x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, - unsigned long offset, - intnum *(*resolve_label) (symrec *sym)) + unsigned long offset, const section *sect, + resolve_label_func resolve_label) { int retval = 1; /*@null@*/ expr *temp; @@ -586,7 +588,7 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, jmprel->opersize; /* We don't check here to see if forced forms are actually legal; we - * assume that they are, and only check it in x86_bc_resolve_jmprel(). + * assume that they are, and only check it in x86_bc_tobytes_jmprel(). */ switch (jmprel->op_sel) { case JR_SHORT_FORCED: @@ -605,11 +607,11 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, */ temp = expr_copy(jmprel->target); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 0, resolve_label); num = expr_get_intnum(&temp); if (num) { target = intnum_get_uint(num); - rel = (long)(target-offset); + rel = (long)(target-(offset+jmprel->shortop.opcode_len+1)); /* short displacement must fit within -128 <= rel <= +127 */ if (jmprel->shortop.opcode_len != 0 && rel >= -128 && rel <= 127) { @@ -624,12 +626,16 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, retval = 0; } } else { - /* Assume whichever size is claimed as default by op_sel */ - if (jmprel->op_sel == JR_SHORT) - jrshort = 1; - else + /* It's unknown (e.g. out of this segment or external). + * Thus, assume near displacement. If a near opcode is not + * available, use a short opcode instead. 
+ */ + if (jmprel->nearop.opcode_len != 0) { + if (jmprel->shortop.opcode_len != 0) + retval = 0; jrshort = 0; - retval = 0; + } else + jrshort = 1; } expr_delete(temp); break; @@ -657,7 +663,8 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, } int -x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +x86_bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label) { x86_insn *insn; x86_jmprel *jmprel; @@ -665,29 +672,35 @@ x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - return x86_bc_calc_len_insn(insn, &bc->len, resolve_label); + return x86_bc_calc_len_insn(insn, &bc->len, sect, resolve_label); case X86_BC_JMPREL: jmprel = bc_get_data(bc); - return x86_bc_calc_len_jmprel(jmprel, &bc->len, bc->offset, + return x86_bc_calc_len_jmprel(jmprel, &bc->len, bc->offset, sect, resolve_label); default: break; } return 0; } -#if 0 + static int -x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym)) +x86_bc_tobytes_insn(x86_insn *insn, unsigned char **bufp, const section *sect, + const bytecode *bc, void *d, output_expr_func output_expr, + resolve_label_func resolve_label) { - effaddr *ea = insn->ea; + /*@null@*/ effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); immval *imm = insn->imm; + unsigned int i; + /* We need to figure out the EA first to determine the addrsize. + * Of course, the ModR/M, SIB, and displacement are not output until later. + */ if (ea) { if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) || (!ead->valid_modrm && ead->need_modrm))) { - /* First expand equ's */ - expr_expand_labelequ(ea->disp, resolve_label); + /* Expand equ's and labels */ + expr_expand_labelequ(ea->disp, sect, 1, resolve_label); /* Check validity of effective address and calc R/M bits of * Mod/RM byte and SIB byte. 
We won't know the Mod field @@ -697,53 +710,214 @@ x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym)) if (!x86_expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits, ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, - &ead->sib, &ead->valid_sib, &ead->need_sib)) - return -1; /* failed, don't bother checking rest of insn */ + &ead->sib, &ead->valid_sib, + &ead->need_sib)) + InternalError(_("expr_checkea failed from x86 tobytes_insn")); } } - if (imm) { - const intnum *num; + /* Also check for shift_op special-casing (affects imm). */ + if (insn->shift_op && imm && imm->val) { + /*@dependent@*/ /*@null@*/ const intnum *num; + + expr_expand_labelequ(imm->val, sect, 1, resolve_label); + + num = expr_get_intnum(&imm->val); + if (num) { + if (intnum_get_uint(num) == 1) { + /* Use ,1 form: first copy ,1 opcode. */ + insn->opcode[0] = insn->opcode[1]; + /* Delete imm, as it's not needed. */ + expr_delete(imm->val); + xfree(imm); + insn->imm = (immval *)NULL; + } + insn->shift_op = 0; + } + } - if (imm->val) { - expr_expand_labelequ(imm->val, resolve_label); - imm->val = expr_simplify(imm->val); + /* Prefixes */ + if (insn->lockrep_pre != 0) + WRITE_BYTE(*bufp, insn->lockrep_pre); + if (ea && ead->segment != 0) + WRITE_BYTE(*bufp, ead->segment); + if (insn->opersize != 0 && insn->opersize != insn->mode_bits) + WRITE_BYTE(*bufp, 0x66); + if (insn->addrsize != 0 && insn->addrsize != insn->mode_bits) + WRITE_BYTE(*bufp, 0x67); + + /* Opcode */ + for (i=0; i<insn->opcode_len; i++) + WRITE_BYTE(*bufp, insn->opcode[i]); + + /* Effective address: ModR/M (if required), SIB (if required), and + * displacement (if required).
+ */ + if (ea) { + if (ead->need_modrm) { + if (!ead->valid_modrm) + InternalError(_("invalid Mod/RM in x86 tobytes_insn")); + WRITE_BYTE(*bufp, ead->modrm); + } + + if (ead->need_sib) { + if (!ead->valid_sib) + InternalError(_("invalid SIB in x86 tobytes_insn")); + WRITE_BYTE(*bufp, ead->sib); } - /* TODO: check imm f_len vs. len? */ - /* Handle shift_op special-casing */ - /*@-nullstate@*/ - if (insn->shift_op && (num = expr_get_intnum(&imm->val))) { - /*@=nullstate@*/ + if (ea->disp) + if (output_expr(&ea->disp, bufp, ea->len, sect, bc, 0, d)) + return 1; + } + + /* Immediate (if required) */ + if (imm && imm->val) { + /* TODO: check imm->len vs. sized len from expr? */ + if (output_expr(&imm->val, bufp, imm->len, sect, bc, 0, d)) + return 1; + } + + return 0; +} + +static int +x86_bc_tobytes_jmprel(x86_jmprel *jmprel, unsigned char **bufp, + const section *sect, const bytecode *bc, void *d, + output_expr_func output_expr, + resolve_label_func resolve_label) +{ + /*@dependent@*/ /*@null@*/ const intnum *num; + unsigned long target; + long rel; + unsigned char opersize; + int jrshort = 0; + unsigned int i; + + /* Prefixes */ + if (jmprel->lockrep_pre != 0) + WRITE_BYTE(*bufp, jmprel->lockrep_pre); + /* FIXME: branch hints! */ + if (jmprel->opersize != 0 && jmprel->opersize != jmprel->mode_bits) + WRITE_BYTE(*bufp, 0x66); + if (jmprel->addrsize != 0 && jmprel->addrsize != jmprel->mode_bits) + WRITE_BYTE(*bufp, 0x67); + + /* As opersize may be 0, figure out its "real" value. */ + opersize = (jmprel->opersize == 0) ? jmprel->mode_bits : + jmprel->opersize; + + /* Get displacement value here so that forced forms can be checked. */ + expr_expand_labelequ(jmprel->target, sect, 0, resolve_label); + num = expr_get_intnum(&jmprel->target); + + /* Check here to see if forced forms are actually legal. 
*/ + switch (jmprel->op_sel) { + case JR_SHORT_FORCED: + /* 1 byte relative displacement */ + jrshort = 1; + if (!num) { + ErrorAt(bc->line, + _("short jump target external or out of segment")); + return 1; + } else { + target = intnum_get_uint(num); + rel = (long)(target-(bc->offset+jmprel->shortop.opcode_len+1)); + /* does a short form exist? */ + if (jmprel->shortop.opcode_len == 0) { + ErrorAt(bc->line, _("short jump does not exist")); + return 1; + } + /* short displacement must fit within -128 <= rel <= +127 */ + if (rel < -128 || rel > 127) { + ErrorAt(bc->line, _("short jump out of range")); + return 1; + } + } + break; + case JR_NEAR_FORCED: + /* 2/4 byte relative displacement (depending on operand size) */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + ErrorAt(bc->line, _("near jump does not exist")); + return 1; + } + break; + default: + /* Try to find shortest displacement based on difference between + * target expr value and our (this bytecode's) offset. + */ if (num) { - if (intnum_get_uint(num) == 1) { - /* Use ,1 form: first copy ,1 opcode. */ - insn->opcode[0] = insn->opcode[1]; - /* Delete Imm, as it's not needed */ - expr_delete(imm->val); - xfree(imm); - insn->imm = (immval *)NULL; + target = intnum_get_uint(num); + rel = (long)(target-(bc->offset+jmprel->shortop.opcode_len+1)); + /* short displacement must fit within -128 <= rel <= +127 */ + if (jmprel->shortop.opcode_len != 0 && rel >= -128 && + rel <= 127) { + /* It fits into a short displacement. */ + jrshort = 1; + } else { + /* It's near. */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + InternalError(_("near jump does not exist")); + return 1; + } + } + } else { + /* It's unknown (e.g. out of this segment or external). + * Thus, assume near displacement. If a near opcode is not + * available, error out. 
+ */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + ErrorAt(bc->line, + _("short jump target or out of segment")); + return 1; } - insn->shift_op = 0; } - } + break; } + if (jrshort) { + /* Opcode */ + for (i=0; i<jmprel->shortop.opcode_len; i++) + WRITE_BYTE(*bufp, jmprel->shortop.opcode[i]); + + /* Relative displacement */ + output_expr(&jmprel->target, bufp, 1, sect, bc, 1, d); + } else { + /* Opcode */ + for (i=0; i<jmprel->nearop.opcode_len; i++) + WRITE_BYTE(*bufp, jmprel->nearop.opcode[i]); + + /* Relative displacement */ + output_expr(&jmprel->target, bufp, (opersize == 32) ? 4 : 2, sect, bc, + 1, d); + } return 0; } -#endif -void -x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) + +int +x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr, + resolve_label_func resolve_label) { x86_insn *insn; + x86_jmprel *jmprel; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - /*x86_bc_resolve_insn(insn, resolve_label);*/ + return x86_bc_tobytes_insn(insn, bufp, sect, bc, d, output_expr, + resolve_label); break; + case X86_BC_JMPREL: + jmprel = bc_get_data(bc); + return x86_bc_tobytes_jmprel(jmprel, bufp, sect, bc, d, + output_expr, resolve_label); default: break; } + return 1; } diff --git a/modules/objfmts/bin/bin-objfmt.c b/modules/objfmts/bin/bin-objfmt.c index 808f1cdb..5af6dc49 100644 --- a/modules/objfmts/bin/bin-objfmt.c +++ b/modules/objfmts/bin/bin-objfmt.c @@ -22,24 +22,338 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" #include "expr.h" #include "symrec.h" +#include "bytecode.h" #include "section.h" + +#include "expr-int.h" +#include "bc-int.h" + #include "objfmt.h" +#define REGULAR_OUTBUF_SIZE 1024 + +static intnum *bin_objfmt_resolve_label(symrec *sym, int withstart); + static void -bin_objfmt_initialize(const char *in_filename, const char *obj_filename)
+bin_objfmt_initialize(/*@unused@*/ const char *in_filename, + /*@unused@*/ const char *obj_filename) { } +/* Aligns sect to either its specified alignment (in its objfmt-specific data) + * or def_align if no alignment was specified. Uses prevsect and base to both + * determine the new starting address (returned) and the total length of + * prevsect after sect has been aligned. + */ +static unsigned long +bin_objfmt_align_section(section *sect, section *prevsect, unsigned long base, + unsigned long def_align, + /*@out@*/ unsigned long *prevsectlen, + /*@out@*/ unsigned long *padamt) +{ + /*@dependent@*/ /*@null@*/ bytecode *last; + unsigned long start; + /*@dependent@*/ /*@null@*/ unsigned long *alignptr; + unsigned long align; + + /* Figure out the size of .text by looking at the last bytecode's offset + * plus its length. Add the start and size together to get the new start. + */ + last = bcs_last(section_get_bytecodes(prevsect)); + *prevsectlen = last->offset + last->len; + start = base + *prevsectlen; + + /* Round new start up to alignment of .data section, and adjust textlen to + * indicate padded size. Because alignment is always a power of two, we + * can use some bit trickery to do this easily.
+ */ + alignptr = section_get_of_data(sect); + if (alignptr) + align = *alignptr; + else + align = def_align; /* No alignment: use default */ + + if (start & ~(align-1)) + start = (start & ~(align-1)) + align; + + *padamt = start - (base + *prevsectlen); + + return start; +} + +static intnum * +bin_objfmt_resolve_label2(symrec *sym, /*@null@*/ const section *cursect, + unsigned long cursectstart, int withstart) +{ + /*@dependent@*/ section *sect; + /*@dependent@*/ /*@null@*/ bytecode *precbc; + /*@null@*/ bytecode *bc; + /*@null@*/ expr *startexpr; + /*@dependent@*/ /*@null@*/ const intnum *start; + unsigned long startval = 0; + + if (!symrec_get_label(sym, &sect, &precbc)) + return NULL; + + /* determine actual bc from preceding bc (how labels are stored) */ + if (!precbc) + bc = bcs_first(section_get_bytecodes(sect)); + else + bc = bcs_next(precbc); + assert(bc != NULL); + + /* Figure out the starting offset of the entire section */ + if (withstart || (cursect && sect != cursect) || + section_is_absolute(sect)) { + startexpr = expr_copy(section_get_start(sect)); + assert(startexpr != NULL); + expr_expand_labelequ(startexpr, sect, 1, bin_objfmt_resolve_label); + start = expr_get_intnum(&startexpr); + if (!start) + return NULL; + startval = intnum_get_uint(start); + expr_delete(startexpr); + + /* Compensate for current section start */ + startval -= cursectstart; + } + + /* If a section is done, the following will always succeed. If it's in- + * progress, this will fail if the bytecode comes AFTER the current one.
+ */ + if (precbc) + return intnum_new_int(startval + precbc->offset + precbc->len); + else + return intnum_new_int(startval + bc->offset); +} + +static intnum * +bin_objfmt_resolve_label(symrec *sym, int withstart) +{ + return bin_objfmt_resolve_label2(sym, NULL, 0, withstart); +} + +typedef struct bin_objfmt_expr_data { + const section *sect; + unsigned long start; + int withstart; +} bin_objfmt_expr_data; + +static int +bin_objfmt_expr_traverse_callback(ExprItem *ei, void *d) +{ + bin_objfmt_expr_data *data = (bin_objfmt_expr_data *)d; + const expr *equ_expr; + + if (ei->type == EXPR_SYM) { + equ_expr = symrec_get_equ(ei->data.sym); + if (equ_expr) { + ei->type = EXPR_EXPR; + ei->data.expn = expr_copy(equ_expr); + } else { + intnum *intn; + + intn = bin_objfmt_resolve_label2(ei->data.sym, data->sect, + data->start, data->withstart); + if (intn) { + ei->type = EXPR_INT; + ei->data.intn = intn; + } + } + } + return 0; +} + +typedef struct bin_objfmt_output_info { + FILE *f; + unsigned char *buf; + const section *sect; + unsigned long start; +} bin_objfmt_output_info; + +static int +bin_objfmt_output_expr(expr **ep, unsigned char **bufp, unsigned long valsize, + const section *sect, const bytecode *bc, int rel, + /*@null@*/ void *d) +{ + /*@null@*/ bin_objfmt_output_info *info = (bin_objfmt_output_info *)d; + bin_objfmt_expr_data data; + /*@dependent@*/ /*@null@*/ const intnum *num; + unsigned long val; + + assert(info != NULL); + + /* For binary output, this is trivial: any expression that doesn't simplify + * to an integer is an error (references something external). + * Other object formats need to generate their relocation list from here! + * Note: we can't just use expr_expand_labelequ() because it doesn't + * resolve between different sections (on purpose).. but for bin we + * WANT that. 
+ */ + data.sect = sect; + if (rel) { + data.start = info->start; + data.withstart = 0; + } else { + data.start = 0; + data.withstart = 1; + } + expr_traverse_leaves_in(*ep, &data, bin_objfmt_expr_traverse_callback); + num = expr_get_intnum(ep); + if (!num) { + ErrorAt((*ep)->line, + _("binary object format does not support external references")); + return 1; + } + val = intnum_get_uint(num); + + if (rel) + val = (unsigned long)((long)(val - (bc->offset + bc->len))); + + /* Write value out. */ + /* FIXME: Should we warn here about truncation if it doesn't fit? */ + switch (valsize) { + case 1: + WRITE_BYTE(*bufp, val); + break; + case 2: + WRITE_SHORT(*bufp, val); + break; + case 4: + WRITE_LONG(*bufp, val); + break; + default: + InternalError(_("unexpected size in bin objfmt output expr")); + } + + return 0; +} + +static int +bin_objfmt_output_bytecode(bytecode *bc, /*@null@*/ void *d) +{ + /*@null@*/ bin_objfmt_output_info *info = (bin_objfmt_output_info *)d; + /*@null@*/ /*@only@*/ unsigned char *bigbuf; + unsigned long size = REGULAR_OUTBUF_SIZE; + unsigned long multiple; + unsigned long i; + int gap; + + bigbuf = bc_tobytes(bc, info->buf, &size, &multiple, &gap, info->sect, + info, bin_objfmt_output_expr, + bin_objfmt_resolve_label); + + /* Warn that gaps are converted to 0. The 0 bytes are generated by + * bc_tobytes() so no special handling is needed. 
+ */ + if (gap) + Warning(_("uninitialized space declared in code/data section: zeroing")); + + assert(info != NULL); + + /* Output multiple copies of buf (or bigbuf if non-NULL) to file */ + for (i=0; ibuf, size, 1, info->f); + + /* If bigbuf was allocated, free it */ + if (bigbuf) + xfree(bigbuf); + + return 0; +} + static void bin_objfmt_output(FILE *f, sectionhead *sections) { + /*@null@*/ section *text, *data, *bss, *prevsect; + /*@null@*/ expr *startexpr; + /*@dependent@*/ /*@null@*/ const intnum *startnum; + unsigned long start, textstart, datastart; + unsigned long textlen = 0, textpad = 0, datalen = 0, datapad = 0; + unsigned long *prevsectlenptr, *prevsectpadptr; + unsigned long i; + bin_objfmt_output_info info; + + info.f = f; + info.buf = xmalloc(REGULAR_OUTBUF_SIZE); + + text = sections_find_general(sections, ".text"); + data = sections_find_general(sections, ".data"); + bss = sections_find_general(sections, ".bss"); + + if (!text) + InternalError(_("No `.text' section in bin objfmt output")); + + /* First determine the actual starting offsets for .data and .bss. + * As the order in the file is .text -> .data -> .bss (not present), + * use the last bytecode in .text (and the .text section start) to + * determine the starting offset in .data, and likewise for .bss. + * Also compensate properly for alignment. + */ + + /* Find out the start of .text */ + startexpr = expr_copy(section_get_start(text)); + assert(startexpr != NULL); + startnum = expr_get_intnum(&startexpr); + if (!startnum) + InternalError(_("Complex expr for start in bin objfmt output")); + start = intnum_get_uint(startnum); + expr_delete(startexpr); + textstart = start; + + /* Align .data and .bss (if present) by adjusting their starts. 
*/ + prevsect = text; + prevsectlenptr = &textlen; + prevsectpadptr = &textpad; + if (data) { + start = bin_objfmt_align_section(data, prevsect, start, 4, + prevsectlenptr, prevsectpadptr); + section_set_start(data, start); + datastart = start; + prevsect = data; + prevsectlenptr = &datalen; + prevsectpadptr = &datapad; + } + if (bss) { + start = bin_objfmt_align_section(bss, prevsect, start, 4, + prevsectlenptr, prevsectpadptr); + section_set_start(bss, start); + } + + /* Output .text first. */ + info.sect = text; + info.start = textstart; + bcs_traverse(section_get_bytecodes(text), &info, + bin_objfmt_output_bytecode); + + /* If .data is present, output it */ + if (data) { + /* Add padding to align .data. Just use a for loop, as this will + * seldom be very many bytes. + */ + for (i=0; iopt_flags == BCFLAG_INPROGRESS) @@ -105,17 +112,17 @@ basic_optimize_bytecode_1(bytecode *bc, void *d) bc->opt_flags = BCFLAG_INPROGRESS; - if (!*precbc) + if (!data->precbc) bc->offset = 0; else - bc->offset = (*precbc)->offset + (*precbc)->len; - *precbc = bc; + bc->offset = data->precbc->offset + data->precbc->len; + data->precbc = bc; /* We're doing just a single pass, so essentially ignore whether the size * is minimum or not, and just check for indeterminate length (indicative * of circular reference). */ - if (bc_calc_len(bc, basic_optimize_resolve_label) < 0) { + if (bc_calc_len(bc, data->sect, basic_optimize_resolve_label) < 0) { ErrorAt(bc->line, _("Circular reference detected.")); return -1; } @@ -128,10 +135,13 @@ basic_optimize_bytecode_1(bytecode *bc, void *d) static int basic_optimize_section_1(section *sect, /*@unused@*/ /*@null@*/ void *d) { - bytecode *precbc = NULL; + basic_optimize_data data; unsigned long flags; int retval; + data.precbc = NULL; + data.sect = sect; + /* Don't even bother if we're in-progress or done. 
*/ flags = section_get_opt_flags(sect); if (flags == SECTFLAG_INPROGRESS) @@ -141,8 +151,8 @@ basic_optimize_section_1(section *sect, /*@unused@*/ /*@null@*/ void *d) section_set_opt_flags(sect, SECTFLAG_INPROGRESS); - retval = bcs_traverse(section_get_bytecodes(sect), &precbc, - basic_optimize_bytecode_1); + retval = bcs_traverse(section_get_bytecodes(sect), &data, + basic_optimize_bytecode_1); if (retval != 0) return retval; diff --git a/src/arch.h b/src/arch.h index 81805e90..4fb5aa58 100644 --- a/src/arch.h +++ b/src/arch.h @@ -40,11 +40,13 @@ struct arch { void (*bc_print) (FILE *f, const bytecode *bc); /* See bytecode.h comments on bc_calc_len() */ - int (*bc_calc_len) (bytecode *bc, - intnum *(*resolve_label) (symrec *sym)); - /* See bytecode.h comments on bc_resolve() */ - void (*bc_resolve) (bytecode *bc, - intnum *(*resolve_label) (symrec *sym)); + int (*bc_calc_len) (bytecode *bc, const section *sect, + resolve_label_func resolve_label); + /* See bytecode.h comments on bc_tobytes() */ + int (*bc_tobytes) (bytecode *bc, unsigned char **bufp, + const section *sect, void *d, + output_expr_func output_expr, + resolve_label_func resolve_label); } bc; }; diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h index d4cb2bb7..ce867271 100644 --- a/src/arch/x86/x86-int.h +++ b/src/arch/x86/x86-int.h @@ -96,8 +96,11 @@ typedef struct x86_jmprel { void x86_bc_delete(bytecode *bc); void x86_bc_print(FILE *f, const bytecode *bc); -int x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); -void x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); +int x86_bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label); +int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr, + resolve_label_func resolve_label); int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, unsigned char nosplit, unsigned char *displen, diff --git 
a/src/arch/x86/x86arch.c b/src/arch/x86/x86arch.c index d5ddf8da..3d8bec49 100644 --- a/src/arch/x86/x86arch.c +++ b/src/arch/x86/x86arch.c @@ -39,6 +39,6 @@ arch x86_arch = { x86_bc_delete, x86_bc_print, x86_bc_calc_len, - x86_bc_resolve + x86_bc_tobytes } }; diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c index 06998050..939b8999 100644 --- a/src/arch/x86/x86bc.c +++ b/src/arch/x86/x86bc.c @@ -22,6 +22,8 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" @@ -461,8 +463,8 @@ x86_bc_print(FILE *f, const bytecode *bc) } static int -x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, - intnum *(*resolve_label) (symrec *sym)) +x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, const section *sect, + resolve_label_func resolve_label) { /*@null@*/ expr *temp; effaddr *ea = insn->ea; @@ -481,7 +483,7 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, assert(temp != NULL); /* Expand equ's and labels */ - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); /* Check validity of effective address and calc R/M bits of * Mod/RM byte and SIB byte. We won't know the Mod field @@ -539,7 +541,7 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, if (imm->val) { temp = expr_copy(imm->val); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); /* TODO: check imm->len vs. sized len from expr? 
*/ @@ -570,8 +572,8 @@ x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len, static int x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, - unsigned long offset, - intnum *(*resolve_label) (symrec *sym)) + unsigned long offset, const section *sect, + resolve_label_func resolve_label) { int retval = 1; /*@null@*/ expr *temp; @@ -586,7 +588,7 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, jmprel->opersize; /* We don't check here to see if forced forms are actually legal; we - * assume that they are, and only check it in x86_bc_resolve_jmprel(). + * assume that they are, and only check it in x86_bc_tobytes_jmprel(). */ switch (jmprel->op_sel) { case JR_SHORT_FORCED: @@ -605,11 +607,11 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, */ temp = expr_copy(jmprel->target); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 0, resolve_label); num = expr_get_intnum(&temp); if (num) { target = intnum_get_uint(num); - rel = (long)(target-offset); + rel = (long)(target-(offset+jmprel->shortop.opcode_len+1)); /* short displacement must fit within -128 <= rel <= +127 */ if (jmprel->shortop.opcode_len != 0 && rel >= -128 && rel <= 127) { @@ -624,12 +626,16 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, retval = 0; } } else { - /* Assume whichever size is claimed as default by op_sel */ - if (jmprel->op_sel == JR_SHORT) - jrshort = 1; - else + /* It's unknown (e.g. out of this segment or external). + * Thus, assume near displacement. If a near opcode is not + * available, use a short opcode instead. 
+ */ + if (jmprel->nearop.opcode_len != 0) { + if (jmprel->shortop.opcode_len != 0) + retval = 0; jrshort = 0; - retval = 0; + } else + jrshort = 1; } expr_delete(temp); break; @@ -657,7 +663,8 @@ x86_bc_calc_len_jmprel(x86_jmprel *jmprel, unsigned long *len, } int -x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +x86_bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label) { x86_insn *insn; x86_jmprel *jmprel; @@ -665,29 +672,35 @@ x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - return x86_bc_calc_len_insn(insn, &bc->len, resolve_label); + return x86_bc_calc_len_insn(insn, &bc->len, sect, resolve_label); case X86_BC_JMPREL: jmprel = bc_get_data(bc); - return x86_bc_calc_len_jmprel(jmprel, &bc->len, bc->offset, + return x86_bc_calc_len_jmprel(jmprel, &bc->len, bc->offset, sect, resolve_label); default: break; } return 0; } -#if 0 + static int -x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym)) +x86_bc_tobytes_insn(x86_insn *insn, unsigned char **bufp, const section *sect, + const bytecode *bc, void *d, output_expr_func output_expr, + resolve_label_func resolve_label) { - effaddr *ea = insn->ea; + /*@null@*/ effaddr *ea = insn->ea; x86_effaddr_data *ead = ea_get_data(ea); immval *imm = insn->imm; + unsigned int i; + /* We need to figure out the EA first to determine the addrsize. + * Of course, the ModR/M, SIB, and displacement are not output until later. + */ if (ea) { if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) || (!ead->valid_modrm && ead->need_modrm))) { - /* First expand equ's */ - expr_expand_labelequ(ea->disp, resolve_label); + /* Expand equ's and labels */ + expr_expand_labelequ(ea->disp, sect, 1, resolve_label); /* Check validity of effective address and calc R/M bits of * Mod/RM byte and SIB byte. 
We won't know the Mod field @@ -697,53 +710,214 @@ x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym)) if (!x86_expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits, ea->nosplit, &ea->len, &ead->modrm, &ead->valid_modrm, &ead->need_modrm, - &ead->sib, &ead->valid_sib, &ead->need_sib)) - return -1; /* failed, don't bother checking rest of insn */ + &ead->sib, &ead->valid_sib, + &ead->need_sib)) + InternalError(_("expr_checkea failed from x86 tobytes_insn")); } } - if (imm) { - const intnum *num; + /* Also check for shift_op special-casing (affects imm). */ + if (insn->shift_op && imm && imm->val) { + /*@dependent@*/ /*@null@*/ const intnum *num; + + expr_expand_labelequ(imm->val, sect, 1, resolve_label); + + num = expr_get_intnum(&imm->val); + if (num) { + if (intnum_get_uint(num) == 1) { + /* Use ,1 form: first copy ,1 opcode. */ + insn->opcode[0] = insn->opcode[1]; + /* Delete imm, as it's not needed. */ + expr_delete(imm->val); + xfree(imm); + insn->imm = (immval *)NULL; + } + insn->shift_op = 0; + } + } - if (imm->val) { - expr_expand_labelequ(imm->val, resolve_label); - imm->val = expr_simplify(imm->val); + /* Prefixes */ + if (insn->lockrep_pre != 0) + WRITE_BYTE(*bufp, insn->lockrep_pre); + if (ea && ead->segment != 0) + WRITE_BYTE(*bufp, ead->segment); + if (insn->opersize != 0 && insn->opersize != insn->mode_bits) + WRITE_BYTE(*bufp, 0x66); + if (insn->addrsize != 0 && insn->addrsize != insn->mode_bits) + WRITE_BYTE(*bufp, 0x67); + + /* Opcode */ + for (i=0; iopcode_len; i++) + WRITE_BYTE(*bufp, insn->opcode[i]); + + /* Effective address: ModR/M (if required), SIB (if required), and + * displacement (if required). 
+ */ + if (ea) { + if (ead->need_modrm) { + if (!ead->valid_modrm) + InternalError(_("invalid Mod/RM in x86 tobytes_insn")); + WRITE_BYTE(*bufp, ead->modrm); + } + + if (ead->need_sib) { + if (!ead->valid_sib) + InternalError(_("invalid SIB in x86 tobytes_insn")); + WRITE_BYTE(*bufp, ead->sib); } - /* TODO: check imm f_len vs. len? */ - /* Handle shift_op special-casing */ - /*@-nullstate@*/ - if (insn->shift_op && (num = expr_get_intnum(&imm->val))) { - /*@=nullstate@*/ + if (ea->disp) + if (output_expr(&ea->disp, bufp, ea->len, sect, bc, 0, d)) + return 1; + } + + /* Immediate (if required) */ + if (imm && imm->val) { + /* TODO: check imm->len vs. sized len from expr? */ + if (output_expr(&imm->val, bufp, imm->len, sect, bc, 0, d)) + return 1; + } + + return 0; +} + +static int +x86_bc_tobytes_jmprel(x86_jmprel *jmprel, unsigned char **bufp, + const section *sect, const bytecode *bc, void *d, + output_expr_func output_expr, + resolve_label_func resolve_label) +{ + /*@dependent@*/ /*@null@*/ const intnum *num; + unsigned long target; + long rel; + unsigned char opersize; + int jrshort = 0; + unsigned int i; + + /* Prefixes */ + if (jmprel->lockrep_pre != 0) + WRITE_BYTE(*bufp, jmprel->lockrep_pre); + /* FIXME: branch hints! */ + if (jmprel->opersize != 0 && jmprel->opersize != jmprel->mode_bits) + WRITE_BYTE(*bufp, 0x66); + if (jmprel->addrsize != 0 && jmprel->addrsize != jmprel->mode_bits) + WRITE_BYTE(*bufp, 0x67); + + /* As opersize may be 0, figure out its "real" value. */ + opersize = (jmprel->opersize == 0) ? jmprel->mode_bits : + jmprel->opersize; + + /* Get displacement value here so that forced forms can be checked. */ + expr_expand_labelequ(jmprel->target, sect, 0, resolve_label); + num = expr_get_intnum(&jmprel->target); + + /* Check here to see if forced forms are actually legal. 
*/ + switch (jmprel->op_sel) { + case JR_SHORT_FORCED: + /* 1 byte relative displacement */ + jrshort = 1; + if (!num) { + ErrorAt(bc->line, + _("short jump target external or out of segment")); + return 1; + } else { + target = intnum_get_uint(num); + rel = (long)(target-(bc->offset+jmprel->shortop.opcode_len+1)); + /* does a short form exist? */ + if (jmprel->shortop.opcode_len == 0) { + ErrorAt(bc->line, _("short jump does not exist")); + return 1; + } + /* short displacement must fit within -128 <= rel <= +127 */ + if (rel < -128 || rel > 127) { + ErrorAt(bc->line, _("short jump out of range")); + return 1; + } + } + break; + case JR_NEAR_FORCED: + /* 2/4 byte relative displacement (depending on operand size) */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + ErrorAt(bc->line, _("near jump does not exist")); + return 1; + } + break; + default: + /* Try to find shortest displacement based on difference between + * target expr value and our (this bytecode's) offset. + */ if (num) { - if (intnum_get_uint(num) == 1) { - /* Use ,1 form: first copy ,1 opcode. */ - insn->opcode[0] = insn->opcode[1]; - /* Delete Imm, as it's not needed */ - expr_delete(imm->val); - xfree(imm); - insn->imm = (immval *)NULL; + target = intnum_get_uint(num); + rel = (long)(target-(bc->offset+jmprel->shortop.opcode_len+1)); + /* short displacement must fit within -128 <= rel <= +127 */ + if (jmprel->shortop.opcode_len != 0 && rel >= -128 && + rel <= 127) { + /* It fits into a short displacement. */ + jrshort = 1; + } else { + /* It's near. */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + InternalError(_("near jump does not exist")); + return 1; + } + } + } else { + /* It's unknown (e.g. out of this segment or external). + * Thus, assume near displacement. If a near opcode is not + * available, error out. 
+ */ + jrshort = 0; + if (jmprel->nearop.opcode_len == 0) { + ErrorAt(bc->line, + _("short jump target or out of segment")); + return 1; } - insn->shift_op = 0; } - } + break; } + if (jrshort) { + /* Opcode */ + for (i=0; ishortop.opcode_len; i++) + WRITE_BYTE(*bufp, jmprel->shortop.opcode[i]); + + /* Relative displacement */ + output_expr(&jmprel->target, bufp, 1, sect, bc, 1, d); + } else { + /* Opcode */ + for (i=0; inearop.opcode_len; i++) + WRITE_BYTE(*bufp, jmprel->nearop.opcode[i]); + + /* Relative displacement */ + output_expr(&jmprel->target, bufp, (opersize == 32) ? 4 : 2, sect, bc, + 1, d); + } return 0; } -#endif -void -x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) + +int +x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr, + resolve_label_func resolve_label) { x86_insn *insn; + x86_jmprel *jmprel; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - /*x86_bc_resolve_insn(insn, resolve_label);*/ + return x86_bc_tobytes_insn(insn, bufp, sect, bc, d, output_expr, + resolve_label); break; + case X86_BC_JMPREL: + jmprel = bc_get_data(bc); + return x86_bc_tobytes_jmprel(jmprel, bufp, sect, bc, d, + output_expr, resolve_label); default: break; } + return 1; } diff --git a/src/bytecode.c b/src/bytecode.c index e816e219..6b78ebe5 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -22,6 +22,8 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" @@ -318,7 +320,7 @@ bc_calc_len_data(bytecode_data *bc_data, unsigned long *len) static int bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, - intnum *(*resolve_label) (symrec *sym)) + const section *sect, resolve_label_func resolve_label) { int retval = 1; /*@null@*/ expr *temp; @@ -326,7 +328,7 @@ bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, temp = expr_copy(reserve->numitems); 
assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (!num) retval = -1; @@ -338,7 +340,8 @@ bc_calc_len_reserve(bytecode_reserve *reserve, unsigned long *len, static int bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, - unsigned long line, intnum *(*resolve_label) (symrec *sym)) + unsigned long line, const section *sect, + resolve_label_func resolve_label) { FILE *f; /*@null@*/ expr *temp; @@ -349,7 +352,7 @@ bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, if (incbin->start) { temp = expr_copy(incbin->start); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (num) start = intnum_get_uint(num); @@ -362,7 +365,7 @@ bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, if (incbin->maxlen) { temp = expr_copy(incbin->maxlen); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (num) maxlen = intnum_get_uint(num); @@ -403,7 +406,8 @@ bc_calc_len_incbin(bytecode_incbin *incbin, unsigned long *len, } int -bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label) { int retval = 1; bytecode_data *bc_data; @@ -423,16 +427,17 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) break; case BC_RESERVE: reserve = bc_get_data(bc); - retval = bc_calc_len_reserve(reserve, &bc->len, resolve_label); + retval = bc_calc_len_reserve(reserve, &bc->len, sect, + resolve_label); break; case BC_INCBIN: incbin = bc_get_data(bc); - retval = bc_calc_len_incbin(incbin, &bc->len, bc->line, + retval = bc_calc_len_incbin(incbin, &bc->len, bc->line, sect, resolve_label); break; default: if (bc->type < cur_arch->bc.type_max) - retval = 
cur_arch->bc.bc_calc_len(bc, resolve_label); + retval = cur_arch->bc.bc_calc_len(bc, sect, resolve_label); else InternalError(_("Unknown bytecode type")); } @@ -441,7 +446,7 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) if (bc->multiple) { temp = expr_copy(bc->multiple); assert(temp != NULL); - expr_expand_labelequ(temp, resolve_label); + expr_expand_labelequ(temp, sect, 1, resolve_label); num = expr_get_intnum(&temp); if (!num) retval = -1; @@ -453,41 +458,173 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) return retval; } -void -bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)) +static int +bc_tobytes_data(bytecode_data *bc_data, unsigned char **bufp, + const section *sect, const bytecode *bc, void *d, + output_expr_func output_expr) +{ + dataval *dv; + size_t slen; + size_t i; + + STAILQ_FOREACH(dv, &bc_data->datahead, link) { + switch (dv->type) { + case DV_EMPTY: + break; + case DV_EXPR: + if (output_expr(&dv->data.expn, bufp, bc_data->size, sect, bc, + 0, d)) + return 1; + break; + case DV_STRING: + slen = strlen(dv->data.str_val); + strncpy((char *)*bufp, dv->data.str_val, slen); + *bufp += slen; + /* pad with 0's to nearest multiple of size */ + slen %= bc_data->size; + if (slen > 0) { + slen = bc_data->size-slen; + for (i=0; inumitems, sect, 1, resolve_label); + num = expr_get_intnum(&reserve->numitems); + if (!num) + InternalError(_("could not determine number of items in bc_tobytes_reserve")); + numitems = intnum_get_uint(num)*reserve->itemsize; + + /* Go ahead and zero the bytes. Probably most objfmts will want it + * zero'd if they're actually going to output it. 
+ */ + for (i=0; istart) { + expr_expand_labelequ(incbin->start, sect, 1, resolve_label); + num = expr_get_intnum(&incbin->start); + if (!num) + InternalError(_("could not determine start in bc_tobytes_incbin")); + start = intnum_get_uint(num); + } + + /* FIXME: Search include path for filename */ + + /* Open file */ + f = fopen(incbin->filename, "rb"); + if (!f) { + ErrorAt(line, _("`incbin': unable to open file `%s'"), + incbin->filename); + return 1; + } + + /* Seek to start of data */ + if (fseek(f, start, SEEK_SET) < 0) { + ErrorAt(line, _("`incbin': unable to seek on file `%s'"), + incbin->filename); + fclose(f); + return 1; + } + + /* Read buflen bytes */ + if (fread(*bufp, buflen, 1, f) < buflen) { + ErrorAt(line, _("`incbin': unable to read %lu bytes from file `%s'"), + buflen, incbin->filename); + fclose(f); + return 1; + } + + *bufp += buflen; + fclose(f); + return 0; +} + +/*@null@*/ /*@only@*/ unsigned char * +bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize, + /*@out@*/ unsigned long *multiple, /*@out@*/ int *gap, + const section *sect, void *d, output_expr_func output_expr, + resolve_label_func resolve_label) +{ + /*@only@*/ /*@null@*/ unsigned char *mybuf = NULL; + unsigned char *destbuf; + /*@dependent@*/ /*@null@*/ const intnum *num; bytecode_data *bc_data; bytecode_reserve *reserve; bytecode_incbin *incbin; + int error = 0; + + if (*bufsize < bc->len) { + mybuf = xmalloc(sizeof(bc->len)); + destbuf = mybuf; + } else + destbuf = buf; + + *gap = 0; switch (bc->type) { case BC_EMPTY: - InternalError(_("got empty bytecode in bc_resolve")); + InternalError(_("got empty bytecode in bc_tobytes")); case BC_DATA: bc_data = bc_get_data(bc); - bc_resolve_data(bc_data, &bc->len); + error = bc_tobytes_data(bc_data, &destbuf, sect, bc, d, + output_expr); break; case BC_RESERVE: reserve = bc_get_data(bc); - bc_resolve_reserve(reserve, &bc->len, resolve_label); + error = bc_tobytes_reserve(reserve, &destbuf, sect, resolve_label); + *gap = 1; 
break; case BC_INCBIN: incbin = bc_get_data(bc); - bc_resolve_incbin(incbin, &bc->len, bc->line, resolve_label); + error = bc_tobytes_incbin(incbin, &destbuf, bc->len, bc->line, + sect, resolve_label); break; default: if (bc->type < cur_arch->bc.type_max) - cur_arch->bc.bc_resolve(bc, resolve_label); + error = cur_arch->bc.bc_tobytes(bc, &destbuf, sect, d, + output_expr, resolve_label); else InternalError(_("Unknown bytecode type")); } if (bc->multiple) { - expr_expand_labelequ(bc->multiple, resolve_label); - bc->multiple = expr_simplify(bc->multiple); - } -#endif + expr_expand_labelequ(bc->multiple, sect, 1, resolve_label); + num = expr_get_intnum(&bc->multiple); + if (!num) + InternalError(_("could not determine multiple in bc_tobytes")); + *multiple = intnum_get_uint(num); + } else + *multiple = 1; + if (!error && ((destbuf - buf) != bc->len)) + InternalError(_("written length does not match optimized length")); + *bufsize = bc->len; + return mybuf; } void diff --git a/src/bytecode.h b/src/bytecode.h index eb8ec1c5..f011b6ec 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -66,14 +66,43 @@ void bc_print(FILE *f, const bytecode *bc); * This function does *not* modify bc other than the length/size values (eg * it doesn't keep the values returned by resolve_label except temporarily to * try to minimize the length). + * sect is passed along to resolve_label. */ -int bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); - -/* Resolves all labels in bytecode. It does essentially the opposite of - * the above bc_calc_len(): it doesn't modify the length/size values, instead - * it saves the values returned by resolve_label to simplify expressions. +int bc_calc_len(bytecode *bc, const section *sect, + resolve_label_func resolve_label); + +/* Converts the bytecode bc into its byte representation. 
+ * Inputs: + * bc - the bytecode to convert + * buf - where to put the byte representation + * bufsize - the size of buf + * d - the data to pass to each call to output_expr() + * output_expr - the function to call to convert expressions to byte rep + * output_expr inputs: + * bc - the bytecode containing the expr that is being output + * ep - a pointer to the expression to output + * bufp - pointer to pointer to buffer to contain byte representation + * valsize - the size (in bytes) to be used for the byte rep + * d - the data passed into bc_tobytes + * output_expr returns nonzero if an error occurred, 0 otherwise + * resolve_label - the function to call to determine the values of + * expressions that are *not* output to the file + * resolve_label inputs: + * sym - the symbol to resolve + * Outputs: + * bufsize - the size of the generated data. + * multiple - the number of times the data should be dup'ed when output + * gap - indicates the data does not really need to exist in the + * object file (eg res*-generated). buf is filled with + * bufsize 0 bytes. + * Returns either NULL (if buf was big enough to hold the entire byte + * representation), or a newly allocated buffer that should be used instead + * of buf for reading the byte representation. 
*/ -void bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym)); +/*@null@*/ /*@only@*/ unsigned char *bc_tobytes(bytecode *bc, + unsigned char *buf, unsigned long *bufsize, + /*@out@*/ unsigned long *multiple, /*@out@*/ int *gap, const section *sect, + void *d, output_expr_func output_expr, resolve_label_func resolve_label); /* void bcs_initialize(bytecodehead *headp); */ #define bcs_initialize(headp) STAILQ_INIT(headp) diff --git a/src/coretype.h b/src/coretype.h index 0389d7ee..afad9d93 100644 --- a/src/coretype.h +++ b/src/coretype.h @@ -76,4 +76,8 @@ typedef enum { SYM_EXTERN = 1 << 2 /* if it's declared EXTERN */ } SymVisibility; +typedef intnum *(*resolve_label_func) (symrec *sym, int withstart); +typedef int (*output_expr_func) (expr **ep, unsigned char **bufp, + unsigned long valsize, const section *sect, + const bytecode *bc, int rel, void *d); #endif diff --git a/src/expr.c b/src/expr.c index 31c32f3b..119e36b1 100644 --- a/src/expr.c +++ b/src/expr.c @@ -31,6 +31,8 @@ #include "expr.h" #include "symrec.h" +#include "section.h" + #include "expr-int.h" @@ -716,26 +718,41 @@ expr_contains(expr *e, ExprType t) return expr_traverse_leaves_in(e, &t, expr_contains_callback); } -/* NOTE: This can't be passed through *d because of data/function pointer - * portability issues. +/* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like + * diffsectsymbol - diffsectsymbol (where the diffsect's are the same). + * Currently symbols in different non-absolute sections are NOT expanded. + * This will NOT be easy to fix. 
*/ -static intnum *(*labelequ_resolve_label) (symrec *sym); + +typedef struct labelequ_data { + resolve_label_func resolve_label; + const section *sect; + int withstart; +} labelequ_data; static int -expr_expand_labelequ_callback(ExprItem *ei, /*@unused@*/ void *d) +expr_expand_labelequ_callback(ExprItem *ei, void *d) { + labelequ_data *data = (labelequ_data *)d; const expr *equ_expr; - intnum *intn; + if (ei->type == EXPR_SYM) { equ_expr = symrec_get_equ(ei->data.sym); if (equ_expr) { ei->type = EXPR_EXPR; ei->data.expn = expr_copy(equ_expr); } else { - intn = labelequ_resolve_label(ei->data.sym); - if (intn) { - ei->type = EXPR_INT; - ei->data.intn = intn; + /*@dependent@*/ section *sect; + /*@dependent@*/ /*@null@*/ bytecode *precbc; + intnum *intn; + + if (symrec_get_label(ei->data.sym, §, &precbc) && + (sect == data->sect || section_is_absolute(sect))) { + intn = data->resolve_label(ei->data.sym, data->withstart); + if (intn) { + ei->type = EXPR_INT; + ei->data.intn = intn; + } } } } @@ -743,10 +760,14 @@ expr_expand_labelequ_callback(ExprItem *ei, /*@unused@*/ void *d) } void -expr_expand_labelequ(expr *e, intnum *(*resolve_label) (symrec *sym)) -{ - labelequ_resolve_label = resolve_label; - expr_traverse_leaves_in(e, NULL, expr_expand_labelequ_callback); +expr_expand_labelequ(expr *e, const section *sect, int withstart, + resolve_label_func resolve_label) +{ + labelequ_data data; + data.resolve_label = resolve_label; + data.sect = sect; + data.withstart = withstart; + expr_traverse_leaves_in(e, &data, expr_expand_labelequ_callback); } /* Traverse over expression tree, calling func for each operation AFTER the diff --git a/src/expr.h b/src/expr.h index b8a696ae..76476866 100644 --- a/src/expr.h +++ b/src/expr.h @@ -47,8 +47,12 @@ void expr_delete(/*@only@*/ /*@null@*/ expr *e); /* Expands all (symrec) equ's in the expression into full expression * instances. Also resolves labels, if possible. 
+ * Srcsect is the referencing section; only labels in it or in an absolute + * section are resolved. Withstart is passed along to resolve_label and says + * whether the section start should be included in the resolved address. */ -void expr_expand_labelequ(expr *e, intnum *(*resolve_label) (symrec *sym)); +void expr_expand_labelequ(expr *e, const section *srcsect, int withstart, + resolve_label_func resolve_label); /* Simplifies the expression e as much as possible, eliminating extraneous * branches and simplifying integer-only subexpressions. diff --git a/src/objfmts/bin/bin-objfmt.c b/src/objfmts/bin/bin-objfmt.c index 808f1cdb..5af6dc49 100644 --- a/src/objfmts/bin/bin-objfmt.c +++ b/src/objfmts/bin/bin-objfmt.c @@ -22,24 +22,338 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "file.h" + #include "globals.h" #include "errwarn.h" #include "intnum.h" #include "expr.h" #include "symrec.h" +#include "bytecode.h" #include "section.h" + +#include "expr-int.h" +#include "bc-int.h" + #include "objfmt.h" +#define REGULAR_OUTBUF_SIZE 1024 + +static intnum *bin_objfmt_resolve_label(symrec *sym, int withstart); + static void -bin_objfmt_initialize(const char *in_filename, const char *obj_filename) +bin_objfmt_initialize(/*@unused@*/ const char *in_filename, + /*@unused@*/ const char *obj_filename) { } +/* Aligns sect to either its specified alignment (in its objfmt-specific data) + * or def_align if no alignment was specified. Uses prevsect and base to both + * determine the new starting address (returned) and the total length of + * prevsect after sect has been aligned.
+ */ +static unsigned long +bin_objfmt_align_section(section *sect, section *prevsect, unsigned long base, + unsigned long def_align, + /*@out@*/ unsigned long *prevsectlen, + /*@out@*/ unsigned long *padamt) +{ + /*@dependent@*/ /*@null@*/ bytecode *last; + unsigned long start; + /*@dependent@*/ /*@null@*/ unsigned long *alignptr; + unsigned long align; + + /* Figure out the size of .text by looking at the last bytecode's offset + * plus its length. Add the start and size together to get the new start. + */ + last = bcs_last(section_get_bytecodes(prevsect)); + *prevsectlen = last->offset + last->len; + start = base + *prevsectlen; + + /* Round new start up to alignment of .data section, and adjust textlen to + * indicate padded size. Because alignment is always a power of two, we + * can use some bit trickery to do this easily. + */ + alignptr = section_get_of_data(sect); + if (alignptr) + align = *alignptr; + else + align = def_align; /* No alignment: use default */ + + if (start & ~(align-1)) + start = (start & ~(align-1)) + align; + + *padamt = start - (base + *prevsectlen); + + return start; +} + +static intnum * +bin_objfmt_resolve_label2(symrec *sym, /*@null@*/ const section *cursect, + unsigned long cursectstart, int withstart) +{ + /*@dependent@*/ section *sect; + /*@dependent@*/ /*@null@*/ bytecode *precbc; + /*@null@*/ bytecode *bc; + /*@null@*/ expr *startexpr; + /*@dependent@*/ /*@null@*/ const intnum *start; + unsigned long startval = 0; + + if (!symrec_get_label(sym, &sect, &precbc)) + return NULL; + + /* determine actual bc from preceding bc (how labels are stored) */ + if (!precbc) + bc = bcs_first(section_get_bytecodes(sect)); + else + bc = bcs_next(precbc); + assert(bc != NULL); + + /* Figure out the starting offset of the entire section */ + if (withstart || (cursect && sect != cursect) || + section_is_absolute(sect)) { + startexpr = expr_copy(section_get_start(sect)); + assert(startexpr != NULL); + expr_expand_labelequ(startexpr, sect, 1,
bin_objfmt_resolve_label); + start = expr_get_intnum(&startexpr); + if (!start) + return NULL; + startval = intnum_get_uint(start); + expr_delete(startexpr); + + /* Compensate for current section start */ + startval -= cursectstart; + } + + /* If a section is done, the following will always succeed. If it's in- + * progress, this will fail if the bytecode comes AFTER the current one. + */ + if (precbc) + return intnum_new_int(startval + precbc->offset + precbc->len); + else + return intnum_new_int(startval + bc->offset); +} + +static intnum * +bin_objfmt_resolve_label(symrec *sym, int withstart) +{ + return bin_objfmt_resolve_label2(sym, NULL, 0, withstart); +} + +typedef struct bin_objfmt_expr_data { + const section *sect; + unsigned long start; + int withstart; +} bin_objfmt_expr_data; + +static int +bin_objfmt_expr_traverse_callback(ExprItem *ei, void *d) +{ + bin_objfmt_expr_data *data = (bin_objfmt_expr_data *)d; + const expr *equ_expr; + + if (ei->type == EXPR_SYM) { + equ_expr = symrec_get_equ(ei->data.sym); + if (equ_expr) { + ei->type = EXPR_EXPR; + ei->data.expn = expr_copy(equ_expr); + } else { + intnum *intn; + + intn = bin_objfmt_resolve_label2(ei->data.sym, data->sect, + data->start, data->withstart); + if (intn) { + ei->type = EXPR_INT; + ei->data.intn = intn; + } + } + } + return 0; +} + +typedef struct bin_objfmt_output_info { + FILE *f; + unsigned char *buf; + const section *sect; + unsigned long start; +} bin_objfmt_output_info; + +static int +bin_objfmt_output_expr(expr **ep, unsigned char **bufp, unsigned long valsize, + const section *sect, const bytecode *bc, int rel, + /*@null@*/ void *d) +{ + /*@null@*/ bin_objfmt_output_info *info = (bin_objfmt_output_info *)d; + bin_objfmt_expr_data data; + /*@dependent@*/ /*@null@*/ const intnum *num; + unsigned long val; + + assert(info != NULL); + + /* For binary output, this is trivial: any expression that doesn't simplify + * to an integer is an error (references something external). 
+ * Other object formats need to generate their relocation list from here! + * Note: we can't just use expr_expand_labelequ() because it doesn't + * resolve between different sections (on purpose).. but for bin we + * WANT that. + */ + data.sect = sect; + if (rel) { + data.start = info->start; + data.withstart = 0; + } else { + data.start = 0; + data.withstart = 1; + } + expr_traverse_leaves_in(*ep, &data, bin_objfmt_expr_traverse_callback); + num = expr_get_intnum(ep); + if (!num) { + ErrorAt((*ep)->line, + _("binary object format does not support external references")); + return 1; + } + val = intnum_get_uint(num); + + if (rel) + val = (unsigned long)((long)(val - (bc->offset + bc->len))); + + /* Write value out. */ + /* FIXME: Should we warn here about truncation if it doesn't fit? */ + switch (valsize) { + case 1: + WRITE_BYTE(*bufp, val); + break; + case 2: + WRITE_SHORT(*bufp, val); + break; + case 4: + WRITE_LONG(*bufp, val); + break; + default: + InternalError(_("unexpected size in bin objfmt output expr")); + } + + return 0; +} + +static int +bin_objfmt_output_bytecode(bytecode *bc, /*@null@*/ void *d) +{ + /*@null@*/ bin_objfmt_output_info *info = (bin_objfmt_output_info *)d; + /*@null@*/ /*@only@*/ unsigned char *bigbuf; + unsigned long size = REGULAR_OUTBUF_SIZE; + unsigned long multiple; + unsigned long i; + int gap; + + bigbuf = bc_tobytes(bc, info->buf, &size, &multiple, &gap, info->sect, + info, bin_objfmt_output_expr, + bin_objfmt_resolve_label); + + /* Warn that gaps are converted to 0. The 0 bytes are generated by + * bc_tobytes() so no special handling is needed. 
+ */ + if (gap) + Warning(_("uninitialized space declared in code/data section: zeroing")); + + assert(info != NULL); + + /* Output multiple copies of buf (or bigbuf if non-NULL) to file */ + for (i=0; i<multiple; i++) + fwrite(bigbuf ? bigbuf : info->buf, size, 1, info->f); + + /* If bigbuf was allocated, free it */ + if (bigbuf) + xfree(bigbuf); + + return 0; +} + static void bin_objfmt_output(FILE *f, sectionhead *sections) { + /*@null@*/ section *text, *data, *bss, *prevsect; + /*@null@*/ expr *startexpr; + /*@dependent@*/ /*@null@*/ const intnum *startnum; + unsigned long start, textstart, datastart; + unsigned long textlen = 0, textpad = 0, datalen = 0, datapad = 0; + unsigned long *prevsectlenptr, *prevsectpadptr; + unsigned long i; + bin_objfmt_output_info info; + + info.f = f; + info.buf = xmalloc(REGULAR_OUTBUF_SIZE); + + text = sections_find_general(sections, ".text"); + data = sections_find_general(sections, ".data"); + bss = sections_find_general(sections, ".bss"); + + if (!text) + InternalError(_("No `.text' section in bin objfmt output")); + + /* First determine the actual starting offsets for .data and .bss. + * As the order in the file is .text -> .data -> .bss (not present), + * use the last bytecode in .text (and the .text section start) to + * determine the starting offset in .data, and likewise for .bss. + * Also compensate properly for alignment. + */ + + /* Find out the start of .text */ + startexpr = expr_copy(section_get_start(text)); + assert(startexpr != NULL); + startnum = expr_get_intnum(&startexpr); + if (!startnum) + InternalError(_("Complex expr for start in bin objfmt output")); + start = intnum_get_uint(startnum); + expr_delete(startexpr); + textstart = start; + + /* Align .data and .bss (if present) by adjusting their starts.
*/ + prevsect = text; + prevsectlenptr = &textlen; + prevsectpadptr = &textpad; + if (data) { + start = bin_objfmt_align_section(data, prevsect, start, 4, + prevsectlenptr, prevsectpadptr); + section_set_start(data, start); + datastart = start; + prevsect = data; + prevsectlenptr = &datalen; + prevsectpadptr = &datapad; + } + if (bss) { + start = bin_objfmt_align_section(bss, prevsect, start, 4, + prevsectlenptr, prevsectpadptr); + section_set_start(bss, start); + } + + /* Output .text first. */ + info.sect = text; + info.start = textstart; + bcs_traverse(section_get_bytecodes(text), &info, + bin_objfmt_output_bytecode); + + /* If .data is present, output it */ + if (data) { + /* Add padding to align .data. Just use a for loop, as this will + * seldom be very many bytes. + */ + for (i=0; iopt_flags == BCFLAG_INPROGRESS) @@ -105,17 +112,17 @@ basic_optimize_bytecode_1(bytecode *bc, void *d) bc->opt_flags = BCFLAG_INPROGRESS; - if (!*precbc) + if (!data->precbc) bc->offset = 0; else - bc->offset = (*precbc)->offset + (*precbc)->len; - *precbc = bc; + bc->offset = data->precbc->offset + data->precbc->len; + data->precbc = bc; /* We're doing just a single pass, so essentially ignore whether the size * is minimum or not, and just check for indeterminate length (indicative * of circular reference). */ - if (bc_calc_len(bc, basic_optimize_resolve_label) < 0) { + if (bc_calc_len(bc, data->sect, basic_optimize_resolve_label) < 0) { ErrorAt(bc->line, _("Circular reference detected.")); return -1; } @@ -128,10 +135,13 @@ basic_optimize_bytecode_1(bytecode *bc, void *d) static int basic_optimize_section_1(section *sect, /*@unused@*/ /*@null@*/ void *d) { - bytecode *precbc = NULL; + basic_optimize_data data; unsigned long flags; int retval; + data.precbc = NULL; + data.sect = sect; + /* Don't even bother if we're in-progress or done. 
*/ flags = section_get_opt_flags(sect); if (flags == SECTFLAG_INPROGRESS) @@ -141,8 +151,8 @@ basic_optimize_section_1(section *sect, /*@unused@*/ /*@null@*/ void *d) section_set_opt_flags(sect, SECTFLAG_INPROGRESS); - retval = bcs_traverse(section_get_bytecodes(sect), &precbc, - basic_optimize_bytecode_1); + retval = bcs_traverse(section_get_bytecodes(sect), &data, + basic_optimize_bytecode_1); if (retval != 0) return retval; diff --git a/src/section.c b/src/section.c index 1deca903..d5b60a93 100644 --- a/src/section.c +++ b/src/section.c @@ -161,6 +161,15 @@ section_set_opt_flags(section *sect, unsigned long opt_flags) sect->opt_flags = opt_flags; } +void * +section_get_of_data(section *sect) +{ + if (sect->type == SECTION_GENERAL) + return sect->data.general.of_data; + else + return NULL; +} + void sections_delete(sectionhead *headp) { diff --git a/src/section.h b/src/section.h index ccea9f88..07165815 100644 --- a/src/section.h +++ b/src/section.h @@ -42,6 +42,8 @@ int section_is_absolute(section *sect); unsigned long section_get_opt_flags(const section *sect); void section_set_opt_flags(section *sect, unsigned long opt_flags); +/*@dependent@*/ /*@null@*/ void *section_get_of_data(section *sect); + void sections_delete(sectionhead *headp); void sections_print(FILE *f, const sectionhead *headp); -- 2.50.1