From: Peter Johnson
Date: Thu, 10 Jan 2002 05:01:36 +0000 (-0000)
Subject: Add new bc_resolve() family, and rewrite calc_len to /just/ calculate the
X-Git-Tag: v0.1.0~87
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ee74365f05d5aff3edaafa51cf9722d8f0f6dd6b;p=yasm

Add new bc_resolve() family, and rewrite calc_len to /just/ calculate the
length, not modify anything else (particularly expressions w/temp. resolved
labels).

svn path=/trunk/yasm/; revision=452
---

diff --git a/libyasm/arch.h b/libyasm/arch.h
index b8996795..81805e90 100644
--- a/libyasm/arch.h
+++ b/libyasm/arch.h
@@ -42,6 +42,9 @@ struct arch {
         /* See bytecode.h comments on bc_calc_len() */
         int (*bc_calc_len) (bytecode *bc,
                             intnum *(*resolve_label) (symrec *sym));
+        /* See bytecode.h comments on bc_resolve() */
+        void (*bc_resolve) (bytecode *bc,
+                            intnum *(*resolve_label) (symrec *sym));
     } bc;
 };
 
diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c
index 86ee2859..823e4062 100644
--- a/libyasm/bytecode.c
+++ b/libyasm/bytecode.c
@@ -310,6 +310,8 @@ bc_print(FILE *f, const bytecode *bc)
 int
 bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
 {
+    bc->len = 0;        /* start at 0 */
+
     switch (bc->type) {
         case BC_EMPTY:
             InternalError(_("got empty bytecode in bc_calc_len"));
@@ -328,6 +330,26 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
     return 0;
 }
 
+void
+bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
+{
+    switch (bc->type) {
+        case BC_EMPTY:
+            InternalError(_("got empty bytecode in bc_resolve"));
+        case BC_DATA:
+            break;
+        case BC_RESERVE:
+            break;
+        case BC_INCBIN:
+            break;
+        default:
+            if (bc->type < cur_arch->bc.type_max)
+                cur_arch->bc.bc_resolve(bc, resolve_label);
+            else
+                InternalError(_("Unknown bytecode type"));
+    }
+}
+
 void
 bcs_delete(bytecodehead *headp)
 {
diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h
index b35459c6..2b0d96ac 100644
--- a/libyasm/bytecode.h
+++ b/libyasm/bytecode.h
@@ -62,9 +62,18 @@ void bc_print(FILE *f, const bytecode *bc);
  * Returns whether the length is the minimum possible (1=yes, 0=no).
  * resolve_label is the function used to determine the value (offset) of a
  *  in-file label (eg, not an EXTERN variable, which is indeterminate).
+ * This function does *not* modify bc other than the length/size values (eg
+ *  it doesn't keep the values returned by resolve_label except temporarily to
+ *  try to minimize the length).
  */
 int bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
 
+/* Resolves all labels in bytecode.  It does essentially the opposite of
+ * the above bc_calc_len(): it doesn't modify the length/size values, instead
+ * it saves the values returned by resolve_label to simplify expressions.
+ */
+void bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
+
 /* void bcs_initialize(bytecodehead *headp); */
 #define bcs_initialize(headp)   STAILQ_INIT(headp)
 
diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h
index d07a43bd..d4cb2bb7 100644
--- a/modules/arch/x86/x86-int.h
+++ b/modules/arch/x86/x86-int.h
@@ -97,6 +97,7 @@ typedef struct x86_jmprel {
 void x86_bc_delete(bytecode *bc);
 void x86_bc_print(FILE *f, const bytecode *bc);
 int x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
+void x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
 
 int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
                      unsigned char nosplit, unsigned char *displen,
diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c
index 06da214d..d5ddf8da 100644
--- a/modules/arch/x86/x86arch.c
+++ b/modules/arch/x86/x86arch.c
@@ -38,6 +38,7 @@ arch x86_arch = {
         X86_BYTECODE_TYPE_MAX,
         x86_bc_delete,
         x86_bc_print,
-        x86_bc_calc_len
+        x86_bc_calc_len,
+        x86_bc_resolve
     }
 };
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
index 70e59ff9..f49f1f45 100644
--- a/modules/arch/x86/x86bc.c
+++ b/modules/arch/x86/x86bc.c
@@ -464,7 +464,126 @@ x86_bc_print(FILE *f, const bytecode *bc)
 }
 
 static int
-x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
+x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len,
+                     intnum *(*resolve_label) (symrec *sym))
+{
+    /*@null@*/ expr *temp;
+    effaddr *ea = insn->ea;
+    x86_effaddr_data *ead = ea_get_data(ea);
+    immval *imm = insn->imm;
+    int retval = 1;             /* may turn into 0 at some point */
+
+    if (ea) {
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* Create temp copy of disp, etc. */
+            x86_effaddr_data ead_t = *ead;  /* structure copy */
+            unsigned char displen = ea->len;
+
+            temp = expr_copy(ea->disp);
+            assert(temp != NULL);
+
+            /* Expand equ's and labels */
+            expr_expand_labelequ(temp, resolve_label);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte.  We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!x86_expr_checkea(&temp, &insn->addrsize, insn->mode_bits,
+                                  ea->nosplit, &displen, &ead_t.modrm,
+                                  &ead_t.valid_modrm, &ead_t.need_modrm,
+                                  &ead_t.sib, &ead_t.valid_sib,
+                                  &ead_t.need_sib))
+                return -1;   /* failed, don't bother checking rest of insn */
+
+            if (!temp) {
+                /* If the expression was deleted (temp=NULL), then make the
+                 * temp info permanent.
+                 */
+
+                /* Delete the "real" expression */
+                expr_delete(ea->disp);
+                ea->disp = NULL;
+                *ead = ead_t;   /* structure copy */
+                ea->len = displen;
+            } else if (displen == 1) {
+                /* Fits into a byte.  We'll assume it never gets bigger, so
+                 * make temp info permanent, but NOT the expr itself (as that
+                 * may change).
+                 */
+                expr_delete(temp);
+                *ead = ead_t;   /* structure copy */
+                ea->len = displen;
+            } else {
+                /* Fits into a word/dword, or unknown.  As this /may/ change in
+                 * a future pass, discard temp info.
+                 */
+                expr_delete(temp);
+                retval = 0;     /* may not be smallest size */
+
+                /* Handle unknown case, make displen word-sized */
+                if (displen == 0xff)
+                    displen = (insn->addrsize == 32) ? 4 : 2;
+            }
+
+            /* Compute length of ea and add to total */
+            *len += ead_t.need_modrm + ead_t.need_sib + displen;
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            temp = expr_copy(imm->val);
+            expr_expand_labelequ(temp, resolve_label);
+
+            /* TODO: check imm->len vs. sized len from expr? */
+
+            /* Handle shift_op special-casing */
+            if (insn->shift_op && temp && (num = expr_get_intnum(&temp))) {
+                if (num && intnum_get_uint(num) == 1) {
+                    /* We can use the ,1 form: subtract out the imm len
+                     * (as we add it back in below).
+                     */
+                    *len -= imm->len;
+                } else
+                    retval = 0;     /* we could still get ,1 */
+            }
+
+            expr_delete(temp);
+        }
+
+        *len += imm->len;
+    }
+
+    *len += insn->opcode_len;
+    *len += (insn->addrsize != 0 && insn->addrsize != insn->mode_bits) ? 1:0;
+    *len += (insn->opersize != 0 && insn->opersize != insn->mode_bits) ? 1:0;
+    *len += (insn->lockrep_pre != 0) ? 1:0;
+
+    return retval;      /* 1 if len is known minimal, 0 if it may still shrink */
+}
+
+int
+x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            return x86_bc_calc_len_insn(insn, &bc->len, resolve_label);
+        default:
+            break;
+    }
+    return 0;
+}
+#if 0
+static int
+x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
 {
     effaddr *ea = insn->ea;
     x86_effaddr_data *ead = ea_get_data(ea);
@@ -485,7 +604,7 @@ x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
                                   ea->nosplit, &ea->len, &ead->modrm,
                                   &ead->valid_modrm, &ead->need_modrm,
                                   &ead->sib, &ead->valid_sib, &ead->need_sib))
-                return 0;   /* failed, don't bother checking rest of insn */
+                return -1;   /* failed, don't bother checking rest of insn */
         }
     }
 
@@ -518,20 +637,19 @@ x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
 
     return 0;
 }
-
-int
-x86_bc_calc_len(bytecode *bc,
-                intnum *(*resolve_label) (symrec *sym))
+#endif
+void
+x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
 {
     x86_insn *insn;
 
     switch ((x86_bytecode_type)bc->type) {
         case X86_BC_INSN:
             insn = bc_get_data(bc);
-            return x86_bc_calc_len_insn(insn, resolve_label);
+            /*x86_bc_resolve_insn(insn, resolve_label);*/
+            break;
         default:
             break;
     }
-    return 0;
 }
 
diff --git a/src/arch.h b/src/arch.h
index b8996795..81805e90 100644
--- a/src/arch.h
+++ b/src/arch.h
@@ -42,6 +42,9 @@ struct arch {
         /* See bytecode.h comments on bc_calc_len() */
         int (*bc_calc_len) (bytecode *bc,
                             intnum *(*resolve_label) (symrec *sym));
+        /* See bytecode.h comments on bc_resolve() */
+        void (*bc_resolve) (bytecode *bc,
+                            intnum *(*resolve_label) (symrec *sym));
     } bc;
 };
 
diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h
index d07a43bd..d4cb2bb7 100644
--- a/src/arch/x86/x86-int.h
+++ b/src/arch/x86/x86-int.h
@@ -97,6 +97,7 @@ typedef struct x86_jmprel {
 void x86_bc_delete(bytecode *bc);
 void x86_bc_print(FILE *f, const bytecode *bc);
 int x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
+void x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
 
 int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
                      unsigned char nosplit, unsigned char *displen,
diff --git a/src/arch/x86/x86arch.c b/src/arch/x86/x86arch.c
index 06da214d..d5ddf8da 100644
--- a/src/arch/x86/x86arch.c
+++ b/src/arch/x86/x86arch.c
@@ -38,6 +38,7 @@ arch x86_arch = {
         X86_BYTECODE_TYPE_MAX,
         x86_bc_delete,
         x86_bc_print,
-        x86_bc_calc_len
+        x86_bc_calc_len,
+        x86_bc_resolve
     }
 };
diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c
index 70e59ff9..f49f1f45 100644
--- a/src/arch/x86/x86bc.c
+++ b/src/arch/x86/x86bc.c
@@ -464,7 +464,126 @@ x86_bc_print(FILE *f, const bytecode *bc)
 }
 
 static int
-x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
+x86_bc_calc_len_insn(x86_insn *insn, unsigned long *len,
+                     intnum *(*resolve_label) (symrec *sym))
+{
+    /*@null@*/ expr *temp;
+    effaddr *ea = insn->ea;
+    x86_effaddr_data *ead = ea_get_data(ea);
+    immval *imm = insn->imm;
+    int retval = 1;             /* may turn into 0 at some point */
+
+    if (ea) {
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* Create temp copy of disp, etc. */
+            x86_effaddr_data ead_t = *ead;  /* structure copy */
+            unsigned char displen = ea->len;
+
+            temp = expr_copy(ea->disp);
+            assert(temp != NULL);
+
+            /* Expand equ's and labels */
+            expr_expand_labelequ(temp, resolve_label);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte.  We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!x86_expr_checkea(&temp, &insn->addrsize, insn->mode_bits,
+                                  ea->nosplit, &displen, &ead_t.modrm,
+                                  &ead_t.valid_modrm, &ead_t.need_modrm,
+                                  &ead_t.sib, &ead_t.valid_sib,
+                                  &ead_t.need_sib))
+                return -1;   /* failed, don't bother checking rest of insn */
+
+            if (!temp) {
+                /* If the expression was deleted (temp=NULL), then make the
+                 * temp info permanent.
+                 */
+
+                /* Delete the "real" expression */
+                expr_delete(ea->disp);
+                ea->disp = NULL;
+                *ead = ead_t;   /* structure copy */
+                ea->len = displen;
+            } else if (displen == 1) {
+                /* Fits into a byte.  We'll assume it never gets bigger, so
+                 * make temp info permanent, but NOT the expr itself (as that
+                 * may change).
+                 */
+                expr_delete(temp);
+                *ead = ead_t;   /* structure copy */
+                ea->len = displen;
+            } else {
+                /* Fits into a word/dword, or unknown.  As this /may/ change in
+                 * a future pass, discard temp info.
+                 */
+                expr_delete(temp);
+                retval = 0;     /* may not be smallest size */
+
+                /* Handle unknown case, make displen word-sized */
+                if (displen == 0xff)
+                    displen = (insn->addrsize == 32) ? 4 : 2;
+            }
+
+            /* Compute length of ea and add to total */
+            *len += ead_t.need_modrm + ead_t.need_sib + displen;
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            temp = expr_copy(imm->val);
+            expr_expand_labelequ(temp, resolve_label);
+
+            /* TODO: check imm->len vs. sized len from expr? */
+
+            /* Handle shift_op special-casing */
+            if (insn->shift_op && temp && (num = expr_get_intnum(&temp))) {
+                if (num && intnum_get_uint(num) == 1) {
+                    /* We can use the ,1 form: subtract out the imm len
+                     * (as we add it back in below).
+                     */
+                    *len -= imm->len;
+                } else
+                    retval = 0;     /* we could still get ,1 */
+            }
+
+            expr_delete(temp);
+        }
+
+        *len += imm->len;
+    }
+
+    *len += insn->opcode_len;
+    *len += (insn->addrsize != 0 && insn->addrsize != insn->mode_bits) ? 1:0;
+    *len += (insn->opersize != 0 && insn->opersize != insn->mode_bits) ? 1:0;
+    *len += (insn->lockrep_pre != 0) ? 1:0;
+
+    return retval;      /* 1 if len is known minimal, 0 if it may still shrink */
+}
+
+int
+x86_bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            return x86_bc_calc_len_insn(insn, &bc->len, resolve_label);
+        default:
+            break;
+    }
+    return 0;
+}
+#if 0
+static int
+x86_bc_resolve_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
 {
     effaddr *ea = insn->ea;
     x86_effaddr_data *ead = ea_get_data(ea);
@@ -485,7 +604,7 @@ x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
                                   ea->nosplit, &ea->len, &ead->modrm,
                                   &ead->valid_modrm, &ead->need_modrm,
                                   &ead->sib, &ead->valid_sib, &ead->need_sib))
-                return 0;   /* failed, don't bother checking rest of insn */
+                return -1;   /* failed, don't bother checking rest of insn */
         }
     }
 
@@ -518,20 +637,19 @@ x86_bc_calc_len_insn(x86_insn *insn, intnum *(*resolve_label) (symrec *sym))
 
     return 0;
 }
-
-int
-x86_bc_calc_len(bytecode *bc,
-                intnum *(*resolve_label) (symrec *sym))
+#endif
+void
+x86_bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
 {
     x86_insn *insn;
 
     switch ((x86_bytecode_type)bc->type) {
         case X86_BC_INSN:
             insn = bc_get_data(bc);
-            return x86_bc_calc_len_insn(insn, resolve_label);
+            /*x86_bc_resolve_insn(insn, resolve_label);*/
+            break;
         default:
             break;
     }
-    return 0;
 }
 
diff --git a/src/bytecode.c b/src/bytecode.c
index 86ee2859..823e4062 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -310,6 +310,8 @@ bc_print(FILE *f, const bytecode *bc)
 int
 bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
 {
+    bc->len = 0;        /* start at 0 */
+
     switch (bc->type) {
         case BC_EMPTY:
             InternalError(_("got empty bytecode in bc_calc_len"));
@@ -328,6 +330,26 @@ bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
     return 0;
 }
 
+void
+bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym))
+{
+    switch (bc->type) {
+        case BC_EMPTY:
+            InternalError(_("got empty bytecode in bc_resolve"));
+        case BC_DATA:
+            break;
+        case BC_RESERVE:
+            break;
+        case BC_INCBIN:
+            break;
+        default:
+            if (bc->type < cur_arch->bc.type_max)
+                cur_arch->bc.bc_resolve(bc, resolve_label);
+            else
+                InternalError(_("Unknown bytecode type"));
+    }
+}
+
 void
 bcs_delete(bytecodehead *headp)
 {
diff --git a/src/bytecode.h b/src/bytecode.h
index b35459c6..2b0d96ac 100644
--- a/src/bytecode.h
+++ b/src/bytecode.h
@@ -62,9 +62,18 @@ void bc_print(FILE *f, const bytecode *bc);
  * Returns whether the length is the minimum possible (1=yes, 0=no).
  * resolve_label is the function used to determine the value (offset) of a
  *  in-file label (eg, not an EXTERN variable, which is indeterminate).
+ * This function does *not* modify bc other than the length/size values (eg
+ *  it doesn't keep the values returned by resolve_label except temporarily to
+ *  try to minimize the length).
  */
 int bc_calc_len(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
 
+/* Resolves all labels in bytecode.  It does essentially the opposite of
+ * the above bc_calc_len(): it doesn't modify the length/size values, instead
+ * it saves the values returned by resolve_label to simplify expressions.
+ */
+void bc_resolve(bytecode *bc, intnum *(*resolve_label) (symrec *sym));
+
 /* void bcs_initialize(bytecodehead *headp); */
 #define bcs_initialize(headp)   STAILQ_INIT(headp)
 