From 68c9dcc62cf472a7f25b76085df99d8781a89deb Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 15 Oct 2001 04:40:45 +0000 Subject: [PATCH] Add several major functions, including effective address ModRM calculation (only 16-bit is implemented at the moment) via expr_checkea(). svn path=/trunk/yasm/; revision=282 --- libyasm/expr.c | 408 +++++++++++++++++++++++++++++++++++-- libyasm/expr.h | 18 ++ modules/arch/x86/expr.c | 408 +++++++++++++++++++++++++++++++++++-- modules/arch/x86/x86expr.c | 408 +++++++++++++++++++++++++++++++++++-- src/arch/x86/expr.c | 408 +++++++++++++++++++++++++++++++++++-- src/arch/x86/x86expr.c | 408 +++++++++++++++++++++++++++++++++++-- src/expr.c | 408 +++++++++++++++++++++++++++++++++++-- src/expr.h | 18 ++ 8 files changed, 2394 insertions(+), 90 deletions(-) diff --git a/libyasm/expr.c b/libyasm/expr.c index aeb60041..f4ac1394 100644 --- a/libyasm/expr.c +++ b/libyasm/expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. 
* If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). 
+ */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/libyasm/expr.h b/libyasm/expr.h index 4bcd4b92..5ce86f72 100644 --- a/libyasm/expr.h +++ b/libyasm/expr.h @@ -89,9 +89,27 @@ ExprItem *ExprReg(unsigned char reg, unsigned char size); #define expr_new_ident(r) \ expr_new ((ExprItem *)NULL, EXPR_IDENT, (r)) +void expr_delete(expr *e); + int expr_contains_float(const expr *); +int expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, + 
unsigned char *n_sib); + +/* Simplifies the expression e as much as possible, eliminating extraneous + * branches and simplifying integer-only subexpressions. + */ int expr_simplify(expr *); + +/* Gets the integer value of e if the expression is just an integer. If the + * expression is more complex (contains anything other than integers, ie + * floats, non-valued labels, registers), returns NULL. + */ +const intnum *expr_get_intnum(expr *e); + void expr_print(expr *); #endif diff --git a/modules/arch/x86/expr.c b/modules/arch/x86/expr.c index aeb60041..f4ac1394 100644 --- a/modules/arch/x86/expr.c +++ b/modules/arch/x86/expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. * If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, 
expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). + */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c index aeb60041..f4ac1394 100644 --- a/modules/arch/x86/x86expr.c +++ b/modules/arch/x86/x86expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. 
* If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). 
+ */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/src/arch/x86/expr.c b/src/arch/x86/expr.c index aeb60041..f4ac1394 100644 --- a/src/arch/x86/expr.c +++ b/src/arch/x86/expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. 
* If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). 
+ */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/src/arch/x86/x86expr.c b/src/arch/x86/x86expr.c index aeb60041..f4ac1394 100644 --- a/src/arch/x86/x86expr.c +++ b/src/arch/x86/x86expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. 
* If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). 
+ */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/src/expr.c b/src/expr.c index aeb60041..f4ac1394 100644 --- a/src/expr.c +++ b/src/expr.c @@ -81,6 +81,11 @@ struct expr { unsigned long line; }; +static int expr_traverse_nodes_post(expr *e, void *d, + int (*func) (expr *e, void *d)); +static int expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)); + /* allocate a new expression node, with children as defined. 
* If it's a unary operator, put the element on the right */ expr * @@ -157,41 +162,401 @@ ExprReg(unsigned char reg, unsigned char size) return e; } +static +int expr_delete_each(expr *e, void *d) +{ + switch (e->left.type) { + case EXPR_INT: + intnum_delete(e->left.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->left.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + switch (e->right.type) { + case EXPR_INT: + intnum_delete(e->right.data.intn); + break; + case EXPR_FLOAT: + floatnum_delete(e->right.data.flt); + break; + default: + break; /* none of the other types needs to be deleted */ + } + free(e->filename); + free(e); /* free ourselves */ + return 0; /* don't stop recursion */ +} + +void +expr_delete(expr *e) +{ + expr_traverse_nodes_post(e, NULL, expr_delete_each); +} + +static int +expr_contains_float_callback(const ExprItem *ei, void *d) +{ + return (ei->type == EXPR_FLOAT); +} + int expr_contains_float(const expr *e) +{ + return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback); +} + +typedef struct checkea_invalid16_data { + enum havereg { + HAVE_NONE = 0, + HAVE_BX = 1 << 0, + HAVE_SI = 1 << 1, + HAVE_DI = 1 << 2, + HAVE_BP = 1 << 3 + } havereg; + int regleft, regright; +} checkea_invalid16_data; + +/* Only works if ei->type == EXPR_REG (doesn't check). + * Overwrites ei with intnum of 0 (to eliminate regs from the final expr). 
+ */ +static int +expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data) +{ + /* in order: ax,cx,dx,bx,sp,bp,si,di */ + static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI}; + + /* don't allow 32-bit registers */ + if (ei->data.reg.size != 16) + return 1; + + /* only allow BX, SI, DI, BP */ + if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */ + return 1; + /* OR it into havereg mask */ + data->havereg |= reg16[ei->data.reg.num & 7]; + + /* only one of each of BX/BP, SI/DI pairs is legal */ + if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP)) + return 1; + if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI)) + return 1; + + /* overwrite with 0 to eliminate register from displacement expr */ + ei->type = EXPR_INT; + ei->data.intn = intnum_new_int(0); + + /* we're okay */ + return 0; +} + +/* Returns 0 if expression is correct up to this point, 1 if there's an error. + * Updates d with new info if necessary. + * Must be called using expr_traverse_nodes_post() to work properly. 
+ */ +static int +expr_checkea_invalid16_callback(expr *e, void *d) +{ /* Per-node callback run by expr_traverse_nodes_post() from expr_checkea(); + * d points at a checkea_invalid16_data. Returns 1 to reject the + * expression (float operand, register refused by + * expr_checkea_invalid16_reg(), or register combined with a disallowed + * operator) and 0 to let the traversal continue. + */ + checkea_invalid16_data *data = (checkea_invalid16_data *)d; + + switch (e->left.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->left, data)) + return 1; + data->regleft = 1; /* NOTE(review): regleft/regright are not cleared anywhere in + * this callback, so once set they also apply to every + * node visited later in the same traversal -- confirm + * that is intended */ + break; + default: + break; + } + switch (e->right.type) { + case EXPR_FLOAT: + return 1; /* disallow float values */ + case EXPR_REG: + /* record and check register values */ + if (expr_checkea_invalid16_reg(&e->right, data)) + return 1; + data->regright = 1; + break; + default: + break; + } + + /* only op allowed with register on right is ADD (and of course, IDENT) */ + if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT) + return 1; + + /* only ops allowed with register on left are ADD or SUB */ + if ((data->regleft && !data->regright) && e->op != EXPR_ADD && + e->op != EXPR_SUB) + return 1; + + /* we're okay */ + return 0; +} + +static int +expr_checkea_getregsize_callback(const ExprItem *ei, void *d) +{ /* Leaf callback for expr_traverse_leaves_in(): d is the unsigned char + * address-size slot. On an EXPR_REG leaf, stores the register size there + * and returns 1, which stops the traversal; returns 0 for any other leaf. + */ + unsigned char *addrsize = (unsigned char *)d; + + if (ei->type == EXPR_REG) { + *addrsize = ei->data.reg.size; + return 1; + } else + return 0; +} + +int +expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib) +{ /* Works out the address size when *addrsize is 0 and, for 16-bit + * addressing with *n_modrm set, validates *e and fills in *modrm, + * *v_modrm and *displen (the SIB outputs are zeroed). The 32-bit path + * is still a TODO. Returns 0 after reporting an error with ErrorAt(), + * 1 otherwise. *e may be deleted and set to NULL when the displacement + * is known to be 0. + */ + const intnum *intn; + long dispval; + + if (*addrsize == 0) { + /* we need to figure out the address size from what we know about: + * - the displacement length + * - what registers are used in the expression + * - the bits setting + */ + switch (*displen) { + case 4: + /* must be 32-bit */ + *addrsize = 32; + break; + case 2: + /* must be 16-bit */ + *addrsize = 16; + break; + default: + /* check for use of 16 or 32-bit registers; if none are used + * default to bits setting. 
+ */ + if (!expr_traverse_leaves_in(*e, addrsize, + expr_checkea_getregsize_callback)) + *addrsize = bits; + } + } + + if (*addrsize == 32 && (*n_modrm || *n_sib)) { + /* TODO */ + } else if (*addrsize == 16 && *n_modrm) { + static const unsigned char modrm16[16] = { /* indexed by the havereg bit mask (HAVE_BX | HAVE_SI | + * HAVE_DI | HAVE_BP); 0377 marks invalid combinations + * and is caught by the sanity check further down */ + 0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */, + 0000 /* [BX+SI] */, 0005 /* [DI] */, 0001 /* [BX+DI] */, + 0377 /* invalid */, 0377 /* invalid */, 0006 /* [BP]+d */, + 0377 /* invalid */, 0002 /* [BP+SI] */, 0377 /* invalid */, + 0003 /* [BP+DI] */, 0377 /* invalid */, 0377 /* invalid */, + 0377 /* invalid */ + }; + checkea_invalid16_data data; + + data.havereg = HAVE_NONE; + data.regleft = 0; + data.regright = 0; + + /* 16-bit cannot have SIB */ + *sib = 0; + *v_sib = 0; + *n_sib = 0; + + /* Check for valid effective address, and get used registers */ + if (expr_traverse_nodes_post(*e, &data, + expr_checkea_invalid16_callback)) { + ErrorAt((*e)->filename, (*e)->line, _("invalid effective address")); + return 0; + } + + /* Simplify expr, which is now really just the displacement. This + * should get rid of the 0's we put in for registers in the callback. + */ + expr_simplify(*e); + + /* sanity check the modrm value; shouldn't be invalid because we + * checked for that in the callback! + * Valid table entries only use the low three (R/M) bits, so any bit + * in 0070 means the 0377 marker was selected. + */ + if (modrm16[data.havereg] & 0070) + InternalError(__LINE__, __FILE__, _("invalid havereg value")); + + *modrm |= modrm16[data.havereg]; /* NOTE(review): this only ORs in the R/M bits; the Mod bits + * that later comments describe as 0 are whatever the + * caller passed in *modrm -- confirm callers clear them */ + + *v_modrm = 0; /* default to not yet valid */ + + switch (*displen) { + case 0: + /* the displacement length hasn't been forced, try to + * determine what it is. + */ + switch (data.havereg) { + case HAVE_NONE: + /* no register in expression, so it must be disp16, and + * as the Mod bits are set to 0 above, we're done with + * the ModRM byte. + */ + *displen = 2; + *v_modrm = 1; + break; + case HAVE_BP: + /* for BP, there *must* be a displacement value, but we + * may not know the size (8 or 16) for sure right now. 
+ * We can't leave displen at 0, because that just means + * unknown displacement, including none. + */ + *displen = 0xff; /* 0xff: displacement required, size not known yet */ + break; + default: + break; + } + + intn = expr_get_intnum(*e); + if (!intn) + break; /* expr still has unknown values */ + + /* make sure the displacement will fit in 16 bits if unsigned, + * and 8 bits if signed. + */ + if (!intnum_check_size(intn, 2, 0) && + !intnum_check_size(intn, 1, 1)) { + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address")); + return 0; + } + + /* don't try to find out what size displacement we have if + * displen is known. + */ + if (*displen != 0 && *displen != 0xff) + break; + + /* Don't worry about overflows here (it's already guaranteed + * to be 16 or 8 bits). + */ + dispval = intnum_get_int(intn); + + /* Figure out what size displacement we will have. */ + if (*displen != 0xff && dispval == 0) { + /* if we know that the displacement is 0 right now, + * go ahead and delete the expr (making it so no + * displacement value is included in the output). + * The Mod bits of ModRM are set to 0 above, and + * we're done with the ModRM byte! + * + * Don't do this if we came from HAVE_BP above, so + * check *displen. + */ + expr_delete(*e); + *e = (expr *)NULL; + } else if (dispval >= -128 && dispval <= 127) { + /* It fits into a signed byte */ + *displen = 1; + *modrm |= 0100; + } else { + /* It's a 16-bit displacement */ + *displen = 2; + *modrm |= 0200; + } + *v_modrm = 1; /* We're done with ModRM */ + + break; + + /* If not 0, the displacement length was forced; set the Mod bits + * appropriately and we're done with the ModRM byte. We assume + * that the user knows what they're doing if they do an explicit + * override, so we don't check for overflow (we'll just truncate + * when we output). 
+ */ + case 1: + *modrm |= 0100; /* NOTE(review): cases 1 and 2 are also reached when no register + * was found (R/M = 110 from modrm16[HAVE_NONE]); Mod = 01 or + * 10 with R/M = 110 selects the [BP+disp] form rather than + * plain disp16 -- confirm this is handled elsewhere */ + *v_modrm = 1; + break; + case 2: + *modrm |= 0200; + *v_modrm = 1; + break; + default: + /* any other size is an error */ + ErrorAt((*e)->filename, (*e)->line, + _("invalid effective address (displacement size)")); + return 0; + } + } + return 1; +} + +/* Traverse over expression tree, calling func for each operation AFTER the + * two branches (if expressions) have been traversed (eg, postorder + * traversal). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + * A NULL e is treated as an empty tree and returns 0. + */ +static int +expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d)) +{ + if (!e) + return 0; + + /* traverse left side */ + if (e->left.type == EXPR_EXPR && + expr_traverse_nodes_post(e->left.data.expn, d, func)) + return 1; + + /* traverse right side */ + if (e->right.type == EXPR_EXPR && + expr_traverse_nodes_post(e->right.data.expn, d, func)) + return 1; + + /* do callback; it runs last because it may free e (expr_delete_each + * does), so e is not touched afterwards */ + return func(e, d); +} + +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +static int +expr_traverse_leaves_in(const expr *e, void *d, + int (*func) (const ExprItem *ei, void *d)) { if (!e) return 0; switch (e->left.type) { case EXPR_SYM: - if (expr_contains_float(symrec_get_equ(e->left.data.sym))) + if (expr_traverse_leaves_in(symrec_get_equ(e->left.data.sym), d, + func)) /* recurse into whatever symrec_get_equ() returns; a NULL + * result is covered by the !e check above (presumably + * this is the symbol EQU expression -- verify) */ return 1; break; case EXPR_EXPR: - if (expr_contains_float(e->left.data.expn)) + if (expr_traverse_leaves_in(e->left.data.expn, d, func)) return 1; break; - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: - break; + default: /* every other item type is a leaf: hand it to func */ + if (func(&e->left, d)) + return 1; } switch (e->right.type) { case EXPR_SYM: - return expr_contains_float(symrec_get_equ(e->right.data.sym)); + if (expr_traverse_leaves_in(symrec_get_equ(e->right.data.sym), d, + func)) + return 1; + break; case EXPR_EXPR: - return expr_contains_float(e->right.data.expn); - case EXPR_FLOAT: - return 1; - case EXPR_REG: - case EXPR_INT: - case EXPR_NONE: + if (expr_traverse_leaves_in(e->right.data.expn, d, func)) + return 1; break; + default: /* right-hand leaf: the result of func is the return value */ + return func(&e->right, d); } + return 0; } @@ -291,6 +656,19 @@ expr_simplify(expr *e) return simplified; } +const intnum * +expr_get_intnum(expr *e) +{ /* Re-runs expr_simplify() until e is an IDENT of an integer or + * expr_simplify() returns 0. e must not be NULL (it is dereferenced + * unconditionally). The returned pointer is the intnum stored inside + * e, not a copy. + */ + while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) && + expr_simplify(e)) + ; + + if (e->op == EXPR_IDENT && e->right.type == EXPR_INT) + return e->right.data.intn; + else + return (intnum *)NULL; +} + void expr_print(expr *e) { diff --git a/src/expr.h b/src/expr.h index 4bcd4b92..5ce86f72 100644 --- a/src/expr.h +++ b/src/expr.h @@ -89,9 +89,27 @@ ExprItem *ExprReg(unsigned char reg, unsigned char size); #define expr_new_ident(r) \ expr_new ((ExprItem *)NULL, EXPR_IDENT, (r)) +void expr_delete(expr *e); /* frees e and every subexpression node below it */ + int expr_contains_float(const expr *); +int expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits, + unsigned char *displen, unsigned char *modrm, + unsigned char *v_modrm, unsigned char *n_modrm, + unsigned char *sib, unsigned char *v_sib, + unsigned char 
*n_sib); /* returns 0 after reporting an error, 1 otherwise; may set *e to NULL */ + +/* Simplifies the expression e as much as possible, eliminating extraneous + * branches and simplifying integer-only subexpressions. + */ int expr_simplify(expr *); + +/* Gets the integer value of e if the expression is just an integer. If the + * expression is more complex (contains anything other than integers, ie + * floats, non-valued labels, registers), returns NULL. + */ +const intnum *expr_get_intnum(expr *e); + void expr_print(expr *); #endif -- 2.40.0