if (ea) {
if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) ||
(!ea->valid_modrm && ea->need_modrm))) {
- /* First expand equ's and simplify expression */
+ /* First expand equ's */
expr_expand_equ(ea->disp);
- expr_simplify(ea->disp);
/* Check validity of effective address and calc R/M bits of
* Mod/RM byte and SIB byte. We won't know the Mod field
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms(). */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms;
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Negate just a single ExprItem by building a -1*ei subexpression */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+ expr *sube = xmalloc(sizeof(expr));
+
+ /* Build -1*ei subexpression */
+ sube->op = EXPR_MUL;
+ sube->filename = xstrdup(e->filename);
+ sube->line = e->line;
+ sube->numterms = 2;
+ sube->terms[0].type = EXPR_INT;
+ sube->terms[0].data.intn = intnum_new_int(-1);
+ sube->terms[1] = *ei; /* structure copy */
+
+ /* Replace original ExprItem with subexp */
+ ei->type = EXPR_EXPR;
+ ei->data.expn = sube;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* distribute (recursively if expr) over terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* change op to ADD, and recursively negate left side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? Make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1. */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ *
+ * Call post-order on an expression tree to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Apply the negation transform (expr_xform_neg) to an entire expn tree.
+ * Works post-order: every EXPR_EXPR term is transformed before e itself.
+ *
+ * Returns a possibly reallocated e (0 if e is 0).
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+    int term;
+
+    if (e == 0)
+        return 0;
+
+    /* Children first; store the result back as a child may be replaced. */
+    for (term = 0; term < e->numterms; term++) {
+        ExprItem *item = &e->terms[term];
+
+        if (item->type != EXPR_EXPR)
+            continue;
+        item->data.expn = expr_xform_neg_tree(item->data.expn);
+    }
+
+    /* Then transform this node. */
+    return expr_xform_neg(e);
+}
+
+/* Detect an integer operand that makes the entire result constant
+ * regardless of the other terms: 0*x, 0&x, -1|x.
+ * Returns 1 if intn combined with op is such an operand, 0 otherwise.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+    if (op == EXPR_MUL || op == EXPR_AND)
+        return intnum_is_zero(intn) ? 1 : 0;
+    if (op == EXPR_OR)
+        return intnum_is_neg1(intn) ? 1 : 0;
+    return 0;
+}
+
+/* Detect "left" identities, where the integer is the first operand and can
+ * simply be dropped: 1*x, 0+x, -1&x, 0|x.
+ * Returns 1 if intn is such an identity value for op, 0 otherwise.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+            return intnum_is_pos1(intn) ? 1 : 0;
+        case EXPR_ADD:
+        case EXPR_OR:
+            return intnum_is_zero(intn) ? 1 : 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) ? 1 : 0;
+        default:
+            return 0;
+    }
+}
+
+/* Detect "right" identities, where the integer is a trailing operand and can
+ * simply be dropped: x*1, x/1, x+0, x-0, x&-1, x|0, x<<0, x>>0.
+ * Returns 1 if intn is such an identity value for op, 0 otherwise.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_DIV:
+            return intnum_is_pos1(intn) ? 1 : 0;
+        case EXPR_ADD:
+        case EXPR_SUB:
+        case EXPR_OR:
+        case EXPR_SHL:
+        case EXPR_SHR:
+            return intnum_is_zero(intn) ? 1 : 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) ? 1 : 0;
+        default:
+            return 0;
+    }
+}
+
+/* Check for and simplify identities.  Returns new number of expr terms.
+ * Sets e->op = EXPR_IDENT if numterms ends up being 1.
+ * Uses numterms parameter instead of e->numterms for basis of "new" number
+ * of terms: expr_level_op() calls this before it has updated e->numterms,
+ * so only the first numterms entries of e->terms are meaningful here.
+ * Assumes int_term is *only* integer term in e.
+ * NOTE: Really designed to only be used by expr_level_op().
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+    int i;
+
+    /* Check for simple identities that delete the intnum.
+     * Don't delete if the intnum is the only thing in the expn.
+     */
+    if ((int_term == 0 && numterms > 1 &&
+         expr_can_delete_int_left(e->op, e->terms[0].data.intn)) ||
+        (int_term > 0 &&
+         expr_can_delete_int_right(e->op, e->terms[int_term].data.intn))) {
+        /* Delete the intnum */
+        intnum_delete(e->terms[int_term].data.intn);
+
+        /* Slide everything to its right over by 1 */
+        if (int_term != numterms-1) /* if it wasn't last.. */
+            memmove(&e->terms[int_term], &e->terms[int_term+1],
+                    (numterms-1-int_term)*sizeof(ExprItem));
+
+        /* Update numterms */
+        numterms--;
+
+        /* The integer term is gone: whatever now sits at that index is
+         * either a different (non-integer) term or a stale slot, so it
+         * must not be examined as an intnum below.
+         */
+        int_term = -1;
+    }
+
+    /* Check for simple identities that delete everything BUT the intnum.
+     * Don't bother if the intnum is the only thing in the expn, or if it
+     * was just deleted above.
+     */
+    if (numterms > 1 && int_term != -1 &&
+        expr_is_constant(e->op, e->terms[int_term].data.intn)) {
+        /* Loop through, deleting everything but the integer term.
+         * Bounded by numterms (not e->numterms) so that exactly the live
+         * terms are released: no stale slot is freed twice and no newly
+         * leveled term is leaked.
+         */
+        for (i=0; i<numterms; i++) {
+            if (i == int_term)
+                continue;
+            switch (e->terms[i].type) {
+                case EXPR_INT:
+                    intnum_delete(e->terms[i].data.intn);
+                    break;
+                case EXPR_FLOAT:
+                    floatnum_delete(e->terms[i].data.flt);
+                    break;
+                case EXPR_EXPR:
+                    expr_delete(e->terms[i].data.expn);
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        /* Move integer term to the first term (if not already there) */
+        if (int_term != 0)
+            e->terms[0] = e->terms[int_term];   /* structure copy */
+
+        /* Set numterms to 1 */
+        numterms = 1;
+    }
+
+    /* Change expression to IDENT if possible. */
+    if (numterms == 1)
+        e->op = EXPR_IDENT;
+
+    /* Return the updated numterms */
+    return numterms;
+}
+
+/* Levels the expression tree starting at e. Eg:
+ *  a+(b+c) -> a+b+c
+ *  (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms.
+ * NOTE: only does *one* level of leveling (no recursion).  Should be called
+ *  post-order on a tree to combine deeper levels.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int new_fold_numterms;
+    int first_int_term = -1;
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        for (i=first_int_term+1, o=first_int_term+1; i<e->numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* Keep the term.  Copy it only if it changed places, but
+                 * always advance the output slot: otherwise a term that
+                 * stays put would be overwritten by the next kept term.
+                 */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                o++;
+            }
+        }
+
+        /* Simplify identities and make IDENT if possible. */
+        new_fold_numterms = expr_simplify_identity(e, fold_numterms,
+                                                   first_int_term);
+        /* Any term removed by the identity pass also disappears from the
+         * leveled result.
+         */
+        level_numterms -= fold_numterms - new_fold_numterms;
+        fold_numterms = new_fold_numterms;
+
+        /* If the identity pass deleted the integer term, forget its index
+         * so the leveling code below does not fold into a non-integer (or
+         * stale) slot.  All other integers were folded above, so a live
+         * slot that is not EXPR_INT means the integer is gone.
+         */
+        if (first_int_term >= fold_numterms ||
+            e->terms[first_int_term].type != EXPR_INT)
+            first_int_term = -1;
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < e->numterms && e->numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Start from the last *live* term (fold_numterms-1): slots past it hold
+     * stale copies left behind by the folding/identity passes above.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j];  /* struc */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j];     /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            /* Copy operand only if it changed places, but always step the
+             * output slot so a term left in place is not overwritten.
+             */
+            if (o != i)
+                e->terms[o] = e->terms[i];
+            o--;
+        }
+    }
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms. */
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else {
+        e->numterms = level_numterms;
+        if (level_numterms == 1)
+            e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Level an entire expn tree by running expr_level_op() post-order:
+ * each EXPR_EXPR term is leveled before e itself.
+ * fold_const is passed through unchanged to every expr_level_op() call.
+ *
+ * Returns a possibly reallocated e (0 if e is 0).
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    int term;
+
+    if (e == 0)
+        return 0;
+
+    /* Children first; store the result back as a child may be reallocated. */
+    for (term = 0; term < e->numterms; term++) {
+        ExprItem *item = &e->terms[term];
+
+        if (item->type != EXPR_EXPR)
+            continue;
+        item->data.expn = expr_level_tree(item->data.expn, fold_const);
+    }
+
+    /* Then level this node. */
+    return expr_level_op(e, fold_const);
+}
+
+/* Sort comparator used by expr_order_terms().
+ * Orders two ExprItem's by the numeric value of their type field, so it
+ * relies on the ExprType enum being declared in canonical order.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *lhs = va;
+    const ExprItem *rhs = vb;
+
+    return lhs->type - rhs->type;
+}
+
+/* Put the terms of e into canonical order: REG, INT, FLOAT, SYM, EXPR.
+ * Only operators whose meaning does not depend on operand order are
+ * touched (ADD, MUL, OR, AND, XOR); anything else (eg SUB) is left alone.
+ * Terms of the same type keep their original relative order.
+ * NOTE: Works on *one* level only (no recursion).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* a lone term is already in order */
+    if (e->numterms == 1)
+        return;
+
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is stable (equal-type terms stay in sequence) and is fast
+     * on input that is already sorted.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*.
+ * Subexpressions are always copied in full (recursive calls pass -1).
+ * The term at index "except" is left unset in the copy; the caller is
+ * expected to fill it in.  Pass -1 to copy every term.
+ * Returns the newly allocated copy, or 0 if e is 0.
+ */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+ int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+ /* struct expr already holds two terms; only ask for extra room beyond
+  * that.  Clamp at 0 so a one-term expn doesn't shrink the allocation
+  * below sizeof(expr) (same sizing rule as expr_level_op()).
+  */
+ n = xmalloc(sizeof(expr)+
+ ((e->numterms<2) ? 0 : sizeof(ExprItem)*(e->numterms-2)));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+ n->numterms = e->numterms;
+ for (i=0; i<e->numterms; i++) {
+ ExprItem *dest = &n->terms[i];
+ const ExprItem *src = &e->terms[i];
+
+ if (i != except) {
+ dest->type = src->type;
+ switch (src->type) {
+ case EXPR_SYM:
+ dest->data.sym = src->data.sym;
+ break;
+ case EXPR_EXPR:
+ dest->data.expn = expr_copy_except(src->data.expn, -1);
+ break;
+ case EXPR_INT:
+ dest->data.intn = intnum_copy(src->data.intn);
+ break;
+ case EXPR_FLOAT:
+ dest->data.flt = floatnum_copy(src->data.flt);
+ break;
+ case EXPR_REG:
+ dest->data.reg.num = src->data.reg.num;
+ dest->data.reg.size = src->data.reg.size;
+ break;
+ default:
+ break;
+ }
+ }
+ }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d;
+ return (ei->type & *t);
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback);
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ static int *reg16[8] = {0,0,0,0,0,0,0,0};
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0;
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0;
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret;
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers.
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.
+ *
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1;
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
+/* Simplify expression by getting rid of unnecessary branches. */
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ e = expr_xform_neg_tree(e);
+ e = expr_level_tree(e, 1);
+ return e;
}
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ *ep = expr_simplify(*ep);
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_INT)
+ return (*ep)->terms[0].data.intn;
else
return (intnum *)NULL;
}
expr_print(expr *e)
{
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ char opstr[3];
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_NOT:
- printf("~");
+ strcpy(opstr, "~");
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
- printf("!");
+ strcpy(opstr, "!");
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
EXPR_LE,
EXPR_GE,
EXPR_NE,
- EXPR_IDENT /* if right is IDENT, then the entire expr is just a num */
+ EXPR_IDENT /* no operation, just a value */
} ExprOp;
#endif
typedef struct expr expr;
#endif
-expr *expr_new(ExprItem *, ExprOp, ExprItem *);
+expr *expr_new(ExprOp, ExprItem *, ExprItem *);
ExprItem *ExprSym(symrec *);
ExprItem *ExprExpr(expr *);
ExprItem *ExprReg(unsigned char reg, unsigned char size);
#define expr_new_tree(l,o,r) \
- expr_new (ExprExpr(l), (o), ExprExpr(r))
+ expr_new ((o), ExprExpr(l), ExprExpr(r))
#define expr_new_branch(o,r) \
- expr_new ((ExprItem *)NULL, (o), ExprExpr(r))
+ expr_new ((o), ExprExpr(r), (ExprItem *)NULL)
#define expr_new_ident(r) \
- expr_new ((ExprItem *)NULL, EXPR_IDENT, (r))
+ expr_new (EXPR_IDENT, (r), (ExprItem *)NULL)
/* allocates and makes an exact duplicate of e */
expr *expr_copy(const expr *e);
void expr_delete(expr *e);
-int expr_contains_float(expr *);
-
-int expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+int expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib,
/* Simplifies the expression e as much as possible, eliminating extraneous
* branches and simplifying integer-only subexpressions.
*/
-int expr_simplify(expr *);
+expr *expr_simplify(expr *e);
/* Gets the integer value of e if the expression is just an integer. If the
* expression is more complex (contains anything other than integers, ie
* floats, non-valued labels, registers), returns NULL.
*/
-const intnum *expr_get_intnum(expr *e);
+const intnum *expr_get_intnum(expr **ep);
void expr_print(expr *);
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms(). */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms;
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Negate just a single ExprItem by building a -1*ei subexpression */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+ expr *sube = xmalloc(sizeof(expr));
+
+ /* Build -1*ei subexpression */
+ sube->op = EXPR_MUL;
+ sube->filename = xstrdup(e->filename);
+ sube->line = e->line;
+ sube->numterms = 2;
+ sube->terms[0].type = EXPR_INT;
+ sube->terms[0].data.intn = intnum_new_int(-1);
+ sube->terms[1] = *ei; /* structure copy */
+
+ /* Replace original ExprItem with subexp */
+ ei->type = EXPR_EXPR;
+ ei->data.expn = sube;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* distribute (recursively if expr) over terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* change op to ADD, and recursively negate left side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? Make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1. */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ *
+ * Call post-order on an expression tree to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Apply expr_xform_neg() to an entire expn tree, post-order: every
+ * subexpression term is transformed first, then e itself.
+ *
+ * Returns a possibly reallocated e (0 if e is 0).
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+    int i;
+
+    if (!e)
+        return 0;
+
+    /* transform subexpressions first; each may come back reallocated, so
+     * store the returned pointer back into the term.
+     */
+    for (i=0; i<e->numterms; i++) {
+        if (e->terms[i].type == EXPR_EXPR)
+            e->terms[i].data.expn = expr_xform_neg_tree(e->terms[i].data.expn);
+    }
+
+    /* then transform this node */
+    return expr_xform_neg(e);
+}
+
+/* Report whether intn, used as an operand of op, makes the entire result
+ * constant no matter what the other terms are: 0*x, 0&x, -1|x.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+    if (intnum_is_zero(intn) && op == EXPR_MUL)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_AND)
+        return 1;
+    if (intnum_is_neg1(intn) && op == EXPR_OR)
+        return 1;
+    return 0;
+}
+
+/* Report whether intn is a no-op when it is the leftmost operand of op:
+ * 1*x, 0+x, -1&x, 0|x.  Returns 1 if it can be dropped, 0 otherwise.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+    if (intnum_is_pos1(intn) && op == EXPR_MUL)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_ADD)
+        return 1;
+    if (intnum_is_neg1(intn) && op == EXPR_AND)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_OR)
+        return 1;
+    return 0;
+}
+
+/* Report whether intn is a no-op when it is a right-hand operand of op:
+ * x*1, x/1, x+0, x-0, x&-1, x|0, x<<0, x>>0.
+ * Returns 1 if it can be dropped, 0 otherwise.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+    if (intnum_is_pos1(intn) && op == EXPR_MUL)
+        return 1;
+    if (intnum_is_pos1(intn) && op == EXPR_DIV)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_ADD)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_SUB)
+        return 1;
+    if (intnum_is_neg1(intn) && op == EXPR_AND)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_OR)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_SHL)
+        return 1;
+    if (intnum_is_zero(intn) && op == EXPR_SHR)
+        return 1;
+    return 0;
+}
+
+/* Check for and simplify identities involving the integer term of e.
+ *
+ *  e        - expression to simplify in place
+ *  numterms - number of valid entries in e->terms[]; used instead of
+ *             e->numterms as the basis for the "new" number of terms
+ *  int_term - index of the integer term; assumed to be the *only* integer
+ *             term in e
+ *
+ * Returns the new number of expr terms.
+ * Sets e->op = EXPR_IDENT if numterms ends up being 1, unless e->op is a
+ * unary operator (NEG, NOT, LNOT), which must be kept.
+ * NOTE: Really designed to only be used by expr_level_op().
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+    int i;
+
+    /* Check for simple identities that delete the intnum.
+     * Don't delete if the intnum is the only thing in the expn.
+     */
+    if ((int_term == 0 && numterms > 1 &&
+         expr_can_delete_int_left(e->op, e->terms[0].data.intn)) ||
+        (int_term > 0 &&
+         expr_can_delete_int_right(e->op, e->terms[int_term].data.intn))) {
+        /* Delete the intnum */
+        intnum_delete(e->terms[int_term].data.intn);
+
+        /* Slide everything to its right over by 1 */
+        if (int_term != numterms-1) /* if it wasn't last.. */
+            memmove(&e->terms[int_term], &e->terms[int_term+1],
+                    (numterms-1-int_term)*sizeof(ExprItem));
+
+        /* Update numterms */
+        numterms--;
+    }
+    /* Check for simple identities that delete everything BUT the intnum.
+     * Don't bother if the intnum is the only thing in the expn.
+     * This is an "else": once the intnum has been deleted above,
+     * terms[int_term] no longer holds an integer and must not be read as
+     * one.  (No value is both deletable and constant for the same op, so
+     * nothing is lost by making the two checks exclusive.)
+     */
+    else if (numterms > 1 &&
+             expr_is_constant(e->op, e->terms[int_term].data.intn)) {
+        /* Loop through, deleting everything but the integer term.  Only the
+         * first numterms entries are valid; entries past that may be stale
+         * copies left behind by the caller's compaction.
+         */
+        for (i=0; i<numterms; i++)
+            if (i != int_term)
+                switch (e->terms[i].type) {
+                    case EXPR_INT:
+                        intnum_delete(e->terms[i].data.intn);
+                        break;
+                    case EXPR_FLOAT:
+                        floatnum_delete(e->terms[i].data.flt);
+                        break;
+                    case EXPR_EXPR:
+                        expr_delete(e->terms[i].data.expn);
+                        break;
+                    default:
+                        break;
+                }
+
+        /* Move integer term to the first term (if not already there) */
+        if (int_term != 0)
+            e->terms[0] = e->terms[int_term]; /* structure copy */
+
+        /* Set numterms to 1 */
+        numterms = 1;
+    }
+
+    /* Change expression to IDENT if possible.  A unary operator applied to
+     * a lone term is NOT an identity (~5 is not 5), so leave those alone.
+     * NOTE(review): evaluating the unary op on the integer here would let
+     * such expressions fold fully; that depends on how intnum_calc() treats
+     * unary operators, which is not visible from this file -- confirm.
+     */
+    if (numterms == 1 && e->op != EXPR_NEG && e->op != EXPR_NOT &&
+        e->op != EXPR_LNOT)
+        e->op = EXPR_IDENT;
+
+    /* Return the updated numterms */
+    return numterms;
+}
+
+/* Levels the expression tree starting at e.  Eg:
+ *  a+(b+c) -> a+b+c
+ *  (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms
+ * (ADD, MUL, OR, AND, XOR).
+ * NOTE: only does *one* level of leveling (no recursion).  Should be called
+ *  post-order on a tree to combine deeper levels.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int first_int_term = -1;
+    int orig_numterms = e->numterms;    /* allocation basis for downsizing */
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        int simplified_numterms;
+
+        for (i=first_int_term+1, o=first_int_term+1; i<orig_numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* copy term if it changed places */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                /* the output slot is consumed whether or not the term had
+                 * to move; otherwise a later term would overwrite it.
+                 */
+                o++;
+            }
+        }
+
+        /* Entries past fold_numterms are now stale copies; make sure nobody
+         * (including expr_simplify_identity) walks them.
+         */
+        e->numterms = fold_numterms;
+
+        /* Simplify identities and make IDENT if possible. */
+        simplified_numterms = expr_simplify_identity(e, fold_numterms,
+                                                     first_int_term);
+        /* Any term removed by the identity is also gone from the leveled
+         * result.
+         */
+        level_numterms -= fold_numterms - simplified_numterms;
+        fold_numterms = simplified_numterms;
+        e->numterms = fold_numterms;
+
+        /* If the identity removed the integer term, there's no longer an
+         * integer to fold into.
+         */
+        if (first_int_term >= fold_numterms ||
+            e->terms[first_int_term].type != EXPR_INT)
+            first_int_term = -1;
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < orig_numterms && orig_numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Only the first fold_numterms entries are valid source terms.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j]; /* struc */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j];     /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            if (o != i) {
+                /* copy operand if it changed places */
+                e->terms[o] = e->terms[i];
+                /* keep tracking the integer term if it's the one that moved,
+                 * so later folds and the identity check hit the right slot.
+                 */
+                if (i == first_int_term)
+                    first_int_term = o;
+            }
+            /* the output slot is consumed even when the term stayed put */
+            o--;
+        }
+    }
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms.
+     * e->numterms is set first so it matches the valid term count while
+     * the identity check runs.
+     */
+    e->numterms = level_numterms;
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else if (level_numterms == 1) {
+        e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Level an entire expn tree: every subexpression term is leveled first
+ * (post-order), then e itself via expr_level_op().
+ *
+ * Returns a possibly reallocated e (0 if e is 0).
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    ExprItem *term;
+    int remaining;
+
+    if (!e)
+        return 0;
+
+    /* level children; each may come back reallocated */
+    for (term = e->terms, remaining = e->numterms; remaining > 0;
+         term++, remaining--) {
+        if (term->type != EXPR_EXPR)
+            continue;
+        term->data.expn = expr_level_tree(term->data.expn, fold_const);
+    }
+
+    /* then level this node */
+    return expr_level_op(e, fold_const);
+}
+
+/* Comparison function for expr_order_terms(): orders two ExprItems by
+ * their type value alone.
+ * Assumes ExprType enum is in canonical order.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *lhs = va;
+    const ExprItem *rhs = vb;
+    int delta = lhs->type - rhs->type;
+
+    return delta;
+}
+
+/* Reorder terms of e into canonical order.  Only reorders if reordering
+ * doesn't change meaning of expression (eg, doesn't reorder SUB); the ops
+ * reordered are ADD, MUL, OR, AND and XOR.
+ * Canonical order: REG, INT, FLOAT, SYM, EXPR.
+ * Multiple terms of a single type are kept in the same order as in
+ * the original expression.
+ * NOTE: Only performs reordering on *one* level (no recursion).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* don't bother reordering if only one element */
+    if (e->numterms == 1)
+        return;
+
+    /* only reorder some types of operations */
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is used because it is a stable sort: terms of the same
+     * type keep their relative order.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*.
+ * The term at index "except" is left uninitialized in the copy; the caller
+ * is expected to fill it in.  Pass -1 to copy every term.
+ * Subexpression terms are always copied in full.
+ * Returns the newly allocated copy, or 0 if e is 0.
+ */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+ int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+ /* struct expr already has room for 2 terms, so only add space for terms
+ * beyond that.  Guard the <2 case: numterms-2 would go negative there and
+ * wrap around in the size computation, under-allocating the node.
+ */
+ n = xmalloc(sizeof(expr)+((e->numterms<2) ? 0 :
+ sizeof(ExprItem)*(e->numterms-2)));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+ n->numterms = e->numterms;
+ for (i=0; i<e->numterms; i++) {
+ ExprItem *dest = &n->terms[i];
+ const ExprItem *src = &e->terms[i];
+
+ if (i != except) {
+ dest->type = src->type;
+ switch (src->type) {
+ case EXPR_SYM:
+ dest->data.sym = src->data.sym;
+ break;
+ case EXPR_EXPR:
+ dest->data.expn = expr_copy_except(src->data.expn, -1);
+ break;
+ case EXPR_INT:
+ dest->data.intn = intnum_copy(src->data.intn);
+ break;
+ case EXPR_FLOAT:
+ dest->data.flt = floatnum_copy(src->data.flt);
+ break;
+ case EXPR_REG:
+ dest->data.reg.num = src->data.reg.num;
+ dest->data.reg.size = src->data.reg.size;
+ break;
+ default:
+ break;
+ }
+ }
+ }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d;
+ return (ei->type & *t);
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback);
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ static int *reg16[8] = {0,0,0,0,0,0,0,0};
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0;
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0;
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret;
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers.
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.
+ *
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1;
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
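+/* Simplify expression: first rewrite negation and subtraction as
+ * multiplication by -1 (expr_xform_neg_tree), then level the tree and fold
+ * integer constants (expr_level_tree).  Returns a possibly reallocated e;
+ * callers must use the returned pointer in place of the one passed in.
+ */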
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ e = expr_xform_neg_tree(e);
+ e = expr_level_tree(e, 1);
+ return e;
}
+/* Simplifies *ep (storing the possibly reallocated expression back through
+ * ep) and, if the result is a lone integer (an EXPR_IDENT whose first term
+ * is an EXPR_INT), returns that intnum.  The returned pointer refers to the
+ * intnum stored inside *ep; it is not a copy.  Returns NULL otherwise.
+ */
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ *ep = expr_simplify(*ep);
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_INT)
+ return (*ep)->terms[0].data.intn;
else
return (intnum *)NULL;
}
expr_print(expr *e)
{
+ /* Prints e to stdout in infix form.  The operator string is printed
+  * between consecutive terms; unary operators are printed as a prefix.
+  * Subexpressions are printed recursively, wrapped in parentheses.
+  */
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ char opstr[3] = ""; /* longest operator is 2 chars + NUL; empty if op unknown */
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_NOT:
- printf("~");
+ strcpy(opstr, "~");
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
- printf("!");
+ strcpy(opstr, "!");
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ /* Unary operators have only one term, so the between-terms separator in
+  * the loop below never fires for them; print the operator as a prefix.
+  */
+ if (e->op == EXPR_NEG || e->op == EXPR_NOT || e->op == EXPR_LNOT) {
+ printf("%s", opstr);
+ }
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ /* & 7 keeps the index inside regs[] */
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ /* operator goes between terms, not after the last one */
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms().
+ * Each type is a distinct bit so that several types can be OR'ed together
+ * into a mask and tested with a bitwise AND (see expr_contains_callback()).
+ */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ * terms[] is declared with two entries; an expression holding more terms is
+ * allocated with the extra ExprItems directly after the struct (see the
+ * xrealloc() calls in expr_level_op()).
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms; /* number of valid entries in terms[] */
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Replace the single item *ei with a new two-term subexpression (-1 * ei).
+ * The new node takes its line and a duplicate of its filename from e.
+ */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+    ExprItem *minus_one, *operand;
+    expr *product;
+
+    product = xmalloc(sizeof(expr));
+    minus_one = &product->terms[0];
+    operand = &product->terms[1];
+
+    /* Node header: a MUL of two terms, located where e is. */
+    product->op = EXPR_MUL;
+    product->numterms = 2;
+    product->line = e->line;
+    product->filename = xstrdup(e->filename);
+
+    /* -1 on the left, the original item (by structure copy) on the right. */
+    minus_one->type = EXPR_INT;
+    minus_one->data.intn = intnum_new_int(-1);
+    *operand = *ei;
+
+    /* The caller's slot now refers to the product instead. */
+    ei->type = EXPR_EXPR;
+    ei->data.expn = product;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * Returns e itself for ADD, SUB, NEG and IDENT (modified in place); for any
+ * other operator returns a newly allocated MUL node (-1 * e) that holds e as
+ * its second term.  The caller must use the returned pointer in place of e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* -(a+b+...) == (-a)+(-b)+...: negate every term, recursing into
+  * subexpressions and wrapping plain items in -1*item.
+  */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* -(a-b) == (-a)+b: change op to ADD and negate only the left side
+  * (recursively if it is an expr).
+  */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? -(-x) == x, so make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1.
+  * NOTE(review): this writes terms[1], so e must have been allocated
+  * with room for at least two terms.
+  */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ * Nodes with any other operator are returned unchanged.
+ *
+ * Only looks at the node e itself; call post-order on an expression tree
+ * (as expr_xform_neg_tree() does) to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x: relabel the node as an IDENT and let the
+  * helper's IDENT case rewrite it as a MUL with -1.
+  */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Apply expr_xform_neg() to every node of the tree rooted at e, children
+ * before parents.  Returns the (possibly replaced) root, or a null pointer
+ * if e is null.
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+    ExprItem *term;
+    int idx = 0;
+
+    if (e == (expr *)NULL)
+        return (expr *)NULL;
+
+    /* Children first: each subexpression slot receives its rewritten root. */
+    while (idx < e->numterms) {
+        term = &e->terms[idx++];
+        if (term->type != EXPR_EXPR)
+            continue;
+        term->data.expn = expr_xform_neg_tree(term->data.expn);
+    }
+
+    /* Then this node itself. */
+    return expr_xform_neg(e);
+}
+
+/* Does combining intn with anything under op always give intn back?
+ * True for 0*x, 0&x and -1|x.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_AND:
+            return intnum_is_zero(intn) ? 1 : 0;
+        case EXPR_OR:
+            return intnum_is_neg1(intn) ? 1 : 0;
+        default:
+            return 0;
+    }
+}
+
+/* Is intn an identity element when it is the LEFT operand of op?
+ * True for 1*x, 0+x, -1&x and 0|x.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+            return intnum_is_pos1(intn) ? 1 : 0;
+        case EXPR_ADD:
+        case EXPR_OR:
+            return intnum_is_zero(intn) ? 1 : 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) ? 1 : 0;
+        default:
+            return 0;
+    }
+}
+
+/* Is intn an identity element when it is the RIGHT operand of op?
+ * True for x*1, x/1, x+0, x-0, x&-1, x|0, x<<0 and x>>0.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_DIV:
+            return intnum_is_pos1(intn) ? 1 : 0;
+        case EXPR_ADD:
+        case EXPR_SUB:
+        case EXPR_OR:
+        case EXPR_SHL:
+        case EXPR_SHR:
+            return intnum_is_zero(intn) ? 1 : 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) ? 1 : 0;
+        default:
+            return 0;
+    }
+}
+
+/* Check for and simplify identities.  Returns new number of expr terms.
+ * Sets e->op = EXPR_IDENT if an n-ary/binary expression ends up with a single
+ * term.  A unary operator (NEG, NOT, LNOT) is left alone: it always has one
+ * term, and relabelling it IDENT would drop the operation without applying
+ * it.
+ * Uses the numterms parameter instead of e->numterms as the number of valid
+ * terms, both on entry and for every loop below.
+ * Assumes int_term is *only* integer term in e.
+ * NOTE: Really designed to only be used by expr_level_op().
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+    /* Fetch the integer once, before any term is deleted or moved. */
+    intnum *intn = e->terms[int_term].data.intn;
+    int i;
+
+    if ((int_term == 0 && numterms > 1 &&
+         expr_can_delete_int_left(e->op, intn)) ||
+        (int_term > 0 && expr_can_delete_int_right(e->op, intn))) {
+        /* Simple identity (0+x, x*1, ...): the integer contributes nothing.
+         * Never taken when the integer is the only term.
+         */
+        intnum_delete(intn);
+
+        /* Slide everything to its right over by 1 */
+        if (int_term != numterms-1)     /* if it wasn't last.. */
+            memmove(&e->terms[int_term], &e->terms[int_term+1],
+                    (numterms-1-int_term)*sizeof(ExprItem));
+
+        numterms--;
+    } else if (numterms > 1 && expr_is_constant(e->op, intn)) {
+        /* Absorbing value (0*x, 0&x, -1|x): the result is the integer alone.
+         * This is an else-branch because the integer no longer exists once
+         * the branch above has deleted it.
+         */
+        for (i=0; i<numterms; i++) {
+            if (i == int_term)
+                continue;
+            switch (e->terms[i].type) {
+                case EXPR_INT:
+                    intnum_delete(e->terms[i].data.intn);
+                    break;
+                case EXPR_FLOAT:
+                    floatnum_delete(e->terms[i].data.flt);
+                    break;
+                case EXPR_EXPR:
+                    expr_delete(e->terms[i].data.expn);
+                    break;
+                default:
+                    break;      /* nothing owned by the other types */
+            }
+        }
+
+        /* Move integer term to the first term (if not already there) */
+        if (int_term != 0)
+            e->terms[0] = e->terms[int_term];   /* structure copy */
+
+        numterms = 1;
+    }
+
+    /* Change expression to IDENT if possible. */
+    if (numterms == 1 && e->op != EXPR_NEG && e->op != EXPR_NOT &&
+        e->op != EXPR_LNOT)
+        e->op = EXPR_IDENT;
+
+    /* Return the updated numterms */
+    return numterms;
+}
+
+/* Levels the expression tree starting at e.  Eg:
+ *   a+(b+c) -> a+b+c
+ *   (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms.
+ * NOTE: only does *one* level of leveling (no recursion).  Must be called
+ * post-order on a tree (as expr_level_tree() does) to combine deeper levels:
+ * the slot bookkeeping below relies on every same-operator subexpression
+ * having already been leveled and folded, so that bringing one up never
+ * needs fewer slots than the single slot it occupied.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int first_int_term = -1;
+    /* Number of term slots e holds on entry; e->numterms itself is updated
+     * along the way so the node stays consistent for the identity helper.
+     */
+    int alloc_numterms = e->numterms;
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        int new_numterms;
+
+        for (i=first_int_term+1, o=first_int_term+1; i<alloc_numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* Keep the term: copy it only if it changed places, but
+                 * always advance the output slot so that a later term
+                 * cannot land on top of it.
+                 */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                o++;
+            }
+        }
+
+        /* Simplify identities and make IDENT if possible.  Only the first
+         * fold_numterms slots are valid now, so say so in the node too.
+         */
+        e->numterms = fold_numterms;
+        new_numterms = expr_simplify_identity(e, fold_numterms,
+                                              first_int_term);
+        if (new_numterms < fold_numterms) {
+            /* Terms that went away no longer need a slot after leveling. */
+            level_numterms -= fold_numterms - new_numterms;
+            /* If e shrank without collapsing to an IDENT, it was the
+             * integer term that was dropped as an identity: there is no
+             * integer term in e any more.
+             */
+            if (e->op != EXPR_IDENT)
+                first_int_term = -1;
+        }
+        fold_numterms = new_numterms;
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < alloc_numterms && alloc_numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Start at the last *valid* term: slots past fold_numterms hold stale
+     * leftovers from the folding pass above.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j]; /* struc */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j];     /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            if (o != i) {
+                /* copy operand if it changed places */
+                e->terms[o] = e->terms[i];
+                /* the integer term moved: remember where it lives now */
+                if (i == first_int_term)
+                    first_int_term = o;
+            }
+            /* The slot is used whether or not the operand moved. */
+            o--;
+        }
+    }
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms. */
+    e->numterms = level_numterms;
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else if (level_numterms == 1) {
+        e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Run expr_level_op() over the whole tree rooted at e, children before
+ * parents, passing fold_const through unchanged.  Returns the (possibly
+ * reallocated) root, or a null pointer if e is null.
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    ExprItem *term;
+    int idx = 0;
+
+    if (e == (expr *)NULL)
+        return (expr *)NULL;
+
+    /* Children first: each subexpression slot receives its leveled root. */
+    while (idx < e->numterms) {
+        term = &e->terms[idx++];
+        if (term->type != EXPR_EXPR)
+            continue;
+        term->data.expn = expr_level_tree(term->data.expn, fold_const);
+    }
+
+    /* Then this node itself. */
+    return expr_level_op(e, fold_const);
+}
+
+/* Comparison function for expr_order_terms().
+ * Assumes ExprType enum is in canonical order.
+ * Returns negative, zero or positive as a's type sorts before, with or
+ * after b's type.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *a = va, *b = vb;
+
+    /* Convert to int before subtracting: the enum's underlying type may be
+     * unsigned, in which case the raw difference would wrap and depend on an
+     * implementation-defined conversion back to int.
+     */
+    return (int)a->type - (int)b->type;
+}
+
+/* Put the terms of e into canonical order (REG, INT, FLOAT, SYM, EXPR),
+ * but only for operators whose meaning does not depend on operand order
+ * (ADD, MUL, OR, AND, XOR); anything else, eg SUB, is left untouched.
+ * Terms of equal type keep their original relative order.
+ * NOTE: Only performs reordering on *one* level (no recursion).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* a single element is already in order */
+    if (e->numterms == 1)
+        return;
+
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is stable, which is what keeps same-type terms in their
+     * original order, and it is fast on already sorted input.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*.
+ * Subexpressions, intnums and floatnums are duplicated; symbols and
+ * registers are copied by value.  The skipped slot is left as EXPR_NONE for
+ * the caller to fill in.  Pass except == -1 to copy every term.
+ * Returns the newly allocated copy, or 0 if e is null.
+ */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+ int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+ /* struct expr already has room for two terms; only add space beyond
+  * that.  Guard numterms < 2, where (numterms-2) would wrap around in the
+  * size arithmetic and allocate less than sizeof(expr).
+  */
+ n = xmalloc(sizeof(expr)+((e->numterms<2) ? 0 :
+ sizeof(ExprItem)*(e->numterms-2)));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+ n->numterms = e->numterms;
+ for (i=0; i<e->numterms; i++) {
+ ExprItem *dest = &n->terms[i];
+ const ExprItem *src = &e->terms[i];
+
+ if (i != except) {
+ dest->type = src->type;
+ switch (src->type) {
+ case EXPR_SYM:
+ dest->data.sym = src->data.sym;
+ break;
+ case EXPR_EXPR:
+ dest->data.expn = expr_copy_except(src->data.expn, -1);
+ break;
+ case EXPR_INT:
+ dest->data.intn = intnum_copy(src->data.intn);
+ break;
+ case EXPR_FLOAT:
+ dest->data.flt = floatnum_copy(src->data.flt);
+ break;
+ case EXPR_REG:
+ dest->data.reg.num = src->data.reg.num;
+ dest->data.reg.size = src->data.reg.size;
+ break;
+ default:
+ break;
+ }
+ } else {
+ /* never leave the skipped slot's type uninitialised */
+ dest->type = EXPR_NONE;
+ }
+ }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d; /* d points at the ExprType bit(s) to look for */
+ return (ei->type & *t); /* nonzero if this item's type bit is in *t */
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback); /* nonzero if any leaf of e has a type bit in t */
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg; expr_checkea_get_reg16() hands out pointers to these */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+/* Returns a pointer to the multiplier total in data that belongs to the
+ * register in ei, or 0 if the register is not a 16-bit BX, BP, SI or DI.
+ * ei is only overwritten when a non-0 pointer is returned.
+ */
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ /* Deliberately not static: the entries point into the caller's data, so
+ * the table must not be kept (or shared) between calls.
+ */
+ int *reg16[8] = {0,0,0,0,0,0,0,0};
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0;
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0;
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret;
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers.
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.
+ *
+ * ep points at the expression pointer to check.  When distribution happens
+ * at this level, *ep is replaced with the distributed (ADD) expression and
+ * the old top-level node is deleted, so callers must re-read *ep afterwards.
+ * Subexpressions that contain registers are checked recursively.
+ *
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1; /* term indexes (-1 = none): last term holding a reg, last EXPR_EXPR term holding a reg */
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
+/* Simplify expression by getting rid of unnecessary branches.
+ * Runs expr_xform_neg_tree() and then expr_level_tree(e, 1) over e.
+ * Returns the resulting expression.  expr_xform_neg_tree() may hand back a
+ * different (reallocated) node than the one passed in, so callers must
+ * replace their pointer with the return value.
+ */
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ e = expr_xform_neg_tree(e);
+ e = expr_level_tree(e, 1);
+ return e;
}
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ *ep = expr_simplify(*ep); /* store the result back: simplifying may return a different node */
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_INT) /* fully reduced to a single integer? */
+ return (*ep)->terms[0].data.intn; /* points into *ep; not a copy */
else
return (intnum *)NULL;
}
expr_print(expr *e)
{
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ char opstr[3];
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_NOT:
- printf("~");
+ strcpy(opstr, "~");
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
- printf("!");
+ strcpy(opstr, "!");
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms().
+ * Each type other than EXPR_NONE is a distinct bit, so types can be tested
+ * against a mask with & (see expr_contains_callback()).
+ */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ *
+ * numterms is the number of entries of terms[] in use.  An expr holding
+ * more than two terms is allocated with
+ * sizeof(expr)+sizeof(ExprItem)*(numterms-2) bytes so that terms[] extends
+ * past the end of the declared structure.
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms;
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Negate just a single ExprItem by building a -1*ei subexpression.
+ * e is only used as the source of the filename and line number recorded in
+ * the new subexpression.  *ei is rewritten in place to refer to it.
+ */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+ expr *sube = xmalloc(sizeof(expr));
+
+ /* Build -1*ei subexpression */
+ sube->op = EXPR_MUL;
+ sube->filename = xstrdup(e->filename);
+ sube->line = e->line;
+ sube->numterms = 2;
+ sube->terms[0].type = EXPR_INT;
+ sube->terms[0].data.intn = intnum_new_int(-1);
+ sube->terms[1] = *ei; /* structure copy */
+
+ /* Replace original ExprItem with subexp */
+ ei->type = EXPR_EXPR;
+ ei->data.expn = sube;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * ADD, SUB, NEG and IDENT nodes are modified in place and e itself is
+ * returned.  For any other operator a new MUL node (-1 * e) is allocated
+ * and returned instead.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* distribute (recursively if expr) over terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* change op to ADD, and recursively negate left side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? Make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1.
+ * The -1 is stored in terms[1], so e must have room for two terms.
+ */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ * Expressions with any other operator are returned unchanged.
+ *
+ * Call post-order on an expression tree to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x: relabel the node as IDENT, then let the helper
+ * negate that IDENT (which makes it a MUL by -1).
+ */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Apply expr_xform_neg() to every node of an expn tree.  Each node's
+ * subexpressions are transformed before the node itself (post-order).
+ *
+ * Returns a possibly reallocated e (0 if e is 0).
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+ int term;
+
+ if (e) {
+ /* children first; store each result back as it may be a new node */
+ for (term=0; term<e->numterms; term++) {
+ ExprItem *item = &e->terms[term];
+
+ if (item->type != EXPR_EXPR)
+ continue;
+ item->data.expn = expr_xform_neg_tree(item->data.expn);
+ }
+
+ /* then this node */
+ e = expr_xform_neg(e);
+ }
+
+ return e;
+}
+
+/* Decide whether intn, combined with anything via op, forces the whole
+ * result to be intn itself: 0*x, 0&x, -1|x.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+ switch (op) {
+ case EXPR_MUL:
+ case EXPR_AND:
+ return intnum_is_zero(intn) != 0;
+ case EXPR_OR:
+ return intnum_is_neg1(intn) != 0;
+ default:
+ return 0;
+ }
+}
+
+/* Decide whether intn, as the left operand of op, is an identity element
+ * that can simply be dropped: 1*x, 0+x, -1&x, 0|x.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+ switch (op) {
+ case EXPR_MUL:
+ return intnum_is_pos1(intn) != 0;
+ case EXPR_ADD:
+ case EXPR_OR:
+ return intnum_is_zero(intn) != 0;
+ case EXPR_AND:
+ return intnum_is_neg1(intn) != 0;
+ default:
+ return 0;
+ }
+}
+
+/* Decide whether intn, as the right operand of op, is an identity element
+ * that can simply be dropped: x*1, x/1, x+0, x-0, x&-1, x|0, x<<0, x>>0.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+ switch (op) {
+ case EXPR_MUL:
+ case EXPR_DIV:
+ return intnum_is_pos1(intn) != 0;
+ case EXPR_ADD:
+ case EXPR_SUB:
+ case EXPR_OR:
+ case EXPR_SHL:
+ case EXPR_SHR:
+ return intnum_is_zero(intn) != 0;
+ case EXPR_AND:
+ return intnum_is_neg1(intn) != 0;
+ default:
+ return 0;
+ }
+}
+
+/* Check for and simplify identities. Returns new number of expr terms.
+ * Sets e->op = EXPR_IDENT if numterms ends up being 1.
+ * Uses numterms parameter instead of e->numterms for basis of "new" number
+ * of terms: only terms[0..numterms-1] are read, moved or deleted here, and
+ * e->numterms itself is left for the caller to update.
+ * Assumes int_term is *only* integer term in e.
+ * NOTE: Really designed to only be used by expr_level_op().
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+ int i;
+
+ /* Check for simple identities that delete the intnum.
+ * Don't delete if the intnum is the only thing in the expn.
+ */
+ if ((int_term == 0 && numterms > 1 &&
+ expr_can_delete_int_left(e->op, e->terms[0].data.intn)) ||
+ (int_term > 0 &&
+ expr_can_delete_int_right(e->op, e->terms[int_term].data.intn))) {
+ /* Delete the intnum */
+ intnum_delete(e->terms[int_term].data.intn);
+
+ /* Slide everything to its right over by 1 */
+ if (int_term != numterms-1) /* if it wasn't last.. */
+ memmove(&e->terms[int_term], &e->terms[int_term+1],
+ (numterms-1-int_term)*sizeof(ExprItem));
+
+ /* Update numterms */
+ numterms--;
+
+ /* The integer term no longer exists: its slot now holds another
+ * term (or is past the end), so it must not be examined below.
+ */
+ int_term = -1;
+ }
+
+ /* Check for simple identities that delete everything BUT the intnum.
+ * Don't bother if the intnum is the only thing in the expn, or if it
+ * was just deleted above.
+ */
+ if (int_term != -1 && numterms > 1 &&
+ expr_is_constant(e->op, e->terms[int_term].data.intn)) {
+ /* Loop through, deleting everything but the integer term.
+ * Stop at numterms, not e->numterms: slots beyond numterms are
+ * stale and deleting them again would double free.
+ */
+ for (i=0; i<numterms; i++)
+ if (i != int_term)
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ case EXPR_EXPR:
+ expr_delete(e->terms[i].data.expn);
+ break;
+ default:
+ break;
+ }
+
+ /* Move integer term to the first term (if not already there) */
+ if (int_term != 0)
+ e->terms[0] = e->terms[int_term]; /* structure copy */
+
+ /* Set numterms to 1 */
+ numterms = 1;
+ }
+
+ /* Change expression to IDENT if possible. */
+ if (numterms == 1)
+ e->op = EXPR_IDENT;
+
+ /* Return the updated numterms */
+ return numterms;
+}
+
+/* Levels the expression tree starting at e. Eg:
+ * a+(b+c) -> a+b+c
+ * (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms.
+ * NOTE: only does *one* level of leveling (no recursion). Should be called
+ * post-order on a tree to combine deeper levels.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int first_int_term = -1;
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        int simplified_numterms;
+
+        for (i=first_int_term+1, o=first_int_term+1; i<e->numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* Keep the term.  Only copy if it changed places, but
+                 * ALWAYS advance the output index: leaving o behind would
+                 * make the next kept term overwrite this one.
+                 */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                o++;
+            }
+        }
+
+        /* Simplify identities and make IDENT if possible.  If that removed
+         * terms, the leveled size shrinks by the same amount.
+         */
+        simplified_numterms = expr_simplify_identity(e, fold_numterms,
+                                                     first_int_term);
+        level_numterms -= fold_numterms - simplified_numterms;
+        fold_numterms = simplified_numterms;
+
+        /* The identity pass may have deleted or moved the integer term, so
+         * look it up again.  After folding there is at most one INT among
+         * the surviving terms.
+         */
+        first_int_term = -1;
+        for (i=0; i<fold_numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                first_int_term = i;
+                break;
+            }
+        }
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < e->numterms && e->numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Start from the last *surviving* term (fold_numterms-1): slots at and
+     * beyond fold_numterms are leftovers of the folding pass above (stale
+     * copies and already-deleted intnums) and must not be read again.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j]; /* structure copy */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j]; /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            if (o != i) {
+                /* Operand changed places.  If it is the integer term that
+                 * later folds accumulate into, remember where it went.
+                 */
+                if (i == first_int_term)
+                    first_int_term = o;
+                e->terms[o] = e->terms[i];
+            }
+            /* The slot is consumed whether or not a copy was needed. */
+            o--;
+        }
+    }
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms. */
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else {
+        e->numterms = level_numterms;
+        if (level_numterms == 1)
+            e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Level an entire expn tree.
+ * Subexpressions are leveled first (post-order), then e itself is handed
+ * to expr_level_op().  Returns the possibly reallocated root, or 0 when e
+ * is a null pointer.
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    int term;
+
+    if (e == NULL)
+        return 0;
+
+    /* level every child expression before touching this node */
+    for (term = 0; term < e->numterms; term++) {
+        ExprItem *item = &e->terms[term];
+
+        if (item->type != EXPR_EXPR)
+            continue;
+        /* the child may have been reallocated, so store the result back */
+        item->data.expn = expr_level_tree(item->data.expn, fold_const);
+    }
+
+    /* children done, now level this node */
+    return expr_level_op(e, fold_const);
+}
+
+/* Comparison function for expr_order_terms().
+ * Ranks two ExprItems purely by their ExprType value, so it only gives
+ * the canonical order as long as the ExprType enum is declared in that
+ * order.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *lhs = va;
+    const ExprItem *rhs = vb;
+    int lhs_rank = lhs->type;
+    int rhs_rank = rhs->type;
+
+    return lhs_rank - rhs_rank;
+}
+
+/* Put the terms of e into canonical order: REG, INT, FLOAT, SYM, EXPR.
+ * Only ADD, MUL, OR, AND and XOR are touched; every other operator
+ * (eg, SUB) is left alone because reordering would change its meaning.
+ * Terms of the same type keep their original relative order.
+ * NOTE: works on *one* level only (no recursion).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* a single element is trivially ordered */
+    if (e->numterms == 1)
+        return;
+
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is stable (same-type terms stay in order) and fast on
+     * input that is already sorted.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*.
+ * Subexpressions are deep-copied; intnums and floatnums are duplicated;
+ * symbol pointers and register descriptions are copied by value.
+ * The slot at index "except" is not copied: it is marked EXPR_NONE and the
+ * caller is expected to fill it in.  Pass -1 to copy every term.
+ * Returns 0 if e is a null pointer, otherwise a newly allocated expr.
+ */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+    int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+    /* sizeof(expr) already holds two terms; only add room for the extras.
+     * Never allocate less than sizeof(expr), so that a one-term copy can
+     * still have its second slot written later.
+     */
+    n = xmalloc(sizeof(expr)+((e->numterms<2) ? 0 :
+                sizeof(ExprItem)*(e->numterms-2)));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+    n->numterms = e->numterms;
+    for (i=0; i<e->numterms; i++) {
+        ExprItem *dest = &n->terms[i];
+        const ExprItem *src = &e->terms[i];
+
+        if (i != except) {
+            dest->type = src->type;
+            switch (src->type) {
+                case EXPR_SYM:
+                    dest->data.sym = src->data.sym;
+                    break;
+                case EXPR_EXPR:
+                    dest->data.expn = expr_copy_except(src->data.expn, -1);
+                    break;
+                case EXPR_INT:
+                    dest->data.intn = intnum_copy(src->data.intn);
+                    break;
+                case EXPR_FLOAT:
+                    dest->data.flt = floatnum_copy(src->data.flt);
+                    break;
+                case EXPR_REG:
+                    dest->data.reg.num = src->data.reg.num;
+                    dest->data.reg.size = src->data.reg.size;
+                    break;
+                default:
+                    break;
+            }
+        } else {
+            /* leave a well-defined placeholder instead of garbage */
+            dest->type = EXPR_NONE;
+        }
+    }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d; /* d is the address of the ExprType mask passed by expr_contains() */
+ return (ei->type & *t); /* nonzero when the item's type bit is set in the mask */
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback); /* 1 if any leaf of e matches t, else 0 */
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ static int *reg16[8] = {0,0,0,0,0,0,0,0}; /* static, but slots 3,5,6,7 are re-pointed at *data on every call */
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0; /* null pointer: register size is not 16 */
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0; /* null pointer: register has no counter in *data; ei is left untouched */
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret; /* address of the matching multiplier counter in *data */
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
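+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers (including floats).
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.  When a distribution is
+ * performed at the top level, *ep is replaced by the expanded expression
+ * and the old one is deleted, so callers must reload their pointer from
+ * *ep after a return value of 2.
+ *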
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1;
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
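+/* Simplify expression by getting rid of unnecessary branches.
+ * Runs expr_xform_neg_tree() followed by expr_level_tree() with constant
+ * folding enabled.  Returns the simplified expression, which may have been
+ * reallocated: callers must use the returned pointer instead of e.
+ */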
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ e = expr_xform_neg_tree(e);
+ e = expr_level_tree(e, 1);
+ return e;
}
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ *ep = expr_simplify(*ep); /* simplification may hand back a different pointer, hence the expr ** parameter */
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_INT)
+ return (*ep)->terms[0].data.intn; /* points into *ep, not a copy */
else
return (intnum *)NULL;
}
expr_print(expr *e)
{
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ char opstr[3];
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_NOT:
- printf("~");
+ strcpy(opstr, "~");
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
- printf("!");
+ strcpy(opstr, "!");
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms().
+ * Every type other than EXPR_NONE is a distinct bit, which lets
+ * expr_contains_callback() test an item's type against a mask with
+ * a single bitwise AND.
+ */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ * Nodes with more than two terms are allocated with extra ExprItems
+ * past the end of the struct (see the xrealloc() calls in expr_level_op()).
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms; /* number of entries of terms[] currently in use */
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Negate just a single ExprItem by building a -1*ei subexpression.
+ * e only supplies the filename and line recorded on the new node;
+ * ei is overwritten in place so that it refers to the new subexpression.
+ */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+ expr *sube = xmalloc(sizeof(expr)); /* sizeof(expr) already has room for the two terms used here */
+
+ /* Build -1*ei subexpression */
+ sube->op = EXPR_MUL;
+ sube->filename = xstrdup(e->filename);
+ sube->line = e->line;
+ sube->numterms = 2;
+ sube->terms[0].type = EXPR_INT;
+ sube->terms[0].data.intn = intnum_new_int(-1);
+ sube->terms[1] = *ei; /* structure copy */
+
+ /* Replace original ExprItem with subexp */
+ ei->type = EXPR_EXPR;
+ ei->data.expn = sube;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* distribute (recursively if expr) over terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* change op to ADD, and recursively negate left side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? Make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1. */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ *
+ * Call post-order on an expression tree to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Apply expr_xform_neg() to every node of an expression tree, visiting the
+ * subexpression terms of a node before the node itself (post-order).
+ * A null e is returned unchanged as a null pointer.
+ *
+ * Returns the root, which expr_xform_neg() may have replaced.
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+    int idx;
+
+    if (e == 0)
+        return 0;
+
+    /* rewrite every subexpression term first */
+    idx = 0;
+    while (idx < e->numterms) {
+        ExprItem *term = &e->terms[idx];
+
+        if (term->type == EXPR_EXPR)
+            term->data.expn = expr_xform_neg_tree(term->data.expn);
+        idx++;
+    }
+
+    /* then transform this node */
+    return expr_xform_neg(e);
+}
+
+/* Report whether intn combined through op makes the entire result constant
+ * no matter what the other operands are: 0*x, 0&x, and -1|x.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_AND:
+            return intnum_is_zero(intn) != 0;
+        case EXPR_OR:
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Report whether intn is a "left" identity for op, i.e. the integer can be
+ * dropped from 1*x, 0+x, -1&x, or 0|x.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+            return intnum_is_pos1(intn) != 0;
+        case EXPR_ADD:
+        case EXPR_OR:
+            return intnum_is_zero(intn) != 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Report whether intn is a "right" identity for op, i.e. the integer can be
+ * dropped from x*1, x/1, x+0, x-0, x&-1, x|0, x<<0, or x>>0.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_DIV:
+            return intnum_is_pos1(intn) != 0;
+        case EXPR_ADD:
+        case EXPR_SUB:
+        case EXPR_OR:
+        case EXPR_SHL:
+        case EXPR_SHR:
+            return intnum_is_zero(intn) != 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Check for and simplify identities.  Returns new number of expr terms.
+ * Sets e->op = EXPR_IDENT if numterms ends up being 1.
+ * Uses numterms parameter instead of e->numterms for basis of "new" number
+ * of terms: only e->terms[0..numterms-1] are treated as live, and
+ * e->numterms itself is neither read nor written here.
+ * Assumes int_term is *only* integer term in e.
+ * NOTE: Really designed to only be used by expr_level_op().
+ *
+ * Two simplifications are tried, in order:
+ *  1. the integer is an identity for e->op (0+x, x*1, ...): the integer is
+ *     deleted and the terms to its right slide down by one;
+ *  2. the integer forces a constant result (0*x, 0&x, -1|x): every other
+ *     term is deleted and the integer becomes terms[0].
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+    int i;
+
+    /* Check for simple identities that delete the intnum.
+     * Don't delete if the intnum is the only thing in the expn.
+     */
+    if ((int_term == 0 && numterms > 1 &&
+         expr_can_delete_int_left(e->op, e->terms[0].data.intn)) ||
+        (int_term > 0 &&
+         expr_can_delete_int_right(e->op, e->terms[int_term].data.intn))) {
+        /* Delete the intnum */
+        intnum_delete(e->terms[int_term].data.intn);
+
+        /* Slide everything to its right over by 1 */
+        if (int_term != numterms-1) /* if it wasn't last.. */
+            memmove(&e->terms[int_term], &e->terms[int_term+1],
+                    (numterms-1-int_term)*sizeof(ExprItem));
+
+        /* Update numterms */
+        numterms--;
+
+        /* The integer is gone: slot int_term now holds a different term
+         * (or a stale copy of the freed intnum), so it must not be examined
+         * by the constant check below.
+         */
+        int_term = -1;
+    }
+
+    /* Check for simple identities that delete everything BUT the intnum.
+     * Don't bother if the intnum is the only thing in the expn, or if it
+     * was just deleted above.
+     */
+    if (int_term != -1 && numterms > 1 &&
+        expr_is_constant(e->op, e->terms[int_term].data.intn)) {
+        /* Loop through the live terms, deleting everything but the integer
+         * term.  The bound is numterms, not e->numterms: the caller may have
+         * compacted or expanded the array without updating e->numterms yet.
+         */
+        for (i=0; i<numterms; i++)
+            if (i != int_term)
+                switch (e->terms[i].type) {
+                    case EXPR_INT:
+                        intnum_delete(e->terms[i].data.intn);
+                        break;
+                    case EXPR_FLOAT:
+                        floatnum_delete(e->terms[i].data.flt);
+                        break;
+                    case EXPR_EXPR:
+                        expr_delete(e->terms[i].data.expn);
+                        break;
+                    default:
+                        break;
+                }
+
+        /* Move integer term to the first term (if not already there) */
+        if (int_term != 0)
+            e->terms[0] = e->terms[int_term];   /* structure copy */
+
+        /* Set numterms to 1 */
+        numterms = 1;
+    }
+
+    /* Change expression to IDENT if possible. */
+    if (numterms == 1)
+        e->op = EXPR_IDENT;
+
+    /* Return the updated numterms */
+    return numterms;
+}
+
+/* Levels the expression tree starting at e.  Eg:
+ *  a+(b+c) -> a+b+c
+ *  (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms.
+ * NOTE: only does *one* level of leveling (no recursion).  Should be called
+ *  post-order on a tree to combine deeper levels.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * The work is done in three passes over e->terms:
+ *  1. pull up IDENT children, count the terms leveling will add, and find
+ *     the first top-level integer term;
+ *  2. fold the other top-level integers into that first one, compacting the
+ *     array in place, then simplify identities;
+ *  3. resize e and copy the terms of same-operator children up, right to
+ *     left, folding their integers as well.
+ *
+ * NOTE(review): the slot accounting in pass 3 assumes each same-operator
+ * child still has at least one non-integer term, which holds when children
+ * were leveled/folded first (post-order) -- confirm for other callers.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int first_int_term = -1;
+    /* Number of term slots e holds on entry; e->numterms is lowered during
+     * folding, so remember this for the downsizing decision.
+     */
+    int alloc_numterms = e->numterms;
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        int new_fold_numterms;
+
+        for (i=first_int_term+1, o=first_int_term+1; i<e->numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* copy term if it changed places */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                /* The output cursor advances for every kept term, moved or
+                 * not; otherwise the next copy would overwrite this one.
+                 */
+                o++;
+            }
+        }
+
+        /* Only the first fold_numterms slots are live now; the rest are
+         * stale copies.  Publish that before handing e to the helper.
+         */
+        e->numterms = fold_numterms;
+
+        /* Simplify identities and make IDENT if possible. */
+        new_fold_numterms = expr_simplify_identity(e, fold_numterms,
+                                                   first_int_term);
+        if (new_fold_numterms < fold_numterms) {
+            /* Terms were removed.  Either the integer itself was dropped, so
+             * there is no integer term left to fold into, or everything but
+             * the integer was dropped, in which case e is now an IDENT and
+             * we return just below without leveling.
+             */
+            level_numterms -= fold_numterms - new_fold_numterms;
+            fold_numterms = new_fold_numterms;
+            first_int_term = -1;
+        }
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < alloc_numterms && alloc_numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Start from the last *live* term (fold_numterms-1): slots beyond it
+     * only hold leftovers from the folding pass.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j]; /* struc */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j];     /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            if (o != i) {
+                /* copy operand if it changed places */
+                e->terms[o] = e->terms[i];
+                /* keep tracking the integer term if it was the one moved */
+                if (i == first_int_term)
+                    first_int_term = o;
+            }
+            /* This term occupies slot o whether or not it moved. */
+            o--;
+        }
+    }
+
+    /* All level_numterms slots are live now. */
+    e->numterms = level_numterms;
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms. */
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else {
+        if (level_numterms == 1)
+            e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Level an entire expression tree: apply expr_level_op() to every node,
+ * handling the subexpression terms of a node before the node itself
+ * (post-order).  fold_const is passed through unchanged.
+ * A null e is returned unchanged as a null pointer.
+ *
+ * Returns the root, which expr_level_op() may have reallocated.
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    int idx;
+
+    if (e == 0)
+        return 0;
+
+    /* level every subexpression term first */
+    idx = 0;
+    while (idx < e->numterms) {
+        ExprItem *term = &e->terms[idx];
+
+        if (term->type == EXPR_EXPR)
+            term->data.expn = expr_level_tree(term->data.expn, fold_const);
+        idx++;
+    }
+
+    /* then level this node */
+    return expr_level_op(e, fold_const);
+}
+
+/* Sort comparison used by expr_order_terms().
+ * Both arguments point at ExprItem's; the result is the difference of their
+ * type values, so it relies on the ExprType enum being declared in
+ * canonical order.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *lhs = va;
+    const ExprItem *rhs = vb;
+
+    return lhs->type - rhs->type;
+}
+
+/* Put the terms of e into canonical order: REG, INT, FLOAT, SYM, EXPR.
+ * Reordering is only done for ADD, MUL, OR, AND and XOR, where it cannot
+ * change the meaning of the expression (so, for example, SUB is left alone).
+ * Terms of the same type keep their original relative order.
+ * NOTE: Only one level is reordered (no recursion).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* a single element is already in order */
+    if (e->numterms == 1)
+        return;
+
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is used because it is stable (equal-typed terms stay in
+     * their original order) and quick on input that is already sorted.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*.
+ * Subexpression terms are copied recursively in full (except == -1).
+ * The term at index "except" is left uninitialized in the copy; the caller
+ * is expected to fill it in.  Pass -1 to copy every term.
+ * Returns 0 if e is a null pointer.
+ */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+ int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+ /* struct expr already holds two terms; only add room beyond that.
+ * Never allocate less than sizeof(expr): with numterms < 2 the unguarded
+ * (numterms-2) goes negative and shrinks the request below the struct
+ * size.
+ */
+ n = xmalloc(sizeof(expr)+((e->numterms<2) ? 0 :
+ sizeof(ExprItem)*(e->numterms-2)));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+ n->numterms = e->numterms;
+ for (i=0; i<e->numterms; i++) {
+ ExprItem *dest = &n->terms[i];
+ const ExprItem *src = &e->terms[i];
+
+ if (i != except) {
+ dest->type = src->type;
+ switch (src->type) {
+ case EXPR_SYM:
+ dest->data.sym = src->data.sym;
+ break;
+ case EXPR_EXPR:
+ dest->data.expn = expr_copy_except(src->data.expn, -1);
+ break;
+ case EXPR_INT:
+ dest->data.intn = intnum_copy(src->data.intn);
+ break;
+ case EXPR_FLOAT:
+ dest->data.flt = floatnum_copy(src->data.flt);
+ break;
+ case EXPR_REG:
+ dest->data.reg.num = src->data.reg.num;
+ dest->data.reg.size = src->data.reg.size;
+ break;
+ default:
+ break;
+ }
+ }
+ }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d;
+ return (ei->type & *t);
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback);
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ static int *reg16[8] = {0,0,0,0,0,0,0,0};
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0;
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0;
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret;
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers.
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.
+ *
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1;
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
+/* Simplify expression by getting rid of unnecessary branches. */
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ e = expr_xform_neg_tree(e);
+ e = expr_level_tree(e, 1);
+ return e;
}
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ *ep = expr_simplify(*ep);
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_INT)
+ return (*ep)->terms[0].data.intn;
else
return (intnum *)NULL;
}
expr_print(expr *e)
{
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ char opstr[3];
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_NOT:
- printf("~");
+ strcpy(opstr, "~");
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
- printf("!");
+ strcpy(opstr, "!");
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
if (ea) {
if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) ||
(!ea->valid_modrm && ea->need_modrm))) {
- /* First expand equ's and simplify expression */
+ /* First expand equ's */
expr_expand_equ(ea->disp);
- expr_simplify(ea->disp);
/* Check validity of effective address and calc R/M bits of
* Mod/RM byte and SIB byte. We won't know the Mod field
RCSID("$IdPath$");
+/* Types listed in canonical sorting order. See expr_order_terms(). */
typedef enum {
- EXPR_NONE, /* for left side of a NOT, NEG, etc. */
- EXPR_SYM,
- EXPR_EXPR,
- EXPR_INT,
- EXPR_FLOAT,
- EXPR_REG
+ EXPR_NONE = 0,
+ EXPR_REG = 1<<0,
+ EXPR_INT = 1<<1,
+ EXPR_FLOAT = 1<<2,
+ EXPR_SYM = 1<<3,
+ EXPR_EXPR = 1<<4
} ExprType;
struct ExprItem {
} data;
};
+/* Some operations may allow more than two operand terms:
+ * ADD, MUL, OR, AND, XOR
+ */
struct expr {
- ExprItem left, right;
ExprOp op;
char *filename;
unsigned long line;
+ int numterms;
+ ExprItem terms[2]; /* structure may be extended to include more */
};
static int expr_traverse_nodes_post(expr *e, void *d,
/* allocate a new expression node, with children as defined.
* If it's a unary operator, put the element on the right */
expr *
-expr_new(ExprItem *left, ExprOp op, ExprItem *right)
+expr_new(ExprOp op, ExprItem *left, ExprItem *right)
{
expr *ptr;
ptr = xmalloc(sizeof(expr));
- ptr->left.type = EXPR_NONE;
ptr->op = op;
- ptr->right.type = EXPR_NONE;
+ ptr->numterms = 0;
+ ptr->terms[0].type = EXPR_NONE;
+ ptr->terms[1].type = EXPR_NONE;
if (left) {
- memcpy(&ptr->left, left, sizeof(ExprItem));
+ memcpy(&ptr->terms[0], left, sizeof(ExprItem));
free(left);
- }
- if (right) {
- memcpy(&ptr->right, right, sizeof(ExprItem));
- free(right);
+ ptr->numterms++;
} else {
InternalError(__LINE__, __FILE__,
_("Right side of expression must exist"));
}
+ if (right) {
+ memcpy(&ptr->terms[1], right, sizeof(ExprItem));
+ free(right);
+ ptr->numterms++;
+ }
+
ptr->filename = xstrdup(in_filename);
ptr->line = line_number;
return e;
}
+/* Negate just a single ExprItem by building a -1*ei subexpression */
static void
-expr_copy_side(ExprItem *dest, const ExprItem *ei)
+expr_xform_neg_item(expr *e, ExprItem *ei)
+{
+ expr *sube = xmalloc(sizeof(expr));
+
+ /* Build -1*ei subexpression */
+ sube->op = EXPR_MUL;
+ sube->filename = xstrdup(e->filename);
+ sube->line = e->line;
+ sube->numterms = 2;
+ sube->terms[0].type = EXPR_INT;
+ sube->terms[0].data.intn = intnum_new_int(-1);
+ sube->terms[1] = *ei; /* structure copy */
+
+ /* Replace original ExprItem with subexp */
+ ei->type = EXPR_EXPR;
+ ei->data.expn = sube;
+}
+
+/* Negates e by multiplying by -1, with distribution over lower-precedence
+ * operators (eg ADD) and special handling to simplify result w/ADD, NEG, and
+ * others.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg_helper(expr *e)
{
- dest->type = ei->type;
- switch (ei->type) {
- case EXPR_SYM:
- dest->data.sym = ei->data.sym;
+ expr *ne;
+ int i;
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* distribute (recursively if expr) over terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR)
+ e->terms[i].data.expn =
+ expr_xform_neg_helper(e->terms[i].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[i]);
+ }
break;
- case EXPR_EXPR:
- dest->data.expn = expr_copy(ei->data.expn);
+ case EXPR_SUB:
+ /* change op to ADD, and recursively negate left side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[0].type == EXPR_EXPR)
+ e->terms[0].data.expn =
+ expr_xform_neg_helper(e->terms[0].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[0]);
break;
- case EXPR_INT:
- dest->data.intn = intnum_copy(ei->data.intn);
+ case EXPR_NEG:
+ /* Negating a negated value? Make it an IDENT. */
+ e->op = EXPR_IDENT;
break;
- case EXPR_FLOAT:
- dest->data.flt = floatnum_copy(ei->data.flt);
+ case EXPR_IDENT:
+ /* Negating an ident? Change it into a MUL w/ -1. */
+ e->op = EXPR_MUL;
+ e->numterms = 2;
+ e->terms[1].type = EXPR_INT;
+ e->terms[1].data.intn = intnum_new_int(-1);
break;
- case EXPR_REG:
- dest->data.reg.num = ei->data.reg.num;
- dest->data.reg.size = ei->data.reg.size;
+ default:
+ /* Everything else. MUL will be combined when it's leveled.
+ * Make a new expr (to replace e) with -1*e.
+ */
+ ne = xmalloc(sizeof(expr));
+ ne->op = EXPR_MUL;
+ ne->filename = xstrdup(e->filename);
+ ne->line = e->line;
+ ne->numterms = 2;
+ ne->terms[0].type = EXPR_INT;
+ ne->terms[0].data.intn = intnum_new_int(-1);
+ ne->terms[1].type = EXPR_EXPR;
+ ne->terms[1].data.expn = e;
+ return ne;
+ }
+ return e;
+}
+
+/* Transforms negatives into expressions that are easier to combine:
+ * -x -> -1*x
+ * a-b -> a+(-1*b)
+ *
+ * Call post-order on an expression tree to transform the entire tree.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_xform_neg(expr *e)
+{
+ switch (e->op) {
+ case EXPR_NEG:
+ /* Turn -x into -1*x */
+ e->op = EXPR_IDENT;
+ return expr_xform_neg_helper(e);
+ case EXPR_SUB:
+ /* Turn a-b into a+(-1*b) */
+
+ /* change op to ADD, and recursively negate right side (if expr) */
+ e->op = EXPR_ADD;
+ if (e->terms[1].type == EXPR_EXPR)
+ e->terms[1].data.expn =
+ expr_xform_neg_helper(e->terms[1].data.expn);
+ else
+ expr_xform_neg_item(e, &e->terms[1]);
break;
default:
break;
}
+
+ return e;
}
-expr *
-expr_copy(const expr *e)
+/* Run expr_xform_neg() over a whole expression tree.
+ *
+ * Subexpression terms are transformed first (post-order), then the node
+ * itself.  Each transformed subexpression is stored back into its term
+ * because the transform may hand back a different node.
+ *
+ * Returns a possibly reallocated e, or 0 if e is 0.
+ */
+static expr *
+expr_xform_neg_tree(expr *e)
+{
+    int idx = 0;
+
+    if (e == 0)
+        return 0;
+
+    /* children first, so the node itself is transformed last */
+    while (idx < e->numterms) {
+        ExprItem *term = &e->terms[idx++];
+
+        if (term->type != EXPR_EXPR)
+            continue;
+        term->data.expn = expr_xform_neg_tree(term->data.expn);
+    }
+
+    return expr_xform_neg(e);
+}
+
+/* Does combining intn with anything under op give a constant result?
+ * True for 0*x, 0&x and -1|x.
+ * Returns nonzero (1) if so, 0 otherwise.
+ */
+static int
+expr_is_constant(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_AND:
+            /* anything times/and'ed with zero is zero */
+            return intnum_is_zero(intn) != 0;
+        case EXPR_OR:
+            /* anything or'ed with all-ones is all-ones */
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Is intn an identity element when it is the LEFT operand of op?
+ * True for 1*x, 0+x, -1&x and 0|x.
+ * Returns nonzero (1) if the integer can be dropped, 0 otherwise.
+ */
+static int
+expr_can_delete_int_left(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+            return intnum_is_pos1(intn) != 0;
+        case EXPR_ADD:
+        case EXPR_OR:
+            return intnum_is_zero(intn) != 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Is intn an identity element when it is a RIGHT operand of op?
+ * True for x*1, x/1, x+0, x-0, x&-1, x|0, x<<0 and x>>0.
+ * Returns nonzero (1) if the integer can be dropped, 0 otherwise.
+ */
+static int
+expr_can_delete_int_right(ExprOp op, intnum *intn)
+{
+    switch (op) {
+        case EXPR_MUL:
+        case EXPR_DIV:
+            return intnum_is_pos1(intn) != 0;
+        case EXPR_ADD:
+        case EXPR_SUB:
+        case EXPR_OR:
+        case EXPR_SHL:
+        case EXPR_SHR:
+            return intnum_is_zero(intn) != 0;
+        case EXPR_AND:
+            return intnum_is_neg1(intn) != 0;
+        default:
+            return 0;
+    }
+}
+
+/* Check for and simplify identities.  Returns new number of expr terms.
+ * Sets e->op = EXPR_IDENT if numterms ends up being 1.
+ * Uses numterms parameter instead of e->numterms for basis of "new" number
+ * of terms: only terms[0..numterms-1] are considered live, and e->numterms
+ * is neither read nor written here (the caller stores the return value).
+ * Assumes int_term is *only* integer term in e.
+ * NOTE: Really designed to only be used by expr_level_op().
+ */
+static int
+expr_simplify_identity(expr *e, int numterms, int int_term)
+{
+    int i;
+
+    /* Check for simple identities that delete the intnum.
+     * Don't delete if the intnum is the only thing in the expn.
+     */
+    if ((int_term == 0 && numterms > 1 &&
+         expr_can_delete_int_left(e->op, e->terms[0].data.intn)) ||
+        (int_term > 0 &&
+         expr_can_delete_int_right(e->op, e->terms[int_term].data.intn))) {
+        /* Delete the intnum */
+        intnum_delete(e->terms[int_term].data.intn);
+
+        /* Slide everything to its right over by 1 */
+        if (int_term != numterms-1) /* if it wasn't last.. */
+            memmove(&e->terms[int_term], &e->terms[int_term+1],
+                    (numterms-1-int_term)*sizeof(ExprItem));
+
+        /* Update numterms */
+        numterms--;
+
+        /* The integer is gone: whatever now sits at the old index is not
+         * an intnum, so it must not be inspected as one below.
+         */
+        int_term = -1;
+    }
+
+    /* Check for simple identities that delete everything BUT the intnum.
+     * Don't bother if the intnum is the only thing in the expn, or if it
+     * was just deleted above.
+     */
+    if (numterms > 1 && int_term != -1 &&
+        expr_is_constant(e->op, e->terms[int_term].data.intn)) {
+        /* Loop through the live terms, deleting everything but the integer
+         * term.  Bounded by numterms (not e->numterms) so that slots the
+         * caller has already vacated are not freed a second time and slots
+         * it has just added are not leaked.
+         */
+        for (i=0; i<numterms; i++)
+            if (i != int_term)
+                switch (e->terms[i].type) {
+                    case EXPR_INT:
+                        intnum_delete(e->terms[i].data.intn);
+                        break;
+                    case EXPR_FLOAT:
+                        floatnum_delete(e->terms[i].data.flt);
+                        break;
+                    case EXPR_EXPR:
+                        expr_delete(e->terms[i].data.expn);
+                        break;
+                    default:
+                        break;
+                }
+
+        /* Move integer term to the first term (if not already there) */
+        if (int_term != 0)
+            e->terms[0] = e->terms[int_term];   /* structure copy */
+
+        /* Set numterms to 1 */
+        numterms = 1;
+    }
+
+    /* Change expression to IDENT if possible. */
+    if (numterms == 1)
+        e->op = EXPR_IDENT;
+
+    /* Return the updated numterms */
+    return numterms;
+}
+
+/* Levels the expression tree starting at e.  Eg:
+ * a+(b+c) -> a+b+c
+ * (a+b)+(c+d) -> a+b+c+d
+ * Naturally, only levels operators that allow more than two operand terms.
+ * NOTE: only does *one* level of leveling (no recursion).  Should be called
+ * post-order on a tree to combine deeper levels.
+ * Also brings up any IDENT values into the current level (for ALL operators).
+ * Folds (combines by evaluation) *integer* constant values if fold_const != 0.
+ *
+ * Works in three phases:
+ *  1. pull IDENT wrappers up and count how many terms leveling will yield;
+ *  2. fold the integer terms already present in e into the first one and
+ *     compact the remaining terms to the left;
+ *  3. if leveling applies, grow e and copy subexpression terms up, right to
+ *     left, folding their integer terms as they are encountered.
+ *
+ * Returns a possibly reallocated e.
+ */
+static expr *
+expr_level_op(expr *e, int fold_const)
+{
+    int i, j, o, fold_numterms, level_numterms, level_fold_numterms;
+    int new_fold_numterms;
+    int first_int_term = -1;
+
+    /* Determine how many operands will need to be brought up (for leveling).
+     * Go ahead and bring up any IDENT'ed values.
+     */
+    level_numterms = e->numterms;
+    level_fold_numterms = 0;
+    for (i=0; i<e->numterms; i++) {
+        /* Search downward until we find something *other* than an
+         * IDENT, then bring it up to the current level.
+         */
+        while (e->terms[i].type == EXPR_EXPR &&
+               e->terms[i].data.expn->op == EXPR_IDENT) {
+            expr *sube = e->terms[i].data.expn;
+            e->terms[i] = sube->terms[0];
+            free(sube->filename);
+            free(sube);
+        }
+
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* It's an expression w/the same operator, add in its numterms.
+             * But don't forget to subtract one for the expr itself!
+             */
+            level_numterms += e->terms[i].data.expn->numterms - 1;
+
+            /* If we're folding constants, count up the number of constants
+             * that will be merged in.
+             */
+            if (fold_const)
+                for (j=0; j<e->terms[i].data.expn->numterms; j++)
+                    if (e->terms[i].data.expn->terms[j].type == EXPR_INT)
+                        level_fold_numterms++;
+        }
+
+        /* Find the first integer term (if one is present) if we're folding
+         * constants.
+         */
+        if (fold_const && first_int_term == -1 && e->terms[i].type == EXPR_INT)
+            first_int_term = i;
+    }
+
+    /* Look for other integer terms if there's one and combine.
+     * Also eliminate empty spaces when combining and adjust numterms
+     * variables.
+     */
+    fold_numterms = e->numterms;
+    if (first_int_term != -1) {
+        for (i=first_int_term+1, o=first_int_term+1; i<e->numterms; i++) {
+            if (e->terms[i].type == EXPR_INT) {
+                intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                            e->terms[i].data.intn);
+                fold_numterms--;
+                level_numterms--;
+                /* make sure to delete folded intnum */
+                intnum_delete(e->terms[i].data.intn);
+            } else {
+                /* copy term if it changed places */
+                if (o != i)
+                    e->terms[o] = e->terms[i];
+                /* a kept term always consumes an output slot, moved or not;
+                 * otherwise the next kept term would overwrite it.
+                 */
+                o++;
+            }
+        }
+
+        /* Simplify identities and make IDENT if possible.  Any term the
+         * simplification removed also comes off the leveled total.
+         */
+        new_fold_numterms = expr_simplify_identity(e, fold_numterms,
+                                                   first_int_term);
+        level_numterms -= fold_numterms - new_fold_numterms;
+        fold_numterms = new_fold_numterms;
+
+        /* The integer term may have been deleted or moved by the
+         * simplification; find where (if anywhere) it lives now.  There is
+         * at most one, since the others were folded into it above.
+         */
+        first_int_term = -1;
+        for (i=0; i<fold_numterms; i++)
+            if (e->terms[i].type == EXPR_INT) {
+                first_int_term = i;
+                break;
+            }
+    }
+
+    /* Only level operators that allow more than two operand terms.
+     * Also don't bother leveling if it's not necessary to bring up any terms.
+     */
+    if ((e->op != EXPR_ADD && e->op != EXPR_MUL && e->op != EXPR_OR &&
+         e->op != EXPR_AND && e->op != EXPR_XOR) ||
+        level_numterms <= fold_numterms) {
+        /* Downsize e if necessary */
+        if (fold_numterms < e->numterms && e->numterms > 2)
+            e = xrealloc(e, sizeof(expr)+((fold_numterms<2) ? 0 :
+                         sizeof(ExprItem)*(fold_numterms-2)));
+        /* Update numterms */
+        e->numterms = fold_numterms;
+        return e;
+    }
+
+    /* Adjust numterms for constant folding from terms being "pulled up".
+     * Careful: if there's no integer term in e, then save space for it.
+     */
+    if (fold_const) {
+        level_numterms -= level_fold_numterms;
+        if (first_int_term == -1 && level_fold_numterms != 0)
+            level_numterms++;
+    }
+
+    /* Alloc more (or conceivably less, but not usually) space for e */
+    e = xrealloc(e, sizeof(expr)+((level_numterms<2) ? 0 :
+                 sizeof(ExprItem)*(level_numterms-2)));
+
+    /* Copy up ExprItem's.  Iterate from right to left to keep the same
+     * ordering as was present originally.
+     * Combine integer terms as necessary.
+     * Start from the last *live* term: slots past fold_numterms were
+     * vacated by the folding above and must not be copied.
+     */
+    for (i=fold_numterms-1, o=level_numterms-1; i>=0; i--) {
+        if (e->terms[i].type == EXPR_EXPR &&
+            e->terms[i].data.expn->op == e->op) {
+            /* bring up subexpression */
+            expr *sube = e->terms[i].data.expn;
+
+            /* copy terms right to left */
+            for (j=sube->numterms-1; j>=0; j--) {
+                if (fold_const && sube->terms[j].type == EXPR_INT) {
+                    /* Need to fold it in.. but if there's no int term already,
+                     * just copy into a new one.
+                     */
+                    if (first_int_term == -1) {
+                        first_int_term = o--;
+                        e->terms[first_int_term] = sube->terms[j];  /* struc */
+                    } else {
+                        intnum_calc(e->terms[first_int_term].data.intn, e->op,
+                                    sube->terms[j].data.intn);
+                        /* make sure to delete folded intnum */
+                        intnum_delete(sube->terms[j].data.intn);
+                    }
+                } else {
+                    e->terms[o--] = sube->terms[j];     /* structure copy */
+                }
+            }
+
+            /* delete subexpression, but *don't delete nodes* (as we've just
+             * copied them!)
+             */
+            free(sube->filename);
+            free(sube);
+        } else {
+            if (o != i) {
+                /* copy operand if it changed places */
+                e->terms[o] = e->terms[i];
+                /* If the integer term moved, follow it: its old slot may be
+                 * overwritten before the next fold reads it.
+                 */
+                if (i == first_int_term)
+                    first_int_term = o;
+            }
+            /* the operand occupies an output slot whether it moved or not */
+            o--;
+        }
+    }
+
+    /* Simplify identities, make IDENT if possible, and save to e->numterms. */
+    if (first_int_term != -1) {
+        e->numterms = expr_simplify_identity(e, level_numterms,
+                                             first_int_term);
+    } else {
+        e->numterms = level_numterms;
+        if (level_numterms == 1)
+            e->op = EXPR_IDENT;
+    }
+
+    return e;
+}
+
+/* Level a complete expression tree.
+ *
+ * Subexpression terms are leveled first (post-order), then expr_level_op()
+ * is applied to the node itself.  fold_const is passed unchanged to every
+ * level.
+ *
+ * Returns a possibly reallocated e, or 0 if e is 0.
+ */
+static expr *
+expr_level_tree(expr *e, int fold_const)
+{
+    int idx = 0;
+
+    if (e == 0)
+        return 0;
+
+    /* children first, so deeper levels are already flat when e is leveled */
+    while (idx < e->numterms) {
+        ExprItem *term = &e->terms[idx++];
+
+        if (term->type != EXPR_EXPR)
+            continue;
+        term->data.expn = expr_level_tree(term->data.expn, fold_const);
+    }
+
+    return expr_level_op(e, fold_const);
+}
+
+/* Sort comparator used by expr_order_terms().
+ * Orders two ExprItem's purely by their type value, so it relies on the
+ * ExprType enum being declared in canonical order.
+ * Returns <0, 0 or >0 as va sorts before, with, or after vb.
+ */
+static int
+expr_order_terms_compare(const void *va, const void *vb)
+{
+    const ExprItem *lhs = va;
+    const ExprItem *rhs = vb;
+    int lhs_rank = (int)lhs->type;
+    int rhs_rank = (int)rhs->type;
+
+    return lhs_rank - rhs_rank;
+}
+
+/* Put the terms of e into canonical order: REG, INT, FLOAT, SYM, EXPR.
+ * Only ADD, MUL, OR, AND and XOR are reordered; every other operator is
+ * left untouched so the meaning of the expression cannot change
+ * (eg, SUB is never reordered).
+ * Terms of the same type keep their original relative order.
+ * NOTE: Works on *one* level only (no recursion into subexpressions).
+ */
+static void
+expr_order_terms(expr *e)
+{
+    int reorderable;
+
+    /* a single term is trivially in order */
+    if (e->numterms == 1)
+        return;
+
+    reorderable = (e->op == EXPR_ADD || e->op == EXPR_MUL ||
+                   e->op == EXPR_OR || e->op == EXPR_AND ||
+                   e->op == EXPR_XOR);
+    if (!reorderable)
+        return;
+
+    /* mergesort is used because it is stable (equal-typed terms stay in
+     * their original order) and quick on input that is already sorted.
+     */
+    mergesort(e->terms, e->numterms, sizeof(ExprItem),
+              expr_order_terms_compare);
+}
+
+/* Copy entire expression EXCEPT for index "except" at *top level only*. */
+static expr *
+expr_copy_except(const expr *e, int except)
{
expr *n;
+ int i;
if (!e)
return 0;
- n = xmalloc(sizeof(expr));
+ n = xmalloc(sizeof(expr)+sizeof(ExprItem)*(e->numterms-2));
- expr_copy_side(&n->left, &e->left);
- expr_copy_side(&n->right, &e->right);
n->op = e->op;
n->filename = xstrdup(e->filename);
n->line = e->line;
+ n->numterms = e->numterms;
+ for (i=0; i<e->numterms; i++) {
+ ExprItem *dest = &n->terms[i];
+ const ExprItem *src = &e->terms[i];
+
+ if (i != except) {
+ dest->type = src->type;
+ switch (src->type) {
+ case EXPR_SYM:
+ dest->data.sym = src->data.sym;
+ break;
+ case EXPR_EXPR:
+ dest->data.expn = expr_copy_except(src->data.expn, -1);
+ break;
+ case EXPR_INT:
+ dest->data.intn = intnum_copy(src->data.intn);
+ break;
+ case EXPR_FLOAT:
+ dest->data.flt = floatnum_copy(src->data.flt);
+ break;
+ case EXPR_REG:
+ dest->data.reg.num = src->data.reg.num;
+ dest->data.reg.size = src->data.reg.size;
+ break;
+ default:
+ break;
+ }
+ }
+ }
return n;
}
-static
-int expr_delete_each(expr *e, void *d)
+expr *
+expr_copy(const expr *e)
{
- switch (e->left.type) {
- case EXPR_INT:
- intnum_delete(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->left.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
- }
- switch (e->right.type) {
- case EXPR_INT:
- intnum_delete(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_delete(e->right.data.flt);
- break;
- default:
- break; /* none of the other types needs to be deleted */
+ return expr_copy_except(e, -1);
+}
+
+static int
+expr_delete_each(expr *e, void *d)
+{
+ int i;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_INT:
+ intnum_delete(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_delete(e->terms[i].data.flt);
+ break;
+ default:
+ break; /* none of the other types needs to be deleted */
+ }
}
free(e->filename);
free(e); /* free ourselves */
}
static int
-expr_contains_float_callback(ExprItem *ei, void *d)
+expr_contains_callback(ExprItem *ei, void *d)
{
- return (ei->type == EXPR_FLOAT);
+ ExprType *t = d;
+ return (ei->type & *t);
}
-int
-expr_contains_float(expr *e)
+static int
+expr_contains(expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, NULL, expr_contains_float_callback);
+ return expr_traverse_leaves_in(e, &t, expr_contains_callback);
}
typedef struct checkea_invalid16_data {
- enum havereg {
- HAVE_NONE = 0,
- HAVE_BX = 1 << 0,
- HAVE_SI = 1 << 1,
- HAVE_DI = 1 << 2,
- HAVE_BP = 1 << 3
- } havereg;
- int regleft, regright;
+ int bx, si, di, bp; /* total multiplier for each reg */
} checkea_invalid16_data;
/* Only works if ei->type == EXPR_REG (doesn't check).
* Overwrites ei with intnum of 0 (to eliminate regs from the final expr).
*/
-static int
-expr_checkea_invalid16_reg(ExprItem *ei, checkea_invalid16_data *data)
+static int *
+expr_checkea_get_reg16(ExprItem *ei, checkea_invalid16_data *data)
{
/* in order: ax,cx,dx,bx,sp,bp,si,di */
- static const char reg16[8] = {0,0,0,HAVE_BX,0,HAVE_BP,HAVE_SI,HAVE_DI};
+ static int *reg16[8] = {0,0,0,0,0,0,0,0};
+ int *ret;
+
+ reg16[3] = &data->bx;
+ reg16[5] = &data->bp;
+ reg16[6] = &data->si;
+ reg16[7] = &data->di;
/* don't allow 32-bit registers */
if (ei->data.reg.size != 16)
- return 1;
+ return 0;
- /* only allow BX, SI, DI, BP */
- if (!reg16[ei->data.reg.num & 7]) /* & 7 is sanity check */
- return 1;
- /* OR it into havereg mask */
- data->havereg |= reg16[ei->data.reg.num & 7];
+ ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
- /* only one of each of BX/BP, SI/DI pairs is legal */
- if ((data->havereg & HAVE_BX) && (data->havereg & HAVE_BP))
- return 1;
- if ((data->havereg & HAVE_SI) && (data->havereg & HAVE_DI))
- return 1;
+ /* only allow BX, SI, DI, BP */
+ if (!ret)
+ return 0;
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
ei->data.intn = intnum_new_int(0);
/* we're okay */
- return 0;
+ return ret;
}
-/* Returns 0 if expression is correct up to this point, 1 if there's an error.
- * Updates d with new info if necessary.
- * Must be called using expr_traverse_nodes_post() to work properly.
+/* Distribute over registers to help bring them to the topmost level of e.
+ * Also check for illegal operations against registers.
+ * Returns 0 if something was illegal, 1 if legal and nothing in e changed,
+ * and 2 if legal and e needs to be simplified.
+ *
+ * Only half joking: Someday make this/checkea able to accept crazy things
+ * like: (bx+di)*(bx+di)-bx*bx-2*bx*di-di*di+di? Probably not: NASM never
+ * accepted such things, and it's doubtful such an expn is valid anyway
+ * (even though the above one is). But even macros would be hard-pressed
+ * to generate something like this.
+ *
+ * e must already have been simplified for this function to work properly
+ * (as it doesn't think things like SUB are valid).
+ *
+ * IMPLEMENTATION NOTE: About the only thing this function really needs to
+ * "distribute" is: (non-float-expn or intnum) * (sum expn of registers).
+ *
+ * TODO: Clean up this code, make it easier to understand.
*/
static int
-expr_checkea_invalid16_callback(expr *e, void *d)
+expr_checkea_distcheck_reg(expr **ep)
{
- checkea_invalid16_data *data = (checkea_invalid16_data *)d;
+ expr *e = *ep;
+ int i;
+ int havereg = -1, havereg_expr = -1;
+ int retval = 1; /* default to legal, no changes */
- switch (e->left.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->left, data))
- return 1;
- data->regleft = 1;
- break;
- default:
- break;
- }
- switch (e->right.type) {
- case EXPR_FLOAT:
- return 1; /* disallow float values */
- case EXPR_REG:
- /* record and check register values */
- if (expr_checkea_invalid16_reg(&e->right, data))
- return 1;
- data->regright = 1;
- break;
- default:
- break;
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_REG:
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL &&
+ e->op != EXPR_IDENT)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ break;
+ case EXPR_FLOAT:
+ /* Floats not allowed. */
+ return 0;
+ case EXPR_EXPR:
+ if (expr_contains(e->terms[i].data.expn, EXPR_REG)) {
+ int ret2;
+
+ /* Check op to make sure it's valid to use w/register. */
+ if (e->op != EXPR_ADD && e->op != EXPR_MUL)
+ return 0;
+ /* Check for reg*reg */
+ if (e->op == EXPR_MUL && havereg != -1)
+ return 0;
+ havereg = i;
+ havereg_expr = i;
+ /* Recurse to check lower levels */
+ ret2 = expr_checkea_distcheck_reg(&e->terms[i].data.expn);
+ if (ret2 == 0)
+ return 0;
+ if (ret2 == 2)
+ retval = 2;
+ } else if (expr_contains(e->terms[i].data.expn, EXPR_FLOAT))
+ return 0; /* Disallow floats */
+ break;
+ default:
+ break;
+ }
}
- /* only op allowed with register on right is ADD (and of course, IDENT) */
- if (data->regright && e->op != EXPR_ADD && e->op != EXPR_IDENT)
- return 1;
+ /* just exit if no registers were used */
+ if (havereg == -1)
+ return retval;
+
+ /* Distribute */
+ if (e->op == EXPR_MUL && havereg_expr != -1) {
+ expr *ne;
+
+ retval = 2; /* we're going to change it */
+
+ /* The reg expn *must* be EXPR_ADD at this point. Sanity check. */
+ if (e->terms[havereg_expr].type != EXPR_EXPR ||
+ e->terms[havereg_expr].data.expn->op != EXPR_ADD)
+ InternalError(__LINE__, __FILE__,
+ _("Register expression not ADD or EXPN"));
+
+ /* Iterate over each term in reg expn */
+ for (i=0; i<e->terms[havereg_expr].data.expn->numterms; i++) {
+ /* Copy everything EXCEPT havereg_expr term into new expression */
+ ne = expr_copy_except(e, havereg_expr);
+ /* Copy reg expr term into uncopied (empty) term in new expn */
+ ne->terms[havereg_expr] =
+ e->terms[havereg_expr].data.expn->terms[i]; /* struct copy */
+ /* Overwrite old reg expr term with new expn */
+ e->terms[havereg_expr].data.expn->terms[i].type = EXPR_EXPR;
+ e->terms[havereg_expr].data.expn->terms[i].data.expn = ne;
+ }
- /* only ops allowed with register on left are ADD or SUB */
- if ((data->regleft && !data->regright) && e->op != EXPR_ADD &&
- e->op != EXPR_SUB)
- return 1;
+ /* Replace e with expanded reg expn */
+ ne = e->terms[havereg_expr].data.expn;
+ e->terms[havereg_expr].type = EXPR_NONE; /* don't delete it! */
+ expr_delete(e); /* but everything else */
+ e = ne;
+ *ep = ne;
+ }
- /* we're okay */
- return 0;
+ return retval;
}
static int
}
int
-expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib, unsigned char *n_sib)
{
+ expr *e = *ep;
const intnum *intn;
long dispval;
+ int i;
+ int *reg;
if (*addrsize == 0) {
/* we need to figure out the address size from what we know about:
/* check for use of 16 or 32-bit registers; if none are used
* default to bits setting.
*/
- if (!expr_traverse_leaves_in(*e, addrsize,
+ if (!expr_traverse_leaves_in(e, addrsize,
expr_checkea_getregsize_callback))
*addrsize = bits;
+ /* TODO: Add optional warning here if switched address size
+ * from bits setting just by register use.. eg [ax] in
+ * 32-bit mode would generate a warning.
+ */
}
}
if (*addrsize == 32 && (*n_modrm || *n_sib)) {
- /* TODO */
} else if (*addrsize == 16 && *n_modrm) {
static const unsigned char modrm16[16] = {
0006 /* disp16 */, 0007 /* [BX] */, 0004 /* [SI] */,
0377 /* invalid */
};
checkea_invalid16_data data;
-
- data.havereg = HAVE_NONE;
- data.regleft = 0;
- data.regright = 0;
+ enum {
+ HAVE_NONE = 0,
+ HAVE_BX = 1<<0,
+ HAVE_SI = 1<<1,
+ HAVE_DI = 1<<2,
+ HAVE_BP = 1<<3
+ } havereg = HAVE_NONE;
+
+ data.bx = 0;
+ data.si = 0;
+ data.di = 0;
+ data.bp = 0;
/* 16-bit cannot have SIB */
*sib = 0;
*v_sib = 0;
*n_sib = 0;
- /* Check for valid effective address, and get used registers */
- if (expr_traverse_nodes_post(*e, &data,
- expr_checkea_invalid16_callback)) {
- ErrorAt((*e)->filename, (*e)->line, _("invalid effective address"));
+ /* Determine if expression is superficially valid:
+ * Valid expr should be [(int-equiv expn)]+[reg*(int-equiv expn)+...]
+ * where the [...] parts are optional.
+ * To check this, first look at top expn operator.. if it's not ADD or
+ * MUL, then no registers are valid for use.
+ */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ switch (expr_checkea_distcheck_reg(ep)) {
+ case 0:
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ case 2:
+ /* Need to simplify again */
+ *ep = expr_simplify(*ep);
+ e = *ep;
+ break;
+ default:
+ break;
+ }
+
+ switch (e->op) {
+ case EXPR_ADD:
+ /* Prescan for non-int multipliers.
+ * This is because if any of the terms is a more complex
+ * expr (eg, undetermined value), we don't want to try to
+ * figure out *any* of the expression, because each register
+ * lookup overwrites the register with a 0 value! And storing
+ * the state of this routine from one execution to the next
+ * would be a major chore.
+ */
+ for (i=0; i<e->numterms; i++)
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (e->terms[i].data.expn->numterms > 2)
+ return 1;
+ expr_order_terms(e->terms[i].data.expn);
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ return 1;
+ }
+
+ /* FALLTHROUGH */
+ case EXPR_IDENT:
+ /* Check each term for register (and possible multiplier). */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_REG) {
+ reg = expr_checkea_get_reg16(&e->terms[i], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg)++;
+ } else if (e->terms[i].type == EXPR_EXPR) {
+ /* Already ordered from ADD above, just grab the value.
+ * Sanity check for EXPR_INT.
+ */
+ if (e->terms[i].data.expn->terms[0].type != EXPR_REG)
+ InternalError(__LINE__, __FILE__,
+ _("Register not found in reg expn"));
+ if (e->terms[i].data.expn->terms[1].type != EXPR_INT)
+ InternalError(__LINE__, __FILE__,
+ _("Non-integer value in reg expn"));
+ reg =
+ expr_checkea_get_reg16(&e->terms[i].data.expn->terms[0],
+ &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) +=
+ intnum_get_int(e->terms[i].data.expn->terms[1].data.intn);
+ }
+ }
+ break;
+ case EXPR_MUL:
+ /* Here, too, check for non-int multipliers. */
+ if (e->numterms > 2)
+ return 1;
+ expr_order_terms(e);
+ if (e->terms[1].type != EXPR_INT)
+ return 1;
+ reg = expr_checkea_get_reg16(&e->terms[0], &data);
+ if (!reg) {
+ ErrorAt(e->filename, e->line,
+ _("invalid effective address"));
+ return 0;
+ }
+ (*reg) += intnum_get_int(e->terms[1].data.intn);
+ break;
+ default:
+ /* Should never get here! */
+ break;
+ }
+
+ /* negative reg multipliers are illegal. */
+ if (data.bx < 0 || data.si < 0 || data.di < 0 || data.bp < 0) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
return 0;
}
+ /* Set havereg appropriately */
+ if (data.bx > 0)
+ havereg |= HAVE_BX;
+ if (data.si > 0)
+ havereg |= HAVE_SI;
+ if (data.di > 0)
+ havereg |= HAVE_DI;
+ if (data.bp > 0)
+ havereg |= HAVE_BP;
+
/* Simplify expr, which is now really just the displacement. This
* should get rid of the 0's we put in for registers in the callback.
*/
- expr_simplify(*e);
+ *ep = expr_simplify(*ep);
+ e = *ep;
- /* sanity check the modrm value; shouldn't be invalid because we
- * checked for that in the callback!
- */
- if (modrm16[data.havereg] & 0070)
- InternalError(__LINE__, __FILE__, _("invalid havereg value"));
+ /* Check the modrm value for invalid combinations. */
+ if (modrm16[havereg] & 0070) {
+ ErrorAt(e->filename, e->line, _("invalid effective address"));
+ return 0;
+ }
- *modrm |= modrm16[data.havereg];
+ *modrm |= modrm16[havereg];
*v_modrm = 0; /* default to not yet valid */
/* the displacement length hasn't been forced, try to
* determine what it is.
*/
- switch (data.havereg) {
+ switch (havereg) {
case HAVE_NONE:
/* no register in expression, so it must be disp16, and
* as the Mod bits are set to 0 above, we're done with
break;
}
- intn = expr_get_intnum(*e);
+ intn = expr_get_intnum(ep);
if (!intn)
break; /* expr still has unknown values */
*/
if (!intnum_check_size(intn, 2, 0) &&
!intnum_check_size(intn, 1, 1)) {
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address"));
return 0;
}
* Don't do this if we came from HAVE_BP above, so
* check *displen.
*/
- expr_delete(*e);
- *e = (expr *)NULL;
+ expr_delete(e);
+ *ep = (expr *)NULL;
} else if (dispval >= -128 && dispval <= 127) {
/* It fits into a signed byte */
*displen = 1;
break;
default:
/* any other size is an error */
- ErrorAt((*e)->filename, (*e)->line,
+ ErrorAt(e->filename, e->line,
_("invalid effective address (displacement size)"));
return 0;
}
}
/* Traverse over expression tree, calling func for each operation AFTER the
- * two branches (if expressions) have been traversed (eg, postorder
+ * branches (if expressions) have been traversed (eg, postorder
* traversal). The data pointer d is passed to each func call.
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
static int
expr_traverse_nodes_post(expr *e, void *d, int (*func) (expr *e, void *d))
{
+ int i;
+
if (!e)
return 0;
- /* traverse left side */
- if (e->left.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->left.data.expn, d, func))
- return 1;
-
- /* traverse right side */
- if (e->right.type == EXPR_EXPR &&
- expr_traverse_nodes_post(e->right.data.expn, d, func))
- return 1;
+ /* traverse terms */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR &&
+ expr_traverse_nodes_post(e->terms[i].data.expn, d, func))
+ return 1;
+ }
/* do callback */
return func(e, d);
expr_traverse_leaves_in(expr *e, void *d,
int (*func) (ExprItem *ei, void *d))
{
+ int i;
+
+ /* A NULL expression has no leaves to visit. */
if (!e)
return 0;
- if (e->left.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->left.data.expn, d, func))
- return 1;
- } else {
- if (func(&e->left, d))
- return 1;
+ /* Walk the terms in index order: descend into subexpression terms and
+ * hand every other term to func, passing d through unchanged. A nonzero
+ * result from func or from a nested walk stops immediately with 1.
+ */
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) {
+ if (expr_traverse_leaves_in(e->terms[i].data.expn, d, func))
+ return 1;
+ } else {
+ if (func(&e->terms[i], d))
+ return 1;
+ }
}
-
- if (e->right.type == EXPR_EXPR) {
- if (expr_traverse_leaves_in(e->right.data.expn, d, func))
- return 1;
- } else
- return func(&e->right, d);
-
+ /* Every term was visited without a request to stop. */
return 0;
}
-/* get rid of unnecessary branches if possible. report. */
-int
+/* Simplify expression by getting rid of unnecessary branches.
+ * The tree is passed through expr_xform_neg_tree() and the result is then
+ * passed through expr_level_tree() with a second argument of 1. Whatever
+ * that second pass returns is returned here, so callers should use the
+ * returned pointer in place of the one they passed in.
+ */
+expr *
expr_simplify(expr *e)
{
- int simplified = 0;
- ExprItem tmp;
-
- /* try to simplify the left side */
- if (e->left.type == EXPR_EXPR) {
- /* if the left subexpr isn't an IDENT, recurse simplification */
- if (e->left.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->left.data.expn);
-
- /* if the left subexpr is just an IDENT (or string thereof),
- * pull it up into the current node */
- while (e->left.type == EXPR_EXPR &&
- e->left.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->left.data.expn->right), sizeof(ExprItem));
- free(e->left.data.expn);
- memcpy(&e->left, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->left.type == EXPR_SYM) {
- /* try to get value of symbol */
- if (symrec_get_int_value(e->left.data.sym, &int_val, 0)) {
- e->left.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->left.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* ditto on the right */
- if (e->right.type == EXPR_EXPR) {
- if (e->right.data.expn->op != EXPR_IDENT)
- simplified |= expr_simplify(e->right.data.expn);
-
- while (e->right.type == EXPR_EXPR &&
- e->right.data.expn->op == EXPR_IDENT) {
- memcpy(&tmp, &(e->right.data.expn->right), sizeof(ExprItem));
- free(e->right.data.expn);
- memcpy(&e->right, &tmp, sizeof(ExprItem));
- simplified = 1;
- }
- }
-#if 0
- else if (e->right.type == EXPR_SYM) {
- if (symrec_get_int_value(e->right.data.sym, &int_val, 0)) {
- e->right.type = EXPR_INT;
- /* don't try to free the symrec here. */
- e->right.data.int_val = int_val;
- simplified = 1;
- }
- }
-#endif
-
- /* catch simple identities like 0+x, 1*x, etc., for x not a num */
- if (e->left.type == EXPR_INT &&
- ((intnum_is_pos1(e->left.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_neg1(e->left.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->left.data.intn) && e->op == EXPR_OR))) {
- intnum_delete(e->left.data.intn);
- e->op = EXPR_IDENT;
- simplified = 1;
- }
- /* and the corresponding x+|-0, x*&/1 */
- else if (e->right.type == EXPR_INT &&
- ((intnum_is_pos1(e->right.data.intn) && e->op == EXPR_MUL) ||
- (intnum_is_pos1(e->right.data.intn) && e->op == EXPR_DIV) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_ADD) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SUB) ||
- (intnum_is_neg1(e->right.data.intn) && e->op == EXPR_AND) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_OR) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHL) ||
- (intnum_is_zero(e->right.data.intn) && e->op == EXPR_SHR))) {
- intnum_delete(e->right.data.intn);
- e->op = EXPR_IDENT;
- e->right.type = e->left.type;
- memcpy(&e->right, &e->left, sizeof(ExprItem));
- simplified = 1;
- } else if ((e->left.type == EXPR_INT || e->left.type == EXPR_NONE) &&
- e->right.type == EXPR_INT && e->op != EXPR_IDENT) {
- intnum_calc(e->left.data.intn, e->op, e->right.data.intn);
- intnum_delete(e->right.data.intn);
- e->right.data.intn = e->left.data.intn;
- e->op = EXPR_IDENT;
- simplified = 1;
- }
-
- return simplified;
+ return expr_level_tree(expr_xform_neg_tree(e), 1);
}
const intnum *
-expr_get_intnum(expr *e)
+expr_get_intnum(expr **ep)
{
- while (!(e->op == EXPR_IDENT && e->right.type == EXPR_INT) &&
- expr_simplify(e))
- ;
+ expr *simp;
+
+ /* Simplify first and store the result back through ep, so the caller's
+ * pointer always refers to the simplified expression afterwards.
+ */
+ simp = expr_simplify(*ep);
+ *ep = simp;
- if (e->op == EXPR_IDENT && e->right.type == EXPR_INT)
- return e->right.data.intn;
- else
- return (intnum *)NULL;
+ /* Only a bare IDENT whose first term is an integer has an intnum. */
+ if (simp->op != EXPR_IDENT || simp->terms[0].type != EXPR_INT)
+ return (intnum *)NULL;
+
+ return simp->terms[0].data.intn;
}
expr_print(expr *e)
{
+ /* Print e to stdout. Operators that join terms are written between
+ * consecutive terms. Unary operators (NEG, NOT, LNOT) are written once,
+ * in front of their single term, and leave the separator empty.
+ * Subexpression terms are printed recursively inside parentheses.
+ */
static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+ /* Separator text placed between terms; the longest operator is two
+ * characters plus the terminating NUL. Starts empty so that an op value
+ * not handled by the switch below never leaves it uninitialized.
+ */
+ char opstr[3] = "";
+ int i;
- if (e->op != EXPR_IDENT) {
- switch (e->left.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->left.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->left.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->left.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->left.data.flt);
- break;
- case EXPR_REG:
- if (e->left.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->left.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
- }
- }
switch (e->op) {
case EXPR_ADD:
- printf("+");
+ strcpy(opstr, "+");
break;
case EXPR_SUB:
- printf("-");
+ strcpy(opstr, "-");
break;
case EXPR_MUL:
- printf("*");
+ strcpy(opstr, "*");
break;
case EXPR_DIV:
- printf("/");
+ strcpy(opstr, "/");
break;
case EXPR_SIGNDIV:
- printf("//");
+ strcpy(opstr, "//");
break;
case EXPR_MOD:
- printf("%%");
+ strcpy(opstr, "%");
break;
case EXPR_SIGNMOD:
- printf("%%%%");
+ strcpy(opstr, "%%");
break;
case EXPR_NEG:
+ /* Unary: print as a prefix now. With a single term the separator
+ * would never be emitted by the loop below.
+ */
printf("-");
+ opstr[0] = 0;
break;
case EXPR_NOT:
+ /* Unary prefix, same reasoning as EXPR_NEG. */
printf("~");
+ opstr[0] = 0;
break;
case EXPR_OR:
- printf("|");
+ strcpy(opstr, "|");
break;
case EXPR_AND:
- printf("&");
+ strcpy(opstr, "&");
break;
case EXPR_XOR:
- printf("^");
+ strcpy(opstr, "^");
break;
case EXPR_SHL:
- printf("<<");
+ strcpy(opstr, "<<");
break;
case EXPR_SHR:
- printf(">>");
+ strcpy(opstr, ">>");
break;
case EXPR_LOR:
- printf("||");
+ strcpy(opstr, "||");
break;
case EXPR_LAND:
- printf("&&");
+ strcpy(opstr, "&&");
break;
case EXPR_LNOT:
+ /* Unary prefix, same reasoning as EXPR_NEG. */
printf("!");
+ opstr[0] = 0;
break;
case EXPR_LT:
- printf("<");
+ strcpy(opstr, "<");
break;
case EXPR_GT:
- printf(">");
+ strcpy(opstr, ">");
break;
case EXPR_LE:
- printf("<=");
+ strcpy(opstr, "<=");
break;
case EXPR_GE:
- printf(">=");
+ strcpy(opstr, ">=");
break;
case EXPR_NE:
- printf("!=");
+ strcpy(opstr, "!=");
break;
case EXPR_EQ:
- printf("==");
+ strcpy(opstr, "==");
break;
case EXPR_IDENT:
+ opstr[0] = 0;
break;
}
- switch (e->right.type) {
- case EXPR_SYM:
- printf("%s", symrec_get_name(e->right.data.sym));
- break;
- case EXPR_EXPR:
- printf("(");
- expr_print(e->right.data.expn);
- printf(")");
- break;
- case EXPR_INT:
- intnum_print(e->right.data.intn);
- break;
- case EXPR_FLOAT:
- floatnum_print(e->right.data.flt);
- break;
- case EXPR_REG:
- if (e->right.data.reg.size == 32)
- printf("e");
- printf("%s", regs[e->right.data.reg.num]);
- break;
- case EXPR_NONE:
- break;
+ /* Print each term in order, with the separator after every term except
+ * the last.
+ */
+ for (i=0; i<e->numterms; i++) {
+ switch (e->terms[i].type) {
+ case EXPR_SYM:
+ printf("%s", symrec_get_name(e->terms[i].data.sym));
+ break;
+ case EXPR_EXPR:
+ printf("(");
+ expr_print(e->terms[i].data.expn);
+ printf(")");
+ break;
+ case EXPR_INT:
+ intnum_print(e->terms[i].data.intn);
+ break;
+ case EXPR_FLOAT:
+ floatnum_print(e->terms[i].data.flt);
+ break;
+ case EXPR_REG:
+ if (e->terms[i].data.reg.size == 32)
+ printf("e");
+ /* Mask to the low three bits so the index stays within regs[]. */
+ printf("%s", regs[e->terms[i].data.reg.num&7]);
+ break;
+ case EXPR_NONE:
+ break;
+ }
+ if (i < e->numterms-1)
+ printf("%s", opstr);
}
}
EXPR_LE,
EXPR_GE,
EXPR_NE,
- EXPR_IDENT /* if right is IDENT, then the entire expr is just a num */
+ EXPR_IDENT /* no operation, just a value */
} ExprOp;
#endif
typedef struct expr expr;
#endif
-expr *expr_new(ExprItem *, ExprOp, ExprItem *);
+expr *expr_new(ExprOp, ExprItem *, ExprItem *);
ExprItem *ExprSym(symrec *);
ExprItem *ExprExpr(expr *);
ExprItem *ExprReg(unsigned char reg, unsigned char size);
#define expr_new_tree(l,o,r) \
- expr_new (ExprExpr(l), (o), ExprExpr(r))
+ expr_new ((o), ExprExpr(l), ExprExpr(r))
#define expr_new_branch(o,r) \
- expr_new ((ExprItem *)NULL, (o), ExprExpr(r))
+ expr_new ((o), ExprExpr(r), (ExprItem *)NULL)
#define expr_new_ident(r) \
- expr_new ((ExprItem *)NULL, EXPR_IDENT, (r))
+ expr_new (EXPR_IDENT, (r), (ExprItem *)NULL)
/* allocates and makes an exact duplicate of e */
expr *expr_copy(const expr *e);
void expr_delete(expr *e);
-int expr_contains_float(expr *);
-
-int expr_checkea(expr **e, unsigned char *addrsize, unsigned char bits,
+int expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
unsigned char *displen, unsigned char *modrm,
unsigned char *v_modrm, unsigned char *n_modrm,
unsigned char *sib, unsigned char *v_sib,
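-/* Simplifies the expression e as much as possible, eliminating extraneous
- * branches and simplifying integer-only subexpressions.
- */
+/* Simplifies the expression e as much as possible, eliminating extraneous
+ * branches and simplifying integer-only subexpressions. Returns the
+ * simplified expression; callers should use the returned pointer in place
+ * of e.
+ */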
-int expr_simplify(expr *);
+expr *expr_simplify(expr *e);
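-/* Gets the integer value of e if the expression is just an integer. If the
- * expression is more complex (contains anything other than integers, ie
- * floats, non-valued labels, registers), returns NULL.
- */
+/* Gets the integer value of *ep if the expression is just an integer. If the
+ * expression is more complex (contains anything other than integers, i.e.
+ * floats, non-valued labels, registers), returns NULL. The expression is
+ * simplified first, and *ep is updated to point at the simplified result.
+ */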
-const intnum *expr_get_intnum(expr *e);
+const intnum *expr_get_intnum(expr **ep);
void expr_print(expr *);