From: Peter Johnson Date: Mon, 5 Nov 2001 05:49:19 +0000 (-0000) Subject: Split x86-specific stuff away from bytecode. X-Git-Tag: v0.1.0~214 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f58d7cb0931d5aed699c1cc20bceaa24531b2ec4;p=yasm Split x86-specific stuff away from bytecode. svn path=/trunk/yasm/; revision=316 --- diff --git a/frontends/yasm/yasm.c b/frontends/yasm/yasm.c index 0b4d711f..de7a27b9 100644 --- a/frontends/yasm/yasm.c +++ b/frontends/yasm/yasm.c @@ -41,6 +41,8 @@ RCSID("$IdPath$"); #include "preproc.h" #include "parser.h" +#include "arch.h" + #ifndef countof #define countof(x,y) (sizeof(x)/sizeof(y)) @@ -110,8 +112,11 @@ main(int argc, char *argv[]) switch_filename(""); } + /* Set x86 as the architecture */ + cur_arch = &x86_arch; + /* Get initial BITS setting from object format */ - mode_bits = dbg_objfmt.default_mode_bits; + x86_mode_bits = dbg_objfmt.default_mode_bits; sections = nasm_parser.do_parse(&nasm_parser, &dbg_objfmt, in); diff --git a/libyasm/arch.c b/libyasm/arch.c new file mode 100644 index 00000000..5cbe3591 --- /dev/null +++ b/libyasm/arch.c @@ -0,0 +1,29 @@ +/* $IdPath$ + * Architecture interface + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "util.h" + +#include "bytecode.h" + +#include "arch.h" + +arch *cur_arch; + diff --git a/libyasm/arch.h b/libyasm/arch.h index f1ed7268..b614aca4 100644 --- a/libyasm/arch.h +++ b/libyasm/arch.h @@ -28,9 +28,24 @@ struct arch { /* keyword used to select architecture */ const char *keyword; + + struct { + /* Maximum used bytecode type value+1. Should be set to + * BYTECODE_TYPE_BASE if no additional bytecode types are defined by + * the architecture. + */ + const int type_max; + + void (*bc_delete) (bytecode *bc); + void (*bc_print) (const bytecode *bc); + void (*bc_parser_finalize) (bytecode *bc); + } bc; }; /* Available architectures */ +#include "arch/x86/x86arch.h" extern arch x86_arch; +extern arch *cur_arch; + #endif diff --git a/libyasm/bc-int.h b/libyasm/bc-int.h new file mode 100644 index 00000000..f992c465 --- /dev/null +++ b/libyasm/bc-int.h @@ -0,0 +1,71 @@ +/* $IdPath$ + * Bytecode internal structures header file + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef YASM_BC_INT_H +#define YASM_BC_INT_H + +struct effaddr { + expr *disp; /* address displacement */ + unsigned char len; /* length of disp (in bytes), 0 if unknown, + * 0xff if unknown and required to be >0. + */ + unsigned char nosplit; /* 1 if reg*2 should not be split into + reg+reg. (0 if not) */ + + /* architecture-dependent data may be appended */ +}; +#define ea_get_data(x) (void *)(((char *)x)+sizeof(effaddr)) +#define ea_get_const_data(x) (const void *)(((const char *)x)+sizeof(effaddr)) + +struct immval { + expr *val; + + unsigned char len; /* length of val (in bytes), 0 if unknown */ + unsigned char isneg; /* the value has been explicitly negated */ + + unsigned char f_len; /* final imm length */ + unsigned char f_sign; /* 1 if final imm should be signed */ +}; + +struct bytecode { + STAILQ_ENTRY(bytecode) link; + + bytecode_type type; + + expr *multiple; /* number of times bytecode is repeated, + NULL=1 */ + + unsigned long len; /* total length of entire bytecode (including + multiple copies), 0 if unknown */ + + /* where it came from */ + const char *filename; + unsigned int lineno; + + /* other assembler state info */ + unsigned long offset; /* 0 if unknown */ + + /* architecture-dependent data may be appended */ +}; +#define bc_get_data(x) (void *)(((char *)x)+sizeof(bytecode)) +#define bc_get_const_data(x) (const void *)(((const char *)x)+sizeof(bytecode)) + +#endif diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index 407f01ec..6dae585d 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -29,40 +29,10 @@ RCSID("$IdPath$"); #include "bytecode.h" +#include "arch.h" -struct effaddr { - expr *disp; /* address displacement */ - unsigned char len; /* length of disp (in bytes), 0 if unknown, - * 0xff if unknown and 
required to be >0. - */ +#include "bc-int.h" - unsigned char segment; /* segment override, 0 if none */ - - /* How the spare (register) bits in Mod/RM are handled: - * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) - * They're set in bytecode_new_insn(). - */ - unsigned char modrm; - unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ - unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ - - unsigned char sib; - unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ - unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, - 0xff if unknown */ - unsigned char nosplit; /* 1 if reg*2 should not be split into - reg+reg. (0 if not) */ -}; - -struct immval { - expr *val; - - unsigned char len; /* length of val (in bytes), 0 if unknown */ - unsigned char isneg; /* the value has been explicitly negated */ - - unsigned char f_len; /* final imm length */ - unsigned char f_sign; /* 1 if final imm should be signed */ -}; struct dataval { STAILQ_ENTRY(dataval) link; @@ -75,158 +45,25 @@ struct dataval { } data; }; -struct bytecode { - STAILQ_ENTRY(bytecode) link; - - enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; - - /* This union has been somewhat tweaked to get it as small as possible - * on the 4-byte-aligned x86 architecture (without resorting to - * bitfields). In particular, insn and jmprel are the largest structures - * in the union, and are also the same size (after padding). jmprel - * can have another unsigned char added to the end without affecting - * its size. - * - * Don't worry about this too much, but keep it in mind when changing - * this structure. We care about the size of bytecode in particular - * because it accounts for the majority of the memory usage in the - * assembler when assembling a large file. 
- */ - union { - struct { - effaddr *ea; /* effective address */ - - immval *imm; /* immediate or relative value */ - - unsigned char opcode[3]; /* opcode */ - unsigned char opcode_len; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - /* HACK, but a space-saving one: shift opcodes have an immediate - * form and a ,1 form (with no immediate). In the parser, we - * set this and opcode_len=1, but store the ,1 version in the - * second byte of the opcode array. We then choose between the - * two versions once we know the actual value of imm (because we - * don't know it in the parser module). - * - * A override to force the imm version should just leave this at - * 0. Then later code won't know the ,1 version even exists. - * TODO: Figure out how this affects CPU flags processing. - * - * Call SetInsnShiftFlag() to set this flag to 1. - */ - unsigned char shift_op; - } insn; - struct { - expr *target; /* target location */ - - struct { - unsigned char opcode[3]; - unsigned char opcode_len; /* 0 = no opc for this version */ - } shortop, nearop; - - /* which opcode are we using? 
*/ - /* The *FORCED forms are specified in the source as such */ - jmprel_opcode_sel op_sel; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - } jmprel; - struct { - /* non-converted data (linked list) */ - datavalhead datahead; - - /* final (converted) size of each element (in bytes) */ - unsigned char size; - } data; - struct { - expr *numitems; /* number of items to reserve */ - unsigned char itemsize; /* size of each item (in bytes) */ - } reserve; - } data; +typedef struct bytecode_data { + /* non-converted data (linked list) */ + datavalhead datahead; - expr *multiple; /* number of times bytecode is repeated, - NULL=1 */ + /* final (converted) size of each element (in bytes) */ + unsigned char size; +} bytecode_data; - unsigned long len; /* total length of entire bytecode (including - multiple copies), 0 if unknown */ - - /* where it came from */ - const char *filename; - unsigned int lineno; - - /* other assembler state info */ - unsigned long offset; /* 0 if unknown */ - unsigned char mode_bits; -}; +typedef struct bytecode_reserve { + expr *numitems; /* number of items to reserve */ + unsigned char itemsize; /* size of each item (in bytes) */ +} bytecode_reserve; /* Static structures for when NULL is passed to conversion functions. 
*/ /* for Convert*ToBytes() */ unsigned char bytes_static[16]; -static bytecode *bytecode_new_common(void); - -effaddr * -effaddr_new_reg(unsigned long reg) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = (expr *)NULL; - ea->len = 0; - ea->segment = 0; - ea->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ - ea->valid_modrm = 1; - ea->need_modrm = 1; - ea->valid_sib = 0; - ea->need_sib = 0; - ea->nosplit = 0; - - return ea; -} - -effaddr * -effaddr_new_expr(expr *expr_ptr) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = expr_ptr; - ea->len = 0; - ea->segment = 0; - ea->modrm = 0; - ea->valid_modrm = 0; - ea->need_modrm = 1; - ea->valid_sib = 0; - ea->need_sib = 0xff; /* we won't know until we know more about expr and - the BITS/address override setting */ - ea->nosplit = 0; - - return ea; -} - -effaddr * -effaddr_new_imm(immval *im_ptr, unsigned char im_len) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = im_ptr->val; - ea->len = im_len; - ea->segment = 0; - ea->modrm = 0; - ea->valid_modrm = 0; - ea->need_modrm = 0; - ea->valid_sib = 0; - ea->need_sib = 0; - ea->nosplit = 0; - - return ea; -} - immval * -immval_new_int(unsigned long int_val) +imm_new_int(unsigned long int_val) { immval *im = xmalloc(sizeof(immval)); @@ -245,7 +82,7 @@ immval_new_int(unsigned long int_val) } immval * -immval_new_expr(expr *expr_ptr) +imm_new_expr(expr *expr_ptr) { immval *im = xmalloc(sizeof(immval)); @@ -257,19 +94,7 @@ immval_new_expr(expr *expr_ptr) } void -SetEASegment(effaddr *ptr, unsigned char segment) -{ - if (!ptr) - return; - - if (segment != 0 && ptr->segment != 0) - Warning(_("multiple segment overrides, using leftmost")); - - ptr->segment = segment; -} - -void -SetEALen(effaddr *ptr, unsigned char len) +ea_set_len(effaddr *ptr, unsigned char len) { if (!ptr) return; @@ -282,7 +107,7 @@ SetEALen(effaddr *ptr, unsigned char len) } void -SetEANosplit(effaddr *ptr, unsigned char nosplit) +ea_set_nosplit(effaddr *ptr, unsigned 
char nosplit) { if (!ptr) return; @@ -290,108 +115,8 @@ SetEANosplit(effaddr *ptr, unsigned char nosplit) ptr->nosplit = nosplit; } -effaddr * -GetInsnEA(bytecode *bc) -{ - if (!bc) - return NULL; - - if (bc->type != BC_INSN) - InternalError(_("Trying to get EA of non-instruction")); - - return bc->data.insn.ea; -} - void -SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize) -{ - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - bc->data.insn.opersize = opersize; - break; - case BC_JMPREL: - bc->data.jmprel.opersize = opersize; - break; - default: - InternalError(_("OperSize override applied to non-instruction")); - return; - } -} - -void -SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) -{ - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - bc->data.insn.addrsize = addrsize; - break; - case BC_JMPREL: - bc->data.jmprel.addrsize = addrsize; - break; - default: - InternalError(_("AddrSize override applied to non-instruction")); - return; - } -} - -void -SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) -{ - unsigned char *lockrep_pre = (unsigned char *)NULL; - - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - lockrep_pre = &bc->data.insn.lockrep_pre; - break; - case BC_JMPREL: - lockrep_pre = &bc->data.jmprel.lockrep_pre; - break; - default: - InternalError(_("LockRep prefix applied to non-instruction")); - return; - } - - if (*lockrep_pre != 0) - Warning(_("multiple LOCK or REP prefixes, using leftmost")); - - *lockrep_pre = prefix; -} - -void -SetInsnShiftFlag(bytecode *bc) -{ - if (!bc) - return; - - if (bc->type != BC_INSN) - InternalError(_("Attempted to set shift flag on non-instruction")); - - bc->data.insn.shift_op = 1; -} - -void -SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) -{ - if (!old_sel) - return; - - if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) || - (*old_sel == JR_NEAR_FORCED))) - Warning(_("multiple SHORT or NEAR specifiers, using leftmost")); - 
*old_sel = new_sel; -} - -void -SetBCMultiple(bytecode *bc, expr *e) +bc_set_multiple(bytecode *bc, expr *e) { if (bc->multiple) bc->multiple = expr_new_tree(bc->multiple, EXPR_MUL, e); @@ -399,10 +124,12 @@ SetBCMultiple(bytecode *bc, expr *e) bc->multiple = e; } -static bytecode * -bytecode_new_common(void) +bytecode * +bc_new_common(bytecode_type type, size_t datasize) { - bytecode *bc = xmalloc(sizeof(bytecode)); + bytecode *bc = xmalloc(sizeof(bytecode)+datasize); + + bc->type = type; bc->multiple = (expr *)NULL; bc->len = 0; @@ -411,146 +138,59 @@ bytecode_new_common(void) bc->lineno = line_number; bc->offset = 0; - bc->mode_bits = mode_bits; return bc; } bytecode * -bytecode_new_insn(unsigned char opersize, - unsigned char opcode_len, - unsigned char op0, - unsigned char op1, - unsigned char op2, - effaddr *ea_ptr, - unsigned char spare, - immval *im_ptr, - unsigned char im_len, - unsigned char im_sign) +bc_new_data(datavalhead *datahead, unsigned long size) { - bytecode *bc = bytecode_new_common(); + bytecode *bc = bc_new_common(BC_DATA, sizeof(bytecode_data)); + bytecode_data *data = bc_get_data(bc); - bc->type = BC_INSN; - - bc->data.insn.ea = ea_ptr; - if (ea_ptr) { - bc->data.insn.ea->modrm &= 0xC7; /* zero spare/reg bits */ - bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */ - } - - bc->data.insn.imm = im_ptr; - if (im_ptr) { - bc->data.insn.imm->f_sign = im_sign; - bc->data.insn.imm->f_len = im_len; - } - - bc->data.insn.opcode[0] = op0; - bc->data.insn.opcode[1] = op1; - bc->data.insn.opcode[2] = op2; - bc->data.insn.opcode_len = opcode_len; - - bc->data.insn.addrsize = 0; - bc->data.insn.opersize = opersize; - bc->data.insn.lockrep_pre = 0; - bc->data.insn.shift_op = 0; - - return bc; -} - -bytecode * -bytecode_new_jmprel(targetval *target, - unsigned char short_opcode_len, - unsigned char short_op0, - unsigned char short_op1, - unsigned char short_op2, - unsigned char near_opcode_len, - unsigned char near_op0, - unsigned 
char near_op1, - unsigned char near_op2, - unsigned char addrsize) -{ - bytecode *bc = bytecode_new_common(); - - bc->type = BC_JMPREL; - - bc->data.jmprel.target = target->val; - bc->data.jmprel.op_sel = target->op_sel; - - if ((target->op_sel == JR_SHORT_FORCED) && (near_opcode_len == 0)) - Error(_("no SHORT form of that jump instruction exists")); - if ((target->op_sel == JR_NEAR_FORCED) && (short_opcode_len == 0)) - Error(_("no NEAR form of that jump instruction exists")); - - bc->data.jmprel.shortop.opcode[0] = short_op0; - bc->data.jmprel.shortop.opcode[1] = short_op1; - bc->data.jmprel.shortop.opcode[2] = short_op2; - bc->data.jmprel.shortop.opcode_len = short_opcode_len; - - bc->data.jmprel.nearop.opcode[0] = near_op0; - bc->data.jmprel.nearop.opcode[1] = near_op1; - bc->data.jmprel.nearop.opcode[2] = near_op2; - bc->data.jmprel.nearop.opcode_len = near_opcode_len; - - bc->data.jmprel.addrsize = addrsize; - bc->data.jmprel.opersize = 0; - bc->data.jmprel.lockrep_pre = 0; + data->datahead = *datahead; + data->size = size; return bc; } bytecode * -bytecode_new_data(datavalhead *datahead, unsigned long size) +bc_new_reserve(expr *numitems, unsigned long itemsize) { - bytecode *bc = bytecode_new_common(); - - bc->type = BC_DATA; + bytecode *bc = bc_new_common(BC_RESERVE, sizeof(bytecode_reserve)); + bytecode_reserve *reserve = bc_get_data(bc); - bc->data.data.datahead = *datahead; - bc->data.data.size = size; - - return bc; -} - -bytecode * -bytecode_new_reserve(expr *numitems, unsigned long itemsize) -{ - bytecode *bc = bytecode_new_common(); - - bc->type = BC_RESERVE; - - bc->data.reserve.numitems = numitems; - bc->data.reserve.itemsize = itemsize; + reserve->numitems = numitems; + reserve->itemsize = itemsize; return bc; } void -bytecode_delete(bytecode *bc) +bc_delete(bytecode *bc) { + bytecode_data *data; + bytecode_reserve *reserve; + if (!bc) return; switch (bc->type) { case BC_EMPTY: break; - case BC_INSN: - if (bc->data.insn.ea) { - 
expr_delete(bc->data.insn.ea->disp); - xfree(bc->data.insn.ea); - } - if (bc->data.insn.imm) { - expr_delete(bc->data.insn.imm->val); - xfree(bc->data.insn.imm); - } - break; - case BC_JMPREL: - expr_delete(bc->data.jmprel.target); - break; case BC_DATA: - datavals_delete(&bc->data.data.datahead); + data = bc_get_data(bc); + dvs_delete(&data->datahead); break; case BC_RESERVE: - expr_delete(bc->data.reserve.numitems); + reserve = bc_get_data(bc); + expr_delete(reserve->numitems); + break; + default: + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_delete(bc); + else + InternalError(_("Unknown bytecode type")); break; } @@ -559,131 +199,43 @@ bytecode_delete(bytecode *bc) } int -bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val) +bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val) { return 0; /* TODO */ } void -bytecode_print(const bytecode *bc) +bc_print(const bytecode *bc) { + const bytecode_data *data; + const bytecode_reserve *reserve; + switch (bc->type) { case BC_EMPTY: printf("_Empty_\n"); break; - case BC_INSN: - printf("_Instruction_\n"); - printf("Effective Address:"); - if (!bc->data.insn.ea) - printf(" (nil)\n"); - else { - printf("\n Disp="); - if (bc->data.insn.ea->disp) - expr_print(bc->data.insn.ea->disp); - else - printf("(nil)"); - printf("\n"); - printf(" Len=%u SegmentOv=%02x NoSplit=%u\n", - (unsigned int)bc->data.insn.ea->len, - (unsigned int)bc->data.insn.ea->segment, - (unsigned int)bc->data.insn.ea->nosplit); - printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n", - (unsigned int)bc->data.insn.ea->modrm, - (unsigned int)bc->data.insn.ea->valid_modrm, - (unsigned int)bc->data.insn.ea->need_modrm); - printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n", - (unsigned int)bc->data.insn.ea->sib, - (unsigned int)bc->data.insn.ea->valid_sib, - (unsigned int)bc->data.insn.ea->need_sib); - } - printf("Immediate Value:"); - if (!bc->data.insn.imm) - printf(" (nil)\n"); - else { - printf("\n Val="); - 
expr_print(bc->data.insn.imm->val); - printf("\n"); - printf(" Len=%u, IsNeg=%u\n", - (unsigned int)bc->data.insn.imm->len, - (unsigned int)bc->data.insn.imm->isneg); - printf(" FLen=%u, FSign=%u\n", - (unsigned int)bc->data.insn.imm->f_len, - (unsigned int)bc->data.insn.imm->f_sign); - } - printf("Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.insn.opcode[0], - (unsigned int)bc->data.insn.opcode[1], - (unsigned int)bc->data.insn.opcode[2], - (unsigned int)bc->data.insn.opcode_len); - printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n", - (unsigned int)bc->data.insn.addrsize, - (unsigned int)bc->data.insn.opersize, - (unsigned int)bc->data.insn.lockrep_pre, - (unsigned int)bc->data.insn.shift_op); - break; - case BC_JMPREL: - printf("_Relative Jump_\n"); - printf("Target="); - expr_print(bc->data.jmprel.target); - printf("\nShort Form:\n"); - if (!bc->data.jmprel.shortop.opcode_len == 0) - printf(" None\n"); - else - printf(" Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.jmprel.shortop.opcode[0], - (unsigned int)bc->data.jmprel.shortop.opcode[1], - (unsigned int)bc->data.jmprel.shortop.opcode[2], - (unsigned int)bc->data.jmprel.shortop.opcode_len); - if (!bc->data.jmprel.nearop.opcode_len == 0) - printf(" None\n"); - else - printf(" Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.jmprel.nearop.opcode[0], - (unsigned int)bc->data.jmprel.nearop.opcode[1], - (unsigned int)bc->data.jmprel.nearop.opcode[2], - (unsigned int)bc->data.jmprel.nearop.opcode_len); - printf("OpSel="); - switch (bc->data.jmprel.op_sel) { - case JR_NONE: - printf("None"); - break; - case JR_SHORT: - printf("Short"); - break; - case JR_NEAR: - printf("Near"); - break; - case JR_SHORT_FORCED: - printf("Forced Short"); - break; - case JR_NEAR_FORCED: - printf("Forced Near"); - break; - default: - printf("UNKNOWN!!"); - break; - } - printf("\nAddrSize=%u OperSize=%u LockRepPre=%02x\n", - (unsigned int)bc->data.jmprel.addrsize, - (unsigned 
int)bc->data.jmprel.opersize, - (unsigned int)bc->data.jmprel.lockrep_pre); - break; case BC_DATA: + data = bc_get_const_data(bc); printf("_Data_\n"); printf("Final Element Size=%u\n", - (unsigned int)bc->data.data.size); + (unsigned int)data->size); printf("Elements:\n"); - datavals_print(&bc->data.data.datahead); + dvs_print(&data->datahead); break; case BC_RESERVE: + reserve = bc_get_const_data(bc); printf("_Reserve_\n"); printf("Num Items="); - expr_print(bc->data.reserve.numitems); + expr_print(reserve->numitems); printf("\nItem Size=%u\n", - (unsigned int)bc->data.reserve.itemsize); + (unsigned int)reserve->itemsize); break; default: - printf("_Unknown_\n"); + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_print(bc); + else + printf("_Unknown_\n"); + break; } printf("Multiple="); if (!bc->multiple) @@ -694,95 +246,42 @@ bytecode_print(const bytecode *bc) printf("Length=%lu\n", bc->len); printf("Filename=\"%s\" Line Number=%u\n", bc->filename ? bc->filename : "", bc->lineno); - printf("Offset=%lx BITS=%u\n", bc->offset, bc->mode_bits); -} - -static void -bytecode_parser_finalize_insn(bytecode *bc) -{ - effaddr *ea = bc->data.insn.ea; - immval *imm = bc->data.insn.imm; - - if (ea) { - if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) || - (!ea->valid_modrm && ea->need_modrm))) { - /* First expand equ's */ - expr_expand_equ(ea->disp); - - /* Check validity of effective address and calc R/M bits of - * Mod/RM byte and SIB byte. We won't know the Mod field - * of the Mod/RM byte until we know more about the - * displacement. - */ - if (!expr_checkea(&ea->disp, &bc->data.insn.addrsize, - bc->mode_bits, ea->nosplit, &ea->len, &ea->modrm, - &ea->valid_modrm, &ea->need_modrm, &ea->sib, - &ea->valid_sib, &ea->need_sib)) - return; /* failed, don't bother checking rest of insn */ - } - } - - if (imm) { - const intnum *num; - - if (imm->val) { - expr_expand_equ(imm->val); - expr_simplify(imm->val); - } - /* TODO: check imm f_len vs. len? 
*/ - - /* Handle shift_op special-casing */ - if (bc->data.insn.shift_op && (num = expr_get_intnum(&imm->val))) { - if (intnum_get_uint(num) == 1) { - /* Use ,1 form: first copy ,1 opcode. */ - bc->data.insn.opcode[0] = bc->data.insn.opcode[1]; - /* Delete ModRM, as it's no longer needed */ - xfree(ea); - bc->data.insn.ea = (effaddr *)NULL; - /* Delete Imm, as it's not needed */ - expr_delete(imm->val); - xfree(imm); - bc->data.insn.imm = (immval *)NULL; - } - bc->data.insn.shift_op = 0; - } - } - - + printf("Offset=%lx\n", bc->offset); } void -bytecode_parser_finalize(bytecode *bc) +bc_parser_finalize(bytecode *bc) { switch (bc->type) { case BC_EMPTY: /* FIXME: delete it (probably in bytecodes_ level, not here */ InternalError(_("got empty bytecode in parser_finalize")); break; - case BC_INSN: - bytecode_parser_finalize_insn(bc); - break; default: + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_parser_finalize(bc); + else + InternalError(_("Unknown bytecode type")); break; } } void -bytecodes_delete(bytecodehead *headp) +bcs_delete(bytecodehead *headp) { bytecode *cur, *next; cur = STAILQ_FIRST(headp); while (cur) { next = STAILQ_NEXT(cur, link); - bytecode_delete(cur); + bc_delete(cur); cur = next; } STAILQ_INIT(headp); } bytecode * -bytecodes_append(bytecodehead *headp, bytecode *bc) +bcs_append(bytecodehead *headp, bytecode *bc) { if (bc) { if (bc->type != BC_EMPTY) { @@ -796,27 +295,27 @@ bytecodes_append(bytecodehead *headp, bytecode *bc) } void -bytecodes_print(const bytecodehead *headp) +bcs_print(const bytecodehead *headp) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) { printf("---Next Bytecode---\n"); - bytecode_print(cur); + bc_print(cur); } } void -bytecodes_parser_finalize(bytecodehead *headp) +bcs_parser_finalize(bytecodehead *headp) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) - bytecode_parser_finalize(cur); + bc_parser_finalize(cur); } dataval * -dataval_new_expr(expr *expn) +dv_new_expr(expr *expn) { dataval *retval = 
xmalloc(sizeof(dataval)); @@ -827,7 +326,7 @@ dataval_new_expr(expr *expn) } dataval * -dataval_new_string(char *str_val) +dv_new_string(char *str_val) { dataval *retval = xmalloc(sizeof(dataval)); @@ -838,7 +337,7 @@ dataval_new_string(char *str_val) } void -datavals_delete(datavalhead *headp) +dvs_delete(datavalhead *headp) { dataval *cur, *next; @@ -854,7 +353,7 @@ datavals_delete(datavalhead *headp) } dataval * -datavals_append(datavalhead *headp, dataval *dv) +dvs_append(datavalhead *headp, dataval *dv) { if (dv) { STAILQ_INSERT_TAIL(headp, dv, link); @@ -864,7 +363,7 @@ datavals_append(datavalhead *headp, dataval *dv) } void -datavals_print(const datavalhead *head) +dvs_print(const datavalhead *head) { dataval *cur; diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index 3683bae9..38fa3113 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -27,87 +27,43 @@ typedef struct immval immval; typedef STAILQ_HEAD(datavalhead, dataval) datavalhead; typedef struct dataval dataval; +/* Additional types may be architecture-defined starting at + * BYTECODE_TYPE_BASE. 
+ */ typedef enum { - JR_NONE, - JR_SHORT, - JR_NEAR, - JR_SHORT_FORCED, - JR_NEAR_FORCED -} jmprel_opcode_sel; - -typedef struct targetval { - expr *val; - - jmprel_opcode_sel op_sel; -} targetval; - -effaddr *effaddr_new_reg(unsigned long reg); -effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len); -effaddr *effaddr_new_expr(expr *expr_ptr); + BC_EMPTY = 0, + BC_DATA, + BC_RESERVE +} bytecode_type; +#define BYTECODE_TYPE_BASE BC_RESERVE+1 -immval *immval_new_int(unsigned long int_val); -immval *immval_new_expr(expr *expr_ptr); +immval *imm_new_int(unsigned long int_val); +immval *imm_new_expr(expr *e); -void SetEASegment(effaddr *ptr, unsigned char segment); -void SetEALen(effaddr *ptr, unsigned char len); -void SetEANosplit(effaddr *ptr, unsigned char nosplit); +void ea_set_len(effaddr *ea, unsigned char len); +void ea_set_nosplit(effaddr *ea, unsigned char nosplit); -effaddr *GetInsnEA(bytecode *bc); +void bc_set_multiple(bytecode *bc, expr *e); -void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); -void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); -void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); -void SetInsnShiftFlag(bytecode *bc); +bytecode *bc_new_common(bytecode_type type, size_t datasize); +bytecode *bc_new_data(datavalhead *datahead, unsigned long size); +bytecode *bc_new_reserve(expr *numitems, unsigned long itemsize); -void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); - -void SetBCMultiple(bytecode *bc, expr *e); - -/* IMPORTANT: ea_ptr and im_ptr cannot be reused or freed after calling this - * function (it doesn't make a copy). - */ -bytecode *bytecode_new_insn(unsigned char opersize, - unsigned char opcode_len, - unsigned char op0, - unsigned char op1, - unsigned char op2, - effaddr *ea_ptr, - unsigned char spare, - immval *im_ptr, - unsigned char im_len, - unsigned char im_sign); - -/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. 
*/ -bytecode *bytecode_new_jmprel(targetval *target, - unsigned char short_opcode_len, - unsigned char short_op0, - unsigned char short_op1, - unsigned char short_op2, - unsigned char near_opcode_len, - unsigned char near_op0, - unsigned char near_op1, - unsigned char near_op2, - unsigned char addrsize); - -bytecode *bytecode_new_data(datavalhead *datahead, unsigned long size); - -bytecode *bytecode_new_reserve(expr *numitems, unsigned long itemsize); - -void bytecode_delete(bytecode *bc); +void bc_delete(bytecode *bc); /* Gets the offset of the bytecode specified by bc if possible. * Return value is IF POSSIBLE, not the value. */ -int bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val); +int bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val); -void bytecode_print(const bytecode *bc); +void bc_print(const bytecode *bc); -void bytecode_parser_finalize(bytecode *bc); +void bc_parser_finalize(bytecode *bc); /* void bytecodes_initialize(bytecodehead *headp); */ #define bytecodes_initialize(headp) STAILQ_INIT(headp) -void bytecodes_delete(bytecodehead *headp); +void bcs_delete(bytecodehead *headp); /* Adds bc to the list of bytecodes headp. * NOTE: Does not make a copy of bc; so don't pass this function @@ -115,20 +71,20 @@ void bytecodes_delete(bytecodehead *headp); * this function. If bc was actually appended (it wasn't NULL or empty), * then returns bc, otherwise returns NULL. 
*/ -bytecode *bytecodes_append(bytecodehead *headp, bytecode *bc); +bytecode *bcs_append(bytecodehead *headp, bytecode *bc); -void bytecodes_print(const bytecodehead *headp); +void bcs_print(const bytecodehead *headp); -void bytecodes_parser_finalize(bytecodehead *headp); +void bcs_parser_finalize(bytecodehead *headp); -dataval *dataval_new_expr(expr *expn); -dataval *dataval_new_float(floatnum *flt); -dataval *dataval_new_string(char *str_val); +dataval *dv_new_expr(expr *expn); +dataval *dv_new_float(floatnum *flt); +dataval *dv_new_string(char *str_val); -/* void datavals_initialize(datavalhead *headp); */ -#define datavals_initialize(headp) STAILQ_INIT(headp) +/* void dvs_initialize(datavalhead *headp); */ +#define dvs_initialize(headp) STAILQ_INIT(headp) -void datavals_delete(datavalhead *headp); +void dvs_delete(datavalhead *headp); /* Adds dv to the list of datavals headp. * NOTE: Does not make a copy of dv; so don't pass this function @@ -136,8 +92,8 @@ void datavals_delete(datavalhead *headp); * this function. If dv was actually appended (it wasn't NULL), then * returns dv, otherwise returns NULL. 
*/ -dataval *datavals_append(datavalhead *headp, dataval *dv); +dataval *dvs_append(datavalhead *headp, dataval *dv); -void datavals_print(const datavalhead *head); +void dvs_print(const datavalhead *head); #endif diff --git a/libyasm/linemgr.c b/libyasm/linemgr.c index c21436a7..2239872f 100644 --- a/libyasm/linemgr.c +++ b/libyasm/linemgr.c @@ -29,7 +29,6 @@ RCSID("$IdPath$"); const char *in_filename = (const char *)NULL; unsigned int line_number = 1; -unsigned char mode_bits = 0; unsigned int asm_options = 0; static ternary_tree filename_table = (ternary_tree)NULL; diff --git a/libyasm/linemgr.h b/libyasm/linemgr.h index 23376e13..d0457793 100644 --- a/libyasm/linemgr.h +++ b/libyasm/linemgr.h @@ -24,7 +24,6 @@ extern const char *in_filename; extern unsigned int line_number; -extern unsigned char mode_bits; extern unsigned int asm_options; void switch_filename(const char *filename); diff --git a/libyasm/section.c b/libyasm/section.c index 0ebccb7e..a95d185a 100644 --- a/libyasm/section.c +++ b/libyasm/section.c @@ -134,7 +134,7 @@ sections_parser_finalize(sectionhead *headp) section *cur; STAILQ_FOREACH(cur, headp, link) - bytecodes_parser_finalize(&cur->bc); + bcs_parser_finalize(&cur->bc); } bytecodehead * @@ -156,7 +156,7 @@ section_delete(section *sect) return; xfree(sect->name); - bytecodes_delete(&sect->bc); + bcs_delete(&sect->bc); xfree(sect); } @@ -176,5 +176,5 @@ section_print(const section *sect) } printf(" Bytecodes:\n"); - bytecodes_print(&sect->bc); + bcs_print(&sect->bc); } diff --git a/libyasm/symrec.c b/libyasm/symrec.c index eccb034f..020dee11 100644 --- a/libyasm/symrec.c +++ b/libyasm/symrec.c @@ -290,7 +290,7 @@ symrec_print(const symrec *sym) printf("[First bytecode]\n"); else { printf("[Preceding bytecode]\n"); - bytecode_print(sym->value.label.bc); + bc_print(sym->value.label.bc); } break; } diff --git a/libyasm/tests/Makefile.am b/libyasm/tests/Makefile.am index 9c7d9920..36e4bb1d 100644 --- a/libyasm/tests/Makefile.am +++ b/libyasm/tests/Makefile.am @@ 
-26,6 +26,7 @@ LDADD = \ $(top_builddir)/src/preprocs/raw/libpreproc.a \ $(top_builddir)/src/optimizers/dbg/liboptimizer.a \ $(top_builddir)/src/objfmts/dbg/libobjfmt.a \ + $(top_builddir)/src/arch/@ARCH@/libarch.a \ $(top_builddir)/src/libyasm.a \ $(INTLLIBS) @@ -54,5 +55,5 @@ memexpr_test_SOURCES = \ memexpr_test.c INCLUDES= -I$(top_srcdir) -I$(top_srcdir)/src -I$(top_srcdir)/check \ - -I$(top_builddir)/intl + -I$(top_srcdir)/src/arch/@ARCH@ -I$(top_builddir)/intl diff --git a/libyasm/tests/bytecode_test.c b/libyasm/tests/bytecode_test.c index 77eadc6a..f3411512 100644 --- a/libyasm/tests/bytecode_test.c +++ b/libyasm/tests/bytecode_test.c @@ -18,41 +18,41 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#ifdef STDC_HEADERS -# include -#endif +#include "util.h" #include "check.h" -#include "bytecode.c" +#include "bytecode.h" +#include "bc-int.h" +#include "arch.h" +#include "x86-int.h" -START_TEST(test_effaddr_new_reg) +START_TEST(test_x86_ea_new_reg) { effaddr *ea; + x86_effaddr_data *ead; int i; /* Test with NULL */ - ea = effaddr_new_reg(1); + ea = x86_ea_new_reg(1); fail_unless(ea != NULL, "Should die if out of memory (not return NULL)"); /* Test structure values function should set */ fail_unless(ea->len == 0, "len should be 0"); - fail_unless(ea->segment == 0, "Should be no segment override"); - fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid"); - fail_unless(ea->need_modrm == 1, "Mod/RM should be needed"); - fail_unless(ea->valid_sib == 0, "SIB should be invalid"); - fail_unless(ea->need_sib == 0, "SIB should not be needed"); + ead = ea_get_data(ea); + fail_unless(ead->segment == 0, "Should be no segment override"); + fail_unless(ead->valid_modrm == 1, "Mod/RM should be valid"); + fail_unless(ead->need_modrm == 1, "Mod/RM should be needed"); + fail_unless(ead->valid_sib == 0, "SIB should be invalid"); 
+ fail_unless(ead->need_sib == 0, "SIB should not be needed"); free(ea); /* Exhaustively test generated Mod/RM byte with register values */ for(i=0; i<8; i++) { - ea = effaddr_new_reg(i); - fail_unless(ea->modrm == (0xC0 | (i & 0x07)), + ea = x86_ea_new_reg(i); + ead = ea_get_data(ea); + fail_unless(ead->modrm == (0xC0 | (i & 0x07)), "Invalid Mod/RM byte generated"); free(ea); } @@ -66,7 +66,7 @@ bytecode_suite(void) TCase *tc_conversion = tcase_create("Conversion"); suite_add_tcase(s, tc_conversion); - tcase_add_test(tc_conversion, test_effaddr_new_reg); + tcase_add_test(tc_conversion, test_x86_ea_new_reg); return s; } diff --git a/modules/arch/x86/Makefile.am b/modules/arch/x86/Makefile.am index 5847318c..5c5598d0 100644 --- a/modules/arch/x86/Makefile.am +++ b/modules/arch/x86/Makefile.am @@ -3,7 +3,10 @@ noinst_LIBRARIES = libarch.a libarch_a_SOURCES = \ - arch.c + x86arch.h \ + x86-int.h \ + arch.c \ + bytecode.c INCLUDES = \ -I$(top_srcdir)/src \ diff --git a/modules/arch/x86/arch.c b/modules/arch/x86/arch.c index a5df22fb..c9cf1cae 100644 --- a/modules/arch/x86/arch.c +++ b/modules/arch/x86/arch.c @@ -22,11 +22,22 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" #include "arch.h" +#include "x86-int.h" + + +unsigned char x86_mode_bits = 0; /* Define arch structure -- see arch.h for details */ arch x86_arch = { "x86 (IA-32, x86-64)", - "x86" + "x86", + { + X86_BYTECODE_TYPE_MAX, + x86_bc_delete, + x86_bc_print, + x86_bc_parser_finalize + } }; diff --git a/modules/arch/x86/bytecode.c b/modules/arch/x86/bytecode.c new file mode 100644 index 00000000..80837116 --- /dev/null +++ b/modules/arch/x86/bytecode.c @@ -0,0 +1,512 @@ +/* + * x86 architecture description + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. 
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Create an X86_BC_INSN bytecode from the description in *d.
+ * d->ea and d->imm are stored in the new bytecode, not copied; they are
+ * released later by x86_bc_delete().  The spare/reg field of the EA's ModRM
+ * byte is overwritten with d->spare.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc;
+    x86_insn *insn;
+
+    bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    insn = bc_get_data(bc);
+
+    insn->ea = d->ea;
+    if (d->ea) {
+        x86_effaddr_data *ead = ea_get_data(d->ea);
+        ead->modrm &= 0xC7; /* zero spare/reg bits */
+        ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */
+    }
+
+    insn->imm = d->imm;
+    if (d->imm) {
+        insn->imm->f_len = d->im_len;
+        insn->imm->f_sign = d->im_sign;
+    }
+
+    insn->opcode[0] = d->op[0];
+    insn->opcode[1] = d->op[1];
+    insn->opcode[2] = d->op[2];
+    insn->opcode_len = d->op_len;
+
+    insn->addrsize = 0;
+    insn->opersize = d->opersize;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+
+    /* Remember the BITS setting in effect when the instruction was parsed */
+    insn->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Create an X86_BC_JMPREL bytecode from the description in *d.
+ * An opcode length of 0 means that form (short or near) does not exist;
+ * forcing a form that does not exist is reported with Error().
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form must be checked against its own opcode length */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+        Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+        Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Record a segment override on ea (no-op when ea is NULL).
+ * Warns when a non-zero override replaces an already non-zero one.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    x86_effaddr_data *ead;
+
+    if (!ea)
+        return;
+
+    ead = ea_get_data(ea);
+
+    if (segment != 0 && ead->segment != 0)
+        Warning(_("multiple segment overrides, using leftmost"));
+
+    ead->segment = segment;
+}
+
+/* Allocate an effective address that names a register directly.
+ * Only the low 3 bits of reg are used; the ModRM byte is complete and no
+ * SIB byte is needed.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->sib = 0; /* not used, but x86_bc_print() reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address whose displacement is the expression e.
+ * e is stored, not copied.  ModRM is needed but not yet valid.
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->sib = 0; /* defined value until the SIB byte is calculated */
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff; /* we won't know until we know more about expr and
+                             the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address that carries an immediate of im_len bytes.
+ * The displacement points at imm->val (shared, not copied); neither ModRM
+ * nor SIB is needed.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->sib = 0; /* not used, but x86_bc_print() reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an instruction bytecode, or NULL when bc
+ * is NULL.  Raises InternalError() for any other bytecode type.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return NULL;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Look up the type-specific data only after bc has been validated */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Set the operand-size override of an instruction or relative jump.
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->opersize = opersize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->opersize = opersize;
+            break;
+        default:
+            InternalError(_("OperSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Set the address-size override of an instruction or relative jump.
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->addrsize = addrsize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->addrsize = addrsize;
+            break;
+        default:
+            InternalError(_("AddrSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Set the LOCK/REP prefix byte of an instruction or relative jump.
+ * Warns when a prefix was already present.  No-op when bc is NULL.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+    unsigned char *lockrep_pre = (unsigned char *)NULL;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            lockrep_pre = &insn->lockrep_pre;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            lockrep_pre = &jmprel->lockrep_pre;
+            break;
+        default:
+            InternalError(_("LockRep prefix applied to non-instruction"));
+            return;
+    }
+
+    if (*lockrep_pre != 0)
+        Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *lockrep_pre = prefix;
+}
+
+/* Mark an instruction bytecode as a shift opcode (see shift_op in x86_insn).
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Attempted to set shift flag on non-instruction"));
+
+    insn = bc_get_data(bc);
+
+    insn->shift_op = 1;
+}
+
+/* Store new_sel into *old_sel (no-op when old_sel is NULL).
+ * Warns when a forced SHORT/NEAR selection is being replaced by anything
+ * other than JR_NONE.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                          x86_jmprel_opcode_sel new_sel)
+{
+    if (!old_sel)
+        return;
+
+    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
+                               (*old_sel == JR_NEAR_FORCED)))
+        Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific contents of bc: the EA and immediate of an
+ * instruction, or the target expression of a relative jump.  Other bytecode
+ * types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            if (insn->ea) {
+                expr_delete(insn->ea->disp);
+                xfree(insn->ea);
+            }
+            if (insn->imm) {
+                expr_delete(insn->imm->val);
+                xfree(insn->imm);
+            }
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            expr_delete(jmprel->target);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Dump the x86-specific contents of bc to stdout for debugging.
+ * Other bytecode types print nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_const_data(bc);
+            printf("_Instruction_\n");
+            printf("Effective Address:");
+            if (!insn->ea)
+                printf(" (nil)\n");
+            else {
+                printf("\n Disp=");
+                if (insn->ea->disp)
+                    expr_print(insn->ea->disp);
+                else
+                    printf("(nil)");
+                printf("\n");
+                ead = ea_get_data(insn->ea);
+                printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                       (unsigned int)insn->ea->len,
+                       (unsigned int)ead->segment,
+                       (unsigned int)insn->ea->nosplit);
+                printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                       (unsigned int)ead->modrm,
+                       (unsigned int)ead->valid_modrm,
+                       (unsigned int)ead->need_modrm);
+                printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                       (unsigned int)ead->sib,
+                       (unsigned int)ead->valid_sib,
+                       (unsigned int)ead->need_sib);
+            }
+            printf("Immediate Value:");
+            if (!insn->imm)
+                printf(" (nil)\n");
+            else {
+                printf("\n Val=");
+                expr_print(insn->imm->val);
+                printf("\n");
+                printf(" Len=%u, IsNeg=%u\n",
+                       (unsigned int)insn->imm->len,
+                       (unsigned int)insn->imm->isneg);
+                printf(" FLen=%u, FSign=%u\n",
+                       (unsigned int)insn->imm->f_len,
+                       (unsigned int)insn->imm->f_sign);
+            }
+            printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                   (unsigned int)insn->opcode[0],
+                   (unsigned int)insn->opcode[1],
+                   (unsigned int)insn->opcode[2],
+                   (unsigned int)insn->opcode_len);
+            printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                   (unsigned int)insn->addrsize,
+                   (unsigned int)insn->opersize,
+                   (unsigned int)insn->lockrep_pre,
+                   (unsigned int)insn->shift_op);
+            printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_const_data(bc);
+            printf("_Relative Jump_\n");
+            printf("Target=");
+            expr_print(jmprel->target);
+            printf("\nShort Form:\n");
+            /* opcode_len of 0 means this form does not exist */
+            if (jmprel->shortop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->shortop.opcode[0],
+                       (unsigned int)jmprel->shortop.opcode[1],
+                       (unsigned int)jmprel->shortop.opcode[2],
+                       (unsigned int)jmprel->shortop.opcode_len);
+            /* Near form follows the short form (no separate heading) */
+            if (jmprel->nearop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->nearop.opcode[0],
+                       (unsigned int)jmprel->nearop.opcode[1],
+                       (unsigned int)jmprel->nearop.opcode[2],
+                       (unsigned int)jmprel->nearop.opcode_len);
+            printf("OpSel=");
+            switch (jmprel->op_sel) {
+                case JR_NONE:
+                    printf("None");
+                    break;
+                case JR_SHORT:
+                    printf("Short");
+                    break;
+                case JR_NEAR:
+                    printf("Near");
+                    break;
+                case JR_SHORT_FORCED:
+                    printf("Forced Short");
+                    break;
+                case JR_NEAR_FORCED:
+                    printf("Forced Near");
+                    break;
+                default:
+                    printf("UNKNOWN!!");
+                    break;
+            }
+            printf("BITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                   (unsigned int)jmprel->mode_bits,
+                   (unsigned int)jmprel->addrsize,
+                   (unsigned int)jmprel->opersize,
+                   (unsigned int)jmprel->lockrep_pre);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Parser-time fixups for one x86 instruction:
+ *  - if the effective address still needs its ModRM/SIB bytes, expand EQUs
+ *    in the displacement and let expr_checkea() calculate them;
+ *  - expand and simplify the immediate, and for shift instructions whose
+ *    immediate is the integer 1, switch to the ",1" opcode form.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+        /* Derive the x86-specific EA data only once ea is known non-NULL */
+        x86_effaddr_data *ead = ea_get_data(ea);
+
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* First expand equ's */
+            expr_expand_equ(ea->disp);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte. We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                              ea->nosplit, &ea->len, &ead->modrm,
+                              &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                              &ead->valid_sib, &ead->need_sib))
+                return; /* failed, don't bother checking rest of insn */
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            expr_expand_equ(imm->val);
+            expr_simplify(imm->val);
+        }
+        /* TODO: check imm f_len vs. len? */
+
+        /* Handle shift_op special-casing */
+        if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+            if (intnum_get_uint(num) == 1) {
+                /* Use ,1 form: first copy ,1 opcode. */
+                insn->opcode[0] = insn->opcode[1];
+                /* Delete ModRM, as it's no longer needed */
+                xfree(ea);
+                insn->ea = (effaddr *)NULL;
+                /* Delete Imm, as it's not needed */
+                expr_delete(imm->val);
+                xfree(imm);
+                insn->imm = (immval *)NULL;
+            }
+            insn->shift_op = 0;
+        }
+    }
+
+
+}
+
+/* Architecture hook: run parser-time fixups on bc.
+ * Only X86_BC_INSN bytecodes need any; every other type is left alone.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            x86_bc_parser_finalize_insn(insn);
+            break;
+        default:
+            break;
+    }
+}
+
diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h
new file mode 100644
index 00000000..2c3336ee
--- /dev/null
+++ b/modules/arch/x86/x86-int.h
@@ -0,0 +1,95 @@
+/* $IdPath$
+ * x86 internals header file
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef YASM_X86_INT_H
+#define YASM_X86_INT_H
+
+/* x86-specific part of an effective address.  The x86_ea_new_*() functions
+ * allocate it in the same block as the generic effaddr, directly after it.
+ */
+typedef struct x86_effaddr_data {
+    unsigned char segment; /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in x86_bc_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
+                               0xff if unknown */
+} x86_effaddr_data;
+
+/* Type-specific data of an X86_BC_INSN bytecode */
+typedef struct x86_insn {
+    effaddr *ea; /* effective address */
+
+    immval *imm; /* immediate or relative value */
+
+    unsigned char opcode[3]; /* opcode */
+    unsigned char opcode_len;
+
+    unsigned char addrsize; /* 0 or =mode_bits => no override */
+    unsigned char opersize; /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate). In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array. We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0. Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_bc_insn_set_shift_flag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    unsigned char mode_bits; /* copy of x86_mode_bits taken at creation */
+} x86_insn;
+
+/* Type-specific data of an X86_BC_JMPREL (relative jump) bytecode */
+typedef struct x86_jmprel {
+    expr *target; /* target location */
+
+    struct {
+        unsigned char opcode[3];
+        unsigned char opcode_len; /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize; /* 0 or =mode_bits => no override */
+    unsigned char opersize; /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    unsigned char mode_bits; /* copy of x86_mode_bits taken at creation */
+} x86_jmprel;
+
+/* Bytecode hooks installed in x86_arch.bc */
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(const bytecode *bc);
+void x86_bc_parser_finalize(bytecode *bc);
+
+#endif
diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c index a5df22fb..c9cf1cae 100644 --- a/modules/arch/x86/x86arch.c +++ b/modules/arch/x86/x86arch.c @@ -22,11 +22,22 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" #include "arch.h" +#include "x86-int.h" + + +unsigned char x86_mode_bits = 0; /* Define arch structure -- see arch.h for details */ arch x86_arch = { "x86 (IA-32, x86-64)", - "x86" + "x86", + { + X86_BYTECODE_TYPE_MAX, + x86_bc_delete, + x86_bc_print, + x86_bc_parser_finalize + } };
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
new file mode 100644
index 00000000..70a207c1
--- /dev/null
+++ b/modules/arch/x86/x86arch.h
@@ -0,0 +1,93 @@
+/* $IdPath$
+ * x86 Architecture
header file
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef YASM_X86ARCH_H
+#define YASM_X86ARCH_H
+
+/* x86 bytecode types; numbering continues from BYTECODE_TYPE_BASE */
+typedef enum {
+    X86_BC_INSN = BYTECODE_TYPE_BASE,
+    X86_BC_JMPREL
+} x86_bytecode_type;
+/* One past the last x86 bytecode type.  Parenthesized so the expansion
+ * stays a single operand wherever the macro is used.
+ */
+#define X86_BYTECODE_TYPE_MAX (X86_BC_JMPREL+1)
+
+/* Which form of a relative jump is selected; the *_FORCED values are the
+ * ones given explicitly in the source.
+ */
+typedef enum {
+    JR_NONE,
+    JR_SHORT,
+    JR_NEAR,
+    JR_SHORT_FORCED,
+    JR_NEAR_FORCED
+} x86_jmprel_opcode_sel;
+
+/* Jump target expression together with its form selection */
+typedef struct x86_targetval {
+    expr *val;
+
+    x86_jmprel_opcode_sel op_sel;
+} x86_targetval;
+
+/* Effective address construction */
+void x86_ea_set_segment(effaddr *ea, unsigned char segment);
+effaddr *x86_ea_new_reg(unsigned long reg);
+effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_expr(expr *e);
+
+effaddr *x86_bc_insn_get_ea(bytecode *bc);
+
+/* Modifiers applied to an already-created instruction bytecode */
+void x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize);
+void x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize);
+void x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix);
+void x86_bc_insn_set_shift_flag(bytecode *bc);
+
+void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                               x86_jmprel_opcode_sel new_sel);
+
+/* Structure with *all* inputs passed to x86_bc_new_insn().
+ * IMPORTANT: ea and imm cannot be reused or freed after calling the
+ * function (it doesn't make a copy).
+ */
+typedef struct x86_new_insn_data {
+    effaddr *ea;
+    immval *imm;
+    unsigned char opersize;
+    unsigned char op_len;
+    unsigned char op[3];
+    unsigned char spare; /* bits to go in 'spare' field of ModRM */
+    unsigned char im_len;
+    unsigned char im_sign;
+} x86_new_insn_data;
+
+bytecode *x86_bc_new_insn(x86_new_insn_data *d);
+
+/* Structure with *all* inputs passed to x86_bc_new_jmprel().
+ * Pass 0 for short_op_len or near_op_len if that version of the opcode
+ * doesn't exist.
+ */
+typedef struct x86_new_jmprel_data {
+    x86_targetval *target;
+    unsigned char short_op_len;
+    unsigned char short_op[3];
+    unsigned char near_op_len;
+    unsigned char near_op[3];
+    unsigned char addrsize;
+} x86_new_jmprel_data;
+
+bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
+
+/* Current BITS setting; copied into each new instruction and jump bytecode */
+extern unsigned char x86_mode_bits;
+
+#endif
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
new file mode 100644
index 00000000..80837116
--- /dev/null
+++ b/modules/arch/x86/x86bc.c
@@ -0,0 +1,512 @@
+/*
+ * x86 architecture description
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Create an X86_BC_INSN bytecode from the description in *d.
+ * d->ea and d->imm are stored in the new bytecode, not copied; they are
+ * released later by x86_bc_delete().  The spare/reg field of the EA's ModRM
+ * byte is overwritten with d->spare.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc;
+    x86_insn *insn;
+
+    bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    insn = bc_get_data(bc);
+
+    insn->ea = d->ea;
+    if (d->ea) {
+        x86_effaddr_data *ead = ea_get_data(d->ea);
+        ead->modrm &= 0xC7; /* zero spare/reg bits */
+        ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */
+    }
+
+    insn->imm = d->imm;
+    if (d->imm) {
+        insn->imm->f_len = d->im_len;
+        insn->imm->f_sign = d->im_sign;
+    }
+
+    insn->opcode[0] = d->op[0];
+    insn->opcode[1] = d->op[1];
+    insn->opcode[2] = d->op[2];
+    insn->opcode_len = d->op_len;
+
+    insn->addrsize = 0;
+    insn->opersize = d->opersize;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+
+    /* Remember the BITS setting in effect when the instruction was parsed */
+    insn->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Create an X86_BC_JMPREL bytecode from the description in *d.
+ * An opcode length of 0 means that form (short or near) does not exist;
+ * forcing a form that does not exist is reported with Error().
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form must be checked against its own opcode length */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+        Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+        Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Record a segment override on ea (no-op when ea is NULL).
+ * Warns when a non-zero override replaces an already non-zero one.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    x86_effaddr_data *ead;
+
+    if (!ea)
+        return;
+
+    ead = ea_get_data(ea);
+
+    if (segment != 0 && ead->segment != 0)
+        Warning(_("multiple segment overrides, using leftmost"));
+
+    ead->segment = segment;
+}
+
+/* Allocate an effective address that names a register directly.
+ * Only the low 3 bits of reg are used; the ModRM byte is complete and no
+ * SIB byte is needed.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->sib = 0; /* not used, but x86_bc_print() reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address whose displacement is the expression e.
+ * e is stored, not copied.  ModRM is needed but not yet valid.
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->sib = 0; /* defined value until the SIB byte is calculated */
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff; /* we won't know until we know more about expr and
+                             the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address that carries an immediate of im_len bytes.
+ * The displacement points at imm->val (shared, not copied); neither ModRM
+ * nor SIB is needed.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->sib = 0; /* not used, but x86_bc_print() reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an instruction bytecode, or NULL when bc
+ * is NULL.  Raises InternalError() for any other bytecode type.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return NULL;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Look up the type-specific data only after bc has been validated */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Set the operand-size override of an instruction or relative jump.
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->opersize = opersize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->opersize = opersize;
+            break;
+        default:
+            InternalError(_("OperSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Set the address-size override of an instruction or relative jump.
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->addrsize = addrsize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->addrsize = addrsize;
+            break;
+        default:
+            InternalError(_("AddrSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Set the LOCK/REP prefix byte of an instruction or relative jump.
+ * Warns when a prefix was already present.  No-op when bc is NULL.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+    unsigned char *lockrep_pre = (unsigned char *)NULL;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            lockrep_pre = &insn->lockrep_pre;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            lockrep_pre = &jmprel->lockrep_pre;
+            break;
+        default:
+            InternalError(_("LockRep prefix applied to non-instruction"));
+            return;
+    }
+
+    if (*lockrep_pre != 0)
+        Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *lockrep_pre = prefix;
+}
+
+/* Mark an instruction bytecode as a shift opcode (see shift_op in x86_insn).
+ * No-op when bc is NULL.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Attempted to set shift flag on non-instruction"));
+
+    insn = bc_get_data(bc);
+
+    insn->shift_op = 1;
+}
+
+/* Store new_sel into *old_sel (no-op when old_sel is NULL).
+ * Warns when a forced SHORT/NEAR selection is being replaced by anything
+ * other than JR_NONE.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                          x86_jmprel_opcode_sel new_sel)
+{
+    if (!old_sel)
+        return;
+
+    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
+                               (*old_sel == JR_NEAR_FORCED)))
+        Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific contents of bc: the EA and immediate of an
+ * instruction, or the target expression of a relative jump.  Other bytecode
+ * types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            if (insn->ea) {
+                expr_delete(insn->ea->disp);
+                xfree(insn->ea);
+            }
+            if (insn->imm) {
+                expr_delete(insn->imm->val);
+                xfree(insn->imm);
+            }
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            expr_delete(jmprel->target);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Dump the x86-specific contents of bc to stdout for debugging.
+ * Other bytecode types print nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_const_data(bc);
+            printf("_Instruction_\n");
+            printf("Effective Address:");
+            if (!insn->ea)
+                printf(" (nil)\n");
+            else {
+                printf("\n Disp=");
+                if (insn->ea->disp)
+                    expr_print(insn->ea->disp);
+                else
+                    printf("(nil)");
+                printf("\n");
+                ead = ea_get_data(insn->ea);
+                printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                       (unsigned int)insn->ea->len,
+                       (unsigned int)ead->segment,
+                       (unsigned int)insn->ea->nosplit);
+                printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                       (unsigned int)ead->modrm,
+                       (unsigned int)ead->valid_modrm,
+                       (unsigned int)ead->need_modrm);
+                printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                       (unsigned int)ead->sib,
+                       (unsigned int)ead->valid_sib,
+                       (unsigned int)ead->need_sib);
+            }
+            printf("Immediate Value:");
+            if (!insn->imm)
+                printf(" (nil)\n");
+            else {
+                printf("\n Val=");
+                expr_print(insn->imm->val);
+                printf("\n");
+                printf(" Len=%u, IsNeg=%u\n",
+                       (unsigned int)insn->imm->len,
+                       (unsigned int)insn->imm->isneg);
+                printf(" FLen=%u, FSign=%u\n",
+                       (unsigned int)insn->imm->f_len,
+                       (unsigned int)insn->imm->f_sign);
+            }
+            printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                   (unsigned int)insn->opcode[0],
+                   (unsigned int)insn->opcode[1],
+                   (unsigned int)insn->opcode[2],
+                   (unsigned int)insn->opcode_len);
+            printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                   (unsigned int)insn->addrsize,
+                   (unsigned int)insn->opersize,
+                   (unsigned int)insn->lockrep_pre,
+                   (unsigned int)insn->shift_op);
+            printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_const_data(bc);
+            printf("_Relative Jump_\n");
+            printf("Target=");
+            expr_print(jmprel->target);
+            printf("\nShort Form:\n");
+            /* opcode_len of 0 means this form does not exist */
+            if (jmprel->shortop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->shortop.opcode[0],
+                       (unsigned int)jmprel->shortop.opcode[1],
+                       (unsigned int)jmprel->shortop.opcode[2],
+                       (unsigned int)jmprel->shortop.opcode_len);
+            /* Near form follows the short form (no separate heading) */
+            if (jmprel->nearop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->nearop.opcode[0],
+                       (unsigned int)jmprel->nearop.opcode[1],
+                       (unsigned int)jmprel->nearop.opcode[2],
+                       (unsigned int)jmprel->nearop.opcode_len);
+            printf("OpSel=");
+            switch (jmprel->op_sel) {
+                case JR_NONE:
+                    printf("None");
+                    break;
+                case JR_SHORT:
+                    printf("Short");
+                    break;
+                case JR_NEAR:
+                    printf("Near");
+                    break;
+                case JR_SHORT_FORCED:
+                    printf("Forced Short");
+                    break;
+                case JR_NEAR_FORCED:
+                    printf("Forced Near");
+                    break;
+                default:
+                    printf("UNKNOWN!!");
+                    break;
+            }
+            printf("BITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                   (unsigned int)jmprel->mode_bits,
+                   (unsigned int)jmprel->addrsize,
+                   (unsigned int)jmprel->opersize,
+                   (unsigned int)jmprel->lockrep_pre);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Parser-time fixups for one x86 instruction:
+ *  - if the effective address still needs its ModRM/SIB bytes, expand EQUs
+ *    in the displacement and let expr_checkea() calculate them;
+ *  - expand and simplify the immediate, and for shift instructions whose
+ *    immediate is the integer 1, switch to the ",1" opcode form.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+        /* Derive the x86-specific EA data only once ea is known non-NULL */
+        x86_effaddr_data *ead = ea_get_data(ea);
+
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* First expand equ's */
+            expr_expand_equ(ea->disp);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte. We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                              ea->nosplit, &ea->len, &ead->modrm,
+                              &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                              &ead->valid_sib, &ead->need_sib))
+                return; /* failed, don't bother checking rest of insn */
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            expr_expand_equ(imm->val);
+            expr_simplify(imm->val);
+        }
+        /* TODO: check imm f_len vs. len? */
+
+        /* Handle shift_op special-casing */
+        if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+            if (intnum_get_uint(num) == 1) {
+                /* Use ,1 form: first copy ,1 opcode. */
+                insn->opcode[0] = insn->opcode[1];
+                /* Delete ModRM, as it's no longer needed */
+                xfree(ea);
+                insn->ea = (effaddr *)NULL;
+                /* Delete Imm, as it's not needed */
+                expr_delete(imm->val);
+                xfree(imm);
+                insn->imm = (immval *)NULL;
+            }
+            insn->shift_op = 0;
+        }
+    }
+
+
+}
+
+/* Architecture hook: run parser-time fixups on bc.
+ * Only X86_BC_INSN bytecodes need any; every other type is left alone.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            x86_bc_parser_finalize_insn(insn);
+            break;
+        default:
+            break;
+    }
+}
+
diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in index 54893a76..fa318fd3 100644 --- a/modules/parsers/nasm/bison.y.in +++ b/modules/parsers/nasm/bison.y.in @@ -40,6 +40,7 @@ RCSID("$IdPath$"); #include "section.h" #include "objfmt.h" +#include "arch.h" #define YYDEBUG 1 @@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base; static bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; +/* additional data declarations (dynamically generated) */ +/* @DATADECLS@ */ + %} %union { @@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc; effaddr *ea; expr *exp; immval *im_val; - targetval tgt_val; + x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; @@ -126,7 +130,7 @@ static bytecode
*nasm_parser_temp_bc; %% input: /* empty */ | input line { - nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section), + nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), $2); if (nasm_parser_temp_bc) nasm_parser_prev_bc = nasm_parser_temp_bc; @@ -145,10 +149,10 @@ line: '\n' { $$ = (bytecode *)NULL; } ; lineexp: exp - | TIMES expr exp { $$ = $3; SetBCMultiple($$, $2); } + | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } | label { $$ = (bytecode *)NULL; } | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; SetBCMultiple($$, $3); } + | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } | label_id EQU expr { symrec_define_equ($1, $3); xfree($1); @@ -157,22 +161,16 @@ lineexp: exp ; exp: instr - | DECLARE_DATA datavals { $$ = bytecode_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bytecode_new_reserve($2, $1); } + | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } + | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); } ; -datavals: dataval { - datavals_initialize(&$$); - datavals_append(&$$, $1); - } - | datavals ',' dataval { - datavals_append(&$1, $3); - $$ = $1; - } +datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } + | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dataval_new_expr($1); } - | STRING { $$ = dataval_new_string($1); } +dataval: expr_no_string { $$ = dv_new_expr($1); } + | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); $$ = (dataval *)NULL; @@ -317,17 +315,20 @@ memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | error { Error(_("invalid effective address")); } ; -memaddr: memexpr { $$ = effaddr_new_expr($1); SetEASegment($$, 0); } - | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } - 
| REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen($$, 1); } - | WORD memaddr { $$ = $2; SetEALen($$, 2); } - | DWORD memaddr { $$ = $2; SetEALen($$, 4); } - | NOSPLIT memaddr { $$ = $2; SetEANosplit($$, 1); } +memaddr: memexpr { + $$ = x86_ea_new_expr($1); + x86_ea_set_segment($$, 0); + } + | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } + | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } + | WORD memaddr { $$ = $2; ea_set_len($$, 2); } + | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } + | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -378,43 +379,43 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { $$ = effaddr_new_reg($1); } +rm8x: reg8 { $$ = x86_ea_new_reg($1); } | mem8x ; -rm16x: reg16 { $$ = effaddr_new_reg($1); } +rm16x: reg16 { $$ = x86_ea_new_reg($1); } | mem16x ; -rm32x: reg32 { $$ = effaddr_new_reg($1); } +rm32x: reg32 { $$ = x86_ea_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { $$ = effaddr_new_reg($1); } +rm64x: MMXREG { $$ = x86_ea_new_reg($1); } | mem64x ; -rm128x: XMMREG { $$ = effaddr_new_reg($1); } +rm128x: XMMREG { $$ = x86_ea_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { $$ = effaddr_new_reg($1); } +rm8: reg8 { $$ = x86_ea_new_reg($1); } | mem8 ; -rm16: reg16 { $$ = effaddr_new_reg($1); } +rm16: reg16 { $$ = x86_ea_new_reg($1); } | mem16 ; -rm32: reg32 { $$ = effaddr_new_reg($1); } +rm32: reg32 { $$ = x86_ea_new_reg($1); } | mem32 ; -rm64: MMXREG { $$ = effaddr_new_reg($1); } +rm64: MMXREG { $$ = 
x86_ea_new_reg($1); } | mem64 ; -rm128: XMMREG { $$ = effaddr_new_reg($1); } +rm128: XMMREG { $$ = x86_ea_new_reg($1); } | mem128 ; /* immediate values */ -imm: expr { $$ = immval_new_expr($1); } +imm: expr { $$ = imm_new_expr($1); } ; /* explicit immediates */ @@ -437,9 +438,18 @@ imm32: imm ; /* jump targets */ -target: expr { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); } - | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } - | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +target: expr { + $$.val = $1; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); + } + | SHORT target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + } + | NEAR target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + } ; /* expression trees */ @@ -493,18 +503,36 @@ explabel: ID { $$ = symrec_use($1); xfree($1); } ; instr: instrbase - | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } - | ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); } - | REG_SS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); } - | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } - | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } - | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } - | REPZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); } + | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } + | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } + | REG_CS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); + } + | REG_SS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); + } + | REG_DS instr { 
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr    { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr   { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr     { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    /* REPZ/REPE is the same F3 prefix byte as REP; 0xF4 is the HLT opcode. */
+    | REPZ instr    { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
 ; /* instruction grammars (dynamically generated) */ @@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val) if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32)) Error(_("`%s' is not a valid argument to [BITS]"), val); else - mode_bits = (unsigned char)lval; + x86_mode_bits = (unsigned char)lval; } else { printf("Directive: Name=`%s' Value=`%s'\n", name, val); } diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl index adaa58db..be391260 100755 --- a/modules/parsers/nasm/gen_instr.pl +++ b/modules/parsers/nasm/gen_instr.pl @@ -353,7 +353,8 @@ sub cond_action_if ( $ $ $ $ $ $ $ ) my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_); return rule_header ($rule, $tokens, $count) . <<"EOF"; if (\$$regarg == $val) { - \$\$ = $func(@$a_eax); + @$a_eax + \$\$ = $func; } EOF } @@ -362,7 +363,8 @@ sub cond_action_elsif ( $ $ $ $ ) my ($regarg, $val, $func, $a_eax) = splice (@_); return <<"EOF"; else if (\$$regarg == $val) { - \$\$ = $func(@$a_eax); + @$a_eax + \$\$ = $func; } EOF } @@ -371,7 +373,8 @@ sub cond_action_else ( $ $ ) my ($func, $a_args) = splice (@_); return <<"EOF" . 
rule_footer; else { - \$\$ = $func (@$a_args); + @$a_args + \$\$ = $func; } EOF } @@ -388,7 +391,8 @@ sub action ( @ $ ) { my ($rule, $tokens, $func, $a_args, $count) = splice @_; return rule_header ($rule, $tokens, $count) - . " \$\$ = $func (@$a_args);\n" + . " @$a_args\n" + . " \$\$ = $func;\n" . rule_footer; } @@ -396,8 +400,9 @@ sub action_setshiftflag ( @ $ ) { my ($rule, $tokens, $func, $a_args, $count) = splice @_; return rule_header ($rule, $tokens, $count) - . " \$\$ = $func (@$a_args);\n" - . " SetInsnShiftFlag(\$\$);\n" + . " @$a_args\n" + . " \$\$ = $func;\n" + . " x86_bc_insn_set_shift_flag(\$\$);\n" . rule_footer; } @@ -421,7 +426,12 @@ sub output_yacc ($@) while () { - if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) + if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/}) + { + print GRAMMAR "static x86_new_insn_data idata;\n"; + print GRAMMAR "static x86_new_jmprel_data jrdata;\n"; + } + elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) { my $len = length("%token "); print GRAMMAR "%token "; @@ -500,69 +510,82 @@ sub output_yacc ($@) if $inst->[OPERANDS] ne 'nil'; $tokens =~ s/,/ ',' /g; $tokens =~ s/:/ ':' /g; - my $func = "bytecode_new_jmprel"; + my $datastruct = "x86_new_jmprel_data"; + my $datastructname = "jrdata"; + my $func = "x86_bc_new_jmprel(&$datastructname)"; # Create the argument list for bytecode_new my @args; # Target argument: HACK: Always assumed to be arg 1. - push @args, '&$2,'; + push @args, 'target=&$2;'; # test for short opcode "nil" if($inst->[SHORTOPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0,'; + push @args, 'short_op_len=0;'; + push @args, 'short_op[0]=0;'; + push @args, 'short_op[1]=0;'; + push @args, 'short_op[2]=0;'; } else { - # number of bytes of short opcode - push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ","; - # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[SHORTOPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . 
($1*2)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[SHORTOPCODE]; + # number of bytes of short opcode + push @args, "short_op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; + push @args, "short_op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o; + push @args, "short_op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o; + push @args, "short_op[2]=0;" if @opcodes < 3; } # test for near opcode "nil" if($inst->[NEAROPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0,'; + push @args, 'near_op_len=0;'; + push @args, 'near_op[0]=0;'; + push @args, 'near_op[1]=0;'; + push @args, 'near_op[2]=0;'; } else { - # number of bytes of near opcode - push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; - # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[NEAROPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[NEAROPCODE]; + # number of bytes of near opcode + push @args, "near_op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. 
+ $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; + push @args, "near_op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o; + push @args, "near_op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o; + push @args, "near_op[2]=0;" if @opcodes < 3; } # address size - push @args, "$inst->[ADSIZE]"; + push @args, "addrsize=$inst->[ADSIZE];"; $args[-1] =~ s/nil/0/; # now that we've constructed the arglist, subst $0.\d s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + # and add the data structure reference + s/^/$datastructname./g foreach (@args); + # generate the grammar print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); } @@ -583,65 +606,79 @@ sub output_yacc ($@) $tokens =~ s/:/ ':' /g; # offset args my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0; - my $func = "bytecode_new_insn"; + my $datastruct = "x86_new_insn_data"; + my $datastructname = "idata"; + my $func = "x86_bc_new_insn(&$datastructname)"; # Create the argument list for bytecode_new my @args; # operand size - push @args, "$inst->[OPSIZE],"; + push @args, "opersize=$inst->[OPSIZE];"; $args[-1] =~ s/nil/0/; - # number of bytes of opcodes - push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[OPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[OPCODE]; + # number of bytes of opcodes + push @args, "op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. 
+ $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg; + push @args, "op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,/o; + push @args, "op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; + push @args, "op[2]=0;" if @opcodes < 3; # effective addresses - push @args, $inst->[EFFADDR]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/; - $args[-1] =~ s/nil/0/; + my $effaddr = $inst->[EFFADDR]; + $effaddr =~ s/^nil/(effaddr *)NULL,0/; + $effaddr =~ s/nil/0/; # don't let a $0.\d match slip into the following rules. - $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/; - $args[-1] =~ s[(\$\d+)i,\s*(\d+)] - ["effaddr_new_imm($1, ".($2/8)."), 0"]e; - $args[-1] .= ','; + $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/; + $effaddr =~ s[(\$\d+)i,\s*(\d+)] + ["x86_ea_new_imm($1^ ".($2/8)."),0"]e; - die $args[-1] if $args[-1] =~ m/\d+[ri]/; + die $effaddr if $effaddr =~ m/\d+[ri]/; + + my @effaddr_split = split ',', $effaddr; + $effaddr_split[0] =~ s/\^/,/; + push @args, "ea=$effaddr_split[0];"; + push @args, "spare=$effaddr_split[1];"; # immediate sources - push @args, $inst->[IMM]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(immval *)NULL, 0/; + my $imm = $inst->[IMM]; + $imm =~ s/nil/(immval *)NULL,0/; # don't match $0.\d in the following rules. 
- $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s[^([0-9A-Fa-f]+),] - [immval_new_int(0x$1),]; - $args[-1] =~ s[^\$0.(\d+),] - [immval_new_int(\$1\[$1\]),]; + $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $imm =~ s[^([0-9A-Fa-f]+),] + [imm_new_int(0x$1),]; + $imm =~ s[^\$0.(\d+),] + [imm_new_int(\$1\[$1\]),]; # divide the second, and only the second, by 8 bits/byte - $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; + $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $imm .= ($3||'') eq 's' ? ',1' : ',0'; + + die $imm if $imm =~ m/\d+s/; - die $args[-1] if $args[-1] =~ m/\d+s/; + my @imm_split = split ",", $imm; + push @args, "imm=$imm_split[0];"; + push @args, "im_len=$imm_split[1];"; + push @args, "im_sign=$imm_split[2];"; # now that we've constructed the arglist, subst $0.\d s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + + # and add the data structure reference + s/^/$datastructname./g foreach (@args); # see if we match one of the cases to defer if (($inst->[OPERANDS]||"") =~ m/,ONE/) @@ -691,8 +728,9 @@ sub output_yacc ($@) # Now output imm version, with second opcode byte # set to ,1 opcode. Also call SetInsnShiftFlag(). $tokens =~ s/imm8x/imm/; - die "no space for ONE?" if $args[3] !~ m/0,/; - $args[3] = $ONE->[3]->[2]; + die "no space for ONE?" 
if $args[3] !~ m/0;/; + my $oneval = $ONE->[3]->[2]; + $args[3] =~ s/0/$oneval/; print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); } elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y index 54893a76..fa318fd3 100644 --- a/modules/parsers/nasm/nasm-bison.y +++ b/modules/parsers/nasm/nasm-bison.y @@ -40,6 +40,7 @@ RCSID("$IdPath$"); #include "section.h" #include "objfmt.h" +#include "arch.h" #define YYDEBUG 1 @@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base; static bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; +/* additional data declarations (dynamically generated) */ +/* @DATADECLS@ */ + %} %union { @@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc; effaddr *ea; expr *exp; immval *im_val; - targetval tgt_val; + x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; @@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc; %% input: /* empty */ | input line { - nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section), + nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), $2); if (nasm_parser_temp_bc) nasm_parser_prev_bc = nasm_parser_temp_bc; @@ -145,10 +149,10 @@ line: '\n' { $$ = (bytecode *)NULL; } ; lineexp: exp - | TIMES expr exp { $$ = $3; SetBCMultiple($$, $2); } + | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } | label { $$ = (bytecode *)NULL; } | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; SetBCMultiple($$, $3); } + | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } | label_id EQU expr { symrec_define_equ($1, $3); xfree($1); @@ -157,22 +161,16 @@ lineexp: exp ; exp: instr - | DECLARE_DATA datavals { $$ = bytecode_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bytecode_new_reserve($2, $1); } + | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } + | RESERVE_SPACE expr { $$ = 
bc_new_reserve($2, $1); } ; -datavals: dataval { - datavals_initialize(&$$); - datavals_append(&$$, $1); - } - | datavals ',' dataval { - datavals_append(&$1, $3); - $$ = $1; - } +datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } + | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dataval_new_expr($1); } - | STRING { $$ = dataval_new_string($1); } +dataval: expr_no_string { $$ = dv_new_expr($1); } + | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); $$ = (dataval *)NULL; @@ -317,17 +315,20 @@ memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | error { Error(_("invalid effective address")); } ; -memaddr: memexpr { $$ = effaddr_new_expr($1); SetEASegment($$, 0); } - | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen($$, 1); } - | WORD memaddr { $$ = $2; SetEALen($$, 2); } - | DWORD memaddr { $$ = $2; SetEALen($$, 4); } - | NOSPLIT memaddr { $$ = $2; SetEANosplit($$, 1); } +memaddr: memexpr { + $$ = x86_ea_new_expr($1); + x86_ea_set_segment($$, 0); + } + | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } + | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } + | WORD memaddr { $$ = $2; ea_set_len($$, 2); } + | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } + | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } 
; mem: '[' memaddr ']' { $$ = $2; } @@ -378,43 +379,43 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { $$ = effaddr_new_reg($1); } +rm8x: reg8 { $$ = x86_ea_new_reg($1); } | mem8x ; -rm16x: reg16 { $$ = effaddr_new_reg($1); } +rm16x: reg16 { $$ = x86_ea_new_reg($1); } | mem16x ; -rm32x: reg32 { $$ = effaddr_new_reg($1); } +rm32x: reg32 { $$ = x86_ea_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { $$ = effaddr_new_reg($1); } +rm64x: MMXREG { $$ = x86_ea_new_reg($1); } | mem64x ; -rm128x: XMMREG { $$ = effaddr_new_reg($1); } +rm128x: XMMREG { $$ = x86_ea_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { $$ = effaddr_new_reg($1); } +rm8: reg8 { $$ = x86_ea_new_reg($1); } | mem8 ; -rm16: reg16 { $$ = effaddr_new_reg($1); } +rm16: reg16 { $$ = x86_ea_new_reg($1); } | mem16 ; -rm32: reg32 { $$ = effaddr_new_reg($1); } +rm32: reg32 { $$ = x86_ea_new_reg($1); } | mem32 ; -rm64: MMXREG { $$ = effaddr_new_reg($1); } +rm64: MMXREG { $$ = x86_ea_new_reg($1); } | mem64 ; -rm128: XMMREG { $$ = effaddr_new_reg($1); } +rm128: XMMREG { $$ = x86_ea_new_reg($1); } | mem128 ; /* immediate values */ -imm: expr { $$ = immval_new_expr($1); } +imm: expr { $$ = imm_new_expr($1); } ; /* explicit immediates */ @@ -437,9 +438,18 @@ imm32: imm ; /* jump targets */ -target: expr { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); } - | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } - | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +target: expr { + $$.val = $1; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); + } + | SHORT target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + } + | NEAR target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + } ; /* expression trees */ @@ -493,18 +503,36 @@ explabel: ID { $$ = symrec_use($1); xfree($1); } ; instr: instrbase - | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } - | ADDRSIZE instr { $$ = $2; 
SetInsnAddrSizeOverride($$, $1); }
-    | REG_CS instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); }
-    | REG_SS instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); }
-    | REG_DS instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); }
-    | REG_ES instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); }
-    | REG_FS instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); }
-    | REG_GS instr  { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); }
-    | LOCK instr    { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); }
-    | REPNZ instr   { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); }
-    | REP instr     { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); }
-    | REPZ instr    { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); }
+    | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
+    | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
+    | REG_CS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
+    }
+    | REG_SS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
+    }
+    | REG_DS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr  {
+        $$ = $2;
+        x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr    { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr   { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr     { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    /* REPZ/REPE is the same F3 prefix byte as REP; 0xF4 is the HLT opcode. */
+    | REPZ instr    { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
 ; /* instruction grammars (dynamically generated) */ @@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val) if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32)) Error(_("`%s' is not a valid argument to [BITS]"), val); else - mode_bits = (unsigned char)lval; + x86_mode_bits = (unsigned char)lval; } else { printf("Directive: Name=`%s' Value=`%s'\n", name, val); } 
diff --git a/modules/parsers/nasm/token.l.in b/modules/parsers/nasm/token.l.in index 8c1d6834..54002180 100644 --- a/modules/parsers/nasm/token.l.in +++ b/modules/parsers/nasm/token.l.in @@ -33,6 +33,8 @@ RCSID("$IdPath$"); #include "bytecode.h" +#include "arch.h" + #include "bison.h" diff --git a/src/Makefile.am b/src/Makefile.am index a1a4f899..0fbd211b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,6 +25,7 @@ noinst_LIBRARIES = libyasm.a libyasm_a_SOURCES = \ bytecode.c \ bytecode.h \ + bc-int.h \ expr.c \ expr.h \ symrec.c \ @@ -37,6 +38,7 @@ libyasm_a_SOURCES = \ file.h \ section.c \ section.h \ + arch.c \ arch.h \ objfmt.h \ options.h \ diff --git a/src/arch.c b/src/arch.c new file mode 100644 index 00000000..5cbe3591 --- /dev/null +++ b/src/arch.c @@ -0,0 +1,29 @@ +/* $IdPath$ + * Architecture interface + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "util.h" + +#include "bytecode.h" + +#include "arch.h" + +arch *cur_arch; + diff --git a/src/arch.h b/src/arch.h index f1ed7268..b614aca4 100644 --- a/src/arch.h +++ b/src/arch.h @@ -28,9 +28,24 @@ struct arch { /* keyword used to select architecture */ const char *keyword; + + struct { + /* Maximum used bytecode type value+1. 
Should be set to + * BYTECODE_TYPE_BASE if no additional bytecode types are defined by + * the architecture. + */ + const int type_max; + + void (*bc_delete) (bytecode *bc); + void (*bc_print) (const bytecode *bc); + void (*bc_parser_finalize) (bytecode *bc); + } bc; }; /* Available architectures */ +#include "arch/x86/x86arch.h" extern arch x86_arch; +extern arch *cur_arch; + #endif diff --git a/src/arch/x86/Makefile.am b/src/arch/x86/Makefile.am index 5847318c..5c5598d0 100644 --- a/src/arch/x86/Makefile.am +++ b/src/arch/x86/Makefile.am @@ -3,7 +3,10 @@ noinst_LIBRARIES = libarch.a libarch_a_SOURCES = \ - arch.c + x86arch.h \ + x86-int.h \ + arch.c \ + bytecode.c INCLUDES = \ -I$(top_srcdir)/src \ diff --git a/src/arch/x86/arch.c b/src/arch/x86/arch.c index a5df22fb..c9cf1cae 100644 --- a/src/arch/x86/arch.c +++ b/src/arch/x86/arch.c @@ -22,11 +22,22 @@ #include "util.h" RCSID("$IdPath$"); +#include "bytecode.h" #include "arch.h" +#include "x86-int.h" + + +unsigned char x86_mode_bits = 0; /* Define arch structure -- see arch.h for details */ arch x86_arch = { "x86 (IA-32, x86-64)", - "x86" + "x86", + { + X86_BYTECODE_TYPE_MAX, + x86_bc_delete, + x86_bc_print, + x86_bc_parser_finalize + } }; diff --git a/src/arch/x86/bytecode.c b/src/arch/x86/bytecode.c new file mode 100644 index 00000000..80837116 --- /dev/null +++ b/src/arch/x86/bytecode.c @@ -0,0 +1,512 @@ +/* + * x86 architecture description + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Build an X86_BC_INSN bytecode from the parser-filled descriptor `d'.
+ * The bytecode stores d->ea and d->imm directly (no copy is made); the
+ * spare/reg field of the EA's ModRM byte is overwritten with d->spare.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc;
+    x86_insn *insn;
+
+    bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    insn = bc_get_data(bc);
+
+    insn->ea = d->ea;
+    if (d->ea) {
+        x86_effaddr_data *ead = ea_get_data(d->ea);
+        ead->modrm &= 0xC7;                     /* zero spare/reg bits */
+        ead->modrm |= (d->spare << 3) & 0x38;   /* plug in provided bits */
+    }
+
+    insn->imm = d->imm;
+    if (d->imm) {
+        insn->imm->f_len = d->im_len;
+        insn->imm->f_sign = d->im_sign;
+    }
+
+    insn->opcode[0] = d->op[0];
+    insn->opcode[1] = d->op[1];
+    insn->opcode[2] = d->op[2];
+    insn->opcode_len = d->op_len;
+
+    insn->addrsize = 0;
+    insn->opersize = d->opersize;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+
+    insn->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Build an X86_BC_JMPREL bytecode from the parser-filled descriptor `d'.
+ * Reports an error when the source forces SHORT or NEAR but the instruction
+ * has no opcode of that form (opcode length 0 means "form not available").
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* Each forced form must be checked against its own opcode length; the
+     * two lengths were previously swapped between these tests.
+     */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+        Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+        Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Record a segment-override prefix byte on an effective address.  A NULL
+ * `ea' is ignored.  Warns when a non-zero override replaces an existing one.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    x86_effaddr_data *ead;
+
+    if (!ea)
+        return;
+
+    ead = ea_get_data(ea);
+
+    if (segment != 0 && ead->segment != 0)
+        Warning(_("multiple segment overrides, using leftmost"));
+
+    ead->segment = segment;
+}
+
+/* Allocate an effective address that names a register directly: ModRM is
+ * complete (Mod=11, R/M=reg) and no displacement or SIB byte is used.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07);   /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address for a memory expression.  ModRM is needed
+ * but not yet valid; whether a SIB byte is needed is left undetermined.
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff;   /* we won't know until we know more about expr
+                               and the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address that carries an immediate as its
+ * displacement, with a fixed length and no ModRM or SIB byte.  The EA
+ * shares imm->val rather than copying it.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an instruction bytecode, or NULL when
+ * `bc' is NULL.  Any type other than X86_BC_INSN is an internal error.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return NULL;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Fetch the payload only after bc has been validated, matching the
+     * order used by the other x86_bc_insn_* accessors.
+     */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    /* Store an operand-size override on an instruction or relative jump.
+     * NULL bc is ignored; any other bytecode type is an InternalError.
+     */
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->opersize = opersize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->opersize = opersize;
+            break;
+        default:
+            InternalError(_("OperSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Store an address-size override on an instruction or relative jump.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->addrsize = addrsize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->addrsize = addrsize;
+            break;
+        default:
+            InternalError(_("AddrSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Store a LOCK/REP prefix byte on an instruction or relative jump.
+ * Warns when a prefix was already recorded, then replaces it with prefix.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+    unsigned char *lockrep_pre = (unsigned char *)NULL;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            lockrep_pre = &insn->lockrep_pre;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            lockrep_pre = &jmprel->lockrep_pre;
+            break;
+        default:
+            InternalError(_("LockRep prefix applied to non-instruction"));
+            return;
+    }
+
+    if (*lockrep_pre != 0)
+        Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *lockrep_pre = prefix;
+}
+
+/* Mark an X86_BC_INSN bytecode as a shift instruction (shift_op=1) so that
+ * x86_bc_parser_finalize() can pick between the imm and ,1 opcode forms.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Attempted to set shift flag on non-instruction"));
+
+    insn = bc_get_data(bc);
+
+    insn->shift_op = 1;
+}
+
+/* Replace *old_sel with new_sel (no-op when old_sel is NULL).  Warns when
+ * new_sel is not JR_NONE and *old_sel already held a forced selection.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                          x86_jmprel_opcode_sel new_sel)
+{
+    if (!old_sel)
+        return;
+
+    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
+                               (*old_sel == JR_NEAR_FORCED)))
+        Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific data hanging off bc: the EA (with its
+ * displacement) and the immediate (with its value) for instructions, the
+ * target expression for relative jumps.  The bytecode itself is not freed
+ * here.  Other bytecode types are left alone.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            if (insn->ea) {
+                expr_delete(insn->ea->disp);
+                xfree(insn->ea);
+            }
+            if (insn->imm) {
+                expr_delete(insn->imm->val);
+                xfree(insn->imm);
+            }
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            expr_delete(jmprel->target);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Dump the x86-specific contents of bc to stdout for debugging.
+ * Other bytecode types print nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_const_data(bc);
+            printf("_Instruction_\n");
+            printf("Effective Address:");
+            if (!insn->ea)
+                printf(" (nil)\n");
+            else {
+                printf("\n Disp=");
+                if (insn->ea->disp)
+                    expr_print(insn->ea->disp);
+                else
+                    printf("(nil)");
+                printf("\n");
+                ead = ea_get_data(insn->ea);
+                printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                       (unsigned int)insn->ea->len,
+                       (unsigned int)ead->segment,
+                       (unsigned int)insn->ea->nosplit);
+                printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                       (unsigned int)ead->modrm,
+                       (unsigned int)ead->valid_modrm,
+                       (unsigned int)ead->need_modrm);
+                printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                       (unsigned int)ead->sib,
+                       (unsigned int)ead->valid_sib,
+                       (unsigned int)ead->need_sib);
+            }
+            printf("Immediate Value:");
+            if (!insn->imm)
+                printf(" (nil)\n");
+            else {
+                printf("\n Val=");
+                expr_print(insn->imm->val);
+                printf("\n");
+                printf(" Len=%u, IsNeg=%u\n",
+                       (unsigned int)insn->imm->len,
+                       (unsigned int)insn->imm->isneg);
+                printf(" FLen=%u, FSign=%u\n",
+                       (unsigned int)insn->imm->f_len,
+                       (unsigned int)insn->imm->f_sign);
+            }
+            printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                   (unsigned int)insn->opcode[0],
+                   (unsigned int)insn->opcode[1],
+                   (unsigned int)insn->opcode[2],
+                   (unsigned int)insn->opcode_len);
+            printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                   (unsigned int)insn->addrsize,
+                   (unsigned int)insn->opersize,
+                   (unsigned int)insn->lockrep_pre,
+                   (unsigned int)insn->shift_op);
+            printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_const_data(bc);
+            printf("_Relative Jump_\n");
+            printf("Target=");
+            expr_print(jmprel->target);
+            printf("\nShort Form:\n");
+            /* opcode_len of 0 means this form does not exist */
+            if (jmprel->shortop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->shortop.opcode[0],
+                       (unsigned int)jmprel->shortop.opcode[1],
+                       (unsigned int)jmprel->shortop.opcode[2],
+                       (unsigned int)jmprel->shortop.opcode_len);
+            printf("Near Form:\n");
+            if (jmprel->nearop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->nearop.opcode[0],
+                       (unsigned int)jmprel->nearop.opcode[1],
+                       (unsigned int)jmprel->nearop.opcode[2],
+                       (unsigned int)jmprel->nearop.opcode_len);
+            printf("OpSel=");
+            switch (jmprel->op_sel) {
+                case JR_NONE:
+                    printf("None");
+                    break;
+                case JR_SHORT:
+                    printf("Short");
+                    break;
+                case JR_NEAR:
+                    printf("Near");
+                    break;
+                case JR_SHORT_FORCED:
+                    printf("Forced Short");
+                    break;
+                case JR_NEAR_FORCED:
+                    printf("Forced Near");
+                    break;
+                default:
+                    printf("UNKNOWN!!");
+                    break;
+            }
+            /* leading newline terminates the OpSel line */
+            printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                   (unsigned int)jmprel->mode_bits,
+                   (unsigned int)jmprel->addrsize,
+                   (unsigned int)jmprel->opersize,
+                   (unsigned int)jmprel->lockrep_pre);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Parser-time fixups for one instruction: resolve what can be resolved of
+ * the effective address (ModRM R/M bits, SIB) and pick the ,1 form of a
+ * shift when the immediate is known to be 1.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+        /* Derive the appended x86 data only once ea is known non-NULL. */
+        x86_effaddr_data *ead = ea_get_data(ea);
+
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* First expand equ's */
+            expr_expand_equ(ea->disp);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte.  We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                              ea->nosplit, &ea->len, &ead->modrm,
+                              &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                              &ead->valid_sib, &ead->need_sib))
+                return;     /* failed, don't bother checking rest of insn */
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            expr_expand_equ(imm->val);
+            expr_simplify(imm->val);
+        }
+        /* TODO: check imm f_len vs. len? */
+
+        /* Handle shift_op special-casing */
+        if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+            if (intnum_get_uint(num) == 1) {
+                /* Use ,1 form: first copy ,1 opcode. */
+                insn->opcode[0] = insn->opcode[1];
+                /* Delete ModRM, as it's no longer needed */
+                /* NOTE(review): ea->disp is not deleted here, unlike in
+                 * x86_bc_delete() -- confirm disp is always NULL (or shared
+                 * with imm->val) on this path.
+                 */
+                xfree(ea);
+                insn->ea = (effaddr *)NULL;
+                /* Delete Imm, as it's not needed */
+                expr_delete(imm->val);
+                xfree(imm);
+                insn->imm = (immval *)NULL;
+            }
+            insn->shift_op = 0;
+        }
+    }
+
+
+}
+
+/* Architecture hook run by the parser on each finished bytecode.
+ * Only X86_BC_INSN bytecodes need any work.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            x86_bc_parser_finalize_insn(insn);
+            break;
+        default:
+            break;
+    }
+}
+
diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h
new file mode 100644
index 00000000..2c3336ee
--- /dev/null
+++ b/src/arch/x86/x86-int.h
@@ -0,0 +1,95 @@
+/* $IdPath$
+ * x86 internals header file
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef YASM_X86_INT_H
+#define YASM_X86_INT_H
+
+/* x86-specific data appended to a generic effaddr (see ea_get_data()). */
+typedef struct x86_effaddr_data {
+    unsigned char segment;      /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in x86_bc_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm;  /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm;   /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib;    /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib;     /* 1 if SIB byte needed, 0 if not,
+                                   0xff if unknown */
+} x86_effaddr_data;
+
+/* x86-specific data appended to an X86_BC_INSN bytecode. */
+typedef struct x86_insn {
+    effaddr *ea;                /* effective address */
+
+    immval *imm;                /* immediate or relative value */
+
+    unsigned char opcode[3];    /* opcode */
+    unsigned char opcode_len;
+
+    unsigned char addrsize;     /* 0 or =mode_bits => no override */
+    unsigned char opersize;     /* 0 indicates no override */
+    unsigned char lockrep_pre;  /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate).  In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array.  We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0.  Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_bc_insn_set_shift_flag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    unsigned char mode_bits;
+} x86_insn;
+
+/* x86-specific data appended to an X86_BC_JMPREL bytecode. */
+typedef struct x86_jmprel {
+    expr *target;               /* target location */
+
+    struct {
+        unsigned char opcode[3];
+        unsigned char opcode_len;   /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize;     /* 0 or =mode_bits => no override */
+    unsigned char opersize;     /* 0 indicates no override */
+    unsigned char lockrep_pre;  /* 0 indicates no prefix */
+
+    unsigned char mode_bits;
+} x86_jmprel;
+
+/* Hooks installed in x86_arch.bc (see x86arch.c). */
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(const bytecode *bc);
+void x86_bc_parser_finalize(bytecode *bc);
+
+#endif
diff --git a/src/arch/x86/x86arch.c b/src/arch/x86/x86arch.c
index a5df22fb..c9cf1cae 100644
--- a/src/arch/x86/x86arch.c
+++ b/src/arch/x86/x86arch.c
@@ -22,11 +22,22 @@
 #include "util.h"
 RCSID("$IdPath$");
+#include "bytecode.h"
 #include "arch.h"
+#include "x86-int.h"
+
+
+unsigned char x86_mode_bits = 0;
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
-    "x86"
+    "x86",
+    {
+        X86_BYTECODE_TYPE_MAX,
+        x86_bc_delete,
+        x86_bc_print,
+        x86_bc_parser_finalize
+    }
 };
diff --git a/src/arch/x86/x86arch.h b/src/arch/x86/x86arch.h
new file mode 100644
index 00000000..70a207c1
--- /dev/null
+++ b/src/arch/x86/x86arch.h
@@ -0,0 +1,93 @@
+/* $IdPath$
+ * x86 Architecture header file
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef YASM_X86ARCH_H
+#define YASM_X86ARCH_H
+
+/* Bytecode types added by the x86 architecture; numbering continues from
+ * BYTECODE_TYPE_BASE.
+ */
+typedef enum {
+    X86_BC_INSN = BYTECODE_TYPE_BASE,
+    X86_BC_JMPREL
+} x86_bytecode_type;
+/* Parenthesized so the expansion stays a single operand wherever used. */
+#define X86_BYTECODE_TYPE_MAX   (X86_BC_JMPREL+1)
+
+typedef enum {
+    JR_NONE,
+    JR_SHORT,
+    JR_NEAR,
+    JR_SHORT_FORCED,
+    JR_NEAR_FORCED
+} x86_jmprel_opcode_sel;
+
+/* Jump target expression plus the opcode form selected for it. */
+typedef struct x86_targetval {
+    expr *val;
+
+    x86_jmprel_opcode_sel op_sel;
+} x86_targetval;
+
+void x86_ea_set_segment(effaddr *ea, unsigned char segment);
+effaddr *x86_ea_new_reg(unsigned long reg);
+effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_expr(expr *e);
+
+effaddr *x86_bc_insn_get_ea(bytecode *bc);
+
+void x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize);
+void x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize);
+void x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix);
+void x86_bc_insn_set_shift_flag(bytecode *bc);
+
+void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                               x86_jmprel_opcode_sel new_sel);
+
+/* Structure with *all* inputs passed to x86_bc_new_insn().
+ * IMPORTANT: ea and imm cannot be reused or freed after calling the
+ * function (it doesn't make a copy).
+ */
+typedef struct x86_new_insn_data {
+    effaddr *ea;
+    immval *imm;
+    unsigned char opersize;
+    unsigned char op_len;
+    unsigned char op[3];
+    unsigned char spare;        /* bits to go in 'spare' field of ModRM */
+    unsigned char im_len;
+    unsigned char im_sign;
+} x86_new_insn_data;
+
+bytecode *x86_bc_new_insn(x86_new_insn_data *d);
+
+/* Structure with *all* inputs passed to x86_bc_new_jmprel().
+ * Pass 0 for the opcode_len if that version of the opcode doesn't exist.
+ */
+typedef struct x86_new_jmprel_data {
+    x86_targetval *target;
+    unsigned char short_op_len;
+    unsigned char short_op[3];
+    unsigned char near_op_len;
+    unsigned char near_op[3];
+    unsigned char addrsize;
+} x86_new_jmprel_data;
+
+bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
+
+/* Current BITS setting; copied into each new x86 bytecode. */
+extern unsigned char x86_mode_bits;
+
+#endif
diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c
new file mode 100644
index 00000000..80837116
--- /dev/null
+++ b/src/arch/x86/x86bc.c
@@ -0,0 +1,512 @@
+/*
+ * x86 architecture description
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Create an X86_BC_INSN bytecode from the description in d.
+ *
+ * d->ea and d->imm are stored, not copied, so the new bytecode takes them
+ * over.  The spare/reg field of the EA's ModRM byte is overwritten with
+ * d->spare, and the immediate's final length/sign are taken from d.
+ * The current x86_mode_bits value is recorded in the instruction.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc;
+    x86_insn *insn;
+
+    bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    insn = bc_get_data(bc);
+
+    insn->ea = d->ea;
+    if (d->ea) {
+        x86_effaddr_data *ead = ea_get_data(d->ea);
+        ead->modrm &= 0xC7;                     /* zero spare/reg bits */
+        ead->modrm |= (d->spare << 3) & 0x38;   /* plug in provided bits */
+    }
+
+    insn->imm = d->imm;
+    if (d->imm) {
+        insn->imm->f_len = d->im_len;
+        insn->imm->f_sign = d->im_sign;
+    }
+
+    insn->opcode[0] = d->op[0];
+    insn->opcode[1] = d->op[1];
+    insn->opcode[2] = d->op[2];
+    insn->opcode_len = d->op_len;
+
+    insn->addrsize = 0;
+    insn->opersize = d->opersize;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+
+    insn->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Create an X86_BC_JMPREL bytecode from the description in d.
+ *
+ * The target expression pointer is stored, not copied.  An opcode length
+ * of 0 means that form (short or near) does not exist; an Error is
+ * reported when the source forced a form that does not exist.  The
+ * bytecode is still built and returned after such an error.
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form is only usable if *that* form has an opcode. */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+        Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+        Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Record a segment override on ea (no-op when ea is NULL).
+ * Warns when both the new and the already-stored segment are non-zero;
+ * the stored value is then replaced by segment.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    x86_effaddr_data *ead;
+
+    if (!ea)
+        return;
+
+    ead = ea_get_data(ea);
+
+    if (segment != 0 && ead->segment != 0)
+        Warning(_("multiple segment overrides, using leftmost"));
+
+    ead->segment = segment;
+}
+
+/* Allocate an effective address that names a register directly:
+ * ModRM is valid with Mod=11 and R/M set to the low 3 bits of reg;
+ * there is no displacement and no SIB byte.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07);   /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->sib = 0;       /* x86_bc_print() reads sib even when not valid */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address whose displacement is the expression e
+ * (stored, not copied).  ModRM is needed but not yet valid, and whether a
+ * SIB byte is needed is marked unknown (0xff).
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->sib = 0;       /* x86_bc_print() reads sib even when not valid */
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff;   /* we won't know until we know more about expr and
+                               the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address that carries imm->val as a displacement of
+ * im_len bytes, with neither a ModRM nor a SIB byte.  The expression
+ * pointer is shared with imm, not copied.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->sib = 0;       /* x86_bc_print() reads sib even when not valid */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an X86_BC_INSN bytecode, or NULL when bc
+ * is NULL.  Any other bytecode type raises an InternalError.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return NULL;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Only touch the appended data once bc is known to be non-NULL. */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    /* Store an operand-size override on an instruction or relative jump.
+     * NULL bc is ignored; any other bytecode type is an InternalError.
+     */
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->opersize = opersize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->opersize = opersize;
+            break;
+        default:
+            InternalError(_("OperSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Store an address-size override on an instruction or relative jump.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            insn->addrsize = addrsize;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            jmprel->addrsize = addrsize;
+            break;
+        default:
+            InternalError(_("AddrSize override applied to non-instruction"));
+            return;
+    }
+}
+
+/* Store a LOCK/REP prefix byte on an instruction or relative jump.
+ * Warns when a prefix was already recorded, then replaces it with prefix.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+    unsigned char *lockrep_pre = (unsigned char *)NULL;
+
+    if (!bc)
+        return;
+
+    switch (bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            lockrep_pre = &insn->lockrep_pre;
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            lockrep_pre = &jmprel->lockrep_pre;
+            break;
+        default:
+            InternalError(_("LockRep prefix applied to non-instruction"));
+            return;
+    }
+
+    if (*lockrep_pre != 0)
+        Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *lockrep_pre = prefix;
+}
+
+/* Mark an X86_BC_INSN bytecode as a shift instruction (shift_op=1) so that
+ * x86_bc_parser_finalize() can pick between the imm and ,1 opcode forms.
+ * NULL bc is ignored; any other bytecode type is an InternalError.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+        return;
+
+    if (bc->type != X86_BC_INSN)
+        InternalError(_("Attempted to set shift flag on non-instruction"));
+
+    insn = bc_get_data(bc);
+
+    insn->shift_op = 1;
+}
+
+/* Replace *old_sel with new_sel (no-op when old_sel is NULL).  Warns when
+ * new_sel is not JR_NONE and *old_sel already held a forced selection.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                          x86_jmprel_opcode_sel new_sel)
+{
+    if (!old_sel)
+        return;
+
+    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
+                               (*old_sel == JR_NEAR_FORCED)))
+        Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific data hanging off bc: the EA (with its
+ * displacement) and the immediate (with its value) for instructions, the
+ * target expression for relative jumps.  The bytecode itself is not freed
+ * here.  Other bytecode types are left alone.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_insn *insn;
+    x86_jmprel *jmprel;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            if (insn->ea) {
+                expr_delete(insn->ea->disp);
+                xfree(insn->ea);
+            }
+            if (insn->imm) {
+                expr_delete(insn->imm->val);
+                xfree(insn->imm);
+            }
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_data(bc);
+            expr_delete(jmprel->target);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Dump the x86-specific contents of bc to stdout for debugging.
+ * Other bytecode types print nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_const_data(bc);
+            printf("_Instruction_\n");
+            printf("Effective Address:");
+            if (!insn->ea)
+                printf(" (nil)\n");
+            else {
+                printf("\n Disp=");
+                if (insn->ea->disp)
+                    expr_print(insn->ea->disp);
+                else
+                    printf("(nil)");
+                printf("\n");
+                ead = ea_get_data(insn->ea);
+                printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                       (unsigned int)insn->ea->len,
+                       (unsigned int)ead->segment,
+                       (unsigned int)insn->ea->nosplit);
+                printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                       (unsigned int)ead->modrm,
+                       (unsigned int)ead->valid_modrm,
+                       (unsigned int)ead->need_modrm);
+                printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                       (unsigned int)ead->sib,
+                       (unsigned int)ead->valid_sib,
+                       (unsigned int)ead->need_sib);
+            }
+            printf("Immediate Value:");
+            if (!insn->imm)
+                printf(" (nil)\n");
+            else {
+                printf("\n Val=");
+                expr_print(insn->imm->val);
+                printf("\n");
+                printf(" Len=%u, IsNeg=%u\n",
+                       (unsigned int)insn->imm->len,
+                       (unsigned int)insn->imm->isneg);
+                printf(" FLen=%u, FSign=%u\n",
+                       (unsigned int)insn->imm->f_len,
+                       (unsigned int)insn->imm->f_sign);
+            }
+            printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                   (unsigned int)insn->opcode[0],
+                   (unsigned int)insn->opcode[1],
+                   (unsigned int)insn->opcode[2],
+                   (unsigned int)insn->opcode_len);
+            printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                   (unsigned int)insn->addrsize,
+                   (unsigned int)insn->opersize,
+                   (unsigned int)insn->lockrep_pre,
+                   (unsigned int)insn->shift_op);
+            printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+            break;
+        case X86_BC_JMPREL:
+            jmprel = bc_get_const_data(bc);
+            printf("_Relative Jump_\n");
+            printf("Target=");
+            expr_print(jmprel->target);
+            printf("\nShort Form:\n");
+            /* opcode_len of 0 means this form does not exist */
+            if (jmprel->shortop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->shortop.opcode[0],
+                       (unsigned int)jmprel->shortop.opcode[1],
+                       (unsigned int)jmprel->shortop.opcode[2],
+                       (unsigned int)jmprel->shortop.opcode_len);
+            printf("Near Form:\n");
+            if (jmprel->nearop.opcode_len == 0)
+                printf(" None\n");
+            else
+                printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                       (unsigned int)jmprel->nearop.opcode[0],
+                       (unsigned int)jmprel->nearop.opcode[1],
+                       (unsigned int)jmprel->nearop.opcode[2],
+                       (unsigned int)jmprel->nearop.opcode_len);
+            printf("OpSel=");
+            switch (jmprel->op_sel) {
+                case JR_NONE:
+                    printf("None");
+                    break;
+                case JR_SHORT:
+                    printf("Short");
+                    break;
+                case JR_NEAR:
+                    printf("Near");
+                    break;
+                case JR_SHORT_FORCED:
+                    printf("Forced Short");
+                    break;
+                case JR_NEAR_FORCED:
+                    printf("Forced Near");
+                    break;
+                default:
+                    printf("UNKNOWN!!");
+                    break;
+            }
+            /* leading newline terminates the OpSel line */
+            printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                   (unsigned int)jmprel->mode_bits,
+                   (unsigned int)jmprel->addrsize,
+                   (unsigned int)jmprel->opersize,
+                   (unsigned int)jmprel->lockrep_pre);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Parser-time fixups for one instruction: resolve what can be resolved of
+ * the effective address (ModRM R/M bits, SIB) and pick the ,1 form of a
+ * shift when the immediate is known to be 1.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+        /* Derive the appended x86 data only once ea is known non-NULL. */
+        x86_effaddr_data *ead = ea_get_data(ea);
+
+        if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                           (!ead->valid_modrm && ead->need_modrm))) {
+            /* First expand equ's */
+            expr_expand_equ(ea->disp);
+
+            /* Check validity of effective address and calc R/M bits of
+             * Mod/RM byte and SIB byte.  We won't know the Mod field
+             * of the Mod/RM byte until we know more about the
+             * displacement.
+             */
+            if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                              ea->nosplit, &ea->len, &ead->modrm,
+                              &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                              &ead->valid_sib, &ead->need_sib))
+                return;     /* failed, don't bother checking rest of insn */
+        }
+    }
+
+    if (imm) {
+        const intnum *num;
+
+        if (imm->val) {
+            expr_expand_equ(imm->val);
+            expr_simplify(imm->val);
+        }
+        /* TODO: check imm f_len vs. len? */
+
+        /* Handle shift_op special-casing */
+        if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+            if (intnum_get_uint(num) == 1) {
+                /* Use ,1 form: first copy ,1 opcode. */
+                insn->opcode[0] = insn->opcode[1];
+                /* Delete ModRM, as it's no longer needed */
+                /* NOTE(review): ea->disp is not deleted here, unlike in
+                 * x86_bc_delete() -- confirm disp is always NULL (or shared
+                 * with imm->val) on this path.
+                 */
+                xfree(ea);
+                insn->ea = (effaddr *)NULL;
+                /* Delete Imm, as it's not needed */
+                expr_delete(imm->val);
+                xfree(imm);
+                insn->imm = (immval *)NULL;
+            }
+            insn->shift_op = 0;
+        }
+    }
+
+
+}
+
+/* Architecture hook run by the parser on each finished bytecode.
+ * Only X86_BC_INSN bytecodes need any work.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    x86_insn *insn;
+
+    switch ((x86_bytecode_type)bc->type) {
+        case X86_BC_INSN:
+            insn = bc_get_data(bc);
+            x86_bc_parser_finalize_insn(insn);
+            break;
+        default:
+            break;
+    }
+}
+
diff --git a/src/bc-int.h b/src/bc-int.h
new file mode 100644
index 00000000..f992c465
--- /dev/null
+++ b/src/bc-int.h
@@ -0,0 +1,71 @@
+/* $IdPath$
+ * Bytecode internal structures header file
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef YASM_BC_INT_H
+#define YASM_BC_INT_H
+
+/* Architecture-independent part of an effective address. */
+struct effaddr {
+    expr *disp;                 /* address displacement */
+    unsigned char len;          /* length of disp (in bytes), 0 if unknown,
+                                 * 0xff if unknown and required to be >0.
+                                 */
+    unsigned char nosplit;      /* 1 if reg*2 should not be split into
+                                   reg+reg. (0 if not) */
+
+    /* architecture-dependent data may be appended */
+};
+/* Address of the architecture-dependent data that directly follows an
+ * effaddr.  Argument and expansion are parenthesized so any pointer
+ * expression can be passed and the result used as a single operand.
+ */
+#define ea_get_data(x)          ((void *)(((char *)(x))+sizeof(effaddr)))
+#define ea_get_const_data(x)    ((const void *)(((const char *)(x))+sizeof(effaddr)))
+
+struct immval {
+    expr *val;
+
+    unsigned char len;          /* length of val (in bytes), 0 if unknown */
+    unsigned char isneg;        /* the value has been explicitly negated */
+
+    unsigned char f_len;        /* final imm length */
+    unsigned char f_sign;       /* 1 if final imm should be signed */
+};
+
+/* Architecture-independent part of a bytecode. */
+struct bytecode {
+    STAILQ_ENTRY(bytecode) link;
+
+    bytecode_type type;
+
+    expr *multiple;             /* number of times bytecode is repeated,
+                                   NULL=1 */
+
+    unsigned long len;          /* total length of entire bytecode (including
+                                   multiple copies), 0 if unknown */
+
+    /* where it came from */
+    const char *filename;
+    unsigned int lineno;
+
+    /* other assembler state info */
+    unsigned long offset;       /* 0 if unknown */
+
+    /* architecture-dependent data may be appended */
+};
+/* Address of the type-specific data that directly follows a bytecode
+ * (allocated by bc_new_common() with the requested extra size).
+ */
+#define bc_get_data(x)          ((void *)(((char *)(x))+sizeof(bytecode)))
+#define bc_get_const_data(x)    ((const void *)(((const char *)(x))+sizeof(bytecode)))
+
+#endif
diff --git a/src/bytecode.c b/src/bytecode.c
index 407f01ec..6dae585d 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -29,40 +29,10 @@ RCSID("$IdPath$");
 #include "bytecode.h"
+#include "arch.h"
-struct effaddr {
-    expr *disp;                 /* address displacement */
-    unsigned char len;          /* length of disp (in bytes), 0 if unknown,
-                                 * 0xff if unknown and required to be >0.
- */ +#include "bc-int.h" - unsigned char segment; /* segment override, 0 if none */ - - /* How the spare (register) bits in Mod/RM are handled: - * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) - * They're set in bytecode_new_insn(). - */ - unsigned char modrm; - unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ - unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ - - unsigned char sib; - unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ - unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, - 0xff if unknown */ - unsigned char nosplit; /* 1 if reg*2 should not be split into - reg+reg. (0 if not) */ -}; - -struct immval { - expr *val; - - unsigned char len; /* length of val (in bytes), 0 if unknown */ - unsigned char isneg; /* the value has been explicitly negated */ - - unsigned char f_len; /* final imm length */ - unsigned char f_sign; /* 1 if final imm should be signed */ -}; struct dataval { STAILQ_ENTRY(dataval) link; @@ -75,158 +45,25 @@ struct dataval { } data; }; -struct bytecode { - STAILQ_ENTRY(bytecode) link; - - enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; - - /* This union has been somewhat tweaked to get it as small as possible - * on the 4-byte-aligned x86 architecture (without resorting to - * bitfields). In particular, insn and jmprel are the largest structures - * in the union, and are also the same size (after padding). jmprel - * can have another unsigned char added to the end without affecting - * its size. - * - * Don't worry about this too much, but keep it in mind when changing - * this structure. We care about the size of bytecode in particular - * because it accounts for the majority of the memory usage in the - * assembler when assembling a large file. 
- */ - union { - struct { - effaddr *ea; /* effective address */ - - immval *imm; /* immediate or relative value */ - - unsigned char opcode[3]; /* opcode */ - unsigned char opcode_len; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - /* HACK, but a space-saving one: shift opcodes have an immediate - * form and a ,1 form (with no immediate). In the parser, we - * set this and opcode_len=1, but store the ,1 version in the - * second byte of the opcode array. We then choose between the - * two versions once we know the actual value of imm (because we - * don't know it in the parser module). - * - * A override to force the imm version should just leave this at - * 0. Then later code won't know the ,1 version even exists. - * TODO: Figure out how this affects CPU flags processing. - * - * Call SetInsnShiftFlag() to set this flag to 1. - */ - unsigned char shift_op; - } insn; - struct { - expr *target; /* target location */ - - struct { - unsigned char opcode[3]; - unsigned char opcode_len; /* 0 = no opc for this version */ - } shortop, nearop; - - /* which opcode are we using? 
*/ - /* The *FORCED forms are specified in the source as such */ - jmprel_opcode_sel op_sel; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - } jmprel; - struct { - /* non-converted data (linked list) */ - datavalhead datahead; - - /* final (converted) size of each element (in bytes) */ - unsigned char size; - } data; - struct { - expr *numitems; /* number of items to reserve */ - unsigned char itemsize; /* size of each item (in bytes) */ - } reserve; - } data; +typedef struct bytecode_data { + /* non-converted data (linked list) */ + datavalhead datahead; - expr *multiple; /* number of times bytecode is repeated, - NULL=1 */ + /* final (converted) size of each element (in bytes) */ + unsigned char size; +} bytecode_data; - unsigned long len; /* total length of entire bytecode (including - multiple copies), 0 if unknown */ - - /* where it came from */ - const char *filename; - unsigned int lineno; - - /* other assembler state info */ - unsigned long offset; /* 0 if unknown */ - unsigned char mode_bits; -}; +typedef struct bytecode_reserve { + expr *numitems; /* number of items to reserve */ + unsigned char itemsize; /* size of each item (in bytes) */ +} bytecode_reserve; /* Static structures for when NULL is passed to conversion functions. 
*/ /* for Convert*ToBytes() */ unsigned char bytes_static[16]; -static bytecode *bytecode_new_common(void); - -effaddr * -effaddr_new_reg(unsigned long reg) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = (expr *)NULL; - ea->len = 0; - ea->segment = 0; - ea->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ - ea->valid_modrm = 1; - ea->need_modrm = 1; - ea->valid_sib = 0; - ea->need_sib = 0; - ea->nosplit = 0; - - return ea; -} - -effaddr * -effaddr_new_expr(expr *expr_ptr) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = expr_ptr; - ea->len = 0; - ea->segment = 0; - ea->modrm = 0; - ea->valid_modrm = 0; - ea->need_modrm = 1; - ea->valid_sib = 0; - ea->need_sib = 0xff; /* we won't know until we know more about expr and - the BITS/address override setting */ - ea->nosplit = 0; - - return ea; -} - -effaddr * -effaddr_new_imm(immval *im_ptr, unsigned char im_len) -{ - effaddr *ea = xmalloc(sizeof(effaddr)); - - ea->disp = im_ptr->val; - ea->len = im_len; - ea->segment = 0; - ea->modrm = 0; - ea->valid_modrm = 0; - ea->need_modrm = 0; - ea->valid_sib = 0; - ea->need_sib = 0; - ea->nosplit = 0; - - return ea; -} - immval * -immval_new_int(unsigned long int_val) +imm_new_int(unsigned long int_val) { immval *im = xmalloc(sizeof(immval)); @@ -245,7 +82,7 @@ immval_new_int(unsigned long int_val) } immval * -immval_new_expr(expr *expr_ptr) +imm_new_expr(expr *expr_ptr) { immval *im = xmalloc(sizeof(immval)); @@ -257,19 +94,7 @@ immval_new_expr(expr *expr_ptr) } void -SetEASegment(effaddr *ptr, unsigned char segment) -{ - if (!ptr) - return; - - if (segment != 0 && ptr->segment != 0) - Warning(_("multiple segment overrides, using leftmost")); - - ptr->segment = segment; -} - -void -SetEALen(effaddr *ptr, unsigned char len) +ea_set_len(effaddr *ptr, unsigned char len) { if (!ptr) return; @@ -282,7 +107,7 @@ SetEALen(effaddr *ptr, unsigned char len) } void -SetEANosplit(effaddr *ptr, unsigned char nosplit) +ea_set_nosplit(effaddr *ptr, unsigned 
char nosplit) { if (!ptr) return; @@ -290,108 +115,8 @@ SetEANosplit(effaddr *ptr, unsigned char nosplit) ptr->nosplit = nosplit; } -effaddr * -GetInsnEA(bytecode *bc) -{ - if (!bc) - return NULL; - - if (bc->type != BC_INSN) - InternalError(_("Trying to get EA of non-instruction")); - - return bc->data.insn.ea; -} - void -SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize) -{ - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - bc->data.insn.opersize = opersize; - break; - case BC_JMPREL: - bc->data.jmprel.opersize = opersize; - break; - default: - InternalError(_("OperSize override applied to non-instruction")); - return; - } -} - -void -SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) -{ - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - bc->data.insn.addrsize = addrsize; - break; - case BC_JMPREL: - bc->data.jmprel.addrsize = addrsize; - break; - default: - InternalError(_("AddrSize override applied to non-instruction")); - return; - } -} - -void -SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) -{ - unsigned char *lockrep_pre = (unsigned char *)NULL; - - if (!bc) - return; - - switch (bc->type) { - case BC_INSN: - lockrep_pre = &bc->data.insn.lockrep_pre; - break; - case BC_JMPREL: - lockrep_pre = &bc->data.jmprel.lockrep_pre; - break; - default: - InternalError(_("LockRep prefix applied to non-instruction")); - return; - } - - if (*lockrep_pre != 0) - Warning(_("multiple LOCK or REP prefixes, using leftmost")); - - *lockrep_pre = prefix; -} - -void -SetInsnShiftFlag(bytecode *bc) -{ - if (!bc) - return; - - if (bc->type != BC_INSN) - InternalError(_("Attempted to set shift flag on non-instruction")); - - bc->data.insn.shift_op = 1; -} - -void -SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) -{ - if (!old_sel) - return; - - if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) || - (*old_sel == JR_NEAR_FORCED))) - Warning(_("multiple SHORT or NEAR specifiers, using leftmost")); - 
*old_sel = new_sel; -} - -void -SetBCMultiple(bytecode *bc, expr *e) +bc_set_multiple(bytecode *bc, expr *e) { if (bc->multiple) bc->multiple = expr_new_tree(bc->multiple, EXPR_MUL, e); @@ -399,10 +124,12 @@ SetBCMultiple(bytecode *bc, expr *e) bc->multiple = e; } -static bytecode * -bytecode_new_common(void) +bytecode * +bc_new_common(bytecode_type type, size_t datasize) { - bytecode *bc = xmalloc(sizeof(bytecode)); + bytecode *bc = xmalloc(sizeof(bytecode)+datasize); + + bc->type = type; bc->multiple = (expr *)NULL; bc->len = 0; @@ -411,146 +138,59 @@ bytecode_new_common(void) bc->lineno = line_number; bc->offset = 0; - bc->mode_bits = mode_bits; return bc; } bytecode * -bytecode_new_insn(unsigned char opersize, - unsigned char opcode_len, - unsigned char op0, - unsigned char op1, - unsigned char op2, - effaddr *ea_ptr, - unsigned char spare, - immval *im_ptr, - unsigned char im_len, - unsigned char im_sign) +bc_new_data(datavalhead *datahead, unsigned long size) { - bytecode *bc = bytecode_new_common(); + bytecode *bc = bc_new_common(BC_DATA, sizeof(bytecode_data)); + bytecode_data *data = bc_get_data(bc); - bc->type = BC_INSN; - - bc->data.insn.ea = ea_ptr; - if (ea_ptr) { - bc->data.insn.ea->modrm &= 0xC7; /* zero spare/reg bits */ - bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */ - } - - bc->data.insn.imm = im_ptr; - if (im_ptr) { - bc->data.insn.imm->f_sign = im_sign; - bc->data.insn.imm->f_len = im_len; - } - - bc->data.insn.opcode[0] = op0; - bc->data.insn.opcode[1] = op1; - bc->data.insn.opcode[2] = op2; - bc->data.insn.opcode_len = opcode_len; - - bc->data.insn.addrsize = 0; - bc->data.insn.opersize = opersize; - bc->data.insn.lockrep_pre = 0; - bc->data.insn.shift_op = 0; - - return bc; -} - -bytecode * -bytecode_new_jmprel(targetval *target, - unsigned char short_opcode_len, - unsigned char short_op0, - unsigned char short_op1, - unsigned char short_op2, - unsigned char near_opcode_len, - unsigned char near_op0, - unsigned 
char near_op1, - unsigned char near_op2, - unsigned char addrsize) -{ - bytecode *bc = bytecode_new_common(); - - bc->type = BC_JMPREL; - - bc->data.jmprel.target = target->val; - bc->data.jmprel.op_sel = target->op_sel; - - if ((target->op_sel == JR_SHORT_FORCED) && (near_opcode_len == 0)) - Error(_("no SHORT form of that jump instruction exists")); - if ((target->op_sel == JR_NEAR_FORCED) && (short_opcode_len == 0)) - Error(_("no NEAR form of that jump instruction exists")); - - bc->data.jmprel.shortop.opcode[0] = short_op0; - bc->data.jmprel.shortop.opcode[1] = short_op1; - bc->data.jmprel.shortop.opcode[2] = short_op2; - bc->data.jmprel.shortop.opcode_len = short_opcode_len; - - bc->data.jmprel.nearop.opcode[0] = near_op0; - bc->data.jmprel.nearop.opcode[1] = near_op1; - bc->data.jmprel.nearop.opcode[2] = near_op2; - bc->data.jmprel.nearop.opcode_len = near_opcode_len; - - bc->data.jmprel.addrsize = addrsize; - bc->data.jmprel.opersize = 0; - bc->data.jmprel.lockrep_pre = 0; + data->datahead = *datahead; + data->size = size; return bc; } bytecode * -bytecode_new_data(datavalhead *datahead, unsigned long size) +bc_new_reserve(expr *numitems, unsigned long itemsize) { - bytecode *bc = bytecode_new_common(); - - bc->type = BC_DATA; + bytecode *bc = bc_new_common(BC_RESERVE, sizeof(bytecode_reserve)); + bytecode_reserve *reserve = bc_get_data(bc); - bc->data.data.datahead = *datahead; - bc->data.data.size = size; - - return bc; -} - -bytecode * -bytecode_new_reserve(expr *numitems, unsigned long itemsize) -{ - bytecode *bc = bytecode_new_common(); - - bc->type = BC_RESERVE; - - bc->data.reserve.numitems = numitems; - bc->data.reserve.itemsize = itemsize; + reserve->numitems = numitems; + reserve->itemsize = itemsize; return bc; } void -bytecode_delete(bytecode *bc) +bc_delete(bytecode *bc) { + bytecode_data *data; + bytecode_reserve *reserve; + if (!bc) return; switch (bc->type) { case BC_EMPTY: break; - case BC_INSN: - if (bc->data.insn.ea) { - 
expr_delete(bc->data.insn.ea->disp); - xfree(bc->data.insn.ea); - } - if (bc->data.insn.imm) { - expr_delete(bc->data.insn.imm->val); - xfree(bc->data.insn.imm); - } - break; - case BC_JMPREL: - expr_delete(bc->data.jmprel.target); - break; case BC_DATA: - datavals_delete(&bc->data.data.datahead); + data = bc_get_data(bc); + dvs_delete(&data->datahead); break; case BC_RESERVE: - expr_delete(bc->data.reserve.numitems); + reserve = bc_get_data(bc); + expr_delete(reserve->numitems); + break; + default: + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_delete(bc); + else + InternalError(_("Unknown bytecode type")); break; } @@ -559,131 +199,43 @@ bytecode_delete(bytecode *bc) } int -bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val) +bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val) { return 0; /* TODO */ } void -bytecode_print(const bytecode *bc) +bc_print(const bytecode *bc) { + const bytecode_data *data; + const bytecode_reserve *reserve; + switch (bc->type) { case BC_EMPTY: printf("_Empty_\n"); break; - case BC_INSN: - printf("_Instruction_\n"); - printf("Effective Address:"); - if (!bc->data.insn.ea) - printf(" (nil)\n"); - else { - printf("\n Disp="); - if (bc->data.insn.ea->disp) - expr_print(bc->data.insn.ea->disp); - else - printf("(nil)"); - printf("\n"); - printf(" Len=%u SegmentOv=%02x NoSplit=%u\n", - (unsigned int)bc->data.insn.ea->len, - (unsigned int)bc->data.insn.ea->segment, - (unsigned int)bc->data.insn.ea->nosplit); - printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n", - (unsigned int)bc->data.insn.ea->modrm, - (unsigned int)bc->data.insn.ea->valid_modrm, - (unsigned int)bc->data.insn.ea->need_modrm); - printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n", - (unsigned int)bc->data.insn.ea->sib, - (unsigned int)bc->data.insn.ea->valid_sib, - (unsigned int)bc->data.insn.ea->need_sib); - } - printf("Immediate Value:"); - if (!bc->data.insn.imm) - printf(" (nil)\n"); - else { - printf("\n Val="); - 
expr_print(bc->data.insn.imm->val); - printf("\n"); - printf(" Len=%u, IsNeg=%u\n", - (unsigned int)bc->data.insn.imm->len, - (unsigned int)bc->data.insn.imm->isneg); - printf(" FLen=%u, FSign=%u\n", - (unsigned int)bc->data.insn.imm->f_len, - (unsigned int)bc->data.insn.imm->f_sign); - } - printf("Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.insn.opcode[0], - (unsigned int)bc->data.insn.opcode[1], - (unsigned int)bc->data.insn.opcode[2], - (unsigned int)bc->data.insn.opcode_len); - printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n", - (unsigned int)bc->data.insn.addrsize, - (unsigned int)bc->data.insn.opersize, - (unsigned int)bc->data.insn.lockrep_pre, - (unsigned int)bc->data.insn.shift_op); - break; - case BC_JMPREL: - printf("_Relative Jump_\n"); - printf("Target="); - expr_print(bc->data.jmprel.target); - printf("\nShort Form:\n"); - if (!bc->data.jmprel.shortop.opcode_len == 0) - printf(" None\n"); - else - printf(" Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.jmprel.shortop.opcode[0], - (unsigned int)bc->data.jmprel.shortop.opcode[1], - (unsigned int)bc->data.jmprel.shortop.opcode[2], - (unsigned int)bc->data.jmprel.shortop.opcode_len); - if (!bc->data.jmprel.nearop.opcode_len == 0) - printf(" None\n"); - else - printf(" Opcode: %02x %02x %02x OpLen=%u\n", - (unsigned int)bc->data.jmprel.nearop.opcode[0], - (unsigned int)bc->data.jmprel.nearop.opcode[1], - (unsigned int)bc->data.jmprel.nearop.opcode[2], - (unsigned int)bc->data.jmprel.nearop.opcode_len); - printf("OpSel="); - switch (bc->data.jmprel.op_sel) { - case JR_NONE: - printf("None"); - break; - case JR_SHORT: - printf("Short"); - break; - case JR_NEAR: - printf("Near"); - break; - case JR_SHORT_FORCED: - printf("Forced Short"); - break; - case JR_NEAR_FORCED: - printf("Forced Near"); - break; - default: - printf("UNKNOWN!!"); - break; - } - printf("\nAddrSize=%u OperSize=%u LockRepPre=%02x\n", - (unsigned int)bc->data.jmprel.addrsize, - (unsigned 
int)bc->data.jmprel.opersize, - (unsigned int)bc->data.jmprel.lockrep_pre); - break; case BC_DATA: + data = bc_get_const_data(bc); printf("_Data_\n"); printf("Final Element Size=%u\n", - (unsigned int)bc->data.data.size); + (unsigned int)data->size); printf("Elements:\n"); - datavals_print(&bc->data.data.datahead); + dvs_print(&data->datahead); break; case BC_RESERVE: + reserve = bc_get_const_data(bc); printf("_Reserve_\n"); printf("Num Items="); - expr_print(bc->data.reserve.numitems); + expr_print(reserve->numitems); printf("\nItem Size=%u\n", - (unsigned int)bc->data.reserve.itemsize); + (unsigned int)reserve->itemsize); break; default: - printf("_Unknown_\n"); + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_print(bc); + else + printf("_Unknown_\n"); + break; } printf("Multiple="); if (!bc->multiple) @@ -694,95 +246,42 @@ bytecode_print(const bytecode *bc) printf("Length=%lu\n", bc->len); printf("Filename=\"%s\" Line Number=%u\n", bc->filename ? bc->filename : "", bc->lineno); - printf("Offset=%lx BITS=%u\n", bc->offset, bc->mode_bits); -} - -static void -bytecode_parser_finalize_insn(bytecode *bc) -{ - effaddr *ea = bc->data.insn.ea; - immval *imm = bc->data.insn.imm; - - if (ea) { - if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) || - (!ea->valid_modrm && ea->need_modrm))) { - /* First expand equ's */ - expr_expand_equ(ea->disp); - - /* Check validity of effective address and calc R/M bits of - * Mod/RM byte and SIB byte. We won't know the Mod field - * of the Mod/RM byte until we know more about the - * displacement. - */ - if (!expr_checkea(&ea->disp, &bc->data.insn.addrsize, - bc->mode_bits, ea->nosplit, &ea->len, &ea->modrm, - &ea->valid_modrm, &ea->need_modrm, &ea->sib, - &ea->valid_sib, &ea->need_sib)) - return; /* failed, don't bother checking rest of insn */ - } - } - - if (imm) { - const intnum *num; - - if (imm->val) { - expr_expand_equ(imm->val); - expr_simplify(imm->val); - } - /* TODO: check imm f_len vs. len? 
*/ - - /* Handle shift_op special-casing */ - if (bc->data.insn.shift_op && (num = expr_get_intnum(&imm->val))) { - if (intnum_get_uint(num) == 1) { - /* Use ,1 form: first copy ,1 opcode. */ - bc->data.insn.opcode[0] = bc->data.insn.opcode[1]; - /* Delete ModRM, as it's no longer needed */ - xfree(ea); - bc->data.insn.ea = (effaddr *)NULL; - /* Delete Imm, as it's not needed */ - expr_delete(imm->val); - xfree(imm); - bc->data.insn.imm = (immval *)NULL; - } - bc->data.insn.shift_op = 0; - } - } - - + printf("Offset=%lx\n", bc->offset); } void -bytecode_parser_finalize(bytecode *bc) +bc_parser_finalize(bytecode *bc) { switch (bc->type) { case BC_EMPTY: /* FIXME: delete it (probably in bytecodes_ level, not here */ InternalError(_("got empty bytecode in parser_finalize")); break; - case BC_INSN: - bytecode_parser_finalize_insn(bc); - break; default: + if (bc->type < cur_arch->bc.type_max) + cur_arch->bc.bc_parser_finalize(bc); + else + InternalError(_("Unknown bytecode type")); break; } } void -bytecodes_delete(bytecodehead *headp) +bcs_delete(bytecodehead *headp) { bytecode *cur, *next; cur = STAILQ_FIRST(headp); while (cur) { next = STAILQ_NEXT(cur, link); - bytecode_delete(cur); + bc_delete(cur); cur = next; } STAILQ_INIT(headp); } bytecode * -bytecodes_append(bytecodehead *headp, bytecode *bc) +bcs_append(bytecodehead *headp, bytecode *bc) { if (bc) { if (bc->type != BC_EMPTY) { @@ -796,27 +295,27 @@ bytecodes_append(bytecodehead *headp, bytecode *bc) } void -bytecodes_print(const bytecodehead *headp) +bcs_print(const bytecodehead *headp) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) { printf("---Next Bytecode---\n"); - bytecode_print(cur); + bc_print(cur); } } void -bytecodes_parser_finalize(bytecodehead *headp) +bcs_parser_finalize(bytecodehead *headp) { bytecode *cur; STAILQ_FOREACH(cur, headp, link) - bytecode_parser_finalize(cur); + bc_parser_finalize(cur); } dataval * -dataval_new_expr(expr *expn) +dv_new_expr(expr *expn) { dataval *retval = 
xmalloc(sizeof(dataval)); @@ -827,7 +326,7 @@ dataval_new_expr(expr *expn) } dataval * -dataval_new_string(char *str_val) +dv_new_string(char *str_val) { dataval *retval = xmalloc(sizeof(dataval)); @@ -838,7 +337,7 @@ dataval_new_string(char *str_val) } void -datavals_delete(datavalhead *headp) +dvs_delete(datavalhead *headp) { dataval *cur, *next; @@ -854,7 +353,7 @@ datavals_delete(datavalhead *headp) } dataval * -datavals_append(datavalhead *headp, dataval *dv) +dvs_append(datavalhead *headp, dataval *dv) { if (dv) { STAILQ_INSERT_TAIL(headp, dv, link); @@ -864,7 +363,7 @@ datavals_append(datavalhead *headp, dataval *dv) } void -datavals_print(const datavalhead *head) +dvs_print(const datavalhead *head) { dataval *cur; diff --git a/src/bytecode.h b/src/bytecode.h index 3683bae9..38fa3113 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -27,87 +27,43 @@ typedef struct immval immval; typedef STAILQ_HEAD(datavalhead, dataval) datavalhead; typedef struct dataval dataval; +/* Additional types may be architecture-defined starting at + * BYTECODE_TYPE_BASE. 
+ */ typedef enum { - JR_NONE, - JR_SHORT, - JR_NEAR, - JR_SHORT_FORCED, - JR_NEAR_FORCED -} jmprel_opcode_sel; - -typedef struct targetval { - expr *val; - - jmprel_opcode_sel op_sel; -} targetval; - -effaddr *effaddr_new_reg(unsigned long reg); -effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len); -effaddr *effaddr_new_expr(expr *expr_ptr); + BC_EMPTY = 0, + BC_DATA, + BC_RESERVE +} bytecode_type; +#define BYTECODE_TYPE_BASE BC_RESERVE+1 -immval *immval_new_int(unsigned long int_val); -immval *immval_new_expr(expr *expr_ptr); +immval *imm_new_int(unsigned long int_val); +immval *imm_new_expr(expr *e); -void SetEASegment(effaddr *ptr, unsigned char segment); -void SetEALen(effaddr *ptr, unsigned char len); -void SetEANosplit(effaddr *ptr, unsigned char nosplit); +void ea_set_len(effaddr *ea, unsigned char len); +void ea_set_nosplit(effaddr *ea, unsigned char nosplit); -effaddr *GetInsnEA(bytecode *bc); +void bc_set_multiple(bytecode *bc, expr *e); -void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); -void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); -void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); -void SetInsnShiftFlag(bytecode *bc); +bytecode *bc_new_common(bytecode_type type, size_t datasize); +bytecode *bc_new_data(datavalhead *datahead, unsigned long size); +bytecode *bc_new_reserve(expr *numitems, unsigned long itemsize); -void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); - -void SetBCMultiple(bytecode *bc, expr *e); - -/* IMPORTANT: ea_ptr and im_ptr cannot be reused or freed after calling this - * function (it doesn't make a copy). - */ -bytecode *bytecode_new_insn(unsigned char opersize, - unsigned char opcode_len, - unsigned char op0, - unsigned char op1, - unsigned char op2, - effaddr *ea_ptr, - unsigned char spare, - immval *im_ptr, - unsigned char im_len, - unsigned char im_sign); - -/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. 
*/ -bytecode *bytecode_new_jmprel(targetval *target, - unsigned char short_opcode_len, - unsigned char short_op0, - unsigned char short_op1, - unsigned char short_op2, - unsigned char near_opcode_len, - unsigned char near_op0, - unsigned char near_op1, - unsigned char near_op2, - unsigned char addrsize); - -bytecode *bytecode_new_data(datavalhead *datahead, unsigned long size); - -bytecode *bytecode_new_reserve(expr *numitems, unsigned long itemsize); - -void bytecode_delete(bytecode *bc); +void bc_delete(bytecode *bc); /* Gets the offset of the bytecode specified by bc if possible. * Return value is IF POSSIBLE, not the value. */ -int bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val); +int bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val); -void bytecode_print(const bytecode *bc); +void bc_print(const bytecode *bc); -void bytecode_parser_finalize(bytecode *bc); +void bc_parser_finalize(bytecode *bc); /* void bytecodes_initialize(bytecodehead *headp); */ #define bytecodes_initialize(headp) STAILQ_INIT(headp) -void bytecodes_delete(bytecodehead *headp); +void bcs_delete(bytecodehead *headp); /* Adds bc to the list of bytecodes headp. * NOTE: Does not make a copy of bc; so don't pass this function @@ -115,20 +71,20 @@ void bytecodes_delete(bytecodehead *headp); * this function. If bc was actually appended (it wasn't NULL or empty), * then returns bc, otherwise returns NULL. 
*/ -bytecode *bytecodes_append(bytecodehead *headp, bytecode *bc); +bytecode *bcs_append(bytecodehead *headp, bytecode *bc); -void bytecodes_print(const bytecodehead *headp); +void bcs_print(const bytecodehead *headp); -void bytecodes_parser_finalize(bytecodehead *headp); +void bcs_parser_finalize(bytecodehead *headp); -dataval *dataval_new_expr(expr *expn); -dataval *dataval_new_float(floatnum *flt); -dataval *dataval_new_string(char *str_val); +dataval *dv_new_expr(expr *expn); +dataval *dv_new_float(floatnum *flt); +dataval *dv_new_string(char *str_val); -/* void datavals_initialize(datavalhead *headp); */ -#define datavals_initialize(headp) STAILQ_INIT(headp) +/* void dvs_initialize(datavalhead *headp); */ +#define dvs_initialize(headp) STAILQ_INIT(headp) -void datavals_delete(datavalhead *headp); +void dvs_delete(datavalhead *headp); /* Adds dv to the list of datavals headp. * NOTE: Does not make a copy of dv; so don't pass this function @@ -136,8 +92,8 @@ void datavals_delete(datavalhead *headp); * this function. If dv was actually appended (it wasn't NULL), then * returns dv, otherwise returns NULL. 
*/ -dataval *datavals_append(datavalhead *headp, dataval *dv); +dataval *dvs_append(datavalhead *headp, dataval *dv); -void datavals_print(const datavalhead *head); +void dvs_print(const datavalhead *head); #endif diff --git a/src/globals.c b/src/globals.c index c21436a7..2239872f 100644 --- a/src/globals.c +++ b/src/globals.c @@ -29,7 +29,6 @@ RCSID("$IdPath$"); const char *in_filename = (const char *)NULL; unsigned int line_number = 1; -unsigned char mode_bits = 0; unsigned int asm_options = 0; static ternary_tree filename_table = (ternary_tree)NULL; diff --git a/src/globals.h b/src/globals.h index 23376e13..d0457793 100644 --- a/src/globals.h +++ b/src/globals.h @@ -24,7 +24,6 @@ extern const char *in_filename; extern unsigned int line_number; -extern unsigned char mode_bits; extern unsigned int asm_options; void switch_filename(const char *filename); diff --git a/src/linemgr.c b/src/linemgr.c index c21436a7..2239872f 100644 --- a/src/linemgr.c +++ b/src/linemgr.c @@ -29,7 +29,6 @@ RCSID("$IdPath$"); const char *in_filename = (const char *)NULL; unsigned int line_number = 1; -unsigned char mode_bits = 0; unsigned int asm_options = 0; static ternary_tree filename_table = (ternary_tree)NULL; diff --git a/src/linemgr.h b/src/linemgr.h index 23376e13..d0457793 100644 --- a/src/linemgr.h +++ b/src/linemgr.h @@ -24,7 +24,6 @@ extern const char *in_filename; extern unsigned int line_number; -extern unsigned char mode_bits; extern unsigned int asm_options; void switch_filename(const char *filename); diff --git a/src/main.c b/src/main.c index 0b4d711f..de7a27b9 100644 --- a/src/main.c +++ b/src/main.c @@ -41,6 +41,8 @@ RCSID("$IdPath$"); #include "preproc.h" #include "parser.h" +#include "arch.h" + #ifndef countof #define countof(x,y) (sizeof(x)/sizeof(y)) @@ -110,8 +112,11 @@ main(int argc, char *argv[]) switch_filename(""); } + /* Set x86 as the architecture */ + cur_arch = &x86_arch; + /* Get initial BITS setting from object format */ - mode_bits = 
dbg_objfmt.default_mode_bits; + x86_mode_bits = dbg_objfmt.default_mode_bits; sections = nasm_parser.do_parse(&nasm_parser, &dbg_objfmt, in); diff --git a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in index 54893a76..fa318fd3 100644 --- a/src/parsers/nasm/bison.y.in +++ b/src/parsers/nasm/bison.y.in @@ -40,6 +40,7 @@ RCSID("$IdPath$"); #include "section.h" #include "objfmt.h" +#include "arch.h" #define YYDEBUG 1 @@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base; static bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; +/* additional data declarations (dynamically generated) */ +/* @DATADECLS@ */ + %} %union { @@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc; effaddr *ea; expr *exp; immval *im_val; - targetval tgt_val; + x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; @@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc; %% input: /* empty */ | input line { - nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section), + nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), $2); if (nasm_parser_temp_bc) nasm_parser_prev_bc = nasm_parser_temp_bc; @@ -145,10 +149,10 @@ line: '\n' { $$ = (bytecode *)NULL; } ; lineexp: exp - | TIMES expr exp { $$ = $3; SetBCMultiple($$, $2); } + | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } | label { $$ = (bytecode *)NULL; } | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; SetBCMultiple($$, $3); } + | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } | label_id EQU expr { symrec_define_equ($1, $3); xfree($1); @@ -157,22 +161,16 @@ lineexp: exp ; exp: instr - | DECLARE_DATA datavals { $$ = bytecode_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bytecode_new_reserve($2, $1); } + | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } + | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); } ; -datavals: dataval { - datavals_initialize(&$$); - datavals_append(&$$, 
$1); - } - | datavals ',' dataval { - datavals_append(&$1, $3); - $$ = $1; - } +datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } + | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dataval_new_expr($1); } - | STRING { $$ = dataval_new_string($1); } +dataval: expr_no_string { $$ = dv_new_expr($1); } + | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); $$ = (dataval *)NULL; @@ -317,17 +315,20 @@ memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | error { Error(_("invalid effective address")); } ; -memaddr: memexpr { $$ = effaddr_new_expr($1); SetEASegment($$, 0); } - | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen($$, 1); } - | WORD memaddr { $$ = $2; SetEALen($$, 2); } - | DWORD memaddr { $$ = $2; SetEALen($$, 4); } - | NOSPLIT memaddr { $$ = $2; SetEANosplit($$, 1); } +memaddr: memexpr { + $$ = x86_ea_new_expr($1); + x86_ea_set_segment($$, 0); + } + | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } + | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } + | WORD memaddr { $$ = $2; ea_set_len($$, 2); } + | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } + | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -378,43 +379,43 @@ mem1632: mem ; /* explicit register or 
memory */ -rm8x: reg8 { $$ = effaddr_new_reg($1); } +rm8x: reg8 { $$ = x86_ea_new_reg($1); } | mem8x ; -rm16x: reg16 { $$ = effaddr_new_reg($1); } +rm16x: reg16 { $$ = x86_ea_new_reg($1); } | mem16x ; -rm32x: reg32 { $$ = effaddr_new_reg($1); } +rm32x: reg32 { $$ = x86_ea_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { $$ = effaddr_new_reg($1); } +rm64x: MMXREG { $$ = x86_ea_new_reg($1); } | mem64x ; -rm128x: XMMREG { $$ = effaddr_new_reg($1); } +rm128x: XMMREG { $$ = x86_ea_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { $$ = effaddr_new_reg($1); } +rm8: reg8 { $$ = x86_ea_new_reg($1); } | mem8 ; -rm16: reg16 { $$ = effaddr_new_reg($1); } +rm16: reg16 { $$ = x86_ea_new_reg($1); } | mem16 ; -rm32: reg32 { $$ = effaddr_new_reg($1); } +rm32: reg32 { $$ = x86_ea_new_reg($1); } | mem32 ; -rm64: MMXREG { $$ = effaddr_new_reg($1); } +rm64: MMXREG { $$ = x86_ea_new_reg($1); } | mem64 ; -rm128: XMMREG { $$ = effaddr_new_reg($1); } +rm128: XMMREG { $$ = x86_ea_new_reg($1); } | mem128 ; /* immediate values */ -imm: expr { $$ = immval_new_expr($1); } +imm: expr { $$ = imm_new_expr($1); } ; /* explicit immediates */ @@ -437,9 +438,18 @@ imm32: imm ; /* jump targets */ -target: expr { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); } - | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } - | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +target: expr { + $$.val = $1; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); + } + | SHORT target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + } + | NEAR target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + } ; /* expression trees */ @@ -493,18 +503,36 @@ explabel: ID { $$ = symrec_use($1); xfree($1); } ; instr: instrbase - | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } - | ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); 
} - | REG_SS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); } - | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } - | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } - | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } - | REPZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); } + | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } + | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } + | REG_CS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); + } + | REG_SS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); + } + | REG_DS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); + } + | REG_ES instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); + } + | REG_FS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); + } + | REG_GS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); + } + | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } + | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } + | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } + | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } ; /* instruction grammars (dynamically generated) */ @@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val) if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32)) Error(_("`%s' is not a valid argument to [BITS]"), val); else - mode_bits = (unsigned char)lval; + x86_mode_bits = (unsigned char)lval; } else { printf("Directive: Name=`%s' Value=`%s'\n", name, val); } diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl index 
adaa58db..be391260 100755 --- a/src/parsers/nasm/gen_instr.pl +++ b/src/parsers/nasm/gen_instr.pl @@ -353,7 +353,8 @@ sub cond_action_if ( $ $ $ $ $ $ $ ) my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_); return rule_header ($rule, $tokens, $count) . <<"EOF"; if (\$$regarg == $val) { - \$\$ = $func(@$a_eax); + @$a_eax + \$\$ = $func; } EOF } @@ -362,7 +363,8 @@ sub cond_action_elsif ( $ $ $ $ ) my ($regarg, $val, $func, $a_eax) = splice (@_); return <<"EOF"; else if (\$$regarg == $val) { - \$\$ = $func(@$a_eax); + @$a_eax + \$\$ = $func; } EOF } @@ -371,7 +373,8 @@ sub cond_action_else ( $ $ ) my ($func, $a_args) = splice (@_); return <<"EOF" . rule_footer; else { - \$\$ = $func (@$a_args); + @$a_args + \$\$ = $func; } EOF } @@ -388,7 +391,8 @@ sub action ( @ $ ) { my ($rule, $tokens, $func, $a_args, $count) = splice @_; return rule_header ($rule, $tokens, $count) - . " \$\$ = $func (@$a_args);\n" + . " @$a_args\n" + . " \$\$ = $func;\n" . rule_footer; } @@ -396,8 +400,9 @@ sub action_setshiftflag ( @ $ ) { my ($rule, $tokens, $func, $a_args, $count) = splice @_; return rule_header ($rule, $tokens, $count) - . " \$\$ = $func (@$a_args);\n" - . " SetInsnShiftFlag(\$\$);\n" + . " @$a_args\n" + . " \$\$ = $func;\n" + . " x86_bc_insn_set_shift_flag(\$\$);\n" . 
rule_footer; } @@ -421,7 +426,12 @@ sub output_yacc ($@) while () { - if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) + if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/}) + { + print GRAMMAR "static x86_new_insn_data idata;\n"; + print GRAMMAR "static x86_new_jmprel_data jrdata;\n"; + } + elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) { my $len = length("%token "); print GRAMMAR "%token "; @@ -500,69 +510,82 @@ sub output_yacc ($@) if $inst->[OPERANDS] ne 'nil'; $tokens =~ s/,/ ',' /g; $tokens =~ s/:/ ':' /g; - my $func = "bytecode_new_jmprel"; + my $datastruct = "x86_new_jmprel_data"; + my $datastructname = "jrdata"; + my $func = "x86_bc_new_jmprel(&$datastructname)"; # Create the argument list for bytecode_new my @args; # Target argument: HACK: Always assumed to be arg 1. - push @args, '&$2,'; + push @args, 'target=&$2;'; # test for short opcode "nil" if($inst->[SHORTOPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0,'; + push @args, 'short_op_len=0;'; + push @args, 'short_op[0]=0;'; + push @args, 'short_op[1]=0;'; + push @args, 'short_op[2]=0;'; } else { - # number of bytes of short opcode - push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ","; - # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[SHORTOPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[SHORTOPCODE]; + # number of bytes of short opcode + push @args, "short_op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. 
+ $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; + push @args, "short_op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o; + push @args, "short_op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o; + push @args, "short_op[2]=0;" if @opcodes < 3; } # test for near opcode "nil" if($inst->[NEAROPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0,'; + push @args, 'near_op_len=0;'; + push @args, 'near_op[0]=0;'; + push @args, 'near_op[1]=0;'; + push @args, 'near_op[2]=0;'; } else { - # number of bytes of near opcode - push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; - # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[NEAROPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[NEAROPCODE]; + # number of bytes of near opcode + push @args, "near_op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. 
+ $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; + push @args, "near_op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o; + push @args, "near_op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o; + push @args, "near_op[2]=0;" if @opcodes < 3; } # address size - push @args, "$inst->[ADSIZE]"; + push @args, "addrsize=$inst->[ADSIZE];"; $args[-1] =~ s/nil/0/; # now that we've constructed the arglist, subst $0.\d s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + # and add the data structure reference + s/^/$datastructname./g foreach (@args); + # generate the grammar print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); } @@ -583,65 +606,79 @@ sub output_yacc ($@) $tokens =~ s/:/ ':' /g; # offset args my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0; - my $func = "bytecode_new_insn"; + my $datastruct = "x86_new_insn_data"; + my $datastructname = "idata"; + my $func = "x86_bc_new_insn(&$datastructname)"; # Create the argument list for bytecode_new my @args; # operand size - push @args, "$inst->[OPSIZE],"; + push @args, "opersize=$inst->[OPSIZE];"; $args[-1] =~ s/nil/0/; - # number of bytes of opcodes - push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; # opcode piece 1 (and 2 and 3 if attached) - push @args, $inst->[OPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; - $args[-1] .= ','; + my @opcodes = split ",", $inst->[OPCODE]; + # number of bytes of opcodes + push @args, "op_len=".@opcodes.";"; + for (my $i=0; $i < @opcodes; ++$i) + { + $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. 
+ $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg; + push @args, "op[$i]=$opcodes[$i];"; + } # opcode piece 2 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,/o; + push @args, "op[1]=0;" if @opcodes < 2; # opcode piece 3 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; + push @args, "op[2]=0;" if @opcodes < 3; # effective addresses - push @args, $inst->[EFFADDR]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/; - $args[-1] =~ s/nil/0/; + my $effaddr = $inst->[EFFADDR]; + $effaddr =~ s/^nil/(effaddr *)NULL,0/; + $effaddr =~ s/nil/0/; # don't let a $0.\d match slip into the following rules. - $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/; - $args[-1] =~ s[(\$\d+)i,\s*(\d+)] - ["effaddr_new_imm($1, ".($2/8)."), 0"]e; - $args[-1] .= ','; + $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/; + $effaddr =~ s[(\$\d+)i,\s*(\d+)] + ["x86_ea_new_imm($1^ ".($2/8)."),0"]e; - die $args[-1] if $args[-1] =~ m/\d+[ri]/; + die $effaddr if $effaddr =~ m/\d+[ri]/; + + my @effaddr_split = split ',', $effaddr; + $effaddr_split[0] =~ s/\^/,/; + push @args, "ea=$effaddr_split[0];"; + push @args, "spare=$effaddr_split[1];"; # immediate sources - push @args, $inst->[IMM]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(immval *)NULL, 0/; + my $imm = $inst->[IMM]; + $imm =~ s/nil/(immval *)NULL,0/; # don't match $0.\d in the following rules. 
- $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s[^([0-9A-Fa-f]+),] - [immval_new_int(0x$1),]; - $args[-1] =~ s[^\$0.(\d+),] - [immval_new_int(\$1\[$1\]),]; + $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $imm =~ s[^([0-9A-Fa-f]+),] + [imm_new_int(0x$1),]; + $imm =~ s[^\$0.(\d+),] + [imm_new_int(\$1\[$1\]),]; # divide the second, and only the second, by 8 bits/byte - $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; + $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $imm .= ($3||'') eq 's' ? ',1' : ',0'; + + die $imm if $imm =~ m/\d+s/; - die $args[-1] if $args[-1] =~ m/\d+s/; + my @imm_split = split ",", $imm; + push @args, "imm=$imm_split[0];"; + push @args, "im_len=$imm_split[1];"; + push @args, "im_sign=$imm_split[2];"; # now that we've constructed the arglist, subst $0.\d s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + + # and add the data structure reference + s/^/$datastructname./g foreach (@args); # see if we match one of the cases to defer if (($inst->[OPERANDS]||"") =~ m/,ONE/) @@ -691,8 +728,9 @@ sub output_yacc ($@) # Now output imm version, with second opcode byte # set to ,1 opcode. Also call SetInsnShiftFlag(). $tokens =~ s/imm8x/imm/; - die "no space for ONE?" if $args[3] !~ m/0,/; - $args[3] = $ONE->[3]->[2]; + die "no space for ONE?" 
if $args[3] !~ m/0;/; + my $oneval = $ONE->[3]->[2]; + $args[3] =~ s/0/$oneval/; print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); } elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) diff --git a/src/parsers/nasm/nasm-bison.y b/src/parsers/nasm/nasm-bison.y index 54893a76..fa318fd3 100644 --- a/src/parsers/nasm/nasm-bison.y +++ b/src/parsers/nasm/nasm-bison.y @@ -40,6 +40,7 @@ RCSID("$IdPath$"); #include "section.h" #include "objfmt.h" +#include "arch.h" #define YYDEBUG 1 @@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base; static bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; +/* additional data declarations (dynamically generated) */ +/* @DATADECLS@ */ + %} %union { @@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc; effaddr *ea; expr *exp; immval *im_val; - targetval tgt_val; + x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; @@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc; %% input: /* empty */ | input line { - nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section), + nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), $2); if (nasm_parser_temp_bc) nasm_parser_prev_bc = nasm_parser_temp_bc; @@ -145,10 +149,10 @@ line: '\n' { $$ = (bytecode *)NULL; } ; lineexp: exp - | TIMES expr exp { $$ = $3; SetBCMultiple($$, $2); } + | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } | label { $$ = (bytecode *)NULL; } | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; SetBCMultiple($$, $3); } + | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } | label_id EQU expr { symrec_define_equ($1, $3); xfree($1); @@ -157,22 +161,16 @@ lineexp: exp ; exp: instr - | DECLARE_DATA datavals { $$ = bytecode_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bytecode_new_reserve($2, $1); } + | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } + | RESERVE_SPACE expr { $$ = bc_new_reserve($2, 
$1); } ; -datavals: dataval { - datavals_initialize(&$$); - datavals_append(&$$, $1); - } - | datavals ',' dataval { - datavals_append(&$1, $3); - $$ = $1; - } +datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } + | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dataval_new_expr($1); } - | STRING { $$ = dataval_new_string($1); } +dataval: expr_no_string { $$ = dv_new_expr($1); } + | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); $$ = (dataval *)NULL; @@ -317,17 +315,20 @@ memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | error { Error(_("invalid effective address")); } ; -memaddr: memexpr { $$ = effaddr_new_expr($1); SetEASegment($$, 0); } - | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen($$, 1); } - | WORD memaddr { $$ = $2; SetEALen($$, 2); } - | DWORD memaddr { $$ = $2; SetEALen($$, 4); } - | NOSPLIT memaddr { $$ = $2; SetEANosplit($$, 1); } +memaddr: memexpr { + $$ = x86_ea_new_expr($1); + x86_ea_set_segment($$, 0); + } + | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } + | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } + | WORD memaddr { $$ = $2; ea_set_len($$, 2); } + | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } + | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } ; mem: '[' memaddr 
']' { $$ = $2; } @@ -378,43 +379,43 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { $$ = effaddr_new_reg($1); } +rm8x: reg8 { $$ = x86_ea_new_reg($1); } | mem8x ; -rm16x: reg16 { $$ = effaddr_new_reg($1); } +rm16x: reg16 { $$ = x86_ea_new_reg($1); } | mem16x ; -rm32x: reg32 { $$ = effaddr_new_reg($1); } +rm32x: reg32 { $$ = x86_ea_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { $$ = effaddr_new_reg($1); } +rm64x: MMXREG { $$ = x86_ea_new_reg($1); } | mem64x ; -rm128x: XMMREG { $$ = effaddr_new_reg($1); } +rm128x: XMMREG { $$ = x86_ea_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { $$ = effaddr_new_reg($1); } +rm8: reg8 { $$ = x86_ea_new_reg($1); } | mem8 ; -rm16: reg16 { $$ = effaddr_new_reg($1); } +rm16: reg16 { $$ = x86_ea_new_reg($1); } | mem16 ; -rm32: reg32 { $$ = effaddr_new_reg($1); } +rm32: reg32 { $$ = x86_ea_new_reg($1); } | mem32 ; -rm64: MMXREG { $$ = effaddr_new_reg($1); } +rm64: MMXREG { $$ = x86_ea_new_reg($1); } | mem64 ; -rm128: XMMREG { $$ = effaddr_new_reg($1); } +rm128: XMMREG { $$ = x86_ea_new_reg($1); } | mem128 ; /* immediate values */ -imm: expr { $$ = immval_new_expr($1); } +imm: expr { $$ = imm_new_expr($1); } ; /* explicit immediates */ @@ -437,9 +438,18 @@ imm32: imm ; /* jump targets */ -target: expr { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); } - | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } - | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +target: expr { + $$.val = $1; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); + } + | SHORT target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + } + | NEAR target { + $$ = $2; + x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + } ; /* expression trees */ @@ -493,18 +503,36 @@ explabel: ID { $$ = symrec_use($1); xfree($1); } ; instr: instrbase - | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } - | ADDRSIZE instr { $$ = $2; 
SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); } - | REG_SS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); } - | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } - | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } - | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } - | REPZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); } + | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } + | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } + | REG_CS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); + } + | REG_SS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); + } + | REG_DS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); + } + | REG_ES instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); + } + | REG_FS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); + } + | REG_GS instr { + $$ = $2; + x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); + } + | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } + | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } + | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } + | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } ; /* instruction grammars (dynamically generated) */ @@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val) if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32)) Error(_("`%s' is not a valid argument to [BITS]"), val); else - mode_bits = (unsigned char)lval; + x86_mode_bits = (unsigned char)lval; } else { printf("Directive: Name=`%s' Value=`%s'\n", name, val); } 
diff --git a/src/parsers/nasm/token.l.in b/src/parsers/nasm/token.l.in index 8c1d6834..54002180 100644 --- a/src/parsers/nasm/token.l.in +++ b/src/parsers/nasm/token.l.in @@ -33,6 +33,8 @@ RCSID("$IdPath$"); #include "bytecode.h" +#include "arch.h" + #include "bison.h" diff --git a/src/section.c b/src/section.c index 0ebccb7e..a95d185a 100644 --- a/src/section.c +++ b/src/section.c @@ -134,7 +134,7 @@ sections_parser_finalize(sectionhead *headp) section *cur; STAILQ_FOREACH(cur, headp, link) - bytecodes_parser_finalize(&cur->bc); + bcs_parser_finalize(&cur->bc); } bytecodehead * @@ -156,7 +156,7 @@ section_delete(section *sect) return; xfree(sect->name); - bytecodes_delete(&sect->bc); + bcs_delete(&sect->bc); xfree(sect); } @@ -176,5 +176,5 @@ section_print(const section *sect) } printf(" Bytecodes:\n"); - bytecodes_print(&sect->bc); + bcs_print(&sect->bc); } diff --git a/src/symrec.c b/src/symrec.c index eccb034f..020dee11 100644 --- a/src/symrec.c +++ b/src/symrec.c @@ -290,7 +290,7 @@ symrec_print(const symrec *sym) printf("[First bytecode]\n"); else { printf("[Preceding bytecode]\n"); - bytecode_print(sym->value.label.bc); + bc_print(sym->value.label.bc); } break; } diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 9c7d9920..36e4bb1d 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -26,6 +26,7 @@ LDADD = \ $(top_builddir)/src/preprocs/raw/libpreproc.a \ $(top_builddir)/src/optimizers/dbg/liboptimizer.a \ $(top_builddir)/src/objfmts/dbg/libobjfmt.a \ + $(top_builddir)/src/arch/@ARCH@/libarch.a \ $(top_builddir)/src/libyasm.a \ $(INTLLIBS) @@ -54,5 +55,5 @@ memexpr_test_SOURCES = \ memexpr_test.c INCLUDES= -I$(top_srcdir) -I$(top_srcdir)/src -I$(top_srcdir)/check \ - -I$(top_builddir)/intl + -I$(top_srcdir)/src/arch/@ARCH@ -I$(top_builddir)/intl diff --git a/src/tests/bytecode_test.c b/src/tests/bytecode_test.c index 77eadc6a..f3411512 100644 --- a/src/tests/bytecode_test.c +++ b/src/tests/bytecode_test.c @@ -18,41 +18,41 @@ * along with this 
program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#ifdef STDC_HEADERS -# include -#endif +#include "util.h" #include "check.h" -#include "bytecode.c" +#include "bytecode.h" +#include "bc-int.h" +#include "arch.h" +#include "x86-int.h" -START_TEST(test_effaddr_new_reg) +START_TEST(test_x86_ea_new_reg) { effaddr *ea; + x86_effaddr_data *ead; int i; /* Test with NULL */ - ea = effaddr_new_reg(1); + ea = x86_ea_new_reg(1); fail_unless(ea != NULL, "Should die if out of memory (not return NULL)"); /* Test structure values function should set */ fail_unless(ea->len == 0, "len should be 0"); - fail_unless(ea->segment == 0, "Should be no segment override"); - fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid"); - fail_unless(ea->need_modrm == 1, "Mod/RM should be needed"); - fail_unless(ea->valid_sib == 0, "SIB should be invalid"); - fail_unless(ea->need_sib == 0, "SIB should not be needed"); + ead = ea_get_data(ea); + fail_unless(ead->segment == 0, "Should be no segment override"); + fail_unless(ead->valid_modrm == 1, "Mod/RM should be valid"); + fail_unless(ead->need_modrm == 1, "Mod/RM should be needed"); + fail_unless(ead->valid_sib == 0, "SIB should be invalid"); + fail_unless(ead->need_sib == 0, "SIB should not be needed"); free(ea); /* Exhaustively test generated Mod/RM byte with register values */ for(i=0; i<8; i++) { - ea = effaddr_new_reg(i); - fail_unless(ea->modrm == (0xC0 | (i & 0x07)), + ea = x86_ea_new_reg(i); + ead = ea_get_data(ea); + fail_unless(ead->modrm == (0xC0 | (i & 0x07)), "Invalid Mod/RM byte generated"); free(ea); } @@ -66,7 +66,7 @@ bytecode_suite(void) TCase *tc_conversion = tcase_create("Conversion"); suite_add_tcase(s, tc_conversion); - tcase_add_test(tc_conversion, test_effaddr_new_reg); + tcase_add_test(tc_conversion, test_x86_ea_new_reg); return s; }