granicus.if.org Git - yasm/commitdiff
Split x86-specific stuff away from bytecode.
author Peter Johnson <peter@tortall.net>
Mon, 5 Nov 2001 05:49:19 +0000 (05:49 -0000)
committer Peter Johnson <peter@tortall.net>
Mon, 5 Nov 2001 05:49:19 +0000 (05:49 -0000)
svn path=/trunk/yasm/; revision=316

49 files changed:
frontends/yasm/yasm.c
libyasm/arch.c [new file with mode: 0644]
libyasm/arch.h
libyasm/bc-int.h [new file with mode: 0644]
libyasm/bytecode.c
libyasm/bytecode.h
libyasm/linemgr.c
libyasm/linemgr.h
libyasm/section.c
libyasm/symrec.c
libyasm/tests/Makefile.am
libyasm/tests/bytecode_test.c
modules/arch/x86/Makefile.am
modules/arch/x86/arch.c
modules/arch/x86/bytecode.c [new file with mode: 0644]
modules/arch/x86/x86-int.h [new file with mode: 0644]
modules/arch/x86/x86arch.c
modules/arch/x86/x86arch.h [new file with mode: 0644]
modules/arch/x86/x86bc.c [new file with mode: 0644]
modules/parsers/nasm/bison.y.in
modules/parsers/nasm/gen_instr.pl
modules/parsers/nasm/nasm-bison.y
modules/parsers/nasm/token.l.in
src/Makefile.am
src/arch.c [new file with mode: 0644]
src/arch.h
src/arch/x86/Makefile.am
src/arch/x86/arch.c
src/arch/x86/bytecode.c [new file with mode: 0644]
src/arch/x86/x86-int.h [new file with mode: 0644]
src/arch/x86/x86arch.c
src/arch/x86/x86arch.h [new file with mode: 0644]
src/arch/x86/x86bc.c [new file with mode: 0644]
src/bc-int.h [new file with mode: 0644]
src/bytecode.c
src/bytecode.h
src/globals.c
src/globals.h
src/linemgr.c
src/linemgr.h
src/main.c
src/parsers/nasm/bison.y.in
src/parsers/nasm/gen_instr.pl
src/parsers/nasm/nasm-bison.y
src/parsers/nasm/token.l.in
src/section.c
src/symrec.c
src/tests/Makefile.am
src/tests/bytecode_test.c

diff --git a/frontends/yasm/yasm.c b/frontends/yasm/yasm.c
index 0b4d711f9803e7446390b227b9defa020e734c95..de7a27b93c6d21e9ee738ada9164b97ebcbd7044 100644 (file)
@@ -41,6 +41,8 @@ RCSID("$IdPath$");
 #include "preproc.h"
 #include "parser.h"
 
+#include "arch.h"
+
 
 #ifndef countof
 #define countof(x,y)   (sizeof(x)/sizeof(y))
@@ -110,8 +112,11 @@ main(int argc, char *argv[])
        switch_filename("<STDIN>");
     }
 
+    /* Set x86 as the architecture */
+    cur_arch = &x86_arch;
+
     /* Get initial BITS setting from object format */
-    mode_bits = dbg_objfmt.default_mode_bits;
+    x86_mode_bits = dbg_objfmt.default_mode_bits;
 
     sections = nasm_parser.do_parse(&nasm_parser, &dbg_objfmt, in);
 
diff --git a/libyasm/arch.c b/libyasm/arch.c
new file mode 100644 (file)
index 0000000..5cbe359
--- /dev/null
@@ -0,0 +1,29 @@
+/* $IdPath$
+ * Architecture interface
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+arch *cur_arch;
+
diff --git a/libyasm/arch.h b/libyasm/arch.h
index f1ed7268c771292648f58d051a8ddb3d9c0d7755..b614aca446123e92698d90587ea288e5de5fd908 100644 (file)
@@ -28,9 +28,24 @@ struct arch {
 
     /* keyword used to select architecture */
     const char *keyword;
+
+    struct {
+       /* Maximum used bytecode type value+1.  Should be set to
+        * BYTECODE_TYPE_BASE if no additional bytecode types are defined by
+        * the architecture.
+        */
+       const int type_max;
+
+       void (*bc_delete) (bytecode *bc);
+       void (*bc_print) (const bytecode *bc);
+       void (*bc_parser_finalize) (bytecode *bc);
+    } bc;
 };
 
 /* Available architectures */
+#include "arch/x86/x86arch.h"
 extern arch x86_arch;
 
+extern arch *cur_arch;
+
 #endif
diff --git a/libyasm/bc-int.h b/libyasm/bc-int.h
new file mode 100644 (file)
index 0000000..f992c46
--- /dev/null
@@ -0,0 +1,71 @@
+/* $IdPath$
+ * Bytecode internal structures header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_BC_INT_H
+#define YASM_BC_INT_H
+
+struct effaddr {
+    expr *disp;                        /* address displacement */
+    unsigned char len;         /* length of disp (in bytes), 0 if unknown,
+                                * 0xff if unknown and required to be >0.
+                                */
+    unsigned char nosplit;     /* 1 if reg*2 should not be split into
+                                  reg+reg. (0 if not) */
+
+    /* architecture-dependent data may be appended */
+};
+#define ea_get_data(x)         (void *)(((char *)x)+sizeof(effaddr))
+#define ea_get_const_data(x)   (const void *)(((const char *)x)+sizeof(effaddr))
+
+struct immval {
+    expr *val;
+
+    unsigned char len;         /* length of val (in bytes), 0 if unknown */
+    unsigned char isneg;       /* the value has been explicitly negated */
+
+    unsigned char f_len;       /* final imm length */
+    unsigned char f_sign;      /* 1 if final imm should be signed */
+};
+
+struct bytecode {
+    STAILQ_ENTRY(bytecode) link;
+
+    bytecode_type type;
+
+    expr *multiple;            /* number of times bytecode is repeated,
+                                  NULL=1 */
+
+    unsigned long len;         /* total length of entire bytecode (including
+                                  multiple copies), 0 if unknown */
+
+    /* where it came from */
+    const char *filename;
+    unsigned int lineno;
+
+    /* other assembler state info */
+    unsigned long offset;      /* 0 if unknown */
+
+    /* architecture-dependent data may be appended */
+};
+#define bc_get_data(x)         (void *)(((char *)x)+sizeof(bytecode))
+#define bc_get_const_data(x)   (const void *)(((const char *)x)+sizeof(bytecode))
+
+#endif
diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c
index 407f01eccf51b19d3f92370cac0214f903b73615..6dae585dd00fed11469795639b3b0e18c19e41fe 100644 (file)
@@ -29,40 +29,10 @@ RCSID("$IdPath$");
 
 #include "bytecode.h"
 
+#include "arch.h"
 
-struct effaddr {
-    expr *disp;                        /* address displacement */
-    unsigned char len;         /* length of disp (in bytes), 0 if unknown,
-                                * 0xff if unknown and required to be >0.
-                                */
+#include "bc-int.h"
 
-    unsigned char segment;     /* segment override, 0 if none */
-
-    /* How the spare (register) bits in Mod/RM are handled:
-     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
-     * They're set in bytecode_new_insn().
-     */
-    unsigned char modrm;
-    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
-    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
-
-    unsigned char sib;
-    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
-    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
-                                  0xff if unknown */
-    unsigned char nosplit;     /* 1 if reg*2 should not be split into
-                                  reg+reg. (0 if not) */
-};
-
-struct immval {
-    expr *val;
-
-    unsigned char len;         /* length of val (in bytes), 0 if unknown */
-    unsigned char isneg;       /* the value has been explicitly negated */
-
-    unsigned char f_len;       /* final imm length */
-    unsigned char f_sign;      /* 1 if final imm should be signed */
-};
 
 struct dataval {
     STAILQ_ENTRY(dataval) link;
@@ -75,158 +45,25 @@ struct dataval {
     } data;
 };
 
-struct bytecode {
-    STAILQ_ENTRY(bytecode) link;
-
-    enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
-
-    /* This union has been somewhat tweaked to get it as small as possible
-     * on the 4-byte-aligned x86 architecture (without resorting to
-     * bitfields).  In particular, insn and jmprel are the largest structures
-     * in the union, and are also the same size (after padding).  jmprel
-     * can have another unsigned char added to the end without affecting
-     * its size.
-     *
-     * Don't worry about this too much, but keep it in mind when changing
-     * this structure.  We care about the size of bytecode in particular
-     * because it accounts for the majority of the memory usage in the
-     * assembler when assembling a large file.
-     */
-    union {
-       struct {
-           effaddr *ea;        /* effective address */
-
-           immval *imm;        /* immediate or relative value */
-
-           unsigned char opcode[3];    /* opcode */
-           unsigned char opcode_len;
-
-           unsigned char addrsize;     /* 0 or =mode_bits => no override */
-           unsigned char opersize;     /* 0 indicates no override */
-           unsigned char lockrep_pre;  /* 0 indicates no prefix */
-
-           /* HACK, but a space-saving one: shift opcodes have an immediate
-            * form and a ,1 form (with no immediate).  In the parser, we
-            * set this and opcode_len=1, but store the ,1 version in the
-            * second byte of the opcode array.  We then choose between the
-            * two versions once we know the actual value of imm (because we
-            * don't know it in the parser module).
-            *
-            * A override to force the imm version should just leave this at
-            * 0.  Then later code won't know the ,1 version even exists.
-            * TODO: Figure out how this affects CPU flags processing.
-            *
-            * Call SetInsnShiftFlag() to set this flag to 1.
-            */
-           unsigned char shift_op;
-       } insn;
-       struct {
-           expr *target;               /* target location */
-
-           struct {
-               unsigned char opcode[3];
-               unsigned char opcode_len;   /* 0 = no opc for this version */
-           } shortop, nearop;
-
-           /* which opcode are we using? */
-           /* The *FORCED forms are specified in the source as such */
-           jmprel_opcode_sel op_sel;
-
-           unsigned char addrsize;     /* 0 or =mode_bits => no override */
-           unsigned char opersize;     /* 0 indicates no override */
-           unsigned char lockrep_pre;  /* 0 indicates no prefix */
-       } jmprel;
-       struct {
-           /* non-converted data (linked list) */
-           datavalhead datahead;
-
-           /* final (converted) size of each element (in bytes) */
-           unsigned char size;
-       } data;
-       struct {
-           expr *numitems;             /* number of items to reserve */
-           unsigned char itemsize;     /* size of each item (in bytes) */
-       } reserve;
-    } data;
+typedef struct bytecode_data {
+    /* non-converted data (linked list) */
+    datavalhead datahead;
 
-    expr *multiple;            /* number of times bytecode is repeated,
-                                  NULL=1 */
+    /* final (converted) size of each element (in bytes) */
+    unsigned char size;
+} bytecode_data;
 
-    unsigned long len;         /* total length of entire bytecode (including
-                                  multiple copies), 0 if unknown */
-
-    /* where it came from */
-    const char *filename;
-    unsigned int lineno;
-
-    /* other assembler state info */
-    unsigned long offset;      /* 0 if unknown */
-    unsigned char mode_bits;
-};
+typedef struct bytecode_reserve {
+    expr *numitems;            /* number of items to reserve */
+    unsigned char itemsize;    /* size of each item (in bytes) */
+} bytecode_reserve;
 
 /* Static structures for when NULL is passed to conversion functions. */
 /*  for Convert*ToBytes() */
 unsigned char bytes_static[16];
 
-static bytecode *bytecode_new_common(void);
-
-effaddr *
-effaddr_new_reg(unsigned long reg)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = (expr *)NULL;
-    ea->len = 0;
-    ea->segment = 0;
-    ea->modrm = 0xC0 | (reg & 0x07);   /* Mod=11, R/M=Reg, Reg=0 */
-    ea->valid_modrm = 1;
-    ea->need_modrm = 1;
-    ea->valid_sib = 0;
-    ea->need_sib = 0;
-    ea->nosplit = 0;
-
-    return ea;
-}
-
-effaddr *
-effaddr_new_expr(expr *expr_ptr)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = expr_ptr;
-    ea->len = 0;
-    ea->segment = 0;
-    ea->modrm = 0;
-    ea->valid_modrm = 0;
-    ea->need_modrm = 1;
-    ea->valid_sib = 0;
-    ea->need_sib = 0xff;    /* we won't know until we know more about expr and
-                              the BITS/address override setting */
-    ea->nosplit = 0;
-
-    return ea;
-}
-
-effaddr *
-effaddr_new_imm(immval *im_ptr, unsigned char im_len)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = im_ptr->val;
-    ea->len = im_len;
-    ea->segment = 0;
-    ea->modrm = 0;
-    ea->valid_modrm = 0;
-    ea->need_modrm = 0;
-    ea->valid_sib = 0;
-    ea->need_sib = 0;
-    ea->nosplit = 0;
-
-    return ea;
-}
-
 immval *
-immval_new_int(unsigned long int_val)
+imm_new_int(unsigned long int_val)
 {
     immval *im = xmalloc(sizeof(immval));
 
@@ -245,7 +82,7 @@ immval_new_int(unsigned long int_val)
 }
 
 immval *
-immval_new_expr(expr *expr_ptr)
+imm_new_expr(expr *expr_ptr)
 {
     immval *im = xmalloc(sizeof(immval));
 
@@ -257,19 +94,7 @@ immval_new_expr(expr *expr_ptr)
 }
 
 void
-SetEASegment(effaddr *ptr, unsigned char segment)
-{
-    if (!ptr)
-       return;
-
-    if (segment != 0 && ptr->segment != 0)
-       Warning(_("multiple segment overrides, using leftmost"));
-
-    ptr->segment = segment;
-}
-
-void
-SetEALen(effaddr *ptr, unsigned char len)
+ea_set_len(effaddr *ptr, unsigned char len)
 {
     if (!ptr)
        return;
@@ -282,7 +107,7 @@ SetEALen(effaddr *ptr, unsigned char len)
 }
 
 void
-SetEANosplit(effaddr *ptr, unsigned char nosplit)
+ea_set_nosplit(effaddr *ptr, unsigned char nosplit)
 {
     if (!ptr)
        return;
@@ -290,108 +115,8 @@ SetEANosplit(effaddr *ptr, unsigned char nosplit)
     ptr->nosplit = nosplit;
 }
 
-effaddr *
-GetInsnEA(bytecode *bc)
-{
-    if (!bc)
-       return NULL;
-
-    if (bc->type != BC_INSN)
-       InternalError(_("Trying to get EA of non-instruction"));
-
-    return bc->data.insn.ea;
-}
-
 void
-SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize)
-{
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           bc->data.insn.opersize = opersize;
-           break;
-       case BC_JMPREL:
-           bc->data.jmprel.opersize = opersize;
-           break;
-       default:
-           InternalError(_("OperSize override applied to non-instruction"));
-           return;
-    }
-}
-
-void
-SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize)
-{
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           bc->data.insn.addrsize = addrsize;
-           break;
-       case BC_JMPREL:
-           bc->data.jmprel.addrsize = addrsize;
-           break;
-       default:
-           InternalError(_("AddrSize override applied to non-instruction"));
-           return;
-    }
-}
-
-void
-SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix)
-{
-    unsigned char *lockrep_pre = (unsigned char *)NULL;
-
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           lockrep_pre = &bc->data.insn.lockrep_pre;
-           break;
-       case BC_JMPREL:
-           lockrep_pre = &bc->data.jmprel.lockrep_pre;
-           break;
-       default:
-           InternalError(_("LockRep prefix applied to non-instruction"));
-           return;
-    }
-
-    if (*lockrep_pre != 0)
-       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
-
-    *lockrep_pre = prefix;
-}
-
-void
-SetInsnShiftFlag(bytecode *bc)
-{
-    if (!bc)
-       return;
-
-    if (bc->type != BC_INSN)
-       InternalError(_("Attempted to set shift flag on non-instruction"));
-
-    bc->data.insn.shift_op = 1;
-}
-
-void
-SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel)
-{
-    if (!old_sel)
-       return;
-
-    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
-                              (*old_sel == JR_NEAR_FORCED)))
-       Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
-    *old_sel = new_sel;
-}
-
-void
-SetBCMultiple(bytecode *bc, expr *e)
+bc_set_multiple(bytecode *bc, expr *e)
 {
     if (bc->multiple)
        bc->multiple = expr_new_tree(bc->multiple, EXPR_MUL, e);
@@ -399,10 +124,12 @@ SetBCMultiple(bytecode *bc, expr *e)
        bc->multiple = e;
 }
 
-static bytecode *
-bytecode_new_common(void)
+bytecode *
+bc_new_common(bytecode_type type, size_t datasize)
 {
-    bytecode *bc = xmalloc(sizeof(bytecode));
+    bytecode *bc = xmalloc(sizeof(bytecode)+datasize);
+
+    bc->type = type;
 
     bc->multiple = (expr *)NULL;
     bc->len = 0;
@@ -411,146 +138,59 @@ bytecode_new_common(void)
     bc->lineno = line_number;
 
     bc->offset = 0;
-    bc->mode_bits = mode_bits;
 
     return bc;
 }
 
 bytecode *
-bytecode_new_insn(unsigned char  opersize,
-                 unsigned char  opcode_len,
-                 unsigned char  op0,
-                 unsigned char  op1,
-                 unsigned char  op2,
-                 effaddr       *ea_ptr,
-                 unsigned char  spare,
-                 immval        *im_ptr,
-                 unsigned char  im_len,
-                 unsigned char  im_sign)
+bc_new_data(datavalhead *datahead, unsigned long size)
 {
-    bytecode *bc = bytecode_new_common();
+    bytecode *bc = bc_new_common(BC_DATA, sizeof(bytecode_data));
+    bytecode_data *data = bc_get_data(bc);
 
-    bc->type = BC_INSN;
-
-    bc->data.insn.ea = ea_ptr;
-    if (ea_ptr) {
-       bc->data.insn.ea->modrm &= 0xC7;        /* zero spare/reg bits */
-       bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */
-    }
-
-    bc->data.insn.imm = im_ptr;
-    if (im_ptr) {
-       bc->data.insn.imm->f_sign = im_sign;
-       bc->data.insn.imm->f_len = im_len;
-    }
-
-    bc->data.insn.opcode[0] = op0;
-    bc->data.insn.opcode[1] = op1;
-    bc->data.insn.opcode[2] = op2;
-    bc->data.insn.opcode_len = opcode_len;
-
-    bc->data.insn.addrsize = 0;
-    bc->data.insn.opersize = opersize;
-    bc->data.insn.lockrep_pre = 0;
-    bc->data.insn.shift_op = 0;
-
-    return bc;
-}
-
-bytecode *
-bytecode_new_jmprel(targetval     *target,
-                   unsigned char  short_opcode_len,
-                   unsigned char  short_op0,
-                   unsigned char  short_op1,
-                   unsigned char  short_op2,
-                   unsigned char  near_opcode_len,
-                   unsigned char  near_op0,
-                   unsigned char  near_op1,
-                   unsigned char  near_op2,
-                   unsigned char  addrsize)
-{
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_JMPREL;
-
-    bc->data.jmprel.target = target->val;
-    bc->data.jmprel.op_sel = target->op_sel;
-
-    if ((target->op_sel == JR_SHORT_FORCED) && (near_opcode_len == 0))
-       Error(_("no SHORT form of that jump instruction exists"));
-    if ((target->op_sel == JR_NEAR_FORCED) && (short_opcode_len == 0))
-       Error(_("no NEAR form of that jump instruction exists"));
-
-    bc->data.jmprel.shortop.opcode[0] = short_op0;
-    bc->data.jmprel.shortop.opcode[1] = short_op1;
-    bc->data.jmprel.shortop.opcode[2] = short_op2;
-    bc->data.jmprel.shortop.opcode_len = short_opcode_len;
-
-    bc->data.jmprel.nearop.opcode[0] = near_op0;
-    bc->data.jmprel.nearop.opcode[1] = near_op1;
-    bc->data.jmprel.nearop.opcode[2] = near_op2;
-    bc->data.jmprel.nearop.opcode_len = near_opcode_len;
-
-    bc->data.jmprel.addrsize = addrsize;
-    bc->data.jmprel.opersize = 0;
-    bc->data.jmprel.lockrep_pre = 0;
+    data->datahead = *datahead;
+    data->size = size;
 
     return bc;
 }
 
 bytecode *
-bytecode_new_data(datavalhead *datahead, unsigned long size)
+bc_new_reserve(expr *numitems, unsigned long itemsize)
 {
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_DATA;
+    bytecode *bc = bc_new_common(BC_RESERVE, sizeof(bytecode_reserve));
+    bytecode_reserve *reserve = bc_get_data(bc);
 
-    bc->data.data.datahead = *datahead;
-    bc->data.data.size = size;
-
-    return bc;
-}
-
-bytecode *
-bytecode_new_reserve(expr *numitems, unsigned long itemsize)
-{
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_RESERVE;
-
-    bc->data.reserve.numitems = numitems;
-    bc->data.reserve.itemsize = itemsize;
+    reserve->numitems = numitems;
+    reserve->itemsize = itemsize;
 
     return bc;
 }
 
 void
-bytecode_delete(bytecode *bc)
+bc_delete(bytecode *bc)
 {
+    bytecode_data *data;
+    bytecode_reserve *reserve;
+
     if (!bc)
        return;
 
     switch (bc->type) {
        case BC_EMPTY:
            break;
-       case BC_INSN:
-           if (bc->data.insn.ea) {
-               expr_delete(bc->data.insn.ea->disp);
-               xfree(bc->data.insn.ea);
-           }
-           if (bc->data.insn.imm) {
-               expr_delete(bc->data.insn.imm->val);
-               xfree(bc->data.insn.imm);
-           }
-           break;
-       case BC_JMPREL:
-           expr_delete(bc->data.jmprel.target);
-           break;
        case BC_DATA:
-           datavals_delete(&bc->data.data.datahead);
+           data = bc_get_data(bc);
+           dvs_delete(&data->datahead);
            break;
        case BC_RESERVE:
-           expr_delete(bc->data.reserve.numitems);
+           reserve = bc_get_data(bc);
+           expr_delete(reserve->numitems);
+           break;
+       default:
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_delete(bc);
+           else
+               InternalError(_("Unknown bytecode type"));
            break;
     }
 
@@ -559,131 +199,43 @@ bytecode_delete(bytecode *bc)
 }
 
 int
-bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val)
+bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val)
 {
     return 0;  /* TODO */
 }
 
 void
-bytecode_print(const bytecode *bc)
+bc_print(const bytecode *bc)
 {
+    const bytecode_data *data;
+    const bytecode_reserve *reserve;
+
     switch (bc->type) {
        case BC_EMPTY:
            printf("_Empty_\n");
            break;
-       case BC_INSN:
-           printf("_Instruction_\n");
-           printf("Effective Address:");
-           if (!bc->data.insn.ea)
-               printf(" (nil)\n");
-           else {
-               printf("\n Disp=");
-               if (bc->data.insn.ea->disp)
-                   expr_print(bc->data.insn.ea->disp);
-               else
-                   printf("(nil)");
-               printf("\n");
-               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
-                      (unsigned int)bc->data.insn.ea->len,
-                      (unsigned int)bc->data.insn.ea->segment,
-                      (unsigned int)bc->data.insn.ea->nosplit);
-               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
-                      (unsigned int)bc->data.insn.ea->modrm,
-                      (unsigned int)bc->data.insn.ea->valid_modrm,
-                      (unsigned int)bc->data.insn.ea->need_modrm);
-               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
-                      (unsigned int)bc->data.insn.ea->sib,
-                      (unsigned int)bc->data.insn.ea->valid_sib,
-                      (unsigned int)bc->data.insn.ea->need_sib);
-           }
-           printf("Immediate Value:");
-           if (!bc->data.insn.imm)
-               printf(" (nil)\n");
-           else {
-               printf("\n Val=");
-               expr_print(bc->data.insn.imm->val);
-               printf("\n");
-               printf(" Len=%u, IsNeg=%u\n",
-                      (unsigned int)bc->data.insn.imm->len,
-                      (unsigned int)bc->data.insn.imm->isneg);
-               printf(" FLen=%u, FSign=%u\n",
-                      (unsigned int)bc->data.insn.imm->f_len,
-                      (unsigned int)bc->data.insn.imm->f_sign);
-           }
-           printf("Opcode: %02x %02x %02x OpLen=%u\n",
-                  (unsigned int)bc->data.insn.opcode[0],
-                  (unsigned int)bc->data.insn.opcode[1],
-                  (unsigned int)bc->data.insn.opcode[2],
-                  (unsigned int)bc->data.insn.opcode_len);
-           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
-                  (unsigned int)bc->data.insn.addrsize,
-                  (unsigned int)bc->data.insn.opersize,
-                  (unsigned int)bc->data.insn.lockrep_pre,
-                  (unsigned int)bc->data.insn.shift_op);
-           break;
-       case BC_JMPREL:
-           printf("_Relative Jump_\n");
-           printf("Target=");
-           expr_print(bc->data.jmprel.target);
-           printf("\nShort Form:\n");
-           if (!bc->data.jmprel.shortop.opcode_len == 0)
-               printf(" None\n");
-           else
-               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
-                      (unsigned int)bc->data.jmprel.shortop.opcode[0],
-                      (unsigned int)bc->data.jmprel.shortop.opcode[1],
-                      (unsigned int)bc->data.jmprel.shortop.opcode[2],
-                      (unsigned int)bc->data.jmprel.shortop.opcode_len);
-           if (!bc->data.jmprel.nearop.opcode_len == 0)
-               printf(" None\n");
-           else
-               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
-                      (unsigned int)bc->data.jmprel.nearop.opcode[0],
-                      (unsigned int)bc->data.jmprel.nearop.opcode[1],
-                      (unsigned int)bc->data.jmprel.nearop.opcode[2],
-                      (unsigned int)bc->data.jmprel.nearop.opcode_len);
-           printf("OpSel=");
-           switch (bc->data.jmprel.op_sel) {
-               case JR_NONE:
-                   printf("None");
-                   break;
-               case JR_SHORT:
-                   printf("Short");
-                   break;
-               case JR_NEAR:
-                   printf("Near");
-                   break;
-               case JR_SHORT_FORCED:
-                   printf("Forced Short");
-                   break;
-               case JR_NEAR_FORCED:
-                   printf("Forced Near");
-                   break;
-               default:
-                   printf("UNKNOWN!!");
-                   break;
-           }
-           printf("\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
-                  (unsigned int)bc->data.jmprel.addrsize,
-                  (unsigned int)bc->data.jmprel.opersize,
-                  (unsigned int)bc->data.jmprel.lockrep_pre);
-           break;
        case BC_DATA:
+           data = bc_get_const_data(bc);
            printf("_Data_\n");
            printf("Final Element Size=%u\n",
-                  (unsigned int)bc->data.data.size);
+                  (unsigned int)data->size);
            printf("Elements:\n");
-           datavals_print(&bc->data.data.datahead);
+           dvs_print(&data->datahead);
            break;
        case BC_RESERVE:
+           reserve = bc_get_const_data(bc);
            printf("_Reserve_\n");
            printf("Num Items=");
-           expr_print(bc->data.reserve.numitems);
+           expr_print(reserve->numitems);
            printf("\nItem Size=%u\n",
-                  (unsigned int)bc->data.reserve.itemsize);
+                  (unsigned int)reserve->itemsize);
            break;
        default:
-           printf("_Unknown_\n");
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_print(bc);
+           else
+               printf("_Unknown_\n");
+           break;
     }
     printf("Multiple=");
     if (!bc->multiple)
@@ -694,95 +246,42 @@ bytecode_print(const bytecode *bc)
     printf("Length=%lu\n", bc->len);
     printf("Filename=\"%s\" Line Number=%u\n",
           bc->filename ? bc->filename : "<UNKNOWN>", bc->lineno);
-    printf("Offset=%lx BITS=%u\n", bc->offset, bc->mode_bits);
-}
-
-static void
-bytecode_parser_finalize_insn(bytecode *bc)
-{
-    effaddr *ea = bc->data.insn.ea;
-    immval *imm = bc->data.insn.imm;
-
-    if (ea) {
-       if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) ||
-                          (!ea->valid_modrm && ea->need_modrm))) {
-           /* First expand equ's */
-           expr_expand_equ(ea->disp);
-
-           /* Check validity of effective address and calc R/M bits of
-            * Mod/RM byte and SIB byte.  We won't know the Mod field
-            * of the Mod/RM byte until we know more about the
-            * displacement.
-            */
-           if (!expr_checkea(&ea->disp, &bc->data.insn.addrsize,
-                             bc->mode_bits, ea->nosplit, &ea->len, &ea->modrm,
-                             &ea->valid_modrm, &ea->need_modrm, &ea->sib,
-                             &ea->valid_sib, &ea->need_sib))
-               return;     /* failed, don't bother checking rest of insn */
-       }
-    }
-
-    if (imm) {
-       const intnum *num;
-
-       if (imm->val) {
-           expr_expand_equ(imm->val);
-           expr_simplify(imm->val);
-       }
-       /* TODO: check imm f_len vs. len? */
-
-       /* Handle shift_op special-casing */
-       if (bc->data.insn.shift_op && (num = expr_get_intnum(&imm->val))) {
-           if (intnum_get_uint(num) == 1) {
-               /* Use ,1 form: first copy ,1 opcode. */
-               bc->data.insn.opcode[0] = bc->data.insn.opcode[1];
-               /* Delete ModRM, as it's no longer needed */
-               xfree(ea);
-               bc->data.insn.ea = (effaddr *)NULL;
-               /* Delete Imm, as it's not needed */
-               expr_delete(imm->val);
-               xfree(imm);
-               bc->data.insn.imm = (immval *)NULL;
-           }
-           bc->data.insn.shift_op = 0;
-       }
-    }
-
-    
+    printf("Offset=%lx\n", bc->offset);
 }
 
 void
-bytecode_parser_finalize(bytecode *bc)
+bc_parser_finalize(bytecode *bc)
 {
     switch (bc->type) {
        case BC_EMPTY:
            /* FIXME: delete it (probably in bytecodes_ level, not here */
            InternalError(_("got empty bytecode in parser_finalize"));
            break;
-       case BC_INSN:
-           bytecode_parser_finalize_insn(bc);
-           break;
        default:
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_parser_finalize(bc);
+           else
+               InternalError(_("Unknown bytecode type"));
            break;
     }
 }
 
 void
-bytecodes_delete(bytecodehead *headp)
+bcs_delete(bytecodehead *headp)
 {
     bytecode *cur, *next;
 
     cur = STAILQ_FIRST(headp);
     while (cur) {
        next = STAILQ_NEXT(cur, link);
-       bytecode_delete(cur);
+       bc_delete(cur);
        cur = next;
     }
     STAILQ_INIT(headp);
 }
 
 bytecode *
-bytecodes_append(bytecodehead *headp, bytecode *bc)
+bcs_append(bytecodehead *headp, bytecode *bc)
 {
     if (bc) {
        if (bc->type != BC_EMPTY) {
@@ -796,27 +295,27 @@ bytecodes_append(bytecodehead *headp, bytecode *bc)
 }
 
 void
-bytecodes_print(const bytecodehead *headp)
+bcs_print(const bytecodehead *headp)
 {
     bytecode *cur;
 
     STAILQ_FOREACH(cur, headp, link) {
        printf("---Next Bytecode---\n");
-       bytecode_print(cur);
+       bc_print(cur);
     }
 }
 
 void
-bytecodes_parser_finalize(bytecodehead *headp)
+bcs_parser_finalize(bytecodehead *headp)
 {
     bytecode *cur;
 
     STAILQ_FOREACH(cur, headp, link)
-       bytecode_parser_finalize(cur);
+       bc_parser_finalize(cur);
 }
 
 dataval *
-dataval_new_expr(expr *expn)
+dv_new_expr(expr *expn)
 {
     dataval *retval = xmalloc(sizeof(dataval));
 
@@ -827,7 +326,7 @@ dataval_new_expr(expr *expn)
 }
 
 dataval *
-dataval_new_string(char *str_val)
+dv_new_string(char *str_val)
 {
     dataval *retval = xmalloc(sizeof(dataval));
 
@@ -838,7 +337,7 @@ dataval_new_string(char *str_val)
 }
 
 void
-datavals_delete(datavalhead *headp)
+dvs_delete(datavalhead *headp)
 {
     dataval *cur, *next;
 
@@ -854,7 +353,7 @@ datavals_delete(datavalhead *headp)
 }
 
 dataval *
-datavals_append(datavalhead *headp, dataval *dv)
+dvs_append(datavalhead *headp, dataval *dv)
 {
     if (dv) {
        STAILQ_INSERT_TAIL(headp, dv, link);
@@ -864,7 +363,7 @@ datavals_append(datavalhead *headp, dataval *dv)
 }
 
 void
-datavals_print(const datavalhead *head)
+dvs_print(const datavalhead *head)
 {
     dataval *cur;
 
index 3683bae9eae169385378818edef3e721867f8d24..38fa31138628753505e4064cfef0f622c514cd62 100644 (file)
@@ -27,87 +27,43 @@ typedef struct immval immval;
 typedef STAILQ_HEAD(datavalhead, dataval) datavalhead;
 typedef struct dataval dataval;
 
+/* Additional types may be architecture-defined starting at
+ * BYTECODE_TYPE_BASE.
+ */
 typedef enum {
-    JR_NONE,
-    JR_SHORT,
-    JR_NEAR,
-    JR_SHORT_FORCED,
-    JR_NEAR_FORCED
-} jmprel_opcode_sel;
-
-typedef struct targetval {
-    expr *val;
-
-    jmprel_opcode_sel op_sel;
-} targetval;
-
-effaddr *effaddr_new_reg(unsigned long reg);
-effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len);
-effaddr *effaddr_new_expr(expr *expr_ptr);
+    BC_EMPTY = 0,
+    BC_DATA,
+    BC_RESERVE
+} bytecode_type;
+#define BYTECODE_TYPE_BASE  (BC_RESERVE+1)
 
-immval *immval_new_int(unsigned long int_val);
-immval *immval_new_expr(expr *expr_ptr);
+immval *imm_new_int(unsigned long int_val);
+immval *imm_new_expr(expr *e);
 
-void SetEASegment(effaddr *ptr, unsigned char segment);
-void SetEALen(effaddr *ptr, unsigned char len);
-void SetEANosplit(effaddr *ptr, unsigned char nosplit);
+void ea_set_len(effaddr *ea, unsigned char len);
+void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
 
-effaddr *GetInsnEA(bytecode *bc);
+void bc_set_multiple(bytecode *bc, expr *e);
 
-void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize);
-void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize);
-void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix);
-void SetInsnShiftFlag(bytecode *bc);
+bytecode *bc_new_common(bytecode_type type, size_t datasize);
+bytecode *bc_new_data(datavalhead *datahead, unsigned long size);
+bytecode *bc_new_reserve(expr *numitems, unsigned long itemsize);
 
-void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel);
-
-void SetBCMultiple(bytecode *bc, expr *e);
-
-/* IMPORTANT: ea_ptr and im_ptr cannot be reused or freed after calling this
- * function (it doesn't make a copy).
- */
-bytecode *bytecode_new_insn(unsigned char  opersize,
-                           unsigned char  opcode_len,
-                           unsigned char  op0,
-                           unsigned char  op1,
-                           unsigned char  op2,
-                           effaddr       *ea_ptr,
-                           unsigned char  spare,
-                           immval        *im_ptr,
-                           unsigned char  im_len,
-                           unsigned char  im_sign);
-
-/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. */
-bytecode *bytecode_new_jmprel(targetval     *target,
-                             unsigned char  short_opcode_len,
-                             unsigned char  short_op0,
-                             unsigned char  short_op1,
-                             unsigned char  short_op2,
-                             unsigned char  near_opcode_len,
-                             unsigned char  near_op0,
-                             unsigned char  near_op1,
-                             unsigned char  near_op2,
-                             unsigned char  addrsize);
-
-bytecode *bytecode_new_data(datavalhead *datahead, unsigned long size);
-
-bytecode *bytecode_new_reserve(expr *numitems, unsigned long itemsize);
-
-void bytecode_delete(bytecode *bc);
+void bc_delete(bytecode *bc);
 
 /* Gets the offset of the bytecode specified by bc if possible.
  * Return value is IF POSSIBLE, not the value.
  */
-int bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val);
+int bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val);
 
-void bytecode_print(const bytecode *bc);
+void bc_print(const bytecode *bc);
 
-void bytecode_parser_finalize(bytecode *bc);
+void bc_parser_finalize(bytecode *bc);
 
 /* void bytecodes_initialize(bytecodehead *headp); */
 #define        bytecodes_initialize(headp)     STAILQ_INIT(headp)
 
-void bytecodes_delete(bytecodehead *headp);
+void bcs_delete(bytecodehead *headp);
 
 /* Adds bc to the list of bytecodes headp.
  * NOTE: Does not make a copy of bc; so don't pass this function
@@ -115,20 +71,20 @@ void bytecodes_delete(bytecodehead *headp);
  * this function.  If bc was actually appended (it wasn't NULL or empty),
  * then returns bc, otherwise returns NULL.
  */
-bytecode *bytecodes_append(bytecodehead *headp, bytecode *bc);
+bytecode *bcs_append(bytecodehead *headp, bytecode *bc);
 
-void bytecodes_print(const bytecodehead *headp);
+void bcs_print(const bytecodehead *headp);
 
-void bytecodes_parser_finalize(bytecodehead *headp);
+void bcs_parser_finalize(bytecodehead *headp);
 
-dataval *dataval_new_expr(expr *expn);
-dataval *dataval_new_float(floatnum *flt);
-dataval *dataval_new_string(char *str_val);
+dataval *dv_new_expr(expr *expn);
+dataval *dv_new_float(floatnum *flt);
+dataval *dv_new_string(char *str_val);
 
-/* void datavals_initialize(datavalhead *headp); */
-#define        datavals_initialize(headp)      STAILQ_INIT(headp)
+/* void dvs_initialize(datavalhead *headp); */
+#define        dvs_initialize(headp)   STAILQ_INIT(headp)
 
-void datavals_delete(datavalhead *headp);
+void dvs_delete(datavalhead *headp);
 
 /* Adds dv to the list of datavals headp.
  * NOTE: Does not make a copy of dv; so don't pass this function
@@ -136,8 +92,8 @@ void datavals_delete(datavalhead *headp);
  * this function.  If dv was actually appended (it wasn't NULL), then
  * returns dv, otherwise returns NULL.
  */
-dataval *datavals_append(datavalhead *headp, dataval *dv);
+dataval *dvs_append(datavalhead *headp, dataval *dv);
 
-void datavals_print(const datavalhead *head);
+void dvs_print(const datavalhead *head);
 
 #endif
index c21436a7176c6bf8ada78d0d2dc166cb73fedae7..2239872f5ed6f91886adeeb90151d632988af379 100644 (file)
@@ -29,7 +29,6 @@ RCSID("$IdPath$");
 
 const char *in_filename = (const char *)NULL;
 unsigned int line_number = 1;
-unsigned char mode_bits = 0;
 unsigned int asm_options = 0;
 
 static ternary_tree filename_table = (ternary_tree)NULL;
index 23376e131012b5b4151751a393b36daa0aa357c0..d0457793a6e4aa38c72f3eee9ebd9d2092ec493d 100644 (file)
@@ -24,7 +24,6 @@
 
 extern const char *in_filename;
 extern unsigned int line_number;
-extern unsigned char mode_bits;
 extern unsigned int asm_options;
 
 void switch_filename(const char *filename);
index 0ebccb7e67d69f886d97105fe5d544b0572b494a..a95d185a1acbb954eb5ef33deedb6f4512733d1a 100644 (file)
@@ -134,7 +134,7 @@ sections_parser_finalize(sectionhead *headp)
     section *cur;
     
     STAILQ_FOREACH(cur, headp, link)
-       bytecodes_parser_finalize(&cur->bc);
+       bcs_parser_finalize(&cur->bc);
 }
 
 bytecodehead *
@@ -156,7 +156,7 @@ section_delete(section *sect)
        return;
 
     xfree(sect->name);
-    bytecodes_delete(&sect->bc);
+    bcs_delete(&sect->bc);
     xfree(sect);
 }
 
@@ -176,5 +176,5 @@ section_print(const section *sect)
     }
 
     printf(" Bytecodes:\n");
-    bytecodes_print(&sect->bc);
+    bcs_print(&sect->bc);
 }
index eccb034f7e66a7e0cf1b9541a082de826b0b3d98..020dee11559b4976057f7ae50c54b953fa602e7b 100644 (file)
@@ -290,7 +290,7 @@ symrec_print(const symrec *sym)
                printf("[First bytecode]\n");
            else {
                printf("[Preceding bytecode]\n");
-               bytecode_print(sym->value.label.bc);
+               bc_print(sym->value.label.bc);
            }
            break;
     }
index 9c7d99200da54de01e6c42ca009190c3dd24aeb1..36e4bb1d0d521b63ddc446418453f7a4cb0f566a 100644 (file)
@@ -26,6 +26,7 @@ LDADD = \
        $(top_builddir)/src/preprocs/raw/libpreproc.a           \
        $(top_builddir)/src/optimizers/dbg/liboptimizer.a       \
        $(top_builddir)/src/objfmts/dbg/libobjfmt.a             \
+       $(top_builddir)/src/arch/@ARCH@/libarch.a               \
        $(top_builddir)/src/libyasm.a                           \
        $(INTLLIBS)
 
@@ -54,5 +55,5 @@ memexpr_test_SOURCES = \
        memexpr_test.c
 
 INCLUDES= -I$(top_srcdir) -I$(top_srcdir)/src -I$(top_srcdir)/check \
-       -I$(top_builddir)/intl
+       -I$(top_srcdir)/src/arch/@ARCH@ -I$(top_builddir)/intl
 
index 77eadc6ad9c9302dc427994a5b85c06439aee684..f34115126ddb074fdf88366bc99aa1c8c612c3bf 100644 (file)
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#ifdef STDC_HEADERS
-# include <stdlib.h>
-#endif
+#include "util.h"
 
 #include "check.h"
 
-#include "bytecode.c"
+#include "bytecode.h"
+#include "bc-int.h"
+#include "arch.h"
+#include "x86-int.h"
 
-START_TEST(test_effaddr_new_reg)
+START_TEST(test_x86_ea_new_reg)
 {
     effaddr *ea;
+    x86_effaddr_data *ead;
     int i;
 
     /* Test with NULL */
-    ea = effaddr_new_reg(1);
+    ea = x86_ea_new_reg(1);
     fail_unless(ea != NULL, "Should die if out of memory (not return NULL)");
 
     /* Test structure values function should set */
     fail_unless(ea->len == 0, "len should be 0");
-    fail_unless(ea->segment == 0, "Should be no segment override");
-    fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid");
-    fail_unless(ea->need_modrm == 1, "Mod/RM should be needed");
-    fail_unless(ea->valid_sib == 0, "SIB should be invalid");
-    fail_unless(ea->need_sib == 0, "SIB should not be needed");
+    ead = ea_get_data(ea);
+    fail_unless(ead->segment == 0, "Should be no segment override");
+    fail_unless(ead->valid_modrm == 1, "Mod/RM should be valid");
+    fail_unless(ead->need_modrm == 1, "Mod/RM should be needed");
+    fail_unless(ead->valid_sib == 0, "SIB should be invalid");
+    fail_unless(ead->need_sib == 0, "SIB should not be needed");
 
     free(ea);
 
     /* Exhaustively test generated Mod/RM byte with register values */
     for(i=0; i<8; i++) {
-       ea = effaddr_new_reg(i);
-       fail_unless(ea->modrm == (0xC0 | (i & 0x07)),
+       ea = x86_ea_new_reg(i);
+       ead = ea_get_data(ea);
+       fail_unless(ead->modrm == (0xC0 | (i & 0x07)),
                    "Invalid Mod/RM byte generated");
        free(ea);
     }
@@ -66,7 +66,7 @@ bytecode_suite(void)
     TCase *tc_conversion = tcase_create("Conversion");
 
     suite_add_tcase(s, tc_conversion);
-    tcase_add_test(tc_conversion, test_effaddr_new_reg);
+    tcase_add_test(tc_conversion, test_x86_ea_new_reg);
 
     return s;
 }
index 5847318c0b4e093d096a0063113e8f7a5fda8280..5c5598d07237f439a0e75ab5856b69f9c0a9bb58 100644 (file)
@@ -3,7 +3,10 @@
 noinst_LIBRARIES = libarch.a
 
 libarch_a_SOURCES = \
-       arch.c
+       x86arch.h       \
+       x86-int.h       \
+       arch.c          \
+       bytecode.c
 
 INCLUDES = \
        -I$(top_srcdir)/src     \
index a5df22fb1dc394bfc42217130e09422918d80dbc..c9cf1caef15d2a6d3240d6d9c9146913659b41c4 100644 (file)
 #include "util.h"
 RCSID("$IdPath$");
 
+#include "bytecode.h"
 #include "arch.h"
 
+#include "x86-int.h"
+
+
+unsigned char x86_mode_bits = 0;
 
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
-    "x86"
+    "x86",
+    {
+       X86_BYTECODE_TYPE_MAX,
+       x86_bc_delete,
+       x86_bc_print,
+       x86_bc_parser_finalize
+    }
 };
diff --git a/modules/arch/x86/bytecode.c b/modules/arch/x86/bytecode.c
new file mode 100644 (file)
index 0000000..8083711
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * x86 architecture description
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Build a new x86 instruction bytecode from the parser-supplied fields in d.
+ * The d->ea and d->imm pointers are stored in the bytecode (not copied).
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    x86_insn *insn = bc_get_data(bc);
+    int i;
+
+    /* Overrides and prefixes start out cleared; BITS is captured now. */
+    insn->mode_bits = x86_mode_bits;
+    insn->shift_op = 0;
+    insn->lockrep_pre = 0;
+    insn->opersize = d->opersize;
+    insn->addrsize = 0;
+
+    insn->opcode_len = d->op_len;
+    for (i = 0; i < 3; i++)
+       insn->opcode[i] = d->op[i];
+
+    insn->imm = d->imm;
+    if (insn->imm) {
+       insn->imm->f_len = d->im_len;
+       insn->imm->f_sign = d->im_sign;
+    }
+
+    insn->ea = d->ea;
+    if (insn->ea) {
+       x86_effaddr_data *ead = ea_get_data(insn->ea);
+
+       /* Clear the spare/reg field (bits 3-5), then plug in d->spare. */
+       ead->modrm = (ead->modrm & 0xC7) | ((d->spare << 3) & 0x38);
+    }
+
+    return bc;
+}
+
+/* Build a new relative-jump bytecode from the parser-supplied fields in d.
+ * Both the short and the near opcode forms are recorded; an opcode length
+ * of 0 marks a form that does not exist for this instruction.  The target
+ * expression pointer is stored in the bytecode (not copied).
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form is only usable if that very form exists, so each check
+     * looks at the opcode length of the form named in its own message.
+     */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+       Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+       Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    /* Remember the BITS setting in effect when the jump was created. */
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Record a segment override on ea.  Does nothing when ea is NULL.  Warns
+ * when a non-zero override is applied on top of an existing non-zero one;
+ * the stored value is replaced by segment in every case.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    if (ea) {
+       x86_effaddr_data *ead = ea_get_data(ea);
+       int had_override = (ead->segment != 0);
+
+       if (had_override && segment != 0)
+           Warning(_("multiple segment overrides, using leftmost"));
+
+       ead->segment = segment;
+    }
+}
+
+/* Allocate an effective address that refers directly to a register.
+ * The Mod/RM byte is complete apart from the spare/reg field; no SIB byte
+ * and no displacement are used.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07);  /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->sib = 0;      /* never leave it indeterminate: x86_bc_print reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address described by the expression e.  The
+ * expression pointer is stored as the displacement (not copied).  Mod/RM is
+ * marked needed but not yet valid, and whether a SIB byte is needed is left
+ * unknown (0xff).
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->sib = 0;      /* never leave it indeterminate: x86_bc_print reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff;   /* we won't know until we know more about expr and
+                              the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address whose displacement is the immediate's
+ * expression, with a fixed length of im_len.  Neither a Mod/RM nor a SIB
+ * byte is needed.  Note that imm->val is shared with the new effaddr, not
+ * copied.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->sib = 0;      /* never leave it indeterminate: x86_bc_print reads it */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an instruction bytecode, or NULL when bc
+ * is NULL.  Raises an internal error for any other bytecode type.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+       return NULL;
+
+    if (bc->type != X86_BC_INSN)
+       InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Only locate the instruction data once bc is known to be non-NULL. */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Set the operand-size override on an instruction or relative-jump
+ * bytecode.  A NULL bc is ignored; any other bytecode type is an internal
+ * error.
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->opersize = opersize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->opersize = opersize;
+    } else
+       InternalError(_("OperSize override applied to non-instruction"));
+}
+
+/* Set the address-size override on an instruction or relative-jump
+ * bytecode.  A NULL bc is ignored; any other bytecode type is an internal
+ * error.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->addrsize = addrsize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->addrsize = addrsize;
+    } else
+       InternalError(_("AddrSize override applied to non-instruction"));
+}
+
+/* Store a LOCK/REP prefix byte on an instruction or relative-jump bytecode.
+ * A NULL bc is ignored; any other bytecode type is an internal error.
+ * Warns when a prefix was already present; the stored value is replaced by
+ * prefix in every case.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    unsigned char *slot = (unsigned char *)NULL;
+
+    if (!bc)
+       return;
+
+    /* Find the prefix field that belongs to this kind of bytecode. */
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       slot = &insn->lockrep_pre;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       slot = &jmprel->lockrep_pre;
+    } else {
+       InternalError(_("LockRep prefix applied to non-instruction"));
+       return;
+    }
+
+    if (*slot != 0)
+       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *slot = prefix;
+}
+
+/* Mark an instruction bytecode as a shift op, so that the ",1" opcode form
+ * can be selected in x86_bc_parser_finalize_insn().  A NULL bc is ignored;
+ * a non-instruction bytecode raises an internal error.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    if (bc) {
+       x86_insn *insn;
+
+       if (bc->type != X86_BC_INSN)
+           InternalError(_("Attempted to set shift flag on non-instruction"));
+
+       insn = bc_get_data(bc);
+       insn->shift_op = 1;
+    }
+}
+
+/* Replace *old_sel with new_sel.  A NULL old_sel is ignored.  Warns when a
+ * forced SHORT/NEAR selection is already present and new_sel is anything
+ * other than JR_NONE; the value is overwritten in every case.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                         x86_jmprel_opcode_sel new_sel)
+{
+    int was_forced;
+
+    if (!old_sel)
+       return;
+
+    was_forced = (*old_sel == JR_SHORT_FORCED) ||
+                (*old_sel == JR_NEAR_FORCED);
+
+    if (was_forced && new_sel != JR_NONE)
+       Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific contents of a bytecode: the effective address
+ * and immediate (with their expressions) of an instruction, or the target
+ * expression of a relative jump.  Other bytecode types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_bytecode_type type = (x86_bytecode_type)bc->type;
+
+    if (type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       if (insn->ea) {
+           expr_delete(insn->ea->disp);
+           xfree(insn->ea);
+       }
+       if (insn->imm) {
+           expr_delete(insn->imm->val);
+           xfree(insn->imm);
+       }
+    } else if (type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       expr_delete(jmprel->target);
+    }
+}
+
+/* Dump the x86-specific contents of a bytecode to stdout for debugging.
+ * Handles instruction and relative-jump bytecodes; other types print
+ * nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+       case X86_BC_INSN:
+           insn = bc_get_const_data(bc);
+           printf("_Instruction_\n");
+           printf("Effective Address:");
+           if (!insn->ea)
+               printf(" (nil)\n");
+           else {
+               printf("\n Disp=");
+               if (insn->ea->disp)
+                   expr_print(insn->ea->disp);
+               else
+                   printf("(nil)");
+               printf("\n");
+               ead = ea_get_data(insn->ea);
+               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                      (unsigned int)insn->ea->len,
+                      (unsigned int)ead->segment,
+                      (unsigned int)insn->ea->nosplit);
+               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                      (unsigned int)ead->modrm,
+                      (unsigned int)ead->valid_modrm,
+                      (unsigned int)ead->need_modrm);
+               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                      (unsigned int)ead->sib,
+                      (unsigned int)ead->valid_sib,
+                      (unsigned int)ead->need_sib);
+           }
+           printf("Immediate Value:");
+           if (!insn->imm)
+               printf(" (nil)\n");
+           else {
+               printf("\n Val=");
+               expr_print(insn->imm->val);
+               printf("\n");
+               printf(" Len=%u, IsNeg=%u\n",
+                      (unsigned int)insn->imm->len,
+                      (unsigned int)insn->imm->isneg);
+               printf(" FLen=%u, FSign=%u\n",
+                      (unsigned int)insn->imm->f_len,
+                      (unsigned int)insn->imm->f_sign);
+           }
+           printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                  (unsigned int)insn->opcode[0],
+                  (unsigned int)insn->opcode[1],
+                  (unsigned int)insn->opcode[2],
+                  (unsigned int)insn->opcode_len);
+           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                  (unsigned int)insn->addrsize,
+                  (unsigned int)insn->opersize,
+                  (unsigned int)insn->lockrep_pre,
+                  (unsigned int)insn->shift_op);
+           printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+           break;
+       case X86_BC_JMPREL:
+           jmprel = bc_get_const_data(bc);
+           printf("_Relative Jump_\n");
+           printf("Target=");
+           expr_print(jmprel->target);
+           /* An opcode length of 0 means that form does not exist. */
+           printf("\nShort Form:\n");
+           if (jmprel->shortop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->shortop.opcode[0],
+                      (unsigned int)jmprel->shortop.opcode[1],
+                      (unsigned int)jmprel->shortop.opcode[2],
+                      (unsigned int)jmprel->shortop.opcode_len);
+           printf("Near Form:\n");
+           if (jmprel->nearop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->nearop.opcode[0],
+                      (unsigned int)jmprel->nearop.opcode[1],
+                      (unsigned int)jmprel->nearop.opcode[2],
+                      (unsigned int)jmprel->nearop.opcode_len);
+           printf("OpSel=");
+           switch (jmprel->op_sel) {
+               case JR_NONE:
+                   printf("None");
+                   break;
+               case JR_SHORT:
+                   printf("Short");
+                   break;
+               case JR_NEAR:
+                   printf("Near");
+                   break;
+               case JR_SHORT_FORCED:
+                   printf("Forced Short");
+                   break;
+               case JR_NEAR_FORCED:
+                   printf("Forced Near");
+                   break;
+               default:
+                   printf("UNKNOWN!!");
+                   break;
+           }
+           /* Leading newline ends the OpSel line before BITS is printed. */
+           printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                  (unsigned int)jmprel->mode_bits,
+                  (unsigned int)jmprel->addrsize,
+                  (unsigned int)jmprel->opersize,
+                  (unsigned int)jmprel->lockrep_pre);
+           break;
+       default:
+           break;
+    }
+}
+
+/* Parser-level finalization of one x86 instruction:
+ *  - expands EQUs in the effective address and validates it with
+ *    expr_checkea() when its Mod/RM or SIB byte is needed but not yet valid;
+ *  - expands and simplifies the immediate;
+ *  - for shift ops whose immediate is the constant 1, switches to the ",1"
+ *    opcode form and drops the effective address and the immediate.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+       /* Only locate the x86 EA data once ea is known to be non-NULL. */
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                          (!ead->valid_modrm && ead->need_modrm))) {
+           /* First expand equ's */
+           expr_expand_equ(ea->disp);
+
+           /* Check validity of effective address and calc R/M bits of
+            * Mod/RM byte and SIB byte.  We won't know the Mod field
+            * of the Mod/RM byte until we know more about the
+            * displacement.
+            */
+           if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                             ea->nosplit, &ea->len, &ead->modrm,
+                             &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                             &ead->valid_sib, &ead->need_sib))
+               return;     /* failed, don't bother checking rest of insn */
+       }
+    }
+
+    if (imm) {
+       const intnum *num;
+
+       if (imm->val) {
+           expr_expand_equ(imm->val);
+           expr_simplify(imm->val);
+       }
+       /* TODO: check imm f_len vs. len? */
+
+       /* Handle shift_op special-casing */
+       if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+           if (intnum_get_uint(num) == 1) {
+               /* Use ,1 form: first copy ,1 opcode. */
+               insn->opcode[0] = insn->opcode[1];
+               /* Delete ModRM, as it's no longer needed.
+                * NOTE(review): only the effaddr itself is freed; its disp
+                * expression is not deleted here -- presumably it is NULL or
+                * shared with imm->val (see x86_ea_new_imm); confirm.
+                */
+               xfree(ea);
+               insn->ea = (effaddr *)NULL;
+               /* Delete Imm, as it's not needed */
+               expr_delete(imm->val);
+               xfree(imm);
+               insn->imm = (immval *)NULL;
+           }
+           insn->shift_op = 0;
+       }
+    }
+}
+
+/* Parser-level finalization hook for x86 bytecodes.  Only instruction
+ * bytecodes need any work; every other type is left as is.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    if ((x86_bytecode_type)bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       x86_bc_parser_finalize_insn(insn);
+    }
+}
+
diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h
new file mode 100644 (file)
index 0000000..2c3336e
--- /dev/null
@@ -0,0 +1,95 @@
+/* $IdPath$
+ * x86 internals header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_X86_INT_H
+#define YASM_X86_INT_H
+
+typedef struct x86_effaddr_data {
+    unsigned char segment;     /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in x86_bc_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
+                                  0xff if unknown */
+} x86_effaddr_data;
+
+typedef struct x86_insn {
+    effaddr *ea;       /* effective address */
+
+    immval *imm;       /* immediate or relative value */
+
+    unsigned char opcode[3];   /* opcode */
+    unsigned char opcode_len;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate).  In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array.  We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0.  Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_bc_insn_set_shift_flag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    unsigned char mode_bits;   /* x86_mode_bits value captured at creation */
+} x86_insn;
+
+typedef struct x86_jmprel {
+    expr *target;              /* target location */
+
+    struct {
+       unsigned char opcode[3];
+       unsigned char opcode_len;   /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    unsigned char mode_bits;   /* x86_mode_bits value captured at creation */
+} x86_jmprel;
+
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(const bytecode *bc);
+void x86_bc_parser_finalize(bytecode *bc);
+
+#endif
index a5df22fb1dc394bfc42217130e09422918d80dbc..c9cf1caef15d2a6d3240d6d9c9146913659b41c4 100644 (file)
 #include "util.h"
 RCSID("$IdPath$");
 
+#include "bytecode.h"
 #include "arch.h"
 
+#include "x86-int.h"
+
+
+unsigned char x86_mode_bits = 0;
 
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
-    "x86"
+    "x86",
+    {
+       X86_BYTECODE_TYPE_MAX,
+       x86_bc_delete,
+       x86_bc_print,
+       x86_bc_parser_finalize
+    }
 };
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h
new file mode 100644 (file)
index 0000000..70a207c
--- /dev/null
@@ -0,0 +1,93 @@
+/* $IdPath$
+ * x86 Architecture header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_X86ARCH_H
+#define YASM_X86ARCH_H
+
+/* Bytecode types contributed by the x86 architecture; numbering starts at
+ * BYTECODE_TYPE_BASE.
+ */
+typedef enum {
+    X86_BC_INSN = BYTECODE_TYPE_BASE,
+    X86_BC_JMPREL
+} x86_bytecode_type;
+/* One past the last x86 bytecode type.  Parenthesized so the expansion
+ * stays a single operand in whatever expression the macro is used.
+ */
+#define X86_BYTECODE_TYPE_MAX  (X86_BC_JMPREL+1)
+
+typedef enum {         /* which opcode form a relative jump uses */
+    JR_NONE,           /* no SHORT/NEAR specifier was given */
+    JR_SHORT,
+    JR_NEAR,
+    JR_SHORT_FORCED,   /* SHORT written explicitly in the source */
+    JR_NEAR_FORCED     /* NEAR written explicitly in the source */
+} x86_jmprel_opcode_sel;
+
+typedef struct x86_targetval {   /* jump target as produced by the parser */
+    expr *val;                   /* target expression */
+
+    x86_jmprel_opcode_sel op_sel;    /* SHORT/NEAR selection for the jump */
+} x86_targetval;
+
+void x86_ea_set_segment(effaddr *ea, unsigned char segment);
+effaddr *x86_ea_new_reg(unsigned long reg);
+effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_expr(expr *e);
+
+effaddr *x86_bc_insn_get_ea(bytecode *bc);
+
+void x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize);
+void x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize);
+void x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix);
+void x86_bc_insn_set_shift_flag(bytecode *bc);
+
+void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                              x86_jmprel_opcode_sel new_sel);
+
+/* Structure with *all* inputs passed to x86_bc_new_insn().
+ * IMPORTANT: ea and imm cannot be reused or freed after calling the
+ * function (it doesn't make a copy).
+ */
+typedef struct x86_new_insn_data {
+    effaddr *ea;               /* may be NULL; stored in the insn, not copied */
+    immval *imm;               /* may be NULL; stored in the insn, not copied */
+    unsigned char opersize;
+    unsigned char op_len;      /* becomes the insn's opcode_len */
+    unsigned char op[3];       /* opcode bytes, all three are copied */
+    unsigned char spare;       /* bits to go in 'spare' field of ModRM */
+    unsigned char im_len;      /* stored as imm->f_len when imm is given */
+    unsigned char im_sign;     /* stored as imm->f_sign when imm is given */
+} x86_new_insn_data;
+
+bytecode *x86_bc_new_insn(x86_new_insn_data *d);
+
+/* Structure with *all* inputs passed to x86_bc_new_jmprel().
+ * Pass 0 for short_op_len or near_op_len if that version of the opcode
+ * doesn't exist.
+ */
+typedef struct x86_new_jmprel_data {
+    x86_targetval *target;         /* supplies both the target expr and op_sel */
+    unsigned char short_op_len;    /* 0 = no short form */
+    unsigned char short_op[3];
+    unsigned char near_op_len;     /* 0 = no near form */
+    unsigned char near_op[3];
+    unsigned char addrsize;
+} x86_new_jmprel_data;
+
+bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
+
+extern unsigned char x86_mode_bits;
+
+#endif
diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c
new file mode 100644 (file)
index 0000000..8083711
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * x86 bytecode utility functions
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Build an X86_BC_INSN bytecode from the parser-supplied description in d.
+ * The effective address and immediate pointed to by d are stored in the new
+ * instruction as-is (no copy is made).  Returns the new bytecode.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *new_bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    x86_insn *data = bc_get_data(new_bc);
+    unsigned int i;
+
+    /* Prefix/flag state always starts out cleared. */
+    data->addrsize = 0;
+    data->lockrep_pre = 0;
+    data->shift_op = 0;
+    data->opersize = d->opersize;
+    data->mode_bits = x86_mode_bits;
+
+    /* Opcode bytes: all three are copied, opcode_len says how many count. */
+    for (i = 0; i < 3; i++)
+       data->opcode[i] = d->op[i];
+    data->opcode_len = d->op_len;
+
+    data->ea = d->ea;
+    if (d->ea) {
+       x86_effaddr_data *x86_ea = ea_get_data(d->ea);
+
+       /* Replace the spare/reg field (bits 3-5) of ModRM with d->spare. */
+       x86_ea->modrm = (x86_ea->modrm & 0xC7) | ((d->spare << 3) & 0x38);
+    }
+
+    data->imm = d->imm;
+    if (d->imm) {
+       data->imm->f_len = d->im_len;
+       data->imm->f_sign = d->im_sign;
+    }
+
+    return new_bc;
+}
+
+/* Build an X86_BC_JMPREL bytecode from the parser-supplied description in d.
+ * The target expression is taken from d->target without copying.
+ * Reports an error if the source forced SHORT or NEAR but the instruction
+ * has no opcode for that form (the corresponding *_op_len is 0).
+ * Returns the new bytecode.
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form must be checked against that same form's opcode length:
+     * SHORT against short_op_len, NEAR against near_op_len.
+     */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+       Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+       Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Store a segment override value on an effective address.
+ * A NULL ea is ignored.  A warning is issued when a nonzero override
+ * replaces an existing nonzero one; the new value is stored either way.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    x86_effaddr_data *x86_ea;
+
+    if (ea) {
+       x86_ea = ea_get_data(ea);
+
+       /* Setting or clearing against 0 is not a conflict. */
+       if (x86_ea->segment != 0 && segment != 0)
+           Warning(_("multiple segment overrides, using leftmost"));
+
+       x86_ea->segment = segment;
+    }
+}
+
+/* Allocate an effective address that names a register directly.
+ * The ModRM byte is fully known here (Mod=11, R/M=low 3 bits of reg),
+ * and no SIB byte or displacement is used.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *new_ea;
+    x86_effaddr_data *x86_ea;
+
+    new_ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_ea = ea_get_data(new_ea);
+
+    /* architecture-independent part */
+    new_ea->disp = (expr *)NULL;
+    new_ea->len = 0;
+    new_ea->nosplit = 0;
+
+    /* x86-specific part */
+    x86_ea->segment = 0;
+    x86_ea->need_modrm = 1;
+    x86_ea->valid_modrm = 1;
+    x86_ea->modrm = 0xC0 | (reg & 0x07);       /* Mod=11, R/M=Reg, Reg=0 */
+    x86_ea->need_sib = 0;
+    x86_ea->valid_sib = 0;
+
+    return new_ea;
+}
+
+/* Allocate an effective address described by expression e.
+ * e is stored as the displacement without copying.  ModRM is needed but
+ * not yet valid; whether a SIB byte is needed is left undecided (0xff).
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *new_ea;
+    x86_effaddr_data *x86_ea;
+
+    new_ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_ea = ea_get_data(new_ea);
+
+    /* architecture-independent part */
+    new_ea->disp = e;
+    new_ea->len = 0;
+    new_ea->nosplit = 0;
+
+    /* x86-specific part */
+    x86_ea->segment = 0;
+    x86_ea->need_modrm = 1;
+    x86_ea->valid_modrm = 0;
+    x86_ea->modrm = 0;
+    /* we won't know until we know more about expr and the BITS/address
+     * override setting
+     */
+    x86_ea->need_sib = 0xff;
+    x86_ea->valid_sib = 0;
+
+    return new_ea;
+}
+
+/* Allocate an effective address whose displacement is the value of imm,
+ * with an explicit length of im_len.  Neither a ModRM nor a SIB byte is
+ * requested.  imm->val is stored without copying; imm itself is not
+ * retained or freed here.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *new_ea;
+    x86_effaddr_data *x86_ea;
+
+    new_ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_ea = ea_get_data(new_ea);
+
+    /* architecture-independent part */
+    new_ea->disp = imm->val;
+    new_ea->len = im_len;
+    new_ea->nosplit = 0;
+
+    /* x86-specific part */
+    x86_ea->segment = 0;
+    x86_ea->need_modrm = 0;
+    x86_ea->valid_modrm = 0;
+    x86_ea->modrm = 0;
+    x86_ea->need_sib = 0;
+    x86_ea->valid_sib = 0;
+
+    return new_ea;
+}
+
+/* Return the effective address of an instruction bytecode.
+ * Returns NULL when bc is NULL; raises an internal error when bc is not
+ * an X86_BC_INSN.  The result may itself be NULL if the instruction has
+ * no effective address.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+       return NULL;
+
+    if (bc->type != X86_BC_INSN)
+       InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Locate the instruction data only after bc is known to be non-NULL. */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Set the operand-size override of an instruction or relative jump.
+ * A NULL bc is ignored; any other bytecode type is an internal error.
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->opersize = opersize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->opersize = opersize;
+    } else {
+       InternalError(_("OperSize override applied to non-instruction"));
+    }
+}
+
+/* Set the address-size override of an instruction or relative jump.
+ * A NULL bc is ignored; any other bytecode type is an internal error.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->addrsize = addrsize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->addrsize = addrsize;
+    } else {
+       InternalError(_("AddrSize override applied to non-instruction"));
+    }
+}
+
+/* Set the LOCK/REP prefix byte of an instruction or relative jump.
+ * A NULL bc is ignored; any other bytecode type is an internal error.
+ * Warns when a prefix was already present, then stores the new one.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    unsigned char *slot = (unsigned char *)NULL;
+
+    if (!bc)
+       return;
+
+    /* Find where this bytecode type keeps its prefix byte. */
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       slot = &insn->lockrep_pre;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       slot = &jmprel->lockrep_pre;
+    } else {
+       InternalError(_("LockRep prefix applied to non-instruction"));
+       return;
+    }
+
+    if (*slot != 0)
+       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *slot = prefix;
+}
+
+/* Mark an instruction bytecode as a shift operation (shift_op = 1).
+ * A NULL bc is ignored; a non-instruction bytecode is an internal error.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    x86_insn *data;
+
+    if (bc) {
+       if (bc->type != X86_BC_INSN)
+           InternalError(_("Attempted to set shift flag on non-instruction"));
+
+       data = bc_get_data(bc);
+       data->shift_op = 1;
+    }
+}
+
+/* Store new_sel into *old_sel.  A NULL old_sel is ignored.
+ * Warns when an explicit selection (anything but JR_NONE) replaces an
+ * already-forced SHORT or NEAR selection.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                         x86_jmprel_opcode_sel new_sel)
+{
+    if (old_sel) {
+       /* *old_sel is inspected only when new_sel is not JR_NONE. */
+       if (new_sel != JR_NONE) {
+           if (*old_sel == JR_SHORT_FORCED || *old_sel == JR_NEAR_FORCED)
+               Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+       }
+       *old_sel = new_sel;
+    }
+}
+
+/* Release the x86-specific contents of a bytecode: the effective address
+ * and immediate (with their expressions) of an instruction, or the target
+ * expression of a relative jump.  Other bytecode types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_bytecode_type kind = (x86_bytecode_type)bc->type;
+
+    if (kind == X86_BC_INSN) {
+       x86_insn *data = bc_get_data(bc);
+
+       if (data->ea) {
+           expr_delete(data->ea->disp);
+           xfree(data->ea);
+       }
+       if (data->imm) {
+           expr_delete(data->imm->val);
+           xfree(data->imm);
+       }
+    } else if (kind == X86_BC_JMPREL) {
+       x86_jmprel *data = bc_get_data(bc);
+
+       expr_delete(data->target);
+    }
+}
+
+/* Dump the x86-specific contents of a bytecode to stdout for debugging.
+ * Handles X86_BC_INSN and X86_BC_JMPREL; other types print nothing.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+       case X86_BC_INSN:
+           insn = bc_get_const_data(bc);
+           printf("_Instruction_\n");
+           printf("Effective Address:");
+           if (!insn->ea)
+               printf(" (nil)\n");
+           else {
+               printf("\n Disp=");
+               if (insn->ea->disp)
+                   expr_print(insn->ea->disp);
+               else
+                   printf("(nil)");
+               printf("\n");
+               ead = ea_get_data(insn->ea);
+               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                      (unsigned int)insn->ea->len,
+                      (unsigned int)ead->segment,
+                      (unsigned int)insn->ea->nosplit);
+               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                      (unsigned int)ead->modrm,
+                      (unsigned int)ead->valid_modrm,
+                      (unsigned int)ead->need_modrm);
+               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                      (unsigned int)ead->sib,
+                      (unsigned int)ead->valid_sib,
+                      (unsigned int)ead->need_sib);
+           }
+           printf("Immediate Value:");
+           if (!insn->imm)
+               printf(" (nil)\n");
+           else {
+               printf("\n Val=");
+               expr_print(insn->imm->val);
+               printf("\n");
+               printf(" Len=%u, IsNeg=%u\n",
+                      (unsigned int)insn->imm->len,
+                      (unsigned int)insn->imm->isneg);
+               printf(" FLen=%u, FSign=%u\n",
+                      (unsigned int)insn->imm->f_len,
+                      (unsigned int)insn->imm->f_sign);
+           }
+           printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                  (unsigned int)insn->opcode[0],
+                  (unsigned int)insn->opcode[1],
+                  (unsigned int)insn->opcode[2],
+                  (unsigned int)insn->opcode_len);
+           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                  (unsigned int)insn->addrsize,
+                  (unsigned int)insn->opersize,
+                  (unsigned int)insn->lockrep_pre,
+                  (unsigned int)insn->shift_op);
+           printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+           break;
+       case X86_BC_JMPREL:
+           jmprel = bc_get_const_data(bc);
+           printf("_Relative Jump_\n");
+           printf("Target=");
+           expr_print(jmprel->target);
+           printf("\nShort Form:\n");
+           /* opcode_len of 0 means this form does not exist */
+           if (jmprel->shortop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->shortop.opcode[0],
+                      (unsigned int)jmprel->shortop.opcode[1],
+                      (unsigned int)jmprel->shortop.opcode[2],
+                      (unsigned int)jmprel->shortop.opcode_len);
+           printf("Near Form:\n");
+           if (jmprel->nearop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->nearop.opcode[0],
+                      (unsigned int)jmprel->nearop.opcode[1],
+                      (unsigned int)jmprel->nearop.opcode[2],
+                      (unsigned int)jmprel->nearop.opcode_len);
+           printf("OpSel=");
+           switch (jmprel->op_sel) {
+               case JR_NONE:
+                   printf("None");
+                   break;
+               case JR_SHORT:
+                   printf("Short");
+                   break;
+               case JR_NEAR:
+                   printf("Near");
+                   break;
+               case JR_SHORT_FORCED:
+                   printf("Forced Short");
+                   break;
+               case JR_NEAR_FORCED:
+                   printf("Forced Near");
+                   break;
+               default:
+                   printf("UNKNOWN!!");
+                   break;
+           }
+           /* leading newline ends the OpSel line started above */
+           printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                  (unsigned int)jmprel->mode_bits,
+                  (unsigned int)jmprel->addrsize,
+                  (unsigned int)jmprel->opersize,
+                  (unsigned int)jmprel->lockrep_pre);
+           break;
+    }
+}
+
+/* Parser-time fixups for one x86 instruction:
+ *  - if the effective address has a displacement and its ModRM or SIB byte
+ *    is still needed but not valid, expand EQUs in the displacement and
+ *    run expr_checkea() on it (giving up on the instruction if that fails);
+ *  - expand EQUs in and simplify the immediate, if any;
+ *  - for shift_op instructions whose immediate is the known integer 1,
+ *    switch to the ",1" opcode (stored in opcode[1]) and drop the immediate.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+       /* Look up the x86 EA data only once ea is known to be non-NULL. */
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                          (!ead->valid_modrm && ead->need_modrm))) {
+           /* First expand equ's */
+           expr_expand_equ(ea->disp);
+
+           /* Check validity of effective address and calc R/M bits of
+            * Mod/RM byte and SIB byte.  We won't know the Mod field
+            * of the Mod/RM byte until we know more about the
+            * displacement.
+            */
+           if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                             ea->nosplit, &ea->len, &ead->modrm,
+                             &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                             &ead->valid_sib, &ead->need_sib))
+               return;     /* failed, don't bother checking rest of insn */
+       }
+    }
+
+    if (imm) {
+       const intnum *num;
+
+       if (imm->val) {
+           expr_expand_equ(imm->val);
+           expr_simplify(imm->val);
+       }
+       /* TODO: check imm f_len vs. len? */
+
+       /* Handle shift_op special-casing */
+       if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+           if (intnum_get_uint(num) == 1) {
+               /* Use ,1 form: first copy ,1 opcode. */
+               insn->opcode[0] = insn->opcode[1];
+               /* The ,1 form still takes its r/m operand through ModRM,
+                * so the effective address is kept; only the immediate
+                * goes away.
+                */
+               expr_delete(imm->val);
+               xfree(imm);
+               insn->imm = (immval *)NULL;
+           }
+           insn->shift_op = 0;
+       }
+    }
+}
+
+/* Parser finalize hook for x86 bytecodes.  Instructions get their parse-time
+ * fixups applied; every other bytecode type is left alone.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    if ((x86_bytecode_type)bc->type == X86_BC_INSN) {
+       x86_insn *data = bc_get_data(bc);
+
+       x86_bc_parser_finalize_insn(data);
+    }
+}
+
index 54893a7695ef90df3be9602fd496bf6b108ad499..fa318fd3b2dff626e129363f6eb321ae18fdb0d1 100644 (file)
@@ -40,6 +40,7 @@ RCSID("$IdPath$");
 #include "section.h"
 #include "objfmt.h"
 
+#include "arch.h"
 
 #define YYDEBUG 1
 
@@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base;
 static bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
 
+/* additional data declarations (dynamically generated) */
+/* @DATADECLS@ */
+
 %}
 
 %union {
@@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc;
     effaddr *ea;
     expr *exp;
     immval *im_val;
-    targetval tgt_val;
+    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
@@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc;
 %%
 input: /* empty */
     | input line    {
-       nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section),
+       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
                                               $2);
        if (nasm_parser_temp_bc)
            nasm_parser_prev_bc = nasm_parser_temp_bc;
@@ -145,10 +149,10 @@ line: '\n'                { $$ = (bytecode *)NULL; }
 ;
 
 lineexp: exp
-    | TIMES expr exp                   { $$ = $3; SetBCMultiple($$, $2); }
+    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
     | label                            { $$ = (bytecode *)NULL; }
     | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; SetBCMultiple($$, $3); }
+    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
     | label_id EQU expr                        {
        symrec_define_equ($1, $3);
        xfree($1);
@@ -157,22 +161,16 @@ lineexp: exp
 ;
 
 exp: instr
-    | DECLARE_DATA datavals        { $$ = bytecode_new_data(&$2, $1); }
-    | RESERVE_SPACE expr           { $$ = bytecode_new_reserve($2, $1); }
+    | DECLARE_DATA datavals        { $$ = bc_new_data(&$2, $1); }
+    | RESERVE_SPACE expr           { $$ = bc_new_reserve($2, $1); }
 ;
 
-datavals: dataval          {
-       datavals_initialize(&$$);
-       datavals_append(&$$, $1);
-    }
-    | datavals ',' dataval  {
-       datavals_append(&$1, $3);
-       $$ = $1;
-    }
+datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
+    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
 ;
 
-dataval: expr_no_string        { $$ = dataval_new_expr($1); }
-    | STRING           { $$ = dataval_new_string($1); }
+dataval: expr_no_string        { $$ = dv_new_expr($1); }
+    | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
        $$ = (dataval *)NULL;
@@ -317,17 +315,20 @@ memexpr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
     | error                    { Error(_("invalid effective address")); }
 ;
 
-memaddr: memexpr           { $$ = effaddr_new_expr($1); SetEASegment($$, 0); }
-    | REG_CS ':' memaddr    { $$ = $3; SetEASegment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; SetEASegment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; SetEASegment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; SetEASegment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; SetEASegment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; SetEASegment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; SetEALen($$, 1); }
-    | WORD memaddr         { $$ = $2; SetEALen($$, 2); }
-    | DWORD memaddr        { $$ = $2; SetEALen($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; SetEANosplit($$, 1); }
+memaddr: memexpr           {
+       $$ = x86_ea_new_expr($1);
+       x86_ea_set_segment($$, 0);
+    }
+    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
+    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
+    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
+    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
+    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
+    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
+    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
+    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
+    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
+    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
 ;
 
 mem: '[' memaddr ']'   { $$ = $2; }
@@ -378,43 +379,43 @@ mem1632: mem
 ;
 
 /* explicit register or memory */
-rm8x: reg8     { $$ = effaddr_new_reg($1); }
+rm8x: reg8     { $$ = x86_ea_new_reg($1); }
     | mem8x
 ;
-rm16x: reg16   { $$ = effaddr_new_reg($1); }
+rm16x: reg16   { $$ = x86_ea_new_reg($1); }
     | mem16x
 ;
-rm32x: reg32   { $$ = effaddr_new_reg($1); }
+rm32x: reg32   { $$ = x86_ea_new_reg($1); }
     | mem32x
 ;
 /* not needed:
-rm64x: MMXREG  { $$ = effaddr_new_reg($1); }
+rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
     | mem64x
 ;
-rm128x: XMMREG { $$ = effaddr_new_reg($1); }
+rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
     | mem128x
 ;
 */
 
 /* implicit register or memory */
-rm8: reg8      { $$ = effaddr_new_reg($1); }
+rm8: reg8      { $$ = x86_ea_new_reg($1); }
     | mem8
 ;
-rm16: reg16    { $$ = effaddr_new_reg($1); }
+rm16: reg16    { $$ = x86_ea_new_reg($1); }
     | mem16
 ;
-rm32: reg32    { $$ = effaddr_new_reg($1); }
+rm32: reg32    { $$ = x86_ea_new_reg($1); }
     | mem32
 ;
-rm64: MMXREG   { $$ = effaddr_new_reg($1); }
+rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
     | mem64
 ;
-rm128: XMMREG  { $$ = effaddr_new_reg($1); }
+rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
     | mem128
 ;
 
 /* immediate values */
-imm: expr   { $$ = immval_new_expr($1); }
+imm: expr   { $$ = imm_new_expr($1); }
 ;
 
 /* explicit immediates */
@@ -437,9 +438,18 @@ imm32: imm
 ;
 
 /* jump targets */
-target: expr           { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); }
-    | SHORT target     { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
-    | NEAR target      { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+target: expr           {
+       $$.val = $1;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+    }
+    | SHORT target     {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+    }
+    | NEAR target      {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+    }
 ;
 
 /* expression trees */
@@ -493,18 +503,36 @@ explabel: ID              { $$ = symrec_use($1); xfree($1); }
 ;
 
 instr: instrbase
-    | OPERSIZE instr   { $$ = $2; SetInsnOperSizeOverride($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; SetInsnAddrSizeOverride($$, $1); }
-    | REG_CS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); }
-    | REG_SS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); }
-    | REG_DS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); }
-    | REG_ES instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); }
-    | REG_FS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); }
-    | REG_GS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); }
-    | LOCK instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); }
-    | REP instr                { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); }
+    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
+    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
+    | REG_CS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
+    }
+    | REG_SS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
+    }
+    | REG_DS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
 ;
 
 /* instruction grammars (dynamically generated) */
@@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val)
        if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32))
            Error(_("`%s' is not a valid argument to [BITS]"), val);
        else
-           mode_bits = (unsigned char)lval;
+           x86_mode_bits = (unsigned char)lval;
     } else {
        printf("Directive: Name=`%s' Value=`%s'\n", name, val);
     }
index adaa58dbe18c6d10eeb8844fc1217dd26600a5b4..be391260f953462aef6585b58a160b293c76c240 100755 (executable)
@@ -353,7 +353,8 @@ sub cond_action_if ( $ $ $ $ $ $ $ )
     my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
     return rule_header ($rule, $tokens, $count) . <<"EOF";
         if (\$$regarg == $val) {
-            \$\$ = $func(@$a_eax);
+            @$a_eax
+            \$\$ = $func;
         }
 EOF
 }
@@ -362,7 +363,8 @@ sub cond_action_elsif ( $ $ $ $ )
     my ($regarg, $val, $func, $a_eax) = splice (@_);
     return <<"EOF";
         else if (\$$regarg == $val) {
-            \$\$ = $func(@$a_eax);
+            @$a_eax
+            \$\$ = $func;
         }
 EOF
 }
@@ -371,7 +373,8 @@ sub cond_action_else ( $ $ )
     my ($func, $a_args) = splice (@_);
     return <<"EOF" . rule_footer;
         else {
-            \$\$ = $func (@$a_args);
+            @$a_args
+            \$\$ = $func;
         }
 EOF
 }
@@ -388,7 +391,8 @@ sub action ( @ $ )
 {
     my ($rule, $tokens, $func, $a_args, $count) = splice @_;
     return rule_header ($rule, $tokens, $count)
-       . "        \$\$ = $func (@$a_args);\n"
+       . "        @$a_args\n"
+       . "        \$\$ = $func;\n"
        . rule_footer; 
 }
 
@@ -396,8 +400,9 @@ sub action_setshiftflag ( @ $ )
 {
     my ($rule, $tokens, $func, $a_args, $count) = splice @_;
     return rule_header ($rule, $tokens, $count)
-       . "        \$\$ = $func (@$a_args);\n"
-       . "        SetInsnShiftFlag(\$\$);\n"
+       . "        @$a_args\n"
+       . "        \$\$ = $func;\n"
+       . "        x86_bc_insn_set_shift_flag(\$\$);\n"
        . rule_footer; 
 }
 
@@ -421,7 +426,12 @@ sub output_yacc ($@)
 
     while (<IN>)
     {
-       if (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
+       if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
+       {
+           print GRAMMAR "static x86_new_insn_data idata;\n";
+           print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
+       }
+       elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
        {
            my $len = length("%token <groupdata>");
            print GRAMMAR "%token <groupdata>";
@@ -500,69 +510,82 @@ sub output_yacc ($@)
                            if $inst->[OPERANDS] ne 'nil';
                        $tokens =~ s/,/ ',' /g;
                        $tokens =~ s/:/ ':' /g;
-                       my $func = "bytecode_new_jmprel";
+                       my $datastruct = "x86_new_jmprel_data";
+                       my $datastructname = "jrdata";
+                       my $func = "x86_bc_new_jmprel(&$datastructname)";
 
                        # Create the argument list for bytecode_new
                        my @args;
 
                        # Target argument: HACK: Always assumed to be arg 1.
-                       push @args, '&$2,';
+                       push @args, 'target=&$2;';
 
                        # test for short opcode "nil"
                        if($inst->[SHORTOPCODE] =~ m/nil/)
                        {
-                           push @args, '0, 0, 0, 0,';
+                           push @args, 'short_op_len=0;';
+                           push @args, 'short_op[0]=0;';
+                           push @args, 'short_op[1]=0;';
+                           push @args, 'short_op[2]=0;';
                        }
                        else
                        {
-                           # number of bytes of short opcode
-                           push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ",";
-
                            # opcode piece 1 (and 2 and 3 if attached)
-                           push @args, $inst->[SHORTOPCODE];
-                           $args[-1] =~ s/,/, /;
-                           $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           # don't match $0.\d in the following rule.
-                           $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
-                           $args[-1] .= ',';
+                           my @opcodes = split ",", $inst->[SHORTOPCODE];
+                           # number of bytes of short opcode
+                           push @args, "short_op_len=".@opcodes.";";
+                           for (my $i=0; $i < @opcodes; ++$i)
+                           {
+                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                               # don't match $0.\d in the following rule.
+                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
+                               push @args, "short_op[$i]=$opcodes[$i];";
+                           }
 
                            # opcode piece 2 (if not attached)
-                           push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
+                           push @args, "short_op[1]=0;" if @opcodes < 2;
                            # opcode piece 3 (if not attached)
-                           push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
+                           push @args, "short_op[2]=0;" if @opcodes < 3;
                        }
 
                        # test for near opcode "nil"
                        if($inst->[NEAROPCODE] =~ m/nil/)
                        {
-                           push @args, '0, 0, 0, 0,';
+                           push @args, 'near_op_len=0;';
+                           push @args, 'near_op[0]=0;';
+                           push @args, 'near_op[1]=0;';
+                           push @args, 'near_op[2]=0;';
                        }
                        else
                        {
-                           # number of bytes of near opcode
-                           push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ",";
-
                            # opcode piece 1 (and 2 and 3 if attached)
-                           push @args, $inst->[NEAROPCODE];
-                           $args[-1] =~ s/,/, /;
-                           $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           # don't match $0.\d in the following rule.
-                           $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
-                           $args[-1] .= ',';
+                           my @opcodes = split ",", $inst->[NEAROPCODE];
+                           # number of bytes of near opcode
+                           push @args, "near_op_len=".@opcodes.";";
+                           for (my $i=0; $i < @opcodes; ++$i)
+                           {
+                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                               # don't match $0.\d in the following rule.
+                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
+                               push @args, "near_op[$i]=$opcodes[$i];";
+                           }
 
                            # opcode piece 2 (if not attached)
-                           push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
+                           push @args, "near_op[1]=0;" if @opcodes < 2;
                            # opcode piece 3 (if not attached)
-                           push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
+                           push @args, "near_op[2]=0;" if @opcodes < 3;
                        }
 
                        # address size
-                       push @args, "$inst->[ADSIZE]";
+                       push @args, "addrsize=$inst->[ADSIZE];";
                        $args[-1] =~ s/nil/0/;
 
                        # now that we've constructed the arglist, subst $0.\d
                        s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
 
+                       # and add the data structure reference
+                       s/^/$datastructname./g foreach (@args);
+
                        # generate the grammar
                        print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
                    }
@@ -583,65 +606,79 @@ sub output_yacc ($@)
                        $tokens =~ s/:/ ':' /g;
                        # offset args
                        my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
-                       my $func = "bytecode_new_insn";
+                       my $datastruct = "x86_new_insn_data";
+                       my $datastructname = "idata";
+                       my $func = "x86_bc_new_insn(&$datastructname)";
 
                        # Create the argument list for bytecode_new
                        my @args;
 
                        # operand size
-                       push @args, "$inst->[OPSIZE],";
+                       push @args, "opersize=$inst->[OPSIZE];";
                        $args[-1] =~ s/nil/0/;
 
-                       # number of bytes of opcodes
-                       push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
 
                        # opcode piece 1 (and 2 and 3 if attached)
-                       push @args, $inst->[OPCODE];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                       # don't match $0.\d in the following rule.
-                       $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
-                       $args[-1] .= ',';
+                       my @opcodes = split ",", $inst->[OPCODE];
+                       # number of bytes of opcodes
+                       push @args, "op_len=".@opcodes.";";
+                       for (my $i=0; $i < @opcodes; ++$i)
+                       {
+                           $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                           # don't match $0.\d in the following rule.
+                           $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
+                           push @args, "op[$i]=$opcodes[$i];";
+                       }
 
                        # opcode piece 2 (if not attached)
-                       push @args, "0," if $inst->[OPCODE] !~ m/,/o;
+                       push @args, "op[1]=0;" if @opcodes < 2;
                        # opcode piece 3 (if not attached)
-                       push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
+                       push @args, "op[2]=0;" if @opcodes < 3;
 
                        # effective addresses
-                       push @args, $inst->[EFFADDR];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
-                       $args[-1] =~ s/nil/0/;
+                       my $effaddr = $inst->[EFFADDR];
+                       $effaddr =~ s/^nil/(effaddr *)NULL,0/;
+                       $effaddr =~ s/nil/0/;
                        # don't let a $0.\d match slip into the following rules.
-                       $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+                       $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
                        #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
-                       $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/;
-                       $args[-1] =~ s[(\$\d+)i,\s*(\d+)]
-                           ["effaddr_new_imm($1, ".($2/8)."), 0"]e;
-                       $args[-1] .= ',';
+                       $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
+                       $effaddr =~ s[(\$\d+)i,\s*(\d+)]
+                           ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
 
-                       die $args[-1] if $args[-1] =~ m/\d+[ri]/;
+                       die $effaddr if $effaddr =~ m/\d+[ri]/;
+
+                       my @effaddr_split = split ',', $effaddr;
+                       $effaddr_split[0] =~ s/\^/,/;
+                       push @args, "ea=$effaddr_split[0];";
+                       push @args, "spare=$effaddr_split[1];";
 
                        # immediate sources
-                       push @args, $inst->[IMM];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/nil/(immval *)NULL, 0/;
+                       my $imm = $inst->[IMM];
+                       $imm =~ s/nil/(immval *)NULL,0/;
                        # don't match $0.\d in the following rules.
-                       $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       $args[-1] =~ s[^([0-9A-Fa-f]+),]
-                           [immval_new_int(0x$1),];
-                       $args[-1] =~ s[^\$0.(\d+),]
-                           [immval_new_int(\$1\[$1\]),];
+                       $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+                       $imm =~ s[^([0-9A-Fa-f]+),]
+                           [imm_new_int(0x$1),];
+                       $imm =~ s[^\$0.(\d+),]
+                           [imm_new_int(\$1\[$1\]),];
 
                        # divide the second, and only the second, by 8 bits/byte
-                       $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
-                       $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
+                       $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
+                       $imm .= ($3||'') eq 's' ? ',1' : ',0';
+
+                       die $imm if $imm =~ m/\d+s/;
 
-                       die $args[-1] if $args[-1] =~ m/\d+s/;
+                       my @imm_split = split ",", $imm;
+                       push @args, "imm=$imm_split[0];";
+                       push @args, "im_len=$imm_split[1];";
+                       push @args, "im_sign=$imm_split[2];";
 
                        # now that we've constructed the arglist, subst $0.\d
                        s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
+
+                       # and add the data structure reference
+                       s/^/$datastructname./g foreach (@args);
                    
                        # see if we match one of the cases to defer
                        if (($inst->[OPERANDS]||"") =~ m/,ONE/)
@@ -691,8 +728,9 @@ sub output_yacc ($@)
                            # Now output imm version, with second opcode byte
                            # set to ,1 opcode.  Also call SetInsnShiftFlag().
                            $tokens =~ s/imm8x/imm/;
-                           die "no space for ONE?" if $args[3] !~ m/0,/;
-                           $args[3] = $ONE->[3]->[2];
+                           die "no space for ONE?" if $args[3] !~ m/0;/;
+                           my $oneval = $ONE->[3]->[2];
+                           $args[3] =~ s/0/$oneval/;
                            print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
                        }
                        elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
index 54893a7695ef90df3be9602fd496bf6b108ad499..fa318fd3b2dff626e129363f6eb321ae18fdb0d1 100644 (file)
@@ -40,6 +40,7 @@ RCSID("$IdPath$");
 #include "section.h"
 #include "objfmt.h"
 
+#include "arch.h"
 
 #define YYDEBUG 1
 
@@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base;
 static bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
 
+/* additional data declarations (dynamically generated) */
+/* @DATADECLS@ */
+
 %}
 
 %union {
@@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc;
     effaddr *ea;
     expr *exp;
     immval *im_val;
-    targetval tgt_val;
+    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
@@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc;
 %%
 input: /* empty */
     | input line    {
-       nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section),
+       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
                                               $2);
        if (nasm_parser_temp_bc)
            nasm_parser_prev_bc = nasm_parser_temp_bc;
@@ -145,10 +149,10 @@ line: '\n'                { $$ = (bytecode *)NULL; }
 ;
 
 lineexp: exp
-    | TIMES expr exp                   { $$ = $3; SetBCMultiple($$, $2); }
+    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
     | label                            { $$ = (bytecode *)NULL; }
     | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; SetBCMultiple($$, $3); }
+    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
     | label_id EQU expr                        {
        symrec_define_equ($1, $3);
        xfree($1);
@@ -157,22 +161,16 @@ lineexp: exp
 ;
 
 exp: instr
-    | DECLARE_DATA datavals        { $$ = bytecode_new_data(&$2, $1); }
-    | RESERVE_SPACE expr           { $$ = bytecode_new_reserve($2, $1); }
+    | DECLARE_DATA datavals        { $$ = bc_new_data(&$2, $1); }
+    | RESERVE_SPACE expr           { $$ = bc_new_reserve($2, $1); }
 ;
 
-datavals: dataval          {
-       datavals_initialize(&$$);
-       datavals_append(&$$, $1);
-    }
-    | datavals ',' dataval  {
-       datavals_append(&$1, $3);
-       $$ = $1;
-    }
+datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
+    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
 ;
 
-dataval: expr_no_string        { $$ = dataval_new_expr($1); }
-    | STRING           { $$ = dataval_new_string($1); }
+dataval: expr_no_string        { $$ = dv_new_expr($1); }
+    | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
        $$ = (dataval *)NULL;
@@ -317,17 +315,20 @@ memexpr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
     | error                    { Error(_("invalid effective address")); }
 ;
 
-memaddr: memexpr           { $$ = effaddr_new_expr($1); SetEASegment($$, 0); }
-    | REG_CS ':' memaddr    { $$ = $3; SetEASegment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; SetEASegment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; SetEASegment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; SetEASegment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; SetEASegment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; SetEASegment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; SetEALen($$, 1); }
-    | WORD memaddr         { $$ = $2; SetEALen($$, 2); }
-    | DWORD memaddr        { $$ = $2; SetEALen($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; SetEANosplit($$, 1); }
+memaddr: memexpr           {
+       $$ = x86_ea_new_expr($1);
+       x86_ea_set_segment($$, 0);
+    }
+    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
+    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
+    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
+    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
+    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
+    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
+    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
+    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
+    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
+    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
 ;
 
 mem: '[' memaddr ']'   { $$ = $2; }
@@ -378,43 +379,43 @@ mem1632: mem
 ;
 
 /* explicit register or memory */
-rm8x: reg8     { $$ = effaddr_new_reg($1); }
+rm8x: reg8     { $$ = x86_ea_new_reg($1); }
     | mem8x
 ;
-rm16x: reg16   { $$ = effaddr_new_reg($1); }
+rm16x: reg16   { $$ = x86_ea_new_reg($1); }
     | mem16x
 ;
-rm32x: reg32   { $$ = effaddr_new_reg($1); }
+rm32x: reg32   { $$ = x86_ea_new_reg($1); }
     | mem32x
 ;
 /* not needed:
-rm64x: MMXREG  { $$ = effaddr_new_reg($1); }
+rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
     | mem64x
 ;
-rm128x: XMMREG { $$ = effaddr_new_reg($1); }
+rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
     | mem128x
 ;
 */
 
 /* implicit register or memory */
-rm8: reg8      { $$ = effaddr_new_reg($1); }
+rm8: reg8      { $$ = x86_ea_new_reg($1); }
     | mem8
 ;
-rm16: reg16    { $$ = effaddr_new_reg($1); }
+rm16: reg16    { $$ = x86_ea_new_reg($1); }
     | mem16
 ;
-rm32: reg32    { $$ = effaddr_new_reg($1); }
+rm32: reg32    { $$ = x86_ea_new_reg($1); }
     | mem32
 ;
-rm64: MMXREG   { $$ = effaddr_new_reg($1); }
+rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
     | mem64
 ;
-rm128: XMMREG  { $$ = effaddr_new_reg($1); }
+rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
     | mem128
 ;
 
 /* immediate values */
-imm: expr   { $$ = immval_new_expr($1); }
+imm: expr   { $$ = imm_new_expr($1); }
 ;
 
 /* explicit immediates */
@@ -437,9 +438,18 @@ imm32: imm
 ;
 
 /* jump targets */
-target: expr           { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); }
-    | SHORT target     { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
-    | NEAR target      { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+target: expr           {
+       $$.val = $1;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+    }
+    | SHORT target     {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+    }
+    | NEAR target      {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+    }
 ;
 
 /* expression trees */
@@ -493,18 +503,36 @@ explabel: ID              { $$ = symrec_use($1); xfree($1); }
 ;
 
 instr: instrbase
-    | OPERSIZE instr   { $$ = $2; SetInsnOperSizeOverride($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; SetInsnAddrSizeOverride($$, $1); }
-    | REG_CS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); }
-    | REG_SS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); }
-    | REG_DS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); }
-    | REG_ES instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); }
-    | REG_FS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); }
-    | REG_GS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); }
-    | LOCK instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); }
-    | REP instr                { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); }
+    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
+    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
+    | REG_CS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
+    }
+    | REG_SS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
+    }
+    | REG_DS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
 ;
 
 /* instruction grammars (dynamically generated) */
@@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val)
        if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32))
            Error(_("`%s' is not a valid argument to [BITS]"), val);
        else
-           mode_bits = (unsigned char)lval;
+           x86_mode_bits = (unsigned char)lval;
     } else {
        printf("Directive: Name=`%s' Value=`%s'\n", name, val);
     }
index 8c1d683446cafded20f4c5d037fa99a4d09e9f83..54002180ea017fab1ef89aae0a915646e2111989 100644 (file)
@@ -33,6 +33,8 @@ RCSID("$IdPath$");
 
 #include "bytecode.h"
 
+#include "arch.h"
+
 #include "bison.h"
 
 
index a1a4f8998f540ff73c2567ba767b8eb77753aa08..0fbd211bfbc174316caee48b7b13140028b40930 100644 (file)
@@ -25,6 +25,7 @@ noinst_LIBRARIES = libyasm.a
 libyasm_a_SOURCES = \
        bytecode.c              \
        bytecode.h              \
+       bc-int.h                \
        expr.c                  \
        expr.h                  \
        symrec.c                \
@@ -37,6 +38,7 @@ libyasm_a_SOURCES = \
        file.h                  \
        section.c               \
        section.h               \
+       arch.c                  \
        arch.h                  \
        objfmt.h                \
        options.h               \
diff --git a/src/arch.c b/src/arch.c
new file mode 100644 (file)
index 0000000..5cbe359
--- /dev/null
@@ -0,0 +1,29 @@
+/* $IdPath$
+ * Architecture interface
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+arch *cur_arch;
+
index f1ed7268c771292648f58d051a8ddb3d9c0d7755..b614aca446123e92698d90587ea288e5de5fd908 100644 (file)
@@ -28,9 +28,24 @@ struct arch {
 
     /* keyword used to select architecture */
     const char *keyword;
+
+    struct {
+       /* Maximum used bytecode type value+1.  Should be set to
+        * BYTECODE_TYPE_BASE if no additional bytecode types are defined by
+        * the architecture.
+        */
+       const int type_max;
+
+       void (*bc_delete) (bytecode *bc);
+       void (*bc_print) (const bytecode *bc);
+       void (*bc_parser_finalize) (bytecode *bc);
+    } bc;
 };
 
 /* Available architectures */
+#include "arch/x86/x86arch.h"
 extern arch x86_arch;
 
+extern arch *cur_arch;
+
 #endif
index 5847318c0b4e093d096a0063113e8f7a5fda8280..5c5598d07237f439a0e75ab5856b69f9c0a9bb58 100644 (file)
@@ -3,7 +3,10 @@
 noinst_LIBRARIES = libarch.a
 
 libarch_a_SOURCES = \
-       arch.c
+       x86arch.h       \
+       x86-int.h       \
+       arch.c          \
+       bytecode.c
 
 INCLUDES = \
        -I$(top_srcdir)/src     \
index a5df22fb1dc394bfc42217130e09422918d80dbc..c9cf1caef15d2a6d3240d6d9c9146913659b41c4 100644 (file)
 #include "util.h"
 RCSID("$IdPath$");
 
+#include "bytecode.h"
 #include "arch.h"
 
+#include "x86-int.h"
+
+
+unsigned char x86_mode_bits = 0;
 
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
-    "x86"
+    "x86",
+    {
+       X86_BYTECODE_TYPE_MAX,
+       x86_bc_delete,
+       x86_bc_print,
+       x86_bc_parser_finalize
+    }
 };
diff --git a/src/arch/x86/bytecode.c b/src/arch/x86/bytecode.c
new file mode 100644 (file)
index 0000000..8083711
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * x86 bytecode utility functions
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Create an X86_BC_INSN bytecode from the parser-filled description d.
+ *
+ * The effective address and immediate pointers in d are stored in the new
+ * instruction as-is (not copied).  If an EA is supplied, d->spare is placed
+ * in the spare/reg field (bits 3-5) of its ModRM byte.  If an immediate is
+ * supplied, its f_len/f_sign are set from d->im_len/d->im_sign.
+ * Returns the newly created bytecode.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    x86_insn *insn = bc_get_data(bc);
+    int i;
+
+    /* Opcode bytes and overall sizes/flags. */
+    for (i = 0; i < 3; i++)
+       insn->opcode[i] = d->op[i];
+    insn->opcode_len = d->op_len;
+
+    insn->opersize = d->opersize;
+    insn->addrsize = 0;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+    insn->mode_bits = x86_mode_bits;
+
+    /* Effective address: keep Mod and R/M, replace the spare/reg bits. */
+    insn->ea = d->ea;
+    if (d->ea) {
+       x86_effaddr_data *ead = ea_get_data(d->ea);
+       ead->modrm = (ead->modrm & 0xC7) | ((d->spare << 3) & 0x38);
+    }
+
+    /* Immediate: record the final length and signedness on the immval. */
+    insn->imm = d->imm;
+    if (d->imm) {
+       insn->imm->f_len = d->im_len;
+       insn->imm->f_sign = d->im_sign;
+    }
+
+    return bc;
+}
+
+/* Create an X86_BC_JMPREL bytecode from the parser-filled description d.
+ *
+ * Copies the jump target expression and opcode selection from d->target,
+ * and the short and near opcode bytes/lengths from d.  An Error is reported
+ * if the target forces a form (SHORT or NEAR) for which no opcode was
+ * supplied (opcode length of 0).  Returns the newly created bytecode.
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* A forced form must be checked against *its own* opcode length: a
+     * length of 0 means that form has no opcode for this instruction.
+     */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+       Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+       Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Set the segment override byte on an effective address.
+ *
+ * Does nothing when ea is NULL.  If a non-zero override is already present
+ * and a non-zero one is being set, a Warning is issued; the new value is
+ * stored in either case.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    if (ea) {
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if (segment != 0 && ead->segment != 0)
+           Warning(_("multiple segment overrides, using leftmost"));
+
+       ead->segment = segment;
+    }
+}
+
+/* Allocate an effective address that refers directly to a register.
+ *
+ * The ModRM byte is fully known here: Mod=11, R/M=low three bits of reg,
+ * spare/reg field left at 0.  No displacement and no SIB byte are used.
+ * Returns the newly allocated effaddr (generic part followed by the x86
+ * specific data in one allocation).
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    /* x86-specific part */
+    ead->segment = 0;
+    ead->need_modrm = 1;
+    ead->valid_modrm = 1;
+    ead->modrm = 0xC0 | (reg & 0x07);  /* Mod=11, R/M=Reg, Reg=0 */
+    ead->need_sib = 0;
+    ead->valid_sib = 0;
+
+    /* generic part */
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address described by an expression.
+ *
+ * The expression e is stored as the displacement (not copied).  A ModRM
+ * byte is needed but its value is not yet valid; whether a SIB byte is
+ * needed is left undetermined (0xff).
+ * Returns the newly allocated effaddr.
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    /* x86-specific part */
+    ead->segment = 0;
+    ead->need_modrm = 1;
+    ead->valid_modrm = 0;
+    ead->modrm = 0;
+    /* SIB requirement can't be decided until more is known about the
+     * expression and the BITS/address override setting.
+     */
+    ead->need_sib = 0xff;
+    ead->valid_sib = 0;
+
+    /* generic part */
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address that carries an immediate value.
+ *
+ * The immediate's expression (imm->val) becomes the displacement and im_len
+ * its length.  Neither a ModRM nor a SIB byte is needed.
+ * Returns the newly allocated effaddr.
+ *
+ * NOTE(review): only imm->val is taken over; the immval structure itself is
+ * neither stored nor freed here -- confirm who releases it.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    /* x86-specific part: no ModRM/SIB at all */
+    ead->segment = 0;
+    ead->need_modrm = 0;
+    ead->valid_modrm = 0;
+    ead->modrm = 0;
+    ead->need_sib = 0;
+    ead->valid_sib = 0;
+
+    /* generic part */
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+
+    return ea;
+}
+
+/* Return the effective address attached to an instruction bytecode.
+ *
+ * Returns NULL when bc is NULL.  Raises an InternalError if bc is not an
+ * X86_BC_INSN bytecode.  The returned pointer is the one stored in the
+ * instruction (may itself be NULL if the instruction has no EA).
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+       return NULL;
+
+    if (bc->type != X86_BC_INSN)
+       InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Only touch the bytecode's data once bc is known to be non-NULL. */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Override the operand size of an instruction or relative-jump bytecode.
+ *
+ * Does nothing when bc is NULL.  Raises an InternalError for any bytecode
+ * type other than X86_BC_INSN and X86_BC_JMPREL.
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->opersize = opersize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->opersize = opersize;
+    } else {
+       InternalError(_("OperSize override applied to non-instruction"));
+    }
+}
+
+/* Override the address size of an instruction or relative-jump bytecode.
+ *
+ * Does nothing when bc is NULL.  Raises an InternalError for any bytecode
+ * type other than X86_BC_INSN and X86_BC_JMPREL.
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->addrsize = addrsize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->addrsize = addrsize;
+    } else {
+       InternalError(_("AddrSize override applied to non-instruction"));
+    }
+}
+
+/* Set the LOCK/REP prefix byte of an instruction or relative-jump bytecode.
+ *
+ * Does nothing when bc is NULL.  Raises an InternalError (and leaves the
+ * bytecode untouched) for any type other than X86_BC_INSN and
+ * X86_BC_JMPREL.  If a prefix is already set, a Warning is issued; the new
+ * prefix is stored in either case.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    unsigned char *slot;       /* where the prefix byte lives in bc's data */
+
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       slot = &insn->lockrep_pre;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       slot = &jmprel->lockrep_pre;
+    } else {
+       InternalError(_("LockRep prefix applied to non-instruction"));
+       return;
+    }
+
+    if (*slot != 0)
+       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *slot = prefix;
+}
+
+/* Mark an instruction bytecode as a shift operation (shift_op = 1).
+ * A NULL bc is ignored; a non-instruction bytecode raises an internal error.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    if (bc) {
+       if (bc->type != X86_BC_INSN)
+           InternalError(_("Attempted to set shift flag on non-instruction"));
+
+       ((x86_insn *)bc_get_data(bc))->shift_op = 1;
+    }
+}
+
+/* Replace *old_sel with new_sel.  A warning is issued when a forced
+ * SHORT/NEAR selection is already present and new_sel is not JR_NONE.
+ * A NULL old_sel is ignored.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                         x86_jmprel_opcode_sel new_sel)
+{
+    int already_forced;
+
+    if (!old_sel)
+       return;
+
+    already_forced = (*old_sel == JR_SHORT_FORCED) ||
+                    (*old_sel == JR_NEAR_FORCED);
+    if (already_forced && new_sel != JR_NONE)
+       Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific payload of a bytecode: the effective address and
+ * immediate (together with their expressions) for an instruction, or the
+ * target expression for a relative jump.  Other types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_bytecode_type kind = (x86_bytecode_type)bc->type;
+
+    if (kind == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       if (insn->ea) {
+           expr_delete(insn->ea->disp);
+           xfree(insn->ea);
+       }
+       if (insn->imm) {
+           expr_delete(insn->imm->val);
+           xfree(insn->imm);
+       }
+    } else if (kind == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       expr_delete(jmprel->target);
+    }
+}
+
+/* Dump an x86 instruction or relative-jump bytecode to stdout for debugging.
+ * Bytecodes of any other type produce no output.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+       case X86_BC_INSN:
+           insn = bc_get_const_data(bc);
+           printf("_Instruction_\n");
+           printf("Effective Address:");
+           if (!insn->ea)
+               printf(" (nil)\n");
+           else {
+               printf("\n Disp=");
+               if (insn->ea->disp)
+                   expr_print(insn->ea->disp);
+               else
+                   printf("(nil)");
+               printf("\n");
+               /* x86-specific EA fields live directly after the effaddr */
+               ead = ea_get_data(insn->ea);
+               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                      (unsigned int)insn->ea->len,
+                      (unsigned int)ead->segment,
+                      (unsigned int)insn->ea->nosplit);
+               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                      (unsigned int)ead->modrm,
+                      (unsigned int)ead->valid_modrm,
+                      (unsigned int)ead->need_modrm);
+               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                      (unsigned int)ead->sib,
+                      (unsigned int)ead->valid_sib,
+                      (unsigned int)ead->need_sib);
+           }
+           printf("Immediate Value:");
+           if (!insn->imm)
+               printf(" (nil)\n");
+           else {
+               printf("\n Val=");
+               expr_print(insn->imm->val);
+               printf("\n");
+               printf(" Len=%u, IsNeg=%u\n",
+                      (unsigned int)insn->imm->len,
+                      (unsigned int)insn->imm->isneg);
+               printf(" FLen=%u, FSign=%u\n",
+                      (unsigned int)insn->imm->f_len,
+                      (unsigned int)insn->imm->f_sign);
+           }
+           printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                  (unsigned int)insn->opcode[0],
+                  (unsigned int)insn->opcode[1],
+                  (unsigned int)insn->opcode[2],
+                  (unsigned int)insn->opcode_len);
+           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                  (unsigned int)insn->addrsize,
+                  (unsigned int)insn->opersize,
+                  (unsigned int)insn->lockrep_pre,
+                  (unsigned int)insn->shift_op);
+           printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+           break;
+       case X86_BC_JMPREL:
+           jmprel = bc_get_const_data(bc);
+           printf("_Relative Jump_\n");
+           printf("Target=");
+           expr_print(jmprel->target);
+           printf("\nShort Form:\n");
+           /* opcode_len == 0 means this form of the jump does not exist.
+            * (The previous test, "!len == 0", was inverted.)
+            */
+           if (jmprel->shortop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->shortop.opcode[0],
+                      (unsigned int)jmprel->shortop.opcode[1],
+                      (unsigned int)jmprel->shortop.opcode[2],
+                      (unsigned int)jmprel->shortop.opcode_len);
+           printf("Near Form:\n");
+           if (jmprel->nearop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->nearop.opcode[0],
+                      (unsigned int)jmprel->nearop.opcode[1],
+                      (unsigned int)jmprel->nearop.opcode[2],
+                      (unsigned int)jmprel->nearop.opcode_len);
+           printf("OpSel=");
+           switch (jmprel->op_sel) {
+               case JR_NONE:
+                   printf("None");
+                   break;
+               case JR_SHORT:
+                   printf("Short");
+                   break;
+               case JR_NEAR:
+                   printf("Near");
+                   break;
+               case JR_SHORT_FORCED:
+                   printf("Forced Short");
+                   break;
+               case JR_NEAR_FORCED:
+                   printf("Forced Near");
+                   break;
+               default:
+                   printf("UNKNOWN!!");
+                   break;
+           }
+           /* Leading newline terminates the OpSel line. */
+           printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                  (unsigned int)jmprel->mode_bits,
+                  (unsigned int)jmprel->addrsize,
+                  (unsigned int)jmprel->opersize,
+                  (unsigned int)jmprel->lockrep_pre);
+           break;
+       default:
+           break;
+    }
+}
+
+/* Parser-time finalization of one instruction: run the effective-address
+ * check when the ModRM/SIB bytes are still needed but not yet valid, then
+ * expand/simplify the immediate and resolve the shift-by-1 special case.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+       /* Only derive the appended x86 data once ea is known non-NULL. */
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                          (!ead->valid_modrm && ead->need_modrm))) {
+           /* First expand equ's */
+           expr_expand_equ(ea->disp);
+
+           /* Check validity of effective address and calc R/M bits of
+            * Mod/RM byte and SIB byte.  We won't know the Mod field
+            * of the Mod/RM byte until we know more about the
+            * displacement.
+            */
+           if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                             ea->nosplit, &ea->len, &ead->modrm,
+                             &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                             &ead->valid_sib, &ead->need_sib))
+               return;     /* failed, don't bother checking rest of insn */
+       }
+    }
+
+    if (imm) {
+       const intnum *num;
+
+       if (imm->val) {
+           expr_expand_equ(imm->val);
+           expr_simplify(imm->val);
+       }
+       /* TODO: check imm f_len vs. len? */
+
+       /* Handle shift_op special-casing */
+       if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+           if (intnum_get_uint(num) == 1) {
+               /* Use ,1 form: first copy ,1 opcode. */
+               insn->opcode[0] = insn->opcode[1];
+               /* Delete ModRM, as it's no longer needed.
+                * NOTE(review): only the effaddr itself is freed (ea->disp
+                * is not deleted), and the ,1 shift encodings presumably
+                * still need an r/m operand -- confirm that dropping the
+                * EA here is intended.
+                */
+               xfree(ea);
+               insn->ea = (effaddr *)NULL;
+               /* Delete Imm, as it's not needed */
+               expr_delete(imm->val);
+               xfree(imm);
+               insn->imm = (immval *)NULL;
+           }
+           insn->shift_op = 0;
+       }
+    }
+}
+
+/* Parser-finalize entry point for x86 bytecodes.  Only instruction
+ * bytecodes need work; every other type is left as is.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    if ((x86_bytecode_type)bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       x86_bc_parser_finalize_insn(insn);
+    }
+}
+
diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h
new file mode 100644 (file)
index 0000000..2c3336e
--- /dev/null
@@ -0,0 +1,95 @@
+/* $IdPath$
+ * x86 internals header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_X86_INT_H
+#define YASM_X86_INT_H
+
+/* x86-specific effective address data.  The x86_ea_new_*() constructors
+ * allocate it directly after the generic effaddr structure, and it is
+ * reached through ea_get_data().
+ */
+typedef struct x86_effaddr_data {
+    unsigned char segment;     /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in x86_bc_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
+                                  0xff if unknown */
+} x86_effaddr_data;
+
+/* Payload of an X86_BC_INSN bytecode; x86_bc_new_insn() allocates it as the
+ * data area of the bytecode and it is reached through bc_get_data().
+ */
+typedef struct x86_insn {
+    effaddr *ea;       /* effective address */
+
+    immval *imm;       /* immediate or relative value */
+
+    unsigned char opcode[3];   /* opcode */
+    unsigned char opcode_len;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate).  In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array.  We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0.  Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_bc_insn_set_shift_flag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    unsigned char mode_bits;   /* x86_mode_bits value when the insn was created */
+} x86_insn;
+
+/* Payload of an X86_BC_JMPREL bytecode; x86_bc_new_jmprel() allocates it as
+ * the data area of the bytecode and it is reached through bc_get_data().
+ */
+typedef struct x86_jmprel {
+    expr *target;              /* target location */
+
+    /* Opcode bytes for the short and the near encoding of the jump. */
+    struct {
+       unsigned char opcode[3];
+       unsigned char opcode_len;   /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    unsigned char mode_bits;   /* x86_mode_bits value when the jump was created */
+} x86_jmprel;
+
+/* Bytecode handlers referenced from the x86_arch structure initializer. */
+void x86_bc_delete(bytecode *bc);              /* free x86-specific payload */
+void x86_bc_print(const bytecode *bc);         /* debug dump to stdout */
+void x86_bc_parser_finalize(bytecode *bc);     /* parser-time fixups */
+
+#endif
index a5df22fb1dc394bfc42217130e09422918d80dbc..c9cf1caef15d2a6d3240d6d9c9146913659b41c4 100644 (file)
 #include "util.h"
 RCSID("$IdPath$");
 
+#include "bytecode.h"
 #include "arch.h"
 
+#include "x86-int.h"
+
+
+/* Current BITS setting; copied into the mode_bits field of every x86
+ * instruction and relative-jump bytecode when it is created.
+ */
+unsigned char x86_mode_bits = 0;
 
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
-    "x86"
+    "x86",
+    /* x86 bytecode hooks: the bytecode type limit followed by the delete,
+     * print and parser-finalize handlers.
+     */
+    {
+       X86_BYTECODE_TYPE_MAX,
+       x86_bc_delete,
+       x86_bc_print,
+       x86_bc_parser_finalize
+    }
 };
diff --git a/src/arch/x86/x86arch.h b/src/arch/x86/x86arch.h
new file mode 100644 (file)
index 0000000..70a207c
--- /dev/null
@@ -0,0 +1,93 @@
+/* $IdPath$
+ * x86 Architecture header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_X86ARCH_H
+#define YASM_X86ARCH_H
+
+/* x86-specific bytecode types, numbered from BYTECODE_TYPE_BASE upward. */
+typedef enum {
+    X86_BC_INSN = BYTECODE_TYPE_BASE,
+    X86_BC_JMPREL
+} x86_bytecode_type;
+/* One past the last x86 bytecode type.  Parenthesized so the expansion
+ * behaves as a single operand inside larger expressions.
+ */
+#define X86_BYTECODE_TYPE_MAX  (X86_BC_JMPREL+1)
+
+/* Which encoding of a relative jump is selected.  x86_bc_new_jmprel()
+ * reports an error if a *_FORCED form is requested for a jump with no such
+ * encoding, and x86_set_jmprel_opcode_sel() warns when a *_FORCED value is
+ * being replaced.
+ */
+typedef enum {
+    JR_NONE,
+    JR_SHORT,
+    JR_NEAR,
+    JR_SHORT_FORCED,
+    JR_NEAR_FORCED
+} x86_jmprel_opcode_sel;
+
+/* Jump target as handed to x86_bc_new_jmprel(): the target expression plus
+ * the opcode-form selector; both are copied into the new x86_jmprel.
+ */
+typedef struct x86_targetval {
+    expr *val;
+
+    x86_jmprel_opcode_sel op_sel;
+} x86_targetval;
+
+/* Set the segment override of an EA (warns if one is already set and the
+ * new value is non-zero).  A NULL ea is ignored.
+ */
+void x86_ea_set_segment(effaddr *ea, unsigned char segment);
+/* EA constructors.  Each allocates a new effaddr with x86 data appended:
+ *  _reg:  register operand (Mod=11, R/M=low three bits of reg)
+ *  _imm:  wraps imm's expression; no ModRM or SIB byte is needed
+ *  _expr: memory expression; ModRM needed, SIB need not yet known
+ */
+effaddr *x86_ea_new_reg(unsigned long reg);
+effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_expr(expr *e);
+
+/* Returns the EA of an instruction bytecode (NULL if bc is NULL). */
+effaddr *x86_bc_insn_get_ea(bytecode *bc);
+
+/* Setters for instruction bytecodes; the opersize, addrsize and lockrep
+ * setters also accept relative-jump bytecodes.  All of them ignore a NULL bc.
+ */
+void x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize);
+void x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize);
+void x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix);
+void x86_bc_insn_set_shift_flag(bytecode *bc);
+
+/* Overwrite *old_sel with new_sel, warning if a forced selection is being
+ * replaced by anything other than JR_NONE.
+ */
+void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                              x86_jmprel_opcode_sel new_sel);
+
+/* Structure with *all* inputs passed to x86_bc_new_insn().
+ * IMPORTANT: ea and imm cannot be reused or freed after calling the
+ * function (it stores the pointers; it doesn't make a copy).
+ */
+typedef struct x86_new_insn_data {
+    effaddr *ea;
+    immval *imm;
+    unsigned char opersize;
+    unsigned char op_len;
+    unsigned char op[3];
+    unsigned char spare;       /* bits to go in 'spare' field of ModRM */
+    unsigned char im_len;      /* stored as the immediate's f_len */
+    unsigned char im_sign;     /* stored as the immediate's f_sign */
+} x86_new_insn_data;
+
+bytecode *x86_bc_new_insn(x86_new_insn_data *d);
+
+/* Structure with *all* inputs passed to x86_bc_new_jmprel().
+ * Pass 0 for the opcode_len if that version of the opcode doesn't exist.
+ */
+typedef struct x86_new_jmprel_data {
+    x86_targetval *target;
+    unsigned char short_op_len;
+    unsigned char short_op[3];
+    unsigned char near_op_len;
+    unsigned char near_op[3];
+    unsigned char addrsize;
+} x86_new_jmprel_data;
+
+bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
+
+/* Current BITS setting; recorded in every newly created x86 bytecode. */
+extern unsigned char x86_mode_bits;
+
+#endif
diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c
new file mode 100644 (file)
index 0000000..8083711
--- /dev/null
@@ -0,0 +1,512 @@
+/*
+ * x86 bytecode utility functions
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
+#include "bytecode.h"
+#include "arch.h"
+
+#include "x86-int.h"
+
+#include "bc-int.h"
+
+
+/* Create an X86_BC_INSN bytecode from the fields of d.  The ea and imm
+ * pointers are stored as given (no copy is made); the spare bits are merged
+ * into the EA's ModRM byte and the final immediate length/sign are recorded
+ * on the immediate.
+ */
+bytecode *
+x86_bc_new_insn(x86_new_insn_data *d)
+{
+    bytecode *bc = bc_new_common(X86_BC_INSN, sizeof(x86_insn));
+    x86_insn *insn = bc_get_data(bc);
+    int i;
+
+    insn->ea = d->ea;
+    if (insn->ea) {
+       x86_effaddr_data *ead = ea_get_data(insn->ea);
+
+       /* Keep Mod and R/M, replace the spare/reg field (bits 3-5). */
+       ead->modrm = (ead->modrm & 0xC7) | ((d->spare << 3) & 0x38);
+    }
+
+    insn->imm = d->imm;
+    if (insn->imm) {
+       insn->imm->f_len = d->im_len;
+       insn->imm->f_sign = d->im_sign;
+    }
+
+    for (i = 0; i < 3; i++)
+       insn->opcode[i] = d->op[i];
+    insn->opcode_len = d->op_len;
+
+    insn->opersize = d->opersize;
+    insn->addrsize = 0;
+    insn->lockrep_pre = 0;
+    insn->shift_op = 0;
+
+    insn->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Create an X86_BC_JMPREL bytecode from the fields of d.  The target
+ * expression pointer is stored as given.  An opcode length of 0 means that
+ * form of the jump does not exist; forcing a non-existent form is reported
+ * through Error().
+ */
+bytecode *
+x86_bc_new_jmprel(x86_new_jmprel_data *d)
+{
+    bytecode *bc;
+    x86_jmprel *jmprel;
+
+    bc = bc_new_common(X86_BC_JMPREL, sizeof(x86_jmprel));
+    jmprel = bc_get_data(bc);
+
+    jmprel->target = d->target->val;
+    jmprel->op_sel = d->target->op_sel;
+
+    /* Each forced form must be checked against its own opcode length
+     * (the two length tests were previously swapped).
+     */
+    if ((d->target->op_sel == JR_SHORT_FORCED) && (d->short_op_len == 0))
+       Error(_("no SHORT form of that jump instruction exists"));
+    if ((d->target->op_sel == JR_NEAR_FORCED) && (d->near_op_len == 0))
+       Error(_("no NEAR form of that jump instruction exists"));
+
+    jmprel->shortop.opcode[0] = d->short_op[0];
+    jmprel->shortop.opcode[1] = d->short_op[1];
+    jmprel->shortop.opcode[2] = d->short_op[2];
+    jmprel->shortop.opcode_len = d->short_op_len;
+
+    jmprel->nearop.opcode[0] = d->near_op[0];
+    jmprel->nearop.opcode[1] = d->near_op[1];
+    jmprel->nearop.opcode[2] = d->near_op[2];
+    jmprel->nearop.opcode_len = d->near_op_len;
+
+    jmprel->addrsize = d->addrsize;
+    jmprel->opersize = 0;
+    jmprel->lockrep_pre = 0;
+
+    jmprel->mode_bits = x86_mode_bits;
+
+    return bc;
+}
+
+/* Set the segment override of an effective address.  Warns when an
+ * override is already present and the new one is non-zero, then stores the
+ * new value.  A NULL ea is ignored.
+ */
+void
+x86_ea_set_segment(effaddr *ea, unsigned char segment)
+{
+    if (ea) {
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if (ead->segment != 0 && segment != 0)
+           Warning(_("multiple segment overrides, using leftmost"));
+
+       ead->segment = segment;
+    }
+}
+
+/* Allocate an effective address that refers directly to a register:
+ * the ModRM byte is already valid (Mod=11, R/M=low three bits of reg) and
+ * no displacement or SIB byte is used.
+ */
+effaddr *
+x86_ea_new_reg(unsigned long reg)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = (expr *)NULL;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0xC0 | (reg & 0x07);  /* Mod=11, R/M=Reg, Reg=0 */
+    ead->valid_modrm = 1;
+    ead->need_modrm = 1;
+    ead->sib = 0;      /* x86_bc_print() reads this unconditionally */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Allocate an effective address for a memory expression.  The expression
+ * pointer is stored as given.  A ModRM byte is needed but not yet valid,
+ * and whether a SIB byte is needed is not yet known (0xff).
+ */
+effaddr *
+x86_ea_new_expr(expr *e)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = e;
+    ea->len = 0;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 1;
+    ead->sib = 0;      /* x86_bc_print() reads this unconditionally */
+    ead->valid_sib = 0;
+    ead->need_sib = 0xff;   /* we won't know until we know more about expr and
+                              the BITS/address override setting */
+
+    return ea;
+}
+
+/* Allocate an effective address that carries imm's expression as its
+ * displacement with a fixed length of im_len; neither a ModRM nor a SIB
+ * byte is needed.  The expression pointer is shared with imm, not copied.
+ */
+effaddr *
+x86_ea_new_imm(immval *imm, unsigned char im_len)
+{
+    effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
+    x86_effaddr_data *ead = ea_get_data(ea);
+
+    ea->disp = imm->val;
+    ea->len = im_len;
+    ea->nosplit = 0;
+    ead->segment = 0;
+    ead->modrm = 0;
+    ead->valid_modrm = 0;
+    ead->need_modrm = 0;
+    ead->sib = 0;      /* x86_bc_print() reads this unconditionally */
+    ead->valid_sib = 0;
+    ead->need_sib = 0;
+
+    return ea;
+}
+
+/* Return the effective address of an instruction bytecode.
+ * Returns NULL when bc is NULL; a non-instruction bytecode raises an
+ * internal error.
+ */
+effaddr *
+x86_bc_insn_get_ea(bytecode *bc)
+{
+    x86_insn *insn;
+
+    if (!bc)
+       return NULL;
+
+    if (bc->type != X86_BC_INSN)
+       InternalError(_("Trying to get EA of non-instruction"));
+
+    /* Derive the payload pointer only after bc has been validated. */
+    insn = bc_get_data(bc);
+
+    return insn->ea;
+}
+
+/* Record an operand-size override on an instruction or relative-jump
+ * bytecode.  A NULL bc is silently ignored; any other bytecode type is
+ * reported through InternalError().
+ */
+void
+x86_bc_insn_opersize_override(bytecode *bc, unsigned char opersize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->opersize = opersize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->opersize = opersize;
+    } else
+       InternalError(_("OperSize override applied to non-instruction"));
+}
+
+/* Record an address-size override on an instruction or relative-jump
+ * bytecode.  A NULL bc is silently ignored; any other bytecode type is
+ * reported through InternalError().
+ */
+void
+x86_bc_insn_addrsize_override(bytecode *bc, unsigned char addrsize)
+{
+    if (!bc)
+       return;
+
+    if (bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       insn->addrsize = addrsize;
+    } else if (bc->type == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       jmprel->addrsize = addrsize;
+    } else
+       InternalError(_("AddrSize override applied to non-instruction"));
+}
+
+/* Store a LOCK/REP prefix byte on an instruction or relative-jump bytecode.
+ * Warns if a prefix was already recorded, then stores the new value.
+ * A NULL bc is ignored; other bytecode types raise an internal error.
+ */
+void
+x86_bc_insn_set_lockrep_prefix(bytecode *bc, unsigned char prefix)
+{
+    unsigned char *slot;
+
+    if (!bc)
+       return;
+
+    /* Locate the prefix field of whichever payload this bytecode carries. */
+    if (bc->type == X86_BC_INSN)
+       slot = &((x86_insn *)bc_get_data(bc))->lockrep_pre;
+    else if (bc->type == X86_BC_JMPREL)
+       slot = &((x86_jmprel *)bc_get_data(bc))->lockrep_pre;
+    else {
+       InternalError(_("LockRep prefix applied to non-instruction"));
+       return;
+    }
+
+    if (*slot != 0)
+       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
+
+    *slot = prefix;
+}
+
+/* Mark an instruction bytecode as a shift operation (shift_op = 1).
+ * A NULL bc is ignored; a non-instruction bytecode raises an internal error.
+ */
+void
+x86_bc_insn_set_shift_flag(bytecode *bc)
+{
+    if (bc) {
+       if (bc->type != X86_BC_INSN)
+           InternalError(_("Attempted to set shift flag on non-instruction"));
+
+       ((x86_insn *)bc_get_data(bc))->shift_op = 1;
+    }
+}
+
+/* Replace *old_sel with new_sel.  A warning is issued when a forced
+ * SHORT/NEAR selection is already present and new_sel is not JR_NONE.
+ * A NULL old_sel is ignored.
+ */
+void
+x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
+                         x86_jmprel_opcode_sel new_sel)
+{
+    int already_forced;
+
+    if (!old_sel)
+       return;
+
+    already_forced = (*old_sel == JR_SHORT_FORCED) ||
+                    (*old_sel == JR_NEAR_FORCED);
+    if (already_forced && new_sel != JR_NONE)
+       Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
+
+    *old_sel = new_sel;
+}
+
+/* Release the x86-specific payload of a bytecode: the effective address and
+ * immediate (together with their expressions) for an instruction, or the
+ * target expression for a relative jump.  Other types are left untouched.
+ */
+void
+x86_bc_delete(bytecode *bc)
+{
+    x86_bytecode_type kind = (x86_bytecode_type)bc->type;
+
+    if (kind == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       if (insn->ea) {
+           expr_delete(insn->ea->disp);
+           xfree(insn->ea);
+       }
+       if (insn->imm) {
+           expr_delete(insn->imm->val);
+           xfree(insn->imm);
+       }
+    } else if (kind == X86_BC_JMPREL) {
+       x86_jmprel *jmprel = bc_get_data(bc);
+
+       expr_delete(jmprel->target);
+    }
+}
+
+/* Dump an x86 instruction or relative-jump bytecode to stdout for debugging.
+ * Bytecodes of any other type produce no output.
+ */
+void
+x86_bc_print(const bytecode *bc)
+{
+    const x86_insn *insn;
+    const x86_jmprel *jmprel;
+    x86_effaddr_data *ead;
+
+    switch ((x86_bytecode_type)bc->type) {
+       case X86_BC_INSN:
+           insn = bc_get_const_data(bc);
+           printf("_Instruction_\n");
+           printf("Effective Address:");
+           if (!insn->ea)
+               printf(" (nil)\n");
+           else {
+               printf("\n Disp=");
+               if (insn->ea->disp)
+                   expr_print(insn->ea->disp);
+               else
+                   printf("(nil)");
+               printf("\n");
+               /* x86-specific EA fields live directly after the effaddr */
+               ead = ea_get_data(insn->ea);
+               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
+                      (unsigned int)insn->ea->len,
+                      (unsigned int)ead->segment,
+                      (unsigned int)insn->ea->nosplit);
+               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
+                      (unsigned int)ead->modrm,
+                      (unsigned int)ead->valid_modrm,
+                      (unsigned int)ead->need_modrm);
+               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
+                      (unsigned int)ead->sib,
+                      (unsigned int)ead->valid_sib,
+                      (unsigned int)ead->need_sib);
+           }
+           printf("Immediate Value:");
+           if (!insn->imm)
+               printf(" (nil)\n");
+           else {
+               printf("\n Val=");
+               expr_print(insn->imm->val);
+               printf("\n");
+               printf(" Len=%u, IsNeg=%u\n",
+                      (unsigned int)insn->imm->len,
+                      (unsigned int)insn->imm->isneg);
+               printf(" FLen=%u, FSign=%u\n",
+                      (unsigned int)insn->imm->f_len,
+                      (unsigned int)insn->imm->f_sign);
+           }
+           printf("Opcode: %02x %02x %02x OpLen=%u\n",
+                  (unsigned int)insn->opcode[0],
+                  (unsigned int)insn->opcode[1],
+                  (unsigned int)insn->opcode[2],
+                  (unsigned int)insn->opcode_len);
+           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
+                  (unsigned int)insn->addrsize,
+                  (unsigned int)insn->opersize,
+                  (unsigned int)insn->lockrep_pre,
+                  (unsigned int)insn->shift_op);
+           printf("BITS=%u\n", (unsigned int)insn->mode_bits);
+           break;
+       case X86_BC_JMPREL:
+           jmprel = bc_get_const_data(bc);
+           printf("_Relative Jump_\n");
+           printf("Target=");
+           expr_print(jmprel->target);
+           printf("\nShort Form:\n");
+           /* opcode_len == 0 means this form of the jump does not exist.
+            * (The previous test, "!len == 0", was inverted.)
+            */
+           if (jmprel->shortop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->shortop.opcode[0],
+                      (unsigned int)jmprel->shortop.opcode[1],
+                      (unsigned int)jmprel->shortop.opcode[2],
+                      (unsigned int)jmprel->shortop.opcode_len);
+           printf("Near Form:\n");
+           if (jmprel->nearop.opcode_len == 0)
+               printf(" None\n");
+           else
+               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
+                      (unsigned int)jmprel->nearop.opcode[0],
+                      (unsigned int)jmprel->nearop.opcode[1],
+                      (unsigned int)jmprel->nearop.opcode[2],
+                      (unsigned int)jmprel->nearop.opcode_len);
+           printf("OpSel=");
+           switch (jmprel->op_sel) {
+               case JR_NONE:
+                   printf("None");
+                   break;
+               case JR_SHORT:
+                   printf("Short");
+                   break;
+               case JR_NEAR:
+                   printf("Near");
+                   break;
+               case JR_SHORT_FORCED:
+                   printf("Forced Short");
+                   break;
+               case JR_NEAR_FORCED:
+                   printf("Forced Near");
+                   break;
+               default:
+                   printf("UNKNOWN!!");
+                   break;
+           }
+           /* Leading newline terminates the OpSel line. */
+           printf("\nBITS=%u\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
+                  (unsigned int)jmprel->mode_bits,
+                  (unsigned int)jmprel->addrsize,
+                  (unsigned int)jmprel->opersize,
+                  (unsigned int)jmprel->lockrep_pre);
+           break;
+       default:
+           break;
+    }
+}
+
+/* Parser-time finalization of one instruction: run the effective-address
+ * check when the ModRM/SIB bytes are still needed but not yet valid, then
+ * expand/simplify the immediate and resolve the shift-by-1 special case.
+ */
+static void
+x86_bc_parser_finalize_insn(x86_insn *insn)
+{
+    effaddr *ea = insn->ea;
+    immval *imm = insn->imm;
+
+    if (ea) {
+       /* Only derive the appended x86 data once ea is known non-NULL. */
+       x86_effaddr_data *ead = ea_get_data(ea);
+
+       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
+                          (!ead->valid_modrm && ead->need_modrm))) {
+           /* First expand equ's */
+           expr_expand_equ(ea->disp);
+
+           /* Check validity of effective address and calc R/M bits of
+            * Mod/RM byte and SIB byte.  We won't know the Mod field
+            * of the Mod/RM byte until we know more about the
+            * displacement.
+            */
+           if (!expr_checkea(&ea->disp, &insn->addrsize, insn->mode_bits,
+                             ea->nosplit, &ea->len, &ead->modrm,
+                             &ead->valid_modrm, &ead->need_modrm, &ead->sib,
+                             &ead->valid_sib, &ead->need_sib))
+               return;     /* failed, don't bother checking rest of insn */
+       }
+    }
+
+    if (imm) {
+       const intnum *num;
+
+       if (imm->val) {
+           expr_expand_equ(imm->val);
+           expr_simplify(imm->val);
+       }
+       /* TODO: check imm f_len vs. len? */
+
+       /* Handle shift_op special-casing */
+       if (insn->shift_op && (num = expr_get_intnum(&imm->val))) {
+           if (intnum_get_uint(num) == 1) {
+               /* Use ,1 form: first copy ,1 opcode. */
+               insn->opcode[0] = insn->opcode[1];
+               /* Delete ModRM, as it's no longer needed.
+                * NOTE(review): only the effaddr itself is freed (ea->disp
+                * is not deleted), and the ,1 shift encodings presumably
+                * still need an r/m operand -- confirm that dropping the
+                * EA here is intended.
+                */
+               xfree(ea);
+               insn->ea = (effaddr *)NULL;
+               /* Delete Imm, as it's not needed */
+               expr_delete(imm->val);
+               xfree(imm);
+               insn->imm = (immval *)NULL;
+           }
+           insn->shift_op = 0;
+       }
+    }
+}
+
+/* Parser-finalize entry point for x86 bytecodes.  Only instruction
+ * bytecodes need work; every other type is left as is.
+ */
+void
+x86_bc_parser_finalize(bytecode *bc)
+{
+    if ((x86_bytecode_type)bc->type == X86_BC_INSN) {
+       x86_insn *insn = bc_get_data(bc);
+
+       x86_bc_parser_finalize_insn(insn);
+    }
+}
+
diff --git a/src/bc-int.h b/src/bc-int.h
new file mode 100644 (file)
index 0000000..f992c46
--- /dev/null
@@ -0,0 +1,71 @@
+/* $IdPath$
+ * Bytecode internal structures header file
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef YASM_BC_INT_H
+#define YASM_BC_INT_H
+
+/* Architecture-independent part of an effective address. */
+struct effaddr {
+    expr *disp;                        /* address displacement */
+    unsigned char len;         /* length of disp (in bytes), 0 if unknown,
+                                * 0xff if unknown and required to be >0.
+                                */
+    unsigned char nosplit;     /* 1 if reg*2 should not be split into
+                                  reg+reg. (0 if not) */
+
+    /* architecture-dependent data may be appended */
+};
+/* Address of the architecture-dependent data that follows an effaddr.
+ * The argument and the whole expansion are parenthesized so the macros
+ * stay correct when given an expression or used inside one.
+ */
+#define ea_get_data(x)         ((void *)(((char *)(x))+sizeof(effaddr)))
+#define ea_get_const_data(x)   ((const void *)(((const char *)(x))+sizeof(effaddr)))
+
+/* Immediate value: the expression plus its current and final size info. */
+struct immval {
+    expr *val;
+
+    unsigned char len;         /* length of val (in bytes), 0 if unknown */
+    unsigned char isneg;       /* the value has been explicitly negated */
+
+    unsigned char f_len;       /* final imm length */
+    unsigned char f_sign;      /* 1 if final imm should be signed */
+};
+
+/* Architecture-independent bytecode header. */
+struct bytecode {
+    STAILQ_ENTRY(bytecode) link;
+
+    bytecode_type type;
+
+    expr *multiple;            /* number of times bytecode is repeated,
+                                  NULL=1 */
+
+    unsigned long len;         /* total length of entire bytecode (including
+                                  multiple copies), 0 if unknown */
+
+    /* where it came from */
+    const char *filename;
+    unsigned int lineno;
+
+    /* other assembler state info */
+    unsigned long offset;      /* 0 if unknown */
+
+    /* architecture-dependent data may be appended */
+};
+/* Address of the architecture-dependent data that follows a bytecode.
+ * The argument and the whole expansion are parenthesized so the macros
+ * stay correct when given an expression or used inside one.
+ */
+#define bc_get_data(x)         ((void *)(((char *)(x))+sizeof(bytecode)))
+#define bc_get_const_data(x)   ((const void *)(((const char *)(x))+sizeof(bytecode)))
+
+#endif
index 407f01eccf51b19d3f92370cac0214f903b73615..6dae585dd00fed11469795639b3b0e18c19e41fe 100644 (file)
@@ -29,40 +29,10 @@ RCSID("$IdPath$");
 
 #include "bytecode.h"
 
+#include "arch.h"
 
-struct effaddr {
-    expr *disp;                        /* address displacement */
-    unsigned char len;         /* length of disp (in bytes), 0 if unknown,
-                                * 0xff if unknown and required to be >0.
-                                */
+#include "bc-int.h"
 
-    unsigned char segment;     /* segment override, 0 if none */
-
-    /* How the spare (register) bits in Mod/RM are handled:
-     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
-     * They're set in bytecode_new_insn().
-     */
-    unsigned char modrm;
-    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
-    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
-
-    unsigned char sib;
-    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
-    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
-                                  0xff if unknown */
-    unsigned char nosplit;     /* 1 if reg*2 should not be split into
-                                  reg+reg. (0 if not) */
-};
-
-struct immval {
-    expr *val;
-
-    unsigned char len;         /* length of val (in bytes), 0 if unknown */
-    unsigned char isneg;       /* the value has been explicitly negated */
-
-    unsigned char f_len;       /* final imm length */
-    unsigned char f_sign;      /* 1 if final imm should be signed */
-};
 
 struct dataval {
     STAILQ_ENTRY(dataval) link;
@@ -75,158 +45,25 @@ struct dataval {
     } data;
 };
 
-struct bytecode {
-    STAILQ_ENTRY(bytecode) link;
-
-    enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
-
-    /* This union has been somewhat tweaked to get it as small as possible
-     * on the 4-byte-aligned x86 architecture (without resorting to
-     * bitfields).  In particular, insn and jmprel are the largest structures
-     * in the union, and are also the same size (after padding).  jmprel
-     * can have another unsigned char added to the end without affecting
-     * its size.
-     *
-     * Don't worry about this too much, but keep it in mind when changing
-     * this structure.  We care about the size of bytecode in particular
-     * because it accounts for the majority of the memory usage in the
-     * assembler when assembling a large file.
-     */
-    union {
-       struct {
-           effaddr *ea;        /* effective address */
-
-           immval *imm;        /* immediate or relative value */
-
-           unsigned char opcode[3];    /* opcode */
-           unsigned char opcode_len;
-
-           unsigned char addrsize;     /* 0 or =mode_bits => no override */
-           unsigned char opersize;     /* 0 indicates no override */
-           unsigned char lockrep_pre;  /* 0 indicates no prefix */
-
-           /* HACK, but a space-saving one: shift opcodes have an immediate
-            * form and a ,1 form (with no immediate).  In the parser, we
-            * set this and opcode_len=1, but store the ,1 version in the
-            * second byte of the opcode array.  We then choose between the
-            * two versions once we know the actual value of imm (because we
-            * don't know it in the parser module).
-            *
-            * A override to force the imm version should just leave this at
-            * 0.  Then later code won't know the ,1 version even exists.
-            * TODO: Figure out how this affects CPU flags processing.
-            *
-            * Call SetInsnShiftFlag() to set this flag to 1.
-            */
-           unsigned char shift_op;
-       } insn;
-       struct {
-           expr *target;               /* target location */
-
-           struct {
-               unsigned char opcode[3];
-               unsigned char opcode_len;   /* 0 = no opc for this version */
-           } shortop, nearop;
-
-           /* which opcode are we using? */
-           /* The *FORCED forms are specified in the source as such */
-           jmprel_opcode_sel op_sel;
-
-           unsigned char addrsize;     /* 0 or =mode_bits => no override */
-           unsigned char opersize;     /* 0 indicates no override */
-           unsigned char lockrep_pre;  /* 0 indicates no prefix */
-       } jmprel;
-       struct {
-           /* non-converted data (linked list) */
-           datavalhead datahead;
-
-           /* final (converted) size of each element (in bytes) */
-           unsigned char size;
-       } data;
-       struct {
-           expr *numitems;             /* number of items to reserve */
-           unsigned char itemsize;     /* size of each item (in bytes) */
-       } reserve;
-    } data;
+typedef struct bytecode_data {
+    /* non-converted data (linked list) */
+    datavalhead datahead;
 
-    expr *multiple;            /* number of times bytecode is repeated,
-                                  NULL=1 */
+    /* final (converted) size of each element (in bytes) */
+    unsigned char size;
+} bytecode_data;
 
-    unsigned long len;         /* total length of entire bytecode (including
-                                  multiple copies), 0 if unknown */
-
-    /* where it came from */
-    const char *filename;
-    unsigned int lineno;
-
-    /* other assembler state info */
-    unsigned long offset;      /* 0 if unknown */
-    unsigned char mode_bits;
-};
+typedef struct bytecode_reserve {
+    expr *numitems;            /* number of items to reserve */
+    unsigned char itemsize;    /* size of each item (in bytes) */
+} bytecode_reserve;
 
 /* Static structures for when NULL is passed to conversion functions. */
 /*  for Convert*ToBytes() */
 unsigned char bytes_static[16];
 
-static bytecode *bytecode_new_common(void);
-
-effaddr *
-effaddr_new_reg(unsigned long reg)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = (expr *)NULL;
-    ea->len = 0;
-    ea->segment = 0;
-    ea->modrm = 0xC0 | (reg & 0x07);   /* Mod=11, R/M=Reg, Reg=0 */
-    ea->valid_modrm = 1;
-    ea->need_modrm = 1;
-    ea->valid_sib = 0;
-    ea->need_sib = 0;
-    ea->nosplit = 0;
-
-    return ea;
-}
-
-effaddr *
-effaddr_new_expr(expr *expr_ptr)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = expr_ptr;
-    ea->len = 0;
-    ea->segment = 0;
-    ea->modrm = 0;
-    ea->valid_modrm = 0;
-    ea->need_modrm = 1;
-    ea->valid_sib = 0;
-    ea->need_sib = 0xff;    /* we won't know until we know more about expr and
-                              the BITS/address override setting */
-    ea->nosplit = 0;
-
-    return ea;
-}
-
-effaddr *
-effaddr_new_imm(immval *im_ptr, unsigned char im_len)
-{
-    effaddr *ea = xmalloc(sizeof(effaddr));
-
-    ea->disp = im_ptr->val;
-    ea->len = im_len;
-    ea->segment = 0;
-    ea->modrm = 0;
-    ea->valid_modrm = 0;
-    ea->need_modrm = 0;
-    ea->valid_sib = 0;
-    ea->need_sib = 0;
-    ea->nosplit = 0;
-
-    return ea;
-}
-
 immval *
-immval_new_int(unsigned long int_val)
+imm_new_int(unsigned long int_val)
 {
     immval *im = xmalloc(sizeof(immval));
 
@@ -245,7 +82,7 @@ immval_new_int(unsigned long int_val)
 }
 
 immval *
-immval_new_expr(expr *expr_ptr)
+imm_new_expr(expr *expr_ptr)
 {
     immval *im = xmalloc(sizeof(immval));
 
@@ -257,19 +94,7 @@ immval_new_expr(expr *expr_ptr)
 }
 
 void
-SetEASegment(effaddr *ptr, unsigned char segment)
-{
-    if (!ptr)
-       return;
-
-    if (segment != 0 && ptr->segment != 0)
-       Warning(_("multiple segment overrides, using leftmost"));
-
-    ptr->segment = segment;
-}
-
-void
-SetEALen(effaddr *ptr, unsigned char len)
+ea_set_len(effaddr *ptr, unsigned char len)
 {
     if (!ptr)
        return;
@@ -282,7 +107,7 @@ SetEALen(effaddr *ptr, unsigned char len)
 }
 
 void
-SetEANosplit(effaddr *ptr, unsigned char nosplit)
+ea_set_nosplit(effaddr *ptr, unsigned char nosplit)
 {
     if (!ptr)
        return;
@@ -290,108 +115,8 @@ SetEANosplit(effaddr *ptr, unsigned char nosplit)
     ptr->nosplit = nosplit;
 }
 
-effaddr *
-GetInsnEA(bytecode *bc)
-{
-    if (!bc)
-       return NULL;
-
-    if (bc->type != BC_INSN)
-       InternalError(_("Trying to get EA of non-instruction"));
-
-    return bc->data.insn.ea;
-}
-
 void
-SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize)
-{
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           bc->data.insn.opersize = opersize;
-           break;
-       case BC_JMPREL:
-           bc->data.jmprel.opersize = opersize;
-           break;
-       default:
-           InternalError(_("OperSize override applied to non-instruction"));
-           return;
-    }
-}
-
-void
-SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize)
-{
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           bc->data.insn.addrsize = addrsize;
-           break;
-       case BC_JMPREL:
-           bc->data.jmprel.addrsize = addrsize;
-           break;
-       default:
-           InternalError(_("AddrSize override applied to non-instruction"));
-           return;
-    }
-}
-
-void
-SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix)
-{
-    unsigned char *lockrep_pre = (unsigned char *)NULL;
-
-    if (!bc)
-       return;
-
-    switch (bc->type) {
-       case BC_INSN:
-           lockrep_pre = &bc->data.insn.lockrep_pre;
-           break;
-       case BC_JMPREL:
-           lockrep_pre = &bc->data.jmprel.lockrep_pre;
-           break;
-       default:
-           InternalError(_("LockRep prefix applied to non-instruction"));
-           return;
-    }
-
-    if (*lockrep_pre != 0)
-       Warning(_("multiple LOCK or REP prefixes, using leftmost"));
-
-    *lockrep_pre = prefix;
-}
-
-void
-SetInsnShiftFlag(bytecode *bc)
-{
-    if (!bc)
-       return;
-
-    if (bc->type != BC_INSN)
-       InternalError(_("Attempted to set shift flag on non-instruction"));
-
-    bc->data.insn.shift_op = 1;
-}
-
-void
-SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel)
-{
-    if (!old_sel)
-       return;
-
-    if (new_sel != JR_NONE && ((*old_sel == JR_SHORT_FORCED) ||
-                              (*old_sel == JR_NEAR_FORCED)))
-       Warning(_("multiple SHORT or NEAR specifiers, using leftmost"));
-    *old_sel = new_sel;
-}
-
-void
-SetBCMultiple(bytecode *bc, expr *e)
+bc_set_multiple(bytecode *bc, expr *e)
 {
     if (bc->multiple)
        bc->multiple = expr_new_tree(bc->multiple, EXPR_MUL, e);
@@ -399,10 +124,12 @@ SetBCMultiple(bytecode *bc, expr *e)
        bc->multiple = e;
 }
 
-static bytecode *
-bytecode_new_common(void)
+bytecode *
+bc_new_common(bytecode_type type, size_t datasize)
 {
-    bytecode *bc = xmalloc(sizeof(bytecode));
+    bytecode *bc = xmalloc(sizeof(bytecode)+datasize);
+
+    bc->type = type;
 
     bc->multiple = (expr *)NULL;
     bc->len = 0;
@@ -411,146 +138,59 @@ bytecode_new_common(void)
     bc->lineno = line_number;
 
     bc->offset = 0;
-    bc->mode_bits = mode_bits;
 
     return bc;
 }
 
 bytecode *
-bytecode_new_insn(unsigned char  opersize,
-                 unsigned char  opcode_len,
-                 unsigned char  op0,
-                 unsigned char  op1,
-                 unsigned char  op2,
-                 effaddr       *ea_ptr,
-                 unsigned char  spare,
-                 immval        *im_ptr,
-                 unsigned char  im_len,
-                 unsigned char  im_sign)
+bc_new_data(datavalhead *datahead, unsigned long size)
 {
-    bytecode *bc = bytecode_new_common();
+    bytecode *bc = bc_new_common(BC_DATA, sizeof(bytecode_data));
+    bytecode_data *data = bc_get_data(bc);
 
-    bc->type = BC_INSN;
-
-    bc->data.insn.ea = ea_ptr;
-    if (ea_ptr) {
-       bc->data.insn.ea->modrm &= 0xC7;        /* zero spare/reg bits */
-       bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */
-    }
-
-    bc->data.insn.imm = im_ptr;
-    if (im_ptr) {
-       bc->data.insn.imm->f_sign = im_sign;
-       bc->data.insn.imm->f_len = im_len;
-    }
-
-    bc->data.insn.opcode[0] = op0;
-    bc->data.insn.opcode[1] = op1;
-    bc->data.insn.opcode[2] = op2;
-    bc->data.insn.opcode_len = opcode_len;
-
-    bc->data.insn.addrsize = 0;
-    bc->data.insn.opersize = opersize;
-    bc->data.insn.lockrep_pre = 0;
-    bc->data.insn.shift_op = 0;
-
-    return bc;
-}
-
-bytecode *
-bytecode_new_jmprel(targetval     *target,
-                   unsigned char  short_opcode_len,
-                   unsigned char  short_op0,
-                   unsigned char  short_op1,
-                   unsigned char  short_op2,
-                   unsigned char  near_opcode_len,
-                   unsigned char  near_op0,
-                   unsigned char  near_op1,
-                   unsigned char  near_op2,
-                   unsigned char  addrsize)
-{
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_JMPREL;
-
-    bc->data.jmprel.target = target->val;
-    bc->data.jmprel.op_sel = target->op_sel;
-
-    if ((target->op_sel == JR_SHORT_FORCED) && (near_opcode_len == 0))
-       Error(_("no SHORT form of that jump instruction exists"));
-    if ((target->op_sel == JR_NEAR_FORCED) && (short_opcode_len == 0))
-       Error(_("no NEAR form of that jump instruction exists"));
-
-    bc->data.jmprel.shortop.opcode[0] = short_op0;
-    bc->data.jmprel.shortop.opcode[1] = short_op1;
-    bc->data.jmprel.shortop.opcode[2] = short_op2;
-    bc->data.jmprel.shortop.opcode_len = short_opcode_len;
-
-    bc->data.jmprel.nearop.opcode[0] = near_op0;
-    bc->data.jmprel.nearop.opcode[1] = near_op1;
-    bc->data.jmprel.nearop.opcode[2] = near_op2;
-    bc->data.jmprel.nearop.opcode_len = near_opcode_len;
-
-    bc->data.jmprel.addrsize = addrsize;
-    bc->data.jmprel.opersize = 0;
-    bc->data.jmprel.lockrep_pre = 0;
+    data->datahead = *datahead;
+    data->size = size;
 
     return bc;
 }
 
 bytecode *
-bytecode_new_data(datavalhead *datahead, unsigned long size)
+bc_new_reserve(expr *numitems, unsigned long itemsize)
 {
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_DATA;
+    bytecode *bc = bc_new_common(BC_RESERVE, sizeof(bytecode_reserve));
+    bytecode_reserve *reserve = bc_get_data(bc);
 
-    bc->data.data.datahead = *datahead;
-    bc->data.data.size = size;
-
-    return bc;
-}
-
-bytecode *
-bytecode_new_reserve(expr *numitems, unsigned long itemsize)
-{
-    bytecode *bc = bytecode_new_common();
-
-    bc->type = BC_RESERVE;
-
-    bc->data.reserve.numitems = numitems;
-    bc->data.reserve.itemsize = itemsize;
+    reserve->numitems = numitems;
+    reserve->itemsize = itemsize;
 
     return bc;
 }
 
 void
-bytecode_delete(bytecode *bc)
+bc_delete(bytecode *bc)
 {
+    bytecode_data *data;
+    bytecode_reserve *reserve;
+
     if (!bc)
        return;
 
     switch (bc->type) {
        case BC_EMPTY:
            break;
-       case BC_INSN:
-           if (bc->data.insn.ea) {
-               expr_delete(bc->data.insn.ea->disp);
-               xfree(bc->data.insn.ea);
-           }
-           if (bc->data.insn.imm) {
-               expr_delete(bc->data.insn.imm->val);
-               xfree(bc->data.insn.imm);
-           }
-           break;
-       case BC_JMPREL:
-           expr_delete(bc->data.jmprel.target);
-           break;
        case BC_DATA:
-           datavals_delete(&bc->data.data.datahead);
+           data = bc_get_data(bc);
+           dvs_delete(&data->datahead);
            break;
        case BC_RESERVE:
-           expr_delete(bc->data.reserve.numitems);
+           reserve = bc_get_data(bc);
+           expr_delete(reserve->numitems);
+           break;
+       default:
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_delete(bc);
+           else
+               InternalError(_("Unknown bytecode type"));
            break;
     }
 
@@ -559,131 +199,43 @@ bytecode_delete(bytecode *bc)
 }
 
 int
-bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val)
+bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val)
 {
     return 0;  /* TODO */
 }
 
 void
-bytecode_print(const bytecode *bc)
+bc_print(const bytecode *bc)
 {
+    const bytecode_data *data;
+    const bytecode_reserve *reserve;
+
     switch (bc->type) {
        case BC_EMPTY:
            printf("_Empty_\n");
            break;
-       case BC_INSN:
-           printf("_Instruction_\n");
-           printf("Effective Address:");
-           if (!bc->data.insn.ea)
-               printf(" (nil)\n");
-           else {
-               printf("\n Disp=");
-               if (bc->data.insn.ea->disp)
-                   expr_print(bc->data.insn.ea->disp);
-               else
-                   printf("(nil)");
-               printf("\n");
-               printf(" Len=%u SegmentOv=%02x NoSplit=%u\n",
-                      (unsigned int)bc->data.insn.ea->len,
-                      (unsigned int)bc->data.insn.ea->segment,
-                      (unsigned int)bc->data.insn.ea->nosplit);
-               printf(" ModRM=%03o ValidRM=%u NeedRM=%u\n",
-                      (unsigned int)bc->data.insn.ea->modrm,
-                      (unsigned int)bc->data.insn.ea->valid_modrm,
-                      (unsigned int)bc->data.insn.ea->need_modrm);
-               printf(" SIB=%03o ValidSIB=%u NeedSIB=%u\n",
-                      (unsigned int)bc->data.insn.ea->sib,
-                      (unsigned int)bc->data.insn.ea->valid_sib,
-                      (unsigned int)bc->data.insn.ea->need_sib);
-           }
-           printf("Immediate Value:");
-           if (!bc->data.insn.imm)
-               printf(" (nil)\n");
-           else {
-               printf("\n Val=");
-               expr_print(bc->data.insn.imm->val);
-               printf("\n");
-               printf(" Len=%u, IsNeg=%u\n",
-                      (unsigned int)bc->data.insn.imm->len,
-                      (unsigned int)bc->data.insn.imm->isneg);
-               printf(" FLen=%u, FSign=%u\n",
-                      (unsigned int)bc->data.insn.imm->f_len,
-                      (unsigned int)bc->data.insn.imm->f_sign);
-           }
-           printf("Opcode: %02x %02x %02x OpLen=%u\n",
-                  (unsigned int)bc->data.insn.opcode[0],
-                  (unsigned int)bc->data.insn.opcode[1],
-                  (unsigned int)bc->data.insn.opcode[2],
-                  (unsigned int)bc->data.insn.opcode_len);
-           printf("AddrSize=%u OperSize=%u LockRepPre=%02x ShiftOp=%u\n",
-                  (unsigned int)bc->data.insn.addrsize,
-                  (unsigned int)bc->data.insn.opersize,
-                  (unsigned int)bc->data.insn.lockrep_pre,
-                  (unsigned int)bc->data.insn.shift_op);
-           break;
-       case BC_JMPREL:
-           printf("_Relative Jump_\n");
-           printf("Target=");
-           expr_print(bc->data.jmprel.target);
-           printf("\nShort Form:\n");
-           if (!bc->data.jmprel.shortop.opcode_len == 0)
-               printf(" None\n");
-           else
-               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
-                      (unsigned int)bc->data.jmprel.shortop.opcode[0],
-                      (unsigned int)bc->data.jmprel.shortop.opcode[1],
-                      (unsigned int)bc->data.jmprel.shortop.opcode[2],
-                      (unsigned int)bc->data.jmprel.shortop.opcode_len);
-           if (!bc->data.jmprel.nearop.opcode_len == 0)
-               printf(" None\n");
-           else
-               printf(" Opcode: %02x %02x %02x OpLen=%u\n",
-                      (unsigned int)bc->data.jmprel.nearop.opcode[0],
-                      (unsigned int)bc->data.jmprel.nearop.opcode[1],
-                      (unsigned int)bc->data.jmprel.nearop.opcode[2],
-                      (unsigned int)bc->data.jmprel.nearop.opcode_len);
-           printf("OpSel=");
-           switch (bc->data.jmprel.op_sel) {
-               case JR_NONE:
-                   printf("None");
-                   break;
-               case JR_SHORT:
-                   printf("Short");
-                   break;
-               case JR_NEAR:
-                   printf("Near");
-                   break;
-               case JR_SHORT_FORCED:
-                   printf("Forced Short");
-                   break;
-               case JR_NEAR_FORCED:
-                   printf("Forced Near");
-                   break;
-               default:
-                   printf("UNKNOWN!!");
-                   break;
-           }
-           printf("\nAddrSize=%u OperSize=%u LockRepPre=%02x\n",
-                  (unsigned int)bc->data.jmprel.addrsize,
-                  (unsigned int)bc->data.jmprel.opersize,
-                  (unsigned int)bc->data.jmprel.lockrep_pre);
-           break;
        case BC_DATA:
+           data = bc_get_const_data(bc);
            printf("_Data_\n");
            printf("Final Element Size=%u\n",
-                  (unsigned int)bc->data.data.size);
+                  (unsigned int)data->size);
            printf("Elements:\n");
-           datavals_print(&bc->data.data.datahead);
+           dvs_print(&data->datahead);
            break;
        case BC_RESERVE:
+           reserve = bc_get_const_data(bc);
            printf("_Reserve_\n");
            printf("Num Items=");
-           expr_print(bc->data.reserve.numitems);
+           expr_print(reserve->numitems);
            printf("\nItem Size=%u\n",
-                  (unsigned int)bc->data.reserve.itemsize);
+                  (unsigned int)reserve->itemsize);
            break;
        default:
-           printf("_Unknown_\n");
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_print(bc);
+           else
+               printf("_Unknown_\n");
+           break;
     }
     printf("Multiple=");
     if (!bc->multiple)
@@ -694,95 +246,42 @@ bytecode_print(const bytecode *bc)
     printf("Length=%lu\n", bc->len);
     printf("Filename=\"%s\" Line Number=%u\n",
           bc->filename ? bc->filename : "<UNKNOWN>", bc->lineno);
-    printf("Offset=%lx BITS=%u\n", bc->offset, bc->mode_bits);
-}
-
-static void
-bytecode_parser_finalize_insn(bytecode *bc)
-{
-    effaddr *ea = bc->data.insn.ea;
-    immval *imm = bc->data.insn.imm;
-
-    if (ea) {
-       if ((ea->disp) && ((!ea->valid_sib && ea->need_sib) ||
-                          (!ea->valid_modrm && ea->need_modrm))) {
-           /* First expand equ's */
-           expr_expand_equ(ea->disp);
-
-           /* Check validity of effective address and calc R/M bits of
-            * Mod/RM byte and SIB byte.  We won't know the Mod field
-            * of the Mod/RM byte until we know more about the
-            * displacement.
-            */
-           if (!expr_checkea(&ea->disp, &bc->data.insn.addrsize,
-                             bc->mode_bits, ea->nosplit, &ea->len, &ea->modrm,
-                             &ea->valid_modrm, &ea->need_modrm, &ea->sib,
-                             &ea->valid_sib, &ea->need_sib))
-               return;     /* failed, don't bother checking rest of insn */
-       }
-    }
-
-    if (imm) {
-       const intnum *num;
-
-       if (imm->val) {
-           expr_expand_equ(imm->val);
-           expr_simplify(imm->val);
-       }
-       /* TODO: check imm f_len vs. len? */
-
-       /* Handle shift_op special-casing */
-       if (bc->data.insn.shift_op && (num = expr_get_intnum(&imm->val))) {
-           if (intnum_get_uint(num) == 1) {
-               /* Use ,1 form: first copy ,1 opcode. */
-               bc->data.insn.opcode[0] = bc->data.insn.opcode[1];
-               /* Delete ModRM, as it's no longer needed */
-               xfree(ea);
-               bc->data.insn.ea = (effaddr *)NULL;
-               /* Delete Imm, as it's not needed */
-               expr_delete(imm->val);
-               xfree(imm);
-               bc->data.insn.imm = (immval *)NULL;
-           }
-           bc->data.insn.shift_op = 0;
-       }
-    }
-
-    
+    printf("Offset=%lx\n", bc->offset);
 }
 
 void
-bytecode_parser_finalize(bytecode *bc)
+bc_parser_finalize(bytecode *bc)
 {
     switch (bc->type) {
        case BC_EMPTY:
            /* FIXME: delete it (probably in bytecodes_ level, not here */
            InternalError(_("got empty bytecode in parser_finalize"));
            break;
-       case BC_INSN:
-           bytecode_parser_finalize_insn(bc);
-           break;
        default:
+           if (bc->type < cur_arch->bc.type_max)
+               cur_arch->bc.bc_parser_finalize(bc);
+           else
+               InternalError(_("Unknown bytecode type"));
            break;
     }
 }
 
 void
-bytecodes_delete(bytecodehead *headp)
+bcs_delete(bytecodehead *headp)
 {
     bytecode *cur, *next;
 
     cur = STAILQ_FIRST(headp);
     while (cur) {
        next = STAILQ_NEXT(cur, link);
-       bytecode_delete(cur);
+       bc_delete(cur);
        cur = next;
     }
     STAILQ_INIT(headp);
 }
 
 bytecode *
-bytecodes_append(bytecodehead *headp, bytecode *bc)
+bcs_append(bytecodehead *headp, bytecode *bc)
 {
     if (bc) {
        if (bc->type != BC_EMPTY) {
@@ -796,27 +295,27 @@ bytecodes_append(bytecodehead *headp, bytecode *bc)
 }
 
 void
-bytecodes_print(const bytecodehead *headp)
+bcs_print(const bytecodehead *headp)
 {
     bytecode *cur;
 
     STAILQ_FOREACH(cur, headp, link) {
        printf("---Next Bytecode---\n");
-       bytecode_print(cur);
+       bc_print(cur);
     }
 }
 
 void
-bytecodes_parser_finalize(bytecodehead *headp)
+bcs_parser_finalize(bytecodehead *headp)
 {
     bytecode *cur;
 
     STAILQ_FOREACH(cur, headp, link)
-       bytecode_parser_finalize(cur);
+       bc_parser_finalize(cur);
 }
 
 dataval *
-dataval_new_expr(expr *expn)
+dv_new_expr(expr *expn)
 {
     dataval *retval = xmalloc(sizeof(dataval));
 
@@ -827,7 +326,7 @@ dataval_new_expr(expr *expn)
 }
 
 dataval *
-dataval_new_string(char *str_val)
+dv_new_string(char *str_val)
 {
     dataval *retval = xmalloc(sizeof(dataval));
 
@@ -838,7 +337,7 @@ dataval_new_string(char *str_val)
 }
 
 void
-datavals_delete(datavalhead *headp)
+dvs_delete(datavalhead *headp)
 {
     dataval *cur, *next;
 
@@ -854,7 +353,7 @@ datavals_delete(datavalhead *headp)
 }
 
 dataval *
-datavals_append(datavalhead *headp, dataval *dv)
+dvs_append(datavalhead *headp, dataval *dv)
 {
     if (dv) {
        STAILQ_INSERT_TAIL(headp, dv, link);
@@ -864,7 +363,7 @@ datavals_append(datavalhead *headp, dataval *dv)
 }
 
 void
-datavals_print(const datavalhead *head)
+dvs_print(const datavalhead *head)
 {
     dataval *cur;
 
index 3683bae9eae169385378818edef3e721867f8d24..38fa31138628753505e4064cfef0f622c514cd62 100644 (file)
@@ -27,87 +27,43 @@ typedef struct immval immval;
 typedef STAILQ_HEAD(datavalhead, dataval) datavalhead;
 typedef struct dataval dataval;
 
+/* Additional types may be architecture-defined starting at
+ * BYTECODE_TYPE_BASE.
+ */
 typedef enum {
-    JR_NONE,
-    JR_SHORT,
-    JR_NEAR,
-    JR_SHORT_FORCED,
-    JR_NEAR_FORCED
-} jmprel_opcode_sel;
-
-typedef struct targetval {
-    expr *val;
-
-    jmprel_opcode_sel op_sel;
-} targetval;
-
-effaddr *effaddr_new_reg(unsigned long reg);
-effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len);
-effaddr *effaddr_new_expr(expr *expr_ptr);
+    BC_EMPTY = 0,
+    BC_DATA,
+    BC_RESERVE
+} bytecode_type;
+#define BYTECODE_TYPE_BASE  (BC_RESERVE+1)     /* first value free for arch-defined types */
 
-immval *immval_new_int(unsigned long int_val);
-immval *immval_new_expr(expr *expr_ptr);
+immval *imm_new_int(unsigned long int_val);
+immval *imm_new_expr(expr *e);
 
-void SetEASegment(effaddr *ptr, unsigned char segment);
-void SetEALen(effaddr *ptr, unsigned char len);
-void SetEANosplit(effaddr *ptr, unsigned char nosplit);
+void ea_set_len(effaddr *ea, unsigned char len);
+void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
 
-effaddr *GetInsnEA(bytecode *bc);
+void bc_set_multiple(bytecode *bc, expr *e);
 
-void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize);
-void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize);
-void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix);
-void SetInsnShiftFlag(bytecode *bc);
+bytecode *bc_new_common(bytecode_type type, size_t datasize);
+bytecode *bc_new_data(datavalhead *datahead, unsigned long size);
+bytecode *bc_new_reserve(expr *numitems, unsigned long itemsize);
 
-void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel);
-
-void SetBCMultiple(bytecode *bc, expr *e);
-
-/* IMPORTANT: ea_ptr and im_ptr cannot be reused or freed after calling this
- * function (it doesn't make a copy).
- */
-bytecode *bytecode_new_insn(unsigned char  opersize,
-                           unsigned char  opcode_len,
-                           unsigned char  op0,
-                           unsigned char  op1,
-                           unsigned char  op2,
-                           effaddr       *ea_ptr,
-                           unsigned char  spare,
-                           immval        *im_ptr,
-                           unsigned char  im_len,
-                           unsigned char  im_sign);
-
-/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. */
-bytecode *bytecode_new_jmprel(targetval     *target,
-                             unsigned char  short_opcode_len,
-                             unsigned char  short_op0,
-                             unsigned char  short_op1,
-                             unsigned char  short_op2,
-                             unsigned char  near_opcode_len,
-                             unsigned char  near_op0,
-                             unsigned char  near_op1,
-                             unsigned char  near_op2,
-                             unsigned char  addrsize);
-
-bytecode *bytecode_new_data(datavalhead *datahead, unsigned long size);
-
-bytecode *bytecode_new_reserve(expr *numitems, unsigned long itemsize);
-
-void bytecode_delete(bytecode *bc);
+void bc_delete(bytecode *bc);
 
 /* Gets the offset of the bytecode specified by bc if possible.
  * Return value is IF POSSIBLE, not the value.
  */
-int bytecode_get_offset(section *sect, bytecode *bc, unsigned long *ret_val);
+int bc_get_offset(section *sect, bytecode *bc, unsigned long *ret_val);
 
-void bytecode_print(const bytecode *bc);
+void bc_print(const bytecode *bc);
 
-void bytecode_parser_finalize(bytecode *bc);
+void bc_parser_finalize(bytecode *bc);
 
 /* void bytecodes_initialize(bytecodehead *headp); */
 #define        bytecodes_initialize(headp)     STAILQ_INIT(headp)
 
-void bytecodes_delete(bytecodehead *headp);
+void bcs_delete(bytecodehead *headp);
 
 /* Adds bc to the list of bytecodes headp.
  * NOTE: Does not make a copy of bc; so don't pass this function
@@ -115,20 +71,20 @@ void bytecodes_delete(bytecodehead *headp);
  * this function.  If bc was actually appended (it wasn't NULL or empty),
  * then returns bc, otherwise returns NULL.
  */
-bytecode *bytecodes_append(bytecodehead *headp, bytecode *bc);
+bytecode *bcs_append(bytecodehead *headp, bytecode *bc);
 
-void bytecodes_print(const bytecodehead *headp);
+void bcs_print(const bytecodehead *headp);
 
-void bytecodes_parser_finalize(bytecodehead *headp);
+void bcs_parser_finalize(bytecodehead *headp);
 
-dataval *dataval_new_expr(expr *expn);
-dataval *dataval_new_float(floatnum *flt);
-dataval *dataval_new_string(char *str_val);
+dataval *dv_new_expr(expr *expn);
+dataval *dv_new_float(floatnum *flt);
+dataval *dv_new_string(char *str_val);
 
-/* void datavals_initialize(datavalhead *headp); */
-#define        datavals_initialize(headp)      STAILQ_INIT(headp)
+/* void dvs_initialize(datavalhead *headp); */
+#define        dvs_initialize(headp)   STAILQ_INIT(headp)
 
-void datavals_delete(datavalhead *headp);
+void dvs_delete(datavalhead *headp);
 
 /* Adds dv to the list of datavals headp.
  * NOTE: Does not make a copy of dv; so don't pass this function
@@ -136,8 +92,8 @@ void datavals_delete(datavalhead *headp);
  * this function.  If dv was actually appended (it wasn't NULL), then
  * returns dv, otherwise returns NULL.
  */
-dataval *datavals_append(datavalhead *headp, dataval *dv);
+dataval *dvs_append(datavalhead *headp, dataval *dv);
 
-void datavals_print(const datavalhead *head);
+void dvs_print(const datavalhead *head);
 
 #endif
index c21436a7176c6bf8ada78d0d2dc166cb73fedae7..2239872f5ed6f91886adeeb90151d632988af379 100644 (file)
@@ -29,7 +29,6 @@ RCSID("$IdPath$");
 
 const char *in_filename = (const char *)NULL;
 unsigned int line_number = 1;
-unsigned char mode_bits = 0;
 unsigned int asm_options = 0;
 
 static ternary_tree filename_table = (ternary_tree)NULL;
index 23376e131012b5b4151751a393b36daa0aa357c0..d0457793a6e4aa38c72f3eee9ebd9d2092ec493d 100644 (file)
@@ -24,7 +24,6 @@
 
 extern const char *in_filename;
 extern unsigned int line_number;
-extern unsigned char mode_bits;
 extern unsigned int asm_options;
 
 void switch_filename(const char *filename);
index c21436a7176c6bf8ada78d0d2dc166cb73fedae7..2239872f5ed6f91886adeeb90151d632988af379 100644 (file)
@@ -29,7 +29,6 @@ RCSID("$IdPath$");
 
 const char *in_filename = (const char *)NULL;
 unsigned int line_number = 1;
-unsigned char mode_bits = 0;
 unsigned int asm_options = 0;
 
 static ternary_tree filename_table = (ternary_tree)NULL;
index 23376e131012b5b4151751a393b36daa0aa357c0..d0457793a6e4aa38c72f3eee9ebd9d2092ec493d 100644 (file)
@@ -24,7 +24,6 @@
 
 extern const char *in_filename;
 extern unsigned int line_number;
-extern unsigned char mode_bits;
 extern unsigned int asm_options;
 
 void switch_filename(const char *filename);
index 0b4d711f9803e7446390b227b9defa020e734c95..de7a27b93c6d21e9ee738ada9164b97ebcbd7044 100644 (file)
@@ -41,6 +41,8 @@ RCSID("$IdPath$");
 #include "preproc.h"
 #include "parser.h"
 
+#include "arch.h"
+
 
 #ifndef countof
 #define countof(x,y)   (sizeof(x)/sizeof(y))
@@ -110,8 +112,11 @@ main(int argc, char *argv[])
        switch_filename("<STDIN>");
     }
 
+    /* Set x86 as the architecture */
+    cur_arch = &x86_arch;
+
     /* Get initial BITS setting from object format */
-    mode_bits = dbg_objfmt.default_mode_bits;
+    x86_mode_bits = dbg_objfmt.default_mode_bits;
 
     sections = nasm_parser.do_parse(&nasm_parser, &dbg_objfmt, in);
 
index 54893a7695ef90df3be9602fd496bf6b108ad499..fa318fd3b2dff626e129363f6eb321ae18fdb0d1 100644 (file)
@@ -40,6 +40,7 @@ RCSID("$IdPath$");
 #include "section.h"
 #include "objfmt.h"
 
+#include "arch.h"
 
 #define YYDEBUG 1
 
@@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base;
 static bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
 
+/* additional data declarations (dynamically generated) */
+/* @DATADECLS@ */
+
 %}
 
 %union {
@@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc;
     effaddr *ea;
     expr *exp;
     immval *im_val;
-    targetval tgt_val;
+    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
@@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc;
 %%
 input: /* empty */
     | input line    {
-       nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section),
+       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
                                               $2);
        if (nasm_parser_temp_bc)
            nasm_parser_prev_bc = nasm_parser_temp_bc;
@@ -145,10 +149,10 @@ line: '\n'                { $$ = (bytecode *)NULL; }
 ;
 
 lineexp: exp
-    | TIMES expr exp                   { $$ = $3; SetBCMultiple($$, $2); }
+    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
     | label                            { $$ = (bytecode *)NULL; }
     | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; SetBCMultiple($$, $3); }
+    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
     | label_id EQU expr                        {
        symrec_define_equ($1, $3);
        xfree($1);
@@ -157,22 +161,16 @@ lineexp: exp
 ;
 
 exp: instr
-    | DECLARE_DATA datavals        { $$ = bytecode_new_data(&$2, $1); }
-    | RESERVE_SPACE expr           { $$ = bytecode_new_reserve($2, $1); }
+    | DECLARE_DATA datavals        { $$ = bc_new_data(&$2, $1); }
+    | RESERVE_SPACE expr           { $$ = bc_new_reserve($2, $1); }
 ;
 
-datavals: dataval          {
-       datavals_initialize(&$$);
-       datavals_append(&$$, $1);
-    }
-    | datavals ',' dataval  {
-       datavals_append(&$1, $3);
-       $$ = $1;
-    }
+datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
+    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
 ;
 
-dataval: expr_no_string        { $$ = dataval_new_expr($1); }
-    | STRING           { $$ = dataval_new_string($1); }
+dataval: expr_no_string        { $$ = dv_new_expr($1); }
+    | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
        $$ = (dataval *)NULL;
@@ -317,17 +315,20 @@ memexpr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
     | error                    { Error(_("invalid effective address")); }
 ;
 
-memaddr: memexpr           { $$ = effaddr_new_expr($1); SetEASegment($$, 0); }
-    | REG_CS ':' memaddr    { $$ = $3; SetEASegment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; SetEASegment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; SetEASegment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; SetEASegment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; SetEASegment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; SetEASegment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; SetEALen($$, 1); }
-    | WORD memaddr         { $$ = $2; SetEALen($$, 2); }
-    | DWORD memaddr        { $$ = $2; SetEALen($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; SetEANosplit($$, 1); }
+memaddr: memexpr           {
+       $$ = x86_ea_new_expr($1);
+       x86_ea_set_segment($$, 0);
+    }
+    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
+    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
+    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
+    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
+    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
+    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
+    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
+    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
+    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
+    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
 ;
 
 mem: '[' memaddr ']'   { $$ = $2; }
@@ -378,43 +379,43 @@ mem1632: mem
 ;
 
 /* explicit register or memory */
-rm8x: reg8     { $$ = effaddr_new_reg($1); }
+rm8x: reg8     { $$ = x86_ea_new_reg($1); }
     | mem8x
 ;
-rm16x: reg16   { $$ = effaddr_new_reg($1); }
+rm16x: reg16   { $$ = x86_ea_new_reg($1); }
     | mem16x
 ;
-rm32x: reg32   { $$ = effaddr_new_reg($1); }
+rm32x: reg32   { $$ = x86_ea_new_reg($1); }
     | mem32x
 ;
 /* not needed:
-rm64x: MMXREG  { $$ = effaddr_new_reg($1); }
+rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
     | mem64x
 ;
-rm128x: XMMREG { $$ = effaddr_new_reg($1); }
+rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
     | mem128x
 ;
 */
 
 /* implicit register or memory */
-rm8: reg8      { $$ = effaddr_new_reg($1); }
+rm8: reg8      { $$ = x86_ea_new_reg($1); }
     | mem8
 ;
-rm16: reg16    { $$ = effaddr_new_reg($1); }
+rm16: reg16    { $$ = x86_ea_new_reg($1); }
     | mem16
 ;
-rm32: reg32    { $$ = effaddr_new_reg($1); }
+rm32: reg32    { $$ = x86_ea_new_reg($1); }
     | mem32
 ;
-rm64: MMXREG   { $$ = effaddr_new_reg($1); }
+rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
     | mem64
 ;
-rm128: XMMREG  { $$ = effaddr_new_reg($1); }
+rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
     | mem128
 ;
 
 /* immediate values */
-imm: expr   { $$ = immval_new_expr($1); }
+imm: expr   { $$ = imm_new_expr($1); }
 ;
 
 /* explicit immediates */
@@ -437,9 +438,18 @@ imm32: imm
 ;
 
 /* jump targets */
-target: expr           { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); }
-    | SHORT target     { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
-    | NEAR target      { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+target: expr           {
+       $$.val = $1;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+    }
+    | SHORT target     {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+    }
+    | NEAR target      {
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+    }
 ;
 
 /* expression trees */
@@ -493,18 +503,36 @@ explabel: ID              { $$ = symrec_use($1); xfree($1); }
 ;
 
 instr: instrbase
-    | OPERSIZE instr   { $$ = $2; SetInsnOperSizeOverride($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; SetInsnAddrSizeOverride($$, $1); }
-    | REG_CS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); }
-    | REG_SS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); }
-    | REG_DS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); }
-    | REG_ES instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); }
-    | REG_FS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); }
-    | REG_GS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); }
-    | LOCK instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); }
-    | REP instr                { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); }
+    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
+    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
+    | REG_CS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
+    }
+    | REG_SS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
+    }
+    | REG_DS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
 ;
 
 /* instruction grammars (dynamically generated) */
@@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val)
        if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32))
            Error(_("`%s' is not a valid argument to [BITS]"), val);
        else
-           mode_bits = (unsigned char)lval;
+           x86_mode_bits = (unsigned char)lval;
     } else {
        printf("Directive: Name=`%s' Value=`%s'\n", name, val);
     }
index adaa58dbe18c6d10eeb8844fc1217dd26600a5b4..be391260f953462aef6585b58a160b293c76c240 100755 (executable)
@@ -353,7 +353,8 @@ sub cond_action_if ( $ $ $ $ $ $ $ )
     my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
     return rule_header ($rule, $tokens, $count) . <<"EOF";
         if (\$$regarg == $val) {
-            \$\$ = $func(@$a_eax);
+            @$a_eax
+            \$\$ = $func;
         }
 EOF
 }
@@ -362,7 +363,8 @@ sub cond_action_elsif ( $ $ $ $ )
     my ($regarg, $val, $func, $a_eax) = splice (@_);
     return <<"EOF";
         else if (\$$regarg == $val) {
-            \$\$ = $func(@$a_eax);
+            @$a_eax
+            \$\$ = $func;
         }
 EOF
 }
@@ -371,7 +373,8 @@ sub cond_action_else ( $ $ )
     my ($func, $a_args) = splice (@_);
     return <<"EOF" . rule_footer;
         else {
-            \$\$ = $func (@$a_args);
+            @$a_args
+            \$\$ = $func;
         }
 EOF
 }
@@ -388,7 +391,8 @@ sub action ( @ $ )
 {
     my ($rule, $tokens, $func, $a_args, $count) = splice @_;
     return rule_header ($rule, $tokens, $count)
-       . "        \$\$ = $func (@$a_args);\n"
+       . "        @$a_args\n"
+       . "        \$\$ = $func;\n"
        . rule_footer; 
 }
 
@@ -396,8 +400,9 @@ sub action_setshiftflag ( @ $ )
 {
     my ($rule, $tokens, $func, $a_args, $count) = splice @_;
     return rule_header ($rule, $tokens, $count)
-       . "        \$\$ = $func (@$a_args);\n"
-       . "        SetInsnShiftFlag(\$\$);\n"
+       . "        @$a_args\n"
+       . "        \$\$ = $func;\n"
+       . "        x86_bc_insn_set_shift_flag(\$\$);\n"
        . rule_footer; 
 }
 
@@ -421,7 +426,12 @@ sub output_yacc ($@)
 
     while (<IN>)
     {
-       if (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
+       if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
+       {
+           print GRAMMAR "static x86_new_insn_data idata;\n";
+           print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
+       }
+       elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
        {
            my $len = length("%token <groupdata>");
            print GRAMMAR "%token <groupdata>";
@@ -500,69 +510,82 @@ sub output_yacc ($@)
                            if $inst->[OPERANDS] ne 'nil';
                        $tokens =~ s/,/ ',' /g;
                        $tokens =~ s/:/ ':' /g;
-                       my $func = "bytecode_new_jmprel";
+                       my $datastruct = "x86_new_jmprel_data";
+                       my $datastructname = "jrdata";
+                       my $func = "x86_bc_new_jmprel(&$datastructname)";
 
                        # Create the argument list for bytecode_new
                        my @args;
 
                        # Target argument: HACK: Always assumed to be arg 1.
-                       push @args, '&$2,';
+                       push @args, 'target=&$2;';
 
                        # test for short opcode "nil"
                        if($inst->[SHORTOPCODE] =~ m/nil/)
                        {
-                           push @args, '0, 0, 0, 0,';
+                           push @args, 'short_op_len=0;';
+                           push @args, 'short_op[0]=0;';
+                           push @args, 'short_op[1]=0;';
+                           push @args, 'short_op[2]=0;';
                        }
                        else
                        {
-                           # number of bytes of short opcode
-                           push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ",";
-
                            # opcode piece 1 (and 2 and 3 if attached)
-                           push @args, $inst->[SHORTOPCODE];
-                           $args[-1] =~ s/,/, /;
-                           $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           # don't match $0.\d in the following rule.
-                           $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
-                           $args[-1] .= ',';
+                           my @opcodes = split ",", $inst->[SHORTOPCODE];
+                           # number of bytes of short opcode
+                           push @args, "short_op_len=".@opcodes.";";
+                           for (my $i=0; $i < @opcodes; ++$i)
+                           {
+                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                               # don't match $0.\d in the following rule.
+                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
+                               push @args, "short_op[$i]=$opcodes[$i];";
+                           }
 
                            # opcode piece 2 (if not attached)
-                           push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
+                           push @args, "short_op[1]=0;" if @opcodes < 2;
                            # opcode piece 3 (if not attached)
-                           push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
+                           push @args, "short_op[2]=0;" if @opcodes < 3;
                        }
 
                        # test for near opcode "nil"
                        if($inst->[NEAROPCODE] =~ m/nil/)
                        {
-                           push @args, '0, 0, 0, 0,';
+                           push @args, 'near_op_len=0;';
+                           push @args, 'near_op[0]=0;';
+                           push @args, 'near_op[1]=0;';
+                           push @args, 'near_op[2]=0;';
                        }
                        else
                        {
-                           # number of bytes of near opcode
-                           push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ",";
-
                            # opcode piece 1 (and 2 and 3 if attached)
-                           push @args, $inst->[NEAROPCODE];
-                           $args[-1] =~ s/,/, /;
-                           $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           # don't match $0.\d in the following rule.
-                           $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
-                           $args[-1] .= ',';
+                           my @opcodes = split ",", $inst->[NEAROPCODE];
+                           # number of bytes of near opcode
+                           push @args, "near_op_len=".@opcodes.";";
+                           for (my $i=0; $i < @opcodes; ++$i)
+                           {
+                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                               # don't match $0.\d in the following rule.
+                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
+                               push @args, "near_op[$i]=$opcodes[$i];";
+                           }
 
                            # opcode piece 2 (if not attached)
-                           push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
+                           push @args, "near_op[1]=0;" if @opcodes < 2;
                            # opcode piece 3 (if not attached)
-                           push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
+                           push @args, "near_op[2]=0;" if @opcodes < 3;
                        }
 
                        # address size
-                       push @args, "$inst->[ADSIZE]";
+                       push @args, "addrsize=$inst->[ADSIZE];";
                        $args[-1] =~ s/nil/0/;
 
                        # now that we've constructed the arglist, subst $0.\d
                        s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
 
+                       # and add the data structure reference
+                       s/^/$datastructname./g foreach (@args);
+
                        # generate the grammar
                        print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
                    }
@@ -583,65 +606,79 @@ sub output_yacc ($@)
                        $tokens =~ s/:/ ':' /g;
                        # offset args
                        my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
-                       my $func = "bytecode_new_insn";
+                       my $datastruct = "x86_new_insn_data";
+                       my $datastructname = "idata";
+                       my $func = "x86_bc_new_insn(&$datastructname)";
 
                        # Create the argument list for bytecode_new
                        my @args;
 
                        # operand size
-                       push @args, "$inst->[OPSIZE],";
+                       push @args, "opersize=$inst->[OPSIZE];";
                        $args[-1] =~ s/nil/0/;
 
-                       # number of bytes of opcodes
-                       push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
 
                        # opcode piece 1 (and 2 and 3 if attached)
-                       push @args, $inst->[OPCODE];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                       # don't match $0.\d in the following rule.
-                       $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
-                       $args[-1] .= ',';
+                       my @opcodes = split ",", $inst->[OPCODE];
+                       # number of bytes of opcodes
+                       push @args, "op_len=".@opcodes.";";
+                       for (my $i=0; $i < @opcodes; ++$i)
+                       {
+                           $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+                           # don't match $0.\d in the following rule.
+                           $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
+                           push @args, "op[$i]=$opcodes[$i];";
+                       }
 
                        # opcode piece 2 (if not attached)
-                       push @args, "0," if $inst->[OPCODE] !~ m/,/o;
+                       push @args, "op[1]=0;" if @opcodes < 2;
                        # opcode piece 3 (if not attached)
-                       push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
+                       push @args, "op[2]=0;" if @opcodes < 3;
 
                        # effective addresses
-                       push @args, $inst->[EFFADDR];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
-                       $args[-1] =~ s/nil/0/;
+                       my $effaddr = $inst->[EFFADDR];
+                       $effaddr =~ s/^nil/(effaddr *)NULL,0/;
+                       $effaddr =~ s/nil/0/;
                        # don't let a $0.\d match slip into the following rules.
-                       $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+                       $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
                        #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
-                       $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/;
-                       $args[-1] =~ s[(\$\d+)i,\s*(\d+)]
-                           ["effaddr_new_imm($1, ".($2/8)."), 0"]e;
-                       $args[-1] .= ',';
+                       $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
+                       $effaddr =~ s[(\$\d+)i,\s*(\d+)]
+                           ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
 
-                       die $args[-1] if $args[-1] =~ m/\d+[ri]/;
+                       die $effaddr if $effaddr =~ m/\d+[ri]/;
+
+                       my @effaddr_split = split ',', $effaddr;
+                       $effaddr_split[0] =~ s/\^/,/;
+                       push @args, "ea=$effaddr_split[0];";
+                       push @args, "spare=$effaddr_split[1];";
 
                        # immediate sources
-                       push @args, $inst->[IMM];
-                       $args[-1] =~ s/,/, /;
-                       $args[-1] =~ s/nil/(immval *)NULL, 0/;
+                       my $imm = $inst->[IMM];
+                       $imm =~ s/nil/(immval *)NULL,0/;
                        # don't match $0.\d in the following rules.
-                       $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       $args[-1] =~ s[^([0-9A-Fa-f]+),]
-                           [immval_new_int(0x$1),];
-                       $args[-1] =~ s[^\$0.(\d+),]
-                           [immval_new_int(\$1\[$1\]),];
+                       $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+                       $imm =~ s[^([0-9A-Fa-f]+),]
+                           [imm_new_int(0x$1),];
+                       $imm =~ s[^\$0.(\d+),]
+                           [imm_new_int(\$1\[$1\]),];
 
                        # divide the second, and only the second, by 8 bits/byte
-                       $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
-                       $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
+                       $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
+                       $imm .= ($3||'') eq 's' ? ',1' : ',0';
+
+                       die $imm if $imm =~ m/\d+s/;
 
-                       die $args[-1] if $args[-1] =~ m/\d+s/;
+                       my @imm_split = split ",", $imm;
+                       push @args, "imm=$imm_split[0];";
+                       push @args, "im_len=$imm_split[1];";
+                       push @args, "im_sign=$imm_split[2];";
 
                        # now that we've constructed the arglist, subst $0.\d
                        s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
+
+                       # and add the data structure reference
+                       s/^/$datastructname./g foreach (@args);
                    
                        # see if we match one of the cases to defer
                        if (($inst->[OPERANDS]||"") =~ m/,ONE/)
@@ -691,8 +728,9 @@ sub output_yacc ($@)
                            # Now output imm version, with second opcode byte
                            # set to ,1 opcode.  Also call SetInsnShiftFlag().
                            $tokens =~ s/imm8x/imm/;
-                           die "no space for ONE?" if $args[3] !~ m/0,/;
-                           $args[3] = $ONE->[3]->[2];
+                           die "no space for ONE?" if $args[3] !~ m/0;/;
+                           my $oneval = $ONE->[3]->[2];
+                           $args[3] =~ s/0/$oneval/;
                            print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
                        }
                        elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
index 54893a7695ef90df3be9602fd496bf6b108ad499..fa318fd3b2dff626e129363f6eb321ae18fdb0d1 100644 (file)
@@ -40,6 +40,7 @@ RCSID("$IdPath$");
 #include "section.h"
 #include "objfmt.h"
 
+#include "arch.h"
 
 #define YYDEBUG 1
 
@@ -56,6 +57,9 @@ extern char *nasm_parser_locallabel_base;
 static bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
 
+/* additional data declarations (dynamically generated) */
+/* @DATADECLS@ */
+
 %}
 
 %union {
@@ -68,7 +72,7 @@ static bytecode *nasm_parser_temp_bc;
     effaddr *ea;
     expr *exp;
     immval *im_val;
-    targetval tgt_val;
+    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
@@ -126,7 +130,7 @@ static bytecode *nasm_parser_temp_bc;
 %%
 input: /* empty */
     | input line    {
-       nasm_parser_temp_bc = bytecodes_append(section_get_bytecodes(nasm_parser_cur_section),
+       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
                                               $2);
        if (nasm_parser_temp_bc)
            nasm_parser_prev_bc = nasm_parser_temp_bc;
@@ -145,10 +149,10 @@ line: '\n'                { $$ = (bytecode *)NULL; }
 ;
 
 lineexp: exp
-    | TIMES expr exp                   { $$ = $3; SetBCMultiple($$, $2); }
+    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
     | label                            { $$ = (bytecode *)NULL; }
     | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; SetBCMultiple($$, $3); }
+    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
     | label_id EQU expr                        {
        symrec_define_equ($1, $3);
        xfree($1);
@@ -157,22 +161,16 @@ lineexp: exp
 ;
 
 exp: instr
-    | DECLARE_DATA datavals        { $$ = bytecode_new_data(&$2, $1); }
-    | RESERVE_SPACE expr           { $$ = bytecode_new_reserve($2, $1); }
+    | DECLARE_DATA datavals        { $$ = bc_new_data(&$2, $1); }
+    | RESERVE_SPACE expr           { $$ = bc_new_reserve($2, $1); }
 ;
 
-datavals: dataval          {
-       datavals_initialize(&$$);
-       datavals_append(&$$, $1);
-    }
-    | datavals ',' dataval  {
-       datavals_append(&$1, $3);
-       $$ = $1;
-    }
+datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
+    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
 ;
 
-dataval: expr_no_string        { $$ = dataval_new_expr($1); }
-    | STRING           { $$ = dataval_new_string($1); }
+dataval: expr_no_string        { $$ = dv_new_expr($1); }
+    | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
        $$ = (dataval *)NULL;
@@ -317,17 +315,20 @@ memexpr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
     | error                    { Error(_("invalid effective address")); }
 ;
 
-memaddr: memexpr           { $$ = effaddr_new_expr($1); SetEASegment($$, 0); }
-    | REG_CS ':' memaddr    { $$ = $3; SetEASegment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; SetEASegment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; SetEASegment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; SetEASegment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; SetEASegment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; SetEASegment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; SetEALen($$, 1); }
-    | WORD memaddr         { $$ = $2; SetEALen($$, 2); }
-    | DWORD memaddr        { $$ = $2; SetEALen($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; SetEANosplit($$, 1); }
+memaddr: memexpr           {
+       $$ = x86_ea_new_expr($1);
+       x86_ea_set_segment($$, 0);
+    }
+    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
+    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
+    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
+    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
+    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
+    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
+    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
+    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
+    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
+    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
 ;
 
 mem: '[' memaddr ']'   { $$ = $2; }
@@ -378,43 +379,43 @@ mem1632: mem
 ;
 
 /* explicit register or memory */
-rm8x: reg8     { $$ = effaddr_new_reg($1); }
+rm8x: reg8     { $$ = x86_ea_new_reg($1); }
     | mem8x
 ;
-rm16x: reg16   { $$ = effaddr_new_reg($1); }
+rm16x: reg16   { $$ = x86_ea_new_reg($1); }
     | mem16x
 ;
-rm32x: reg32   { $$ = effaddr_new_reg($1); }
+rm32x: reg32   { $$ = x86_ea_new_reg($1); }
     | mem32x
 ;
 /* not needed:
-rm64x: MMXREG  { $$ = effaddr_new_reg($1); }
+rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
     | mem64x
 ;
-rm128x: XMMREG { $$ = effaddr_new_reg($1); }
+rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
     | mem128x
 ;
 */
 
 /* implicit register or memory */
-rm8: reg8      { $$ = effaddr_new_reg($1); }
+rm8: reg8      { $$ = x86_ea_new_reg($1); }
     | mem8
 ;
-rm16: reg16    { $$ = effaddr_new_reg($1); }
+rm16: reg16    { $$ = x86_ea_new_reg($1); }
     | mem16
 ;
-rm32: reg32    { $$ = effaddr_new_reg($1); }
+rm32: reg32    { $$ = x86_ea_new_reg($1); }
     | mem32
 ;
-rm64: MMXREG   { $$ = effaddr_new_reg($1); }
+rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
     | mem64
 ;
-rm128: XMMREG  { $$ = effaddr_new_reg($1); }
+rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
     | mem128
 ;
 
 /* immediate values */
-imm: expr   { $$ = immval_new_expr($1); }
+imm: expr   { $$ = imm_new_expr($1); }
 ;
 
 /* explicit immediates */
@@ -437,9 +438,18 @@ imm32: imm
 ;
 
 /* jump targets */
-target: expr           { $$.val = $1; SetOpcodeSel(&$$.op_sel, JR_NONE); }
-    | SHORT target     { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
-    | NEAR target      { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+target: expr           {   /* bare expression: store it as the target value */
+       $$.val = $1;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); /* no SHORT/NEAR keyword was given */
+    }
+    | SHORT target     {   /* SHORT keyword: reuse the inner target, mark it JR_SHORT_FORCED */
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+    }
+    | NEAR target      {   /* NEAR keyword: reuse the inner target, mark it JR_NEAR_FORCED */
+       $$ = $2;
+       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+    }
 ;
 
 /* expression trees */
@@ -493,18 +503,36 @@ explabel: ID              { $$ = symrec_use($1); xfree($1); }
 ;
 
 instr: instrbase
-    | OPERSIZE instr   { $$ = $2; SetInsnOperSizeOverride($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; SetInsnAddrSizeOverride($$, $1); }
-    | REG_CS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x2E); }
-    | REG_SS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x36); }
-    | REG_DS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x3E); }
-    | REG_ES instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x26); }
-    | REG_FS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x64); }
-    | REG_GS instr     { $$ = $2; SetEASegment(GetInsnEA($$), 0x65); }
-    | LOCK instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); }
-    | REP instr                { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; SetInsnLockRepPrefix($$, 0xF4); }
+    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
+    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
+    | REG_CS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
+    }
+    | REG_SS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
+    }
+    | REG_DS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
+    }
+    | REG_ES instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
+    }
+    | REG_FS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
+    }
+    | REG_GS instr     {
+       $$ = $2;
+       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
+    }
+    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
+    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
+    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
+    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
 ;
 
 /* instruction grammars (dynamically generated) */
@@ -528,7 +556,7 @@ nasm_parser_directive(const char *name, const char *val)
        if (*val == '\0' || *end != '\0' || (lval != 16 && lval != 32))
            Error(_("`%s' is not a valid argument to [BITS]"), val);
        else
-           mode_bits = (unsigned char)lval;
+           x86_mode_bits = (unsigned char)lval;
     } else {
        printf("Directive: Name=`%s' Value=`%s'\n", name, val);
     }
index 8c1d683446cafded20f4c5d037fa99a4d09e9f83..54002180ea017fab1ef89aae0a915646e2111989 100644 (file)
@@ -33,6 +33,8 @@ RCSID("$IdPath$");
 
 #include "bytecode.h"
 
+#include "arch.h"
+
 #include "bison.h"
 
 
index 0ebccb7e67d69f886d97105fe5d544b0572b494a..a95d185a1acbb954eb5ef33deedb6f4512733d1a 100644 (file)
@@ -134,7 +134,7 @@ sections_parser_finalize(sectionhead *headp)
     section *cur;
     
     STAILQ_FOREACH(cur, headp, link)
-       bytecodes_parser_finalize(&cur->bc);
+       bcs_parser_finalize(&cur->bc);
 }
 
 bytecodehead *
@@ -156,7 +156,7 @@ section_delete(section *sect)
        return;
 
     xfree(sect->name);
-    bytecodes_delete(&sect->bc);
+    bcs_delete(&sect->bc);
     xfree(sect);
 }
 
@@ -176,5 +176,5 @@ section_print(const section *sect)
     }
 
     printf(" Bytecodes:\n");
-    bytecodes_print(&sect->bc);
+    bcs_print(&sect->bc);
 }
index eccb034f7e66a7e0cf1b9541a082de826b0b3d98..020dee11559b4976057f7ae50c54b953fa602e7b 100644 (file)
@@ -290,7 +290,7 @@ symrec_print(const symrec *sym)
                printf("[First bytecode]\n");
            else {
                printf("[Preceding bytecode]\n");
-               bytecode_print(sym->value.label.bc);
+               bc_print(sym->value.label.bc);
            }
            break;
     }
index 9c7d99200da54de01e6c42ca009190c3dd24aeb1..36e4bb1d0d521b63ddc446418453f7a4cb0f566a 100644 (file)
@@ -26,6 +26,7 @@ LDADD = \
        $(top_builddir)/src/preprocs/raw/libpreproc.a           \
        $(top_builddir)/src/optimizers/dbg/liboptimizer.a       \
        $(top_builddir)/src/objfmts/dbg/libobjfmt.a             \
+       $(top_builddir)/src/arch/@ARCH@/libarch.a               \
        $(top_builddir)/src/libyasm.a                           \
        $(INTLLIBS)
 
@@ -54,5 +55,5 @@ memexpr_test_SOURCES = \
        memexpr_test.c
 
 INCLUDES= -I$(top_srcdir) -I$(top_srcdir)/src -I$(top_srcdir)/check \
-       -I$(top_builddir)/intl
+       -I$(top_srcdir)/src/arch/@ARCH@ -I$(top_builddir)/intl
 
index 77eadc6ad9c9302dc427994a5b85c06439aee684..f34115126ddb074fdf88366bc99aa1c8c612c3bf 100644 (file)
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#ifdef STDC_HEADERS
-# include <stdlib.h>
-#endif
+#include "util.h"
 
 #include "check.h"
 
-#include "bytecode.c"
+#include "bytecode.h"
+#include "bc-int.h"
+#include "arch.h"
+#include "x86-int.h"
 
-START_TEST(test_effaddr_new_reg)
+START_TEST(test_x86_ea_new_reg)
 {
     effaddr *ea;
+    x86_effaddr_data *ead;
     int i;
 
     /* Test with NULL */
-    ea = effaddr_new_reg(1);
+    ea = x86_ea_new_reg(1);
     fail_unless(ea != NULL, "Should die if out of memory (not return NULL)");
 
     /* Test structure values function should set */
     fail_unless(ea->len == 0, "len should be 0");
-    fail_unless(ea->segment == 0, "Should be no segment override");
-    fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid");
-    fail_unless(ea->need_modrm == 1, "Mod/RM should be needed");
-    fail_unless(ea->valid_sib == 0, "SIB should be invalid");
-    fail_unless(ea->need_sib == 0, "SIB should not be needed");
+    ead = ea_get_data(ea);
+    fail_unless(ead->segment == 0, "Should be no segment override");
+    fail_unless(ead->valid_modrm == 1, "Mod/RM should be valid");
+    fail_unless(ead->need_modrm == 1, "Mod/RM should be needed");
+    fail_unless(ead->valid_sib == 0, "SIB should be invalid");
+    fail_unless(ead->need_sib == 0, "SIB should not be needed");
 
     free(ea);
 
     /* Exhaustively test generated Mod/RM byte with register values */
     for(i=0; i<8; i++) {
-       ea = effaddr_new_reg(i);
-       fail_unless(ea->modrm == (0xC0 | (i & 0x07)),
+       ea = x86_ea_new_reg(i);
+       ead = ea_get_data(ea);
+       fail_unless(ead->modrm == (0xC0 | (i & 0x07)),
                    "Invalid Mod/RM byte generated");
        free(ea);
     }
@@ -66,7 +66,7 @@ bytecode_suite(void)
     TCase *tc_conversion = tcase_create("Conversion");
 
     suite_add_tcase(s, tc_conversion);
-    tcase_add_test(tc_conversion, test_effaddr_new_reg);
+    tcase_add_test(tc_conversion, test_x86_ea_new_reg);
 
     return s;
 }