]> granicus.if.org Git - yasm/commitdiff
Massive restructuring of lexing and parsing (NASM-compatible parser).
author Peter Johnson <peter@tortall.net>
Fri, 9 Aug 2002 05:43:03 +0000 (05:43 -0000)
committer Peter Johnson <peter@tortall.net>
Fri, 9 Aug 2002 05:43:03 +0000 (05:43 -0000)
Bugzilla Bug#3.  Not all instructions are parsed yet, so this is actually a
minor feature regression from the user side, but this commit lays the framework
for much easier additions of new assembler syntaxes and architectures.  The
re2c tool is now used to generate the lexers, as it's much more flexibly
function-oriented than lex.
 - nasm-bison.y is a repocopy+modify of nasm/bison.y.in.
 - x86arch.h now includes all of x86-int.h

svn path=/trunk/yasm/; revision=670

51 files changed:
.cvsignore
frontends/yasm/yasm.c
libyasm/arch.c
libyasm/arch.h
libyasm/bytecode.c
libyasm/bytecode.h
libyasm/expr-int.h
libyasm/expr.c
libyasm/expr.h
libyasm/tests/bytecode_test.c
libyasm/tests/memexpr_test.c
modules/arch/x86/Makefile.inc
modules/arch/x86/instrs.dat [deleted file]
modules/arch/x86/x86-int.h [deleted file]
modules/arch/x86/x86arch.c
modules/arch/x86/x86arch.h
modules/arch/x86/x86bc.c
modules/arch/x86/x86expr.c
modules/arch/x86/x86id.re [new file with mode: 0644]
modules/parsers/nasm/Makefile.inc
modules/parsers/nasm/bison.y.in [deleted file]
modules/parsers/nasm/gen_instr.pl [deleted file]
modules/parsers/nasm/nasm-bison.y
modules/parsers/nasm/nasm-parser.c
modules/parsers/nasm/nasm-token.re [new file with mode: 0644]
modules/parsers/nasm/token.l.in [deleted file]
src/arch.c
src/arch.h
src/arch/x86/Makefile.inc
src/arch/x86/instrs.dat [deleted file]
src/arch/x86/x86-int.h [deleted file]
src/arch/x86/x86arch.c
src/arch/x86/x86arch.h
src/arch/x86/x86bc.c
src/arch/x86/x86expr.c
src/arch/x86/x86id.re [new file with mode: 0644]
src/bytecode.c
src/bytecode.h
src/expr-int.h
src/expr.c
src/expr.h
src/main.c
src/parsers/nasm/Makefile.inc
src/parsers/nasm/bison.y.in [deleted file]
src/parsers/nasm/gen_instr.pl [deleted file]
src/parsers/nasm/nasm-bison.y
src/parsers/nasm/nasm-parser.c
src/parsers/nasm/nasm-token.re [new file with mode: 0644]
src/parsers/nasm/token.l.in [deleted file]
src/tests/bytecode_test.c
src/tests/memexpr_test.c

index 1dc9ca739c593ffec5ff703f09b6a3dbb7a234e0..441b21614d6467692bcceba1c2c7c61e25df366e 100644 (file)
@@ -1,8 +1,6 @@
 autom4te.cache
-nasm-bison.y
 nasm-bison.c
 nasm-bison.h
-nasm-token.l
 nasm-token.c
 yapp-token.c
 yasm
index 07c4a53635cc9756c7b4371cb8f354fe9e726cd2..a8c7c33de0f882f61baaa864f7e715e3cde789ae 100644 (file)
@@ -289,7 +289,7 @@ main(int argc, char *argv[])
     }
 
     /* Get initial BITS setting from object format */
-    x86_mode_bits = cur_objfmt->default_mode_bits;
+    /*x86_mode_bits = cur_objfmt->default_mode_bits;*/
 
     /* Parse! */
     sections = cur_parser->do_parse(cur_parser, in, in_filename);
index 5a997421b5ae1c2766c9a8b425d6a93505f6f35f..780fb26ed1bb76b4cf37689c3a1dd606c68dcefc 100644 (file)
 #include "util.h"
 /*@unused@*/ RCSID("$IdPath$");
 
+#include "globals.h"
+#include "expr.h"
+
 #include "bytecode.h"
 
 #include "arch.h"
 
+
 arch *cur_arch;
 
+insn_operand *
+operand_new_reg(unsigned long reg)
+{
+    insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+    retval->type = INSN_OPERAND_REG;
+    retval->data.reg = reg;
+    retval->targetmod = 0;
+    retval->size = 0;
+
+    return retval;
+}
+
+insn_operand *
+operand_new_segreg(unsigned long segreg)
+{
+    insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+    retval->type = INSN_OPERAND_SEGREG;
+    retval->data.reg = segreg;
+    retval->targetmod = 0;
+    retval->size = 0;
+
+    return retval;
+}
+
+insn_operand *
+operand_new_mem(/*@only@*/ effaddr *ea)
+{
+    insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+    retval->type = INSN_OPERAND_MEMORY;
+    retval->data.ea = ea;
+    retval->targetmod = 0;
+    retval->size = 0;
+
+    return retval;
+}
+
+insn_operand *
+operand_new_imm(/*@only@*/ expr *val)
+{
+    insn_operand *retval;
+    const unsigned long *reg;
+
+    reg = expr_get_reg(&val, 0);
+    if (reg) {
+       retval = operand_new_reg(*reg);
+       expr_delete(val);
+    } else {
+       retval = xmalloc(sizeof(insn_operand));
+       retval->type = INSN_OPERAND_IMM;
+       retval->data.val = val;
+       retval->targetmod = 0;
+       retval->size = 0;
+    }
+
+    return retval;
+}
+
+void
+operand_print(FILE *f, const insn_operand *op)
+{
+    switch (op->type) {
+       case INSN_OPERAND_REG:
+           fprintf(f, "%*sReg=", indent_level, "");
+           cur_arch->reg_print(f, op->data.reg);
+           fprintf(f, "\n");
+           break;
+       case INSN_OPERAND_SEGREG:
+           fprintf(f, "%*sSegReg=", indent_level, "");
+           cur_arch->segreg_print(f, op->data.reg);
+           fprintf(f, "\n");
+           break;
+       case INSN_OPERAND_MEMORY:
+           fprintf(f, "%*sMemory=\n", indent_level, "");
+           indent_level++;
+           ea_print(f, op->data.ea);
+           indent_level--;
+           break;
+       case INSN_OPERAND_IMM:
+           fprintf(f, "%*sImm=", indent_level, "");
+           expr_print(f, op->data.val);
+           fprintf(f, "\n");
+           break;
+    }
+    fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod);
+    fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size);
+}
+
+void
+ops_delete(insn_operandhead *headp, int content)
+{
+    insn_operand *cur, *next;
+
+    cur = STAILQ_FIRST(headp);
+    while (cur) {
+       next = STAILQ_NEXT(cur, link);
+       if (content)
+           switch (cur->type) {
+               case INSN_OPERAND_MEMORY:
+                   ea_delete(cur->data.ea);
+                   break;
+               case INSN_OPERAND_IMM:
+                   expr_delete(cur->data.val);
+                   break;
+               default:
+                   break;
+           }
+       xfree(cur);
+       cur = next;
+    }
+    STAILQ_INIT(headp);
+}
+
+/*@null@*/ insn_operand *
+ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op)
+{
+    if (op) {
+       STAILQ_INSERT_TAIL(headp, op, link);
+       return op;
+    }
+    return (insn_operand *)NULL;
+}
+
+void
+ops_print(FILE *f, const insn_operandhead *headp)
+{
+    insn_operand *cur;
+
+    STAILQ_FOREACH (cur, headp, link)
+       operand_print(f, cur);
+}
index 2e53ae3024f1cbc7243ed4dab526044eb92cb36b..18e3faf6b813d1ef6668aa449552afcf6f020acf 100644 (file)
@@ -1,7 +1,7 @@
 /* $IdPath$
  * Architecture header file
  *
- *  Copyright (C) 2001  Peter Johnson
+ *  Copyright (C) 2002  Peter Johnson
  *
  *  This file is part of YASM.
  *
 #ifndef YASM_ARCH_H
 #define YASM_ARCH_H
 
+typedef enum arch_check_id_retval {
+    ARCH_CHECK_ID_NONE = 0,    /* just a normal identifier */
+    ARCH_CHECK_ID_INSN,                /* an instruction */
+    ARCH_CHECK_ID_PREFIX,      /* an instruction prefix */ 
+    ARCH_CHECK_ID_REG,         /* a register */
+    ARCH_CHECK_ID_SEGREG,      /* a segment register (for memory overrides) */
+    ARCH_CHECK_ID_TARGETMOD    /* a target modifier (for jumps) */
+} arch_check_id_retval;
+
+typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand)
+       insn_operandhead;
+
+typedef struct insn_operand insn_operand;
+
+/* Different assemblers order instruction operands differently.  Also, some
+ * differ on how exactly various registers are specified.  There's no great
+ * solution to this, as the parsers aren't supposed to have knowledge of the
+ * architectural internals, and the architecture is supposed to be parser-
+ * independent.  To make things work, as a rather hackish solution, we give the
+ * architecture a little knowledge about the general "flavor" of the parser,
+ * and let the architecture decide what to do with it.  Most architectures will
+ * probably not even use this, but it's required for some (x86 in particular)
+ * for correct behavior on all parsers.
+ */
+typedef enum arch_syntax_flavor {
+    ARCH_SYNTAX_FLAVOR_NASM = 1,       /* like NASM */
+    ARCH_SYNTAX_FLAVOR_GAS             /* like GAS */
+} arch_syntax_flavor;
+
 struct arch {
     /* one-line description of the architecture */
     const char *name;
@@ -29,6 +58,67 @@ struct arch {
     /* keyword used to select architecture */
     const char *keyword;
 
+    struct {
+       /* All "data" below starts the parse initialized to 0.  Thus, it is
+        * okay for a function to use/check previously stored data to see if
+        * it's been called before on the same piece of data.
+        */
+
+       /* Switches available instructions/registers/etc. based on a
+        * user-specified CPU identifier.  Should modify behavior ONLY of
+        * parse functions!  The bytecode and output functions should be able
+        * to handle any CPU.
+        */
+       void (*switch_cpu) (const char *cpuid);
+
+       /* Checks a generic identifier to see if it matches architecture
+        * specific names for instructions, registers, etc (see the
+        * arch_check_id_retval enum above for the various types this function
+        * can detect & return).  Unrecognized identifiers should be returned
+        * as NONE so they can be treated as normal symbols.  Any additional
+        * data beyond just the type (almost always necessary) should be
+        * returned into the space provided by the data parameter.
+        * Note: even though this is passed a data[4], only data[0] should be
+        * used for TARGETMOD, REG, and SEGREG return values.
+        */
+       arch_check_id_retval (*check_identifier) (unsigned long data[4],
+                                                 const char *id);
+
+       /* Architecture-specific directive support.  Returns 1 if directive was
+        * not recognized.  Returns 0 if directive was recognized, even if it
+        * wasn't valid.  Should modify behavior ONLY of parse functions, much
+        * like switch_cpu() above.
+        */
+       int (*directive) (const char *name, valparamhead *valparams,
+                         /*@null@*/ valparamhead *objext_valparams,
+                         sectionhead *headp);
+
+       /* Creates an instruction.  Creates a bytecode by matching the
+        * instruction data and the parameters given with a valid instruction.
+        * If no match is found (the instruction is invalid), returns NULL.
+        * All zero data indicates an empty instruction should be created.
+        */
+       /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4],
+                                          int num_operands, /*@null@*/
+                                          insn_operandhead *operands);
+
+       /* Handle an instruction prefix by modifying bc as necessary. */
+       void (*handle_prefix) (bytecode *bc, const unsigned long data[4]);
+
+       /* Handle a segment register instruction prefix by modifying bc as
+        * necessary.
+        */
+       void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg);
+
+       /* Handle memory expression segment overrides by modifying ea as
+        * necessary.
+        */
+       void (*handle_seg_override) (effaddr *ea, unsigned long segreg);
+
+       /* Convert an expression into an effective address. */
+       effaddr * (*ea_new_expr) (/*@keep@*/ expr *e);
+    } parse;
+
     struct {
        /* Maximum used bytecode type value+1.  Should be set to
         * BYTECODE_TYPE_BASE if no additional bytecode types are defined by
@@ -48,10 +138,77 @@ struct arch {
                           const section *sect, void *d,
                           output_expr_func output_expr);
     } bc;
+
+    /* Gets the equivalent register size in bytes.  Returns 0 if there is no
+     * suitable equivalent size.
+     */
+    unsigned int (*get_reg_size) (unsigned long reg);
+
+    void (*reg_print) (FILE *f, unsigned long reg);
+    void (*segreg_print) (FILE *f, unsigned long segreg);
+
+    /* Deletes the arch-specific data in ea.  May be NULL if no special
+     * deletion is required (e.g. there's no dynamically allocated pointers
+     * in the ea data).
+     */
+    void (*ea_data_delete) (effaddr *ea);
+
+    void (*ea_data_print) (FILE *f, const effaddr *ea);
+};
+
+struct insn_operand {
+    /*@reldef@*/ STAILQ_ENTRY(insn_operand) link;
+
+    enum {
+       INSN_OPERAND_REG = 1,   /* a register */
+       INSN_OPERAND_SEGREG,    /* a segment register */
+       INSN_OPERAND_MEMORY,    /* an effective address (memory reference) */
+       INSN_OPERAND_IMM        /* an immediate or jump target */
+    } type;
+
+    union {
+       unsigned long reg;      /* arch data for reg/segreg */
+       effaddr *ea;            /* effective address for memory references */
+       expr *val;              /* value of immediate or jump target */
+    } data;
+
+    unsigned long targetmod;   /* arch target modifier, 0 if none */
+
+    /* Specified size of the operand, in bytes.  0 if not user-specified. */
+    unsigned int size;
 };
 
+/* insn_operand constructors.  operand_new_imm() will look for cases of a
+ * single register and create an INSN_OPERAND_REG variant of insn_operand.
+ */
+insn_operand *operand_new_reg(unsigned long reg);
+insn_operand *operand_new_segreg(unsigned long segreg);
+insn_operand *operand_new_mem(/*@only@*/ effaddr *ea);
+insn_operand *operand_new_imm(/*@only@*/ expr *val);
+
+void operand_print(FILE *f, const insn_operand *op);
+
+#define ops_initialize(headp)  STAILQ_INIT(headp)
+#define ops_first(headp)       STAILQ_FIRST(headp)
+#define ops_next(cur)          STAILQ_NEXT(cur, link)
+
+/* Deletes operands linked list.  Deletes content of each operand if content is
+ * nonzero.
+ */
+void ops_delete(insn_operandhead *headp, int content);
+
+/* Adds op to the list of operands headp.
+ * NOTE: Does not make a copy of op; so don't pass this function
+ * static or local variables, and discard the op pointer after calling
+ * this function.  If op was actually appended (it wasn't NULL), then
+ * returns op, otherwise returns NULL.
+ */
+/*@null@*/ insn_operand *ops_append(insn_operandhead *headp,
+                                   /*@returned@*/ /*@null@*/ insn_operand *op);
+
+void ops_print(FILE *f, const insn_operandhead *headp);
+
 /* Available architectures */
-#include "arch/x86/x86arch.h"
 extern arch x86_arch;
 
 extern arch *cur_arch;
index 15aa5b3924bece07dd1998c73bf84e15896a60e2..6a46d8c34245d865b6e973190de9e46d49e25666 100644 (file)
@@ -103,6 +103,12 @@ imm_new_expr(expr *expr_ptr)
     return im;
 }
 
+const expr *
+ea_get_disp(const effaddr *ptr)
+{
+    return ptr->disp;
+}
+
 void
 ea_set_len(effaddr *ptr, unsigned char len)
 {
@@ -125,6 +131,30 @@ ea_set_nosplit(effaddr *ptr, unsigned char nosplit)
     ptr->nosplit = nosplit;
 }
 
+/*@-nullstate@*/
+void
+ea_delete(effaddr *ea)
+{
+    if (cur_arch->ea_data_delete)
+       cur_arch->ea_data_delete(ea);
+    expr_delete(ea->disp);
+    xfree(ea);
+}
+/*@=nullstate@*/
+
+/*@-nullstate@*/
+void
+ea_print(FILE *f, const effaddr *ea)
+{
+    fprintf(f, "%*sDisp=", indent_level, "");
+    expr_print(f, ea->disp);
+    fprintf(f, "\n%*sLen=%u\n", indent_level, "", (unsigned int)ea->len);
+    fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit);
+    if (cur_arch->ea_data_print)
+       cur_arch->ea_data_print(f, ea);
+}
+/*@=nullstate@*/
+
 void
 bc_set_multiple(bytecode *bc, expr *e)
 {
@@ -258,6 +288,7 @@ bc_delete(bytecode *bc)
            break;
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_data(bc);
+           assert(cur_objfmt != NULL);
            if (cur_objfmt->bc_objfmt_data_delete)
                cur_objfmt->bc_objfmt_data_delete(objfmt_data->type,
                                                  objfmt_data->data);
@@ -336,6 +367,7 @@ bc_print(FILE *f, const bytecode *bc)
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_const_data(bc);
            fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, "");
+           assert(cur_objfmt != NULL);
            if (cur_objfmt->bc_objfmt_data_print)
                cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type,
                                                 objfmt_data->data);
@@ -408,7 +440,7 @@ bc_resolve_reserve(bytecode_reserve *reserve, unsigned long *len, int save,
     expr_expand_labelequ(*tempp, sect, 1, resolve_label);
     num = expr_get_intnum(tempp);
     if (!num) {
-       if (expr_contains(temp, EXPR_FLOAT))
+       if (temp && expr_contains(temp, EXPR_FLOAT))
            ErrorAt(line,
                    _("expression must not contain floating point value"));
        retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
@@ -534,10 +566,10 @@ bc_resolve(bytecode *bc, int save, const section *sect,
        case BC_ALIGN:
            /* TODO */
            InternalError(_("TODO: align bytecode not implemented!"));
-           break;
+           /*break;*/
        case BC_OBJFMT_DATA:
            InternalError(_("resolving objfmt data bytecode?"));
-           break;
+           /*break;*/
        default:
            if (bc->type < cur_arch->bc.type_max)
                retval = cur_arch->bc.bc_resolve(bc, save, sect,
@@ -559,7 +591,7 @@ bc_resolve(bytecode *bc, int save, const section *sect,
        expr_expand_labelequ(*tempp, sect, 1, resolve_label);
        num = expr_get_intnum(tempp);
        if (!num) {
-           if (expr_contains(temp, EXPR_FLOAT))
+           if (temp && expr_contains(temp, EXPR_FLOAT))
                ErrorAt(bc->line,
                        _("expression must not contain floating point value"));
            retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
@@ -716,7 +748,7 @@ bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize,
        case BC_ALIGN:
            /* TODO */
            InternalError(_("TODO: align bytecode not implemented!"));
-           break;
+           /*break;*/
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_data(bc);
            if (output_bc_objfmt_data)
index d61c6bcd74e3b26e15c361781a6554ffcd1b54ba..e68a7ac2ab01e21c15ad9e9aead0151f9a037076 100644 (file)
@@ -43,8 +43,11 @@ typedef enum {
 /*@only@*/ immval *imm_new_int(unsigned long int_val);
 /*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e);
 
+/*@observer@*/ const expr *ea_get_disp(const effaddr *ea);
 void ea_set_len(effaddr *ea, unsigned char len);
 void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
+void ea_delete(/*@only@*/ effaddr *ea);
+void ea_print(FILE *f, const effaddr *ea);
 
 void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e);
 
index 1f0ed2df280af019db260bf18e8501f67cae14d0..a562c2baa65cb2b2870bda952ae79e1038582a63 100644 (file)
@@ -39,11 +39,7 @@ struct ExprItem {
        expr *expn;
        intnum *intn;
        floatnum *flt;
-       /* FIXME: reg structure is moderately x86-specific (namely size) */
-       struct reg {
-           unsigned char num;
-           unsigned char size; /* in bits, eg AX=16, EAX=32 */
-       } reg;
+       unsigned long reg;
     } data;
 };
 
@@ -62,6 +58,9 @@ struct expr {
  *
  * Stops early (and returns 1) if func returns 1.  Otherwise returns 0.
  */
+int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d,
+                                 int (*func) (/*@null@*/ const ExprItem *ei,
+                                              /*@null@*/ void *d));
 int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d,
                            int (*func) (/*@null@*/ ExprItem *ei,
                                         /*@null@*/ void *d));
@@ -88,6 +87,6 @@ void expr_order_terms(expr *e);
 /* Copy entire expression EXCEPT for index "except" at *top level only*. */
 expr *expr_copy_except(const expr *e, int except);
 
-int expr_contains(expr *e, ExprType t);
+int expr_contains(const expr *e, ExprType t);
 
 #endif
index ade3789aa4946d96921f8c64321777564dba8dfb..aa2775280c439746a549ae9d47e090216240a2d1 100644 (file)
 #include "expr.h"
 #include "symrec.h"
 
+#include "bytecode.h"
 #include "section.h"
 
+#include "arch.h"
+
 #include "expr-int.h"
 
 
@@ -135,12 +138,11 @@ ExprFloat(floatnum *f)
 }
 
 ExprItem *
-ExprReg(unsigned char reg, unsigned char size)
+ExprReg(unsigned long reg)
 {
     ExprItem *e = xmalloc(sizeof(ExprItem));
     e->type = EXPR_REG;
-    e->data.reg.num = reg;
-    e->data.reg.size = size;
+    e->data.reg = reg;
     return e;
 }
 
@@ -662,8 +664,7 @@ expr_copy_except(const expr *e, int except)
                    dest->data.flt = floatnum_copy(src->data.flt);
                    break;
                case EXPR_REG:
-                   dest->data.reg.num = src->data.reg.num;
-                   dest->data.reg.size = src->data.reg.size;
+                   dest->data.reg = src->data.reg;
                    break;
                default:
                    break;
@@ -709,16 +710,16 @@ expr_delete(expr *e)
 /*@=mustfree@*/
 
 static int
-expr_contains_callback(ExprItem *ei, void *d)
+expr_contains_callback(const ExprItem *ei, void *d)
 {
     ExprType *t = d;
     return (ei->type & *t);
 }
 
 int
-expr_contains(expr *e, ExprType t)
+expr_contains(const expr *e, ExprType t)
 {
-    return expr_traverse_leaves_in(e, &t, expr_contains_callback);
+    return expr_traverse_leaves_in_const(e, &t, expr_contains_callback);
 }
 
 /* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like
@@ -801,6 +802,33 @@ expr_traverse_nodes_post(expr *e, void *d,
     return func(e, d);
 }
 
+/* Traverse over expression tree in order, calling func for each leaf
+ * (non-operation).  The data pointer d is passed to each func call.
+ *
+ * Stops early (and returns 1) if func returns 1.  Otherwise returns 0.
+ */
+int
+expr_traverse_leaves_in_const(const expr *e, void *d,
+                             int (*func) (/*@null@*/ const ExprItem *ei,
+                                          /*@null@*/ void *d))
+{
+    int i;
+
+    if (!e)
+       return 0;
+
+    for (i=0; i<e->numterms; i++) {
+       if (e->terms[i].type == EXPR_EXPR) {
+           if (expr_traverse_leaves_in_const(e->terms[i].data.expn, d, func))
+               return 1;
+       } else {
+           if (func(&e->terms[i], d))
+               return 1;
+       }
+    }
+    return 0;
+}
+
 /* Traverse over expression tree in order, calling func for each leaf
  * (non-operation).  The data pointer d is passed to each func call.
  *
@@ -877,10 +905,23 @@ expr_get_symrec(expr **ep, int simplify)
 }
 /*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
 
+/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/
+const unsigned long *
+expr_get_reg(expr **ep, int simplify)
+{
+    if (simplify)
+       *ep = expr_simplify(*ep);
+
+    if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_REG)
+       return &((*ep)->terms[0].data.reg);
+    else
+       return NULL;
+}
+/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
+
 void
 expr_print(FILE *f, const expr *e)
 {
-    static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
     char opstr[3];
     int i;
 
@@ -982,9 +1023,7 @@ expr_print(FILE *f, const expr *e)
                floatnum_print(f, e->terms[i].data.flt);
                break;
            case EXPR_REG:
-               if (e->terms[i].data.reg.size == 32)
-                   fprintf(f, "e");
-               fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]);
+               cur_arch->reg_print(f, e->terms[i].data.reg);
                break;
            case EXPR_NONE:
                break;
index fb97248beed420d33b71f21aae4518f23f7499e7..4521286c69b89bcb19ad3f1f889da0d9fbb6c271 100644 (file)
@@ -31,7 +31,7 @@ typedef struct ExprItem ExprItem;
 /*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *);
 /*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *);
 /*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *);
-/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char size);
+/*@only@*/ ExprItem *ExprReg(unsigned long reg);
 
 #define expr_new_tree(l,o,r) \
     expr_new ((o), ExprExpr(l), ExprExpr(r))
@@ -79,6 +79,13 @@ void expr_expand_labelequ(expr *e, const section *srcsect, int withstart,
 /*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep,
                                                         int simplify);
 
+/* Gets the register value of e if the expression is just a register.  If the
+ * expression is more complex, returns NULL.  Simplifies the expr first if
+ * simplify is nonzero.
+ */
+/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep,
+                                                            int simplify);
+
 void expr_print(FILE *f, /*@null@*/ const expr *);
 
 #endif
index f34115126ddb074fdf88366bc99aa1c8c612c3bf..f702596c608a6c3965e4b8e720c77451a9845def 100644 (file)
@@ -25,7 +25,7 @@
 #include "bytecode.h"
 #include "bc-int.h"
 #include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
 
 START_TEST(test_x86_ea_new_reg)
 {
index ec9c001d237e8a09f0db84a337b10c7440fdaa8e..86f7f320084b87ff4903cdb713548db75b632671 100644 (file)
@@ -32,7 +32,7 @@
 
 #include "bytecode.h"
 #include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
 
 typedef enum {
     REG_AX = 0,
index 3d16e3d317df45b374782ec83b21c4a992f01b58..7bfef4d0085a7f2b1397a25a83c7b4384d464dbf 100644 (file)
@@ -3,10 +3,19 @@
 YASMARCHFILES += \
        src/arch/x86/x86arch.c  \
        src/arch/x86/x86arch.h  \
-       src/arch/x86/x86-int.h  \
        src/arch/x86/x86bc.c    \
-       src/arch/x86/x86expr.c
+       src/arch/x86/x86expr.c  \
+       x86id.c
+
+x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+       re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
+
+BUILT_SOURCES += \
+       x86id.c
+
+CLEANFILES += \
+       x86id.c
 
 EXTRA_DIST += \
        src/arch/x86/README     \
-       src/arch/x86/instrs.dat
+       src/arch/x86/x86id.re
diff --git a/modules/arch/x86/instrs.dat b/modules/arch/x86/instrs.dat
deleted file mode 100644 (file)
index 02e5ad9..0000000
+++ /dev/null
@@ -1,1208 +0,0 @@
-; $IdPath$
-; List of valid instruction/operand combinations
-;
-;    Copyright (C) 2001  Peter Johnson
-;
-;    This file is part of YASM.
-;
-;    YASM is free software; you can redistribute it and/or modify
-;    it under the terms of the GNU General Public License as published by
-;    the Free Software Foundation; either version 2 of the License, or
-;    (at your option) any later version.
-;
-;    YASM is distributed in the hope that it will be useful,
-;    but WITHOUT ANY WARRANTY; without even the implied warranty of
-;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;    GNU General Public License for more details.
-;
-;    You should have received a copy of the GNU General Public License
-;    along with this program; if not, write to the Free Software
-;    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-;
-; Meanings of codes:
-;  $x refers to operand x
-;  "nil" in a field indicates the lack of that field in the instruction
-;   (there MUST be some text in every field in this document)
-;  Sizes are in bits (8,16,32 are the only valid quantities)
-;
-; Column definitions:
-;  Inst     - Instruction, should be lowercase
-;  Operands - Single combination of valid operands
-;             "TO" is not counted in the operand count.
-;  OpSize   - Fixed operand size.  Can generate prefix byte.
-;  Opcode   - One or two bytes of opcode.
-;  EffAddr  - Effective Address (ModRM/SIB/Off).  First value is the memory
-;             operand, second specifies what value goes into the reg/spare
-;             bits in the ModRM byte.
-;             $xr indicates operand is register, not ModRM (needs conversion to RM)
-;             $xi indicates operand is immediate (2nd parm is size in bits)
-;  Imm      - Immediate source operand and forced size (in bits).
-;             "s" after size indicates signed number
-;             A number instead of a $x is a hex constant value.
-;
-; A ':' at the beginning of the line means that the instruction following the
-;  ':' is a synonym for the instruction in the 2nd column.
-;
-; See the parser file for a list of possible operand values and their meanings.
-; gen_instr.pl translates this list into lexer and parser code.
-;
-; Instructions are listed in the same order as that in GNU binutils
-;  /include/opcode/i386.h, used for the GAS assembler.  See
-;  <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h?cvsroot=src>.
-;
-; TODO:
-;  Finish instructions (may require changing parser code).
-;  Doublecheck instruction encodings, allowable operands.
-;  Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes).
-;  Doublecheck AMD and Cyrix instructions.
-;  Doublecheck the segreg mov instructions.
-;
-; Instruction Groupings (to shorten parser code).
-;  The $0.1, $0.2, and $0.3 will get replaced with the parameters given for
-;   the instruction using the group during lexing & parsing.  These parameters
-;   may be in the opcode, opsize, effaddr, or immediate.
-;  When opsize is a parameter, its usage in instructions that use the group
-;   looks slightly different than normal, because the parameters are
-;   specified in hexadecimal while the normal opsize usage is in decimal.
-;   Thus 10 and 20 are used instead of 16 and 32 respectively.
-;  The first CPU grouping for the instruction is OR'ed with the CPU value in
-;   the group CPU fields with @0 in their list.  This allows one grouping to
-;   be used for instructions with different CPU values.
-;  Restrictions on groupings:
-;   - $0.? may not appear in the operand, the first part of the effaddr, the
-;     second part of the imm, or the CPU fields.
-;   - @0, @1 may only appear in the CPU field.
-;  Restrictions on instructions based on groupings:
-;   - no other operand combinations are allowed (eg, if an instruction uses a
-;     group, that must be the ONLY line for the instruction)
-;
-; Notes on code generation:
-;  Each group generates a lex token of the group name (sans !).  Bison rules
-;   are generated for each of the operand combinations for the group just as
-;   with a regular instruction, except for the addition of the $0.? fields.
-;   Each $0.? field is replaced by $1.d? in the generated code (eg,
-;   $0.1->$1.d1, etc).
-;  When an instruction that uses a group is encountered, eg:
-;   inst!grpname  parm1[,parm2[,parm3]]
-;  The following lex code is generated:
-;   inst { yylval.groupdata[0]=0xparm1; return GRPNAME; }
-;  (and additional yylval.groupdata[#-1]=0xparm#; if needed)
-;
-; KEY
-;
-; !Grp Operands                OpSize  Opcode          EffAddr         Imm     CPU
-; Inst Operands                OpSize  Opcode          EffAddr         Imm     CPU
-; Inst!Grp             Parameters      CPU @0          CPU @1
-;
-; Groupings used throughout
-;
-;  One byte opcode instructions with no operands:
-!onebyte       nil             $0.1    $0.2            nil             nil     @0
-;  Two byte opcode instructions with no operands:
-!twobyte       nil             nil     $0.1,$0.2       nil             nil     @0
-;  Three byte opcode instructions with no operands:
-!threebyte     nil             nil     $0.1,$0.2,$0.3  nil             nil     @0
-;  One byte opcode instructions with general memory operand:
-!onebytemem    mem             nil     $0.1            $1,$0.2         nil     @0
-;  Two byte opcode instructions with general memory operand:
-!twobytemem    mem             nil     $0.1,$0.2       $1,$0.3         nil     @0
-;
-; Move instructions
-;
-; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89).
-mov    reg8,reg8               nil     88              $1r,$2          nil     8086
-mov    reg16,reg16             16      89              $1r,$2          nil     8086
-mov    reg32,reg32             32      89              $1r,$2          nil     386
-mov    mem,reg8                nil     88              $1,$2           nil     8086
-mov    mem8x,reg8              nil     88              $1,$2           nil     8086
-mov    mem,reg16               16      89              $1,$2           nil     8086
-mov    mem16x,reg16            16      89              $1,$2           nil     8086
-mov    mem,reg32               32      89              $1,$2           nil     386
-mov    mem32x,reg32            32      89              $1,$2           nil     386
-mov    reg8,mem8               nil     8A              $2,$1           nil     8086
-mov    reg16,mem16             16      8B              $2,$1           nil     8086
-mov    reg32,mem32             32      8B              $2,$1           nil     386
-mov    mem,segreg              nil     8C              $1,$2           nil     8086
-mov    reg16,segreg            16      8C              $1r,$2          nil     8086
-mov    mem16x,segreg           16      8C              $1,$2           nil     8086
-mov    reg32,segreg            32      8C              $1r,$2          nil     386
-mov    mem32x,segreg           32      8C              $1,$2           nil     386
-mov    segreg,mem              nil     8E              $2,$1           nil     8086
-mov    segreg,rm16x            nil     8E              $2,$1           nil     8086
-mov    segreg,rm32x            nil     8E              $2,$1           nil     386
-;mov   reg_al,memoff8
-;mov   reg_ax,memoff16
-;mov   reg_eax,memoff32
-;mov   memoff8,reg_al
-;mov   memoff16,reg_ax
-;mov   memoff32,reg_eax
-mov    reg8,imm8               nil     B0+$1           nil             $2,8    8086
-mov    reg16,imm16             16      B8+$1           nil             $2,16   8086
-mov    reg32,imm32             32      B8+$1           nil             $2,32   386
-mov    mem8x,imm8              nil     C6              $1,0            $2,8    8086
-mov    mem,imm8x               nil     C6              $1,0            $2,8    8086
-mov    mem16x,imm16            16      C7              $1,0            $2,16   8086
-mov    mem,imm16x              16      C7              $1,0            $2,16   8086
-mov    mem32x,imm32            32      C7              $1,0            $2,32   8086
-mov    mem,imm32x              32      C7              $1,0            $2,32   8086
-mov    CRREG_NOTCR4,reg32      nil     0F,22           $2r,$1          nil     386,PRIV
-mov    CR4,reg32               nil     0F,22           $2r,$1          nil     P5,PRIV
-mov    reg32,CRREG_NOTCR4      nil     0F,20           $1r,$2          nil     386,PRIV
-mov    reg32,CR4               nil     0F,20           $1r,$2          nil     P5,PRIV
-mov    reg32,DRREG             nil     0F,21           $1r,$2          nil     386,PRIV
-mov    DRREG,reg32             nil     0F,23           $2r,$1          nil     386,PRIV
-;
-; Move with sign/zero extend
-;
-!movszx        reg16,rm8               16      0F,$0.1         $2,$1           nil     386
-!movszx        reg32,rm8x              32      0F,$0.1         $2,$1           nil     386
-!movszx        reg32,rm16x             nil     0F,$0.1+1       $2,$1           nil     386
-movsx!movszx           BE
-movzx!movszx           B6
-;
-; Push instructions
-;
-push   mem16x                  16      FF              $1,6            nil     8086
-push   mem32x                  32      FF              $1,6            nil     386
-push   reg16                   16      50+$1           nil             nil     8086
-push   reg32                   32      50+$1           nil             nil     386
-push   imm8x                   nil     6A              nil             $1,8    8086
-push   imm16x                  16      68              nil             $1,16   8086
-push   imm32x                  32      68              nil             $1,32   386
-push   reg_cs                  nil     0E              nil             nil     8086
-push   reg_ss                  nil     16              nil             nil     8086
-push   reg_ds                  nil     1E              nil             nil     8086
-push   reg_es                  nil     06              nil             nil     8086
-push   reg_fs                  nil     0F,A0           nil             nil     386
-push   reg_gs                  nil     0F,A8           nil             nil     386
-pusha!onebyte          nil,60          186
-pushad!onebyte         20,60           386
-pushaw!onebyte         10,60           186
-;
-; Pop instructions
-;
-pop    mem16x                  16      8F              $1,0            nil     8086
-pop    mem32x                  32      8F              $1,0            nil     386
-pop    reg16                   16      58+$1           nil             nil     8086
-pop    reg32                   32      58+$1           nil             nil     386
-pop    reg_ds                  nil     1F              nil             nil     8086
-pop    reg_es                  nil     07              nil             nil     8086
-pop    reg_ss                  nil     17              nil             nil     8086
-pop    reg_fs                  nil     0F,A1           nil             nil     386
-pop    reg_gs                  nil     0F,A9           nil             nil     386
-popa!onebyte           nil,61          186
-popad!onebyte          20,61           386
-popaw!onebyte          10,61           186
-;
-; Exchange instructions
-;
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg8,reg8               nil     86              $1r,$2          nil     8086
-xchg   mem,reg8                nil     86              $1,$2           nil     8086
-xchg   mem8x,reg8              nil     86              $1,$2           nil     8086
-xchg   reg8,mem8               nil     86              $2,$1           nil     8086
-xchg   reg_ax,reg16            16      90+$2           nil             nil     8086
-xchg   reg16,reg_ax            16      90+$1           nil             nil     8086
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg16,reg16             16      87              $1r,$2          nil     8086
-xchg   mem,reg16               16      87              $1,$2           nil     8086
-xchg   mem16x,reg16            16      87              $1,$2           nil     8086
-xchg   reg16,mem16             16      87              $2,$1           nil     8086
-xchg   reg_eax,reg32           32      90+$2           nil             nil     386
-xchg   reg32,reg_eax           32      90+$1           nil             nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg32,reg32             32      87              $1r,$2          nil     386
-xchg   mem,reg32               32      87              $1,$2           nil     386
-xchg   mem32x,reg32            32      87              $1,$2           nil     386
-xchg   reg32,mem32             32      87              $2,$1           nil     386
-;
-; In/out from ports
-;
-in     reg_al,imm8             nil     E4              nil             $2,8    8086
-in     reg_ax,imm8             16      E5              nil             $2,8    8086
-in     reg_eax,imm8            32      E5              nil             $2,8    386
-in     reg_al,reg_dx           nil     EC              nil             nil     8086
-in     reg_ax,reg_dx           16      ED              nil             nil     8086
-in     reg_eax,reg_dx          32      ED              nil             nil     386
-out    imm8,reg_al             nil     E6              nil             $1,8    8086
-out    imm8,reg_ax             16      E7              nil             $1,8    8086
-out    imm8,reg_eax            32      E7              nil             $1,8    386
-out    reg_dx,reg_al           nil     EE              nil             nil     8086
-out    reg_dx,reg_ax           16      EF              nil             nil     8086
-out    reg_dx,reg_eax          32      EF              nil             nil     386
-;
-; Load effective address
-;
-lea    reg16,mem16             16      8D              $2,$1           nil     8086
-lea    reg32,mem32             32      8D              $2,$1           nil     386
-;
-; Load segment registers from memory
-;
-lds    reg16,mem               16      C5              $2,$1           nil     8086
-lds    reg32,mem               32      C5              $2,$1           nil     386
-les    reg16,mem               16      C4              $2,$1           nil     8086
-les    reg32,mem               32      C4              $2,$1           nil     386
-lfs    reg16,mem               16      0F,B4           $2,$1           nil     386
-lfs    reg32,mem               32      0F,B4           $2,$1           nil     386
-lgs    reg16,mem               16      0F,B5           $2,$1           nil     386
-lgs    reg32,mem               32      0F,B5           $2,$1           nil     386
-lss    reg16,mem               16      0F,B2           $2,$1           nil     386
-lss    reg32,mem               32      0F,B2           $2,$1           nil     386
-;
-; Flags register instructions
-;
-clc!onebyte            nil,F8          8086
-cld!onebyte            nil,FC          8086
-cli!onebyte            nil,FA          8086
-clts!twobyte           0F,06           286,PRIV
-cmc!onebyte            nil,F5          8086
-lahf!onebyte           nil,9F          8086
-sahf!onebyte           nil,9E          8086
-pushf!onebyte          nil,9C          8086
-pushfd!onebyte         20,9C           386
-pushfw!onebyte         10,9C           8086
-popf!onebyte           nil,9D          8086
-popfd!onebyte          20,9D           386
-popfw!onebyte          10,9D           8086
-stc!onebyte            nil,F9          8086
-std!onebyte            nil,FD          8086
-sti!onebyte            nil,FB          8086
-;
-; Arithmetic
-;
-;  General arithmetic
-!arith reg_al,imm8             nil     $0.1+4          nil             $2,8    8086
-!arith reg_ax,imm16            16      $0.1+5          nil             $2,16   8086
-!arith reg_eax,imm32           32      $0.1+5          nil             $2,32   386
-!arith reg8,imm8               nil     80              $1r,$0.2        $2,8    8086
-!arith mem8x,imm               nil     80              $1,$0.2         $2,8    8086
-!arith mem,imm8x               nil     80              $1,$0.2         $2,8    8086
-!arith reg16,imm               16      81              $1r,$0.2        $2,16   8086
-!arith mem16x,imm              16      81              $1,$0.2         $2,16   8086
-!arith reg16,imm16x            16      81              $1r,$0.2        $2,16   8086
-!arith mem,imm16x              16      81              $1,$0.2         $2,16   8086
-!arith reg32,imm               32      81              $1r,$0.2        $2,32   386
-!arith mem32x,imm              32      81              $1,$0.2         $2,32   386
-!arith reg32,imm32x            32      81              $1r,$0.2        $2,32   386
-!arith mem,imm32x              32      81              $1,$0.2         $2,32   386
-!arith reg16,imm8x             16      83              $1r,$0.2        $2,8s   8086
-!arith mem16x,imm8x            16      83              $1,$0.2         $2,8s   8086
-!arith reg32,imm8x             32      83              $1r,$0.2        $2,8s   386
-!arith mem32x,imm8x            32      83              $1,$0.2         $2,8s   386
-; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1).
-!arith reg8,reg8               nil     $0.1            $1r,$2          nil     8086
-!arith reg16,reg16             16      $0.1+1          $1r,$2          nil     8086
-!arith reg32,reg32             32      $0.1+1          $1r,$2          nil     386
-!arith mem,reg8                nil     $0.1            $1,$2           nil     8086
-!arith mem8x,reg8              nil     $0.1            $1,$2           nil     8086
-!arith mem,reg16               16      $0.1+1          $1,$2           nil     8086
-!arith mem16x,reg16            16      $0.1+1          $1,$2           nil     8086
-!arith mem,reg32               32      $0.1+1          $1,$2           nil     386
-!arith mem32x,reg32            32      $0.1+1          $1,$2           nil     386
-!arith reg8,mem8               nil     $0.1+2          $2,$1           nil     8086
-!arith reg16,mem16             16      $0.1+3          $2,$1           nil     8086
-!arith reg32,mem32             32      $0.1+3          $2,$1           nil     386
-;  INC/DEC
-!incdec        rm8x                    nil     FE              $1,$0.1         nil     8086
-!incdec        mem16x                  16      FF              $1,$0.1         nil     8086
-!incdec        mem32x                  32      FF              $1,$0.1         nil     386
-!incdec        reg16                   16      $0.2+$1         nil             nil     8086
-!incdec        reg32                   32      $0.2+$1         nil             nil     386
-;  "F6" opcodes (DIV/IDIV/MUL/NEG/NOT):
-!groupf6       rm8x            nil     F6              $1,$0.1         nil     8086
-!groupf6       rm16x           16      F7              $1,$0.1         nil     8086
-!groupf6       rm32x           32      F7              $1,$0.1         nil     386
-add!arith              00,0
-inc!incdec             0,40
-sub!arith              28,5
-dec!incdec             1,48
-sbb!arith              18,3
-cmp!arith              38,7
-test   reg_al,imm8             nil     A8              nil             $2,8    8086
-test   reg_ax,imm16            16      A9              nil             $2,16   8086
-test   reg_eax,imm32           32      A9              nil             $2,32   386
-test   reg8,imm8               nil     F6              $1r,0           $2,8    8086
-test   mem8x,imm               nil     F6              $1,0            $2,8    8086
-test   mem,imm8x               nil     F6              $1,0            $2,8    8086
-test   reg16,imm16             16      F7              $1r,0           $2,16   8086
-test   mem16x,imm              16      F7              $1,0            $2,16   8086
-test   mem,imm16x              16      F7              $1,0            $2,16   8086
-test   reg32,imm32             32      F7              $1r,0           $2,32   386
-test   mem32x,imm              32      F7              $1,0            $2,32   386
-test   mem,imm32x              32      F7              $1,0            $2,32   386
-; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1
-test   reg8,reg8               nil     84              $1r,$2          nil     8086
-test   reg16,reg16             16      85              $1r,$2          nil     8086
-test   reg32,reg32             32      85              $1r,$2          nil     386
-test   mem,reg8                nil     84              $1,$2           nil     8086
-test   mem8x,reg8              nil     84              $1,$2           nil     8086
-test   mem,reg16               16      85              $1,$2           nil     8086
-test   mem16x,reg16            16      85              $1,$2           nil     8086
-test   mem,reg32               32      85              $1,$2           nil     386
-test   mem32x,reg32            32      85              $1,$2           nil     386
-test   reg8,mem8               nil     84              $2,$1           nil     8086
-test   reg16,mem16             16      85              $2,$1           nil     8086
-test   reg32,mem32             32      85              $2,$1           nil     386
-and!arith              20,4
-or!arith               08,1
-xor!arith              30,6
-adc!arith              10,2
-neg!groupf6            3
-not!groupf6            2
-aaa!onebyte            nil,37          8086
-aas!onebyte            nil,3F          8086
-daa!onebyte            nil,27          8086
-das!onebyte            nil,2F          8086
-aad    nil                     nil     D5,0A           nil             nil     8086
-aad    imm8                    nil     D5              nil             $1,8    8086
-aam    nil                     nil     D4,0A           nil             nil     8086
-aam    imm8                    nil     D4              nil             $1,8    8086
-;
-; Conversion instructions
-;
-cbw!onebyte            10,98           8086
-cwde!onebyte           20,98           386
-cwd!onebyte            10,99           8086
-cdq!onebyte            20,99           386
-;
-; Multiplication and division
-;
-mul!groupf6            4
-imul   rm8x                    nil     F6              $1,5            nil     8086
-imul   rm16x                   16      F7              $1,5            nil     8086
-imul   rm32x                   32      F7              $1,5            nil     386
-imul   reg16,rm16              16      0F,AF           $2,$1           nil     386
-imul   reg32,rm32              32      0F,AF           $2,$1           nil     386
-imul   reg16,rm16,imm8x        16      6B              $2,$1           $3,8s   186
-imul   reg32,rm32,imm8x        32      6B              $2,$1           $3,8s   386
-imul   reg16,imm8x             16      6B              $1r,$1          $2,8s   186
-imul   reg32,imm8x             32      6B              $1r,$1          $2,8s   386
-imul   reg16,rm16,imm16        16      69              $2,$1           $3,16s  186
-imul   reg32,rm32,imm32        32      69              $2,$1           $3,32s  386
-imul   reg16,imm16             16      69              $1r,$1          $2,16s  186
-imul   reg32,imm32             32      69              $1r,$1          $2,32s  386
-div!groupf6            6
-idiv!groupf6           7
-;
-; Shifts
-;
-;  Standard
-!shift rm8x,ONE                nil     D0              $1,$0.1         nil     8086
-!shift rm8x,reg_cl             nil     D2              $1,$0.1         nil     8086
-!shift rm8x,imm8               nil     C0              $1,$0.1         $2,8    186
-!shift rm16x,ONE               16      D1              $1,$0.1         nil     8086
-!shift rm16x,reg_cl            16      D3              $1,$0.1         nil     8086
-!shift rm16x,imm8              16      C1              $1,$0.1         $2,8    186
-!shift rm32x,ONE               32      D1              $1,$0.1         nil     386
-!shift rm32x,reg_cl            32      D3              $1,$0.1         nil     386
-!shift rm32x,imm8              32      C1              $1,$0.1         $2,8    386
-;  Doubleword
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,imm8        16      0F,$0.1         $1r,$2          $3,8    386
-!shlrd mem,reg16,imm8          16      0F,$0.1         $1,$2           $3,8    386
-!shlrd mem16x,reg16,imm8       16      0F,$0.1         $1,$2           $3,8    386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,reg_cl      16      0F,$0.1+1       $1r,$2          nil     386
-!shlrd mem,reg16,reg_cl        16      0F,$0.1+1       $1,$2           nil     386
-!shlrd mem16x,reg16,reg_cl     16      0F,$0.1+1       $1,$2           nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,imm8        32      0F,$0.1         $1r,$2          $3,8    386
-!shlrd mem,reg32,imm8          32      0F,$0.1         $1,$2           $3,8    386
-!shlrd mem32x,reg32,imm8       32      0F,$0.1         $1,$2           $3,8    386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,reg_cl      32      0F,$0.1+1       $1r,$2          nil     386
-!shlrd mem,reg32,reg_cl        32      0F,$0.1+1       $1,$2           nil     386
-!shlrd mem32x,reg32,reg_cl     32      0F,$0.1+1       $1,$2           nil     386
-rol!shift              0
-ror!shift              1
-rcl!shift              2
-rcr!shift              3
-sal!shift              4
-shl!shift              4
-shr!shift              5
-sar!shift              7
-shld!shlrd             A4
-shrd!shlrd             AC
-;
-; Control transfer instructions (unconditional)
-;
-; Special format for relative targets:
-; !Grp/Inst    target          AdSize  ShrtOp  NearOp          ShrtCPU NearCPU
-;
-!jmpcall       target          nil     $0.1?$0.2       $0.3    8086    8086
-!jmpcall       imm:imm         nil     $0.4            $2i,nil         $1,16   8086
-!jmpcall       WORD imm:imm    16      $0.4            $2i,16          $1,16   8086
-!jmpcall       DWORD imm:imm   32      $0.4            $2i,32          $1,16   386
-!jmpcall       memfar          nil     FF              $1,$0.4+1       nil     8086
-!jmpcall       WORD memfar     16      FF              $1,$0.4+1       nil     8086
-!jmpcall       DWORD memfar    32      FF              $1,$0.4+1       nil     386
-!jmpcall       mem             nil     FF              $1,$0.4         nil     8086
-!jmpcall       rm16x           16      FF              $1,$0.4         nil     8086
-!jmpcall       rm32x           32      FF              $1,$0.4         nil     386
-call!jmpcall           nil,0,E8,9A,2
-jmp!jmpcall            1,EB,E9,EA,4
-ret!onebyte            nil,C3          8086
-retn   nil                     nil     C3              nil             nil     8086
-retf   nil                     nil     CB              nil             nil     8086
-retn   imm16                   nil     C2              nil             $1,16   8086
-retf   imm16                   nil     CA              nil             $1,16   8086
-enter  imm16,imm8              nil     C8              $1i,16          $2,8    186
-leave!onebyte          nil,C9          186
-;
-; Conditional jumps
-;
-!jcc           target          nil     70+$0.1 0F,80+$0.1      8086    386
-jo!jcc                 0
-jno!jcc                        1
-jb!jcc                 2
-jc!jcc                 2
-jnae!jcc               2
-jnb!jcc                        3
-jnc!jcc                        3
-jae!jcc                        3
-je!jcc                 4
-jz!jcc                 4
-jne!jcc                        5
-jnz!jcc                        5
-jbe!jcc                        6
-jna!jcc                        6
-jnbe!jcc               7
-ja!jcc                 7
-js!jcc                 8
-jns!jcc                        9
-jp!jcc                 A
-jpe!jcc                        A
-jnp!jcc                        B
-jpo!jcc                        B
-jl!jcc                 C
-jnge!jcc               C
-jnl!jcc                        D
-jge!jcc                        D
-jle!jcc                        E
-jng!jcc                        E
-jnle!jcc               F
-jg!jcc                 F
-jcxz           target          16      E3      nil             8086    8086
-jecxz          target          32      E3      nil             386     386
-;
-; Loop instructions
-;
-!loopg         target          nil     E0+$0.1 nil             8086    8086
-!loopg         target,reg_cx   16      E0+$0.1 nil             8086    8086
-!loopg         target,reg_ecx  32      E0+$0.1 nil             386     386
-loop!loopg             2
-loopz!loopg            1
-loope!loopg            1
-loopnz!loopg           0
-loopne!loopg           0
-;
-; Set byte on flag instructions
-;
-!setcc rm8                     nil     0F,90+$0.1      $1,2            nil     386
-seto!setcc             0
-setno!setcc            1
-setb!setcc             2
-setc!setcc             2
-setnae!setcc           2
-setnb!setcc            3
-setnc!setcc            3
-setae!setcc            3
-sete!setcc             4
-setz!setcc             4
-setne!setcc            5
-setnz!setcc            5
-setbe!setcc            6
-setna!setcc            6
-setnbe!setcc           7
-seta!setcc             7
-sets!setcc             8
-setns!setcc            9
-setp!setcc             A
-setpe!setcc            A
-setnp!setcc            B
-setpo!setcc            B
-setl!setcc             C
-setnge!setcc           C
-setnl!setcc            D
-setge!setcc            D
-setle!setcc            E
-setng!setcc            E
-setnle!setcc           F
-setg!setcc             F
-;
-; String instructions
-;
-;  NOTE: cmpsd,movsd can't go to !onebyte group because of other variations
-cmpsb!onebyte          nil,A6          8086
-cmpsw!onebyte          10,A7           8086
-cmpsd  nil                     32      A7              nil             nil     386
-insb!onebyte           nil,6C          8086
-insw!onebyte           10,6D           8086
-insd!onebyte           20,6D           386
-outsb!onebyte          nil,6E          8086
-outsw!onebyte          10,6F           8086
-outsd!onebyte          20,6F           386
-lodsb!onebyte          nil,AC          8086
-lodsw!onebyte          10,AD           8086
-lodsd!onebyte          20,AD           386
-movsb!onebyte          nil,A4          8086
-movsw!onebyte          10,A5           8086
-movsd  nil                     32      A5              nil             nil     386
-scasb!onebyte          nil,AE          8086
-scasw!onebyte          10,AF           8086
-scasd!onebyte          20,AF           386
-stosb!onebyte          nil,AA          8086
-stosw!onebyte          10,AB           8086
-stosd!onebyte          20,AB           386
-xlat!onebyte           nil,D7          8086
-xlatb!onebyte          nil,D7          8086
-;
-; Bit manipulation
-;
-;  Bit tests
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest       reg16,reg16     16      0F,$0.1         $1r,$2          nil     386
-!bittest       mem,reg16       16      0F,$0.1         $1,$2           nil     386
-!bittest       mem16x,reg16    16      0F,$0.1         $1,$2           nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest       reg32,reg32     32      0F,$0.1         $1r,$2          nil     386
-!bittest       mem,reg32       32      0F,$0.1         $1,$2           nil     386
-!bittest       mem32x,reg32    32      0F,$0.1         $1,$2           nil     386
-!bittest       reg16,imm8      16      0F,BA           $1r,$0.2        $2,8    386
-!bittest       mem16x,imm8     16      0F,BA           $1,$0.2         $2,8    386
-!bittest       reg32,imm8      32      0F,BA           $1r,$0.2        $2,8    386
-!bittest       mem32x,imm8     32      0F,BA           $1,$0.2         $2,8    386
-;  Bit scans
-!bsfr  reg16,rm16              16      0F,BC+$0.1      $2,$1           nil     386
-!bsfr  reg32,rm32              32      0F,BC+$0.1      $2,$1           nil     386
-bsf!bsfr               0
-bsr!bsfr               1
-bt!bittest             A3,4
-btc!bittest            BB,7
-btr!bittest            B3,6
-bts!bittest            AB,5
-;
-; Interrupts and operating system instructions
-;
-int    imm8                    nil     CD              nil             $1,8    8086
-int3!onebyte           nil,CC          8086
-int03!onebyte          nil,CC          8086
-into!onebyte           nil,CE          8086
-iret!onebyte           nil,CF          8086
-iretw!onebyte          10,CF           8086
-iretd!onebyte          20,CF           386
-rsm!twobyte            0F,AA           P5,SMM
-bound  reg16,mem16             16      62              $2,$1           nil     186
-bound  reg32,mem32             32      62              $2,$1           nil     386
-hlt!onebyte            nil,F4          8086,PRIV
-nop!onebyte            nil,90          8086
-;
-; Protection control
-;
-;  286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW):
-!prot286       rm16            nil     0F,00           $1,$0.1         nil     286,PROT,@0
-arpl   rm16,reg16              nil     63              $1,$2           nil     286,PROT
-lar    reg16,rm16              16      0F,02           $2,$1           nil     286,PROT
-lar    reg32,rm32              32      0F,02           $2,$1           nil     386,PROT
-lgdt!twobytemem                0F,01,2         286,PRIV
-lidt!twobytemem                0F,01,3         286,PRIV
-lldt!prot286           2               PRIV
-lmsw   rm16                    nil     0F,01           $1,6            nil     286,PRIV
-lsl    reg16,rm16              16      0F,03           $2,$1           nil     286,PROT
-lsl    reg32,rm32              32      0F,03           $2,$1           nil     286,PROT
-ltr!prot286            3               PRIV
-sgdt!twobytemem                0F,01,0         286
-sidt!twobytemem                0F,01,1         286
-sldt   mem1632                 nil     0F,00           $1,0            nil     286
-sldt   reg16                   16      0F,00           $1r,0           nil     286
-sldt   reg32                   32      0F,00           $1r,0           nil     386
-smsw   mem1632                 nil     0F,01           $1,4            nil     286
-smsw   reg16                   16      0F,01           $1r,4           nil     286
-smsw   reg32                   32      0F,01           $1r,4           nil     386
-str!prot286            1
-verr!prot286           4
-verw!prot286           5
-;
-; Floating point instructions
-;
-;  Load
-fld    mem32x                  nil     D9              $1,0            nil     8086,FPU
-fld    mem64x                  nil     DD              $1,0            nil     8086,FPU
-fld    mem80x                  nil     DB              $1,5            nil     8086,FPU
-fld    fpureg                  nil     D9,C0+$1        nil             nil     8086,FPU
-fild   mem16x                  nil     DF              $1,0            nil     8086,FPU
-fild   mem32x                  nil     DB              $1,0            nil     8086,FPU
-fild   mem64x                  nil     DF              $1,5            nil     8086,FPU
-fbld   mem80                   nil     DF              $1,4            nil     8086,FPU
-;  Store
-fst    mem32x                  nil     D9              $1,2            nil     8086,FPU
-fst    mem64x                  nil     DD              $1,2            nil     8086,FPU
-fst    fpureg                  nil     DD,D0+$1        nil             nil     8086,FPU
-fist   mem16x                  nil     DF              $1,2            nil     8086,FPU
-fist   mem32x                  nil     DB              $1,2            nil     8086,FPU
-;  Store (with pop)
-fstp   mem32x                  nil     D9              $1,3            nil     8086,FPU
-fstp   mem64x                  nil     DD              $1,3            nil     8086,FPU
-fstp   mem80x                  nil     DB              $1,7            nil     8086,FPU
-fstp   fpureg                  nil     DD,D8+$1        nil             nil     8086,FPU
-fistp  mem16x                  nil     DF              $1,3            nil     8086,FPU
-fistp  mem32x                  nil     DB              $1,3            nil     8086,FPU
-fistp  mem64x                  nil     DF              $1,7            nil     8086,FPU
-fbstp  mem80                   nil     DF              $1,6            nil     8086,FPU
-;  Exchange (with ST0)
-fxch   fpureg                  nil     D9,C8+$1        nil             nil     8086,FPU
-fxch   ST0,ST0                 nil     D9,C8           nil             nil     8086,FPU
-fxch   ST0,FPUREG_NOTST0       nil     D9,C8+$2        nil             nil     8086,FPU
-fxch   FPUREG_NOTST0,ST0       nil     D9,C8+$1        nil             nil     8086,FPU
-fxch   nil                     nil     D9,C9           nil             nil     8086,FPU
-;  Comparisons
-!fcomg mem32x                  nil     D8              $1,$0.1         nil     8086,FPU
-!fcomg mem64x                  nil     DC              $1,$0.1         nil     8086,FPU
-!fcomg fpureg                  nil     D8,$0.2+$1      nil             nil     8086,FPU
-!fcomg ST0,fpureg              nil     D8,$0.2+$2      nil             nil     8086,FPU
-;  Extended comparisons
-!fcomg2        fpureg                  nil     $0.1,$0.2+$1    nil             nil     @0,FPU
-!fcomg2        ST0,fpureg              nil     $0.1,$0.2+$2    nil             nil     @0,FPU
-;  Comparison (without pop)
-fcom!fcomg             2,D0
-ficom  mem16x                  nil     DE              $1,2            nil     8086,FPU
-ficom  mem32x                  nil     DA              $1,2            nil     8086,FPU
-;  Comparison (with pop)
-fcomp!fcomg            3,D8
-ficomp mem16x                  nil     DE              $1,3            nil     8086,FPU
-ficomp mem32x                  nil     DA              $1,3            nil     8086,FPU
-fcompp!twobyte         DE,D9           8086,FPU
-;  Unordered comparison (with pop)
-fucom!fcomg2           DD,E0           286,FPU
-fucomp!fcomg2          DD,E8           286,FPU
-fucompp!twobyte                DA,E9           286,FPU
-ftst!twobyte           D9,E4           8086,FPU
-fxam!twobyte           D9,E5           8086,FPU
-;  Load constants into ST0
-fld1!twobyte           D9,E8           8086,FPU
-fldl2t!twobyte         D9,E9           8086,FPU
-fldl2e!twobyte         D9,EA           8086,FPU
-fldpi!twobyte          D9,EB           8086,FPU
-fldlg2!twobyte         D9,EC           8086,FPU
-fldln2!twobyte         D9,ED           8086,FPU
-fldz!twobyte           D9,EE           8086,FPU
-;  Arithmetic
-!farith        mem32x                  nil     D8              $1,$0.1         nil     8086,FPU
-!farith        mem64x                  nil     DC              $1,$0.1         nil     8086,FPU
-!farith        fpureg                  nil     D8,$0.2+$1      nil             nil     8086,FPU
-!farith        ST0,ST0                 nil     D8,$0.2         nil             nil     8086,FPU
-!farith        ST0,FPUREG_NOTST0       nil     D8,$0.2+$2      nil             nil     8086,FPU
-!farith        TO fpureg               nil     DC,$0.3+$1      nil             nil     8086,FPU
-!farith        FPUREG_NOTST0,ST0       nil     DC,$0.3+$1      nil             nil     8086,FPU
-!farithp       fpureg          nil     DE,$0.1+$1      nil             nil     8086,FPU
-!farithp       fpureg,ST0      nil     DE,$0.1+$1      nil             nil     8086,FPU
-!fiarith       mem32x          nil     DA              $1,$0.1         nil     8086,FPU
-!fiarith       mem16x          nil     DE              $1,$0.1         nil     8086,FPU
-fadd!farith            0,C0,C0
-faddp!farithp          C0
-fiadd!fiarith          0
-fsub!farith            4,E0,E8
-fisub!fiarith          4
-fsubp!farithp          E8
-fsubr!farith           5,E8,E0
-fisubr!fiarith         5
-fsubrp!farithp         E0
-;  Multiply
-fmul!farith            1,C8,C8
-fimul!fiarith          1
-fmulp!farithp          C8
-;  Divide
-fdiv!farith            6,F0,F8
-fidiv!fiarith          6
-fdivp!farithp          F8
-fdivr!farith           7,F8,F0
-fidivr!fiarith         7
-fdivrp!farithp         F0
-;  Other arithmetic
-f2xm1!twobyte          D9,F0           8086,FPU
-fyl2x!twobyte          D9,F1           8086,FPU
-fptan!twobyte          D9,F2           8086,FPU
-fpatan!twobyte         D9,F3           8086,FPU
-fxtract!twobyte                D9,F4           8086,FPU
-fprem1!twobyte         D9,F5           286,FPU
-fdecstp!twobyte                D9,F6           8086,FPU
-fincstp!twobyte                D9,F7           8086,FPU
-fprem!twobyte          D9,F8           8086,FPU
-fyl2xp1!twobyte                D9,F9           8086,FPU
-fsqrt!twobyte          D9,FA           8086,FPU
-fsincos!twobyte                D9,FB           286,FPU
-frndint!twobyte                D9,FC           8086,FPU
-fscale!twobyte         D9,FD           8086,FPU
-fsin!twobyte           D9,FE           286,FPU
-fcos!twobyte           D9,FF           286,FPU
-fchs!twobyte           D9,E0           8086,FPU
-fabs!twobyte           D9,E1           8086,FPU
-;  Processor control
-fninit!twobyte         DB,E3           8086,FPU
-finit!threebyte                9B,DB,E3        8086,FPU
-fldcw  mem16                   nil     D9              $1,5            nil     8086,FPU
-fnstcw mem16                   nil     D9              $1,7            nil     8086,FPU
-fstcw  mem16                   nil     9B,D9           $1,7            nil     8086,FPU
-fnstsw mem16                   nil     DD              $1,7            nil     8086,FPU
-fnstsw reg_ax                  nil     DF,E0           nil             nil     8086,FPU
-fstsw  mem16                   nil     9B,DD           $1,7            nil     8086,FPU
-fstsw  reg_ax                  nil     9B,DF,E0        nil             nil     8086,FPU
-fnclex!twobyte         DB,E2           8086,FPU
-fclex!threebyte                9B,DB,E2        8086,FPU
-fnstenv!onebytemem     D9,6            8086,FPU
-fstenv!twobytemem      9B,D9,6         8086,FPU
-fldenv!onebytemem      D9,4            8086,FPU
-fnsave!onebytemem      DD,6            8086,FPU
-fsave!twobytemem       9B,DD,6         8086,FPU
-frstor!onebytemem      DD,4            8086,FPU
-ffree  fpureg                  nil     DD,C0+$1        nil             nil     8086,FPU
-ffreep fpureg                  nil     DF,C0+$1        nil             nil     P6,FPU,UNDOC
-fnop!twobyte           D9,D0           8086,FPU
-fwait!onebyte          nil,9B          8086,FPU
-;
-; Prefixes (should the others be here too? should wait be a prefix?)
-;
-wait!onebyte           nil,9B          8086
-;
-; 486 extensions
-;
-;  Compare & exchange, exchange & add
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg8,reg8       nil     0F,$0.1         $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg8        nil     0F,$0.1         $1,$2           nil     @0
-!cmpxchgxadd   mem8x,reg8      nil     0F,$0.1         $1,$2           nil     @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg16,reg16     16      0F,$0.1+1       $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg16       16      0F,$0.1+1       $1,$2           nil     @0
-!cmpxchgxadd   mem16x,reg16    16      0F,$0.1+1       $1,$2           nil     @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg32,reg32     32      0F,$0.1+1       $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg32       32      0F,$0.1+1       $1,$2           nil     @0
-!cmpxchgxadd   mem32x,reg32    32      0F,$0.1+1       $1,$2           nil     @0
-bswap  reg32                   32      0F,C8+$1        nil             nil     486
-xadd!cmpxchgxadd       C0              486
-cmpxchg!cmpxchgxadd    B0              486
-cmpxchg486!cmpxchgxadd A6              486,UNDOC
-invd!twobyte           0F,08           486,PRIV
-wbinvd!twobyte         0F,09           486,PRIV
-invlpg!twobytemem      0F,01,7         486,PRIV
-;
-; 586 and late 486 extensions
-;
-cpuid!twobyte          0F,A2           486
-;
-; Pentium extensions
-;
-wrmsr!twobyte          0F,30           P5,PRIV
-rdtsc!twobyte          0F,31           P5
-rdmsr!twobyte          0F,32           P5,PRIV
-cmpxchg8b      mem64           nil     0F,C7           $1,1            nil     P5
-;
-; Pentium II/Pentium Pro extensions
-;
-sysenter!twobyte       0F,34           P6
-sysexit!twobyte                0F,35           P6,PRIV
-fxsave!twobytemem      0F,AE,0         P6,FPU
-fxrstor!twobytemem     0F,AE,1         P6,FPU
-rdpmc!twobyte          0F,33           P6
-ud2!twobyte            0F,0B           286
-ud1!twobyte            0F,B9           286,UNDOC
-; cmov
-; fcmov
-fcomi!fcomg2           DB,F0           P6
-fucomi!fcomg2          DB,E8           P6
-fcomip!fcomg2          DF,F0           P6
-fucomip!fcomg2         DF,E8           P6
-;
-; Pentium4 extensions
-;
-movnti mem32,reg32             nil     0F,C3           $1,$2           nil     P4
-clflush        mem8                    nil     0F,AE           $1,7            nil     KATMAI
-lfence!threebyte       0F,AE,E8        KATMAI
-mfence!threebyte       0F,AE,F0        KATMAI
-pause!twobyte          F3,90           P4
-;
-; MMX/SSE2 instructions
-;
-;  General
-!mmxsse        MMXREG,rm64             nil     0F,$0.1         $2,$1           nil     @0,MMX
-!mmxsse        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     @1
-;  Shifts
-!pshift        MMXREG,rm64             nil     0F,$0.1         $2,$1           nil     P5,MMX
-!pshift        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     P4,SSE2
-!pshift        MMXREG,imm8             nil     0F,$0.2         $1r,$0.3        $2,8    P5,MMX
-!pshift        XMMREG,imm8             nil     66,0F,$0.2      $1r,$0.3        $2,8    P4,SSE2
-emms!twobyte           0F,77           P5,MMX
-movd   MMXREG,rm32             nil     0F,6E           $2,$1           nil     P5,MMX
-movd   rm32,MMXREG             nil     0F,7E           $1,$2           nil     P5,MMX
-movd   XMMREG,rm32             nil     66,0F,6E        $2,$1           nil     P4,SSE2
-movd   rm32,XMMREG             nil     66,0F,7E        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq   MMXREG,MMXREG           nil     0F,6F           $2r,$1          nil     P5,MMX
-movq   MMXREG,mem64            nil     0F,6F           $2,$1           nil     P5,MMX
-movq   mem64,MMXREG            nil     0F,7F           $1,$2           nil     P5,MMX
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq   XMMREG,XMMREG           nil     F3,0F,7E        $2r,$1          nil     P4,SSE2
-movq   XMMREG,mem64            nil     F3,0F,7E        $2,$1           nil     P4,SSE2
-movq   mem64,XMMREG            nil     66,0F,D6        $1,$2           nil     P4,SSE2
-packssdw!mmxsse                6B              P5              P4,SSE2
-packsswb!mmxsse                63              P5              P4,SSE2
-packuswb!mmxsse                67              P5              P4,SSE2
-paddb!mmxsse           FC              P5              P4,SSE2
-paddw!mmxsse           FD              P5              P4,SSE2
-paddd!mmxsse           FE              P5              P4,SSE2
-paddq!mmxsse           D4              P5              P4,SSE2
-paddsb!mmxsse          EC              P5              P4,SSE2
-paddsw!mmxsse          ED              P5              P4,SSE2
-paddusb!mmxsse         DC              P5              P4,SSE2
-paddusw!mmxsse         DD              P5              P4,SSE2
-pand!mmxsse            DB              P5              P4,SSE2
-pandn!mmxsse           DF              P5              P4,SSE2
-pcmpeqb!mmxsse         74              P5              P4,SSE2
-pcmpeqw!mmxsse         75              P5              P4,SSE2
-pcmpeqd!mmxsse         76              P5              P4,SSE2
-pcmpgtb!mmxsse         64              P5              P4,SSE2
-pcmpgtw!mmxsse         65              P5              P4,SSE2
-pcmpgtd!mmxsse         66              P5              P4,SSE2
-pmaddwd!mmxsse         F5              P5              P4,SSE2
-pmulhw!mmxsse          E5              P5              P4,SSE2
-pmullw!mmxsse          D5              P5              P4,SSE2
-por!mmxsse             EB              P5              P4,SSE2
-psllw!pshift           F1,71,6
-pslld!pshift           F2,72,6
-psllq!pshift           F3,73,6
-psraw!pshift           E1,71,4
-psrad!pshift           E2,72,4
-psrlw!pshift           D1,71,2
-psrld!pshift           D2,72,2
-psrlq!pshift           D3,73,2
-psubb  MMXREG,imm8             nil     0F,F8           $1r,2           $2,8    P5,MMX
-psubb  XMMREG,imm8             nil     66,0F,F8        $1r,2           $2,8    P4,SSE2
-psubw  MMXREG,imm8             nil     0F,F9           $1r,2           $2,8    P5,MMX
-psubw  XMMREG,imm8             nil     66,0F,F9        $1r,2           $2,8    P4,SSE2
-psubd!mmxsse           FA              P5              P4,SSE2
-psubq!mmxsse           FB              P5              P4,SSE2
-psubsb!mmxsse          E8              P5              P4,SSE2
-psubsw!mmxsse          E9              P5              P4,SSE2
-psubusb!mmxsse         D8              P5              P4,SSE2
-psubusw!mmxsse         D9              P5              P4,SSE2
-punpckhbw!mmxsse       68              P5              P4,SSE2
-punpckhwd!mmxsse       69              P5              P4,SSE2
-punpckhdq!mmxsse       6A              P5              P4,SSE2
-punpcklbw!mmxsse       60              P5              P4,SSE2
-punpcklwd!mmxsse       61              P5              P4,SSE2
-punpckldq!mmxsse       62              P5              P4,SSE2
-pxor!mmxsse            EF              P5              P4,SSE2
-;
-; PIII (Katmai) new instructions / SIMD instructions
-;
-;  Standard
-!sseps XMMREG,rm128            nil     0F,$0.1         $2,$1           nil     @0
-!ssess XMMREG,rm128            nil     F3,0F,$0.1      $2,$1           nil     @0
-;  With immediate
-!ssepsimm      XMMREG,rm128,imm8       nil     0F,$0.1 $2,$1           $3,8    KATMAI,SSE
-;  Comparisons
-!ssecmpps      XMMREG,rm128    nil     0F,C2           $2,$1           $0.1,8  KATMAI,SSE
-!ssecmpss      XMMREG,rm128    nil     F3,0F,C2        $2,$1           $0.1,8  KATMAI,SSE
-addps!sseps            58              KATMAI,SSE
-addss!ssess            58              KATMAI,SSE
-andnps!sseps           55              KATMAI,SSE
-andps!sseps            54              KATMAI,SSE
-cmpeqps!ssecmpps       0
-cmpeqss!ssecmpss       0
-cmpleps!ssecmpps       2
-cmpless!ssecmpss       2
-cmpltps!ssecmpps       1
-cmpltss!ssecmpss       1
-cmpneqps!ssecmpps      4
-cmpneqss!ssecmpss      4
-cmpnleps!ssecmpps      6
-cmpnless!ssecmpss      6
-cmpnltps!ssecmpps      5
-cmpnltss!ssecmpss      5
-cmpordps!ssecmpps      7
-cmpordss!ssecmpss      7
-cmpunordps!ssecmpps    3
-cmpunordss!ssecmpss    3
-cmpps!ssepsimm         C2
-cmpss  XMMREG,rm128,imm8       nil     F3,0F,C2        $2,$1           $3,8    KATMAI,SSE
-comiss!sseps           2F              KATMAI,SSE
-cvtpi2ps!sseps         2A              KATMAI,SSE
-cvtps2pi!sseps         2D              KATMAI,SSE
-cvtsi2ss!ssess         2A              KATMAI,SSE
-cvtss2si!ssess         2D              KATMAI,SSE
-cvttps2pi!sseps                2C              KATMAI,SSE
-cvttss2si!ssess                2C              KATMAI,SSE
-divps!sseps            5E              KATMAI,SSE
-divss!ssess            5E              KATMAI,SSE
-ldmxcsr        mem32                   nil     0F,AE           $1,2            nil     KATMAI,SSE
-maskmovq       MMXREG,MMXREG   nil     0F,F7           $2r,$1          nil     KATMAI,MMX
-maxps!sseps            5F              KATMAI,SSE
-maxss!ssess            5F              KATMAI,SSE
-minps!sseps            5D              KATMAI,SSE
-minss!ssess            5D              KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movaps XMMREG,XMMREG           nil     0F,28           $2r,$1          nil     KATMAI,SSE
-movaps XMMREG,mem128           nil     0F,28           $2,$1           nil     KATMAI,SSE
-movaps mem128,XMMREG           nil     0F,29           $1,$2           nil     KATMAI,SSE
-movhlps        XMMREG,XMMREG           nil     0F,12           $2r,$1          nil     KATMAI,SSE
-movhps XMMREG,mem64            nil     0F,16           $2,$1           nil     KATMAI,SSE
-movhps mem64,XMMREG            nil     0F,17           $1,$2           nil     KATMAI,SSE
-movlhps        XMMREG,XMMREG           nil     0F,16           $2r,$1          nil     KATMAI,SSE
-movlps XMMREG,mem64            nil     0F,12           $2,$1           nil     KATMAI,SSE
-movlps mem64,XMMREG            nil     0F,13           $1,$2           nil     KATMAI,SSE
-movmskps       reg32,XMMREG    nil     0F,50           $1r,$2          nil     KATMAI,SSE
-movntps        mem128,XMMREG           nil     0F,2B           $1,$2           nil     KATMAI,SSE
-movntq mem64,MMXREG            nil     0F,E7           $1,$2           nil     KATMAI,MMX
-movntdq        mem128,XMMREG           nil     66,0F,E7        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movss  XMMREG,XMMREG           nil     F3,0F,10        $2r,$1          nil     KATMAI,SSE
-movss  XMMREG,mem64            nil     F3,0F,10        $2,$1           nil     KATMAI,SSE
-movss  mem64,XMMREG            nil     F3,0F,11        $1,$2           nil     KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movups XMMREG,XMMREG           nil     0F,10           $2r,$1          nil     KATMAI,SSE
-movups XMMREG,mem64            nil     0F,10           $2,$1           nil     KATMAI,SSE
-movups mem64,XMMREG            nil     0F,11           $1,$2           nil     KATMAI,SSE
-mulps!sseps            59              KATMAI,SSE
-mulss!ssess            59              KATMAI,SSE
-orps!sseps             56              KATMAI,SSE
-pavgb!mmxsse           E0              KATMAI          P4,SSE2
-pavgw!mmxsse           E3              KATMAI          P4,SSE2
-pextrw reg32,MMXREG,imm8       nil     0F,C5           $1r,$2          $3,8    KATMAI,MMX
-pextrw reg32,XMMREG,imm8       nil     66,0F,C5        $1r,$2          $3,8    P4,SSE2
-pinsrw MMXREG,reg32,imm8       nil     0F,C4           $2r,$1          $3,8    KATMAI,MMX
-pinsrw MMXREG,rm16,imm8        nil     0F,C4           $2,$1           $3,8    KATMAI,MMX
-pinsrw XMMREG,reg32,imm8       nil     66,0F,C4        $2r,$1          $3,8    P4,SSE2
-pinsrw XMMREG,rm16,imm8        nil     66,0F,C4        $2,$1           $3,8    P4,SSE2
-pmaxsw!mmxsse          EE              KATMAI          P4,SSE2
-pmaxub!mmxsse          DE              KATMAI          P4,SSE2
-pminsw!mmxsse          EA              KATMAI          P4,SSE2
-pminub!mmxsse          DA              KATMAI          P4,SSE2
-pmovmskb       reg32,MMXREG    nil     0F,D7           $1r,$2          nil     KATMAI,SSE
-pmovmskb       reg32,XMMREG    nil     66,0F,D7        $1r,$2          nil     P4,SSE2
-pmulhuw!mmxsse         E4              KATMAI          P4,SSE2
-prefetchnta!twobytemem 0F,18,0         KATMAI
-prefetcht0!twobytemem  0F,18,1         KATMAI
-prefetcht1!twobytemem  0F,18,2         KATMAI
-prefetcht2!twobytemem  0F,18,3         KATMAI
-psadbw!mmxsse          F6              KATMAI          KATMAI,SSE
-pshufw MMXREG,rm64,imm8        nil     0F,70           $2,$1           $3,8    KATMAI,MMX
-rcpps!sseps            53              KATMAI,SSE
-rcpss!ssess            53              KATMAI,SSE
-rsqrtps!sseps          52              KATMAI,SSE
-rsqrtss!ssess          52              KATMAI,SSE
-sfence!threebyte       0F,AE,F8        KATMAI
-shufps!ssepsimm                C6
-sqrtps!sseps           51              KATMAI,SSE
-sqrtss!ssess           51              KATMAI,SSE
-stmxcsr        mem32                   nil     0F,AE           $1,3            nil     KATMAI,SSE
-subps!sseps            5C              KATMAI,SSE
-subss!ssess            5C              KATMAI,SSE
-ucomiss!ssess          2E              KATMAI,SSE
-unpckhps!sseps         15              KATMAI,SSE
-unpcklps!sseps         14              KATMAI,SSE
-xorps!sseps            57              KATMAI,SSE
-;
-; SSE2 instructions
-;
-;  Standard
-!sse2pd        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     P4,SSE2
-!sse2sd        XMMREG,rm128            nil     F2,0F,$0.1      $2,$1           nil     P4,SSE2
-;  With immediate
-!sse2pdimm     XMMREG,rm128,imm8       nil     66,0F,$0.1      $2,$1   $3,8    P4,SSE2
-;  Comparisons
-!sse2cmppd     XMMREG,rm128    nil     66,0F,C2        $2,$1           $0.1,8  P4,SSE2
-!sse2cmpsd     XMMREG,rm128    nil     F2,0F,C2        $2,$1           $0.1,8  P4,SSE2
-addpd!sse2pd           58
-addsd!sse2sd           58
-andnpd!sse2pd          55
-andpd!sse2pd           54
-cmpeqpd!sse2cmppd      0
-cmpeqsd!sse2cmpsd      0
-cmplepd!sse2cmppd      2
-cmplesd!sse2cmpsd      2
-cmpltpd!sse2cmppd      1
-cmpltsd!sse2cmpsd      1
-cmpneqpd!sse2cmppd     4
-cmpneqsd!sse2cmpsd     4
-cmpnlepd!sse2cmppd     6
-cmpnlesd!sse2cmpsd     6
-cmpnltpd!sse2cmppd     5
-cmpnltsd!sse2cmpsd     5
-cmpordpd!sse2cmppd     7
-cmpordsd!sse2cmpsd     7
-cmpunordpd!sse2cmppd   3
-cmpunordsd!sse2cmpsd   3
-cmppd!sse2pdimm                C2
-cmpsd  XMMREG,rm128,imm8       nil     F2,0F,C2        $2,$1           $3,8    P4,SSE2
-comisd!sse2pd          2F
-cvtpi2pd!sse2pd                2A
-cvtsi2sd!sse2sd                2A
-divpd!sse2pd           5E
-divsd!sse2sd           5E
-maxpd!sse2pd           5F
-maxsd!sse2sd           5F
-minpd!sse2pd           5D
-minsd!sse2sd           5D
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movapd XMMREG,XMMREG           nil     66,0F,28        $2r,$1          nil     P4,SSE2
-movapd XMMREG,mem128           nil     66,0F,28        $2,$1           nil     P4,SSE2
-movapd mem128,XMMREG           nil     66,0F,29        $1,$2           nil     P4,SSE2
-movhpd XMMREG,mem64            nil     66,0F,16        $2,$1           nil     P4,SSE2
-movhpd mem64,XMMREG            nil     66,0F,17        $1,$2           nil     P4,SSE2
-movlpd XMMREG,mem64            nil     66,0F,12        $2,$1           nil     P4,SSE2
-movlpd mem64,XMMREG            nil     66,0F,13        $1,$2           nil     P4,SSE2
-movmskpd       reg32,XMMREG    nil     66,0F,50        $1r,$2          nil     P4,SSE2
-movntpd        mem128,XMMREG           nil     66,0F,2B        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movsd  XMMREG,XMMREG           nil     F2,0F,10        $2r,$1          nil     P4,SSE2
-movsd  XMMREG,mem64            nil     F2,0F,10        $2,$1           nil     P4,SSE2
-movsd  mem64,XMMREG            nil     F2,0F,11        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movupd XMMREG,XMMREG           nil     66,0F,10        $2r,$1          nil     P4,SSE2
-movupd XMMREG,mem64            nil     66,0F,10        $2,$1           nil     P4,SSE2
-movupd mem64,XMMREG            nil     66,0F,11        $1,$2           nil     P4,SSE2
-mulpd!sse2pd           59
-mulsd!sse2sd           59
-orpd!sse2pd            56
-shufpd!sse2pdimm       C6
-sqrtpd!sse2pd          51
-sqrtsd!sse2sd          51
-subpd!sse2pd           5C
-subsd!sse2sd           5C
-ucomisd!sse2sd         2E
-unpckhpd!sse2pd                15
-unpcklpd!sse2pd                14
-xorpd!sse2pd           57
-cvtdq2pd!ssess         E6              P4,SSE2
-cvtpd2dq!sse2sd                E6
-cvtdq2ps!sseps         5B              P4,SSE2
-cvtpd2pi!sse2pd                2D
-cvtpd2ps!sse2pd                5A
-cvtps2pd!sseps         5A              P4,SSE2
-cvtps2dq!sse2pd                5B
-cvtsd2si!sse2sd                2D
-cvtsd2ss!sse2sd                5A
-cvtss2sd!ssess         5A              P4,SSE2
-cvttpd2pi!sse2pd       2C
-cvttsd2si!sse2sd       2C
-cvttpd2dq!sse2pd       E6
-cvttps2dq!ssess                5B              P4,SSE2
-maskmovdqu     XMMREG,XMMREG   nil     66,0F,F7        $2r,$1          nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqa XMMREG,XMMREG           nil     66,0F,6F        $2r,$1          nil     P4,SSE2
-movdqa XMMREG,mem128           nil     66,0F,6F        $2,$1           nil     P4,SSE2
-movdqa mem128,XMMREG           nil     66,0F,7F        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqu XMMREG,XMMREG           nil     F3,0F,6F        $2r,$1          nil     P4,SSE2
-movdqu XMMREG,mem128           nil     F3,0F,6F        $2,$1           nil     P4,SSE2
-movdqu mem128,XMMREG           nil     F3,0F,7F        $1,$2           nil     P4,SSE2
-movdq2q        MMXREG,XMMREG           nil     F2,0F,D6        $2r,$1          nil     P4,SSE2
-movq2dq        XMMREG,MMXREG           nil     F3,0F,D6        $2r,$1          nil     P4,SSE2
-pmuludq!mmxsse         F4              P4              P4,SSE2
-pshufd!sse2pdimm       70
-pshufhw        XMMREG,rm128,imm8       nil     F3,0F,70        $2,$1           $3,8    P4,SSE2
-pshuflw        XMMREG,rm128,imm8       nil     F2,0F,70        $2,$1           $3,8    P4,SSE2
-pslldq XMMREG,imm8             nil     66,0F,73        $1r,7           $2,8    P4,SSE2
-psrldq XMMREG,imm8             nil     66,0F,73        $1r,3           $2,8    P4,SSE2
-punpckhqdq!sse2pd      6D
-punpcklqdq!sse2pd      6C
-;
-; AMD 3DNow! instructions
-;
-!now3d MMXREG,rm64             nil     0F,0F           $2,$1           $0.1,8  @0,3DNOW,AMD
-prefetch!twobytemem    0F,0D,0         P5,3DNOW,AMD
-prefetchw!twobytemem   0F,0D,1         P5,3DNOW,AMD
-femms!twobyte          0F,0E           P5,3DNOW,AMD
-pavgusb!now3d          BF              P5
-pf2id!now3d            1D              P5
-pf2iw!now3d            1C              ATHLON
-pfacc!now3d            AE              P5
-pfadd!now3d            9E              P5
-pfcmpeq!now3d          B0              P5
-pfcmpge!now3d          90              P5
-pfcmpgt!now3d          A0              P5
-pfmax!now3d            A4              P5
-pfmin!now3d            94              P5
-pfmul!now3d            B4              P5
-pfnacc!now3d           8A              ATHLON
-pfpnacc!now3d          8E              ATHLON
-pfrcp!now3d            96              P5
-pfrcpit1!now3d         A6              P5
-pfrcpit2!now3d         B6              P5
-pfrsqit1!now3d         A7              P5
-pfrsqrt!now3d          97              P5
-pfsub!now3d            9A              P5
-pfsubr!now3d           AA              P5
-pi2fd!now3d            0D              P5
-pi2fw!now3d            0C              ATHLON
-pmulhrwa!now3d         B7              P5
-pswapd!now3d           BB              ATHLON
-;
-; AMD extensions
-;
-syscall!twobyte                0F,05           P6,AMD
-sysret!twobyte         0F,07           P6,PRIV,AMD
-; swapgs
-;
-; Cyrix MMX instructions
-;
-!cyrixmmx      MMXREG,rm64     nil     0F,$0.1         $2,$1           nil     P5,MMX,CYRIX
-paddsiw!cyrixmmx       51
-paveb!cyrixmmx         50
-pdistib!cyrixmmx       54
-pmachriw       MMXREG,mem64    nil     0F,5E           $2,$1           nil     P5,MMX,CYRIX
-pmagw!cyrixmmx         52
-pmulhriw!cyrixmmx      5D
-pmulhrwc!cyrixmmx      59
-pmvgezb!cyrixmmx       5C
-pmvlzb!cyrixmmx                5B
-pmvnzb!cyrixmmx                5A
-pmvzb!cyrixmmx         58
-psubsiw!cyrixmmx       55
-;
-; Cyrix extensions
-;
-!cyrixsmm      mem80           nil     0F,$0.1         $1,0            nil     486,CYRIX,SMM
-rdshr!twobyte          0F,36           P6,CYRIX,SMM
-rsdc   segreg,mem80            nil     0F,79           $2,$1           nil     486,CYRIX,SMM
-rsldt!cyrixsmm         7B
-rsts!cyrixsmm          7D
-svdc   mem80,segreg            nil     0F,78           $1,$2           nil     486,CYRIX,SMM
-svldt!cyrixsmm         7A
-svts!cyrixsmm          7C
-smint!twobyte          0F,38           P6,CYRIX
-smintold!twobyte       0F,7E           486,CYRIX,OBS
-wrshr!twobyte          0F,37           P6,CYRIX,SMM
-;
-; Obsolete/Undocumented Instructions
-;
-fsetpm!twobyte         DB,E4           286,FPU,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts   reg16,reg16             16      0F,A7           $1r,$2          nil     386,UNDOC,OBS
-ibts   mem,reg16               16      0F,A7           $1,$2           nil     386,UNDOC,OBS
-ibts   mem16x,reg16            16      0F,A7           $1,$2           nil     386,UNDOC,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts   reg32,reg32             32      0F,A7           $1r,$2          nil     386,UNDOC,OBS
-ibts   mem,reg32               32      0F,A7           $1,$2           nil     386,UNDOC,OBS
-ibts   mem32x,reg32            32      0F,A7           $1,$2           nil     386,UNDOC,OBS
-loadall!twobyte                0F,07           386,UNDOC
-loadall286!twobyte     0F,05           286,UNDOC
-;pop   reg_cs                  nil     0F              nil             nil     8086,UNDOC,OBS
-salc!onebyte           nil,D6          8086,UNDOC
-smi!onebyte            nil,F1          386,UNDOC
-; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11).
-umov   reg8,reg8               nil     0F,10           $1r,$2          nil     386,UNDOC
-umov   reg16,reg16             16      0F,11           $1r,$2          nil     386,UNDOC
-umov   reg32,reg32             32      0F,11           $1r,$2          nil     386,UNDOC
-umov   mem,reg8                nil     0F,10           $1,$2           nil     386,UNDOC
-umov   mem8x,reg8              nil     0F,10           $1,$2           nil     386,UNDOC
-umov   mem,reg16               16      0F,11           $1,$2           nil     386,UNDOC
-umov   mem16x,reg16            16      0F,11           $1,$2           nil     386,UNDOC
-umov   mem,reg32               32      0F,11           $1,$2           nil     386,UNDOC
-umov   mem32x,reg32            32      0F,11           $1,$2           nil     386,UNDOC
-umov   reg8,mem8               nil     0F,12           $2,$1           nil     386,UNDOC
-umov   reg16,mem16             16      0F,13           $2,$1           nil     386,UNDOC
-umov   reg32,mem32             32      0F,13           $2,$1           nil     386,UNDOC
-xbts   reg16,mem16             16      0F,A6           $2,$1           nil     386,UNDOC,OBS
-xbts   reg32,mem32             32      0F,A6           $2,$1           nil     386,UNDOC,OBS
diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h
deleted file mode 100644 (file)
index 86811b6..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/* $IdPath$
- * x86 internals header file
- *
- *  Copyright (C) 2001  Peter Johnson
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef YASM_X86_INT_H
-#define YASM_X86_INT_H
-
-typedef struct x86_effaddr_data {
-    unsigned char segment;     /* segment override, 0 if none */
-
-    /* How the spare (register) bits in Mod/RM are handled:
-     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
-     * They're set in bytecode_new_insn().
-     */
-    unsigned char modrm;
-    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
-    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
-
-    unsigned char sib;
-    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
-    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
-                                  0xff if unknown */
-} x86_effaddr_data;
-
-typedef struct x86_insn {
-    /*@null@*/ effaddr *ea;    /* effective address */
-
-    /*@null@*/ immval *imm;    /* immediate or relative value */
-
-    unsigned char opcode[3];   /* opcode */
-    unsigned char opcode_len;
-
-    unsigned char addrsize;    /* 0 or =mode_bits => no override */
-    unsigned char opersize;    /* 0 indicates no override */
-    unsigned char lockrep_pre; /* 0 indicates no prefix */
-
-    /* HACK, but a space-saving one: shift opcodes have an immediate
-     * form and a ,1 form (with no immediate).  In the parser, we
-     * set this and opcode_len=1, but store the ,1 version in the
-     * second byte of the opcode array.  We then choose between the
-     * two versions once we know the actual value of imm (because we
-     * don't know it in the parser module).
-     *
-     * A override to force the imm version should just leave this at
-     * 0.  Then later code won't know the ,1 version even exists.
-     * TODO: Figure out how this affects CPU flags processing.
-     *
-     * Call x86_SetInsnShiftFlag() to set this flag to 1.
-     */
-    unsigned char shift_op;
-
-    /* HACK, similar to that for shift_op above, for optimizing instructions
-     * that take a sign-extended imm8 as well as imm values (eg, the arith
-     * instructions and a subset of the imul instructions).
-     */
-    unsigned char signext_imm8_op;
-
-    unsigned char mode_bits;
-} x86_insn;
-
-typedef struct x86_jmprel {
-    expr *target;              /* target location */
-
-    struct {
-       unsigned char opcode[3];
-       unsigned char opcode_len;   /* 0 = no opc for this version */
-    } shortop, nearop;
-
-    /* which opcode are we using? */
-    /* The *FORCED forms are specified in the source as such */
-    x86_jmprel_opcode_sel op_sel;
-
-    unsigned char addrsize;    /* 0 or =mode_bits => no override */
-    unsigned char opersize;    /* 0 indicates no override */
-    unsigned char lockrep_pre; /* 0 indicates no prefix */
-
-    unsigned char mode_bits;
-} x86_jmprel;
-
-void x86_bc_delete(bytecode *bc);
-void x86_bc_print(FILE *f, const bytecode *bc);
-bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
-                               resolve_label_func resolve_label);
-int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
-                  void *d, output_expr_func output_expr);
-
-int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
-                    unsigned char nosplit, unsigned char *displen,
-                    unsigned char *modrm, unsigned char *v_modrm,
-                    unsigned char *n_modrm, unsigned char *sib,
-                    unsigned char *v_sib, unsigned char *n_sib);
-
-#endif
index 755e8bed5ea2420c950ce254c062049a31c2959a..c43feb116aa869f7efb3dfa674b9bef2ac96730f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * x86 architecture description
  *
- *  Copyright (C) 2001  Peter Johnson
+ *  Copyright (C) 2002  Peter Johnson
  *
  *  This file is part of YASM.
  *
 #include "util.h"
 /*@unused@*/ RCSID("$IdPath$");
 
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
 #include "bytecode.h"
+
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 
 unsigned char x86_mode_bits = 0;
 
+int
+x86_directive(const char *name, valparamhead *valparams,
+             /*@unused@*/ /*@null@*/ valparamhead *objext_valparams,
+             /*@unused@*/ sectionhead *headp)
+{
+    valparam *first;
+    const intnum *value = NULL;
+    long bits = 0;
+
+    /* BITS is the only directive handled here; returning 1 tells the caller
+     * the directive was not recognized.
+     */
+    if (strcasecmp(name, "bits") != 0)
+       return 1;
+
+    /* The first value/parameter must be a bare expression (no string value)
+     * that evaluates to an integer.
+     */
+    first = vps_first(valparams);
+    if (first && !first->val && first->param != NULL)
+       value = expr_get_intnum(&first->param);
+    if (value != NULL)
+       bits = intnum_get_int(value);
+
+    /* Only 16 and 32 are accepted; anything else (including a missing or
+     * non-integer argument, which leaves bits at 0) is an error.
+     */
+    if (bits == 16 || bits == 32)
+       x86_mode_bits = (unsigned char)bits;
+    else
+       Error(_("invalid argument to [%s]"), "BITS");
+    return 0;
+}
+
+unsigned int
+x86_get_reg_size(unsigned long reg)
+{
+    /* The register class is everything above the 3-bit register number. */
+    const x86_expritem_reg_size regclass = (x86_expritem_reg_size)(reg & ~7);
+    unsigned int size = 0;     /* stays 0 if the class is not recognized */
+
+    switch (regclass) {
+       case X86_REG8:
+           size = 1;
+           break;
+       case X86_REG16:
+           size = 2;
+           break;
+       case X86_REG32:
+       case X86_CRREG:
+       case X86_DRREG:
+       case X86_TRREG:
+           size = 4;
+           break;
+       case X86_MMXREG:
+           size = 8;
+           break;
+       case X86_XMMREG:
+           size = 16;
+           break;
+       case X86_FPUREG:
+           size = 10;
+           break;
+       default:
+           InternalError(_("unknown register size"));
+    }
+    return size;
+}
+
+void
+x86_reg_print(FILE *f, unsigned long reg)
+{
+    /* Name tables indexed by the 3-bit register number. */
+    static const char *regs8[] = {"al","cl","dl","bl","ah","ch","dh","bh"};
+    static const char *regs16[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+    const unsigned long num = reg&7;
+    const x86_expritem_reg_size regclass = (x86_expritem_reg_size)(reg&~7);
+
+    switch (regclass) {
+       /* General purpose registers have individual names; the 32-bit names
+        * are the 16-bit ones with an "e" in front.
+        */
+       case X86_REG8:
+           fprintf(f, "%s", regs8[num]);
+           break;
+       case X86_REG16:
+           fprintf(f, "%s", regs16[num]);
+           break;
+       case X86_REG32:
+           fprintf(f, "e%s", regs16[num]);
+           break;
+       /* All remaining classes print as a class name followed by the number. */
+       case X86_MMXREG:
+           fprintf(f, "mm%d", (int)num);
+           break;
+       case X86_XMMREG:
+           fprintf(f, "xmm%d", (int)num);
+           break;
+       case X86_CRREG:
+           fprintf(f, "cr%d", (int)num);
+           break;
+       case X86_DRREG:
+           fprintf(f, "dr%d", (int)num);
+           break;
+       case X86_TRREG:
+           fprintf(f, "tr%d", (int)num);
+           break;
+       case X86_FPUREG:
+           fprintf(f, "st%d", (int)num);
+           break;
+       default:
+           InternalError(_("unknown register size"));
+    }
+}
+
+/* Print the name of a segment register to f.
+ * The register number is taken from the low 3 bits of segreg.  Only the
+ * values 0-5 (es, cs, ss, ds, fs, gs) have a name, so anything else is
+ * reported as an internal error rather than indexing past the name table.
+ */
+void
+x86_segreg_print(FILE *f, unsigned long segreg)
+{
+    static const char *name[] = {"es","cs","ss","ds","fs","gs"};
+    const unsigned long num = segreg&7;
+
+    if (num >= sizeof(name)/sizeof(name[0]))
+       InternalError(_("unknown segment register"));
+    else
+       fprintf(f, "%s", name[num]);
+}
+
+void
+x86_handle_prefix(bytecode *bc, const unsigned long data[4])
+{
+    /* data[0] selects the kind of prefix, data[1] carries its value. */
+    const x86_parse_insn_prefix kind = (x86_parse_insn_prefix)data[0];
+
+    if (kind == X86_LOCKREP)
+       x86_bc_insn_set_lockrep_prefix(bc, (unsigned char)data[1]);
+    else if (kind == X86_ADDRSIZE)
+       x86_bc_insn_addrsize_override(bc, (unsigned char)data[1]);
+    else if (kind == X86_OPERSIZE)
+       x86_bc_insn_opersize_override(bc, (unsigned char)data[1]);
+    /* any other kind is ignored */
+}
+
+void
+x86_handle_seg_prefix(bytecode *bc, unsigned long segreg)
+{
+    /* The segment override value is segreg shifted down by 8 bits; it is
+     * applied to the effective address of the instruction in bc.
+     */
+    const unsigned char override = (unsigned char)(segreg>>8);
+    effaddr *insn_ea = x86_bc_insn_get_ea(bc);
+
+    x86_ea_set_segment(insn_ea, override);
+}
+
+void
+x86_handle_seg_override(effaddr *ea, unsigned long segreg)
+{
+    /* The segment override value is segreg shifted down by 8 bits. */
+    const unsigned char override = (unsigned char)(segreg>>8);
+
+    x86_ea_set_segment(ea, override);
+}
+
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
     "x86",
+    {
+       x86_switch_cpu,
+       x86_check_identifier,
+       x86_directive,
+       x86_new_insn,
+       x86_handle_prefix,
+       x86_handle_seg_prefix,
+       x86_handle_seg_override,
+       x86_ea_new_expr
+    },
     {
        X86_BYTECODE_TYPE_MAX,
        x86_bc_delete,
        x86_bc_print,
        x86_bc_resolve,
        x86_bc_tobytes
-    }
+    },
+    x86_get_reg_size,
+    x86_reg_print,
+    x86_segreg_print,
+    NULL,      /* x86_ea_data_delete */
+    x86_ea_data_print
 };
index 336201b8e9928b9f6e0a24c6e985c36fcae83e28..c44c0ddca0ca1d73f0f3e109cc3051e0b6a87c56 100644 (file)
@@ -28,6 +28,31 @@ typedef enum {
 } x86_bytecode_type;
 #define X86_BYTECODE_TYPE_MAX  X86_BC_JMPREL+1
 
+/* 0-7 (low 3 bits) used for register number, stored in same data area */
+typedef enum {
+    X86_REG8 = 0x8,    /* printed as al..bh; x86_get_reg_size() gives 1 */
+    X86_REG16 = 0x10,  /* printed as ax..di; x86_get_reg_size() gives 2 */
+    X86_REG32 = 0x20,  /* printed as eax..edi; x86_get_reg_size() gives 4 */
+    X86_MMXREG = 0x40, /* printed as mmN; x86_get_reg_size() gives 8 */
+    X86_XMMREG = 0x80, /* printed as xmmN; x86_get_reg_size() gives 16 */
+    X86_CRREG = 0xC0,  /* printed as crN; x86_get_reg_size() gives 4 */
+    X86_DRREG = 0xC8,  /* printed as drN; x86_get_reg_size() gives 4 */
+    X86_TRREG = 0xF0,  /* printed as trN; x86_get_reg_size() gives 4 */
+    X86_FPUREG = 0xF8  /* printed as stN; x86_get_reg_size() gives 10 */
+} x86_expritem_reg_size;
+
+typedef enum {        /* prefix kind, dispatched on in x86_handle_prefix() */
+    X86_LOCKREP = 1,   /* value goes to x86_bc_insn_set_lockrep_prefix() */
+    X86_ADDRSIZE,      /* value goes to x86_bc_insn_addrsize_override() */
+    X86_OPERSIZE       /* value goes to x86_bc_insn_opersize_override() */
+} x86_parse_insn_prefix;
+
+typedef enum {        /* NOTE(review): presumably jump/call target modifiers; no use is visible here -- confirm */
+    X86_NEAR,
+    X86_SHORT,
+    X86_FAR
+} x86_parse_targetmod;
+
 typedef enum {
     JR_NONE,
     JR_SHORT,
@@ -44,7 +69,7 @@ typedef struct x86_targetval {
 
 void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment);
 effaddr *x86_ea_new_reg(unsigned char reg);
-effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len);
 effaddr *x86_ea_new_expr(/*@keep@*/ expr *e);
 
 /*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc);
@@ -63,7 +88,7 @@ void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
  */
 typedef struct x86_new_insn_data {
     /*@keep@*/ /*@null@*/ effaddr *ea;
-    /*@keep@*/ /*@null@*/ immval *imm;
+    /*@keep@*/ /*@null@*/ expr *imm;
     unsigned char opersize;
     unsigned char op_len;
     unsigned char op[3];
@@ -90,4 +115,116 @@ bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
 
 extern unsigned char x86_mode_bits;
 
+typedef struct x86_effaddr_data {
+    unsigned char segment;     /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in bytecode_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
+                                  0xff if unknown */
+} x86_effaddr_data;
+
+typedef struct x86_insn {
+    /*@null@*/ effaddr *ea;    /* effective address */
+
+    /*@null@*/ immval *imm;    /* immediate or relative value */
+
+    unsigned char opcode[3];   /* opcode */
+    unsigned char opcode_len;  /* length of opcode */
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate).  In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array.  We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0.  Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_SetInsnShiftFlag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    /* HACK, similar to that for shift_op above, for optimizing instructions
+     * that take a sign-extended imm8 as well as imm values (eg, the arith
+     * instructions and a subset of the imul instructions).
+     */
+    unsigned char signext_imm8_op;
+
+    unsigned char mode_bits;   /* NOTE(review): presumably a copy of x86_mode_bits -- confirm */
+} x86_insn;
+
+typedef struct x86_jmprel {
+    expr *target;              /* target location */
+
+    struct {
+       unsigned char opcode[3];
+       unsigned char opcode_len;   /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    unsigned char mode_bits;
+} x86_jmprel;
+
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(FILE *f, const bytecode *bc);
+bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
+                               resolve_label_func resolve_label);
+int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
+                  void *d, output_expr_func output_expr);
+
+int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
+                    unsigned char nosplit, unsigned char *displen,
+                    unsigned char *modrm, unsigned char *v_modrm,
+                    unsigned char *n_modrm, unsigned char *sib,
+                    unsigned char *v_sib, unsigned char *n_sib);
+
+void x86_switch_cpu(const char *cpuid);
+
+arch_check_id_retval x86_check_identifier(unsigned long data[2],
+                                         const char *id);
+
+int x86_directive(const char *name, valparamhead *valparams,
+                 /*@null@*/ valparamhead *objext_valparams,
+                 sectionhead *headp);
+
+/*@null@*/ bytecode *x86_new_insn(const unsigned long data[2],
+                                 int num_operands,
+                                 /*@null@*/ insn_operandhead *operands);
+
+void x86_handle_prefix(bytecode *bc, const unsigned long data[4]);
+
+void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg);
+
+void x86_handle_seg_override(effaddr *ea, unsigned long segreg);
+
+unsigned int x86_get_reg_size(unsigned long reg);
+
+void x86_reg_print(FILE *f, unsigned long reg);
+
+void x86_segreg_print(FILE *f, unsigned long segreg);
+
+void x86_ea_data_print(FILE *f, const effaddr *ea);
+
 #endif
index 8cc4d4b45c2aed3ae017653608a20507f309070e..4393a0c85ed13c178ec8ee83146eaac06fb73858 100644 (file)
@@ -32,7 +32,7 @@
 #include "bytecode.h"
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 #include "bc-int.h"
 
@@ -54,11 +54,12 @@ x86_bc_new_insn(x86_new_insn_data *d)
        ead->modrm |= (d->spare << 3) & 0x38;   /* plug in provided bits */
     }
 
-    insn->imm = d->imm;
     if (d->imm) {
+       insn->imm = imm_new_expr(d->imm);
        insn->imm->len = d->im_len;
        insn->imm->sign = d->im_sign;
-    }
+    } else
+       insn->imm = NULL;
 
     insn->opcode[0] = d->op[0];
     insn->opcode[1] = d->op[1];
@@ -173,12 +174,12 @@ x86_ea_new_expr(expr *e)
 
 /*@-compmempass@*/
 effaddr *
-x86_ea_new_imm(immval *imm, unsigned char im_len)
+x86_ea_new_imm(expr *imm, unsigned char im_len)
 {
     effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
     x86_effaddr_data *ead = ea_get_data(ea);
 
-    ea->disp = imm->val;
+    ea->disp = imm;
     ea->len = im_len;
     ea->nosplit = 0;
     ead->segment = 0;
@@ -320,10 +321,8 @@ x86_bc_delete(bytecode *bc)
     switch ((x86_bytecode_type)bc->type) {
        case X86_BC_INSN:
            insn = bc_get_data(bc);
-           if (insn->ea) {
-               expr_delete(insn->ea->disp);
-               xfree(insn->ea);
-           }
+           if (insn->ea)
+               ea_delete(insn->ea);
            if (insn->imm) {
                expr_delete(insn->imm->val);
                xfree(insn->imm);
@@ -336,40 +335,38 @@ x86_bc_delete(bytecode *bc)
     }
 }
 
+void
+x86_ea_data_print(FILE *f, const effaddr *ea)
+{
+    const x86_effaddr_data *x86ea = ea_get_const_data(ea);
+    const unsigned int segov = (unsigned int)x86ea->segment;
+    const unsigned int modrm = (unsigned int)x86ea->modrm;
+    const unsigned int modrm_valid = (unsigned int)x86ea->valid_modrm;
+    const unsigned int modrm_needed = (unsigned int)x86ea->need_modrm;
+    const unsigned int sib = (unsigned int)x86ea->sib;
+    const unsigned int sib_valid = (unsigned int)x86ea->valid_sib;
+    const unsigned int sib_needed = (unsigned int)x86ea->need_sib;
+
+    /* One line each for the segment override, the Mod/RM state and the SIB
+     * state, all padded to the current indent level.
+     */
+    fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", segov);
+    fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "",
+           modrm, modrm_valid, modrm_needed);
+    fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "",
+           sib, sib_valid, sib_needed);
+}
+
 void
 x86_bc_print(FILE *f, const bytecode *bc)
 {
     const x86_insn *insn;
     const x86_jmprel *jmprel;
-    x86_effaddr_data *ead;
 
     switch ((x86_bytecode_type)bc->type) {
        case X86_BC_INSN:
            insn = bc_get_const_data(bc);
            fprintf(f, "%*s_Instruction_\n", indent_level, "");
            fprintf(f, "%*sEffective Address:", indent_level, "");
-           if (!insn->ea)
-               fprintf(f, " (nil)\n");
-           else {
-               indent_level++;
-               fprintf(f, "\n%*sDisp=", indent_level, "");
-               expr_print(f, insn->ea->disp);
+           if (insn->ea) {
                fprintf(f, "\n");
-               ead = ea_get_data(insn->ea);
-               fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n",
-                       indent_level, "", (unsigned int)insn->ea->len,
-                       (unsigned int)ead->segment,
-                       (unsigned int)insn->ea->nosplit);
-               fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n",
-                       indent_level, "", (unsigned int)ead->modrm,
-                       (unsigned int)ead->valid_modrm,
-                       (unsigned int)ead->need_modrm);
-               fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n",
-                       indent_level, "", (unsigned int)ead->sib,
-                       (unsigned int)ead->valid_sib,
-                       (unsigned int)ead->need_sib);
+               indent_level++;
+               ea_print(f, insn->ea);
                indent_level--;
-           }
+           } else
+               fprintf(f, " (nil)\n");
            fprintf(f, "%*sImmediate Value:", indent_level, "");
            if (!insn->imm)
                fprintf(f, " (nil)\n");
@@ -477,8 +474,7 @@ x86_bc_resolve_insn(x86_insn *insn, unsigned long *len, int save,
        x86_effaddr_data ead_t = *ead;  /* structure copy */
        unsigned char displen = ea->len;
 
-       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
-                          (!ead->valid_modrm && ead->need_modrm))) {
+       if (ea->disp) {
            temp = expr_copy(ea->disp);
            assert(temp != NULL);
 
index d041cc42e4a74f8263eb3c8f14f88ae14ec321eb..a30f14d34b2e10d11ed3e3ec6e20abb7019f5cb1 100644 (file)
@@ -33,7 +33,7 @@
 #include "bytecode.h"
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 #include "expr-int.h"
 
@@ -48,10 +48,10 @@ x86_expr_checkea_get_reg32(ExprItem *ei, /*returned*/ void *d)
     int *ret;
 
     /* don't allow 16-bit registers */
-    if (ei->data.reg.size != 32)
+    if ((ei->data.reg & ~7) != X86_REG32)
        return 0;
 
-    ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */
+    ret = &data[ei->data.reg & 7];
 
     /* overwrite with 0 to eliminate register from displacement expr */
     ei->type = EXPR_INT;
@@ -84,10 +84,11 @@ x86_expr_checkea_get_reg16(ExprItem *ei, void *d)
     reg16[7] = &data->di;
 
     /* don't allow 32-bit registers */
-    if (ei->data.reg.size != 16)
+    if ((ei->data.reg & ~7) != X86_REG16)
        return 0;
 
-    ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
+    /* & 7 for sanity check */
+    ret = reg16[ei->data.reg & 7];
 
     /* only allow BX, SI, DI, BP */
     if (!ret)
@@ -469,7 +470,7 @@ x86_expr_checkea_getregsize_callback(ExprItem *ei, void *d)
     unsigned char *addrsize = (unsigned char *)d;
 
     if (ei->type == EXPR_REG) {
-       *addrsize = ei->data.reg.size;
+       *addrsize = (unsigned char)ei->data.reg & ~7;
        return 1;
     } else
        return 0;
@@ -757,6 +758,12 @@ x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
        return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE,
                                        havereg == HAVE_BP, displen, modrm,
                                        v_modrm);
+    } else if (!*n_modrm && !*n_sib) {
+       /* Special case for MOV MemOffs opcode: displacement but no modrm. */
+       if (*addrsize == 32)
+           *displen = 4;
+       else if (*addrsize == 16)
+           *displen = 2;
     }
     return 1;
 }
diff --git a/modules/arch/x86/x86id.re b/modules/arch/x86/x86id.re
new file mode 100644 (file)
index 0000000..46ab2db
--- /dev/null
@@ -0,0 +1,1282 @@
+/*
+ * x86 identifier recognition and instruction handling
+ *
+ *  Copyright (C) 2002  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+#include "src/arch/x86/x86arch.h"
+
+#include "expr-int.h"
+#include "bc-int.h"
+
+
+/* Available CPU feature flags */
+#define CPU_Any            (0)         /* Any old cpu will do */
+#define CPU_086            CPU_Any
+#define CPU_186            (1<<0)      /* i186 or better required */
+#define CPU_286            (1<<1)      /* i286 or better required */
+#define CPU_386            (1<<2)      /* i386 or better required */
+#define CPU_486            (1<<3)      /* i486 or better required */
+#define CPU_586            (1<<4)      /* i586 or better required */
+#define CPU_686            (1<<5)      /* i686 or better required */
+#define CPU_P3     (1<<6)      /* Pentium3 or better required */
+#define CPU_P4     (1<<7)      /* Pentium4 or better required */
+#define CPU_IA64    (1<<8)     /* IA-64 or better required */
+#define CPU_K6     (1<<9)      /* AMD K6 or better required */
+#define CPU_Athlon  (1<<10)    /* AMD Athlon or better required */
+#define CPU_Hammer  (1<<11)    /* AMD Sledgehammer or better required */
+#define CPU_FPU            (1<<12)     /* FPU support required */
+#define CPU_MMX            (1<<13)     /* MMX support required */
+#define CPU_SSE            (1<<14)     /* Streaming SIMD extensions required */
+#define CPU_SSE2    (1<<15)    /* Streaming SIMD extensions 2 required */
+#define CPU_3DNow   (1<<16)    /* 3DNow! support required */
+#define CPU_Cyrix   (1<<17)    /* Cyrix-specific instruction */
+#define CPU_AMD            (1<<18)     /* AMD-specific inst. (older than K6) */
+#define CPU_SMM            (1<<19)     /* System Management Mode instruction */
+#define CPU_Prot    (1<<20)    /* Protected mode only instruction */
+#define CPU_Undoc   (1<<21)    /* Undocumented instruction */
+#define CPU_Obs            (1<<22)     /* Obsolete instruction */
+#define CPU_Priv    (1<<23)    /* Privileged instruction */
+
+/* What instructions/features are enabled?  Defaults to all. */
+static unsigned long cpu_enabled = ~CPU_Any;
+
+/* Opcode modifiers.  The opcode bytes are in "reverse" order because the
+ * parameters are read from the arch-specific data in LSB->MSB order.
+ * (only for aesthetic reasons in the lexer code below, no practical reason).
+ */
+#define MOD_Op2Add  (1<<0)     /* Parameter adds to opcode byte 2 */
+#define MOD_Gap0    (1<<1)     /* Eats a parameter */
+#define MOD_Op1Add  (1<<2)     /* Parameter adds to opcode byte 1 */
+#define MOD_Gap1    (1<<3)     /* Eats a parameter */
+#define MOD_Op0Add  (1<<4)     /* Parameter adds to opcode byte 0 */
+#define MOD_SpAdd   (1<<5)     /* Parameter adds to "spare" value */
+#define MOD_OpSizeR (1<<6)     /* Parameter replaces opersize */
+#define MOD_Imm8    (1<<7)     /* Parameter is included as immediate byte */
+
+/* Operand types.  These are more detailed than the "general" types for all
+ * architectures, as they include the size, for instance.
+ * Bit Breakdown (from LSB to MSB):
+ *  - 4 bits = general type (must be exact match, except for =3):
+ *             0 = immediate
+ *             1 = any general purpose, MMX, XMM, or FPU register
+ *             2 = memory
+ *             3 = any general purpose, MMX, XMM, or FPU register OR memory
+ *             4 = segreg
+ *             5 = any CR register
+ *             6 = any DR register
+ *             7 = any TR register
+ *             8 = ST0
+ *             9 = AL/AX/EAX (depending on size)
+ *             A = CL/CX/ECX (depending on size)
+ *             B = CR4
+ *             C = memory offset (an EA, but with no registers allowed)
+ *                 [special case for MOV opcode]
+ *  - 3 bits = size (user-specified, or from register size):
+ *             0 = any size acceptable
+ *             1/2/3/4 = 8/16/32/64 bits (from user or reg size)
+ *             5/6 = 80/128 bits (from user)
+ *  - 1 bit = size implicit or explicit ("strictness" of size matching on
+ *            non-registers -- registers are always strictly matched):
+ *            0 = user size must exactly match size above.
+ *            1 = user size either unspecified or exactly match size above.
+ *
+ * Bits above these are actions: what to do with the operand if the
+ * instruction matches.  Essentially describes what part of the output bytecode
+ * gets the operand.  This may require conversion (e.g. a register going into
+ * an ea field).  Naturally, only one of each of these may be contained in the
+ * operands of a single insn_info structure.
+ *  - 3 bits = action:
+ *             0 = does nothing (operand data is discarded)
+ *             1 = operand data goes into ea field
+ *             2 = operand data goes into imm field
+ *             3 = operand data goes into "spare" field
+ *             4 = operand data is added to opcode byte 0
+ */
+#define OPT_Imm                0x0
+#define OPT_Reg                0x1
+#define OPT_Mem                0x2
+#define OPT_RM         0x3
+#define OPT_SegReg     0x4
+#define OPT_CRReg      0x5
+#define OPT_DRReg      0x6
+#define OPT_TRReg      0x7
+#define OPT_ST0                0x8
+#define OPT_Areg       0x9
+#define OPT_Creg       0xA
+#define OPT_CR4                0xB
+#define OPT_MemOffs    0xC
+#define OPT_MASK       0x000F
+
+#define OPS_Any                (0<<4)
+#define OPS_8          (1<<4)
+#define OPS_16         (2<<4)
+#define OPS_32         (3<<4)
+#define OPS_64         (4<<4)
+#define OPS_80         (5<<4)
+#define OPS_128                (6<<4)
+#define OPS_MASK       0x0070
+#define OPS_SHIFT      4
+
+#define OPS_Relaxed    (1<<7)
+#define OPS_RMASK      0x0080
+
+#define OPA_None       (0<<8)
+#define OPA_EA         (1<<8)
+#define OPA_Imm                (2<<8)
+#define OPA_Spare      (3<<8)
+#define OPA_Op0Add     (4<<8)
+#define OPA_MASK       0x0700
+
+typedef struct x86_insn_info {
+    /* The CPU feature flags needed to execute this instruction.  This is OR'ed
+     * with arch-specific data[2].  This combined value is compared with
+     * cpu_enabled to see if all bits set here are set in cpu_enabled--if so,
+     * the instruction is available on this CPU.
+     */
+    unsigned long cpu;
+
+    /* Opcode modifiers for variations of instruction.  As each modifier reads
+     * its parameter in LSB->MSB order from the arch-specific data[1] from the
+     * lexer data, and the LSB of the arch-specific data[1] is reserved for the
+     * count of insn_info structures in the instruction grouping, there can
+     * only be a maximum of 3 modifiers.
+     */
+    unsigned long modifiers;
+
+    /* Operand Size */
+    unsigned char opersize;
+
+    /* The length of the basic opcode */
+    unsigned char opcode_len;
+
+    /* The basic 1-3 byte opcode */
+    unsigned char opcode[3];
+
+    /* The 3-bit "spare" value (extended opcode) for the R/M byte field */
+    unsigned char spare;
+
+    /* The number of operands this form of the instruction takes */
+    unsigned char num_operands;
+
+    /* The types of each operand, see above */
+    unsigned int operands[3];
+} x86_insn_info;
+
+/* Define lexer arch-specific data with 0-3 modifiers.
+ * DEF_INSN_DATA expects an array named data to be in scope and fills it:
+ *  data[0] = address of the group's x86_insn_info table (group##_insn)
+ *  data[1] = modifier parameters (mod) in bits 8 and up; the low byte is
+ *            the number of x86_insn_info entries in the table
+ *  data[2] = cpu
+ * RET_INSN does the same and then returns ARCH_CHECK_ID_INSN from the
+ * function it is used in.
+ */
+#define DEF_INSN_DATA(group, mod, cpu) do { \
+    data[0] = (unsigned long)group##_insn; \
+    data[1] = ((mod)<<8) | \
+             ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \
+    data[2] = cpu; \
+    } while (0)
+
+#define RET_INSN(group, mod, cpu)      do { \
+    DEF_INSN_DATA(group, mod, cpu); \
+    return ARCH_CHECK_ID_INSN; \
+    } while (0)
+
+/*
+ * General instruction groupings
+ */
+
+/* One byte opcode instructions with no operands.
+ * Opcode byte 0 and the operand size both come from modifier parameters
+ * (MOD_Op0Add, MOD_OpSizeR).
+ */
+static const x86_insn_info onebyte_insn[] = {
+    { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Two byte opcode instructions with no operands.
+ * Both opcode bytes come from modifier parameters (MOD_Op1Add, MOD_Op0Add).
+ */
+static const x86_insn_info twobyte_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Three byte opcode instructions with no operands.
+ * All three opcode bytes come from modifier parameters
+ * (MOD_Op2Add, MOD_Op1Add, MOD_Op0Add).
+ */
+static const x86_insn_info threebyte_insn[] = {
+    { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0,
+      {0, 0, 0} }
+};
+
+/* One byte opcode instructions with general memory operand.
+ * Opcode byte 0 and the "spare" value come from modifier parameters
+ * (MOD_Op0Add, MOD_SpAdd); the single operand is memory of any size and
+ * goes into the ea field.
+ */
+static const x86_insn_info onebytemem_insn[] = {
+    { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Two byte opcode instructions with general memory operand.
+ * Both opcode bytes and the "spare" value come from modifier parameters
+ * (MOD_Op1Add, MOD_Op0Add, MOD_SpAdd); the single operand is memory of any
+ * size and goes into the ea field.  The opcode length is 2, as in
+ * twobyte_insn, so that the second opcode byte is part of the opcode.
+ */
+static const x86_insn_info twobytemem_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 2, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Move instructions.
+ * x86_new_insn() scans this table in order and uses the first entry whose
+ * CPU requirements, operand count, operand types and operand sizes all
+ * match, so the order of the entries matters (the accumulator/memory-offset
+ * short forms are listed before the general r/m forms).
+ * Every form with a 32-bit operand size requires CPU_386.
+ */
+static const x86_insn_info mov_insn[] = {
+    /* Accumulator <- memory offset */
+    { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* Memory offset <- accumulator */
+    { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} },
+    { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} },
+    { CPU_386, 0, 32, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} },
+    /* r/m <- register */
+    { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
+    { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
+    { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
+    /* register <- r/m */
+    { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* TODO: segreg here */
+    /* register <- immediate (register number added to the opcode byte) */
+    { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Need two sets here, one for strictness on left side, one for right. */
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Control and debug register moves; the CR4 forms are listed before the
+     * generic control register forms so they match first.
+     */
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2,
+      {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} }
+};
+
+/* Move with sign/zero extend.
+ * The second opcode byte comes from the modifier bytes (MOD_Op1Add).  The
+ * last entry starts that byte at 1, so its final value is one higher than
+ * the modifier value; that entry is the form with a 16-bit source.
+ * Only the first entry has a relaxed source size.  NOTE(review): presumably
+ * the 32-bit destination forms stay strict because an unsized memory source
+ * would otherwise be ambiguous between the 8- and 16-bit forms -- confirm.
+ */
+static const x86_insn_info movszx_insn[] = {
+    { CPU_386, MOD_Op1Add, 16, 2, {0x0F, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
+    { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 1, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} }
+};
+
+
+/* Create an x86 instruction bytecode from a mnemonic's encoded data and its
+ * parsed operands.
+ *
+ *  data[0]      - address of the x86_insn_info table for the mnemonic
+ *  data[1]      - low 8 bits: number of entries in that table; the remaining
+ *                 bits: modifier bytes consumed by the MOD_* flags
+ *  data[2]      - extra CPU bits OR'd into each entry's CPU requirements
+ *  num_operands - number of operands supplied
+ *  operands     - operand list (may be NULL when there are no operands)
+ *
+ * Returns the bytecode built by x86_bc_new_insn(), or NULL (after reporting
+ * an error) when no table entry matches.
+ */
+bytecode *
+x86_new_insn(const unsigned long data[4], int num_operands,
+            insn_operandhead *operands)
+{
+    x86_new_insn_data d;
+    int num_info = (int)(data[1]&0xFF);
+    /* The info tables are static const data; keep them const here too. */
+    const x86_insn_info *info = (const x86_insn_info *)data[0];
+    unsigned long mod_data = data[1] >> 8;
+    int found = 0;
+    insn_operand *op;
+    int i;
+    /* Maps the OPS_* size index of an operand to the size value compared
+     * against operand sizes below.
+     */
+    static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0};
+
+    /* Just do a simple linear search through the info array for a match.
+     * First match wins.  On a match the loop is left with break, so info
+     * still points at the matching entry afterwards.
+     */
+    for (; num_info>0 && !found; num_info--, info++) {
+       unsigned long cpu;
+       unsigned int size;
+       int mismatch = 0;
+
+       /* Match CPU: every required bit must be enabled */
+       cpu = info->cpu | data[2];
+       if ((cpu_enabled & cpu) != cpu)
+           continue;
+
+       /* Match # of operands */
+       if (num_operands != info->num_operands)
+           continue;
+
+       if (!operands) {
+           found = 1;      /* no operands -> must have a match here. */
+           break;
+       }
+
+       /* Match each operand type and size */
+       for(i = 0, op = ops_first(operands); op && i<info->num_operands &&
+           !mismatch; op = ops_next(op), i++) {
+           /* Check operand type */
+           switch (info->operands[i] & OPT_MASK) {
+               case OPT_Imm:
+                   if (op->type != INSN_OPERAND_IMM)
+                       mismatch = 1;
+                   break;
+               case OPT_Reg:
+                   if (op->type != INSN_OPERAND_REG)
+                       mismatch = 1;
+                   else {
+                       /* size temporarily holds the register class bits:
+                        * control, debug and test registers are not
+                        * accepted as general registers.
+                        */
+                       size = op->data.reg & ~7;
+                       if (size == X86_CRREG || size == X86_DRREG ||
+                           size == X86_TRREG)
+                           mismatch = 1;
+                   }
+                   break;
+               case OPT_Mem:
+                   if (op->type != INSN_OPERAND_MEMORY)
+                       mismatch = 1;
+                   break;
+               case OPT_RM:
+                   if (op->type != INSN_OPERAND_REG &&
+                       op->type != INSN_OPERAND_MEMORY)
+                       mismatch = 1;
+                   break;
+               case OPT_SegReg:
+                   if (op->type != INSN_OPERAND_SEGREG)
+                       mismatch = 1;
+                   break;
+               case OPT_CRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_CRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_DRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_DRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_TRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_TRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_ST0:
+                   if (op->type != INSN_OPERAND_REG ||
+                       op->data.reg != X86_FPUREG)
+                       mismatch = 1;
+                   break;
+               case OPT_Areg:
+                   /* Register number 0 of the size named by the entry */
+                   if (op->type != INSN_OPERAND_REG ||
+                       ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                        op->data.reg != (X86_REG8 | 0)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                        op->data.reg != (X86_REG16 | 0)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                        op->data.reg != (X86_REG32 | 0)))
+                       mismatch = 1;
+                   break;
+               case OPT_Creg:
+                   /* Register number 1 of the size named by the entry */
+                   if (op->type != INSN_OPERAND_REG ||
+                       ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                        op->data.reg != (X86_REG8 | 1)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                        op->data.reg != (X86_REG16 | 1)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                        op->data.reg != (X86_REG32 | 1)))
+                       mismatch = 1;
+                   break;
+               case OPT_CR4:
+                   if (op->type != INSN_OPERAND_REG ||
+                       op->data.reg != (X86_CRREG | 4))
+                       mismatch = 1;
+                   break;
+               case OPT_MemOffs:
+                   /* Memory operand whose displacement uses no register */
+                   if (op->type != INSN_OPERAND_MEMORY ||
+                       expr_contains(ea_get_disp(op->data.ea), EXPR_REG))
+                       mismatch = 1;
+                   break;
+               default:
+                   InternalError(_("invalid operand type"));
+           }
+
+           if (mismatch)
+               break;
+
+           /* Check operand size */
+           size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT];
+           if (op->type == INSN_OPERAND_REG && op->size == 0) {
+               /* Register size must exactly match */
+               if (x86_get_reg_size(op->data.reg) != size)
+                   mismatch = 1;
+           } else {
+               if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) {
+                   /* Relaxed checking: an unsized operand (size 0) or an
+                    * entry without a size accepts anything.
+                    */
+                   if (size != 0 && op->size != size && op->size != 0)
+                       mismatch = 1;
+               } else {
+                   /* Strict checking */
+                   if (op->size != size)
+                       mismatch = 1;
+               }
+           }
+       }
+
+       if (!mismatch) {
+           found = 1;
+           break;
+       }
+    }
+
+    if (!found) {
+       /* Didn't find a matching one */
+       /* FIXME: This needs to be more descriptive of certain reasons for a
+        * mismatch.  E.g.:
+        *  "mismatch in operand sizes"
+        *  "operand size not specified"
+        * etc.  This will probably require adding dummy error catchers in the
+        * insn list which are only looked at if we get here.
+        */
+       Error(_("invalid combination of opcode and operands"));
+       return NULL;
+    }
+
+    /* Copy what we can from info */
+    d.ea = NULL;
+    d.imm = NULL;
+    d.opersize = info->opersize;
+    d.op_len = info->opcode_len;
+    d.op[0] = info->opcode[0];
+    d.op[1] = info->opcode[1];
+    d.op[2] = info->opcode[2];
+    d.spare = info->spare;
+    d.im_len = 0;
+    d.im_sign = 0;
+
+    /* Apply modifiers.  Modifier bytes are consumed from the low end of
+     * mod_data in this fixed order: Op2Add, Gap0, Op1Add, Gap1, Op0Add,
+     * SpAdd, OpSizeR, Imm8.  Only the flags set in the entry consume a byte.
+     */
+    if (info->modifiers & MOD_Op2Add) {
+       d.op[2] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap0)
+       mod_data >>= 8;
+    if (info->modifiers & MOD_Op1Add) {
+       d.op[1] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap1)
+       mod_data >>= 8;
+    if (info->modifiers & MOD_Op0Add) {
+       d.op[0] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_SpAdd) {
+       d.spare += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_OpSizeR) {
+       d.opersize = (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Imm8) {
+       d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF)));
+       d.im_len = 1;
+       /*mod_data >>= 8;*/
+    }
+
+    /* Go through operands and assign */
+    if (operands) {
+       for(i = 0, op = ops_first(operands); op && i<info->num_operands;
+           op = ops_next(op), i++) {
+           switch (info->operands[i] & OPA_MASK) {
+               case OPA_None:
+                   /* Throw away the operand contents */
+                   switch (op->type) {
+                       case INSN_OPERAND_REG:
+                       case INSN_OPERAND_SEGREG:
+                           break;
+                       case INSN_OPERAND_MEMORY:
+                           ea_delete(op->data.ea);
+                           break;
+                       case INSN_OPERAND_IMM:
+                           expr_delete(op->data.val);
+                           break;
+                   }
+                   break;
+               case OPA_EA:
+                   switch (op->type) {
+                       case INSN_OPERAND_REG:
+                           d.ea = x86_ea_new_reg((unsigned char)op->data.reg);
+                           break;
+                       case INSN_OPERAND_SEGREG:
+                           InternalError(_("invalid operand conversion"));
+                           break;  /* never fall into the memory case */
+                       case INSN_OPERAND_MEMORY:
+                           d.ea = op->data.ea;
+                           if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) {
+                               /* Special-case for MOV MemOffs instruction */
+                               x86_effaddr_data *ead = ea_get_data(d.ea);
+                               ead->valid_modrm = 0;
+                               ead->need_modrm = 0;
+                               ead->valid_sib = 0;
+                               ead->need_sib = 0;
+                           }
+                           break;
+                       case INSN_OPERAND_IMM:
+                           d.ea = x86_ea_new_imm(op->data.val,
+                               size_lookup[(info->operands[i] &
+                                            OPS_MASK)>>OPS_SHIFT]);
+                           break;
+                   }
+                   break;
+               case OPA_Imm:
+                   if (op->type == INSN_OPERAND_IMM) {
+                       d.imm = op->data.val;
+                       d.im_len = size_lookup[(info->operands[i] &
+                                               OPS_MASK)>>OPS_SHIFT];
+                   } else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               case OPA_Spare:
+                   if (op->type == INSN_OPERAND_REG ||
+                       op->type == INSN_OPERAND_SEGREG)
+                       d.spare = (unsigned char)(op->data.reg&7);
+                   else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               case OPA_Op0Add:
+                   if (op->type == INSN_OPERAND_REG)
+                       d.op[0] += (unsigned char)(op->data.reg&7);
+                   else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               default:
+                   InternalError(_("unknown operand action"));
+           }
+       }
+    }
+
+    /* Create the bytecode and return it */
+    return x86_bc_new_insn(&d);
+}
+
+
+#define YYCTYPE                char    /* input character type of the scanners */
+#define YYCURSOR       id      /* the scanners advance the id pointer itself */
+#define YYLIMIT                id      /* NOTE(review): no separate end-of-input limit is tracked -- confirm */
+#define YYMARKER       marker  /* backtracking position; each scanner declares a local marker */
+#define YYFILL(n)      /* expands to nothing: no buffer refill is performed */
+
+/*!re2c
+  any = [\000-\377];
+  A = [aA];
+  B = [bB];
+  C = [cC];
+  D = [dD];
+  E = [eE];
+  F = [fF];
+  G = [gG];
+  H = [hH];
+  I = [iI];
+  J = [jJ];
+  K = [kK];
+  L = [lL];
+  M = [mM];
+  N = [nN];
+  O = [oO];
+  P = [pP];
+  Q = [qQ];
+  R = [rR];
+  S = [sS];
+  T = [tT];
+  U = [uU];
+  V = [vV];
+  W = [wW];
+  X = [xX];
+  Y = [yY];
+  Z = [zZ];
+*/     /* Definitions above: any matches every byte value; A..Z each match
+        * one letter in either case, so rules spelled with them are
+        * case-insensitive.
+        */
+
+void
+x86_switch_cpu(const char *id)
+{   /* Update cpu_enabled from the CPU identifier string id.  Letters are
+     * matched case-insensitively.  A CPU model name overwrites the whole
+     * mask; a feature name (or its "no" form) sets (or clears) a single
+     * bit.  Any other identifier only produces a warning and leaves
+     * cpu_enabled unchanged.
+     */
+    const char *marker;    /* used by the generated scanner through YYMARKER */
+
+    /*!re2c
+       /* The standard CPU names /set/ cpu_enabled: each one assigns a
+        * complete new mask instead of adding to the current one.
+        */
+       "8086" {
+           cpu_enabled = CPU_Priv;
+           return;
+       }
+       ("80" | I)? "186" {
+           cpu_enabled = CPU_186|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "286" {
+           cpu_enabled = CPU_186|CPU_286|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "386" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "486" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM|
+                         CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I? "586") | (P E N T I U M) | (P "5") {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (P "2") | (P E N T I U M "-"? ("2" | (I I))) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot|
+                         CPU_Priv;
+           return;
+       }
+       (P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I A "-"? "64") | (I T A N I U M) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE|
+                         CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       K "6" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot|
+                         CPU_Priv;
+           return;
+       }
+       A T H L O N {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (S L E D G E)? (H A M M E R) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|
+                         CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+
+       /* Features have "no" versions to disable them, and only set/reset the
+        * specific feature being changed.  All other bits are left alone.
+        */
+       F P U           { cpu_enabled |= CPU_FPU; return; }
+       N O F P U       { cpu_enabled &= ~CPU_FPU; return; }
+       M M X           { cpu_enabled |= CPU_MMX; return; }
+       N O M M X       { cpu_enabled &= ~CPU_MMX; return; }
+       S S E           { cpu_enabled |= CPU_SSE; return; }
+       N O S S E       { cpu_enabled &= ~CPU_SSE; return; }
+       S S E "2"       { cpu_enabled |= CPU_SSE2; return; }
+       N O S S E "2"   { cpu_enabled &= ~CPU_SSE2; return; }
+       "3" D N O W     { cpu_enabled |= CPU_3DNow; return; }
+       N O "3" D N O W { cpu_enabled &= ~CPU_3DNow; return; }
+       C Y R I X       { cpu_enabled |= CPU_Cyrix; return; }
+       N O C Y R I X   { cpu_enabled &= ~CPU_Cyrix; return; }
+       A M D           { cpu_enabled |= CPU_AMD; return; }
+       N O A M D       { cpu_enabled &= ~CPU_AMD; return; }
+       S M M           { cpu_enabled |= CPU_SMM; return; }
+       N O S M M       { cpu_enabled &= ~CPU_SMM; return; }
+       P R O T         { cpu_enabled |= CPU_Prot; return; }
+       N O P R O T     { cpu_enabled &= ~CPU_Prot; return; }
+       U N D O C       { cpu_enabled |= CPU_Undoc; return; }
+       N O U N D O C   { cpu_enabled &= ~CPU_Undoc; return; }
+       O B S           { cpu_enabled |= CPU_Obs; return; }
+       N O O B S       { cpu_enabled &= ~CPU_Obs; return; }
+       P R I V         { cpu_enabled |= CPU_Priv; return; }
+       N O P R I V     { cpu_enabled &= ~CPU_Priv; return; }
+
+       /* catchalls: any other alphanumeric word, or any other single
+        * character, is reported as unrecognized.
+        */
+       [A-Za-z0-9]+    {
+           Warning(_("unrecognized CPU identifier `%s'"), id);
+           return;
+       }
+       any             {
+           Warning(_("unrecognized CPU identifier `%s'"), id);
+           return;
+       }
+    */
+}
+
+arch_check_id_retval
+x86_check_identifier(unsigned long data[4], const char *id)
+{
+    const char *oid = id;
+    const char *marker;
+    /*!re2c
+       /* target modifiers */
+       N E A R         {
+           data[0] = X86_NEAR;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+       S H O R T       {
+           data[0] = X86_SHORT;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+       F A R           {
+           data[0] = X86_FAR;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+
+       /* operand size overrides */
+       O "16"  {
+           data[0] = X86_OPERSIZE;
+           data[1] = 16;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       O "32"  {
+           data[0] = X86_OPERSIZE;
+           data[1] = 32;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       /* address size overrides */
+       A "16"  {
+           data[0] = X86_ADDRSIZE;
+           data[1] = 16;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       A "32"  {
+           data[0] = X86_ADDRSIZE;
+           data[1] = 32;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+
+       /* instruction prefixes */
+       L O C K         {
+           data[0] = X86_LOCKREP; 
+           data[1] = 0xF0;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P N E       {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF2;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P N Z       {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF2;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P           {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF3;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       /* REPE and REPZ are alternate names for REP and use the same prefix
+        * byte, 0xF3 (0xF4 is the HLT opcode, not a prefix).
+        */
+       R E P E         {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF3;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P Z         {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF3;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+
+       /* control, debug, and test registers */
+       C R [02-4]      {
+           data[0] = X86_CRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       D R [0-7]       {
+           data[0] = X86_DRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       T R [0-7]       {
+           data[0] = X86_TRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+
+       /* floating point, MMX, and SSE/SSE2 registers */
+       S T [0-7]       {
+           data[0] = X86_FPUREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       M M [0-7]       {
+           data[0] = X86_MMXREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       X M M [0-7]     {
+           /* The register digit follows the three letters "XMM", so it is
+            * at index 3 (index 2 is the second 'M').
+            */
+           data[0] = X86_XMMREG | (oid[3]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+
+       /* integer registers */
+       E A X   { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; }
+       E C X   { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; }
+       E D X   { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; }
+       E B X   { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; }
+       E S P   { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; }
+       E B P   { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; }
+       E S I   { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; }
+       E D I   { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; }
+
+       A X     { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; }
+       C X     { data[0] = X86_REG16 | 1; return ARCH_CHECK_ID_REG; }
+       D X     { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; }
+       B X     { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; }
+       S P     { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; }
+       B P     { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; }
+       S I     { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; }
+       D I     { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; }
+
+       A L     { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; }
+       C L     { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; }
+       D L     { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; }
+       B L     { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; }
+       A H     { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; }
+       C H     { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; }
+       D H     { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; }
+       B H     { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; }
+
+       /* segment registers */
+       E S     { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; }
+       C S     { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; }
+       S S     { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; }
+       D S     { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; }
+       F S     { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; }
+       G S     { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; }
+
+       /* instructions */
+
+       /* Move */
+       M O V { RET_INSN(mov, 0, CPU_Any); }
+       /* Move with sign/zero extend */
+       M O V S X { RET_INSN(movszx, 0xBE, CPU_386); }
+       M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); }
+       /* Push instructions */
+       /* P U S H */
+       P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); }
+       P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); }
+       P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); }
+       /* Pop instructions */
+       /* P O P */
+       P O P A { RET_INSN(onebyte, 0x0061, CPU_186); }
+       P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); }
+       P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); }
+       /* Exchange */
+       /* X C H G */
+       /* In/out from ports */
+       /* I N */
+       /* O U T */
+       /* Load effective address */
+       /* L E A */
+       /* Load segment registers from memory */
+       /* L D S */
+       /* L E S */
+       /* L F S */
+       /* L G S */
+       /* L S S */
+       /* Flags register instructions */
+       C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); }
+       C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); }
+       C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); }
+       C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); }
+       C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); }
+       L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); }
+       S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); }
+       P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); }
+       P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); }
+       P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); }
+       P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); }
+       P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); }
+       P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); }
+       S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); }
+       S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); }
+       S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); }
+       /* Arithmetic */
+       /* A D D */
+       /* I N C */
+       /* S U B */
+       /* D E C */
+       /* S B B */
+       /* C M P */
+       /* T E S T */
+       /* A N D */
+       /* O R */
+       /* X O R */
+       /* A D C */
+       /* N E G */
+       /* N O T */
+       A A A { RET_INSN(onebyte, 0x0037, CPU_Any); }
+       A A S { RET_INSN(onebyte, 0x003F, CPU_Any); }
+       D A A { RET_INSN(onebyte, 0x0027, CPU_Any); }
+       D A S { RET_INSN(onebyte, 0x002F, CPU_Any); }
+       /* A A D */
+       /* A A M */
+       /* Conversion instructions */
+       C B W { RET_INSN(onebyte, 0x1098, CPU_Any); }
+       C W D E { RET_INSN(onebyte, 0x2098, CPU_386); }
+       C W D { RET_INSN(onebyte, 0x1099, CPU_Any); }
+       C D Q { RET_INSN(onebyte, 0x2099, CPU_386); }
+       /* Multiplication and division */
+       /* M U L */
+       /* I M U L */
+       /* D I V */
+       /* I D I V */
+       /* Shifts */
+       /* R O L */
+       /* R O R */
+       /* R C L */
+       /* R C R */
+       /* S A L */
+       /* S H L */
+       /* S H R */
+       /* S A R */
+       /* S H L D */
+       /* S H R D */
+       /* Control transfer instructions (unconditional) */
+       /* C A L L */
+       /* J M P */
+       R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); }
+       /* R E T N */
+       /* R E T F */
+       /* E N T E R */
+       L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); }
+       /* Conditional jumps */
+       /* J O */
+       /* J N O */
+       /* J B */
+       /* JC */
+       /* J N A E */
+       /* J N B */
+       /* J N C */
+       /* J A E */
+       /* J E */
+       /* J Z */
+       /* J N E */
+       /* J N Z */
+       /* J B E */
+       /* J N A */
+       /* J N B E */
+       /* J A */
+       /* J S */
+       /* J N S */
+       /* J P */
+       /* J P E */
+       /* J N P */
+       /* J P O */
+       /* J L */
+       /* J N G E */
+       /* J N L */
+       /* J G E */
+       /* J L E */
+       /* J N G */
+       /* J N L E */
+       /* J G */
+       /* J C X Z */
+       /* J E C X Z */
+       /* Loop instructions */
+       /* L O O P */
+       /* L O O P Z */
+       /* L O O P E */
+       /* L O O P N Z */
+       /* L O O P N E */
+       /* Set byte on flag instructions */
+       /* S E T O */
+       /* S E T N O */
+       /* S E T B */
+       /* S E T C */
+       /* S E T N A E */
+       /* S E T N B */
+       /* S E T N C */
+       /* S E T A E */
+       /* S E T E */
+       /* S E T Z */
+       /* S E T N E */
+       /* S E T N Z */
+       /* S E T B E */
+       /* S E T N A */
+       /* S E T N B E */
+       /* S E T A */
+       /* S E T S */
+       /* S E T N S */
+       /* S E T P */
+       /* S E T P E */
+       /* S E T N P */
+       /* S E T P O */
+       /* S E T L */
+       /* S E T N G E */
+       /* S E T N L */
+       /* S E T G E */
+       /* S E T L E */
+       /* S E T N G */
+       /* S E T N L E */
+       /* S E T G */
+       /* String instructions. */
+       C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); }
+       C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); }
+       /* C M P S D */
+       I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); }
+       I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); }
+       I N S D { RET_INSN(onebyte, 0x206D, CPU_386); }
+       O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); }
+       O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); }
+       O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); }
+       L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); }
+       L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); }
+       L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); }
+       M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); }
+       M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); }
+       /* M O V S D */
+       S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); }
+       S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); }
+       S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); }
+       S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); }
+       S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); }
+       S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); }
+       X L A T B? { RET_INSN(onebyte, 0x00D7, CPU_Any); }
+       /* Bit manipulation */
+       /* B S F */
+       /* B S R */
+       /* B T */
+       /* B T C */
+       /* B T R */
+       /* B T S */
+       /* Interrupts and operating system instructions */
+       /* I N T */
+       I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+       I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+       I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); }
+       I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); }
+       I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); }
+       I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); }
+       R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); }
+       /* B O U N D */
+       H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); }
+       N O P { RET_INSN(onebyte, 0x0090, CPU_Any); }
+       /* Protection control */
+       /* A R P L */
+       /* L A R */
+       L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); }
+       L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); }
+       /* L L D T */
+       /* L M S W */
+       /* L S L */
+       /* L T R */
+       S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); }
+       S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); }
+       /* S L D T */
+       /* S M S W */
+       /* S T R */
+       /* V E R R */
+       /* V E R W */
+       /* Floating point instructions */
+       /* F L D */
+       /* F I L D */
+       /* F B L D */
+       /* F S T */
+       /* F I S T */
+       /* F S T P */
+       /* F I S T P */
+       /* F B S T P */
+       /* F X C H */
+       /* F C O M */
+       /* F I C O M */
+       /* F C O M P */
+       /* F I C O M P */
+       F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); }
+       /* F U C O M */
+       /* F U C O M P */
+       F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); }
+       F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); }
+       F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); }
+       F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); }
+       F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); }
+       F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); }
+       F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); }
+       F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); }
+       F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); }
+       F L D Z { RET_INSN(twobyte, 0xD9EE, CPU_FPU); }
+       /* F A D D */
+       /* F A D D P */
+       /* F I A D D */
+       /* F S U B */
+       /* F I S U B */
+       /* F S U B P */
+       /* F S U B R */
+       /* F I S U B R */
+       /* F S U B R P */
+       /* F M U L */
+       /* F I M U L */
+       /* F M U L P */
+       /* F D I V */
+       /* F I D I V */
+       /* F D I V P */
+       /* F D I V R */
+       /* F I D I V R */
+       /* F D I V R P */
+       F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); }
+       F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); }
+       F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); }
+       F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); }
+       F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); }
+       F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); }
+       F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); }
+       F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); }
+       F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); }
+       F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); }
+       F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); }
+       F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); }
+       F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); }
+       F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); }
+       F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); }
+       F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); }
+       F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); }
+       F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); }
+       F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); }
+       F I N I T { RET_INSN(threebyte, 0x9BDBE3, CPU_FPU); } /* 9B (FWAIT) + DB E3 (FNINIT) */
+       /* F L D C W */
+       /* F N S T C W */
+       /* F S T C W */
+       /* F N S T S W */
+       /* F S T S W */
+       F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); }
+       F C L E X { RET_INSN(threebyte, 0x9BDBE2, CPU_FPU); } /* 9B (FWAIT) + DB E2 (FNCLEX) */
+       F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); }
+       F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); }
+       F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); }
+       F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); }
+       F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); }
+       F R S T O R { RET_INSN(onebytemem, 0x04DD, CPU_FPU); }
+       /* F F R E E */
+       /* F F R E E P */
+       F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); }
+       F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); }
+       /* Prefixes (should the others be here too? should wait be a prefix?) */
+       W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); }
+       /* 486 extensions */
+       /* B S W A P */
+       /* X A D D */
+       /* C M P X C H G */
+       /* C M P X C H G 4 8 6 */
+       I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); }
+       W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); }
+       I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); }
+       /* 586+ and late 486 extensions */
+       C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); }
+       /* Pentium extensions */
+       W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); }
+       R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); }
+       R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); }
+       /* C M P X C H G 8 B */
+       /* Pentium II/Pentium Pro extensions */
+       S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); }
+       S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); }
+       F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); }
+       F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); }
+       R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); }
+       U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); }
+       U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); }
+       /* C M O V */
+       /* F C M O V */
+       /* F C O M I */
+       /* F U C O M I */
+       /* F C O M I P */
+       /* F U C O M I P */
+       /* Pentium4 extensions */
+       /* M O V N T I */
+       /* C L F L U S H */
+       L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); }
+       M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); }
+       P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); }
+       /* MMX/SSE2 instructions */
+       E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); }
+       /* PIII (Katmai) new instructions / SIMD instructions */
+       /* ... */
+       P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); }
+       P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); }
+       P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); }
+       P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); }
+       /* ... */
+       S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); }
+       /* ... */
+       /* SSE2 instructions */
+       /* AMD 3DNow! instructions */
+       P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+       P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+       F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); }
+       /* ... */
+       /* AMD extensions */
+       S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); }
+       S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); }
+       /* Cyrix MMX instructions */
+       /* Cyrix extensions */
+       R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); }
+       /* R S D C */
+       /* R S L D T */
+       /* R S T S */
+       /* S V D C */
+       /* S V L D T */
+       /* S V T S */
+       S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); }
+       S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); }
+       W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); }
+       /* Obsolete/undocumented instructions */
+       F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); }
+       /* I B T S */
+       L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); }
+       L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); }
+       S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); }
+       S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); }
+       /* U M O V */
+       /* X B T S */
+
+
+       /* catchalls */
+       [A-Za-z0-9]+    {
+           return ARCH_CHECK_ID_NONE;
+       }
+       any     {
+           return ARCH_CHECK_ID_NONE;
+       }
+    */
+}
index dbe7010d0be51ed53f7f67456321381a559cb7ff..b2a242821a3311f63cc4781d9ee5acae17ce285b 100644 (file)
@@ -3,57 +3,22 @@
 YASMPARSERFILES += \
        src/parsers/nasm/nasm-parser.c          \
        src/parsers/nasm/nasm-defs.h            \
-       nasm-bison.y                            \
+       src/parsers/nasm/nasm-bison.y           \
        nasm-bison.h                            \
-       nasm-token.l
-
-if DEV
-
-nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl
-       $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y
-
-else
-
-nasm-token.l: $(srcdir)/nasm-token.l
-       @echo Warning: Not generating nasm-token.l from nasm-token.l.in.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-token.l .
-nasm-token.c: $(srcdir)/nasm-token.c
-       @echo Warning: Not generating nasm-token.c from nasm-token.l.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-token.c .
-nasm-bison.y: $(srcdir)/nasm-bison.y
-       @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.y .
-nasm-bison.c: $(srcdir)/nasm-bison.c
-       @echo Warning: Not generating nasm-bison.c from nasm-bison.y.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.c .
-nasm-bison.h: $(srcdir)/nasm-bison.h
-       @echo Warning: Not generating nasm-bison.h from nasm-bison.y.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.h .
-
-endif
+       nasm-token.c
 
-noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl
+nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+       re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
 
 BUILT_SOURCES += \
-       nasm-bison.y                            \
        nasm-bison.c                            \
        nasm-bison.h                            \
-       nasm-token.l                            \
        nasm-token.c
 
 CLEANFILES += \
-       nasm-bison.y                            \
        nasm-bison.c                            \
        nasm-bison.h                            \
-       nasm-token.l                            \
        nasm-token.c
 
 EXTRA_DIST += \
-       src/parsers/nasm/token.l.in             \
-       src/parsers/nasm/bison.y.in             \
-       src/parsers/nasm/gen_instr.pl
+       src/parsers/nasm/nasm-token.re
diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in
deleted file mode 100644 (file)
index b3f0533..0000000
+++ /dev/null
@@ -1,791 +0,0 @@
-/*
- * NASM-compatible bison parser
- *
- *  Copyright (C) 2001  Peter Johnson, Michael Urman
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#ifdef STDC_HEADERS
-# include <math.h>
-#endif
-
-#include "bitvect.h"
-
-#include "globals.h"
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-#include "section.h"
-#include "objfmt.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-
-void init_table(void);
-extern int nasm_parser_lex(void);
-void nasm_parser_error(const char *);
-static void nasm_parser_directive(const char *name,
-                                 valparamhead *valparams,
-                                 /*@null@*/ valparamhead *objext_valparams);
-
-extern objfmt *nasm_parser_objfmt;
-extern sectionhead nasm_parser_sections;
-extern section *nasm_parser_cur_section;
-extern char *nasm_parser_locallabel_base;
-
-static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
-static bytecode *nasm_parser_temp_bc;
-
-/* additional data declarations (dynamically generated) */
-/* @DATADECLS@ */
-
-/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/
-%}
-
-%union {
-    unsigned int int_info;
-    char *str_val;
-    intnum *intn;
-    floatnum *flt;
-    symrec *sym;
-    unsigned char groupdata[5];
-    effaddr *ea;
-    expr *exp;
-    immval *im_val;
-    x86_targetval tgt_val;
-    datavalhead datahead;
-    dataval *data;
-    bytecode *bc;
-    valparamhead dir_valparams;
-    valparam *dir_valparam;
-}
-
-%token <intn> INTNUM
-%token <flt> FLTNUM
-%token <str_val> DIRECTIVE_NAME STRING FILENAME
-%token <int_info> BYTE WORD DWORD QWORD TWORD DQWORD
-%token <int_info> DECLARE_DATA
-%token <int_info> RESERVE_SPACE
-%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
-%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
-%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
-%token <str_val> ID LOCAL_ID SPECIAL_ID
-%token LINE
-
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
-%type <sym> explabel
-%type <str_val> label_id
-%type <tgt_val> target
-%type <data> dataval
-%type <datahead> datavals
-%type <dir_valparams> directive_valparams
-%type <dir_valparam> directive_valparam
-
-%left '|'
-%left '^'
-%left '&'
-%left LEFT_OP RIGHT_OP
-%left '-' '+'
-%left '*' '/' SIGNDIV '%' SIGNMOD
-%nonassoc UNARYOP
-
-%%
-input: /* empty */
-    | input line    {
-       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
-                                              $2);
-       if (nasm_parser_temp_bc)
-           nasm_parser_prev_bc = nasm_parser_temp_bc;
-       line_index++;
-    }
-;
-
-line: '\n'             { $$ = (bytecode *)NULL; }
-    | lineexp '\n'
-    | LINE INTNUM '+' INTNUM FILENAME '\n' {
-       /* %line indicates the line number of the *next* line, so subtract out
-        * the increment when setting the line number.
-        */
-       line_set($5, intnum_get_uint($2)-intnum_get_uint($4),
-                intnum_get_uint($4));
-       intnum_delete($2);
-       intnum_delete($4);
-       xfree($5);
-       $$ = (bytecode *)NULL;
-    }
-    | directive '\n'   { $$ = (bytecode *)NULL; }
-    | error '\n'       {
-       Error(_("label or instruction expected at start of line"));
-       $$ = (bytecode *)NULL;
-       yyerrok;
-    }
-;
-
-lineexp: exp
-    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
-    | label                            { $$ = (bytecode *)NULL; }
-    | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
-    | label_id EQU expr                        {
-       symrec_define_equ($1, $3);
-       xfree($1);
-       $$ = (bytecode *)NULL;
-    }
-;
-
-exp: instr
-    | DECLARE_DATA datavals            { $$ = bc_new_data(&$2, $1); }
-    | RESERVE_SPACE expr               { $$ = bc_new_reserve($2, $1); }
-    | INCBIN STRING                    { $$ = bc_new_incbin($2, NULL, NULL); }
-    | INCBIN STRING ',' expr           { $$ = bc_new_incbin($2, $4, NULL); }
-    | INCBIN STRING ',' expr ',' expr  { $$ = bc_new_incbin($2, $4, $6); }
-;
-
-datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
-    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
-;
-
-dataval: expr_no_string        { $$ = dv_new_expr($1); }
-    | STRING           { $$ = dv_new_string($1); }
-    | error            {
-       Error(_("expression syntax error"));
-       $$ = (dataval *)NULL;
-    }
-;
-
-label: label_id            {
-       symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
-                           1);
-       xfree($1);
-    }
-    | label_id ':'  {
-       symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
-                           1);
-       xfree($1);
-    }
-;
-
-label_id: ID       {
-       $$ = $1;
-       if (nasm_parser_locallabel_base)
-           xfree(nasm_parser_locallabel_base);
-       nasm_parser_locallabel_base = xstrdup($1);
-    }
-    | SPECIAL_ID
-    | LOCAL_ID
-;
-
-/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']'        {
-       xfree($2);
-    }
-    | '[' DIRECTIVE_NAME error ']'             {
-       Error(_("invalid arguments to [%s]"), $2);
-       xfree($2);
-    }
-;
-
-    /* $<str_val>0 is the DIRECTIVE_NAME */
-    /* After : is (optional) object-format specific extension */
-directive_val: directive_valparams {
-       nasm_parser_directive($<str_val>0, &$1, NULL);
-    }
-    | directive_valparams ':' directive_valparams {
-       nasm_parser_directive($<str_val>0, &$1, &$3);
-    }
-;
-
-directive_valparams: directive_valparam                {
-       vps_initialize(&$$);
-       vps_append(&$$, $1);
-    }
-    | directive_valparams directive_valparam   {
-       vps_append(&$1, $2);
-       $$ = $1;
-    }
-;
-
-directive_valparam: direxpr    {
-       /* If direxpr is just an ID, put it in val and delete the expr */
-       const /*@null@*/ symrec *vp_symrec;
-       if ((vp_symrec = expr_get_symrec(&$1, 0))) {
-           vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL);
-           expr_delete($1);
-       } else
-           vp_new($$, NULL, $1);
-    }
-    | ID '=' direxpr           { vp_new($$, $1, $3); }
-;
-
-/* register groupings */
-fpureg: ST0
-    | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
-    | DWORD reg_eax    { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
-    | DWORD reg_ecx    { $$ = $2; }
-;
-
-rawreg32: REG_EAX
-    | REG_ECX
-    | REG_EDX
-    | REG_EBX
-    | REG_ESP
-    | REG_EBP
-    | REG_ESI
-    | REG_EDI
-;
-
-reg32: rawreg32
-    | DWORD reg32      { $$ = $2; }
-;
-
-reg_ax: REG_AX
-    | WORD reg_ax      { $$ = $2; }
-;
-
-reg_cx: REG_CX
-    | WORD reg_cx      { $$ = $2; }
-;
-
-reg_dx: REG_DX
-    | WORD reg_dx      { $$ = $2; }
-;
-
-rawreg16: REG_AX
-    | REG_CX
-    | REG_DX
-    | REG_BX
-    | REG_SP
-    | REG_BP
-    | REG_SI
-    | REG_DI
-;
-
-reg16: rawreg16
-    | WORD reg16       { $$ = $2; }
-;
-
-reg_al: REG_AL
-    | BYTE reg_al      { $$ = $2; }
-;
-
-reg_cl: REG_CL
-    | BYTE reg_cl      { $$ = $2; }
-;
-
-reg8: REG_AL
-    | REG_CL
-    | REG_DL
-    | REG_BL
-    | REG_AH
-    | REG_CH
-    | REG_DH
-    | REG_BH
-    | BYTE reg8                { $$ = $2; }
-;
-
-reg_es: REG_ES
-    | WORD reg_es      { $$ = $2; }
-;
-
-reg_ss: REG_SS
-    | WORD reg_ss      { $$ = $2; }
-;
-
-reg_ds: REG_DS
-    | WORD reg_ds      { $$ = $2; }
-;
-
-reg_fs: REG_FS
-    | WORD reg_fs      { $$ = $2; }
-;
-
-reg_gs: REG_GS
-    | WORD reg_gs      { $$ = $2; }
-;
-
-reg_cs: REG_CS
-    | WORD reg_cs      { $$ = $2; }
-;
-
-segreg: REG_ES
-    | REG_SS
-    | REG_DS
-    | REG_FS
-    | REG_GS
-    | REG_CS
-    | WORD segreg      { $$ = $2; }
-;
-
-/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated?  This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg.  I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | rawreg16                 { $$ = expr_new_ident(ExprReg($1, 16)); }
-    | rawreg32                 { $$ = expr_new_ident(ExprReg($1, 32)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| memexpr '||' memexpr   { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | memexpr '|' memexpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | memexpr '^' memexpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' memexpr      { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | memexpr '&' memexpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| memexpr '==' memexpr   { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| memexpr '>' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '<' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '>=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '<=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '!=' memexpr   { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | memexpr LEFT_OP memexpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | memexpr '+' memexpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | memexpr '-' memexpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | memexpr '*' memexpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | memexpr '/' memexpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | memexpr SIGNDIV memexpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | memexpr '%' memexpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | memexpr SIGNMOD memexpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' memexpr %prec UNARYOP        { $$ = $2; }
-    | '-' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' memexpr            { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' memexpr ')'          { $$ = $2; }
-    | STRING                   {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-    | error                    { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr           {
-       $$ = x86_ea_new_expr($1);
-       x86_ea_set_segment($$, 0);
-    }
-    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
-    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
-    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
-;
-
-mem: '[' memaddr ']'   { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem                { $$ = $2; }
-    | BYTE mem8x       { $$ = $2; }
-;
-mem16x: WORD mem       { $$ = $2; }
-    | WORD mem16x      { $$ = $2; }
-;
-mem32x: DWORD mem      { $$ = $2; }
-    | DWORD mem32x     { $$ = $2; }
-;
-mem64x: QWORD mem      { $$ = $2; }
-    | QWORD mem64x     { $$ = $2; }
-;
-mem80x: TWORD mem      { $$ = $2; }
-    | TWORD mem80x     { $$ = $2; }
-;
-mem128x: DQWORD mem    { $$ = $2; }
-    | DQWORD mem128x   { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem                { $$ = $2; }
-    | FAR memfar       { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
-    | mem8x
-;
-mem16: mem
-    | mem16x
-;
-mem32: mem
-    | mem32x
-;
-mem64: mem
-    | mem64x
-;
-mem80: mem
-    | mem80x
-;
-mem128: mem
-    | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
-    | mem16x
-    | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8     { $$ = x86_ea_new_reg($1); }
-    | mem8x
-;
-rm16x: reg16   { $$ = x86_ea_new_reg($1); }
-    | mem16x
-;
-rm32x: reg32   { $$ = x86_ea_new_reg($1); }
-    | mem32x
-;
-/* not needed:
-rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
-    | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
-    | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8      { $$ = x86_ea_new_reg($1); }
-    | mem8
-;
-rm16: reg16    { $$ = x86_ea_new_reg($1); }
-    | mem16
-;
-rm32: reg32    { $$ = x86_ea_new_reg($1); }
-    | mem32
-;
-rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
-    | mem64
-;
-rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
-    | mem128
-;
-
-/* immediate values */
-imm: expr   { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm            { $$ = $2; }
-;
-imm16x: WORD imm    { $$ = $2; }
-;
-imm32x: DWORD imm   { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
-    | imm8x
-;
-imm16: imm
-    | imm16x
-;
-imm32: imm
-    | imm32x
-;
-
-/* jump targets */
-target: expr           {
-       $$.val = $1;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
-    }
-    | SHORT target     {
-       $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
-    }
-    | NEAR target      {
-       $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
-    }
-;
-
-/* expression trees */
-
-/* expr w/o FLTNUM and unary + and -, for use in directives */
-direxpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | ID                       {
-       $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0)));
-       xfree($1);
-    }
-    | direxpr '|' direxpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | direxpr '^' direxpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    | direxpr '&' direxpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    | direxpr LEFT_OP direxpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | direxpr '+' direxpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | direxpr '-' direxpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | direxpr '*' direxpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | direxpr '/' direxpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | direxpr SIGNDIV direxpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | direxpr '%' direxpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | direxpr SIGNMOD direxpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    /*| '!' expr               { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' direxpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' direxpr ')'          { $$ = $2; }
-;
-
-expr_no_string: INTNUM         { $$ = expr_new_ident(ExprInt($1)); }
-    | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| expr '||' expr         { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | expr '|' expr            { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | expr '^' expr            { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' expr         { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | expr '&' expr            { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| expr '==' expr         { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| expr '>' expr          { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| expr '<' expr          { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| expr '>=' expr         { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| expr '<=' expr         { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| expr '!=' expr         { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | expr LEFT_OP expr                { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | expr RIGHT_OP expr       { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | expr '+' expr            { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | expr '-' expr            { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | expr '*' expr            { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | expr '/' expr            { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | expr SIGNDIV expr                { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | expr '%' expr            { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | expr SIGNMOD expr                { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' expr %prec UNARYOP   { $$ = $2; }
-    | '-' expr %prec UNARYOP   { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' expr               { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' expr %prec UNARYOP   { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' expr ')'             { $$ = $2; }
-;
-
-expr: expr_no_string
-    | STRING           {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-;
-
-explabel: ID           {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | SPECIAL_ID       {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | LOCAL_ID         {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | '$'              {
-       $$ = symrec_define_label("$", nasm_parser_cur_section,
-                                nasm_parser_prev_bc, 0);
-    }
-    | START_SECTION_ID {
-       if (section_is_absolute(nasm_parser_cur_section)) {
-           Error(_("`$$' is not valid within an ABSOLUTE section"));
-           YYERROR;
-       } else {
-           const char *ss_name = section_get_name(nasm_parser_cur_section);
-           assert(ss_name != NULL);
-           $$ = symrec_use(ss_name);
-       }
-    }
-;
-
-instr: /* empty */     {
-       idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
-       $$ = x86_bc_new_insn(&idata);
-    }
-    | instrbase
-    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
-    | REG_CS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
-    }
-    | REG_SS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
-    }
-    | REG_DS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
-    }
-    | REG_ES instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
-    }
-    | REG_FS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
-    }
-    | REG_GS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
-    }
-    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
-    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
-%%
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
-
-static void
-nasm_parser_directive(const char *name, valparamhead *valparams,
-                     valparamhead *objext_valparams)
-{
-    valparam *vp, *vp2;
-    const intnum *intn;
-    long lval;
-
-    assert(cur_objfmt != NULL);
-
-    /* Handle (mostly) output-format independent directives here */
-    if (strcasecmp(name, "extern") == 0) {
-       vp = vps_first(valparams);
-       if (vp->val)
-           symrec_declare(vp->val, SYM_EXTERN,
-                          cur_objfmt->extern_data_new(vp->val,
-                                                      objext_valparams));
-       else
-           Error(_("invalid argument to [%s]"), "EXTERN");
-    } else if (strcasecmp(name, "global") == 0) {
-       vp = vps_first(valparams);
-       if (vp->val)
-           symrec_declare(vp->val, SYM_GLOBAL,
-                          cur_objfmt->global_data_new(vp->val,
-                                                      objext_valparams));
-       else
-           Error(_("invalid argument to [%s]"), "GLOBAL");
-    } else if (strcasecmp(name, "common") == 0) {
-       vp = vps_first(valparams);
-       if (vp->val) {
-           vp2 = vps_next(vp);
-           if (!vp2 || (!vp2->val && !vp2->param))
-               Error(_("no size specified in %s declaration"), "COMMON");
-           else {
-               if (vp2->val)
-                   symrec_declare(vp->val, SYM_COMMON,
-                       cur_objfmt->common_data_new(vp->val,
-                           expr_new_ident(ExprSym(symrec_use(vp2->val))),
-                           objext_valparams));
-               else if (vp2->param) {
-                   symrec_declare(vp->val, SYM_COMMON,
-                       cur_objfmt->common_data_new(vp->val, vp2->param,
-                                                   objext_valparams));
-                   vp2->param = NULL;
-               }
-           }
-       } else
-           Error(_("invalid argument to [%s]"), "COMMON");
-    } else if (strcasecmp(name, "section") == 0 ||
-              strcasecmp(name, "segment") == 0) {
-       section *new_section =
-           cur_objfmt->sections_switch(&nasm_parser_sections, valparams,
-                                       objext_valparams);
-       if (new_section) {
-           nasm_parser_cur_section = new_section;
-           nasm_parser_prev_bc = (bytecode *)NULL;
-       } else
-           Error(_("invalid argument to [%s]"), "SECTION");
-    } else if (strcasecmp(name, "absolute") == 0) {
-       /* it can be just an ID or a complete expression, so handle both. */
-       vp = vps_first(valparams);
-       if (vp->val)
-           nasm_parser_cur_section =
-               sections_switch_absolute(&nasm_parser_sections,
-                   expr_new_ident(ExprSym(symrec_use(vp->val))));
-       else if (vp->param) {
-           nasm_parser_cur_section =
-               sections_switch_absolute(&nasm_parser_sections, vp->param);
-           vp->param = NULL;
-       }
-       nasm_parser_prev_bc = (bytecode *)NULL;
-    } else if (strcasecmp(name, "bits") == 0) {
-       if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
-           (intn = expr_get_intnum(&vp->param)) != NULL &&
-           (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
-           x86_mode_bits = (unsigned char)lval;
-       else
-           Error(_("invalid argument to [%s]"), "BITS");
-    } else if (cur_objfmt->directive(name, valparams, objext_valparams,
-                                    &nasm_parser_sections)) {
-       Error(_("unrecognized directive [%s]"), name);
-    }
-
-    vps_delete(valparams);
-    if (objext_valparams)
-       vps_delete(objext_valparams);
-}
-
-void
-nasm_parser_error(const char *s)
-{
-    ParserError(s);
-}
-
diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl
deleted file mode 100755 (executable)
index b0599d9..0000000
+++ /dev/null
@@ -1,889 +0,0 @@
-#!/usr/bin/perl -w
-# $IdPath$
-# Generates NASM-compatible bison.y and token.l from instrs.dat.
-#
-#    Copyright (C) 2001  Michael Urman
-#
-#    This file is part of YASM.
-#
-#    YASM is free software; you can redistribute it and/or modify
-#    it under the terms of the GNU General Public License as published by
-#    the Free Software Foundation; either version 2 of the License, or
-#    (at your option) any later version.
-#
-#    YASM is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#    GNU General Public License for more details.
-#
-#    You should have received a copy of the GNU General Public License
-#    along with this program; if not, write to the Free Software
-#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#
-
-use strict;
-use Getopt::Long;
-my $VERSION = "0.0.1";
-
-# useful constants for instruction arrays
-#  common
-use constant INST          => 0;
-use constant OPERANDS      => 1;
-#  general format
-use constant OPSIZE        => 2;
-use constant OPCODE        => 3;
-use constant EFFADDR       => 4;
-use constant IMM           => 5;
-use constant CPU           => 6;
-#  relative target format
-use constant ADSIZE        => 2;
-use constant SHORTOPCODE    => 3;
-use constant NEAROPCODE            => 4;
-use constant SHORTCPU      => 5;
-use constant NEARCPU       => 6;
-
-use constant TOO_MANY_ERRORS => 20;
-
-# default options
-my $instrfile = 'instrs.dat';
-my $tokenfile = 'token.l';
-my $tokensource;
-my $grammarfile = 'bison.y';
-my $grammarsource;
-my $showversion;
-my $showusage;
-my $dry_run;
-
-# allow overrides
-my $gotopts = GetOptions ( 'input=s' => \$instrfile,
-                          'token=s' => \$tokenfile,
-                          'sourcetoken=s' => \$tokensource,
-                          'grammar=s' => \$grammarfile,
-                          'sourcegrammar=s' => \$grammarsource,
-                          'version' => \$showversion,
-                          'n|dry-run' => \$dry_run,
-                          'help|usage' => \$showusage,
-                        );
-
-&showusage and exit 1 unless $gotopts;
-&showversion if $showversion;
-&showusage if $showusage;
-exit 0 if $showversion or $showusage;
-
-# valid values for instrs.dat fields
-my $valid_regs = join '|', qw(
-    reg_al reg_ah reg_ax reg_eax
-    reg_bl reg_bh reg_bx reg_ebx
-    reg_cl reg_ch reg_cx reg_ecx
-    reg_dl reg_dh reg_dx reg_edx
-    reg_si reg_esi reg_di reg_edi
-    reg_bp reg_ebp
-    reg_cs reg_ds reg_es reg_fs reg_gs reg_ss
-    ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG
-    fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm
-    imm8 imm16 imm32 imm64 imm80 imm128
-    imm8x imm16x imm32x imm64x imm80x imm128x
-    rm8 rm16 rm32 rm1632 rm64 rm80 rm128
-    rm8x rm16x rm32x rm1632x rm64x rm80x rm128x
-    reg8 reg16 reg32 reg1632 reg64 reg80 reg128
-    reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
-    mem8 mem16 mem32 mem1632 mem64 mem80 mem128
-    mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
-    target memfar
-);
-my $valid_opcodes = join '|', qw(
-    [0-9A-F]{2}
-    \\$0\\.\\d
-);
-my $valid_cpus = join '|', qw(
-    8086 186 286 386 486 P4 P5 P6
-    FPU MMX KATMAI SSE SSE2
-    AMD ATHLON 3DNOW
-    SMM
-    CYRIX
-    UNDOC OBS PRIV PROT
-    @0 @1
-);
-
-# track errors and warnings rather than die'ing on the first.
-my (@messages, $errcount, $warncount);
-sub die_with_errors (@)
-{
-    foreach (@_) { print; };
-    if ($errcount)
-    {
-       print "Dying with errors\n";
-       exit -1;
-    }
-}
-
-my ($groups) = &read_instructions ($instrfile);
-
-die_with_errors @messages;
-
-exit 0 if $dry_run; # done with simple verification, so exit
-
-unless ($dry_run)
-{
-    &output_lex ($tokenfile, $tokensource, $groups);
-    &output_yacc ($grammarfile, $grammarsource, $groups);
-}
-
-# print version for --version, etc.
-sub showversion
-{
-    print "YASM gen_instr.pl $VERSION\n";
-}
-
-# print usage information for --help, etc.
-sub showusage
-{
-    print <<"EOF";
-Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile]
-    -i, --input                 instructions file (default: $instrfile)
-    -t, --token                 token output file (default: $tokenfile)
-    -st, --sourcetoken   token input file (default: $tokenfile.in)
-    -g, --grammar        grammar output file (default: $grammarfile)
-    -sg, --sourcegrammar grammar input file (default: $grammarfile.in)
-    -v, --version        show version and exit
-    -h, --help, --usage  show this message and exit
-    -n, --dry-run        verify input file without writing output files
-EOF
-}
-
-# read in instructions, and verify they're valid (well, mostly)
-sub read_instructions ($)
-{
-    my $instrfile = shift || die;
-    open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n";
-    my %instr;
-    my %groups;
-
-    sub add_group_rule ($$$$)
-    {
-       my ($inst, $args, $groups, $instrfile) = splice @_;
-
-       # slide $0.\d down by one.
-       # i still say changing instrs.dat would be better ;)
-       $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
-
-       # detect relative target format by looking for "target" in args
-       if($args =~ m/target/oi)
-       {
-           my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
-               split /\t+/, $args;
-           eval {
-               die "Invalid group name\n"
-                       if $inst !~ m/^!\w+$/o;
-               die "Invalid Operands\n"
-                       if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
-               die "Invalid Address Size\n"
-                       if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
-               die "Invalid Short Opcode\n"
-                       if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
-               die "Invalid Near Opcode\n"
-                       if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
-               die "Invalid Short CPU\n"
-                       if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-               die "Invalid Near CPU\n"
-                       if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           };
-           push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-           die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-           # knock the ! off of $inst for the groupname
-           $inst = substr $inst, 1;
-           push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
-       } else {
-           my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
-           eval {
-               die "Invalid group name\n"
-                       if $inst !~ m/^!\w+$/o;
-               die "Invalid Operands\n"
-                       if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
-               die "Invalid Operation Size\n"
-                       if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
-               die "Invalid Opcode\n"
-                       if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
-               die "Invalid Effective Address\n"
-                       if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
-               die "Invalid Immediate Operand\n"
-                       if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
-               die "Invalid CPU\n"
-                       if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           };
-           push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-           die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-           # knock the ! off of $inst for the groupname
-           $inst = substr $inst, 1;
-           push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
-       }
-    }
-
-    sub add_group_member ($$$$$)
-    {
-       my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_;
-
-       my ($inst, $group) = split /!/, $handle;
-       my ($args, $cpu) = split /\t+/, $fullargs;
-       eval {
-           die "Invalid instruction name\n"
-                   if $inst !~ m/^\w+$/o;
-           die "Invalid group name\n"
-                   if $group !~ m/^\w+$/o;
-           die "Invalid CPU\n"
-                   if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n"
-                   unless exists $groups->{$group};
-           $warncount++;
-       };
-       push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-       # only allow multiple instances of instructions that aren't of a group
-       push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++
-               if exists $instr->{$inst} and not exists $groups->{$inst};
-       die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-       push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu];
-       $instr->{$inst} = 1;
-    }
-
-    while (<INPUT>)
-    {
-       chomp;
-       next if /^\s*(?:;.*)$/;
-
-       my ($handle, $args) = split /\t+/, $_, 2;
-
-       # pseudo hack to handle original style instructions (no group)
-       if ($handle =~ m/^\w+$/)
-       {
-           # TODO: this has some long ranging effects, as the eventual
-           # bison rules get tagged <groupdata> when they don't need
-           # to, etc.  Fix this sometime.
-           add_group_rule ("!$handle", $args, \%groups, $instrfile);
-           add_group_member ("$handle!$handle", "", \%groups, \%instr,
-                             $instrfile);
-       }
-       elsif ($handle =~ m/^!\w+$/)
-       {
-           add_group_rule ($handle, $args, \%groups, $instrfile);
-       }
-       elsif ($handle =~ m/^\w+!\w+$/)
-       {
-           add_group_member ($handle, $args, \%groups, \%instr,
-                             $instrfile);
-       }
-       # TODO: consider if this is necessary: Pete?
-       # (add_group_member_synonym is -not- implemented)
-       #elsif ($handle =~ m/^:\w+$/)
-       #{
-       #    add_group_member_synonym ($handle, $args);
-       #}
-    }
-    close INPUT;
-    return (\%groups);
-}
-
-sub output_lex ($@)
-{
-    my $tokenfile = shift or die;
-    my $tokensource = shift;
-    $tokensource ||= "$tokenfile.in";
-    my $groups = shift or die;
-
-    open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n";
-    open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n";
-    while (<IN>)
-    {
-       # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content
-       if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
-       {
-           foreach my $grp (sort keys %$groups)
-           {
-               my %printed;
-               my $group = $grp; $group =~ s/^!//;
-
-               foreach my $grp (@{$groups->{$grp}{members}})
-               {
-                   unless (exists $printed{$grp->[0]})
-                   {
-                       $printed{$grp->[0]} = 1;
-                       my @groupdata;
-                       if ($grp->[2])
-                       {
-                           @groupdata = split ",", $grp->[2];
-                           for (my $i=0; $i < @groupdata; ++$i)
-                           {
-                               $groupdata[$i] =~ s/nil/0/;
-                               $groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];";
-                           }
-                           $groupdata[-1] .= "\n\t     ";
-                       }
-                       printf TOKEN "%-12s{%s return %-20s }\n",
-                           $grp->[0],
-                           (join "\n\t     ", @groupdata), 
-                           "\Ugrp_$group;\E";
-                           # TODO: change appropriate GRP_FOO back to
-                           # INS_FOO's.  not functionally important;
-                           # just pedantically so.
-                   }
-               }
-           }
-       }
-       else
-       {
-           print TOKEN $_;
-       }
-    }
-    close IN;
-    close TOKEN;
-}
-
-# helper functions for yacc output
-sub rule_header ($ $ $)
-{
-    my ($rule, $tokens, $count) = splice (@_);
-    $count ? "    | $tokens {\n" : "$rule: $tokens {\n"; 
-}
-sub rule_footer ()
-{
-    return "    }\n";
-}
-
-sub cond_action_if ( $ $ $ $ $ $ $ )
-{
-    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
-    return rule_header ($rule, $tokens, $count) . <<"EOF";
-        if (\$$regarg == $val) {
-            @$a_eax
-            \$\$ = $func;
-        }
-EOF
-}
-sub cond_action_elsif ( $ $ $ $ )
-{
-    my ($regarg, $val, $func, $a_eax) = splice (@_);
-    return <<"EOF";
-        else if (\$$regarg == $val) {
-            @$a_eax
-            \$\$ = $func;
-        }
-EOF
-}
-sub cond_action_else ( $ $ )
-{
-    my ($func, $a_args) = splice (@_);
-    return <<"EOF" . rule_footer;
-        else {
-            @$a_args
-            \$\$ = $func;
-        }
-EOF
-}
-sub cond_action ( $ $ $ $ $ $ $ $ )
-{
-    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args)
-     = splice (@_);
-    return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func,
-       $a_eax) . cond_action_else ($func, $a_args);
-}
-
-#sub action ( $ $ $ $ $ )
-sub action ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = splice @_;
-    return rule_header ($rule, $tokens, $count)
-       . "        @$a_args\n"
-       . "        \$\$ = $func;\n"
-       . rule_footer; 
-}
-
-sub action_setshiftflag ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = splice @_;
-    return rule_header ($rule, $tokens, $count)
-       . "        @$a_args\n"
-       . "        \$\$ = $func;\n"
-       . "        x86_bc_insn_set_shift_flag(\$\$);\n"
-       . rule_footer; 
-}
-
-sub action_setjrshort ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = splice @_;
-    return rule_header ($rule, $tokens, $count)
-       . "        if (\$2.op_sel == JR_NONE)\n"
-       . "            \$2.op_sel = JR_SHORT;\n"
-       . "        @$a_args\n"
-       . "        \$\$ = $func;\n"
-       . rule_footer; 
-}
-
-sub get_token_number ( $ $ )
-{
-    my ($tokens, $str) = splice @_;
-    $tokens =~ s/$str.*/x/; # hold its place
-    my @f = split /\s+/, $tokens;
-    return scalar @f;
-}
-
-sub output_yacc ($@)
-{
-    my $grammarfile = shift or die;
-    my $grammarsource = shift;
-    $grammarsource ||= "$grammarfile.in";
-    my $groups = shift or die;
-
-    open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n";
-    open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n";
-
-    while (<IN>)
-    {
-       if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
-       {
-           print GRAMMAR "static x86_new_insn_data idata;\n";
-           print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
-       }
-       elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
-       {
-           my $len = length("%token <groupdata>");
-           print GRAMMAR "%token <groupdata>";
-           foreach my $group (sort keys %$groups)
-           {
-               if ($len + length("GRP_$group") < 76)
-               {
-                   print GRAMMAR " GRP_\U$group\E";
-                   $len += length(" GRP_$group");
-               }
-               else
-               {
-                   print GRAMMAR "\n%token <groupdata> GRP_\U$group\E";
-                   $len = length("%token <groupdata> GRP_$group");
-               }
-           }
-           print GRAMMAR "\n";
-       }
-       elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/})
-       {
-           my $len = length("%type <bc>");
-           print GRAMMAR "%type <bc>";
-           foreach my $group (sort keys %$groups)
-           {
-               if ($len + length($group) < 76)
-               {
-                   print GRAMMAR " $group";
-                   $len += length(" $group");
-               }
-               else
-               {
-                   print GRAMMAR "\n%type <bc> $group";
-                   $len = length("%type <bc> $group");
-               }
-           }
-           print GRAMMAR "\n";
-       }
-       elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
-       {
-           # list every kind of instruction that instrbase can be
-           print GRAMMAR "instrbase:    ",
-                   join( "\n    | ", sort keys %$groups), "\n;\n";
-
-           my ($ONE, $AL, $AX, $EAX);  # need the outer scope
-           my (@XCHG_AX, @XCHG_EAX);
-
-           # list the arguments and actions (buildbc)
-           #foreach my $instrname (sort keys %$instrlist)
-           foreach my $group (sort keys %$groups)
-           {
-               # I'm still convinced this is a hack.  The idea is if
-               # within an instruction we see certain versions of the
-               # opcodes with ONE, or reg_e?a[lx],imm(8|16|32).  If we
-               # do, defer generation of the action, as we may need to
-               # fold it into another version with a conditional to
-               # generate the more efficient variant of the opcode
-               # BUT, if we don't fold it in, we have to generate the
-               # original version we would have otherwise.
-               ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0);
-               # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax).
-               (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
-               my $count = 0;
-               foreach my $inst (@{$groups->{$group}{rules}}) {
-                   if($inst->[OPERANDS] =~ m/target/oi)
-                   {
-                       # relative target format
-                       # build the instruction in pieces.
-
-                       # rulename = instruction
-                       my $rule = "$inst->[INST]";
-
-                       # tokens it eats: instruction and arguments
-                       # nil => no arguments
-                       my $tokens = "\Ugrp_$rule\E";
-                       $tokens .= " $inst->[OPERANDS]"
-                           if $inst->[OPERANDS] ne 'nil';
-                       $tokens =~ s/,/ ',' /g;
-                       $tokens =~ s/:/ ':' /g;
-                       my $datastruct = "x86_new_jmprel_data";
-                       my $datastructname = "jrdata";
-                       my $func = "x86_bc_new_jmprel(&$datastructname)";
-
-                       # Create the argument list for bytecode_new
-                       my @args;
-
-                       # Target argument: HACK: Always assumed to be arg 1.
-                       push @args, 'target=&$2;';
-
-                       # test for short opcode "nil"
-                       if($inst->[SHORTOPCODE] =~ m/nil/)
-                       {
-                           push @args, 'short_op_len=0;';
-                       }
-                       else
-                       {
-                           my @opcodes;
-                           # Check for possible length parameter
-                           if($inst->[SHORTOPCODE] =~ m/\?/)
-                           {
-                               my @pieces = split /\?/, $inst->[SHORTOPCODE];
-                               push @args, "short_op_len=".$pieces[0].";";
-                               # opcode piece 1 (and 2 and 3 if attached)
-                               @opcodes = split ",", $pieces[1];
-                           }
-                           else
-                           {
-                               # opcode piece 1 (and 2 and 3 if attached)
-                               @opcodes = split ",", $inst->[SHORTOPCODE];
-                               # number of bytes of short opcode
-                               push @args, "short_op_len=".@opcodes.";";
-                           }
-                           for (my $i=0; $i < @opcodes; ++$i)
-                           {
-                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                               $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                               # don't match $0.\d in the following rule.
-                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
-                               push @args, "short_op[$i]=$opcodes[$i];";
-                           }
-                       }
-
-                       # test for near opcode "nil"
-                       if($inst->[NEAROPCODE] =~ m/nil/)
-                       {
-                           push @args, 'near_op_len=0;';
-                       }
-                       else
-                       {
-                           # opcode piece 1 (and 2 and 3 if attached)
-                           my @opcodes = split ",", $inst->[NEAROPCODE];
-                           # number of bytes of near opcode
-                           push @args, "near_op_len=".@opcodes.";";
-                           for (my $i=0; $i < @opcodes; ++$i)
-                           {
-                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                               $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                               # don't match $0.\d in the following rule.
-                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
-                               push @args, "near_op[$i]=$opcodes[$i];";
-                           }
-                       }
-
-                       # address size
-                       push @args, "addrsize=$inst->[ADSIZE];";
-                       $args[-1] =~ s/nil/0/;
-
-                       # now that we've constructed the arglist, subst $0.\d
-                       s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
-                       # and add the data structure reference
-                       s/^/$datastructname./g foreach (@args);
-
-                       if ($args[0] =~ m/\&\$/)
-                       {
-                           $args[0] = '/*@-immediatetrans@*/' . $args[0] .
-                               '/*@=immediatetrans@*/';
-                       }
-
-                       # generate the grammar
-                       # Specialcase jcc to set op_sel=JR_SHORT.
-                       if ($rule =~ m/jcc/)
-                       {
-                           print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++);
-                       }
-                       else
-                       {
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-                       }
-                   }
-                   else
-                   {
-                       # general instruction format
-                       # build the instruction in pieces.
-
-                       # rulename = instruction
-                       my $rule = "$inst->[INST]";
-
-                       # tokens it eats: instruction and arguments
-                       # nil => no arguments
-                       my $tokens = "\Ugrp_$rule\E";
-                       $tokens .= " $inst->[OPERANDS]"
-                           if $inst->[OPERANDS] ne 'nil';
-                       $tokens =~ s/,/ ',' /g;
-                       $tokens =~ s/:/ ':' /g;
-                       # offset args
-                       my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
-                       my $datastruct = "x86_new_insn_data";
-                       my $datastructname = "idata";
-                       my $func = "x86_bc_new_insn(&$datastructname)";
-
-                       # Create the argument list for bytecode_new
-                       my @args;
-
-                       # operand size
-                       push @args, "opersize=$inst->[OPSIZE];";
-                       $args[-1] =~ s/nil/0/;
-
-
-                       # opcode piece 1 (and 2 and 3 if attached)
-                       my @opcodes = split ",", $inst->[OPCODE];
-                       # number of bytes of opcodes
-                       push @args, "op_len=".@opcodes.";";
-                       for (my $i=0; $i < @opcodes; ++$i)
-                       {
-                           $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                           # don't match $0.\d in the following rule.
-                           $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
-                           push @args, "op[$i]=$opcodes[$i];";
-                       }
-
-                       # effective addresses
-                       my $effaddr = $inst->[EFFADDR];
-                       $effaddr =~ s/^nil/NULL,0/;
-                       $effaddr =~ s/nil/0/;
-                       # don't let a $0.\d match slip into the following rules.
-                       $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
-                       $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
-                       $effaddr =~ s[(\$\d+)i,\s*(\d+)]
-                           ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
-
-                       die $effaddr if $effaddr =~ m/\d+[ri]/;
-
-                       my @effaddr_split = split ',', $effaddr;
-                       $effaddr_split[0] =~ s/\^/,/;
-                       push @args, "ea=$effaddr_split[0];";
-                       if ($effaddr_split[0] !~ m/NULL/)
-                       {
-                           push @args, "spare=$effaddr_split[1];";
-                       }
-
-                       # immediate sources
-                       my $imm = $inst->[IMM];
-                       $imm =~ s/nil/NULL,0/;
-                       # don't match $0.\d in the following rules.
-                       $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       $imm =~ s[^([0-9A-Fa-f]+),]
-                           [imm_new_int(0x$1),];
-                       $imm =~ s[^\$0.(\d+),]
-                           [imm_new_int((unsigned long)\$1\[$1\]),];
-
-                       # divide the second, and only the second, by 8 bits/byte
-                       $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
-                       $imm .= ($3||'') eq 's' ? ',1' : ',0';
-
-                       die $imm if $imm =~ m/\d+s/;
-
-                       my @imm_split = split ",", $imm;
-                       push @args, "imm=$imm_split[0];";
-                       if ($imm_split[0] !~ m/NULL/)
-                       {
-                           push @args, "im_len=$imm_split[1];";
-                           push @args, "im_sign=$imm_split[2];";
-                       }
-
-                       # now that we've constructed the arglist, subst $0.\d
-                       s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
-                       # and add the data structure reference
-                       s/^/$datastructname./g foreach (@args);
-                   
-                       # see if we match one of the cases to defer
-                       if (($inst->[OPERANDS]||"") =~ m/,ONE/)
-                       {
-                           $ONE = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/)
-                       {
-                           $AL = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/)
-                       {
-                           $AX = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/)
-                       {
-                           $EAX = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/)
-                       {
-                           $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/)
-                       {
-                           $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/)
-                       {
-                           $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/)
-                       {
-                           $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
-                       }
-
-                       # or if we've deferred and we match the folding version
-                       elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
-                       {
-                           $ONE->[4] = 1;
-                           # Output a normal version except imm8 -> imm8x
-                           # (BYTE override always makes longer version, and
-                           # we don't want to conflict with the imm version
-                           # we output right after this one.
-                           $tokens =~ s/imm8/imm8x/;
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-
-                           # Now output imm version, with second opcode byte
-                           # set to ,1 opcode.  Also call SetInsnShiftFlag().
-                           $tokens =~ s/imm8x/imm/;
-                           my $oneval = $ONE->[3]->[2];
-                           $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg;
-                           push @args, $oneval;
-                           print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
-                       }
-                       elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
-                       {
-                           $AL->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg8");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
-                       }
-                       elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/)
-                       {
-                           $AX->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg16");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
-                       }
-                       elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/)
-                       {
-                           $EAX->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg32");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
-                       }
-                       elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
-                           ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
-                       {
-                           my $first = 1;
-                           for (my $i=0; $i < @XCHG_AX; ++$i)
-                           {
-                               if($XCHG_AX[$i])
-                               {
-                                   $XCHG_AX[$i]->[4] = 1;
-                                   # This is definitely a hack.  The "right"
-                                   # way to do this would be to enhance
-                                   # get_token_number to get the nth reg16
-                                   # instead of always getting the first.
-                                   my $regarg =
-                                       get_token_number ($tokens, "reg16")
-                                       + $i*2;
-
-                                   if ($first)
-                                   {
-                                       print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
-                                       $first = 0;
-                                   }
-                                   else
-                                   {
-                                       $count++;
-                                       print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
-                                   }
-                               }
-                           }
-                           print GRAMMAR cond_action_else ($func, \@args);
-                       }
-                       elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
-                           ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
-                       {
-                           my $first = 1;
-                           for (my $i=0; $i < @XCHG_EAX; ++$i)
-                           {
-                               if($XCHG_EAX[$i])
-                               {
-                                   $XCHG_EAX[$i]->[4] = 1;
-                                   # This is definitely a hack.  The "right"
-                                   # way to do this would be to enhance
-                                   # get_token_number to get the nth reg32
-                                   # instead of always getting the first.
-                                   my $regarg =
-                                       get_token_number ($tokens, "reg32")
-                                       + $i*2;
-
-                                   if ($first)
-                                   {
-                                       print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
-                                       $first = 0;
-                                   }
-                                   else
-                                   {
-                                       $count++;
-                                       print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
-                                   }
-                               }
-                           }
-                           print GRAMMAR cond_action_else ($func, \@args);
-                       }
-
-                       # otherwise, generate the normal version
-                       else
-                       {
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-                       }
-                   }
-               }
-
-               # catch deferreds that haven't been folded in.
-               if ($ONE and not $ONE->[4])
-               {
-                   print GRAMMAR action (@$ONE, $count++);
-               }
-               if ($AL and not $AL->[4])
-               {
-                   print GRAMMAR action (@$AL, $count++);
-               }
-               if ($AX and not $AL->[4])
-               {
-                   print GRAMMAR action (@$AX, $count++);
-               }
-               if ($EAX and not $AL->[4])
-               {
-                   print GRAMMAR action (@$EAX, $count++);
-               }
-               
-               # print error action
-               # ASSUMES: at least one previous action exists
-               print GRAMMAR "    | \Ugrp_$group\E error {\n";
-               print GRAMMAR "        Error (_(\"expression syntax error\"));\n";
-               print GRAMMAR "        \$\$ = (bytecode *)NULL;\n";
-               print GRAMMAR "    }\n";
-
-               # terminate the rule
-               print GRAMMAR ";\n";
-           }
-       }
-       else
-       {
-           print GRAMMAR $_;
-       }
-    }
-    close IN;
-    close GRAMMAR;
-}
index b3f0533ac3b4d8ef1af2cb0895e8b341ff49d61a..248820db28992015ca849e16e6ad97b756e1ab6c 100644 (file)
@@ -44,8 +44,10 @@ RCSID("$IdPath$");
 
 #include "src/parsers/nasm/nasm-defs.h"
 
+
 void init_table(void);
 extern int nasm_parser_lex(void);
+extern void nasm_parser_set_directive_state(void);
 void nasm_parser_error(const char *);
 static void nasm_parser_directive(const char *name,
                                  valparamhead *valparams,
@@ -55,6 +57,7 @@ extern objfmt *nasm_parser_objfmt;
 extern sectionhead nasm_parser_sections;
 extern section *nasm_parser_cur_section;
 extern char *nasm_parser_locallabel_base;
+extern size_t nasm_parser_locallabel_base_len;
 
 static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
@@ -71,16 +74,19 @@ static bytecode *nasm_parser_temp_bc;
     intnum *intn;
     floatnum *flt;
     symrec *sym;
-    unsigned char groupdata[5];
+    unsigned long arch_data[4];
     effaddr *ea;
     expr *exp;
-    immval *im_val;
-    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
     valparamhead dir_valparams;
     valparam *dir_valparam;
+    struct {
+       insn_operandhead operands;
+       int num_operands;
+    } insn_operands;
+    insn_operand *insn_operand;
 }
 
 %token <intn> INTNUM
@@ -90,46 +96,25 @@ static bytecode *nasm_parser_temp_bc;
 %token <int_info> DECLARE_DATA
 %token <int_info> RESERVE_SPACE
 %token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token SEG WRT NOSPLIT
 %token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token <arch_data> INSN PREFIX REG SEGREG TARGETMOD
 %token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
 %token <str_val> ID LOCAL_ID SPECIAL_ID
 %token LINE
 
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
+%type <bc> line lineexp exp instr
+
+%type <ea> memaddr
+%type <exp> dvexpr expr direxpr
 %type <sym> explabel
 %type <str_val> label_id
-%type <tgt_val> target
 %type <data> dataval
 %type <datahead> datavals
 %type <dir_valparams> directive_valparams
 %type <dir_valparam> directive_valparam
+%type <insn_operands> operands
+%type <insn_operand> operand
 
 %left '|'
 %left '^'
@@ -163,7 +148,9 @@ line: '\n'          { $$ = (bytecode *)NULL; }
        xfree($5);
        $$ = (bytecode *)NULL;
     }
-    | directive '\n'   { $$ = (bytecode *)NULL; }
+    | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' {
+       $$ = (bytecode *)NULL;
+    }
     | error '\n'       {
        Error(_("label or instruction expected at start of line"));
        $$ = (bytecode *)NULL;
@@ -191,11 +178,32 @@ exp: instr
     | INCBIN STRING ',' expr ',' expr  { $$ = bc_new_incbin($2, $4, $6); }
 ;
 
+instr: INSN            {
+       $$ = cur_arch->parse.new_insn($1, 0, NULL);
+    }
+    | INSN operands    {
+       $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands);
+       ops_delete(&$2.operands, 0);
+    }
+    | INSN error       {
+       Error(_("expression syntax error"));
+       $$ = NULL;
+    }
+    | PREFIX instr     {
+       $$ = $2;
+       cur_arch->parse.handle_prefix($$, $1);
+    }
+    | SEGREG instr     {
+       $$ = $2;
+       cur_arch->parse.handle_seg_prefix($$, $1[0]);
+    }
+;
+
 datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
     | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
 ;
 
-dataval: expr_no_string        { $$ = dv_new_expr($1); }
+dataval: dvexpr                { $$ = dv_new_expr($1); }
     | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
@@ -219,19 +227,22 @@ label_id: ID          {
        $$ = $1;
        if (nasm_parser_locallabel_base)
            xfree(nasm_parser_locallabel_base);
-       nasm_parser_locallabel_base = xstrdup($1);
+       nasm_parser_locallabel_base_len = strlen($1);
+       nasm_parser_locallabel_base =
+           xmalloc(nasm_parser_locallabel_base_len+1);
+       strcpy(nasm_parser_locallabel_base, $1);
     }
     | SPECIAL_ID
     | LOCAL_ID
 ;
 
 /* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']'        {
-       xfree($2);
+directive: DIRECTIVE_NAME directive_val        {
+       xfree($1);
     }
-    | '[' DIRECTIVE_NAME error ']'             {
-       Error(_("invalid arguments to [%s]"), $2);
-       xfree($2);
+    | DIRECTIVE_NAME error             {
+       Error(_("invalid arguments to [%s]"), $1);
+       xfree($1);
     }
 ;
 
@@ -267,299 +278,85 @@ directive_valparam: direxpr      {
     | ID '=' direxpr           { vp_new($$, $1, $3); }
 ;
 
-/* register groupings */
-fpureg: ST0
-    | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
-    | DWORD reg_eax    { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
-    | DWORD reg_ecx    { $$ = $2; }
-;
-
-rawreg32: REG_EAX
-    | REG_ECX
-    | REG_EDX
-    | REG_EBX
-    | REG_ESP
-    | REG_EBP
-    | REG_ESI
-    | REG_EDI
-;
-
-reg32: rawreg32
-    | DWORD reg32      { $$ = $2; }
-;
-
-reg_ax: REG_AX
-    | WORD reg_ax      { $$ = $2; }
-;
-
-reg_cx: REG_CX
-    | WORD reg_cx      { $$ = $2; }
-;
-
-reg_dx: REG_DX
-    | WORD reg_dx      { $$ = $2; }
-;
-
-rawreg16: REG_AX
-    | REG_CX
-    | REG_DX
-    | REG_BX
-    | REG_SP
-    | REG_BP
-    | REG_SI
-    | REG_DI
-;
-
-reg16: rawreg16
-    | WORD reg16       { $$ = $2; }
-;
-
-reg_al: REG_AL
-    | BYTE reg_al      { $$ = $2; }
-;
-
-reg_cl: REG_CL
-    | BYTE reg_cl      { $$ = $2; }
-;
-
-reg8: REG_AL
-    | REG_CL
-    | REG_DL
-    | REG_BL
-    | REG_AH
-    | REG_CH
-    | REG_DH
-    | REG_BH
-    | BYTE reg8                { $$ = $2; }
-;
-
-reg_es: REG_ES
-    | WORD reg_es      { $$ = $2; }
-;
-
-reg_ss: REG_SS
-    | WORD reg_ss      { $$ = $2; }
-;
-
-reg_ds: REG_DS
-    | WORD reg_ds      { $$ = $2; }
-;
-
-reg_fs: REG_FS
-    | WORD reg_fs      { $$ = $2; }
-;
-
-reg_gs: REG_GS
-    | WORD reg_gs      { $$ = $2; }
-;
-
-reg_cs: REG_CS
-    | WORD reg_cs      { $$ = $2; }
-;
-
-segreg: REG_ES
-    | REG_SS
-    | REG_DS
-    | REG_FS
-    | REG_GS
-    | REG_CS
-    | WORD segreg      { $$ = $2; }
-;
-
 /* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated?  This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg.  I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | rawreg16                 { $$ = expr_new_ident(ExprReg($1, 16)); }
-    | rawreg32                 { $$ = expr_new_ident(ExprReg($1, 32)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| memexpr '||' memexpr   { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | memexpr '|' memexpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | memexpr '^' memexpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' memexpr      { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | memexpr '&' memexpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| memexpr '==' memexpr   { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| memexpr '>' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '<' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '>=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '<=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '!=' memexpr   { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | memexpr LEFT_OP memexpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | memexpr '+' memexpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | memexpr '-' memexpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | memexpr '*' memexpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | memexpr '/' memexpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | memexpr SIGNDIV memexpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | memexpr '%' memexpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | memexpr SIGNMOD memexpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' memexpr %prec UNARYOP        { $$ = $2; }
-    | '-' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' memexpr            { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' memexpr ')'          { $$ = $2; }
-    | STRING                   {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
+memaddr: expr              {
+       $$ = cur_arch->parse.ea_new_expr($1);
     }
-    | error                    { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr           {
-       $$ = x86_ea_new_expr($1);
-       x86_ea_set_segment($$, 0);
+    | SEGREG ':' memaddr    {
+       $$ = $3;
+       cur_arch->parse.handle_seg_override($$, $1[0]);
     }
-    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
     | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
     | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
     | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
     | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
 ;
 
-mem: '[' memaddr ']'   { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem                { $$ = $2; }
-    | BYTE mem8x       { $$ = $2; }
-;
-mem16x: WORD mem       { $$ = $2; }
-    | WORD mem16x      { $$ = $2; }
-;
-mem32x: DWORD mem      { $$ = $2; }
-    | DWORD mem32x     { $$ = $2; }
-;
-mem64x: QWORD mem      { $$ = $2; }
-    | QWORD mem64x     { $$ = $2; }
-;
-mem80x: TWORD mem      { $$ = $2; }
-    | TWORD mem80x     { $$ = $2; }
-;
-mem128x: DQWORD mem    { $$ = $2; }
-    | DQWORD mem128x   { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem                { $$ = $2; }
-    | FAR memfar       { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
-    | mem8x
-;
-mem16: mem
-    | mem16x
-;
-mem32: mem
-    | mem32x
-;
-mem64: mem
-    | mem64x
-;
-mem80: mem
-    | mem80x
-;
-mem128: mem
-    | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
-    | mem16x
-    | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8     { $$ = x86_ea_new_reg($1); }
-    | mem8x
-;
-rm16x: reg16   { $$ = x86_ea_new_reg($1); }
-    | mem16x
-;
-rm32x: reg32   { $$ = x86_ea_new_reg($1); }
-    | mem32x
-;
-/* not needed:
-rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
-    | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
-    | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8      { $$ = x86_ea_new_reg($1); }
-    | mem8
-;
-rm16: reg16    { $$ = x86_ea_new_reg($1); }
-    | mem16
-;
-rm32: reg32    { $$ = x86_ea_new_reg($1); }
-    | mem32
-;
-rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
-    | mem64
-;
-rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
-    | mem128
-;
-
-/* immediate values */
-imm: expr   { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm            { $$ = $2; }
-;
-imm16x: WORD imm    { $$ = $2; }
-;
-imm32x: DWORD imm   { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
-    | imm8x
-;
-imm16: imm
-    | imm16x
-;
-imm32: imm
-    | imm32x
+/* instruction operands */
+operands: operand          {
+       ops_initialize(&$$.operands);
+       ops_append(&$$.operands, $1);
+       $$.num_operands = 1;
+    }
+    | operands ',' operand  {
+       ops_append(&$1.operands, $3);
+       $$.operands = $1.operands;
+       $$.num_operands = $1.num_operands+1;
+    }
 ;
 
-/* jump targets */
-target: expr           {
-       $$.val = $1;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+operand: '[' memaddr ']'    { $$ = operand_new_mem($2); }
+    | expr                 { $$ = operand_new_imm($1); }
+    | SEGREG               { $$ = operand_new_segreg($1[0]); }
+    | BYTE operand         {
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 1)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 1;
     }
-    | SHORT target     {
+    | WORD operand         {
        $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 2)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 2;
     }
-    | NEAR target      {
+    | DWORD operand        {
        $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 4)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 4;
     }
+    | QWORD operand        {
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 8)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 8;
+    }
+    | TWORD operand        {
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 10)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 10;
+    }
+    | DQWORD operand       {
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 16)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 16;
+    }
+    | TARGETMOD operand            { $$ = $2; $$->targetmod = $1[0]; }
 ;
 
 /* expression trees */
@@ -587,9 +384,48 @@ direxpr: INTNUM                    { $$ = expr_new_ident(ExprInt($1)); }
     | '(' direxpr ')'          { $$ = $2; }
 ;
 
-expr_no_string: INTNUM         { $$ = expr_new_ident(ExprInt($1)); }
+dvexpr: INTNUM                 { $$ = expr_new_ident(ExprInt($1)); }
     | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
     | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
+    /*| dvexpr '||' dvexpr     { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
+    | dvexpr '|' dvexpr                { $$ = expr_new_tree($1, EXPR_OR, $3); }
+    | dvexpr '^' dvexpr                { $$ = expr_new_tree($1, EXPR_XOR, $3); }
+    /*| dvexpr '&&' dvexpr     { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
+    | dvexpr '&' dvexpr                { $$ = expr_new_tree($1, EXPR_AND, $3); }
+    /*| dvexpr '==' dvexpr     { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
+    /*| dvexpr '>' dvexpr      { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+    /*| dvexpr '<' dvexpr      { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+    /*| dvexpr '>=' dvexpr     { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+    /*| dvexpr '<=' dvexpr     { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+    /*| dvexpr '!=' dvexpr     { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
+    | dvexpr LEFT_OP dvexpr    { $$ = expr_new_tree($1, EXPR_SHL, $3); }
+    | dvexpr RIGHT_OP dvexpr   { $$ = expr_new_tree($1, EXPR_SHR, $3); }
+    | dvexpr '+' dvexpr                { $$ = expr_new_tree($1, EXPR_ADD, $3); }
+    | dvexpr '-' dvexpr                { $$ = expr_new_tree($1, EXPR_SUB, $3); }
+    | dvexpr '*' dvexpr                { $$ = expr_new_tree($1, EXPR_MUL, $3); }
+    | dvexpr '/' dvexpr                { $$ = expr_new_tree($1, EXPR_DIV, $3); }
+    | dvexpr SIGNDIV dvexpr    { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
+    | dvexpr '%' dvexpr                { $$ = expr_new_tree($1, EXPR_MOD, $3); }
+    | dvexpr SIGNMOD dvexpr    { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
+    | '+' dvexpr %prec UNARYOP  { $$ = $2; }
+    | '-' dvexpr %prec UNARYOP  { $$ = expr_new_branch(EXPR_NEG, $2); }
+    /*| '!' dvexpr             { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
+    | '~' dvexpr %prec UNARYOP  { $$ = expr_new_branch(EXPR_NOT, $2); }
+    | '(' dvexpr ')'           { $$ = $2; }
+;
+
+/* Expressions for operands and memory expressions.
+ * We don't attempt to check memory expressions for validity here.
+ * Essentially the same as dvexpr above but adds REG and STRING.
+ */
+expr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
+    | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
+    | REG                      { $$ = expr_new_ident(ExprReg($1[0])); }
+    | STRING                   {
+       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
+       xfree($1);
+    }
+    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
     /*| expr '||' expr         { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
     | expr '|' expr            { $$ = expr_new_tree($1, EXPR_OR, $3); }
     | expr '^' expr            { $$ = expr_new_tree($1, EXPR_XOR, $3); }
@@ -617,13 +453,6 @@ expr_no_string: INTNUM             { $$ = expr_new_ident(ExprInt($1)); }
     | '(' expr ')'             { $$ = $2; }
 ;
 
-expr: expr_no_string
-    | STRING           {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-;
-
 explabel: ID           {
        $$ = symrec_use($1);
        xfree($1);
@@ -652,46 +481,6 @@ explabel: ID               {
     }
 ;
 
-instr: /* empty */     {
-       idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
-       $$ = x86_bc_new_insn(&idata);
-    }
-    | instrbase
-    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
-    | REG_CS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
-    }
-    | REG_SS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
-    }
-    | REG_DS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
-    }
-    | REG_ES instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
-    }
-    | REG_FS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
-    }
-    | REG_GS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
-    }
-    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
-    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
 %%
 /*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
 
@@ -700,8 +489,6 @@ nasm_parser_directive(const char *name, valparamhead *valparams,
                      valparamhead *objext_valparams)
 {
     valparam *vp, *vp2;
-    const intnum *intn;
-    long lval;
 
     assert(cur_objfmt != NULL);
 
@@ -766,13 +553,25 @@ nasm_parser_directive(const char *name, valparamhead *valparams,
            vp->param = NULL;
        }
        nasm_parser_prev_bc = (bytecode *)NULL;
-    } else if (strcasecmp(name, "bits") == 0) {
-       if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
-           (intn = expr_get_intnum(&vp->param)) != NULL &&
-           (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
-           x86_mode_bits = (unsigned char)lval;
-       else
-           Error(_("invalid argument to [%s]"), "BITS");
+    } else if (strcasecmp(name, "cpu") == 0) {
+       vps_foreach(vp, valparams) {
+           if (vp->val)
+               cur_arch->parse.switch_cpu(vp->val);
+           else if (vp->param) {
+               const intnum *intcpu;
+               intcpu = expr_get_intnum(&vp->param);
+               if (!intcpu)
+                   Error(_("invalid argument to [%s]"), "CPU");
+               else {
+                   char strcpu[16];
+                   sprintf(strcpu, "%lu", intnum_get_uint(intcpu));
+                   cur_arch->parse.switch_cpu(strcpu);
+               }
+           }
+       }
+    } else if (!cur_arch->parse.directive(name, valparams, objext_valparams,
+                                         &nasm_parser_sections)) {
+       ;
     } else if (cur_objfmt->directive(name, valparams, objext_valparams,
                                     &nasm_parser_sections)) {
        Error(_("unrecognized directive [%s]"), name);
index b7d669722351f64ccebd27082e9298bb30193723..e21e9554e9e540021410cc2d31ddcef6e64cc08f 100644 (file)
@@ -34,6 +34,7 @@ extern FILE *nasm_parser_in;
 extern int nasm_parser_debug;
 
 extern int nasm_parser_parse(void);
+extern void nasm_parser_cleanup(void);
 
 size_t (*nasm_parser_input) (char *buf, size_t max_size);
 
@@ -58,6 +59,8 @@ nasm_parser_do_parse(parser *p, FILE *f, const char *in_filename)
 
     nasm_parser_parse();
 
+    nasm_parser_cleanup();
+
     /* Free locallabel base if necessary */
     if (nasm_parser_locallabel_base)
        xfree(nasm_parser_locallabel_base);
diff --git a/modules/parsers/nasm/nasm-token.re b/modules/parsers/nasm/nasm-token.re
new file mode 100644 (file)
index 0000000..ab0aa11
--- /dev/null
@@ -0,0 +1,516 @@
+/*
+ * NASM-compatible re2c lexer
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  Portions based on re2c's example code.
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+#include "src/parsers/nasm/nasm-defs.h"
+#include "nasm-bison.h"
+
+
+#define BSIZE  8192
+
+#define YYCTYPE                char
+#define YYCURSOR       cursor
+#define YYLIMIT                s.lim
+#define YYMARKER       s.ptr
+#define YYFILL(n)      {cursor = fill(cursor);}
+
+#define RETURN(i)      {s.cur = cursor; return i;}
+
+#define SCANINIT()     { \
+       s.tchar = cursor - s.pos; \
+       s.tline = s.cline; \
+       s.tok = cursor; \
+    }
+
+#define TOKLEN         (cursor-s.tok)
+
+void nasm_parser_cleanup(void);
+void nasm_parser_set_directive_state(void);
+int nasm_parser_lex(void);
+
+extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
+
+
+/* Lexer buffer state (the header notes portions are based on re2c's
+ * example code). */
+typedef struct Scanner {
+    /* bot: start of the allocated buffer; tok: start of the current token
+     * (set by SCANINIT); ptr: re2c backtrack marker (YYMARKER); cur: cursor
+     * saved between lexer calls (see RETURN); pos: reference point that
+     * tchar is measured from -- presumably the start of the current line
+     * (TODO confirm); lim: end of valid data (YYLIMIT); top: end of the
+     * allocated buffer; eof: set by fill() on a short read, NULL before. */
+    YYCTYPE            *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    /* tchar, tline: offset from pos and line recorded at token start by
+     * SCANINIT; cline: line counter that tline is copied from. */
+    unsigned int       tchar, tline, cline;
+} Scanner;
+
+static Scanner s = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 1 };
+
+FILE *nasm_parser_in = NULL;
+
+/*
+ * Refill the scanner buffer (invoked through YYFILL).
+ *
+ * Everything before the current token is discarded by sliding the live
+ * data down to the start of the buffer.  If fewer than BSIZE bytes of room
+ * remain past s.lim, a larger buffer is allocated and the live data is
+ * carried over.  Up to BSIZE more bytes are then read through
+ * nasm_parser_input(); a short read is taken as end of input, in which
+ * case a '\n' is written right after the data and s.eof is set to point
+ * just past it.
+ *
+ * All scanner pointers are rebased whenever the data moves.  The caller's
+ * cursor is rebased the same way and the adjusted value is returned.
+ * Once s.eof is set this function does nothing.
+ */
+static YYCTYPE *
+fill(YYCTYPE *cursor)
+{
+    if(!s.eof){
+       size_t cnt = s.tok - s.bot;
+       if(cnt){
+           /* Source and destination are in the same buffer and overlap
+            * whenever the live data is longer than cnt, so memmove (not
+            * memcpy) is required here. */
+           memmove(s.bot, s.tok, s.lim - s.tok);
+           s.tok = s.bot;
+           s.ptr -= cnt;
+           cursor -= cnt;
+           s.pos -= cnt;
+           s.lim -= cnt;
+       }
+       if((s.top - s.lim) < BSIZE){
+           char *buf = xmalloc((s.lim - s.bot) + BSIZE);
+           /* On the very first call there is no old buffer (every pointer
+            * is still NULL); only copy when there is live data to carry,
+            * so memcpy is never handed a NULL source. */
+           if(s.lim != s.tok)
+               memcpy(buf, s.tok, s.lim - s.tok);
+           s.tok = buf;
+           s.ptr = &buf[s.ptr - s.bot];
+           cursor = &buf[cursor - s.bot];
+           s.pos = &buf[s.pos - s.bot];
+           s.lim = &buf[s.lim - s.bot];
+           s.top = &s.lim[BSIZE];
+           if (s.bot)
+               xfree(s.bot);
+           s.bot = buf;
+       }
+       if((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE){
+           /* Short read: terminate the input with a newline and remember
+            * where it ends. */
+           s.eof = &s.lim[cnt]; *s.eof++ = '\n';
+       }
+       s.lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Release the scanner's input buffer once parsing is finished.
+ *
+ * The other scanner pointers are derived from that buffer, so they are
+ * cleared together with it.  This keeps a repeated call from freeing the
+ * buffer twice and leaves no dangling pointers in the file-static scanner.
+ */
+void
+nasm_parser_cleanup(void)
+{
+    if (s.bot)
+       xfree(s.bot);
+    s.bot = s.tok = s.ptr = s.cur = s.pos = s.lim = s.top = s.eof = NULL;
+}
+
+/* starting size of string buffer */
+#define STRBUF_ALLOC_SIZE      128
+
+/* string buffer used when parsing strings/character constants */
+static char *strbuf = (char *)NULL;
+
+/* length of strbuf (including terminating NUL character) */
+static size_t strbuf_size = 0;
+
+/* last "base" label for local (.) labels */
+char *nasm_parser_locallabel_base = (char *)NULL;
+size_t nasm_parser_locallabel_base_len = 0;
+
+static int linechg_numcount;
+
+/*!re2c
+  any = [\000-\377];
+  digit = [0-9];
+  iletter = [a-zA-Z];
+  bindigit = [01];
+  octdigit = [0-7];
+  hexdigit = [0-9a-fA-F];
+  ws = [ \t\r];
+  quot = ["'];
+  A = [aA];
+  B = [bB];
+  C = [cC];
+  D = [dD];
+  E = [eE];
+  F = [fF];
+  G = [gG];
+  H = [hH];
+  I = [iI];
+  J = [jJ];
+  K = [kK];
+  L = [lL];
+  M = [mM];
+  N = [nN];
+  O = [oO];
+  P = [pP];
+  Q = [qQ];
+  R = [rR];
+  S = [sS];
+  T = [tT];
+  U = [uU];
+  V = [vV];
+  W = [wW];
+  X = [xX];
+  Y = [yY];
+  Z = [zZ];
+*/
+
+/* Current lexer mode.  nasm_parser_lex() dispatches on this at entry:
+ * DIRECTIVE, LINECHG and LINECHG2 jump to their own scanning sections,
+ * and any other value falls through to the normal scanner. */
+static enum {
+    INITIAL,
+    DIRECTIVE,
+    DIRECTIVE2,
+    LINECHG,
+    LINECHG2
+} state = INITIAL;
+
+/* Put the lexer into the DIRECTIVE state, so that the next call to
+ * nasm_parser_lex() jumps to its directive section.  The grammar calls
+ * this from a mid-rule action right after the opening '[' of a directive
+ * line. */
+void
+nasm_parser_set_directive_state(void)
+{
+    state = DIRECTIVE;
+}
+
+int
+nasm_parser_lex(void)
+{
+    YYCTYPE *cursor = s.cur;
+    YYCTYPE endch;
+    size_t count, len;
+    YYCTYPE savech;
+    arch_check_id_retval check_id_ret;
+
+    /* Catch EOF */
+    if (s.eof && cursor == s.eof)
+       return 0;
+
+    /* Jump to proper "exclusive" states */
+    switch (state) {
+       case DIRECTIVE:
+           goto directive;
+       case LINECHG:
+           goto linechg;
+       case LINECHG2:
+           goto linechg2;
+       default:
+           break;
+    }
+
+scan:
+    SCANINIT();
+
+    /*!re2c
+       /* standard decimal integer */
+       digit+ {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.intn = intnum_new_dec(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+       /* 10010011b - binary number.  The suffix is matched with the B
+        * character class so that an uppercase suffix is accepted too, as
+        * it was by the previous (case-insensitive) lexer.
+        */
+       bindigit+ B {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */
+           yylval.intn = intnum_new_bin(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* 777q - octal number.  The suffix is matched with the Q character
+        * class so that an uppercase suffix is accepted too, as it was by
+        * the previous (case-insensitive) lexer.
+        */
+       octdigit+ Q {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */
+           yylval.intn = intnum_new_oct(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* 0AAh form of hexadecimal number.  A single leading digit followed
+        * by zero or more hex digits is enough (so "5h" is valid), and the
+        * suffix may be in either case, matching the previous lexer's
+        * {DIGIT}{HEXDIGIT}*h case-insensitive rule.
+        */
+       digit hexdigit* H {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */
+           yylval.intn = intnum_new_hex(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* $0AA and 0xAA forms of hexadecimal number.  The 'x' of the 0x
+        * prefix is matched in either case, as in the previous
+        * (case-insensitive) lexer.
+        */
+       (("$" digit) | ("0" X)) hexdigit+ {
+           /* temporarily NUL-terminate the token for the conversion */
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           if (s.tok[1] == 'x' || s.tok[1] == 'X')
+               yylval.intn = intnum_new_hex(s.tok+2);  /* skip 0 and x */
+           else
+               yylval.intn = intnum_new_hex(s.tok+1);  /* don't skip 0 */
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+
+       /* floating point value.  The exponent marker is matched with the E
+        * character class so that "1.0E5" is accepted as well as "1.0e5",
+        * as it was by the previous (case-insensitive) lexer.
+        */
+       digit+ "." digit* (E [-+]? digit+)? {
+           /* temporarily NUL-terminate the token for the conversion */
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.flt = floatnum_new(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(FLTNUM);
+       }
+
+       /* string/character constant values */
+       quot {
+           endch = s.tok[0];
+           goto stringconst;
+       }
+
+       /* %line linenum+lineinc filename */
+       "%line" {
+           state = LINECHG;
+           linechg_numcount = 0;
+           RETURN(LINE);
+       }
+
+       /* size specifiers */
+       B Y T E         { yylval.int_info = 1; RETURN(BYTE); }
+       W O R D         { yylval.int_info = 2; RETURN(WORD); }
+       D W O R D       { yylval.int_info = 4; RETURN(DWORD); }
+       Q W O R D       { yylval.int_info = 8; RETURN(QWORD); }
+       T W O R D       { yylval.int_info = 10; RETURN(TWORD); }
+       D Q W O R D     { yylval.int_info = 16; RETURN(DQWORD); }
+
+       /* pseudo-instructions */
+       D B             { yylval.int_info = 1; RETURN(DECLARE_DATA); }
+       D W             { yylval.int_info = 2; RETURN(DECLARE_DATA); }
+       D D             { yylval.int_info = 4; RETURN(DECLARE_DATA); }
+       D Q             { yylval.int_info = 8; RETURN(DECLARE_DATA); }
+       D T             { yylval.int_info = 10; RETURN(DECLARE_DATA); }
+
+       R E S B         { yylval.int_info = 1; RETURN(RESERVE_SPACE); }
+       R E S W         { yylval.int_info = 2; RETURN(RESERVE_SPACE); }
+       R E S D         { yylval.int_info = 4; RETURN(RESERVE_SPACE); }
+       R E S Q         { yylval.int_info = 8; RETURN(RESERVE_SPACE); }
+       R E S T         { yylval.int_info = 10; RETURN(RESERVE_SPACE); }
+
+       I N C B I N     { RETURN(INCBIN); }
+
+       E Q U           { RETURN(EQU); }
+
+       T I M E S       { RETURN(TIMES); }
+
+       S E G           { RETURN(SEG); }
+       W R T           { RETURN(WRT); }
+
+       N O S P L I T   { RETURN(NOSPLIT); }
+
+       T O             { RETURN(TO); }
+
+       /* operators */
+       "<<"                    { RETURN(LEFT_OP); }
+       ">>"                    { RETURN(RIGHT_OP); }
+       "//"                    { RETURN(SIGNDIV); }
+       "%%"                    { RETURN(SIGNMOD); }
+       "$$"                    { RETURN(START_SECTION_ID); }
+       [-+|^*&/%~$():=,\[]     { RETURN(s.tok[0]); }
+
+       /* handle ] separately for directives */
+       "]" {
+           if (state == DIRECTIVE2)
+               state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       /* special non-local ..@label and labels like ..start */
+       ".." [a-zA-Z0-9_$#@~.?]+ {
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(SPECIAL_ID);
+       }
+
+       /* local label (.label)
+        * Returns LOCAL_ID with the label text prefixed by the last
+        * non-local label (nasm_parser_locallabel_base).  In the DIRECTIVE2
+        * state the text is returned unchanged as a plain ID instead.
+        */
+       "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+           /* override local labels in directive state */
+           if (state == DIRECTIVE2) {
+               yylval.str_val = xstrndup(s.tok, TOKLEN);
+               RETURN(ID);
+           } else if (!nasm_parser_locallabel_base) {
+               /* The warning prints the whole label with %s, so the token
+                * has to be temporarily NUL-terminated; passing s.tok[0] (a
+                * single character) for %s is undefined behavior.
+                */
+               savech = s.tok[TOKLEN];
+               s.tok[TOKLEN] = '\0';
+               Warning(_("no non-local label before `%s'"), s.tok);
+               s.tok[TOKLEN] = savech;
+               yylval.str_val = xstrndup(s.tok, TOKLEN);
+           } else {
+               /* base label followed by the local label text */
+               len = TOKLEN + nasm_parser_locallabel_base_len;
+               yylval.str_val = xmalloc(len + 1);
+               strcpy(yylval.str_val, nasm_parser_locallabel_base);
+               strncat(yylval.str_val, s.tok, TOKLEN);
+               yylval.str_val[len] = '\0';
+           }
+
+           RETURN(LOCAL_ID);
+       }
+
+       /* forced identifier */
+       "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(ID);
+       }
+
+       /* identifier that may be a register, instruction, etc. */
+       [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+                                                           s.tok);
+           s.tok[TOKLEN] = savech;
+           switch (check_id_ret) {
+               case ARCH_CHECK_ID_NONE:
+                   /* Just an identifier, return as such. */
+                   yylval.str_val = xstrndup(s.tok, TOKLEN);
+                   RETURN(ID);
+               case ARCH_CHECK_ID_INSN:
+                   RETURN(INSN);
+               case ARCH_CHECK_ID_PREFIX:
+                   RETURN(PREFIX);
+               case ARCH_CHECK_ID_REG:
+                   RETURN(REG);
+               case ARCH_CHECK_ID_SEGREG:
+                   RETURN(SEGREG);
+               case ARCH_CHECK_ID_TARGETMOD:
+                   RETURN(TARGETMOD);
+               default:
+                   Warning(_("Arch feature not supported, treating as identifier"));
+                   yylval.str_val = xstrndup(s.tok, TOKLEN);
+                   RETURN(ID);
+           }
+       }
+
+       ";" (any \ [\n])*       { goto scan; }
+
+       ws+                     { goto scan; }
+
+       "\n"                    { state = INITIAL; RETURN(s.tok[0]); }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto scan;
+       }
+    */
+
+    /* %line linenum+lineinc filename */
+linechg:
+    SCANINIT();
+
+    /*!re2c
+       digit+ {
+           linechg_numcount++;
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.intn = intnum_new_dec(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+
+       "\n" {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       "+" {
+           RETURN(s.tok[0]);
+       }
+
+       ws+ {
+           /* Only whitespace seen after both numbers (linenum and lineinc)
+            * introduces the filename.  Any other whitespace is skipped and
+            * scanning continues in the LINECHG state.
+            */
+           if (linechg_numcount == 2) {
+               state = LINECHG2;
+               goto linechg2;
+           }
+           goto linechg;
+       }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto linechg;
+       }
+    */
+
+linechg2:
+    SCANINIT();
+
+    /*!re2c
+       "\n" {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       "\r" { }
+
+       (any \ [\r\n])+ {
+           state = LINECHG;
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(FILENAME);
+       }
+    */
+
+    /* directive: [name value] */
+directive:
+    SCANINIT();
+
+    /*!re2c
+       [\]\n] {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       iletter+ {
+           state = DIRECTIVE2;
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(DIRECTIVE_NAME);
+       }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto directive;
+       }
+    */
+
+    /* string/character constant values
+     * Entered from the quot rule with endch set to the opening quote
+     * character.  Characters are collected into a freshly allocated buffer
+     * until endch or a newline is seen; the buffer is then handed to the
+     * parser through yylval.str_val (it is not freed here).
+     */
+stringconst:
+    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+    strbuf_size = STRBUF_ALLOC_SIZE;
+    count = 0;
+
+stringconst_scan:
+    SCANINIT();
+
+    /*!re2c
+       /* Newline before the closing quote: report the error but still
+        * return what was collected so far as a STRING.  The input filler
+        * appends a '\n' at end of input and leaves s.eof just past it, so
+        * cursor == s.eof distinguishes end of file from a plain newline.
+        */
+       "\n"    {
+           if (cursor == s.eof)
+               Error(_("unexpected end of file in string"));
+           else
+               Error(_("unterminated string"));
+           strbuf[count] = '\0';
+           yylval.str_val = strbuf;
+           RETURN(STRING);
+       }
+
+       any     {
+           /* closing quote: terminate and return the collected string */
+           if (s.tok[0] == endch) {
+               strbuf[count] = '\0';
+               yylval.str_val = strbuf;
+               RETURN(STRING);
+           }
+
+           /* Store the character, then grow the buffer in
+            * STRBUF_ALLOC_SIZE steps as soon as it is full, so there is
+            * always room left for the terminating NUL written above.
+            */
+           strbuf[count++] = s.tok[0];
+           if (count >= strbuf_size) {
+               strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
+               strbuf_size += STRBUF_ALLOC_SIZE;
+           }
+
+           goto stringconst_scan;
+       }
+    */
+}
diff --git a/modules/parsers/nasm/token.l.in b/modules/parsers/nasm/token.l.in
deleted file mode 100644 (file)
index 7f45855..0000000
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * NASM-compatible lex lexer
- *
- *  Copyright (C) 2001  Peter Johnson
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#include "bitvect.h"
-
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-#include "nasm-bison.h"
-
-
-#define YY_NEVER_INTERACTIVE   1
-
-int nasm_parser_lex(void);
-
-extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
-#undef YY_INPUT
-#define YY_INPUT(b, r, ms)     (r = nasm_parser_input(b, ms))
-
-/* starting size of string buffer */
-#define STRBUF_ALLOC_SIZE      128
-
-/* string buffer used when parsing strings/character constants */
-static char *strbuf = (char *)NULL;
-
-/* length of strbuf (including terminating NULL character) */
-static size_t strbuf_size = 0;
-
-/* last "base" label for local (.) labels */
-char *nasm_parser_locallabel_base = (char *)NULL;
-
-static int linechg_numcount;
-
-%}
-%option noyywrap
-%option nounput
-%option case-insensitive
-%option never-interactive
-%option prefix="nasm_parser_"
-%option outfile="lex.yy.c"
-
-%x DIRECTIVE LINECHG LINECHG2
-%s DIRECTIVE2
-
-DIGIT    [0-9]
-BINDIGIT [01]
-OCTDIGIT [0-7]
-HEXDIGIT [0-9a-f]
-WS       [ \t\r]
-
-%%
-
-    /* standard decimal integer */
-{DIGIT}+ {
-    yylval.intn = intnum_new_dec(yytext);
-    return INTNUM;
-}
-
-    /* 10010011b - binary number */
-{BINDIGIT}+b {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'b' */
-    yylval.intn = intnum_new_bin(yytext);
-    return INTNUM;
-}
-
-    /* 777q - octal number */
-{OCTDIGIT}+q {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'q' */
-    yylval.intn = intnum_new_oct(yytext);
-    return INTNUM;
-}
-
-    /* 0AAh form of hexidecimal number */
-{DIGIT}{HEXDIGIT}*h {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'h' */
-    yylval.intn = intnum_new_hex(yytext);
-    return INTNUM;
-}
-
-    /* $0AA and 0xAA forms of hexidecimal number */
-(\${DIGIT}|0x){HEXDIGIT}+ {
-    if (yytext[1] == 'x')
-       yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */
-    else
-       yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */
-    return INTNUM;
-}
-
-    /* floating point value */
-{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
-    yylval.flt = floatnum_new(yytext);
-    return FLTNUM;
-}
-
-    /* string/character constant values */
-["']   {
-    int inch, count;
-    char endch = yytext[0];
-
-    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
-
-    strbuf_size = STRBUF_ALLOC_SIZE;
-    inch = input();
-    count = 0;
-    while (inch != EOF && inch != endch && inch != '\n') {
-       strbuf[count++] = inch;
-       if (count >= strbuf_size) {
-           strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
-           if (!strbuf)
-               Fatal(FATAL_NOMEM);
-           strbuf_size += STRBUF_ALLOC_SIZE;
-       }
-       inch = input();
-    }
-
-    if (inch == '\n')
-       Error(_("unterminated string"));
-    else if (inch == EOF)
-       Error(_("unexpected end of file in string"));
-
-    strbuf[count] = '\0';
-
-    yylval.str_val = strbuf;
-    return STRING;
-}
-
-    /* %line linenum+lineinc filename */
-^%line                 { BEGIN LINECHG; linechg_numcount = 0; return LINE; }
-<LINECHG>{DIGIT}+      {
-    linechg_numcount++;
-    yylval.intn = intnum_new_dec(yytext);
-    return INTNUM;
-}
-<LINECHG>\n            { BEGIN INITIAL; return '\n'; }
-<LINECHG>[+]           { return yytext[0]; }
-<LINECHG>{WS}+         {
-    if (linechg_numcount == 2)
-       BEGIN LINECHG2;
-}
-<LINECHG2>\n           { BEGIN INITIAL; return '\n'; }
-<LINECHG2>\r           ;
-<LINECHG2>[^\r\n]+     {
-    BEGIN LINECHG;
-    yylval.str_val = xstrdup(yytext);
-    return FILENAME;
-}
-
-    /* directive: [name value] */
-^{WS}*"["          { BEGIN DIRECTIVE; return '['; }
-<DIRECTIVE>"]"     { BEGIN INITIAL; return ']'; }
-<DIRECTIVE2>"]"            { BEGIN INITIAL; return ']'; }
-<DIRECTIVE>\n      { BEGIN INITIAL; return '\n'; }
-<DIRECTIVE2>\n     { BEGIN INITIAL; return '\n'; }
-
-<DIRECTIVE>[a-z]+   {
-    BEGIN DIRECTIVE2;
-    yylval.str_val = xstrdup(yytext);
-    return DIRECTIVE_NAME;
-}
-<DIRECTIVE>. {
-    if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
-       Warning(_("ignoring unrecognized character `%s'"),
-               conv_unprint(yytext[0]));
-}
-
-    /* override local labels in directive state */
-<DIRECTIVE2>\.[a-z0-9_$#@~.?]* {
-    yylval.str_val = xstrdup(yytext);
-    return ID;
-}
-
-    /* size specifiers */
-byte   { yylval.int_info = 1; return BYTE; }
-word   { yylval.int_info = 2; return WORD; }
-dword  { yylval.int_info = 4; return DWORD; }
-qword  { yylval.int_info = 8; return QWORD; }
-tword  { yylval.int_info = 10; return TWORD; }
-dqword { yylval.int_info = 16; return DQWORD; }
-
-    /* pseudo-instructions */
-db     { yylval.int_info = 1; return DECLARE_DATA; }
-dw     { yylval.int_info = 2; return DECLARE_DATA; }
-dd     { yylval.int_info = 4; return DECLARE_DATA; }
-dq     { yylval.int_info = 8; return DECLARE_DATA; }
-dt     { yylval.int_info = 10; return DECLARE_DATA; }
-
-resb   { yylval.int_info = 1; return RESERVE_SPACE; }
-resw   { yylval.int_info = 2; return RESERVE_SPACE; }
-resd   { yylval.int_info = 4; return RESERVE_SPACE; }
-resq   { yylval.int_info = 8; return RESERVE_SPACE; }
-rest   { yylval.int_info = 10; return RESERVE_SPACE; }
-
-incbin { return INCBIN; }
-
-equ    { return EQU; }
-
-times  { return TIMES; }
-
-seg    { return SEG; }
-wrt    { return WRT; }
-near   { return NEAR; }
-short  { return SHORT; }
-far    { return FAR; }
-
-nosplit        { return NOSPLIT; }
-
-org    { return ORG; }
-
-to     { return TO; }
-
-    /* operand size overrides */
-o16    { yylval.int_info = 16; return OPERSIZE; }
-o32    { yylval.int_info = 32; return OPERSIZE; }
-    /* address size overrides */
-a16    { yylval.int_info = 16; return ADDRSIZE; }
-a32    { yylval.int_info = 32; return ADDRSIZE; }
-
-    /* instruction prefixes */
-lock   { return LOCK; }
-repne  { return REPNZ; }
-repnz  { return REPNZ; }
-rep    { return REP; }
-repe   { return REPZ; }
-repz   { return REPZ; }
-
-    /* control, debug, and test registers */
-cr4            { yylval.int_info = 4; return CR4; }
-cr[023]                { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; }
-dr[0-367]      { yylval.int_info = yytext[2]-'0'; return DRREG; }
-tr[3-7]                { yylval.int_info = yytext[2]-'0'; return TRREG; }
-
-    /* floating point, MMX, and SSE registers */
-st0        { yylval.int_info = 0; return ST0; }
-st[1-7]            { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; }
-mm[0-7]            { yylval.int_info = yytext[2]-'0'; return MMXREG; }
-xmm[0-7]    { yylval.int_info = yytext[3]-'0'; return XMMREG; }
-
-    /* integer registers */
-eax    { yylval.int_info = 0; return REG_EAX; }
-ecx    { yylval.int_info = 1; return REG_ECX; }
-edx    { yylval.int_info = 2; return REG_EDX; }
-ebx    { yylval.int_info = 3; return REG_EBX; }
-esp    { yylval.int_info = 4; return REG_ESP; }
-ebp    { yylval.int_info = 5; return REG_EBP; }
-esi    { yylval.int_info = 6; return REG_ESI; }
-edi    { yylval.int_info = 7; return REG_EDI; }
-
-ax     { yylval.int_info = 0; return REG_AX; }
-cx     { yylval.int_info = 1; return REG_CX; }
-dx     { yylval.int_info = 2; return REG_DX; }
-bx     { yylval.int_info = 3; return REG_BX; }
-sp     { yylval.int_info = 4; return REG_SP; }
-bp     { yylval.int_info = 5; return REG_BP; }
-si     { yylval.int_info = 6; return REG_SI; }
-di     { yylval.int_info = 7; return REG_DI; }
-
-al     { yylval.int_info = 0; return REG_AL; }
-cl     { yylval.int_info = 1; return REG_CL; }
-dl     { yylval.int_info = 2; return REG_DL; }
-bl     { yylval.int_info = 3; return REG_BL; }
-ah     { yylval.int_info = 4; return REG_AH; }
-ch     { yylval.int_info = 5; return REG_CH; }
-dh     { yylval.int_info = 6; return REG_DH; }
-bh     { yylval.int_info = 7; return REG_BH; }
-
-    /* segment registers */
-es     { yylval.int_info = 0; return REG_ES; }
-cs     { yylval.int_info = 1; return REG_CS; }
-ss     { yylval.int_info = 2; return REG_SS; }
-ds     { yylval.int_info = 3; return REG_DS; }
-fs     { yylval.int_info = 4; return REG_FS; }
-gs     { yylval.int_info = 5; return REG_GS; }
-
-    /* operators */
-"<<"                   { return LEFT_OP; }
-">>"                   { return RIGHT_OP; }
-"//"                   { return SIGNDIV; }
-"%%"                   { return SIGNMOD; }
-"$$"                   { return START_SECTION_ID; }
-[-+|^&*/%~$():[\]=,]   { return yytext[0]; }
-
-    /* special non-local ..@label and labels like ..start */
-\.\.[a-z0-9_$#@~.?]+ {
-    yylval.str_val = xstrdup(yytext);
-    return SPECIAL_ID;
-}
-
-    /* local label (.label) */
-\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
-    if (!nasm_parser_locallabel_base) {
-       Warning(_("no non-local label before `%s'"), yytext);
-       yylval.str_val = xstrdup(yytext);
-    } else {
-       yylval.str_val = xmalloc(strlen(yytext) +
-                                strlen(nasm_parser_locallabel_base) + 1);
-       strcpy(yylval.str_val, nasm_parser_locallabel_base);
-       strcat(yylval.str_val, yytext);
-    }
-
-    return LOCAL_ID;
-}
-
-    /* instructions */
-    /* @INSTRUCTIONS@ */
-
-    /* label */
-[a-z_?][a-z0-9_$#@~.?]* {
-    yylval.str_val = xstrdup(yytext);
-    return ID;
-}
-
-;.*    ;
-
-{WS}+  ;
-
-\n     return '\n';
-
-.      {
-    if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
-       Warning(_("ignoring unrecognized character `%s'"),
-               conv_unprint(yytext[0]));
-}
-
index 5a997421b5ae1c2766c9a8b425d6a93505f6f35f..780fb26ed1bb76b4cf37689c3a1dd606c68dcefc 100644 (file)
 #include "util.h"
 /*@unused@*/ RCSID("$IdPath$");
 
+#include "globals.h"
+#include "expr.h"
+
 #include "bytecode.h"
 
 #include "arch.h"
 
+
 arch *cur_arch;
 
+/* Allocates a register operand holding the arch register value reg, with no
+ * target modifier and no user-specified size.
+ */
+insn_operand *
+operand_new_reg(unsigned long reg)
+{
+    insn_operand *op;
+
+    op = xmalloc(sizeof(*op));
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.reg = reg;
+    op->type = INSN_OPERAND_REG;
+
+    return op;
+}
+
+/* Allocates a segment register operand holding the arch value segreg, with
+ * no target modifier and no user-specified size.
+ */
+insn_operand *
+operand_new_segreg(unsigned long segreg)
+{
+    insn_operand *op;
+
+    op = xmalloc(sizeof(*op));
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.reg = segreg;
+    op->type = INSN_OPERAND_SEGREG;
+
+    return op;
+}
+
+/* Allocates a memory operand that stores the effective address pointer ea
+ * (no copy is made), with no target modifier and no user-specified size.
+ */
+insn_operand *
+operand_new_mem(/*@only@*/ effaddr *ea)
+{
+    insn_operand *op;
+
+    op = xmalloc(sizeof(*op));
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.ea = ea;
+    op->type = INSN_OPERAND_MEMORY;
+
+    return op;
+}
+
+/* Builds an operand from the expression val.  When expr_get_reg() finds a
+ * register in val, a register operand is returned instead and val is
+ * deleted; otherwise an immediate operand storing val is returned.
+ */
+insn_operand *
+operand_new_imm(/*@only@*/ expr *val)
+{
+    insn_operand *op;
+    const unsigned long *regp = expr_get_reg(&val, 0);
+
+    if (regp) {
+       /* read the register value before the expression is deleted */
+       op = operand_new_reg(*regp);
+       expr_delete(val);
+       return op;
+    }
+
+    op = xmalloc(sizeof(*op));
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.val = val;
+    op->type = INSN_OPERAND_IMM;
+
+    return op;
+}
+
+/* Writes a description of op to f: first the type-specific value, then the
+ * target modifier and size one indentation level deeper.
+ */
+void
+operand_print(FILE *f, const insn_operand *op)
+{
+    if (op->type == INSN_OPERAND_REG) {
+       fprintf(f, "%*sReg=", indent_level, "");
+       cur_arch->reg_print(f, op->data.reg);
+       fprintf(f, "\n");
+    } else if (op->type == INSN_OPERAND_SEGREG) {
+       fprintf(f, "%*sSegReg=", indent_level, "");
+       cur_arch->segreg_print(f, op->data.reg);
+       fprintf(f, "\n");
+    } else if (op->type == INSN_OPERAND_MEMORY) {
+       fprintf(f, "%*sMemory=\n", indent_level, "");
+       /* the effective address is printed one level deeper */
+       indent_level++;
+       ea_print(f, op->data.ea);
+       indent_level--;
+    } else if (op->type == INSN_OPERAND_IMM) {
+       fprintf(f, "%*sImm=", indent_level, "");
+       expr_print(f, op->data.val);
+       fprintf(f, "\n");
+    }
+
+    fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod);
+    fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size);
+}
+
+/* Frees every operand in the list headp and leaves the list empty.  When
+ * content is nonzero, the effective address or expression owned by memory
+ * and immediate operands is deleted as well.
+ */
+void
+ops_delete(insn_operandhead *headp, int content)
+{
+    insn_operand *op, *following;
+
+    for (op = STAILQ_FIRST(headp); op; op = following) {
+       /* fetch the successor before op is freed */
+       following = STAILQ_NEXT(op, link);
+       if (content) {
+           if (op->type == INSN_OPERAND_MEMORY)
+               ea_delete(op->data.ea);
+           else if (op->type == INSN_OPERAND_IMM)
+               expr_delete(op->data.val);
+       }
+       xfree(op);
+    }
+    STAILQ_INIT(headp);
+}
+
+/* Appends op to the tail of headp and returns it; a NULL op is not appended
+ * and NULL is returned.
+ */
+/*@null@*/ insn_operand *
+ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op)
+{
+    if (!op)
+       return (insn_operand *)NULL;
+
+    STAILQ_INSERT_TAIL(headp, op, link);
+    return op;
+}
+
+/* Prints every operand of the list headp to f, in list order. */
+void
+ops_print(FILE *f, const insn_operandhead *headp)
+{
+    insn_operand *op;
+
+    for (op = STAILQ_FIRST(headp); op; op = STAILQ_NEXT(op, link))
+       operand_print(f, op);
+}
index 2e53ae3024f1cbc7243ed4dab526044eb92cb36b..18e3faf6b813d1ef6668aa449552afcf6f020acf 100644 (file)
@@ -1,7 +1,7 @@
 /* $IdPath$
  * Architecture header file
  *
- *  Copyright (C) 2001  Peter Johnson
+ *  Copyright (C) 2002  Peter Johnson
  *
  *  This file is part of YASM.
  *
 #ifndef YASM_ARCH_H
 #define YASM_ARCH_H
 
+/* Classification of an identifier, as returned by the architecture's
+ * parse.check_identifier() function.
+ */
+typedef enum arch_check_id_retval {
+    ARCH_CHECK_ID_NONE = 0,    /* just a normal identifier */
+    ARCH_CHECK_ID_INSN,                /* an instruction */
+    ARCH_CHECK_ID_PREFIX,      /* an instruction prefix */ 
+    ARCH_CHECK_ID_REG,         /* a register */
+    ARCH_CHECK_ID_SEGREG,      /* a segment register (for memory overrides) */
+    ARCH_CHECK_ID_TARGETMOD    /* a target modifier (for jumps) */
+} arch_check_id_retval;
+
+typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand)
+       insn_operandhead;
+
+typedef struct insn_operand insn_operand;
+
+/* Different assemblers order instruction operands differently.  Also, some
+ * differ on how exactly various registers are specified.  There's no great
+ * solution to this, as the parsers aren't supposed to have knowledge of the
+ * architectural internals, and the architecture is supposed to be parser-
+ * independent.  To make things work, as a rather hackish solution, we give the
+ * architecture a little knowledge about the general "flavor" of the parser,
+ * and let the architecture decide what to do with it.  Most architectures will
+ * probably not even use this, but it's required for some (x86 in particular)
+ * for correct behavior on all parsers.
+ */
+typedef enum arch_syntax_flavor {
+    ARCH_SYNTAX_FLAVOR_NASM = 1,       /* like NASM */
+    ARCH_SYNTAX_FLAVOR_GAS             /* like GAS */
+} arch_syntax_flavor;
+
 struct arch {
     /* one-line description of the architecture */
     const char *name;
@@ -29,6 +58,67 @@ struct arch {
     /* keyword used to select architecture */
     const char *keyword;
 
+    struct {
+       /* All "data" below starts the parse initialized to 0.  Thus, it is
+        * okay for a function to use/check previously stored data to see if
+        * it's been called before on the same piece of data.
+        */
+
+       /* Switches available instructions/registers/etc. based on a
+        * user-specified CPU identifier.  Should modify behavior ONLY of
+        * parse functions!  The bytecode and output functions should be able
+        * to handle any CPU.
+        */
+       void (*switch_cpu) (const char *cpuid);
+
+       /* Checks a generic identifier to see if it matches architecture
+        * specific names for instructions, registers, etc. (see the
+        * arch_check_id_retval enum above for the various types this function
+        * can detect & return).  Unrecognized identifiers should be returned
+        * as NONE so they can be treated as normal symbols.  Any additional
+        * data beyond just the type (almost always necessary) should be
+        * returned into the space provided by the data parameter.
+        * Note: even though this is passed a data[4], only data[0] should be
+        * used for TARGETMOD, REG, and SEGREG return values.
+        */
+       arch_check_id_retval (*check_identifier) (unsigned long data[4],
+                                                 const char *id);
+
+       /* Architecture-specific directive support.  Returns 1 if directive was
+        * not recognized.  Returns 0 if directive was recognized, even if it
+        * wasn't valid.  Should modify behavior ONLY of parse functions, much
+        * like switch_cpu() above.
+        */
+       int (*directive) (const char *name, valparamhead *valparams,
+                         /*@null@*/ valparamhead *objext_valparams,
+                         sectionhead *headp);
+
+       /* Creates an instruction.  Creates a bytecode by matching the
+        * instruction data and the parameters given with a valid instruction.
+        * If no match is found (the instruction is invalid), returns NULL.
+        * All zero data indicates an empty instruction should be created.
+        */
+       /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4],
+                                          int num_operands, /*@null@*/
+                                          insn_operandhead *operands);
+
+       /* Handle an instruction prefix by modifying bc as necessary. */
+       void (*handle_prefix) (bytecode *bc, const unsigned long data[4]);
+
+       /* Handle a segment register instruction prefix by modifying bc as
+        * necessary.
+        */
+       void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg);
+
+       /* Handle memory expression segment overrides by modifying ea as
+        * necessary.
+        */
+       void (*handle_seg_override) (effaddr *ea, unsigned long segreg);
+
+       /* Convert an expression into an effective address. */
+       effaddr * (*ea_new_expr) (/*@keep@*/ expr *e);
+    } parse;
+
     struct {
        /* Maximum used bytecode type value+1.  Should be set to
         * BYTECODE_TYPE_BASE if no additional bytecode types are defined by
@@ -48,10 +138,77 @@ struct arch {
                           const section *sect, void *d,
                           output_expr_func output_expr);
     } bc;
+
+    /* Gets the equivalent register size in bytes.  Returns 0 if there is no
+     * suitable equivalent size.
+     */
+    unsigned int (*get_reg_size) (unsigned long reg);
+
+    void (*reg_print) (FILE *f, unsigned long reg);
+    void (*segreg_print) (FILE *f, unsigned long segreg);
+
+    /* Deletes the arch-specific data in ea.  May be NULL if no special
+     * deletion is required (e.g. there's no dynamically allocated pointers
+     * in the ea data).
+     */
+    void (*ea_data_delete) (effaddr *ea);
+
+    void (*ea_data_print) (FILE *f, const effaddr *ea);
+};
+
+struct insn_operand {
+    /*@reldef@*/ STAILQ_ENTRY(insn_operand) link;
+
+    enum {
+       INSN_OPERAND_REG = 1,   /* a register */
+       INSN_OPERAND_SEGREG,    /* a segment register */
+       INSN_OPERAND_MEMORY,    /* an effective address (memory reference) */
+       INSN_OPERAND_IMM        /* an immediate or jump target */
+    } type;
+
+    union {
+       unsigned long reg;      /* arch data for reg/segreg */
+       effaddr *ea;            /* effective address for memory references */
+       expr *val;              /* value of immediate or jump target */
+    } data;
+
+    unsigned long targetmod;   /* arch target modifier, 0 if none */
+
+    /* Specified size of the operand, in bytes.  0 if not user-specified. */
+    unsigned int size;
 };
 
+/* insn_operand constructors.  operand_new_imm() will look for cases of a
+ * single register and create an INSN_OPERAND_REG variant of insn_operand.
+ */
+insn_operand *operand_new_reg(unsigned long reg);
+insn_operand *operand_new_segreg(unsigned long segreg);
+insn_operand *operand_new_mem(/*@only@*/ effaddr *ea);
+insn_operand *operand_new_imm(/*@only@*/ expr *val);
+
+void operand_print(FILE *f, const insn_operand *op);
+
+#define ops_initialize(headp)  STAILQ_INIT(headp)
+#define ops_first(headp)       STAILQ_FIRST(headp)
+#define ops_next(cur)          STAILQ_NEXT(cur, link)
+
+/* Deletes operands linked list.  Deletes content of each operand if content is
+ * nonzero.
+ */
+void ops_delete(insn_operandhead *headp, int content);
+
+/* Adds op to the list of operands headp.
+ * NOTE: Does not make a copy of op; so don't pass this function
+ * static or local variables, and discard the op pointer after calling
+ * this function.  If op was actually appended (it wasn't NULL), then
+ * returns op, otherwise returns NULL.
+ */
+/*@null@*/ insn_operand *ops_append(insn_operandhead *headp,
+                                   /*@returned@*/ /*@null@*/ insn_operand *op);
+
+void ops_print(FILE *f, const insn_operandhead *headp);
+
 /* Available architectures */
-#include "arch/x86/x86arch.h"
 extern arch x86_arch;
 
 extern arch *cur_arch;
index 3d16e3d317df45b374782ec83b21c4a992f01b58..7bfef4d0085a7f2b1397a25a83c7b4384d464dbf 100644 (file)
@@ -3,10 +3,19 @@
 YASMARCHFILES += \
        src/arch/x86/x86arch.c  \
        src/arch/x86/x86arch.h  \
-       src/arch/x86/x86-int.h  \
        src/arch/x86/x86bc.c    \
-       src/arch/x86/x86expr.c
+       src/arch/x86/x86expr.c  \
+       x86id.c
+
+x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+       re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
+
+BUILT_SOURCES += \
+       x86id.c
+
+CLEANFILES += \
+       x86id.c
 
 EXTRA_DIST += \
        src/arch/x86/README     \
-       src/arch/x86/instrs.dat
+       src/arch/x86/x86id.re
diff --git a/src/arch/x86/instrs.dat b/src/arch/x86/instrs.dat
deleted file mode 100644 (file)
index 02e5ad9..0000000
+++ /dev/null
@@ -1,1208 +0,0 @@
-; $IdPath$
-; List of valid instruction/operand combinations
-;
-;    Copyright (C) 2001  Peter Johnson
-;
-;    This file is part of YASM.
-;
-;    YASM is free software; you can redistribute it and/or modify
-;    it under the terms of the GNU General Public License as published by
-;    the Free Software Foundation; either version 2 of the License, or
-;    (at your option) any later version.
-;
-;    YASM is distributed in the hope that it will be useful,
-;    but WITHOUT ANY WARRANTY; without even the implied warranty of
-;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;    GNU General Public License for more details.
-;
-;    You should have received a copy of the GNU General Public License
-;    along with this program; if not, write to the Free Software
-;    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-;
-; Meanings of codes:
-;  $x refers to operand x
-;  "nil" in a field indicates the lack of that field in the instruction
-;   (there MUST be some text in every field in this document)
-;  Sizes are in bits (8,16,32 are the only valid quantities)
-;
-; Column definitions:
-;  Inst     - Instruction, should be lowercase
-;  Operands - Single combination of valid operands
-;             "TO" is not counted in the operand count.
-;  OpSize   - Fixed operand size.  Can generate prefix byte.
-;  Opcode   - One or two bytes of opcode.
-;  EffAddr  - Effective Address (ModRM/SIB/Off).  First value is the memory
-;             operand, second specifies what value goes into the reg/spare
-;             bits in the ModRM byte.
-;             $xr indicates operand is register, not ModRM (needs convert to RM)
-;             $xi indicates operand is immediate (2nd parm is size in bits)
-;  Imm      - Immediate source operand and forced size (in bits).
-;             "s" after size indicates signed number
-;             A number instead of a $x is a hex constant value.
-;
-; A ':' at the beginning of the line means that the instruction following the
-;  ':' is a synonym for the instruction in the 2nd column.
-;
-; See the parser file for a list of possible operand values and their meanings.
-; gen_instr.pl translates this list into lexer and parser code.
-;
-; Instructions are listed in the same order as that in GNU binutils
-;  /include/opcode/i386.h, used for the GAS assembler.  See
-;  <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h?cvsroot=src>.
-;
-; TODO:
-;  Finish instructions (may require changing parser code).
-;  Doublecheck instruction encodings, allowable operands.
-;  Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes).
-;  Doublecheck AMD and Cyrix instructions.
-;  Doublecheck the segreg mov instructions.
-;
-; Instruction Groupings (to shorten parser code).
-;  The $0.1, $0.2, and $0.3 will get replaced with the parameters given for
-;   the instruction using the group during lexing & parsing.  These parameters
-;   may be in the opcode, opsize, effaddr, or immediate.
-;  When opsize is a parameter, its usage in instructions that use the group
-;   looks slightly different than normal, because the parameters are
-;   specified in hexadecimal while the normal opsize usage is in decimal.
-;   Thus 10 and 20 are used instead of 16 and 32 respectively.
-;  The first CPU grouping for the instruction is OR'ed with the CPU value in
-;   the group CPU fields with @0 in their list.  This allows one grouping to
-;   be used for instructions with different CPU values.
-;  Restrictions on groupings:
-;   - $0.? may not appear in the operand, the first part of the effaddr, the
-;     second part of the imm, or the CPU fields.
-;   - @0, @1 may only appear in the CPU field.
-;  Restrictions on instructions based on groupings:
-;   - no other operand combinations are allowed (eg, if an instruction uses a
-;     group, that must be the ONLY line for the instruction)
-;
-; Notes on code generation:
-;  Each group generates a lex token of the group name (sans !).  Bison rules
-;   are generated for each of the operand combinations for the group just as
-;   with a regular instruction, except for the addition of the $0.? fields.
-;   Each $0.? field is replaced by $1.d? in the generated code (eg,
-;   $0.1->$1.d1, etc).
-;  When an instruction that uses a group is encountered, eg:
-;   inst!grpname  parm1[,parm2[,parm3]]
-;  The following lex code is generated:
-;   inst { yylval.groupdata[0]=0xparm1; return GRPNAME; }
-;  (and additional yylval.groupdata[#-1]=0xparm#; if needed)
-;
-; KEY
-;
-; !Grp Operands                OpSize  Opcode          EffAddr         Imm     CPU
-; Inst Operands                OpSize  Opcode          EffAddr         Imm     CPU
-; Inst!Grp             Parameters      CPU @0          CPU @1
-;
-; Groupings used throughout
-;
-;  One byte opcode instructions with no operands:
-!onebyte       nil             $0.1    $0.2            nil             nil     @0
-;  Two byte opcode instructions with no operands:
-!twobyte       nil             nil     $0.1,$0.2       nil             nil     @0
-;  Three byte opcode instructions with no operands:
-!threebyte     nil             nil     $0.1,$0.2,$0.3  nil             nil     @0
-;  One byte opcode instructions with general memory operand:
-!onebytemem    mem             nil     $0.1            $1,$0.2         nil     @0
-;  Two byte opcode instructions with general memory operand:
-!twobytemem    mem             nil     $0.1,$0.2       $1,$0.3         nil     @0
-;
-; Move instructions
-;
-; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89).
-mov    reg8,reg8               nil     88              $1r,$2          nil     8086
-mov    reg16,reg16             16      89              $1r,$2          nil     8086
-mov    reg32,reg32             32      89              $1r,$2          nil     386
-mov    mem,reg8                nil     88              $1,$2           nil     8086
-mov    mem8x,reg8              nil     88              $1,$2           nil     8086
-mov    mem,reg16               16      89              $1,$2           nil     8086
-mov    mem16x,reg16            16      89              $1,$2           nil     8086
-mov    mem,reg32               32      89              $1,$2           nil     386
-mov    mem32x,reg32            32      89              $1,$2           nil     386
-mov    reg8,mem8               nil     8A              $2,$1           nil     8086
-mov    reg16,mem16             16      8B              $2,$1           nil     8086
-mov    reg32,mem32             32      8B              $2,$1           nil     386
-mov    mem,segreg              nil     8C              $1,$2           nil     8086
-mov    reg16,segreg            16      8C              $1r,$2          nil     8086
-mov    mem16x,segreg           16      8C              $1,$2           nil     8086
-mov    reg32,segreg            32      8C              $1r,$2          nil     386
-mov    mem32x,segreg           32      8C              $1,$2           nil     386
-mov    segreg,mem              nil     8E              $2,$1           nil     8086
-mov    segreg,rm16x            nil     8E              $2,$1           nil     8086
-mov    segreg,rm32x            nil     8E              $2,$1           nil     386
-;mov   reg_al,memoff8
-;mov   reg_ax,memoff16
-;mov   reg_eax,memoff32
-;mov   memoff8,reg_al
-;mov   memoff16,reg_ax
-;mov   memoff32,reg_eax
-mov    reg8,imm8               nil     B0+$1           nil             $2,8    8086
-mov    reg16,imm16             16      B8+$1           nil             $2,16   8086
-mov    reg32,imm32             32      B8+$1           nil             $2,32   386
-mov    mem8x,imm8              nil     C6              $1,0            $2,8    8086
-mov    mem,imm8x               nil     C6              $1,0            $2,8    8086
-mov    mem16x,imm16            16      C7              $1,0            $2,16   8086
-mov    mem,imm16x              16      C7              $1,0            $2,16   8086
-mov    mem32x,imm32            32      C7              $1,0            $2,32   8086
-mov    mem,imm32x              32      C7              $1,0            $2,32   8086
-mov    CRREG_NOTCR4,reg32      nil     0F,22           $2r,$1          nil     386,PRIV
-mov    CR4,reg32               nil     0F,22           $2r,$1          nil     P5,PRIV
-mov    reg32,CRREG_NOTCR4      nil     0F,20           $1r,$2          nil     386,PRIV
-mov    reg32,CR4               nil     0F,20           $1r,$2          nil     P5,PRIV
-mov    reg32,DRREG             nil     0F,21           $1r,$2          nil     386,PRIV
-mov    DRREG,reg32             nil     0F,23           $2r,$1          nil     386,PRIV
-;
-; Move with sign/zero extend
-;
-!movszx        reg16,rm8               16      0F,$0.1         $2,$1           nil     386
-!movszx        reg32,rm8x              32      0F,$0.1         $2,$1           nil     386
-!movszx        reg32,rm16x             nil     0F,$0.1+1       $2,$1           nil     386
-movsx!movszx           BE
-movzx!movszx           B6
-;
-; Push instructions
-;
-push   mem16x                  16      FF              $1,6            nil     8086
-push   mem32x                  32      FF              $1,6            nil     386
-push   reg16                   16      50+$1           nil             nil     8086
-push   reg32                   32      50+$1           nil             nil     386
-push   imm8x                   nil     6A              nil             $1,8    8086
-push   imm16x                  16      68              nil             $1,16   8086
-push   imm32x                  32      68              nil             $1,32   386
-push   reg_cs                  nil     0E              nil             nil     8086
-push   reg_ss                  nil     16              nil             nil     8086
-push   reg_ds                  nil     1E              nil             nil     8086
-push   reg_es                  nil     06              nil             nil     8086
-push   reg_fs                  nil     0F,A0           nil             nil     386
-push   reg_gs                  nil     0F,A8           nil             nil     386
-pusha!onebyte          nil,60          186
-pushad!onebyte         20,60           386
-pushaw!onebyte         10,60           186
-;
-; Pop instructions
-;
-pop    mem16x                  16      8F              $1,0            nil     8086
-pop    mem32x                  32      8F              $1,0            nil     386
-pop    reg16                   16      58+$1           nil             nil     8086
-pop    reg32                   32      58+$1           nil             nil     386
-pop    reg_ds                  nil     1F              nil             nil     8086
-pop    reg_es                  nil     07              nil             nil     8086
-pop    reg_ss                  nil     17              nil             nil     8086
-pop    reg_fs                  nil     0F,A1           nil             nil     386
-pop    reg_gs                  nil     0F,A9           nil             nil     386
-popa!onebyte           nil,61          186
-popad!onebyte          20,61           386
-popaw!onebyte          10,61           186
-;
-; Exchange instructions
-;
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg8,reg8               nil     86              $1r,$2          nil     8086
-xchg   mem,reg8                nil     86              $1,$2           nil     8086
-xchg   mem8x,reg8              nil     86              $1,$2           nil     8086
-xchg   reg8,mem8               nil     86              $2,$1           nil     8086
-xchg   reg_ax,reg16            16      90+$2           nil             nil     8086
-xchg   reg16,reg_ax            16      90+$1           nil             nil     8086
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg16,reg16             16      87              $1r,$2          nil     8086
-xchg   mem,reg16               16      87              $1,$2           nil     8086
-xchg   mem16x,reg16            16      87              $1,$2           nil     8086
-xchg   reg16,mem16             16      87              $2,$1           nil     8086
-xchg   reg_eax,reg32           32      90+$2           nil             nil     386
-xchg   reg32,reg_eax           32      90+$1           nil             nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg   reg32,reg32             32      87              $1r,$2          nil     386
-xchg   mem,reg32               32      87              $1,$2           nil     386
-xchg   mem32x,reg32            32      87              $1,$2           nil     386
-xchg   reg32,mem32             32      87              $2,$1           nil     386
-;
-; In/out from ports
-;
-in     reg_al,imm8             nil     E4              nil             $2,8    8086
-in     reg_ax,imm8             16      E5              nil             $2,8    8086
-in     reg_eax,imm8            32      E5              nil             $2,8    386
-in     reg_al,reg_dx           nil     EC              nil             nil     8086
-in     reg_ax,reg_dx           16      ED              nil             nil     8086
-in     reg_eax,reg_dx          32      ED              nil             nil     386
-out    imm8,reg_al             nil     E6              nil             $1,8    8086
-out    imm8,reg_ax             16      E7              nil             $1,8    8086
-out    imm8,reg_eax            32      E7              nil             $1,8    386
-out    reg_dx,reg_al           nil     EE              nil             nil     8086
-out    reg_dx,reg_ax           16      EF              nil             nil     8086
-out    reg_dx,reg_eax          32      EF              nil             nil     386
-;
-; Load effective address
-;
-lea    reg16,mem16             16      8D              $2,$1           nil     8086
-lea    reg32,mem32             32      8D              $2,$1           nil     386
-;
-; Load segment registers from memory
-;
-lds    reg16,mem               16      C5              $2,$1           nil     8086
-lds    reg32,mem               32      C5              $2,$1           nil     386
-les    reg16,mem               16      C4              $2,$1           nil     8086
-les    reg32,mem               32      C4              $2,$1           nil     386
-lfs    reg16,mem               16      0F,B4           $2,$1           nil     386
-lfs    reg32,mem               32      0F,B4           $2,$1           nil     386
-lgs    reg16,mem               16      0F,B5           $2,$1           nil     386
-lgs    reg32,mem               32      0F,B5           $2,$1           nil     386
-lss    reg16,mem               16      0F,B2           $2,$1           nil     386
-lss    reg32,mem               32      0F,B2           $2,$1           nil     386
-;
-; Flags register instructions
-;
-clc!onebyte            nil,F8          8086
-cld!onebyte            nil,FC          8086
-cli!onebyte            nil,FA          8086
-clts!twobyte           0F,06           286,PRIV
-cmc!onebyte            nil,F5          8086
-lahf!onebyte           nil,9F          8086
-sahf!onebyte           nil,9E          8086
-pushf!onebyte          nil,9C          8086
-pushfd!onebyte         20,9C           386
-pushfw!onebyte         10,9C           8086
-popf!onebyte           nil,9D          8086
-popfd!onebyte          20,9D           386
-popfw!onebyte          10,9D           8086
-stc!onebyte            nil,F9          8086
-std!onebyte            nil,FD          8086
-sti!onebyte            nil,FB          8086
-;
-; Arithmetic
-;
-;  General arithmetic
-!arith reg_al,imm8             nil     $0.1+4          nil             $2,8    8086
-!arith reg_ax,imm16            16      $0.1+5          nil             $2,16   8086
-!arith reg_eax,imm32           32      $0.1+5          nil             $2,32   386
-!arith reg8,imm8               nil     80              $1r,$0.2        $2,8    8086
-!arith mem8x,imm               nil     80              $1,$0.2         $2,8    8086
-!arith mem,imm8x               nil     80              $1,$0.2         $2,8    8086
-!arith reg16,imm               16      81              $1r,$0.2        $2,16   8086
-!arith mem16x,imm              16      81              $1,$0.2         $2,16   8086
-!arith reg16,imm16x            16      81              $1r,$0.2        $2,16   8086
-!arith mem,imm16x              16      81              $1,$0.2         $2,16   8086
-!arith reg32,imm               32      81              $1r,$0.2        $2,32   386
-!arith mem32x,imm              32      81              $1,$0.2         $2,32   386
-!arith reg32,imm32x            32      81              $1r,$0.2        $2,32   386
-!arith mem,imm32x              32      81              $1,$0.2         $2,32   386
-!arith reg16,imm8x             16      83              $1r,$0.2        $2,8s   8086
-!arith mem16x,imm8x            16      83              $1,$0.2         $2,8s   8086
-!arith reg32,imm8x             32      83              $1r,$0.2        $2,8s   386
-!arith mem32x,imm8x            32      83              $1,$0.2         $2,8s   386
-; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1).
-!arith reg8,reg8               nil     $0.1            $1r,$2          nil     8086
-!arith reg16,reg16             16      $0.1+1          $1r,$2          nil     8086
-!arith reg32,reg32             32      $0.1+1          $1r,$2          nil     386
-!arith mem,reg8                nil     $0.1            $1,$2           nil     8086
-!arith mem8x,reg8              nil     $0.1            $1,$2           nil     8086
-!arith mem,reg16               16      $0.1+1          $1,$2           nil     8086
-!arith mem16x,reg16            16      $0.1+1          $1,$2           nil     8086
-!arith mem,reg32               32      $0.1+1          $1,$2           nil     386
-!arith mem32x,reg32            32      $0.1+1          $1,$2           nil     386
-!arith reg8,mem8               nil     $0.1+2          $2,$1           nil     8086
-!arith reg16,mem16             16      $0.1+3          $2,$1           nil     8086
-!arith reg32,mem32             32      $0.1+3          $2,$1           nil     386
-;  INC/DEC
-!incdec        rm8x                    nil     FE              $1,$0.1         nil     8086
-!incdec        mem16x                  16      FF              $1,$0.1         nil     8086
-!incdec        mem32x                  32      FF              $1,$0.1         nil     386
-!incdec        reg16                   16      $0.2+$1         nil             nil     8086
-!incdec        reg32                   32      $0.2+$1         nil             nil     386
-;  "F6" opcodes (DIV/IDIV/MUL/NEG/NOT):
-!groupf6       rm8x            nil     F6              $1,$0.1         nil     8086
-!groupf6       rm16x           16      F7              $1,$0.1         nil     8086
-!groupf6       rm32x           32      F7              $1,$0.1         nil     386
-add!arith              00,0
-inc!incdec             0,40
-sub!arith              28,5
-dec!incdec             1,48
-sbb!arith              18,3
-cmp!arith              38,7
-test   reg_al,imm8             nil     A8              nil             $2,8    8086
-test   reg_ax,imm16            16      A9              nil             $2,16   8086
-test   reg_eax,imm32           32      A9              nil             $2,32   386
-test   reg8,imm8               nil     F6              $1r,0           $2,8    8086
-test   mem8x,imm               nil     F6              $1,0            $2,8    8086
-test   mem,imm8x               nil     F6              $1,0            $2,8    8086
-test   reg16,imm16             16      F7              $1r,0           $2,16   8086
-test   mem16x,imm              16      F7              $1,0            $2,16   8086
-test   mem,imm16x              16      F7              $1,0            $2,16   8086
-test   reg32,imm32             32      F7              $1r,0           $2,32   386
-test   mem32x,imm              32      F7              $1,0            $2,32   386
-test   mem,imm32x              32      F7              $1,0            $2,32   386
-; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1
-test   reg8,reg8               nil     84              $1r,$2          nil     8086
-test   reg16,reg16             16      85              $1r,$2          nil     8086
-test   reg32,reg32             32      85              $1r,$2          nil     386
-test   mem,reg8                nil     84              $1,$2           nil     8086
-test   mem8x,reg8              nil     84              $1,$2           nil     8086
-test   mem,reg16               16      85              $1,$2           nil     8086
-test   mem16x,reg16            16      85              $1,$2           nil     8086
-test   mem,reg32               32      85              $1,$2           nil     386
-test   mem32x,reg32            32      85              $1,$2           nil     386
-test   reg8,mem8               nil     84              $2,$1           nil     8086
-test   reg16,mem16             16      85              $2,$1           nil     8086
-test   reg32,mem32             32      85              $2,$1           nil     386
-and!arith              20,4
-or!arith               08,1
-xor!arith              30,6
-adc!arith              10,2
-neg!groupf6            3
-not!groupf6            2
-aaa!onebyte            nil,37          8086
-aas!onebyte            nil,3F          8086
-daa!onebyte            nil,27          8086
-das!onebyte            nil,2F          8086
-aad    nil                     nil     D5,0A           nil             nil     8086
-aad    imm8                    nil     D5              nil             $1,8    8086
-aam    nil                     nil     D4,0A           nil             nil     8086
-aam    imm8                    nil     D4              nil             $1,8    8086
-;
-; Conversion instructions
-;
-cbw!onebyte            10,98           8086
-cwde!onebyte           20,98           386
-cwd!onebyte            10,99           8086
-cdq!onebyte            20,99           386
-;
-; Multiplication and division
-;
-mul!groupf6            4
-imul   rm8x                    nil     F6              $1,5            nil     8086
-imul   rm16x                   16      F7              $1,5            nil     8086
-imul   rm32x                   32      F7              $1,5            nil     386
-imul   reg16,rm16              16      0F,AF           $2,$1           nil     386
-imul   reg32,rm32              32      0F,AF           $2,$1           nil     386
-imul   reg16,rm16,imm8x        16      6B              $2,$1           $3,8s   186
-imul   reg32,rm32,imm8x        32      6B              $2,$1           $3,8s   386
-imul   reg16,imm8x             16      6B              $1r,$1          $2,8s   186
-imul   reg32,imm8x             32      6B              $1r,$1          $2,8s   386
-imul   reg16,rm16,imm16        16      69              $2,$1           $3,16s  186
-imul   reg32,rm32,imm32        32      69              $2,$1           $3,32s  386
-imul   reg16,imm16             16      69              $1r,$1          $2,16s  186
-imul   reg32,imm32             32      69              $1r,$1          $2,32s  386
-div!groupf6            6
-idiv!groupf6           7
-;
-; Shifts
-;
-;  Standard
-!shift rm8x,ONE                nil     D0              $1,$0.1         nil     8086
-!shift rm8x,reg_cl             nil     D2              $1,$0.1         nil     8086
-!shift rm8x,imm8               nil     C0              $1,$0.1         $2,8    186
-!shift rm16x,ONE               16      D1              $1,$0.1         nil     8086
-!shift rm16x,reg_cl            16      D3              $1,$0.1         nil     8086
-!shift rm16x,imm8              16      C1              $1,$0.1         $2,8    186
-!shift rm32x,ONE               32      D1              $1,$0.1         nil     386
-!shift rm32x,reg_cl            32      D3              $1,$0.1         nil     386
-!shift rm32x,imm8              32      C1              $1,$0.1         $2,8    386
-;  Doubleword
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,imm8        16      0F,$0.1         $1r,$2          $3,8    386
-!shlrd mem,reg16,imm8          16      0F,$0.1         $1,$2           $3,8    386
-!shlrd mem16x,reg16,imm8       16      0F,$0.1         $1,$2           $3,8    386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,reg_cl      16      0F,$0.1+1       $1r,$2          nil     386
-!shlrd mem,reg16,reg_cl        16      0F,$0.1+1       $1,$2           nil     386
-!shlrd mem16x,reg16,reg_cl     16      0F,$0.1+1       $1,$2           nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,imm8        32      0F,$0.1         $1r,$2          $3,8    386
-!shlrd mem,reg32,imm8          32      0F,$0.1         $1,$2           $3,8    386
-!shlrd mem32x,reg32,imm8       32      0F,$0.1         $1,$2           $3,8    386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,reg_cl      32      0F,$0.1+1       $1r,$2          nil     386
-!shlrd mem,reg32,reg_cl        32      0F,$0.1+1       $1,$2           nil     386
-!shlrd mem32x,reg32,reg_cl     32      0F,$0.1+1       $1,$2           nil     386
-rol!shift              0
-ror!shift              1
-rcl!shift              2
-rcr!shift              3
-sal!shift              4
-shl!shift              4
-shr!shift              5
-sar!shift              7
-shld!shlrd             A4
-shrd!shlrd             AC
-;
-; Control transfer instructions (unconditional)
-;
-; Special format for relative targets:
-; !Grp/Inst    target          AdSize  ShrtOp  NearOp          ShrtCPU NearCPU
-;
-!jmpcall       target          nil     $0.1?$0.2       $0.3    8086    8086
-!jmpcall       imm:imm         nil     $0.4            $2i,nil         $1,16   8086
-!jmpcall       WORD imm:imm    16      $0.4            $2i,16          $1,16   8086
-!jmpcall       DWORD imm:imm   32      $0.4            $2i,32          $1,16   386
-!jmpcall       memfar          nil     FF              $1,$0.4+1       nil     8086
-!jmpcall       WORD memfar     16      FF              $1,$0.4+1       nil     8086
-!jmpcall       DWORD memfar    32      FF              $1,$0.4+1       nil     386
-!jmpcall       mem             nil     FF              $1,$0.4         nil     8086
-!jmpcall       rm16x           16      FF              $1,$0.4         nil     8086
-!jmpcall       rm32x           32      FF              $1,$0.4         nil     386
-call!jmpcall           nil,0,E8,9A,2
-jmp!jmpcall            1,EB,E9,EA,4
-ret!onebyte            nil,C3          8086
-retn   nil                     nil     C3              nil             nil     8086
-retf   nil                     nil     CB              nil             nil     8086
-retn   imm16                   nil     C2              nil             $1,16   8086
-retf   imm16                   nil     CA              nil             $1,16   8086
-enter  imm16,imm8              nil     C8              $1i,16          $2,8    186
-leave!onebyte          nil,C9          186
-;
-; Conditional jumps
-;
-!jcc           target          nil     70+$0.1 0F,80+$0.1      8086    386
-jo!jcc                 0
-jno!jcc                        1
-jb!jcc                 2
-jc!jcc                 2
-jnae!jcc               2
-jnb!jcc                        3
-jnc!jcc                        3
-jae!jcc                        3
-je!jcc                 4
-jz!jcc                 4
-jne!jcc                        5
-jnz!jcc                        5
-jbe!jcc                        6
-jna!jcc                        6
-jnbe!jcc               7
-ja!jcc                 7
-js!jcc                 8
-jns!jcc                        9
-jp!jcc                 A
-jpe!jcc                        A
-jnp!jcc                        B
-jpo!jcc                        B
-jl!jcc                 C
-jnge!jcc               C
-jnl!jcc                        D
-jge!jcc                        D
-jle!jcc                        E
-jng!jcc                        E
-jnle!jcc               F
-jg!jcc                 F
-jcxz           target          16      E3      nil             8086    8086
-jecxz          target          32      E3      nil             386     386
-;
-; Loop instructions
-;
-!loopg         target          nil     E0+$0.1 nil             8086    8086
-!loopg         target,reg_cx   16      E0+$0.1 nil             8086    8086
-!loopg         target,reg_ecx  32      E0+$0.1 nil             386     386
-loop!loopg             2
-loopz!loopg            1
-loope!loopg            1
-loopnz!loopg           0
-loopne!loopg           0
-;
-; Set byte on flag instructions
-;
-!setcc rm8                     nil     0F,90+$0.1      $1,2            nil     386
-seto!setcc             0
-setno!setcc            1
-setb!setcc             2
-setc!setcc             2
-setnae!setcc           2
-setnb!setcc            3
-setnc!setcc            3
-setae!setcc            3
-sete!setcc             4
-setz!setcc             4
-setne!setcc            5
-setnz!setcc            5
-setbe!setcc            6
-setna!setcc            6
-setnbe!setcc           7
-seta!setcc             7
-sets!setcc             8
-setns!setcc            9
-setp!setcc             A
-setpe!setcc            A
-setnp!setcc            B
-setpo!setcc            B
-setl!setcc             C
-setnge!setcc           C
-setnl!setcc            D
-setge!setcc            D
-setle!setcc            E
-setng!setcc            E
-setnle!setcc           F
-setg!setcc             F
-;
-; String instructions
-;
-;  NOTE: cmpsd,movsd can't go to !onebyte group because of other variations
-cmpsb!onebyte          nil,A6          8086
-cmpsw!onebyte          10,A7           8086
-cmpsd  nil                     32      A7              nil             nil     386
-insb!onebyte           nil,6C          8086
-insw!onebyte           10,6D           8086
-insd!onebyte           20,6D           386
-outsb!onebyte          nil,6E          8086
-outsw!onebyte          10,6F           8086
-outsd!onebyte          20,6F           386
-lodsb!onebyte          nil,AC          8086
-lodsw!onebyte          10,AD           8086
-lodsd!onebyte          20,AD           386
-movsb!onebyte          nil,A4          8086
-movsw!onebyte          10,A5           8086
-movsd  nil                     32      A5              nil             nil     386
-scasb!onebyte          nil,AE          8086
-scasw!onebyte          10,AF           8086
-scasd!onebyte          20,AF           386
-stosb!onebyte          nil,AA          8086
-stosw!onebyte          10,AB           8086
-stosd!onebyte          20,AB           386
-xlat!onebyte           nil,D7          8086
-xlatb!onebyte          nil,D7          8086
-;
-; Bit manipulation
-;
-;  Bit tests
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest       reg16,reg16     16      0F,$0.1         $1r,$2          nil     386
-!bittest       mem,reg16       16      0F,$0.1         $1,$2           nil     386
-!bittest       mem16x,reg16    16      0F,$0.1         $1,$2           nil     386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest       reg32,reg32     32      0F,$0.1         $1r,$2          nil     386
-!bittest       mem,reg32       32      0F,$0.1         $1,$2           nil     386
-!bittest       mem32x,reg32    32      0F,$0.1         $1,$2           nil     386
-!bittest       reg16,imm8      16      0F,BA           $1r,$0.2        $2,8    386
-!bittest       mem16x,imm8     16      0F,BA           $1,$0.2         $2,8    386
-!bittest       reg32,imm8      32      0F,BA           $1r,$0.2        $2,8    386
-!bittest       mem32x,imm8     32      0F,BA           $1,$0.2         $2,8    386
-;  Bit scans
-!bsfr  reg16,rm16              16      0F,BC+$0.1      $2,$1           nil     386
-!bsfr  reg32,rm32              32      0F,BC+$0.1      $2,$1           nil     386
-bsf!bsfr               0
-bsr!bsfr               1
-bt!bittest             A3,4
-btc!bittest            BB,7
-btr!bittest            B3,6
-bts!bittest            AB,5
-;
-; Interrupts and operating system instructions
-;
-int    imm8                    nil     CD              nil             $1,8    8086
-int3!onebyte           nil,CC          8086
-int03!onebyte          nil,CC          8086
-into!onebyte           nil,CE          8086
-iret!onebyte           nil,CF          8086
-iretw!onebyte          10,CF           8086
-iretd!onebyte          20,CF           386
-rsm!twobyte            0F,AA           P5,SMM
-bound  reg16,mem16             16      62              $2,$1           nil     186
-bound  reg32,mem32             32      62              $2,$1           nil     386
-hlt!onebyte            nil,F4          8086,PRIV
-nop!onebyte            nil,90          8086
-;
-; Protection control
-;
-;  286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW):
-!prot286       rm16            nil     0F,00           $1,$0.1         nil     286,PROT,@0
-arpl   rm16,reg16              nil     63              $1,$2           nil     286,PROT
-lar    reg16,rm16              16      0F,02           $2,$1           nil     286,PROT
-lar    reg32,rm32              32      0F,02           $2,$1           nil     386,PROT
-lgdt!twobytemem                0F,01,2         286,PRIV
-lidt!twobytemem                0F,01,3         286,PRIV
-lldt!prot286           2               PRIV
-lmsw   rm16                    nil     0F,01           $1,6            nil     286,PRIV
-lsl    reg16,rm16              16      0F,03           $2,$1           nil     286,PROT
-lsl    reg32,rm32              32      0F,03           $2,$1           nil     286,PROT
-ltr!prot286            3               PRIV
-sgdt!twobytemem                0F,01,0         286
-sidt!twobytemem                0F,01,1         286
-sldt   mem1632                 nil     0F,00           $1,0            nil     286
-sldt   reg16                   16      0F,00           $1r,0           nil     286
-sldt   reg32                   32      0F,00           $1r,0           nil     386
-smsw   mem1632                 nil     0F,01           $1,4            nil     286
-smsw   reg16                   16      0F,01           $1r,4           nil     286
-smsw   reg32                   32      0F,01           $1r,4           nil     386
-str!prot286            1
-verr!prot286           4
-verw!prot286           5
-;
-; Floating point instructions
-;
-;  Load
-fld    mem32x                  nil     D9              $1,0            nil     8086,FPU
-fld    mem64x                  nil     DD              $1,0            nil     8086,FPU
-fld    mem80x                  nil     DB              $1,5            nil     8086,FPU
-fld    fpureg                  nil     D9,C0+$1        nil             nil     8086,FPU
-fild   mem16x                  nil     DF              $1,0            nil     8086,FPU
-fild   mem32x                  nil     DB              $1,0            nil     8086,FPU
-fild   mem64x                  nil     DF              $1,5            nil     8086,FPU
-fbld   mem80                   nil     DF              $1,4            nil     8086,FPU
-;  Store
-fst    mem32x                  nil     D9              $1,2            nil     8086,FPU
-fst    mem64x                  nil     DD              $1,2            nil     8086,FPU
-fst    fpureg                  nil     DD,D0+$1        nil             nil     8086,FPU
-fist   mem16x                  nil     DF              $1,2            nil     8086,FPU
-fist   mem32x                  nil     DB              $1,2            nil     8086,FPU
-;  Store (with pop)
-fstp   mem32x                  nil     D9              $1,3            nil     8086,FPU
-fstp   mem64x                  nil     DD              $1,3            nil     8086,FPU
-fstp   mem80x                  nil     DB              $1,7            nil     8086,FPU
-fstp   fpureg                  nil     DD,D8+$1        nil             nil     8086,FPU
-fistp  mem16x                  nil     DF              $1,3            nil     8086,FPU
-fistp  mem32x                  nil     DB              $1,3            nil     8086,FPU
-fistp  mem64x                  nil     DF              $1,7            nil     8086,FPU
-fbstp  mem80                   nil     DF              $1,6            nil     8086,FPU
-;  Exchange (with ST0)
-fxch   fpureg                  nil     D9,C8+$1        nil             nil     8086,FPU
-fxch   ST0,ST0                 nil     D9,C8           nil             nil     8086,FPU
-fxch   ST0,FPUREG_NOTST0       nil     D9,C8+$2        nil             nil     8086,FPU
-fxch   FPUREG_NOTST0,ST0       nil     D9,C8+$1        nil             nil     8086,FPU
-fxch   nil                     nil     D9,C9           nil             nil     8086,FPU
-;  Comparisons
-!fcomg mem32x                  nil     D8              $1,$0.1         nil     8086,FPU
-!fcomg mem64x                  nil     DC              $1,$0.1         nil     8086,FPU
-!fcomg fpureg                  nil     D8,$0.2+$1      nil             nil     8086,FPU
-!fcomg ST0,fpureg              nil     D8,$0.2+$2      nil             nil     8086,FPU
-;  Extended comparisons
-!fcomg2        fpureg                  nil     $0.1,$0.2+$1    nil             nil     @0,FPU
-!fcomg2        ST0,fpureg              nil     $0.1,$0.2+$2    nil             nil     @0,FPU
-;  Comparison (without pop)
-fcom!fcomg             2,D0
-ficom  mem16x                  nil     DE              $1,2            nil     8086,FPU
-ficom  mem32x                  nil     DA              $1,2            nil     8086,FPU
-;  Comparison (with pop)
-fcomp!fcomg            3,D8
-ficomp mem16x                  nil     DE              $1,3            nil     8086,FPU
-ficomp mem32x                  nil     DA              $1,3            nil     8086,FPU
-fcompp!twobyte         DE,D9           8086,FPU
-;  Unordered comparison (with pop)
-fucom!fcomg2           DD,E0           286,FPU
-fucomp!fcomg2          DD,E8           286,FPU
-fucompp!twobyte                DA,E9           286,FPU
-ftst!twobyte           D9,E4           8086,FPU
-fxam!twobyte           D9,E5           8086,FPU
-;  Load constants into ST0
-fld1!twobyte           D9,E8           8086,FPU
-fldl2t!twobyte         D9,E9           8086,FPU
-fldl2e!twobyte         D9,EA           8086,FPU
-fldpi!twobyte          D9,EB           8086,FPU
-fldlg2!twobyte         D9,EC           8086,FPU
-fldln2!twobyte         D9,ED           8086,FPU
-fldz!twobyte           D9,EE           8086,FPU
-;  Arithmetic
-!farith        mem32x                  nil     D8              $1,$0.1         nil     8086,FPU
-!farith        mem64x                  nil     DC              $1,$0.1         nil     8086,FPU
-!farith        fpureg                  nil     D8,$0.2+$1      nil             nil     8086,FPU
-!farith        ST0,ST0                 nil     D8,$0.2         nil             nil     8086,FPU
-!farith        ST0,FPUREG_NOTST0       nil     D8,$0.2+$2      nil             nil     8086,FPU
-!farith        TO fpureg               nil     DC,$0.3+$1      nil             nil     8086,FPU
-!farith        FPUREG_NOTST0,ST0       nil     DC,$0.3+$1      nil             nil     8086,FPU
-!farithp       fpureg          nil     DE,$0.1+$1      nil             nil     8086,FPU
-!farithp       fpureg,ST0      nil     DE,$0.1+$1      nil             nil     8086,FPU
-!fiarith       mem32x          nil     DA              $1,$0.1         nil     8086,FPU
-!fiarith       mem16x          nil     DE              $1,$0.1         nil     8086,FPU
-fadd!farith            0,C0,C0
-faddp!farithp          C0
-fiadd!fiarith          0
-fsub!farith            4,E0,E8
-fisub!fiarith          4
-fsubp!farithp          E8
-fsubr!farith           5,E8,E0
-fisubr!fiarith         5
-fsubrp!farithp         E0
-;  Multiply
-fmul!farith            1,C8,C8
-fimul!fiarith          1
-fmulp!farithp          C8
-;  Divide
-fdiv!farith            6,F0,F8
-fidiv!fiarith          6
-fdivp!farithp          F8
-fdivr!farith           7,F8,F0
-fidivr!fiarith         7
-fdivrp!farithp         F0
-;  Other arithmetic
-f2xm1!twobyte          D9,F0           8086,FPU
-fyl2x!twobyte          D9,F1           8086,FPU
-fptan!twobyte          D9,F2           8086,FPU
-fpatan!twobyte         D9,F3           8086,FPU
-fxtract!twobyte                D9,F4           8086,FPU
-fprem1!twobyte         D9,F5           286,FPU
-fdecstp!twobyte                D9,F6           8086,FPU
-fincstp!twobyte                D9,F7           8086,FPU
-fprem!twobyte          D9,F8           8086,FPU
-fyl2xp1!twobyte                D9,F9           8086,FPU
-fsqrt!twobyte          D9,FA           8086,FPU
-fsincos!twobyte                D9,FB           286,FPU
-frndint!twobyte                D9,FC           8086,FPU
-fscale!twobyte         D9,FD           8086,FPU
-fsin!twobyte           D9,FE           286,FPU
-fcos!twobyte           D9,FF           286,FPU
-fchs!twobyte           D9,E0           8086,FPU
-fabs!twobyte           D9,E1           8086,FPU
-;  Processor control
-fninit!twobyte         DB,E3           8086,FPU
-finit!threebyte                9B,DB,E3        8086,FPU
-fldcw  mem16                   nil     D9              $1,5            nil     8086,FPU
-fnstcw mem16                   nil     D9              $1,7            nil     8086,FPU
-fstcw  mem16                   nil     9B,D9           $1,7            nil     8086,FPU
-fnstsw mem16                   nil     DD              $1,7            nil     8086,FPU
-fnstsw reg_ax                  nil     DF,E0           nil             nil     8086,FPU
-fstsw  mem16                   nil     9B,DD           $1,7            nil     8086,FPU
-fstsw  reg_ax                  nil     9B,DF,E0        nil             nil     8086,FPU
-fnclex!twobyte         DB,E2           8086,FPU
-fclex!threebyte                9B,DB,E2        8086,FPU
-fnstenv!onebytemem     D9,6            8086,FPU
-fstenv!twobytemem      9B,D9,6         8086,FPU
-fldenv!onebytemem      D9,4            8086,FPU
-fnsave!onebytemem      DD,6            8086,FPU
-fsave!twobytemem       9B,DD,6         8086,FPU
-frstor!onebytemem      DD,4            8086,FPU
-ffree  fpureg                  nil     DD,C0+$1        nil             nil     8086,FPU
-ffreep fpureg                  nil     DF,C0+$1        nil             nil     P6,FPU,UNDOC
-fnop!twobyte           D9,D0           8086,FPU
-fwait!onebyte          nil,9B          8086,FPU
-;
-; Prefixes (should the others be here too? should wait be a prefix?)
-;
-wait!onebyte           nil,9B          8086
-;
-; 486 extensions
-;
-;  Compare & exchange, exchange & add
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg8,reg8       nil     0F,$0.1         $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg8        nil     0F,$0.1         $1,$2           nil     @0
-!cmpxchgxadd   mem8x,reg8      nil     0F,$0.1         $1,$2           nil     @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg16,reg16     16      0F,$0.1+1       $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg16       16      0F,$0.1+1       $1,$2           nil     @0
-!cmpxchgxadd   mem16x,reg16    16      0F,$0.1+1       $1,$2           nil     @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd   reg32,reg32     32      0F,$0.1+1       $1r,$2          nil     @0
-!cmpxchgxadd   mem,reg32       32      0F,$0.1+1       $1,$2           nil     @0
-!cmpxchgxadd   mem32x,reg32    32      0F,$0.1+1       $1,$2           nil     @0
-bswap  reg32                   32      0F,C8+$1        nil             nil     486
-xadd!cmpxchgxadd       C0              486
-cmpxchg!cmpxchgxadd    B0              486
-cmpxchg486!cmpxchgxadd A6              486,UNDOC
-invd!twobyte           0F,08           486,PRIV
-wbinvd!twobyte         0F,09           486,PRIV
-invlpg!twobytemem      0F,01,7         486,PRIV
-;
-; 586 and late 486 extensions
-;
-cpuid!twobyte          0F,A2           486
-;
-; Pentium extensions
-;
-wrmsr!twobyte          0F,30           P5,PRIV
-rdtsc!twobyte          0F,31           P5
-rdmsr!twobyte          0F,32           P5,PRIV
-cmpxchg8b      mem64           nil     0F,C7           $1,1            nil     P5
-;
-; Pentium II/Pentium Pro extensions
-;
-sysenter!twobyte       0F,34           P6
-sysexit!twobyte                0F,35           P6,PRIV
-fxsave!twobytemem      0F,AE,0         P6,FPU
-fxrstor!twobytemem     0F,AE,1         P6,FPU
-rdpmc!twobyte          0F,33           P6
-ud2!twobyte            0F,0B           286
-ud1!twobyte            0F,B9           286,UNDOC
-; cmov
-; fcmov
-fcomi!fcomg2           DB,F0           P6
-fucomi!fcomg2          DB,E8           P6
-fcomip!fcomg2          DF,F0           P6
-fucomip!fcomg2         DF,E8           P6
-;
-; Pentium4 extensions
-;
-movnti mem32,reg32             nil     0F,C3           $1,$2           nil     P4
-clflush        mem8                    nil     0F,AE           $1,7            nil     KATMAI
-lfence!threebyte       0F,AE,E8        KATMAI
-mfence!threebyte       0F,AE,F0        KATMAI
-pause!twobyte          F3,90           P4
-;
-; MMX/SSE2 instructions
-;
-;  General
-!mmxsse        MMXREG,rm64             nil     0F,$0.1         $2,$1           nil     @0,MMX
-!mmxsse        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     @1
-;  Shifts
-!pshift        MMXREG,rm64             nil     0F,$0.1         $2,$1           nil     P5,MMX
-!pshift        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     P4,SSE2
-!pshift        MMXREG,imm8             nil     0F,$0.2         $1r,$0.3        $2,8    P5,MMX
-!pshift        XMMREG,imm8             nil     66,0F,$0.2      $1r,$0.3        $2,8    P4,SSE2
-emms!twobyte           0F,77           P5,MMX
-movd   MMXREG,rm32             nil     0F,6E           $2,$1           nil     P5,MMX
-movd   rm32,MMXREG             nil     0F,7E           $1,$2           nil     P5,MMX
-movd   XMMREG,rm32             nil     66,0F,6E        $2,$1           nil     P4,SSE2
-movd   rm32,XMMREG             nil     66,0F,7E        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq   MMXREG,MMXREG           nil     0F,6F           $2r,$1          nil     P5,MMX
-movq   MMXREG,mem64            nil     0F,6F           $2,$1           nil     P5,MMX
-movq   mem64,MMXREG            nil     0F,7F           $1,$2           nil     P5,MMX
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq   XMMREG,XMMREG           nil     F3,0F,7E        $2r,$1          nil     P4,SSE2
-movq   XMMREG,mem64            nil     F3,0F,7E        $2,$1           nil     P4,SSE2
-movq   mem64,XMMREG            nil     66,0F,D6        $1,$2           nil     P4,SSE2
-packssdw!mmxsse                6B              P5              P4,SSE2
-packsswb!mmxsse                63              P5              P4,SSE2
-packuswb!mmxsse                67              P5              P4,SSE2
-paddb!mmxsse           FC              P5              P4,SSE2
-paddw!mmxsse           FD              P5              P4,SSE2
-paddd!mmxsse           FE              P5              P4,SSE2
-paddq!mmxsse           D4              P5              P4,SSE2
-paddsb!mmxsse          EC              P5              P4,SSE2
-paddsw!mmxsse          ED              P5              P4,SSE2
-paddusb!mmxsse         DC              P5              P4,SSE2
-paddusw!mmxsse         DD              P5              P4,SSE2
-pand!mmxsse            DB              P5              P4,SSE2
-pandn!mmxsse           DF              P5              P4,SSE2
-pcmpeqb!mmxsse         74              P5              P4,SSE2
-pcmpeqw!mmxsse         75              P5              P4,SSE2
-pcmpeqd!mmxsse         76              P5              P4,SSE2
-pcmpgtb!mmxsse         64              P5              P4,SSE2
-pcmpgtw!mmxsse         65              P5              P4,SSE2
-pcmpgtd!mmxsse         66              P5              P4,SSE2
-pmaddwd!mmxsse         F5              P5              P4,SSE2
-pmulhw!mmxsse          E5              P5              P4,SSE2
-pmullw!mmxsse          D5              P5              P4,SSE2
-por!mmxsse             EB              P5              P4,SSE2
-psllw!pshift           F1,71,6
-pslld!pshift           F2,72,6
-psllq!pshift           F3,73,6
-psraw!pshift           E1,71,4
-psrad!pshift           E2,72,4
-psrlw!pshift           D1,71,2
-psrld!pshift           D2,72,2
-psrlq!pshift           D3,73,2
-psubb  MMXREG,imm8             nil     0F,F8           $1r,2           $2,8    P5,MMX
-psubb  XMMREG,imm8             nil     66,0F,F8        $1r,2           $2,8    P4,SSE2
-psubw  MMXREG,imm8             nil     0F,F9           $1r,2           $2,8    P5,MMX
-psubw  XMMREG,imm8             nil     66,0F,F9        $1r,2           $2,8    P4,SSE2
-psubd!mmxsse           FA              P5              P4,SSE2
-psubq!mmxsse           FB              P5              P4,SSE2
-psubsb!mmxsse          E8              P5              P4,SSE2
-psubsw!mmxsse          E9              P5              P4,SSE2
-psubusb!mmxsse         D8              P5              P4,SSE2
-psubusw!mmxsse         D9              P5              P4,SSE2
-punpckhbw!mmxsse       68              P5              P4,SSE2
-punpckhwd!mmxsse       69              P5              P4,SSE2
-punpckhdq!mmxsse       6A              P5              P4,SSE2
-punpcklbw!mmxsse       60              P5              P4,SSE2
-punpcklwd!mmxsse       61              P5              P4,SSE2
-punpckldq!mmxsse       62              P5              P4,SSE2
-pxor!mmxsse            EF              P5              P4,SSE2
-;
-; PIII (Katmai) new instructions / SIMD instructions
-;
-;  Standard
-!sseps XMMREG,rm128            nil     0F,$0.1         $2,$1           nil     @0
-!ssess XMMREG,rm128            nil     F3,0F,$0.1      $2,$1           nil     @0
-;  With immediate
-!ssepsimm      XMMREG,rm128,imm8       nil     0F,$0.1 $2,$1           $3,8    KATMAI,SSE
-;  Comparisons
-!ssecmpps      XMMREG,rm128    nil     0F,C2           $2,$1           $0.1,8  KATMAI,SSE
-!ssecmpss      XMMREG,rm128    nil     F3,0F,C2        $2,$1           $0.1,8  KATMAI,SSE
-addps!sseps            58              KATMAI,SSE
-addss!ssess            58              KATMAI,SSE
-andnps!sseps           55              KATMAI,SSE
-andps!sseps            54              KATMAI,SSE
-cmpeqps!ssecmpps       0
-cmpeqss!ssecmpss       0
-cmpleps!ssecmpps       2
-cmpless!ssecmpss       2
-cmpltps!ssecmpps       1
-cmpltss!ssecmpss       1
-cmpneqps!ssecmpps      4
-cmpneqss!ssecmpss      4
-cmpnleps!ssecmpps      6
-cmpnless!ssecmpss      6
-cmpnltps!ssecmpps      5
-cmpnltss!ssecmpss      5
-cmpordps!ssecmpps      7
-cmpordss!ssecmpss      7
-cmpunordps!ssecmpps    3
-cmpunordss!ssecmpss    3
-cmpps!ssepsimm         C2
-cmpss  XMMREG,rm128,imm8       nil     F3,0F,C2        $2,$1           $3,8    KATMAI,SSE
-comiss!sseps           2F              KATMAI,SSE
-cvtpi2ps!sseps         2A              KATMAI,SSE
-cvtps2pi!sseps         2D              KATMAI,SSE
-cvtsi2ss!ssess         2A              KATMAI,SSE
-cvtss2si!ssess         2D              KATMAI,SSE
-cvttps2pi!sseps                2C              KATMAI,SSE
-cvttss2si!ssess                2C              KATMAI,SSE
-divps!sseps            5E              KATMAI,SSE
-divss!ssess            5E              KATMAI,SSE
-ldmxcsr        mem32                   nil     0F,AE           $1,2            nil     KATMAI,SSE
-maskmovq       MMXREG,MMXREG   nil     0F,F7           $2r,$1          nil     KATMAI,MMX
-maxps!sseps            5F              KATMAI,SSE
-maxss!ssess            5F              KATMAI,SSE
-minps!sseps            5D              KATMAI,SSE
-minss!ssess            5D              KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movaps XMMREG,XMMREG           nil     0F,28           $2r,$1          nil     KATMAI,SSE
-movaps XMMREG,mem128           nil     0F,28           $2,$1           nil     KATMAI,SSE
-movaps mem128,XMMREG           nil     0F,29           $1,$2           nil     KATMAI,SSE
-movhlps        XMMREG,XMMREG           nil     0F,12           $2r,$1          nil     KATMAI,SSE
-movhps XMMREG,mem64            nil     0F,16           $2,$1           nil     KATMAI,SSE
-movhps mem64,XMMREG            nil     0F,17           $1,$2           nil     KATMAI,SSE
-movlhps        XMMREG,XMMREG           nil     0F,16           $2r,$1          nil     KATMAI,SSE
-movlps XMMREG,mem64            nil     0F,12           $2,$1           nil     KATMAI,SSE
-movlps mem64,XMMREG            nil     0F,13           $1,$2           nil     KATMAI,SSE
-movmskps       reg32,XMMREG    nil     0F,50           $1r,$2          nil     KATMAI,SSE
-movntps        mem128,XMMREG           nil     0F,2B           $1,$2           nil     KATMAI,SSE
-movntq mem64,MMXREG            nil     0F,E7           $1,$2           nil     KATMAI,MMX
-movntdq        mem128,XMMREG           nil     66,0F,E7        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movss  XMMREG,XMMREG           nil     F3,0F,10        $2r,$1          nil     KATMAI,SSE
-movss  XMMREG,mem64            nil     F3,0F,10        $2,$1           nil     KATMAI,SSE
-movss  mem64,XMMREG            nil     F3,0F,11        $1,$2           nil     KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movups XMMREG,XMMREG           nil     0F,10           $2r,$1          nil     KATMAI,SSE
-movups XMMREG,mem64            nil     0F,10           $2,$1           nil     KATMAI,SSE
-movups mem64,XMMREG            nil     0F,11           $1,$2           nil     KATMAI,SSE
-mulps!sseps            59              KATMAI,SSE
-mulss!ssess            59              KATMAI,SSE
-orps!sseps             56              KATMAI,SSE
-pavgb!mmxsse           E0              KATMAI          P4,SSE2
-pavgw!mmxsse           E3              KATMAI          P4,SSE2
-pextrw reg32,MMXREG,imm8       nil     0F,C5           $1r,$2          $3,8    KATMAI,MMX
-pextrw reg32,XMMREG,imm8       nil     66,0F,C5        $1r,$2          $3,8    P4,SSE2
-pinsrw MMXREG,reg32,imm8       nil     0F,C4           $2r,$1          $3,8    KATMAI,MMX
-pinsrw MMXREG,rm16,imm8        nil     0F,C4           $2,$1           $3,8    KATMAI,MMX
-pinsrw XMMREG,reg32,imm8       nil     66,0F,C4        $2r,$1          $3,8    P4,SSE2
-pinsrw XMMREG,rm16,imm8        nil     66,0F,C4        $2,$1           $3,8    P4,SSE2
-pmaxsw!mmxsse          EE              KATMAI          P4,SSE2
-pmaxub!mmxsse          DE              KATMAI          P4,SSE2
-pminsw!mmxsse          EA              KATMAI          P4,SSE2
-pminub!mmxsse          DA              KATMAI          P4,SSE2
-pmovmskb       reg32,MMXREG    nil     0F,D7           $1r,$2          nil     KATMAI,SSE
-pmovmskb       reg32,XMMREG    nil     66,0F,D7        $1r,$2          nil     P4,SSE2
-pmulhuw!mmxsse         E4              KATMAI          P4,SSE2
-prefetchnta!twobytemem 0F,18,0         KATMAI
-prefetcht0!twobytemem  0F,18,1         KATMAI
-prefetcht1!twobytemem  0F,18,2         KATMAI
-prefetcht2!twobytemem  0F,18,3         KATMAI
-psadbw!mmxsse          F6              KATMAI          KATMAI,SSE
-pshufw MMXREG,rm64,imm8        nil     0F,70           $2,$1           $3,8    KATMAI,MMX
-rcpps!sseps            53              KATMAI,SSE
-rcpss!ssess            53              KATMAI,SSE
-rsqrtps!sseps          52              KATMAI,SSE
-rsqrtss!ssess          52              KATMAI,SSE
-sfence!threebyte       0F,AE,F8        KATMAI
-shufps!ssepsimm                C6
-sqrtps!sseps           51              KATMAI,SSE
-sqrtss!ssess           51              KATMAI,SSE
-stmxcsr        mem32                   nil     0F,AE           $1,3            nil     KATMAI,SSE
-subps!sseps            5C              KATMAI,SSE
-subss!ssess            5C              KATMAI,SSE
-ucomiss!ssess          2E              KATMAI,SSE
-unpckhps!sseps         15              KATMAI,SSE
-unpcklps!sseps         14              KATMAI,SSE
-xorps!sseps            57              KATMAI,SSE
-;
-; SSE2 instructions
-;
-;  Standard
-!sse2pd        XMMREG,rm128            nil     66,0F,$0.1      $2,$1           nil     P4,SSE2
-!sse2sd        XMMREG,rm128            nil     F2,0F,$0.1      $2,$1           nil     P4,SSE2
-;  With immediate
-!sse2pdimm     XMMREG,rm128,imm8       nil     66,0F,$0.1      $2,$1   $3,8    P4,SSE2
-;  Comparisons
-!sse2cmppd     XMMREG,rm128    nil     66,0F,C2        $2,$1           $0.1,8  P4,SSE2
-!sse2cmpsd     XMMREG,rm128    nil     F2,0F,C2        $2,$1           $0.1,8  P4,SSE2
-addpd!sse2pd           58
-addsd!sse2sd           58
-andnpd!sse2pd          55
-andpd!sse2pd           54
-cmpeqpd!sse2cmppd      0
-cmpeqsd!sse2cmpsd      0
-cmplepd!sse2cmppd      2
-cmplesd!sse2cmpsd      2
-cmpltpd!sse2cmppd      1
-cmpltsd!sse2cmpsd      1
-cmpneqpd!sse2cmppd     4
-cmpneqsd!sse2cmpsd     4
-cmpnlepd!sse2cmppd     6
-cmpnlesd!sse2cmpsd     6
-cmpnltpd!sse2cmppd     5
-cmpnltsd!sse2cmpsd     5
-cmpordpd!sse2cmppd     7
-cmpordsd!sse2cmpsd     7
-cmpunordpd!sse2cmppd   3
-cmpunordsd!sse2cmpsd   3
-cmppd!sse2pdimm                C2
-cmpsd  XMMREG,rm128,imm8       nil     F2,0F,C2        $2,$1           $3,8    P4,SSE2
-comisd!sse2pd          2F
-cvtpi2pd!sse2pd                2A
-cvtsi2sd!sse2sd                2A
-divpd!sse2pd           5E
-divsd!sse2sd           5E
-maxpd!sse2pd           5F
-maxsd!sse2sd           5F
-minpd!sse2pd           5D
-minsd!sse2sd           5D
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movapd XMMREG,XMMREG           nil     66,0F,28        $2r,$1          nil     P4,SSE2
-movapd XMMREG,mem128           nil     66,0F,28        $2,$1           nil     P4,SSE2
-movapd mem128,XMMREG           nil     66,0F,29        $1,$2           nil     P4,SSE2
-movhpd XMMREG,mem64            nil     66,0F,16        $2,$1           nil     P4,SSE2
-movhpd mem64,XMMREG            nil     66,0F,17        $1,$2           nil     P4,SSE2
-movlpd XMMREG,mem64            nil     66,0F,12        $2,$1           nil     P4,SSE2
-movlpd mem64,XMMREG            nil     66,0F,13        $1,$2           nil     P4,SSE2
-movmskpd       reg32,XMMREG    nil     66,0F,50        $1r,$2          nil     P4,SSE2
-movntpd        mem128,XMMREG           nil     66,0F,2B        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movsd  XMMREG,XMMREG           nil     F2,0F,10        $2r,$1          nil     P4,SSE2
-movsd  XMMREG,mem64            nil     F2,0F,10        $2,$1           nil     P4,SSE2
-movsd  mem64,XMMREG            nil     F2,0F,11        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movupd XMMREG,XMMREG           nil     66,0F,10        $2r,$1          nil     P4,SSE2
-movupd XMMREG,mem64            nil     66,0F,10        $2,$1           nil     P4,SSE2
-movupd mem64,XMMREG            nil     66,0F,11        $1,$2           nil     P4,SSE2
-mulpd!sse2pd           59
-mulsd!sse2sd           59
-orpd!sse2pd            56
-shufpd!sse2pdimm       C6
-sqrtpd!sse2pd          51
-sqrtsd!sse2sd          51
-subpd!sse2pd           5C
-subsd!sse2sd           5C
-ucomisd!sse2sd         2E
-unpckhpd!sse2pd                15
-unpcklpd!sse2pd                14
-xorpd!sse2pd           57
-cvtdq2pd!ssess         E6              P4,SSE2
-cvtpd2dq!sse2sd                E6
-cvtdq2ps!sseps         5B              P4,SSE2
-cvtpd2pi!sse2pd                2D
-cvtpd2ps!sse2pd                5A
-cvtps2pd!sseps         5A              P4,SSE2
-cvtps2dq!sse2pd                5B
-cvtsd2si!sse2sd                2D
-cvtsd2ss!sse2sd                5A
-cvtss2sd!ssess         5A              P4,SSE2
-cvttpd2pi!sse2pd       2C
-cvttsd2si!sse2sd       2C
-cvttpd2dq!sse2pd       E6
-cvttps2dq!ssess                5B              P4,SSE2
-maskmovdqu     XMMREG,XMMREG   nil     66,0F,F7        $2r,$1          nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqa XMMREG,XMMREG           nil     66,0F,6F        $2r,$1          nil     P4,SSE2
-movdqa XMMREG,mem128           nil     66,0F,6F        $2,$1           nil     P4,SSE2
-movdqa mem128,XMMREG           nil     66,0F,7F        $1,$2           nil     P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqu XMMREG,XMMREG           nil     F3,0F,6F        $2r,$1          nil     P4,SSE2
-movdqu XMMREG,mem128           nil     F3,0F,6F        $2,$1           nil     P4,SSE2
-movdqu mem128,XMMREG           nil     F3,0F,7F        $1,$2           nil     P4,SSE2
-movdq2q        MMXREG,XMMREG           nil     F2,0F,D6        $2r,$1          nil     P4,SSE2
-movq2dq        XMMREG,MMXREG           nil     F3,0F,D6        $2r,$1          nil     P4,SSE2
-pmuludq!mmxsse         F4              P4              P4,SSE2
-pshufd!sse2pdimm       70
-pshufhw        XMMREG,rm128,imm8       nil     F3,0F,70        $2,$1           $3,8    P4,SSE2
-pshuflw        XMMREG,rm128,imm8       nil     F2,0F,70        $2,$1           $3,8    P4,SSE2
-pslldq XMMREG,imm8             nil     66,0F,73        $1r,7           $2,8    P4,SSE2
-psrldq XMMREG,imm8             nil     66,0F,73        $1r,3           $2,8    P4,SSE2
-punpckhqdq!sse2pd      6D
-punpcklqdq!sse2pd      6C
-;
-; AMD 3DNow! instructions
-;
-!now3d MMXREG,rm64             nil     0F,0F           $2,$1           $0.1,8  @0,3DNOW,AMD
-prefetch!twobytemem    0F,0D,0         P5,3DNOW,AMD
-prefetchw!twobytemem   0F,0D,1         P5,3DNOW,AMD
-femms!twobyte          0F,0E           P5,3DNOW,AMD
-pavgusb!now3d          BF              P5
-pf2id!now3d            1D              P5
-pf2iw!now3d            1C              ATHLON
-pfacc!now3d            AE              P5
-pfadd!now3d            9E              P5
-pfcmpeq!now3d          B0              P5
-pfcmpge!now3d          90              P5
-pfcmpgt!now3d          A0              P5
-pfmax!now3d            A4              P5
-pfmin!now3d            94              P5
-pfmul!now3d            B4              P5
-pfnacc!now3d           8A              ATHLON
-pfpnacc!now3d          8E              ATHLON
-pfrcp!now3d            96              P5
-pfrcpit1!now3d         A6              P5
-pfrcpit2!now3d         B6              P5
-pfrsqit1!now3d         A7              P5
-pfrsqrt!now3d          97              P5
-pfsub!now3d            9A              P5
-pfsubr!now3d           AA              P5
-pi2fd!now3d            0D              P5
-pi2fw!now3d            0C              ATHLON
-pmulhrwa!now3d         B7              P5
-pswapd!now3d           BB              ATHLON
-;
-; AMD extensions
-;
-syscall!twobyte                0F,05           P6,AMD
-sysret!twobyte         0F,07           P6,PRIV,AMD
-; swapgs
-;
-; Cyrix MMX instructions
-;
-!cyrixmmx      MMXREG,rm64     nil     0F,$0.1         $2,$1           nil     P5,MMX,CYRIX
-paddsiw!cyrixmmx       51
-paveb!cyrixmmx         50
-pdistib!cyrixmmx       54
-pmachriw       MMXREG,mem64    nil     0F,5E           $2,$1           nil     P5,MMX,CYRIX
-pmagw!cyrixmmx         52
-pmulhriw!cyrixmmx      5D
-pmulhrwc!cyrixmmx      59
-pmvgezb!cyrixmmx       5C
-pmvlzb!cyrixmmx                5B
-pmvnzb!cyrixmmx                5A
-pmvzb!cyrixmmx         58
-psubsiw!cyrixmmx       55
-;
-; Cyrix extensions
-;
-!cyrixsmm      mem80           nil     0F,$0.1         $1,0            nil     486,CYRIX,SMM
-rdshr!twobyte          0F,36           P6,CYRIX,SMM
-rsdc   segreg,mem80            nil     0F,79           $2,$1           nil     486,CYRIX,SMM
-rsldt!cyrixsmm         7B
-rsts!cyrixsmm          7D
-svdc   mem80,segreg            nil     0F,78           $1,$2           nil     486,CYRIX,SMM
-svldt!cyrixsmm         7A
-svts!cyrixsmm          7C
-smint!twobyte          0F,38           P6,CYRIX
-smintold!twobyte       0F,7E           486,CYRIX,OBS
-wrshr!twobyte          0F,37           P6,CYRIX,SMM
-;
-; Obsolete/Undocumented Instructions
-;
-fsetpm!twobyte         DB,E4           286,FPU,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts   reg16,reg16             16      0F,A7           $1r,$2          nil     386,UNDOC,OBS
-ibts   mem,reg16               16      0F,A7           $1,$2           nil     386,UNDOC,OBS
-ibts   mem16x,reg16            16      0F,A7           $1,$2           nil     386,UNDOC,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts   reg32,reg32             32      0F,A7           $1r,$2          nil     386,UNDOC,OBS
-ibts   mem,reg32               32      0F,A7           $1,$2           nil     386,UNDOC,OBS
-ibts   mem32x,reg32            32      0F,A7           $1,$2           nil     386,UNDOC,OBS
-loadall!twobyte                0F,07           386,UNDOC
-loadall286!twobyte     0F,05           286,UNDOC
-;pop   reg_cs                  nil     0F              nil             nil     8086,UNDOC,OBS
-salc!onebyte           nil,D6          8086,UNDOC
-smi!onebyte            nil,F1          386,UNDOC
-; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11).
-umov   reg8,reg8               nil     0F,10           $1r,$2          nil     386,UNDOC
-umov   reg16,reg16             16      0F,11           $1r,$2          nil     386,UNDOC
-umov   reg32,reg32             32      0F,11           $1r,$2          nil     386,UNDOC
-umov   mem,reg8                nil     0F,10           $1,$2           nil     386,UNDOC
-umov   mem8x,reg8              nil     0F,10           $1,$2           nil     386,UNDOC
-umov   mem,reg16               16      0F,11           $1,$2           nil     386,UNDOC
-umov   mem16x,reg16            16      0F,11           $1,$2           nil     386,UNDOC
-umov   mem,reg32               32      0F,11           $1,$2           nil     386,UNDOC
-umov   mem32x,reg32            32      0F,11           $1,$2           nil     386,UNDOC
-umov   reg8,mem8               nil     0F,12           $2,$1           nil     386,UNDOC
-umov   reg16,mem16             16      0F,13           $2,$1           nil     386,UNDOC
-umov   reg32,mem32             32      0F,13           $2,$1           nil     386,UNDOC
-xbts   reg16,mem16             16      0F,A6           $2,$1           nil     386,UNDOC,OBS
-xbts   reg32,mem32             32      0F,A6           $2,$1           nil     386,UNDOC,OBS
diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h
deleted file mode 100644 (file)
index 86811b6..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/* $IdPath$
- * x86 internals header file
- *
- *  Copyright (C) 2001  Peter Johnson
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef YASM_X86_INT_H
-#define YASM_X86_INT_H
-
-typedef struct x86_effaddr_data {
-    unsigned char segment;     /* segment override, 0 if none */
-
-    /* How the spare (register) bits in Mod/RM are handled:
-     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
-     * They're set in bytecode_new_insn().
-     */
-    unsigned char modrm;
-    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
-    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
-
-    unsigned char sib;
-    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
-    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
-                                  0xff if unknown */
-} x86_effaddr_data;
-
-typedef struct x86_insn {
-    /*@null@*/ effaddr *ea;    /* effective address */
-
-    /*@null@*/ immval *imm;    /* immediate or relative value */
-
-    unsigned char opcode[3];   /* opcode */
-    unsigned char opcode_len;
-
-    unsigned char addrsize;    /* 0 or =mode_bits => no override */
-    unsigned char opersize;    /* 0 indicates no override */
-    unsigned char lockrep_pre; /* 0 indicates no prefix */
-
-    /* HACK, but a space-saving one: shift opcodes have an immediate
-     * form and a ,1 form (with no immediate).  In the parser, we
-     * set this and opcode_len=1, but store the ,1 version in the
-     * second byte of the opcode array.  We then choose between the
-     * two versions once we know the actual value of imm (because we
-     * don't know it in the parser module).
-     *
-     * A override to force the imm version should just leave this at
-     * 0.  Then later code won't know the ,1 version even exists.
-     * TODO: Figure out how this affects CPU flags processing.
-     *
-     * Call x86_SetInsnShiftFlag() to set this flag to 1.
-     */
-    unsigned char shift_op;
-
-    /* HACK, similar to that for shift_op above, for optimizing instructions
-     * that take a sign-extended imm8 as well as imm values (eg, the arith
-     * instructions and a subset of the imul instructions).
-     */
-    unsigned char signext_imm8_op;
-
-    unsigned char mode_bits;
-} x86_insn;
-
-typedef struct x86_jmprel {
-    expr *target;              /* target location */
-
-    struct {
-       unsigned char opcode[3];
-       unsigned char opcode_len;   /* 0 = no opc for this version */
-    } shortop, nearop;
-
-    /* which opcode are we using? */
-    /* The *FORCED forms are specified in the source as such */
-    x86_jmprel_opcode_sel op_sel;
-
-    unsigned char addrsize;    /* 0 or =mode_bits => no override */
-    unsigned char opersize;    /* 0 indicates no override */
-    unsigned char lockrep_pre; /* 0 indicates no prefix */
-
-    unsigned char mode_bits;
-} x86_jmprel;
-
-void x86_bc_delete(bytecode *bc);
-void x86_bc_print(FILE *f, const bytecode *bc);
-bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
-                               resolve_label_func resolve_label);
-int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
-                  void *d, output_expr_func output_expr);
-
-int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
-                    unsigned char nosplit, unsigned char *displen,
-                    unsigned char *modrm, unsigned char *v_modrm,
-                    unsigned char *n_modrm, unsigned char *sib,
-                    unsigned char *v_sib, unsigned char *n_sib);
-
-#endif
index 755e8bed5ea2420c950ce254c062049a31c2959a..c43feb116aa869f7efb3dfa674b9bef2ac96730f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * x86 architecture description
  *
- *  Copyright (C) 2001  Peter Johnson
+ *  Copyright (C) 2002  Peter Johnson
  *
  *  This file is part of YASM.
  *
 #include "util.h"
 /*@unused@*/ RCSID("$IdPath$");
 
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
 #include "bytecode.h"
+
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 
 unsigned char x86_mode_bits = 0;
 
+/* Handle an architecture-specific directive.
+ * Only "bits" (compared case-insensitively) is recognized.  Its first
+ * valparam must have no val and a param that evaluates to the integer 16 or
+ * 32, which then becomes x86_mode_bits; anything else reports an error.
+ * Returns 0 if the directive name was recognized (even when its argument was
+ * rejected) and 1 if it was not.
+ */
+int
+x86_directive(const char *name, valparamhead *valparams,
+             /*@unused@*/ /*@null@*/ valparamhead *objext_valparams,
+             /*@unused@*/ sectionhead *headp)
+{
+    valparam *first;
+    const intnum *num;
+    long bits = 0;     /* stays 0 unless a usable integer argument is found */
+
+    if (strcasecmp(name, "bits") != 0)
+       return 1;
+
+    first = vps_first(valparams);
+    if (first && !first->val && first->param != NULL) {
+       num = expr_get_intnum(&first->param);
+       if (num != NULL)
+           bits = intnum_get_int(num);
+    }
+
+    if (bits == 16 || bits == 32)
+       x86_mode_bits = (unsigned char)bits;
+    else
+       Error(_("invalid argument to [%s]"), "BITS");
+    return 0;
+}
+
+/* Return the size in bytes of register reg.
+ * The low 3 bits of reg hold the register number and are masked off; the
+ * remaining bits select the register class.  An unrecognized class raises an
+ * internal error and yields 0.
+ */
+unsigned int
+x86_get_reg_size(unsigned long reg)
+{
+    const x86_expritem_reg_size regclass = (x86_expritem_reg_size)(reg & ~7);
+    unsigned int nbytes = 0;
+
+    switch (regclass) {
+       case X86_REG8:
+           nbytes = 1;
+           break;
+       case X86_REG16:
+           nbytes = 2;
+           break;
+       case X86_REG32:
+       case X86_CRREG:
+       case X86_DRREG:
+       case X86_TRREG:
+           nbytes = 4;
+           break;
+       case X86_MMXREG:
+           nbytes = 8;
+           break;
+       case X86_XMMREG:
+           nbytes = 16;
+           break;
+       case X86_FPUREG:
+           nbytes = 10;
+           break;
+       default:
+           InternalError(_("unknown register size"));
+    }
+    return nbytes;
+}
+
+/* Print the assembler name of register reg to f.
+ * reg combines a register class (x86_expritem_reg_size) with the register
+ * number in its low 3 bits.  An unrecognized class is an internal error.
+ */
+void
+x86_reg_print(FILE *f, unsigned long reg)
+{
+    static const char *byte_names[] =
+       {"al","cl","dl","bl","ah","ch","dh","bh"};
+    static const char *word_names[] =
+       {"ax","cx","dx","bx","sp","bp","si","di"};
+    const unsigned long num = reg & 7;
+    const x86_expritem_reg_size regclass = (x86_expritem_reg_size)(reg & ~7);
+
+    switch (regclass) {
+       case X86_REG8:
+           fprintf(f, "%s", byte_names[num]);
+           break;
+       case X86_REG16:
+           fprintf(f, "%s", word_names[num]);
+           break;
+       case X86_REG32:
+           /* 32-bit names are the 16-bit names with an "e" prefix */
+           fprintf(f, "e%s", word_names[num]);
+           break;
+       case X86_MMXREG:
+           fprintf(f, "mm%d", (int)num);
+           break;
+       case X86_XMMREG:
+           fprintf(f, "xmm%d", (int)num);
+           break;
+       case X86_CRREG:
+           fprintf(f, "cr%d", (int)num);
+           break;
+       case X86_DRREG:
+           fprintf(f, "dr%d", (int)num);
+           break;
+       case X86_TRREG:
+           fprintf(f, "tr%d", (int)num);
+           break;
+       case X86_FPUREG:
+           fprintf(f, "st%d", (int)num);
+           break;
+       default:
+           InternalError(_("unknown register size"));
+    }
+}
+
+/* Print the name of segment register segreg to f.
+ * Only the low 3 bits of segreg select the register (0-5 = es, cs, ss, ds,
+ * fs, gs).  Indexes 6 and 7 have no name; they are reported as an internal
+ * error instead of reading past the end of the name table.
+ */
+void
+x86_segreg_print(FILE *f, unsigned long segreg)
+{
+    static const char *name[] = {"es","cs","ss","ds","fs","gs"};
+    const unsigned long idx = segreg & 7;
+
+    if (idx >= sizeof(name)/sizeof(name[0])) {
+       InternalError(_("unknown segment register"));
+       return;
+    }
+    fprintf(f, "%s", name[idx]);
+}
+
+/* Apply a parsed instruction prefix to bytecode bc.
+ * data[0] holds the prefix kind (x86_parse_insn_prefix) and data[1] the
+ * value handed to the matching bytecode setter, truncated to a byte.
+ * Unrecognized kinds are ignored.
+ */
+void
+x86_handle_prefix(bytecode *bc, const unsigned long data[4])
+{
+    const x86_parse_insn_prefix kind = (x86_parse_insn_prefix)data[0];
+    const unsigned char value = (unsigned char)data[1];
+
+    if (kind == X86_LOCKREP)
+       x86_bc_insn_set_lockrep_prefix(bc, value);
+    else if (kind == X86_ADDRSIZE)
+       x86_bc_insn_addrsize_override(bc, value);
+    else if (kind == X86_OPERSIZE)
+       x86_bc_insn_opersize_override(bc, value);
+}
+
+/* Apply a segment-register prefix to the instruction in bc: bits 8 and up of
+ * segreg, truncated to a byte, become the segment override of the
+ * instruction's effective address.
+ */
+void
+x86_handle_seg_prefix(bytecode *bc, unsigned long segreg)
+{
+    const unsigned char seg_byte = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(x86_bc_insn_get_ea(bc), seg_byte);
+}
+
+/* Set the segment override of effective address ea from bits 8 and up of
+ * segreg, truncated to a byte.
+ */
+void
+x86_handle_seg_override(effaddr *ea, unsigned long segreg)
+{
+    const unsigned char seg_byte = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(ea, seg_byte);
+}
+
 /* Define arch structure -- see arch.h for details */
 arch x86_arch = {
     "x86 (IA-32, x86-64)",
     "x86",
+    {
+       x86_switch_cpu,
+       x86_check_identifier,
+       x86_directive,
+       x86_new_insn,
+       x86_handle_prefix,
+       x86_handle_seg_prefix,
+       x86_handle_seg_override,
+       x86_ea_new_expr
+    },
     {
        X86_BYTECODE_TYPE_MAX,
        x86_bc_delete,
        x86_bc_print,
        x86_bc_resolve,
        x86_bc_tobytes
-    }
+    },
+    x86_get_reg_size,
+    x86_reg_print,
+    x86_segreg_print,
+    NULL,      /* x86_ea_data_delete */
+    x86_ea_data_print
 };
index 336201b8e9928b9f6e0a24c6e985c36fcae83e28..c44c0ddca0ca1d73f0f3e109cc3051e0b6a87c56 100644 (file)
@@ -28,6 +28,31 @@ typedef enum {
 } x86_bytecode_type;
 #define X86_BYTECODE_TYPE_MAX  X86_BC_JMPREL+1
 
+/* Register class values.  The low 3 bits (0-7) are left clear in every value
+ * because the register number is stored in the same data area: mask with ~7
+ * to recover the class and with 7 to recover the register number.
+ */
+typedef enum {
+    X86_REG8 = 0x8,
+    X86_REG16 = 0x10,
+    X86_REG32 = 0x20,
+    X86_MMXREG = 0x40,
+    X86_XMMREG = 0x80,
+    X86_CRREG = 0xC0,
+    X86_DRREG = 0xC8,
+    X86_TRREG = 0xF0,
+    X86_FPUREG = 0xF8
+} x86_expritem_reg_size;
+
+/* Kind of instruction prefix; x86_handle_prefix() reads it from data[0] to
+ * choose between the lock/rep, address-size and operand-size setters.
+ */
+typedef enum {
+    X86_LOCKREP = 1,
+    X86_ADDRSIZE,
+    X86_OPERSIZE
+} x86_parse_insn_prefix;
+
+/* Target modifier values (near, short, far).
+ * NOTE(review): presumably the modifiers the parser attaches to jump
+ * targets -- confirm against the code that consumes this enum.
+ */
+typedef enum {
+    X86_NEAR,
+    X86_SHORT,
+    X86_FAR
+} x86_parse_targetmod;
+
 typedef enum {
     JR_NONE,
     JR_SHORT,
@@ -44,7 +69,7 @@ typedef struct x86_targetval {
 
 void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment);
 effaddr *x86_ea_new_reg(unsigned char reg);
-effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len);
 effaddr *x86_ea_new_expr(/*@keep@*/ expr *e);
 
 /*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc);
@@ -63,7 +88,7 @@ void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel,
  */
 typedef struct x86_new_insn_data {
     /*@keep@*/ /*@null@*/ effaddr *ea;
-    /*@keep@*/ /*@null@*/ immval *imm;
+    /*@keep@*/ /*@null@*/ expr *imm;
     unsigned char opersize;
     unsigned char op_len;
     unsigned char op[3];
@@ -90,4 +115,116 @@ bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d);
 
 extern unsigned char x86_mode_bits;
 
+/* x86-specific effective address data: the segment override plus the Mod/RM
+ * and SIB bytes, each with flags saying whether the byte is needed and
+ * whether its current value is valid.
+ */
+typedef struct x86_effaddr_data {
+    unsigned char segment;     /* segment override, 0 if none */
+
+    /* How the spare (register) bits in Mod/RM are handled:
+     * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+     * They're set in bytecode_new_insn().
+     */
+    unsigned char modrm;
+    unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+    unsigned char need_modrm;  /* 1 if Mod/RM byte needed, 0 if not */
+
+    unsigned char sib;
+    unsigned char valid_sib;   /* 1 if SIB byte currently valid, 0 if not */
+    unsigned char need_sib;    /* 1 if SIB byte needed, 0 if not,
+                                  0xff if unknown */
+} x86_effaddr_data;
+
+/* Data for a general x86 instruction bytecode: optional effective address
+ * and immediate, the opcode bytes, prefix/override settings, and two flags
+ * that let a shorter encoding be picked once the immediate value is known.
+ */
+typedef struct x86_insn {
+    /*@null@*/ effaddr *ea;    /* effective address */
+
+    /*@null@*/ immval *imm;    /* immediate or relative value */
+
+    unsigned char opcode[3];   /* opcode */
+    unsigned char opcode_len;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    /* HACK, but a space-saving one: shift opcodes have an immediate
+     * form and a ,1 form (with no immediate).  In the parser, we
+     * set this and opcode_len=1, but store the ,1 version in the
+     * second byte of the opcode array.  We then choose between the
+     * two versions once we know the actual value of imm (because we
+     * don't know it in the parser module).
+     *
+     * An override to force the imm version should just leave this at
+     * 0.  Then later code won't know the ,1 version even exists.
+     * TODO: Figure out how this affects CPU flags processing.
+     *
+     * Call x86_SetInsnShiftFlag() to set this flag to 1.
+     */
+    unsigned char shift_op;
+
+    /* HACK, similar to that for shift_op above, for optimizing instructions
+     * that take a sign-extended imm8 as well as imm values (eg, the arith
+     * instructions and a subset of the imul instructions).
+     */
+    unsigned char signext_imm8_op;
+
+    unsigned char mode_bits;
+} x86_insn;
+
+/* Data for a relative jump bytecode: the target expression, the opcode bytes
+ * of the short and near forms, which form is selected, and the same
+ * prefix/override settings as x86_insn.
+ */
+typedef struct x86_jmprel {
+    expr *target;              /* target location */
+
+    struct {
+       unsigned char opcode[3];
+       unsigned char opcode_len;   /* 0 = no opc for this version */
+    } shortop, nearop;
+
+    /* which opcode are we using? */
+    /* The *FORCED forms are specified in the source as such */
+    x86_jmprel_opcode_sel op_sel;
+
+    unsigned char addrsize;    /* 0 or =mode_bits => no override */
+    unsigned char opersize;    /* 0 indicates no override */
+    unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+    unsigned char mode_bits;
+} x86_jmprel;
+
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(FILE *f, const bytecode *bc);
+bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
+                               resolve_label_func resolve_label);
+int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
+                  void *d, output_expr_func output_expr);
+
+int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
+                    unsigned char nosplit, unsigned char *displen,
+                    unsigned char *modrm, unsigned char *v_modrm,
+                    unsigned char *n_modrm, unsigned char *sib,
+                    unsigned char *v_sib, unsigned char *n_sib);
+
+void x86_switch_cpu(const char *cpuid);
+
+/* NOTE(review): the declared bound of 2 is not enforced by C, but the
+ * DEF_INSN_DATA macro in x86id.re stores into data[0], data[1] and data[2].
+ * If that macro fills this function's data (it appears to, via RET_INSN),
+ * callers must pass at least 3 elements -- confirm and update the bound.
+ */
+arch_check_id_retval x86_check_identifier(unsigned long data[2],
+                                         const char *id);
+
+int x86_directive(const char *name, valparamhead *valparams,
+                 /*@null@*/ valparamhead *objext_valparams,
+                 sectionhead *headp);
+
+/*@null@*/ bytecode *x86_new_insn(const unsigned long data[2],
+                                 int num_operands,
+                                 /*@null@*/ insn_operandhead *operands);
+
+void x86_handle_prefix(bytecode *bc, const unsigned long data[4]);
+
+void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg);
+
+void x86_handle_seg_override(effaddr *ea, unsigned long segreg);
+
+unsigned int x86_get_reg_size(unsigned long reg);
+
+void x86_reg_print(FILE *f, unsigned long reg);
+
+void x86_segreg_print(FILE *f, unsigned long segreg);
+
+void x86_ea_data_print(FILE *f, const effaddr *ea);
+
 #endif
index 8cc4d4b45c2aed3ae017653608a20507f309070e..4393a0c85ed13c178ec8ee83146eaac06fb73858 100644 (file)
@@ -32,7 +32,7 @@
 #include "bytecode.h"
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 #include "bc-int.h"
 
@@ -54,11 +54,12 @@ x86_bc_new_insn(x86_new_insn_data *d)
        ead->modrm |= (d->spare << 3) & 0x38;   /* plug in provided bits */
     }
 
-    insn->imm = d->imm;
     if (d->imm) {
+       insn->imm = imm_new_expr(d->imm);
        insn->imm->len = d->im_len;
        insn->imm->sign = d->im_sign;
-    }
+    } else
+       insn->imm = NULL;
 
     insn->opcode[0] = d->op[0];
     insn->opcode[1] = d->op[1];
@@ -173,12 +174,12 @@ x86_ea_new_expr(expr *e)
 
 /*@-compmempass@*/
 effaddr *
-x86_ea_new_imm(immval *imm, unsigned char im_len)
+x86_ea_new_imm(expr *imm, unsigned char im_len)
 {
     effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
     x86_effaddr_data *ead = ea_get_data(ea);
 
-    ea->disp = imm->val;
+    ea->disp = imm;
     ea->len = im_len;
     ea->nosplit = 0;
     ead->segment = 0;
@@ -320,10 +321,8 @@ x86_bc_delete(bytecode *bc)
     switch ((x86_bytecode_type)bc->type) {
        case X86_BC_INSN:
            insn = bc_get_data(bc);
-           if (insn->ea) {
-               expr_delete(insn->ea->disp);
-               xfree(insn->ea);
-           }
+           if (insn->ea)
+               ea_delete(insn->ea);
            if (insn->imm) {
                expr_delete(insn->imm->val);
                xfree(insn->imm);
@@ -336,40 +335,38 @@ x86_bc_delete(bytecode *bc)
     }
 }
 
+/* Dump the x86-specific part of effective address ea to f: one line each for
+ * the segment override, the Mod/RM state and the SIB state, every line
+ * indented by the current indent_level.
+ */
+void
+x86_ea_data_print(FILE *f, const effaddr *ea)
+{
+    const x86_effaddr_data *x86ea = ea_get_const_data(ea);
+    const unsigned int seg = (unsigned int)x86ea->segment;
+    const unsigned int modrm = (unsigned int)x86ea->modrm;
+    const unsigned int modrm_valid = (unsigned int)x86ea->valid_modrm;
+    const unsigned int modrm_needed = (unsigned int)x86ea->need_modrm;
+    const unsigned int sib = (unsigned int)x86ea->sib;
+    const unsigned int sib_valid = (unsigned int)x86ea->valid_sib;
+    const unsigned int sib_needed = (unsigned int)x86ea->need_sib;
+
+    fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", seg);
+    fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "",
+           modrm, modrm_valid, modrm_needed);
+    fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "",
+           sib, sib_valid, sib_needed);
+}
+
 void
 x86_bc_print(FILE *f, const bytecode *bc)
 {
     const x86_insn *insn;
     const x86_jmprel *jmprel;
-    x86_effaddr_data *ead;
 
     switch ((x86_bytecode_type)bc->type) {
        case X86_BC_INSN:
            insn = bc_get_const_data(bc);
            fprintf(f, "%*s_Instruction_\n", indent_level, "");
            fprintf(f, "%*sEffective Address:", indent_level, "");
-           if (!insn->ea)
-               fprintf(f, " (nil)\n");
-           else {
-               indent_level++;
-               fprintf(f, "\n%*sDisp=", indent_level, "");
-               expr_print(f, insn->ea->disp);
+           if (insn->ea) {
                fprintf(f, "\n");
-               ead = ea_get_data(insn->ea);
-               fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n",
-                       indent_level, "", (unsigned int)insn->ea->len,
-                       (unsigned int)ead->segment,
-                       (unsigned int)insn->ea->nosplit);
-               fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n",
-                       indent_level, "", (unsigned int)ead->modrm,
-                       (unsigned int)ead->valid_modrm,
-                       (unsigned int)ead->need_modrm);
-               fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n",
-                       indent_level, "", (unsigned int)ead->sib,
-                       (unsigned int)ead->valid_sib,
-                       (unsigned int)ead->need_sib);
+               indent_level++;
+               ea_print(f, insn->ea);
                indent_level--;
-           }
+           } else
+               fprintf(f, " (nil)\n");
            fprintf(f, "%*sImmediate Value:", indent_level, "");
            if (!insn->imm)
                fprintf(f, " (nil)\n");
@@ -477,8 +474,7 @@ x86_bc_resolve_insn(x86_insn *insn, unsigned long *len, int save,
        x86_effaddr_data ead_t = *ead;  /* structure copy */
        unsigned char displen = ea->len;
 
-       if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
-                          (!ead->valid_modrm && ead->need_modrm))) {
+       if (ea->disp) {
            temp = expr_copy(ea->disp);
            assert(temp != NULL);
 
index d041cc42e4a74f8263eb3c8f14f88ae14ec321eb..a30f14d34b2e10d11ed3e3ec6e20abb7019f5cb1 100644 (file)
@@ -33,7 +33,7 @@
 #include "bytecode.h"
 #include "arch.h"
 
-#include "x86-int.h"
+#include "x86arch.h"
 
 #include "expr-int.h"
 
@@ -48,10 +48,10 @@ x86_expr_checkea_get_reg32(ExprItem *ei, /*returned*/ void *d)
     int *ret;
 
     /* don't allow 16-bit registers */
-    if (ei->data.reg.size != 32)
+    if ((ei->data.reg & ~7) != X86_REG32)
        return 0;
 
-    ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */
+    ret = &data[ei->data.reg & 7];
 
     /* overwrite with 0 to eliminate register from displacement expr */
     ei->type = EXPR_INT;
@@ -84,10 +84,11 @@ x86_expr_checkea_get_reg16(ExprItem *ei, void *d)
     reg16[7] = &data->di;
 
     /* don't allow 32-bit registers */
-    if (ei->data.reg.size != 16)
+    if ((ei->data.reg & ~7) != X86_REG16)
        return 0;
 
-    ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
+    /* & 7 for sanity check */
+    ret = reg16[ei->data.reg & 7];
 
     /* only allow BX, SI, DI, BP */
     if (!ret)
@@ -469,7 +470,7 @@ x86_expr_checkea_getregsize_callback(ExprItem *ei, void *d)
     unsigned char *addrsize = (unsigned char *)d;
 
     if (ei->type == EXPR_REG) {
-       *addrsize = ei->data.reg.size;
+       *addrsize = (unsigned char)ei->data.reg & ~7;
        return 1;
     } else
        return 0;
@@ -757,6 +758,12 @@ x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
        return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE,
                                        havereg == HAVE_BP, displen, modrm,
                                        v_modrm);
+    } else if (!*n_modrm && !*n_sib) {
+       /* Special case for MOV MemOffs opcode: displacement but no modrm. */
+       if (*addrsize == 32)
+           *displen = 4;
+       else if (*addrsize == 16)
+           *displen = 2;
     }
     return 1;
 }
diff --git a/src/arch/x86/x86id.re b/src/arch/x86/x86id.re
new file mode 100644 (file)
index 0000000..46ab2db
--- /dev/null
@@ -0,0 +1,1282 @@
+/*
+ * x86 identifier recognition and instruction handling
+ *
+ *  Copyright (C) 2002  Peter Johnson
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+#include "src/arch/x86/x86arch.h"
+
+#include "expr-int.h"
+#include "bc-int.h"
+
+
+/* Available CPU feature flags.  Each flag is a distinct bit so that the
+ * requirements of an instruction can be OR'ed together.
+ */
+#define CPU_Any            (0)         /* Any old cpu will do */
+#define CPU_086            CPU_Any
+#define CPU_186            (1<<0)      /* i186 or better required */
+#define CPU_286            (1<<1)      /* i286 or better required */
+#define CPU_386            (1<<2)      /* i386 or better required */
+#define CPU_486            (1<<3)      /* i486 or better required */
+#define CPU_586            (1<<4)      /* i586 or better required */
+#define CPU_686            (1<<5)      /* i686 or better required */
+#define CPU_P3     (1<<6)      /* Pentium3 or better required */
+#define CPU_P4     (1<<7)      /* Pentium4 or better required */
+#define CPU_IA64    (1<<8)     /* IA-64 or better required */
+#define CPU_K6     (1<<9)      /* AMD K6 or better required */
+#define CPU_Athlon  (1<<10)    /* AMD Athlon or better required */
+#define CPU_Hammer  (1<<11)    /* AMD Sledgehammer or better required */
+#define CPU_FPU            (1<<12)     /* FPU support required */
+#define CPU_MMX            (1<<13)     /* MMX support required */
+#define CPU_SSE            (1<<14)     /* Streaming SIMD extensions required */
+#define CPU_SSE2    (1<<15)    /* Streaming SIMD extensions 2 required */
+#define CPU_3DNow   (1<<16)    /* 3DNow! support required */
+#define CPU_Cyrix   (1<<17)    /* Cyrix-specific instruction */
+#define CPU_AMD            (1<<18)     /* AMD-specific inst. (older than K6) */
+#define CPU_SMM            (1<<19)     /* System Management Mode instruction */
+#define CPU_Prot    (1<<20)    /* Protected mode only instruction */
+#define CPU_Undoc   (1<<21)    /* Undocumented instruction */
+#define CPU_Obs            (1<<22)     /* Obsolete instruction */
+#define CPU_Priv    (1<<23)    /* Privileged instruction */
+
+/* What instructions/features are enabled?  Defaults to all. */
+static unsigned long cpu_enabled = ~CPU_Any;
+
+/* Opcode modifiers.  The opcode bytes are in "reverse" order because the
+ * parameters are read from the arch-specific data in LSB->MSB order.
+ * (only for aesthetic reasons in the lexer code below, no practical reason).
+ */
+#define MOD_Op2Add  (1<<0)     /* Parameter adds to opcode byte 2 */
+#define MOD_Gap0    (1<<1)     /* Eats a parameter */
+#define MOD_Op1Add  (1<<2)     /* Parameter adds to opcode byte 1 */
+#define MOD_Gap1    (1<<3)     /* Eats a parameter */
+#define MOD_Op0Add  (1<<4)     /* Parameter adds to opcode byte 0 */
+#define MOD_SpAdd   (1<<5)     /* Parameter adds to "spare" value */
+#define MOD_OpSizeR (1<<6)     /* Parameter replaces opersize */
+#define MOD_Imm8    (1<<7)     /* Parameter is included as immediate byte */
+
+/* Operand types.  These are more detailed than the "general" types for all
+ * architectures, as they include the size, for instance.
+ * Bit Breakdown (from LSB to MSB):
+ *  - 4 bits = general type (must be exact match, except for =3):
+ *             0 = immediate
+ *             1 = any general purpose, MMX, XMM, or FPU register
+ *             2 = memory
+ *             3 = any general purpose, MMX, XMM, or FPU register OR memory
+ *             4 = segreg
+ *             5 = any CR register
+ *             6 = any DR register
+ *             7 = any TR register
+ *             8 = ST0
+ *             9 = AL/AX/EAX (depending on size)
+ *             A = CL/CX/ECX (depending on size)
+ *             B = CR4
+ *             C = memory offset (an EA, but with no registers allowed)
+ *                 [special case for MOV opcode]
+ *  - 3 bits = size (user-specified, or from register size):
+ *             0 = any size acceptable
+ *             1/2/3/4 = 8/16/32/64 bits (from user or reg size)
+ *             5/6 = 80/128 bits (from user)
+ *  - 1 bit = size implicit or explicit ("strictness" of size matching on
+ *            non-registers -- registers are always strictly matched):
+ *            0 = user size must exactly match size above.
+ *            1 = user size either unspecified or exactly match size above.
+ *
+ * The bits above these are actions: what to do with the operand if the
+ * instruction matches.  Essentially describes what part of the output bytecode
+ * gets the operand.  This may require conversion (e.g. a register going into
+ * an ea field).  Naturally, only one of each of these may be contained in the
+ * operands of a single insn_info structure.
+ *  - 3 bits = action:
+ *             0 = does nothing (operand data is discarded)
+ *             1 = operand data goes into ea field
+ *             2 = operand data goes into imm field
+ *             3 = operand data goes into "spare" field
+ *             4 = operand data is added to opcode byte 0
+ */
+/* General operand type: bits 0-3, extracted with OPT_MASK. */
+#define OPT_Imm                0x0
+#define OPT_Reg                0x1
+#define OPT_Mem                0x2
+#define OPT_RM         0x3
+#define OPT_SegReg     0x4
+#define OPT_CRReg      0x5
+#define OPT_DRReg      0x6
+#define OPT_TRReg      0x7
+#define OPT_ST0                0x8
+#define OPT_Areg       0x9
+#define OPT_Creg       0xA
+#define OPT_CR4                0xB
+#define OPT_MemOffs    0xC
+#define OPT_MASK       0x000F
+
+/* Operand size: bits 4-6, extracted with OPS_MASK and OPS_SHIFT. */
+#define OPS_Any                (0<<4)
+#define OPS_8          (1<<4)
+#define OPS_16         (2<<4)
+#define OPS_32         (3<<4)
+#define OPS_64         (4<<4)
+#define OPS_80         (5<<4)
+#define OPS_128                (6<<4)
+#define OPS_MASK       0x0070
+#define OPS_SHIFT      4
+
+/* Size strictness: bit 7; set means an unspecified user size also matches. */
+#define OPS_Relaxed    (1<<7)
+#define OPS_RMASK      0x0080
+
+/* Operand action: bits 8-10, extracted with OPA_MASK. */
+#define OPA_None       (0<<8)
+#define OPA_EA         (1<<8)
+#define OPA_Imm                (2<<8)
+#define OPA_Spare      (3<<8)
+#define OPA_Op0Add     (4<<8)
+#define OPA_MASK       0x0700
+
+/* Description of one form of an instruction.  The instruction groupings
+ * below are arrays of these; initializers list the fields in declaration
+ * order: cpu, modifiers, opersize, opcode_len, opcode, spare, num_operands,
+ * operands.
+ */
+typedef struct x86_insn_info {
+    /* The CPU feature flags needed to execute this instruction.  This is OR'ed
+     * with arch-specific data[2].  This combined value is compared with
+     * cpu_enabled to see if all bits set here are set in cpu_enabled--if so,
+     * the instruction is available on this CPU.
+     */
+    unsigned long cpu;
+
+    /* Opcode modifiers for variations of instruction.  As each modifier reads
+     * its parameter in LSB->MSB order from the arch-specific data[1] from the
+     * lexer data, and the LSB of the arch-specific data[1] is reserved for the
+     * count of insn_info structures in the instruction grouping, there can
+     * only be a maximum of 3 modifiers.
+     */
+    unsigned long modifiers;
+
+    /* Operand size (the groupings below use 0, 16 and 32) */
+    unsigned char opersize;
+
+    /* The length of the basic opcode */
+    unsigned char opcode_len;
+
+    /* The basic 1-3 byte opcode */
+    unsigned char opcode[3];
+
+    /* The 3-bit "spare" value (extended opcode) for the R/M byte field */
+    unsigned char spare;
+
+    /* The number of operands this form of the instruction takes */
+    unsigned char num_operands;
+
+    /* The types of each operand, see above */
+    unsigned int operands[3];
+} x86_insn_info;
+
+/* Define lexer arch-specific data with 0-3 modifiers.
+ * Expects an array named "data" in scope and fills it as follows:
+ *   data[0] = address of the group##_insn table, cast to unsigned long
+ *   data[1] = number of entries in that table in the low byte (truncated to
+ *             unsigned char), with mod shifted into the bits above it
+ *   data[2] = cpu
+ */
+#define DEF_INSN_DATA(group, mod, cpu) do { \
+    data[0] = (unsigned long)group##_insn; \
+    data[1] = ((mod)<<8) | \
+             ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \
+    data[2] = cpu; \
+    } while (0)
+
+/* Fill "data" as DEF_INSN_DATA does, then return ARCH_CHECK_ID_INSN from the
+ * enclosing function.
+ */
+#define RET_INSN(group, mod, cpu)      do { \
+    DEF_INSN_DATA(group, mod, cpu); \
+    return ARCH_CHECK_ID_INSN; \
+    } while (0)
+
+/*
+ * General instruction groupings
+ */
+
+/* One byte opcode instructions with no operands.
+ * The opcode byte and operand size are left 0 here; they come from the
+ * parameters of the MOD_Op0Add and MOD_OpSizeR modifiers.
+ */
+static const x86_insn_info onebyte_insn[] = {
+    { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Two byte opcode instructions with no operands.
+ * Both opcode bytes are left 0 here; they come from the parameters of the
+ * MOD_Op1Add and MOD_Op0Add modifiers.
+ */
+static const x86_insn_info twobyte_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Three byte opcode instructions with no operands.
+ * All three opcode bytes are left 0 here; they come from the parameters of
+ * the MOD_Op2Add, MOD_Op1Add and MOD_Op0Add modifiers.
+ */
+static const x86_insn_info threebyte_insn[] = {
+    { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0,
+      {0, 0, 0} }
+};
+
+/* One byte opcode instructions with general memory operand.
+ * The opcode byte and the "spare" value are left 0 here; they come from the
+ * parameters of the MOD_Op0Add and MOD_SpAdd modifiers.  The single operand
+ * is memory of any size and goes into the ea field.
+ */
+static const x86_insn_info onebytemem_insn[] = {
+    { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Two byte opcode instructions with general memory operand.
+ * Both opcode bytes come from the parameters of the MOD_Op1Add and
+ * MOD_Op0Add modifiers and the "spare" value from MOD_SpAdd.  opcode_len is
+ * therefore 2 (as in twobyte_insn), so that the second opcode byte is part
+ * of the opcode.  The single operand is memory of any size and goes into the
+ * ea field.
+ */
+static const x86_insn_info twobytemem_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 2, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Move instructions.
+ * x86_new_insn() scans this table top to bottom and uses the first entry
+ * whose CPU flags, operand count, operand types and operand sizes all
+ * match, so the more specific forms (accumulator <-> memory offset, CR4)
+ * are listed ahead of the general ones.
+ * Every form with a 32-bit operand size requires CPU_386, including the
+ * accumulator/memory-offset forms (A1/A3).
+ */
+static const x86_insn_info mov_insn[] = {
+    /* Accumulator <- memory offset */
+    { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* Memory offset <- accumulator */
+    { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} },
+    { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} },
+    { CPU_386, 0, 32, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} },
+    /* Register/memory <- register */
+    { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
+    { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
+    { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
+    /* Register <- register/memory */
+    { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* TODO: segreg here */
+    /* Register <- immediate (register number is added into the opcode) */
+    { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Register/memory <- immediate.
+     * Need two sets here, one for strictness on left side, one for right.
+     */
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Control register moves; the CR4 forms are listed ahead of the
+     * generic control register forms.
+     */
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
+    /* Debug register moves */
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2,
+      {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} }
+};
+
+/* Move with sign extension / zero extension (MOVSX, MOVZX).
+ * The second opcode byte is completed by MOD_Op1Add from the identifier's
+ * modifier data; the 16-bit-source form starts from 1 so that it ends up
+ * one above the 8-bit-source opcode.
+ */
+static const x86_insn_info movszx_insn[] = {
+    /* 16-bit register <- 8-bit register/memory (source size relaxed) */
+    {
+       CPU_386, MOD_Op1Add, 16,
+       2, {0x0F, 0, 0}, 0,
+       2, {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0}
+    },
+    /* 32-bit register <- 8-bit register/memory (source size strict) */
+    {
+       CPU_386, MOD_Op1Add, 32,
+       2, {0x0F, 0, 0}, 0,
+       2, {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0}
+    },
+    /* 32-bit register <- 16-bit register/memory (source size strict) */
+    {
+       CPU_386, MOD_Op1Add, 32,
+       2, {0x0F, 1, 0}, 0,
+       2, {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0}
+    }
+};
+
+
+/* Build an instruction bytecode from an identifier's instruction data and
+ * the parsed operand list.
+ *
+ * data[0] holds the address of an x86_insn_info table, the low 8 bits of
+ * data[1] give the number of entries in it, the remaining bits of data[1]
+ * carry the modifier bytes consumed by the MOD_* flags, and data[2] holds
+ * additional CPU flags that must be enabled.
+ *
+ * The table is searched linearly; the first entry whose CPU requirements,
+ * operand count, operand types and operand sizes all match is used.
+ * If nothing matches, an error is reported and NULL is returned.
+ * Otherwise the result of x86_bc_new_insn() is returned.
+ */
+bytecode *
+x86_new_insn(const unsigned long data[4], int num_operands,
+            insn_operandhead *operands)
+{
+    x86_new_insn_data d;
+    int num_info = (int)(data[1]&0xFF);
+    /* The tables are const; keep them const here as well. */
+    const x86_insn_info *info = (const x86_insn_info *)data[0];
+    unsigned long mod_data = data[1] >> 8;
+    int found = 0;
+    insn_operand *op;
+    int i;
+    /* Maps the OPS_* size field (after shifting) to a size value that is
+     * compared against op->size and x86_get_reg_size(); 0 means "any".
+     */
+    static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0};
+
+    /* Just do a simple linear search through the info array for a match.
+     * First match wins.
+     */
+    for (; num_info>0 && !found; num_info--, info++) {
+       unsigned long cpu;
+       unsigned int size;
+       int mismatch = 0;
+
+       /* Match CPU: every flag required by the table entry and by the
+        * identifier must currently be enabled.
+        */
+       cpu = info->cpu | data[2];
+       if ((cpu_enabled & cpu) != cpu)
+           continue;
+
+       /* Match # of operands */
+       if (num_operands != info->num_operands)
+           continue;
+
+       if (!operands) {
+           found = 1;      /* no operands -> must have a match here. */
+           break;
+       }
+
+       /* Match each operand type and size */
+       for(i = 0, op = ops_first(operands); op && i<info->num_operands &&
+           !mismatch; op = ops_next(op), i++) {
+           /* Check operand type */
+           switch (info->operands[i] & OPT_MASK) {
+               case OPT_Imm:
+                   if (op->type != INSN_OPERAND_IMM)
+                       mismatch = 1;
+                   break;
+               case OPT_Reg:
+                   if (op->type != INSN_OPERAND_REG)
+                       mismatch = 1;
+                   else {
+                       /* Control/debug/test registers have their own
+                        * operand types and never match a plain OPT_Reg.
+                        */
+                       size = op->data.reg & ~7;
+                       if (size == X86_CRREG || size == X86_DRREG ||
+                           size == X86_TRREG)
+                           mismatch = 1;
+                   }
+                   break;
+               case OPT_Mem:
+                   if (op->type != INSN_OPERAND_MEMORY)
+                       mismatch = 1;
+                   break;
+               case OPT_RM:
+                   if (op->type != INSN_OPERAND_REG &&
+                       op->type != INSN_OPERAND_MEMORY)
+                       mismatch = 1;
+                   break;
+               case OPT_SegReg:
+                   if (op->type != INSN_OPERAND_SEGREG)
+                       mismatch = 1;
+                   break;
+               case OPT_CRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_CRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_DRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_DRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_TRReg:
+                   if (op->type != INSN_OPERAND_REG ||
+                       (op->data.reg & ~7) != X86_TRREG)
+                       mismatch = 1;
+                   break;
+               case OPT_ST0:
+                   if (op->type != INSN_OPERAND_REG ||
+                       op->data.reg != X86_FPUREG)
+                       mismatch = 1;
+                   break;
+               case OPT_Areg:
+                   /* Register number 0 of the size named by the entry */
+                   if (op->type != INSN_OPERAND_REG ||
+                       ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                        op->data.reg != (X86_REG8 | 0)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                        op->data.reg != (X86_REG16 | 0)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                        op->data.reg != (X86_REG32 | 0)))
+                       mismatch = 1;
+                   break;
+               case OPT_Creg:
+                   /* Register number 1 of the size named by the entry */
+                   if (op->type != INSN_OPERAND_REG ||
+                       ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                        op->data.reg != (X86_REG8 | 1)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                        op->data.reg != (X86_REG16 | 1)) ||
+                       ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                        op->data.reg != (X86_REG32 | 1)))
+                       mismatch = 1;
+                   break;
+               case OPT_CR4:
+                   if (op->type != INSN_OPERAND_REG ||
+                       op->data.reg != (X86_CRREG | 4))
+                       mismatch = 1;
+                   break;
+               case OPT_MemOffs:
+                   /* Memory operand whose displacement uses no register */
+                   if (op->type != INSN_OPERAND_MEMORY ||
+                       expr_contains(ea_get_disp(op->data.ea), EXPR_REG))
+                       mismatch = 1;
+                   break;
+               default:
+                   InternalError(_("invalid operand type"));
+           }
+
+           if (mismatch)
+               break;
+
+           /* Check operand size */
+           size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT];
+           if (op->type == INSN_OPERAND_REG && op->size == 0) {
+               /* Register size must exactly match */
+               if (x86_get_reg_size(op->data.reg) != size)
+                   mismatch = 1;
+           } else {
+               if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) {
+                   /* Relaxed checking: an unspecified size on either
+                    * side is accepted.
+                    */
+                   if (size != 0 && op->size != size && op->size != 0)
+                       mismatch = 1;
+               } else {
+                   /* Strict checking */
+                   if (op->size != size)
+                       mismatch = 1;
+               }
+           }
+       }
+
+       if (!mismatch) {
+           found = 1;
+           break;
+       }
+    }
+
+    if (!found) {
+       /* Didn't find a matching one */
+       /* FIXME: This needs to be more descriptive of certain reasons for a
+        * mismatch.  E.g.:
+        *  "mismatch in operand sizes"
+        *  "operand size not specified"
+        * etc.  This will probably require adding dummy error catchers in the
+        * insn list which are only looked at if we get here.
+        */
+       Error(_("invalid combination of opcode and operands"));
+       return NULL;
+    }
+
+    /* Copy what we can from info */
+    d.ea = NULL;
+    d.imm = NULL;
+    d.opersize = info->opersize;
+    d.op_len = info->opcode_len;
+    d.op[0] = info->opcode[0];
+    d.op[1] = info->opcode[1];
+    d.op[2] = info->opcode[2];
+    d.spare = info->spare;
+    d.im_len = 0;
+    d.im_sign = 0;
+
+    /* Apply modifiers.  Each modifier that is present consumes the low
+     * byte of mod_data and shifts it out, so the order of these tests
+     * defines how the identifier's modifier data is laid out.
+     */
+    if (info->modifiers & MOD_Op2Add) {
+       d.op[2] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap0)
+       mod_data >>= 8;
+    if (info->modifiers & MOD_Op1Add) {
+       d.op[1] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap1)
+       mod_data >>= 8;
+    if (info->modifiers & MOD_Op0Add) {
+       d.op[0] += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_SpAdd) {
+       d.spare += (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_OpSizeR) {
+       d.opersize = (unsigned char)(mod_data & 0xFF);
+       mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Imm8) {
+       d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF)));
+       d.im_len = 1;
+       /*mod_data >>= 8;*/
+    }
+
+    /* Go through operands and assign */
+    if (operands) {
+       for(i = 0, op = ops_first(operands); op && i<info->num_operands;
+           op = ops_next(op), i++) {
+           switch (info->operands[i] & OPA_MASK) {
+               case OPA_None:
+                   /* Throw away the operand contents */
+                   switch (op->type) {
+                       case INSN_OPERAND_REG:
+                       case INSN_OPERAND_SEGREG:
+                           break;
+                       case INSN_OPERAND_MEMORY:
+                           ea_delete(op->data.ea);
+                           break;
+                       case INSN_OPERAND_IMM:
+                           expr_delete(op->data.val);
+                           break;
+                   }
+                   break;
+               case OPA_EA:
+                   switch (op->type) {
+                       case INSN_OPERAND_REG:
+                           d.ea = x86_ea_new_reg((unsigned char)op->data.reg);
+                           break;
+                       case INSN_OPERAND_SEGREG:
+                           InternalError(_("invalid operand conversion"));
+                           /* Never treat a segment register as a memory
+                            * operand, even if InternalError() returns.
+                            */
+                           break;
+                       case INSN_OPERAND_MEMORY:
+                           d.ea = op->data.ea;
+                           if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) {
+                               /* Special-case for MOV MemOffs instruction */
+                               x86_effaddr_data *ead = ea_get_data(d.ea);
+                               ead->valid_modrm = 0;
+                               ead->need_modrm = 0;
+                               ead->valid_sib = 0;
+                               ead->need_sib = 0;
+                           }
+                           break;
+                       case INSN_OPERAND_IMM:
+                           d.ea = x86_ea_new_imm(op->data.val,
+                               size_lookup[(info->operands[i] &
+                                            OPS_MASK)>>OPS_SHIFT]);
+                           break;
+                   }
+                   break;
+               case OPA_Imm:
+                   if (op->type == INSN_OPERAND_IMM) {
+                       d.imm = op->data.val;
+                       d.im_len = size_lookup[(info->operands[i] &
+                                               OPS_MASK)>>OPS_SHIFT];
+                   } else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               case OPA_Spare:
+                   if (op->type == INSN_OPERAND_REG ||
+                       op->type == INSN_OPERAND_SEGREG)
+                       d.spare = (unsigned char)(op->data.reg&7);
+                   else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               case OPA_Op0Add:
+                   if (op->type == INSN_OPERAND_REG)
+                       d.op[0] += (unsigned char)(op->data.reg&7);
+                   else
+                       InternalError(_("invalid operand conversion"));
+                   break;
+               default:
+                   InternalError(_("unknown operand action"));
+           }
+       }
+    }
+
+    /* Create the bytecode and return it */
+    return x86_bc_new_insn(&d);
+}
+
+
+/* re2c scanner interface used by the /*!re2c blocks in this file.
+ * The scanned string is the `id' parameter of the enclosing function:
+ * both the cursor and the limit are mapped onto it, so the generated
+ * code advances `id' itself as it matches.  Each function using the
+ * scanner must also declare a local `marker' for backtracking.
+ * YYFILL expands to nothing, so the input is never refilled.
+ */
+#define YYCTYPE                char
+#define YYCURSOR       id
+#define YYLIMIT                id
+#define YYMARKER       marker
+#define YYFILL(n)
+
+/* Named re2c definitions: `any' matches any single character, and each
+ * single-letter name matches that letter in either case, which lets the
+ * rules below spell keywords case-insensitively (e.g. M O V).
+ */
+/*!re2c
+  any = [\000-\377];
+  A = [aA];
+  B = [bB];
+  C = [cC];
+  D = [dD];
+  E = [eE];
+  F = [fF];
+  G = [gG];
+  H = [hH];
+  I = [iI];
+  J = [jJ];
+  K = [kK];
+  L = [lL];
+  M = [mM];
+  N = [nN];
+  O = [oO];
+  P = [pP];
+  Q = [qQ];
+  R = [rR];
+  S = [sS];
+  T = [tT];
+  U = [uU];
+  V = [vV];
+  W = [wW];
+  X = [xX];
+  Y = [yY];
+  Z = [zZ];
+*/
+
+/* Change the set of enabled CPU features according to a CPU identifier.
+ *
+ * A CPU name (e.g. "386", "pentium", "athlon") replaces cpu_enabled with
+ * the feature set listed for that CPU.  A feature name (e.g. "mmx") or its
+ * "no"-prefixed form sets or clears only that feature bit.  Matching is
+ * case-insensitive.  An identifier that is not recognized produces a
+ * warning and leaves cpu_enabled unchanged.
+ */
+void
+x86_switch_cpu(const char *id)
+{
+    /* The scanner advances `id' (it is YYCURSOR), so keep the start of the
+     * identifier for use in the warning messages below.
+     */
+    const char *oid = id;
+    const char *marker;
+
+    /*!re2c
+       /* The standard CPU names /set/ cpu_enabled. */
+       "8086" {
+           cpu_enabled = CPU_Priv;
+           return;
+       }
+       ("80" | I)? "186" {
+           cpu_enabled = CPU_186|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "286" {
+           cpu_enabled = CPU_186|CPU_286|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "386" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       ("80" | I)? "486" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM|
+                         CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I? "586") | (P E N T I U M) | (P "5") {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (P "2") | (P E N T I U M "-"? ("2" | (I I))) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot|
+                         CPU_Priv;
+           return;
+       }
+       (P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (I A "-"? "64") | (I T A N I U M) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE|
+                         CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       K "6" {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot|
+                         CPU_Priv;
+           return;
+       }
+       A T H L O N {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow|
+                         CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+       (S L E D G E)? (H A M M E R) {
+           cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+                         CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|
+                         CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv;
+           return;
+       }
+
+       /* Features have "no" versions to disable them, and only set/reset the
+        * specific feature being changed.  All other bits are left alone.
+        */
+       F P U           { cpu_enabled |= CPU_FPU; return; }
+       N O F P U       { cpu_enabled &= ~CPU_FPU; return; }
+       M M X           { cpu_enabled |= CPU_MMX; return; }
+       N O M M X       { cpu_enabled &= ~CPU_MMX; return; }
+       S S E           { cpu_enabled |= CPU_SSE; return; }
+       N O S S E       { cpu_enabled &= ~CPU_SSE; return; }
+       S S E "2"       { cpu_enabled |= CPU_SSE2; return; }
+       N O S S E "2"   { cpu_enabled &= ~CPU_SSE2; return; }
+       "3" D N O W     { cpu_enabled |= CPU_3DNow; return; }
+       N O "3" D N O W { cpu_enabled &= ~CPU_3DNow; return; }
+       C Y R I X       { cpu_enabled |= CPU_Cyrix; return; }
+       N O C Y R I X   { cpu_enabled &= ~CPU_Cyrix; return; }
+       A M D           { cpu_enabled |= CPU_AMD; return; }
+       N O A M D       { cpu_enabled &= ~CPU_AMD; return; }
+       S M M           { cpu_enabled |= CPU_SMM; return; }
+       N O S M M       { cpu_enabled &= ~CPU_SMM; return; }
+       P R O T         { cpu_enabled |= CPU_Prot; return; }
+       N O P R O T     { cpu_enabled &= ~CPU_Prot; return; }
+       U N D O C       { cpu_enabled |= CPU_Undoc; return; }
+       N O U N D O C   { cpu_enabled &= ~CPU_Undoc; return; }
+       O B S           { cpu_enabled |= CPU_Obs; return; }
+       N O O B S       { cpu_enabled &= ~CPU_Obs; return; }
+       P R I V         { cpu_enabled |= CPU_Priv; return; }
+       N O P R I V     { cpu_enabled &= ~CPU_Priv; return; }
+
+       /* catchalls */
+       [A-Za-z0-9]+    {
+           Warning(_("unrecognized CPU identifier `%s'"), oid);
+           return;
+       }
+       any             {
+           Warning(_("unrecognized CPU identifier `%s'"), oid);
+           return;
+       }
+    */
+}
+
+arch_check_id_retval
+x86_check_identifier(unsigned long data[4], const char *id)
+{
+    const char *oid = id;
+    const char *marker;
+    /*!re2c
+       /* target modifiers */
+       N E A R         {
+           data[0] = X86_NEAR;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+       S H O R T       {
+           data[0] = X86_SHORT;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+       F A R           {
+           data[0] = X86_FAR;
+           return ARCH_CHECK_ID_TARGETMOD;
+       }
+
+       /* operand size overrides */
+       O "16"  {
+           data[0] = X86_OPERSIZE;
+           data[1] = 16;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       O "32"  {
+           data[0] = X86_OPERSIZE;
+           data[1] = 32;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       /* address size overrides */
+       A "16"  {
+           data[0] = X86_ADDRSIZE;
+           data[1] = 16;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       A "32"  {
+           data[0] = X86_ADDRSIZE;
+           data[1] = 32;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+
+       /* instruction prefixes */
+       L O C K         {
+           data[0] = X86_LOCKREP; 
+           data[1] = 0xF0;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P N E       {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF2;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P N Z       {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF2;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P           {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF3;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P E         {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF4;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+       R E P Z         {
+           data[0] = X86_LOCKREP;
+           data[1] = 0xF4;
+           return ARCH_CHECK_ID_PREFIX;
+       }
+
+       /* control, debug, and test registers */
+       C R [02-4]      {
+           data[0] = X86_CRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       D R [0-7]       {
+           data[0] = X86_DRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       T R [0-7]       {
+           data[0] = X86_TRREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+
+       /* floating point, MMX, and SSE/SSE2 registers */
+       S T [0-7]       {
+           data[0] = X86_FPUREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       M M [0-7]       {
+           data[0] = X86_MMXREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+       X M M [0-7]     {
+           data[0] = X86_XMMREG | (oid[2]-'0');
+           return ARCH_CHECK_ID_REG;
+       }
+
+       /* integer registers */
+       E A X   { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; }
+       E C X   { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; }
+       E D X   { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; }
+       E B X   { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; }
+       E S P   { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; }
+       E B P   { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; }
+       E S I   { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; }
+       E D I   { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; }
+
+       A X     { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; }
+       C X     { data[0] = X86_REG16 | 1; return ARCH_CHECK_ID_REG; }
+       D X     { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; }
+       B X     { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; }
+       S P     { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; }
+       B P     { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; }
+       S I     { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; }
+       D I     { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; }
+
+       A L     { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; }
+       C L     { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; }
+       D L     { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; }
+       B L     { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; }
+       A H     { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; }
+       C H     { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; }
+       D H     { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; }
+       B H     { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; }
+
+       /* segment registers */
+       E S     { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; }
+       C S     { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; }
+       S S     { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; }
+       D S     { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; }
+       F S     { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; }
+       G S     { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; }
+
+       /* instructions */
+
+       /* Move */
+       M O V { RET_INSN(mov, 0, CPU_Any); }
+       /* Move with sign/zero extend */
+       M O V S X { RET_INSN(movszx, 0xBE, CPU_386); }
+       M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); }
+       /* Push instructions */
+       /* P U S H */
+       P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); }
+       P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); }
+       P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); }
+       /* Pop instructions */
+       /* P O P */
+       P O P A { RET_INSN(onebyte, 0x0061, CPU_186); }
+       P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); }
+       P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); }
+       /* Exchange */
+       /* X C H G */
+       /* In/out from ports */
+       /* I N */
+       /* O U T */
+       /* Load effective address */
+       /* L E A */
+       /* Load segment registers from memory */
+       /* L D S */
+       /* L E S */
+       /* L F S */
+       /* L G S */
+       /* L S S */
+       /* Flags register instructions */
+       C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); }
+       C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); }
+       C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); }
+       C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); }
+       C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); }
+       L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); }
+       S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); }
+       P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); }
+       P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); }
+       P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); }
+       P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); }
+       P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); }
+       P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); }
+       S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); }
+       S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); }
+       S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); }
+       /* Arithmetic */
+       /* A D D */
+       /* I N C */
+       /* S U B */
+       /* D E C */
+       /* S B B */
+       /* C M P */
+       /* T E S T */
+       /* A N D */
+       /* O R */
+       /* X O R */
+       /* A D C */
+       /* N E G */
+       /* N O T */
+       A A A { RET_INSN(onebyte, 0x0037, CPU_Any); }
+       A A S { RET_INSN(onebyte, 0x003F, CPU_Any); }
+       D A A { RET_INSN(onebyte, 0x0027, CPU_Any); }
+       D A S { RET_INSN(onebyte, 0x002F, CPU_Any); }
+       /* A A D */
+       /* A A M */
+       /* Conversion instructions */
+       C B W { RET_INSN(onebyte, 0x1098, CPU_Any); }
+       C W D E { RET_INSN(onebyte, 0x2098, CPU_386); }
+       C W D { RET_INSN(onebyte, 0x1099, CPU_Any); }
+       C D Q { RET_INSN(onebyte, 0x2099, CPU_386); }
+       /* Multiplication and division */
+       /* M U L */
+       /* I M U L */
+       /* D I V */
+       /* I D I V */
+       /* Shifts */
+       /* R O L */
+       /* R O R */
+       /* R C L */
+       /* R C R */
+       /* S A L */
+       /* S H L */
+       /* S H R */
+       /* S A R */
+       /* S H L D */
+       /* S H R D */
+       /* Control transfer instructions (unconditional) */
+       /* C A L L */
+       /* J M P */
+       R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); }
+       /* R E T N */
+       /* R E T F */
+       /* E N T E R */
+       L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); }
+       /* Conditional jumps */
+       /* J O */
+       /* J N O */
+       /* J B */
+       /* JC */
+       /* J N A E */
+       /* J N B */
+       /* J N C */
+       /* J A E */
+       /* J E */
+       /* J Z */
+       /* J N E */
+       /* J N Z */
+       /* J B E */
+       /* J N A */
+       /* J N B E */
+       /* J A */
+       /* J S */
+       /* J N S */
+       /* J P */
+       /* J P E */
+       /* J N P */
+       /* J P O */
+       /* J L */
+       /* J N G E */
+       /* J N L */
+       /* J G E */
+       /* J L E */
+       /* J N G */
+       /* J N L E */
+       /* J G */
+       /* J C X Z */
+       /* J E C X Z */
+       /* Loop instructions */
+       /* L O O P */
+       /* L O O P Z */
+       /* L O O P E */
+       /* L O O P N Z */
+       /* L O O P N E */
+       /* Set byte on flag instructions */
+       /* S E T O */
+       /* S E T N O */
+       /* S E T B */
+       /* S E T C */
+       /* S E T N A E */
+       /* S E T N B */
+       /* S E T N C */
+       /* S E T A E */
+       /* S E T E */
+       /* S E T Z */
+       /* S E T N E */
+       /* S E T N Z */
+       /* S E T B E */
+       /* S E T N A */
+       /* S E T N B E */
+       /* S E T A */
+       /* S E T S */
+       /* S E T N S */
+       /* S E T P */
+       /* S E T P E */
+       /* S E T N P */
+       /* S E T P O */
+       /* S E T L */
+       /* S E T N G E */
+       /* S E T N L */
+       /* S E T G E */
+       /* S E T L E */
+       /* S E T N G */
+       /* S E T N L E */
+       /* S E T G */
+       /* String instructions. */
+       C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); }
+       C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); }
+       /* C M P S D */
+       I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); }
+       I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); }
+       I N S D { RET_INSN(onebyte, 0x206D, CPU_386); }
+       O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); }
+       O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); }
+       O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); }
+       L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); }
+       L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); }
+       L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); }
+       M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); }
+       M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); }
+       /* M O V S D */
+       S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); }
+       S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); }
+       S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); }
+       S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); }
+       S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); }
+       S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); }
+       X L A T B? { RET_INSN(onebyte, 0x00D7, CPU_Any); }
+       /* Bit manipulation */
+       /* B S F */
+       /* B S R */
+       /* B T */
+       /* B T C */
+       /* B T R */
+       /* B T S */
+       /* Interrupts and operating system instructions */
+       /* I N T */
+       I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+       I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+       I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); }
+       I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); }
+       I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); }
+       I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); }
+       R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); }
+       /* B O U N D */
+       H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); }
+       N O P { RET_INSN(onebyte, 0x0090, CPU_Any); }
+       /* Protection control */
+       /* A R P L */
+       /* L A R */
+       L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); }
+       L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); }
+       /* L L D T */
+       /* L M S W */
+       /* L S L */
+       /* L T R */
+       S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); }
+       S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); }
+       /* S L D T */
+       /* S M S W */
+       /* S T R */
+       /* V E R R */
+       /* V E R W */
+       /* Floating point instructions */
+       /* F L D */
+       /* F I L D */
+       /* F B L D */
+       /* F S T */
+       /* F I S T */
+       /* F S T P */
+       /* F I S T P */
+       /* F B S T P */
+       /* F X C H */
+       /* F C O M */
+       /* F I C O M */
+       /* F C O M P */
+       /* F I C O M P */
+       F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); }
+       /* F U C O M */
+       /* F U C O M P */
+       F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); }
+       F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); }
+       F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); }
+       F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); }
+       F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); }
+       F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); }
+       F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); }
+       F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); }
+       F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); }
+       F L D Z { RET_INSN(twobyte, 0xD9EE, CPU_FPU); }
+       /* F A D D */
+       /* F A D D P */
+       /* F I A D D */
+       /* F S U B */
+       /* F I S U B */
+       /* F S U B P */
+       /* F S U B R */
+       /* F I S U B R */
+       /* F S U B R P */
+       /* F M U L */
+       /* F I M U L */
+       /* F M U L P */
+       /* F D I V */
+       /* F I D I V */
+       /* F D I V P */
+       /* F D I V R */
+       /* F I D I V R */
+       /* F D I V R P */
+       F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); }
+       F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); }
+       F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); }
+       F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); }
+       F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); }
+       F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); }
+       F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); }
+       F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); }
+       F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); }
+       F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); }
+       F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); }
+       F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); }
+       F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); }
+       F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); }
+       F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); }
+       F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); }
+       F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); }
+       F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); }
+       F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); }
+       /* FINIT is FWAIT (0x9B) followed by FNINIT (DB E3) */
+       F I N I T { RET_INSN(threebyte, 0x9BDBE3, CPU_FPU); }
+       /* F L D C W */
+       /* F N S T C W */
+       /* F S T C W */
+       /* F N S T S W */
+       /* F S T S W */
+       F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); }
+       /* FCLEX is FWAIT (0x9B) followed by FNCLEX (DB E2) */
+       F C L E X { RET_INSN(threebyte, 0x9BDBE2, CPU_FPU); }
+       F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); }
+       F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); }
+       F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); }
+       F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); }
+       F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); }
+       F R S T O R { RET_INSN(onebytemem, 0x04DD, CPU_FPU); }
+       /* F F R E E */
+       /* F F R E E P */
+       F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); }
+       F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); }
+       /* Prefixes (should the others be here too? should wait be a prefix?) */
+       W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); }
+       /* 486 extensions */
+       /* B S W A P */
+       /* X A D D */
+       /* C M P X C H G */
+       /* C M P X C H G 4 8 6 */
+       I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); }
+       W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); }
+       I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); }
+       /* 586+ and late 486 extensions */
+       C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); }
+       /* Pentium extensions */
+       W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); }
+       R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); }
+       R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); }
+       /* C M P X C H G 8 B */
+       /* Pentium II/Pentium Pro extensions */
+       S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); }
+       S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); }
+       F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); }
+       F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); }
+       R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); }
+       U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); }
+       U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); }
+       /* C M O V */
+       /* F C M O V */
+       /* F C O M I */
+       /* F U C O M I */
+       /* F C O M I P */
+       /* F U C O M I P */
+       /* Pentium4 extensions */
+       /* M O V N T I */
+       /* C L F L U S H */
+       L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); }
+       M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); }
+       P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); }
+       /* MMX/SSE2 instructions */
+       E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); }
+       /* PIII (Katmai) new instructions / SIMD instructions */
+       /* ... */
+       P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); }
+       P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); }
+       P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); }
+       P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); }
+       /* ... */
+       S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); }
+       /* ... */
+       /* SSE2 instructions */
+       /* AMD 3DNow! instructions */
+       P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+       P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+       F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); }
+       /* ... */
+       /* AMD extensions */
+       S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); }
+       S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); }
+       /* Cyrix MMX instructions */
+       /* Cyrix extensions */
+       R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); }
+       /* R S D C */
+       /* R S L D T */
+       /* R S T S */
+       /* S V D C */
+       /* S V L D T */
+       /* S V T S */
+       S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); }
+       S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); }
+       W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); }
+       /* Obsolete/undocumented instructions */
+       F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); }
+       /* I B T S */
+       L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); }
+       L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); }
+       S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); }
+       S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); }
+       /* U M O V */
+       /* X B T S */
+
+
+       /* catchalls */
+       [A-Za-z0-9]+    {
+           return ARCH_CHECK_ID_NONE;
+       }
+       any     {
+           return ARCH_CHECK_ID_NONE;
+       }
+    */
+}
index 15aa5b3924bece07dd1998c73bf84e15896a60e2..6a46d8c34245d865b6e973190de9e46d49e25666 100644 (file)
@@ -103,6 +103,12 @@ imm_new_expr(expr *expr_ptr)
     return im;
 }
 
+/* Accessor for the displacement expression held by an effective address.
+ * The expression remains part of the effaddr.
+ */
+const expr *
+ea_get_disp(const effaddr *ptr)
+{
+    const expr *disp = ptr->disp;
+
+    return disp;
+}
+
 void
 ea_set_len(effaddr *ptr, unsigned char len)
 {
@@ -125,6 +131,30 @@ ea_set_nosplit(effaddr *ptr, unsigned char nosplit)
     ptr->nosplit = nosplit;
 }
 
+/*@-nullstate@*/
+/* Destroys an effective address: gives the current architecture a chance to
+ * release its own data, then deletes the displacement expression and frees
+ * the effaddr itself.
+ */
+void
+ea_delete(effaddr *ea)
+{
+    /* The arch hook is optional and must run while ea is still intact. */
+    if (cur_arch->ea_data_delete != NULL)
+       cur_arch->ea_data_delete(ea);
+
+    expr_delete(ea->disp);
+    xfree(ea);
+}
+/*@=nullstate@*/
+
+/*@-nullstate@*/
+/* Writes a human-readable dump of an effective address to f: the
+ * displacement expression, the Len and NoSplit fields, and finally any
+ * architecture-specific data (when the current arch provides a printer).
+ * Each line is indented by indent_level columns.
+ */
+void
+ea_print(FILE *f, const effaddr *ea)
+{
+    unsigned int len_val = ea->len;
+    unsigned int nosplit_val = ea->nosplit;
+
+    fprintf(f, "%*sDisp=", indent_level, "");
+    expr_print(f, ea->disp);
+    /* The leading newline terminates the Disp= line started above. */
+    fprintf(f, "\n%*sLen=%u\n", indent_level, "", len_val);
+    fprintf(f, "%*sNoSplit=%u\n", indent_level, "", nosplit_val);
+
+    if (cur_arch->ea_data_print != NULL)
+       cur_arch->ea_data_print(f, ea);
+}
+/*@=nullstate@*/
+
 void
 bc_set_multiple(bytecode *bc, expr *e)
 {
@@ -258,6 +288,7 @@ bc_delete(bytecode *bc)
            break;
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_data(bc);
+           assert(cur_objfmt != NULL);
            if (cur_objfmt->bc_objfmt_data_delete)
                cur_objfmt->bc_objfmt_data_delete(objfmt_data->type,
                                                  objfmt_data->data);
@@ -336,6 +367,7 @@ bc_print(FILE *f, const bytecode *bc)
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_const_data(bc);
            fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, "");
+           assert(cur_objfmt != NULL);
            if (cur_objfmt->bc_objfmt_data_print)
                cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type,
                                                 objfmt_data->data);
@@ -408,7 +440,7 @@ bc_resolve_reserve(bytecode_reserve *reserve, unsigned long *len, int save,
     expr_expand_labelequ(*tempp, sect, 1, resolve_label);
     num = expr_get_intnum(tempp);
     if (!num) {
-       if (expr_contains(temp, EXPR_FLOAT))
+       if (temp && expr_contains(temp, EXPR_FLOAT))
            ErrorAt(line,
                    _("expression must not contain floating point value"));
        retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
@@ -534,10 +566,10 @@ bc_resolve(bytecode *bc, int save, const section *sect,
        case BC_ALIGN:
            /* TODO */
            InternalError(_("TODO: align bytecode not implemented!"));
-           break;
+           /*break;*/
        case BC_OBJFMT_DATA:
            InternalError(_("resolving objfmt data bytecode?"));
-           break;
+           /*break;*/
        default:
            if (bc->type < cur_arch->bc.type_max)
                retval = cur_arch->bc.bc_resolve(bc, save, sect,
@@ -559,7 +591,7 @@ bc_resolve(bytecode *bc, int save, const section *sect,
        expr_expand_labelequ(*tempp, sect, 1, resolve_label);
        num = expr_get_intnum(tempp);
        if (!num) {
-           if (expr_contains(temp, EXPR_FLOAT))
+           if (temp && expr_contains(temp, EXPR_FLOAT))
                ErrorAt(bc->line,
                        _("expression must not contain floating point value"));
            retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
@@ -716,7 +748,7 @@ bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize,
        case BC_ALIGN:
            /* TODO */
            InternalError(_("TODO: align bytecode not implemented!"));
-           break;
+           /*break;*/
        case BC_OBJFMT_DATA:
            objfmt_data = bc_get_data(bc);
            if (output_bc_objfmt_data)
index d61c6bcd74e3b26e15c361781a6554ffcd1b54ba..e68a7ac2ab01e21c15ad9e9aead0151f9a037076 100644 (file)
@@ -43,8 +43,11 @@ typedef enum {
 /*@only@*/ immval *imm_new_int(unsigned long int_val);
 /*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e);
 
+/*@observer@*/ const expr *ea_get_disp(const effaddr *ea);
 void ea_set_len(effaddr *ea, unsigned char len);
 void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
+void ea_delete(/*@only@*/ effaddr *ea);
+void ea_print(FILE *f, const effaddr *ea);
 
 void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e);
 
index 1f0ed2df280af019db260bf18e8501f67cae14d0..a562c2baa65cb2b2870bda952ae79e1038582a63 100644 (file)
@@ -39,11 +39,7 @@ struct ExprItem {
        expr *expn;
        intnum *intn;
        floatnum *flt;
-       /* FIXME: reg structure is moderately x86-specific (namely size) */
-       struct reg {
-           unsigned char num;
-           unsigned char size; /* in bits, eg AX=16, EAX=32 */
-       } reg;
+       unsigned long reg;
     } data;
 };
 
@@ -62,6 +58,9 @@ struct expr {
  *
  * Stops early (and returns 1) if func returns 1.  Otherwise returns 0.
  */
+int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d,
+                                 int (*func) (/*@null@*/ const ExprItem *ei,
+                                              /*@null@*/ void *d));
 int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d,
                            int (*func) (/*@null@*/ ExprItem *ei,
                                         /*@null@*/ void *d));
@@ -88,6 +87,6 @@ void expr_order_terms(expr *e);
 /* Copy entire expression EXCEPT for index "except" at *top level only*. */
 expr *expr_copy_except(const expr *e, int except);
 
-int expr_contains(expr *e, ExprType t);
+int expr_contains(const expr *e, ExprType t);
 
 #endif
index ade3789aa4946d96921f8c64321777564dba8dfb..aa2775280c439746a549ae9d47e090216240a2d1 100644 (file)
 #include "expr.h"
 #include "symrec.h"
 
+#include "bytecode.h"
 #include "section.h"
 
+#include "arch.h"
+
 #include "expr-int.h"
 
 
@@ -135,12 +138,11 @@ ExprFloat(floatnum *f)
 }
 
 ExprItem *
-ExprReg(unsigned char reg, unsigned char size)
+ExprReg(unsigned long reg)
 {
     ExprItem *e = xmalloc(sizeof(ExprItem));
     e->type = EXPR_REG;
-    e->data.reg.num = reg;
-    e->data.reg.size = size;
+    e->data.reg = reg;
     return e;
 }
 
@@ -662,8 +664,7 @@ expr_copy_except(const expr *e, int except)
                    dest->data.flt = floatnum_copy(src->data.flt);
                    break;
                case EXPR_REG:
-                   dest->data.reg.num = src->data.reg.num;
-                   dest->data.reg.size = src->data.reg.size;
+                   dest->data.reg = src->data.reg;
                    break;
                default:
                    break;
@@ -709,16 +710,16 @@ expr_delete(expr *e)
 /*@=mustfree@*/
 
 static int
-expr_contains_callback(ExprItem *ei, void *d)
+expr_contains_callback(const ExprItem *ei, void *d)
 {
     ExprType *t = d;
     return (ei->type & *t);
 }
 
 int
-expr_contains(expr *e, ExprType t)
+expr_contains(const expr *e, ExprType t)
 {
-    return expr_traverse_leaves_in(e, &t, expr_contains_callback);
+    return expr_traverse_leaves_in_const(e, &t, expr_contains_callback);
 }
 
 /* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like
@@ -801,6 +802,33 @@ expr_traverse_nodes_post(expr *e, void *d,
     return func(e, d);
 }
 
+/* Const-qualified in-order leaf traversal.  Visits every non-operation term
+ * of e, descending into subexpressions, and calls func with the term and the
+ * caller-supplied data pointer d.
+ *
+ * Returns 1 as soon as any func call returns nonzero.  Returns 0 if e is
+ * NULL or if every func call returned 0.
+ */
+int
+expr_traverse_leaves_in_const(const expr *e, void *d,
+                             int (*func) (/*@null@*/ const ExprItem *ei,
+                                          /*@null@*/ void *d))
+{
+    int idx;
+
+    if (!e)
+       return 0;
+
+    for (idx = 0; idx < e->numterms; idx++) {
+       const ExprItem *term = &e->terms[idx];
+       int stop;
+
+       /* Subexpressions are walked recursively; anything else is a leaf. */
+       if (term->type == EXPR_EXPR)
+           stop = expr_traverse_leaves_in_const(term->data.expn, d, func);
+       else
+           stop = func(term, d);
+
+       if (stop)
+           return 1;
+    }
+    return 0;
+}
+
 /* Traverse over expression tree in order, calling func for each leaf
  * (non-operation).  The data pointer d is passed to each func call.
  *
@@ -877,10 +905,23 @@ expr_get_symrec(expr **ep, int simplify)
 }
 /*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
 
+/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/
+/* Returns a pointer to the register value stored in *ep when the expression
+ * is just a single register (an EXPR_IDENT whose first term is an EXPR_REG);
+ * returns NULL for anything else.  When simplify is nonzero, *ep is first
+ * replaced by the result of expr_simplify().  The returned pointer refers to
+ * storage inside *ep.
+ */
+const unsigned long *
+expr_get_reg(expr **ep, int simplify)
+{
+    if (simplify)
+       *ep = expr_simplify(*ep);
+
+    if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_REG)
+       return &((*ep)->terms[0].data.reg);
+    else
+       return NULL;
+}
+/* Restore all four checks disabled above ('=' restores, '-' disables). */
+/*@=unqualifiedtrans =nullderef =nullstate =onlytrans@*/
+
 void
 expr_print(FILE *f, const expr *e)
 {
-    static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
     char opstr[3];
     int i;
 
@@ -982,9 +1023,7 @@ expr_print(FILE *f, const expr *e)
                floatnum_print(f, e->terms[i].data.flt);
                break;
            case EXPR_REG:
-               if (e->terms[i].data.reg.size == 32)
-                   fprintf(f, "e");
-               fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]);
+               cur_arch->reg_print(f, e->terms[i].data.reg);
                break;
            case EXPR_NONE:
                break;
index fb97248beed420d33b71f21aae4518f23f7499e7..4521286c69b89bcb19ad3f1f889da0d9fbb6c271 100644 (file)
@@ -31,7 +31,7 @@ typedef struct ExprItem ExprItem;
 /*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *);
 /*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *);
 /*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *);
-/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char size);
+/*@only@*/ ExprItem *ExprReg(unsigned long reg);
 
 #define expr_new_tree(l,o,r) \
     expr_new ((o), ExprExpr(l), ExprExpr(r))
@@ -79,6 +79,13 @@ void expr_expand_labelequ(expr *e, const section *srcsect, int withstart,
 /*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep,
                                                         int simplify);
 
+/* Gets the register value of e if the expression is just a register.  If the
+ * expression is more complex, returns NULL.  Simplifies the expr first if
+ * simplify is nonzero.
+ */
+/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep,
+                                                            int simplify);
+
 void expr_print(FILE *f, /*@null@*/ const expr *);
 
 #endif
index 07c4a53635cc9756c7b4371cb8f354fe9e726cd2..a8c7c33de0f882f61baaa864f7e715e3cde789ae 100644 (file)
@@ -289,7 +289,7 @@ main(int argc, char *argv[])
     }
 
     /* Get initial BITS setting from object format */
-    x86_mode_bits = cur_objfmt->default_mode_bits;
+    /*x86_mode_bits = cur_objfmt->default_mode_bits;*/
 
     /* Parse! */
     sections = cur_parser->do_parse(cur_parser, in, in_filename);
index dbe7010d0be51ed53f7f67456321381a559cb7ff..b2a242821a3311f63cc4781d9ee5acae17ce285b 100644 (file)
@@ -3,57 +3,22 @@
 YASMPARSERFILES += \
        src/parsers/nasm/nasm-parser.c          \
        src/parsers/nasm/nasm-defs.h            \
-       nasm-bison.y                            \
+       src/parsers/nasm/nasm-bison.y           \
        nasm-bison.h                            \
-       nasm-token.l
-
-if DEV
-
-nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl
-       $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y
-
-else
-
-nasm-token.l: $(srcdir)/nasm-token.l
-       @echo Warning: Not generating nasm-token.l from nasm-token.l.in.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-token.l .
-nasm-token.c: $(srcdir)/nasm-token.c
-       @echo Warning: Not generating nasm-token.c from nasm-token.l.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-token.c .
-nasm-bison.y: $(srcdir)/nasm-bison.y
-       @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.y .
-nasm-bison.c: $(srcdir)/nasm-bison.c
-       @echo Warning: Not generating nasm-bison.c from nasm-bison.y.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.c .
-nasm-bison.h: $(srcdir)/nasm-bison.h
-       @echo Warning: Not generating nasm-bison.h from nasm-bison.y.
-       @echo Run configure with --enable-dev to enable generation.
-       cp $(srcdir)/nasm-bison.h .
-
-endif
+       nasm-token.c
 
-noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl
+nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+       re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
 
 BUILT_SOURCES += \
-       nasm-bison.y                            \
        nasm-bison.c                            \
        nasm-bison.h                            \
-       nasm-token.l                            \
        nasm-token.c
 
 CLEANFILES += \
-       nasm-bison.y                            \
        nasm-bison.c                            \
        nasm-bison.h                            \
-       nasm-token.l                            \
        nasm-token.c
 
 EXTRA_DIST += \
-       src/parsers/nasm/token.l.in             \
-       src/parsers/nasm/bison.y.in             \
-       src/parsers/nasm/gen_instr.pl
+       src/parsers/nasm/nasm-token.re
diff --git a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in
deleted file mode 100644 (file)
index b3f0533..0000000
+++ /dev/null
@@ -1,791 +0,0 @@
-/*
- * NASM-compatible bison parser
- *
- *  Copyright (C) 2001  Peter Johnson, Michael Urman
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#ifdef STDC_HEADERS
-# include <math.h>
-#endif
-
-#include "bitvect.h"
-
-#include "globals.h"
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-#include "section.h"
-#include "objfmt.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-
-void init_table(void);
-extern int nasm_parser_lex(void);
-void nasm_parser_error(const char *);
-static void nasm_parser_directive(const char *name,
-                                 valparamhead *valparams,
-                                 /*@null@*/ valparamhead *objext_valparams);
-
-extern objfmt *nasm_parser_objfmt;
-extern sectionhead nasm_parser_sections;
-extern section *nasm_parser_cur_section;
-extern char *nasm_parser_locallabel_base;
-
-static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
-static bytecode *nasm_parser_temp_bc;
-
-/* additional data declarations (dynamically generated) */
-/* @DATADECLS@ */
-
-/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/
-%}
-
-%union {
-    unsigned int int_info;
-    char *str_val;
-    intnum *intn;
-    floatnum *flt;
-    symrec *sym;
-    unsigned char groupdata[5];
-    effaddr *ea;
-    expr *exp;
-    immval *im_val;
-    x86_targetval tgt_val;
-    datavalhead datahead;
-    dataval *data;
-    bytecode *bc;
-    valparamhead dir_valparams;
-    valparam *dir_valparam;
-}
-
-%token <intn> INTNUM
-%token <flt> FLTNUM
-%token <str_val> DIRECTIVE_NAME STRING FILENAME
-%token <int_info> BYTE WORD DWORD QWORD TWORD DQWORD
-%token <int_info> DECLARE_DATA
-%token <int_info> RESERVE_SPACE
-%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
-%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
-%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
-%token <str_val> ID LOCAL_ID SPECIAL_ID
-%token LINE
-
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
-%type <sym> explabel
-%type <str_val> label_id
-%type <tgt_val> target
-%type <data> dataval
-%type <datahead> datavals
-%type <dir_valparams> directive_valparams
-%type <dir_valparam> directive_valparam
-
-%left '|'
-%left '^'
-%left '&'
-%left LEFT_OP RIGHT_OP
-%left '-' '+'
-%left '*' '/' SIGNDIV '%' SIGNMOD
-%nonassoc UNARYOP
-
-%%
-input: /* empty */
-    | input line    {
-       nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
-                                              $2);
-       if (nasm_parser_temp_bc)
-           nasm_parser_prev_bc = nasm_parser_temp_bc;
-       line_index++;
-    }
-;
-
-line: '\n'             { $$ = (bytecode *)NULL; }
-    | lineexp '\n'
-    | LINE INTNUM '+' INTNUM FILENAME '\n' {
-       /* %line indicates the line number of the *next* line, so subtract out
-        * the increment when setting the line number.
-        */
-       line_set($5, intnum_get_uint($2)-intnum_get_uint($4),
-                intnum_get_uint($4));
-       intnum_delete($2);
-       intnum_delete($4);
-       xfree($5);
-       $$ = (bytecode *)NULL;
-    }
-    | directive '\n'   { $$ = (bytecode *)NULL; }
-    | error '\n'       {
-       Error(_("label or instruction expected at start of line"));
-       $$ = (bytecode *)NULL;
-       yyerrok;
-    }
-;
-
-lineexp: exp
-    | TIMES expr exp                   { $$ = $3; bc_set_multiple($$, $2); }
-    | label                            { $$ = (bytecode *)NULL; }
-    | label exp                                { $$ = $2; }
-    | label TIMES expr exp             { $$ = $4; bc_set_multiple($$, $3); }
-    | label_id EQU expr                        {
-       symrec_define_equ($1, $3);
-       xfree($1);
-       $$ = (bytecode *)NULL;
-    }
-;
-
-exp: instr
-    | DECLARE_DATA datavals            { $$ = bc_new_data(&$2, $1); }
-    | RESERVE_SPACE expr               { $$ = bc_new_reserve($2, $1); }
-    | INCBIN STRING                    { $$ = bc_new_incbin($2, NULL, NULL); }
-    | INCBIN STRING ',' expr           { $$ = bc_new_incbin($2, $4, NULL); }
-    | INCBIN STRING ',' expr ',' expr  { $$ = bc_new_incbin($2, $4, $6); }
-;
-
-datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }
-    | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }
-;
-
-dataval: expr_no_string        { $$ = dv_new_expr($1); }
-    | STRING           { $$ = dv_new_string($1); }
-    | error            {
-       Error(_("expression syntax error"));
-       $$ = (dataval *)NULL;
-    }
-;
-
-label: label_id            {
-       symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
-                           1);
-       xfree($1);
-    }
-    | label_id ':'  {
-       symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
-                           1);
-       xfree($1);
-    }
-;
-
-label_id: ID       {
-       $$ = $1;
-       if (nasm_parser_locallabel_base)
-           xfree(nasm_parser_locallabel_base);
-       nasm_parser_locallabel_base = xstrdup($1);
-    }
-    | SPECIAL_ID
-    | LOCAL_ID
-;
-
-/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']'        {
-       xfree($2);
-    }
-    | '[' DIRECTIVE_NAME error ']'             {
-       Error(_("invalid arguments to [%s]"), $2);
-       xfree($2);
-    }
-;
-
-    /* $<str_val>0 is the DIRECTIVE_NAME */
-    /* After : is (optional) object-format specific extension */
-directive_val: directive_valparams {
-       nasm_parser_directive($<str_val>0, &$1, NULL);
-    }
-    | directive_valparams ':' directive_valparams {
-       nasm_parser_directive($<str_val>0, &$1, &$3);
-    }
-;
-
-directive_valparams: directive_valparam                {
-       vps_initialize(&$$);
-       vps_append(&$$, $1);
-    }
-    | directive_valparams directive_valparam   {
-       vps_append(&$1, $2);
-       $$ = $1;
-    }
-;
-
-directive_valparam: direxpr    {
-       /* If direxpr is just an ID, put it in val and delete the expr */
-       const /*@null@*/ symrec *vp_symrec;
-       if ((vp_symrec = expr_get_symrec(&$1, 0))) {
-           vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL);
-           expr_delete($1);
-       } else
-           vp_new($$, NULL, $1);
-    }
-    | ID '=' direxpr           { vp_new($$, $1, $3); }
-;
-
-/* register groupings */
-fpureg: ST0
-    | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
-    | DWORD reg_eax    { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
-    | DWORD reg_ecx    { $$ = $2; }
-;
-
-rawreg32: REG_EAX
-    | REG_ECX
-    | REG_EDX
-    | REG_EBX
-    | REG_ESP
-    | REG_EBP
-    | REG_ESI
-    | REG_EDI
-;
-
-reg32: rawreg32
-    | DWORD reg32      { $$ = $2; }
-;
-
-reg_ax: REG_AX
-    | WORD reg_ax      { $$ = $2; }
-;
-
-reg_cx: REG_CX
-    | WORD reg_cx      { $$ = $2; }
-;
-
-reg_dx: REG_DX
-    | WORD reg_dx      { $$ = $2; }
-;
-
-rawreg16: REG_AX
-    | REG_CX
-    | REG_DX
-    | REG_BX
-    | REG_SP
-    | REG_BP
-    | REG_SI
-    | REG_DI
-;
-
-reg16: rawreg16
-    | WORD reg16       { $$ = $2; }
-;
-
-reg_al: REG_AL
-    | BYTE reg_al      { $$ = $2; }
-;
-
-reg_cl: REG_CL
-    | BYTE reg_cl      { $$ = $2; }
-;
-
-reg8: REG_AL
-    | REG_CL
-    | REG_DL
-    | REG_BL
-    | REG_AH
-    | REG_CH
-    | REG_DH
-    | REG_BH
-    | BYTE reg8                { $$ = $2; }
-;
-
-reg_es: REG_ES
-    | WORD reg_es      { $$ = $2; }
-;
-
-reg_ss: REG_SS
-    | WORD reg_ss      { $$ = $2; }
-;
-
-reg_ds: REG_DS
-    | WORD reg_ds      { $$ = $2; }
-;
-
-reg_fs: REG_FS
-    | WORD reg_fs      { $$ = $2; }
-;
-
-reg_gs: REG_GS
-    | WORD reg_gs      { $$ = $2; }
-;
-
-reg_cs: REG_CS
-    | WORD reg_cs      { $$ = $2; }
-;
-
-segreg: REG_ES
-    | REG_SS
-    | REG_DS
-    | REG_FS
-    | REG_GS
-    | REG_CS
-    | WORD segreg      { $$ = $2; }
-;
-
-/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated?  This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg.  I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- *
- * Each alternative builds an expression node: leaves via expr_new_ident(),
- * binary operators via expr_new_tree(), unary operators via expr_new_branch().
- * Unary '+' and parentheses simply pass the operand through.
- *
- * NOTE(review): in the disabled alternatives below, '<' names EXPR_GT, '<='
- * names EXPR_GE, and the '&&' line mixes expr with memexpr.  These look like
- * copy-paste slips; check them before re-enabling any of those lines.
- */
-memexpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | rawreg16                 { $$ = expr_new_ident(ExprReg($1, 16)); }
-    | rawreg32                 { $$ = expr_new_ident(ExprReg($1, 32)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| memexpr '||' memexpr   { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | memexpr '|' memexpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | memexpr '^' memexpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' memexpr      { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | memexpr '&' memexpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| memexpr '==' memexpr   { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| memexpr '>' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '<' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '>=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '<=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '!=' memexpr   { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | memexpr LEFT_OP memexpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | memexpr '+' memexpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | memexpr '-' memexpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | memexpr '*' memexpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | memexpr '/' memexpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | memexpr SIGNDIV memexpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | memexpr '%' memexpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | memexpr SIGNMOD memexpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' memexpr %prec UNARYOP        { $$ = $2; }
-    | '-' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' memexpr            { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' memexpr ')'          { $$ = $2; }
-    /* A quoted string is converted to an integer character constant; the
-     * string itself is freed once converted. */
-    | STRING                   {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-    /* Error recovery: report the problem; $$ is not assigned here. */
-    | error                    { Error(_("invalid effective address")); }
-;
-
-/* Contents of a memory operand (what appears between '[' and ']').
- * A plain memexpr becomes a new effective address whose segment is set to 0.
- * A "segreg ':'" prefix sets the segment on the inner address; the values
- * passed are the x86 segment-override prefix bytes (CS 2E, SS 36, DS 3E,
- * ES 26, FS 64, GS 65).  Because the rule is right-recursive, the outermost
- * prefix is applied last when several are nested.
- * BYTE/WORD/DWORD pass 1/2/4 to ea_set_len() (presumably a size in bytes --
- * confirm against ea_set_len) and NOSPLIT passes 1 to ea_set_nosplit().
- */
-memaddr: memexpr           {
-       $$ = x86_ea_new_expr($1);
-       x86_ea_set_segment($$, 0);
-    }
-    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
-    | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }
-    | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }
-    | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }
-    | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }
-;
-
-/* A bracketed memory operand; the value is the effective address built by
- * memaddr. */
-mem: '[' memaddr ']'   { $$ = $2; }
-;
-
-/* explicit memory */
-/* Each memNNx rule requires a size keyword in front of the operand.  The
- * keyword may be repeated, and the operand value is passed through
- * unchanged. */
-mem8x: BYTE mem                { $$ = $2; }
-    | BYTE mem8x       { $$ = $2; }
-;
-mem16x: WORD mem       { $$ = $2; }
-    | WORD mem16x      { $$ = $2; }
-;
-mem32x: DWORD mem      { $$ = $2; }
-    | DWORD mem32x     { $$ = $2; }
-;
-mem64x: QWORD mem      { $$ = $2; }
-    | QWORD mem64x     { $$ = $2; }
-;
-mem80x: TWORD mem      { $$ = $2; }
-    | TWORD mem80x     { $$ = $2; }
-;
-mem128x: DQWORD mem    { $$ = $2; }
-    | DQWORD mem128x   { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem                { $$ = $2; }
-    | FAR memfar       { $$ = $2; }
-;
-
-/* implicit memory */
-/* Each memNN rule accepts either an unsized operand or one carrying the
- * matching size keyword. */
-mem8: mem
-    | mem8x
-;
-mem16: mem
-    | mem16x
-;
-mem32: mem
-    | mem32x
-;
-mem64: mem
-    | mem64x
-;
-mem80: mem
-    | mem80x
-;
-mem128: mem
-    | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
-    | mem16x
-    | mem32x
-;
-
-/* explicit register or memory */
-/* A register operand is wrapped in an effective address by x86_ea_new_reg();
- * a memory operand must carry the matching size keyword (memNNx). */
-rm8x: reg8     { $$ = x86_ea_new_reg($1); }
-    | mem8x
-;
-rm16x: reg16   { $$ = x86_ea_new_reg($1); }
-    | mem16x
-;
-rm32x: reg32   { $$ = x86_ea_new_reg($1); }
-    | mem32x
-;
-/* not needed:
-rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
-    | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
-    | mem128x
-;
-*/
-
-/* implicit register or memory */
-/* Same as above, except that the memory operand may also be unsized
- * (memNN). */
-rm8: reg8      { $$ = x86_ea_new_reg($1); }
-    | mem8
-;
-rm16: reg16    { $$ = x86_ea_new_reg($1); }
-    | mem16
-;
-rm32: reg32    { $$ = x86_ea_new_reg($1); }
-    | mem32
-;
-rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
-    | mem64
-;
-rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
-    | mem128
-;
-
-/* immediate values */
-/* Wraps an arbitrary expression in an immediate via imm_new_expr(). */
-imm: expr   { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-/* The size keyword is required; the immediate itself is passed through
- * unchanged. */
-imm8x: BYTE imm            { $$ = $2; }
-;
-imm16x: WORD imm    { $$ = $2; }
-;
-imm32x: DWORD imm   { $$ = $2; }
-;
-
-/* implicit immediates */
-/* Either an unsized immediate or one carrying the matching size keyword. */
-imm8: imm
-    | imm8x
-;
-imm16: imm
-    | imm16x
-;
-imm32: imm
-    | imm32x
-;
-
-/* jump targets */
-/* A bare expression yields a target whose opcode selection is JR_NONE.
- * A SHORT or NEAR keyword in front overrides the selection with
- * JR_SHORT_FORCED or JR_NEAR_FORCED.  The rule is right-recursive, so when
- * keywords are stacked the outermost one is applied last.
- */
-target: expr           {
-       $$.val = $1;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
-    }
-    | SHORT target     {
-       $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
-    }
-    | NEAR target      {
-       $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
-    }
-;
-
-/* expression trees */
-
-/* expr w/o FLTNUM and unary + and -, for use in directives */
-/* NOTE(review): an ID here goes through symrec_define_label() with NULL
- * section/bytecode arguments, whereas explabel uses symrec_use() for
- * identifiers.  Confirm that defining, rather than referencing, the symbol is
- * intended for directive arguments.  The identifier string is freed after the
- * lookup.
- */
-direxpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | ID                       {
-       $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0)));
-       xfree($1);
-    }
-    | direxpr '|' direxpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | direxpr '^' direxpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    | direxpr '&' direxpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    | direxpr LEFT_OP direxpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | direxpr '+' direxpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | direxpr '-' direxpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | direxpr '*' direxpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | direxpr '/' direxpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | direxpr SIGNDIV direxpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | direxpr '%' direxpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | direxpr SIGNMOD direxpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    /*| '!' expr               { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' direxpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' direxpr ')'          { $$ = $2; }
-;
-
-/* General expression without the bare-string alternative (that one is added
- * by expr).  Operands of the operators are full expr nonterminals, so
- * strings may still appear inside a larger expression.
- *
- * NOTE(review): in the disabled alternatives below, '<' names EXPR_GT and
- * '<=' names EXPR_GE.  These look like copy-paste slips; check them before
- * re-enabling those lines.
- */
-expr_no_string: INTNUM         { $$ = expr_new_ident(ExprInt($1)); }
-    | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| expr '||' expr         { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | expr '|' expr            { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | expr '^' expr            { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' expr         { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | expr '&' expr            { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| expr '==' expr         { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| expr '>' expr          { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| expr '<' expr          { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| expr '>=' expr         { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| expr '<=' expr         { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| expr '!=' expr         { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | expr LEFT_OP expr                { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | expr RIGHT_OP expr       { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | expr '+' expr            { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | expr '-' expr            { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | expr '*' expr            { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | expr '/' expr            { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | expr SIGNDIV expr                { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | expr '%' expr            { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | expr SIGNMOD expr                { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' expr %prec UNARYOP   { $$ = $2; }
-    | '-' expr %prec UNARYOP   { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' expr               { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' expr %prec UNARYOP   { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' expr ')'             { $$ = $2; }
-;
-
-/* Full expression: anything expr_no_string accepts, or a quoted string, which
- * is converted to an integer character constant.  The string is freed once
- * converted. */
-expr: expr_no_string
-    | STRING           {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-;
-
-/* Symbol reference inside an expression.
- *  - ID / SPECIAL_ID / LOCAL_ID: looked up with symrec_use(); the identifier
- *    string is freed afterwards.
- *  - '$': defines a label named "$" at the current section and previous
- *    bytecode.
- *  - START_SECTION_ID (`$$'): rejected inside an ABSOLUTE section; otherwise
- *    resolves to the symbol named after the current section.
- */
-explabel: ID           {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | SPECIAL_ID       {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | LOCAL_ID         {
-       $$ = symrec_use($1);
-       xfree($1);
-    }
-    | '$'              {
-       $$ = symrec_define_label("$", nasm_parser_cur_section,
-                                nasm_parser_prev_bc, 0);
-    }
-    | START_SECTION_ID {
-       if (section_is_absolute(nasm_parser_cur_section)) {
-           Error(_("`$$' is not valid within an ABSOLUTE section"));
-           YYERROR;
-       } else {
-           const char *ss_name = section_get_name(nasm_parser_cur_section);
-           assert(ss_name != NULL);
-           $$ = symrec_use(ss_name);
-       }
-    }
-;
-
-/* A complete instruction: an (optional) base instruction preceded by any
- * number of prefixes.  The rule is right-recursive, so prefixes are applied
- * to the bytecode from the innermost outwards.
- *  - empty: builds an instruction bytecode from a zeroed idata.
- *  - OPERSIZE / ADDRSIZE: operand/address size override taken from the token.
- *  - segment register: sets the segment on the instruction's effective
- *    address to the matching override prefix byte (CS 2E, SS 36, DS 3E,
- *    ES 26, FS 64, GS 65).
- *  - LOCK / REPNZ / REP / REPZ: sets the lock/rep prefix byte.
- */
-instr: /* empty */     {
-       idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
-       $$ = x86_bc_new_insn(&idata);
-    }
-    | instrbase
-    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
-    | REG_CS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
-    }
-    | REG_SS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
-    }
-    | REG_DS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
-    }
-    | REG_ES instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
-    }
-    | REG_FS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
-    }
-    | REG_GS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
-    }
-    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
-    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
-    /* REPZ/REPE shares prefix byte F3 with REP (F4 is the HLT opcode). */
-    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
-%%
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
-
-/* Handle one parser directive.
- *
- * name             - directive name; compared case-insensitively.
- * valparams        - the directive's value/parameter list.  Always deleted
- *                    before returning.
- * objext_valparams - second list handed through to the object-format
- *                    callbacks; may be NULL, otherwise deleted before
- *                    returning.
- *
- * EXTERN, GLOBAL, COMMON, SECTION/SEGMENT, ABSOLUTE and BITS are handled
- * here.  Any other name is passed to cur_objfmt->directive(), and a non-zero
- * result from that callback is reported as an unrecognized directive.
- * Problems are reported through Error(); nothing is returned.
- *
- * vps_first() is treated as possibly yielding NULL in every branch, the same
- * way the BITS branch guards it, so a directive written without arguments is
- * diagnosed instead of dereferencing a null pointer.
- */
-static void
-nasm_parser_directive(const char *name, valparamhead *valparams,
-                     valparamhead *objext_valparams)
-{
-    valparam *vp, *vp2;
-    const intnum *intn;
-    long lval;
-
-    assert(cur_objfmt != NULL);
-
-    /* Handle (mostly) output-format independent directives here */
-    if (strcasecmp(name, "extern") == 0) {
-       vp = vps_first(valparams);
-       if (vp && vp->val)
-           symrec_declare(vp->val, SYM_EXTERN,
-                          cur_objfmt->extern_data_new(vp->val,
-                                                      objext_valparams));
-       else
-           Error(_("invalid argument to [%s]"), "EXTERN");
-    } else if (strcasecmp(name, "global") == 0) {
-       vp = vps_first(valparams);
-       if (vp && vp->val)
-           symrec_declare(vp->val, SYM_GLOBAL,
-                          cur_objfmt->global_data_new(vp->val,
-                                                      objext_valparams));
-       else
-           Error(_("invalid argument to [%s]"), "GLOBAL");
-    } else if (strcasecmp(name, "common") == 0) {
-       vp = vps_first(valparams);
-       if (vp && vp->val) {
-           /* The second argument is the size: either a bare identifier
-            * (val) or a full expression (param). */
-           vp2 = vps_next(vp);
-           if (!vp2 || (!vp2->val && !vp2->param))
-               Error(_("no size specified in %s declaration"), "COMMON");
-           else {
-               if (vp2->val)
-                   symrec_declare(vp->val, SYM_COMMON,
-                       cur_objfmt->common_data_new(vp->val,
-                           expr_new_ident(ExprSym(symrec_use(vp2->val))),
-                           objext_valparams));
-               else if (vp2->param) {
-                   symrec_declare(vp->val, SYM_COMMON,
-                       cur_objfmt->common_data_new(vp->val, vp2->param,
-                                                   objext_valparams));
-                   /* The expression was handed to common_data_new(); clear
-                    * the pointer so vps_delete() below does not see it. */
-                   vp2->param = NULL;
-               }
-           }
-       } else
-           Error(_("invalid argument to [%s]"), "COMMON");
-    } else if (strcasecmp(name, "section") == 0 ||
-              strcasecmp(name, "segment") == 0) {
-       section *new_section =
-           cur_objfmt->sections_switch(&nasm_parser_sections, valparams,
-                                       objext_valparams);
-       if (new_section) {
-           nasm_parser_cur_section = new_section;
-           nasm_parser_prev_bc = (bytecode *)NULL;
-       } else
-           Error(_("invalid argument to [%s]"), "SECTION");
-    } else if (strcasecmp(name, "absolute") == 0) {
-       /* it can be just an ID or a complete expression, so handle both. */
-       vp = vps_first(valparams);
-       if (vp && vp->val)
-           nasm_parser_cur_section =
-               sections_switch_absolute(&nasm_parser_sections,
-                   expr_new_ident(ExprSym(symrec_use(vp->val))));
-       else if (vp && vp->param) {
-           nasm_parser_cur_section =
-               sections_switch_absolute(&nasm_parser_sections, vp->param);
-           /* The expression was handed to sections_switch_absolute(). */
-           vp->param = NULL;
-       } else
-           Error(_("invalid argument to [%s]"), "ABSOLUTE");
-       nasm_parser_prev_bc = (bytecode *)NULL;
-    } else if (strcasecmp(name, "bits") == 0) {
-       /* Only a lone integer expression equal to 16 or 32 is accepted. */
-       if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
-           (intn = expr_get_intnum(&vp->param)) != NULL &&
-           (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
-           x86_mode_bits = (unsigned char)lval;
-       else
-           Error(_("invalid argument to [%s]"), "BITS");
-    } else if (cur_objfmt->directive(name, valparams, objext_valparams,
-                                    &nasm_parser_sections)) {
-       Error(_("unrecognized directive [%s]"), name);
-    }
-
-    vps_delete(valparams);
-    if (objext_valparams)
-       vps_delete(objext_valparams);
-}
-
-/* Forwards the message s unchanged to ParserError().
- * NOTE(review): presumably the error callback used by the generated parser
- * -- confirm against the parser's yyerror mapping.
- */
-void
-nasm_parser_error(const char *s)
-{
-    ParserError(s);
-}
-
diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl
deleted file mode 100755 (executable)
index b0599d9..0000000
+++ /dev/null
@@ -1,889 +0,0 @@
-#!/usr/bin/perl -w
-# $IdPath$
-# Generates NASM-compatible bison.y and token.l from instrs.dat.
-#
-#    Copyright (C) 2001  Michael Urman
-#
-#    This file is part of YASM.
-#
-#    YASM is free software; you can redistribute it and/or modify
-#    it under the terms of the GNU General Public License as published by
-#    the Free Software Foundation; either version 2 of the License, or
-#    (at your option) any later version.
-#
-#    YASM is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#    GNU General Public License for more details.
-#
-#    You should have received a copy of the GNU General Public License
-#    along with this program; if not, write to the Free Software
-#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#
-
-use strict;
-use Getopt::Long;
-my $VERSION = "0.0.1";
-
-# useful constants for instruction arrays
-#  common
-use constant INST          => 0;
-use constant OPERANDS      => 1;
-#  general format
-use constant OPSIZE        => 2;
-use constant OPCODE        => 3;
-use constant EFFADDR       => 4;
-use constant IMM           => 5;
-use constant CPU           => 6;
-#  relative target format
-use constant ADSIZE        => 2;
-use constant SHORTOPCODE    => 3;
-use constant NEAROPCODE            => 4;
-use constant SHORTCPU      => 5;
-use constant NEARCPU       => 6;
-
-use constant TOO_MANY_ERRORS => 20;
-
-# default options
-my $instrfile = 'instrs.dat';
-my $tokenfile = 'token.l';
-my $tokensource;
-my $grammarfile = 'bison.y';
-my $grammarsource;
-my $showversion;
-my $showusage;
-my $dry_run;
-
-# allow overrides
-# The usage text advertises -st and -sg as short forms of --sourcetoken and
-# --sourcegrammar.  Neither is a prefix of its long name, so automatic
-# abbreviation cannot provide them; they are declared as explicit aliases.
-my $gotopts = GetOptions ( 'input=s' => \$instrfile,
-                          'token=s' => \$tokenfile,
-                          'sourcetoken|st=s' => \$tokensource,
-                          'grammar=s' => \$grammarfile,
-                          'sourcegrammar|sg=s' => \$grammarsource,
-                          'version' => \$showversion,
-                          'n|dry-run' => \$dry_run,
-                          'help|usage' => \$showusage,
-                        );
-
-&showusage and exit 1 unless $gotopts;
-&showversion if $showversion;
-&showusage if $showusage;
-exit 0 if $showversion or $showusage;
-
-# valid values for instrs.dat fields
-my $valid_regs = join '|', qw(
-    reg_al reg_ah reg_ax reg_eax
-    reg_bl reg_bh reg_bx reg_ebx
-    reg_cl reg_ch reg_cx reg_ecx
-    reg_dl reg_dh reg_dx reg_edx
-    reg_si reg_esi reg_di reg_edi
-    reg_bp reg_ebp
-    reg_cs reg_ds reg_es reg_fs reg_gs reg_ss
-    ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG
-    fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm
-    imm8 imm16 imm32 imm64 imm80 imm128
-    imm8x imm16x imm32x imm64x imm80x imm128x
-    rm8 rm16 rm32 rm1632 rm64 rm80 rm128
-    rm8x rm16x rm32x rm1632x rm64x rm80x rm128x
-    reg8 reg16 reg32 reg1632 reg64 reg80 reg128
-    reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
-    mem8 mem16 mem32 mem1632 mem64 mem80 mem128
-    mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
-    target memfar
-);
-my $valid_opcodes = join '|', qw(
-    [0-9A-F]{2}
-    \\$0\\.\\d
-);
-my $valid_cpus = join '|', qw(
-    8086 186 286 386 486 P4 P5 P6
-    FPU MMX KATMAI SSE SSE2
-    AMD ATHLON 3DNOW
-    SMM
-    CYRIX
-    UNDOC OBS PRIV PROT
-    @0 @1
-);
-
-# track errors and warnings rather than die'ing on the first.
-my (@messages, $errcount, $warncount);
-# Print every message passed in.  If any error has been counted in
-# $errcount, announce it and terminate the script with exit code -1;
-# otherwise return so the caller can continue.
-sub die_with_errors (@)
-{
-    print for @_;
-    if ($errcount)
-    {
-       print "Dying with errors\n";
-       exit -1;
-    }
-}
-
-# Parse and validate the instruction file, then print whatever was collected
-# (this exits if any errors were counted).
-my ($groups) = &read_instructions ($instrfile);
-die_with_errors @messages;
-
-# A dry run stops after verification; otherwise generate both output files.
-exit 0 if $dry_run;
-
-&output_lex ($tokenfile, $tokensource, $groups);
-&output_yacc ($grammarfile, $grammarsource, $groups);
-
-# print version for --version, etc.
-# Writes a single line containing the script name and $VERSION to standard
-# output; the caller decides whether to exit afterwards.
-sub showversion
-{
-    print "YASM gen_instr.pl $VERSION\n";
-}
-
-# print usage information for --help, etc.
-# Writes the option summary to standard output, interpolating the current
-# default file names.  The script name shown matches the one printed by
-# showversion (gen_instr.pl).
-sub showusage
-{
-    print <<"EOF";
-Usage: gen_instr.pl [-i input] [-t tokenfile] [-g grammarfile]
-    -i, --input                 instructions file (default: $instrfile)
-    -t, --token                 token output file (default: $tokenfile)
-    -st, --sourcetoken   token input file (default: $tokenfile.in)
-    -g, --grammar        grammar output file (default: $grammarfile)
-    -sg, --sourcegrammar grammar input file (default: $grammarfile.in)
-    -v, --version        show version and exit
-    -h, --help, --usage  show this message and exit
-    -n, --dry-run        verify input file without writing output files
-EOF
-}
-
-# read in instructions, and verify they're valid (well, mostly)
-#
-# Takes the path of the instructions file and returns a reference to a hash
-# keyed by group name.  Each group holds a {rules} list (one array ref per
-# encoding line) and a {members} list (one array ref per instruction that
-# belongs to the group).  Validation problems are appended to @messages and
-# counted in $errcount rather than aborting at once; die_with_errors is
-# invoked as soon as TOO_MANY_ERRORS messages have accumulated.
-sub read_instructions ($)
-{
-    my $instrfile = shift || die;
-    open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n";
-    my %instr;
-    my %groups;
-
-    # Validate one encoding line for group $inst ("!name") and append it to
-    # that group's rule list.
-    sub add_group_rule ($$$$)
-    {
-       my ($inst, $args, $groups, $instrfile) = splice @_;
-
-       # slide $0.\d down by one.
-       # i still say changing instrs.dat would be better ;)
-       $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
-
-       # detect relative target format by looking for "target" in args
-       if($args =~ m/target/oi)
-       {
-           my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
-               split /\t+/, $args;
-           eval {
-               die "Invalid group name\n"
-                       if $inst !~ m/^!\w+$/o;
-               die "Invalid Operands\n"
-                       if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
-               die "Invalid Address Size\n"
-                       if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
-               die "Invalid Short Opcode\n"
-                       if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
-               die "Invalid Near Opcode\n"
-                       if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
-               die "Invalid Short CPU\n"
-                       if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-               die "Invalid Near CPU\n"
-                       if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           };
-           push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-           die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-           # knock the ! off of $inst for the groupname
-           $inst = substr $inst, 1;
-           push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
-       } else {
-           my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
-           eval {
-               die "Invalid group name\n"
-                       if $inst !~ m/^!\w+$/o;
-               die "Invalid Operands\n"
-                       if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
-               die "Invalid Operation Size\n"
-                       if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
-               die "Invalid Opcode\n"
-                       if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
-               die "Invalid Effective Address\n"
-                       if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
-               die "Invalid Immediate Operand\n"
-                       if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
-               die "Invalid CPU\n"
-                       if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           };
-           push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-           die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-           # knock the ! off of $inst for the groupname
-           $inst = substr $inst, 1;
-           push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
-       }
-    }
-
-    # Validate one "instruction!group" line and append the instruction to
-    # that group's member list.
-    sub add_group_member ($$$$$)
-    {
-       my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_;
-
-       my ($inst, $group) = split /!/, $handle;
-       my ($args, $cpu) = split /\t+/, $fullargs;
-       eval {
-           die "Invalid instruction name\n"
-                   if $inst !~ m/^\w+$/o;
-           die "Invalid group name\n"
-                   if $group !~ m/^\w+$/o;
-           die "Invalid CPU\n"
-                   if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
-           # A member naming an unknown group is only a warning; count it
-           # only when the warning is actually issued.
-           push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n"
-                   and $warncount++
-                   unless exists $groups->{$group};
-       };
-       push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
-       # only allow multiple instances of instructions that aren't of a group
-       push @messages, "Multiple Definition for instruction $inst at $instrfile line $.\n" and $errcount++
-               if exists $instr->{$inst} and not exists $groups->{$inst};
-       die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
-       push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu];
-       $instr->{$inst} = 1;
-    }
-
-    while (<INPUT>)
-    {
-       chomp;
-       # skip blank lines as well as comment-only lines
-       next if /^\s*(?:;.*)?$/;
-
-       my ($handle, $args) = split /\t+/, $_, 2;
-
-       # pseudo hack to handle original style instructions (no group)
-       if ($handle =~ m/^\w+$/)
-       {
-           # TODO: this has some long ranging effects, as the eventual
-           # bison rules get tagged <groupdata> when they don't need
-           # to, etc.  Fix this sometime.
-           add_group_rule ("!$handle", $args, \%groups, $instrfile);
-           add_group_member ("$handle!$handle", "", \%groups, \%instr,
-                             $instrfile);
-       }
-       elsif ($handle =~ m/^!\w+$/)
-       {
-           add_group_rule ($handle, $args, \%groups, $instrfile);
-       }
-       elsif ($handle =~ m/^\w+!\w+$/)
-       {
-           add_group_member ($handle, $args, \%groups, \%instr,
-                             $instrfile);
-       }
-       # TODO: consider if this is necessary: Pete?
-       # (add_group_member_synonym is -not- implemented)
-       #elsif ($handle =~ m/^:\w+$/)
-       #{
-       #    add_group_member_synonym ($handle, $args);
-       #}
-    }
-    close INPUT;
-    return (\%groups);
-}
-
-# Generate the lexer source: copy the template ($tokensource, defaulting to
-# "$tokenfile.in") to $tokenfile, replacing the /* @INSTRUCTIONS@ */ marker
-# line with one lexer rule per distinct instruction name of every group.
-# Each rule stores the member's comma-separated group data (with "nil"
-# rewritten to 0, emitted as hex literals) into yylval.groupdata[] and
-# returns the upper-cased GRP_<group> token.
-sub output_lex ($@)
-{
-    my $tokenfile = shift or die;
-    my $tokensource = shift;
-    $tokensource = "$tokenfile.in" unless $tokensource;
-    my $groups = shift or die;
-
-    open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n";
-    open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n";
-    while (<IN>)
-    {
-       # Every line except the marker is copied through untouched.
-       unless (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
-       {
-           print TOKEN $_;
-           next;
-       }
-
-       # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content
-       foreach my $groupname (sort keys %$groups)
-       {
-           (my $label = $groupname) =~ s/^!//;
-           my %seen;
-
-           foreach my $member (@{$groups->{$groupname}{members}})
-           {
-               # emit each instruction name once per group
-               next if exists $seen{$member->[0]};
-               $seen{$member->[0]} = 1;
-
-               my @assignments;
-               if ($member->[2])
-               {
-                   my @values = split ",", $member->[2];
-                   foreach my $slot (0 .. $#values)
-                   {
-                       (my $value = $values[$slot]) =~ s/nil/0/;
-                       push @assignments,
-                           " yylval.groupdata[$slot] = 0x$value;";
-                   }
-                   $assignments[-1] .= "\n\t     ";
-               }
-               # TODO: change appropriate GRP_FOO back to INS_FOO's.  not
-               # functionally important; just pedantically so.
-               printf TOKEN "%-12s{%s return %-20s }\n",
-                   $member->[0],
-                   (join "\n\t     ", @assignments),
-                   "\Ugrp_$label;\E";
-           }
-       }
-    }
-    close IN;
-    close TOKEN;
-}
-
-# helper functions for yacc output
-
-# Opening line of a bison alternative: "RULE: TOKENS {" when $count is false
-# (first alternative of the rule), "    | TOKENS {" otherwise.
-sub rule_header ($ $ $)
-{
-    my ($rule, $tokens, $count) = @_;
-    return "    | $tokens {\n" if $count;
-    return "$rule: $tokens {\n";
-}
-
-# Closing line of a bison action block.
-sub rule_footer ()
-{
-    return "    }\n";
-}
-
-# Start a conditional bison action: the rule header followed by
-#   if ($<regarg> == <val>) { <statements of @$a_eax>  $$ = <func>; }
-# The array behind $a_eax is interpolated into the text, so its elements are
-# joined with single spaces.
-sub cond_action_if ( $ $ $ $ $ $ $ )
-{
-    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
-    return rule_header ($rule, $tokens, $count) . <<"EOF";
-        if (\$$regarg == $val) {
-            @$a_eax
-            \$\$ = $func;
-        }
-EOF
-}
-# Continue a conditional action with an additional "else if" branch of the
-# same shape; no header or footer is emitted.
-sub cond_action_elsif ( $ $ $ $ )
-{
-    my ($regarg, $val, $func, $a_eax) = splice (@_);
-    return <<"EOF";
-        else if (\$$regarg == $val) {
-            @$a_eax
-            \$\$ = $func;
-        }
-EOF
-}
-# Finish a conditional action: the final "else" branch followed by the rule
-# footer.
-sub cond_action_else ( $ $ )
-{
-    my ($func, $a_args) = splice (@_);
-    return <<"EOF" . rule_footer;
-        else {
-            @$a_args
-            \$\$ = $func;
-        }
-EOF
-}
-# Complete two-way conditional action: the "if" branch uses @$a_eax, the
-# "else" branch uses @$a_args; both assign the same $func to $$.
-sub cond_action ( $ $ $ $ $ $ $ $ )
-{
-    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args)
-     = splice (@_);
-    return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func,
-       $a_eax) . cond_action_else ($func, $a_args);
-}
-
-# Plain bison alternative: the rule header, the statements in @$a_args
-# (joined with single spaces on one line), the assignment of $func to $$,
-# and the rule footer.
-sub action ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = @_;
-    return join '',
-       rule_header ($rule, $tokens, $count),
-       "        @$a_args\n",
-       "        \$\$ = $func;\n",
-       rule_footer ();
-}
-
-# Same as a plain action, with an extra statement after the assignment that
-# calls x86_bc_insn_set_shift_flag() on the resulting bytecode ($$).
-sub action_setshiftflag ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = @_;
-    return join '',
-       rule_header ($rule, $tokens, $count),
-       "        @$a_args\n",
-       "        \$\$ = $func;\n",
-       "        x86_bc_insn_set_shift_flag(\$\$);\n",
-       rule_footer ();
-}
-
-# Same as a plain action, but the generated code first changes the op_sel of
-# the second rule component ($2) from JR_NONE to JR_SHORT before running the
-# argument statements.
-sub action_setjrshort ( @ $ )
-{
-    my ($rule, $tokens, $func, $a_args, $count) = @_;
-    return join '',
-       rule_header ($rule, $tokens, $count),
-       "        if (\$2.op_sel == JR_NONE)\n",
-       "            \$2.op_sel = JR_SHORT;\n",
-       "        @$a_args\n",
-       "        \$\$ = $func;\n",
-       rule_footer ();
-}
-
-# Position of $str within the whitespace-separated token list $tokens.
-# Everything from the first match of $str (used as a regex) to the end is
-# collapsed into one placeholder word, and the remaining words are counted.
-sub get_token_number ( $ $ )
-{
-    my ($tokens, $str) = @_;
-    (my $truncated = $tokens) =~ s/$str.*/x/; # placeholder keeps its slot
-    my @words = split /\s+/, $truncated;
-    return scalar @words;
-}
-
-sub output_yacc ($@)
-{
-    my $grammarfile = shift or die;
-    my $grammarsource = shift;
-    $grammarsource ||= "$grammarfile.in";
-    my $groups = shift or die;
-
-    open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n";
-    open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n";
-
-    while (<IN>)
-    {
-       if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
-       {
-           print GRAMMAR "static x86_new_insn_data idata;\n";
-           print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
-       }
-       elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
-       {
-           my $len = length("%token <groupdata>");
-           print GRAMMAR "%token <groupdata>";
-           foreach my $group (sort keys %$groups)
-           {
-               if ($len + length("GRP_$group") < 76)
-               {
-                   print GRAMMAR " GRP_\U$group\E";
-                   $len += length(" GRP_$group");
-               }
-               else
-               {
-                   print GRAMMAR "\n%token <groupdata> GRP_\U$group\E";
-                   $len = length("%token <groupdata> GRP_$group");
-               }
-           }
-           print GRAMMAR "\n";
-       }
-       elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/})
-       {
-           my $len = length("%type <bc>");
-           print GRAMMAR "%type <bc>";
-           foreach my $group (sort keys %$groups)
-           {
-               if ($len + length($group) < 76)
-               {
-                   print GRAMMAR " $group";
-                   $len += length(" $group");
-               }
-               else
-               {
-                   print GRAMMAR "\n%type <bc> $group";
-                   $len = length("%type <bc> $group");
-               }
-           }
-           print GRAMMAR "\n";
-       }
-       elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
-       {
-           # list every kind of instruction that instrbase can be
-           print GRAMMAR "instrbase:    ",
-                   join( "\n    | ", sort keys %$groups), "\n;\n";
-
-           my ($ONE, $AL, $AX, $EAX);  # need the outer scope
-           my (@XCHG_AX, @XCHG_EAX);
-
-           # list the arguments and actions (buildbc)
-           #foreach my $instrname (sort keys %$instrlist)
-           foreach my $group (sort keys %$groups)
-           {
-               # I'm still convinced this is a hack.  The idea is: if,
-               # within an instruction, we see certain versions of the
-               # opcodes with ONE, or reg_e?a[lx],imm(8|16|32), we defer
-               # generation of the action, as we may need to fold it
-               # into another version with a conditional to generate
-               # the more efficient variant of the opcode.
-               # BUT, if we don't fold it in, we have to generate the
-               # original version we would have otherwise.
-               ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0);
-               # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax).
-               (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
-               my $count = 0;
-               foreach my $inst (@{$groups->{$group}{rules}}) {
-                   if($inst->[OPERANDS] =~ m/target/oi)
-                   {
-                       # relative target format
-                       # build the instruction in pieces.
-
-                       # rulename = instruction
-                       my $rule = "$inst->[INST]";
-
-                       # tokens it eats: instruction and arguments
-                       # nil => no arguments
-                       my $tokens = "\Ugrp_$rule\E";
-                       $tokens .= " $inst->[OPERANDS]"
-                           if $inst->[OPERANDS] ne 'nil';
-                       $tokens =~ s/,/ ',' /g;
-                       $tokens =~ s/:/ ':' /g;
-                       my $datastruct = "x86_new_jmprel_data";
-                       my $datastructname = "jrdata";
-                       my $func = "x86_bc_new_jmprel(&$datastructname)";
-
-                       # Create the argument list for bytecode_new
-                       my @args;
-
-                       # Target argument: HACK: Always assumed to be arg 1.
-                       push @args, 'target=&$2;';
-
-                       # test for short opcode "nil"
-                       if($inst->[SHORTOPCODE] =~ m/nil/)
-                       {
-                           push @args, 'short_op_len=0;';
-                       }
-                       else
-                       {
-                           my @opcodes;
-                           # Check for possible length parameter
-                           if($inst->[SHORTOPCODE] =~ m/\?/)
-                           {
-                               my @pieces = split /\?/, $inst->[SHORTOPCODE];
-                               push @args, "short_op_len=".$pieces[0].";";
-                               # opcode piece 1 (and 2 and 3 if attached)
-                               @opcodes = split ",", $pieces[1];
-                           }
-                           else
-                           {
-                               # opcode piece 1 (and 2 and 3 if attached)
-                               @opcodes = split ",", $inst->[SHORTOPCODE];
-                               # number of bytes of short opcode
-                               push @args, "short_op_len=".@opcodes.";";
-                           }
-                           for (my $i=0; $i < @opcodes; ++$i)
-                           {
-                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                               $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                               # don't match $0.\d in the following rule.
-                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
-                               push @args, "short_op[$i]=$opcodes[$i];";
-                           }
-                       }
-
-                       # test for near opcode "nil"
-                       if($inst->[NEAROPCODE] =~ m/nil/)
-                       {
-                           push @args, 'near_op_len=0;';
-                       }
-                       else
-                       {
-                           # opcode piece 1 (and 2 and 3 if attached)
-                           my @opcodes = split ",", $inst->[NEAROPCODE];
-                           # number of bytes of near opcode
-                           push @args, "near_op_len=".@opcodes.";";
-                           for (my $i=0; $i < @opcodes; ++$i)
-                           {
-                               $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                               $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                               # don't match $0.\d in the following rule.
-                               $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
-                               push @args, "near_op[$i]=$opcodes[$i];";
-                           }
-                       }
-
-                       # address size
-                       push @args, "addrsize=$inst->[ADSIZE];";
-                       $args[-1] =~ s/nil/0/;
-
-                       # now that we've constructed the arglist, subst $0.\d
-                       s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
-                       # and add the data structure reference
-                       s/^/$datastructname./g foreach (@args);
-
-                       if ($args[0] =~ m/\&\$/)
-                       {
-                           $args[0] = '/*@-immediatetrans@*/' . $args[0] .
-                               '/*@=immediatetrans@*/';
-                       }
-
-                       # generate the grammar
-                       # Specialcase jcc to set op_sel=JR_SHORT.
-                       if ($rule =~ m/jcc/)
-                       {
-                           print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++);
-                       }
-                       else
-                       {
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-                       }
-                   }
-                   else
-                   {
-                       # general instruction format
-                       # build the instruction in pieces.
-
-                       # rulename = instruction
-                       my $rule = "$inst->[INST]";
-
-                       # tokens it eats: instruction and arguments
-                       # nil => no arguments
-                       my $tokens = "\Ugrp_$rule\E";
-                       $tokens .= " $inst->[OPERANDS]"
-                           if $inst->[OPERANDS] ne 'nil';
-                       $tokens =~ s/,/ ',' /g;
-                       $tokens =~ s/:/ ':' /g;
-                       # offset args
-                       my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
-                       my $datastruct = "x86_new_insn_data";
-                       my $datastructname = "idata";
-                       my $func = "x86_bc_new_insn(&$datastructname)";
-
-                       # Create the argument list for bytecode_new
-                       my @args;
-
-                       # operand size
-                       push @args, "opersize=$inst->[OPSIZE];";
-                       $args[-1] =~ s/nil/0/;
-
-
-                       # opcode piece 1 (and 2 and 3 if attached)
-                       my @opcodes = split ",", $inst->[OPCODE];
-                       # number of bytes of opcodes
-                       push @args, "op_len=".@opcodes.";";
-                       for (my $i=0; $i < @opcodes; ++$i)
-                       {
-                           $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
-                           $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
-                           # don't match $0.\d in the following rule.
-                           $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
-                           push @args, "op[$i]=$opcodes[$i];";
-                       }
-
-                       # effective addresses
-                       my $effaddr = $inst->[EFFADDR];
-                       $effaddr =~ s/^nil/NULL,0/;
-                       $effaddr =~ s/nil/0/;
-                       # don't let a $0.\d match slip into the following rules.
-                       $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
-                       $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
-                       $effaddr =~ s[(\$\d+)i,\s*(\d+)]
-                           ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
-
-                       die $effaddr if $effaddr =~ m/\d+[ri]/;
-
-                       my @effaddr_split = split ',', $effaddr;
-                       $effaddr_split[0] =~ s/\^/,/;
-                       push @args, "ea=$effaddr_split[0];";
-                       if ($effaddr_split[0] !~ m/NULL/)
-                       {
-                           push @args, "spare=$effaddr_split[1];";
-                       }
-
-                       # immediate sources
-                       my $imm = $inst->[IMM];
-                       $imm =~ s/nil/NULL,0/;
-                       # don't match $0.\d in the following rules.
-                       $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
-                       $imm =~ s[^([0-9A-Fa-f]+),]
-                           [imm_new_int(0x$1),];
-                       $imm =~ s[^\$0.(\d+),]
-                           [imm_new_int((unsigned long)\$1\[$1\]),];
-
-                       # divide the second, and only the second, by 8 bits/byte
-                       $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
-                       $imm .= ($3||'') eq 's' ? ',1' : ',0';
-
-                       die $imm if $imm =~ m/\d+s/;
-
-                       my @imm_split = split ",", $imm;
-                       push @args, "imm=$imm_split[0];";
-                       if ($imm_split[0] !~ m/NULL/)
-                       {
-                           push @args, "im_len=$imm_split[1];";
-                           push @args, "im_sign=$imm_split[2];";
-                       }
-
-                       # now that we've constructed the arglist, subst $0.\d
-                       s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
-                       # and add the data structure reference
-                       s/^/$datastructname./g foreach (@args);
-                   
-                       # see if we match one of the cases to defer
-                       if (($inst->[OPERANDS]||"") =~ m/,ONE/)
-                       {
-                           $ONE = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/)
-                       {
-                           $AL = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/)
-                       {
-                           $AX = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/)
-                       {
-                           $EAX = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/)
-                       {
-                           $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/)
-                       {
-                           $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/)
-                       {
-                           $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
-                       }
-                       elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/)
-                       {
-                           $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
-                       }
-
-                       # or if we've deferred and we match the folding version
-                       elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
-                       {
-                           $ONE->[4] = 1;
-                           # Output a normal version except imm8 -> imm8x
-                           # (BYTE override always makes longer version, and
-                           # we don't want to conflict with the imm version
-                           # we output right after this one).
-                           $tokens =~ s/imm8/imm8x/;
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-
-                           # Now output imm version, with second opcode byte
-                           # set to ,1 opcode.  Also call SetInsnShiftFlag().
-                           $tokens =~ s/imm8x/imm/;
-                           my $oneval = $ONE->[3]->[2];
-                           $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg;
-                           push @args, $oneval;
-                           print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
-                       }
-                       elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
-                       {
-                           $AL->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg8");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
-                       }
-                       elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/)
-                       {
-                           $AX->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg16");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
-                       }
-                       elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/)
-                       {
-                           $EAX->[4] = 1;
-                           my $regarg = get_token_number ($tokens, "reg32");
-
-                           print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
-                       }
-                       elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
-                           ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
-                       {
-                           my $first = 1;
-                           for (my $i=0; $i < @XCHG_AX; ++$i)
-                           {
-                               if($XCHG_AX[$i])
-                               {
-                                   $XCHG_AX[$i]->[4] = 1;
-                                   # This is definitely a hack.  The "right"
-                                   # way to do this would be to enhance
-                                   # get_token_number to get the nth reg16
-                                   # instead of always getting the first.
-                                   my $regarg =
-                                       get_token_number ($tokens, "reg16")
-                                       + $i*2;
-
-                                   if ($first)
-                                   {
-                                       print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
-                                       $first = 0;
-                                   }
-                                   else
-                                   {
-                                       $count++;
-                                       print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
-                                   }
-                               }
-                           }
-                           print GRAMMAR cond_action_else ($func, \@args);
-                       }
-                       elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
-                           ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
-                       {
-                           my $first = 1;
-                           for (my $i=0; $i < @XCHG_EAX; ++$i)
-                           {
-                               if($XCHG_EAX[$i])
-                               {
-                                   $XCHG_EAX[$i]->[4] = 1;
-                                   # This is definitely a hack.  The "right"
-                                   # way to do this would be to enhance
-                                   # get_token_number to get the nth reg32
-                                   # instead of always getting the first.
-                                   my $regarg =
-                                       get_token_number ($tokens, "reg32")
-                                       + $i*2;
-
-                                   if ($first)
-                                   {
-                                       print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
-                                       $first = 0;
-                                   }
-                                   else
-                                   {
-                                       $count++;
-                                       print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
-                                   }
-                               }
-                           }
-                           print GRAMMAR cond_action_else ($func, \@args);
-                       }
-
-                       # otherwise, generate the normal version
-                       else
-                       {
-                           print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-                       }
-                   }
-               }
-
-               # Catch deferred rules that were never folded into another
-               # rule.  Each deferred record is [rule, tokens, func, args];
-               # element 4 is set to 1 once the record has been folded in.
-               # Every record is tested against its OWN folded flag: the AX
-               # and EAX records used to test the AL flag, which dropped an
-               # unfolded AX/EAX rule whenever AL had been folded and emitted
-               # an already-folded AX/EAX rule again whenever AL had not.
-               # Emission order (ONE, AL, AX, EAX) is unchanged.
-               # NOTE(review): deferred XCHG_AX/XCHG_EAX records are not
-               # emitted here; that matches the previous behavior.
-               foreach my $deferred ($ONE, $AL, $AX, $EAX)
-               {
-                   # 0 means nothing was deferred in this slot
-                   next unless $deferred;
-                   # already emitted as part of a folded rule
-                   next if $deferred->[4];
-                   print GRAMMAR action (@$deferred, $count++);
-               }
-               
-               # print error action
-               # ASSUMES: at least one previous action exists
-               print GRAMMAR "    | \Ugrp_$group\E error {\n";
-               print GRAMMAR "        Error (_(\"expression syntax error\"));\n";
-               print GRAMMAR "        \$\$ = (bytecode *)NULL;\n";
-               print GRAMMAR "    }\n";
-
-               # terminate the rule
-               print GRAMMAR ";\n";
-           }
-       }
-       else
-       {
-           print GRAMMAR $_;
-       }
-    }
-    close IN;
-    close GRAMMAR;
-}
index b3f0533ac3b4d8ef1af2cb0895e8b341ff49d61a..248820db28992015ca849e16e6ad97b756e1ab6c 100644 (file)
@@ -44,8 +44,10 @@ RCSID("$IdPath$");
 
 #include "src/parsers/nasm/nasm-defs.h"
 
+
 void init_table(void);
 extern int nasm_parser_lex(void);
+extern void nasm_parser_set_directive_state(void);
 void nasm_parser_error(const char *);
 static void nasm_parser_directive(const char *name,
                                  valparamhead *valparams,
@@ -55,6 +57,7 @@ extern objfmt *nasm_parser_objfmt;
 extern sectionhead nasm_parser_sections;
 extern section *nasm_parser_cur_section;
 extern char *nasm_parser_locallabel_base;
+extern size_t nasm_parser_locallabel_base_len;
 
 static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
 static bytecode *nasm_parser_temp_bc;
@@ -71,16 +74,19 @@ static bytecode *nasm_parser_temp_bc;
     intnum *intn;
     floatnum *flt;
     symrec *sym;
-    unsigned char groupdata[5];
+    unsigned long arch_data[4];
     effaddr *ea;
     expr *exp;
-    immval *im_val;
-    x86_targetval tgt_val;
     datavalhead datahead;
     dataval *data;
     bytecode *bc;
     valparamhead dir_valparams;
     valparam *dir_valparam;
+    struct {
+       insn_operandhead operands;
+       int num_operands;
+    } insn_operands;
+    insn_operand *insn_operand;
 }
 
 %token <intn> INTNUM
@@ -90,46 +96,25 @@ static bytecode *nasm_parser_temp_bc;
 %token <int_info> DECLARE_DATA
 %token <int_info> RESERVE_SPACE
 %token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token SEG WRT NOSPLIT
 %token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token <arch_data> INSN PREFIX REG SEGREG TARGETMOD
 %token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
 %token <str_val> ID LOCAL_ID SPECIAL_ID
 %token LINE
 
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
+%type <bc> line lineexp exp instr
+
+%type <ea> memaddr
+%type <exp> dvexpr expr direxpr
 %type <sym> explabel
 %type <str_val> label_id
-%type <tgt_val> target
 %type <data> dataval
 %type <datahead> datavals
 %type <dir_valparams> directive_valparams
 %type <dir_valparam> directive_valparam
+%type <insn_operands> operands
+%type <insn_operand> operand
 
 %left '|'
 %left '^'
@@ -163,7 +148,9 @@ line: '\n'          { $$ = (bytecode *)NULL; }
        xfree($5);
        $$ = (bytecode *)NULL;
     }
-    | directive '\n'   { $$ = (bytecode *)NULL; }
+    | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' {
+       $$ = (bytecode *)NULL;
+    }
     | error '\n'       {
        Error(_("label or instruction expected at start of line"));
        $$ = (bytecode *)NULL;
@@ -191,11 +178,32 @@ exp: instr
     | INCBIN STRING ',' expr ',' expr  { $$ = bc_new_incbin($2, $4, $6); }
 ;
 
+instr: INSN            {   /* instruction with no operands: count 0 and a NULL operand list */
+       $$ = cur_arch->parse.new_insn($1, 0, NULL);
+    }
+    | INSN operands    {   /* instruction with operands: pass the collected count and list */
+       $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands);
+       ops_delete(&$2.operands, 0);    /* run on the list right after new_insn returns; meaning of the 0 argument is not visible here -- TODO confirm */
+    }
+    | INSN error       {   /* operand parse error: report it and produce no bytecode */
+       Error(_("expression syntax error"));
+       $$ = NULL;
+    }
+    | PREFIX instr     {   /* prefix token: hand the following instruction's bytecode to the arch prefix hook */
+       $$ = $2;
+       cur_arch->parse.handle_prefix($$, $1);  /* NOTE(review): $$ is NULL when the inner instr took the error rule -- confirm the hook tolerates that */
+    }
+    | SEGREG instr     {   /* segment register written as a prefix; only element 0 of its token data is passed on */
+       $$ = $2;
+       cur_arch->parse.handle_seg_prefix($$, $1[0]);
+    }
+;
+
 datavals: dataval          { dvs_initialize(&$$); dvs_append(&$$, $1); }  /* first value: initialize the list, then append it */
     | datavals ',' dataval  { dvs_append(&$1, $3); $$ = $1; }  /* later values: append to the list built so far and pass it up */
 ;
 
-dataval: expr_no_string        { $$ = dv_new_expr($1); }
+dataval: dvexpr                { $$ = dv_new_expr($1); }
     | STRING           { $$ = dv_new_string($1); }
     | error            {
        Error(_("expression syntax error"));
@@ -219,19 +227,22 @@ label_id: ID          {
        $$ = $1;
        if (nasm_parser_locallabel_base)
            xfree(nasm_parser_locallabel_base);
-       nasm_parser_locallabel_base = xstrdup($1);
+       nasm_parser_locallabel_base_len = strlen($1);
+       nasm_parser_locallabel_base =
+           xmalloc(nasm_parser_locallabel_base_len+1);
+       strcpy(nasm_parser_locallabel_base, $1);
     }
     | SPECIAL_ID
     | LOCAL_ID
 ;
 
 /* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']'        {
-       xfree($2);
+directive: DIRECTIVE_NAME directive_val        {   /* the surrounding '[' ... ']' are matched by the line rule, so the name is $1 here */
+       xfree($1);      /* release the DIRECTIVE_NAME string */
     }
-    | '[' DIRECTIVE_NAME error ']'             {
-       Error(_("invalid arguments to [%s]"), $2);
-       xfree($2);
+    | DIRECTIVE_NAME error             {   /* unparsable arguments: report them against the directive name */
+       Error(_("invalid arguments to [%s]"), $1);
+       xfree($1);      /* freed only after the name has been used in the message */
     }
 ;
 
@@ -267,299 +278,85 @@ directive_valparam: direxpr      {
     | ID '=' direxpr           { vp_new($$, $1, $3); }
 ;
 
-/* register groupings */
-fpureg: ST0
-    | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
-    | DWORD reg_eax    { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
-    | DWORD reg_ecx    { $$ = $2; }
-;
-
-rawreg32: REG_EAX
-    | REG_ECX
-    | REG_EDX
-    | REG_EBX
-    | REG_ESP
-    | REG_EBP
-    | REG_ESI
-    | REG_EDI
-;
-
-reg32: rawreg32
-    | DWORD reg32      { $$ = $2; }
-;
-
-reg_ax: REG_AX
-    | WORD reg_ax      { $$ = $2; }
-;
-
-reg_cx: REG_CX
-    | WORD reg_cx      { $$ = $2; }
-;
-
-reg_dx: REG_DX
-    | WORD reg_dx      { $$ = $2; }
-;
-
-rawreg16: REG_AX
-    | REG_CX
-    | REG_DX
-    | REG_BX
-    | REG_SP
-    | REG_BP
-    | REG_SI
-    | REG_DI
-;
-
-reg16: rawreg16
-    | WORD reg16       { $$ = $2; }
-;
-
-reg_al: REG_AL
-    | BYTE reg_al      { $$ = $2; }
-;
-
-reg_cl: REG_CL
-    | BYTE reg_cl      { $$ = $2; }
-;
-
-reg8: REG_AL
-    | REG_CL
-    | REG_DL
-    | REG_BL
-    | REG_AH
-    | REG_CH
-    | REG_DH
-    | REG_BH
-    | BYTE reg8                { $$ = $2; }
-;
-
-reg_es: REG_ES
-    | WORD reg_es      { $$ = $2; }
-;
-
-reg_ss: REG_SS
-    | WORD reg_ss      { $$ = $2; }
-;
-
-reg_ds: REG_DS
-    | WORD reg_ds      { $$ = $2; }
-;
-
-reg_fs: REG_FS
-    | WORD reg_fs      { $$ = $2; }
-;
-
-reg_gs: REG_GS
-    | WORD reg_gs      { $$ = $2; }
-;
-
-reg_cs: REG_CS
-    | WORD reg_cs      { $$ = $2; }
-;
-
-segreg: REG_ES
-    | REG_SS
-    | REG_DS
-    | REG_FS
-    | REG_GS
-    | REG_CS
-    | WORD segreg      { $$ = $2; }
-;
-
 /* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated?  This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg.  I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM                        { $$ = expr_new_ident(ExprInt($1)); }
-    | rawreg16                 { $$ = expr_new_ident(ExprReg($1, 16)); }
-    | rawreg32                 { $$ = expr_new_ident(ExprReg($1, 32)); }
-    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
-    /*| memexpr '||' memexpr   { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
-    | memexpr '|' memexpr      { $$ = expr_new_tree($1, EXPR_OR, $3); }
-    | memexpr '^' memexpr      { $$ = expr_new_tree($1, EXPR_XOR, $3); }
-    /*| expr '&&' memexpr      { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
-    | memexpr '&' memexpr      { $$ = expr_new_tree($1, EXPR_AND, $3); }
-    /*| memexpr '==' memexpr   { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
-    /*| memexpr '>' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '<' memexpr    { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
-    /*| memexpr '>=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '<=' memexpr   { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
-    /*| memexpr '!=' memexpr   { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
-    | memexpr LEFT_OP memexpr  { $$ = expr_new_tree($1, EXPR_SHL, $3); }
-    | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
-    | memexpr '+' memexpr      { $$ = expr_new_tree($1, EXPR_ADD, $3); }
-    | memexpr '-' memexpr      { $$ = expr_new_tree($1, EXPR_SUB, $3); }
-    | memexpr '*' memexpr      { $$ = expr_new_tree($1, EXPR_MUL, $3); }
-    | memexpr '/' memexpr      { $$ = expr_new_tree($1, EXPR_DIV, $3); }
-    | memexpr SIGNDIV memexpr  { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
-    | memexpr '%' memexpr      { $$ = expr_new_tree($1, EXPR_MOD, $3); }
-    | memexpr SIGNMOD memexpr  { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
-    | '+' memexpr %prec UNARYOP        { $$ = $2; }
-    | '-' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NEG, $2); }
-    /*| '!' memexpr            { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
-    | '~' memexpr %prec UNARYOP        { $$ = expr_new_branch(EXPR_NOT, $2); }
-    | '(' memexpr ')'          { $$ = $2; }
-    | STRING                   {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
+memaddr: expr              {   /* plain expression: the effective address is built by the current arch */
+       $$ = cur_arch->parse.ea_new_expr($1);
     }
-    | error                    { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr           {
-       $$ = x86_ea_new_expr($1);
-       x86_ea_set_segment($$, 0);
+    | SEGREG ':' memaddr    {   /* segment override: element 0 of the SEGREG token data goes to the arch hook */
+       $$ = $3;
+       cur_arch->parse.handle_seg_override($$, $1[0]);
     }
-    | REG_CS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x2E); }
-    | REG_SS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x36); }
-    | REG_DS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x3E); }
-    | REG_ES ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x26); }
-    | REG_FS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x64); }
-    | REG_GS ':' memaddr    { $$ = $3; x86_ea_set_segment($$, 0x65); }
     | BYTE memaddr         { $$ = $2; ea_set_len($$, 1); }  /* size keyword inside the address: ea_set_len called with 1 */
     | WORD memaddr         { $$ = $2; ea_set_len($$, 2); }  /* as above, with 2 */
     | DWORD memaddr        { $$ = $2; ea_set_len($$, 4); }  /* as above, with 4 */
     | NOSPLIT memaddr      { $$ = $2; ea_set_nosplit($$, 1); }  /* NOSPLIT keyword: ea_set_nosplit called with 1 */
 ;
 
-mem: '[' memaddr ']'   { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem                { $$ = $2; }
-    | BYTE mem8x       { $$ = $2; }
-;
-mem16x: WORD mem       { $$ = $2; }
-    | WORD mem16x      { $$ = $2; }
-;
-mem32x: DWORD mem      { $$ = $2; }
-    | DWORD mem32x     { $$ = $2; }
-;
-mem64x: QWORD mem      { $$ = $2; }
-    | QWORD mem64x     { $$ = $2; }
-;
-mem80x: TWORD mem      { $$ = $2; }
-    | TWORD mem80x     { $$ = $2; }
-;
-mem128x: DQWORD mem    { $$ = $2; }
-    | DQWORD mem128x   { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem                { $$ = $2; }
-    | FAR memfar       { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
-    | mem8x
-;
-mem16: mem
-    | mem16x
-;
-mem32: mem
-    | mem32x
-;
-mem64: mem
-    | mem64x
-;
-mem80: mem
-    | mem80x
-;
-mem128: mem
-    | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
-    | mem16x
-    | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8     { $$ = x86_ea_new_reg($1); }
-    | mem8x
-;
-rm16x: reg16   { $$ = x86_ea_new_reg($1); }
-    | mem16x
-;
-rm32x: reg32   { $$ = x86_ea_new_reg($1); }
-    | mem32x
-;
-/* not needed:
-rm64x: MMXREG  { $$ = x86_ea_new_reg($1); }
-    | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
-    | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8      { $$ = x86_ea_new_reg($1); }
-    | mem8
-;
-rm16: reg16    { $$ = x86_ea_new_reg($1); }
-    | mem16
-;
-rm32: reg32    { $$ = x86_ea_new_reg($1); }
-    | mem32
-;
-rm64: MMXREG   { $$ = x86_ea_new_reg($1); }
-    | mem64
-;
-rm128: XMMREG  { $$ = x86_ea_new_reg($1); }
-    | mem128
-;
-
-/* immediate values */
-imm: expr   { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm            { $$ = $2; }
-;
-imm16x: WORD imm    { $$ = $2; }
-;
-imm32x: DWORD imm   { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
-    | imm8x
-;
-imm16: imm
-    | imm16x
-;
-imm32: imm
-    | imm32x
+/* instruction operands: builds the operand list and its count, both read by the "INSN operands" alternative of instr */
+operands: operand          {   /* first operand: initialize the list, append it, count is 1 */
+       ops_initialize(&$$.operands);
+       ops_append(&$$.operands, $1);
+       $$.num_operands = 1;
+    }
+    | operands ',' operand  {   /* further operand: append to the list built so far */
+       ops_append(&$1.operands, $3);
+       $$.operands = $1.operands;      /* list head is copied by value into the result */
+       $$.num_operands = $1.num_operands+1;
+    }
 ;
 
-/* jump targets */
-target: expr           {
-       $$.val = $1;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+operand: '[' memaddr ']'    { $$ = operand_new_mem($2); }  /* bracketed memaddr is wrapped by operand_new_mem */
+    | expr                 { $$ = operand_new_imm($1); }  /* bare expression is wrapped by operand_new_imm */
+    | SEGREG               { $$ = operand_new_segreg($1[0]); }  /* segment register: element 0 of the token data */
+    | BYTE operand         {   /* size override: a register operand whose get_reg_size() is not 1 is an error, any other operand gets size 1 */
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&     /* NOTE(review): no alternative in this rule builds a REG operand directly -- presumably it comes from expr; confirm */
+           cur_arch->get_reg_size($$->data.reg) != 1)  /* assumes get_reg_size() uses the same unit as the size stored below -- TODO confirm */
+           Error(_("cannot override register size"));
+       else
+           $$->size = 1;
     }
-    | SHORT target     {
+    | WORD operand         {   /* same check and assignment as BYTE, with 2 */
        $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 2)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 2;
     }
-    | NEAR target      {
+    | DWORD operand        {   /* same check and assignment as BYTE, with 4 */
        $$ = $2;
-       x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 4)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 4;
     }
+    | QWORD operand        {   /* same check and assignment as BYTE, with 8 */
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 8)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 8;
+    }
+    | TWORD operand        {   /* same check and assignment as BYTE, with 10 */
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 10)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 10;
+    }
+    | DQWORD operand       {   /* same check and assignment as BYTE, with 16 */
+       $$ = $2;
+       if ($$->type == INSN_OPERAND_REG &&
+           cur_arch->get_reg_size($$->data.reg) != 16)
+           Error(_("cannot override register size"));
+       else
+           $$->size = 16;
+    }
+    | TARGETMOD operand            { $$ = $2; $$->targetmod = $1[0]; }  /* target modifier: element 0 of the token data is stored on the operand */
 ;
 
 /* expression trees */
@@ -587,9 +384,48 @@ direxpr: INTNUM                    { $$ = expr_new_ident(ExprInt($1)); }
     | '(' direxpr ')'          { $$ = $2; }
 ;
 
-expr_no_string: INTNUM         { $$ = expr_new_ident(ExprInt($1)); }
+dvexpr: INTNUM                 { $$ = expr_new_ident(ExprInt($1)); }
     | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
     | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
+    /*| dvexpr '||' dvexpr     { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
+    | dvexpr '|' dvexpr                { $$ = expr_new_tree($1, EXPR_OR, $3); }
+    | dvexpr '^' dvexpr                { $$ = expr_new_tree($1, EXPR_XOR, $3); }
+    /*| dvexpr '&&' dvexpr     { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
+    | dvexpr '&' dvexpr                { $$ = expr_new_tree($1, EXPR_AND, $3); }
+    /*| dvexpr '==' dvexpr     { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
+    /*| dvexpr '>' dvexpr      { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+    /*| dvexpr '<' dvexpr      { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+    /*| dvexpr '>=' dvexpr     { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+    /*| dvexpr '<=' dvexpr     { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+    /*| dvexpr '!=' dvexpr     { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
+    | dvexpr LEFT_OP dvexpr    { $$ = expr_new_tree($1, EXPR_SHL, $3); }
+    | dvexpr RIGHT_OP dvexpr   { $$ = expr_new_tree($1, EXPR_SHR, $3); }
+    | dvexpr '+' dvexpr                { $$ = expr_new_tree($1, EXPR_ADD, $3); }
+    | dvexpr '-' dvexpr                { $$ = expr_new_tree($1, EXPR_SUB, $3); }
+    | dvexpr '*' dvexpr                { $$ = expr_new_tree($1, EXPR_MUL, $3); }
+    | dvexpr '/' dvexpr                { $$ = expr_new_tree($1, EXPR_DIV, $3); }
+    | dvexpr SIGNDIV dvexpr    { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
+    | dvexpr '%' dvexpr                { $$ = expr_new_tree($1, EXPR_MOD, $3); }
+    | dvexpr SIGNMOD dvexpr    { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
+    | '+' dvexpr %prec UNARYOP  { $$ = $2; }
+    | '-' dvexpr %prec UNARYOP  { $$ = expr_new_branch(EXPR_NEG, $2); }
+    /*| '!' dvexpr             { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
+    | '~' dvexpr %prec UNARYOP  { $$ = expr_new_branch(EXPR_NOT, $2); }
+    | '(' dvexpr ')'           { $$ = $2; }
+;
+
+/* Expressions for operands and memory expressions.
+ * We don't attempt to check memory expressions for validity here.
+ * Essentially the same as dvexpr above but adds REG and STRING.
+ */
+expr: INTNUM                   { $$ = expr_new_ident(ExprInt($1)); }
+    | FLTNUM                   { $$ = expr_new_ident(ExprFloat($1)); }
+    | REG                      { $$ = expr_new_ident(ExprReg($1[0])); }
+    | STRING                   {
+       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
+       xfree($1);
+    }
+    | explabel                 { $$ = expr_new_ident(ExprSym($1)); }
     /*| expr '||' expr         { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
     | expr '|' expr            { $$ = expr_new_tree($1, EXPR_OR, $3); }
     | expr '^' expr            { $$ = expr_new_tree($1, EXPR_XOR, $3); }
@@ -617,13 +453,6 @@ expr_no_string: INTNUM             { $$ = expr_new_ident(ExprInt($1)); }
     | '(' expr ')'             { $$ = $2; }
 ;
 
-expr: expr_no_string
-    | STRING           {
-       $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
-       xfree($1);
-    }
-;
-
 explabel: ID           {
        $$ = symrec_use($1);
        xfree($1);
@@ -652,46 +481,6 @@ explabel: ID               {
     }
 ;
 
-instr: /* empty */     {
-       idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
-       $$ = x86_bc_new_insn(&idata);
-    }
-    | instrbase
-    | OPERSIZE instr   { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
-    | ADDRSIZE instr   { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
-    | REG_CS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
-    }
-    | REG_SS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
-    }
-    | REG_DS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
-    }
-    | REG_ES instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
-    }
-    | REG_FS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
-    }
-    | REG_GS instr     {
-       $$ = $2;
-       x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
-    }
-    | LOCK instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
-    | REPNZ instr      { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
-    | REP instr                { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
-    | REPZ instr       { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
 %%
 /*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
 
@@ -700,8 +489,6 @@ nasm_parser_directive(const char *name, valparamhead *valparams,
                      valparamhead *objext_valparams)
 {
     valparam *vp, *vp2;
-    const intnum *intn;
-    long lval;
 
     assert(cur_objfmt != NULL);
 
@@ -766,13 +553,25 @@ nasm_parser_directive(const char *name, valparamhead *valparams,
            vp->param = NULL;
        }
        nasm_parser_prev_bc = (bytecode *)NULL;
-    } else if (strcasecmp(name, "bits") == 0) {
-       if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
-           (intn = expr_get_intnum(&vp->param)) != NULL &&
-           (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
-           x86_mode_bits = (unsigned char)lval;
-       else
-           Error(_("invalid argument to [%s]"), "BITS");
+    } else if (strcasecmp(name, "cpu") == 0) {
+       /* Each CPU argument is handed to the architecture as a string: an
+        * identifier is passed through as-is, a numeric expression is first
+        * converted to its decimal representation.
+        */
+       vps_foreach(vp, valparams) {
+           if (vp->val)
+               cur_arch->parse.switch_cpu(vp->val);
+           else if (vp->param) {
+               const intnum *intcpu;
+               intcpu = expr_get_intnum(&vp->param);
+               if (!intcpu)
+                   Error(_("invalid argument to [%s]"), "CPU");
+               else {
+                   /* 3 decimal digits per byte is an upper bound for any
+                    * unsigned long, plus one for the terminating NUL.
+                    */
+                   char strcpu[sizeof(unsigned long) * 3 + 1];
+                   sprintf(strcpu, "%lu", intnum_get_uint(intcpu));
+                   cur_arch->parse.switch_cpu(strcpu);
+               }
+           }
+       }
+    } else if (!cur_arch->parse.directive(name, valparams, objext_valparams,
+                                         &nasm_parser_sections)) {
+       ;
     } else if (cur_objfmt->directive(name, valparams, objext_valparams,
                                     &nasm_parser_sections)) {
        Error(_("unrecognized directive [%s]"), name);
index b7d669722351f64ccebd27082e9298bb30193723..e21e9554e9e540021410cc2d31ddcef6e64cc08f 100644 (file)
@@ -34,6 +34,7 @@ extern FILE *nasm_parser_in;
 extern int nasm_parser_debug;
 
 extern int nasm_parser_parse(void);
+extern void nasm_parser_cleanup(void);
 
 size_t (*nasm_parser_input) (char *buf, size_t max_size);
 
@@ -58,6 +59,8 @@ nasm_parser_do_parse(parser *p, FILE *f, const char *in_filename)
 
     nasm_parser_parse();
 
+    nasm_parser_cleanup();
+
     /* Free locallabel base if necessary */
     if (nasm_parser_locallabel_base)
        xfree(nasm_parser_locallabel_base);
diff --git a/src/parsers/nasm/nasm-token.re b/src/parsers/nasm/nasm-token.re
new file mode 100644 (file)
index 0000000..ab0aa11
--- /dev/null
@@ -0,0 +1,516 @@
+/*
+ * NASM-compatible re2c lexer
+ *
+ *  Copyright (C) 2001  Peter Johnson
+ *
+ *  Portions based on re2c's example code.
+ *
+ *  This file is part of YASM.
+ *
+ *  YASM is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  YASM is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+#include "src/parsers/nasm/nasm-defs.h"
+#include "nasm-bison.h"
+
+
+#define BSIZE  8192
+
+#define YYCTYPE                char
+#define YYCURSOR       cursor
+#define YYLIMIT                s.lim
+#define YYMARKER       s.ptr
+#define YYFILL(n)      {cursor = fill(cursor);}
+
+#define RETURN(i)      {s.cur = cursor; return i;}
+
+#define SCANINIT()     { \
+       s.tchar = cursor - s.pos; \
+       s.tline = s.cline; \
+       s.tok = cursor; \
+    }
+
+#define TOKLEN         (cursor-s.tok)
+
+void nasm_parser_cleanup(void);
+void nasm_parser_set_directive_state(void);
+int nasm_parser_lex(void);
+
+extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
+
+
+typedef struct Scanner {
+    YYCTYPE            *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    unsigned int       tchar, tline, cline;
+} Scanner;
+
+static Scanner s = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 1 };
+
+FILE *nasm_parser_in = NULL;
+
+/* Refill the scanner buffer (used as YYFILL by the generated scanner).
+ *
+ * Unless end of input has already been recorded in s.eof:
+ *  - the data from the start of the current token (s.tok) onwards is moved
+ *    to the bottom of the buffer and every scanner pointer, including
+ *    cursor, is rebased by the same amount;
+ *  - if fewer than BSIZE bytes are free past s.lim, a larger buffer is
+ *    allocated, the data is copied over and the old buffer is freed;
+ *  - up to BSIZE bytes are read via nasm_parser_input(); a short read marks
+ *    end of input and appends a final '\n'.
+ *
+ * Returns the (possibly relocated) cursor.
+ */
+static YYCTYPE *
+fill(YYCTYPE *cursor)
+{
+    if(!s.eof){
+       size_t cnt = s.tok - s.bot;
+       if(cnt){
+           /* Source and destination lie in the same buffer and may
+            * overlap, so memmove() is required here, not memcpy().
+            */
+           memmove(s.bot, s.tok, s.lim - s.tok);
+           s.tok = s.bot;
+           s.ptr -= cnt;
+           cursor -= cnt;
+           s.pos -= cnt;
+           s.lim -= cnt;
+       }
+       if((s.top - s.lim) < BSIZE){
+           char *buf = xmalloc((s.lim - s.bot) + BSIZE);
+           memcpy(buf, s.tok, s.lim - s.tok);
+           s.tok = buf;
+           s.ptr = &buf[s.ptr - s.bot];
+           cursor = &buf[cursor - s.bot];
+           s.pos = &buf[s.pos - s.bot];
+           s.lim = &buf[s.lim - s.bot];
+           s.top = &s.lim[BSIZE];
+           if (s.bot)
+               xfree(s.bot);
+           s.bot = buf;
+       }
+       if((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE){
+           /* short read: remember where input ends and terminate it
+            * with a newline so the last line is always complete */
+           s.eof = &s.lim[cnt]; *s.eof++ = '\n';
+       }
+       s.lim += cnt;
+    }
+    return cursor;
+}
+
+/* Release the scanner's input buffer and reset every scanner pointer so
+ * that none of them is left pointing into freed memory.
+ */
+void
+nasm_parser_cleanup(void)
+{
+    if (s.bot)
+       xfree(s.bot);
+    /* s is file-static and nasm_parser_lex() reads s.cur and s.eof on entry */
+    s.bot = s.tok = s.ptr = s.cur = s.pos = s.lim = s.top = s.eof = NULL;
+}
+
+/* starting size of string buffer */
+#define STRBUF_ALLOC_SIZE      128
+
+/* string buffer used when parsing strings/character constants */
+static char *strbuf = (char *)NULL;
+
+/* length of strbuf (including terminating NUL character) */
+static size_t strbuf_size = 0;
+
+/* last "base" label for local (.) labels */
+char *nasm_parser_locallabel_base = (char *)NULL;
+size_t nasm_parser_locallabel_base_len = 0;
+
+static int linechg_numcount;
+
+/*!re2c
+  any = [\000-\377];
+  digit = [0-9];
+  iletter = [a-zA-Z];
+  bindigit = [01];
+  octdigit = [0-7];
+  hexdigit = [0-9a-fA-F];
+  ws = [ \t\r];
+  quot = ["'];
+  A = [aA];
+  B = [bB];
+  C = [cC];
+  D = [dD];
+  E = [eE];
+  F = [fF];
+  G = [gG];
+  H = [hH];
+  I = [iI];
+  J = [jJ];
+  K = [kK];
+  L = [lL];
+  M = [mM];
+  N = [nN];
+  O = [oO];
+  P = [pP];
+  Q = [qQ];
+  R = [rR];
+  S = [sS];
+  T = [tT];
+  U = [uU];
+  V = [vV];
+  W = [wW];
+  X = [xX];
+  Y = [yY];
+  Z = [zZ];
+*/
+
+static enum {
+    INITIAL,
+    DIRECTIVE,
+    DIRECTIVE2,
+    LINECHG,
+    LINECHG2
+} state = INITIAL;
+
+/* Put the lexer into the DIRECTIVE state.  The next call to
+ * nasm_parser_lex() then jumps straight to its `directive' scanner.
+ */
+void
+nasm_parser_set_directive_state(void)
+{
+    state = DIRECTIVE;
+}
+
+int
+nasm_parser_lex(void)
+{
+    YYCTYPE *cursor = s.cur;
+    YYCTYPE endch;
+    size_t count, len;
+    YYCTYPE savech;
+    arch_check_id_retval check_id_ret;
+
+    /* Catch EOF */
+    if (s.eof && cursor == s.eof)
+       return 0;
+
+    /* Jump to proper "exclusive" states */
+    switch (state) {
+       case DIRECTIVE:
+           goto directive;
+       case LINECHG:
+           goto linechg;
+       case LINECHG2:
+           goto linechg2;
+       default:
+           break;
+    }
+
+scan:
+    SCANINIT();
+
+    /*!re2c
+       /* standard decimal integer */
+       digit+ {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.intn = intnum_new_dec(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+       /* 10010011b - binary number */
+
+       bindigit+ "b" {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */
+           yylval.intn = intnum_new_bin(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* 777q - octal number */
+       octdigit+ "q" {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */
+           yylval.intn = intnum_new_oct(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* 0AAh form of hexadecimal number.  Only the leading decimal digit
+        * is mandatory, so single-digit values such as 5h are accepted.
+        */
+       digit hexdigit* "h" {
+           s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */
+           yylval.intn = intnum_new_hex(s.tok);
+           RETURN(INTNUM);
+       }
+
+       /* $0AA and 0xAA forms of hexadecimal number */
+       (("$" digit) | "0x") hexdigit+ {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           if (s.tok[1] == 'x')
+               yylval.intn = intnum_new_hex(s.tok+2);  /* skip 0 and x */
+           else
+               yylval.intn = intnum_new_hex(s.tok+1);  /* don't skip 0 */
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+
+       /* floating point value */
+       digit+ "." digit* ("e" [-+]? digit+)? {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.flt = floatnum_new(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(FLTNUM);
+       }
+
+       /* string/character constant values */
+       quot {
+           endch = s.tok[0];
+           goto stringconst;
+       }
+
+       /* %line linenum+lineinc filename */
+       "%line" {
+           state = LINECHG;
+           linechg_numcount = 0;
+           RETURN(LINE);
+       }
+
+       /* size specifiers */
+       B Y T E         { yylval.int_info = 1; RETURN(BYTE); }
+       W O R D         { yylval.int_info = 2; RETURN(WORD); }
+       D W O R D       { yylval.int_info = 4; RETURN(DWORD); }
+       Q W O R D       { yylval.int_info = 8; RETURN(QWORD); }
+       T W O R D       { yylval.int_info = 10; RETURN(TWORD); }
+       D Q W O R D     { yylval.int_info = 16; RETURN(DQWORD); }
+
+       /* pseudo-instructions */
+       D B             { yylval.int_info = 1; RETURN(DECLARE_DATA); }
+       D W             { yylval.int_info = 2; RETURN(DECLARE_DATA); }
+       D D             { yylval.int_info = 4; RETURN(DECLARE_DATA); }
+       D Q             { yylval.int_info = 8; RETURN(DECLARE_DATA); }
+       D T             { yylval.int_info = 10; RETURN(DECLARE_DATA); }
+
+       R E S B         { yylval.int_info = 1; RETURN(RESERVE_SPACE); }
+       R E S W         { yylval.int_info = 2; RETURN(RESERVE_SPACE); }
+       R E S D         { yylval.int_info = 4; RETURN(RESERVE_SPACE); }
+       R E S Q         { yylval.int_info = 8; RETURN(RESERVE_SPACE); }
+       R E S T         { yylval.int_info = 10; RETURN(RESERVE_SPACE); }
+
+       I N C B I N     { RETURN(INCBIN); }
+
+       E Q U           { RETURN(EQU); }
+
+       T I M E S       { RETURN(TIMES); }
+
+       S E G           { RETURN(SEG); }
+       W R T           { RETURN(WRT); }
+
+       N O S P L I T   { RETURN(NOSPLIT); }
+
+       T O             { RETURN(TO); }
+
+       /* operators */
+       "<<"                    { RETURN(LEFT_OP); }
+       ">>"                    { RETURN(RIGHT_OP); }
+       "//"                    { RETURN(SIGNDIV); }
+       "%%"                    { RETURN(SIGNMOD); }
+       "$$"                    { RETURN(START_SECTION_ID); }
+       [-+|^*&/%~$():=,\[]     { RETURN(s.tok[0]); }
+
+       /* handle ] separately for directives */
+       "]" {
+           if (state == DIRECTIVE2)
+               state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       /* special non-local ..@label and labels like ..start */
+       ".." [a-zA-Z0-9_$#@~.?]+ {
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(SPECIAL_ID);
+       }
+
+       /* local label (.label): returned as LOCAL_ID with the last non-local
+        * label prepended, or as a plain ID inside a directive.
+        */
+       "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+           /* override local labels in directive state */
+           if (state == DIRECTIVE2) {
+               yylval.str_val = xstrndup(s.tok, TOKLEN);
+               RETURN(ID);
+           } else if (!nasm_parser_locallabel_base) {
+               /* Copy the token first: s.tok is not NUL-terminated, so the
+                * copy is what gets handed to the %s in the warning.
+                */
+               yylval.str_val = xstrndup(s.tok, TOKLEN);
+               Warning(_("no non-local label before `%s'"), yylval.str_val);
+           } else {
+               len = TOKLEN + nasm_parser_locallabel_base_len;
+               yylval.str_val = xmalloc(len + 1);
+               strcpy(yylval.str_val, nasm_parser_locallabel_base);
+               strncat(yylval.str_val, s.tok, TOKLEN);
+               yylval.str_val[len] = '\0';
+           }
+
+           RETURN(LOCAL_ID);
+       }
+
+       /* forced identifier */
+       "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(ID);
+       }
+
+       /* identifier that may be a register, instruction, etc. */
+       [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+                                                           s.tok);
+           s.tok[TOKLEN] = savech;
+           switch (check_id_ret) {
+               case ARCH_CHECK_ID_NONE:
+                   /* Just an identifier, return as such. */
+                   yylval.str_val = xstrndup(s.tok, TOKLEN);
+                   RETURN(ID);
+               case ARCH_CHECK_ID_INSN:
+                   RETURN(INSN);
+               case ARCH_CHECK_ID_PREFIX:
+                   RETURN(PREFIX);
+               case ARCH_CHECK_ID_REG:
+                   RETURN(REG);
+               case ARCH_CHECK_ID_SEGREG:
+                   RETURN(SEGREG);
+               case ARCH_CHECK_ID_TARGETMOD:
+                   RETURN(TARGETMOD);
+               default:
+                   Warning(_("Arch feature not supported, treating as identifier"));
+                   yylval.str_val = xstrndup(s.tok, TOKLEN);
+                   RETURN(ID);
+           }
+       }
+
+       ";" (any \ [\n])*       { goto scan; }
+
+       ws+                     { goto scan; }
+
+       "\n"                    { state = INITIAL; RETURN(s.tok[0]); }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto scan;
+       }
+    */
+
+    /* %line linenum+lineinc filename */
+linechg:
+    SCANINIT();
+
+    /*!re2c
+       digit+ {
+           linechg_numcount++;
+           savech = s.tok[TOKLEN];
+           s.tok[TOKLEN] = '\0';
+           yylval.intn = intnum_new_dec(s.tok);
+           s.tok[TOKLEN] = savech;
+           RETURN(INTNUM);
+       }
+
+       "\n" {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       "+" {
+           RETURN(s.tok[0]);
+       }
+
+       ws+ {
+           /* Only the whitespace that follows the second number
+            * (linenum+lineinc) introduces the filename; any other
+            * whitespace is skipped and number scanning continues.
+            */
+           if (linechg_numcount == 2) {
+               state = LINECHG2;
+               goto linechg2;
+           }
+           goto linechg;
+       }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto linechg;
+       }
+    */
+
+linechg2:
+    SCANINIT();
+
+    /*!re2c
+       "\n" {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       /* skip carriage returns and keep scanning for the filename */
+       "\r" { goto linechg2; }
+
+       (any \ [\r\n])+ {
+           state = LINECHG;
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(FILENAME);
+       }
+    */
+
+    /* directive: [name value] */
+directive:
+    SCANINIT();
+
+    /*!re2c
+       [\]\n] {
+           state = INITIAL;
+           RETURN(s.tok[0]);
+       }
+
+       iletter+ {
+           state = DIRECTIVE2;
+           yylval.str_val = xstrndup(s.tok, TOKLEN);
+           RETURN(DIRECTIVE_NAME);
+       }
+
+       any {
+           if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+               Warning(_("ignoring unrecognized character `%s'"),
+                       conv_unprint(s.tok[0]));
+           goto directive;
+       }
+    */
+
+    /* string/character constant values */
+stringconst:
+    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+    strbuf_size = STRBUF_ALLOC_SIZE;
+    count = 0;
+
+stringconst_scan:
+    SCANINIT();
+
+    /*!re2c
+       "\n"    {
+           if (cursor == s.eof)
+               Error(_("unexpected end of file in string"));
+           else
+               Error(_("unterminated string"));
+           strbuf[count] = '\0';
+           yylval.str_val = strbuf;
+           RETURN(STRING);
+       }
+
+       any     {
+           if (s.tok[0] == endch) {
+               strbuf[count] = '\0';
+               yylval.str_val = strbuf;
+               RETURN(STRING);
+           }
+
+           strbuf[count++] = s.tok[0];
+           if (count >= strbuf_size) {
+               strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
+               strbuf_size += STRBUF_ALLOC_SIZE;
+           }
+
+           goto stringconst_scan;
+       }
+    */
+}
diff --git a/src/parsers/nasm/token.l.in b/src/parsers/nasm/token.l.in
deleted file mode 100644 (file)
index 7f45855..0000000
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * NASM-compatible lex lexer
- *
- *  Copyright (C) 2001  Peter Johnson
- *
- *  This file is part of YASM.
- *
- *  YASM is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  YASM is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#include "bitvect.h"
-
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-#include "nasm-bison.h"
-
-
-#define YY_NEVER_INTERACTIVE   1
-
-int nasm_parser_lex(void);
-
-extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
-#undef YY_INPUT
-#define YY_INPUT(b, r, ms)     (r = nasm_parser_input(b, ms))
-
-/* starting size of string buffer */
-#define STRBUF_ALLOC_SIZE      128
-
-/* string buffer used when parsing strings/character constants */
-static char *strbuf = (char *)NULL;
-
-/* length of strbuf (including terminating NULL character) */
-static size_t strbuf_size = 0;
-
-/* last "base" label for local (.) labels */
-char *nasm_parser_locallabel_base = (char *)NULL;
-
-static int linechg_numcount;
-
-%}
-%option noyywrap
-%option nounput
-%option case-insensitive
-%option never-interactive
-%option prefix="nasm_parser_"
-%option outfile="lex.yy.c"
-
-%x DIRECTIVE LINECHG LINECHG2
-%s DIRECTIVE2
-
-DIGIT    [0-9]
-BINDIGIT [01]
-OCTDIGIT [0-7]
-HEXDIGIT [0-9a-f]
-WS       [ \t\r]
-
-%%
-
-    /* standard decimal integer */
-{DIGIT}+ {
-    yylval.intn = intnum_new_dec(yytext);
-    return INTNUM;
-}
-
-    /* 10010011b - binary number */
-{BINDIGIT}+b {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'b' */
-    yylval.intn = intnum_new_bin(yytext);
-    return INTNUM;
-}
-
-    /* 777q - octal number */
-{OCTDIGIT}+q {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'q' */
-    yylval.intn = intnum_new_oct(yytext);
-    return INTNUM;
-}
-
-    /* 0AAh form of hexidecimal number */
-{DIGIT}{HEXDIGIT}*h {
-    yytext[strlen(yytext)-1] = '\0';  /* strip off 'h' */
-    yylval.intn = intnum_new_hex(yytext);
-    return INTNUM;
-}
-
-    /* $0AA and 0xAA forms of hexidecimal number */
-(\${DIGIT}|0x){HEXDIGIT}+ {
-    if (yytext[1] == 'x')
-       yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */
-    else
-       yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */
-    return INTNUM;
-}
-
-    /* floating point value */
-{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
-    yylval.flt = floatnum_new(yytext);
-    return FLTNUM;
-}
-
-    /* string/character constant values */
-["']   {
-    int inch, count;
-    char endch = yytext[0];
-
-    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
-
-    strbuf_size = STRBUF_ALLOC_SIZE;
-    inch = input();
-    count = 0;
-    while (inch != EOF && inch != endch && inch != '\n') {
-       strbuf[count++] = inch;
-       if (count >= strbuf_size) {
-           strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
-           if (!strbuf)
-               Fatal(FATAL_NOMEM);
-           strbuf_size += STRBUF_ALLOC_SIZE;
-       }
-       inch = input();
-    }
-
-    if (inch == '\n')
-       Error(_("unterminated string"));
-    else if (inch == EOF)
-       Error(_("unexpected end of file in string"));
-
-    strbuf[count] = '\0';
-
-    yylval.str_val = strbuf;
-    return STRING;
-}
-
-    /* %line linenum+lineinc filename */
-^%line                 { BEGIN LINECHG; linechg_numcount = 0; return LINE; }
-<LINECHG>{DIGIT}+      {
-    linechg_numcount++;
-    yylval.intn = intnum_new_dec(yytext);
-    return INTNUM;
-}
-<LINECHG>\n            { BEGIN INITIAL; return '\n'; }
-<LINECHG>[+]           { return yytext[0]; }
-<LINECHG>{WS}+         {
-    if (linechg_numcount == 2)
-       BEGIN LINECHG2;
-}
-<LINECHG2>\n           { BEGIN INITIAL; return '\n'; }
-<LINECHG2>\r           ;
-<LINECHG2>[^\r\n]+     {
-    BEGIN LINECHG;
-    yylval.str_val = xstrdup(yytext);
-    return FILENAME;
-}
-
-    /* directive: [name value] */
-^{WS}*"["          { BEGIN DIRECTIVE; return '['; }
-<DIRECTIVE>"]"     { BEGIN INITIAL; return ']'; }
-<DIRECTIVE2>"]"            { BEGIN INITIAL; return ']'; }
-<DIRECTIVE>\n      { BEGIN INITIAL; return '\n'; }
-<DIRECTIVE2>\n     { BEGIN INITIAL; return '\n'; }
-
-<DIRECTIVE>[a-z]+   {
-    BEGIN DIRECTIVE2;
-    yylval.str_val = xstrdup(yytext);
-    return DIRECTIVE_NAME;
-}
-<DIRECTIVE>. {
-    if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
-       Warning(_("ignoring unrecognized character `%s'"),
-               conv_unprint(yytext[0]));
-}
-
-    /* override local labels in directive state */
-<DIRECTIVE2>\.[a-z0-9_$#@~.?]* {
-    yylval.str_val = xstrdup(yytext);
-    return ID;
-}
-
-    /* size specifiers */
-byte   { yylval.int_info = 1; return BYTE; }
-word   { yylval.int_info = 2; return WORD; }
-dword  { yylval.int_info = 4; return DWORD; }
-qword  { yylval.int_info = 8; return QWORD; }
-tword  { yylval.int_info = 10; return TWORD; }
-dqword { yylval.int_info = 16; return DQWORD; }
-
-    /* pseudo-instructions */
-db     { yylval.int_info = 1; return DECLARE_DATA; }
-dw     { yylval.int_info = 2; return DECLARE_DATA; }
-dd     { yylval.int_info = 4; return DECLARE_DATA; }
-dq     { yylval.int_info = 8; return DECLARE_DATA; }
-dt     { yylval.int_info = 10; return DECLARE_DATA; }
-
-resb   { yylval.int_info = 1; return RESERVE_SPACE; }
-resw   { yylval.int_info = 2; return RESERVE_SPACE; }
-resd   { yylval.int_info = 4; return RESERVE_SPACE; }
-resq   { yylval.int_info = 8; return RESERVE_SPACE; }
-rest   { yylval.int_info = 10; return RESERVE_SPACE; }
-
-incbin { return INCBIN; }
-
-equ    { return EQU; }
-
-times  { return TIMES; }
-
-seg    { return SEG; }
-wrt    { return WRT; }
-near   { return NEAR; }
-short  { return SHORT; }
-far    { return FAR; }
-
-nosplit        { return NOSPLIT; }
-
-org    { return ORG; }
-
-to     { return TO; }
-
-    /* operand size overrides */
-o16    { yylval.int_info = 16; return OPERSIZE; }
-o32    { yylval.int_info = 32; return OPERSIZE; }
-    /* address size overrides */
-a16    { yylval.int_info = 16; return ADDRSIZE; }
-a32    { yylval.int_info = 32; return ADDRSIZE; }
-
-    /* instruction prefixes */
-lock   { return LOCK; }
-repne  { return REPNZ; }
-repnz  { return REPNZ; }
-rep    { return REP; }
-repe   { return REPZ; }
-repz   { return REPZ; }
-
-    /* control, debug, and test registers */
-cr4            { yylval.int_info = 4; return CR4; }
-cr[023]                { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; }
-dr[0-367]      { yylval.int_info = yytext[2]-'0'; return DRREG; }
-tr[3-7]                { yylval.int_info = yytext[2]-'0'; return TRREG; }
-
-    /* floating point, MMX, and SSE registers */
-st0        { yylval.int_info = 0; return ST0; }
-st[1-7]            { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; }
-mm[0-7]            { yylval.int_info = yytext[2]-'0'; return MMXREG; }
-xmm[0-7]    { yylval.int_info = yytext[3]-'0'; return XMMREG; }
-
-    /* integer registers */
-eax    { yylval.int_info = 0; return REG_EAX; }
-ecx    { yylval.int_info = 1; return REG_ECX; }
-edx    { yylval.int_info = 2; return REG_EDX; }
-ebx    { yylval.int_info = 3; return REG_EBX; }
-esp    { yylval.int_info = 4; return REG_ESP; }
-ebp    { yylval.int_info = 5; return REG_EBP; }
-esi    { yylval.int_info = 6; return REG_ESI; }
-edi    { yylval.int_info = 7; return REG_EDI; }
-
-ax     { yylval.int_info = 0; return REG_AX; }
-cx     { yylval.int_info = 1; return REG_CX; }
-dx     { yylval.int_info = 2; return REG_DX; }
-bx     { yylval.int_info = 3; return REG_BX; }
-sp     { yylval.int_info = 4; return REG_SP; }
-bp     { yylval.int_info = 5; return REG_BP; }
-si     { yylval.int_info = 6; return REG_SI; }
-di     { yylval.int_info = 7; return REG_DI; }
-
-al     { yylval.int_info = 0; return REG_AL; }
-cl     { yylval.int_info = 1; return REG_CL; }
-dl     { yylval.int_info = 2; return REG_DL; }
-bl     { yylval.int_info = 3; return REG_BL; }
-ah     { yylval.int_info = 4; return REG_AH; }
-ch     { yylval.int_info = 5; return REG_CH; }
-dh     { yylval.int_info = 6; return REG_DH; }
-bh     { yylval.int_info = 7; return REG_BH; }
-
-    /* segment registers */
-es     { yylval.int_info = 0; return REG_ES; }
-cs     { yylval.int_info = 1; return REG_CS; }
-ss     { yylval.int_info = 2; return REG_SS; }
-ds     { yylval.int_info = 3; return REG_DS; }
-fs     { yylval.int_info = 4; return REG_FS; }
-gs     { yylval.int_info = 5; return REG_GS; }
-
-    /* operators */
-"<<"                   { return LEFT_OP; }
-">>"                   { return RIGHT_OP; }
-"//"                   { return SIGNDIV; }
-"%%"                   { return SIGNMOD; }
-"$$"                   { return START_SECTION_ID; }
-[-+|^&*/%~$():[\]=,]   { return yytext[0]; }
-
-    /* special non-local ..@label and labels like ..start */
-\.\.[a-z0-9_$#@~.?]+ {
-    yylval.str_val = xstrdup(yytext);
-    return SPECIAL_ID;
-}
-
-    /* local label (.label) */
-\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
-    if (!nasm_parser_locallabel_base) {
-       Warning(_("no non-local label before `%s'"), yytext);
-       yylval.str_val = xstrdup(yytext);
-    } else {
-       yylval.str_val = xmalloc(strlen(yytext) +
-                                strlen(nasm_parser_locallabel_base) + 1);
-       strcpy(yylval.str_val, nasm_parser_locallabel_base);
-       strcat(yylval.str_val, yytext);
-    }
-
-    return LOCAL_ID;
-}
-
-    /* instructions */
-    /* @INSTRUCTIONS@ */
-
-    /* label */
-[a-z_?][a-z0-9_$#@~.?]* {
-    yylval.str_val = xstrdup(yytext);
-    return ID;
-}
-
-;.*    ;
-
-{WS}+  ;
-
-\n     return '\n';
-
-.      {
-    if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
-       Warning(_("ignoring unrecognized character `%s'"),
-               conv_unprint(yytext[0]));
-}
-
index f34115126ddb074fdf88366bc99aa1c8c612c3bf..f702596c608a6c3965e4b8e720c77451a9845def 100644 (file)
@@ -25,7 +25,7 @@
 #include "bytecode.h"
 #include "bc-int.h"
 #include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
 
 START_TEST(test_x86_ea_new_reg)
 {
index ec9c001d237e8a09f0db84a337b10c7440fdaa8e..86f7f320084b87ff4903cdb713548db75b632671 100644 (file)
@@ -32,7 +32,7 @@
 
 #include "bytecode.h"
 #include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
 
 typedef enum {
     REG_AX = 0,