autom4te.cache
-nasm-bison.y
nasm-bison.c
nasm-bison.h
-nasm-token.l
nasm-token.c
yapp-token.c
yasm
}
/* Get initial BITS setting from object format */
- x86_mode_bits = cur_objfmt->default_mode_bits;
+ /*x86_mode_bits = cur_objfmt->default_mode_bits;*/
/* Parse! */
sections = cur_parser->do_parse(cur_parser, in, in_filename);
#include "util.h"
/*@unused@*/ RCSID("$IdPath$");
+#include "globals.h"
+#include "expr.h"
+
#include "bytecode.h"
#include "arch.h"
+
arch *cur_arch;
+insn_operand *
+operand_new_reg(unsigned long reg)
+{ /* Allocates a new operand of type INSN_OPERAND_REG that holds reg. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_REG;
+ retval->data.reg = reg;
+ retval->targetmod = 0; /* no target modifier yet */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_segreg(unsigned long segreg)
+{ /* Allocates a new operand of type INSN_OPERAND_SEGREG that holds segreg. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_SEGREG;
+ retval->data.reg = segreg; /* segment registers share the data.reg slot */
+ retval->targetmod = 0; /* no target modifier yet */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_mem(/*@only@*/ effaddr *ea)
+{ /* Allocates a new operand of type INSN_OPERAND_MEMORY that refers to ea. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_MEMORY;
+ retval->data.ea = ea; /* the pointer itself is stored; ea is not copied */
+ retval->targetmod = 0; /* no target modifier yet */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_imm(/*@only@*/ expr *val)
+{ /* Wraps val as an operand: REG if val is just a register, IMM otherwise. */
+ insn_operand *retval;
+ const unsigned long *reg;
+
+ reg = expr_get_reg(&val, 0); /* 0 = do not simplify val first */
+ if (reg) {
+ retval = operand_new_reg(*reg); /* register value is copied out of val here */
+ expr_delete(val); /* value already copied, so the expr can be released */
+ } else {
+ retval = xmalloc(sizeof(insn_operand));
+ retval->type = INSN_OPERAND_IMM;
+ retval->data.val = val; /* the pointer itself is stored; val is not copied */
+ retval->targetmod = 0; /* no target modifier yet */
+ retval->size = 0; /* 0 = size not user-specified */
+ }
+
+ return retval;
+}
+
+void
+operand_print(FILE *f, const insn_operand *op)
+{ /* Writes a description of op to f, indented by the current indent_level. */
+ switch (op->type) { /* type-specific part first; no default case */
+ case INSN_OPERAND_REG:
+ fprintf(f, "%*sReg=", indent_level, "");
+ cur_arch->reg_print(f, op->data.reg); /* register naming is left to the arch */
+ fprintf(f, "\n");
+ break;
+ case INSN_OPERAND_SEGREG:
+ fprintf(f, "%*sSegReg=", indent_level, "");
+ cur_arch->segreg_print(f, op->data.reg); /* segreg value lives in data.reg */
+ fprintf(f, "\n");
+ break;
+ case INSN_OPERAND_MEMORY:
+ fprintf(f, "%*sMemory=\n", indent_level, "");
+ indent_level++; /* print the effective address one level deeper */
+ ea_print(f, op->data.ea);
+ indent_level--; /* restore the caller's indentation */
+ break;
+ case INSN_OPERAND_IMM:
+ fprintf(f, "%*sImm=", indent_level, "");
+ expr_print(f, op->data.val);
+ fprintf(f, "\n");
+ break;
+ }
+ fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod); /* common fields: printed for every type */
+ fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size);
+}
+
+void
+ops_delete(insn_operandhead *headp, int content)
+{ /* Frees every operand in headp; if content is nonzero, also frees each operand's ea/expr. */
+ insn_operand *cur, *next;
+
+ cur = STAILQ_FIRST(headp);
+ while (cur) {
+ next = STAILQ_NEXT(cur, link); /* fetch the successor before cur is freed */
+ if (content)
+ switch (cur->type) {
+ case INSN_OPERAND_MEMORY:
+ ea_delete(cur->data.ea);
+ break;
+ case INSN_OPERAND_IMM:
+ expr_delete(cur->data.val);
+ break;
+ default: /* REG/SEGREG keep a plain value in data.reg: nothing to free */
+ break;
+ }
+ xfree(cur);
+ cur = next;
+ }
+ STAILQ_INIT(headp); /* reset the head so the list reads as empty */
+}
+
+/*@null@*/ insn_operand *
+ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op)
+{ /* Links op at the tail of headp and returns it; a NULL op is ignored. */
+ if (op) {
+ STAILQ_INSERT_TAIL(headp, op, link); /* op itself is linked in, not a copy */
+ return op;
+ }
+ return (insn_operand *)NULL; /* nothing was appended */
+}
+
+void
+ops_print(FILE *f, const insn_operandhead *headp)
+{ /* Prints each operand of headp to f, in list order. */
+ insn_operand *cur;
+
+ STAILQ_FOREACH (cur, headp, link)
+ operand_print(f, cur); /* per-operand formatting is done by operand_print() */
+}
/* $IdPath$
* Architecture header file
*
- * Copyright (C) 2001 Peter Johnson
+ * Copyright (C) 2002 Peter Johnson
*
* This file is part of YASM.
*
#ifndef YASM_ARCH_H
#define YASM_ARCH_H
+typedef enum arch_check_id_retval {
+ ARCH_CHECK_ID_NONE = 0, /* just a normal identifier */
+ ARCH_CHECK_ID_INSN, /* an instruction */
+ ARCH_CHECK_ID_PREFIX, /* an instruction prefix */
+ ARCH_CHECK_ID_REG, /* a register */
+ ARCH_CHECK_ID_SEGREG, /* a segment register (for memory overrides) */
+ ARCH_CHECK_ID_TARGETMOD /* a target modifier (for jumps) */
+} arch_check_id_retval;
+
+typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand)
+ insn_operandhead;
+
+typedef struct insn_operand insn_operand;
+
+/* Different assemblers order instruction operands differently. Also, some
+ * differ on how exactly various registers are specified. There's no great
+ * solution to this, as the parsers aren't supposed to have knowledge of the
+ * architectural internals, and the architecture is supposed to be parser-
+ * independent. To make things work, as a rather hackish solution, we give the
+ * architecture a little knowledge about the general "flavor" of the parser,
+ * and let the architecture decide what to do with it. Most architectures will
+ * probably not even use this, but it's required for some (x86 in particular)
+ * for correct behavior on all parsers.
+ */
+typedef enum arch_syntax_flavor {
+ ARCH_SYNTAX_FLAVOR_NASM = 1, /* like NASM */
+ ARCH_SYNTAX_FLAVOR_GAS /* like GAS */
+} arch_syntax_flavor;
+
struct arch {
/* one-line description of the architecture */
const char *name;
/* keyword used to select architecture */
const char *keyword;
+ struct {
+ /* All "data" below starts the parse initialized to 0. Thus, it is
+ * okay for a function to use/check previously stored data to see if
+ * it's been called before on the same piece of data.
+ */
+
+ /* Switches available instructions/registers/etc. based on a
+ * user-specified CPU identifier. Should modify behavior ONLY of
+ * parse functions! The bytecode and output functions should be able
+ * to handle any CPU.
+ */
+ void (*switch_cpu) (const char *cpuid);
+
+ /* Checks a generic identifier to see if it matches architecture
+ * specific names for instructions, registers, etc (see the
+ * arch_check_id_retval enum above for the various types this function
+ * can detect & return). Unrecognized identifiers should be returned
+ * as NONE so they can be treated as normal symbols. Any additional
+ * data beyond just the type (almost always necessary) should be
+ * returned into the space provided by the data parameter.
+ * Note: even though this is passed a data[4], only data[0] should be
+ * used for TARGETMOD, REG, and SEGREG return values.
+ */
+ arch_check_id_retval (*check_identifier) (unsigned long data[4],
+ const char *id);
+
+ /* Architecture-specific directive support. Returns 1 if directive was
+ * not recognized. Returns 0 if directive was recognized, even if it
+ * wasn't valid. Should modify behavior ONLY of parse functions, much
+ * like switch_cpu() above.
+ */
+ int (*directive) (const char *name, valparamhead *valparams,
+ /*@null@*/ valparamhead *objext_valparams,
+ sectionhead *headp);
+
+ /* Creates an instruction. Creates a bytecode by matching the
+ * instruction data and the parameters given with a valid instruction.
+ * If no match is found (the instruction is invalid), returns NULL.
+ * All zero data indicates an empty instruction should be created.
+ */
+ /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4],
+ int num_operands, /*@null@*/
+ insn_operandhead *operands);
+
+ /* Handle an instruction prefix by modifying bc as necessary. */
+ void (*handle_prefix) (bytecode *bc, const unsigned long data[4]);
+
+ /* Handle a segment register instruction prefix by modifying bc as
+ * necessary.
+ */
+ void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg);
+
+ /* Handle memory expression segment overrides by modifying ea as
+ * necessary.
+ */
+ void (*handle_seg_override) (effaddr *ea, unsigned long segreg);
+
+ /* Convert an expression into an effective address. */
+ effaddr * (*ea_new_expr) (/*@keep@*/ expr *e);
+ } parse;
+
struct {
/* Maximum used bytecode type value+1. Should be set to
* BYTECODE_TYPE_BASE if no additional bytecode types are defined by
const section *sect, void *d,
output_expr_func output_expr);
} bc;
+
+ /* Gets the equivalent register size in bytes. Returns 0 if there is no
+ * suitable equivalent size.
+ */
+ unsigned int (*get_reg_size) (unsigned long reg);
+
+ void (*reg_print) (FILE *f, unsigned long reg);
+ void (*segreg_print) (FILE *f, unsigned long segreg);
+
+ /* Deletes the arch-specific data in ea. May be NULL if no special
+ * deletion is required (e.g. there's no dynamically allocated pointers
+ * in the ea data).
+ */
+ void (*ea_data_delete) (effaddr *ea);
+
+ void (*ea_data_print) (FILE *f, const effaddr *ea);
+};
+
+struct insn_operand {
+ /*@reldef@*/ STAILQ_ENTRY(insn_operand) link;
+
+ enum {
+ INSN_OPERAND_REG = 1, /* a register */
+ INSN_OPERAND_SEGREG, /* a segment register */
+ INSN_OPERAND_MEMORY, /* an effective address (memory reference) */
+ INSN_OPERAND_IMM /* an immediate or jump target */
+ } type;
+
+ union {
+ unsigned long reg; /* arch data for reg/segreg */
+ effaddr *ea; /* effective address for memory references */
+ expr *val; /* value of immediate or jump target */
+ } data;
+
+ unsigned long targetmod; /* arch target modifier, 0 if none */
+
+ /* Specified size of the operand, in bytes. 0 if not user-specified. */
+ unsigned int size;
};
+/* insn_operand constructors. operand_new_imm() will look for cases of a
+ * single register and create an INSN_OPERAND_REG variant of insn_operand.
+ */
+insn_operand *operand_new_reg(unsigned long reg);
+insn_operand *operand_new_segreg(unsigned long segreg);
+insn_operand *operand_new_mem(/*@only@*/ effaddr *ea);
+insn_operand *operand_new_imm(/*@only@*/ expr *val);
+
+void operand_print(FILE *f, const insn_operand *op);
+
+#define ops_initialize(headp) STAILQ_INIT(headp)
+#define ops_first(headp) STAILQ_FIRST(headp)
+#define ops_next(cur) STAILQ_NEXT(cur, link)
+
+/* Deletes operands linked list. Deletes content of each operand if content is
+ * nonzero.
+ */
+void ops_delete(insn_operandhead *headp, int content);
+
+/* Adds op to the list of operands headp.
+ * NOTE: Does not make a copy of op; so don't pass this function
+ * static or local variables, and discard the op pointer after calling
+ * this function. If op was actually appended (it wasn't NULL), then
+ * returns op, otherwise returns NULL.
+ */
+/*@null@*/ insn_operand *ops_append(insn_operandhead *headp,
+ /*@returned@*/ /*@null@*/ insn_operand *op);
+
+void ops_print(FILE *f, const insn_operandhead *headp);
+
/* Available architectures */
-#include "arch/x86/x86arch.h"
extern arch x86_arch;
extern arch *cur_arch;
return im;
}
+const expr *
+ea_get_disp(const effaddr *ptr)
+{ /* Read-only accessor for the displacement expression stored in ptr. */
+ return ptr->disp; /* the stored pointer is returned; nothing is copied */
+}
+
void
ea_set_len(effaddr *ptr, unsigned char len)
{
ptr->nosplit = nosplit;
}
+/*@-nullstate@*/
+void
+ea_delete(effaddr *ea)
+{ /* Releases ea: arch-specific data first (if any), then its displacement expr, then ea itself. */
+ if (cur_arch->ea_data_delete) /* hook is optional; NULL means no special deletion */
+ cur_arch->ea_data_delete(ea);
+ expr_delete(ea->disp);
+ xfree(ea);
+}
+/*@=nullstate@*/
+
+/*@-nullstate@*/
+void
+ea_print(FILE *f, const effaddr *ea)
+{ /* Writes ea's displacement, length and nosplit flag to f, then any arch-specific data. */
+ fprintf(f, "%*sDisp=", indent_level, "");
+ expr_print(f, ea->disp);
+ fprintf(f, "\n%*sLen=%u\n", indent_level, "", (unsigned int)ea->len); /* leading \n ends the Disp= line */
+ fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit);
+ if (cur_arch->ea_data_print) /* hook is skipped when the arch leaves it NULL */
+ cur_arch->ea_data_print(f, ea);
+}
+/*@=nullstate@*/
+
void
bc_set_multiple(bytecode *bc, expr *e)
{
break;
case BC_OBJFMT_DATA:
objfmt_data = bc_get_data(bc);
+ assert(cur_objfmt != NULL);
if (cur_objfmt->bc_objfmt_data_delete)
cur_objfmt->bc_objfmt_data_delete(objfmt_data->type,
objfmt_data->data);
case BC_OBJFMT_DATA:
objfmt_data = bc_get_const_data(bc);
fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, "");
+ assert(cur_objfmt != NULL);
if (cur_objfmt->bc_objfmt_data_print)
cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type,
objfmt_data->data);
expr_expand_labelequ(*tempp, sect, 1, resolve_label);
num = expr_get_intnum(tempp);
if (!num) {
- if (expr_contains(temp, EXPR_FLOAT))
+ if (temp && expr_contains(temp, EXPR_FLOAT))
ErrorAt(line,
_("expression must not contain floating point value"));
retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
case BC_ALIGN:
/* TODO */
InternalError(_("TODO: align bytecode not implemented!"));
- break;
+ /*break;*/
case BC_OBJFMT_DATA:
InternalError(_("resolving objfmt data bytecode?"));
- break;
+ /*break;*/
default:
if (bc->type < cur_arch->bc.type_max)
retval = cur_arch->bc.bc_resolve(bc, save, sect,
expr_expand_labelequ(*tempp, sect, 1, resolve_label);
num = expr_get_intnum(tempp);
if (!num) {
- if (expr_contains(temp, EXPR_FLOAT))
+ if (temp && expr_contains(temp, EXPR_FLOAT))
ErrorAt(bc->line,
_("expression must not contain floating point value"));
retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
case BC_ALIGN:
/* TODO */
InternalError(_("TODO: align bytecode not implemented!"));
- break;
+ /*break;*/
case BC_OBJFMT_DATA:
objfmt_data = bc_get_data(bc);
if (output_bc_objfmt_data)
/*@only@*/ immval *imm_new_int(unsigned long int_val);
/*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e);
+/*@observer@*/ const expr *ea_get_disp(const effaddr *ea);
void ea_set_len(effaddr *ea, unsigned char len);
void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
+void ea_delete(/*@only@*/ effaddr *ea);
+void ea_print(FILE *f, const effaddr *ea);
void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e);
expr *expn;
intnum *intn;
floatnum *flt;
- /* FIXME: reg structure is moderately x86-specific (namely size) */
- struct reg {
- unsigned char num;
- unsigned char size; /* in bits, eg AX=16, EAX=32 */
- } reg;
+ unsigned long reg;
} data;
};
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
*/
+int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d,
+ int (*func) (/*@null@*/ const ExprItem *ei,
+ /*@null@*/ void *d));
int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d,
int (*func) (/*@null@*/ ExprItem *ei,
/*@null@*/ void *d));
/* Copy entire expression EXCEPT for index "except" at *top level only*. */
expr *expr_copy_except(const expr *e, int except);
-int expr_contains(expr *e, ExprType t);
+int expr_contains(const expr *e, ExprType t);
#endif
#include "expr.h"
#include "symrec.h"
+#include "bytecode.h"
#include "section.h"
+#include "arch.h"
+
#include "expr-int.h"
}
ExprItem *
-ExprReg(unsigned char reg, unsigned char size)
+ExprReg(unsigned long reg)
{
ExprItem *e = xmalloc(sizeof(ExprItem));
e->type = EXPR_REG;
- e->data.reg.num = reg;
- e->data.reg.size = size;
+ e->data.reg = reg;
return e;
}
dest->data.flt = floatnum_copy(src->data.flt);
break;
case EXPR_REG:
- dest->data.reg.num = src->data.reg.num;
- dest->data.reg.size = src->data.reg.size;
+ dest->data.reg = src->data.reg;
break;
default:
break;
/*@=mustfree@*/
static int
-expr_contains_callback(ExprItem *ei, void *d)
+expr_contains_callback(const ExprItem *ei, void *d)
{
ExprType *t = d;
return (ei->type & *t);
}
int
-expr_contains(expr *e, ExprType t)
+expr_contains(const expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, &t, expr_contains_callback);
+ return expr_traverse_leaves_in_const(e, &t, expr_contains_callback);
}
/* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like
return func(e, d);
}
+/* Traverse over expression tree in order, calling func for each leaf
+ * (non-operation). The data pointer d is passed to each func call.
+ *
+ * Stops early (and returns 1) if func returns 1. Otherwise returns 0.
+ */
+int
+expr_traverse_leaves_in_const(const expr *e, void *d,
+ int (*func) (/*@null@*/ const ExprItem *ei,
+ /*@null@*/ void *d))
+{ /* Const variant: neither e nor the items passed to func may be modified through these pointers. */
+ int i;
+
+ if (!e) /* NULL expression has no leaves; report "not stopped" */
+ return 0;
+
+ for (i=0; i<e->numterms; i++) {
+ if (e->terms[i].type == EXPR_EXPR) { /* nested expression: descend into it */
+ if (expr_traverse_leaves_in_const(e->terms[i].data.expn, d, func))
+ return 1; /* propagate the early stop upward */
+ } else { /* leaf term: hand it to the callback */
+ if (func(&e->terms[i], d))
+ return 1; /* callback asked to stop */
+ }
+ }
+ return 0; /* visited every leaf without an early stop */
+}
+
/* Traverse over expression tree in order, calling func for each leaf
* (non-operation). The data pointer d is passed to each func call.
*
}
/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
+/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/
+const unsigned long *
+expr_get_reg(expr **ep, int simplify)
+{ /* Returns a pointer to the register value if *ep is just a register, else NULL. */
+ if (simplify)
+ *ep = expr_simplify(*ep); /* *ep is overwritten with the simplified expr */
+
+ if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_REG) /* only terms[0] of an IDENT expr is inspected */
+ return &((*ep)->terms[0].data.reg); /* points into *ep's own storage, not a copy */
+ else
+ return NULL; /* anything other than a lone register */
+}
+/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
+
void
expr_print(FILE *f, const expr *e)
{
- static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
char opstr[3];
int i;
floatnum_print(f, e->terms[i].data.flt);
break;
case EXPR_REG:
- if (e->terms[i].data.reg.size == 32)
- fprintf(f, "e");
- fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]);
+ cur_arch->reg_print(f, e->terms[i].data.reg);
break;
case EXPR_NONE:
break;
/*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *);
/*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *);
/*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *);
-/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char size);
+/*@only@*/ ExprItem *ExprReg(unsigned long reg);
#define expr_new_tree(l,o,r) \
expr_new ((o), ExprExpr(l), ExprExpr(r))
/*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep,
int simplify);
+/* Gets the register value of e if the expression is just a register. If the
+ * expression is more complex, returns NULL. Simplifies the expr first if
+ * simplify is nonzero.
+ */
+/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep,
+ int simplify);
+
void expr_print(FILE *f, /*@null@*/ const expr *);
#endif
#include "bytecode.h"
#include "bc-int.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
START_TEST(test_x86_ea_new_reg)
{
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
typedef enum {
REG_AX = 0,
YASMARCHFILES += \
src/arch/x86/x86arch.c \
src/arch/x86/x86arch.h \
- src/arch/x86/x86-int.h \
src/arch/x86/x86bc.c \
- src/arch/x86/x86expr.c
+ src/arch/x86/x86expr.c \
+ x86id.c
+
+x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+ re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
+
+BUILT_SOURCES += \
+ x86id.c
+
+CLEANFILES += \
+ x86id.c
EXTRA_DIST += \
src/arch/x86/README \
- src/arch/x86/instrs.dat
+ src/arch/x86/x86id.re
+++ /dev/null
-; $IdPath$
-; List of valid instruction/operand combinations
-;
-; Copyright (C) 2001 Peter Johnson
-;
-; This file is part of YASM.
-;
-; YASM is free software; you can redistribute it and/or modify
-; it under the terms of the GNU General Public License as published by
-; the Free Software Foundation; either version 2 of the License, or
-; (at your option) any later version.
-;
-; YASM is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; GNU General Public License for more details.
-;
-; You should have received a copy of the GNU General Public License
-; along with this program; if not, write to the Free Software
-; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-;
-; Meanings of codes:
-; $x refers to operand x
-; "nil" in a field indicates the lack of that field in the instruction
-; (there MUST be some text in every field in this document)
-; Sizes are in bits (8,16,32 are the only valid quantities)
-;
-; Column definitions:
-; Inst - Instruction, should be lowercase
-; Operands - Single combination of valid operands
-; "TO" is not counted in the operand count.
-; OpSize - Fixed operand size. Can generate prefix byte.
-; Opcode - One or two bytes of opcode.
-; EffAddr - Effective Address (ModRM/SIB/Off). First value is the memory
-; operand, second specifies what value goes into the reg/spare
-; bits in the ModRM byte.
-; $xr indicates operand is register, not ModRM (needs convert to RM)
-; $xi indicates operand is immediate (2nd parm is size in bits)
-; Imm - Immediate source operand and forced size (in bits).
-; "s" after size indicates signed number
-; A number instead of a $x is a hex constant value.
-;
-; A ':' at the beginning of the line means that the instruction following the
-; ':' is a synonym for the instruction in the 2nd column.
-;
-; See the parser file for a list of possible operand values and their meanings.
-; gen_instr.pl translates this list into lexer and parser code.
-;
-; Instructions are listed in the same order as that in GNU binutils
-; /include/opcode/i386.h, used for the GAS assembler. See
-; <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h?cvsroot=src>.
-;
-; TODO:
-; Finish instructions (may require changing parser code).
-; Doublecheck instruction encodings, allowable operands.
-; Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes).
-; Doublecheck AMD and Cyrix instructions.
-; Doublecheck the segreg mov instructions.
-;
-; Instruction Groupings (to shorten parser code).
-; The $0.1, $0.2, and $0.3 will get replaced with the parameters given for
-; the instruction using the group during lexing & parsing. These parameters
-; may be in the opcode, opsize, effaddr, or immediate.
-; When opsize is a parameter, its usage in instructions that use the group
-; looks slightly different than normal, because the parameters are
-; specified in hexidecimal while the normal opsize usage is in decimal.
-; Thus 10 and 20 are used instead of 16 and 32 respectively.
-; The first CPU grouping for the instruction is OR'ed with the CPU value in
-; the group CPU fields with @0 in their list. This allows one grouping to
-; be used for instructions with different CPU values.
-; Restrictions on groupings:
-; - $0.? may not appear in the operand, the first part of the effaddr, the
-; second part of the imm, or the CPU fields.
-; - @0, @1 may only appear in the CPU field.
-; Restrictions on instructions based on groupings:
-; - no other operand combinations are allowed (eg, if an instruction uses a
-; group, that must be the ONLY line for the instruction)
-;
-; Notes on code generation:
-; Each group generates a lex token of the group name (sans !). Bison rules
-; are generated for each of the operand combinations for the group just as
-; with a regular instruction, except for the addition of the $0.? fields.
-; Each $0.? field is replaced by $1.d? in the generated code (eg,
-; $0.1->$1.d1, etc).
-; When an instruction that uses a group is encountered, eg:
-; inst!grpname parm1[,parm2[,parm3]]
-; The following lex code is generated:
-; inst { yylval.groupdata[0]=0xparm1; return GRPNAME; }
-; (and additional yylval.groupdata[#-1]=0xparm#; if needed)
-;
-; KEY
-;
-; !Grp Operands OpSize Opcode EffAddr Imm CPU
-; Inst Operands OpSize Opcode EffAddr Imm CPU
-; Inst!Grp Parameters CPU @0 CPU @1
-;
-; Groupings used throughout
-;
-; One byte opcode instructions with no operands:
-!onebyte nil $0.1 $0.2 nil nil @0
-; Two byte opcode instructions with no operands:
-!twobyte nil nil $0.1,$0.2 nil nil @0
-; Three byte opcode instructions with no operands:
-!threebyte nil nil $0.1,$0.2,$0.3 nil nil @0
-; One byte opcode instructions with general memory operand:
-!onebytemem mem nil $0.1 $1,$0.2 nil @0
-; Two byte opcode instructions with general memory operand:
-!twobytemem mem nil $0.1,$0.2 $1,$0.3 nil @0
-;
-; Move instructions
-;
-; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89).
-mov reg8,reg8 nil 88 $1r,$2 nil 8086
-mov reg16,reg16 16 89 $1r,$2 nil 8086
-mov reg32,reg32 32 89 $1r,$2 nil 386
-mov mem,reg8 nil 88 $1,$2 nil 8086
-mov mem8x,reg8 nil 88 $1,$2 nil 8086
-mov mem,reg16 16 89 $1,$2 nil 8086
-mov mem16x,reg16 16 89 $1,$2 nil 8086
-mov mem,reg32 32 89 $1,$2 nil 386
-mov mem32x,reg32 32 89 $1,$2 nil 386
-mov reg8,mem8 nil 8A $2,$1 nil 8086
-mov reg16,mem16 16 8B $2,$1 nil 8086
-mov reg32,mem32 32 8B $2,$1 nil 386
-mov mem,segreg nil 8C $1,$2 nil 8086
-mov reg16,segreg 16 8C $1r,$2 nil 8086
-mov mem16x,segreg 16 8C $1,$2 nil 8086
-mov reg32,segreg 32 8C $1r,$2 nil 386
-mov mem32x,segreg 32 8C $1,$2 nil 386
-mov segreg,mem nil 8E $2,$1 nil 8086
-mov segreg,rm16x nil 8E $2,$1 nil 8086
-mov segreg,rm32x nil 8E $2,$1 nil 386
-;mov reg_al,memoff8
-;mov reg_ax,memoff16
-;mov reg_eax,memoff32
-;mov memoff8,reg_al
-;mov memoff16,reg_ax
-;mov memoff32,reg_eax
-mov reg8,imm8 nil B0+$1 nil $2,8 8086
-mov reg16,imm16 16 B8+$1 nil $2,16 8086
-mov reg32,imm32 32 B8+$1 nil $2,32 386
-mov mem8x,imm8 nil C6 $1,0 $2,8 8086
-mov mem,imm8x nil C6 $1,0 $2,8 8086
-mov mem16x,imm16 16 C7 $1,0 $2,16 8086
-mov mem,imm16x 16 C7 $1,0 $2,16 8086
-mov mem32x,imm32 32 C7 $1,0 $2,32 8086
-mov mem,imm32x 32 C7 $1,0 $2,32 8086
-mov CRREG_NOTCR4,reg32 nil 0F,22 $2r,$1 nil 386,PRIV
-mov CR4,reg32 nil 0F,22 $2r,$1 nil P5,PRIV
-mov reg32,CRREG_NOTCR4 nil 0F,20 $1r,$2 nil 386,PRIV
-mov reg32,CR4 nil 0F,20 $1r,$2 nil P5,PRIV
-mov reg32,DRREG nil 0F,21 $1r,$2 nil 386,PRIV
-mov DRREG,reg32 nil 0F,23 $2r,$1 nil 386,PRIV
-;
-; Move with sign/zero extend
-;
-!movszx reg16,rm8 16 0F,$0.1 $2,$1 nil 386
-!movszx reg32,rm8x 32 0F,$0.1 $2,$1 nil 386
-!movszx reg32,rm16x nil 0F,$0.1+1 $2,$1 nil 386
-movsx!movszx BE
-movzx!movszx B6
-;
-; Push instructions
-;
-push mem16x 16 FF $1,6 nil 8086
-push mem32x 32 FF $1,6 nil 386
-push reg16 16 50+$1 nil nil 8086
-push reg32 32 50+$1 nil nil 386
-push imm8x nil 6A nil $1,8 8086
-push imm16x 16 68 nil $1,16 8086
-push imm32x 32 68 nil $1,32 386
-push reg_cs nil 0E nil nil 8086
-push reg_ss nil 16 nil nil 8086
-push reg_ds nil 1E nil nil 8086
-push reg_es nil 06 nil nil 8086
-push reg_fs nil 0F,A0 nil nil 386
-push reg_gs nil 0F,A8 nil nil 386
-pusha!onebyte nil,60 186
-pushad!onebyte 20,60 386
-pushaw!onebyte 10,60 186
-;
-; Pop instructions
-;
-pop mem16x 16 8F $1,0 nil 8086
-pop mem32x 32 8F $1,0 nil 386
-pop reg16 16 58+$1 nil nil 8086
-pop reg32 32 58+$1 nil nil 386
-pop reg_ds nil 1F nil nil 8086
-pop reg_es nil 07 nil nil 8086
-pop reg_ss nil 17 nil nil 8086
-pop reg_fs nil 0F,A1 nil nil 386
-pop reg_gs nil 0F,A9 nil nil 386
-popa!onebyte nil,61 186
-popad!onebyte 20,61 386
-popaw!onebyte 10,61 186
-;
-; Exchange instructions
-;
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg8,reg8 nil 86 $1r,$2 nil 8086
-xchg mem,reg8 nil 86 $1,$2 nil 8086
-xchg mem8x,reg8 nil 86 $1,$2 nil 8086
-xchg reg8,mem8 nil 86 $2,$1 nil 8086
-xchg reg_ax,reg16 16 90+$2 nil nil 8086
-xchg reg16,reg_ax 16 90+$1 nil nil 8086
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg16,reg16 16 87 $1r,$2 nil 8086
-xchg mem,reg16 16 87 $1,$2 nil 8086
-xchg mem16x,reg16 16 87 $1,$2 nil 8086
-xchg reg16,mem16 16 87 $2,$1 nil 8086
-xchg reg_eax,reg32 32 90+$2 nil nil 386
-xchg reg32,reg_eax 32 90+$1 nil nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg32,reg32 32 87 $1r,$2 nil 386
-xchg mem,reg32 32 87 $1,$2 nil 386
-xchg mem32x,reg32 32 87 $1,$2 nil 386
-xchg reg32,mem32 32 87 $2,$1 nil 386
-;
-; In/out from ports
-;
-in reg_al,imm8 nil E4 nil $2,8 8086
-in reg_ax,imm8 16 E5 nil $2,8 8086
-in reg_eax,imm8 32 E5 nil $2,8 386
-in reg_al,reg_dx nil EC nil nil 8086
-in reg_ax,reg_dx 16 ED nil nil 8086
-in reg_eax,reg_dx 32 ED nil nil 386
-out imm8,reg_al nil E6 nil $1,8 8086
-out imm8,reg_ax 16 E7 nil $1,8 8086
-out imm8,reg_eax 32 E7 nil $1,8 386
-out reg_dx,reg_al nil EE nil nil 8086
-out reg_dx,reg_ax 16 EF nil nil 8086
-out reg_dx,reg_eax 32 EF nil nil 386
-;
-; Load effective address
-;
-lea reg16,mem16 16 8D $2,$1 nil 8086
-lea reg32,mem32 32 8D $2,$1 nil 386
-;
-; Load segment registers from memory
-;
-lds reg16,mem 16 C5 $2,$1 nil 8086
-lds reg32,mem 32 C5 $2,$1 nil 386
-les reg16,mem 16 C4 $2,$1 nil 8086
-les reg32,mem 32 C4 $2,$1 nil 386
-lfs reg16,mem 16 0F,B4 $2,$1 nil 386
-lfs reg32,mem 32 0F,B4 $2,$1 nil 386
-lgs reg16,mem 16 0F,B5 $2,$1 nil 386
-lgs reg32,mem 32 0F,B5 $2,$1 nil 386
-lss reg16,mem 16 0F,B2 $2,$1 nil 386
-lss reg32,mem 32 0F,B2 $2,$1 nil 386
-;
-; Flags register instructions
-;
-clc!onebyte nil,F8 8086
-cld!onebyte nil,FC 8086
-cli!onebyte nil,FA 8086
-clts!twobyte 0F,06 286,PRIV
-cmc!onebyte nil,F5 8086
-lahf!onebyte nil,9F 8086
-sahf!onebyte nil,9E 8086
-pushf!onebyte nil,9C 8086
-pushfd!onebyte 20,9C 386
-pushfw!onebyte 10,9C 8086
-popf!onebyte nil,9D 8086
-popfd!onebyte 20,9D 386
-popfw!onebyte 10,9D 8086
-stc!onebyte nil,F9 8086
-std!onebyte nil,FD 8086
-sti!onebyte nil,FB 8086
-;
-; Arithmetic
-;
-; General arithmetic
-!arith reg_al,imm8 nil $0.1+4 nil $2,8 8086
-!arith reg_ax,imm16 16 $0.1+5 nil $2,16 8086
-!arith reg_eax,imm32 32 $0.1+5 nil $2,32 386
-!arith reg8,imm8 nil 80 $1r,$0.2 $2,8 8086
-!arith mem8x,imm nil 80 $1,$0.2 $2,8 8086
-!arith mem,imm8x nil 80 $1,$0.2 $2,8 8086
-!arith reg16,imm 16 81 $1r,$0.2 $2,16 8086
-!arith mem16x,imm 16 81 $1,$0.2 $2,16 8086
-!arith reg16,imm16x 16 81 $1r,$0.2 $2,16 8086
-!arith mem,imm16x 16 81 $1,$0.2 $2,16 8086
-!arith reg32,imm 32 81 $1r,$0.2 $2,32 386
-!arith mem32x,imm 32 81 $1,$0.2 $2,32 386
-!arith reg32,imm32x 32 81 $1r,$0.2 $2,32 386
-!arith mem,imm32x 32 81 $1,$0.2 $2,32 386
-!arith reg16,imm8x 16 83 $1r,$0.2 $2,8s 8086
-!arith mem16x,imm8x 16 83 $1,$0.2 $2,8s 8086
-!arith reg32,imm8x 32 83 $1r,$0.2 $2,8s 386
-!arith mem32x,imm8x 32 83 $1,$0.2 $2,8s 386
-; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1).
-!arith reg8,reg8 nil $0.1 $1r,$2 nil 8086
-!arith reg16,reg16 16 $0.1+1 $1r,$2 nil 8086
-!arith reg32,reg32 32 $0.1+1 $1r,$2 nil 386
-!arith mem,reg8 nil $0.1 $1,$2 nil 8086
-!arith mem8x,reg8 nil $0.1 $1,$2 nil 8086
-!arith mem,reg16 16 $0.1+1 $1,$2 nil 8086
-!arith mem16x,reg16 16 $0.1+1 $1,$2 nil 8086
-!arith mem,reg32 32 $0.1+1 $1,$2 nil 386
-!arith mem32x,reg32 32 $0.1+1 $1,$2 nil 386
-!arith reg8,mem8 nil $0.1+2 $2,$1 nil 8086
-!arith reg16,mem16 16 $0.1+3 $2,$1 nil 8086
-!arith reg32,mem32 32 $0.1+3 $2,$1 nil 386
-; INC/DEC
-!incdec rm8x nil FE $1,$0.1 nil 8086
-!incdec mem16x 16 FF $1,$0.1 nil 8086
-!incdec mem32x 32 FF $1,$0.1 nil 386
-!incdec reg16 16 $0.2+$1 nil nil 8086
-!incdec reg32 32 $0.2+$1 nil nil 386
-; "F6" opcodes (DIV/IDIV/MUL/NEG/NOT):
-!groupf6 rm8x nil F6 $1,$0.1 nil 8086
-!groupf6 rm16x 16 F7 $1,$0.1 nil 8086
-!groupf6 rm32x 32 F7 $1,$0.1 nil 386
-add!arith 00,0
-inc!incdec 0,40
-sub!arith 28,5
-dec!incdec 1,48
-sbb!arith 18,3
-cmp!arith 38,7
-test reg_al,imm8 nil A8 nil $2,8 8086
-test reg_ax,imm16 16 A9 nil $2,16 8086
-test reg_eax,imm32 32 A9 nil $2,32 386
-test reg8,imm8 nil F6 $1r,0 $2,8 8086
-test mem8x,imm nil F6 $1,0 $2,8 8086
-test mem,imm8x nil F6 $1,0 $2,8 8086
-test reg16,imm16 16 F7 $1r,0 $2,16 8086
-test mem16x,imm 16 F7 $1,0 $2,16 8086
-test mem,imm16x 16 F7 $1,0 $2,16 8086
-test reg32,imm32 32 F7 $1r,0 $2,32 386
-test mem32x,imm 32 F7 $1,0 $2,32 386
-test mem,imm32x 32 F7 $1,0 $2,32 386
-; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1
-test reg8,reg8 nil 84 $1r,$2 nil 8086
-test reg16,reg16 16 85 $1r,$2 nil 8086
-test reg32,reg32 32 85 $1r,$2 nil 386
-test mem,reg8 nil 84 $1,$2 nil 8086
-test mem8x,reg8 nil 84 $1,$2 nil 8086
-test mem,reg16 16 85 $1,$2 nil 8086
-test mem16x,reg16 16 85 $1,$2 nil 8086
-test mem,reg32 32 85 $1,$2 nil 386
-test mem32x,reg32 32 85 $1,$2 nil 386
-test reg8,mem8 nil 84 $2,$1 nil 8086
-test reg16,mem16 16 85 $2,$1 nil 8086
-test reg32,mem32 32 85 $2,$1 nil 386
-and!arith 20,4
-or!arith 08,1
-xor!arith 30,6
-adc!arith 10,2
-neg!groupf6 3
-not!groupf6 2
-aaa!onebyte nil,37 8086
-aas!onebyte nil,3F 8086
-daa!onebyte nil,27 8086
-das!onebyte nil,2F 8086
-aad nil nil D5,0A nil nil 8086
-aad imm8 nil D5 nil $1,8 8086
-aam nil nil D4,0A nil nil 8086
-aam imm8 nil D4 nil $1,8 8086
-;
-; Conversion instructions
-;
-cbw!onebyte 10,98 8086
-cwde!onebyte 20,98 386
-cwd!onebyte 10,99 8086
-cdq!onebyte 20,99 386
-;
-; Multiplication and division
-;
-mul!groupf6 4
-imul rm8x nil F6 $1,5 nil 8086
-imul rm16x 16 F7 $1,5 nil 8086
-imul rm32x 32 F7 $1,5 nil 386
-imul reg16,rm16 16 0F,AF $2,$1 nil 386
-imul reg32,rm32 32 0F,AF $2,$1 nil 386
-imul reg16,rm16,imm8x 16 6B $2,$1 $3,8s 186
-imul reg32,rm32,imm8x 32 6B $2,$1 $3,8s 386
-imul reg16,imm8x 16 6B $1r,$1 $2,8s 186
-imul reg32,imm8x 32 6B $1r,$1 $2,8s 386
-imul reg16,rm16,imm16 16 69 $2,$1 $3,16s 186
-imul reg32,rm32,imm32 32 69 $2,$1 $3,32s 386
-imul reg16,imm16 16 69 $1r,$1 $2,16s 186
-imul reg32,imm32 32 69 $1r,$1 $2,32s 386
-div!groupf6 6
-idiv!groupf6 7
-;
-; Shifts
-;
-; Standard
-!shift rm8x,ONE nil D0 $1,$0.1 nil 8086
-!shift rm8x,reg_cl nil D2 $1,$0.1 nil 8086
-!shift rm8x,imm8 nil C0 $1,$0.1 $2,8 186
-!shift rm16x,ONE 16 D1 $1,$0.1 nil 8086
-!shift rm16x,reg_cl 16 D3 $1,$0.1 nil 8086
-!shift rm16x,imm8 16 C1 $1,$0.1 $2,8 186
-!shift rm32x,ONE 32 D1 $1,$0.1 nil 386
-!shift rm32x,reg_cl 32 D3 $1,$0.1 nil 386
-!shift rm32x,imm8 32 C1 $1,$0.1 $2,8 386
-; Doubleword
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,imm8 16 0F,$0.1 $1r,$2 $3,8 386
-!shlrd mem,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386
-!shlrd mem16x,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,reg_cl 16 0F,$0.1+1 $1r,$2 nil 386
-!shlrd mem,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386
-!shlrd mem16x,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,imm8 32 0F,$0.1 $1r,$2 $3,8 386
-!shlrd mem,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386
-!shlrd mem32x,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,reg_cl 32 0F,$0.1+1 $1r,$2 nil 386
-!shlrd mem,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386
-!shlrd mem32x,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386
-rol!shift 0
-ror!shift 1
-rcl!shift 2
-rcr!shift 3
-sal!shift 4
-shl!shift 4
-shr!shift 5
-sar!shift 7
-shld!shlrd A4
-shrd!shlrd AC
-;
-; Control transfer instructions (unconditional)
-;
-; Special format for relative targets:
-; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU
-;
-!jmpcall target nil $0.1?$0.2 $0.3 8086 8086
-!jmpcall imm:imm nil $0.4 $2i,nil $1,16 8086
-!jmpcall WORD imm:imm 16 $0.4 $2i,16 $1,16 8086
-!jmpcall DWORD imm:imm 32 $0.4 $2i,32 $1,16 386
-!jmpcall memfar nil FF $1,$0.4+1 nil 8086
-!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086
-!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386
-!jmpcall mem nil FF $1,$0.4 nil 8086
-!jmpcall rm16x 16 FF $1,$0.4 nil 8086
-!jmpcall rm32x 32 FF $1,$0.4 nil 386
-call!jmpcall nil,0,E8,9A,2
-jmp!jmpcall 1,EB,E9,EA,4
-ret!onebyte nil,C3 8086
-retn nil nil C3 nil nil 8086
-retf nil nil CB nil nil 8086
-retn imm16 nil C2 nil $1,16 8086
-retf imm16 nil CA nil $1,16 8086
-enter imm16,imm8 nil C8 $1i,16 $2,8 186
-leave!onebyte nil,C9 186
-;
-; Conditional jumps
-;
-!jcc target nil 70+$0.1 0F,80+$0.1 8086 386
-jo!jcc 0
-jno!jcc 1
-jb!jcc 2
-jc!jcc 2
-jnae!jcc 2
-jnb!jcc 3
-jnc!jcc 3
-jae!jcc 3
-je!jcc 4
-jz!jcc 4
-jne!jcc 5
-jnz!jcc 5
-jbe!jcc 6
-jna!jcc 6
-jnbe!jcc 7
-ja!jcc 7
-js!jcc 8
-jns!jcc 9
-jp!jcc A
-jpe!jcc A
-jnp!jcc B
-jpo!jcc B
-jl!jcc C
-jnge!jcc C
-jnl!jcc D
-jge!jcc D
-jle!jcc E
-jng!jcc E
-jnle!jcc F
-jg!jcc F
-jcxz target 16 E3 nil 8086 8086
-jecxz target 32 E3 nil 386 386
-;
-; Loop instructions
-;
-!loopg target nil E0+$0.1 nil 8086 8086
-!loopg target,reg_cx 16 E0+$0.1 nil 8086 8086
-!loopg target,reg_ecx 32 E0+$0.1 nil 386 386
-loop!loopg 2
-loopz!loopg 1
-loope!loopg 1
-loopnz!loopg 0
-loopne!loopg 0
-;
-; Set byte on flag instructions
-;
-!setcc rm8 nil 0F,90+$0.1 $1,2 nil 386
-seto!setcc 0
-setno!setcc 1
-setb!setcc 2
-setc!setcc 2
-setnae!setcc 2
-setnb!setcc 3
-setnc!setcc 3
-setae!setcc 3
-sete!setcc 4
-setz!setcc 4
-setne!setcc 5
-setnz!setcc 5
-setbe!setcc 6
-setna!setcc 6
-setnbe!setcc 7
-seta!setcc 7
-sets!setcc 8
-setns!setcc 9
-setp!setcc A
-setpe!setcc A
-setnp!setcc B
-setpo!setcc B
-setl!setcc C
-setnge!setcc C
-setnl!setcc D
-setge!setcc D
-setle!setcc E
-setng!setcc E
-setnle!setcc F
-setg!setcc F
-;
-; String instructions
-;
-; NOTE: cmpsd,movsd can't go to !onebyte group because of other variations
-cmpsb!onebyte nil,A6 8086
-cmpsw!onebyte 10,A7 8086
-cmpsd nil 32 A7 nil nil 386
-insb!onebyte nil,6C 8086
-insw!onebyte 10,6D 8086
-insd!onebyte 20,6D 386
-outsb!onebyte nil,6E 8086
-outsw!onebyte 10,6F 8086
-outsd!onebyte 20,6F 386
-lodsb!onebyte nil,AC 8086
-lodsw!onebyte 10,AD 8086
-lodsd!onebyte 20,AD 386
-movsb!onebyte nil,A4 8086
-movsw!onebyte 10,A5 8086
-movsd nil 32 A5 nil nil 386
-scasb!onebyte nil,AE 8086
-scasw!onebyte 10,AF 8086
-scasd!onebyte 20,AF 386
-stosb!onebyte nil,AA 8086
-stosw!onebyte 10,AB 8086
-stosd!onebyte 20,AB 386
-xlat!onebyte nil,D7 8086
-xlatb!onebyte nil,D7 8086
-;
-; Bit manipulation
-;
-; Bit tests
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest reg16,reg16 16 0F,$0.1 $1r,$2 nil 386
-!bittest mem,reg16 16 0F,$0.1 $1,$2 nil 386
-!bittest mem16x,reg16 16 0F,$0.1 $1,$2 nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest reg32,reg32 32 0F,$0.1 $1r,$2 nil 386
-!bittest mem,reg32 32 0F,$0.1 $1,$2 nil 386
-!bittest mem32x,reg32 32 0F,$0.1 $1,$2 nil 386
-!bittest reg16,imm8 16 0F,BA $1r,$0.2 $2,8 386
-!bittest mem16x,imm8 16 0F,BA $1,$0.2 $2,8 386
-!bittest reg32,imm8 32 0F,BA $1r,$0.2 $2,8 386
-!bittest mem32x,imm8 32 0F,BA $1,$0.2 $2,8 386
-; Bit scans
-!bsfr reg16,rm16 16 0F,BC+$0.1 $2,$1 nil 386
-!bsfr reg32,rm32 32 0F,BC+$0.1 $2,$1 nil 386
-bsf!bsfr 0
-bsr!bsfr 1
-bt!bittest A3,4
-btc!bittest BB,7
-btr!bittest B3,6
-bts!bittest AB,5
-;
-; Interrupts and operating system instructions
-;
-int imm8 nil CD nil $1,8 8086
-int3!onebyte nil,CC 8086
-int03!onebyte nil,CC 8086
-into!onebyte nil,CE 8086
-iret!onebyte nil,CF 8086
-iretw!onebyte 10,CF 8086
-iretd!onebyte 20,CF 386
-rsm!twobyte 0F,AA P5,SMM
-bound reg16,mem16 16 62 $2,$1 nil 186
-bound reg32,mem32 32 62 $2,$1 nil 386
-hlt!onebyte nil,F4 8086,PRIV
-nop!onebyte nil,90 8086
-;
-; Protection control
-;
-; 286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW):
-!prot286 rm16 nil 0F,00 $1,$0.1 nil 286,PROT,@0
-arpl rm16,reg16 nil 63 $1,$2 nil 286,PROT
-lar reg16,rm16 16 0F,02 $2,$1 nil 286,PROT
-lar reg32,rm32 32 0F,02 $2,$1 nil 386,PROT
-lgdt!twobytemem 0F,01,2 286,PRIV
-lidt!twobytemem 0F,01,3 286,PRIV
-lldt!prot286 2 PRIV
-lmsw rm16 nil 0F,01 $1,6 nil 286,PRIV
-lsl reg16,rm16 16 0F,03 $2,$1 nil 286,PROT
-lsl reg32,rm32 32 0F,03 $2,$1 nil 286,PROT
-ltr!prot286 3 PRIV
-sgdt!twobytemem 0F,01,0 286
-sidt!twobytemem 0F,01,1 286
-sldt mem1632 nil 0F,00 $1,0 nil 286
-sldt reg16 16 0F,00 $1r,0 nil 286
-sldt reg32 32 0F,00 $1r,0 nil 386
-smsw mem1632 nil 0F,01 $1,4 nil 286
-smsw reg16 16 0F,01 $1r,4 nil 286
-smsw reg32 32 0F,01 $1r,4 nil 386
-str!prot286 1
-verr!prot286 4
-verw!prot286 5
-;
-; Floating point instructions
-;
-; Load
-fld mem32x nil D9 $1,0 nil 8086,FPU
-fld mem64x nil DD $1,0 nil 8086,FPU
-fld mem80x nil DB $1,5 nil 8086,FPU
-fld fpureg nil D9,C0+$1 nil nil 8086,FPU
-fild mem16x nil DF $1,0 nil 8086,FPU
-fild mem32x nil DB $1,0 nil 8086,FPU
-fild mem64x nil DF $1,5 nil 8086,FPU
-fbld mem80 nil DF $1,4 nil 8086,FPU
-; Store
-fst mem32x nil D9 $1,2 nil 8086,FPU
-fst mem64x nil DD $1,2 nil 8086,FPU
-fst fpureg nil DD,D0+$1 nil nil 8086,FPU
-fist mem16x nil DF $1,2 nil 8086,FPU
-fist mem32x nil DB $1,2 nil 8086,FPU
-; Store (with pop)
-fstp mem32x nil D9 $1,3 nil 8086,FPU
-fstp mem64x nil DD $1,3 nil 8086,FPU
-fstp mem80x nil DB $1,7 nil 8086,FPU
-fstp fpureg nil DD,D8+$1 nil nil 8086,FPU
-fistp mem16x nil DF $1,3 nil 8086,FPU
-fistp mem32x nil DB $1,3 nil 8086,FPU
-fistp mem64x nil DF $1,7 nil 8086,FPU
-fbstp mem80 nil DF $1,6 nil 8086,FPU
-; Exchange (with ST0)
-fxch fpureg nil D9,C8+$1 nil nil 8086,FPU
-fxch ST0,ST0 nil D9,C8 nil nil 8086,FPU
-fxch ST0,FPUREG_NOTST0 nil D9,C8+$2 nil nil 8086,FPU
-fxch FPUREG_NOTST0,ST0 nil D9,C8+$1 nil nil 8086,FPU
-fxch nil nil D9,C9 nil nil 8086,FPU
-; Comparisons
-!fcomg mem32x nil D8 $1,$0.1 nil 8086,FPU
-!fcomg mem64x nil DC $1,$0.1 nil 8086,FPU
-!fcomg fpureg nil D8,$0.2+$1 nil nil 8086,FPU
-!fcomg ST0,fpureg nil D8,$0.2+$2 nil nil 8086,FPU
-; Extended comparisons
-!fcomg2 fpureg nil $0.1,$0.2+$1 nil nil @0,FPU
-!fcomg2 ST0,fpureg nil $0.1,$0.2+$2 nil nil @0,FPU
-; Comparison (without pop)
-fcom!fcomg 2,D0
-ficom mem16x nil DE $1,2 nil 8086,FPU
-ficom mem32x nil DA $1,2 nil 8086,FPU
-; Comparison (with pop)
-fcomp!fcomg 3,D8
-ficomp mem16x nil DE $1,3 nil 8086,FPU
-ficomp mem32x nil DA $1,3 nil 8086,FPU
-fcompp!twobyte DE,D9 8086,FPU
-; Unordered comparison (with pop)
-fucom!fcomg2 DD,E0 286,FPU
-fucomp!fcomg2 DD,E8 286,FPU
-fucompp!twobyte DA,E9 286,FPU
-ftst!twobyte D9,E4 8086,FPU
-fxam!twobyte D9,E5 8086,FPU
-; Load constants into ST0
-fld1!twobyte D9,E8 8086,FPU
-fldl2t!twobyte D9,E9 8086,FPU
-fldl2e!twobyte D9,EA 8086,FPU
-fldpi!twobyte D9,EB 8086,FPU
-fldlg2!twobyte D9,EC 8086,FPU
-fldln2!twobyte D9,ED 8086,FPU
-fldz!twobyte D9,EE 8086,FPU
-; Arithmetic
-!farith mem32x nil D8 $1,$0.1 nil 8086,FPU
-!farith mem64x nil DC $1,$0.1 nil 8086,FPU
-!farith fpureg nil D8,$0.2+$1 nil nil 8086,FPU
-!farith ST0,ST0 nil D8,$0.2 nil nil 8086,FPU
-!farith ST0,FPUREG_NOTST0 nil D8,$0.2+$2 nil nil 8086,FPU
-!farith TO fpureg nil DC,$0.3+$1 nil nil 8086,FPU
-!farith FPUREG_NOTST0,ST0 nil DC,$0.3+$1 nil nil 8086,FPU
-!farithp fpureg nil DE,$0.1+$1 nil nil 8086,FPU
-!farithp fpureg,ST0 nil DE,$0.1+$1 nil nil 8086,FPU
-!fiarith mem32x nil DA $1,$0.1 nil 8086,FPU
-!fiarith mem16x nil DE $1,$0.1 nil 8086,FPU
-fadd!farith 0,C0,C0
-faddp!farithp C0
-fiadd!fiarith 0
-fsub!farith 4,E0,E8
-fisub!fiarith 4
-fsubp!farithp E8
-fsubr!farith 5,E8,E0
-fisubr!fiarith 5
-fsubrp!farithp E0
-; Multiply
-fmul!farith 1,C8,C8
-fimul!fiarith 1
-fmulp!farithp C8
-; Divide
-fdiv!farith 6,F0,F8
-fidiv!fiarith 6
-fdivp!farithp F8
-fdivr!farith 7,F8,F0
-fidivr!fiarith 7
-fdivrp!farithp F0
-; Other arithmetic
-f2xm1!twobyte D9,F0 8086,FPU
-fyl2x!twobyte D9,F1 8086,FPU
-fptan!twobyte D9,F2 8086,FPU
-fpatan!twobyte D9,F3 8086,FPU
-fxtract!twobyte D9,F4 8086,FPU
-fprem1!twobyte D9,F5 286,FPU
-fdecstp!twobyte D9,F6 8086,FPU
-fincstp!twobyte D9,F7 8086,FPU
-fprem!twobyte D9,F8 8086,FPU
-fyl2xp1!twobyte D9,F9 8086,FPU
-fsqrt!twobyte D9,FA 8086,FPU
-fsincos!twobyte D9,FB 286,FPU
-frndint!twobyte D9,FC 8086,FPU
-fscale!twobyte D9,FD 8086,FPU
-fsin!twobyte D9,FE 286,FPU
-fcos!twobyte D9,FF 286,FPU
-fchs!twobyte D9,E0 8086,FPU
-fabs!twobyte D9,E1 8086,FPU
-; Processor control
-fninit!twobyte DB,E3 8086,FPU
-finit!threebyte 9B,DB,E3 8086,FPU
-fldcw mem16 nil D9 $1,5 nil 8086,FPU
-fnstcw mem16 nil D9 $1,7 nil 8086,FPU
-fstcw mem16 nil 9B,D9 $1,7 nil 8086,FPU
-fnstsw mem16 nil DD $1,7 nil 8086,FPU
-fnstsw reg_ax nil DF,E0 nil nil 8086,FPU
-fstsw mem16 nil 9B,DD $1,7 nil 8086,FPU
-fstsw reg_ax nil 9B,DF,E0 nil nil 8086,FPU
-fnclex!twobyte DB,E2 8086,FPU
-fclex!threebyte 9B,DB,E2 8086,FPU
-fnstenv!onebytemem D9,6 8086,FPU
-fstenv!twobytemem 9B,D9,6 8086,FPU
-fldenv!onebytemem D9,4 8086,FPU
-fnsave!onebytemem DD,6 8086,FPU
-fsave!twobytemem 9B,DD,6 8086,FPU
-frstor!onebytemem DD,4 8086,FPU
-ffree fpureg nil DD,C0+$1 nil nil 8086,FPU
-ffreep fpureg nil DF,C0+$1 nil nil P6,FPU,UNDOC
-fnop!twobyte D9,D0 8086,FPU
-fwait!onebyte nil,9B 8086,FPU
-;
-; Prefixes (should the others be here too? should wait be a prefix?)
-;
-wait!onebyte nil,9B 8086
-;
-; 486 extensions
-;
-; Compare & exchange, exchange & add
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg8,reg8 nil 0F,$0.1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg8 nil 0F,$0.1 $1,$2 nil @0
-!cmpxchgxadd mem8x,reg8 nil 0F,$0.1 $1,$2 nil @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg16,reg16 16 0F,$0.1+1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg16 16 0F,$0.1+1 $1,$2 nil @0
-!cmpxchgxadd mem16x,reg16 16 0F,$0.1+1 $1,$2 nil @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg32,reg32 32 0F,$0.1+1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg32 32 0F,$0.1+1 $1,$2 nil @0
-!cmpxchgxadd mem32x,reg32 32 0F,$0.1+1 $1,$2 nil @0
-bswap reg32 32 0F,C8+$1 nil nil 486
-xadd!cmpxchgxadd C0 486
-cmpxchg!cmpxchgxadd B0 486
-cmpxchg486!cmpxchgxadd A6 486,UNDOC
-invd!twobyte 0F,08 486,PRIV
-wbinvd!twobyte 0F,09 486,PRIV
-invlpg!twobytemem 0F,01,7 486,PRIV
-;
-; 586 and late 486 extensions
-;
-cpuid!twobyte 0F,A2 486
-;
-; Pentium extensions
-;
-wrmsr!twobyte 0F,30 P5,PRIV
-rdtsc!twobyte 0F,31 P5
-rdmsr!twobyte 0F,32 P5,PRIV
-cmpxchg8b mem64 nil 0F,C7 $1,1 nil P5
-;
-; Pentium II/Pentium Pro extensions
-;
-sysenter!twobyte 0F,34 P6
-sysexit!twobyte 0F,35 P6,PRIV
-fxsave!twobytemem 0F,AE,0 P6,FPU
-fxrstor!twobytemem 0F,AE,1 P6,FPU
-rdpmc!twobyte 0F,33 P6
-ud2!twobyte 0F,0B 286
-ud1!twobyte 0F,B9 286,UNDOC
-; cmov
-; fcmov
-fcomi!fcomg2 DB,F0 P6
-fucomi!fcomg2 DB,E8 P6
-fcomip!fcomg2 DF,F0 P6
-fucomip!fcomg2 DF,E8 P6
-;
-; Pentium4 extensions
-;
-movnti mem32,reg32 nil 0F,C3 $1,$2 nil P4
-clflush mem8 nil 0F,AE $1,7 nil KATMAI
-lfence!threebyte 0F,AE,E8 KATMAI
-mfence!threebyte 0F,AE,F0 KATMAI
-pause!twobyte F3,90 P4
-;
-; MMX/SSE2 instructions
-;
-; General
-!mmxsse MMXREG,rm64 nil 0F,$0.1 $2,$1 nil @0,MMX
-!mmxsse XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil @1
-; Shifts
-!pshift MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX
-!pshift XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2
-!pshift MMXREG,imm8 nil 0F,$0.2 $1r,$0.3 $2,8 P5,MMX
-!pshift XMMREG,imm8 nil 66,0F,$0.2 $1r,$0.3 $2,8 P4,SSE2
-emms!twobyte 0F,77 P5,MMX
-movd MMXREG,rm32 nil 0F,6E $2,$1 nil P5,MMX
-movd rm32,MMXREG nil 0F,7E $1,$2 nil P5,MMX
-movd XMMREG,rm32 nil 66,0F,6E $2,$1 nil P4,SSE2
-movd rm32,XMMREG nil 66,0F,7E $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq MMXREG,MMXREG nil 0F,6F $2r,$1 nil P5,MMX
-movq MMXREG,mem64 nil 0F,6F $2,$1 nil P5,MMX
-movq mem64,MMXREG nil 0F,7F $1,$2 nil P5,MMX
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq XMMREG,XMMREG nil F3,0F,7E $2r,$1 nil P4,SSE2
-movq XMMREG,mem64 nil F3,0F,7E $2,$1 nil P4,SSE2
-movq mem64,XMMREG nil 66,0F,D6 $1,$2 nil P4,SSE2
-packssdw!mmxsse 6B P5 P4,SSE2
-packsswb!mmxsse 63 P5 P4,SSE2
-packuswb!mmxsse 67 P5 P4,SSE2
-paddb!mmxsse FC P5 P4,SSE2
-paddw!mmxsse FD P5 P4,SSE2
-paddd!mmxsse FE P5 P4,SSE2
-paddq!mmxsse D4 P5 P4,SSE2
-paddsb!mmxsse EC P5 P4,SSE2
-paddsw!mmxsse ED P5 P4,SSE2
-paddusb!mmxsse DC P5 P4,SSE2
-paddusw!mmxsse DD P5 P4,SSE2
-pand!mmxsse DB P5 P4,SSE2
-pandn!mmxsse DF P5 P4,SSE2
-pcmpeqb!mmxsse 74 P5 P4,SSE2
-pcmpeqw!mmxsse 75 P5 P4,SSE2
-pcmpeqd!mmxsse 76 P5 P4,SSE2
-pcmpgtb!mmxsse 64 P5 P4,SSE2
-pcmpgtw!mmxsse 65 P5 P4,SSE2
-pcmpgtd!mmxsse 66 P5 P4,SSE2
-pmaddwd!mmxsse F5 P5 P4,SSE2
-pmulhw!mmxsse E5 P5 P4,SSE2
-pmullw!mmxsse D5 P5 P4,SSE2
-por!mmxsse EB P5 P4,SSE2
-psllw!pshift F1,71,6
-pslld!pshift F2,72,6
-psllq!pshift F3,73,6
-psraw!pshift E1,71,4
-psrad!pshift E2,72,4
-psrlw!pshift D1,71,2
-psrld!pshift D2,72,2
-psrlq!pshift D3,73,2
-psubb MMXREG,imm8 nil 0F,F8 $1r,2 $2,8 P5,MMX
-psubb XMMREG,imm8 nil 66,0F,F8 $1r,2 $2,8 P4,SSE2
-psubw MMXREG,imm8 nil 0F,F9 $1r,2 $2,8 P5,MMX
-psubw XMMREG,imm8 nil 66,0F,F9 $1r,2 $2,8 P4,SSE2
-psubd!mmxsse FA P5 P4,SSE2
-psubq!mmxsse FB P5 P4,SSE2
-psubsb!mmxsse E8 P5 P4,SSE2
-psubsw!mmxsse E9 P5 P4,SSE2
-psubusb!mmxsse D8 P5 P4,SSE2
-psubusw!mmxsse D9 P5 P4,SSE2
-punpckhbw!mmxsse 68 P5 P4,SSE2
-punpckhwd!mmxsse 69 P5 P4,SSE2
-punpckhdq!mmxsse 6A P5 P4,SSE2
-punpcklbw!mmxsse 60 P5 P4,SSE2
-punpcklwd!mmxsse 61 P5 P4,SSE2
-punpckldq!mmxsse 62 P5 P4,SSE2
-pxor!mmxsse EF P5 P4,SSE2
-;
-; PIII (Katmai) new instructions / SIMD instructions
-;
-; Standard
-!sseps XMMREG,rm128 nil 0F,$0.1 $2,$1 nil @0
-!ssess XMMREG,rm128 nil F3,0F,$0.1 $2,$1 nil @0
-; With immediate
-!ssepsimm XMMREG,rm128,imm8 nil 0F,$0.1 $2,$1 $3,8 KATMAI,SSE
-; Comparisons
-!ssecmpps XMMREG,rm128 nil 0F,C2 $2,$1 $0.1,8 KATMAI,SSE
-!ssecmpss XMMREG,rm128 nil F3,0F,C2 $2,$1 $0.1,8 KATMAI,SSE
-addps!sseps 58 KATMAI,SSE
-addss!ssess 58 KATMAI,SSE
-andnps!sseps 55 KATMAI,SSE
-andps!sseps 54 KATMAI,SSE
-cmpeqps!ssecmpps 0
-cmpeqss!ssecmpss 0
-cmpleps!ssecmpps 2
-cmpless!ssecmpss 2
-cmpltps!ssecmpps 1
-cmpltss!ssecmpss 1
-cmpneqps!ssecmpps 4
-cmpneqss!ssecmpss 4
-cmpnleps!ssecmpps 6
-cmpnless!ssecmpss 6
-cmpnltps!ssecmpps 5
-cmpnltss!ssecmpss 5
-cmpordps!ssecmpps 7
-cmpordss!ssecmpss 7
-cmpunordps!ssecmpps 3
-cmpunordss!ssecmpss 3
-cmpps!ssepsimm C2
-cmpss XMMREG,rm128,imm8 nil F3,0F,C2 $2,$1 $3,8 KATMAI,SSE
-comiss!sseps 2F KATMAI,SSE
-cvtpi2ps!sseps 2A KATMAI,SSE
-cvtps2pi!sseps 2D KATMAI,SSE
-cvtsi2ss!ssess 2A KATMAI,SSE
-cvtss2si!ssess 2D KATMAI,SSE
-cvttps2pi!sseps 2C KATMAI,SSE
-cvttss2si!ssess 2C KATMAI,SSE
-divps!sseps 5E KATMAI,SSE
-divss!ssess 5E KATMAI,SSE
-ldmxcsr mem32 nil 0F,AE $1,2 nil KATMAI,SSE
-maskmovq MMXREG,MMXREG nil 0F,F7 $2r,$1 nil KATMAI,MMX
-maxps!sseps 5F KATMAI,SSE
-maxss!ssess 5F KATMAI,SSE
-minps!sseps 5D KATMAI,SSE
-minss!ssess 5D KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movaps XMMREG,XMMREG nil 0F,28 $2r,$1 nil KATMAI,SSE
-movaps XMMREG,mem128 nil 0F,28 $2,$1 nil KATMAI,SSE
-movaps mem128,XMMREG nil 0F,29 $1,$2 nil KATMAI,SSE
-movhlps XMMREG,XMMREG nil 0F,12 $2r,$1 nil KATMAI,SSE
-movhps XMMREG,mem64 nil 0F,16 $2,$1 nil KATMAI,SSE
-movhps mem64,XMMREG nil 0F,17 $1,$2 nil KATMAI,SSE
-movlhps XMMREG,XMMREG nil 0F,16 $2r,$1 nil KATMAI,SSE
-movlps XMMREG,mem64 nil 0F,12 $2,$1 nil KATMAI,SSE
-movlps mem64,XMMREG nil 0F,13 $1,$2 nil KATMAI,SSE
-movmskps reg32,XMMREG nil 0F,50 $1r,$2 nil KATMAI,SSE
-movntps mem128,XMMREG nil 0F,2B $1,$2 nil KATMAI,SSE
-movntq mem64,MMXREG nil 0F,E7 $1,$2 nil KATMAI,MMX
-movntdq mem128,XMMREG nil 66,0F,E7 $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movss XMMREG,XMMREG nil F3,0F,10 $2r,$1 nil KATMAI,SSE
-movss XMMREG,mem64 nil F3,0F,10 $2,$1 nil KATMAI,SSE
-movss mem64,XMMREG nil F3,0F,11 $1,$2 nil KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movups XMMREG,XMMREG nil 0F,10 $2r,$1 nil KATMAI,SSE
-movups XMMREG,mem64 nil 0F,10 $2,$1 nil KATMAI,SSE
-movups mem64,XMMREG nil 0F,11 $1,$2 nil KATMAI,SSE
-mulps!sseps 59 KATMAI,SSE
-mulss!ssess 59 KATMAI,SSE
-orps!sseps 56 KATMAI,SSE
-pavgb!mmxsse E0 KATMAI P4,SSE2
-pavgw!mmxsse E3 KATMAI P4,SSE2
-pextrw reg32,MMXREG,imm8 nil 0F,C5 $1r,$2 $3,8 KATMAI,MMX
-pextrw reg32,XMMREG,imm8 nil 66,0F,C5 $1r,$2 $3,8 P4,SSE2
-pinsrw MMXREG,reg32,imm8 nil 0F,C4 $2r,$1 $3,8 KATMAI,MMX
-pinsrw MMXREG,rm16,imm8 nil 0F,C4 $2,$1 $3,8 KATMAI,MMX
-pinsrw XMMREG,reg32,imm8 nil 66,0F,C4 $2r,$1 $3,8 P4,SSE2
-pinsrw XMMREG,rm16,imm8 nil 66,0F,C4 $2,$1 $3,8 P4,SSE2
-pmaxsw!mmxsse EE KATMAI P4,SSE2
-pmaxub!mmxsse DE KATMAI P4,SSE2
-pminsw!mmxsse EA KATMAI P4,SSE2
-pminub!mmxsse DA KATMAI P4,SSE2
-pmovmskb reg32,MMXREG nil 0F,D7 $1r,$2 nil KATMAI,SSE
-pmovmskb reg32,XMMREG nil 66,0F,D7 $1r,$2 nil P4,SSE2
-pmulhuw!mmxsse E4 KATMAI P4,SSE2
-prefetchnta!twobytemem 0F,18,0 KATMAI
-prefetcht0!twobytemem 0F,18,1 KATMAI
-prefetcht1!twobytemem 0F,18,2 KATMAI
-prefetcht2!twobytemem 0F,18,3 KATMAI
-psadbw!mmxsse F6 KATMAI KATMAI,SSE
-pshufw MMXREG,rm64,imm8 nil 0F,70 $2,$1 $3,8 KATMAI,MMX
-rcpps!sseps 53 KATMAI,SSE
-rcpss!ssess 53 KATMAI,SSE
-rsqrtps!sseps 52 KATMAI,SSE
-rsqrtss!ssess 52 KATMAI,SSE
-sfence!threebyte 0F,AE,F8 KATMAI
-shufps!ssepsimm C6
-sqrtps!sseps 51 KATMAI,SSE
-sqrtss!ssess 51 KATMAI,SSE
-stmxcsr mem32 nil 0F,AE $1,3 nil KATMAI,SSE
-subps!sseps 5C KATMAI,SSE
-subss!ssess 5C KATMAI,SSE
-ucomiss!ssess 2E KATMAI,SSE
-unpckhps!sseps 15 KATMAI,SSE
-unpcklps!sseps 14 KATMAI,SSE
-xorps!sseps 57 KATMAI,SSE
-;
-; SSE2 instructions
-;
-; Standard
-!sse2pd XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2
-!sse2sd XMMREG,rm128 nil F2,0F,$0.1 $2,$1 nil P4,SSE2
-; With immediate
-!sse2pdimm XMMREG,rm128,imm8 nil 66,0F,$0.1 $2,$1 $3,8 P4,SSE2
-; Comparisons
-!sse2cmppd XMMREG,rm128 nil 66,0F,C2 $2,$1 $0.1,8 P4,SSE2
-!sse2cmpsd XMMREG,rm128 nil F2,0F,C2 $2,$1 $0.1,8 P4,SSE2
-addpd!sse2pd 58
-addsd!sse2sd 58
-andnpd!sse2pd 55
-andpd!sse2pd 54
-cmpeqpd!sse2cmppd 0
-cmpeqsd!sse2cmpsd 0
-cmplepd!sse2cmppd 2
-cmplesd!sse2cmpsd 2
-cmpltpd!sse2cmppd 1
-cmpltsd!sse2cmpsd 1
-cmpneqpd!sse2cmppd 4
-cmpneqsd!sse2cmpsd 4
-cmpnlepd!sse2cmppd 6
-cmpnlesd!sse2cmpsd 6
-cmpnltpd!sse2cmppd 5
-cmpnltsd!sse2cmpsd 5
-cmpordpd!sse2cmppd 7
-cmpordsd!sse2cmpsd 7
-cmpunordpd!sse2cmppd 3
-cmpunordsd!sse2cmpsd 3
-cmppd!sse2pdimm C2
-cmpsd XMMREG,rm128,imm8 nil F2,0F,C2 $2,$1 $3,8 P4,SSE2
-comisd!sse2pd 2F
-cvtpi2pd!sse2pd 2A
-cvtsi2sd!sse2sd 2A
-divpd!sse2pd 5E
-divsd!sse2sd 5E
-maxpd!sse2pd 5F
-maxsd!sse2sd 5F
-minpd!sse2pd 5D
-minsd!sse2sd 5D
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movapd XMMREG,XMMREG nil 66,0F,28 $2r,$1 nil P4,SSE2
-movapd XMMREG,mem128 nil 66,0F,28 $2,$1 nil P4,SSE2
-movapd mem128,XMMREG nil 66,0F,29 $1,$2 nil P4,SSE2
-movhpd XMMREG,mem64 nil 66,0F,16 $2,$1 nil P4,SSE2
-movhpd mem64,XMMREG nil 66,0F,17 $1,$2 nil P4,SSE2
-movlpd XMMREG,mem64 nil 66,0F,12 $2,$1 nil P4,SSE2
-movlpd mem64,XMMREG nil 66,0F,13 $1,$2 nil P4,SSE2
-movmskpd reg32,XMMREG nil 66,0F,50 $1r,$2 nil P4,SSE2
-movntpd mem128,XMMREG nil 66,0F,2B $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movsd XMMREG,XMMREG nil F2,0F,10 $2r,$1 nil P4,SSE2
-movsd XMMREG,mem64 nil F2,0F,10 $2,$1 nil P4,SSE2
-movsd mem64,XMMREG nil F2,0F,11 $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movupd XMMREG,XMMREG nil 66,0F,10 $2r,$1 nil P4,SSE2
-movupd XMMREG,mem64 nil 66,0F,10 $2,$1 nil P4,SSE2
-movupd mem64,XMMREG nil 66,0F,11 $1,$2 nil P4,SSE2
-mulpd!sse2pd 59
-mulsd!sse2sd 59
-orpd!sse2pd 56
-shufpd!sse2pdimm C6
-sqrtpd!sse2pd 51
-sqrtsd!sse2sd 51
-subpd!sse2pd 5C
-subsd!sse2sd 5C
-ucomisd!sse2sd 2E
-unpckhpd!sse2pd 15
-unpcklpd!sse2pd 14
-xorpd!sse2pd 57
-cvtdq2pd!ssess E6 P4,SSE2
-cvtpd2dq!sse2sd E6
-cvtdq2ps!sseps 5B P4,SSE2
-cvtpd2pi!sse2pd 2D
-cvtpd2ps!sse2pd 5A
-cvtps2pd!sseps 5A P4,SSE2
-cvtps2dq!sse2pd 5B
-cvtsd2si!sse2sd 2D
-cvtsd2ss!sse2sd 5A
-cvtss2sd!ssess 5A P4,SSE2
-cvttpd2pi!sse2pd 2C
-cvttsd2si!sse2sd 2C
-cvttpd2dq!sse2pd E6
-cvttps2dq!ssess 5B P4,SSE2
-maskmovdqu XMMREG,XMMREG nil 66,0F,F7 $2r,$1 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqa XMMREG,XMMREG nil 66,0F,6F $2r,$1 nil P4,SSE2
-movdqa XMMREG,mem128 nil 66,0F,6F $2,$1 nil P4,SSE2
-movdqa mem128,XMMREG nil 66,0F,7F $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqu XMMREG,XMMREG nil F3,0F,6F $2r,$1 nil P4,SSE2
-movdqu XMMREG,mem128 nil F3,0F,6F $2,$1 nil P4,SSE2
-movdqu mem128,XMMREG nil F3,0F,7F $1,$2 nil P4,SSE2
-movdq2q MMXREG,XMMREG nil F2,0F,D6 $2r,$1 nil P4,SSE2
-movq2dq XMMREG,MMXREG nil F3,0F,D6 $2r,$1 nil P4,SSE2
-pmuludq!mmxsse F4 P4 P4,SSE2
-pshufd!sse2pdimm 70
-pshufhw XMMREG,rm128,imm8 nil F3,0F,70 $2,$1 $3,8 P4,SSE2
-pshuflw XMMREG,rm128,imm8 nil F2,0F,70 $2,$1 $3,8 P4,SSE2
-pslldq XMMREG,imm8 nil 66,0F,73 $1r,7 $2,8 P4,SSE2
-psrldq XMMREG,imm8 nil 66,0F,73 $1r,3 $2,8 P4,SSE2
-punpckhqdq!sse2pd 6D
-punpcklqdq!sse2pd 6C
-;
-; AMD 3DNow! instructions
-;
-!now3d MMXREG,rm64 nil 0F,0F $2,$1 $0.1,8 @0,3DNOW,AMD
-prefetch!twobytemem 0F,0D,0 P5,3DNOW,AMD
-prefetchw!twobytemem 0F,0D,1 P5,3DNOW,AMD
-femms!twobyte 0F,0E P5,3DNOW,AMD
-pavgusb!now3d BF P5
-pf2id!now3d 1D P5
-pf2iw!now3d 1C ATHLON
-pfacc!now3d AE P5
-pfadd!now3d 9E P5
-pfcmpeq!now3d B0 P5
-pfcmpge!now3d 90 P5
-pfcmpgt!now3d A0 P5
-pfmax!now3d A4 P5
-pfmin!now3d 94 P5
-pfmul!now3d B4 P5
-pfnacc!now3d 8A ATHLON
-pfpnacc!now3d 8E ATHLON
-pfrcp!now3d 96 P5
-pfrcpit1!now3d A6 P5
-pfrcpit2!now3d B6 P5
-pfrsqit1!now3d A7 P5
-pfrsqrt!now3d 97 P5
-pfsub!now3d 9A P5
-pfsubr!now3d AA P5
-pi2fd!now3d 0D P5
-pi2fw!now3d 0C ATHLON
-pmulhrwa!now3d B7 P5
-pswapd!now3d BB ATHLON
-;
-; AMD extensions
-;
-syscall!twobyte 0F,05 P6,AMD
-sysret!twobyte 0F,07 P6,PRIV,AMD
-; swapgs
-;
-; Cyrix MMX instructions
-;
-!cyrixmmx MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX,CYRIX
-paddsiw!cyrixmmx 51
-paveb!cyrixmmx 50
-pdistib!cyrixmmx 54
-pmachriw MMXREG,mem64 nil 0F,5E $2,$1 nil P5,MMX,CYRIX
-pmagw!cyrixmmx 52
-pmulhriw!cyrixmmx 5D
-pmulhrwc!cyrixmmx 59
-pmvgezb!cyrixmmx 5C
-pmvlzb!cyrixmmx 5B
-pmvnzb!cyrixmmx 5A
-pmvzb!cyrixmmx 58
-psubsiw!cyrixmmx 55
-;
-; Cyrix extensions
-;
-!cyrixsmm mem80 nil 0F,$0.1 $1,0 nil 486,CYRIX,SMM
-rdshr!twobyte 0F,36 P6,CYRIX,SMM
-rsdc segreg,mem80 nil 0F,79 $2,$1 nil 486,CYRIX,SMM
-rsldt!cyrixsmm 7B
-rsts!cyrixsmm 7D
-svdc mem80,segreg nil 0F,78 $1,$2 nil 486,CYRIX,SMM
-svldt!cyrixsmm 7A
-svts!cyrixsmm 7C
-smint!twobyte 0F,38 P6,CYRIX
-smintold!twobyte 0F,7E 486,CYRIX,OBS
-wrshr!twobyte 0F,37 P6,CYRIX,SMM
-;
-; Obsolete/Undocumented Instructions
-;
-fsetpm!twobyte DB,E4 286,FPU,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts reg16,reg16 16 0F,A7 $1r,$2 nil 386,UNDOC,OBS
-ibts mem,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS
-ibts mem16x,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts reg32,reg32 32 0F,A7 $1r,$2 nil 386,UNDOC,OBS
-ibts mem,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS
-ibts mem32x,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS
-loadall!twobyte 0F,07 386,UNDOC
-loadall286!twobyte 0F,05 286,UNDOC
-;pop reg_cs nil 0F nil nil 8086,UNDOC,OBS
-salc!onebyte nil,D6 8086,UNDOC
-smi!onebyte nil,F1 386,UNDOC
-; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11).
-umov reg8,reg8 nil 0F,10 $1r,$2 nil 386,UNDOC
-umov reg16,reg16 16 0F,11 $1r,$2 nil 386,UNDOC
-umov reg32,reg32 32 0F,11 $1r,$2 nil 386,UNDOC
-umov mem,reg8 nil 0F,10 $1,$2 nil 386,UNDOC
-umov mem8x,reg8 nil 0F,10 $1,$2 nil 386,UNDOC
-umov mem,reg16 16 0F,11 $1,$2 nil 386,UNDOC
-umov mem16x,reg16 16 0F,11 $1,$2 nil 386,UNDOC
-umov mem,reg32 32 0F,11 $1,$2 nil 386,UNDOC
-umov mem32x,reg32 32 0F,11 $1,$2 nil 386,UNDOC
-umov reg8,mem8 nil 0F,12 $2,$1 nil 386,UNDOC
-umov reg16,mem16 16 0F,13 $2,$1 nil 386,UNDOC
-umov reg32,mem32 32 0F,13 $2,$1 nil 386,UNDOC
-xbts reg16,mem16 16 0F,A6 $2,$1 nil 386,UNDOC,OBS
-xbts reg32,mem32 32 0F,A6 $2,$1 nil 386,UNDOC,OBS
+++ /dev/null
-/* $IdPath$
- * x86 internals header file
- *
- * Copyright (C) 2001 Peter Johnson
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#ifndef YASM_X86_INT_H
-#define YASM_X86_INT_H
-
-typedef struct x86_effaddr_data {
- unsigned char segment; /* segment override, 0 if none */
-
- /* How the spare (register) bits in Mod/RM are handled:
- * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
- * They're set in bytecode_new_insn().
- */
- unsigned char modrm;
- unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
- unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
-
- unsigned char sib;
- unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
- unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
- 0xff if unknown */
-} x86_effaddr_data;
-
-typedef struct x86_insn {
- /*@null@*/ effaddr *ea; /* effective address */
-
- /*@null@*/ immval *imm; /* immediate or relative value */
-
- unsigned char opcode[3]; /* opcode */
- unsigned char opcode_len;
-
- unsigned char addrsize; /* 0 or =mode_bits => no override */
- unsigned char opersize; /* 0 indicates no override */
- unsigned char lockrep_pre; /* 0 indicates no prefix */
-
- /* HACK, but a space-saving one: shift opcodes have an immediate
- * form and a ,1 form (with no immediate). In the parser, we
- * set this and opcode_len=1, but store the ,1 version in the
- * second byte of the opcode array. We then choose between the
- * two versions once we know the actual value of imm (because we
- * don't know it in the parser module).
- *
- * A override to force the imm version should just leave this at
- * 0. Then later code won't know the ,1 version even exists.
- * TODO: Figure out how this affects CPU flags processing.
- *
- * Call x86_SetInsnShiftFlag() to set this flag to 1.
- */
- unsigned char shift_op;
-
- /* HACK, similar to that for shift_op above, for optimizing instructions
- * that take a sign-extended imm8 as well as imm values (eg, the arith
- * instructions and a subset of the imul instructions).
- */
- unsigned char signext_imm8_op;
-
- unsigned char mode_bits;
-} x86_insn;
-
-typedef struct x86_jmprel {
- expr *target; /* target location */
-
- struct {
- unsigned char opcode[3];
- unsigned char opcode_len; /* 0 = no opc for this version */
- } shortop, nearop;
-
- /* which opcode are we using? */
- /* The *FORCED forms are specified in the source as such */
- x86_jmprel_opcode_sel op_sel;
-
- unsigned char addrsize; /* 0 or =mode_bits => no override */
- unsigned char opersize; /* 0 indicates no override */
- unsigned char lockrep_pre; /* 0 indicates no prefix */
-
- unsigned char mode_bits;
-} x86_jmprel;
-
-void x86_bc_delete(bytecode *bc);
-void x86_bc_print(FILE *f, const bytecode *bc);
-bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
- resolve_label_func resolve_label);
-int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
- void *d, output_expr_func output_expr);
-
-int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
- unsigned char nosplit, unsigned char *displen,
- unsigned char *modrm, unsigned char *v_modrm,
- unsigned char *n_modrm, unsigned char *sib,
- unsigned char *v_sib, unsigned char *n_sib);
-
-#endif
/*
* x86 architecture description
*
- * Copyright (C) 2001 Peter Johnson
+ * Copyright (C) 2002 Peter Johnson
*
* This file is part of YASM.
*
#include "util.h"
/*@unused@*/ RCSID("$IdPath$");
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
#include "bytecode.h"
+
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
unsigned char x86_mode_bits = 0;
+/*
+ * Handle an architecture-specific directive.
+ *
+ * Only "bits" (compared case-insensitively) is recognized.  Its first
+ * value/parameter must have no value string and a parameter expression for
+ * which expr_get_intnum() yields an integer equal to 16 or 32; that number
+ * becomes the new x86_mode_bits.  Anything else reports an error and leaves
+ * x86_mode_bits unchanged.
+ *
+ * Returns 0 if the directive name was recognized (even when its argument
+ * was rejected), 1 if it was not.
+ */
+int
+x86_directive(const char *name, valparamhead *valparams,
+              /*@unused@*/ /*@null@*/ valparamhead *objext_valparams,
+              /*@unused@*/ sectionhead *headp)
+{
+    valparam *vp;
+    const intnum *intn;
+    long lval;
+    int accepted = 0;
+
+    /* Not ours: let the caller try someone else. */
+    if (strcasecmp(name, "bits") != 0)
+        return 1;
+
+    vp = vps_first(valparams);
+    if (vp && !vp->val && vp->param != NULL) {
+        intn = expr_get_intnum(&vp->param);
+        if (intn != NULL) {
+            lval = intnum_get_int(intn);
+            if (lval == 16 || lval == 32) {
+                x86_mode_bits = (unsigned char)lval;
+                accepted = 1;
+            }
+        }
+    }
+
+    if (!accepted)
+        Error(_("invalid argument to [%s]"), "BITS");
+    return 0;
+}
+
+/*
+ * Return the size in bytes of the register encoded in reg.
+ *
+ * The register class is everything above the low 3 bits (reg & ~7); the
+ * low 3 bits are the register number and do not affect the size.  An
+ * unrecognized class is reported through InternalError(), and 0 is
+ * returned if that call comes back.
+ */
+unsigned int
+x86_get_reg_size(unsigned long reg)
+{
+    unsigned int nbytes = 0;
+
+    switch ((x86_expritem_reg_size)(reg & ~7)) {
+        case X86_REG8:
+            nbytes = 1;
+            break;
+        case X86_REG16:
+            nbytes = 2;
+            break;
+        case X86_REG32:
+        case X86_CRREG:
+        case X86_DRREG:
+        case X86_TRREG:
+            nbytes = 4;
+            break;
+        case X86_MMXREG:
+            nbytes = 8;
+            break;
+        case X86_XMMREG:
+            nbytes = 16;
+            break;
+        case X86_FPUREG:
+            nbytes = 10;
+            break;
+        default:
+            InternalError(_("unknown register size"));
+    }
+
+    return nbytes;
+}
+
+/*
+ * Write the textual name of the register encoded in reg to f.
+ *
+ * The class (reg & ~7) picks the naming scheme and the low 3 bits pick the
+ * register within it: 8/16-bit general registers come from name tables,
+ * 32-bit ones are the 16-bit name with an "e" prefix, and the remaining
+ * classes print a fixed prefix followed by the register number.  An
+ * unrecognized class is reported through InternalError().
+ */
+void
+x86_reg_print(FILE *f, unsigned long reg)
+{
+    static const char *name8[] = {"al","cl","dl","bl","ah","ch","dh","bh"};
+    static const char *name1632[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+    const unsigned long num = reg & 7;  /* register number within its class */
+
+    switch ((x86_expritem_reg_size)(reg & ~7)) {
+        case X86_REG8:
+            fprintf(f, "%s", name8[num]);
+            break;
+        case X86_REG16:
+            fprintf(f, "%s", name1632[num]);
+            break;
+        case X86_REG32:
+            fprintf(f, "e%s", name1632[num]);
+            break;
+        case X86_MMXREG:
+            fprintf(f, "mm%d", (int)num);
+            break;
+        case X86_XMMREG:
+            fprintf(f, "xmm%d", (int)num);
+            break;
+        case X86_CRREG:
+            fprintf(f, "cr%d", (int)num);
+            break;
+        case X86_DRREG:
+            fprintf(f, "dr%d", (int)num);
+            break;
+        case X86_TRREG:
+            fprintf(f, "tr%d", (int)num);
+            break;
+        case X86_FPUREG:
+            fprintf(f, "st%d", (int)num);
+            break;
+        default:
+            InternalError(_("unknown register size"));
+    }
+}
+
+/*
+ * Write the name of the segment register encoded in segreg to f.
+ *
+ * The low 3 bits of segreg select the name (0=es, 1=cs, 2=ss, 3=ds, 4=fs,
+ * 5=gs).  Values 6 and 7 have no entry in the table, so they are reported
+ * through InternalError() instead of being used to index past its end.
+ */
+void
+x86_segreg_print(FILE *f, unsigned long segreg)
+{
+    static const char *name[] = {"es","cs","ss","ds","fs","gs"};
+    const unsigned long idx = segreg & 7;
+
+    /* The mask allows 0..7 but only 6 names exist: guard the lookup. */
+    if (idx >= sizeof(name) / sizeof(name[0]))
+        InternalError(_("unknown segment register"));
+    else
+        fprintf(f, "%s", name[idx]);
+}
+
+/*
+ * Apply an instruction prefix to bytecode bc.
+ *
+ * data[0] holds the prefix kind (an x86_parse_insn_prefix value) and
+ * data[1] the value handed, truncated to unsigned char, to the matching
+ * bytecode setter.  Any other kind is ignored.
+ */
+void
+x86_handle_prefix(bytecode *bc, const unsigned long data[4])
+{
+    const x86_parse_insn_prefix kind = (x86_parse_insn_prefix)data[0];
+    const unsigned char value = (unsigned char)data[1];
+
+    if (kind == X86_LOCKREP)
+        x86_bc_insn_set_lockrep_prefix(bc, value);
+    else if (kind == X86_ADDRSIZE)
+        x86_bc_insn_addrsize_override(bc, value);
+    else if (kind == X86_OPERSIZE)
+        x86_bc_insn_opersize_override(bc, value);
+}
+
+/*
+ * Apply a segment register prefix to the effective address of bytecode bc.
+ * The segment value stored is segreg shifted right by 8, truncated to an
+ * unsigned char.
+ */
+void
+x86_handle_seg_prefix(bytecode *bc, unsigned long segreg)
+{
+    effaddr *ea = x86_bc_insn_get_ea(bc);
+    const unsigned char segment = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(ea, segment);
+}
+
+/*
+ * Apply a segment override directly to effective address ea.
+ * The segment value stored is segreg shifted right by 8, truncated to an
+ * unsigned char.
+ */
+void
+x86_handle_seg_override(effaddr *ea, unsigned long segreg)
+{
+    const unsigned char segment = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(ea, segment);
+}
+
/* Define arch structure -- see arch.h for details */
arch x86_arch = {
"x86 (IA-32, x86-64)",
"x86",
+ { /* NOTE(review): presumably the parser-facing hook table; entries are
+    * positional, so their order must match the declaration in arch.h */
+ x86_switch_cpu,
+ x86_check_identifier,
+ x86_directive,
+ x86_new_insn,
+ x86_handle_prefix,
+ x86_handle_seg_prefix,
+ x86_handle_seg_override,
+ x86_ea_new_expr
+ },
{
X86_BYTECODE_TYPE_MAX,
x86_bc_delete,
x86_bc_print,
x86_bc_resolve,
x86_bc_tobytes
- }
+ },
+ x86_get_reg_size,
+ x86_reg_print, /* NOTE(review): presumably the slot called as cur_arch->reg_print() -- confirm */
+ x86_segreg_print, /* NOTE(review): presumably the slot called as cur_arch->segreg_print() -- confirm */
+ NULL, /* x86_ea_data_delete */
+ x86_ea_data_print
};
} x86_bytecode_type;
#define X86_BYTECODE_TYPE_MAX X86_BC_JMPREL+1
+/* Register class codes.  0-7 (low 3 bits) used for register number, stored
+ * in same data area; the class is recovered with (reg & ~7).
+ */
+typedef enum {
+ X86_REG8 = 0x8, /* 8-bit register */
+ X86_REG16 = 0x10, /* 16-bit register */
+ X86_REG32 = 0x20, /* 32-bit register (printed with an "e" prefix) */
+ X86_MMXREG = 0x40, /* printed as mmN */
+ X86_XMMREG = 0x80, /* printed as xmmN */
+ X86_CRREG = 0xC0, /* printed as crN */
+ X86_DRREG = 0xC8, /* printed as drN */
+ X86_TRREG = 0xF0, /* printed as trN */
+ X86_FPUREG = 0xF8 /* printed as stN */
+} x86_expritem_reg_size;
+
+typedef enum { /* prefix kind placed in data[0]; dispatched on by x86_handle_prefix() */
+ X86_LOCKREP = 1, /* lock/rep prefix; data[1] is the prefix byte */
+ X86_ADDRSIZE, /* address size override; data[1] is the size (16 or 32) */
+ X86_OPERSIZE /* operand size override; data[1] is the size (16 or 32) */
+} x86_parse_insn_prefix;
+
+typedef enum { /* target modifier placed in data[0] by x86_check_identifier() */
+ X86_NEAR, /* NEAR keyword */
+ X86_SHORT, /* SHORT keyword */
+ X86_FAR /* FAR keyword */
+} x86_parse_targetmod;
+
typedef enum {
JR_NONE,
JR_SHORT,
void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment);
effaddr *x86_ea_new_reg(unsigned char reg);
-effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len);
effaddr *x86_ea_new_expr(/*@keep@*/ expr *e);
/*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc);
*/
typedef struct x86_new_insn_data {
/*@keep@*/ /*@null@*/ effaddr *ea;
- /*@keep@*/ /*@null@*/ immval *imm;
+ /*@keep@*/ /*@null@*/ expr *imm;
unsigned char opersize;
unsigned char op_len;
unsigned char op[3];
extern unsigned char x86_mode_bits;
+typedef struct x86_effaddr_data { /* x86-specific effective address data, reached via ea_get_data() */
+ unsigned char segment; /* segment override, 0 if none */
+
+ /* How the spare (register) bits in Mod/RM are handled:
+ * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+ * They're set in bytecode_new_insn().
+ */
+ unsigned char modrm;
+ unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+ unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
+
+ unsigned char sib;
+ unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
+ unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
+ 0xff if unknown */
+} x86_effaddr_data;
+
+typedef struct x86_insn { /* data of an X86_BC_INSN bytecode */
+ /*@null@*/ effaddr *ea; /* effective address */
+
+ /*@null@*/ immval *imm; /* immediate or relative value */
+
+ unsigned char opcode[3]; /* opcode */
+ unsigned char opcode_len; /* number of bytes of opcode[] in use */
+
+ unsigned char addrsize; /* 0 or =mode_bits => no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+ /* HACK, but a space-saving one: shift opcodes have an immediate
+ * form and a ,1 form (with no immediate). In the parser, we
+ * set this and opcode_len=1, but store the ,1 version in the
+ * second byte of the opcode array. We then choose between the
+ * two versions once we know the actual value of imm (because we
+ * don't know it in the parser module).
+ *
+ * An override to force the imm version should just leave this at
+ * 0. Then later code won't know the ,1 version even exists.
+ * TODO: Figure out how this affects CPU flags processing.
+ *
+ * Call x86_SetInsnShiftFlag() to set this flag to 1.
+ */
+ unsigned char shift_op;
+
+ /* HACK, similar to that for shift_op above, for optimizing instructions
+ * that take a sign-extended imm8 as well as imm values (eg, the arith
+ * instructions and a subset of the imul instructions).
+ */
+ unsigned char signext_imm8_op;
+
+ unsigned char mode_bits; /* NOTE(review): presumably the BITS mode in effect for this insn -- confirm */
+} x86_insn;
+
+typedef struct x86_jmprel { /* NOTE(review): presumably the data of an X86_BC_JMPREL bytecode -- confirm */
+ expr *target; /* target location */
+
+ struct {
+ unsigned char opcode[3];
+ unsigned char opcode_len; /* 0 = no opc for this version */
+ } shortop, nearop; /* opcode bytes for the short and the near version */
+
+ /* which opcode are we using? */
+ /* The *FORCED forms are specified in the source as such */
+ x86_jmprel_opcode_sel op_sel;
+
+ unsigned char addrsize; /* 0 or =mode_bits => no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+ unsigned char mode_bits; /* NOTE(review): presumably the BITS mode in effect -- confirm */
+} x86_jmprel;
+
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(FILE *f, const bytecode *bc);
+bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
+ resolve_label_func resolve_label);
+int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
+ void *d, output_expr_func output_expr);
+
+int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
+ unsigned char nosplit, unsigned char *displen,
+ unsigned char *modrm, unsigned char *v_modrm,
+ unsigned char *n_modrm, unsigned char *sib,
+ unsigned char *v_sib, unsigned char *n_sib);
+
+void x86_switch_cpu(const char *cpuid);
+
+/* Classify identifier id, storing arch-specific results in data.
+ * The array size matches the definition (data[4]); the instruction-data
+ * macros write data[0] through data[2], so a 2-element array is too small.
+ */
+arch_check_id_retval x86_check_identifier(unsigned long data[4],
+                                          const char *id);
+
+int x86_directive(const char *name, valparamhead *valparams,
+ /*@null@*/ valparamhead *objext_valparams,
+ sectionhead *headp);
+
+/* Build an instruction bytecode from lexer data and parsed operands.
+ * The array size matches the definition (data[4]), which reads data[0]
+ * through data[2].  Returns NULL if no instruction form matches.
+ */
+/*@null@*/ bytecode *x86_new_insn(const unsigned long data[4],
+                                  int num_operands,
+                                  /*@null@*/ insn_operandhead *operands);
+
+void x86_handle_prefix(bytecode *bc, const unsigned long data[4]);
+
+void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg);
+
+void x86_handle_seg_override(effaddr *ea, unsigned long segreg);
+
+unsigned int x86_get_reg_size(unsigned long reg);
+
+void x86_reg_print(FILE *f, unsigned long reg);
+
+void x86_segreg_print(FILE *f, unsigned long segreg);
+
+void x86_ea_data_print(FILE *f, const effaddr *ea);
+
#endif
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
#include "bc-int.h"
ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */
}
- insn->imm = d->imm;
if (d->imm) {
+ insn->imm = imm_new_expr(d->imm);
insn->imm->len = d->im_len;
insn->imm->sign = d->im_sign;
- }
+ } else
+ insn->imm = NULL;
insn->opcode[0] = d->op[0];
insn->opcode[1] = d->op[1];
/*@-compmempass@*/
effaddr *
-x86_ea_new_imm(immval *imm, unsigned char im_len)
+x86_ea_new_imm(expr *imm, unsigned char im_len)
{
effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
x86_effaddr_data *ead = ea_get_data(ea);
- ea->disp = imm->val;
+ ea->disp = imm;
ea->len = im_len;
ea->nosplit = 0;
ead->segment = 0;
switch ((x86_bytecode_type)bc->type) {
case X86_BC_INSN:
insn = bc_get_data(bc);
- if (insn->ea) {
- expr_delete(insn->ea->disp);
- xfree(insn->ea);
- }
+ if (insn->ea)
+ ea_delete(insn->ea);
if (insn->imm) {
expr_delete(insn->imm->val);
xfree(insn->imm);
}
}
+/* Print the x86-specific part of effective address ea to f: the segment
+ * override, then the Mod/RM byte and the SIB byte with their valid/need
+ * flags, one line each, indented by indent_level.
+ */
+void
+x86_ea_data_print(FILE *f, const effaddr *ea)
+{
+    const x86_effaddr_data *x86_ea = ea_get_const_data(ea);
+    const unsigned int segment = (unsigned int)x86_ea->segment;
+    const unsigned int modrm = (unsigned int)x86_ea->modrm;
+    const unsigned int valid_modrm = (unsigned int)x86_ea->valid_modrm;
+    const unsigned int need_modrm = (unsigned int)x86_ea->need_modrm;
+    const unsigned int sib = (unsigned int)x86_ea->sib;
+    const unsigned int valid_sib = (unsigned int)x86_ea->valid_sib;
+    const unsigned int need_sib = (unsigned int)x86_ea->need_sib;
+
+    fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", segment);
+    fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "",
+            modrm, valid_modrm, need_modrm);
+    fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "",
+            sib, valid_sib, need_sib);
+}
+
void
x86_bc_print(FILE *f, const bytecode *bc)
{
const x86_insn *insn;
const x86_jmprel *jmprel;
- x86_effaddr_data *ead;
switch ((x86_bytecode_type)bc->type) {
case X86_BC_INSN:
insn = bc_get_const_data(bc);
fprintf(f, "%*s_Instruction_\n", indent_level, "");
fprintf(f, "%*sEffective Address:", indent_level, "");
- if (!insn->ea)
- fprintf(f, " (nil)\n");
- else {
- indent_level++;
- fprintf(f, "\n%*sDisp=", indent_level, "");
- expr_print(f, insn->ea->disp);
+ if (insn->ea) {
fprintf(f, "\n");
- ead = ea_get_data(insn->ea);
- fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n",
- indent_level, "", (unsigned int)insn->ea->len,
- (unsigned int)ead->segment,
- (unsigned int)insn->ea->nosplit);
- fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n",
- indent_level, "", (unsigned int)ead->modrm,
- (unsigned int)ead->valid_modrm,
- (unsigned int)ead->need_modrm);
- fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n",
- indent_level, "", (unsigned int)ead->sib,
- (unsigned int)ead->valid_sib,
- (unsigned int)ead->need_sib);
+ indent_level++;
+ ea_print(f, insn->ea);
indent_level--;
- }
+ } else
+ fprintf(f, " (nil)\n");
fprintf(f, "%*sImmediate Value:", indent_level, "");
if (!insn->imm)
fprintf(f, " (nil)\n");
x86_effaddr_data ead_t = *ead; /* structure copy */
unsigned char displen = ea->len;
- if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
- (!ead->valid_modrm && ead->need_modrm))) {
+ if (ea->disp) {
temp = expr_copy(ea->disp);
assert(temp != NULL);
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
#include "expr-int.h"
int *ret;
/* don't allow 16-bit registers */
- if (ei->data.reg.size != 32)
+ if ((ei->data.reg & ~7) != X86_REG32)
return 0;
- ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */
+ ret = &data[ei->data.reg & 7];
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
reg16[7] = &data->di;
/* don't allow 32-bit registers */
- if (ei->data.reg.size != 16)
+ if ((ei->data.reg & ~7) != X86_REG16)
return 0;
- ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
+ /* & 7 for sanity check */
+ ret = reg16[ei->data.reg & 7];
/* only allow BX, SI, DI, BP */
if (!ret)
unsigned char *addrsize = (unsigned char *)d;
if (ei->type == EXPR_REG) {
- *addrsize = ei->data.reg.size;
+ *addrsize = (unsigned char)ei->data.reg & ~7;
return 1;
} else
return 0;
return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE,
havereg == HAVE_BP, displen, modrm,
v_modrm);
+ } else if (!*n_modrm && !*n_sib) {
+ /* Special case for MOV MemOffs opcode: displacement but no modrm. */
+ if (*addrsize == 32)
+ *displen = 4;
+ else if (*addrsize == 16)
+ *displen = 2;
}
return 1;
}
--- /dev/null
+/*
+ * x86 identifier recognition and instruction handling
+ *
+ * Copyright (C) 2002 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+#include "src/arch/x86/x86arch.h"
+
+#include "expr-int.h"
+#include "bc-int.h"
+
+
+/* Available CPU feature flags */
+#define CPU_Any (0) /* Any old cpu will do */
+#define CPU_086 CPU_Any
+#define CPU_186 (1<<0) /* i186 or better required */
+#define CPU_286 (1<<1) /* i286 or better required */
+#define CPU_386 (1<<2) /* i386 or better required */
+#define CPU_486 (1<<3) /* i486 or better required */
+#define CPU_586 (1<<4) /* i586 or better required */
+#define CPU_686 (1<<5) /* i686 or better required */
+#define CPU_P3 (1<<6) /* Pentium3 or better required */
+#define CPU_P4 (1<<7) /* Pentium4 or better required */
+#define CPU_IA64 (1<<8) /* IA-64 or better required */
+#define CPU_K6 (1<<9) /* AMD K6 or better required */
+#define CPU_Athlon (1<<10) /* AMD Athlon or better required */
+#define CPU_Hammer (1<<11) /* AMD Sledgehammer or better required */
+#define CPU_FPU (1<<12) /* FPU support required */
+#define CPU_MMX (1<<13) /* MMX support required */
+#define CPU_SSE (1<<14) /* Streaming SIMD extensions required */
+#define CPU_SSE2 (1<<15) /* Streaming SIMD extensions 2 required */
+#define CPU_3DNow (1<<16) /* 3DNow! support required */
+#define CPU_Cyrix (1<<17) /* Cyrix-specific instruction */
+#define CPU_AMD (1<<18) /* AMD-specific inst. (older than K6) */
+#define CPU_SMM (1<<19) /* System Management Mode instruction */
+#define CPU_Prot (1<<20) /* Protected mode only instruction */
+#define CPU_Undoc (1<<21) /* Undocumented instruction */
+#define CPU_Obs (1<<22) /* Obsolete instruction */
+#define CPU_Priv (1<<23) /* Privileged instruction */
+
+/* What instructions/features are enabled? Defaults to all. */
+static unsigned long cpu_enabled = ~CPU_Any;
+
+/* Opcode modifiers. The opcode bytes are in "reverse" order because the
+ * parameters are read from the arch-specific data in LSB->MSB order.
+ * (This is only for aesthetic reasons in the lexer code below; there is no
+ * practical reason.)
+ */
+#define MOD_Op2Add (1<<0) /* Parameter adds to opcode byte 2 */
+#define MOD_Gap0 (1<<1) /* Eats a parameter */
+#define MOD_Op1Add (1<<2) /* Parameter adds to opcode byte 1 */
+#define MOD_Gap1 (1<<3) /* Eats a parameter */
+#define MOD_Op0Add (1<<4) /* Parameter adds to opcode byte 0 */
+#define MOD_SpAdd (1<<5) /* Parameter adds to "spare" value */
+#define MOD_OpSizeR (1<<6) /* Parameter replaces opersize */
+#define MOD_Imm8 (1<<7) /* Parameter is included as immediate byte */
+
+/* Operand types. These are more detailed than the "general" types for all
+ * architectures, as they include the size, for instance.
+ * Bit Breakdown (from LSB to MSB):
+ * - 4 bits = general type (must be exact match, except for =3):
+ * 0 = immediate
+ * 1 = any general purpose, MMX, XMM, or FPU register
+ * 2 = memory
+ * 3 = any general purpose, MMX, XMM, or FPU register OR memory
+ * 4 = segreg
+ * 5 = any CR register
+ * 6 = any DR register
+ * 7 = any TR register
+ * 8 = ST0
+ * 9 = AL/AX/EAX (depending on size)
+ * A = CL/CX/ECX (depending on size)
+ * B = CR4
+ * C = memory offset (an EA, but with no registers allowed)
+ * [special case for MOV opcode]
+ * - 3 bits = size (user-specified, or from register size):
+ * 0 = any size acceptable
+ * 1/2/3/4 = 8/16/32/64 bits (from user or reg size)
+ * 5/6 = 80/128 bits (from user)
+ * - 1 bit = size implicit or explicit ("strictness" of size matching on
+ * non-registers -- registers are always strictly matched):
+ * 0 = user size must exactly match size above.
+ * 1 = user size either unspecified or exactly match size above.
+ *
+ * The bits more significant than those above are actions: what to do with
+ * the operand if the instruction matches. They essentially describe which
+ * part of the output bytecode gets the operand. This may require conversion
+ * (e.g. a register going into an ea field). Naturally, only one of each of
+ * these may be contained in the operands of a single insn_info structure.
+ * - 3 bits = action:
+ * 0 = does nothing (operand data is discarded)
+ * 1 = operand data goes into ea field
+ * 2 = operand data goes into imm field
+ * 3 = operand data goes into "spare" field
+ * 4 = operand data is added to opcode byte 0
+ */
+#define OPT_Imm 0x0
+#define OPT_Reg 0x1
+#define OPT_Mem 0x2
+#define OPT_RM 0x3
+#define OPT_SegReg 0x4
+#define OPT_CRReg 0x5
+#define OPT_DRReg 0x6
+#define OPT_TRReg 0x7
+#define OPT_ST0 0x8
+#define OPT_Areg 0x9
+#define OPT_Creg 0xA
+#define OPT_CR4 0xB
+#define OPT_MemOffs 0xC
+#define OPT_MASK 0x000F
+
+#define OPS_Any (0<<4)
+#define OPS_8 (1<<4)
+#define OPS_16 (2<<4)
+#define OPS_32 (3<<4)
+#define OPS_64 (4<<4)
+#define OPS_80 (5<<4)
+#define OPS_128 (6<<4)
+#define OPS_MASK 0x0070
+#define OPS_SHIFT 4
+
+#define OPS_Relaxed (1<<7)
+#define OPS_RMASK 0x0080
+
+#define OPA_None (0<<8)
+#define OPA_EA (1<<8)
+#define OPA_Imm (2<<8)
+#define OPA_Spare (3<<8)
+#define OPA_Op0Add (4<<8)
+#define OPA_MASK 0x0700
+
+typedef struct x86_insn_info {
+ /* The CPU feature flags needed to execute this instruction. This is OR'ed
+ * with arch-specific data[2]. This combined value is compared with
+ * cpu_enabled to see if all bits set here are set in cpu_enabled--if so,
+ * the instruction is available on this CPU.
+ */
+ unsigned long cpu;
+
+ /* Opcode modifiers for variations of instruction. As each modifier reads
+ * its parameter in LSB->MSB order from the arch-specific data[1] from the
+ * lexer data, and the LSB of the arch-specific data[1] is reserved for the
+ * count of insn_info structures in the instruction grouping, there can
+ * only be a maximum of 3 modifiers.
+ */
+ unsigned long modifiers;
+
+ /* Operand Size */
+ unsigned char opersize;
+
+ /* The length of the basic opcode */
+ unsigned char opcode_len;
+
+ /* The basic 1-3 byte opcode */
+ unsigned char opcode[3];
+
+ /* The 3-bit "spare" value (extended opcode) for the R/M byte field */
+ unsigned char spare;
+
+ /* The number of operands this form of the instruction takes */
+ unsigned char num_operands;
+
+ /* The types of each operand, see above */
+ unsigned int operands[3];
+} x86_insn_info;
+
+/* Define lexer arch-specific data with 0-3 modifiers.
+ * Expects an unsigned long array named `data` (at least 3 elements) in the
+ * enclosing scope and fills it as follows:
+ *   data[0] = address of the group##_insn table
+ *   data[1] = modifier parameters (bits 8 and up) | table entry count (low
+ *             byte)
+ *   data[2] = additional CPU feature flags
+ * mod is widened to unsigned long before the shift so that three packed
+ * modifier bytes cannot overflow a signed int.
+ */
+#define DEF_INSN_DATA(group, mod, cpu) do { \
+    data[0] = (unsigned long)group##_insn; \
+    data[1] = ((unsigned long)(mod)<<8) | \
+              ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \
+    data[2] = (cpu); \
+    } while (0)
+
+/* Fill `data` as DEF_INSN_DATA does, then return ARCH_CHECK_ID_INSN from the
+ * enclosing function.
+ */
+#define RET_INSN(group, mod, cpu) do { \
+    DEF_INSN_DATA(group, mod, cpu); \
+    return ARCH_CHECK_ID_INSN; \
+    } while (0)
+
+/*
+ * General instruction groupings
+ */
+
+/* One byte opcode instructions with no operands */
+static const x86_insn_info onebyte_insn[] = {
+ { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Two byte opcode instructions with no operands */
+static const x86_insn_info twobyte_insn[] = {
+ { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Three byte opcode instructions with no operands */
+static const x86_insn_info threebyte_insn[] = {
+ { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0,
+ {0, 0, 0} }
+};
+
+/* One byte opcode instructions with general memory operand */
+static const x86_insn_info onebytemem_insn[] = {
+ { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1,
+ {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Two byte opcode instructions with general memory operand.
+ * Both opcode bytes come from modifier parameters (MOD_Op0Add and
+ * MOD_Op1Add), so the opcode length must be 2 for byte 1 to be part of the
+ * opcode.
+ */
+static const x86_insn_info twobytemem_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 2, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Move instructions */
+static const x86_insn_info mov_insn[] = {
+ { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
+ {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2,
+ {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_Any, 0, 32, 1, {0xA1, 0, 0}, 0, 2,
+ {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
+ {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} },
+ { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2,
+ {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} },
+ { CPU_Any, 0, 32, 1, {0xA3, 0, 0}, 0, 2,
+ {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} },
+ { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2,
+ {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
+ { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2,
+ {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
+ { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2,
+ {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
+ { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+ /* TODO: segreg here */
+ { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+ { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+ { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+ /* Need two sets here, one for strictness on left side, one for right. */
+ { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+ {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
+ { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+ {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
+ { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+ {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
+ { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+ {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+ { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+ {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+ { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+ {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+ { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+ {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+ { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+ {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+ { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} },
+ { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
+ { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2,
+ {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+ { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} }
+};
+
+/* Move with sign/zero extend */
+static const x86_insn_info movszx_insn[] = {
+ { CPU_386, MOD_Op1Add, 16, 2, {0x0F, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
+ { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 1, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} }
+};
+
+
+/* Build an instruction bytecode from lexer data and parsed operands.
+ *
+ *  data[0]      - address of the instruction group's x86_insn_info table
+ *  data[1]      - low byte: number of entries in that table;
+ *                 remaining bytes: modifier parameters, consumed LSB first
+ *  data[2]      - CPU feature flags OR'ed into every entry's requirement
+ *  num_operands - number of operands parsed
+ *  operands     - the parsed operands, or NULL if there are none
+ *
+ * The table is searched linearly and the first entry whose CPU flags,
+ * operand count, operand types and operand sizes all match is used.
+ * Returns the bytecode created by x86_bc_new_insn(), or NULL after
+ * reporting an error if no entry matches.
+ */
+bytecode *
+x86_new_insn(const unsigned long data[4], int num_operands,
+             insn_operandhead *operands)
+{
+    x86_new_insn_data d;
+    int num_info = (int)(data[1]&0xFF);
+    /* The info tables are static const; do not cast the qualifier away. */
+    const x86_insn_info *info = (const x86_insn_info *)data[0];
+    unsigned long mod_data = data[1] >> 8;
+    int found = 0;
+    insn_operand *op;
+    int i;
+    /* Maps the OPS_* size code (0-7) to the size value compared below. */
+    static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0};
+
+    /* Just do a simple linear search through the info array for a match.
+     * First match wins.
+     */
+    for (; num_info>0 && !found; num_info--, info++) {
+        unsigned long cpu;
+        unsigned int size;
+        int mismatch = 0;
+
+        /* Match CPU */
+        cpu = info->cpu | data[2];
+        if ((cpu_enabled & cpu) != cpu)
+            continue;
+
+        /* Match # of operands */
+        if (num_operands != info->num_operands)
+            continue;
+
+        if (!operands) {
+            found = 1;      /* no operands -> must have a match here. */
+            break;
+        }
+
+        /* Match each operand type and size */
+        for(i = 0, op = ops_first(operands); op && i<info->num_operands &&
+            !mismatch; op = ops_next(op), i++) {
+            /* Check operand type */
+            switch (info->operands[i] & OPT_MASK) {
+                case OPT_Imm:
+                    if (op->type != INSN_OPERAND_IMM)
+                        mismatch = 1;
+                    break;
+                case OPT_RM:
+                    if (op->type == INSN_OPERAND_MEMORY)
+                        break;
+                    /* Otherwise it must be a register of the same classes
+                     * OPT_Reg accepts: CR/DR/TR registers are not valid
+                     * register-or-memory operands.
+                     */
+                    /*@fallthrough@*/
+                case OPT_Reg:
+                    if (op->type != INSN_OPERAND_REG)
+                        mismatch = 1;
+                    else {
+                        size = op->data.reg & ~7;
+                        if (size == X86_CRREG || size == X86_DRREG ||
+                            size == X86_TRREG)
+                            mismatch = 1;
+                    }
+                    break;
+                case OPT_Mem:
+                    if (op->type != INSN_OPERAND_MEMORY)
+                        mismatch = 1;
+                    break;
+                case OPT_SegReg:
+                    if (op->type != INSN_OPERAND_SEGREG)
+                        mismatch = 1;
+                    break;
+                case OPT_CRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_CRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_DRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_DRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_TRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_TRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_ST0:
+                    if (op->type != INSN_OPERAND_REG ||
+                        op->data.reg != X86_FPUREG)
+                        mismatch = 1;
+                    break;
+                case OPT_Areg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                         op->data.reg != (X86_REG8 | 0)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                         op->data.reg != (X86_REG16 | 0)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                         op->data.reg != (X86_REG32 | 0)))
+                        mismatch = 1;
+                    break;
+                case OPT_Creg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                         op->data.reg != (X86_REG8 | 1)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                         op->data.reg != (X86_REG16 | 1)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                         op->data.reg != (X86_REG32 | 1)))
+                        mismatch = 1;
+                    break;
+                case OPT_CR4:
+                    if (op->type != INSN_OPERAND_REG ||
+                        op->data.reg != (X86_CRREG | 4))
+                        mismatch = 1;
+                    break;
+                case OPT_MemOffs:
+                    if (op->type != INSN_OPERAND_MEMORY ||
+                        expr_contains(ea_get_disp(op->data.ea), EXPR_REG))
+                        mismatch = 1;
+                    break;
+                default:
+                    InternalError(_("invalid operand type"));
+            }
+
+            if (mismatch)
+                break;
+
+            /* Check operand size */
+            size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT];
+            if (op->type == INSN_OPERAND_REG && op->size == 0) {
+                /* Register size must exactly match */
+                if (x86_get_reg_size(op->data.reg) != size)
+                    mismatch = 1;
+            } else {
+                if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) {
+                    /* Relaxed checking */
+                    if (size != 0 && op->size != size && op->size != 0)
+                        mismatch = 1;
+                } else {
+                    /* Strict checking */
+                    if (op->size != size)
+                        mismatch = 1;
+                }
+            }
+        }
+
+        if (!mismatch) {
+            found = 1;
+            break;
+        }
+    }
+
+    if (!found) {
+        /* Didn't find a matching one */
+        /* FIXME: This needs to be more descriptive of certain reasons for a
+         * mismatch.  E.g.:
+         *  "mismatch in operand sizes"
+         *  "operand size not specified"
+         * etc.  This will probably require adding dummy error catchers in the
+         * insn list which are only looked at if we get here.
+         */
+        Error(_("invalid combination of opcode and operands"));
+        return NULL;
+    }
+
+    /* Copy what we can from info */
+    d.ea = NULL;
+    d.imm = NULL;
+    d.opersize = info->opersize;
+    d.op_len = info->opcode_len;
+    d.op[0] = info->opcode[0];
+    d.op[1] = info->opcode[1];
+    d.op[2] = info->opcode[2];
+    d.spare = info->spare;
+    d.im_len = 0;
+    d.im_sign = 0;
+
+    /* Apply modifiers; each one consumes the next byte of mod_data. */
+    if (info->modifiers & MOD_Op2Add) {
+        d.op[2] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap0)
+        mod_data >>= 8;
+    if (info->modifiers & MOD_Op1Add) {
+        d.op[1] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap1)
+        mod_data >>= 8;
+    if (info->modifiers & MOD_Op0Add) {
+        d.op[0] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_SpAdd) {
+        d.spare += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_OpSizeR) {
+        d.opersize = (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Imm8) {
+        d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF)));
+        d.im_len = 1;
+        /*mod_data >>= 8;*/
+    }
+
+    /* Go through operands and assign */
+    if (operands) {
+        for(i = 0, op = ops_first(operands); op && i<info->num_operands;
+            op = ops_next(op), i++) {
+            switch (info->operands[i] & OPA_MASK) {
+                case OPA_None:
+                    /* Throw away the operand contents */
+                    switch (op->type) {
+                        case INSN_OPERAND_REG:
+                        case INSN_OPERAND_SEGREG:
+                            break;
+                        case INSN_OPERAND_MEMORY:
+                            ea_delete(op->data.ea);
+                            break;
+                        case INSN_OPERAND_IMM:
+                            expr_delete(op->data.val);
+                            break;
+                    }
+                    break;
+                case OPA_EA:
+                    switch (op->type) {
+                        case INSN_OPERAND_REG:
+                            d.ea = x86_ea_new_reg((unsigned char)op->data.reg);
+                            break;
+                        case INSN_OPERAND_SEGREG:
+                            InternalError(_("invalid operand conversion"));
+                            /* Never treat a segment register as memory,
+                             * even if InternalError() were to return.
+                             */
+                            break;
+                        case INSN_OPERAND_MEMORY:
+                            d.ea = op->data.ea;
+                            if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) {
+                                /* Special-case for MOV MemOffs instruction */
+                                x86_effaddr_data *ead = ea_get_data(d.ea);
+                                ead->valid_modrm = 0;
+                                ead->need_modrm = 0;
+                                ead->valid_sib = 0;
+                                ead->need_sib = 0;
+                            }
+                            break;
+                        case INSN_OPERAND_IMM:
+                            d.ea = x86_ea_new_imm(op->data.val,
+                                size_lookup[(info->operands[i] &
+                                            OPS_MASK)>>OPS_SHIFT]);
+                            break;
+                    }
+                    break;
+                case OPA_Imm:
+                    if (op->type == INSN_OPERAND_IMM) {
+                        d.imm = op->data.val;
+                        d.im_len = size_lookup[(info->operands[i] &
+                                               OPS_MASK)>>OPS_SHIFT];
+                    } else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                case OPA_Spare:
+                    if (op->type == INSN_OPERAND_REG ||
+                        op->type == INSN_OPERAND_SEGREG)
+                        d.spare = (unsigned char)(op->data.reg&7);
+                    else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                case OPA_Op0Add:
+                    if (op->type == INSN_OPERAND_REG)
+                        d.op[0] += (unsigned char)(op->data.reg&7);
+                    else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                default:
+                    InternalError(_("unknown operand action"));
+            }
+        }
+    }
+
+    /* Create the bytecode and return it */
+    return x86_bc_new_insn(&d);
+}
+
+
+#define YYCTYPE char
+#define YYCURSOR id
+#define YYLIMIT id
+#define YYMARKER marker
+#define YYFILL(n)
+
+/*!re2c
+ any = [\000-\377];
+ A = [aA];
+ B = [bB];
+ C = [cC];
+ D = [dD];
+ E = [eE];
+ F = [fF];
+ G = [gG];
+ H = [hH];
+ I = [iI];
+ J = [jJ];
+ K = [kK];
+ L = [lL];
+ M = [mM];
+ N = [nN];
+ O = [oO];
+ P = [pP];
+ Q = [qQ];
+ R = [rR];
+ S = [sS];
+ T = [tT];
+ U = [uU];
+ V = [vV];
+ W = [wW];
+ X = [xX];
+ Y = [yY];
+ Z = [zZ];
+*/
+
+/* Change the set of enabled CPU features according to identifier id.
+ * A standard CPU name replaces cpu_enabled outright; a feature name (or its
+ * "no" form) sets (or clears) only that feature's bit.  Anything else
+ * produces a warning and leaves cpu_enabled unchanged.
+ *
+ * The scanner uses id itself as its cursor (YYCURSOR), so id has already
+ * been advanced past the matched text when a rule's action runs.  The
+ * original pointer is kept in oid so the warnings can print the whole
+ * identifier.
+ */
+void
+x86_switch_cpu(const char *id)
+{
+    const char *oid = id;
+    const char *marker;
+
+    /*!re2c
+	/* The standard CPU names /set/ cpu_enabled. */
+	"8086" {
+	    cpu_enabled = CPU_Priv;
+	    return;
+	}
+	("80" | I)? "186" {
+	    cpu_enabled = CPU_186|CPU_Priv;
+	    return;
+	}
+	("80" | I)? "286" {
+	    cpu_enabled = CPU_186|CPU_286|CPU_Priv;
+	    return;
+	}
+	("80" | I)? "386" {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	("80" | I)? "486" {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM|
+			  CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(I? "586") | (P E N T I U M) | (P "5") {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU|
+			  CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(P "2") | (P E N T I U M "-"? ("2" | (I I))) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot|
+			  CPU_Priv;
+	    return;
+	}
+	(P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2|
+			  CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(I A "-"? "64") | (I T A N I U M) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE|
+			  CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	K "6" {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot|
+			  CPU_Priv;
+	    return;
+	}
+	A T H L O N {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow|
+			  CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+	(S L E D G E)? (H A M M E R) {
+	    cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+			  CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|
+			  CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv;
+	    return;
+	}
+
+	/* Features have "no" versions to disable them, and only set/reset the
+	 * specific feature being changed.  All other bits are left alone.
+	 */
+	F P U		{ cpu_enabled |= CPU_FPU; return; }
+	N O F P U	{ cpu_enabled &= ~CPU_FPU; return; }
+	M M X		{ cpu_enabled |= CPU_MMX; return; }
+	N O M M X	{ cpu_enabled &= ~CPU_MMX; return; }
+	S S E		{ cpu_enabled |= CPU_SSE; return; }
+	N O S S E	{ cpu_enabled &= ~CPU_SSE; return; }
+	S S E "2"	{ cpu_enabled |= CPU_SSE2; return; }
+	N O S S E "2"	{ cpu_enabled &= ~CPU_SSE2; return; }
+	"3" D N O W	{ cpu_enabled |= CPU_3DNow; return; }
+	N O "3" D N O W	{ cpu_enabled &= ~CPU_3DNow; return; }
+	C Y R I X	{ cpu_enabled |= CPU_Cyrix; return; }
+	N O C Y R I X	{ cpu_enabled &= ~CPU_Cyrix; return; }
+	A M D		{ cpu_enabled |= CPU_AMD; return; }
+	N O A M D	{ cpu_enabled &= ~CPU_AMD; return; }
+	S M M		{ cpu_enabled |= CPU_SMM; return; }
+	N O S M M	{ cpu_enabled &= ~CPU_SMM; return; }
+	P R O T		{ cpu_enabled |= CPU_Prot; return; }
+	N O P R O T	{ cpu_enabled &= ~CPU_Prot; return; }
+	U N D O C	{ cpu_enabled |= CPU_Undoc; return; }
+	N O U N D O C	{ cpu_enabled &= ~CPU_Undoc; return; }
+	O B S		{ cpu_enabled |= CPU_Obs; return; }
+	N O O B S	{ cpu_enabled &= ~CPU_Obs; return; }
+	P R I V		{ cpu_enabled |= CPU_Priv; return; }
+	N O P R I V	{ cpu_enabled &= ~CPU_Priv; return; }
+
+	/* catchalls */
+	[A-Za-z0-9]+ {
+	    Warning(_("unrecognized CPU identifier `%s'"), oid);
+	    return;
+	}
+	any {
+	    Warning(_("unrecognized CPU identifier `%s'"), oid);
+	    return;
+	}
+    */
+}
+
+arch_check_id_retval
+x86_check_identifier(unsigned long data[4], const char *id)
+{
+ const char *oid = id;
+ const char *marker;
+ /*!re2c
+ /* target modifiers */
+ N E A R {
+ data[0] = X86_NEAR;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+ S H O R T {
+ data[0] = X86_SHORT;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+ F A R {
+ data[0] = X86_FAR;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+
+ /* operand size overrides */
+ O "16" {
+ data[0] = X86_OPERSIZE;
+ data[1] = 16;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ O "32" {
+ data[0] = X86_OPERSIZE;
+ data[1] = 32;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ /* address size overrides */
+ A "16" {
+ data[0] = X86_ADDRSIZE;
+ data[1] = 16;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ A "32" {
+ data[0] = X86_ADDRSIZE;
+ data[1] = 32;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+
+ /* instruction prefixes */
+ L O C K {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF0;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P N E {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF2;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P N Z {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF2;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ /* REPE and REPZ are aliases of REP and share its prefix byte, 0xF3.
+  * (0xF4 is the HLT opcode, not a prefix.)
+  */
+ R E P E {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P Z {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+
+ /* control, debug, and test registers */
+ C R [02-4] {
+ data[0] = X86_CRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ D R [0-7] {
+ data[0] = X86_DRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ T R [0-7] {
+ data[0] = X86_TRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+
+ /* floating point, MMX, and SSE/SSE2 registers */
+ S T [0-7] {
+ data[0] = X86_FPUREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ M M [0-7] {
+ data[0] = X86_MMXREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ X M M [0-7] {
+ /* "xmm" is three letters long, so the register number digit is at
+  * oid[3] (oid[2] is the second 'm'), unlike the two-letter prefixes.
+  */
+ data[0] = X86_XMMREG | (oid[3]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+
+ /* integer registers */
+ E A X { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; }
+ E C X { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; }
+ E D X { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; }
+ E B X { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; }
+ E S P { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; }
+ E B P { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; }
+ E S I { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; }
+ E D I { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; }
+
+ A X { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; }
+ C X { data[0] = X86_REG16 | 1; return ARCH_CHECK_ID_REG; }
+ D X { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; }
+ B X { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; }
+ S P { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; }
+ B P { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; }
+ S I { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; }
+ D I { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; }
+
+ A L { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; }
+ C L { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; }
+ D L { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; }
+ B L { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; }
+ A H { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; }
+ C H { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; }
+ D H { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; }
+ B H { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; }
+
+ /* segment registers */
+ E S { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; }
+ C S { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; }
+ S S { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; }
+ D S { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; }
+ F S { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; }
+ G S { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; }
+
+ /* instructions */
+
+ /* Move */
+ M O V { RET_INSN(mov, 0, CPU_Any); }
+ /* Move with sign/zero extend */
+ M O V S X { RET_INSN(movszx, 0xBE, CPU_386); }
+ M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); }
+ /* Push instructions */
+ /* P U S H */
+ P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); }
+ P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); }
+ P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); }
+ /* Pop instructions */
+ /* P O P */
+ P O P A { RET_INSN(onebyte, 0x0061, CPU_186); }
+ P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); }
+ P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); }
+ /* Exchange */
+ /* X C H G */
+ /* In/out from ports */
+ /* I N */
+ /* O U T */
+ /* Load effective address */
+ /* L E A */
+ /* Load segment registers from memory */
+ /* L D S */
+ /* L E S */
+ /* L F S */
+ /* L G S */
+ /* L S S */
+ /* Flags register instructions */
+ C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); }
+ C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); }
+ C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); }
+ C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); }
+ C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); }
+ L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); }
+ S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); }
+ P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); }
+ P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); }
+ P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); }
+ P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); }
+ P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); }
+ P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); }
+ S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); }
+ S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); }
+ S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); }
+ /* Arithmetic */
+ /* A D D */
+ /* I N C */
+ /* S U B */
+ /* D E C */
+ /* S B B */
+ /* C M P */
+ /* T E S T */
+ /* A N D */
+ /* O R */
+ /* X O R */
+ /* A D C */
+ /* N E G */
+ /* N O T */
+ A A A { RET_INSN(onebyte, 0x0037, CPU_Any); }
+ A A S { RET_INSN(onebyte, 0x003F, CPU_Any); }
+ D A A { RET_INSN(onebyte, 0x0027, CPU_Any); }
+ D A S { RET_INSN(onebyte, 0x002F, CPU_Any); }
+ /* A A D */
+ /* A A M */
+ /* Conversion instructions */
+ C B W { RET_INSN(onebyte, 0x1098, CPU_Any); }
+ C W D E { RET_INSN(onebyte, 0x2098, CPU_386); }
+ C W D { RET_INSN(onebyte, 0x1099, CPU_Any); }
+ C D Q { RET_INSN(onebyte, 0x2099, CPU_386); }
+ /* Multiplication and division */
+ /* M U L */
+ /* I M U L */
+ /* D I V */
+ /* I D I V */
+ /* Shifts */
+ /* R O L */
+ /* R O R */
+ /* R C L */
+ /* R C R */
+ /* S A L */
+ /* S H L */
+ /* S H R */
+ /* S A R */
+ /* S H L D */
+ /* S H R D */
+ /* Control transfer instructions (unconditional) */
+ /* C A L L */
+ /* J M P */
+ R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); }
+ /* R E T N */
+ /* R E T F */
+ /* E N T E R */
+ L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); }
+ /* Conditional jumps */
+ /* J O */
+ /* J N O */
+ /* J B */
+ /* JC */
+ /* J N A E */
+ /* J N B */
+ /* J N C */
+ /* J A E */
+ /* J E */
+ /* J Z */
+ /* J N E */
+ /* J N Z */
+ /* J B E */
+ /* J N A */
+ /* J N B E */
+ /* J A */
+ /* J S */
+ /* J N S */
+ /* J P */
+ /* J P E */
+ /* J N P */
+ /* J P O */
+ /* J L */
+ /* J N G E */
+ /* J N L */
+ /* J G E */
+ /* J L E */
+ /* J N G */
+ /* J N L E */
+ /* J G */
+ /* J C X Z */
+ /* J E C X Z */
+ /* Loop instructions */
+ /* L O O P */
+ /* L O O P Z */
+ /* L O O P E */
+ /* L O O P N Z */
+ /* L O O P N E */
+ /* Set byte on flag instructions */
+ /* S E T O */
+ /* S E T N O */
+ /* S E T B */
+ /* S E T C */
+ /* S E T N A E */
+ /* S E T N B */
+ /* S E T N C */
+ /* S E T A E */
+ /* S E T E */
+ /* S E T Z */
+ /* S E T N E */
+ /* S E T N Z */
+ /* S E T B E */
+ /* S E T N A */
+ /* S E T N B E */
+ /* S E T A */
+ /* S E T S */
+ /* S E T N S */
+ /* S E T P */
+ /* S E T P E */
+ /* S E T N P */
+ /* S E T P O */
+ /* S E T L */
+ /* S E T N G E */
+ /* S E T N L */
+ /* S E T G E */
+ /* S E T L E */
+ /* S E T N G */
+ /* S E T N L E */
+ /* S E T G */
+ /* String instructions. */
+ C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); }
+ C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); }
+ /* C M P S D */
+ I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); }
+ I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); }
+ I N S D { RET_INSN(onebyte, 0x206D, CPU_386); }
+ O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); }
+ O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); }
+ O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); }
+ L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); }
+ L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); }
+ L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); }
+ M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); }
+ M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); }
+ /* M O V S D */
+ S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); }
+ S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); }
+ S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); }
+ S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); }
+ S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); }
+ S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); }
+ X L A T B? { RET_INSN(onebyte, 0x00D7, CPU_Any); }
+ /* Bit manipulation */
+ /* B S F */
+ /* B S R */
+ /* B T */
+ /* B T C */
+ /* B T R */
+ /* B T S */
+ /* Interrupts and operating system instructions */
+ /* I N T */
+ I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+ I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+ I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); }
+ I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); }
+ I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); }
+ I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); }
+ R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); }
+ /* B O U N D */
+ H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); }
+ N O P { RET_INSN(onebyte, 0x0090, CPU_Any); }
+ /* Protection control */
+ /* A R P L */
+ /* L A R */
+ L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); }
+ L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); }
+ /* L L D T */
+ /* L M S W */
+ /* L S L */
+ /* L T R */
+ S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); }
+ S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); }
+ /* S L D T */
+ /* S M S W */
+ /* S T R */
+ /* V E R R */
+ /* V E R W */
+ /* Floating point instructions */
+ /* F L D */
+ /* F I L D */
+ /* F B L D */
+ /* F S T */
+ /* F I S T */
+ /* F S T P */
+ /* F I S T P */
+ /* F B S T P */
+ /* F X C H */
+ /* F C O M */
+ /* F I C O M */
+ /* F C O M P */
+ /* F I C O M P */
+ F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); }
+ /* F U C O M */
+ /* F U C O M P */
+ F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); }
+ F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); }
+ F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); }
+ F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); }
+ F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); }
+ F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); }
+ F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); }
+ F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); }
+ F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); }
+ F L D Z { RET_INSN(twobyte, 0xD9EE, CPU_FPU); }
+ /* F A D D */
+ /* F A D D P */
+ /* F I A D D */
+ /* F S U B */
+ /* F I S U B */
+ /* F S U B P */
+ /* F S U B R */
+ /* F I S U B R */
+ /* F S U B R P */
+ /* F M U L */
+ /* F I M U L */
+ /* F M U L P */
+ /* F D I V */
+ /* F I D I V */
+ /* F D I V P */
+ /* F D I V R */
+ /* F I D I V R */
+ /* F D I V R P */
+ F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); }
+ F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); }
+ F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); }
+ F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); }
+ F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); }
+ F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); }
+ F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); }
+ F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); }
+ F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); }
+ F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); }
+ F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); }
+ F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); }
+ F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); }
+ F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); }
+ F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); }
+ F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); }
+ F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); }
+ F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); }
+ F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); }
+ /* FINIT is FNINIT preceded by the WAIT byte (0x9B, same as WAIT/FWAIT) */
+ F I N I T { RET_INSN(threebyte, 0x9BDBE3, CPU_FPU); }
+ /* F L D C W */
+ /* F N S T C W */
+ /* F S T C W */
+ /* F N S T S W */
+ /* F S T S W */
+ F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); }
+ /* FCLEX is FNCLEX preceded by the WAIT byte (0x9B, same as WAIT/FWAIT) */
+ F C L E X { RET_INSN(threebyte, 0x9BDBE2, CPU_FPU); }
+ F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); }
+ F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); }
+ F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); }
+ F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); }
+ F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); }
+ F R S T O R { RET_INSN(onebytemem, 0x04DD, CPU_FPU); }
+ /* F F R E E */
+ /* F F R E E P */
+ F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); }
+ F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); }
+ /* Prefixes (should the others be here too? should wait be a prefix?) */
+ W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); }
+ /* 486 extensions */
+ /* B S W A P */
+ /* X A D D */
+ /* C M P X C H G */
+ /* C M P X C H G 4 8 6 */
+ I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); }
+ W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); }
+ I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); }
+ /* 586+ and late 486 extensions */
+ C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); }
+ /* Pentium extensions */
+ W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); }
+ R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); }
+ R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); }
+ /* C M P X C H G 8 B */
+ /* Pentium II/Pentium Pro extensions */
+ S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); }
+ S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); }
+ F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); }
+ F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); }
+ R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); }
+ U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); }
+ U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); }
+ /* C M O V */
+ /* F C M O V */
+ /* F C O M I */
+ /* F U C O M I */
+ /* F C O M I P */
+ /* F U C O M I P */
+ /* Pentium4 extensions */
+ /* M O V N T I */
+ /* C L F L U S H */
+ L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); }
+ M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); }
+ P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); }
+ /* MMX/SSE2 instructions */
+ E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); }
+ /* PIII (Katmai) new instructions / SIMD instructions */
+ /* ... */
+ P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); }
+ P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); }
+ P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); }
+ P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); }
+ /* ... */
+ S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); }
+ /* ... */
+ /* SSE2 instructions */
+ /* AMD 3DNow! instructions */
+ P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+ P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+ F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); }
+ /* ... */
+ /* AMD extensions */
+ S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); }
+ S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); }
+ /* Cyrix MMX instructions */
+ /* Cyrix extensions */
+ R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); }
+ /* R S D C */
+ /* R S L D T */
+ /* R S T S */
+ /* S V D C */
+ /* S V L D T */
+ /* S V T S */
+ S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); }
+ S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); }
+ W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); }
+ /* Obsolete/undocumented instructions */
+ F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); }
+ /* I B T S */
+ L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); }
+ L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); }
+ S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); }
+ S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); }
+ /* U M O V */
+ /* X B T S */
+
+
+ /* catchalls */
+ [A-Za-z0-9]+ {
+ return ARCH_CHECK_ID_NONE;
+ }
+ any {
+ return ARCH_CHECK_ID_NONE;
+ }
+ */
+}
YASMPARSERFILES += \
src/parsers/nasm/nasm-parser.c \
src/parsers/nasm/nasm-defs.h \
- nasm-bison.y \
+ src/parsers/nasm/nasm-bison.y \
nasm-bison.h \
- nasm-token.l
-
-if DEV
-
-nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl
- $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y
-
-else
-
-nasm-token.l: $(srcdir)/nasm-token.l
- @echo Warning: Not generating nasm-token.l from nasm-token.l.in.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-token.l .
-nasm-token.c: $(srcdir)/nasm-token.c
- @echo Warning: Not generating nasm-token.c from nasm-token.l.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-token.c .
-nasm-bison.y: $(srcdir)/nasm-bison.y
- @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.y .
-nasm-bison.c: $(srcdir)/nasm-bison.c
- @echo Warning: Not generating nasm-bison.c from nasm-bison.y.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.c .
-nasm-bison.h: $(srcdir)/nasm-bison.h
- @echo Warning: Not generating nasm-bison.h from nasm-bison.y.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.h .
-
-endif
+ nasm-token.c
-noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl
+nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+ re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
BUILT_SOURCES += \
- nasm-bison.y \
nasm-bison.c \
nasm-bison.h \
- nasm-token.l \
nasm-token.c
CLEANFILES += \
- nasm-bison.y \
nasm-bison.c \
nasm-bison.h \
- nasm-token.l \
nasm-token.c
EXTRA_DIST += \
- src/parsers/nasm/token.l.in \
- src/parsers/nasm/bison.y.in \
- src/parsers/nasm/gen_instr.pl
+ src/parsers/nasm/nasm-token.re
+++ /dev/null
-/*
- * NASM-compatible bison parser
- *
- * Copyright (C) 2001 Peter Johnson, Michael Urman
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#ifdef STDC_HEADERS
-# include <math.h>
-#endif
-
-#include "bitvect.h"
-
-#include "globals.h"
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-#include "section.h"
-#include "objfmt.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-
-void init_table(void);
-extern int nasm_parser_lex(void);
-void nasm_parser_error(const char *);
-static void nasm_parser_directive(const char *name,
- valparamhead *valparams,
- /*@null@*/ valparamhead *objext_valparams);
-
-extern objfmt *nasm_parser_objfmt;
-extern sectionhead nasm_parser_sections;
-extern section *nasm_parser_cur_section;
-extern char *nasm_parser_locallabel_base;
-
-static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
-static bytecode *nasm_parser_temp_bc;
-
-/* additional data declarations (dynamically generated) */
-/* @DATADECLS@ */
-
-/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/
-%}
-
-%union {
- unsigned int int_info;
- char *str_val;
- intnum *intn;
- floatnum *flt;
- symrec *sym;
- unsigned char groupdata[5];
- effaddr *ea;
- expr *exp;
- immval *im_val;
- x86_targetval tgt_val;
- datavalhead datahead;
- dataval *data;
- bytecode *bc;
- valparamhead dir_valparams;
- valparam *dir_valparam;
-}
-
-%token <intn> INTNUM
-%token <flt> FLTNUM
-%token <str_val> DIRECTIVE_NAME STRING FILENAME
-%token <int_info> BYTE WORD DWORD QWORD TWORD DQWORD
-%token <int_info> DECLARE_DATA
-%token <int_info> RESERVE_SPACE
-%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
-%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
-%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
-%token <str_val> ID LOCAL_ID SPECIAL_ID
-%token LINE
-
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
-%type <sym> explabel
-%type <str_val> label_id
-%type <tgt_val> target
-%type <data> dataval
-%type <datahead> datavals
-%type <dir_valparams> directive_valparams
-%type <dir_valparam> directive_valparam
-
-%left '|'
-%left '^'
-%left '&'
-%left LEFT_OP RIGHT_OP
-%left '-' '+'
-%left '*' '/' SIGNDIV '%' SIGNMOD
-%nonassoc UNARYOP
-
-%%
-input: /* empty */
- | input line {
- nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
- $2);
- if (nasm_parser_temp_bc)
- nasm_parser_prev_bc = nasm_parser_temp_bc;
- line_index++;
- }
-;
-
-line: '\n' { $$ = (bytecode *)NULL; }
- | lineexp '\n'
- | LINE INTNUM '+' INTNUM FILENAME '\n' {
- /* %line indicates the line number of the *next* line, so subtract out
- * the increment when setting the line number.
- */
- line_set($5, intnum_get_uint($2)-intnum_get_uint($4),
- intnum_get_uint($4));
- intnum_delete($2);
- intnum_delete($4);
- xfree($5);
- $$ = (bytecode *)NULL;
- }
- | directive '\n' { $$ = (bytecode *)NULL; }
- | error '\n' {
- Error(_("label or instruction expected at start of line"));
- $$ = (bytecode *)NULL;
- yyerrok;
- }
-;
-
-lineexp: exp
- | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); }
- | label { $$ = (bytecode *)NULL; }
- | label exp { $$ = $2; }
- | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); }
- | label_id EQU expr {
- symrec_define_equ($1, $3);
- xfree($1);
- $$ = (bytecode *)NULL;
- }
-;
-
-exp: instr
- | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); }
- | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); }
- | INCBIN STRING { $$ = bc_new_incbin($2, NULL, NULL); }
- | INCBIN STRING ',' expr { $$ = bc_new_incbin($2, $4, NULL); }
- | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); }
-;
-
-datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); }
- | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; }
-;
-
-dataval: expr_no_string { $$ = dv_new_expr($1); }
- | STRING { $$ = dv_new_string($1); }
- | error {
- Error(_("expression syntax error"));
- $$ = (dataval *)NULL;
- }
-;
-
-label: label_id {
- symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
- 1);
- xfree($1);
- }
- | label_id ':' {
- symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
- 1);
- xfree($1);
- }
-;
-
-label_id: ID {
- $$ = $1;
- if (nasm_parser_locallabel_base)
- xfree(nasm_parser_locallabel_base);
- nasm_parser_locallabel_base = xstrdup($1);
- }
- | SPECIAL_ID
- | LOCAL_ID
-;
-
-/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']' {
- xfree($2);
- }
- | '[' DIRECTIVE_NAME error ']' {
- Error(_("invalid arguments to [%s]"), $2);
- xfree($2);
- }
-;
-
- /* $<str_val>0 is the DIRECTIVE_NAME */
- /* After : is (optional) object-format specific extension */
-directive_val: directive_valparams {
- nasm_parser_directive($<str_val>0, &$1, NULL);
- }
- | directive_valparams ':' directive_valparams {
- nasm_parser_directive($<str_val>0, &$1, &$3);
- }
-;
-
-directive_valparams: directive_valparam {
- vps_initialize(&$$);
- vps_append(&$$, $1);
- }
- | directive_valparams directive_valparam {
- vps_append(&$1, $2);
- $$ = $1;
- }
-;
-
-directive_valparam: direxpr {
- /* If direxpr is just an ID, put it in val and delete the expr */
- const /*@null@*/ symrec *vp_symrec;
- if ((vp_symrec = expr_get_symrec(&$1, 0))) {
- vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL);
- expr_delete($1);
- } else
- vp_new($$, NULL, $1);
- }
- | ID '=' direxpr { vp_new($$, $1, $3); }
-;
-
-/* register groupings */
-fpureg: ST0
- | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
- | DWORD reg_eax { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
- | DWORD reg_ecx { $$ = $2; }
-;
-
-rawreg32: REG_EAX
- | REG_ECX
- | REG_EDX
- | REG_EBX
- | REG_ESP
- | REG_EBP
- | REG_ESI
- | REG_EDI
-;
-
-reg32: rawreg32
- | DWORD reg32 { $$ = $2; }
-;
-
-reg_ax: REG_AX
- | WORD reg_ax { $$ = $2; }
-;
-
-reg_cx: REG_CX
- | WORD reg_cx { $$ = $2; }
-;
-
-reg_dx: REG_DX
- | WORD reg_dx { $$ = $2; }
-;
-
-rawreg16: REG_AX
- | REG_CX
- | REG_DX
- | REG_BX
- | REG_SP
- | REG_BP
- | REG_SI
- | REG_DI
-;
-
-reg16: rawreg16
- | WORD reg16 { $$ = $2; }
-;
-
-reg_al: REG_AL
- | BYTE reg_al { $$ = $2; }
-;
-
-reg_cl: REG_CL
- | BYTE reg_cl { $$ = $2; }
-;
-
-reg8: REG_AL
- | REG_CL
- | REG_DL
- | REG_BL
- | REG_AH
- | REG_CH
- | REG_DH
- | REG_BH
- | BYTE reg8 { $$ = $2; }
-;
-
-reg_es: REG_ES
- | WORD reg_es { $$ = $2; }
-;
-
-reg_ss: REG_SS
- | WORD reg_ss { $$ = $2; }
-;
-
-reg_ds: REG_DS
- | WORD reg_ds { $$ = $2; }
-;
-
-reg_fs: REG_FS
- | WORD reg_fs { $$ = $2; }
-;
-
-reg_gs: REG_GS
- | WORD reg_gs { $$ = $2; }
-;
-
-reg_cs: REG_CS
- | WORD reg_cs { $$ = $2; }
-;
-
-segreg: REG_ES
- | REG_SS
- | REG_DS
- | REG_FS
- | REG_GS
- | REG_CS
- | WORD segreg { $$ = $2; }
-;
-
-/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated? This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg. I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); }
- | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' memexpr %prec UNARYOP { $$ = $2; }
- | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' memexpr ')' { $$ = $2; }
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
- | error { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr {
- $$ = x86_ea_new_expr($1);
- x86_ea_set_segment($$, 0);
- }
- | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); }
- | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); }
- | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); }
- | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); }
- | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); }
- | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); }
- | BYTE memaddr { $$ = $2; ea_set_len($$, 1); }
- | WORD memaddr { $$ = $2; ea_set_len($$, 2); }
- | DWORD memaddr { $$ = $2; ea_set_len($$, 4); }
- | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); }
-;
-
-mem: '[' memaddr ']' { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem { $$ = $2; }
- | BYTE mem8x { $$ = $2; }
-;
-mem16x: WORD mem { $$ = $2; }
- | WORD mem16x { $$ = $2; }
-;
-mem32x: DWORD mem { $$ = $2; }
- | DWORD mem32x { $$ = $2; }
-;
-mem64x: QWORD mem { $$ = $2; }
- | QWORD mem64x { $$ = $2; }
-;
-mem80x: TWORD mem { $$ = $2; }
- | TWORD mem80x { $$ = $2; }
-;
-mem128x: DQWORD mem { $$ = $2; }
- | DQWORD mem128x { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem { $$ = $2; }
- | FAR memfar { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
- | mem8x
-;
-mem16: mem
- | mem16x
-;
-mem32: mem
- | mem32x
-;
-mem64: mem
- | mem64x
-;
-mem80: mem
- | mem80x
-;
-mem128: mem
- | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
- | mem16x
- | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8x
-;
-rm16x: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16x
-;
-rm32x: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32x
-;
-/* not needed:
-rm64x: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8
-;
-rm16: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16
-;
-rm32: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32
-;
-rm64: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64
-;
-rm128: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128
-;
-
-/* immediate values */
-imm: expr { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm { $$ = $2; }
-;
-imm16x: WORD imm { $$ = $2; }
-;
-imm32x: DWORD imm { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
- | imm8x
-;
-imm16: imm
- | imm16x
-;
-imm32: imm
- | imm32x
-;
-
-/* jump targets */
-target: expr {
- $$.val = $1;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
- }
- | SHORT target {
- $$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
- }
- | NEAR target {
- $$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
- }
-;
-
-/* expression trees */
-
-/* expr w/o FLTNUM and unary + and -, for use in directives */
-direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | ID {
- $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0)));
- xfree($1);
- }
- | direxpr '|' direxpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | direxpr '^' direxpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- | direxpr '&' direxpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- | direxpr LEFT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | direxpr '+' direxpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | direxpr '-' direxpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | direxpr '*' direxpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | direxpr '/' direxpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | direxpr SIGNDIV direxpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | direxpr '%' direxpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | direxpr SIGNMOD direxpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' direxpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' direxpr ')' { $$ = $2; }
-;
-
-expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' expr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | expr '&' expr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| expr '==' expr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| expr '>' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| expr '<' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| expr '>=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| expr '<=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| expr '!=' expr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | expr LEFT_OP expr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | expr RIGHT_OP expr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | expr '+' expr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | expr '-' expr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | expr '*' expr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | expr '/' expr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | expr SIGNDIV expr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | expr '%' expr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | expr SIGNMOD expr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' expr %prec UNARYOP { $$ = $2; }
- | '-' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' expr ')' { $$ = $2; }
-;
-
-expr: expr_no_string
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
-;
-
-explabel: ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | SPECIAL_ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | LOCAL_ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | '$' {
- $$ = symrec_define_label("$", nasm_parser_cur_section,
- nasm_parser_prev_bc, 0);
- }
- | START_SECTION_ID {
- if (section_is_absolute(nasm_parser_cur_section)) {
- Error(_("`$$' is not valid within an ABSOLUTE section"));
- YYERROR;
- } else {
- const char *ss_name = section_get_name(nasm_parser_cur_section);
- assert(ss_name != NULL);
- $$ = symrec_use(ss_name);
- }
- }
-;
-
-instr: /* empty */ {
- idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
- $$ = x86_bc_new_insn(&idata);
- }
- | instrbase
- | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
- | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
- | REG_CS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
- }
- | REG_SS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
- }
- | REG_DS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
- }
- | REG_ES instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
- }
- | REG_FS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
- }
- | REG_GS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
- }
- | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
- | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
- | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
- | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
-%%
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
-
-static void
-nasm_parser_directive(const char *name, valparamhead *valparams,
- valparamhead *objext_valparams)
-{
- valparam *vp, *vp2;
- const intnum *intn;
- long lval;
-
- assert(cur_objfmt != NULL);
-
- /* Handle (mostly) output-format independent directives here */
- if (strcasecmp(name, "extern") == 0) {
- vp = vps_first(valparams);
- if (vp->val)
- symrec_declare(vp->val, SYM_EXTERN,
- cur_objfmt->extern_data_new(vp->val,
- objext_valparams));
- else
- Error(_("invalid argument to [%s]"), "EXTERN");
- } else if (strcasecmp(name, "global") == 0) {
- vp = vps_first(valparams);
- if (vp->val)
- symrec_declare(vp->val, SYM_GLOBAL,
- cur_objfmt->global_data_new(vp->val,
- objext_valparams));
- else
- Error(_("invalid argument to [%s]"), "GLOBAL");
- } else if (strcasecmp(name, "common") == 0) {
- vp = vps_first(valparams);
- if (vp->val) {
- vp2 = vps_next(vp);
- if (!vp2 || (!vp2->val && !vp2->param))
- Error(_("no size specified in %s declaration"), "COMMON");
- else {
- if (vp2->val)
- symrec_declare(vp->val, SYM_COMMON,
- cur_objfmt->common_data_new(vp->val,
- expr_new_ident(ExprSym(symrec_use(vp2->val))),
- objext_valparams));
- else if (vp2->param) {
- symrec_declare(vp->val, SYM_COMMON,
- cur_objfmt->common_data_new(vp->val, vp2->param,
- objext_valparams));
- vp2->param = NULL;
- }
- }
- } else
- Error(_("invalid argument to [%s]"), "COMMON");
- } else if (strcasecmp(name, "section") == 0 ||
- strcasecmp(name, "segment") == 0) {
- section *new_section =
- cur_objfmt->sections_switch(&nasm_parser_sections, valparams,
- objext_valparams);
- if (new_section) {
- nasm_parser_cur_section = new_section;
- nasm_parser_prev_bc = (bytecode *)NULL;
- } else
- Error(_("invalid argument to [%s]"), "SECTION");
- } else if (strcasecmp(name, "absolute") == 0) {
- /* it can be just an ID or a complete expression, so handle both. */
- vp = vps_first(valparams);
- if (vp->val)
- nasm_parser_cur_section =
- sections_switch_absolute(&nasm_parser_sections,
- expr_new_ident(ExprSym(symrec_use(vp->val))));
- else if (vp->param) {
- nasm_parser_cur_section =
- sections_switch_absolute(&nasm_parser_sections, vp->param);
- vp->param = NULL;
- }
- nasm_parser_prev_bc = (bytecode *)NULL;
- } else if (strcasecmp(name, "bits") == 0) {
- if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
- (intn = expr_get_intnum(&vp->param)) != NULL &&
- (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
- x86_mode_bits = (unsigned char)lval;
- else
- Error(_("invalid argument to [%s]"), "BITS");
- } else if (cur_objfmt->directive(name, valparams, objext_valparams,
- &nasm_parser_sections)) {
- Error(_("unrecognized directive [%s]"), name);
- }
-
- vps_delete(valparams);
- if (objext_valparams)
- vps_delete(objext_valparams);
-}
-
-void
-nasm_parser_error(const char *s)
-{
- ParserError(s);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-# $IdPath$
-# Generates NASM-compatible bison.y and token.l from instrs.dat.
-#
-# Copyright (C) 2001 Michael Urman
-#
-# This file is part of YASM.
-#
-# YASM is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# YASM is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-
-use strict;
-use Getopt::Long;
-my $VERSION = "0.0.1";
-
-# useful constants for instruction arrays
-# common
-use constant INST => 0;
-use constant OPERANDS => 1;
-# general format
-use constant OPSIZE => 2;
-use constant OPCODE => 3;
-use constant EFFADDR => 4;
-use constant IMM => 5;
-use constant CPU => 6;
-# relative target format
-use constant ADSIZE => 2;
-use constant SHORTOPCODE => 3;
-use constant NEAROPCODE => 4;
-use constant SHORTCPU => 5;
-use constant NEARCPU => 6;
-
-use constant TOO_MANY_ERRORS => 20;
-
-# default options
-my $instrfile = 'instrs.dat';
-my $tokenfile = 'token.l';
-my $tokensource;
-my $grammarfile = 'bison.y';
-my $grammarsource;
-my $showversion;
-my $showusage;
-my $dry_run;
-
-# allow overrides
-my $gotopts = GetOptions ( 'input=s' => \$instrfile,
- 'token=s' => \$tokenfile,
- 'sourcetoken=s' => \$tokensource,
- 'grammar=s' => \$grammarfile,
- 'sourcegrammar=s' => \$grammarsource,
- 'version' => \$showversion,
- 'n|dry-run' => \$dry_run,
- 'help|usage' => \$showusage,
- );
-
-&showusage and exit 1 unless $gotopts;
-&showversion if $showversion;
-&showusage if $showusage;
-exit 0 if $showversion or $showusage;
-
-# valid values for instrs.dat fields
-my $valid_regs = join '|', qw(
- reg_al reg_ah reg_ax reg_eax
- reg_bl reg_bh reg_bx reg_ebx
- reg_cl reg_ch reg_cx reg_ecx
- reg_dl reg_dh reg_dx reg_edx
- reg_si reg_esi reg_di reg_edi
- reg_bp reg_ebp
- reg_cs reg_ds reg_es reg_fs reg_gs reg_ss
- ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG
- fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm
- imm8 imm16 imm32 imm64 imm80 imm128
- imm8x imm16x imm32x imm64x imm80x imm128x
- rm8 rm16 rm32 rm1632 rm64 rm80 rm128
- rm8x rm16x rm32x rm1632x rm64x rm80x rm128x
- reg8 reg16 reg32 reg1632 reg64 reg80 reg128
- reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
- mem8 mem16 mem32 mem1632 mem64 mem80 mem128
- mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
- target memfar
-);
-my $valid_opcodes = join '|', qw(
- [0-9A-F]{2}
- \\$0\\.\\d
-);
-my $valid_cpus = join '|', qw(
- 8086 186 286 386 486 P4 P5 P6
- FPU MMX KATMAI SSE SSE2
- AMD ATHLON 3DNOW
- SMM
- CYRIX
- UNDOC OBS PRIV PROT
- @0 @1
-);
-
-# track errors and warnings rather than die'ing on the first.
-my (@messages, $errcount, $warncount);
-sub die_with_errors (@)
-{
- foreach (@_) { print; };
- if ($errcount)
- {
- print "Dying with errors\n";
- exit -1;
- }
-}
-
-my ($groups) = &read_instructions ($instrfile);
-
-die_with_errors @messages;
-
-exit 0 if $dry_run; # done with simple verification, so exit
-
-unless ($dry_run)
-{
- &output_lex ($tokenfile, $tokensource, $groups);
- &output_yacc ($grammarfile, $grammarsource, $groups);
-}
-
-# print version for --version, etc.
-sub showversion
-{
- print "YASM gen_instr.pl $VERSION\n";
-}
-
-# print usage information for --help, etc.
-sub showusage
-{
- print <<"EOF";
-Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile]
- -i, --input instructions file (default: $instrfile)
- -t, --token token output file (default: $tokenfile)
- -st, --sourcetoken token input file (default: $tokenfile.in)
- -g, --grammar grammar output file (default: $grammarfile)
- -sg, --sourcegrammar grammar input file (default: $grammarfile.in)
- -v, --version show version and exit
- -h, --help, --usage show this message and exit
- -n, --dry-run verify input file without writing output files
-EOF
-}
-
-# read in instructions, and verify they're valid (well, mostly)
-sub read_instructions ($)
-{
- my $instrfile = shift || die;
- open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n";
- my %instr;
- my %groups;
-
- sub add_group_rule ($$$$)
- {
- my ($inst, $args, $groups, $instrfile) = splice @_;
-
- # slide $0.\d down by one.
- # i still say changing instrs.dat would be better ;)
- $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
-
- # detect relative target format by looking for "target" in args
- if($args =~ m/target/oi)
- {
- my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
- split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
- die "Invalid Address Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Short Opcode\n"
- if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
- die "Invalid Near Opcode\n"
- if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
- die "Invalid Short CPU\n"
- if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- die "Invalid Near CPU\n"
- if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
- } else {
- my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
- die "Invalid Operation Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Opcode\n"
- if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
- die "Invalid Effective Address\n"
- if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
- die "Invalid Immediate Operand\n"
- if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
- die "Invalid CPU\n"
- if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
- }
- }
-
- sub add_group_member ($$$$$)
- {
- my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_;
-
- my ($inst, $group) = split /!/, $handle;
- my ($args, $cpu) = split /\t+/, $fullargs;
- eval {
- die "Invalid instruction name\n"
- if $inst !~ m/^\w+$/o;
- die "Invalid group name\n"
- if $group !~ m/^\w+$/o;
- die "Invalid CPU\n"
- if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n"
- unless exists $groups->{$group};
- $warncount++;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- # only allow multiple instances of instructions that aren't of a group
- push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++
- if exists $instr->{$inst} and not exists $groups->{$inst};
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu];
- $instr->{$inst} = 1;
- }
-
- while (<INPUT>)
- {
- chomp;
- next if /^\s*(?:;.*)$/;
-
- my ($handle, $args) = split /\t+/, $_, 2;
-
- # pseudo hack to handle original style instructions (no group)
- if ($handle =~ m/^\w+$/)
- {
- # TODO: this has some long ranging effects, as the eventual
- # bison rules get tagged <groupdata> when they don't need
- # to, etc. Fix this sometime.
- add_group_rule ("!$handle", $args, \%groups, $instrfile);
- add_group_member ("$handle!$handle", "", \%groups, \%instr,
- $instrfile);
- }
- elsif ($handle =~ m/^!\w+$/)
- {
- add_group_rule ($handle, $args, \%groups, $instrfile);
- }
- elsif ($handle =~ m/^\w+!\w+$/)
- {
- add_group_member ($handle, $args, \%groups, \%instr,
- $instrfile);
- }
- # TODO: consider if this is necessary: Pete?
- # (add_group_member_synonym is -not- implemented)
- #elsif ($handle =~ m/^:\w+$/)
- #{
- # add_group_member_synonym ($handle, $args);
- #}
- }
- close INPUT;
- return (\%groups);
-}
-
-sub output_lex ($@)
-{
- my $tokenfile = shift or die;
- my $tokensource = shift;
- $tokensource ||= "$tokenfile.in";
- my $groups = shift or die;
-
- open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n";
- open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n";
- while (<IN>)
- {
- # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content
- if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
- {
- foreach my $grp (sort keys %$groups)
- {
- my %printed;
- my $group = $grp; $group =~ s/^!//;
-
- foreach my $grp (@{$groups->{$grp}{members}})
- {
- unless (exists $printed{$grp->[0]})
- {
- $printed{$grp->[0]} = 1;
- my @groupdata;
- if ($grp->[2])
- {
- @groupdata = split ",", $grp->[2];
- for (my $i=0; $i < @groupdata; ++$i)
- {
- $groupdata[$i] =~ s/nil/0/;
- $groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];";
- }
- $groupdata[-1] .= "\n\t ";
- }
- printf TOKEN "%-12s{%s return %-20s }\n",
- $grp->[0],
- (join "\n\t ", @groupdata),
- "\Ugrp_$group;\E";
- # TODO: change appropriate GRP_FOO back to
- # INS_FOO's. not functionally important;
- # just pedantically so.
- }
- }
- }
- }
- else
- {
- print TOKEN $_;
- }
- }
- close IN;
- close TOKEN;
-}
-
-# helper functions for yacc output
-sub rule_header ($ $ $)
-{
- my ($rule, $tokens, $count) = splice (@_);
- $count ? " | $tokens {\n" : "$rule: $tokens {\n";
-}
-sub rule_footer ()
-{
- return " }\n";
-}
-
-sub cond_action_if ( $ $ $ $ $ $ $ )
-{
- my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
- return rule_header ($rule, $tokens, $count) . <<"EOF";
- if (\$$regarg == $val) {
- @$a_eax
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action_elsif ( $ $ $ $ )
-{
- my ($regarg, $val, $func, $a_eax) = splice (@_);
- return <<"EOF";
- else if (\$$regarg == $val) {
- @$a_eax
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action_else ( $ $ )
-{
- my ($func, $a_args) = splice (@_);
- return <<"EOF" . rule_footer;
- else {
- @$a_args
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action ( $ $ $ $ $ $ $ $ )
-{
- my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args)
- = splice (@_);
- return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func,
- $a_eax) . cond_action_else ($func, $a_args);
-}
-
-#sub action ( $ $ $ $ $ )
-sub action ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . rule_footer;
-}
-
-sub action_setshiftflag ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . " x86_bc_insn_set_shift_flag(\$\$);\n"
- . rule_footer;
-}
-
-sub action_setjrshort ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " if (\$2.op_sel == JR_NONE)\n"
- . " \$2.op_sel = JR_SHORT;\n"
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . rule_footer;
-}
-
-sub get_token_number ( $ $ )
-{
- my ($tokens, $str) = splice @_;
- $tokens =~ s/$str.*/x/; # hold its place
- my @f = split /\s+/, $tokens;
- return scalar @f;
-}
-
-sub output_yacc ($@)
-{
- my $grammarfile = shift or die;
- my $grammarsource = shift;
- $grammarsource ||= "$grammarfile.in";
- my $groups = shift or die;
-
- open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n";
- open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n";
-
- while (<IN>)
- {
- if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
- {
- print GRAMMAR "static x86_new_insn_data idata;\n";
- print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
- }
- elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
- {
- my $len = length("%token <groupdata>");
- print GRAMMAR "%token <groupdata>";
- foreach my $group (sort keys %$groups)
- {
- if ($len + length("GRP_$group") < 76)
- {
- print GRAMMAR " GRP_\U$group\E";
- $len += length(" GRP_$group");
- }
- else
- {
- print GRAMMAR "\n%token <groupdata> GRP_\U$group\E";
- $len = length("%token <groupdata> GRP_$group");
- }
- }
- print GRAMMAR "\n";
- }
- elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/})
- {
- my $len = length("%type <bc>");
- print GRAMMAR "%type <bc>";
- foreach my $group (sort keys %$groups)
- {
- if ($len + length($group) < 76)
- {
- print GRAMMAR " $group";
- $len += length(" $group");
- }
- else
- {
- print GRAMMAR "\n%type <bc> $group";
- $len = length("%type <bc> $group");
- }
- }
- print GRAMMAR "\n";
- }
- elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
- {
- # list every kind of instruction that instrbase can be
- print GRAMMAR "instrbase: ",
- join( "\n | ", sort keys %$groups), "\n;\n";
-
- my ($ONE, $AL, $AX, $EAX); # need the outer scope
- my (@XCHG_AX, @XCHG_EAX);
-
- # list the arguments and actions (buildbc)
- #foreach my $instrname (sort keys %$instrlist)
- foreach my $group (sort keys %$groups)
- {
- # I'm still convinced this is a hack. The idea is if
- # within an instruction we see certain versions of the
- # opcodes with ONE, or reg_e?a[lx],imm(8|16|32). If we
- # do, defer generation of the action, as we may need to
- # fold it into another version with a conditional to
- # generate the more efficient variant of the opcode
- # BUT, if we don't fold it in, we have to generate the
- # original version we would have otherwise.
- ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0);
- # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax).
- (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
- my $count = 0;
- foreach my $inst (@{$groups->{$group}{rules}}) {
- if($inst->[OPERANDS] =~ m/target/oi)
- {
- # relative target format
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]"
- if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- $tokens =~ s/:/ ':' /g;
- my $datastruct = "x86_new_jmprel_data";
- my $datastructname = "jrdata";
- my $func = "x86_bc_new_jmprel(&$datastructname)";
-
- # Create the argument list for bytecode_new
- my @args;
-
- # Target argument: HACK: Always assumed to be arg 1.
- push @args, 'target=&$2;';
-
- # test for short opcode "nil"
- if($inst->[SHORTOPCODE] =~ m/nil/)
- {
- push @args, 'short_op_len=0;';
- }
- else
- {
- my @opcodes;
- # Check for possible length parameter
- if($inst->[SHORTOPCODE] =~ m/\?/)
- {
- my @pieces = split /\?/, $inst->[SHORTOPCODE];
- push @args, "short_op_len=".$pieces[0].";";
- # opcode piece 1 (and 2 and 3 if attached)
- @opcodes = split ",", $pieces[1];
- }
- else
- {
- # opcode piece 1 (and 2 and 3 if attached)
- @opcodes = split ",", $inst->[SHORTOPCODE];
- # number of bytes of short opcode
- push @args, "short_op_len=".@opcodes.";";
- }
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
- push @args, "short_op[$i]=$opcodes[$i];";
- }
- }
-
- # test for near opcode "nil"
- if($inst->[NEAROPCODE] =~ m/nil/)
- {
- push @args, 'near_op_len=0;';
- }
- else
- {
- # opcode piece 1 (and 2 and 3 if attached)
- my @opcodes = split ",", $inst->[NEAROPCODE];
- # number of bytes of near opcode
- push @args, "near_op_len=".@opcodes.";";
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
- push @args, "near_op[$i]=$opcodes[$i];";
- }
- }
-
- # address size
- push @args, "addrsize=$inst->[ADSIZE];";
- $args[-1] =~ s/nil/0/;
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # and add the data structure reference
- s/^/$datastructname./g foreach (@args);
-
- if ($args[0] =~ m/\&\$/)
- {
- $args[0] = '/*@-immediatetrans@*/' . $args[0] .
- '/*@=immediatetrans@*/';
- }
-
- # generate the grammar
- # Specialcase jcc to set op_sel=JR_SHORT.
- if ($rule =~ m/jcc/)
- {
- print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++);
- }
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
- }
- }
- else
- {
- # general instruction format
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]"
- if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- $tokens =~ s/:/ ':' /g;
- # offset args
- my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
- my $datastruct = "x86_new_insn_data";
- my $datastructname = "idata";
- my $func = "x86_bc_new_insn(&$datastructname)";
-
- # Create the argument list for bytecode_new
- my @args;
-
- # operand size
- push @args, "opersize=$inst->[OPSIZE];";
- $args[-1] =~ s/nil/0/;
-
-
- # opcode piece 1 (and 2 and 3 if attached)
- my @opcodes = split ",", $inst->[OPCODE];
- # number of bytes of opcodes
- push @args, "op_len=".@opcodes.";";
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
- push @args, "op[$i]=$opcodes[$i];";
- }
-
- # effective addresses
- my $effaddr = $inst->[EFFADDR];
- $effaddr =~ s/^nil/NULL,0/;
- $effaddr =~ s/nil/0/;
- # don't let a $0.\d match slip into the following rules.
- $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
- $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
- $effaddr =~ s[(\$\d+)i,\s*(\d+)]
- ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
-
- die $effaddr if $effaddr =~ m/\d+[ri]/;
-
- my @effaddr_split = split ',', $effaddr;
- $effaddr_split[0] =~ s/\^/,/;
- push @args, "ea=$effaddr_split[0];";
- if ($effaddr_split[0] !~ m/NULL/)
- {
- push @args, "spare=$effaddr_split[1];";
- }
-
- # immediate sources
- my $imm = $inst->[IMM];
- $imm =~ s/nil/NULL,0/;
- # don't match $0.\d in the following rules.
- $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $imm =~ s[^([0-9A-Fa-f]+),]
- [imm_new_int(0x$1),];
- $imm =~ s[^\$0.(\d+),]
- [imm_new_int((unsigned long)\$1\[$1\]),];
-
- # divide the second, and only the second, by 8 bits/byte
- $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
- $imm .= ($3||'') eq 's' ? ',1' : ',0';
-
- die $imm if $imm =~ m/\d+s/;
-
- my @imm_split = split ",", $imm;
- push @args, "imm=$imm_split[0];";
- if ($imm_split[0] !~ m/NULL/)
- {
- push @args, "im_len=$imm_split[1];";
- push @args, "im_sign=$imm_split[2];";
- }
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # and add the data structure reference
- s/^/$datastructname./g foreach (@args);
-
- # see if we match one of the cases to defer
- if (($inst->[OPERANDS]||"") =~ m/,ONE/)
- {
- $ONE = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/)
- {
- $AL = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/)
- {
- $AX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/)
- {
- $EAX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/)
- {
- $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/)
- {
- $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/)
- {
- $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/)
- {
- $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
- }
-
- # or if we've deferred and we match the folding version
- elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
- {
- $ONE->[4] = 1;
- # Output a normal version except imm8 -> imm8x
- # (BYTE override always makes longer version, and
- # we don't want to conflict with the imm version
- # we output right after this one.
- $tokens =~ s/imm8/imm8x/;
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-
- # Now output imm version, with second opcode byte
- # set to ,1 opcode. Also call SetInsnShiftFlag().
- $tokens =~ s/imm8x/imm/;
- my $oneval = $ONE->[3]->[2];
- $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg;
- push @args, $oneval;
- print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
- }
- elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
- {
- $AL->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg8");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
- }
- elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/)
- {
- $AX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg16");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
- }
- elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/)
- {
- $EAX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg32");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
- }
- elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_AX; ++$i)
- {
- if($XCHG_AX[$i])
- {
- $XCHG_AX[$i]->[4] = 1;
- # This is definitely a hack. The "right"
- # way to do this would be to enhance
- # get_token_number to get the nth reg16
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg16")
- + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
- }
- }
- }
- print GRAMMAR cond_action_else ($func, \@args);
- }
- elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_EAX; ++$i)
- {
- if($XCHG_EAX[$i])
- {
- $XCHG_EAX[$i]->[4] = 1;
- # This is definitely a hack. The "right"
- # way to do this would be to enhance
- # get_token_number to get the nth reg32
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg32")
- + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- }
- }
- }
- print GRAMMAR cond_action_else ($func, \@args);
- }
-
- # otherwise, generate the normal version
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
- }
- }
- }
-
- # catch deferreds that haven't been folded in.
- if ($ONE and not $ONE->[4])
- {
- print GRAMMAR action (@$ONE, $count++);
- }
- if ($AL and not $AL->[4])
- {
- print GRAMMAR action (@$AL, $count++);
- }
- if ($AX and not $AL->[4])
- {
- print GRAMMAR action (@$AX, $count++);
- }
- if ($EAX and not $AL->[4])
- {
- print GRAMMAR action (@$EAX, $count++);
- }
-
- # print error action
- # ASSUMES: at least one previous action exists
- print GRAMMAR " | \Ugrp_$group\E error {\n";
- print GRAMMAR " Error (_(\"expression syntax error\"));\n";
- print GRAMMAR " \$\$ = (bytecode *)NULL;\n";
- print GRAMMAR " }\n";
-
- # terminate the rule
- print GRAMMAR ";\n";
- }
- }
- else
- {
- print GRAMMAR $_;
- }
- }
- close IN;
- close GRAMMAR;
-}
#include "src/parsers/nasm/nasm-defs.h"
+
void init_table(void);
extern int nasm_parser_lex(void);
+extern void nasm_parser_set_directive_state(void);
void nasm_parser_error(const char *);
static void nasm_parser_directive(const char *name,
valparamhead *valparams,
extern sectionhead nasm_parser_sections;
extern section *nasm_parser_cur_section;
extern char *nasm_parser_locallabel_base;
+extern size_t nasm_parser_locallabel_base_len;
static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
static bytecode *nasm_parser_temp_bc;
intnum *intn;
floatnum *flt;
symrec *sym;
- unsigned char groupdata[5];
+ unsigned long arch_data[4];
effaddr *ea;
expr *exp;
- immval *im_val;
- x86_targetval tgt_val;
datavalhead datahead;
dataval *data;
bytecode *bc;
valparamhead dir_valparams;
valparam *dir_valparam;
+ struct {
+ insn_operandhead operands;
+ int num_operands;
+ } insn_operands;
+ insn_operand *insn_operand;
}
%token <intn> INTNUM
%token <int_info> DECLARE_DATA
%token <int_info> RESERVE_SPACE
%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token SEG WRT NOSPLIT
%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token <arch_data> INSN PREFIX REG SEGREG TARGETMOD
%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
%token <str_val> ID LOCAL_ID SPECIAL_ID
%token LINE
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
+%type <bc> line lineexp exp instr
+
+%type <ea> memaddr
+%type <exp> dvexpr expr direxpr
%type <sym> explabel
%type <str_val> label_id
-%type <tgt_val> target
%type <data> dataval
%type <datahead> datavals
%type <dir_valparams> directive_valparams
%type <dir_valparam> directive_valparam
+%type <insn_operands> operands
+%type <insn_operand> operand
%left '|'
%left '^'
xfree($5);
$$ = (bytecode *)NULL;
}
- | directive '\n' { $$ = (bytecode *)NULL; }
+ | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' {
+ $$ = (bytecode *)NULL;
+ }
| error '\n' {
Error(_("label or instruction expected at start of line"));
$$ = (bytecode *)NULL;
| INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); }
;
+/* An instruction: an INSN token with optional operands, optionally preceded
+ * by any number of PREFIX or SEGREG tokens.  All architecture-specific work
+ * is delegated to the cur_arch->parse hooks; the value of the rule is the
+ * bytecode returned by new_insn (or NULL after a syntax error).
+ */
+instr: INSN {
+ $$ = cur_arch->parse.new_insn($1, 0, NULL);
+ }
+ | INSN operands {
+ $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands);
+ /* the operand list is released here, after new_insn has seen it */
+ ops_delete(&$2.operands, 0);
+ }
+ | INSN error {
+ Error(_("expression syntax error"));
+ $$ = NULL;
+ }
+ /* NOTE(review): when the nested instr came from the error alternative
+  * above, $$ is NULL when it reaches handle_prefix/handle_seg_prefix --
+  * confirm that the arch hooks tolerate a NULL bytecode.
+  */
+ | PREFIX instr {
+ $$ = $2;
+ cur_arch->parse.handle_prefix($$, $1);
+ }
+ | SEGREG instr {
+ $$ = $2;
+ cur_arch->parse.handle_seg_prefix($$, $1[0]);
+ }
+;
+
datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); }
| datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; }
;
-dataval: expr_no_string { $$ = dv_new_expr($1); }
+dataval: dvexpr { $$ = dv_new_expr($1); }
| STRING { $$ = dv_new_string($1); }
| error {
Error(_("expression syntax error"));
$$ = $1;
if (nasm_parser_locallabel_base)
xfree(nasm_parser_locallabel_base);
- nasm_parser_locallabel_base = xstrdup($1);
+ nasm_parser_locallabel_base_len = strlen($1);
+ nasm_parser_locallabel_base =
+ xmalloc(nasm_parser_locallabel_base_len+1);
+ strcpy(nasm_parser_locallabel_base, $1);
}
| SPECIAL_ID
| LOCAL_ID
;
/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']' {
- xfree($2);
+directive: DIRECTIVE_NAME directive_val {
+ xfree($1);
}
- | '[' DIRECTIVE_NAME error ']' {
- Error(_("invalid arguments to [%s]"), $2);
- xfree($2);
+ | DIRECTIVE_NAME error {
+ Error(_("invalid arguments to [%s]"), $1);
+ xfree($1);
}
;
| ID '=' direxpr { vp_new($$, $1, $3); }
;
-/* register groupings */
-fpureg: ST0
- | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
- | DWORD reg_eax { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
- | DWORD reg_ecx { $$ = $2; }
-;
-
-rawreg32: REG_EAX
- | REG_ECX
- | REG_EDX
- | REG_EBX
- | REG_ESP
- | REG_EBP
- | REG_ESI
- | REG_EDI
-;
-
-reg32: rawreg32
- | DWORD reg32 { $$ = $2; }
-;
-
-reg_ax: REG_AX
- | WORD reg_ax { $$ = $2; }
-;
-
-reg_cx: REG_CX
- | WORD reg_cx { $$ = $2; }
-;
-
-reg_dx: REG_DX
- | WORD reg_dx { $$ = $2; }
-;
-
-rawreg16: REG_AX
- | REG_CX
- | REG_DX
- | REG_BX
- | REG_SP
- | REG_BP
- | REG_SI
- | REG_DI
-;
-
-reg16: rawreg16
- | WORD reg16 { $$ = $2; }
-;
-
-reg_al: REG_AL
- | BYTE reg_al { $$ = $2; }
-;
-
-reg_cl: REG_CL
- | BYTE reg_cl { $$ = $2; }
-;
-
-reg8: REG_AL
- | REG_CL
- | REG_DL
- | REG_BL
- | REG_AH
- | REG_CH
- | REG_DH
- | REG_BH
- | BYTE reg8 { $$ = $2; }
-;
-
-reg_es: REG_ES
- | WORD reg_es { $$ = $2; }
-;
-
-reg_ss: REG_SS
- | WORD reg_ss { $$ = $2; }
-;
-
-reg_ds: REG_DS
- | WORD reg_ds { $$ = $2; }
-;
-
-reg_fs: REG_FS
- | WORD reg_fs { $$ = $2; }
-;
-
-reg_gs: REG_GS
- | WORD reg_gs { $$ = $2; }
-;
-
-reg_cs: REG_CS
- | WORD reg_cs { $$ = $2; }
-;
-
-segreg: REG_ES
- | REG_SS
- | REG_DS
- | REG_FS
- | REG_GS
- | REG_CS
- | WORD segreg { $$ = $2; }
-;
-
/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated? This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg. I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); }
- | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' memexpr %prec UNARYOP { $$ = $2; }
- | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' memexpr ')' { $$ = $2; }
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
+memaddr: expr {
+ $$ = cur_arch->parse.ea_new_expr($1);
}
- | error { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr {
- $$ = x86_ea_new_expr($1);
- x86_ea_set_segment($$, 0);
+ | SEGREG ':' memaddr {
+ $$ = $3;
+ cur_arch->parse.handle_seg_override($$, $1[0]);
}
- | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); }
- | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); }
- | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); }
- | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); }
- | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); }
- | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); }
| BYTE memaddr { $$ = $2; ea_set_len($$, 1); }
| WORD memaddr { $$ = $2; ea_set_len($$, 2); }
| DWORD memaddr { $$ = $2; ea_set_len($$, 4); }
| NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); }
;
-mem: '[' memaddr ']' { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem { $$ = $2; }
- | BYTE mem8x { $$ = $2; }
-;
-mem16x: WORD mem { $$ = $2; }
- | WORD mem16x { $$ = $2; }
-;
-mem32x: DWORD mem { $$ = $2; }
- | DWORD mem32x { $$ = $2; }
-;
-mem64x: QWORD mem { $$ = $2; }
- | QWORD mem64x { $$ = $2; }
-;
-mem80x: TWORD mem { $$ = $2; }
- | TWORD mem80x { $$ = $2; }
-;
-mem128x: DQWORD mem { $$ = $2; }
- | DQWORD mem128x { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem { $$ = $2; }
- | FAR memfar { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
- | mem8x
-;
-mem16: mem
- | mem16x
-;
-mem32: mem
- | mem32x
-;
-mem64: mem
- | mem64x
-;
-mem80: mem
- | mem80x
-;
-mem128: mem
- | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
- | mem16x
- | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8x
-;
-rm16x: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16x
-;
-rm32x: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32x
-;
-/* not needed:
-rm64x: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8
-;
-rm16: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16
-;
-rm32: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32
-;
-rm64: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64
-;
-rm128: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128
-;
-
-/* immediate values */
-imm: expr { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm { $$ = $2; }
-;
-imm16x: WORD imm { $$ = $2; }
-;
-imm32x: DWORD imm { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
- | imm8x
-;
-imm16: imm
- | imm16x
-;
-imm32: imm
- | imm32x
+/* instruction operands */
+operands: operand {
+ ops_initialize(&$$.operands);
+ ops_append(&$$.operands, $1);
+ $$.num_operands = 1;
+ }
+ | operands ',' operand {
+ ops_append(&$1.operands, $3);
+ $$.operands = $1.operands;
+ $$.num_operands = $1.num_operands+1;
+ }
;
-/* jump targets */
-target: expr {
- $$.val = $1;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+operand: '[' memaddr ']' { $$ = operand_new_mem($2); }
+ | expr { $$ = operand_new_imm($1); }
+ | SEGREG { $$ = operand_new_segreg($1[0]); }
+ | BYTE operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 1)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 1;
}
- | SHORT target {
+ | WORD operand {
$$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 2)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 2;
}
- | NEAR target {
+ | DWORD operand {
$$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 4)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 4;
}
+ | QWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 8)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 8;
+ }
+ | TWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 10)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 10;
+ }
+ | DQWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 16)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 16;
+ }
+ | TARGETMOD operand { $$ = $2; $$->targetmod = $1[0]; }
;
/* expression trees */
| '(' direxpr ')' { $$ = $2; }
;
-expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
+dvexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
| FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
| explabel { $$ = expr_new_ident(ExprSym($1)); }
+ /*| dvexpr '||' dvexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
+ | dvexpr '|' dvexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
+ | dvexpr '^' dvexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
+ /*| dvexpr '&&' dvexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
+ | dvexpr '&' dvexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
+ /*| dvexpr '==' dvexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
+ /*| dvexpr '>' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+ /*| dvexpr '<' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+ /*| dvexpr '>=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+ /*| dvexpr '<=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+ /*| dvexpr '!=' dvexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
+ | dvexpr LEFT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
+ | dvexpr RIGHT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
+ | dvexpr '+' dvexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
+ | dvexpr '-' dvexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
+ | dvexpr '*' dvexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
+ | dvexpr '/' dvexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
+ | dvexpr SIGNDIV dvexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
+ | dvexpr '%' dvexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
+ | dvexpr SIGNMOD dvexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
+ | '+' dvexpr %prec UNARYOP { $$ = $2; }
+ | '-' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
+ /*| '!' dvexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
+ | '~' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
+ | '(' dvexpr ')' { $$ = $2; }
+;
+
+/* Expressions for operands and memory expressions.
+ * We don't attempt to check memory expressions for validity here.
+ * Essentially the same as dvexpr above, but adds REG and STRING.
+ */
+expr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
+ | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
+ | REG { $$ = expr_new_ident(ExprReg($1[0])); }
+ | STRING {
+ $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
+ xfree($1);
+ }
+ | explabel { $$ = expr_new_ident(ExprSym($1)); }
/*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
| expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); }
| expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
| '(' expr ')' { $$ = $2; }
;
-expr: expr_no_string
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
-;
-
explabel: ID {
$$ = symrec_use($1);
xfree($1);
}
;
-instr: /* empty */ {
- idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
- $$ = x86_bc_new_insn(&idata);
- }
- | instrbase
- | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
- | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
- | REG_CS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
- }
- | REG_SS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
- }
- | REG_DS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
- }
- | REG_ES instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
- }
- | REG_FS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
- }
- | REG_GS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
- }
- | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
- | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
- | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
- | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
%%
/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
valparamhead *objext_valparams)
{
valparam *vp, *vp2;
- const intnum *intn;
- long lval;
assert(cur_objfmt != NULL);
vp->param = NULL;
}
nasm_parser_prev_bc = (bytecode *)NULL;
- } else if (strcasecmp(name, "bits") == 0) {
- if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
- (intn = expr_get_intnum(&vp->param)) != NULL &&
- (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
- x86_mode_bits = (unsigned char)lval;
- else
- Error(_("invalid argument to [%s]"), "BITS");
+    } else if (strcasecmp(name, "cpu") == 0) {
+        /* [cpu ...]: pass every argument to the architecture's switch_cpu
+         * hook.  Identifier-style arguments are passed as-is; numeric
+         * arguments are converted to their decimal string form first.
+         */
+        vps_foreach(vp, valparams) {
+            if (vp->val)
+                cur_arch->parse.switch_cpu(vp->val);
+            else if (vp->param) {
+                const intnum *intcpu;
+                intcpu = expr_get_intnum(&vp->param);
+                if (!intcpu)
+                    Error(_("invalid argument to [%s]"), "CPU");
+                else {
+                    /* The value is printed with %lu, i.e. as an unsigned
+                     * long.  Three decimal digits per byte is an upper
+                     * bound on its length, so this buffer cannot overflow
+                     * regardless of the width of unsigned long (a fixed
+                     * 16 bytes is too small when it is 64 bits wide).
+                     */
+                    char strcpu[3 * sizeof(unsigned long) + 1];
+                    sprintf(strcpu, "%lu", intnum_get_uint(intcpu));
+                    cur_arch->parse.switch_cpu(strcpu);
+                }
+            }
+        }
+ } else if (!cur_arch->parse.directive(name, valparams, objext_valparams,
+ &nasm_parser_sections)) {
+ ;
} else if (cur_objfmt->directive(name, valparams, objext_valparams,
&nasm_parser_sections)) {
Error(_("unrecognized directive [%s]"), name);
extern int nasm_parser_debug;
extern int nasm_parser_parse(void);
+extern void nasm_parser_cleanup(void);
size_t (*nasm_parser_input) (char *buf, size_t max_size);
nasm_parser_parse();
+ nasm_parser_cleanup();
+
/* Free locallabel base if necessary */
if (nasm_parser_locallabel_base)
xfree(nasm_parser_locallabel_base);
--- /dev/null
+/*
+ * NASM-compatible lex lexer
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * Portions based on re2c's example code.
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+#include "src/parsers/nasm/nasm-defs.h"
+#include "nasm-bison.h"
+
+
+/* Number of bytes requested from nasm_parser_input on each buffer refill. */
+#define BSIZE 8192
+
+/* re2c interface: character type, current position, end of valid data,
+ * backtrack marker, and the refill hook.  `cursor' is a local variable of
+ * the function containing the re2c blocks.
+ */
+#define YYCTYPE char
+#define YYCURSOR cursor
+#define YYLIMIT s.lim
+#define YYMARKER s.ptr
+#define YYFILL(n) {cursor = fill(cursor);}
+
+/* Return token i, first saving the cursor so the next call resumes there. */
+#define RETURN(i) {s.cur = cursor; return i;}
+
+/* Mark the start of a new token at the current cursor position. */
+#define SCANINIT() { \
+ s.tchar = cursor - s.pos; \
+ s.tline = s.cline; \
+ s.tok = cursor; \
+ }
+
+/* Length of the text matched so far for the current token. */
+#define TOKLEN (cursor-s.tok)
+
+void nasm_parser_cleanup(void);
+void nasm_parser_set_directive_state(void);
+int nasm_parser_lex(void);
+
+extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
+
+
+/* Input buffer bookkeeping for the re2c scanner.  Member order matters: the
+ * initialiser of `s' below is positional.
+ */
+typedef struct Scanner {
+    YYCTYPE *bot;       /* start of the allocated buffer */
+    YYCTYPE *tok;       /* start of the token being scanned */
+    YYCTYPE *ptr;       /* re2c backtrack marker (YYMARKER) */
+    YYCTYPE *cur;       /* position at which the next lexer call resumes */
+    YYCTYPE *pos;       /* base for tchar; presumably line start -- confirm */
+    YYCTYPE *lim;       /* end of valid data (YYLIMIT) */
+    YYCTYPE *top;       /* end of the allocated buffer */
+    YYCTYPE *eof;       /* NULL until input runs out, then one past the
+                           newline appended after the last byte */
+    unsigned int tchar; /* token offset relative to pos (set by SCANINIT) */
+    unsigned int tline; /* copy of cline taken by SCANINIT */
+    unsigned int cline; /* starts at 1; presumably current line -- confirm */
+} Scanner;
+
+static Scanner s = {
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    0, 0, 1
+};
+
+FILE *nasm_parser_in = NULL;
+
+/*
+ * Refill the scanner buffer (the YYFILL hook).
+ *
+ * Discards everything before the current token, grows the buffer if fewer
+ * than BSIZE bytes are free after the valid data, then reads up to BSIZE
+ * more bytes through nasm_parser_input.  A short read marks end of input: a
+ * newline is appended after the last byte and s.eof is set to point just
+ * past it.  Does nothing once s.eof is set.
+ *
+ * cursor is the caller's scan position; the return value is that position
+ * adjusted for any move or reallocation of the buffer.
+ */
+static YYCTYPE *
+fill(YYCTYPE *cursor)
+{
+    if (!s.eof) {
+        size_t cnt = s.tok - s.bot;
+
+        if (cnt) {
+            /* Source and destination are in the same buffer and overlap
+             * whenever the pending data is longer than cnt, so memcpy is
+             * not allowed here.
+             */
+            memmove(s.bot, s.tok, s.lim - s.tok);
+            s.tok = s.bot;
+            s.ptr -= cnt;
+            cursor -= cnt;
+            s.pos -= cnt;
+            s.lim -= cnt;
+        }
+        if ((s.top - s.lim) < BSIZE) {
+            char *buf = xmalloc((s.lim - s.bot) + BSIZE);
+
+            /* Nothing to copy on the very first call, when every scanner
+             * pointer is still NULL.
+             */
+            if (s.lim != s.tok)
+                memcpy(buf, s.tok, s.lim - s.tok);
+            s.tok = buf;
+            s.ptr = &buf[s.ptr - s.bot];
+            cursor = &buf[cursor - s.bot];
+            s.pos = &buf[s.pos - s.bot];
+            s.lim = &buf[s.lim - s.bot];
+            s.top = &s.lim[BSIZE];
+            if (s.bot)
+                xfree(s.bot);
+            s.bot = buf;
+        }
+        if ((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE) {
+            /* Short read: terminate the input with a newline. */
+            s.eof = &s.lim[cnt];
+            *s.eof++ = '\n';
+        }
+        s.lim += cnt;
+    }
+    return cursor;
+}
+
+/*
+ * Release the scanner's input buffer.  s.bot is cleared afterwards so that a
+ * repeated call, or the "free old buffer" step in fill(), cannot free the
+ * same block twice.  The other scanner pointers are left untouched.
+ */
+void
+nasm_parser_cleanup(void)
+{
+    if (s.bot) {
+        xfree(s.bot);
+        s.bot = NULL;
+    }
+}
+
+/* starting size of string buffer */
+#define STRBUF_ALLOC_SIZE 128
+
+/* string buffer used when parsing strings/character constants */
+static char *strbuf = (char *)NULL;
+
+/* length of strbuf (including terminating NULL character) */
+static size_t strbuf_size = 0;
+
+/* last "base" label for local (.) labels */
+char *nasm_parser_locallabel_base = (char *)NULL;
+size_t nasm_parser_locallabel_base_len = 0;
+
+static int linechg_numcount;
+
+/*!re2c
+ any = [\000-\377];
+ digit = [0-9];
+ iletter = [a-zA-Z];
+ bindigit = [01];
+ octdigit = [0-7];
+ hexdigit = [0-9a-fA-F];
+ ws = [ \t\r];
+ quot = ["'];
+ A = [aA];
+ B = [bB];
+ C = [cC];
+ D = [dD];
+ E = [eE];
+ F = [fF];
+ G = [gG];
+ H = [hH];
+ I = [iI];
+ J = [jJ];
+ K = [kK];
+ L = [lL];
+ M = [mM];
+ N = [nN];
+ O = [oO];
+ P = [pP];
+ Q = [qQ];
+ R = [rR];
+ S = [sS];
+ T = [tT];
+ U = [uU];
+ V = [vV];
+ W = [wW];
+ X = [xX];
+ Y = [yY];
+ Z = [zZ];
+*/
+
+/* Lexer state; selects which re2c block nasm_parser_lex() starts in.
+ * The main scanner resets it to INITIAL at every newline.
+ */
+static enum {
+ /* main scanner */
+ INITIAL,
+ /* set by nasm_parser_set_directive_state(); the directive scanner runs
+  * and returns DIRECTIVE_NAME */
+ DIRECTIVE,
+ /* set once DIRECTIVE_NAME has been returned; the main scanner runs, but
+  * "]" returns to INITIAL and .name tokens are returned as plain ID */
+ DIRECTIVE2,
+ /* set after "%line"; scans the numbers and '+' */
+ LINECHG,
+ /* set after two numbers have been seen; scans the filename */
+ LINECHG2
+} state = INITIAL;
+
+/* Put the lexer into DIRECTIVE state, so that the next call to
+ * nasm_parser_lex() scans a directive name.  The grammar calls this from
+ * the mid-rule action that follows a '[' at the start of a line.
+ */
+void
+nasm_parser_set_directive_state(void)
+{
+ state = DIRECTIVE;
+}
+
+/*
+ * Lexer entry point: scan the next token from the input supplied through
+ * nasm_parser_input.
+ *
+ * Returns the token code (a %token value, or the character itself for
+ * single-character tokens), or 0 once all input has been consumed.  The
+ * token's semantic value, if it has one, is stored in yylval; string values
+ * are freshly allocated.
+ *
+ * The scanner consists of one re2c block per lexer state.  The file-static
+ * `state' decides which block a call starts in, and every action either
+ * returns a token or jumps back to the label of the block that should scan
+ * next -- an action must never fall out of its block.
+ */
+int
+nasm_parser_lex(void)
+{
+    YYCTYPE *cursor = s.cur;
+    YYCTYPE endch;
+    size_t count, len;
+    YYCTYPE savech;
+    arch_check_id_retval check_id_ret;
+
+    /* Catch EOF */
+    if (s.eof && cursor == s.eof)
+        return 0;
+
+    /* Jump to proper "exclusive" states */
+    switch (state) {
+        case DIRECTIVE:
+            goto directive;
+        case LINECHG:
+            goto linechg;
+        case LINECHG2:
+            goto linechg2;
+        default:
+            break;
+    }
+
+scan:
+    SCANINIT();
+
+    /*!re2c
+        /* standard decimal integer */
+        digit+ {
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            yylval.intn = intnum_new_dec(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+
+        /* 10010011b - binary number (suffix is case-insensitive) */
+        bindigit+ B {
+            s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */
+            yylval.intn = intnum_new_bin(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* 777q - octal number (suffix is case-insensitive) */
+        octdigit+ Q {
+            s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */
+            yylval.intn = intnum_new_oct(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* 0AAh form of hexadecimal number.  A single leading digit is
+         * enough (5h), and the suffix is case-insensitive (0FFH).
+         */
+        digit hexdigit* H {
+            s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */
+            yylval.intn = intnum_new_hex(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* $0AA and 0xAA forms of hexadecimal number */
+        (("$" digit) | ("0" X)) hexdigit+ {
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            if (s.tok[1] == 'x' || s.tok[1] == 'X')
+                yylval.intn = intnum_new_hex(s.tok+2); /* skip 0 and x */
+            else
+                yylval.intn = intnum_new_hex(s.tok+1); /* don't skip 0 */
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+
+        /* floating point value */
+        digit+ "." digit* (E [-+]? digit+)? {
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            yylval.flt = floatnum_new(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(FLTNUM);
+        }
+
+        /* string/character constant values */
+        quot {
+            endch = s.tok[0];
+            goto stringconst;
+        }
+
+        /* %line linenum+lineinc filename */
+        "%line" {
+            state = LINECHG;
+            linechg_numcount = 0;
+            RETURN(LINE);
+        }
+
+        /* size specifiers */
+        B Y T E         { yylval.int_info = 1; RETURN(BYTE); }
+        W O R D         { yylval.int_info = 2; RETURN(WORD); }
+        D W O R D       { yylval.int_info = 4; RETURN(DWORD); }
+        Q W O R D       { yylval.int_info = 8; RETURN(QWORD); }
+        T W O R D       { yylval.int_info = 10; RETURN(TWORD); }
+        D Q W O R D     { yylval.int_info = 16; RETURN(DQWORD); }
+
+        /* pseudo-instructions */
+        D B             { yylval.int_info = 1; RETURN(DECLARE_DATA); }
+        D W             { yylval.int_info = 2; RETURN(DECLARE_DATA); }
+        D D             { yylval.int_info = 4; RETURN(DECLARE_DATA); }
+        D Q             { yylval.int_info = 8; RETURN(DECLARE_DATA); }
+        D T             { yylval.int_info = 10; RETURN(DECLARE_DATA); }
+
+        R E S B         { yylval.int_info = 1; RETURN(RESERVE_SPACE); }
+        R E S W         { yylval.int_info = 2; RETURN(RESERVE_SPACE); }
+        R E S D         { yylval.int_info = 4; RETURN(RESERVE_SPACE); }
+        R E S Q         { yylval.int_info = 8; RETURN(RESERVE_SPACE); }
+        R E S T         { yylval.int_info = 10; RETURN(RESERVE_SPACE); }
+
+        I N C B I N     { RETURN(INCBIN); }
+
+        E Q U           { RETURN(EQU); }
+
+        T I M E S       { RETURN(TIMES); }
+
+        S E G           { RETURN(SEG); }
+        W R T           { RETURN(WRT); }
+
+        N O S P L I T   { RETURN(NOSPLIT); }
+
+        T O             { RETURN(TO); }
+
+        /* operators */
+        "<<"            { RETURN(LEFT_OP); }
+        ">>"            { RETURN(RIGHT_OP); }
+        "//"            { RETURN(SIGNDIV); }
+        "%%"            { RETURN(SIGNMOD); }
+        "$$"            { RETURN(START_SECTION_ID); }
+        [-+|^*&/%~$():=,\[] { RETURN(s.tok[0]); }
+
+        /* handle ] separately for directives */
+        "]" {
+            if (state == DIRECTIVE2)
+                state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        /* special non-local ..@label and labels like ..start */
+        ".." [a-zA-Z0-9_$#@~.?]+ {
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(SPECIAL_ID);
+        }
+
+        /* local label (.label) */
+        "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+            /* override local labels in directive state */
+            if (state == DIRECTIVE2) {
+                yylval.str_val = xstrndup(s.tok, TOKLEN);
+                RETURN(ID);
+            } else if (!nasm_parser_locallabel_base) {
+                /* Copy the label first: %s needs a terminated string, not
+                 * the first character of the token.
+                 */
+                yylval.str_val = xstrndup(s.tok, TOKLEN);
+                Warning(_("no non-local label before `%s'"),
+                        yylval.str_val);
+            } else {
+                /* prepend the most recent non-local label */
+                len = TOKLEN + nasm_parser_locallabel_base_len;
+                yylval.str_val = xmalloc(len + 1);
+                strcpy(yylval.str_val, nasm_parser_locallabel_base);
+                strncat(yylval.str_val, s.tok, TOKLEN);
+                yylval.str_val[len] = '\0';
+            }
+
+            RETURN(LOCAL_ID);
+        }
+
+        /* forced identifier */
+        "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(ID);
+        }
+
+        /* identifier that may be a register, instruction, etc. */
+        [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+            /* let the architecture classify the (temporarily terminated)
+             * token; on a match it fills in yylval.arch_data
+             */
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+                                                            s.tok);
+            s.tok[TOKLEN] = savech;
+            switch (check_id_ret) {
+                case ARCH_CHECK_ID_NONE:
+                    /* Just an identifier, return as such. */
+                    yylval.str_val = xstrndup(s.tok, TOKLEN);
+                    RETURN(ID);
+                case ARCH_CHECK_ID_INSN:
+                    RETURN(INSN);
+                case ARCH_CHECK_ID_PREFIX:
+                    RETURN(PREFIX);
+                case ARCH_CHECK_ID_REG:
+                    RETURN(REG);
+                case ARCH_CHECK_ID_SEGREG:
+                    RETURN(SEGREG);
+                case ARCH_CHECK_ID_TARGETMOD:
+                    RETURN(TARGETMOD);
+                default:
+                    Warning(_("Arch feature not supported, treating as identifier"));
+                    yylval.str_val = xstrndup(s.tok, TOKLEN);
+                    RETURN(ID);
+            }
+        }
+
+        ";" (any \ [\n])*   { goto scan; }
+
+        ws+                 { goto scan; }
+
+        "\n"                { state = INITIAL; RETURN(s.tok[0]); }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto scan;
+        }
+    */
+
+    /* %line linenum+lineinc filename */
+linechg:
+    SCANINIT();
+
+    /*!re2c
+        digit+ {
+            linechg_numcount++;
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            yylval.intn = intnum_new_dec(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+
+        "\n" {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        "+" {
+            RETURN(s.tok[0]);
+        }
+
+        ws+ {
+            /* Only whitespace that follows both numbers introduces the
+             * filename; any other whitespace is simply skipped.
+             */
+            if (linechg_numcount == 2) {
+                state = LINECHG2;
+                goto linechg2;
+            }
+            goto linechg;
+        }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto linechg;
+        }
+    */
+
+linechg2:
+    SCANINIT();
+
+    /*!re2c
+        "\n" {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        /* skip carriage returns; rescan rather than falling through into
+         * the directive scanner below
+         */
+        "\r" { goto linechg2; }
+
+        (any \ [\r\n])+ {
+            state = LINECHG;
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(FILENAME);
+        }
+    */
+
+    /* directive: [name value] */
+directive:
+    SCANINIT();
+
+    /*!re2c
+        [\]\n] {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        iletter+ {
+            state = DIRECTIVE2;
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(DIRECTIVE_NAME);
+        }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto directive;
+        }
+    */
+
+    /* string/character constant values */
+stringconst:
+    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+    strbuf_size = STRBUF_ALLOC_SIZE;
+    count = 0;
+
+stringconst_scan:
+    SCANINIT();
+
+    /*!re2c
+        "\n" {
+            /* the newline that fill() appends at end of input is the one
+             * immediately before s.eof
+             */
+            if (cursor == s.eof)
+                Error(_("unexpected end of file in string"));
+            else
+                Error(_("unterminated string"));
+            strbuf[count] = '\0';
+            yylval.str_val = strbuf;
+            RETURN(STRING);
+        }
+
+        any {
+            if (s.tok[0] == endch) {
+                strbuf[count] = '\0';
+                yylval.str_val = strbuf;
+                RETURN(STRING);
+            }
+
+            /* grow after storing, so there is always room for the
+             * terminator
+             */
+            strbuf[count++] = s.tok[0];
+            if (count >= strbuf_size) {
+                strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
+                strbuf_size += STRBUF_ALLOC_SIZE;
+            }
+
+            goto stringconst_scan;
+        }
+    */
+}
+++ /dev/null
-/*
- * NASM-compatible lex lexer
- *
- * Copyright (C) 2001 Peter Johnson
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#include "bitvect.h"
-
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-#include "nasm-bison.h"
-
-
-#define YY_NEVER_INTERACTIVE 1
-
-int nasm_parser_lex(void);
-
-extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
-#undef YY_INPUT
-#define YY_INPUT(b, r, ms) (r = nasm_parser_input(b, ms))
-
-/* starting size of string buffer */
-#define STRBUF_ALLOC_SIZE 128
-
-/* string buffer used when parsing strings/character constants */
-static char *strbuf = (char *)NULL;
-
-/* length of strbuf (including terminating NULL character) */
-static size_t strbuf_size = 0;
-
-/* last "base" label for local (.) labels */
-char *nasm_parser_locallabel_base = (char *)NULL;
-
-static int linechg_numcount;
-
-%}
-%option noyywrap
-%option nounput
-%option case-insensitive
-%option never-interactive
-%option prefix="nasm_parser_"
-%option outfile="lex.yy.c"
-
-%x DIRECTIVE LINECHG LINECHG2
-%s DIRECTIVE2
-
-DIGIT [0-9]
-BINDIGIT [01]
-OCTDIGIT [0-7]
-HEXDIGIT [0-9a-f]
-WS [ \t\r]
-
-%%
-
- /* standard decimal integer */
-{DIGIT}+ {
- yylval.intn = intnum_new_dec(yytext);
- return INTNUM;
-}
-
- /* 10010011b - binary number */
-{BINDIGIT}+b {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'b' */
- yylval.intn = intnum_new_bin(yytext);
- return INTNUM;
-}
-
- /* 777q - octal number */
-{OCTDIGIT}+q {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'q' */
- yylval.intn = intnum_new_oct(yytext);
- return INTNUM;
-}
-
- /* 0AAh form of hexidecimal number */
-{DIGIT}{HEXDIGIT}*h {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'h' */
- yylval.intn = intnum_new_hex(yytext);
- return INTNUM;
-}
-
- /* $0AA and 0xAA forms of hexidecimal number */
-(\${DIGIT}|0x){HEXDIGIT}+ {
- if (yytext[1] == 'x')
- yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */
- else
- yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */
- return INTNUM;
-}
-
- /* floating point value */
-{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
- yylval.flt = floatnum_new(yytext);
- return FLTNUM;
-}
-
- /* string/character constant values */
-["'] {
- int inch, count;
- char endch = yytext[0];
-
- strbuf = xmalloc(STRBUF_ALLOC_SIZE);
-
- strbuf_size = STRBUF_ALLOC_SIZE;
- inch = input();
- count = 0;
- while (inch != EOF && inch != endch && inch != '\n') {
- strbuf[count++] = inch;
- if (count >= strbuf_size) {
- strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
- if (!strbuf)
- Fatal(FATAL_NOMEM);
- strbuf_size += STRBUF_ALLOC_SIZE;
- }
- inch = input();
- }
-
- if (inch == '\n')
- Error(_("unterminated string"));
- else if (inch == EOF)
- Error(_("unexpected end of file in string"));
-
- strbuf[count] = '\0';
-
- yylval.str_val = strbuf;
- return STRING;
-}
-
- /* %line linenum+lineinc filename */
-^%line { BEGIN LINECHG; linechg_numcount = 0; return LINE; }
-<LINECHG>{DIGIT}+ {
- linechg_numcount++;
- yylval.intn = intnum_new_dec(yytext);
- return INTNUM;
-}
-<LINECHG>\n { BEGIN INITIAL; return '\n'; }
-<LINECHG>[+] { return yytext[0]; }
-<LINECHG>{WS}+ {
- if (linechg_numcount == 2)
- BEGIN LINECHG2;
-}
-<LINECHG2>\n { BEGIN INITIAL; return '\n'; }
-<LINECHG2>\r ;
-<LINECHG2>[^\r\n]+ {
- BEGIN LINECHG;
- yylval.str_val = xstrdup(yytext);
- return FILENAME;
-}
-
- /* directive: [name value] */
-^{WS}*"[" { BEGIN DIRECTIVE; return '['; }
-<DIRECTIVE>"]" { BEGIN INITIAL; return ']'; }
-<DIRECTIVE2>"]" { BEGIN INITIAL; return ']'; }
-<DIRECTIVE>\n { BEGIN INITIAL; return '\n'; }
-<DIRECTIVE2>\n { BEGIN INITIAL; return '\n'; }
-
-<DIRECTIVE>[a-z]+ {
- BEGIN DIRECTIVE2;
- yylval.str_val = xstrdup(yytext);
- return DIRECTIVE_NAME;
-}
-<DIRECTIVE>. {
- if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
- Warning(_("ignoring unrecognized character `%s'"),
- conv_unprint(yytext[0]));
-}
-
- /* override local labels in directive state */
-<DIRECTIVE2>\.[a-z0-9_$#@~.?]* {
- yylval.str_val = xstrdup(yytext);
- return ID;
-}
-
- /* size specifiers */
-byte { yylval.int_info = 1; return BYTE; }
-word { yylval.int_info = 2; return WORD; }
-dword { yylval.int_info = 4; return DWORD; }
-qword { yylval.int_info = 8; return QWORD; }
-tword { yylval.int_info = 10; return TWORD; }
-dqword { yylval.int_info = 16; return DQWORD; }
-
- /* pseudo-instructions */
-db { yylval.int_info = 1; return DECLARE_DATA; }
-dw { yylval.int_info = 2; return DECLARE_DATA; }
-dd { yylval.int_info = 4; return DECLARE_DATA; }
-dq { yylval.int_info = 8; return DECLARE_DATA; }
-dt { yylval.int_info = 10; return DECLARE_DATA; }
-
-resb { yylval.int_info = 1; return RESERVE_SPACE; }
-resw { yylval.int_info = 2; return RESERVE_SPACE; }
-resd { yylval.int_info = 4; return RESERVE_SPACE; }
-resq { yylval.int_info = 8; return RESERVE_SPACE; }
-rest { yylval.int_info = 10; return RESERVE_SPACE; }
-
-incbin { return INCBIN; }
-
-equ { return EQU; }
-
-times { return TIMES; }
-
-seg { return SEG; }
-wrt { return WRT; }
-near { return NEAR; }
-short { return SHORT; }
-far { return FAR; }
-
-nosplit { return NOSPLIT; }
-
-org { return ORG; }
-
-to { return TO; }
-
- /* operand size overrides */
-o16 { yylval.int_info = 16; return OPERSIZE; }
-o32 { yylval.int_info = 32; return OPERSIZE; }
- /* address size overrides */
-a16 { yylval.int_info = 16; return ADDRSIZE; }
-a32 { yylval.int_info = 32; return ADDRSIZE; }
-
- /* instruction prefixes */
-lock { return LOCK; }
-repne { return REPNZ; }
-repnz { return REPNZ; }
-rep { return REP; }
-repe { return REPZ; }
-repz { return REPZ; }
-
- /* control, debug, and test registers */
-cr4 { yylval.int_info = 4; return CR4; }
-cr[023] { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; }
-dr[0-367] { yylval.int_info = yytext[2]-'0'; return DRREG; }
-tr[3-7] { yylval.int_info = yytext[2]-'0'; return TRREG; }
-
- /* floating point, MMX, and SSE registers */
-st0 { yylval.int_info = 0; return ST0; }
-st[1-7] { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; }
-mm[0-7] { yylval.int_info = yytext[2]-'0'; return MMXREG; }
-xmm[0-7] { yylval.int_info = yytext[3]-'0'; return XMMREG; }
-
- /* integer registers */
-eax { yylval.int_info = 0; return REG_EAX; }
-ecx { yylval.int_info = 1; return REG_ECX; }
-edx { yylval.int_info = 2; return REG_EDX; }
-ebx { yylval.int_info = 3; return REG_EBX; }
-esp { yylval.int_info = 4; return REG_ESP; }
-ebp { yylval.int_info = 5; return REG_EBP; }
-esi { yylval.int_info = 6; return REG_ESI; }
-edi { yylval.int_info = 7; return REG_EDI; }
-
-ax { yylval.int_info = 0; return REG_AX; }
-cx { yylval.int_info = 1; return REG_CX; }
-dx { yylval.int_info = 2; return REG_DX; }
-bx { yylval.int_info = 3; return REG_BX; }
-sp { yylval.int_info = 4; return REG_SP; }
-bp { yylval.int_info = 5; return REG_BP; }
-si { yylval.int_info = 6; return REG_SI; }
-di { yylval.int_info = 7; return REG_DI; }
-
-al { yylval.int_info = 0; return REG_AL; }
-cl { yylval.int_info = 1; return REG_CL; }
-dl { yylval.int_info = 2; return REG_DL; }
-bl { yylval.int_info = 3; return REG_BL; }
-ah { yylval.int_info = 4; return REG_AH; }
-ch { yylval.int_info = 5; return REG_CH; }
-dh { yylval.int_info = 6; return REG_DH; }
-bh { yylval.int_info = 7; return REG_BH; }
-
- /* segment registers */
-es { yylval.int_info = 0; return REG_ES; }
-cs { yylval.int_info = 1; return REG_CS; }
-ss { yylval.int_info = 2; return REG_SS; }
-ds { yylval.int_info = 3; return REG_DS; }
-fs { yylval.int_info = 4; return REG_FS; }
-gs { yylval.int_info = 5; return REG_GS; }
-
- /* operators */
-"<<" { return LEFT_OP; }
-">>" { return RIGHT_OP; }
-"//" { return SIGNDIV; }
-"%%" { return SIGNMOD; }
-"$$" { return START_SECTION_ID; }
-[-+|^&*/%~$():[\]=,] { return yytext[0]; }
-
- /* special non-local ..@label and labels like ..start */
-\.\.[a-z0-9_$#@~.?]+ {
- yylval.str_val = xstrdup(yytext);
- return SPECIAL_ID;
-}
-
- /* local label (.label) */
-\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
- if (!nasm_parser_locallabel_base) {
- Warning(_("no non-local label before `%s'"), yytext);
- yylval.str_val = xstrdup(yytext);
- } else {
- yylval.str_val = xmalloc(strlen(yytext) +
- strlen(nasm_parser_locallabel_base) + 1);
- strcpy(yylval.str_val, nasm_parser_locallabel_base);
- strcat(yylval.str_val, yytext);
- }
-
- return LOCAL_ID;
-}
-
- /* instructions */
- /* @INSTRUCTIONS@ */
-
- /* label */
-[a-z_?][a-z0-9_$#@~.?]* {
- yylval.str_val = xstrdup(yytext);
- return ID;
-}
-
-;.* ;
-
-{WS}+ ;
-
-\n return '\n';
-
-. {
- if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
- Warning(_("ignoring unrecognized character `%s'"),
- conv_unprint(yytext[0]));
-}
-
#include "util.h"
/*@unused@*/ RCSID("$IdPath$");
+#include "globals.h"
+#include "expr.h"
+
#include "bytecode.h"
#include "arch.h"
+
arch *cur_arch;
+insn_operand *
+operand_new_reg(unsigned long reg)
+{ /* Allocate and return a new INSN_OPERAND_REG operand holding reg. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_REG;
+ retval->data.reg = reg;
+ retval->targetmod = 0; /* 0 = no target modifier */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_segreg(unsigned long segreg)
+{ /* Allocate and return a new INSN_OPERAND_SEGREG operand holding segreg. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_SEGREG;
+ retval->data.reg = segreg; /* segment registers share the data.reg member */
+ retval->targetmod = 0; /* 0 = no target modifier */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_mem(/*@only@*/ effaddr *ea)
+{ /* Allocate and return a new INSN_OPERAND_MEMORY operand that stores ea. */
+ insn_operand *retval = xmalloc(sizeof(insn_operand));
+
+ retval->type = INSN_OPERAND_MEMORY;
+ retval->data.ea = ea; /* pointer is stored, not copied */
+ retval->targetmod = 0; /* 0 = no target modifier */
+ retval->size = 0; /* 0 = size not user-specified */
+
+ return retval;
+}
+
+insn_operand *
+operand_new_imm(/*@only@*/ expr *val)
+{ /* Wrap val as an INSN_OPERAND_IMM operand, or as a register operand when expr_get_reg() finds one. */
+ insn_operand *retval;
+ const unsigned long *reg;
+
+ reg = expr_get_reg(&val, 0); /* non-NULL result selects the register path below */
+ if (reg) {
+ retval = operand_new_reg(*reg);
+ expr_delete(val); /* val is not stored in the register operand, so release it here */
+ } else {
+ retval = xmalloc(sizeof(insn_operand));
+ retval->type = INSN_OPERAND_IMM;
+ retval->data.val = val; /* pointer is stored, not copied */
+ retval->targetmod = 0; /* 0 = no target modifier */
+ retval->size = 0; /* 0 = size not user-specified */
+ }
+
+ return retval;
+}
+
+void
+operand_print(FILE *f, const insn_operand *op)
+{ /* Write a description of op to f: a type-specific part, then its TargetMod and Size. */
+ switch (op->type) {
+ case INSN_OPERAND_REG:
+ fprintf(f, "%*sReg=", indent_level, ""); /* "%*s" with "" pads by indent_level columns */
+ cur_arch->reg_print(f, op->data.reg); /* register naming is delegated to the current arch */
+ fprintf(f, "\n");
+ break;
+ case INSN_OPERAND_SEGREG:
+ fprintf(f, "%*sSegReg=", indent_level, "");
+ cur_arch->segreg_print(f, op->data.reg);
+ fprintf(f, "\n");
+ break;
+ case INSN_OPERAND_MEMORY:
+ fprintf(f, "%*sMemory=\n", indent_level, "");
+ indent_level++; /* ea_print() runs with a deeper indent */
+ ea_print(f, op->data.ea);
+ indent_level--; /* restore the indent for the lines below */
+ break;
+ case INSN_OPERAND_IMM:
+ fprintf(f, "%*sImm=", indent_level, "");
+ expr_print(f, op->data.val);
+ fprintf(f, "\n");
+ break;
+ } /* any other type value prints no type-specific part */
+ fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod); /* printed in hex */
+ fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size);
+}
+
+void
+ops_delete(insn_operandhead *headp, int content)
+{ /* Free every operand on headp (and its ea/expr too when content is nonzero), then reset the list. */
+ insn_operand *cur, *next;
+
+ cur = STAILQ_FIRST(headp);
+ while (cur) {
+ next = STAILQ_NEXT(cur, link); /* fetch the successor before cur is freed */
+ if (content)
+ switch (cur->type) {
+ case INSN_OPERAND_MEMORY:
+ ea_delete(cur->data.ea);
+ break;
+ case INSN_OPERAND_IMM:
+ expr_delete(cur->data.val);
+ break;
+ default: /* other operand types only hold data.reg; nothing extra to free */
+ break;
+ }
+ xfree(cur); /* the operand node itself is freed regardless of content */
+ cur = next;
+ }
+ STAILQ_INIT(headp); /* reinitialize the head so it no longer refers to freed nodes */
+}
+
+/*@null@*/ insn_operand *
+ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op)
+{ /* Append op to the tail of headp when op is non-NULL; returns op, or NULL if nothing was appended. */
+ if (op) {
+ STAILQ_INSERT_TAIL(headp, op, link); /* op itself is linked in; no copy is made */
+ return op;
+ }
+ return (insn_operand *)NULL;
+}
+
+void
+ops_print(FILE *f, const insn_operandhead *headp)
+{ /* Print each operand on headp to f, in list order, via operand_print(). */
+ insn_operand *cur;
+
+ STAILQ_FOREACH (cur, headp, link)
+ operand_print(f, cur);
+}
/* $IdPath$
* Architecture header file
*
- * Copyright (C) 2001 Peter Johnson
+ * Copyright (C) 2002 Peter Johnson
*
* This file is part of YASM.
*
#ifndef YASM_ARCH_H
#define YASM_ARCH_H
+typedef enum arch_check_id_retval { /* identifier classes reported by an arch's parse.check_identifier() */
+ ARCH_CHECK_ID_NONE = 0, /* just a normal identifier */
+ ARCH_CHECK_ID_INSN, /* an instruction */
+ ARCH_CHECK_ID_PREFIX, /* an instruction prefix */
+ ARCH_CHECK_ID_REG, /* a register */
+ ARCH_CHECK_ID_SEGREG, /* a segment register (for memory overrides) */
+ ARCH_CHECK_ID_TARGETMOD /* a target modifier (for jumps) */
+} arch_check_id_retval;
+
+typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand)
+ insn_operandhead;
+
+typedef struct insn_operand insn_operand;
+
+/* Different assemblers order instruction operands differently. Also, some
+ * differ on how exactly various registers are specified. There's no great
+ * solution to this, as the parsers aren't supposed to have knowledge of the
+ * architectural internals, and the architecture is supposed to be parser-
+ * independent. To make things work, as a rather hackish solution, we give the
+ * architecture a little knowledge about the general "flavor" of the parser,
+ * and let the architecture decide what to do with it. Most architectures will
+ * probably not even use this, but it's required for some (x86 in particular)
+ * for correct behavior on all parsers.
+ */
+typedef enum arch_syntax_flavor {
+ ARCH_SYNTAX_FLAVOR_NASM = 1, /* like NASM */
+ ARCH_SYNTAX_FLAVOR_GAS /* like GAS */
+} arch_syntax_flavor;
+
struct arch {
/* one-line description of the architecture */
const char *name;
/* keyword used to select architecture */
const char *keyword;
+ struct {
+ /* All "data" below starts the parse initialized to 0. Thus, it is
+ * okay for a function to use/check previously stored data to see if
+ * it's been called before on the same piece of data.
+ */
+
+ /* Switches available instructions/registers/etc. based on a
+ * user-specified CPU identifier. Should modify behavior ONLY of
+ * parse functions! The bytecode and output functions should be able
+ * to handle any CPU.
+ */
+ void (*switch_cpu) (const char *cpuid);
+
+ /* Checks a generic identifier to see if it matches architecture
+ * specific names for instructions, registers, etc (see the
+ * arch_check_id_retval enum above for the various types this function
+ * can detect & return). Unrecognized identifiers should be returned
+ * as NONE so they can be treated as normal symbols. Any additional
+ * data beyond just the type (almost always necessary) should be
+ * returned into the space provided by the data parameter.
+ * Note: even though this is passed a data[4], only data[0] should be
+ * used for TARGETMOD, REG, and SEGREG return values.
+ */
+ arch_check_id_retval (*check_identifier) (unsigned long data[4],
+ const char *id);
+
+ /* Architecture-specific directive support. Returns 1 if directive was
+ * not recognized. Returns 0 if directive was recognized, even if it
+ * wasn't valid. Should modify behavior ONLY of parse functions, much
+ * like switch_cpu() above.
+ */
+ int (*directive) (const char *name, valparamhead *valparams,
+ /*@null@*/ valparamhead *objext_valparams,
+ sectionhead *headp);
+
+ /* Creates an instruction. Creates a bytecode by matching the
+ * instruction data and the parameters given with a valid instruction.
+ * If no match is found (the instruction is invalid), returns NULL.
+ * All zero data indicates an empty instruction should be created.
+ */
+ /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4],
+ int num_operands, /*@null@*/
+ insn_operandhead *operands);
+
+ /* Handle an instruction prefix by modifying bc as necessary. */
+ void (*handle_prefix) (bytecode *bc, const unsigned long data[4]);
+
+ /* Handle a segment register instruction prefix by modifying bc as
+ * necessary.
+ */
+ void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg);
+
+ /* Handle memory expression segment overrides by modifying ea as
+ * necessary.
+ */
+ void (*handle_seg_override) (effaddr *ea, unsigned long segreg);
+
+ /* Convert an expression into an effective address. */
+ effaddr * (*ea_new_expr) (/*@keep@*/ expr *e);
+ } parse;
+
struct {
/* Maximum used bytecode type value+1. Should be set to
* BYTECODE_TYPE_BASE if no additional bytecode types are defined by
const section *sect, void *d,
output_expr_func output_expr);
} bc;
+
+ /* Gets the equivalent register size in bytes. Returns 0 if there is no
+ * suitable equivalent size.
+ */
+ unsigned int (*get_reg_size) (unsigned long reg);
+
+ void (*reg_print) (FILE *f, unsigned long reg);
+ void (*segreg_print) (FILE *f, unsigned long segreg);
+
+ /* Deletes the arch-specific data in ea. May be NULL if no special
+ * deletion is required (e.g. there's no dynamically allocated pointers
+ * in the ea data).
+ */
+ void (*ea_data_delete) (effaddr *ea);
+
+ void (*ea_data_print) (FILE *f, const effaddr *ea);
+};
+
+struct insn_operand { /* one instruction operand; chained on an insn_operandhead list through link */
+ /*@reldef@*/ STAILQ_ENTRY(insn_operand) link;
+
+ enum {
+ INSN_OPERAND_REG = 1, /* a register */
+ INSN_OPERAND_SEGREG, /* a segment register */
+ INSN_OPERAND_MEMORY, /* an effective address (memory reference) */
+ INSN_OPERAND_IMM /* an immediate or jump target */
+ } type;
+
+ union { /* the member in use is selected by type */
+ unsigned long reg; /* arch data for reg/segreg */
+ effaddr *ea; /* effective address for memory references */
+ expr *val; /* value of immediate or jump target */
+ } data;
+
+ unsigned long targetmod; /* arch target modifier, 0 if none */
+
+ /* Specified size of the operand, in bytes. 0 if not user-specified. */
+ unsigned int size;
 };
+/* insn_operand constructors. operand_new_imm() will look for cases of a
+ * single register and create an INSN_OPERAND_REG variant of insn_operand.
+ */
+insn_operand *operand_new_reg(unsigned long reg);
+insn_operand *operand_new_segreg(unsigned long segreg);
+insn_operand *operand_new_mem(/*@only@*/ effaddr *ea);
+insn_operand *operand_new_imm(/*@only@*/ expr *val);
+
+void operand_print(FILE *f, const insn_operand *op);
+
+#define ops_initialize(headp) STAILQ_INIT(headp)
+#define ops_first(headp) STAILQ_FIRST(headp)
+#define ops_next(cur) STAILQ_NEXT(cur, link)
+
+/* Deletes operands linked list. Deletes content of each operand if content is
+ * nonzero.
+ */
+void ops_delete(insn_operandhead *headp, int content);
+
+/* Adds op to the list of operands headp.
+ * NOTE: Does not make a copy of op; so don't pass this function
+ * static or local variables, and discard the op pointer after calling
+ * this function. If op was actually appended (it wasn't NULL), then
+ * returns op, otherwise returns NULL.
+ */
+/*@null@*/ insn_operand *ops_append(insn_operandhead *headp,
+ /*@returned@*/ /*@null@*/ insn_operand *op);
+
+void ops_print(FILE *f, const insn_operandhead *headp);
+
/* Available architectures */
-#include "arch/x86/x86arch.h"
extern arch x86_arch;
extern arch *cur_arch;
YASMARCHFILES += \
src/arch/x86/x86arch.c \
src/arch/x86/x86arch.h \
- src/arch/x86/x86-int.h \
src/arch/x86/x86bc.c \
- src/arch/x86/x86expr.c
+ src/arch/x86/x86expr.c \
+ x86id.c
+
+x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+ re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
+
+BUILT_SOURCES += \
+ x86id.c
+
+CLEANFILES += \
+ x86id.c
EXTRA_DIST += \
src/arch/x86/README \
- src/arch/x86/instrs.dat
+ src/arch/x86/x86id.re
+++ /dev/null
-; $IdPath$
-; List of valid instruction/operand combinations
-;
-; Copyright (C) 2001 Peter Johnson
-;
-; This file is part of YASM.
-;
-; YASM is free software; you can redistribute it and/or modify
-; it under the terms of the GNU General Public License as published by
-; the Free Software Foundation; either version 2 of the License, or
-; (at your option) any later version.
-;
-; YASM is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; GNU General Public License for more details.
-;
-; You should have received a copy of the GNU General Public License
-; along with this program; if not, write to the Free Software
-; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-;
-; Meanings of codes:
-; $x refers to operand x
-; "nil" in a field indicates the lack of that field in the instruction
-; (there MUST be some text in every field in this document)
-; Sizes are in bits (8,16,32 are the only valid quantities)
-;
-; Column definitions:
-; Inst - Instruction, should be lowercase
-; Operands - Single combination of valid operands
-; "TO" is not counted in the operand count.
-; OpSize - Fixed operand size. Can generate prefix byte.
-; Opcode - One or two bytes of opcode.
-; EffAddr - Effective Address (ModRM/SIB/Off). First value is the memory
-; operand, second specifies what value goes into the reg/spare
-; bits in the ModRM byte.
-; $xr indicates operand is register, not ModRM (needs convert to RM)
-; $xi indicates operand is immediate (2nd parm is size in bits)
-; Imm - Immediate source operand and forced size (in bits).
-; "s" after size indicates signed number
-; A number instead of a $x is a hex constant value.
-;
-; A ':' at the beginning of the line means that the instruction following the
-; ':' is a synonym for the instruction in the 2nd column.
-;
-; See the parser file for a list of possible operand values and their meanings.
-; gen_instr.pl translates this list into lexer and parser code.
-;
-; Instructions are listed in the same order as that in GNU binutils
-; /include/opcode/i386.h, used for the GAS assembler. See
-; <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h?cvsroot=src>.
-;
-; TODO:
-; Finish instructions (may require changing parser code).
-; Doublecheck instruction encodings, allowable operands.
-; Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes).
-; Doublecheck AMD and Cyrix instructions.
-; Doublecheck the segreg mov instructions.
-;
-; Instruction Groupings (to shorten parser code).
-; The $0.1, $0.2, and $0.3 will get replaced with the parameters given for
-; the instruction using the group during lexing & parsing. These parameters
-; may be in the opcode, opsize, effaddr, or immediate.
-; When opsize is a parameter, its usage in instructions that use the group
-; looks slightly different than normal, because the parameters are
-; specified in hexidecimal while the normal opsize usage is in decimal.
-; Thus 10 and 20 are used instead of 16 and 32 respectively.
-; The first CPU grouping for the instruction is OR'ed with the CPU value in
-; the group CPU fields with @0 in their list. This allows one grouping to
-; be used for instructions with different CPU values.
-; Restrictions on groupings:
-; - $0.? may not appear in the operand, the first part of the effaddr, the
-; second part of the imm, or the CPU fields.
-; - @0, @1 may only appear in the CPU field.
-; Restrictions on instructions based on groupings:
-; - no other operand combinations are allowed (eg, if an instruction uses a
-; group, that must be the ONLY line for the instruction)
-;
-; Notes on code generation:
-; Each group generates a lex token of the group name (sans !). Bison rules
-; are generated for each of the operand combinations for the group just as
-; with a regular instruction, except for the addition of the $0.? fields.
-; Each $0.? field is replaced by $1.d? in the generated code (eg,
-; $0.1->$1.d1, etc).
-; When an instruction that uses a group is encountered, eg:
-; inst!grpname parm1[,parm2[,parm3]]
-; The following lex code is generated:
-; inst { yylval.groupdata[0]=0xparm1; return GRPNAME; }
-; (and additional yylval.groupdata[#-1]=0xparm#; if needed)
-;
-; KEY
-;
-; !Grp Operands OpSize Opcode EffAddr Imm CPU
-; Inst Operands OpSize Opcode EffAddr Imm CPU
-; Inst!Grp Parameters CPU @0 CPU @1
-;
-; Groupings used throughout
-;
-; One byte opcode instructions with no operands:
-!onebyte nil $0.1 $0.2 nil nil @0
-; Two byte opcode instructions with no operands:
-!twobyte nil nil $0.1,$0.2 nil nil @0
-; Three byte opcode instructions with no operands:
-!threebyte nil nil $0.1,$0.2,$0.3 nil nil @0
-; One byte opcode instructions with general memory operand:
-!onebytemem mem nil $0.1 $1,$0.2 nil @0
-; Two byte opcode instructions with general memory operand:
-!twobytemem mem nil $0.1,$0.2 $1,$0.3 nil @0
-;
-; Move instructions
-;
-; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89).
-mov reg8,reg8 nil 88 $1r,$2 nil 8086
-mov reg16,reg16 16 89 $1r,$2 nil 8086
-mov reg32,reg32 32 89 $1r,$2 nil 386
-mov mem,reg8 nil 88 $1,$2 nil 8086
-mov mem8x,reg8 nil 88 $1,$2 nil 8086
-mov mem,reg16 16 89 $1,$2 nil 8086
-mov mem16x,reg16 16 89 $1,$2 nil 8086
-mov mem,reg32 32 89 $1,$2 nil 386
-mov mem32x,reg32 32 89 $1,$2 nil 386
-mov reg8,mem8 nil 8A $2,$1 nil 8086
-mov reg16,mem16 16 8B $2,$1 nil 8086
-mov reg32,mem32 32 8B $2,$1 nil 386
-mov mem,segreg nil 8C $1,$2 nil 8086
-mov reg16,segreg 16 8C $1r,$2 nil 8086
-mov mem16x,segreg 16 8C $1,$2 nil 8086
-mov reg32,segreg 32 8C $1r,$2 nil 386
-mov mem32x,segreg 32 8C $1,$2 nil 386
-mov segreg,mem nil 8E $2,$1 nil 8086
-mov segreg,rm16x nil 8E $2,$1 nil 8086
-mov segreg,rm32x nil 8E $2,$1 nil 386
-;mov reg_al,memoff8
-;mov reg_ax,memoff16
-;mov reg_eax,memoff32
-;mov memoff8,reg_al
-;mov memoff16,reg_ax
-;mov memoff32,reg_eax
-mov reg8,imm8 nil B0+$1 nil $2,8 8086
-mov reg16,imm16 16 B8+$1 nil $2,16 8086
-mov reg32,imm32 32 B8+$1 nil $2,32 386
-mov mem8x,imm8 nil C6 $1,0 $2,8 8086
-mov mem,imm8x nil C6 $1,0 $2,8 8086
-mov mem16x,imm16 16 C7 $1,0 $2,16 8086
-mov mem,imm16x 16 C7 $1,0 $2,16 8086
-mov mem32x,imm32 32 C7 $1,0 $2,32 8086
-mov mem,imm32x 32 C7 $1,0 $2,32 8086
-mov CRREG_NOTCR4,reg32 nil 0F,22 $2r,$1 nil 386,PRIV
-mov CR4,reg32 nil 0F,22 $2r,$1 nil P5,PRIV
-mov reg32,CRREG_NOTCR4 nil 0F,20 $1r,$2 nil 386,PRIV
-mov reg32,CR4 nil 0F,20 $1r,$2 nil P5,PRIV
-mov reg32,DRREG nil 0F,21 $1r,$2 nil 386,PRIV
-mov DRREG,reg32 nil 0F,23 $2r,$1 nil 386,PRIV
-;
-; Move with sign/zero extend
-;
-!movszx reg16,rm8 16 0F,$0.1 $2,$1 nil 386
-!movszx reg32,rm8x 32 0F,$0.1 $2,$1 nil 386
-!movszx reg32,rm16x nil 0F,$0.1+1 $2,$1 nil 386
-movsx!movszx BE
-movzx!movszx B6
-;
-; Push instructions
-;
-push mem16x 16 FF $1,6 nil 8086
-push mem32x 32 FF $1,6 nil 386
-push reg16 16 50+$1 nil nil 8086
-push reg32 32 50+$1 nil nil 386
-push imm8x nil 6A nil $1,8 8086
-push imm16x 16 68 nil $1,16 8086
-push imm32x 32 68 nil $1,32 386
-push reg_cs nil 0E nil nil 8086
-push reg_ss nil 16 nil nil 8086
-push reg_ds nil 1E nil nil 8086
-push reg_es nil 06 nil nil 8086
-push reg_fs nil 0F,A0 nil nil 386
-push reg_gs nil 0F,A8 nil nil 386
-pusha!onebyte nil,60 186
-pushad!onebyte 20,60 386
-pushaw!onebyte 10,60 186
-;
-; Pop instructions
-;
-pop mem16x 16 8F $1,0 nil 8086
-pop mem32x 32 8F $1,0 nil 386
-pop reg16 16 58+$1 nil nil 8086
-pop reg32 32 58+$1 nil nil 386
-pop reg_ds nil 1F nil nil 8086
-pop reg_es nil 07 nil nil 8086
-pop reg_ss nil 17 nil nil 8086
-pop reg_fs nil 0F,A1 nil nil 386
-pop reg_gs nil 0F,A9 nil nil 386
-popa!onebyte nil,61 186
-popad!onebyte 20,61 386
-popaw!onebyte 10,61 186
-;
-; Exchange instructions
-;
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg8,reg8 nil 86 $1r,$2 nil 8086
-xchg mem,reg8 nil 86 $1,$2 nil 8086
-xchg mem8x,reg8 nil 86 $1,$2 nil 8086
-xchg reg8,mem8 nil 86 $2,$1 nil 8086
-xchg reg_ax,reg16 16 90+$2 nil nil 8086
-xchg reg16,reg_ax 16 90+$1 nil nil 8086
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg16,reg16 16 87 $1r,$2 nil 8086
-xchg mem,reg16 16 87 $1,$2 nil 8086
-xchg mem16x,reg16 16 87 $1,$2 nil 8086
-xchg reg16,mem16 16 87 $2,$1 nil 8086
-xchg reg_eax,reg32 32 90+$2 nil nil 386
-xchg reg32,reg_eax 32 90+$1 nil nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-xchg reg32,reg32 32 87 $1r,$2 nil 386
-xchg mem,reg32 32 87 $1,$2 nil 386
-xchg mem32x,reg32 32 87 $1,$2 nil 386
-xchg reg32,mem32 32 87 $2,$1 nil 386
-;
-; In/out from ports
-;
-in reg_al,imm8 nil E4 nil $2,8 8086
-in reg_ax,imm8 16 E5 nil $2,8 8086
-in reg_eax,imm8 32 E5 nil $2,8 386
-in reg_al,reg_dx nil EC nil nil 8086
-in reg_ax,reg_dx 16 ED nil nil 8086
-in reg_eax,reg_dx 32 ED nil nil 386
-out imm8,reg_al nil E6 nil $1,8 8086
-out imm8,reg_ax 16 E7 nil $1,8 8086
-out imm8,reg_eax 32 E7 nil $1,8 386
-out reg_dx,reg_al nil EE nil nil 8086
-out reg_dx,reg_ax 16 EF nil nil 8086
-out reg_dx,reg_eax 32 EF nil nil 386
-;
-; Load effective address
-;
-lea reg16,mem16 16 8D $2,$1 nil 8086
-lea reg32,mem32 32 8D $2,$1 nil 386
-;
-; Load segment registers from memory
-;
-lds reg16,mem 16 C5 $2,$1 nil 8086
-lds reg32,mem 32 C5 $2,$1 nil 386
-les reg16,mem 16 C4 $2,$1 nil 8086
-les reg32,mem 32 C4 $2,$1 nil 386
-lfs reg16,mem 16 0F,B4 $2,$1 nil 386
-lfs reg32,mem 32 0F,B4 $2,$1 nil 386
-lgs reg16,mem 16 0F,B5 $2,$1 nil 386
-lgs reg32,mem 32 0F,B5 $2,$1 nil 386
-lss reg16,mem 16 0F,B2 $2,$1 nil 386
-lss reg32,mem 32 0F,B2 $2,$1 nil 386
-;
-; Flags register instructions
-;
-clc!onebyte nil,F8 8086
-cld!onebyte nil,FC 8086
-cli!onebyte nil,FA 8086
-clts!twobyte 0F,06 286,PRIV
-cmc!onebyte nil,F5 8086
-lahf!onebyte nil,9F 8086
-sahf!onebyte nil,9E 8086
-pushf!onebyte nil,9C 8086
-pushfd!onebyte 20,9C 386
-pushfw!onebyte 10,9C 8086
-popf!onebyte nil,9D 8086
-popfd!onebyte 20,9D 386
-popfw!onebyte 10,9D 8086
-stc!onebyte nil,F9 8086
-std!onebyte nil,FD 8086
-sti!onebyte nil,FB 8086
-;
-; Arithmetic
-;
-; General arithmetic
-!arith reg_al,imm8 nil $0.1+4 nil $2,8 8086
-!arith reg_ax,imm16 16 $0.1+5 nil $2,16 8086
-!arith reg_eax,imm32 32 $0.1+5 nil $2,32 386
-!arith reg8,imm8 nil 80 $1r,$0.2 $2,8 8086
-!arith mem8x,imm nil 80 $1,$0.2 $2,8 8086
-!arith mem,imm8x nil 80 $1,$0.2 $2,8 8086
-!arith reg16,imm 16 81 $1r,$0.2 $2,16 8086
-!arith mem16x,imm 16 81 $1,$0.2 $2,16 8086
-!arith reg16,imm16x 16 81 $1r,$0.2 $2,16 8086
-!arith mem,imm16x 16 81 $1,$0.2 $2,16 8086
-!arith reg32,imm 32 81 $1r,$0.2 $2,32 386
-!arith mem32x,imm 32 81 $1,$0.2 $2,32 386
-!arith reg32,imm32x 32 81 $1r,$0.2 $2,32 386
-!arith mem,imm32x 32 81 $1,$0.2 $2,32 386
-!arith reg16,imm8x 16 83 $1r,$0.2 $2,8s 8086
-!arith mem16x,imm8x 16 83 $1,$0.2 $2,8s 8086
-!arith reg32,imm8x 32 83 $1r,$0.2 $2,8s 386
-!arith mem32x,imm8x 32 83 $1,$0.2 $2,8s 386
-; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1).
-!arith reg8,reg8 nil $0.1 $1r,$2 nil 8086
-!arith reg16,reg16 16 $0.1+1 $1r,$2 nil 8086
-!arith reg32,reg32 32 $0.1+1 $1r,$2 nil 386
-!arith mem,reg8 nil $0.1 $1,$2 nil 8086
-!arith mem8x,reg8 nil $0.1 $1,$2 nil 8086
-!arith mem,reg16 16 $0.1+1 $1,$2 nil 8086
-!arith mem16x,reg16 16 $0.1+1 $1,$2 nil 8086
-!arith mem,reg32 32 $0.1+1 $1,$2 nil 386
-!arith mem32x,reg32 32 $0.1+1 $1,$2 nil 386
-!arith reg8,mem8 nil $0.1+2 $2,$1 nil 8086
-!arith reg16,mem16 16 $0.1+3 $2,$1 nil 8086
-!arith reg32,mem32 32 $0.1+3 $2,$1 nil 386
-; INC/DEC
-!incdec rm8x nil FE $1,$0.1 nil 8086
-!incdec mem16x 16 FF $1,$0.1 nil 8086
-!incdec mem32x 32 FF $1,$0.1 nil 386
-!incdec reg16 16 $0.2+$1 nil nil 8086
-!incdec reg32 32 $0.2+$1 nil nil 386
-; "F6" opcodes (DIV/IDIV/MUL/NEG/NOT):
-!groupf6 rm8x nil F6 $1,$0.1 nil 8086
-!groupf6 rm16x 16 F7 $1,$0.1 nil 8086
-!groupf6 rm32x 32 F7 $1,$0.1 nil 386
-add!arith 00,0
-inc!incdec 0,40
-sub!arith 28,5
-dec!incdec 1,48
-sbb!arith 18,3
-cmp!arith 38,7
-test reg_al,imm8 nil A8 nil $2,8 8086
-test reg_ax,imm16 16 A9 nil $2,16 8086
-test reg_eax,imm32 32 A9 nil $2,32 386
-test reg8,imm8 nil F6 $1r,0 $2,8 8086
-test mem8x,imm nil F6 $1,0 $2,8 8086
-test mem,imm8x nil F6 $1,0 $2,8 8086
-test reg16,imm16 16 F7 $1r,0 $2,16 8086
-test mem16x,imm 16 F7 $1,0 $2,16 8086
-test mem,imm16x 16 F7 $1,0 $2,16 8086
-test reg32,imm32 32 F7 $1r,0 $2,32 386
-test mem32x,imm 32 F7 $1,0 $2,32 386
-test mem,imm32x 32 F7 $1,0 $2,32 386
-; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1
-test reg8,reg8 nil 84 $1r,$2 nil 8086
-test reg16,reg16 16 85 $1r,$2 nil 8086
-test reg32,reg32 32 85 $1r,$2 nil 386
-test mem,reg8 nil 84 $1,$2 nil 8086
-test mem8x,reg8 nil 84 $1,$2 nil 8086
-test mem,reg16 16 85 $1,$2 nil 8086
-test mem16x,reg16 16 85 $1,$2 nil 8086
-test mem,reg32 32 85 $1,$2 nil 386
-test mem32x,reg32 32 85 $1,$2 nil 386
-test reg8,mem8 nil 84 $2,$1 nil 8086
-test reg16,mem16 16 85 $2,$1 nil 8086
-test reg32,mem32 32 85 $2,$1 nil 386
-and!arith 20,4
-or!arith 08,1
-xor!arith 30,6
-adc!arith 10,2
-neg!groupf6 3
-not!groupf6 2
-aaa!onebyte nil,37 8086
-aas!onebyte nil,3F 8086
-daa!onebyte nil,27 8086
-das!onebyte nil,2F 8086
-aad nil nil D5,0A nil nil 8086
-aad imm8 nil D5 nil $1,8 8086
-aam nil nil D4,0A nil nil 8086
-aam imm8 nil D4 nil $1,8 8086
-;
-; Conversion instructions
-;
-cbw!onebyte 10,98 8086
-cwde!onebyte 20,98 386
-cwd!onebyte 10,99 8086
-cdq!onebyte 20,99 386
-;
-; Multiplication and division
-;
-mul!groupf6 4
-imul rm8x nil F6 $1,5 nil 8086
-imul rm16x 16 F7 $1,5 nil 8086
-imul rm32x 32 F7 $1,5 nil 386
-imul reg16,rm16 16 0F,AF $2,$1 nil 386
-imul reg32,rm32 32 0F,AF $2,$1 nil 386
-imul reg16,rm16,imm8x 16 6B $2,$1 $3,8s 186
-imul reg32,rm32,imm8x 32 6B $2,$1 $3,8s 386
-imul reg16,imm8x 16 6B $1r,$1 $2,8s 186
-imul reg32,imm8x 32 6B $1r,$1 $2,8s 386
-imul reg16,rm16,imm16 16 69 $2,$1 $3,16s 186
-imul reg32,rm32,imm32 32 69 $2,$1 $3,32s 386
-imul reg16,imm16 16 69 $1r,$1 $2,16s 186
-imul reg32,imm32 32 69 $1r,$1 $2,32s 386
-div!groupf6 6
-idiv!groupf6 7
-;
-; Shifts
-;
-; Standard
-!shift rm8x,ONE nil D0 $1,$0.1 nil 8086
-!shift rm8x,reg_cl nil D2 $1,$0.1 nil 8086
-!shift rm8x,imm8 nil C0 $1,$0.1 $2,8 186
-!shift rm16x,ONE 16 D1 $1,$0.1 nil 8086
-!shift rm16x,reg_cl 16 D3 $1,$0.1 nil 8086
-!shift rm16x,imm8 16 C1 $1,$0.1 $2,8 186
-!shift rm32x,ONE 32 D1 $1,$0.1 nil 386
-!shift rm32x,reg_cl 32 D3 $1,$0.1 nil 386
-!shift rm32x,imm8 32 C1 $1,$0.1 $2,8 386
-; Doubleword
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,imm8 16 0F,$0.1 $1r,$2 $3,8 386
-!shlrd mem,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386
-!shlrd mem16x,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg16,reg16,reg_cl 16 0F,$0.1+1 $1r,$2 nil 386
-!shlrd mem,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386
-!shlrd mem16x,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,imm8 32 0F,$0.1 $1r,$2 $3,8 386
-!shlrd mem,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386
-!shlrd mem32x,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!shlrd reg32,reg32,reg_cl 32 0F,$0.1+1 $1r,$2 nil 386
-!shlrd mem,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386
-!shlrd mem32x,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386
-rol!shift 0
-ror!shift 1
-rcl!shift 2
-rcr!shift 3
-sal!shift 4
-shl!shift 4
-shr!shift 5
-sar!shift 7
-shld!shlrd A4
-shrd!shlrd AC
-;
-; Control transfer instructions (unconditional)
-;
-; Special format for relative targets:
-; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU
-;
-!jmpcall target nil $0.1?$0.2 $0.3 8086 8086
-!jmpcall imm:imm nil $0.4 $2i,nil $1,16 8086
-!jmpcall WORD imm:imm 16 $0.4 $2i,16 $1,16 8086
-!jmpcall DWORD imm:imm 32 $0.4 $2i,32 $1,16 386
-!jmpcall memfar nil FF $1,$0.4+1 nil 8086
-!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086
-!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386
-!jmpcall mem nil FF $1,$0.4 nil 8086
-!jmpcall rm16x 16 FF $1,$0.4 nil 8086
-!jmpcall rm32x 32 FF $1,$0.4 nil 386
-call!jmpcall nil,0,E8,9A,2
-jmp!jmpcall 1,EB,E9,EA,4
-ret!onebyte nil,C3 8086
-retn nil nil C3 nil nil 8086
-retf nil nil CB nil nil 8086
-retn imm16 nil C2 nil $1,16 8086
-retf imm16 nil CA nil $1,16 8086
-enter imm16,imm8 nil C8 $1i,16 $2,8 186
-leave!onebyte nil,C9 186
-;
-; Conditional jumps
-;
-!jcc target nil 70+$0.1 0F,80+$0.1 8086 386
-jo!jcc 0
-jno!jcc 1
-jb!jcc 2
-jc!jcc 2
-jnae!jcc 2
-jnb!jcc 3
-jnc!jcc 3
-jae!jcc 3
-je!jcc 4
-jz!jcc 4
-jne!jcc 5
-jnz!jcc 5
-jbe!jcc 6
-jna!jcc 6
-jnbe!jcc 7
-ja!jcc 7
-js!jcc 8
-jns!jcc 9
-jp!jcc A
-jpe!jcc A
-jnp!jcc B
-jpo!jcc B
-jl!jcc C
-jnge!jcc C
-jnl!jcc D
-jge!jcc D
-jle!jcc E
-jng!jcc E
-jnle!jcc F
-jg!jcc F
-jcxz target 16 E3 nil 8086 8086
-jecxz target 32 E3 nil 386 386
-;
-; Loop instructions
-;
-!loopg target nil E0+$0.1 nil 8086 8086
-!loopg target,reg_cx 16 E0+$0.1 nil 8086 8086
-!loopg target,reg_ecx 32 E0+$0.1 nil 386 386
-loop!loopg 2
-loopz!loopg 1
-loope!loopg 1
-loopnz!loopg 0
-loopne!loopg 0
-;
-; Set byte on flag instructions
-;
-!setcc rm8 nil 0F,90+$0.1 $1,2 nil 386
-seto!setcc 0
-setno!setcc 1
-setb!setcc 2
-setc!setcc 2
-setnae!setcc 2
-setnb!setcc 3
-setnc!setcc 3
-setae!setcc 3
-sete!setcc 4
-setz!setcc 4
-setne!setcc 5
-setnz!setcc 5
-setbe!setcc 6
-setna!setcc 6
-setnbe!setcc 7
-seta!setcc 7
-sets!setcc 8
-setns!setcc 9
-setp!setcc A
-setpe!setcc A
-setnp!setcc B
-setpo!setcc B
-setl!setcc C
-setnge!setcc C
-setnl!setcc D
-setge!setcc D
-setle!setcc E
-setng!setcc E
-setnle!setcc F
-setg!setcc F
-;
-; String instructions
-;
-; NOTE: cmpsd,movsd can't go to !onebyte group because of other variations
-cmpsb!onebyte nil,A6 8086
-cmpsw!onebyte 10,A7 8086
-cmpsd nil 32 A7 nil nil 386
-insb!onebyte nil,6C 8086
-insw!onebyte 10,6D 8086
-insd!onebyte 20,6D 386
-outsb!onebyte nil,6E 8086
-outsw!onebyte 10,6F 8086
-outsd!onebyte 20,6F 386
-lodsb!onebyte nil,AC 8086
-lodsw!onebyte 10,AD 8086
-lodsd!onebyte 20,AD 386
-movsb!onebyte nil,A4 8086
-movsw!onebyte 10,A5 8086
-movsd nil 32 A5 nil nil 386
-scasb!onebyte nil,AE 8086
-scasw!onebyte 10,AF 8086
-scasd!onebyte 20,AF 386
-stosb!onebyte nil,AA 8086
-stosw!onebyte 10,AB 8086
-stosd!onebyte 20,AB 386
-xlat!onebyte nil,D7 8086
-xlatb!onebyte nil,D7 8086
-;
-; Bit manipulation
-;
-; Bit tests
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest reg16,reg16 16 0F,$0.1 $1r,$2 nil 386
-!bittest mem,reg16 16 0F,$0.1 $1,$2 nil 386
-!bittest mem16x,reg16 16 0F,$0.1 $1,$2 nil 386
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!bittest reg32,reg32 32 0F,$0.1 $1r,$2 nil 386
-!bittest mem,reg32 32 0F,$0.1 $1,$2 nil 386
-!bittest mem32x,reg32 32 0F,$0.1 $1,$2 nil 386
-!bittest reg16,imm8 16 0F,BA $1r,$0.2 $2,8 386
-!bittest mem16x,imm8 16 0F,BA $1,$0.2 $2,8 386
-!bittest reg32,imm8 32 0F,BA $1r,$0.2 $2,8 386
-!bittest mem32x,imm8 32 0F,BA $1,$0.2 $2,8 386
-; Bit scans
-!bsfr reg16,rm16 16 0F,BC+$0.1 $2,$1 nil 386
-!bsfr reg32,rm32 32 0F,BC+$0.1 $2,$1 nil 386
-bsf!bsfr 0
-bsr!bsfr 1
-bt!bittest A3,4
-btc!bittest BB,7
-btr!bittest B3,6
-bts!bittest AB,5
-;
-; Interrupts and operating system instructions
-;
-int imm8 nil CD nil $1,8 8086
-int3!onebyte nil,CC 8086
-int03!onebyte nil,CC 8086
-into!onebyte nil,CE 8086
-iret!onebyte nil,CF 8086
-iretw!onebyte 10,CF 8086
-iretd!onebyte 20,CF 386
-rsm!twobyte 0F,AA P5,SMM
-bound reg16,mem16 16 62 $2,$1 nil 186
-bound reg32,mem32 32 62 $2,$1 nil 386
-hlt!onebyte nil,F4 8086,PRIV
-nop!onebyte nil,90 8086
-;
-; Protection control
-;
-; 286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW):
-!prot286 rm16 nil 0F,00 $1,$0.1 nil 286,PROT,@0
-arpl rm16,reg16 nil 63 $1,$2 nil 286,PROT
-lar reg16,rm16 16 0F,02 $2,$1 nil 286,PROT
-lar reg32,rm32 32 0F,02 $2,$1 nil 386,PROT
-lgdt!twobytemem 0F,01,2 286,PRIV
-lidt!twobytemem 0F,01,3 286,PRIV
-lldt!prot286 2 PRIV
-lmsw rm16 nil 0F,01 $1,6 nil 286,PRIV
-lsl reg16,rm16 16 0F,03 $2,$1 nil 286,PROT
-lsl reg32,rm32 32 0F,03 $2,$1 nil 286,PROT
-ltr!prot286 3 PRIV
-sgdt!twobytemem 0F,01,0 286
-sidt!twobytemem 0F,01,1 286
-sldt mem1632 nil 0F,00 $1,0 nil 286
-sldt reg16 16 0F,00 $1r,0 nil 286
-sldt reg32 32 0F,00 $1r,0 nil 386
-smsw mem1632 nil 0F,01 $1,4 nil 286
-smsw reg16 16 0F,01 $1r,4 nil 286
-smsw reg32 32 0F,01 $1r,4 nil 386
-str!prot286 1
-verr!prot286 4
-verw!prot286 5
-;
-; Floating point instructions
-;
-; Load
-fld mem32x nil D9 $1,0 nil 8086,FPU
-fld mem64x nil DD $1,0 nil 8086,FPU
-fld mem80x nil DB $1,5 nil 8086,FPU
-fld fpureg nil D9,C0+$1 nil nil 8086,FPU
-fild mem16x nil DF $1,0 nil 8086,FPU
-fild mem32x nil DB $1,0 nil 8086,FPU
-fild mem64x nil DF $1,5 nil 8086,FPU
-fbld mem80 nil DF $1,4 nil 8086,FPU
-; Store
-fst mem32x nil D9 $1,2 nil 8086,FPU
-fst mem64x nil DD $1,2 nil 8086,FPU
-fst fpureg nil DD,D0+$1 nil nil 8086,FPU
-fist mem16x nil DF $1,2 nil 8086,FPU
-fist mem32x nil DB $1,2 nil 8086,FPU
-; Store (with pop)
-fstp mem32x nil D9 $1,3 nil 8086,FPU
-fstp mem64x nil DD $1,3 nil 8086,FPU
-fstp mem80x nil DB $1,7 nil 8086,FPU
-fstp fpureg nil DD,D8+$1 nil nil 8086,FPU
-fistp mem16x nil DF $1,3 nil 8086,FPU
-fistp mem32x nil DB $1,3 nil 8086,FPU
-fistp mem64x nil DF $1,7 nil 8086,FPU
-fbstp mem80 nil DF $1,6 nil 8086,FPU
-; Exchange (with ST0)
-fxch fpureg nil D9,C8+$1 nil nil 8086,FPU
-fxch ST0,ST0 nil D9,C8 nil nil 8086,FPU
-fxch ST0,FPUREG_NOTST0 nil D9,C8+$2 nil nil 8086,FPU
-fxch FPUREG_NOTST0,ST0 nil D9,C8+$1 nil nil 8086,FPU
-fxch nil nil D9,C9 nil nil 8086,FPU
-; Comparisons
-!fcomg mem32x nil D8 $1,$0.1 nil 8086,FPU
-!fcomg mem64x nil DC $1,$0.1 nil 8086,FPU
-!fcomg fpureg nil D8,$0.2+$1 nil nil 8086,FPU
-!fcomg ST0,fpureg nil D8,$0.2+$2 nil nil 8086,FPU
-; Extended comparisons
-!fcomg2 fpureg nil $0.1,$0.2+$1 nil nil @0,FPU
-!fcomg2 ST0,fpureg nil $0.1,$0.2+$2 nil nil @0,FPU
-; Comparison (without pop)
-fcom!fcomg 2,D0
-ficom mem16x nil DE $1,2 nil 8086,FPU
-ficom mem32x nil DA $1,2 nil 8086,FPU
-; Comparison (with pop)
-fcomp!fcomg 3,D8
-ficomp mem16x nil DE $1,3 nil 8086,FPU
-ficomp mem32x nil DA $1,3 nil 8086,FPU
-fcompp!twobyte DE,D9 8086,FPU
-; Unordered comparison (with pop)
-fucom!fcomg2 DD,E0 286,FPU
-fucomp!fcomg2 DD,E8 286,FPU
-fucompp!twobyte DA,E9 286,FPU
-ftst!twobyte D9,E4 8086,FPU
-fxam!twobyte D9,E5 8086,FPU
-; Load constants into ST0
-fld1!twobyte D9,E8 8086,FPU
-fldl2t!twobyte D9,E9 8086,FPU
-fldl2e!twobyte D9,EA 8086,FPU
-fldpi!twobyte D9,EB 8086,FPU
-fldlg2!twobyte D9,EC 8086,FPU
-fldln2!twobyte D9,ED 8086,FPU
-fldz!twobyte D9,EE 8086,FPU
-; Arithmetic
-!farith mem32x nil D8 $1,$0.1 nil 8086,FPU
-!farith mem64x nil DC $1,$0.1 nil 8086,FPU
-!farith fpureg nil D8,$0.2+$1 nil nil 8086,FPU
-!farith ST0,ST0 nil D8,$0.2 nil nil 8086,FPU
-!farith ST0,FPUREG_NOTST0 nil D8,$0.2+$2 nil nil 8086,FPU
-!farith TO fpureg nil DC,$0.3+$1 nil nil 8086,FPU
-!farith FPUREG_NOTST0,ST0 nil DC,$0.3+$1 nil nil 8086,FPU
-!farithp fpureg nil DE,$0.1+$1 nil nil 8086,FPU
-!farithp fpureg,ST0 nil DE,$0.1+$1 nil nil 8086,FPU
-!fiarith mem32x nil DA $1,$0.1 nil 8086,FPU
-!fiarith mem16x nil DE $1,$0.1 nil 8086,FPU
-fadd!farith 0,C0,C0
-faddp!farithp C0
-fiadd!fiarith 0
-fsub!farith 4,E0,E8
-fisub!fiarith 4
-fsubp!farithp E8
-fsubr!farith 5,E8,E0
-fisubr!fiarith 5
-fsubrp!farithp E0
-; Multiply
-fmul!farith 1,C8,C8
-fimul!fiarith 1
-fmulp!farithp C8
-; Divide
-fdiv!farith 6,F0,F8
-fidiv!fiarith 6
-fdivp!farithp F8
-fdivr!farith 7,F8,F0
-fidivr!fiarith 7
-fdivrp!farithp F0
-; Other arithmetic
-f2xm1!twobyte D9,F0 8086,FPU
-fyl2x!twobyte D9,F1 8086,FPU
-fptan!twobyte D9,F2 8086,FPU
-fpatan!twobyte D9,F3 8086,FPU
-fxtract!twobyte D9,F4 8086,FPU
-fprem1!twobyte D9,F5 286,FPU
-fdecstp!twobyte D9,F6 8086,FPU
-fincstp!twobyte D9,F7 8086,FPU
-fprem!twobyte D9,F8 8086,FPU
-fyl2xp1!twobyte D9,F9 8086,FPU
-fsqrt!twobyte D9,FA 8086,FPU
-fsincos!twobyte D9,FB 286,FPU
-frndint!twobyte D9,FC 8086,FPU
-fscale!twobyte D9,FD 8086,FPU
-fsin!twobyte D9,FE 286,FPU
-fcos!twobyte D9,FF 286,FPU
-fchs!twobyte D9,E0 8086,FPU
-fabs!twobyte D9,E1 8086,FPU
-; Processor control
-fninit!twobyte DB,E3 8086,FPU
-finit!threebyte 9B,DB,E3 8086,FPU
-fldcw mem16 nil D9 $1,5 nil 8086,FPU
-fnstcw mem16 nil D9 $1,7 nil 8086,FPU
-fstcw mem16 nil 9B,D9 $1,7 nil 8086,FPU
-fnstsw mem16 nil DD $1,7 nil 8086,FPU
-fnstsw reg_ax nil DF,E0 nil nil 8086,FPU
-fstsw mem16 nil 9B,DD $1,7 nil 8086,FPU
-fstsw reg_ax nil 9B,DF,E0 nil nil 8086,FPU
-fnclex!twobyte DB,E2 8086,FPU
-fclex!threebyte 9B,DB,E2 8086,FPU
-fnstenv!onebytemem D9,6 8086,FPU
-fstenv!twobytemem 9B,D9,6 8086,FPU
-fldenv!onebytemem D9,4 8086,FPU
-fnsave!onebytemem DD,6 8086,FPU
-fsave!twobytemem 9B,DD,6 8086,FPU
-frstor!onebytemem DD,4 8086,FPU
-ffree fpureg nil DD,C0+$1 nil nil 8086,FPU
-ffreep fpureg nil DF,C0+$1 nil nil P6,FPU,UNDOC
-fnop!twobyte D9,D0 8086,FPU
-fwait!onebyte nil,9B 8086,FPU
-;
-; Prefixes (should the others be here too? should wait be a prefix?)
-;
-wait!onebyte nil,9B 8086
-;
-; 486 extensions
-;
-; Compare & exchange, exchange & add
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg8,reg8 nil 0F,$0.1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg8 nil 0F,$0.1 $1,$2 nil @0
-!cmpxchgxadd mem8x,reg8 nil 0F,$0.1 $1,$2 nil @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg16,reg16 16 0F,$0.1+1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg16 16 0F,$0.1+1 $1,$2 nil @0
-!cmpxchgxadd mem16x,reg16 16 0F,$0.1+1 $1,$2 nil @0
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-!cmpxchgxadd reg32,reg32 32 0F,$0.1+1 $1r,$2 nil @0
-!cmpxchgxadd mem,reg32 32 0F,$0.1+1 $1,$2 nil @0
-!cmpxchgxadd mem32x,reg32 32 0F,$0.1+1 $1,$2 nil @0
-bswap reg32 32 0F,C8+$1 nil nil 486
-xadd!cmpxchgxadd C0 486
-cmpxchg!cmpxchgxadd B0 486
-cmpxchg486!cmpxchgxadd A6 486,UNDOC
-invd!twobyte 0F,08 486,PRIV
-wbinvd!twobyte 0F,09 486,PRIV
-invlpg!twobytemem 0F,01,7 486,PRIV
-;
-; 586 and late 486 extensions
-;
-cpuid!twobyte 0F,A2 486
-;
-; Pentium extensions
-;
-wrmsr!twobyte 0F,30 P5,PRIV
-rdtsc!twobyte 0F,31 P5
-rdmsr!twobyte 0F,32 P5,PRIV
-cmpxchg8b mem64 nil 0F,C7 $1,1 nil P5
-;
-; Pentium II/Pentium Pro extensions
-;
-sysenter!twobyte 0F,34 P6
-sysexit!twobyte 0F,35 P6,PRIV
-fxsave!twobytemem 0F,AE,0 P6,FPU
-fxrstor!twobytemem 0F,AE,1 P6,FPU
-rdpmc!twobyte 0F,33 P6
-ud2!twobyte 0F,0B 286
-ud1!twobyte 0F,B9 286,UNDOC
-; cmov
-; fcmov
-fcomi!fcomg2 DB,F0 P6
-fucomi!fcomg2 DB,E8 P6
-fcomip!fcomg2 DF,F0 P6
-fucomip!fcomg2 DF,E8 P6
-;
-; Pentium4 extensions
-;
-movnti mem32,reg32 nil 0F,C3 $1,$2 nil P4
-clflush mem8 nil 0F,AE $1,7 nil KATMAI
-lfence!threebyte 0F,AE,E8 KATMAI
-mfence!threebyte 0F,AE,F0 KATMAI
-pause!twobyte F3,90 P4
-;
-; MMX/SSE2 instructions
-;
-; General
-!mmxsse MMXREG,rm64 nil 0F,$0.1 $2,$1 nil @0,MMX
-!mmxsse XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil @1
-; Shifts
-!pshift MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX
-!pshift XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2
-!pshift MMXREG,imm8 nil 0F,$0.2 $1r,$0.3 $2,8 P5,MMX
-!pshift XMMREG,imm8 nil 66,0F,$0.2 $1r,$0.3 $2,8 P4,SSE2
-emms!twobyte 0F,77 P5,MMX
-movd MMXREG,rm32 nil 0F,6E $2,$1 nil P5,MMX
-movd rm32,MMXREG nil 0F,7E $1,$2 nil P5,MMX
-movd XMMREG,rm32 nil 66,0F,6E $2,$1 nil P4,SSE2
-movd rm32,XMMREG nil 66,0F,7E $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq MMXREG,MMXREG nil 0F,6F $2r,$1 nil P5,MMX
-movq MMXREG,mem64 nil 0F,6F $2,$1 nil P5,MMX
-movq mem64,MMXREG nil 0F,7F $1,$2 nil P5,MMX
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movq XMMREG,XMMREG nil F3,0F,7E $2r,$1 nil P4,SSE2
-movq XMMREG,mem64 nil F3,0F,7E $2,$1 nil P4,SSE2
-movq mem64,XMMREG nil 66,0F,D6 $1,$2 nil P4,SSE2
-packssdw!mmxsse 6B P5 P4,SSE2
-packsswb!mmxsse 63 P5 P4,SSE2
-packuswb!mmxsse 67 P5 P4,SSE2
-paddb!mmxsse FC P5 P4,SSE2
-paddw!mmxsse FD P5 P4,SSE2
-paddd!mmxsse FE P5 P4,SSE2
-paddq!mmxsse D4 P5 P4,SSE2
-paddsb!mmxsse EC P5 P4,SSE2
-paddsw!mmxsse ED P5 P4,SSE2
-paddusb!mmxsse DC P5 P4,SSE2
-paddusw!mmxsse DD P5 P4,SSE2
-pand!mmxsse DB P5 P4,SSE2
-pandn!mmxsse DF P5 P4,SSE2
-pcmpeqb!mmxsse 74 P5 P4,SSE2
-pcmpeqw!mmxsse 75 P5 P4,SSE2
-pcmpeqd!mmxsse 76 P5 P4,SSE2
-pcmpgtb!mmxsse 64 P5 P4,SSE2
-pcmpgtw!mmxsse 65 P5 P4,SSE2
-pcmpgtd!mmxsse 66 P5 P4,SSE2
-pmaddwd!mmxsse F5 P5 P4,SSE2
-pmulhw!mmxsse E5 P5 P4,SSE2
-pmullw!mmxsse D5 P5 P4,SSE2
-por!mmxsse EB P5 P4,SSE2
-psllw!pshift F1,71,6
-pslld!pshift F2,72,6
-psllq!pshift F3,73,6
-psraw!pshift E1,71,4
-psrad!pshift E2,72,4
-psrlw!pshift D1,71,2
-psrld!pshift D2,72,2
-psrlq!pshift D3,73,2
-psubb MMXREG,imm8 nil 0F,F8 $1r,2 $2,8 P5,MMX
-psubb XMMREG,imm8 nil 66,0F,F8 $1r,2 $2,8 P4,SSE2
-psubw MMXREG,imm8 nil 0F,F9 $1r,2 $2,8 P5,MMX
-psubw XMMREG,imm8 nil 66,0F,F9 $1r,2 $2,8 P4,SSE2
-psubd!mmxsse FA P5 P4,SSE2
-psubq!mmxsse FB P5 P4,SSE2
-psubsb!mmxsse E8 P5 P4,SSE2
-psubsw!mmxsse E9 P5 P4,SSE2
-psubusb!mmxsse D8 P5 P4,SSE2
-psubusw!mmxsse D9 P5 P4,SSE2
-punpckhbw!mmxsse 68 P5 P4,SSE2
-punpckhwd!mmxsse 69 P5 P4,SSE2
-punpckhdq!mmxsse 6A P5 P4,SSE2
-punpcklbw!mmxsse 60 P5 P4,SSE2
-punpcklwd!mmxsse 61 P5 P4,SSE2
-punpckldq!mmxsse 62 P5 P4,SSE2
-pxor!mmxsse EF P5 P4,SSE2
-;
-; PIII (Katmai) new instructions / SIMD instructions
-;
-; Standard
-!sseps XMMREG,rm128 nil 0F,$0.1 $2,$1 nil @0
-!ssess XMMREG,rm128 nil F3,0F,$0.1 $2,$1 nil @0
-; With immediate
-!ssepsimm XMMREG,rm128,imm8 nil 0F,$0.1 $2,$1 $3,8 KATMAI,SSE
-; Comparisons
-!ssecmpps XMMREG,rm128 nil 0F,C2 $2,$1 $0.1,8 KATMAI,SSE
-!ssecmpss XMMREG,rm128 nil F3,0F,C2 $2,$1 $0.1,8 KATMAI,SSE
-addps!sseps 58 KATMAI,SSE
-addss!ssess 58 KATMAI,SSE
-andnps!sseps 55 KATMAI,SSE
-andps!sseps 54 KATMAI,SSE
-cmpeqps!ssecmpps 0
-cmpeqss!ssecmpss 0
-cmpleps!ssecmpps 2
-cmpless!ssecmpss 2
-cmpltps!ssecmpps 1
-cmpltss!ssecmpss 1
-cmpneqps!ssecmpps 4
-cmpneqss!ssecmpss 4
-cmpnleps!ssecmpps 6
-cmpnless!ssecmpss 6
-cmpnltps!ssecmpps 5
-cmpnltss!ssecmpss 5
-cmpordps!ssecmpps 7
-cmpordss!ssecmpss 7
-cmpunordps!ssecmpps 3
-cmpunordss!ssecmpss 3
-cmpps!ssepsimm C2
-cmpss XMMREG,rm128,imm8 nil F3,0F,C2 $2,$1 $3,8 KATMAI,SSE
-comiss!sseps 2F KATMAI,SSE
-cvtpi2ps!sseps 2A KATMAI,SSE
-cvtps2pi!sseps 2D KATMAI,SSE
-cvtsi2ss!ssess 2A KATMAI,SSE
-cvtss2si!ssess 2D KATMAI,SSE
-cvttps2pi!sseps 2C KATMAI,SSE
-cvttss2si!ssess 2C KATMAI,SSE
-divps!sseps 5E KATMAI,SSE
-divss!ssess 5E KATMAI,SSE
-ldmxcsr mem32 nil 0F,AE $1,2 nil KATMAI,SSE
-maskmovq MMXREG,MMXREG nil 0F,F7 $2r,$1 nil KATMAI,MMX
-maxps!sseps 5F KATMAI,SSE
-maxss!ssess 5F KATMAI,SSE
-minps!sseps 5D KATMAI,SSE
-minss!ssess 5D KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movaps XMMREG,XMMREG nil 0F,28 $2r,$1 nil KATMAI,SSE
-movaps XMMREG,mem128 nil 0F,28 $2,$1 nil KATMAI,SSE
-movaps mem128,XMMREG nil 0F,29 $1,$2 nil KATMAI,SSE
-movhlps XMMREG,XMMREG nil 0F,12 $2r,$1 nil KATMAI,SSE
-movhps XMMREG,mem64 nil 0F,16 $2,$1 nil KATMAI,SSE
-movhps mem64,XMMREG nil 0F,17 $1,$2 nil KATMAI,SSE
-movlhps XMMREG,XMMREG nil 0F,16 $2r,$1 nil KATMAI,SSE
-movlps XMMREG,mem64 nil 0F,12 $2,$1 nil KATMAI,SSE
-movlps mem64,XMMREG nil 0F,13 $1,$2 nil KATMAI,SSE
-movmskps reg32,XMMREG nil 0F,50 $1r,$2 nil KATMAI,SSE
-movntps mem128,XMMREG nil 0F,2B $1,$2 nil KATMAI,SSE
-movntq mem64,MMXREG nil 0F,E7 $1,$2 nil KATMAI,MMX
-movntdq mem128,XMMREG nil 66,0F,E7 $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movss XMMREG,XMMREG nil F3,0F,10 $2r,$1 nil KATMAI,SSE
-movss XMMREG,mem64 nil F3,0F,10 $2,$1 nil KATMAI,SSE
-movss mem64,XMMREG nil F3,0F,11 $1,$2 nil KATMAI,SSE
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movups XMMREG,XMMREG nil 0F,10 $2r,$1 nil KATMAI,SSE
-movups XMMREG,mem64 nil 0F,10 $2,$1 nil KATMAI,SSE
-movups mem64,XMMREG nil 0F,11 $1,$2 nil KATMAI,SSE
-mulps!sseps 59 KATMAI,SSE
-mulss!ssess 59 KATMAI,SSE
-orps!sseps 56 KATMAI,SSE
-pavgb!mmxsse E0 KATMAI P4,SSE2
-pavgw!mmxsse E3 KATMAI P4,SSE2
-pextrw reg32,MMXREG,imm8 nil 0F,C5 $1r,$2 $3,8 KATMAI,MMX
-pextrw reg32,XMMREG,imm8 nil 66,0F,C5 $1r,$2 $3,8 P4,SSE2
-pinsrw MMXREG,reg32,imm8 nil 0F,C4 $2r,$1 $3,8 KATMAI,MMX
-pinsrw MMXREG,rm16,imm8 nil 0F,C4 $2,$1 $3,8 KATMAI,MMX
-pinsrw XMMREG,reg32,imm8 nil 66,0F,C4 $2r,$1 $3,8 P4,SSE2
-pinsrw XMMREG,rm16,imm8 nil 66,0F,C4 $2,$1 $3,8 P4,SSE2
-pmaxsw!mmxsse EE KATMAI P4,SSE2
-pmaxub!mmxsse DE KATMAI P4,SSE2
-pminsw!mmxsse EA KATMAI P4,SSE2
-pminub!mmxsse DA KATMAI P4,SSE2
-pmovmskb reg32,MMXREG nil 0F,D7 $1r,$2 nil KATMAI,SSE
-pmovmskb reg32,XMMREG nil 66,0F,D7 $1r,$2 nil P4,SSE2
-pmulhuw!mmxsse E4 KATMAI P4,SSE2
-prefetchnta!twobytemem 0F,18,0 KATMAI
-prefetcht0!twobytemem 0F,18,1 KATMAI
-prefetcht1!twobytemem 0F,18,2 KATMAI
-prefetcht2!twobytemem 0F,18,3 KATMAI
-psadbw!mmxsse F6 KATMAI KATMAI,SSE
-pshufw MMXREG,rm64,imm8 nil 0F,70 $2,$1 $3,8 KATMAI,MMX
-rcpps!sseps 53 KATMAI,SSE
-rcpss!ssess 53 KATMAI,SSE
-rsqrtps!sseps 52 KATMAI,SSE
-rsqrtss!ssess 52 KATMAI,SSE
-sfence!threebyte 0F,AE,F8 KATMAI
-shufps!ssepsimm C6
-sqrtps!sseps 51 KATMAI,SSE
-sqrtss!ssess 51 KATMAI,SSE
-stmxcsr mem32 nil 0F,AE $1,3 nil KATMAI,SSE
-subps!sseps 5C KATMAI,SSE
-subss!ssess 5C KATMAI,SSE
-ucomiss!ssess 2E KATMAI,SSE
-unpckhps!sseps 15 KATMAI,SSE
-unpcklps!sseps 14 KATMAI,SSE
-xorps!sseps 57 KATMAI,SSE
-;
-; SSE2 instructions
-;
-; Standard
-!sse2pd XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2
-!sse2sd XMMREG,rm128 nil F2,0F,$0.1 $2,$1 nil P4,SSE2
-; With immediate
-!sse2pdimm XMMREG,rm128,imm8 nil 66,0F,$0.1 $2,$1 $3,8 P4,SSE2
-; Comparisons
-!sse2cmppd XMMREG,rm128 nil 66,0F,C2 $2,$1 $0.1,8 P4,SSE2
-!sse2cmpsd XMMREG,rm128 nil F2,0F,C2 $2,$1 $0.1,8 P4,SSE2
-addpd!sse2pd 58
-addsd!sse2sd 58
-andnpd!sse2pd 55
-andpd!sse2pd 54
-cmpeqpd!sse2cmppd 0
-cmpeqsd!sse2cmpsd 0
-cmplepd!sse2cmppd 2
-cmplesd!sse2cmpsd 2
-cmpltpd!sse2cmppd 1
-cmpltsd!sse2cmpsd 1
-cmpneqpd!sse2cmppd 4
-cmpneqsd!sse2cmpsd 4
-cmpnlepd!sse2cmppd 6
-cmpnlesd!sse2cmpsd 6
-cmpnltpd!sse2cmppd 5
-cmpnltsd!sse2cmpsd 5
-cmpordpd!sse2cmppd 7
-cmpordsd!sse2cmpsd 7
-cmpunordpd!sse2cmppd 3
-cmpunordsd!sse2cmpsd 3
-cmppd!sse2pdimm C2
-cmpsd XMMREG,rm128,imm8 nil F2,0F,C2 $2,$1 $3,8 P4,SSE2
-comisd!sse2pd 2F
-cvtpi2pd!sse2pd 2A
-cvtsi2sd!sse2sd 2A
-divpd!sse2pd 5E
-divsd!sse2sd 5E
-maxpd!sse2pd 5F
-maxsd!sse2sd 5F
-minpd!sse2pd 5D
-minsd!sse2sd 5D
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movapd XMMREG,XMMREG nil 66,0F,28 $2r,$1 nil P4,SSE2
-movapd XMMREG,mem128 nil 66,0F,28 $2,$1 nil P4,SSE2
-movapd mem128,XMMREG nil 66,0F,29 $1,$2 nil P4,SSE2
-movhpd XMMREG,mem64 nil 66,0F,16 $2,$1 nil P4,SSE2
-movhpd mem64,XMMREG nil 66,0F,17 $1,$2 nil P4,SSE2
-movlpd XMMREG,mem64 nil 66,0F,12 $2,$1 nil P4,SSE2
-movlpd mem64,XMMREG nil 66,0F,13 $1,$2 nil P4,SSE2
-movmskpd reg32,XMMREG nil 66,0F,50 $1r,$2 nil P4,SSE2
-movntpd mem128,XMMREG nil 66,0F,2B $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movsd XMMREG,XMMREG nil F2,0F,10 $2r,$1 nil P4,SSE2
-movsd XMMREG,mem64 nil F2,0F,10 $2,$1 nil P4,SSE2
-movsd mem64,XMMREG nil F2,0F,11 $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movupd XMMREG,XMMREG nil 66,0F,10 $2r,$1 nil P4,SSE2
-movupd XMMREG,mem64 nil 66,0F,10 $2,$1 nil P4,SSE2
-movupd mem64,XMMREG nil 66,0F,11 $1,$2 nil P4,SSE2
-mulpd!sse2pd 59
-mulsd!sse2sd 59
-orpd!sse2pd 56
-shufpd!sse2pdimm C6
-sqrtpd!sse2pd 51
-sqrtsd!sse2sd 51
-subpd!sse2pd 5C
-subsd!sse2sd 5C
-ucomisd!sse2sd 2E
-unpckhpd!sse2pd 15
-unpcklpd!sse2pd 14
-xorpd!sse2pd 57
-cvtdq2pd!ssess E6 P4,SSE2
-cvtpd2dq!sse2sd E6
-cvtdq2ps!sseps 5B P4,SSE2
-cvtpd2pi!sse2pd 2D
-cvtpd2ps!sse2pd 5A
-cvtps2pd!sseps 5A P4,SSE2
-cvtps2dq!sse2pd 5B
-cvtsd2si!sse2sd 2D
-cvtsd2ss!sse2sd 5A
-cvtss2sd!ssess 5A P4,SSE2
-cvttpd2pi!sse2pd 2C
-cvttsd2si!sse2sd 2C
-cvttpd2dq!sse2pd E6
-cvttps2dq!ssess 5B P4,SSE2
-maskmovdqu XMMREG,XMMREG nil 66,0F,F7 $2r,$1 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqa XMMREG,XMMREG nil 66,0F,6F $2r,$1 nil P4,SSE2
-movdqa XMMREG,mem128 nil 66,0F,6F $2,$1 nil P4,SSE2
-movdqa mem128,XMMREG nil 66,0F,7F $1,$2 nil P4,SSE2
-; arbitrary encoding, picked $2r,$1 instead of $1r,$2
-movdqu XMMREG,XMMREG nil F3,0F,6F $2r,$1 nil P4,SSE2
-movdqu XMMREG,mem128 nil F3,0F,6F $2,$1 nil P4,SSE2
-movdqu mem128,XMMREG nil F3,0F,7F $1,$2 nil P4,SSE2
-movdq2q MMXREG,XMMREG nil F2,0F,D6 $2r,$1 nil P4,SSE2
-movq2dq XMMREG,MMXREG nil F3,0F,D6 $2r,$1 nil P4,SSE2
-pmuludq!mmxsse F4 P4 P4,SSE2
-pshufd!sse2pdimm 70
-pshufhw XMMREG,rm128,imm8 nil F3,0F,70 $2,$1 $3,8 P4,SSE2
-pshuflw XMMREG,rm128,imm8 nil F2,0F,70 $2,$1 $3,8 P4,SSE2
-pslldq XMMREG,imm8 nil 66,0F,73 $1r,7 $2,8 P4,SSE2
-psrldq XMMREG,imm8 nil 66,0F,73 $1r,3 $2,8 P4,SSE2
-punpckhqdq!sse2pd 6D
-punpcklqdq!sse2pd 6C
-;
-; AMD 3DNow! instructions
-;
-!now3d MMXREG,rm64 nil 0F,0F $2,$1 $0.1,8 @0,3DNOW,AMD
-prefetch!twobytemem 0F,0D,0 P5,3DNOW,AMD
-prefetchw!twobytemem 0F,0D,1 P5,3DNOW,AMD
-femms!twobyte 0F,0E P5,3DNOW,AMD
-pavgusb!now3d BF P5
-pf2id!now3d 1D P5
-pf2iw!now3d 1C ATHLON
-pfacc!now3d AE P5
-pfadd!now3d 9E P5
-pfcmpeq!now3d B0 P5
-pfcmpge!now3d 90 P5
-pfcmpgt!now3d A0 P5
-pfmax!now3d A4 P5
-pfmin!now3d 94 P5
-pfmul!now3d B4 P5
-pfnacc!now3d 8A ATHLON
-pfpnacc!now3d 8E ATHLON
-pfrcp!now3d 96 P5
-pfrcpit1!now3d A6 P5
-pfrcpit2!now3d B6 P5
-pfrsqit1!now3d A7 P5
-pfrsqrt!now3d 97 P5
-pfsub!now3d 9A P5
-pfsubr!now3d AA P5
-pi2fd!now3d 0D P5
-pi2fw!now3d 0C ATHLON
-pmulhrwa!now3d B7 P5
-pswapd!now3d BB ATHLON
-;
-; AMD extensions
-;
-syscall!twobyte 0F,05 P6,AMD
-sysret!twobyte 0F,07 P6,PRIV,AMD
-; swapgs
-;
-; Cyrix MMX instructions
-;
-!cyrixmmx MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX,CYRIX
-paddsiw!cyrixmmx 51
-paveb!cyrixmmx 50
-pdistib!cyrixmmx 54
-pmachriw MMXREG,mem64 nil 0F,5E $2,$1 nil P5,MMX,CYRIX
-pmagw!cyrixmmx 52
-pmulhriw!cyrixmmx 5D
-pmulhrwc!cyrixmmx 59
-pmvgezb!cyrixmmx 5C
-pmvlzb!cyrixmmx 5B
-pmvnzb!cyrixmmx 5A
-pmvzb!cyrixmmx 58
-psubsiw!cyrixmmx 55
-;
-; Cyrix extensions
-;
-!cyrixsmm mem80 nil 0F,$0.1 $1,0 nil 486,CYRIX,SMM
-rdshr!twobyte 0F,36 P6,CYRIX,SMM
-rsdc segreg,mem80 nil 0F,79 $2,$1 nil 486,CYRIX,SMM
-rsldt!cyrixsmm 7B
-rsts!cyrixsmm 7D
-svdc mem80,segreg nil 0F,78 $1,$2 nil 486,CYRIX,SMM
-svldt!cyrixsmm 7A
-svts!cyrixsmm 7C
-smint!twobyte 0F,38 P6,CYRIX
-smintold!twobyte 0F,7E 486,CYRIX,OBS
-wrshr!twobyte 0F,37 P6,CYRIX,SMM
-;
-; Obsolete/Undocumented Instructions
-;
-fsetpm!twobyte DB,E4 286,FPU,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts reg16,reg16 16 0F,A7 $1r,$2 nil 386,UNDOC,OBS
-ibts mem,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS
-ibts mem16x,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS
-; arbitrary encoding, picked $1r,$2 instead of $2r,$1
-ibts reg32,reg32 32 0F,A7 $1r,$2 nil 386,UNDOC,OBS
-ibts mem,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS
-ibts mem32x,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS
-loadall!twobyte 0F,07 386,UNDOC
-loadall286!twobyte 0F,05 286,UNDOC
-;pop reg_cs nil 0F nil nil 8086,UNDOC,OBS
-salc!onebyte nil,D6 8086,UNDOC
-smi!onebyte nil,F1 386,UNDOC
-; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11).
-umov reg8,reg8 nil 0F,10 $1r,$2 nil 386,UNDOC
-umov reg16,reg16 16 0F,11 $1r,$2 nil 386,UNDOC
-umov reg32,reg32 32 0F,11 $1r,$2 nil 386,UNDOC
-umov mem,reg8 nil 0F,10 $1,$2 nil 386,UNDOC
-umov mem8x,reg8 nil 0F,10 $1,$2 nil 386,UNDOC
-umov mem,reg16 16 0F,11 $1,$2 nil 386,UNDOC
-umov mem16x,reg16 16 0F,11 $1,$2 nil 386,UNDOC
-umov mem,reg32 32 0F,11 $1,$2 nil 386,UNDOC
-umov mem32x,reg32 32 0F,11 $1,$2 nil 386,UNDOC
-umov reg8,mem8 nil 0F,12 $2,$1 nil 386,UNDOC
-umov reg16,mem16 16 0F,13 $2,$1 nil 386,UNDOC
-umov reg32,mem32 32 0F,13 $2,$1 nil 386,UNDOC
-xbts reg16,mem16 16 0F,A6 $2,$1 nil 386,UNDOC,OBS
-xbts reg32,mem32 32 0F,A6 $2,$1 nil 386,UNDOC,OBS
+++ /dev/null
-/* $IdPath$
- * x86 internals header file
- *
- * Copyright (C) 2001 Peter Johnson
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#ifndef YASM_X86_INT_H
-#define YASM_X86_INT_H
-
-typedef struct x86_effaddr_data {
- unsigned char segment; /* segment override, 0 if none */
-
- /* How the spare (register) bits in Mod/RM are handled:
- * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
- * They're set in bytecode_new_insn().
- */
- unsigned char modrm;
- unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
- unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
-
- unsigned char sib;
- unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
- unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
- 0xff if unknown */
-} x86_effaddr_data;
-
-typedef struct x86_insn {
- /*@null@*/ effaddr *ea; /* effective address */
-
- /*@null@*/ immval *imm; /* immediate or relative value */
-
- unsigned char opcode[3]; /* opcode */
- unsigned char opcode_len;
-
- unsigned char addrsize; /* 0 or =mode_bits => no override */
- unsigned char opersize; /* 0 indicates no override */
- unsigned char lockrep_pre; /* 0 indicates no prefix */
-
- /* HACK, but a space-saving one: shift opcodes have an immediate
- * form and a ,1 form (with no immediate). In the parser, we
- * set this and opcode_len=1, but store the ,1 version in the
- * second byte of the opcode array. We then choose between the
- * two versions once we know the actual value of imm (because we
- * don't know it in the parser module).
- *
- * A override to force the imm version should just leave this at
- * 0. Then later code won't know the ,1 version even exists.
- * TODO: Figure out how this affects CPU flags processing.
- *
- * Call x86_SetInsnShiftFlag() to set this flag to 1.
- */
- unsigned char shift_op;
-
- /* HACK, similar to that for shift_op above, for optimizing instructions
- * that take a sign-extended imm8 as well as imm values (eg, the arith
- * instructions and a subset of the imul instructions).
- */
- unsigned char signext_imm8_op;
-
- unsigned char mode_bits;
-} x86_insn;
-
-typedef struct x86_jmprel {
- expr *target; /* target location */
-
- struct {
- unsigned char opcode[3];
- unsigned char opcode_len; /* 0 = no opc for this version */
- } shortop, nearop;
-
- /* which opcode are we using? */
- /* The *FORCED forms are specified in the source as such */
- x86_jmprel_opcode_sel op_sel;
-
- unsigned char addrsize; /* 0 or =mode_bits => no override */
- unsigned char opersize; /* 0 indicates no override */
- unsigned char lockrep_pre; /* 0 indicates no prefix */
-
- unsigned char mode_bits;
-} x86_jmprel;
-
-void x86_bc_delete(bytecode *bc);
-void x86_bc_print(FILE *f, const bytecode *bc);
-bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
- resolve_label_func resolve_label);
-int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
- void *d, output_expr_func output_expr);
-
-int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
- unsigned char nosplit, unsigned char *displen,
- unsigned char *modrm, unsigned char *v_modrm,
- unsigned char *n_modrm, unsigned char *sib,
- unsigned char *v_sib, unsigned char *n_sib);
-
-#endif
/*
* x86 architecture description
*
- * Copyright (C) 2001 Peter Johnson
+ * Copyright (C) 2002 Peter Johnson
*
* This file is part of YASM.
*
#include "util.h"
/*@unused@*/ RCSID("$IdPath$");
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "expr.h"
+
#include "bytecode.h"
+
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
unsigned char x86_mode_bits = 0;
+/* Handle an x86-specific directive.
+ *
+ * Only "bits" (compared case-insensitively) is recognized.  Its first
+ * value/parameter must have no `val` set and carry a `param` expression
+ * that yields the integer 16 or 32; that number is stored in x86_mode_bits.
+ * Any other argument reports an "invalid argument" error and leaves
+ * x86_mode_bits untouched.
+ *
+ * Returns 0 when the directive name was recognized (even if its argument
+ * was rejected) and 1 when it was not.
+ */
+int
+x86_directive(const char *name, valparamhead *valparams,
+              /*@unused@*/ /*@null@*/ valparamhead *objext_valparams,
+              /*@unused@*/ sectionhead *headp)
+{
+    valparam *first;
+    const intnum *num;
+    long bits;
+
+    /* Directive name is not one this function knows about. */
+    if (strcasecmp(name, "bits") != 0)
+        return 1;
+
+    first = vps_first(valparams);
+    if (first && !first->val && first->param != NULL) {
+        num = expr_get_intnum(&first->param);
+        if (num != NULL) {
+            bits = intnum_get_int(num);
+            if (bits == 16 || bits == 32) {
+                x86_mode_bits = (unsigned char)bits;
+                return 0;
+            }
+        }
+    }
+
+    Error(_("invalid argument to [%s]"), "BITS");
+    return 0;
+}
+
+/* Map a register value to the size this file associates with its class.
+ *
+ * The low three bits of `reg` (the register number) are masked off and the
+ * remainder is interpreted as an x86_expritem_reg_size:
+ *   X86_REG8 -> 1, X86_REG16 -> 2, X86_REG32/CRREG/DRREG/TRREG -> 4,
+ *   X86_MMXREG -> 8, X86_XMMREG -> 16, X86_FPUREG -> 10.
+ * Any other class raises InternalError(); 0 is the result should that call
+ * ever return.
+ */
+unsigned int
+x86_get_reg_size(unsigned long reg)
+{
+    unsigned int size = 0;
+
+    switch ((x86_expritem_reg_size)(reg & ~7)) {
+        case X86_REG8:
+            size = 1;
+            break;
+        case X86_REG16:
+            size = 2;
+            break;
+        case X86_REG32:
+        case X86_CRREG:
+        case X86_DRREG:
+        case X86_TRREG:
+            size = 4;
+            break;
+        case X86_MMXREG:
+            size = 8;
+            break;
+        case X86_XMMREG:
+            size = 16;
+            break;
+        case X86_FPUREG:
+            size = 10;
+            break;
+        default:
+            InternalError(_("unknown register size"));
+    }
+
+    return size;
+}
+
+/* Write the textual name of register `reg` to `f`.
+ *
+ * The register class is `reg` with its low three bits cleared; those low
+ * three bits are the register number.  8-bit and 16-bit registers are looked
+ * up in name tables, 32-bit registers reuse the 16-bit names with an "e"
+ * prefix, and the remaining classes print a fixed prefix followed by the
+ * register number.  An unrecognized class raises InternalError().
+ */
+void
+x86_reg_print(FILE *f, unsigned long reg)
+{
+    static const char *byte_names[] = {"al","cl","dl","bl","ah","ch","dh","bh"};
+    static const char *word_names[] = {"ax","cx","dx","bx","sp","bp","si","di"};
+    const x86_expritem_reg_size cls = (x86_expritem_reg_size)(reg & ~7);
+    const int num = (int)(reg & 7);
+
+    switch (cls) {
+        case X86_REG8:
+            fprintf(f, "%s", byte_names[num]);
+            break;
+        case X86_REG16:
+            fprintf(f, "%s", word_names[num]);
+            break;
+        case X86_REG32:
+            fprintf(f, "e%s", word_names[num]);
+            break;
+        case X86_MMXREG:
+            fprintf(f, "mm%d", num);
+            break;
+        case X86_XMMREG:
+            fprintf(f, "xmm%d", num);
+            break;
+        case X86_CRREG:
+            fprintf(f, "cr%d", num);
+            break;
+        case X86_DRREG:
+            fprintf(f, "dr%d", num);
+            break;
+        case X86_TRREG:
+            fprintf(f, "tr%d", num);
+            break;
+        case X86_FPUREG:
+            fprintf(f, "st%d", num);
+            break;
+        default:
+            InternalError(_("unknown register size"));
+    }
+}
+
+/* Write the name of segment register `segreg` to `f`.
+ *
+ * The low three bits of `segreg` select the name (es, cs, ss, ds, fs, gs).
+ * Three bits can encode 0-7 but only six names exist, so the values 6 and 7
+ * are rejected with InternalError() instead of reading past the end of the
+ * name table.
+ */
+void
+x86_segreg_print(FILE *f, unsigned long segreg)
+{
+    static const char *name[] = {"es","cs","ss","ds","fs","gs"};
+    const unsigned long idx = segreg & 7;
+
+    if (idx >= sizeof(name) / sizeof(name[0]))
+        InternalError(_("unknown segment register"));
+    else
+        fprintf(f, "%s", name[idx]);
+}
+
+/* Apply an instruction prefix to bytecode `bc`.
+ *
+ * data[0] selects the prefix kind (an x86_parse_insn_prefix) and data[1],
+ * truncated to a byte, is the value handed to the matching bytecode setter:
+ * lock/rep prefix, address-size override or operand-size override.
+ * Any other kind is silently ignored.
+ */
+void
+x86_handle_prefix(bytecode *bc, const unsigned long data[4])
+{
+    const x86_parse_insn_prefix kind = (x86_parse_insn_prefix)data[0];
+    const unsigned char value = (unsigned char)data[1];
+
+    if (kind == X86_LOCKREP)
+        x86_bc_insn_set_lockrep_prefix(bc, value);
+    else if (kind == X86_ADDRSIZE)
+        x86_bc_insn_addrsize_override(bc, value);
+    else if (kind == X86_OPERSIZE)
+        x86_bc_insn_opersize_override(bc, value);
+}
+
+/* Apply a segment prefix to the effective address of bytecode `bc`.
+ *
+ * The byte passed on to x86_ea_set_segment() is taken from bits 8-15 of
+ * `segreg`.  The effective address comes from x86_bc_insn_get_ea(), whose
+ * result is forwarded as-is.
+ */
+void
+x86_handle_seg_prefix(bytecode *bc, unsigned long segreg)
+{
+    effaddr *ea = x86_bc_insn_get_ea(bc);
+    const unsigned char segment = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(ea, segment);
+}
+
+/* Apply a segment override directly to effective address `ea`.
+ *
+ * The byte passed on to x86_ea_set_segment() is taken from bits 8-15 of
+ * `segreg`.
+ */
+void
+x86_handle_seg_override(effaddr *ea, unsigned long segreg)
+{
+    const unsigned char segment = (unsigned char)(segreg >> 8);
+
+    x86_ea_set_segment(ea, segment);
+}
+
/* Define arch structure -- see arch.h for details */
arch x86_arch = {
"x86 (IA-32, x86-64)",
"x86",
+ { /* NOTE(review): looks like the parser-facing hook group; field order must match arch.h -- confirm */
+ x86_switch_cpu,
+ x86_check_identifier,
+ x86_directive, /* handles the "bits" directive */
+ x86_new_insn,
+ x86_handle_prefix, /* lock/rep, address-size and operand-size prefixes */
+ x86_handle_seg_prefix, /* sets the segment on a bytecode's effective address */
+ x86_handle_seg_override, /* sets the segment on a given effective address */
+ x86_ea_new_expr
+ },
{
X86_BYTECODE_TYPE_MAX,
x86_bc_delete,
x86_bc_print,
x86_bc_resolve,
x86_bc_tobytes
- }
+ },
+ x86_get_reg_size, /* register class -> size */
+ x86_reg_print, /* prints a register name */
+ x86_segreg_print, /* prints a segment register name */
+ NULL, /* x86_ea_data_delete */
+ x86_ea_data_print
};
} x86_bytecode_type;
#define X86_BYTECODE_TYPE_MAX X86_BC_JMPREL+1
+/* Register class, kept in the same unsigned long as the register number.
+ * 0-7 (low 3 bits) used for register number, stored in same data area.
+ * Code in this module tests the class with (reg & ~7) and extracts the
+ * register number with (reg & 7).
+ * The values of X86_REG8 through X86_XMMREG are numerically equal to
+ * 8, 16, 32, 64 and 128 respectively.
+ */
+typedef enum {
+ X86_REG8 = 0x8,
+ X86_REG16 = 0x10,
+ X86_REG32 = 0x20,
+ X86_MMXREG = 0x40,
+ X86_XMMREG = 0x80,
+ X86_CRREG = 0xC0,
+ X86_DRREG = 0xC8,
+ X86_TRREG = 0xF0,
+ X86_FPUREG = 0xF8
+} x86_expritem_reg_size;
+
+/* Kind of instruction prefix.  x86_check_identifier() stores one of these in
+ * data[0] when it recognizes a prefix, and x86_handle_prefix() dispatches on
+ * it.  Values start at 1.
+ */
+typedef enum {
+ X86_LOCKREP = 1,
+ X86_ADDRSIZE,
+ X86_OPERSIZE
+} x86_parse_insn_prefix;
+
+/* Target modifiers.  x86_check_identifier() stores one of these in data[0]
+ * when it recognizes the NEAR, SHORT or FAR keyword.
+ */
+typedef enum {
+ X86_NEAR,
+ X86_SHORT,
+ X86_FAR
+} x86_parse_targetmod;
+
typedef enum {
JR_NONE,
JR_SHORT,
void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment);
effaddr *x86_ea_new_reg(unsigned char reg);
-effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len);
+effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len);
effaddr *x86_ea_new_expr(/*@keep@*/ expr *e);
/*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc);
*/
typedef struct x86_new_insn_data {
/*@keep@*/ /*@null@*/ effaddr *ea;
- /*@keep@*/ /*@null@*/ immval *imm;
+ /*@keep@*/ /*@null@*/ expr *imm;
unsigned char opersize;
unsigned char op_len;
unsigned char op[3];
extern unsigned char x86_mode_bits;
+/* x86-specific effective address data: segment override plus the Mod/RM and
+ * SIB bytes with their "valid" and "needed" flags.  x86_ea_new_imm()
+ * allocates it together with the generic effaddr in a single block
+ * (sizeof(effaddr)+sizeof(x86_effaddr_data)); it is accessed through
+ * ea_get_data() / ea_get_const_data().
+ */
+typedef struct x86_effaddr_data {
+ unsigned char segment; /* segment override, 0 if none */
+
+ /* How the spare (register) bits in Mod/RM are handled:
+ * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!)
+ * They're set in bytecode_new_insn().
+ */
+ unsigned char modrm;
+ unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */
+ unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */
+
+ unsigned char sib;
+ unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */
+ unsigned char need_sib; /* 1 if SIB byte needed, 0 if not,
+ 0xff if unknown */
+} x86_effaddr_data;
+
+/* Data of an X86_BC_INSN bytecode: optional effective address and immediate,
+ * the opcode bytes, size/prefix overrides, and flags that defer the choice
+ * between alternate encodings until the immediate value is known.
+ */
+typedef struct x86_insn {
+ /*@null@*/ effaddr *ea; /* effective address */
+
+ /*@null@*/ immval *imm; /* immediate or relative value */
+
+ unsigned char opcode[3]; /* opcode */
+ unsigned char opcode_len;
+
+ unsigned char addrsize; /* 0 or =mode_bits => no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+ /* HACK, but a space-saving one: shift opcodes have an immediate
+ * form and a ,1 form (with no immediate). In the parser, we
+ * set this and opcode_len=1, but store the ,1 version in the
+ * second byte of the opcode array. We then choose between the
+ * two versions once we know the actual value of imm (because we
+ * don't know it in the parser module).
+ *
+ * An override to force the imm version should just leave this at
+ * 0. Then later code won't know the ,1 version even exists.
+ * TODO: Figure out how this affects CPU flags processing.
+ *
+ * Call x86_SetInsnShiftFlag() to set this flag to 1.
+ */
+ unsigned char shift_op;
+
+ /* HACK, similar to that for shift_op above, for optimizing instructions
+ * that take a sign-extended imm8 as well as imm values (eg, the arith
+ * instructions and a subset of the imul instructions).
+ */
+ unsigned char signext_imm8_op;
+
+ unsigned char mode_bits;
+} x86_insn;
+
+/* Data of a relative jump (presumably the X86_BC_JMPREL bytecode type --
+ * confirm against the bytecode code): the jump target, the opcodes of the
+ * short and near encodings, which of them is selected, and the same
+ * size/prefix overrides as x86_insn.
+ */
+typedef struct x86_jmprel {
+ expr *target; /* target location */
+
+ struct {
+ unsigned char opcode[3];
+ unsigned char opcode_len; /* 0 = no opc for this version */
+ } shortop, nearop;
+
+ /* which opcode are we using? */
+ /* The *FORCED forms are specified in the source as such */
+ x86_jmprel_opcode_sel op_sel;
+
+ unsigned char addrsize; /* 0 or =mode_bits => no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+
+ unsigned char mode_bits;
+} x86_jmprel;
+
+void x86_bc_delete(bytecode *bc);
+void x86_bc_print(FILE *f, const bytecode *bc);
+bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect,
+ resolve_label_func resolve_label);
+int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect,
+ void *d, output_expr_func output_expr);
+
+int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits,
+ unsigned char nosplit, unsigned char *displen,
+ unsigned char *modrm, unsigned char *v_modrm,
+ unsigned char *n_modrm, unsigned char *sib,
+ unsigned char *v_sib, unsigned char *n_sib);
+
+void x86_switch_cpu(const char *cpuid);
+
+/* Classify the identifier id.  On a match, data[] receives the
+ * arch-specific values for it; up to data[2] is written for instructions, so
+ * the array is declared with the same 4 elements as the definition.
+ */
+arch_check_id_retval x86_check_identifier(unsigned long data[4],
+                                          const char *id);
+
+int x86_directive(const char *name, valparamhead *valparams,
+ /*@null@*/ valparamhead *objext_valparams,
+ sectionhead *headp);
+
+/* Create an instruction bytecode from the data[] filled in by
+ * x86_check_identifier() and the parsed operands.  data[0] through data[2]
+ * are read, so the array is declared with the same 4 elements as the
+ * definition.  Returns NULL if no instruction form matches.
+ */
+/*@null@*/ bytecode *x86_new_insn(const unsigned long data[4],
+                                  int num_operands,
+                                  /*@null@*/ insn_operandhead *operands);
+
+void x86_handle_prefix(bytecode *bc, const unsigned long data[4]);
+
+void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg);
+
+void x86_handle_seg_override(effaddr *ea, unsigned long segreg);
+
+unsigned int x86_get_reg_size(unsigned long reg);
+
+void x86_reg_print(FILE *f, unsigned long reg);
+
+void x86_segreg_print(FILE *f, unsigned long segreg);
+
+void x86_ea_data_print(FILE *f, const effaddr *ea);
+
#endif
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
#include "bc-int.h"
ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */
}
- insn->imm = d->imm;
if (d->imm) {
+ insn->imm = imm_new_expr(d->imm);
insn->imm->len = d->im_len;
insn->imm->sign = d->im_sign;
- }
+ } else
+ insn->imm = NULL;
insn->opcode[0] = d->op[0];
insn->opcode[1] = d->op[1];
/*@-compmempass@*/
effaddr *
-x86_ea_new_imm(immval *imm, unsigned char im_len)
+x86_ea_new_imm(expr *imm, unsigned char im_len)
{
effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data));
x86_effaddr_data *ead = ea_get_data(ea);
- ea->disp = imm->val;
+ ea->disp = imm;
ea->len = im_len;
ea->nosplit = 0;
ead->segment = 0;
switch ((x86_bytecode_type)bc->type) {
case X86_BC_INSN:
insn = bc_get_data(bc);
- if (insn->ea) {
- expr_delete(insn->ea->disp);
- xfree(insn->ea);
- }
+ if (insn->ea)
+ ea_delete(insn->ea);
if (insn->imm) {
expr_delete(insn->imm->val);
xfree(insn->imm);
}
}
+/* Print the x86-specific part of effective address ea to f, one line each
+ * for the segment override, the Mod/RM state and the SIB state, each
+ * indented by indent_level columns.
+ */
+void
+x86_ea_data_print(FILE *f, const effaddr *ea)
+{
+    const x86_effaddr_data *x86_ea = ea_get_const_data(ea);
+    unsigned int segment = (unsigned int)x86_ea->segment;
+    unsigned int modrm = (unsigned int)x86_ea->modrm;
+    unsigned int modrm_valid = (unsigned int)x86_ea->valid_modrm;
+    unsigned int modrm_needed = (unsigned int)x86_ea->need_modrm;
+    unsigned int sib = (unsigned int)x86_ea->sib;
+    unsigned int sib_valid = (unsigned int)x86_ea->valid_sib;
+    unsigned int sib_needed = (unsigned int)x86_ea->need_sib;
+
+    fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", segment);
+    fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "",
+            modrm, modrm_valid, modrm_needed);
+    fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "",
+            sib, sib_valid, sib_needed);
+}
+
void
x86_bc_print(FILE *f, const bytecode *bc)
{
const x86_insn *insn;
const x86_jmprel *jmprel;
- x86_effaddr_data *ead;
switch ((x86_bytecode_type)bc->type) {
case X86_BC_INSN:
insn = bc_get_const_data(bc);
fprintf(f, "%*s_Instruction_\n", indent_level, "");
fprintf(f, "%*sEffective Address:", indent_level, "");
- if (!insn->ea)
- fprintf(f, " (nil)\n");
- else {
- indent_level++;
- fprintf(f, "\n%*sDisp=", indent_level, "");
- expr_print(f, insn->ea->disp);
+ if (insn->ea) {
fprintf(f, "\n");
- ead = ea_get_data(insn->ea);
- fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n",
- indent_level, "", (unsigned int)insn->ea->len,
- (unsigned int)ead->segment,
- (unsigned int)insn->ea->nosplit);
- fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n",
- indent_level, "", (unsigned int)ead->modrm,
- (unsigned int)ead->valid_modrm,
- (unsigned int)ead->need_modrm);
- fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n",
- indent_level, "", (unsigned int)ead->sib,
- (unsigned int)ead->valid_sib,
- (unsigned int)ead->need_sib);
+ indent_level++;
+ ea_print(f, insn->ea);
indent_level--;
- }
+ } else
+ fprintf(f, " (nil)\n");
fprintf(f, "%*sImmediate Value:", indent_level, "");
if (!insn->imm)
fprintf(f, " (nil)\n");
x86_effaddr_data ead_t = *ead; /* structure copy */
unsigned char displen = ea->len;
- if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) ||
- (!ead->valid_modrm && ead->need_modrm))) {
+ if (ea->disp) {
temp = expr_copy(ea->disp);
assert(temp != NULL);
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
#include "expr-int.h"
int *ret;
/* don't allow 16-bit registers */
- if (ei->data.reg.size != 32)
+ if ((ei->data.reg & ~7) != X86_REG32)
return 0;
- ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */
+ ret = &data[ei->data.reg & 7];
/* overwrite with 0 to eliminate register from displacement expr */
ei->type = EXPR_INT;
reg16[7] = &data->di;
/* don't allow 32-bit registers */
- if (ei->data.reg.size != 16)
+ if ((ei->data.reg & ~7) != X86_REG16)
return 0;
- ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */
+ /* & 7 for sanity check */
+ ret = reg16[ei->data.reg & 7];
/* only allow BX, SI, DI, BP */
if (!ret)
unsigned char *addrsize = (unsigned char *)d;
if (ei->type == EXPR_REG) {
- *addrsize = ei->data.reg.size;
+ *addrsize = (unsigned char)ei->data.reg & ~7;
return 1;
} else
return 0;
return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE,
havereg == HAVE_BP, displen, modrm,
v_modrm);
+ } else if (!*n_modrm && !*n_sib) {
+ /* Special case for MOV MemOffs opcode: displacement but no modrm. */
+ if (*addrsize == 32)
+ *displen = 4;
+ else if (*addrsize == 16)
+ *displen = 2;
}
return 1;
}
--- /dev/null
+/*
+ * x86 identifier recognition and instruction handling
+ *
+ * Copyright (C) 2002 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "globals.h"
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+#include "src/arch/x86/x86arch.h"
+
+#include "expr-int.h"
+#include "bc-int.h"
+
+
+/* Available CPU feature flags.  Each flag is one bit; an instruction's
+ * required flags are compared against cpu_enabled in x86_new_insn().
+ */
+#define CPU_Any (0) /* Any old cpu will do */
+#define CPU_086 CPU_Any
+#define CPU_186 (1<<0) /* i186 or better required */
+#define CPU_286 (1<<1) /* i286 or better required */
+#define CPU_386 (1<<2) /* i386 or better required */
+#define CPU_486 (1<<3) /* i486 or better required */
+#define CPU_586 (1<<4) /* i586 or better required */
+#define CPU_686 (1<<5) /* i686 or better required */
+#define CPU_P3 (1<<6) /* Pentium3 or better required */
+#define CPU_P4 (1<<7) /* Pentium4 or better required */
+#define CPU_IA64 (1<<8) /* IA-64 or better required */
+#define CPU_K6 (1<<9) /* AMD K6 or better required */
+#define CPU_Athlon (1<<10) /* AMD Athlon or better required */
+#define CPU_Hammer (1<<11) /* AMD Sledgehammer or better required */
+#define CPU_FPU (1<<12) /* FPU support required */
+#define CPU_MMX (1<<13) /* MMX support required */
+#define CPU_SSE (1<<14) /* Streaming SIMD extensions required */
+#define CPU_SSE2 (1<<15) /* Streaming SIMD extensions 2 required */
+#define CPU_3DNow (1<<16) /* 3DNow! support required */
+#define CPU_Cyrix (1<<17) /* Cyrix-specific instruction */
+#define CPU_AMD (1<<18) /* AMD-specific inst. (older than K6) */
+#define CPU_SMM (1<<19) /* System Management Mode instruction */
+#define CPU_Prot (1<<20) /* Protected mode only instruction */
+#define CPU_Undoc (1<<21) /* Undocumented instruction */
+#define CPU_Obs (1<<22) /* Obsolete instruction */
+#define CPU_Priv (1<<23) /* Privileged instruction */
+
+/* What instructions/features are enabled? Defaults to all. */
+static unsigned long cpu_enabled = ~CPU_Any;
+
+/* Opcode modifiers. The opcode bytes are in "reverse" order because the
+ * parameters are read from the arch-specific data in LSB->MSB order.
+ * (This is only for aesthetic reasons in the lexer code below; there is no
+ * practical reason.)
+ */
+#define MOD_Op2Add (1<<0) /* Parameter adds to opcode byte 2 */
+#define MOD_Gap0 (1<<1) /* Eats a parameter */
+#define MOD_Op1Add (1<<2) /* Parameter adds to opcode byte 1 */
+#define MOD_Gap1 (1<<3) /* Eats a parameter */
+#define MOD_Op0Add (1<<4) /* Parameter adds to opcode byte 0 */
+#define MOD_SpAdd (1<<5) /* Parameter adds to "spare" value */
+#define MOD_OpSizeR (1<<6) /* Parameter replaces opersize */
+#define MOD_Imm8 (1<<7) /* Parameter is included as immediate byte */
+
+/* Operand types. These are more detailed than the "general" types for all
+ * architectures, as they include the size, for instance.
+ * Bit Breakdown (from LSB to MSB):
+ * - 4 bits = general type (must be exact match, except for =3):
+ * 0 = immediate
+ * 1 = any general purpose, MMX, XMM, or FPU register
+ * 2 = memory
+ * 3 = any general purpose, MMX, XMM, or FPU register OR memory
+ * 4 = segreg
+ * 5 = any CR register
+ * 6 = any DR register
+ * 7 = any TR register
+ * 8 = ST0
+ * 9 = AL/AX/EAX (depending on size)
+ * A = CL/CX/ECX (depending on size)
+ * B = CR4
+ * C = memory offset (an EA, but with no registers allowed)
+ * [special case for MOV opcode]
+ * - 3 bits = size (user-specified, or from register size):
+ * 0 = any size acceptable
+ * 1/2/3/4 = 8/16/32/64 bits (from user or reg size)
+ * 5/6 = 80/128 bits (from user)
+ * - 1 bit = size implicit or explicit ("strictness" of size matching on
+ * non-registers -- registers are always strictly matched):
+ * 0 = user size must exactly match size above.
+ * 1 = user size either unspecified or exactly match size above.
+ *
+ * The bits more significant than the above are actions: what to do with the operand if the
+ * instruction matches. Essentially describes what part of the output bytecode
+ * gets the operand. This may require conversion (e.g. a register going into
+ * an ea field). Naturally, only one of each of these may be contained in the
+ * operands of a single insn_info structure.
+ * - 3 bits = action:
+ * 0 = does nothing (operand data is discarded)
+ * 1 = operand data goes into ea field
+ * 2 = operand data goes into imm field
+ * 3 = operand data goes into "spare" field
+ * 4 = operand data is added to opcode byte 0
+ */
+#define OPT_Imm 0x0
+#define OPT_Reg 0x1
+#define OPT_Mem 0x2
+#define OPT_RM 0x3
+#define OPT_SegReg 0x4
+#define OPT_CRReg 0x5
+#define OPT_DRReg 0x6
+#define OPT_TRReg 0x7
+#define OPT_ST0 0x8
+#define OPT_Areg 0x9
+#define OPT_Creg 0xA
+#define OPT_CR4 0xB
+#define OPT_MemOffs 0xC
+#define OPT_MASK 0x000F
+
+#define OPS_Any (0<<4)
+#define OPS_8 (1<<4)
+#define OPS_16 (2<<4)
+#define OPS_32 (3<<4)
+#define OPS_64 (4<<4)
+#define OPS_80 (5<<4)
+#define OPS_128 (6<<4)
+#define OPS_MASK 0x0070
+#define OPS_SHIFT 4
+
+#define OPS_Relaxed (1<<7)
+#define OPS_RMASK 0x0080
+
+#define OPA_None (0<<8)
+#define OPA_EA (1<<8)
+#define OPA_Imm (2<<8)
+#define OPA_Spare (3<<8)
+#define OPA_Op0Add (4<<8)
+#define OPA_MASK 0x0700
+
+typedef struct x86_insn_info {
+ /* The CPU feature flags needed to execute this instruction. This is OR'ed
+ * with arch-specific data[2]. This combined value is compared with
+ * cpu_enabled to see if all bits set here are set in cpu_enabled--if so,
+ * the instruction is available on this CPU.
+ */
+ unsigned long cpu;
+
+ /* Opcode modifiers for variations of instruction. As each modifier reads
+ * its parameter in LSB->MSB order from the arch-specific data[1] from the
+ * lexer data, and the LSB of the arch-specific data[1] is reserved for the
+ * count of insn_info structures in the instruction grouping, there can
+ * only be a maximum of 3 modifiers.
+ */
+ unsigned long modifiers;
+
+ /* Operand Size */
+ unsigned char opersize;
+
+ /* The length of the basic opcode */
+ unsigned char opcode_len;
+
+ /* The basic 1-3 byte opcode */
+ unsigned char opcode[3];
+
+ /* The 3-bit "spare" value (extended opcode) for the R/M byte field */
+ unsigned char spare;
+
+ /* The number of operands this form of the instruction takes */
+ unsigned char num_operands;
+
+ /* The types of each operand, see above */
+ unsigned int operands[3];
+} x86_insn_info;
+
+/* Define lexer arch-specific data with 0-3 modifiers.
+ * Expects an unsigned long array named `data' in the expanding scope and a
+ * table named <group>_insn.  Fills in:
+ *   data[0] = address of the <group>_insn table (cast to unsigned long)
+ *   data[1] = modifier parameters (mod) shifted up by 8 bits, with the
+ *             number of table entries in the low byte
+ *   data[2] = additional CPU feature flags
+ * x86_new_insn() decodes these fields.
+ */
+#define DEF_INSN_DATA(group, mod, cpu) do { \
+ data[0] = (unsigned long)group##_insn; \
+ data[1] = ((mod)<<8) | \
+ ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \
+ data[2] = cpu; \
+ } while (0)
+
+#define RET_INSN(group, mod, cpu) do { \
+ DEF_INSN_DATA(group, mod, cpu); \
+ return ARCH_CHECK_ID_INSN; \
+ } while (0)
+
+/*
+ * General instruction groupings
+ */
+
+/* One byte opcode instructions with no operands */
+static const x86_insn_info onebyte_insn[] = {
+ { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Two byte opcode instructions with no operands */
+static const x86_insn_info twobyte_insn[] = {
+ { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} }
+};
+
+/* Three byte opcode instructions with no operands */
+static const x86_insn_info threebyte_insn[] = {
+ { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0,
+ {0, 0, 0} }
+};
+
+/* One byte opcode instructions with general memory operand */
+static const x86_insn_info onebytemem_insn[] = {
+ { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1,
+ {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Two byte opcode instructions with general memory operand.
+ * Both opcode bytes come from modifier parameters (MOD_Op1Add and
+ * MOD_Op0Add), so the opcode length must be 2 for the second byte to be part
+ * of the instruction.
+ */
+static const x86_insn_info twobytemem_insn[] = {
+    { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 2, {0, 0, 0}, 0, 1,
+      {OPT_Mem|OPS_Any|OPA_EA, 0, 0} }
+};
+
+/* Move instructions.
+ * Entries are tried in order and the first match wins (see x86_new_insn),
+ * so the accumulator/memory-offset forms come before the general forms.
+ * Every form with a 32-bit operand size requires CPU_386, including the
+ * memory-offset forms.
+ */
+static const x86_insn_info mov_insn[] = {
+    /* Accumulator <- memory offset */
+    { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0xA1, 0, 0}, 0, 2,
+      {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* Memory offset <- accumulator */
+    { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} },
+    { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} },
+    { CPU_386, 0, 32, 1, {0xA3, 0, 0}, 0, 2,
+      {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} },
+    /* Register/memory <- register */
+    { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} },
+    { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} },
+    { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} },
+    /* Register <- register/memory */
+    { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} },
+    { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} },
+    /* TODO: segreg here */
+    /* Register <- immediate (register number added to opcode) */
+    { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Need two sets here, one for strictness on left side, one for right. */
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} },
+    { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2,
+      {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} },
+    { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2,
+      {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} },
+    /* Control and debug register moves */
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2,
+      {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2,
+      {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} },
+    { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2,
+      {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} }
+};
+
+/* Move with sign/zero extend */
+static const x86_insn_info movszx_insn[] = {
+ { CPU_386, MOD_Op1Add, 16, 2, {0x0F, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} },
+ { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 0, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} },
+ { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 1, 0}, 0, 2,
+ {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} }
+};
+
+
+/* Build an x86 instruction bytecode from lexer data and parsed operands.
+ *
+ * data[0] is the address of an x86_insn_info table, the low byte of data[1]
+ * is the number of entries in that table, the remaining bits of data[1] are
+ * the modifier parameters, and data[2] holds additional CPU feature flags
+ * (see DEF_INSN_DATA).  The table is searched linearly; the first entry
+ * whose CPU requirements, operand count, operand types and operand sizes all
+ * match is used.  The contents of each operand are then either handed to the
+ * new instruction (ea, imm, spare, opcode byte 0) or, for OPA_None operands,
+ * deleted.
+ *
+ * Returns the bytecode created by x86_bc_new_insn(), or NULL after reporting
+ * an error if no table entry matches.
+ */
+bytecode *
+x86_new_insn(const unsigned long data[4], int num_operands,
+             insn_operandhead *operands)
+{
+    x86_new_insn_data d;
+    int num_info = (int)(data[1]&0xFF);
+    /* The instruction tables are static const, so keep the pointer const. */
+    const x86_insn_info *info = (const x86_insn_info *)data[0];
+    unsigned long mod_data = data[1] >> 8;
+    int found = 0;
+    insn_operand *op;
+    int i;
+    /* Maps the OPS_* size field (0-7) to a size in bytes; 0 = any size. */
+    static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0};
+
+    /* Just do a simple linear search through the info array for a match.
+     * First match wins.
+     */
+    for (; num_info>0 && !found; num_info--, info++) {
+        unsigned long cpu;
+        unsigned int size;
+        int mismatch = 0;
+
+        /* Match CPU */
+        cpu = info->cpu | data[2];
+        if ((cpu_enabled & cpu) != cpu)
+            continue;
+
+        /* Match # of operands */
+        if (num_operands != info->num_operands)
+            continue;
+
+        if (!operands) {
+            found = 1;      /* no operands -> must have a match here. */
+            break;
+        }
+
+        /* Match each operand type and size */
+        for(i = 0, op = ops_first(operands); op && i<info->num_operands &&
+            !mismatch; op = ops_next(op), i++) {
+            /* Check operand type */
+            switch (info->operands[i] & OPT_MASK) {
+                case OPT_Imm:
+                    if (op->type != INSN_OPERAND_IMM)
+                        mismatch = 1;
+                    break;
+                case OPT_Reg:
+                    if (op->type != INSN_OPERAND_REG)
+                        mismatch = 1;
+                    else {
+                        /* CR/DR/TR registers have their own operand types */
+                        size = op->data.reg & ~7;
+                        if (size == X86_CRREG || size == X86_DRREG ||
+                            size == X86_TRREG)
+                            mismatch = 1;
+                    }
+                    break;
+                case OPT_Mem:
+                    if (op->type != INSN_OPERAND_MEMORY)
+                        mismatch = 1;
+                    break;
+                case OPT_RM:
+                    if (op->type != INSN_OPERAND_REG &&
+                        op->type != INSN_OPERAND_MEMORY)
+                        mismatch = 1;
+                    break;
+                case OPT_SegReg:
+                    if (op->type != INSN_OPERAND_SEGREG)
+                        mismatch = 1;
+                    break;
+                case OPT_CRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_CRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_DRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_DRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_TRReg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        (op->data.reg & ~7) != X86_TRREG)
+                        mismatch = 1;
+                    break;
+                case OPT_ST0:
+                    if (op->type != INSN_OPERAND_REG ||
+                        op->data.reg != X86_FPUREG)
+                        mismatch = 1;
+                    break;
+                case OPT_Areg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                         op->data.reg != (X86_REG8 | 0)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                         op->data.reg != (X86_REG16 | 0)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                         op->data.reg != (X86_REG32 | 0)))
+                        mismatch = 1;
+                    break;
+                case OPT_Creg:
+                    if (op->type != INSN_OPERAND_REG ||
+                        ((info->operands[i] & OPS_MASK) == OPS_8 &&
+                         op->data.reg != (X86_REG8 | 1)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_16 &&
+                         op->data.reg != (X86_REG16 | 1)) ||
+                        ((info->operands[i] & OPS_MASK) == OPS_32 &&
+                         op->data.reg != (X86_REG32 | 1)))
+                        mismatch = 1;
+                    break;
+                case OPT_CR4:
+                    if (op->type != INSN_OPERAND_REG ||
+                        op->data.reg != (X86_CRREG | 4))
+                        mismatch = 1;
+                    break;
+                case OPT_MemOffs:
+                    if (op->type != INSN_OPERAND_MEMORY ||
+                        expr_contains(ea_get_disp(op->data.ea), EXPR_REG))
+                        mismatch = 1;
+                    break;
+                default:
+                    InternalError(_("invalid operand type"));
+            }
+
+            if (mismatch)
+                break;
+
+            /* Check operand size */
+            size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT];
+            if (op->type == INSN_OPERAND_REG && op->size == 0) {
+                /* Register size must exactly match */
+                if (x86_get_reg_size(op->data.reg) != size)
+                    mismatch = 1;
+            } else {
+                if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) {
+                    /* Relaxed checking */
+                    if (size != 0 && op->size != size && op->size != 0)
+                        mismatch = 1;
+                } else {
+                    /* Strict checking */
+                    if (op->size != size)
+                        mismatch = 1;
+                }
+            }
+        }
+
+        if (!mismatch) {
+            found = 1;
+            break;
+        }
+    }
+
+    if (!found) {
+        /* Didn't find a matching one */
+        /* FIXME: This needs to be more descriptive of certain reasons for a
+         * mismatch.  E.g.:
+         *  "mismatch in operand sizes"
+         *  "operand size not specified"
+         * etc.  This will probably require adding dummy error catchers in the
+         * insn list which are only looked at if we get here.
+         */
+        Error(_("invalid combination of opcode and operands"));
+        return NULL;
+    }
+
+    /* Copy what we can from info */
+    d.ea = NULL;
+    d.imm = NULL;
+    d.opersize = info->opersize;
+    d.op_len = info->opcode_len;
+    d.op[0] = info->opcode[0];
+    d.op[1] = info->opcode[1];
+    d.op[2] = info->opcode[2];
+    d.spare = info->spare;
+    d.im_len = 0;
+    d.im_sign = 0;
+
+    /* Apply modifiers.  Each modifier present consumes the next byte of
+     * mod_data, in the fixed order below.
+     */
+    if (info->modifiers & MOD_Op2Add) {
+        d.op[2] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap0)
+        mod_data >>= 8;
+    if (info->modifiers & MOD_Op1Add) {
+        d.op[1] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Gap1)
+        mod_data >>= 8;
+    if (info->modifiers & MOD_Op0Add) {
+        d.op[0] += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_SpAdd) {
+        d.spare += (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_OpSizeR) {
+        d.opersize = (unsigned char)(mod_data & 0xFF);
+        mod_data >>= 8;
+    }
+    if (info->modifiers & MOD_Imm8) {
+        d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF)));
+        d.im_len = 1;
+        /*mod_data >>= 8;*/
+    }
+
+    /* Go through operands and assign */
+    if (operands) {
+        for(i = 0, op = ops_first(operands); op && i<info->num_operands;
+            op = ops_next(op), i++) {
+            switch (info->operands[i] & OPA_MASK) {
+                case OPA_None:
+                    /* Throw away the operand contents */
+                    switch (op->type) {
+                        case INSN_OPERAND_REG:
+                        case INSN_OPERAND_SEGREG:
+                            break;
+                        case INSN_OPERAND_MEMORY:
+                            ea_delete(op->data.ea);
+                            break;
+                        case INSN_OPERAND_IMM:
+                            expr_delete(op->data.val);
+                            break;
+                    }
+                    break;
+                case OPA_EA:
+                    switch (op->type) {
+                        case INSN_OPERAND_REG:
+                            d.ea = x86_ea_new_reg((unsigned char)op->data.reg);
+                            break;
+                        case INSN_OPERAND_SEGREG:
+                            InternalError(_("invalid operand conversion"));
+                            /* Never treat a segment register as memory,
+                             * even if InternalError() were to return.
+                             */
+                            break;
+                        case INSN_OPERAND_MEMORY:
+                            d.ea = op->data.ea;
+                            if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) {
+                                /* Special-case for MOV MemOffs instruction */
+                                x86_effaddr_data *ead = ea_get_data(d.ea);
+                                ead->valid_modrm = 0;
+                                ead->need_modrm = 0;
+                                ead->valid_sib = 0;
+                                ead->need_sib = 0;
+                            }
+                            break;
+                        case INSN_OPERAND_IMM:
+                            d.ea = x86_ea_new_imm(op->data.val,
+                                size_lookup[(info->operands[i] &
+                                            OPS_MASK)>>OPS_SHIFT]);
+                            break;
+                    }
+                    break;
+                case OPA_Imm:
+                    if (op->type == INSN_OPERAND_IMM) {
+                        d.imm = op->data.val;
+                        d.im_len = size_lookup[(info->operands[i] &
+                                               OPS_MASK)>>OPS_SHIFT];
+                    } else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                case OPA_Spare:
+                    if (op->type == INSN_OPERAND_REG ||
+                        op->type == INSN_OPERAND_SEGREG)
+                        d.spare = (unsigned char)(op->data.reg&7);
+                    else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                case OPA_Op0Add:
+                    if (op->type == INSN_OPERAND_REG)
+                        d.op[0] += (unsigned char)(op->data.reg&7);
+                    else
+                        InternalError(_("invalid operand conversion"));
+                    break;
+                default:
+                    InternalError(_("unknown operand action"));
+            }
+        }
+    }
+
+    /* Create the bytecode and return it */
+    return x86_bc_new_insn(&d);
+}
+
+
+#define YYCTYPE char
+#define YYCURSOR id
+#define YYLIMIT id
+#define YYMARKER marker
+#define YYFILL(n)
+
+/*!re2c
+ any = [\000-\377];
+ A = [aA];
+ B = [bB];
+ C = [cC];
+ D = [dD];
+ E = [eE];
+ F = [fF];
+ G = [gG];
+ H = [hH];
+ I = [iI];
+ J = [jJ];
+ K = [kK];
+ L = [lL];
+ M = [mM];
+ N = [nN];
+ O = [oO];
+ P = [pP];
+ Q = [qQ];
+ R = [rR];
+ S = [sS];
+ T = [tT];
+ U = [uU];
+ V = [vV];
+ W = [wW];
+ X = [xX];
+ Y = [yY];
+ Z = [zZ];
+*/
+
+/* Change the set of enabled CPU features (cpu_enabled) according to the
+ * identifier string id, matched case-insensitively by the re2c rules below:
+ *  - a CPU name replaces cpu_enabled with that CPU's feature set;
+ *  - a feature name sets the corresponding bit, and the same name prefixed
+ *    with "NO" clears it, leaving all other bits alone;
+ *  - anything else produces an "unrecognized CPU identifier" warning and
+ *    leaves cpu_enabled unchanged.
+ */
+void
+x86_switch_cpu(const char *id)
+{
+ const char *marker;
+
+ /*!re2c
+ /* The standard CPU names /set/ cpu_enabled. */
+ "8086" {
+ cpu_enabled = CPU_Priv;
+ return;
+ }
+ ("80" | I)? "186" {
+ cpu_enabled = CPU_186|CPU_Priv;
+ return;
+ }
+ ("80" | I)? "286" {
+ cpu_enabled = CPU_186|CPU_286|CPU_Priv;
+ return;
+ }
+ ("80" | I)? "386" {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ ("80" | I)? "486" {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM|
+ CPU_Prot|CPU_Priv;
+ return;
+ }
+ (I? "586") | (P E N T I U M) | (P "5") {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU|
+ CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ (I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ (P "2") | (P E N T I U M "-"? ("2" | (I I))) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ (P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot|
+ CPU_Priv;
+ return;
+ }
+ (P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2|
+ CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ (I A "-"? "64") | (I T A N I U M) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE|
+ CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ K "6" {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot|
+ CPU_Priv;
+ return;
+ }
+ A T H L O N {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow|
+ CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+ (S L E D G E)? (H A M M E R) {
+ cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|
+ CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|
+ CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv;
+ return;
+ }
+
+ /* Features have "no" versions to disable them, and only set/reset the
+ * specific feature being changed. All other bits are left alone.
+ */
+ F P U { cpu_enabled |= CPU_FPU; return; }
+ N O F P U { cpu_enabled &= ~CPU_FPU; return; }
+ M M X { cpu_enabled |= CPU_MMX; return; }
+ N O M M X { cpu_enabled &= ~CPU_MMX; return; }
+ S S E { cpu_enabled |= CPU_SSE; return; }
+ N O S S E { cpu_enabled &= ~CPU_SSE; return; }
+ S S E "2" { cpu_enabled |= CPU_SSE2; return; }
+ N O S S E "2" { cpu_enabled &= ~CPU_SSE2; return; }
+ "3" D N O W { cpu_enabled |= CPU_3DNow; return; }
+ N O "3" D N O W { cpu_enabled &= ~CPU_3DNow; return; }
+ C Y R I X { cpu_enabled |= CPU_Cyrix; return; }
+ N O C Y R I X { cpu_enabled &= ~CPU_Cyrix; return; }
+ A M D { cpu_enabled |= CPU_AMD; return; }
+ N O A M D { cpu_enabled &= ~CPU_AMD; return; }
+ S M M { cpu_enabled |= CPU_SMM; return; }
+ N O S M M { cpu_enabled &= ~CPU_SMM; return; }
+ P R O T { cpu_enabled |= CPU_Prot; return; }
+ N O P R O T { cpu_enabled &= ~CPU_Prot; return; }
+ U N D O C { cpu_enabled |= CPU_Undoc; return; }
+ N O U N D O C { cpu_enabled &= ~CPU_Undoc; return; }
+ O B S { cpu_enabled |= CPU_Obs; return; }
+ N O O B S { cpu_enabled &= ~CPU_Obs; return; }
+ P R I V { cpu_enabled |= CPU_Priv; return; }
+ N O P R I V { cpu_enabled &= ~CPU_Priv; return; }
+
+ /* catchalls */
+ [A-Za-z0-9]+ {
+ Warning(_("unrecognized CPU identifier `%s'"), id);
+ return;
+ }
+ any {
+ Warning(_("unrecognized CPU identifier `%s'"), id);
+ return;
+ }
+ */
+}
+
+/* Classify the identifier string id against the x86 keyword rules below.
+ *
+ * On a match, data[] is filled in and an ARCH_CHECK_ID_* code is returned:
+ *  - target modifiers: data[0] = X86_NEAR, X86_SHORT or X86_FAR
+ *  - prefixes: data[0] = prefix class (X86_OPERSIZE, X86_ADDRSIZE,
+ *    X86_LOCKREP); data[1] = size in bits or the prefix byte
+ *  - registers: data[0] = register class ORed with the register number
+ *  - segment registers: data[0] = per-register constant (presumably the
+ *    segment override byte in the high byte and the register number in the
+ *    low byte -- confirm against the users of ARCH_CHECK_ID_SEGREG)
+ *  - instructions: handled by the RET_INSN() macro, defined elsewhere
+ * Any other identifier returns ARCH_CHECK_ID_NONE and leaves data[] alone.
+ *
+ * oid keeps the original start of the identifier so that rules can index
+ * individual characters of the matched text (e.g. the register digit).
+ */
+arch_check_id_retval
+x86_check_identifier(unsigned long data[4], const char *id)
+{
+ const char *oid = id;
+ const char *marker;
+ /*!re2c
+ /* target modifiers */
+ N E A R {
+ data[0] = X86_NEAR;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+ S H O R T {
+ data[0] = X86_SHORT;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+ F A R {
+ data[0] = X86_FAR;
+ return ARCH_CHECK_ID_TARGETMOD;
+ }
+
+ /* operand size overrides */
+ O "16" {
+ data[0] = X86_OPERSIZE;
+ data[1] = 16;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ O "32" {
+ data[0] = X86_OPERSIZE;
+ data[1] = 32;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ /* address size overrides */
+ A "16" {
+ data[0] = X86_ADDRSIZE;
+ data[1] = 16;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ A "32" {
+ data[0] = X86_ADDRSIZE;
+ data[1] = 32;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+
+ /* instruction prefixes */
+ L O C K {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF0;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P N E {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF2;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P N Z {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF2;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ /* REPE/REPZ share the 0xF3 prefix byte with REP (0xF4 is HLT). */
+ R E P E {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+ R E P Z {
+ data[0] = X86_LOCKREP;
+ data[1] = 0xF3;
+ return ARCH_CHECK_ID_PREFIX;
+ }
+
+ /* control, debug, and test registers */
+ C R [02-4] {
+ data[0] = X86_CRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ D R [0-7] {
+ data[0] = X86_DRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ T R [0-7] {
+ data[0] = X86_TRREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+
+ /* floating point, MMX, and SSE/SSE2 registers */
+ S T [0-7] {
+ data[0] = X86_FPUREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ M M [0-7] {
+ data[0] = X86_MMXREG | (oid[2]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+ /* "xmmN" has a three-letter name, so the digit is at index 3. */
+ X M M [0-7] {
+ data[0] = X86_XMMREG | (oid[3]-'0');
+ return ARCH_CHECK_ID_REG;
+ }
+
+ /* integer registers */
+ E A X { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; }
+ E C X { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; }
+ E D X { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; }
+ E B X { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; }
+ E S P { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; }
+ E B P { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; }
+ E S I { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; }
+ E D I { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; }
+
+ A X { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; }
+ C X { data[0] = X86_REG16 | 1; return ARCH_CHECK_ID_REG; }
+ D X { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; }
+ B X { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; }
+ S P { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; }
+ B P { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; }
+ S I { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; }
+ D I { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; }
+
+ A L { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; }
+ C L { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; }
+ D L { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; }
+ B L { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; }
+ A H { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; }
+ C H { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; }
+ D H { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; }
+ B H { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; }
+
+ /* segment registers */
+ E S { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; }
+ C S { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; }
+ S S { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; }
+ D S { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; }
+ F S { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; }
+ G S { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; }
+
+ /* instructions */
+
+ /* Move */
+ M O V { RET_INSN(mov, 0, CPU_Any); }
+ /* Move with sign/zero extend */
+ M O V S X { RET_INSN(movszx, 0xBE, CPU_386); }
+ M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); }
+ /* Push instructions */
+ /* P U S H */
+ P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); }
+ P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); }
+ P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); }
+ /* Pop instructions */
+ /* P O P */
+ P O P A { RET_INSN(onebyte, 0x0061, CPU_186); }
+ P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); }
+ P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); }
+ /* Exchange */
+ /* X C H G */
+ /* In/out from ports */
+ /* I N */
+ /* O U T */
+ /* Load effective address */
+ /* L E A */
+ /* Load segment registers from memory */
+ /* L D S */
+ /* L E S */
+ /* L F S */
+ /* L G S */
+ /* L S S */
+ /* Flags register instructions */
+ C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); }
+ C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); }
+ C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); }
+ C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); }
+ C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); }
+ L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); }
+ S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); }
+ P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); }
+ P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); }
+ P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); }
+ P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); }
+ P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); }
+ P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); }
+ S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); }
+ S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); }
+ S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); }
+ /* Arithmetic */
+ /* A D D */
+ /* I N C */
+ /* S U B */
+ /* D E C */
+ /* S B B */
+ /* C M P */
+ /* T E S T */
+ /* A N D */
+ /* O R */
+ /* X O R */
+ /* A D C */
+ /* N E G */
+ /* N O T */
+ A A A { RET_INSN(onebyte, 0x0037, CPU_Any); }
+ A A S { RET_INSN(onebyte, 0x003F, CPU_Any); }
+ D A A { RET_INSN(onebyte, 0x0027, CPU_Any); }
+ D A S { RET_INSN(onebyte, 0x002F, CPU_Any); }
+ /* A A D */
+ /* A A M */
+ /* Conversion instructions */
+ C B W { RET_INSN(onebyte, 0x1098, CPU_Any); }
+ C W D E { RET_INSN(onebyte, 0x2098, CPU_386); }
+ C W D { RET_INSN(onebyte, 0x1099, CPU_Any); }
+ C D Q { RET_INSN(onebyte, 0x2099, CPU_386); }
+ /* Multiplication and division */
+ /* M U L */
+ /* I M U L */
+ /* D I V */
+ /* I D I V */
+ /* Shifts */
+ /* R O L */
+ /* R O R */
+ /* R C L */
+ /* R C R */
+ /* S A L */
+ /* S H L */
+ /* S H R */
+ /* S A R */
+ /* S H L D */
+ /* S H R D */
+ /* Control transfer instructions (unconditional) */
+ /* C A L L */
+ /* J M P */
+ R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); }
+ /* R E T N */
+ /* R E T F */
+ /* E N T E R */
+ L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); }
+ /* Conditional jumps */
+ /* J O */
+ /* J N O */
+ /* J B */
+ /* JC */
+ /* J N A E */
+ /* J N B */
+ /* J N C */
+ /* J A E */
+ /* J E */
+ /* J Z */
+ /* J N E */
+ /* J N Z */
+ /* J B E */
+ /* J N A */
+ /* J N B E */
+ /* J A */
+ /* J S */
+ /* J N S */
+ /* J P */
+ /* J P E */
+ /* J N P */
+ /* J P O */
+ /* J L */
+ /* J N G E */
+ /* J N L */
+ /* J G E */
+ /* J L E */
+ /* J N G */
+ /* J N L E */
+ /* J G */
+ /* J C X Z */
+ /* J E C X Z */
+ /* Loop instructions */
+ /* L O O P */
+ /* L O O P Z */
+ /* L O O P E */
+ /* L O O P N Z */
+ /* L O O P N E */
+ /* Set byte on flag instructions */
+ /* S E T O */
+ /* S E T N O */
+ /* S E T B */
+ /* S E T C */
+ /* S E T N A E */
+ /* S E T N B */
+ /* S E T N C */
+ /* S E T A E */
+ /* S E T E */
+ /* S E T Z */
+ /* S E T N E */
+ /* S E T N Z */
+ /* S E T B E */
+ /* S E T N A */
+ /* S E T N B E */
+ /* S E T A */
+ /* S E T S */
+ /* S E T N S */
+ /* S E T P */
+ /* S E T P E */
+ /* S E T N P */
+ /* S E T P O */
+ /* S E T L */
+ /* S E T N G E */
+ /* S E T N L */
+ /* S E T G E */
+ /* S E T L E */
+ /* S E T N G */
+ /* S E T N L E */
+ /* S E T G */
+ /* String instructions. */
+ C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); }
+ C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); }
+ /* C M P S D */
+ I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); }
+ I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); }
+ I N S D { RET_INSN(onebyte, 0x206D, CPU_386); }
+ O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); }
+ O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); }
+ O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); }
+ L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); }
+ L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); }
+ L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); }
+ M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); }
+ M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); }
+ /* M O V S D */
+ S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); }
+ S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); }
+ S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); }
+ S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); }
+ S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); }
+ S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); }
+ X L A T B? { RET_INSN(onebyte, 0x00D7, CPU_Any); }
+ /* Bit manipulation */
+ /* B S F */
+ /* B S R */
+ /* B T */
+ /* B T C */
+ /* B T R */
+ /* B T S */
+ /* Interrupts and operating system instructions */
+ /* I N T */
+ I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+ I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); }
+ I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); }
+ I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); }
+ I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); }
+ I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); }
+ R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); }
+ /* B O U N D */
+ H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); }
+ N O P { RET_INSN(onebyte, 0x0090, CPU_Any); }
+ /* Protection control */
+ /* A R P L */
+ /* L A R */
+ L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); }
+ L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); }
+ /* L L D T */
+ /* L M S W */
+ /* L S L */
+ /* L T R */
+ S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); }
+ S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); }
+ /* S L D T */
+ /* S M S W */
+ /* S T R */
+ /* V E R R */
+ /* V E R W */
+ /* Floating point instructions */
+ /* F L D */
+ /* F I L D */
+ /* F B L D */
+ /* F S T */
+ /* F I S T */
+ /* F S T P */
+ /* F I S T P */
+ /* F B S T P */
+ /* F X C H */
+ /* F C O M */
+ /* F I C O M */
+ /* F C O M P */
+ /* F I C O M P */
+ F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); }
+ /* F U C O M */
+ /* F U C O M P */
+ F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); }
+ F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); }
+ F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); }
+ F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); }
+ F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); }
+ F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); }
+ F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); }
+ F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); }
+ F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); }
+ F L D Z { RET_INSN(twobyte, 0xD9EE, CPU_FPU); }
+ /* F A D D */
+ /* F A D D P */
+ /* F I A D D */
+ /* F S U B */
+ /* F I S U B */
+ /* F S U B P */
+ /* F S U B R */
+ /* F I S U B R */
+ /* F S U B R P */
+ /* F M U L */
+ /* F I M U L */
+ /* F M U L P */
+ /* F D I V */
+ /* F I D I V */
+ /* F D I V P */
+ /* F D I V R */
+ /* F I D I V R */
+ /* F D I V R P */
+ F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); }
+ F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); }
+ F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); }
+ F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); }
+ F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); }
+ F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); }
+ F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); }
+ F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); }
+ F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); }
+ F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); }
+ F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); }
+ F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); }
+ F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); }
+ F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); }
+ F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); }
+ F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); }
+ F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); }
+ F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); }
+ F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); }
+ /* FINIT is WAIT (0x9B) followed by FNINIT; 0x98 would be CBW. */
+ F I N I T { RET_INSN(threebyte, 0x9BDBE3, CPU_FPU); }
+ /* F L D C W */
+ /* F N S T C W */
+ /* F S T C W */
+ /* F N S T S W */
+ /* F S T S W */
+ F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); }
+ /* FCLEX is WAIT (0x9B) followed by FNCLEX. */
+ F C L E X { RET_INSN(threebyte, 0x9BDBE2, CPU_FPU); }
+ F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); }
+ F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); }
+ F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); }
+ F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); }
+ F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); }
+ F R S T O R { RET_INSN(onebytemem, 0x04DD, CPU_FPU); }
+ /* F F R E E */
+ /* F F R E E P */
+ F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); }
+ F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); }
+ /* Prefixes (should the others be here too? should wait be a prefix?) */
+ W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); }
+ /* 486 extensions */
+ /* B S W A P */
+ /* X A D D */
+ /* C M P X C H G */
+ /* C M P X C H G 4 8 6 */
+ I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); }
+ W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); }
+ I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); }
+ /* 586+ and late 486 extensions */
+ C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); }
+ /* Pentium extensions */
+ W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); }
+ R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); }
+ R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); }
+ /* C M P X C H G 8 B */
+ /* Pentium II/Pentium Pro extensions */
+ S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); }
+ S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); }
+ F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); }
+ F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); }
+ R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); }
+ U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); }
+ U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); }
+ /* C M O V */
+ /* F C M O V */
+ /* F C O M I */
+ /* F U C O M I */
+ /* F C O M I P */
+ /* F U C O M I P */
+ /* Pentium4 extensions */
+ /* M O V N T I */
+ /* C L F L U S H */
+ L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); }
+ M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); }
+ P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); }
+ /* MMX/SSE2 instructions */
+ E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); }
+ /* PIII (Katmai) new instructions / SIMD instructions */
+ /* ... */
+ P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); }
+ P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); }
+ P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); }
+ P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); }
+ /* ... */
+ S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); }
+ /* ... */
+ /* SSE2 instructions */
+ /* AMD 3DNow! instructions */
+ P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+ P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); }
+ F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); }
+ /* ... */
+ /* AMD extensions */
+ S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); }
+ S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); }
+ /* Cyrix MMX instructions */
+ /* Cyrix extensions */
+ R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); }
+ /* R S D C */
+ /* R S L D T */
+ /* R S T S */
+ /* S V D C */
+ /* S V L D T */
+ /* S V T S */
+ S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); }
+ S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); }
+ W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); }
+ /* Obsolete/undocumented instructions */
+ F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); }
+ /* I B T S */
+ L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); }
+ L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); }
+ S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); }
+ S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); }
+ /* U M O V */
+ /* X B T S */
+
+
+ /* catchalls */
+ [A-Za-z0-9]+ {
+ return ARCH_CHECK_ID_NONE;
+ }
+ any {
+ return ARCH_CHECK_ID_NONE;
+ }
+ */
+}
return im;
}
+/* Accessor: hand back the displacement expression stored in the effective
+ * address.  The returned pointer is the effaddr's own member, not a copy.
+ */
+const expr *
+ea_get_disp(const effaddr *ptr)
+{
+ const expr *displacement = ptr->disp;
+
+ return displacement;
+}
+
void
ea_set_len(effaddr *ptr, unsigned char len)
{
ptr->nosplit = nosplit;
}
+/*@-nullstate@*/
+/* Destroy an effective address.  The current architecture's ea_data_delete
+ * hook is called first when one is installed; afterwards the displacement
+ * expression is deleted and the effaddr itself is freed.
+ */
+void
+ea_delete(effaddr *ea)
+{
+ if (cur_arch->ea_data_delete != NULL)
+ cur_arch->ea_data_delete(ea);
+
+ /* The hook above runs while ea and its displacement are still intact. */
+ expr_delete(ea->disp);
+ xfree(ea);
+}
+/*@=nullstate@*/
+
+/*@-nullstate@*/
+/* Write a description of an effective address to f, indented by
+ * indent_level: the displacement expression, then the Len and NoSplit
+ * fields, then whatever the current architecture's ea_data_print hook adds
+ * (if one is installed).
+ */
+void
+ea_print(FILE *f, const effaddr *ea)
+{
+ const unsigned int len_value = (unsigned int)ea->len;
+ const unsigned int nosplit_value = (unsigned int)ea->nosplit;
+
+ fprintf(f, "%*sDisp=", indent_level, "");
+ expr_print(f, ea->disp);
+ fprintf(f, "\n%*sLen=%u\n", indent_level, "", len_value);
+ fprintf(f, "%*sNoSplit=%u\n", indent_level, "", nosplit_value);
+
+ if (cur_arch->ea_data_print != NULL)
+ cur_arch->ea_data_print(f, ea);
+}
+/*@=nullstate@*/
+
void
bc_set_multiple(bytecode *bc, expr *e)
{
break;
case BC_OBJFMT_DATA:
objfmt_data = bc_get_data(bc);
+ assert(cur_objfmt != NULL);
if (cur_objfmt->bc_objfmt_data_delete)
cur_objfmt->bc_objfmt_data_delete(objfmt_data->type,
objfmt_data->data);
case BC_OBJFMT_DATA:
objfmt_data = bc_get_const_data(bc);
fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, "");
+ assert(cur_objfmt != NULL);
if (cur_objfmt->bc_objfmt_data_print)
cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type,
objfmt_data->data);
expr_expand_labelequ(*tempp, sect, 1, resolve_label);
num = expr_get_intnum(tempp);
if (!num) {
- if (expr_contains(temp, EXPR_FLOAT))
+ if (temp && expr_contains(temp, EXPR_FLOAT))
ErrorAt(line,
_("expression must not contain floating point value"));
retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
case BC_ALIGN:
/* TODO */
InternalError(_("TODO: align bytecode not implemented!"));
- break;
+ /*break;*/
case BC_OBJFMT_DATA:
InternalError(_("resolving objfmt data bytecode?"));
- break;
+ /*break;*/
default:
if (bc->type < cur_arch->bc.type_max)
retval = cur_arch->bc.bc_resolve(bc, save, sect,
expr_expand_labelequ(*tempp, sect, 1, resolve_label);
num = expr_get_intnum(tempp);
if (!num) {
- if (expr_contains(temp, EXPR_FLOAT))
+ if (temp && expr_contains(temp, EXPR_FLOAT))
ErrorAt(bc->line,
_("expression must not contain floating point value"));
retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN;
case BC_ALIGN:
/* TODO */
InternalError(_("TODO: align bytecode not implemented!"));
- break;
+ /*break;*/
case BC_OBJFMT_DATA:
objfmt_data = bc_get_data(bc);
if (output_bc_objfmt_data)
/*@only@*/ immval *imm_new_int(unsigned long int_val);
/*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e);
+/*@observer@*/ const expr *ea_get_disp(const effaddr *ea);
void ea_set_len(effaddr *ea, unsigned char len);
void ea_set_nosplit(effaddr *ea, unsigned char nosplit);
+void ea_delete(/*@only@*/ effaddr *ea);
+void ea_print(FILE *f, const effaddr *ea);
void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e);
expr *expn;
intnum *intn;
floatnum *flt;
- /* FIXME: reg structure is moderately x86-specific (namely size) */
- struct reg {
- unsigned char num;
- unsigned char size; /* in bits, eg AX=16, EAX=32 */
- } reg;
+ unsigned long reg;
} data;
};
*
* Stops early (and returns 1) if func returns 1. Otherwise returns 0.
*/
+int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d,
+ int (*func) (/*@null@*/ const ExprItem *ei,
+ /*@null@*/ void *d));
int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d,
int (*func) (/*@null@*/ ExprItem *ei,
/*@null@*/ void *d));
/* Copy entire expression EXCEPT for index "except" at *top level only*. */
expr *expr_copy_except(const expr *e, int except);
-int expr_contains(expr *e, ExprType t);
+int expr_contains(const expr *e, ExprType t);
#endif
#include "expr.h"
#include "symrec.h"
+#include "bytecode.h"
#include "section.h"
+#include "arch.h"
+
#include "expr-int.h"
}
ExprItem *
-ExprReg(unsigned char reg, unsigned char size)
+ExprReg(unsigned long reg)
{
ExprItem *e = xmalloc(sizeof(ExprItem));
e->type = EXPR_REG;
- e->data.reg.num = reg;
- e->data.reg.size = size;
+ e->data.reg = reg;
return e;
}
dest->data.flt = floatnum_copy(src->data.flt);
break;
case EXPR_REG:
- dest->data.reg.num = src->data.reg.num;
- dest->data.reg.size = src->data.reg.size;
+ dest->data.reg = src->data.reg;
break;
default:
break;
/*@=mustfree@*/
static int
-expr_contains_callback(ExprItem *ei, void *d)
+expr_contains_callback(const ExprItem *ei, void *d)
{
ExprType *t = d;
return (ei->type & *t);
}
int
-expr_contains(expr *e, ExprType t)
+expr_contains(const expr *e, ExprType t)
{
- return expr_traverse_leaves_in(e, &t, expr_contains_callback);
+ return expr_traverse_leaves_in_const(e, &t, expr_contains_callback);
}
/* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like
return func(e, d);
}
+/* Read-only, in-order walk over the leaves (non-EXPR_EXPR terms) of an
+ * expression tree.  func is invoked for every leaf with the caller's data
+ * pointer d; nested subexpressions are descended into recursively.
+ *
+ * Returns 1 as soon as func returns nonzero for some leaf (the rest of the
+ * tree is not visited).  Returns 0 if e is NULL or no call returned nonzero.
+ */
+int
+expr_traverse_leaves_in_const(const expr *e, void *d,
+ int (*func) (/*@null@*/ const ExprItem *ei,
+ /*@null@*/ void *d))
+{
+ int idx;
+
+ if (e == NULL)
+ return 0;
+
+ for (idx = 0; idx < e->numterms; idx++) {
+ const ExprItem *term = &e->terms[idx];
+ int stop;
+
+ if (term->type == EXPR_EXPR)
+ stop = expr_traverse_leaves_in_const(term->data.expn, d, func);
+ else
+ stop = func(term, d);
+
+ if (stop)
+ return 1;
+ }
+ return 0;
+}
+
/* Traverse over expression tree in order, calling func for each leaf
* (non-operation). The data pointer d is passed to each func call.
*
}
/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
+/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/
+/* If the expression *ep is nothing but a single register (an EXPR_IDENT
+ * whose first term is an EXPR_REG), return a pointer to the register value
+ * stored inside that expression; otherwise return NULL.  When simplify is
+ * nonzero, *ep is first replaced by the result of expr_simplify().
+ */
+const unsigned long *
+expr_get_reg(expr **ep, int simplify)
+{
+ expr *e;
+
+ if (simplify)
+ *ep = expr_simplify(*ep);
+ e = *ep;
+
+ /* Anything other than a bare register identifier is rejected. */
+ if (e->op != EXPR_IDENT || e->terms[0].type != EXPR_REG)
+ return NULL;
+
+ return &e->terms[0].data.reg;
+}
+/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/
+
void
expr_print(FILE *f, const expr *e)
{
- static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"};
char opstr[3];
int i;
floatnum_print(f, e->terms[i].data.flt);
break;
case EXPR_REG:
- if (e->terms[i].data.reg.size == 32)
- fprintf(f, "e");
- fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]);
+ cur_arch->reg_print(f, e->terms[i].data.reg);
break;
case EXPR_NONE:
break;
/*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *);
/*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *);
/*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *);
-/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char size);
+/*@only@*/ ExprItem *ExprReg(unsigned long reg);
#define expr_new_tree(l,o,r) \
expr_new ((o), ExprExpr(l), ExprExpr(r))
/*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep,
int simplify);
+/* Gets the register value of e if the expression is just a register. If the
+ * expression is more complex, returns NULL. Simplifies the expr first if
+ * simplify is nonzero.
+ */
+/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep,
+ int simplify);
+
void expr_print(FILE *f, /*@null@*/ const expr *);
#endif
}
/* Get initial BITS setting from object format */
- x86_mode_bits = cur_objfmt->default_mode_bits;
+ /*x86_mode_bits = cur_objfmt->default_mode_bits;*/
/* Parse! */
sections = cur_parser->do_parse(cur_parser, in, in_filename);
YASMPARSERFILES += \
src/parsers/nasm/nasm-parser.c \
src/parsers/nasm/nasm-defs.h \
- nasm-bison.y \
+ src/parsers/nasm/nasm-bison.y \
nasm-bison.h \
- nasm-token.l
-
-if DEV
-
-nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl
- $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y
-
-else
-
-nasm-token.l: $(srcdir)/nasm-token.l
- @echo Warning: Not generating nasm-token.l from nasm-token.l.in.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-token.l .
-nasm-token.c: $(srcdir)/nasm-token.c
- @echo Warning: Not generating nasm-token.c from nasm-token.l.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-token.c .
-nasm-bison.y: $(srcdir)/nasm-bison.y
- @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.y .
-nasm-bison.c: $(srcdir)/nasm-bison.c
- @echo Warning: Not generating nasm-bison.c from nasm-bison.y.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.c .
-nasm-bison.h: $(srcdir)/nasm-bison.h
- @echo Warning: Not generating nasm-bison.h from nasm-bison.y.
- @echo Run configure with --enable-dev to enable generation.
- cp $(srcdir)/nasm-bison.h .
-
-endif
+ nasm-token.c
-noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl
+nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl
+ re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@
BUILT_SOURCES += \
- nasm-bison.y \
nasm-bison.c \
nasm-bison.h \
- nasm-token.l \
nasm-token.c
CLEANFILES += \
- nasm-bison.y \
nasm-bison.c \
nasm-bison.h \
- nasm-token.l \
nasm-token.c
EXTRA_DIST += \
- src/parsers/nasm/token.l.in \
- src/parsers/nasm/bison.y.in \
- src/parsers/nasm/gen_instr.pl
+ src/parsers/nasm/nasm-token.re
+++ /dev/null
-/*
- * NASM-compatible bison parser
- *
- * Copyright (C) 2001 Peter Johnson, Michael Urman
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#ifdef STDC_HEADERS
-# include <math.h>
-#endif
-
-#include "bitvect.h"
-
-#include "globals.h"
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-#include "section.h"
-#include "objfmt.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-
-void init_table(void);
-extern int nasm_parser_lex(void);
-void nasm_parser_error(const char *);
-static void nasm_parser_directive(const char *name,
- valparamhead *valparams,
- /*@null@*/ valparamhead *objext_valparams);
-
-extern objfmt *nasm_parser_objfmt;
-extern sectionhead nasm_parser_sections;
-extern section *nasm_parser_cur_section;
-extern char *nasm_parser_locallabel_base;
-
-static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
-static bytecode *nasm_parser_temp_bc;
-
-/* additional data declarations (dynamically generated) */
-/* @DATADECLS@ */
-
-/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/
-%}
-
-%union {
- unsigned int int_info;
- char *str_val;
- intnum *intn;
- floatnum *flt;
- symrec *sym;
- unsigned char groupdata[5];
- effaddr *ea;
- expr *exp;
- immval *im_val;
- x86_targetval tgt_val;
- datavalhead datahead;
- dataval *data;
- bytecode *bc;
- valparamhead dir_valparams;
- valparam *dir_valparam;
-}
-
-%token <intn> INTNUM
-%token <flt> FLTNUM
-%token <str_val> DIRECTIVE_NAME STRING FILENAME
-%token <int_info> BYTE WORD DWORD QWORD TWORD DQWORD
-%token <int_info> DECLARE_DATA
-%token <int_info> RESERVE_SPACE
-%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
-%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
-%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
-%token <str_val> ID LOCAL_ID SPECIAL_ID
-%token LINE
-
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
-%type <sym> explabel
-%type <str_val> label_id
-%type <tgt_val> target
-%type <data> dataval
-%type <datahead> datavals
-%type <dir_valparams> directive_valparams
-%type <dir_valparam> directive_valparam
-
-%left '|'
-%left '^'
-%left '&'
-%left LEFT_OP RIGHT_OP
-%left '-' '+'
-%left '*' '/' SIGNDIV '%' SIGNMOD
-%nonassoc UNARYOP
-
-%%
-input: /* empty */
- | input line {
- nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section),
- $2);
- if (nasm_parser_temp_bc)
- nasm_parser_prev_bc = nasm_parser_temp_bc;
- line_index++;
- }
-;
-
-line: '\n' { $$ = (bytecode *)NULL; }
- | lineexp '\n'
- | LINE INTNUM '+' INTNUM FILENAME '\n' {
- /* %line indicates the line number of the *next* line, so subtract out
- * the increment when setting the line number.
- */
- line_set($5, intnum_get_uint($2)-intnum_get_uint($4),
- intnum_get_uint($4));
- intnum_delete($2);
- intnum_delete($4);
- xfree($5);
- $$ = (bytecode *)NULL;
- }
- | directive '\n' { $$ = (bytecode *)NULL; }
- | error '\n' {
- Error(_("label or instruction expected at start of line"));
- $$ = (bytecode *)NULL;
- yyerrok;
- }
-;
-
-lineexp: exp
- | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); }
- | label { $$ = (bytecode *)NULL; }
- | label exp { $$ = $2; }
- | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); }
- | label_id EQU expr {
- symrec_define_equ($1, $3);
- xfree($1);
- $$ = (bytecode *)NULL;
- }
-;
-
-exp: instr
- | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); }
- | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); }
- | INCBIN STRING { $$ = bc_new_incbin($2, NULL, NULL); }
- | INCBIN STRING ',' expr { $$ = bc_new_incbin($2, $4, NULL); }
- | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); }
-;
-
-datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); }
- | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; }
-;
-
-dataval: expr_no_string { $$ = dv_new_expr($1); }
- | STRING { $$ = dv_new_string($1); }
- | error {
- Error(_("expression syntax error"));
- $$ = (dataval *)NULL;
- }
-;
-
-label: label_id {
- symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
- 1);
- xfree($1);
- }
- | label_id ':' {
- symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc,
- 1);
- xfree($1);
- }
-;
-
-label_id: ID {
- $$ = $1;
- if (nasm_parser_locallabel_base)
- xfree(nasm_parser_locallabel_base);
- nasm_parser_locallabel_base = xstrdup($1);
- }
- | SPECIAL_ID
- | LOCAL_ID
-;
-
-/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']' {
- xfree($2);
- }
- | '[' DIRECTIVE_NAME error ']' {
- Error(_("invalid arguments to [%s]"), $2);
- xfree($2);
- }
-;
-
- /* $<str_val>0 is the DIRECTIVE_NAME */
- /* After : is (optional) object-format specific extension */
-directive_val: directive_valparams {
- nasm_parser_directive($<str_val>0, &$1, NULL);
- }
- | directive_valparams ':' directive_valparams {
- nasm_parser_directive($<str_val>0, &$1, &$3);
- }
-;
-
-directive_valparams: directive_valparam {
- vps_initialize(&$$);
- vps_append(&$$, $1);
- }
- | directive_valparams directive_valparam {
- vps_append(&$1, $2);
- $$ = $1;
- }
-;
-
-directive_valparam: direxpr {
- /* If direxpr is just an ID, put it in val and delete the expr */
- const /*@null@*/ symrec *vp_symrec;
- if ((vp_symrec = expr_get_symrec(&$1, 0))) {
- vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL);
- expr_delete($1);
- } else
- vp_new($$, NULL, $1);
- }
- | ID '=' direxpr { vp_new($$, $1, $3); }
-;
-
-/* register groupings */
-fpureg: ST0
- | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
- | DWORD reg_eax { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
- | DWORD reg_ecx { $$ = $2; }
-;
-
-rawreg32: REG_EAX
- | REG_ECX
- | REG_EDX
- | REG_EBX
- | REG_ESP
- | REG_EBP
- | REG_ESI
- | REG_EDI
-;
-
-reg32: rawreg32
- | DWORD reg32 { $$ = $2; }
-;
-
-reg_ax: REG_AX
- | WORD reg_ax { $$ = $2; }
-;
-
-reg_cx: REG_CX
- | WORD reg_cx { $$ = $2; }
-;
-
-reg_dx: REG_DX
- | WORD reg_dx { $$ = $2; }
-;
-
-rawreg16: REG_AX
- | REG_CX
- | REG_DX
- | REG_BX
- | REG_SP
- | REG_BP
- | REG_SI
- | REG_DI
-;
-
-reg16: rawreg16
- | WORD reg16 { $$ = $2; }
-;
-
-reg_al: REG_AL
- | BYTE reg_al { $$ = $2; }
-;
-
-reg_cl: REG_CL
- | BYTE reg_cl { $$ = $2; }
-;
-
-reg8: REG_AL
- | REG_CL
- | REG_DL
- | REG_BL
- | REG_AH
- | REG_CH
- | REG_DH
- | REG_BH
- | BYTE reg8 { $$ = $2; }
-;
-
-reg_es: REG_ES
- | WORD reg_es { $$ = $2; }
-;
-
-reg_ss: REG_SS
- | WORD reg_ss { $$ = $2; }
-;
-
-reg_ds: REG_DS
- | WORD reg_ds { $$ = $2; }
-;
-
-reg_fs: REG_FS
- | WORD reg_fs { $$ = $2; }
-;
-
-reg_gs: REG_GS
- | WORD reg_gs { $$ = $2; }
-;
-
-reg_cs: REG_CS
- | WORD reg_cs { $$ = $2; }
-;
-
-segreg: REG_ES
- | REG_SS
- | REG_DS
- | REG_FS
- | REG_GS
- | REG_CS
- | WORD segreg { $$ = $2; }
-;
-
-/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated? This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg. I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); }
- | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' memexpr %prec UNARYOP { $$ = $2; }
- | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' memexpr ')' { $$ = $2; }
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
- | error { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr {
- $$ = x86_ea_new_expr($1);
- x86_ea_set_segment($$, 0);
- }
- | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); }
- | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); }
- | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); }
- | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); }
- | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); }
- | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); }
- | BYTE memaddr { $$ = $2; ea_set_len($$, 1); }
- | WORD memaddr { $$ = $2; ea_set_len($$, 2); }
- | DWORD memaddr { $$ = $2; ea_set_len($$, 4); }
- | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); }
-;
-
-mem: '[' memaddr ']' { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem { $$ = $2; }
- | BYTE mem8x { $$ = $2; }
-;
-mem16x: WORD mem { $$ = $2; }
- | WORD mem16x { $$ = $2; }
-;
-mem32x: DWORD mem { $$ = $2; }
- | DWORD mem32x { $$ = $2; }
-;
-mem64x: QWORD mem { $$ = $2; }
- | QWORD mem64x { $$ = $2; }
-;
-mem80x: TWORD mem { $$ = $2; }
- | TWORD mem80x { $$ = $2; }
-;
-mem128x: DQWORD mem { $$ = $2; }
- | DQWORD mem128x { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem { $$ = $2; }
- | FAR memfar { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
- | mem8x
-;
-mem16: mem
- | mem16x
-;
-mem32: mem
- | mem32x
-;
-mem64: mem
- | mem64x
-;
-mem80: mem
- | mem80x
-;
-mem128: mem
- | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
- | mem16x
- | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8x
-;
-rm16x: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16x
-;
-rm32x: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32x
-;
-/* not needed:
-rm64x: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8
-;
-rm16: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16
-;
-rm32: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32
-;
-rm64: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64
-;
-rm128: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128
-;
-
-/* immediate values */
-imm: expr { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm { $$ = $2; }
-;
-imm16x: WORD imm { $$ = $2; }
-;
-imm32x: DWORD imm { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
- | imm8x
-;
-imm16: imm
- | imm16x
-;
-imm32: imm
- | imm32x
-;
-
-/* jump targets */
-target: expr {
- $$.val = $1;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
- }
- | SHORT target {
- $$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
- }
- | NEAR target {
- $$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
- }
-;
-
-/* expression trees */
-
-/* expr w/o FLTNUM and unary + and -, for use in directives */
-direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | ID {
- $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0)));
- xfree($1);
- }
- | direxpr '|' direxpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | direxpr '^' direxpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- | direxpr '&' direxpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- | direxpr LEFT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | direxpr '+' direxpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | direxpr '-' direxpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | direxpr '*' direxpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | direxpr '/' direxpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | direxpr SIGNDIV direxpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | direxpr '%' direxpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | direxpr SIGNMOD direxpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' direxpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' direxpr ')' { $$ = $2; }
-;
-
-expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' expr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | expr '&' expr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| expr '==' expr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| expr '>' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| expr '<' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| expr '>=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| expr '<=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| expr '!=' expr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | expr LEFT_OP expr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | expr RIGHT_OP expr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | expr '+' expr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | expr '-' expr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | expr '*' expr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | expr '/' expr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | expr SIGNDIV expr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | expr '%' expr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | expr SIGNMOD expr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' expr %prec UNARYOP { $$ = $2; }
- | '-' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' expr ')' { $$ = $2; }
-;
-
-expr: expr_no_string
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
-;
-
-explabel: ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | SPECIAL_ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | LOCAL_ID {
- $$ = symrec_use($1);
- xfree($1);
- }
- | '$' {
- $$ = symrec_define_label("$", nasm_parser_cur_section,
- nasm_parser_prev_bc, 0);
- }
- | START_SECTION_ID {
- if (section_is_absolute(nasm_parser_cur_section)) {
- Error(_("`$$' is not valid within an ABSOLUTE section"));
- YYERROR;
- } else {
- const char *ss_name = section_get_name(nasm_parser_cur_section);
- assert(ss_name != NULL);
- $$ = symrec_use(ss_name);
- }
- }
-;
-
-instr: /* empty */ {
- idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
- $$ = x86_bc_new_insn(&idata);
- }
- | instrbase
- | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
- | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
- | REG_CS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
- }
- | REG_SS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
- }
- | REG_DS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
- }
- | REG_ES instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
- }
- | REG_FS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
- }
- | REG_GS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
- }
- | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
- | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
- | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
- | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
-%%
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
-
-static void
-nasm_parser_directive(const char *name, valparamhead *valparams,
- valparamhead *objext_valparams)
-{
- valparam *vp, *vp2;
- const intnum *intn;
- long lval;
-
- assert(cur_objfmt != NULL);
-
- /* Handle (mostly) output-format independent directives here */
- if (strcasecmp(name, "extern") == 0) {
- vp = vps_first(valparams);
- if (vp->val)
- symrec_declare(vp->val, SYM_EXTERN,
- cur_objfmt->extern_data_new(vp->val,
- objext_valparams));
- else
- Error(_("invalid argument to [%s]"), "EXTERN");
- } else if (strcasecmp(name, "global") == 0) {
- vp = vps_first(valparams);
- if (vp->val)
- symrec_declare(vp->val, SYM_GLOBAL,
- cur_objfmt->global_data_new(vp->val,
- objext_valparams));
- else
- Error(_("invalid argument to [%s]"), "GLOBAL");
- } else if (strcasecmp(name, "common") == 0) {
- vp = vps_first(valparams);
- if (vp->val) {
- vp2 = vps_next(vp);
- if (!vp2 || (!vp2->val && !vp2->param))
- Error(_("no size specified in %s declaration"), "COMMON");
- else {
- if (vp2->val)
- symrec_declare(vp->val, SYM_COMMON,
- cur_objfmt->common_data_new(vp->val,
- expr_new_ident(ExprSym(symrec_use(vp2->val))),
- objext_valparams));
- else if (vp2->param) {
- symrec_declare(vp->val, SYM_COMMON,
- cur_objfmt->common_data_new(vp->val, vp2->param,
- objext_valparams));
- vp2->param = NULL;
- }
- }
- } else
- Error(_("invalid argument to [%s]"), "COMMON");
- } else if (strcasecmp(name, "section") == 0 ||
- strcasecmp(name, "segment") == 0) {
- section *new_section =
- cur_objfmt->sections_switch(&nasm_parser_sections, valparams,
- objext_valparams);
- if (new_section) {
- nasm_parser_cur_section = new_section;
- nasm_parser_prev_bc = (bytecode *)NULL;
- } else
- Error(_("invalid argument to [%s]"), "SECTION");
- } else if (strcasecmp(name, "absolute") == 0) {
- /* it can be just an ID or a complete expression, so handle both. */
- vp = vps_first(valparams);
- if (vp->val)
- nasm_parser_cur_section =
- sections_switch_absolute(&nasm_parser_sections,
- expr_new_ident(ExprSym(symrec_use(vp->val))));
- else if (vp->param) {
- nasm_parser_cur_section =
- sections_switch_absolute(&nasm_parser_sections, vp->param);
- vp->param = NULL;
- }
- nasm_parser_prev_bc = (bytecode *)NULL;
- } else if (strcasecmp(name, "bits") == 0) {
- if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
- (intn = expr_get_intnum(&vp->param)) != NULL &&
- (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
- x86_mode_bits = (unsigned char)lval;
- else
- Error(_("invalid argument to [%s]"), "BITS");
- } else if (cur_objfmt->directive(name, valparams, objext_valparams,
- &nasm_parser_sections)) {
- Error(_("unrecognized directive [%s]"), name);
- }
-
- vps_delete(valparams);
- if (objext_valparams)
- vps_delete(objext_valparams);
-}
-
-void
-nasm_parser_error(const char *s)
-{
- ParserError(s);
-}
-
+++ /dev/null
-#!/usr/bin/perl -w
-# $IdPath$
-# Generates NASM-compatible bison.y and token.l from instrs.dat.
-#
-# Copyright (C) 2001 Michael Urman
-#
-# This file is part of YASM.
-#
-# YASM is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# YASM is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-
-use strict;
-use Getopt::Long;
-my $VERSION = "0.0.1";
-
-# useful constants for instruction arrays
-# common
-use constant INST => 0;
-use constant OPERANDS => 1;
-# general format
-use constant OPSIZE => 2;
-use constant OPCODE => 3;
-use constant EFFADDR => 4;
-use constant IMM => 5;
-use constant CPU => 6;
-# relative target format
-use constant ADSIZE => 2;
-use constant SHORTOPCODE => 3;
-use constant NEAROPCODE => 4;
-use constant SHORTCPU => 5;
-use constant NEARCPU => 6;
-
-use constant TOO_MANY_ERRORS => 20;
-
-# default options
-my $instrfile = 'instrs.dat';
-my $tokenfile = 'token.l';
-my $tokensource;
-my $grammarfile = 'bison.y';
-my $grammarsource;
-my $showversion;
-my $showusage;
-my $dry_run;
-
-# allow overrides
-my $gotopts = GetOptions ( 'input=s' => \$instrfile,
- 'token=s' => \$tokenfile,
- 'sourcetoken=s' => \$tokensource,
- 'grammar=s' => \$grammarfile,
- 'sourcegrammar=s' => \$grammarsource,
- 'version' => \$showversion,
- 'n|dry-run' => \$dry_run,
- 'help|usage' => \$showusage,
- );
-
-&showusage and exit 1 unless $gotopts;
-&showversion if $showversion;
-&showusage if $showusage;
-exit 0 if $showversion or $showusage;
-
-# valid values for instrs.dat fields
-my $valid_regs = join '|', qw(
- reg_al reg_ah reg_ax reg_eax
- reg_bl reg_bh reg_bx reg_ebx
- reg_cl reg_ch reg_cx reg_ecx
- reg_dl reg_dh reg_dx reg_edx
- reg_si reg_esi reg_di reg_edi
- reg_bp reg_ebp
- reg_cs reg_ds reg_es reg_fs reg_gs reg_ss
- ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG
- fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm
- imm8 imm16 imm32 imm64 imm80 imm128
- imm8x imm16x imm32x imm64x imm80x imm128x
- rm8 rm16 rm32 rm1632 rm64 rm80 rm128
- rm8x rm16x rm32x rm1632x rm64x rm80x rm128x
- reg8 reg16 reg32 reg1632 reg64 reg80 reg128
- reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
- mem8 mem16 mem32 mem1632 mem64 mem80 mem128
- mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
- target memfar
-);
-my $valid_opcodes = join '|', qw(
- [0-9A-F]{2}
- \\$0\\.\\d
-);
-my $valid_cpus = join '|', qw(
- 8086 186 286 386 486 P4 P5 P6
- FPU MMX KATMAI SSE SSE2
- AMD ATHLON 3DNOW
- SMM
- CYRIX
- UNDOC OBS PRIV PROT
- @0 @1
-);
-
-# track errors and warnings rather than die'ing on the first.
-my (@messages, $errcount, $warncount);
-sub die_with_errors (@)
-{
- foreach (@_) { print; };
- if ($errcount)
- {
- print "Dying with errors\n";
- exit -1;
- }
-}
-
-my ($groups) = &read_instructions ($instrfile);
-
-die_with_errors @messages;
-
-exit 0 if $dry_run; # done with simple verification, so exit
-
-unless ($dry_run)
-{
- &output_lex ($tokenfile, $tokensource, $groups);
- &output_yacc ($grammarfile, $grammarsource, $groups);
-}
-
-# print version for --version, etc.
-sub showversion
-{
- print "YASM gen_instr.pl $VERSION\n";
-}
-
-# print usage information for --help, etc.
-sub showusage
-{
- print <<"EOF";
-Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile]
- -i, --input instructions file (default: $instrfile)
- -t, --token token output file (default: $tokenfile)
- -st, --sourcetoken token input file (default: $tokenfile.in)
- -g, --grammar grammar output file (default: $grammarfile)
- -sg, --sourcegrammar grammar input file (default: $grammarfile.in)
- -v, --version show version and exit
- -h, --help, --usage show this message and exit
- -n, --dry-run verify input file without writing output files
-EOF
-}
-
-# read in instructions, and verify they're valid (well, mostly)
-sub read_instructions ($)
-{
- my $instrfile = shift || die;
- open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n";
- my %instr;
- my %groups;
-
- sub add_group_rule ($$$$)
- {
- my ($inst, $args, $groups, $instrfile) = splice @_;
-
- # slide $0.\d down by one.
- # i still say changing instrs.dat would be better ;)
- $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
-
- # detect relative target format by looking for "target" in args
- if($args =~ m/target/oi)
- {
- my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
- split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
- die "Invalid Address Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Short Opcode\n"
- if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
- die "Invalid Near Opcode\n"
- if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
- die "Invalid Short CPU\n"
- if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- die "Invalid Near CPU\n"
- if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
- } else {
- my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
- die "Invalid Operation Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Opcode\n"
- if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
- die "Invalid Effective Address\n"
- if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
- die "Invalid Immediate Operand\n"
- if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
- die "Invalid CPU\n"
- if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
- }
- }
-
- sub add_group_member ($$$$$)
- {
- my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_;
-
- my ($inst, $group) = split /!/, $handle;
- my ($args, $cpu) = split /\t+/, $fullargs;
- eval {
- die "Invalid instruction name\n"
- if $inst !~ m/^\w+$/o;
- die "Invalid group name\n"
- if $group !~ m/^\w+$/o;
- die "Invalid CPU\n"
- if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n"
- unless exists $groups->{$group};
- $warncount++;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- # only allow multiple instances of instructions that aren't of a group
- push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++
- if exists $instr->{$inst} and not exists $groups->{$inst};
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu];
- $instr->{$inst} = 1;
- }
-
- while (<INPUT>)
- {
- chomp;
- next if /^\s*(?:;.*)$/;
-
- my ($handle, $args) = split /\t+/, $_, 2;
-
- # pseudo hack to handle original style instructions (no group)
- if ($handle =~ m/^\w+$/)
- {
- # TODO: this has some long ranging effects, as the eventual
- # bison rules get tagged <groupdata> when they don't need
- # to, etc. Fix this sometime.
- add_group_rule ("!$handle", $args, \%groups, $instrfile);
- add_group_member ("$handle!$handle", "", \%groups, \%instr,
- $instrfile);
- }
- elsif ($handle =~ m/^!\w+$/)
- {
- add_group_rule ($handle, $args, \%groups, $instrfile);
- }
- elsif ($handle =~ m/^\w+!\w+$/)
- {
- add_group_member ($handle, $args, \%groups, \%instr,
- $instrfile);
- }
- # TODO: consider if this is necessary: Pete?
- # (add_group_member_synonym is -not- implemented)
- #elsif ($handle =~ m/^:\w+$/)
- #{
- # add_group_member_synonym ($handle, $args);
- #}
- }
- close INPUT;
- return (\%groups);
-}
-
-sub output_lex ($@)
-{
- my $tokenfile = shift or die;
- my $tokensource = shift;
- $tokensource ||= "$tokenfile.in";
- my $groups = shift or die;
-
- open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n";
- open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n";
- while (<IN>)
- {
- # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content
- if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
- {
- foreach my $grp (sort keys %$groups)
- {
- my %printed;
- my $group = $grp; $group =~ s/^!//;
-
- foreach my $grp (@{$groups->{$grp}{members}})
- {
- unless (exists $printed{$grp->[0]})
- {
- $printed{$grp->[0]} = 1;
- my @groupdata;
- if ($grp->[2])
- {
- @groupdata = split ",", $grp->[2];
- for (my $i=0; $i < @groupdata; ++$i)
- {
- $groupdata[$i] =~ s/nil/0/;
- $groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];";
- }
- $groupdata[-1] .= "\n\t ";
- }
- printf TOKEN "%-12s{%s return %-20s }\n",
- $grp->[0],
- (join "\n\t ", @groupdata),
- "\Ugrp_$group;\E";
- # TODO: change appropriate GRP_FOO back to
- # INS_FOO's. not functionally important;
- # just pedantically so.
- }
- }
- }
- }
- else
- {
- print TOKEN $_;
- }
- }
- close IN;
- close TOKEN;
-}
-
-# helper functions for yacc output
-sub rule_header ($ $ $)
-{
- my ($rule, $tokens, $count) = splice (@_);
- $count ? " | $tokens {\n" : "$rule: $tokens {\n";
-}
-sub rule_footer ()
-{
- return " }\n";
-}
-
-sub cond_action_if ( $ $ $ $ $ $ $ )
-{
- my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
- return rule_header ($rule, $tokens, $count) . <<"EOF";
- if (\$$regarg == $val) {
- @$a_eax
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action_elsif ( $ $ $ $ )
-{
- my ($regarg, $val, $func, $a_eax) = splice (@_);
- return <<"EOF";
- else if (\$$regarg == $val) {
- @$a_eax
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action_else ( $ $ )
-{
- my ($func, $a_args) = splice (@_);
- return <<"EOF" . rule_footer;
- else {
- @$a_args
- \$\$ = $func;
- }
-EOF
-}
-sub cond_action ( $ $ $ $ $ $ $ $ )
-{
- my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args)
- = splice (@_);
- return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func,
- $a_eax) . cond_action_else ($func, $a_args);
-}
-
-#sub action ( $ $ $ $ $ )
-sub action ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . rule_footer;
-}
-
-sub action_setshiftflag ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . " x86_bc_insn_set_shift_flag(\$\$);\n"
- . rule_footer;
-}
-
-sub action_setjrshort ( @ $ )
-{
- my ($rule, $tokens, $func, $a_args, $count) = splice @_;
- return rule_header ($rule, $tokens, $count)
- . " if (\$2.op_sel == JR_NONE)\n"
- . " \$2.op_sel = JR_SHORT;\n"
- . " @$a_args\n"
- . " \$\$ = $func;\n"
- . rule_footer;
-}
-
-sub get_token_number ( $ $ )
-{
- my ($tokens, $str) = splice @_;
- $tokens =~ s/$str.*/x/; # hold its place
- my @f = split /\s+/, $tokens;
- return scalar @f;
-}
-
-sub output_yacc ($@)
-{
- my $grammarfile = shift or die;
- my $grammarsource = shift;
- $grammarsource ||= "$grammarfile.in";
- my $groups = shift or die;
-
- open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n";
- open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n";
-
- while (<IN>)
- {
- if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/})
- {
- print GRAMMAR "static x86_new_insn_data idata;\n";
- print GRAMMAR "static x86_new_jmprel_data jrdata;\n";
- }
- elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
- {
- my $len = length("%token <groupdata>");
- print GRAMMAR "%token <groupdata>";
- foreach my $group (sort keys %$groups)
- {
- if ($len + length("GRP_$group") < 76)
- {
- print GRAMMAR " GRP_\U$group\E";
- $len += length(" GRP_$group");
- }
- else
- {
- print GRAMMAR "\n%token <groupdata> GRP_\U$group\E";
- $len = length("%token <groupdata> GRP_$group");
- }
- }
- print GRAMMAR "\n";
- }
- elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/})
- {
- my $len = length("%type <bc>");
- print GRAMMAR "%type <bc>";
- foreach my $group (sort keys %$groups)
- {
- if ($len + length($group) < 76)
- {
- print GRAMMAR " $group";
- $len += length(" $group");
- }
- else
- {
- print GRAMMAR "\n%type <bc> $group";
- $len = length("%type <bc> $group");
- }
- }
- print GRAMMAR "\n";
- }
- elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
- {
- # list every kind of instruction that instrbase can be
- print GRAMMAR "instrbase: ",
- join( "\n | ", sort keys %$groups), "\n;\n";
-
- my ($ONE, $AL, $AX, $EAX); # need the outer scope
- my (@XCHG_AX, @XCHG_EAX);
-
- # list the arguments and actions (buildbc)
- #foreach my $instrname (sort keys %$instrlist)
- foreach my $group (sort keys %$groups)
- {
- # I'm still convinced this is a hack. The idea is if
- # within an instruction we see certain versions of the
- # opcodes with ONE, or reg_e?a[lx],imm(8|16|32). If we
- # do, defer generation of the action, as we may need to
- # fold it into another version with a conditional to
- # generate the more efficient variant of the opcode
- # BUT, if we don't fold it in, we have to generate the
- # original version we would have otherwise.
- ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0);
- # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax).
- (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
- my $count = 0;
- foreach my $inst (@{$groups->{$group}{rules}}) {
- if($inst->[OPERANDS] =~ m/target/oi)
- {
- # relative target format
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]"
- if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- $tokens =~ s/:/ ':' /g;
- my $datastruct = "x86_new_jmprel_data";
- my $datastructname = "jrdata";
- my $func = "x86_bc_new_jmprel(&$datastructname)";
-
- # Create the argument list for bytecode_new
- my @args;
-
- # Target argument: HACK: Always assumed to be arg 1.
- push @args, 'target=&$2;';
-
- # test for short opcode "nil"
- if($inst->[SHORTOPCODE] =~ m/nil/)
- {
- push @args, 'short_op_len=0;';
- }
- else
- {
- my @opcodes;
- # Check for possible length parameter
- if($inst->[SHORTOPCODE] =~ m/\?/)
- {
- my @pieces = split /\?/, $inst->[SHORTOPCODE];
- push @args, "short_op_len=".$pieces[0].";";
- # opcode piece 1 (and 2 and 3 if attached)
- @opcodes = split ",", $pieces[1];
- }
- else
- {
- # opcode piece 1 (and 2 and 3 if attached)
- @opcodes = split ",", $inst->[SHORTOPCODE];
- # number of bytes of short opcode
- push @args, "short_op_len=".@opcodes.";";
- }
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
- push @args, "short_op[$i]=$opcodes[$i];";
- }
- }
-
- # test for near opcode "nil"
- if($inst->[NEAROPCODE] =~ m/nil/)
- {
- push @args, 'near_op_len=0;';
- }
- else
- {
- # opcode piece 1 (and 2 and 3 if attached)
- my @opcodes = split ",", $inst->[NEAROPCODE];
- # number of bytes of near opcode
- push @args, "near_op_len=".@opcodes.";";
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg;
- push @args, "near_op[$i]=$opcodes[$i];";
- }
- }
-
- # address size
- push @args, "addrsize=$inst->[ADSIZE];";
- $args[-1] =~ s/nil/0/;
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # and add the data structure reference
- s/^/$datastructname./g foreach (@args);
-
- if ($args[0] =~ m/\&\$/)
- {
- $args[0] = '/*@-immediatetrans@*/' . $args[0] .
- '/*@=immediatetrans@*/';
- }
-
- # generate the grammar
- # Specialcase jcc to set op_sel=JR_SHORT.
- if ($rule =~ m/jcc/)
- {
- print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++);
- }
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
- }
- }
- else
- {
- # general instruction format
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]"
- if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- $tokens =~ s/:/ ':' /g;
- # offset args
- my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
- my $datastruct = "x86_new_insn_data";
- my $datastructname = "idata";
- my $func = "x86_bc_new_insn(&$datastructname)";
-
- # Create the argument list for bytecode_new
- my @args;
-
- # operand size
- push @args, "opersize=$inst->[OPSIZE];";
- $args[-1] =~ s/nil/0/;
-
-
- # opcode piece 1 (and 2 and 3 if attached)
- my @opcodes = split ",", $inst->[OPCODE];
- # number of bytes of opcodes
- push @args, "op_len=".@opcodes.";";
- for (my $i=0; $i < @opcodes; ++$i)
- {
- $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g;
- # don't match $0.\d in the following rule.
- $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg;
- push @args, "op[$i]=$opcodes[$i];";
- }
-
- # effective addresses
- my $effaddr = $inst->[EFFADDR];
- $effaddr =~ s/^nil/NULL,0/;
- $effaddr =~ s/nil/0/;
- # don't let a $0.\d match slip into the following rules.
- $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
- $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/;
- $effaddr =~ s[(\$\d+)i,\s*(\d+)]
- ["x86_ea_new_imm($1^ ".($2/8)."),0"]e;
-
- die $effaddr if $effaddr =~ m/\d+[ri]/;
-
- my @effaddr_split = split ',', $effaddr;
- $effaddr_split[0] =~ s/\^/,/;
- push @args, "ea=$effaddr_split[0];";
- if ($effaddr_split[0] !~ m/NULL/)
- {
- push @args, "spare=$effaddr_split[1];";
- }
-
- # immediate sources
- my $imm = $inst->[IMM];
- $imm =~ s/nil/NULL,0/;
- # don't match $0.\d in the following rules.
- $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $imm =~ s[^([0-9A-Fa-f]+),]
- [imm_new_int(0x$1),];
- $imm =~ s[^\$0.(\d+),]
- [imm_new_int((unsigned long)\$1\[$1\]),];
-
- # divide the second, and only the second, by 8 bits/byte
- $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
- $imm .= ($3||'') eq 's' ? ',1' : ',0';
-
- die $imm if $imm =~ m/\d+s/;
-
- my @imm_split = split ",", $imm;
- push @args, "imm=$imm_split[0];";
- if ($imm_split[0] !~ m/NULL/)
- {
- push @args, "im_len=$imm_split[1];";
- push @args, "im_sign=$imm_split[2];";
- }
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # and add the data structure reference
- s/^/$datastructname./g foreach (@args);
-
- # see if we match one of the cases to defer
- if (($inst->[OPERANDS]||"") =~ m/,ONE/)
- {
- $ONE = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/)
- {
- $AL = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/)
- {
- $AX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/)
- {
- $EAX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/)
- {
- $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/)
- {
- $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/)
- {
- $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/)
- {
- $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
- }
-
- # or if we've deferred and we match the folding version
- elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
- {
- $ONE->[4] = 1;
- # Output a normal version except imm8 -> imm8x
- # (BYTE override always makes longer version, and
- # we don't want to conflict with the imm version
- # we output right after this one.
- $tokens =~ s/imm8/imm8x/;
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
-
- # Now output imm version, with second opcode byte
- # set to ,1 opcode. Also call SetInsnShiftFlag().
- $tokens =~ s/imm8x/imm/;
- my $oneval = $ONE->[3]->[2];
- $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg;
- push @args, $oneval;
- print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
- }
- elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
- {
- $AL->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg8");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
- }
- elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/)
- {
- $AX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg16");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
- }
- elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/)
- {
- $EAX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg32");
-
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
- }
- elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_AX; ++$i)
- {
- if($XCHG_AX[$i])
- {
- $XCHG_AX[$i]->[4] = 1;
- # This is definitely a hack. The "right"
- # way to do this would be to enhance
- # get_token_number to get the nth reg16
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg16")
- + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
- }
- }
- }
- print GRAMMAR cond_action_else ($func, \@args);
- }
- elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_EAX; ++$i)
- {
- if($XCHG_EAX[$i])
- {
- $XCHG_EAX[$i]->[4] = 1;
- # This is definitely a hack. The "right"
- # way to do this would be to enhance
- # get_token_number to get the nth reg32
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg32")
- + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- }
- }
- }
- print GRAMMAR cond_action_else ($func, \@args);
- }
-
- # otherwise, generate the normal version
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
- }
- }
- }
-
- # catch deferreds that haven't been folded in.
- if ($ONE and not $ONE->[4])
- {
- print GRAMMAR action (@$ONE, $count++);
- }
- if ($AL and not $AL->[4])
- {
- print GRAMMAR action (@$AL, $count++);
- }
- if ($AX and not $AL->[4])
- {
- print GRAMMAR action (@$AX, $count++);
- }
- if ($EAX and not $AL->[4])
- {
- print GRAMMAR action (@$EAX, $count++);
- }
-
- # print error action
- # ASSUMES: at least one previous action exists
- print GRAMMAR " | \Ugrp_$group\E error {\n";
- print GRAMMAR " Error (_(\"expression syntax error\"));\n";
- print GRAMMAR " \$\$ = (bytecode *)NULL;\n";
- print GRAMMAR " }\n";
-
- # terminate the rule
- print GRAMMAR ";\n";
- }
- }
- else
- {
- print GRAMMAR $_;
- }
- }
- close IN;
- close GRAMMAR;
-}
#include "src/parsers/nasm/nasm-defs.h"
+
void init_table(void);
extern int nasm_parser_lex(void);
+extern void nasm_parser_set_directive_state(void);
void nasm_parser_error(const char *);
static void nasm_parser_directive(const char *name,
valparamhead *valparams,
extern sectionhead nasm_parser_sections;
extern section *nasm_parser_cur_section;
extern char *nasm_parser_locallabel_base;
+extern size_t nasm_parser_locallabel_base_len;
static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL;
static bytecode *nasm_parser_temp_bc;
intnum *intn;
floatnum *flt;
symrec *sym;
- unsigned char groupdata[5];
+ unsigned long arch_data[4];
effaddr *ea;
expr *exp;
- immval *im_val;
- x86_targetval tgt_val;
datavalhead datahead;
dataval *data;
bytecode *bc;
valparamhead dir_valparams;
valparam *dir_valparam;
+ struct {
+ insn_operandhead operands;
+ int num_operands;
+ } insn_operands;
+ insn_operand *insn_operand;
}
%token <intn> INTNUM
%token <int_info> DECLARE_DATA
%token <int_info> RESERVE_SPACE
%token INCBIN EQU TIMES
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token SEG WRT NOSPLIT
%token TO
-%token LOCK REPNZ REP REPZ
-%token <int_info> OPERSIZE ADDRSIZE
-%token <int_info> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
-%token <int_info> REG_EAX REG_ECX REG_EDX REG_EBX
-%token <int_info> REG_ESP REG_EBP REG_ESI REG_EDI
-%token <int_info> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
-%token <int_info> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
-%token <int_info> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token <arch_data> INSN PREFIX REG SEGREG TARGETMOD
%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID
%token <str_val> ID LOCAL_ID SPECIAL_ID
%token LINE
-/* instruction tokens (dynamically generated) */
-/* @TOKENS@ */
-
-/* @TYPES@ */
-
-%type <bc> line lineexp exp instr instrbase
-
-%type <int_info> reg_eax reg_ecx
-%type <int_info> reg_ax reg_cx reg_dx
-%type <int_info> reg_al reg_cl
-%type <int_info> reg_es reg_cs reg_ss reg_ds reg_fs reg_gs
-%type <int_info> fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg
-%type <ea> mem memaddr memfar
-%type <ea> mem8x mem16x mem32x mem64x mem80x mem128x
-%type <ea> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
-%type <ea> rm8x rm16x rm32x /*rm64x rm128x*/
-%type <ea> rm8 rm16 rm32 rm64 rm128
-%type <im_val> imm imm8x imm16x imm32x imm8 imm16 imm32
-%type <exp> expr expr_no_string memexpr direxpr
+%type <bc> line lineexp exp instr
+
+%type <ea> memaddr
+%type <exp> dvexpr expr direxpr
%type <sym> explabel
%type <str_val> label_id
-%type <tgt_val> target
%type <data> dataval
%type <datahead> datavals
%type <dir_valparams> directive_valparams
%type <dir_valparam> directive_valparam
+%type <insn_operands> operands
+%type <insn_operand> operand
%left '|'
%left '^'
xfree($5);
$$ = (bytecode *)NULL;
}
- | directive '\n' { $$ = (bytecode *)NULL; }
+ | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' {
+ $$ = (bytecode *)NULL;
+ }
| error '\n' {
Error(_("label or instruction expected at start of line"));
$$ = (bytecode *)NULL;
| INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); }
;
+instr: INSN {
+ $$ = cur_arch->parse.new_insn($1, 0, NULL);
+ }
+ | INSN operands {
+ $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands);
+ ops_delete(&$2.operands, 0);
+ }
+ | INSN error {
+ Error(_("expression syntax error"));
+ $$ = NULL;
+ }
+ | PREFIX instr {
+ $$ = $2;
+ cur_arch->parse.handle_prefix($$, $1);
+ }
+ | SEGREG instr {
+ $$ = $2;
+ cur_arch->parse.handle_seg_prefix($$, $1[0]);
+ }
+;
+
datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); }
| datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; }
;
-dataval: expr_no_string { $$ = dv_new_expr($1); }
+dataval: dvexpr { $$ = dv_new_expr($1); }
| STRING { $$ = dv_new_string($1); }
| error {
Error(_("expression syntax error"));
$$ = $1;
if (nasm_parser_locallabel_base)
xfree(nasm_parser_locallabel_base);
- nasm_parser_locallabel_base = xstrdup($1);
+ nasm_parser_locallabel_base_len = strlen($1);
+ nasm_parser_locallabel_base =
+ xmalloc(nasm_parser_locallabel_base_len+1);
+ strcpy(nasm_parser_locallabel_base, $1);
}
| SPECIAL_ID
| LOCAL_ID
;
/* directives */
-directive: '[' DIRECTIVE_NAME directive_val ']' {
- xfree($2);
+directive: DIRECTIVE_NAME directive_val {
+ xfree($1);
}
- | '[' DIRECTIVE_NAME error ']' {
- Error(_("invalid arguments to [%s]"), $2);
- xfree($2);
+ | DIRECTIVE_NAME error {
+ Error(_("invalid arguments to [%s]"), $1);
+ xfree($1);
}
;
| ID '=' direxpr { vp_new($$, $1, $3); }
;
-/* register groupings */
-fpureg: ST0
- | FPUREG_NOTST0
-;
-
-reg_eax: REG_EAX
- | DWORD reg_eax { $$ = $2; }
-;
-
-reg_ecx: REG_ECX
- | DWORD reg_ecx { $$ = $2; }
-;
-
-rawreg32: REG_EAX
- | REG_ECX
- | REG_EDX
- | REG_EBX
- | REG_ESP
- | REG_EBP
- | REG_ESI
- | REG_EDI
-;
-
-reg32: rawreg32
- | DWORD reg32 { $$ = $2; }
-;
-
-reg_ax: REG_AX
- | WORD reg_ax { $$ = $2; }
-;
-
-reg_cx: REG_CX
- | WORD reg_cx { $$ = $2; }
-;
-
-reg_dx: REG_DX
- | WORD reg_dx { $$ = $2; }
-;
-
-rawreg16: REG_AX
- | REG_CX
- | REG_DX
- | REG_BX
- | REG_SP
- | REG_BP
- | REG_SI
- | REG_DI
-;
-
-reg16: rawreg16
- | WORD reg16 { $$ = $2; }
-;
-
-reg_al: REG_AL
- | BYTE reg_al { $$ = $2; }
-;
-
-reg_cl: REG_CL
- | BYTE reg_cl { $$ = $2; }
-;
-
-reg8: REG_AL
- | REG_CL
- | REG_DL
- | REG_BL
- | REG_AH
- | REG_CH
- | REG_DH
- | REG_BH
- | BYTE reg8 { $$ = $2; }
-;
-
-reg_es: REG_ES
- | WORD reg_es { $$ = $2; }
-;
-
-reg_ss: REG_SS
- | WORD reg_ss { $$ = $2; }
-;
-
-reg_ds: REG_DS
- | WORD reg_ds { $$ = $2; }
-;
-
-reg_fs: REG_FS
- | WORD reg_fs { $$ = $2; }
-;
-
-reg_gs: REG_GS
- | WORD reg_gs { $$ = $2; }
-;
-
-reg_cs: REG_CS
- | WORD reg_cs { $$ = $2; }
-;
-
-segreg: REG_ES
- | REG_SS
- | REG_DS
- | REG_FS
- | REG_GS
- | REG_CS
- | WORD segreg { $$ = $2; }
-;
-
/* memory addresses */
-/* FIXME: Is there any way this redundancy can be eliminated? This is almost
- * identical to expr: the only difference is that FLTNUM is replaced by
- * rawreg16 and rawreg32.
- *
- * Note that the two can't be just combined because of conflicts caused by imm
- * vs. reg. I don't see a simple solution right now to this.
- *
- * We don't attempt to check memory expressions for validity here.
- */
-memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
- | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); }
- | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); }
- | explabel { $$ = expr_new_ident(ExprSym($1)); }
- /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
- | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
- | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
- /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
- | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
- /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
- /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
- /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
- /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
- | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
- | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
- | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
- | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
- | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
- | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
- | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
- | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
- | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
- | '+' memexpr %prec UNARYOP { $$ = $2; }
- | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
- /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
- | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
- | '(' memexpr ')' { $$ = $2; }
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
+memaddr: expr {
+ $$ = cur_arch->parse.ea_new_expr($1);
}
- | error { Error(_("invalid effective address")); }
-;
-
-memaddr: memexpr {
- $$ = x86_ea_new_expr($1);
- x86_ea_set_segment($$, 0);
+ | SEGREG ':' memaddr {
+ $$ = $3;
+ cur_arch->parse.handle_seg_override($$, $1[0]);
}
- | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); }
- | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); }
- | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); }
- | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); }
- | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); }
- | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); }
| BYTE memaddr { $$ = $2; ea_set_len($$, 1); }
| WORD memaddr { $$ = $2; ea_set_len($$, 2); }
| DWORD memaddr { $$ = $2; ea_set_len($$, 4); }
| NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); }
;
-mem: '[' memaddr ']' { $$ = $2; }
-;
-
-/* explicit memory */
-mem8x: BYTE mem { $$ = $2; }
- | BYTE mem8x { $$ = $2; }
-;
-mem16x: WORD mem { $$ = $2; }
- | WORD mem16x { $$ = $2; }
-;
-mem32x: DWORD mem { $$ = $2; }
- | DWORD mem32x { $$ = $2; }
-;
-mem64x: QWORD mem { $$ = $2; }
- | QWORD mem64x { $$ = $2; }
-;
-mem80x: TWORD mem { $$ = $2; }
- | TWORD mem80x { $$ = $2; }
-;
-mem128x: DQWORD mem { $$ = $2; }
- | DQWORD mem128x { $$ = $2; }
-;
-
-/* FAR memory, for jmp and call */
-memfar: FAR mem { $$ = $2; }
- | FAR memfar { $$ = $2; }
-;
-
-/* implicit memory */
-mem8: mem
- | mem8x
-;
-mem16: mem
- | mem16x
-;
-mem32: mem
- | mem32x
-;
-mem64: mem
- | mem64x
-;
-mem80: mem
- | mem80x
-;
-mem128: mem
- | mem128x
-;
-
-/* both 16 and 32 bit memory */
-mem1632: mem
- | mem16x
- | mem32x
-;
-
-/* explicit register or memory */
-rm8x: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8x
-;
-rm16x: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16x
-;
-rm32x: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32x
-;
-/* not needed:
-rm64x: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64x
-;
-rm128x: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128x
-;
-*/
-
-/* implicit register or memory */
-rm8: reg8 { $$ = x86_ea_new_reg($1); }
- | mem8
-;
-rm16: reg16 { $$ = x86_ea_new_reg($1); }
- | mem16
-;
-rm32: reg32 { $$ = x86_ea_new_reg($1); }
- | mem32
-;
-rm64: MMXREG { $$ = x86_ea_new_reg($1); }
- | mem64
-;
-rm128: XMMREG { $$ = x86_ea_new_reg($1); }
- | mem128
-;
-
-/* immediate values */
-imm: expr { $$ = imm_new_expr($1); }
-;
-
-/* explicit immediates */
-imm8x: BYTE imm { $$ = $2; }
-;
-imm16x: WORD imm { $$ = $2; }
-;
-imm32x: DWORD imm { $$ = $2; }
-;
-
-/* implicit immediates */
-imm8: imm
- | imm8x
-;
-imm16: imm
- | imm16x
-;
-imm32: imm
- | imm32x
+/* instruction operands */
+operands: operand {
+ ops_initialize(&$$.operands);
+ ops_append(&$$.operands, $1);
+ $$.num_operands = 1;
+ }
+ | operands ',' operand {
+ ops_append(&$1.operands, $3);
+ $$.operands = $1.operands;
+ $$.num_operands = $1.num_operands+1;
+ }
;
-/* jump targets */
-target: expr {
- $$.val = $1;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE);
+operand: '[' memaddr ']' { $$ = operand_new_mem($2); }
+ | expr { $$ = operand_new_imm($1); }
+ | SEGREG { $$ = operand_new_segreg($1[0]); }
+ | BYTE operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 1)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 1;
}
- | SHORT target {
+ | WORD operand {
$$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED);
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 2)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 2;
}
- | NEAR target {
+ | DWORD operand {
$$ = $2;
- x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED);
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 4)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 4;
}
+ | QWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 8)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 8;
+ }
+ | TWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 10)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 10;
+ }
+ | DQWORD operand {
+ $$ = $2;
+ if ($$->type == INSN_OPERAND_REG &&
+ cur_arch->get_reg_size($$->data.reg) != 16)
+ Error(_("cannot override register size"));
+ else
+ $$->size = 16;
+ }
+ | TARGETMOD operand { $$ = $2; $$->targetmod = $1[0]; }
;
/* expression trees */
| '(' direxpr ')' { $$ = $2; }
;
-expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
+dvexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
| FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
| explabel { $$ = expr_new_ident(ExprSym($1)); }
+ /*| dvexpr '||' dvexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
+ | dvexpr '|' dvexpr { $$ = expr_new_tree($1, EXPR_OR, $3); }
+ | dvexpr '^' dvexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
+ /*| dvexpr '&&' dvexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/
+ | dvexpr '&' dvexpr { $$ = expr_new_tree($1, EXPR_AND, $3); }
+ /*| dvexpr '==' dvexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/
+ /*| dvexpr '>' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+ /*| dvexpr '<' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/
+ /*| dvexpr '>=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+ /*| dvexpr '<=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/
+ /*| dvexpr '!=' dvexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/
+ | dvexpr LEFT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); }
+ | dvexpr RIGHT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); }
+ | dvexpr '+' dvexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); }
+ | dvexpr '-' dvexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); }
+ | dvexpr '*' dvexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); }
+ | dvexpr '/' dvexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); }
+ | dvexpr SIGNDIV dvexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); }
+ | dvexpr '%' dvexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); }
+ | dvexpr SIGNMOD dvexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); }
+ | '+' dvexpr %prec UNARYOP { $$ = $2; }
+ | '-' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); }
+ /*| '!' dvexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/
+ | '~' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); }
+ | '(' dvexpr ')' { $$ = $2; }
+;
+
+/* Expressions for operands and memory expressions.
+ * We don't attempt to check memory expressions for validity here.
+ * Essentially the same as dvexpr above but adds REG and STRING.
+ */
+expr: INTNUM { $$ = expr_new_ident(ExprInt($1)); }
+ | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); }
+ | REG { $$ = expr_new_ident(ExprReg($1[0])); }
+ | STRING {
+ $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
+ xfree($1);
+ }
+ | explabel { $$ = expr_new_ident(ExprSym($1)); }
/*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/
| expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); }
| expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); }
| '(' expr ')' { $$ = $2; }
;
-expr: expr_no_string
- | STRING {
- $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1)));
- xfree($1);
- }
-;
-
explabel: ID {
$$ = symrec_use($1);
xfree($1);
}
;
-instr: /* empty */ {
- idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL;
- $$ = x86_bc_new_insn(&idata);
- }
- | instrbase
- | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); }
- | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); }
- | REG_CS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E);
- }
- | REG_SS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36);
- }
- | REG_DS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E);
- }
- | REG_ES instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26);
- }
- | REG_FS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64);
- }
- | REG_GS instr {
- $$ = $2;
- x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65);
- }
- | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); }
- | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); }
- | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); }
- | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); }
-;
-
-/* instruction grammars (dynamically generated) */
-/* @INSTRUCTIONS@ */
-
%%
/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/
valparamhead *objext_valparams)
{
valparam *vp, *vp2;
- const intnum *intn;
- long lval;
assert(cur_objfmt != NULL);
vp->param = NULL;
}
nasm_parser_prev_bc = (bytecode *)NULL;
- } else if (strcasecmp(name, "bits") == 0) {
- if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL &&
- (intn = expr_get_intnum(&vp->param)) != NULL &&
- (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32))
- x86_mode_bits = (unsigned char)lval;
- else
- Error(_("invalid argument to [%s]"), "BITS");
+ } else if (strcasecmp(name, "cpu") == 0) {
+ vps_foreach(vp, valparams) {
+ if (vp->val)
+ cur_arch->parse.switch_cpu(vp->val);
+ else if (vp->param) {
+ const intnum *intcpu;
+ intcpu = expr_get_intnum(&vp->param);
+ if (!intcpu)
+ Error(_("invalid argument to [%s]"), "CPU");
+ else {
+ char strcpu[16];
+ sprintf(strcpu, "%lu", intnum_get_uint(intcpu));
+ cur_arch->parse.switch_cpu(strcpu);
+ }
+ }
+ }
+ } else if (!cur_arch->parse.directive(name, valparams, objext_valparams,
+ &nasm_parser_sections)) {
+ ;
} else if (cur_objfmt->directive(name, valparams, objext_valparams,
&nasm_parser_sections)) {
Error(_("unrecognized directive [%s]"), name);
extern int nasm_parser_debug;
extern int nasm_parser_parse(void);
+extern void nasm_parser_cleanup(void);
size_t (*nasm_parser_input) (char *buf, size_t max_size);
nasm_parser_parse();
+ nasm_parser_cleanup();
+
/* Free locallabel base if necessary */
if (nasm_parser_locallabel_base)
xfree(nasm_parser_locallabel_base);
--- /dev/null
+/*
+ * NASM-compatible re2c lexer
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * Portions based on re2c's example code.
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+#include "src/parsers/nasm/nasm-defs.h"
+#include "nasm-bison.h"
+
+
+#define BSIZE 8192
+
+#define YYCTYPE char
+#define YYCURSOR cursor
+#define YYLIMIT s.lim
+#define YYMARKER s.ptr
+#define YYFILL(n) {cursor = fill(cursor);}
+
+#define RETURN(i) {s.cur = cursor; return i;}
+
+#define SCANINIT() { \
+ s.tchar = cursor - s.pos; \
+ s.tline = s.cline; \
+ s.tok = cursor; \
+ }
+
+#define TOKLEN (cursor-s.tok)
+
+void nasm_parser_cleanup(void);
+void nasm_parser_set_directive_state(void);
+int nasm_parser_lex(void);
+
+extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
+
+
+typedef struct Scanner { /* Buffer and position bookkeeping for the re2c-generated lexer. */
+ YYCTYPE *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; /* bot: start of the buffer allocated in fill()
+   * tok: start of the token being scanned (set by SCANINIT)
+   * ptr: re2c backtracking marker (YYMARKER)
+   * cur: scan position saved by RETURN and reloaded by the next nasm_parser_lex() call
+   * pos: base pointer for tchar (tchar = cursor - pos); presumably the start of the current line - TODO confirm
+   * lim: end of the data read so far (YYLIMIT)
+   * top: end of the allocated buffer space
+   * eof: NULL until fill() sees a short read, then points just past the '\n' it appends */
+ unsigned int tchar, tline, cline; /* tchar/tline: captured at token start by SCANINIT (cursor - pos, and cline)
+   * cline: initialised to 1; presumably the current input line number - TODO confirm */
+} Scanner;
+
+static Scanner s = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 1 };
+
+FILE *nasm_parser_in = NULL;
+
+/*
+ * Refill the scanner buffer (this is the re2c YYFILL hook).
+ *
+ * Discards the already-consumed bytes in front of the current token, grows
+ * the buffer when fewer than BSIZE bytes of headroom remain, and then reads
+ * up to BSIZE more bytes through nasm_parser_input().  A short read marks the
+ * end of input: a '\n' is stored after the data and s.eof is left pointing
+ * just past it.  Once s.eof is set this function does nothing.
+ *
+ * cursor is the caller's current scan position.  Because the data may be
+ * shifted or moved to a new allocation, the relocated cursor is returned and
+ * must replace the caller's copy.
+ */
+static YYCTYPE *
+fill(YYCTYPE *cursor)
+{
+    if(!s.eof){
+        /* number of bytes in front of the current token that can be dropped */
+        size_t cnt = s.tok - s.bot;
+        if(cnt){
+            /* Source and destination are inside the same buffer and overlap
+             * whenever the kept data is longer than the dropped prefix, so
+             * this must be memmove(); memcpy() is undefined for overlapping
+             * regions.
+             */
+            memmove(s.bot, s.tok, s.lim - s.tok);
+            s.tok = s.bot;
+            s.ptr -= cnt;
+            cursor -= cnt;
+            s.pos -= cnt;
+            s.lim -= cnt;
+        }
+        if((s.top - s.lim) < BSIZE){
+            /* not enough room for another BSIZE read: move to a larger buffer
+             * and rebase every pointer onto it (s.tok == s.bot at this point)
+             */
+            char *buf = xmalloc((s.lim - s.bot) + BSIZE);
+            memcpy(buf, s.tok, s.lim - s.tok);
+            s.tok = buf;
+            s.ptr = &buf[s.ptr - s.bot];
+            cursor = &buf[cursor - s.bot];
+            s.pos = &buf[s.pos - s.bot];
+            s.lim = &buf[s.lim - s.bot];
+            s.top = &s.lim[BSIZE];
+            if (s.bot)
+                xfree(s.bot);
+            s.bot = buf;
+        }
+        if((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE){
+            /* short read: terminate the input with a newline and record EOF */
+            s.eof = &s.lim[cnt]; *s.eof++ = '\n';
+        }
+        s.lim += cnt;
+    }
+    return cursor;
+}
+
+/* Release the scanner's input buffer if fill() ever allocated one. */
+void
+nasm_parser_cleanup(void)
+{
+    if (!s.bot)
+        return;
+
+    xfree(s.bot);
+}
+
+/* starting size of string buffer */
+#define STRBUF_ALLOC_SIZE 128
+
+/* string buffer used when parsing strings/character constants */
+static char *strbuf = (char *)NULL;
+
+/* length of strbuf (including terminating NUL character) */
+static size_t strbuf_size = 0;
+
+/* last "base" label for local (.) labels */
+char *nasm_parser_locallabel_base = (char *)NULL;
+size_t nasm_parser_locallabel_base_len = 0;
+
+static int linechg_numcount;
+
+/*!re2c
+ any = [\000-\377];
+ digit = [0-9];
+ iletter = [a-zA-Z];
+ bindigit = [01];
+ octdigit = [0-7];
+ hexdigit = [0-9a-fA-F];
+ ws = [ \t\r];
+ quot = ["'];
+ A = [aA];
+ B = [bB];
+ C = [cC];
+ D = [dD];
+ E = [eE];
+ F = [fF];
+ G = [gG];
+ H = [hH];
+ I = [iI];
+ J = [jJ];
+ K = [kK];
+ L = [lL];
+ M = [mM];
+ N = [nN];
+ O = [oO];
+ P = [pP];
+ Q = [qQ];
+ R = [rR];
+ S = [sS];
+ T = [tT];
+ U = [uU];
+ V = [vV];
+ W = [wW];
+ X = [xX];
+ Y = [yY];
+ Z = [zZ];
+*/
+
+static enum { /* Lexer mode; nasm_parser_lex() switches on it to pick its scanner entry point. */
+ INITIAL, /* default: falls through to the normal "scan" entry */
+ DIRECTIVE, /* set by nasm_parser_set_directive_state(); lexer jumps to "directive" */
+ DIRECTIVE2, /* while in this state the "]" rule switches back to INITIAL */
+ LINECHG, /* set by the "%line" rule; lexer jumps to "linechg" */
+ LINECHG2 /* lexer jumps to "linechg2" */
+} state = INITIAL;
+
+void
+nasm_parser_set_directive_state(void)
+{
+ state = DIRECTIVE; /* Called from the grammar's '[' mid-rule action: makes the
+   * next nasm_parser_lex() call jump to its "directive"
+   * scanner entry instead of the default one. */
+}
+
+int
+nasm_parser_lex(void)
+{
+ YYCTYPE *cursor = s.cur;
+ YYCTYPE endch;
+ size_t count, len;
+ YYCTYPE savech;
+ arch_check_id_retval check_id_ret;
+
+ /* Catch EOF */
+ if (s.eof && cursor == s.eof)
+ return 0;
+
+ /* Jump to proper "exclusive" states */
+ switch (state) {
+ case DIRECTIVE:
+ goto directive;
+ case LINECHG:
+ goto linechg;
+ case LINECHG2:
+ goto linechg2;
+ default:
+ break;
+ }
+
+scan:
+ SCANINIT();
+
+ /*!re2c
+ /* standard decimal integer */
+ digit+ {
+ savech = s.tok[TOKLEN];
+ s.tok[TOKLEN] = '\0';
+ yylval.intn = intnum_new_dec(s.tok);
+ s.tok[TOKLEN] = savech;
+ RETURN(INTNUM);
+ }
+ /* 10010011b - binary number */
+
+ bindigit+ "b" {
+ s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */
+ yylval.intn = intnum_new_bin(s.tok);
+ RETURN(INTNUM);
+ }
+
+ /* 777q - octal number */
+ octdigit+ "q" {
+ s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */
+ yylval.intn = intnum_new_oct(s.tok);
+ RETURN(INTNUM);
+ }
+
+ /* 0AAh form of hexadecimal number */
+ digit hexdigit+ "h" {
+ s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */
+ yylval.intn = intnum_new_hex(s.tok);
+ RETURN(INTNUM);
+ }
+
+ /* $0AA and 0xAA forms of hexadecimal number */
+ (("$" digit) | "0x") hexdigit+ {
+ savech = s.tok[TOKLEN];
+ s.tok[TOKLEN] = '\0';
+ if (s.tok[1] == 'x')
+ yylval.intn = intnum_new_hex(s.tok+2); /* skip 0 and x */
+ else
+ yylval.intn = intnum_new_hex(s.tok+1); /* don't skip 0 */
+ s.tok[TOKLEN] = savech;
+ RETURN(INTNUM);
+ }
+
+ /* floating point value */
+ digit+ "." digit* ("e" [-+]? digit+)? {
+ savech = s.tok[TOKLEN];
+ s.tok[TOKLEN] = '\0';
+ yylval.flt = floatnum_new(s.tok);
+ s.tok[TOKLEN] = savech;
+ RETURN(FLTNUM);
+ }
+
+ /* string/character constant values */
+ quot {
+ endch = s.tok[0];
+ goto stringconst;
+ }
+
+ /* %line linenum+lineinc filename */
+ "%line" {
+ state = LINECHG;
+ linechg_numcount = 0;
+ RETURN(LINE);
+ }
+
+ /* size specifiers */
+ B Y T E { yylval.int_info = 1; RETURN(BYTE); }
+ W O R D { yylval.int_info = 2; RETURN(WORD); }
+ D W O R D { yylval.int_info = 4; RETURN(DWORD); }
+ Q W O R D { yylval.int_info = 8; RETURN(QWORD); }
+ T W O R D { yylval.int_info = 10; RETURN(TWORD); }
+ D Q W O R D { yylval.int_info = 16; RETURN(DQWORD); }
+
+ /* pseudo-instructions */
+ D B { yylval.int_info = 1; RETURN(DECLARE_DATA); }
+ D W { yylval.int_info = 2; RETURN(DECLARE_DATA); }
+ D D { yylval.int_info = 4; RETURN(DECLARE_DATA); }
+ D Q { yylval.int_info = 8; RETURN(DECLARE_DATA); }
+ D T { yylval.int_info = 10; RETURN(DECLARE_DATA); }
+
+ R E S B { yylval.int_info = 1; RETURN(RESERVE_SPACE); }
+ R E S W { yylval.int_info = 2; RETURN(RESERVE_SPACE); }
+ R E S D { yylval.int_info = 4; RETURN(RESERVE_SPACE); }
+ R E S Q { yylval.int_info = 8; RETURN(RESERVE_SPACE); }
+ R E S T { yylval.int_info = 10; RETURN(RESERVE_SPACE); }
+
+ I N C B I N { RETURN(INCBIN); }
+
+ E Q U { RETURN(EQU); }
+
+ T I M E S { RETURN(TIMES); }
+
+ S E G { RETURN(SEG); }
+ W R T { RETURN(WRT); }
+
+ N O S P L I T { RETURN(NOSPLIT); }
+
+ T O { RETURN(TO); }
+
+ /* operators */
+ "<<" { RETURN(LEFT_OP); }
+ ">>" { RETURN(RIGHT_OP); }
+ "//" { RETURN(SIGNDIV); }
+ "%%" { RETURN(SIGNMOD); }
+ "$$" { RETURN(START_SECTION_ID); }
+ [-+|^*&/%~$():=,\[] { RETURN(s.tok[0]); }
+
+ /* handle ] separately for directives */
+ "]" {
+ if (state == DIRECTIVE2)
+ state = INITIAL;
+ RETURN(s.tok[0]);
+ }
+
+ /* special non-local ..@label and labels like ..start */
+ ".." [a-zA-Z0-9_$#@~.?]+ {
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(SPECIAL_ID);
+ }
+
+ /* local label (.label) */
+ "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+ /* override local labels in directive state */
+ if (state == DIRECTIVE2) {
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(ID);
+ } else if (!nasm_parser_locallabel_base) {
+ Warning(_("no non-local label before `%s'"), s.tok[0]);
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ } else {
+ len = TOKLEN + nasm_parser_locallabel_base_len;
+ yylval.str_val = xmalloc(len + 1);
+ strcpy(yylval.str_val, nasm_parser_locallabel_base);
+ strncat(yylval.str_val, s.tok, TOKLEN);
+ yylval.str_val[len] = '\0';
+ }
+
+ RETURN(LOCAL_ID);
+ }
+
+ /* forced identifier */
+ "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(ID);
+ }
+
+ /* identifier that may be a register, instruction, etc. */
+ [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+ savech = s.tok[TOKLEN];
+ s.tok[TOKLEN] = '\0';
+ check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+ s.tok);
+ s.tok[TOKLEN] = savech;
+ switch (check_id_ret) {
+ case ARCH_CHECK_ID_NONE:
+ /* Just an identifier, return as such. */
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(ID);
+ case ARCH_CHECK_ID_INSN:
+ RETURN(INSN);
+ case ARCH_CHECK_ID_PREFIX:
+ RETURN(PREFIX);
+ case ARCH_CHECK_ID_REG:
+ RETURN(REG);
+ case ARCH_CHECK_ID_SEGREG:
+ RETURN(SEGREG);
+ case ARCH_CHECK_ID_TARGETMOD:
+ RETURN(TARGETMOD);
+ default:
+ Warning(_("Arch feature not supported, treating as identifier"));
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(ID);
+ }
+ }
+
+ ";" (any \ [\n])* { goto scan; }
+
+ ws+ { goto scan; }
+
+ "\n" { state = INITIAL; RETURN(s.tok[0]); }
+
+ any {
+ if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+ Warning(_("ignoring unrecognized character `%s'"),
+ conv_unprint(s.tok[0]));
+ goto scan;
+ }
+ */
+
+ /* %line linenum+lineinc filename */
+linechg:
+ SCANINIT();
+
+ /*!re2c
+ digit+ {
+ linechg_numcount++;
+ savech = s.tok[TOKLEN];
+ s.tok[TOKLEN] = '\0';
+ yylval.intn = intnum_new_dec(s.tok);
+ s.tok[TOKLEN] = savech;
+ RETURN(INTNUM);
+ }
+
+ "\n" {
+ state = INITIAL;
+ RETURN(s.tok[0]);
+ }
+
+ "+" {
+ RETURN(s.tok[0]);
+ }
+
+ ws+ {
+ if (linechg_numcount == 2)
+ state = LINECHG2;
+ goto linechg2;
+ }
+
+ any {
+ if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+ Warning(_("ignoring unrecognized character `%s'"),
+ conv_unprint(s.tok[0]));
+ goto linechg;
+ }
+ */
+
+linechg2:
+ SCANINIT();
+
+ /*!re2c
+ "\n" {
+ state = INITIAL;
+ RETURN(s.tok[0]);
+ }
+
+ "\r" { }
+
+ (any \ [\r\n])+ {
+ state = LINECHG;
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(FILENAME);
+ }
+ */
+
+ /* directive: [name value] */
+directive:
+ SCANINIT();
+
+ /*!re2c
+ [\]\n] {
+ state = INITIAL;
+ RETURN(s.tok[0]);
+ }
+
+ iletter+ {
+ state = DIRECTIVE2;
+ yylval.str_val = xstrndup(s.tok, TOKLEN);
+ RETURN(DIRECTIVE_NAME);
+ }
+
+ any {
+ if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+ Warning(_("ignoring unrecognized character `%s'"),
+ conv_unprint(s.tok[0]));
+ goto directive;
+ }
+ */
+
+ /* string/character constant values */
+stringconst:
+ strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+ strbuf_size = STRBUF_ALLOC_SIZE;
+ count = 0;
+
+stringconst_scan:
+ SCANINIT();
+
+ /*!re2c
+ "\n" {
+ if (cursor == s.eof)
+ Error(_("unexpected end of file in string"));
+ else
+ Error(_("unterminated string"));
+ strbuf[count] = '\0';
+ yylval.str_val = strbuf;
+ RETURN(STRING);
+ }
+
+ any {
+ if (s.tok[0] == endch) {
+ strbuf[count] = '\0';
+ yylval.str_val = strbuf;
+ RETURN(STRING);
+ }
+
+ strbuf[count++] = s.tok[0];
+ if (count >= strbuf_size) {
+ strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
+ strbuf_size += STRBUF_ALLOC_SIZE;
+ }
+
+ goto stringconst_scan;
+ }
+ */
+}
+++ /dev/null
-/*
- * NASM-compatible lex lexer
- *
- * Copyright (C) 2001 Peter Johnson
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-%{
-#include "util.h"
-RCSID("$IdPath$");
-
-#include "bitvect.h"
-
-#include "errwarn.h"
-#include "intnum.h"
-#include "floatnum.h"
-#include "expr.h"
-#include "symrec.h"
-
-#include "bytecode.h"
-
-#include "arch.h"
-
-#include "src/parsers/nasm/nasm-defs.h"
-#include "nasm-bison.h"
-
-
-#define YY_NEVER_INTERACTIVE 1
-
-int nasm_parser_lex(void);
-
-extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
-#undef YY_INPUT
-#define YY_INPUT(b, r, ms) (r = nasm_parser_input(b, ms))
-
-/* starting size of string buffer */
-#define STRBUF_ALLOC_SIZE 128
-
-/* string buffer used when parsing strings/character constants */
-static char *strbuf = (char *)NULL;
-
-/* length of strbuf (including terminating NULL character) */
-static size_t strbuf_size = 0;
-
-/* last "base" label for local (.) labels */
-char *nasm_parser_locallabel_base = (char *)NULL;
-
-static int linechg_numcount;
-
-%}
-%option noyywrap
-%option nounput
-%option case-insensitive
-%option never-interactive
-%option prefix="nasm_parser_"
-%option outfile="lex.yy.c"
-
-%x DIRECTIVE LINECHG LINECHG2
-%s DIRECTIVE2
-
-DIGIT [0-9]
-BINDIGIT [01]
-OCTDIGIT [0-7]
-HEXDIGIT [0-9a-f]
-WS [ \t\r]
-
-%%
-
- /* standard decimal integer */
-{DIGIT}+ {
- yylval.intn = intnum_new_dec(yytext);
- return INTNUM;
-}
-
- /* 10010011b - binary number */
-{BINDIGIT}+b {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'b' */
- yylval.intn = intnum_new_bin(yytext);
- return INTNUM;
-}
-
- /* 777q - octal number */
-{OCTDIGIT}+q {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'q' */
- yylval.intn = intnum_new_oct(yytext);
- return INTNUM;
-}
-
- /* 0AAh form of hexidecimal number */
-{DIGIT}{HEXDIGIT}*h {
- yytext[strlen(yytext)-1] = '\0'; /* strip off 'h' */
- yylval.intn = intnum_new_hex(yytext);
- return INTNUM;
-}
-
- /* $0AA and 0xAA forms of hexidecimal number */
-(\${DIGIT}|0x){HEXDIGIT}+ {
- if (yytext[1] == 'x')
- yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */
- else
- yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */
- return INTNUM;
-}
-
- /* floating point value */
-{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
- yylval.flt = floatnum_new(yytext);
- return FLTNUM;
-}
-
- /* string/character constant values */
-["'] {
- int inch, count;
- char endch = yytext[0];
-
- strbuf = xmalloc(STRBUF_ALLOC_SIZE);
-
- strbuf_size = STRBUF_ALLOC_SIZE;
- inch = input();
- count = 0;
- while (inch != EOF && inch != endch && inch != '\n') {
- strbuf[count++] = inch;
- if (count >= strbuf_size) {
- strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
- if (!strbuf)
- Fatal(FATAL_NOMEM);
- strbuf_size += STRBUF_ALLOC_SIZE;
- }
- inch = input();
- }
-
- if (inch == '\n')
- Error(_("unterminated string"));
- else if (inch == EOF)
- Error(_("unexpected end of file in string"));
-
- strbuf[count] = '\0';
-
- yylval.str_val = strbuf;
- return STRING;
-}
-
- /* %line linenum+lineinc filename */
-^%line { BEGIN LINECHG; linechg_numcount = 0; return LINE; }
-<LINECHG>{DIGIT}+ {
- linechg_numcount++;
- yylval.intn = intnum_new_dec(yytext);
- return INTNUM;
-}
-<LINECHG>\n { BEGIN INITIAL; return '\n'; }
-<LINECHG>[+] { return yytext[0]; }
-<LINECHG>{WS}+ {
- if (linechg_numcount == 2)
- BEGIN LINECHG2;
-}
-<LINECHG2>\n { BEGIN INITIAL; return '\n'; }
-<LINECHG2>\r ;
-<LINECHG2>[^\r\n]+ {
- BEGIN LINECHG;
- yylval.str_val = xstrdup(yytext);
- return FILENAME;
-}
-
- /* directive: [name value] */
-^{WS}*"[" { BEGIN DIRECTIVE; return '['; }
-<DIRECTIVE>"]" { BEGIN INITIAL; return ']'; }
-<DIRECTIVE2>"]" { BEGIN INITIAL; return ']'; }
-<DIRECTIVE>\n { BEGIN INITIAL; return '\n'; }
-<DIRECTIVE2>\n { BEGIN INITIAL; return '\n'; }
-
-<DIRECTIVE>[a-z]+ {
- BEGIN DIRECTIVE2;
- yylval.str_val = xstrdup(yytext);
- return DIRECTIVE_NAME;
-}
-<DIRECTIVE>. {
- if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
- Warning(_("ignoring unrecognized character `%s'"),
- conv_unprint(yytext[0]));
-}
-
- /* override local labels in directive state */
-<DIRECTIVE2>\.[a-z0-9_$#@~.?]* {
- yylval.str_val = xstrdup(yytext);
- return ID;
-}
-
- /* size specifiers */
-byte { yylval.int_info = 1; return BYTE; }
-word { yylval.int_info = 2; return WORD; }
-dword { yylval.int_info = 4; return DWORD; }
-qword { yylval.int_info = 8; return QWORD; }
-tword { yylval.int_info = 10; return TWORD; }
-dqword { yylval.int_info = 16; return DQWORD; }
-
- /* pseudo-instructions */
-db { yylval.int_info = 1; return DECLARE_DATA; }
-dw { yylval.int_info = 2; return DECLARE_DATA; }
-dd { yylval.int_info = 4; return DECLARE_DATA; }
-dq { yylval.int_info = 8; return DECLARE_DATA; }
-dt { yylval.int_info = 10; return DECLARE_DATA; }
-
-resb { yylval.int_info = 1; return RESERVE_SPACE; }
-resw { yylval.int_info = 2; return RESERVE_SPACE; }
-resd { yylval.int_info = 4; return RESERVE_SPACE; }
-resq { yylval.int_info = 8; return RESERVE_SPACE; }
-rest { yylval.int_info = 10; return RESERVE_SPACE; }
-
-incbin { return INCBIN; }
-
-equ { return EQU; }
-
-times { return TIMES; }
-
-seg { return SEG; }
-wrt { return WRT; }
-near { return NEAR; }
-short { return SHORT; }
-far { return FAR; }
-
-nosplit { return NOSPLIT; }
-
-org { return ORG; }
-
-to { return TO; }
-
- /* operand size overrides */
-o16 { yylval.int_info = 16; return OPERSIZE; }
-o32 { yylval.int_info = 32; return OPERSIZE; }
- /* address size overrides */
-a16 { yylval.int_info = 16; return ADDRSIZE; }
-a32 { yylval.int_info = 32; return ADDRSIZE; }
-
- /* instruction prefixes */
-lock { return LOCK; }
-repne { return REPNZ; }
-repnz { return REPNZ; }
-rep { return REP; }
-repe { return REPZ; }
-repz { return REPZ; }
-
- /* control, debug, and test registers */
-cr4 { yylval.int_info = 4; return CR4; }
-cr[023] { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; }
-dr[0-367] { yylval.int_info = yytext[2]-'0'; return DRREG; }
-tr[3-7] { yylval.int_info = yytext[2]-'0'; return TRREG; }
-
- /* floating point, MMX, and SSE registers */
-st0 { yylval.int_info = 0; return ST0; }
-st[1-7] { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; }
-mm[0-7] { yylval.int_info = yytext[2]-'0'; return MMXREG; }
-xmm[0-7] { yylval.int_info = yytext[3]-'0'; return XMMREG; }
-
- /* integer registers */
-eax { yylval.int_info = 0; return REG_EAX; }
-ecx { yylval.int_info = 1; return REG_ECX; }
-edx { yylval.int_info = 2; return REG_EDX; }
-ebx { yylval.int_info = 3; return REG_EBX; }
-esp { yylval.int_info = 4; return REG_ESP; }
-ebp { yylval.int_info = 5; return REG_EBP; }
-esi { yylval.int_info = 6; return REG_ESI; }
-edi { yylval.int_info = 7; return REG_EDI; }
-
-ax { yylval.int_info = 0; return REG_AX; }
-cx { yylval.int_info = 1; return REG_CX; }
-dx { yylval.int_info = 2; return REG_DX; }
-bx { yylval.int_info = 3; return REG_BX; }
-sp { yylval.int_info = 4; return REG_SP; }
-bp { yylval.int_info = 5; return REG_BP; }
-si { yylval.int_info = 6; return REG_SI; }
-di { yylval.int_info = 7; return REG_DI; }
-
-al { yylval.int_info = 0; return REG_AL; }
-cl { yylval.int_info = 1; return REG_CL; }
-dl { yylval.int_info = 2; return REG_DL; }
-bl { yylval.int_info = 3; return REG_BL; }
-ah { yylval.int_info = 4; return REG_AH; }
-ch { yylval.int_info = 5; return REG_CH; }
-dh { yylval.int_info = 6; return REG_DH; }
-bh { yylval.int_info = 7; return REG_BH; }
-
- /* segment registers */
-es { yylval.int_info = 0; return REG_ES; }
-cs { yylval.int_info = 1; return REG_CS; }
-ss { yylval.int_info = 2; return REG_SS; }
-ds { yylval.int_info = 3; return REG_DS; }
-fs { yylval.int_info = 4; return REG_FS; }
-gs { yylval.int_info = 5; return REG_GS; }
-
- /* operators */
-"<<" { return LEFT_OP; }
-">>" { return RIGHT_OP; }
-"//" { return SIGNDIV; }
-"%%" { return SIGNMOD; }
-"$$" { return START_SECTION_ID; }
-[-+|^&*/%~$():[\]=,] { return yytext[0]; }
-
- /* special non-local ..@label and labels like ..start */
-\.\.[a-z0-9_$#@~.?]+ {
- yylval.str_val = xstrdup(yytext);
- return SPECIAL_ID;
-}
-
- /* local label (.label) */
-\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
- if (!nasm_parser_locallabel_base) {
- Warning(_("no non-local label before `%s'"), yytext);
- yylval.str_val = xstrdup(yytext);
- } else {
- yylval.str_val = xmalloc(strlen(yytext) +
- strlen(nasm_parser_locallabel_base) + 1);
- strcpy(yylval.str_val, nasm_parser_locallabel_base);
- strcat(yylval.str_val, yytext);
- }
-
- return LOCAL_ID;
-}
-
- /* instructions */
- /* @INSTRUCTIONS@ */
-
- /* label */
-[a-z_?][a-z0-9_$#@~.?]* {
- yylval.str_val = xstrdup(yytext);
- return ID;
-}
-
-;.* ;
-
-{WS}+ ;
-
-\n return '\n';
-
-. {
- if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
- Warning(_("ignoring unrecognized character `%s'"),
- conv_unprint(yytext[0]));
-}
-
#include "bytecode.h"
#include "bc-int.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
START_TEST(test_x86_ea_new_reg)
{
#include "bytecode.h"
#include "arch.h"
-#include "x86-int.h"
+#include "x86arch.h"
typedef enum {
REG_AX = 0,