From 3320945f957c183c88ebff3954f914e992a329c1 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Fri, 9 Aug 2002 05:43:03 +0000 Subject: [PATCH] Massive restructuring of lexing and parsing (NASM-compatible parser). Bugzilla Bug#3. Not all instructions are parsed yet, so this is actually a minor feature regression from the user side, but this commit lays the framework for much easier additions of new assembler syntaxes and architectures. The re2c tool is now used to generate the lexers, as it's much more flexibly function-oriented than lex. - nasm-bison.y is a repocopy+modify of nasm/bison.y.in. - x86arch.h now includes all of x86-int.h svn path=/trunk/yasm/; revision=670 --- .cvsignore | 2 - frontends/yasm/yasm.c | 2 +- libyasm/arch.c | 137 +++ libyasm/arch.h | 161 +++- libyasm/bytecode.c | 42 +- libyasm/bytecode.h | 3 + libyasm/expr-int.h | 11 +- libyasm/expr.c | 63 +- libyasm/expr.h | 9 +- libyasm/tests/bytecode_test.c | 2 +- libyasm/tests/memexpr_test.c | 2 +- modules/arch/x86/Makefile.inc | 15 +- modules/arch/x86/instrs.dat | 1208 -------------------------- modules/arch/x86/x86-int.h | 110 --- modules/arch/x86/x86arch.c | 147 +++- modules/arch/x86/x86arch.h | 141 ++- modules/arch/x86/x86bc.c | 60 +- modules/arch/x86/x86expr.c | 19 +- modules/arch/x86/x86id.re | 1282 ++++++++++++++++++++++++++++ modules/parsers/nasm/Makefile.inc | 45 +- modules/parsers/nasm/bison.y.in | 791 ----------------- modules/parsers/nasm/gen_instr.pl | 889 ------------------- modules/parsers/nasm/nasm-bison.y | 547 ++++-------- modules/parsers/nasm/nasm-parser.c | 3 + modules/parsers/nasm/nasm-token.re | 516 +++++++++++ modules/parsers/nasm/token.l.in | 353 -------- src/arch.c | 137 +++ src/arch.h | 161 +++- src/arch/x86/Makefile.inc | 15 +- src/arch/x86/instrs.dat | 1208 -------------------------- src/arch/x86/x86-int.h | 110 --- src/arch/x86/x86arch.c | 147 +++- src/arch/x86/x86arch.h | 141 ++- src/arch/x86/x86bc.c | 60 +- src/arch/x86/x86expr.c | 19 +- src/arch/x86/x86id.re | 1282 
++++++++++++++++++++++++++++ src/bytecode.c | 42 +- src/bytecode.h | 3 + src/expr-int.h | 11 +- src/expr.c | 63 +- src/expr.h | 9 +- src/main.c | 2 +- src/parsers/nasm/Makefile.inc | 45 +- src/parsers/nasm/bison.y.in | 791 ----------------- src/parsers/nasm/gen_instr.pl | 889 ------------------- src/parsers/nasm/nasm-bison.y | 547 ++++-------- src/parsers/nasm/nasm-parser.c | 3 + src/parsers/nasm/nasm-token.re | 516 +++++++++++ src/parsers/nasm/token.l.in | 353 -------- src/tests/bytecode_test.c | 2 +- src/tests/memexpr_test.c | 2 +- 51 files changed, 5436 insertions(+), 7682 deletions(-) delete mode 100644 modules/arch/x86/instrs.dat delete mode 100644 modules/arch/x86/x86-int.h create mode 100644 modules/arch/x86/x86id.re delete mode 100644 modules/parsers/nasm/bison.y.in delete mode 100755 modules/parsers/nasm/gen_instr.pl create mode 100644 modules/parsers/nasm/nasm-token.re delete mode 100644 modules/parsers/nasm/token.l.in delete mode 100644 src/arch/x86/instrs.dat delete mode 100644 src/arch/x86/x86-int.h create mode 100644 src/arch/x86/x86id.re delete mode 100644 src/parsers/nasm/bison.y.in delete mode 100755 src/parsers/nasm/gen_instr.pl create mode 100644 src/parsers/nasm/nasm-token.re delete mode 100644 src/parsers/nasm/token.l.in diff --git a/.cvsignore b/.cvsignore index 1dc9ca73..441b2161 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,8 +1,6 @@ autom4te.cache -nasm-bison.y nasm-bison.c nasm-bison.h -nasm-token.l nasm-token.c yapp-token.c yasm diff --git a/frontends/yasm/yasm.c b/frontends/yasm/yasm.c index 07c4a536..a8c7c33d 100644 --- a/frontends/yasm/yasm.c +++ b/frontends/yasm/yasm.c @@ -289,7 +289,7 @@ main(int argc, char *argv[]) } /* Get initial BITS setting from object format */ - x86_mode_bits = cur_objfmt->default_mode_bits; + /*x86_mode_bits = cur_objfmt->default_mode_bits;*/ /* Parse! 
*/ sections = cur_parser->do_parse(cur_parser, in, in_filename); diff --git a/libyasm/arch.c b/libyasm/arch.c index 5a997421..780fb26e 100644 --- a/libyasm/arch.c +++ b/libyasm/arch.c @@ -22,9 +22,146 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "globals.h" +#include "expr.h" + #include "bytecode.h" #include "arch.h" + arch *cur_arch; +insn_operand * +operand_new_reg(unsigned long reg) +{ + insn_operand *retval = xmalloc(sizeof(insn_operand)); + + retval->type = INSN_OPERAND_REG; + retval->data.reg = reg; + retval->targetmod = 0; + retval->size = 0; + + return retval; +} + +insn_operand * +operand_new_segreg(unsigned long segreg) +{ + insn_operand *retval = xmalloc(sizeof(insn_operand)); + + retval->type = INSN_OPERAND_SEGREG; + retval->data.reg = segreg; + retval->targetmod = 0; + retval->size = 0; + + return retval; +} + +insn_operand * +operand_new_mem(/*@only@*/ effaddr *ea) +{ + insn_operand *retval = xmalloc(sizeof(insn_operand)); + + retval->type = INSN_OPERAND_MEMORY; + retval->data.ea = ea; + retval->targetmod = 0; + retval->size = 0; + + return retval; +} + +insn_operand * +operand_new_imm(/*@only@*/ expr *val) +{ + insn_operand *retval; + const unsigned long *reg; + + reg = expr_get_reg(&val, 0); + if (reg) { + retval = operand_new_reg(*reg); + expr_delete(val); + } else { + retval = xmalloc(sizeof(insn_operand)); + retval->type = INSN_OPERAND_IMM; + retval->data.val = val; + retval->targetmod = 0; + retval->size = 0; + } + + return retval; +} + +void +operand_print(FILE *f, const insn_operand *op) +{ + switch (op->type) { + case INSN_OPERAND_REG: + fprintf(f, "%*sReg=", indent_level, ""); + cur_arch->reg_print(f, op->data.reg); + fprintf(f, "\n"); + break; + case INSN_OPERAND_SEGREG: + fprintf(f, "%*sSegReg=", indent_level, ""); + cur_arch->segreg_print(f, op->data.reg); + fprintf(f, "\n"); + break; + case INSN_OPERAND_MEMORY: + fprintf(f, "%*sMemory=\n", indent_level, ""); + indent_level++; + ea_print(f, op->data.ea); + 
indent_level--; + break; + case INSN_OPERAND_IMM: + fprintf(f, "%*sImm=", indent_level, ""); + expr_print(f, op->data.val); + fprintf(f, "\n"); + break; + } + fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod); + fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size); +} + +void +ops_delete(insn_operandhead *headp, int content) +{ + insn_operand *cur, *next; + + cur = STAILQ_FIRST(headp); + while (cur) { + next = STAILQ_NEXT(cur, link); + if (content) + switch (cur->type) { + case INSN_OPERAND_MEMORY: + ea_delete(cur->data.ea); + break; + case INSN_OPERAND_IMM: + expr_delete(cur->data.val); + break; + default: + break; + } + xfree(cur); + cur = next; + } + STAILQ_INIT(headp); +} + +/*@null@*/ insn_operand * +ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op) +{ + if (op) { + STAILQ_INSERT_TAIL(headp, op, link); + return op; + } + return (insn_operand *)NULL; +} + +void +ops_print(FILE *f, const insn_operandhead *headp) +{ + insn_operand *cur; + + STAILQ_FOREACH (cur, headp, link) + operand_print(f, cur); +} diff --git a/libyasm/arch.h b/libyasm/arch.h index 2e53ae30..18e3faf6 100644 --- a/libyasm/arch.h +++ b/libyasm/arch.h @@ -1,7 +1,7 @@ /* $IdPath$ * Architecture header file * - * Copyright (C) 2001 Peter Johnson + * Copyright (C) 2002 Peter Johnson * * This file is part of YASM. * @@ -22,6 +22,35 @@ #ifndef YASM_ARCH_H #define YASM_ARCH_H +typedef enum arch_check_id_retval { + ARCH_CHECK_ID_NONE = 0, /* just a normal identifier */ + ARCH_CHECK_ID_INSN, /* an instruction */ + ARCH_CHECK_ID_PREFIX, /* an instruction prefix */ + ARCH_CHECK_ID_REG, /* a register */ + ARCH_CHECK_ID_SEGREG, /* a segment register (for memory overrides) */ + ARCH_CHECK_ID_TARGETMOD /* an target modifier (for jumps) */ +} arch_check_id_retval; + +typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand) + insn_operandhead; + +typedef struct insn_operand insn_operand; + +/* Different assemblers order instruction operands differently. 
Also, some + * differ on how exactly various registers are specified. There's no great + * solution to this, as the parsers aren't supposed to have knowledge of the + * architectural internals, and the architecture is supposed to be parser- + * independent. To make things work, as a rather hackish solution, we give the + * architecture a little knowledge about the general "flavor" of the parser, + * and let the architecture decide what to do with it. Most architectures will + * probably not even use this, but it's required for some (x86 in particular) + * for correct behavior on all parsers. + */ +typedef enum arch_syntax_flavor { + ARCH_SYNTAX_FLAVOR_NASM = 1, /* like NASM */ + ARCH_SYNTAX_FLAVOR_GAS /* like GAS */ +} arch_syntax_flavor; + struct arch { /* one-line description of the architecture */ const char *name; @@ -29,6 +58,67 @@ struct arch { /* keyword used to select architecture */ const char *keyword; + struct { + /* All "data" below starts the parse initialized to 0. Thus, it is + * okay for a funtion to use/check previously stored data to see if + * it's been called before on the same piece of data. + */ + + /* Switches available instructions/registers/etc. based on a + * user-specified CPU identifier. Should modify behavior ONLY of + * parse functions! The bytecode and output functions should be able + * to handle any CPU. + */ + void (*switch_cpu) (const char *cpuid); + + /* Checks an generic identifier to see if it matches architecture + * specific names for instructions, registers, etc (see the + * arch_check_id_retval enum above for the various types this function + * can detect & return. Unrecognized identifiers should be returned + * as NONE so they can be treated as normal symbols. Any additional + * data beyond just the type (almost always necessary) should be + * returned into the space provided by the data parameter. + * Note: even though this is passed a data[4], only data[0] should be + * used for TARGETMOD, REG, and SEGREG return values. 
+ */ + arch_check_id_retval (*check_identifier) (unsigned long data[4], + const char *id); + + /* Architecture-specific directive support. Returns 1 if directive was + * not recognized. Returns 0 if directive was recognized, even if it + * wasn't valid. Should modify behavior ONLY of parse functions, much + * like switch_cpu() above. + */ + int (*directive) (const char *name, valparamhead *valparams, + /*@null@*/ valparamhead *objext_valparams, + sectionhead *headp); + + /* Creates an instruction. Creates a bytecode by matching the + * instruction data and the parameters given with a valid instruction. + * If no match is found (the instruction is invalid), returns NULL. + * All zero data indicates an empty instruction should be created. + */ + /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4], + int num_operands, /*@null@*/ + insn_operandhead *operands); + + /* Handle an instruction prefix by modifying bc as necessary. */ + void (*handle_prefix) (bytecode *bc, const unsigned long data[4]); + + /* Handle an segment register instruction prefix by modifying bc as + * necessary. + */ + void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg); + + /* Handle memory expression segment overrides by modifying ea as + * necessary. + */ + void (*handle_seg_override) (effaddr *ea, unsigned long segreg); + + /* Convert an expression into an effective address. */ + effaddr * (*ea_new_expr) (/*@keep@*/ expr *e); + } parse; + struct { /* Maximum used bytecode type value+1. Should be set to * BYTECODE_TYPE_BASE if no additional bytecode types are defined by @@ -48,10 +138,77 @@ struct arch { const section *sect, void *d, output_expr_func output_expr); } bc; + + /* Gets the equivalent register size in bytes. Returns 0 if there is no + * suitable equivalent size. 
+ */ + unsigned int (*get_reg_size) (unsigned long reg); + + void (*reg_print) (FILE *f, unsigned long reg); + void (*segreg_print) (FILE *f, unsigned long segreg); + + /* Deletes the arch-specific data in ea. May be NULL if no special + * deletion is required (e.g. there's no dynamically allocated pointers + * in the ea data). + */ + void (*ea_data_delete) (effaddr *ea); + + void (*ea_data_print) (FILE *f, const effaddr *ea); +}; + +struct insn_operand { + /*@reldef@*/ STAILQ_ENTRY(insn_operand) link; + + enum { + INSN_OPERAND_REG = 1, /* a register */ + INSN_OPERAND_SEGREG, /* a segment register */ + INSN_OPERAND_MEMORY, /* an effective address (memory reference) */ + INSN_OPERAND_IMM /* an immediate or jump target */ + } type; + + union { + unsigned long reg; /* arch data for reg/segreg */ + effaddr *ea; /* effective address for memory references */ + expr *val; /* value of immediate or jump target */ + } data; + + unsigned long targetmod; /* arch target modifier, 0 if none */ + + /* Specified size of the operand, in bytes. 0 if not user-specified. */ + unsigned int size; }; +/* insn_operand constructors. operand_new_imm() will look for cases of a + * single register and create an INSN_OPERAND_REG variant of insn_operand. + */ +insn_operand *operand_new_reg(unsigned long reg); +insn_operand *operand_new_segreg(unsigned long segreg); +insn_operand *operand_new_mem(/*@only@*/ effaddr *ea); +insn_operand *operand_new_imm(/*@only@*/ expr *val); + +void operand_print(FILE *f, const insn_operand *op); + +#define ops_initialize(headp) STAILQ_INIT(headp) +#define ops_first(headp) STAILQ_FIRST(headp) +#define ops_next(cur) STAILQ_NEXT(cur, link) + +/* Deletes operands linked list. Deletes content of each operand if content i + * nonzero. + */ +void ops_delete(insn_operandhead *headp, int content); + +/* Adds op to the list of operands headp. 
+ * NOTE: Does not make a copy of op; so don't pass this function + * static or local variables, and discard the op pointer after calling + * this function. If op was actually appended (it wasn't NULL), then + * returns op, otherwise returns NULL. + */ +/*@null@*/ insn_operand *ops_append(insn_operandhead *headp, + /*@returned@*/ /*@null@*/ insn_operand *op); + +void ops_print(FILE *f, const insn_operandhead *headp); + /* Available architectures */ -#include "arch/x86/x86arch.h" extern arch x86_arch; extern arch *cur_arch; diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index 15aa5b39..6a46d8c3 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -103,6 +103,12 @@ imm_new_expr(expr *expr_ptr) return im; } +const expr * +ea_get_disp(const effaddr *ptr) +{ + return ptr->disp; +} + void ea_set_len(effaddr *ptr, unsigned char len) { @@ -125,6 +131,30 @@ ea_set_nosplit(effaddr *ptr, unsigned char nosplit) ptr->nosplit = nosplit; } +/*@-nullstate@*/ +void +ea_delete(effaddr *ea) +{ + if (cur_arch->ea_data_delete) + cur_arch->ea_data_delete(ea); + expr_delete(ea->disp); + xfree(ea); +} +/*@=nullstate@*/ + +/*@-nullstate@*/ +void +ea_print(FILE *f, const effaddr *ea) +{ + fprintf(f, "%*sDisp=", indent_level, ""); + expr_print(f, ea->disp); + fprintf(f, "\n%*sLen=%u\n", indent_level, "", (unsigned int)ea->len); + fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit); + if (cur_arch->ea_data_print) + cur_arch->ea_data_print(f, ea); +} +/*@=nullstate@*/ + void bc_set_multiple(bytecode *bc, expr *e) { @@ -258,6 +288,7 @@ bc_delete(bytecode *bc) break; case BC_OBJFMT_DATA: objfmt_data = bc_get_data(bc); + assert(cur_objfmt != NULL); if (cur_objfmt->bc_objfmt_data_delete) cur_objfmt->bc_objfmt_data_delete(objfmt_data->type, objfmt_data->data); @@ -336,6 +367,7 @@ bc_print(FILE *f, const bytecode *bc) case BC_OBJFMT_DATA: objfmt_data = bc_get_const_data(bc); fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, ""); + assert(cur_objfmt != NULL); if 
(cur_objfmt->bc_objfmt_data_print) cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type, objfmt_data->data); @@ -408,7 +440,7 @@ bc_resolve_reserve(bytecode_reserve *reserve, unsigned long *len, int save, expr_expand_labelequ(*tempp, sect, 1, resolve_label); num = expr_get_intnum(tempp); if (!num) { - if (expr_contains(temp, EXPR_FLOAT)) + if (temp && expr_contains(temp, EXPR_FLOAT)) ErrorAt(line, _("expression must not contain floating point value")); retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN; @@ -534,10 +566,10 @@ bc_resolve(bytecode *bc, int save, const section *sect, case BC_ALIGN: /* TODO */ InternalError(_("TODO: align bytecode not implemented!")); - break; + /*break;*/ case BC_OBJFMT_DATA: InternalError(_("resolving objfmt data bytecode?")); - break; + /*break;*/ default: if (bc->type < cur_arch->bc.type_max) retval = cur_arch->bc.bc_resolve(bc, save, sect, @@ -559,7 +591,7 @@ bc_resolve(bytecode *bc, int save, const section *sect, expr_expand_labelequ(*tempp, sect, 1, resolve_label); num = expr_get_intnum(tempp); if (!num) { - if (expr_contains(temp, EXPR_FLOAT)) + if (temp && expr_contains(temp, EXPR_FLOAT)) ErrorAt(bc->line, _("expression must not contain floating point value")); retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN; @@ -716,7 +748,7 @@ bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize, case BC_ALIGN: /* TODO */ InternalError(_("TODO: align bytecode not implemented!")); - break; + /*break;*/ case BC_OBJFMT_DATA: objfmt_data = bc_get_data(bc); if (output_bc_objfmt_data) diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index d61c6bcd..e68a7ac2 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -43,8 +43,11 @@ typedef enum { /*@only@*/ immval *imm_new_int(unsigned long int_val); /*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e); +/*@observer@*/ const expr *ea_get_disp(const effaddr *ea); void ea_set_len(effaddr *ea, unsigned char len); void ea_set_nosplit(effaddr *ea, unsigned char nosplit); +void 
ea_delete(/*@only@*/ effaddr *ea); +void ea_print(FILE *f, const effaddr *ea); void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e); diff --git a/libyasm/expr-int.h b/libyasm/expr-int.h index 1f0ed2df..a562c2ba 100644 --- a/libyasm/expr-int.h +++ b/libyasm/expr-int.h @@ -39,11 +39,7 @@ struct ExprItem { expr *expn; intnum *intn; floatnum *flt; - /* FIXME: reg structure is moderately x86-specific (namely size) */ - struct reg { - unsigned char num; - unsigned char size; /* in bits, eg AX=16, EAX=32 */ - } reg; + unsigned long reg; } data; }; @@ -62,6 +58,9 @@ struct expr { * * Stops early (and returns 1) if func returns 1. Otherwise returns 0. */ +int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d, + int (*func) (/*@null@*/ const ExprItem *ei, + /*@null@*/ void *d)); int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d, int (*func) (/*@null@*/ ExprItem *ei, /*@null@*/ void *d)); @@ -88,6 +87,6 @@ void expr_order_terms(expr *e); /* Copy entire expression EXCEPT for index "except" at *top level only*. 
*/ expr *expr_copy_except(const expr *e, int except); -int expr_contains(expr *e, ExprType t); +int expr_contains(const expr *e, ExprType t); #endif diff --git a/libyasm/expr.c b/libyasm/expr.c index ade3789a..aa277528 100644 --- a/libyasm/expr.c +++ b/libyasm/expr.c @@ -31,8 +31,11 @@ #include "expr.h" #include "symrec.h" +#include "bytecode.h" #include "section.h" +#include "arch.h" + #include "expr-int.h" @@ -135,12 +138,11 @@ ExprFloat(floatnum *f) } ExprItem * -ExprReg(unsigned char reg, unsigned char size) +ExprReg(unsigned long reg) { ExprItem *e = xmalloc(sizeof(ExprItem)); e->type = EXPR_REG; - e->data.reg.num = reg; - e->data.reg.size = size; + e->data.reg = reg; return e; } @@ -662,8 +664,7 @@ expr_copy_except(const expr *e, int except) dest->data.flt = floatnum_copy(src->data.flt); break; case EXPR_REG: - dest->data.reg.num = src->data.reg.num; - dest->data.reg.size = src->data.reg.size; + dest->data.reg = src->data.reg; break; default: break; @@ -709,16 +710,16 @@ expr_delete(expr *e) /*@=mustfree@*/ static int -expr_contains_callback(ExprItem *ei, void *d) +expr_contains_callback(const ExprItem *ei, void *d) { ExprType *t = d; return (ei->type & *t); } int -expr_contains(expr *e, ExprType t) +expr_contains(const expr *e, ExprType t) { - return expr_traverse_leaves_in(e, &t, expr_contains_callback); + return expr_traverse_leaves_in_const(e, &t, expr_contains_callback); } /* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like @@ -801,6 +802,33 @@ expr_traverse_nodes_post(expr *e, void *d, return func(e, d); } +/* Traverse over expression tree in order, calling func for each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
+ */ +int +expr_traverse_leaves_in_const(const expr *e, void *d, + int (*func) (/*@null@*/ const ExprItem *ei, + /*@null@*/ void *d)) +{ + int i; + + if (!e) + return 0; + + for (i=0; i<e->numterms; i++) { + if (e->terms[i].type == EXPR_EXPR) { + if (expr_traverse_leaves_in_const(e->terms[i].data.expn, d, func)) + return 1; + } else { + if (func(&e->terms[i], d)) + return 1; + } + } + return 0; +} + /* Traverse over expression tree in order, calling func for each leaf * (non-operation). The data pointer d is passed to each func call. * @@ -877,10 +905,23 @@ expr_get_symrec(expr **ep, int simplify) } /*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/ +/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/ +const unsigned long * +expr_get_reg(expr **ep, int simplify) +{ + if (simplify) + *ep = expr_simplify(*ep); + + if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_REG) + return &((*ep)->terms[0].data.reg); + else + return NULL; +} +/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/ + void expr_print(FILE *f, const expr *e) { - static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"}; char opstr[3]; int i; @@ -982,9 +1023,7 @@ expr_print(FILE *f, const expr *e) floatnum_print(f, e->terms[i].data.flt); break; case EXPR_REG: - if (e->terms[i].data.reg.size == 32) - fprintf(f, "e"); - fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]); + cur_arch->reg_print(f, e->terms[i].data.reg); break; case EXPR_NONE: break; diff --git a/libyasm/expr.h b/libyasm/expr.h index fb97248b..4521286c 100644 --- a/libyasm/expr.h +++ b/libyasm/expr.h @@ -31,7 +31,7 @@ typedef struct ExprItem ExprItem; /*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *); /*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *); /*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *); -/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char size); +/*@only@*/ ExprItem *ExprReg(unsigned long reg); #define expr_new_tree(l,o,r) \ expr_new ((o), ExprExpr(l), ExprExpr(r)) @@ -79,6 +79,13 
@@ void expr_expand_labelequ(expr *e, const section *srcsect, int withstart, /*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep, int simplify); +/* Gets the register value of e if the expression is just a register. If the + * expression is more complex, returns NULL. Simplifies the expr first if + * simplify is nonzero. + */ +/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep, + int simplify); + void expr_print(FILE *f, /*@null@*/ const expr *); #endif diff --git a/libyasm/tests/bytecode_test.c b/libyasm/tests/bytecode_test.c index f3411512..f702596c 100644 --- a/libyasm/tests/bytecode_test.c +++ b/libyasm/tests/bytecode_test.c @@ -25,7 +25,7 @@ #include "bytecode.h" #include "bc-int.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" START_TEST(test_x86_ea_new_reg) { diff --git a/libyasm/tests/memexpr_test.c b/libyasm/tests/memexpr_test.c index ec9c001d..86f7f320 100644 --- a/libyasm/tests/memexpr_test.c +++ b/libyasm/tests/memexpr_test.c @@ -32,7 +32,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" typedef enum { REG_AX = 0, diff --git a/modules/arch/x86/Makefile.inc b/modules/arch/x86/Makefile.inc index 3d16e3d3..7bfef4d0 100644 --- a/modules/arch/x86/Makefile.inc +++ b/modules/arch/x86/Makefile.inc @@ -3,10 +3,19 @@ YASMARCHFILES += \ src/arch/x86/x86arch.c \ src/arch/x86/x86arch.h \ - src/arch/x86/x86-int.h \ src/arch/x86/x86bc.c \ - src/arch/x86/x86expr.c + src/arch/x86/x86expr.c \ + x86id.c + +x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl + re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@ + +BUILT_SOURCES += \ + x86id.c + +CLEANFILES += \ + x86id.c EXTRA_DIST += \ src/arch/x86/README \ - src/arch/x86/instrs.dat + src/arch/x86/x86id.re diff --git a/modules/arch/x86/instrs.dat b/modules/arch/x86/instrs.dat deleted file mode 100644 index 02e5ad9a..00000000 --- 
a/modules/arch/x86/instrs.dat +++ /dev/null @@ -1,1208 +0,0 @@ -; $IdPath$ -; List of valid instruction/operand combinations -; -; Copyright (C) 2001 Peter Johnson -; -; This file is part of YASM. -; -; YASM is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. -; -; YASM is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -; -; Meanings of codes: -; $x refers to operand x -; "nil" in a field indicates the lack of that field in the instruction -; (there MUST be some text in every field in this document) -; Sizes are in bits (8,16,32 are the only valid quantities) -; -; Column definitions: -; Inst - Instruction, should be lowercase -; Operands - Single combination of valid operands -; "TO" is not counted in the operand count. -; OpSize - Fixed operand size. Can generate prefix byte. -; Opcode - One or two bytes of opcode. -; EffAddr - Effective Address (ModRM/SIB/Off). First value is the memory -; operand, second specifies what value goes into the reg/spare -; bits in the ModRM byte. -; $xr indicates operand is register, not ModRM (needs convert to RM) -; $xi indicates operand is immediate (2nd parm is size in bits) -; Imm - Immediate source operand and forced size (in bits). -; "s" after size indicates signed number -; A number instead of a $x is a hex constant value. -; -; A ':' at the beginning of the line means that the instruction following the -; ':' is a synonym for the instruction in the 2nd column. 
-; -; See the parser file for a list of possible operand values and their meanings. -; gen_instr.pl translates this list into lexer and parser code. -; -; Instructions are listed in the same order as that in GNU binutils -; /include/opcode/i386.h, used for the GAS assembler. See -; . -; -; TODO: -; Finish instructions (may require changing parser code). -; Doublecheck instruction encodings, allowable operands. -; Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes). -; Doublecheck AMD and Cyrix instructions. -; Doublecheck the segreg mov instructions. -; -; Instruction Groupings (to shorten parser code). -; The $0.1, $0.2, and $0.3 will get replaced with the parameters given for -; the instruction using the group during lexing & parsing. These parameters -; may be in the opcode, opsize, effaddr, or immediate. -; When opsize is a parameter, its usage in instructions that use the group -; looks slightly different than normal, because the parameters are -; specified in hexidecimal while the normal opsize usage is in decimal. -; Thus 10 and 20 are used instead of 16 and 32 respectively. -; The first CPU grouping for the instruction is OR'ed with the CPU value in -; the group CPU fields with @0 in their list. This allows one grouping to -; be used for instructions with different CPU values. -; Restrictions on groupings: -; - $0.? may not appear in the operand, the first part of the effaddr, the -; second part of the imm, or the CPU fields. -; - @0, @1 may only appear in the CPU field. -; Restrictions on instructions based on groupings: -; - no other operand combinations are allowed (eg, if an instruction uses a -; group, that must be the ONLY line for the instruction) -; -; Notes on code generation: -; Each group generates a lex token of the group name (sans !). Bison rules -; are generated for each of the operand combinations for the group just as -; with a regular instruction, except for the addition of the $0.? fields. -; Each $0.? field is replaced by $1.d? 
in the generated code (eg, -; $0.1->$1.d1, etc). -; When an instruction that uses a group is encountered, eg: -; inst!grpname parm1[,parm2[,parm3]] -; The following lex code is generated: -; inst { yylval.groupdata[0]=0xparm1; return GRPNAME; } -; (and additional yylval.groupdata[#-1]=0xparm#; if needed) -; -; KEY -; -; !Grp Operands OpSize Opcode EffAddr Imm CPU -; Inst Operands OpSize Opcode EffAddr Imm CPU -; Inst!Grp Parameters CPU @0 CPU @1 -; -; Groupings used throughout -; -; One byte opcode instructions with no operands: -!onebyte nil $0.1 $0.2 nil nil @0 -; Two byte opcode instructions with no operands: -!twobyte nil nil $0.1,$0.2 nil nil @0 -; Three byte opcode instructions with no operands: -!threebyte nil nil $0.1,$0.2,$0.3 nil nil @0 -; One byte opcode instructions with general memory operand: -!onebytemem mem nil $0.1 $1,$0.2 nil @0 -; Two byte opcode instructions with general memory operand: -!twobytemem mem nil $0.1,$0.2 $1,$0.3 nil @0 -; -; Move instructions -; -; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89). 
-mov reg8,reg8 nil 88 $1r,$2 nil 8086 -mov reg16,reg16 16 89 $1r,$2 nil 8086 -mov reg32,reg32 32 89 $1r,$2 nil 386 -mov mem,reg8 nil 88 $1,$2 nil 8086 -mov mem8x,reg8 nil 88 $1,$2 nil 8086 -mov mem,reg16 16 89 $1,$2 nil 8086 -mov mem16x,reg16 16 89 $1,$2 nil 8086 -mov mem,reg32 32 89 $1,$2 nil 386 -mov mem32x,reg32 32 89 $1,$2 nil 386 -mov reg8,mem8 nil 8A $2,$1 nil 8086 -mov reg16,mem16 16 8B $2,$1 nil 8086 -mov reg32,mem32 32 8B $2,$1 nil 386 -mov mem,segreg nil 8C $1,$2 nil 8086 -mov reg16,segreg 16 8C $1r,$2 nil 8086 -mov mem16x,segreg 16 8C $1,$2 nil 8086 -mov reg32,segreg 32 8C $1r,$2 nil 386 -mov mem32x,segreg 32 8C $1,$2 nil 386 -mov segreg,mem nil 8E $2,$1 nil 8086 -mov segreg,rm16x nil 8E $2,$1 nil 8086 -mov segreg,rm32x nil 8E $2,$1 nil 386 -;mov reg_al,memoff8 -;mov reg_ax,memoff16 -;mov reg_eax,memoff32 -;mov memoff8,reg_al -;mov memoff16,reg_ax -;mov memoff32,reg_eax -mov reg8,imm8 nil B0+$1 nil $2,8 8086 -mov reg16,imm16 16 B8+$1 nil $2,16 8086 -mov reg32,imm32 32 B8+$1 nil $2,32 386 -mov mem8x,imm8 nil C6 $1,0 $2,8 8086 -mov mem,imm8x nil C6 $1,0 $2,8 8086 -mov mem16x,imm16 16 C7 $1,0 $2,16 8086 -mov mem,imm16x 16 C7 $1,0 $2,16 8086 -mov mem32x,imm32 32 C7 $1,0 $2,32 8086 -mov mem,imm32x 32 C7 $1,0 $2,32 8086 -mov CRREG_NOTCR4,reg32 nil 0F,22 $2r,$1 nil 386,PRIV -mov CR4,reg32 nil 0F,22 $2r,$1 nil P5,PRIV -mov reg32,CRREG_NOTCR4 nil 0F,20 $1r,$2 nil 386,PRIV -mov reg32,CR4 nil 0F,20 $1r,$2 nil P5,PRIV -mov reg32,DRREG nil 0F,21 $1r,$2 nil 386,PRIV -mov DRREG,reg32 nil 0F,23 $2r,$1 nil 386,PRIV -; -; Move with sign/zero extend -; -!movszx reg16,rm8 16 0F,$0.1 $2,$1 nil 386 -!movszx reg32,rm8x 32 0F,$0.1 $2,$1 nil 386 -!movszx reg32,rm16x nil 0F,$0.1+1 $2,$1 nil 386 -movsx!movszx BE -movzx!movszx B6 -; -; Push instructions -; -push mem16x 16 FF $1,6 nil 8086 -push mem32x 32 FF $1,6 nil 386 -push reg16 16 50+$1 nil nil 8086 -push reg32 32 50+$1 nil nil 386 -push imm8x nil 6A nil $1,8 8086 -push imm16x 16 68 nil $1,16 8086 -push imm32x 32 68 nil $1,32 
386 -push reg_cs nil 0E nil nil 8086 -push reg_ss nil 16 nil nil 8086 -push reg_ds nil 1E nil nil 8086 -push reg_es nil 06 nil nil 8086 -push reg_fs nil 0F,A0 nil nil 386 -push reg_gs nil 0F,A8 nil nil 386 -pusha!onebyte nil,60 186 -pushad!onebyte 20,60 386 -pushaw!onebyte 10,60 186 -; -; Pop instructions -; -pop mem16x 16 8F $1,0 nil 8086 -pop mem32x 32 8F $1,0 nil 386 -pop reg16 16 58+$1 nil nil 8086 -pop reg32 32 58+$1 nil nil 386 -pop reg_ds nil 1F nil nil 8086 -pop reg_es nil 07 nil nil 8086 -pop reg_ss nil 17 nil nil 8086 -pop reg_fs nil 0F,A1 nil nil 386 -pop reg_gs nil 0F,A9 nil nil 386 -popa!onebyte nil,61 186 -popad!onebyte 20,61 386 -popaw!onebyte 10,61 186 -; -; Exchange instructions -; -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg8,reg8 nil 86 $1r,$2 nil 8086 -xchg mem,reg8 nil 86 $1,$2 nil 8086 -xchg mem8x,reg8 nil 86 $1,$2 nil 8086 -xchg reg8,mem8 nil 86 $2,$1 nil 8086 -xchg reg_ax,reg16 16 90+$2 nil nil 8086 -xchg reg16,reg_ax 16 90+$1 nil nil 8086 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg16,reg16 16 87 $1r,$2 nil 8086 -xchg mem,reg16 16 87 $1,$2 nil 8086 -xchg mem16x,reg16 16 87 $1,$2 nil 8086 -xchg reg16,mem16 16 87 $2,$1 nil 8086 -xchg reg_eax,reg32 32 90+$2 nil nil 386 -xchg reg32,reg_eax 32 90+$1 nil nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg32,reg32 32 87 $1r,$2 nil 386 -xchg mem,reg32 32 87 $1,$2 nil 386 -xchg mem32x,reg32 32 87 $1,$2 nil 386 -xchg reg32,mem32 32 87 $2,$1 nil 386 -; -; In/out from ports -; -in reg_al,imm8 nil E4 nil $2,8 8086 -in reg_ax,imm8 16 E5 nil $2,8 8086 -in reg_eax,imm8 32 E5 nil $2,8 386 -in reg_al,reg_dx nil EC nil nil 8086 -in reg_ax,reg_dx 16 ED nil nil 8086 -in reg_eax,reg_dx 32 ED nil nil 386 -out imm8,reg_al nil E6 nil $1,8 8086 -out imm8,reg_ax 16 E7 nil $1,8 8086 -out imm8,reg_eax 32 E7 nil $1,8 386 -out reg_dx,reg_al nil EE nil nil 8086 -out reg_dx,reg_ax 16 EF nil nil 8086 -out reg_dx,reg_eax 32 EF nil nil 386 -; -; Load effective 
address -; -lea reg16,mem16 16 8D $2,$1 nil 8086 -lea reg32,mem32 32 8D $2,$1 nil 386 -; -; Load segment registers from memory -; -lds reg16,mem 16 C5 $2,$1 nil 8086 -lds reg32,mem 32 C5 $2,$1 nil 386 -les reg16,mem 16 C4 $2,$1 nil 8086 -les reg32,mem 32 C4 $2,$1 nil 386 -lfs reg16,mem 16 0F,B4 $2,$1 nil 386 -lfs reg32,mem 32 0F,B4 $2,$1 nil 386 -lgs reg16,mem 16 0F,B5 $2,$1 nil 386 -lgs reg32,mem 32 0F,B5 $2,$1 nil 386 -lss reg16,mem 16 0F,B2 $2,$1 nil 386 -lss reg32,mem 32 0F,B2 $2,$1 nil 386 -; -; Flags register instructions -; -clc!onebyte nil,F8 8086 -cld!onebyte nil,FC 8086 -cli!onebyte nil,FA 8086 -clts!twobyte 0F,06 286,PRIV -cmc!onebyte nil,F5 8086 -lahf!onebyte nil,9F 8086 -sahf!onebyte nil,9E 8086 -pushf!onebyte nil,9C 8086 -pushfd!onebyte 20,9C 386 -pushfw!onebyte 10,9C 8086 -popf!onebyte nil,9D 8086 -popfd!onebyte 20,9D 386 -popfw!onebyte 10,9D 8086 -stc!onebyte nil,F9 8086 -std!onebyte nil,FD 8086 -sti!onebyte nil,FB 8086 -; -; Arithmetic -; -; General arithmetic -!arith reg_al,imm8 nil $0.1+4 nil $2,8 8086 -!arith reg_ax,imm16 16 $0.1+5 nil $2,16 8086 -!arith reg_eax,imm32 32 $0.1+5 nil $2,32 386 -!arith reg8,imm8 nil 80 $1r,$0.2 $2,8 8086 -!arith mem8x,imm nil 80 $1,$0.2 $2,8 8086 -!arith mem,imm8x nil 80 $1,$0.2 $2,8 8086 -!arith reg16,imm 16 81 $1r,$0.2 $2,16 8086 -!arith mem16x,imm 16 81 $1,$0.2 $2,16 8086 -!arith reg16,imm16x 16 81 $1r,$0.2 $2,16 8086 -!arith mem,imm16x 16 81 $1,$0.2 $2,16 8086 -!arith reg32,imm 32 81 $1r,$0.2 $2,32 386 -!arith mem32x,imm 32 81 $1,$0.2 $2,32 386 -!arith reg32,imm32x 32 81 $1r,$0.2 $2,32 386 -!arith mem,imm32x 32 81 $1,$0.2 $2,32 386 -!arith reg16,imm8x 16 83 $1r,$0.2 $2,8s 8086 -!arith mem16x,imm8x 16 83 $1,$0.2 $2,8s 8086 -!arith reg32,imm8x 32 83 $1r,$0.2 $2,8s 386 -!arith mem32x,imm8x 32 83 $1,$0.2 $2,8s 386 -; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1). 
-!arith reg8,reg8 nil $0.1 $1r,$2 nil 8086 -!arith reg16,reg16 16 $0.1+1 $1r,$2 nil 8086 -!arith reg32,reg32 32 $0.1+1 $1r,$2 nil 386 -!arith mem,reg8 nil $0.1 $1,$2 nil 8086 -!arith mem8x,reg8 nil $0.1 $1,$2 nil 8086 -!arith mem,reg16 16 $0.1+1 $1,$2 nil 8086 -!arith mem16x,reg16 16 $0.1+1 $1,$2 nil 8086 -!arith mem,reg32 32 $0.1+1 $1,$2 nil 386 -!arith mem32x,reg32 32 $0.1+1 $1,$2 nil 386 -!arith reg8,mem8 nil $0.1+2 $2,$1 nil 8086 -!arith reg16,mem16 16 $0.1+3 $2,$1 nil 8086 -!arith reg32,mem32 32 $0.1+3 $2,$1 nil 386 -; INC/DEC -!incdec rm8x nil FE $1,$0.1 nil 8086 -!incdec mem16x 16 FF $1,$0.1 nil 8086 -!incdec mem32x 32 FF $1,$0.1 nil 386 -!incdec reg16 16 $0.2+$1 nil nil 8086 -!incdec reg32 32 $0.2+$1 nil nil 386 -; "F6" opcodes (DIV/IDIV/MUL/NEG/NOT): -!groupf6 rm8x nil F6 $1,$0.1 nil 8086 -!groupf6 rm16x 16 F7 $1,$0.1 nil 8086 -!groupf6 rm32x 32 F7 $1,$0.1 nil 386 -add!arith 00,0 -inc!incdec 0,40 -sub!arith 28,5 -dec!incdec 1,48 -sbb!arith 18,3 -cmp!arith 38,7 -test reg_al,imm8 nil A8 nil $2,8 8086 -test reg_ax,imm16 16 A9 nil $2,16 8086 -test reg_eax,imm32 32 A9 nil $2,32 386 -test reg8,imm8 nil F6 $1r,0 $2,8 8086 -test mem8x,imm nil F6 $1,0 $2,8 8086 -test mem,imm8x nil F6 $1,0 $2,8 8086 -test reg16,imm16 16 F7 $1r,0 $2,16 8086 -test mem16x,imm 16 F7 $1,0 $2,16 8086 -test mem,imm16x 16 F7 $1,0 $2,16 8086 -test reg32,imm32 32 F7 $1r,0 $2,32 386 -test mem32x,imm 32 F7 $1,0 $2,32 386 -test mem,imm32x 32 F7 $1,0 $2,32 386 -; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1 -test reg8,reg8 nil 84 $1r,$2 nil 8086 -test reg16,reg16 16 85 $1r,$2 nil 8086 -test reg32,reg32 32 85 $1r,$2 nil 386 -test mem,reg8 nil 84 $1,$2 nil 8086 -test mem8x,reg8 nil 84 $1,$2 nil 8086 -test mem,reg16 16 85 $1,$2 nil 8086 -test mem16x,reg16 16 85 $1,$2 nil 8086 -test mem,reg32 32 85 $1,$2 nil 386 -test mem32x,reg32 32 85 $1,$2 nil 386 -test reg8,mem8 nil 84 $2,$1 nil 8086 -test reg16,mem16 16 85 $2,$1 nil 8086 -test reg32,mem32 32 85 $2,$1 nil 386 -and!arith 20,4 
-or!arith 08,1 -xor!arith 30,6 -adc!arith 10,2 -neg!groupf6 3 -not!groupf6 2 -aaa!onebyte nil,37 8086 -aas!onebyte nil,3F 8086 -daa!onebyte nil,27 8086 -das!onebyte nil,2F 8086 -aad nil nil D5,0A nil nil 8086 -aad imm8 nil D5 nil $1,8 8086 -aam nil nil D4,0A nil nil 8086 -aam imm8 nil D4 nil $1,8 8086 -; -; Conversion instructions -; -cbw!onebyte 10,98 8086 -cwde!onebyte 20,98 386 -cwd!onebyte 10,99 8086 -cdq!onebyte 20,99 386 -; -; Multiplication and division -; -mul!groupf6 4 -imul rm8x nil F6 $1,5 nil 8086 -imul rm16x 16 F7 $1,5 nil 8086 -imul rm32x 32 F7 $1,5 nil 386 -imul reg16,rm16 16 0F,AF $2,$1 nil 386 -imul reg32,rm32 32 0F,AF $2,$1 nil 386 -imul reg16,rm16,imm8x 16 6B $2,$1 $3,8s 186 -imul reg32,rm32,imm8x 32 6B $2,$1 $3,8s 386 -imul reg16,imm8x 16 6B $1r,$1 $2,8s 186 -imul reg32,imm8x 32 6B $1r,$1 $2,8s 386 -imul reg16,rm16,imm16 16 69 $2,$1 $3,16s 186 -imul reg32,rm32,imm32 32 69 $2,$1 $3,32s 386 -imul reg16,imm16 16 69 $1r,$1 $2,16s 186 -imul reg32,imm32 32 69 $1r,$1 $2,32s 386 -div!groupf6 6 -idiv!groupf6 7 -; -; Shifts -; -; Standard -!shift rm8x,ONE nil D0 $1,$0.1 nil 8086 -!shift rm8x,reg_cl nil D2 $1,$0.1 nil 8086 -!shift rm8x,imm8 nil C0 $1,$0.1 $2,8 186 -!shift rm16x,ONE 16 D1 $1,$0.1 nil 8086 -!shift rm16x,reg_cl 16 D3 $1,$0.1 nil 8086 -!shift rm16x,imm8 16 C1 $1,$0.1 $2,8 186 -!shift rm32x,ONE 32 D1 $1,$0.1 nil 386 -!shift rm32x,reg_cl 32 D3 $1,$0.1 nil 386 -!shift rm32x,imm8 32 C1 $1,$0.1 $2,8 386 -; Doubleword -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg16,reg16,imm8 16 0F,$0.1 $1r,$2 $3,8 386 -!shlrd mem,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386 -!shlrd mem16x,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg16,reg16,reg_cl 16 0F,$0.1+1 $1r,$2 nil 386 -!shlrd mem,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386 -!shlrd mem16x,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg32,reg32,imm8 32 0F,$0.1 $1r,$2 $3,8 386 -!shlrd 
mem,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386 -!shlrd mem32x,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg32,reg32,reg_cl 32 0F,$0.1+1 $1r,$2 nil 386 -!shlrd mem,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386 -!shlrd mem32x,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386 -rol!shift 0 -ror!shift 1 -rcl!shift 2 -rcr!shift 3 -sal!shift 4 -shl!shift 4 -shr!shift 5 -sar!shift 7 -shld!shlrd A4 -shrd!shlrd AC -; -; Control transfer instructions (unconditional) -; -; Special format for relative targets: -; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU -; -!jmpcall target nil $0.1?$0.2 $0.3 8086 8086 -!jmpcall imm:imm nil $0.4 $2i,nil $1,16 8086 -!jmpcall WORD imm:imm 16 $0.4 $2i,16 $1,16 8086 -!jmpcall DWORD imm:imm 32 $0.4 $2i,32 $1,16 386 -!jmpcall memfar nil FF $1,$0.4+1 nil 8086 -!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086 -!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386 -!jmpcall mem nil FF $1,$0.4 nil 8086 -!jmpcall rm16x 16 FF $1,$0.4 nil 8086 -!jmpcall rm32x 32 FF $1,$0.4 nil 386 -call!jmpcall nil,0,E8,9A,2 -jmp!jmpcall 1,EB,E9,EA,4 -ret!onebyte nil,C3 8086 -retn nil nil C3 nil nil 8086 -retf nil nil CB nil nil 8086 -retn imm16 nil C2 nil $1,16 8086 -retf imm16 nil CA nil $1,16 8086 -enter imm16,imm8 nil C8 $1i,16 $2,8 186 -leave!onebyte nil,C9 186 -; -; Conditional jumps -; -!jcc target nil 70+$0.1 0F,80+$0.1 8086 386 -jo!jcc 0 -jno!jcc 1 -jb!jcc 2 -jc!jcc 2 -jnae!jcc 2 -jnb!jcc 3 -jnc!jcc 3 -jae!jcc 3 -je!jcc 4 -jz!jcc 4 -jne!jcc 5 -jnz!jcc 5 -jbe!jcc 6 -jna!jcc 6 -jnbe!jcc 7 -ja!jcc 7 -js!jcc 8 -jns!jcc 9 -jp!jcc A -jpe!jcc A -jnp!jcc B -jpo!jcc B -jl!jcc C -jnge!jcc C -jnl!jcc D -jge!jcc D -jle!jcc E -jng!jcc E -jnle!jcc F -jg!jcc F -jcxz target 16 E3 nil 8086 8086 -jecxz target 32 E3 nil 386 386 -; -; Loop instructions -; -!loopg target nil E0+$0.1 nil 8086 8086 -!loopg target,reg_cx 16 E0+$0.1 nil 8086 8086 -!loopg target,reg_ecx 32 E0+$0.1 nil 386 386 -loop!loopg 2 -loopz!loopg 1 -loope!loopg 1 -loopnz!loopg 0 
-loopne!loopg 0 -; -; Set byte on flag instructions -; -!setcc rm8 nil 0F,90+$0.1 $1,2 nil 386 -seto!setcc 0 -setno!setcc 1 -setb!setcc 2 -setc!setcc 2 -setnae!setcc 2 -setnb!setcc 3 -setnc!setcc 3 -setae!setcc 3 -sete!setcc 4 -setz!setcc 4 -setne!setcc 5 -setnz!setcc 5 -setbe!setcc 6 -setna!setcc 6 -setnbe!setcc 7 -seta!setcc 7 -sets!setcc 8 -setns!setcc 9 -setp!setcc A -setpe!setcc A -setnp!setcc B -setpo!setcc B -setl!setcc C -setnge!setcc C -setnl!setcc D -setge!setcc D -setle!setcc E -setng!setcc E -setnle!setcc F -setg!setcc F -; -; String instructions -; -; NOTE: cmpsd,movsd can't go to !onebyte group because of other variations -cmpsb!onebyte nil,A6 8086 -cmpsw!onebyte 10,A7 8086 -cmpsd nil 32 A7 nil nil 386 -insb!onebyte nil,6C 8086 -insw!onebyte 10,6D 8086 -insd!onebyte 20,6D 386 -outsb!onebyte nil,6E 8086 -outsw!onebyte 10,6F 8086 -outsd!onebyte 20,6F 386 -lodsb!onebyte nil,AC 8086 -lodsw!onebyte 10,AD 8086 -lodsd!onebyte 20,AD 386 -movsb!onebyte nil,A4 8086 -movsw!onebyte 10,A5 8086 -movsd nil 32 A5 nil nil 386 -scasb!onebyte nil,AE 8086 -scasw!onebyte 10,AF 8086 -scasd!onebyte 20,AF 386 -stosb!onebyte nil,AA 8086 -stosw!onebyte 10,AB 8086 -stosd!onebyte 20,AB 386 -xlat!onebyte nil,D7 8086 -xlatb!onebyte nil,D7 8086 -; -; Bit manipulation -; -; Bit tests -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!bittest reg16,reg16 16 0F,$0.1 $1r,$2 nil 386 -!bittest mem,reg16 16 0F,$0.1 $1,$2 nil 386 -!bittest mem16x,reg16 16 0F,$0.1 $1,$2 nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!bittest reg32,reg32 32 0F,$0.1 $1r,$2 nil 386 -!bittest mem,reg32 32 0F,$0.1 $1,$2 nil 386 -!bittest mem32x,reg32 32 0F,$0.1 $1,$2 nil 386 -!bittest reg16,imm8 16 0F,BA $1r,$0.2 $2,8 386 -!bittest mem16x,imm8 16 0F,BA $1,$0.2 $2,8 386 -!bittest reg32,imm8 32 0F,BA $1r,$0.2 $2,8 386 -!bittest mem32x,imm8 32 0F,BA $1,$0.2 $2,8 386 -; Bit scans -!bsfr reg16,rm16 16 0F,BC+$0.1 $2,$1 nil 386 -!bsfr reg32,rm32 32 0F,BC+$0.1 $2,$1 nil 386 -bsf!bsfr 0 -bsr!bsfr 1 
-bt!bittest A3,4 -btc!bittest BB,7 -btr!bittest B3,6 -bts!bittest AB,5 -; -; Interrupts and operating system instructions -; -int imm8 nil CD nil $1,8 8086 -int3!onebyte nil,CC 8086 -int03!onebyte nil,CC 8086 -into!onebyte nil,CE 8086 -iret!onebyte nil,CF 8086 -iretw!onebyte 10,CF 8086 -iretd!onebyte 20,CF 386 -rsm!twobyte 0F,AA P5,SMM -bound reg16,mem16 16 62 $2,$1 nil 186 -bound reg32,mem32 32 62 $2,$1 nil 386 -hlt!onebyte nil,F4 8086,PRIV -nop!onebyte nil,90 8086 -; -; Protection control -; -; 286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW): -!prot286 rm16 nil 0F,00 $1,$0.1 nil 286,PROT,@0 -arpl rm16,reg16 nil 63 $1,$2 nil 286,PROT -lar reg16,rm16 16 0F,02 $2,$1 nil 286,PROT -lar reg32,rm32 32 0F,02 $2,$1 nil 386,PROT -lgdt!twobytemem 0F,01,2 286,PRIV -lidt!twobytemem 0F,01,3 286,PRIV -lldt!prot286 2 PRIV -lmsw rm16 nil 0F,01 $1,6 nil 286,PRIV -lsl reg16,rm16 16 0F,03 $2,$1 nil 286,PROT -lsl reg32,rm32 32 0F,03 $2,$1 nil 286,PROT -ltr!prot286 3 PRIV -sgdt!twobytemem 0F,01,0 286 -sidt!twobytemem 0F,01,1 286 -sldt mem1632 nil 0F,00 $1,0 nil 286 -sldt reg16 16 0F,00 $1r,0 nil 286 -sldt reg32 32 0F,00 $1r,0 nil 386 -smsw mem1632 nil 0F,01 $1,4 nil 286 -smsw reg16 16 0F,01 $1r,4 nil 286 -smsw reg32 32 0F,01 $1r,4 nil 386 -str!prot286 1 -verr!prot286 4 -verw!prot286 5 -; -; Floating point instructions -; -; Load -fld mem32x nil D9 $1,0 nil 8086,FPU -fld mem64x nil DD $1,0 nil 8086,FPU -fld mem80x nil DB $1,5 nil 8086,FPU -fld fpureg nil D9,C0+$1 nil nil 8086,FPU -fild mem16x nil DF $1,0 nil 8086,FPU -fild mem32x nil DB $1,0 nil 8086,FPU -fild mem64x nil DF $1,5 nil 8086,FPU -fbld mem80 nil DF $1,4 nil 8086,FPU -; Store -fst mem32x nil D9 $1,2 nil 8086,FPU -fst mem64x nil DD $1,2 nil 8086,FPU -fst fpureg nil DD,D0+$1 nil nil 8086,FPU -fist mem16x nil DF $1,2 nil 8086,FPU -fist mem32x nil DB $1,2 nil 8086,FPU -; Store (with pop) -fstp mem32x nil D9 $1,3 nil 8086,FPU -fstp mem64x nil DD $1,3 nil 8086,FPU -fstp mem80x nil DB $1,7 nil 8086,FPU -fstp fpureg nil 
DD,D8+$1 nil nil 8086,FPU -fistp mem16x nil DF $1,3 nil 8086,FPU -fistp mem32x nil DB $1,3 nil 8086,FPU -fistp mem64x nil DF $1,7 nil 8086,FPU -fbstp mem80 nil DF $1,6 nil 8086,FPU -; Exchange (with ST0) -fxch fpureg nil D9,C8+$1 nil nil 8086,FPU -fxch ST0,ST0 nil D9,C8 nil nil 8086,FPU -fxch ST0,FPUREG_NOTST0 nil D9,C8+$2 nil nil 8086,FPU -fxch FPUREG_NOTST0,ST0 nil D9,C8+$1 nil nil 8086,FPU -fxch nil nil D9,C9 nil nil 8086,FPU -; Comparisons -!fcomg mem32x nil D8 $1,$0.1 nil 8086,FPU -!fcomg mem64x nil DC $1,$0.1 nil 8086,FPU -!fcomg fpureg nil D8,$0.2+$1 nil nil 8086,FPU -!fcomg ST0,fpureg nil D8,$0.2+$2 nil nil 8086,FPU -; Extended comparisons -!fcomg2 fpureg nil $0.1,$0.2+$1 nil nil @0,FPU -!fcomg2 ST0,fpureg nil $0.1,$0.2+$2 nil nil @0,FPU -; Comparison (without pop) -fcom!fcomg 2,D0 -ficom mem16x nil DE $1,2 nil 8086,FPU -ficom mem32x nil DA $1,2 nil 8086,FPU -; Comparison (with pop) -fcomp!fcomg 3,D8 -ficomp mem16x nil DE $1,3 nil 8086,FPU -ficomp mem32x nil DA $1,3 nil 8086,FPU -fcompp!twobyte DE,D9 8086,FPU -; Unordered comparison (with pop) -fucom!fcomg2 DD,E0 286,FPU -fucomp!fcomg2 DD,E8 286,FPU -fucompp!twobyte DA,E9 286,FPU -ftst!twobyte D9,E4 8086,FPU -fxam!twobyte D9,E5 8086,FPU -; Load constants into ST0 -fld1!twobyte D9,E8 8086,FPU -fldl2t!twobyte D9,E9 8086,FPU -fldl2e!twobyte D9,EA 8086,FPU -fldpi!twobyte D9,EB 8086,FPU -fldlg2!twobyte D9,EC 8086,FPU -fldln2!twobyte D9,ED 8086,FPU -fldz!twobyte D9,EE 8086,FPU -; Arithmetic -!farith mem32x nil D8 $1,$0.1 nil 8086,FPU -!farith mem64x nil DC $1,$0.1 nil 8086,FPU -!farith fpureg nil D8,$0.2+$1 nil nil 8086,FPU -!farith ST0,ST0 nil D8,$0.2 nil nil 8086,FPU -!farith ST0,FPUREG_NOTST0 nil D8,$0.2+$2 nil nil 8086,FPU -!farith TO fpureg nil DC,$0.3+$1 nil nil 8086,FPU -!farith FPUREG_NOTST0,ST0 nil DC,$0.3+$1 nil nil 8086,FPU -!farithp fpureg nil DE,$0.1+$1 nil nil 8086,FPU -!farithp fpureg,ST0 nil DE,$0.1+$1 nil nil 8086,FPU -!fiarith mem32x nil DA $1,$0.1 nil 8086,FPU -!fiarith mem16x nil DE $1,$0.1 
nil 8086,FPU -fadd!farith 0,C0,C0 -faddp!farithp C0 -fiadd!fiarith 0 -fsub!farith 4,E0,E8 -fisub!fiarith 4 -fsubp!farithp E8 -fsubr!farith 5,E8,E0 -fisubr!fiarith 5 -fsubrp!farithp E0 -; Multiply -fmul!farith 1,C8,C8 -fimul!fiarith 1 -fmulp!farithp C8 -; Divide -fdiv!farith 6,F0,F8 -fidiv!fiarith 6 -fdivp!farithp F8 -fdivr!farith 7,F8,F0 -fidivr!fiarith 7 -fdivrp!farithp F0 -; Other arithmetic -f2xm1!twobyte D9,F0 8086,FPU -fyl2x!twobyte D9,F1 8086,FPU -fptan!twobyte D9,F2 8086,FPU -fpatan!twobyte D9,F3 8086,FPU -fxtract!twobyte D9,F4 8086,FPU -fprem1!twobyte D9,F5 286,FPU -fdecstp!twobyte D9,F6 8086,FPU -fincstp!twobyte D9,F7 8086,FPU -fprem!twobyte D9,F8 8086,FPU -fyl2xp1!twobyte D9,F9 8086,FPU -fsqrt!twobyte D9,FA 8086,FPU -fsincos!twobyte D9,FB 286,FPU -frndint!twobyte D9,FC 8086,FPU -fscale!twobyte D9,FD 8086,FPU -fsin!twobyte D9,FE 286,FPU -fcos!twobyte D9,FF 286,FPU -fchs!twobyte D9,E0 8086,FPU -fabs!twobyte D9,E1 8086,FPU -; Processor control -fninit!twobyte DB,E3 8086,FPU -finit!threebyte 9B,DB,E3 8086,FPU -fldcw mem16 nil D9 $1,5 nil 8086,FPU -fnstcw mem16 nil D9 $1,7 nil 8086,FPU -fstcw mem16 nil 9B,D9 $1,7 nil 8086,FPU -fnstsw mem16 nil DD $1,7 nil 8086,FPU -fnstsw reg_ax nil DF,E0 nil nil 8086,FPU -fstsw mem16 nil 9B,DD $1,7 nil 8086,FPU -fstsw reg_ax nil 9B,DF,E0 nil nil 8086,FPU -fnclex!twobyte DB,E2 8086,FPU -fclex!threebyte 9B,DB,E2 8086,FPU -fnstenv!onebytemem D9,6 8086,FPU -fstenv!twobytemem 9B,D9,6 8086,FPU -fldenv!onebytemem D9,4 8086,FPU -fnsave!onebytemem DD,6 8086,FPU -fsave!twobytemem 9B,DD,6 8086,FPU -frstor!onebytemem DD,4 8086,FPU -ffree fpureg nil DD,C0+$1 nil nil 8086,FPU -ffreep fpureg nil DF,C0+$1 nil nil P6,FPU,UNDOC -fnop!twobyte D9,D0 8086,FPU -fwait!onebyte nil,9B 8086,FPU -; -; Prefixes (should the others be here too? should wait be a prefix?) 
-; -wait!onebyte nil,9B 8086 -; -; 486 extensions -; -; Compare & exchange, exchange & add -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg8,reg8 nil 0F,$0.1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg8 nil 0F,$0.1 $1,$2 nil @0 -!cmpxchgxadd mem8x,reg8 nil 0F,$0.1 $1,$2 nil @0 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg16,reg16 16 0F,$0.1+1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg16 16 0F,$0.1+1 $1,$2 nil @0 -!cmpxchgxadd mem16x,reg16 16 0F,$0.1+1 $1,$2 nil @0 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg32,reg32 32 0F,$0.1+1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg32 32 0F,$0.1+1 $1,$2 nil @0 -!cmpxchgxadd mem32x,reg32 32 0F,$0.1+1 $1,$2 nil @0 -bswap reg32 32 0F,C8+$1 nil nil 486 -xadd!cmpxchgxadd C0 486 -cmpxchg!cmpxchgxadd B0 486 -cmpxchg486!cmpxchgxadd A6 486,UNDOC -invd!twobyte 0F,08 486,PRIV -wbinvd!twobyte 0F,09 486,PRIV -invlpg!twobytemem 0F,01,7 486,PRIV -; -; 586 and late 486 extensions -; -cpuid!twobyte 0F,A2 486 -; -; Pentium extensions -; -wrmsr!twobyte 0F,30 P5,PRIV -rdtsc!twobyte 0F,31 P5 -rdmsr!twobyte 0F,32 P5,PRIV -cmpxchg8b mem64 nil 0F,C7 $1,1 nil P5 -; -; Pentium II/Pentium Pro extensions -; -sysenter!twobyte 0F,34 P6 -sysexit!twobyte 0F,35 P6,PRIV -fxsave!twobytemem 0F,AE,0 P6,FPU -fxrstor!twobytemem 0F,AE,1 P6,FPU -rdpmc!twobyte 0F,33 P6 -ud2!twobyte 0F,0B 286 -ud1!twobyte 0F,B9 286,UNDOC -; cmov -; fcmov -fcomi!fcomg2 DB,F0 P6 -fucomi!fcomg2 DB,E8 P6 -fcomip!fcomg2 DF,F0 P6 -fucomip!fcomg2 DF,E8 P6 -; -; Pentium4 extensions -; -movnti mem32,reg32 nil 0F,C3 $1,$2 nil P4 -clflush mem8 nil 0F,AE $1,7 nil KATMAI -lfence!threebyte 0F,AE,E8 KATMAI -mfence!threebyte 0F,AE,F0 KATMAI -pause!twobyte F3,90 P4 -; -; MMX/SSE2 instructions -; -; General -!mmxsse MMXREG,rm64 nil 0F,$0.1 $2,$1 nil @0,MMX -!mmxsse XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil @1 -; Shifts -!pshift MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX -!pshift XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2 -!pshift MMXREG,imm8 nil 
0F,$0.2 $1r,$0.3 $2,8 P5,MMX -!pshift XMMREG,imm8 nil 66,0F,$0.2 $1r,$0.3 $2,8 P4,SSE2 -emms!twobyte 0F,77 P5,MMX -movd MMXREG,rm32 nil 0F,6E $2,$1 nil P5,MMX -movd rm32,MMXREG nil 0F,7E $1,$2 nil P5,MMX -movd XMMREG,rm32 nil 66,0F,6E $2,$1 nil P4,SSE2 -movd rm32,XMMREG nil 66,0F,7E $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movq MMXREG,MMXREG nil 0F,6F $2r,$1 nil P5,MMX -movq MMXREG,mem64 nil 0F,6F $2,$1 nil P5,MMX -movq mem64,MMXREG nil 0F,7F $1,$2 nil P5,MMX -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movq XMMREG,XMMREG nil F3,0F,7E $2r,$1 nil P4,SSE2 -movq XMMREG,mem64 nil F3,0F,7E $2,$1 nil P4,SSE2 -movq mem64,XMMREG nil 66,0F,D6 $1,$2 nil P4,SSE2 -packssdw!mmxsse 6B P5 P4,SSE2 -packsswb!mmxsse 63 P5 P4,SSE2 -packuswb!mmxsse 67 P5 P4,SSE2 -paddb!mmxsse FC P5 P4,SSE2 -paddw!mmxsse FD P5 P4,SSE2 -paddd!mmxsse FE P5 P4,SSE2 -paddq!mmxsse D4 P5 P4,SSE2 -paddsb!mmxsse EC P5 P4,SSE2 -paddsw!mmxsse ED P5 P4,SSE2 -paddusb!mmxsse DC P5 P4,SSE2 -paddusw!mmxsse DD P5 P4,SSE2 -pand!mmxsse DB P5 P4,SSE2 -pandn!mmxsse DF P5 P4,SSE2 -pcmpeqb!mmxsse 74 P5 P4,SSE2 -pcmpeqw!mmxsse 75 P5 P4,SSE2 -pcmpeqd!mmxsse 76 P5 P4,SSE2 -pcmpgtb!mmxsse 64 P5 P4,SSE2 -pcmpgtw!mmxsse 65 P5 P4,SSE2 -pcmpgtd!mmxsse 66 P5 P4,SSE2 -pmaddwd!mmxsse F5 P5 P4,SSE2 -pmulhw!mmxsse E5 P5 P4,SSE2 -pmullw!mmxsse D5 P5 P4,SSE2 -por!mmxsse EB P5 P4,SSE2 -psllw!pshift F1,71,6 -pslld!pshift F2,72,6 -psllq!pshift F3,73,6 -psraw!pshift E1,71,4 -psrad!pshift E2,72,4 -psrlw!pshift D1,71,2 -psrld!pshift D2,72,2 -psrlq!pshift D3,73,2 -psubb MMXREG,imm8 nil 0F,F8 $1r,2 $2,8 P5,MMX -psubb XMMREG,imm8 nil 66,0F,F8 $1r,2 $2,8 P4,SSE2 -psubw MMXREG,imm8 nil 0F,F9 $1r,2 $2,8 P5,MMX -psubw XMMREG,imm8 nil 66,0F,F9 $1r,2 $2,8 P4,SSE2 -psubd!mmxsse FA P5 P4,SSE2 -psubq!mmxsse FB P5 P4,SSE2 -psubsb!mmxsse E8 P5 P4,SSE2 -psubsw!mmxsse E9 P5 P4,SSE2 -psubusb!mmxsse D8 P5 P4,SSE2 -psubusw!mmxsse D9 P5 P4,SSE2 -punpckhbw!mmxsse 68 P5 P4,SSE2 -punpckhwd!mmxsse 69 P5 P4,SSE2 
-punpckhdq!mmxsse 6A P5 P4,SSE2 -punpcklbw!mmxsse 60 P5 P4,SSE2 -punpcklwd!mmxsse 61 P5 P4,SSE2 -punpckldq!mmxsse 62 P5 P4,SSE2 -pxor!mmxsse EF P5 P4,SSE2 -; -; PIII (Katmai) new instructions / SIMD instructions -; -; Standard -!sseps XMMREG,rm128 nil 0F,$0.1 $2,$1 nil @0 -!ssess XMMREG,rm128 nil F3,0F,$0.1 $2,$1 nil @0 -; With immediate -!ssepsimm XMMREG,rm128,imm8 nil 0F,$0.1 $2,$1 $3,8 KATMAI,SSE -; Comparisons -!ssecmpps XMMREG,rm128 nil 0F,C2 $2,$1 $0.1,8 KATMAI,SSE -!ssecmpss XMMREG,rm128 nil F3,0F,C2 $2,$1 $0.1,8 KATMAI,SSE -addps!sseps 58 KATMAI,SSE -addss!ssess 58 KATMAI,SSE -andnps!sseps 55 KATMAI,SSE -andps!sseps 54 KATMAI,SSE -cmpeqps!ssecmpps 0 -cmpeqss!ssecmpss 0 -cmpleps!ssecmpps 2 -cmpless!ssecmpss 2 -cmpltps!ssecmpps 1 -cmpltss!ssecmpss 1 -cmpneqps!ssecmpps 4 -cmpneqss!ssecmpss 4 -cmpnleps!ssecmpps 6 -cmpnless!ssecmpss 6 -cmpnltps!ssecmpps 5 -cmpnltss!ssecmpss 5 -cmpordps!ssecmpps 7 -cmpordss!ssecmpss 7 -cmpunordps!ssecmpps 3 -cmpunordss!ssecmpss 3 -cmpps!ssepsimm C2 -cmpss XMMREG,rm128,imm8 nil F3,0F,C2 $2,$1 $3,8 KATMAI,SSE -comiss!sseps 2F KATMAI,SSE -cvtpi2ps!sseps 2A KATMAI,SSE -cvtps2pi!sseps 2D KATMAI,SSE -cvtsi2ss!ssess 2A KATMAI,SSE -cvtss2si!ssess 2D KATMAI,SSE -cvttps2pi!sseps 2C KATMAI,SSE -cvttss2si!ssess 2C KATMAI,SSE -divps!sseps 5E KATMAI,SSE -divss!ssess 5E KATMAI,SSE -ldmxcsr mem32 nil 0F,AE $1,2 nil KATMAI,SSE -maskmovq MMXREG,MMXREG nil 0F,F7 $2r,$1 nil KATMAI,MMX -maxps!sseps 5F KATMAI,SSE -maxss!ssess 5F KATMAI,SSE -minps!sseps 5D KATMAI,SSE -minss!ssess 5D KATMAI,SSE -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movaps XMMREG,XMMREG nil 0F,28 $2r,$1 nil KATMAI,SSE -movaps XMMREG,mem128 nil 0F,28 $2,$1 nil KATMAI,SSE -movaps mem128,XMMREG nil 0F,29 $1,$2 nil KATMAI,SSE -movhlps XMMREG,XMMREG nil 0F,12 $2r,$1 nil KATMAI,SSE -movhps XMMREG,mem64 nil 0F,16 $2,$1 nil KATMAI,SSE -movhps mem64,XMMREG nil 0F,17 $1,$2 nil KATMAI,SSE -movlhps XMMREG,XMMREG nil 0F,16 $2r,$1 nil KATMAI,SSE -movlps XMMREG,mem64 nil 0F,12 $2,$1 
nil KATMAI,SSE -movlps mem64,XMMREG nil 0F,13 $1,$2 nil KATMAI,SSE -movmskps reg32,XMMREG nil 0F,50 $1r,$2 nil KATMAI,SSE -movntps mem128,XMMREG nil 0F,2B $1,$2 nil KATMAI,SSE -movntq mem64,MMXREG nil 0F,E7 $1,$2 nil KATMAI,MMX -movntdq mem128,XMMREG nil 66,0F,E7 $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movss XMMREG,XMMREG nil F3,0F,10 $2r,$1 nil KATMAI,SSE -movss XMMREG,mem64 nil F3,0F,10 $2,$1 nil KATMAI,SSE -movss mem64,XMMREG nil F3,0F,11 $1,$2 nil KATMAI,SSE -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movups XMMREG,XMMREG nil 0F,10 $2r,$1 nil KATMAI,SSE -movups XMMREG,mem64 nil 0F,10 $2,$1 nil KATMAI,SSE -movups mem64,XMMREG nil 0F,11 $1,$2 nil KATMAI,SSE -mulps!sseps 59 KATMAI,SSE -mulss!ssess 59 KATMAI,SSE -orps!sseps 56 KATMAI,SSE -pavgb!mmxsse E0 KATMAI P4,SSE2 -pavgw!mmxsse E3 KATMAI P4,SSE2 -pextrw reg32,MMXREG,imm8 nil 0F,C5 $1r,$2 $3,8 KATMAI,MMX -pextrw reg32,XMMREG,imm8 nil 66,0F,C5 $1r,$2 $3,8 P4,SSE2 -pinsrw MMXREG,reg32,imm8 nil 0F,C4 $2r,$1 $3,8 KATMAI,MMX -pinsrw MMXREG,rm16,imm8 nil 0F,C4 $2,$1 $3,8 KATMAI,MMX -pinsrw XMMREG,reg32,imm8 nil 66,0F,C4 $2r,$1 $3,8 P4,SSE2 -pinsrw XMMREG,rm16,imm8 nil 66,0F,C4 $2,$1 $3,8 P4,SSE2 -pmaxsw!mmxsse EE KATMAI P4,SSE2 -pmaxub!mmxsse DE KATMAI P4,SSE2 -pminsw!mmxsse EA KATMAI P4,SSE2 -pminub!mmxsse DA KATMAI P4,SSE2 -pmovmskb reg32,MMXREG nil 0F,D7 $1r,$2 nil KATMAI,SSE -pmovmskb reg32,XMMREG nil 66,0F,D7 $1r,$2 nil P4,SSE2 -pmulhuw!mmxsse E4 KATMAI P4,SSE2 -prefetchnta!twobytemem 0F,18,0 KATMAI -prefetcht0!twobytemem 0F,18,1 KATMAI -prefetcht1!twobytemem 0F,18,2 KATMAI -prefetcht2!twobytemem 0F,18,3 KATMAI -psadbw!mmxsse F6 KATMAI KATMAI,SSE -pshufw MMXREG,rm64,imm8 nil 0F,70 $2,$1 $3,8 KATMAI,MMX -rcpps!sseps 53 KATMAI,SSE -rcpss!ssess 53 KATMAI,SSE -rsqrtps!sseps 52 KATMAI,SSE -rsqrtss!ssess 52 KATMAI,SSE -sfence!threebyte 0F,AE,F8 KATMAI -shufps!ssepsimm C6 -sqrtps!sseps 51 KATMAI,SSE -sqrtss!ssess 51 KATMAI,SSE -stmxcsr mem32 nil 0F,AE $1,3 nil KATMAI,SSE 
-subps!sseps 5C KATMAI,SSE -subss!ssess 5C KATMAI,SSE -ucomiss!ssess 2E KATMAI,SSE -unpckhps!sseps 15 KATMAI,SSE -unpcklps!sseps 14 KATMAI,SSE -xorps!sseps 57 KATMAI,SSE -; -; SSE2 instructions -; -; Standard -!sse2pd XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2 -!sse2sd XMMREG,rm128 nil F2,0F,$0.1 $2,$1 nil P4,SSE2 -; With immediate -!sse2pdimm XMMREG,rm128,imm8 nil 66,0F,$0.1 $2,$1 $3,8 P4,SSE2 -; Comparisons -!sse2cmppd XMMREG,rm128 nil 66,0F,C2 $2,$1 $0.1,8 P4,SSE2 -!sse2cmpsd XMMREG,rm128 nil F2,0F,C2 $2,$1 $0.1,8 P4,SSE2 -addpd!sse2pd 58 -addsd!sse2sd 58 -andnpd!sse2pd 55 -andpd!sse2pd 54 -cmpeqpd!sse2cmppd 0 -cmpeqsd!sse2cmpsd 0 -cmplepd!sse2cmppd 2 -cmplesd!sse2cmpsd 2 -cmpltpd!sse2cmppd 1 -cmpltsd!sse2cmpsd 1 -cmpneqpd!sse2cmppd 4 -cmpneqsd!sse2cmpsd 4 -cmpnlepd!sse2cmppd 6 -cmpnlesd!sse2cmpsd 6 -cmpnltpd!sse2cmppd 5 -cmpnltsd!sse2cmpsd 5 -cmpordpd!sse2cmppd 7 -cmpordsd!sse2cmpsd 7 -cmpunordpd!sse2cmppd 3 -cmpunordsd!sse2cmpsd 3 -cmppd!sse2pdimm C2 -cmpsd XMMREG,rm128,imm8 nil F2,0F,C2 $2,$1 $3,8 P4,SSE2 -comisd!sse2pd 2F -cvtpi2pd!sse2pd 2A -cvtsi2sd!sse2sd 2A -divpd!sse2pd 5E -divsd!sse2sd 5E -maxpd!sse2pd 5F -maxsd!sse2sd 5F -minpd!sse2pd 5D -minsd!sse2sd 5D -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movapd XMMREG,XMMREG nil 66,0F,28 $2r,$1 nil P4,SSE2 -movapd XMMREG,mem128 nil 66,0F,28 $2,$1 nil P4,SSE2 -movapd mem128,XMMREG nil 66,0F,29 $1,$2 nil P4,SSE2 -movhpd XMMREG,mem64 nil 66,0F,16 $2,$1 nil P4,SSE2 -movhpd mem64,XMMREG nil 66,0F,17 $1,$2 nil P4,SSE2 -movlpd XMMREG,mem64 nil 66,0F,12 $2,$1 nil P4,SSE2 -movlpd mem64,XMMREG nil 66,0F,13 $1,$2 nil P4,SSE2 -movmskpd reg32,XMMREG nil 66,0F,50 $1r,$2 nil P4,SSE2 -movntpd mem128,XMMREG nil 66,0F,2B $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movsd XMMREG,XMMREG nil F2,0F,10 $2r,$1 nil P4,SSE2 -movsd XMMREG,mem64 nil F2,0F,10 $2,$1 nil P4,SSE2 -movsd mem64,XMMREG nil F2,0F,11 $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movupd 
XMMREG,XMMREG nil 66,0F,10 $2r,$1 nil P4,SSE2 -movupd XMMREG,mem64 nil 66,0F,10 $2,$1 nil P4,SSE2 -movupd mem64,XMMREG nil 66,0F,11 $1,$2 nil P4,SSE2 -mulpd!sse2pd 59 -mulsd!sse2sd 59 -orpd!sse2pd 56 -shufpd!sse2pdimm C6 -sqrtpd!sse2pd 51 -sqrtsd!sse2sd 51 -subpd!sse2pd 5C -subsd!sse2sd 5C -ucomisd!sse2sd 2E -unpckhpd!sse2pd 15 -unpcklpd!sse2pd 14 -xorpd!sse2pd 57 -cvtdq2pd!ssess E6 P4,SSE2 -cvtpd2dq!sse2sd E6 -cvtdq2ps!sseps 5B P4,SSE2 -cvtpd2pi!sse2pd 2D -cvtpd2ps!sse2pd 5A -cvtps2pd!sseps 5A P4,SSE2 -cvtps2dq!sse2pd 5B -cvtsd2si!sse2sd 2D -cvtsd2ss!sse2sd 5A -cvtss2sd!ssess 5A P4,SSE2 -cvttpd2pi!sse2pd 2C -cvttsd2si!sse2sd 2C -cvttpd2dq!sse2pd E6 -cvttps2dq!ssess 5B P4,SSE2 -maskmovdqu XMMREG,XMMREG nil 66,0F,F7 $2r,$1 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movdqa XMMREG,XMMREG nil 66,0F,6F $2r,$1 nil P4,SSE2 -movdqa XMMREG,mem128 nil 66,0F,6F $2,$1 nil P4,SSE2 -movdqa mem128,XMMREG nil 66,0F,7F $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movdqu XMMREG,XMMREG nil F3,0F,6F $2r,$1 nil P4,SSE2 -movdqu XMMREG,mem128 nil F3,0F,6F $2,$1 nil P4,SSE2 -movdqu mem128,XMMREG nil F3,0F,7F $1,$2 nil P4,SSE2 -movdq2q MMXREG,XMMREG nil F2,0F,D6 $2r,$1 nil P4,SSE2 -movq2dq XMMREG,MMXREG nil F3,0F,D6 $2r,$1 nil P4,SSE2 -pmuludq!mmxsse F4 P4 P4,SSE2 -pshufd!sse2pdimm 70 -pshufhw XMMREG,rm128,imm8 nil F3,0F,70 $2,$1 $3,8 P4,SSE2 -pshuflw XMMREG,rm128,imm8 nil F2,0F,70 $2,$1 $3,8 P4,SSE2 -pslldq XMMREG,imm8 nil 66,0F,73 $1r,7 $2,8 P4,SSE2 -psrldq XMMREG,imm8 nil 66,0F,73 $1r,3 $2,8 P4,SSE2 -punpckhqdq!sse2pd 6D -punpcklqdq!sse2pd 6C -; -; AMD 3DNow! 
instructions -; -!now3d MMXREG,rm64 nil 0F,0F $2,$1 $0.1,8 @0,3DNOW,AMD -prefetch!twobytemem 0F,0D,0 P5,3DNOW,AMD -prefetchw!twobytemem 0F,0D,1 P5,3DNOW,AMD -femms!twobyte 0F,0E P5,3DNOW,AMD -pavgusb!now3d BF P5 -pf2id!now3d 1D P5 -pf2iw!now3d 1C ATHLON -pfacc!now3d AE P5 -pfadd!now3d 9E P5 -pfcmpeq!now3d B0 P5 -pfcmpge!now3d 90 P5 -pfcmpgt!now3d A0 P5 -pfmax!now3d A4 P5 -pfmin!now3d 94 P5 -pfmul!now3d B4 P5 -pfnacc!now3d 8A ATHLON -pfpnacc!now3d 8E ATHLON -pfrcp!now3d 96 P5 -pfrcpit1!now3d A6 P5 -pfrcpit2!now3d B6 P5 -pfrsqit1!now3d A7 P5 -pfrsqrt!now3d 97 P5 -pfsub!now3d 9A P5 -pfsubr!now3d AA P5 -pi2fd!now3d 0D P5 -pi2fw!now3d 0C ATHLON -pmulhrwa!now3d B7 P5 -pswapd!now3d BB ATHLON -; -; AMD extensions -; -syscall!twobyte 0F,05 P6,AMD -sysret!twobyte 0F,07 P6,PRIV,AMD -; swapgs -; -; Cyrix MMX instructions -; -!cyrixmmx MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX,CYRIX -paddsiw!cyrixmmx 51 -paveb!cyrixmmx 50 -pdistib!cyrixmmx 54 -pmachriw MMXREG,mem64 nil 0F,5E $2,$1 nil P5,MMX,CYRIX -pmagw!cyrixmmx 52 -pmulhriw!cyrixmmx 5D -pmulhrwc!cyrixmmx 59 -pmvgezb!cyrixmmx 5C -pmvlzb!cyrixmmx 5B -pmvnzb!cyrixmmx 5A -pmvzb!cyrixmmx 58 -psubsiw!cyrixmmx 55 -; -; Cyrix extensions -; -!cyrixsmm mem80 nil 0F,$0.1 $1,0 nil 486,CYRIX,SMM -rdshr!twobyte 0F,36 P6,CYRIX,SMM -rsdc segreg,mem80 nil 0F,79 $2,$1 nil 486,CYRIX,SMM -rsldt!cyrixsmm 7B -rsts!cyrixsmm 7D -svdc mem80,segreg nil 0F,78 $1,$2 nil 486,CYRIX,SMM -svldt!cyrixsmm 7A -svts!cyrixsmm 7C -smint!twobyte 0F,38 P6,CYRIX -smintold!twobyte 0F,7E 486,CYRIX,OBS -wrshr!twobyte 0F,37 P6,CYRIX,SMM -; -; Obsolete/Undocumented Instructions -; -fsetpm!twobyte DB,E4 286,FPU,OBS -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -ibts reg16,reg16 16 0F,A7 $1r,$2 nil 386,UNDOC,OBS -ibts mem,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS -ibts mem16x,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -ibts reg32,reg32 32 0F,A7 $1r,$2 nil 386,UNDOC,OBS -ibts mem,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS 
-ibts mem32x,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS -loadall!twobyte 0F,07 386,UNDOC -loadall286!twobyte 0F,05 286,UNDOC -;pop reg_cs nil 0F nil nil 8086,UNDOC,OBS -salc!onebyte nil,D6 8086,UNDOC -smi!onebyte nil,F1 386,UNDOC -; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11). -umov reg8,reg8 nil 0F,10 $1r,$2 nil 386,UNDOC -umov reg16,reg16 16 0F,11 $1r,$2 nil 386,UNDOC -umov reg32,reg32 32 0F,11 $1r,$2 nil 386,UNDOC -umov mem,reg8 nil 0F,10 $1,$2 nil 386,UNDOC -umov mem8x,reg8 nil 0F,10 $1,$2 nil 386,UNDOC -umov mem,reg16 16 0F,11 $1,$2 nil 386,UNDOC -umov mem16x,reg16 16 0F,11 $1,$2 nil 386,UNDOC -umov mem,reg32 32 0F,11 $1,$2 nil 386,UNDOC -umov mem32x,reg32 32 0F,11 $1,$2 nil 386,UNDOC -umov reg8,mem8 nil 0F,12 $2,$1 nil 386,UNDOC -umov reg16,mem16 16 0F,13 $2,$1 nil 386,UNDOC -umov reg32,mem32 32 0F,13 $2,$1 nil 386,UNDOC -xbts reg16,mem16 16 0F,A6 $2,$1 nil 386,UNDOC,OBS -xbts reg32,mem32 32 0F,A6 $2,$1 nil 386,UNDOC,OBS diff --git a/modules/arch/x86/x86-int.h b/modules/arch/x86/x86-int.h deleted file mode 100644 index 86811b62..00000000 --- a/modules/arch/x86/x86-int.h +++ /dev/null @@ -1,110 +0,0 @@ -/* $IdPath$ - * x86 internals header file - * - * Copyright (C) 2001 Peter Johnson - * - * This file is part of YASM. - * - * YASM is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * YASM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef YASM_X86_INT_H -#define YASM_X86_INT_H - -typedef struct x86_effaddr_data { - unsigned char segment; /* segment override, 0 if none */ - - /* How the spare (register) bits in Mod/RM are handled: - * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) - * They're set in bytecode_new_insn(). - */ - unsigned char modrm; - unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ - unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ - - unsigned char sib; - unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ - unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, - 0xff if unknown */ -} x86_effaddr_data; - -typedef struct x86_insn { - /*@null@*/ effaddr *ea; /* effective address */ - - /*@null@*/ immval *imm; /* immediate or relative value */ - - unsigned char opcode[3]; /* opcode */ - unsigned char opcode_len; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - /* HACK, but a space-saving one: shift opcodes have an immediate - * form and a ,1 form (with no immediate). In the parser, we - * set this and opcode_len=1, but store the ,1 version in the - * second byte of the opcode array. We then choose between the - * two versions once we know the actual value of imm (because we - * don't know it in the parser module). - * - * A override to force the imm version should just leave this at - * 0. Then later code won't know the ,1 version even exists. - * TODO: Figure out how this affects CPU flags processing. - * - * Call x86_SetInsnShiftFlag() to set this flag to 1. 
- */ - unsigned char shift_op; - - /* HACK, similar to that for shift_op above, for optimizing instructions - * that take a sign-extended imm8 as well as imm values (eg, the arith - * instructions and a subset of the imul instructions). - */ - unsigned char signext_imm8_op; - - unsigned char mode_bits; -} x86_insn; - -typedef struct x86_jmprel { - expr *target; /* target location */ - - struct { - unsigned char opcode[3]; - unsigned char opcode_len; /* 0 = no opc for this version */ - } shortop, nearop; - - /* which opcode are we using? */ - /* The *FORCED forms are specified in the source as such */ - x86_jmprel_opcode_sel op_sel; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - unsigned char mode_bits; -} x86_jmprel; - -void x86_bc_delete(bytecode *bc); -void x86_bc_print(FILE *f, const bytecode *bc); -bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect, - resolve_label_func resolve_label); -int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, - void *d, output_expr_func output_expr); - -int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, - unsigned char nosplit, unsigned char *displen, - unsigned char *modrm, unsigned char *v_modrm, - unsigned char *n_modrm, unsigned char *sib, - unsigned char *v_sib, unsigned char *n_sib); - -#endif diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c index 755e8bed..c43feb11 100644 --- a/modules/arch/x86/x86arch.c +++ b/modules/arch/x86/x86arch.c @@ -1,7 +1,7 @@ /* * x86 architecture description * - * Copyright (C) 2001 Peter Johnson + * Copyright (C) 2002 Peter Johnson * * This file is part of YASM. 
* @@ -22,23 +22,164 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "globals.h" +#include "errwarn.h" +#include "intnum.h" +#include "expr.h" + #include "bytecode.h" + #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" unsigned char x86_mode_bits = 0; +int +x86_directive(const char *name, valparamhead *valparams, + /*@unused@*/ /*@null@*/ valparamhead *objext_valparams, + /*@unused@*/ sectionhead *headp) +{ + valparam *vp; + const intnum *intn; + long lval; + + if (strcasecmp(name, "bits") == 0) { + if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && + (intn = expr_get_intnum(&vp->param)) != NULL && + (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) + x86_mode_bits = (unsigned char)lval; + else + Error(_("invalid argument to [%s]"), "BITS"); + return 0; + } else + return 1; +} + +unsigned int +x86_get_reg_size(unsigned long reg) +{ + switch ((x86_expritem_reg_size)(reg & ~7)) { + case X86_REG8: + return 1; + case X86_REG16: + return 2; + case X86_REG32: + case X86_CRREG: + case X86_DRREG: + case X86_TRREG: + return 4; + case X86_MMXREG: + return 8; + case X86_XMMREG: + return 16; + case X86_FPUREG: + return 10; + default: + InternalError(_("unknown register size")); + } + return 0; +} + +void +x86_reg_print(FILE *f, unsigned long reg) +{ + static const char *name8[] = {"al","cl","dl","bl","ah","ch","dh","bh"}; + static const char *name1632[] = {"ax","cx","dx","bx","sp","bp","si","di"}; + + switch ((x86_expritem_reg_size)(reg&~7)) { + case X86_REG8: + fprintf(f, "%s", name8[reg&7]); + break; + case X86_REG16: + fprintf(f, "%s", name1632[reg&7]); + break; + case X86_REG32: + fprintf(f, "e%s", name1632[reg&7]); + break; + case X86_MMXREG: + fprintf(f, "mm%d", (int)(reg&7)); + break; + case X86_XMMREG: + fprintf(f, "xmm%d", (int)(reg&7)); + break; + case X86_CRREG: + fprintf(f, "cr%d", (int)(reg&7)); + break; + case X86_DRREG: + fprintf(f, "dr%d", (int)(reg&7)); + break; + case X86_TRREG: + fprintf(f, "tr%d", 
(int)(reg&7)); + break; + case X86_FPUREG: + fprintf(f, "st%d", (int)(reg&7)); + break; + default: + InternalError(_("unknown register size")); + } +} + +void +x86_segreg_print(FILE *f, unsigned long segreg) +{ + static const char *name[] = {"es","cs","ss","ds","fs","gs"}; + fprintf(f, "%s", name[segreg&7]); +} + +void +x86_handle_prefix(bytecode *bc, const unsigned long data[4]) +{ + switch((x86_parse_insn_prefix)data[0]) { + case X86_LOCKREP: + x86_bc_insn_set_lockrep_prefix(bc, (unsigned char)data[1]); + break; + case X86_ADDRSIZE: + x86_bc_insn_addrsize_override(bc, (unsigned char)data[1]); + break; + case X86_OPERSIZE: + x86_bc_insn_opersize_override(bc, (unsigned char)data[1]); + break; + } +} + +void +x86_handle_seg_prefix(bytecode *bc, unsigned long segreg) +{ + x86_ea_set_segment(x86_bc_insn_get_ea(bc), (unsigned char)(segreg>>8)); +} + +void +x86_handle_seg_override(effaddr *ea, unsigned long segreg) +{ + x86_ea_set_segment(ea, (unsigned char)(segreg>>8)); +} + /* Define arch structure -- see arch.h for details */ arch x86_arch = { "x86 (IA-32, x86-64)", "x86", + { + x86_switch_cpu, + x86_check_identifier, + x86_directive, + x86_new_insn, + x86_handle_prefix, + x86_handle_seg_prefix, + x86_handle_seg_override, + x86_ea_new_expr + }, { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, x86_bc_resolve, x86_bc_tobytes - } + }, + x86_get_reg_size, + x86_reg_print, + x86_segreg_print, + NULL, /* x86_ea_data_delete */ + x86_ea_data_print }; diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index 336201b8..c44c0ddc 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -28,6 +28,31 @@ typedef enum { } x86_bytecode_type; #define X86_BYTECODE_TYPE_MAX X86_BC_JMPREL+1 +/* 0-7 (low 3 bits) used for register number, stored in same data area */ +typedef enum { + X86_REG8 = 0x8, + X86_REG16 = 0x10, + X86_REG32 = 0x20, + X86_MMXREG = 0x40, + X86_XMMREG = 0x80, + X86_CRREG = 0xC0, + X86_DRREG = 0xC8, + X86_TRREG = 0xF0, + 
X86_FPUREG = 0xF8 +} x86_expritem_reg_size; + +typedef enum { + X86_LOCKREP = 1, + X86_ADDRSIZE, + X86_OPERSIZE +} x86_parse_insn_prefix; + +typedef enum { + X86_NEAR, + X86_SHORT, + X86_FAR +} x86_parse_targetmod; + typedef enum { JR_NONE, JR_SHORT, @@ -44,7 +69,7 @@ typedef struct x86_targetval { void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment); effaddr *x86_ea_new_reg(unsigned char reg); -effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len); +effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len); effaddr *x86_ea_new_expr(/*@keep@*/ expr *e); /*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc); @@ -63,7 +88,7 @@ void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel, */ typedef struct x86_new_insn_data { /*@keep@*/ /*@null@*/ effaddr *ea; - /*@keep@*/ /*@null@*/ immval *imm; + /*@keep@*/ /*@null@*/ expr *imm; unsigned char opersize; unsigned char op_len; unsigned char op[3]; @@ -90,4 +115,116 @@ bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d); extern unsigned char x86_mode_bits; +typedef struct x86_effaddr_data { + unsigned char segment; /* segment override, 0 if none */ + + /* How the spare (register) bits in Mod/RM are handled: + * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) + * They're set in bytecode_new_insn(). 
+ */ + unsigned char modrm; + unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ + unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ + + unsigned char sib; + unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ + unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, + 0xff if unknown */ +} x86_effaddr_data; + +typedef struct x86_insn { + /*@null@*/ effaddr *ea; /* effective address */ + + /*@null@*/ immval *imm; /* immediate or relative value */ + + unsigned char opcode[3]; /* opcode */ + unsigned char opcode_len; + + unsigned char addrsize; /* 0 or =mode_bits => no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + + /* HACK, but a space-saving one: shift opcodes have an immediate + * form and a ,1 form (with no immediate). In the parser, we + * set this and opcode_len=1, but store the ,1 version in the + * second byte of the opcode array. We then choose between the + * two versions once we know the actual value of imm (because we + * don't know it in the parser module). + * + * A override to force the imm version should just leave this at + * 0. Then later code won't know the ,1 version even exists. + * TODO: Figure out how this affects CPU flags processing. + * + * Call x86_SetInsnShiftFlag() to set this flag to 1. + */ + unsigned char shift_op; + + /* HACK, similar to that for shift_op above, for optimizing instructions + * that take a sign-extended imm8 as well as imm values (eg, the arith + * instructions and a subset of the imul instructions). + */ + unsigned char signext_imm8_op; + + unsigned char mode_bits; +} x86_insn; + +typedef struct x86_jmprel { + expr *target; /* target location */ + + struct { + unsigned char opcode[3]; + unsigned char opcode_len; /* 0 = no opc for this version */ + } shortop, nearop; + + /* which opcode are we using? 
*/ + /* The *FORCED forms are specified in the source as such */ + x86_jmprel_opcode_sel op_sel; + + unsigned char addrsize; /* 0 or =mode_bits => no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + + unsigned char mode_bits; +} x86_jmprel; + +void x86_bc_delete(bytecode *bc); +void x86_bc_print(FILE *f, const bytecode *bc); +bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect, + resolve_label_func resolve_label); +int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr); + +int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, + unsigned char nosplit, unsigned char *displen, + unsigned char *modrm, unsigned char *v_modrm, + unsigned char *n_modrm, unsigned char *sib, + unsigned char *v_sib, unsigned char *n_sib); + +void x86_switch_cpu(const char *cpuid); + +arch_check_id_retval x86_check_identifier(unsigned long data[2], + const char *id); + +int x86_directive(const char *name, valparamhead *valparams, + /*@null@*/ valparamhead *objext_valparams, + sectionhead *headp); + +/*@null@*/ bytecode *x86_new_insn(const unsigned long data[2], + int num_operands, + /*@null@*/ insn_operandhead *operands); + +void x86_handle_prefix(bytecode *bc, const unsigned long data[4]); + +void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg); + +void x86_handle_seg_override(effaddr *ea, unsigned long segreg); + +unsigned int x86_get_reg_size(unsigned long reg); + +void x86_reg_print(FILE *f, unsigned long reg); + +void x86_segreg_print(FILE *f, unsigned long segreg); + +void x86_ea_data_print(FILE *f, const effaddr *ea); + #endif diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index 8cc4d4b4..4393a0c8 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -32,7 +32,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" #include 
"bc-int.h" @@ -54,11 +54,12 @@ x86_bc_new_insn(x86_new_insn_data *d) ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */ } - insn->imm = d->imm; if (d->imm) { + insn->imm = imm_new_expr(d->imm); insn->imm->len = d->im_len; insn->imm->sign = d->im_sign; - } + } else + insn->imm = NULL; insn->opcode[0] = d->op[0]; insn->opcode[1] = d->op[1]; @@ -173,12 +174,12 @@ x86_ea_new_expr(expr *e) /*@-compmempass@*/ effaddr * -x86_ea_new_imm(immval *imm, unsigned char im_len) +x86_ea_new_imm(expr *imm, unsigned char im_len) { effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data)); x86_effaddr_data *ead = ea_get_data(ea); - ea->disp = imm->val; + ea->disp = imm; ea->len = im_len; ea->nosplit = 0; ead->segment = 0; @@ -320,10 +321,8 @@ x86_bc_delete(bytecode *bc) switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - if (insn->ea) { - expr_delete(insn->ea->disp); - xfree(insn->ea); - } + if (insn->ea) + ea_delete(insn->ea); if (insn->imm) { expr_delete(insn->imm->val); xfree(insn->imm); @@ -336,40 +335,38 @@ x86_bc_delete(bytecode *bc) } } +void +x86_ea_data_print(FILE *f, const effaddr *ea) +{ + const x86_effaddr_data *ead = ea_get_const_data(ea); + fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", + (unsigned int)ead->segment); + fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "", + (unsigned int)ead->modrm, (unsigned int)ead->valid_modrm, + (unsigned int)ead->need_modrm); + fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "", + (unsigned int)ead->sib, (unsigned int)ead->valid_sib, + (unsigned int)ead->need_sib); +} + void x86_bc_print(FILE *f, const bytecode *bc) { const x86_insn *insn; const x86_jmprel *jmprel; - x86_effaddr_data *ead; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_const_data(bc); fprintf(f, "%*s_Instruction_\n", indent_level, ""); fprintf(f, "%*sEffective Address:", indent_level, ""); - if (!insn->ea) - fprintf(f, " (nil)\n"); - else { - 
indent_level++; - fprintf(f, "\n%*sDisp=", indent_level, ""); - expr_print(f, insn->ea->disp); + if (insn->ea) { fprintf(f, "\n"); - ead = ea_get_data(insn->ea); - fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n", - indent_level, "", (unsigned int)insn->ea->len, - (unsigned int)ead->segment, - (unsigned int)insn->ea->nosplit); - fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", - indent_level, "", (unsigned int)ead->modrm, - (unsigned int)ead->valid_modrm, - (unsigned int)ead->need_modrm); - fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", - indent_level, "", (unsigned int)ead->sib, - (unsigned int)ead->valid_sib, - (unsigned int)ead->need_sib); + indent_level++; + ea_print(f, insn->ea); indent_level--; - } + } else + fprintf(f, " (nil)\n"); fprintf(f, "%*sImmediate Value:", indent_level, ""); if (!insn->imm) fprintf(f, " (nil)\n"); @@ -477,8 +474,7 @@ x86_bc_resolve_insn(x86_insn *insn, unsigned long *len, int save, x86_effaddr_data ead_t = *ead; /* structure copy */ unsigned char displen = ea->len; - if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) || - (!ead->valid_modrm && ead->need_modrm))) { + if (ea->disp) { temp = expr_copy(ea->disp); assert(temp != NULL); diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c index d041cc42..a30f14d3 100644 --- a/modules/arch/x86/x86expr.c +++ b/modules/arch/x86/x86expr.c @@ -33,7 +33,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" #include "expr-int.h" @@ -48,10 +48,10 @@ x86_expr_checkea_get_reg32(ExprItem *ei, /*returned*/ void *d) int *ret; /* don't allow 16-bit registers */ - if (ei->data.reg.size != 32) + if ((ei->data.reg & ~7) != X86_REG32) return 0; - ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */ + ret = &data[ei->data.reg & 7]; /* overwrite with 0 to eliminate register from displacement expr */ ei->type = EXPR_INT; @@ -84,10 +84,11 @@ x86_expr_checkea_get_reg16(ExprItem *ei, void *d) reg16[7] = &data->di; /* don't allow 32-bit 
registers */ - if (ei->data.reg.size != 16) + if ((ei->data.reg & ~7) != X86_REG16) return 0; - ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */ + /* & 7 for sanity check */ + ret = reg16[ei->data.reg & 7]; /* only allow BX, SI, DI, BP */ if (!ret) @@ -469,7 +470,7 @@ x86_expr_checkea_getregsize_callback(ExprItem *ei, void *d) unsigned char *addrsize = (unsigned char *)d; if (ei->type == EXPR_REG) { - *addrsize = ei->data.reg.size; + *addrsize = (unsigned char)ei->data.reg & ~7; return 1; } else return 0; @@ -757,6 +758,12 @@ x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE, havereg == HAVE_BP, displen, modrm, v_modrm); + } else if (!*n_modrm && !*n_sib) { + /* Special case for MOV MemOffs opcode: displacement but no modrm. */ + if (*addrsize == 32) + *displen = 4; + else if (*addrsize == 16) + *displen = 2; } return 1; } diff --git a/modules/arch/x86/x86id.re b/modules/arch/x86/x86id.re new file mode 100644 index 00000000..46ab2dbb --- /dev/null +++ b/modules/arch/x86/x86id.re @@ -0,0 +1,1282 @@ +/* + * x86 identifier recognition and instruction handling + * + * Copyright (C) 2002 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "util.h" +RCSID("$IdPath$"); + +#include "bitvect.h" + +#include "globals.h" +#include "errwarn.h" +#include "intnum.h" +#include "floatnum.h" +#include "expr.h" +#include "symrec.h" + +#include "bytecode.h" + +#include "arch.h" +#include "src/arch/x86/x86arch.h" + +#include "expr-int.h" +#include "bc-int.h" + + +/* Available CPU feature flags */ +#define CPU_Any (0) /* Any old cpu will do */ +#define CPU_086 CPU_Any +#define CPU_186 (1<<0) /* i186 or better required */ +#define CPU_286 (1<<1) /* i286 or better required */ +#define CPU_386 (1<<2) /* i386 or better required */ +#define CPU_486 (1<<3) /* i486 or better required */ +#define CPU_586 (1<<4) /* i586 or better required */ +#define CPU_686 (1<<5) /* i686 or better required */ +#define CPU_P3 (1<<6) /* Pentium3 or better required */ +#define CPU_P4 (1<<7) /* Pentium4 or better required */ +#define CPU_IA64 (1<<8) /* IA-64 or better required */ +#define CPU_K6 (1<<9) /* AMD K6 or better required */ +#define CPU_Athlon (1<<10) /* AMD Athlon or better required */ +#define CPU_Hammer (1<<11) /* AMD Sledgehammer or better required */ +#define CPU_FPU (1<<12) /* FPU support required */ +#define CPU_MMX (1<<13) /* MMX support required */ +#define CPU_SSE (1<<14) /* Streaming SIMD extensions required */ +#define CPU_SSE2 (1<<15) /* Streaming SIMD extensions 2 required */ +#define CPU_3DNow (1<<16) /* 3DNow! support required */ +#define CPU_Cyrix (1<<17) /* Cyrix-specific instruction */ +#define CPU_AMD (1<<18) /* AMD-specific inst. 
(older than K6) */ +#define CPU_SMM (1<<19) /* System Management Mode instruction */ +#define CPU_Prot (1<<20) /* Protected mode only instruction */ +#define CPU_Undoc (1<<21) /* Undocumented instruction */ +#define CPU_Obs (1<<22) /* Obsolete instruction */ +#define CPU_Priv (1<<23) /* Privileged instruction */ + +/* What instructions/features are enabled? Defaults to all. */ +static unsigned long cpu_enabled = ~CPU_Any; + +/* Opcode modifiers. The opcode bytes are in "reverse" order because the + * parameters are read from the arch-specific data in LSB->MSB order. + * (only for aesthetic reasons in the lexer code below, no practical reason). + */ +#define MOD_Op2Add (1<<0) /* Parameter adds to opcode byte 2 */ +#define MOD_Gap0 (1<<1) /* Eats a parameter */ +#define MOD_Op1Add (1<<2) /* Parameter adds to opcode byte 1 */ +#define MOD_Gap1 (1<<3) /* Eats a parameter */ +#define MOD_Op0Add (1<<4) /* Parameter adds to opcode byte 0 */ +#define MOD_SpAdd (1<<5) /* Parameter adds to "spare" value */ +#define MOD_OpSizeR (1<<6) /* Parameter replaces opersize */ +#define MOD_Imm8 (1<<7) /* Parameter is included as immediate byte */ + +/* Operand types. These are more detailed than the "general" types for all + * architectures, as they include the size, for instance. 
+ * Bit Breakdown (from LSB to MSB): + * - 4 bits = general type (must be exact match, except for =3): + * 0 = immediate + * 1 = any general purpose, MMX, XMM, or FPU register + * 2 = memory + * 3 = any general purpose, MMX, XMM, or FPU register OR memory + * 4 = segreg + * 5 = any CR register + * 6 = any DR register + * 7 = any TR register + * 8 = ST0 + * 9 = AL/AX/EAX (depending on size) + * A = CL/CX/ECX (depending on size) + * B = CR4 + * C = memory offset (an EA, but with no registers allowed) + * [special case for MOV opcode] + * - 3 bits = size (user-specified, or from register size): + * 0 = any size acceptable + * 1/2/3/4 = 8/16/32/64 bits (from user or reg size) + * 5/6 = 80/128 bits (from user) + * - 1 bit = size implicit or explicit ("strictness" of size matching on + * non-registers -- registers are always strictly matched): + * 0 = user size must exactly match size above. + * 1 = user size either unspecified or exactly match size above. + * + * Bits more significant than the above are actions: what to do with the operand if the + * instruction matches. Essentially describes what part of the output bytecode + * gets the operand. This may require conversion (e.g. a register going into + * an ea field). Naturally, only one of each of these may be contained in the + * operands of a single insn_info structure. 
+ * - 3 bits = action: + * 0 = does nothing (operand data is discarded) + * 1 = operand data goes into ea field + * 2 = operand data goes into imm field + * 3 = operand data goes into "spare" field + * 4 = operand data is added to opcode byte 0 + */ +#define OPT_Imm 0x0 +#define OPT_Reg 0x1 +#define OPT_Mem 0x2 +#define OPT_RM 0x3 +#define OPT_SegReg 0x4 +#define OPT_CRReg 0x5 +#define OPT_DRReg 0x6 +#define OPT_TRReg 0x7 +#define OPT_ST0 0x8 +#define OPT_Areg 0x9 +#define OPT_Creg 0xA +#define OPT_CR4 0xB +#define OPT_MemOffs 0xC +#define OPT_MASK 0x000F + +#define OPS_Any (0<<4) +#define OPS_8 (1<<4) +#define OPS_16 (2<<4) +#define OPS_32 (3<<4) +#define OPS_64 (4<<4) +#define OPS_80 (5<<4) +#define OPS_128 (6<<4) +#define OPS_MASK 0x0070 +#define OPS_SHIFT 4 + +#define OPS_Relaxed (1<<7) +#define OPS_RMASK 0x0080 + +#define OPA_None (0<<8) +#define OPA_EA (1<<8) +#define OPA_Imm (2<<8) +#define OPA_Spare (3<<8) +#define OPA_Op0Add (4<<8) +#define OPA_MASK 0x0700 + +typedef struct x86_insn_info { + /* The CPU feature flags needed to execute this instruction. This is OR'ed + * with arch-specific data[2]. This combined value is compared with + * cpu_enabled to see if all bits set here are set in cpu_enabled--if so, + * the instruction is available on this CPU. + */ + unsigned long cpu; + + /* Opcode modifiers for variations of instruction. As each modifier reads + * its parameter in LSB->MSB order from the arch-specific data[1] from the + * lexer data, and the LSB of the arch-specific data[1] is reserved for the + * count of insn_info structures in the instruction grouping, there can + * only be a maximum of 3 modifiers. 
+ */ + unsigned long modifiers; + + /* Operand Size */ + unsigned char opersize; + + /* The length of the basic opcode */ + unsigned char opcode_len; + + /* The basic 1-3 byte opcode */ + unsigned char opcode[3]; + + /* The 3-bit "spare" value (extended opcode) for the R/M byte field */ + unsigned char spare; + + /* The number of operands this form of the instruction takes */ + unsigned char num_operands; + + /* The types of each operand, see above */ + unsigned int operands[3]; +} x86_insn_info; + +/* Define lexer arch-specific data with 0-3 modifiers. */ +#define DEF_INSN_DATA(group, mod, cpu) do { \ + data[0] = (unsigned long)group##_insn; \ + data[1] = ((mod)<<8) | \ + ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \ + data[2] = cpu; \ + } while (0) + +#define RET_INSN(group, mod, cpu) do { \ + DEF_INSN_DATA(group, mod, cpu); \ + return ARCH_CHECK_ID_INSN; \ + } while (0) + +/* + * General instruction groupings + */ + +/* One byte opcode instructions with no operands */ +static const x86_insn_info onebyte_insn[] = { + { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} } +}; + +/* Two byte opcode instructions with no operands */ +static const x86_insn_info twobyte_insn[] = { + { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} } +}; + +/* Three byte opcode instructions with no operands */ +static const x86_insn_info threebyte_insn[] = { + { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0, + {0, 0, 0} } +}; + +/* One byte opcode instructions with general memory operand */ +static const x86_insn_info onebytemem_insn[] = { + { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1, + {OPT_Mem|OPS_Any|OPA_EA, 0, 0} } +}; + +/* Two byte opcode instructions with general memory operand */ +static const x86_insn_info twobytemem_insn[] = { + { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1, + {OPT_Mem|OPS_Any|OPA_EA, 0, 0} } +}; + +/* Move instructions */ +static const x86_insn_info 
mov_insn[] = { + { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2, + {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2, + {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 32, 1, {0xA1, 0, 0}, 0, 2, + {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} }, + { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} }, + { CPU_Any, 0, 32, 1, {0xA3, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} }, + { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} }, + { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} }, + { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} }, + { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2, + {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} }, + { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} }, + /* TODO: segreg here */ + { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2, + {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} }, + /* Need two sets here, one for strictness on left side, one for right. 
*/ + { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} }, + { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2, + {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2, + {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2, + {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} } +}; + +/* Move with sign/zero extend */ +static const x86_insn_info movszx_insn[] = { + { CPU_386, MOD_Op1Add, 16, 2, {0x0F, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} }, + { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 1, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} } +}; + + +bytecode * +x86_new_insn(const unsigned long data[4], int num_operands, + insn_operandhead *operands) +{ + x86_new_insn_data d; + int num_info = (int)(data[1]&0xFF); + x86_insn_info *info = (x86_insn_info *)data[0]; + unsigned long 
mod_data = data[1] >> 8; + int found = 0; + insn_operand *op; + int i; + static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0}; + + /* Just do a simple linear search through the info array for a match. + * First match wins. + */ + for (; num_info>0 && !found; num_info--, info++) { + unsigned long cpu; + unsigned int size; + int mismatch = 0; + + /* Match CPU */ + cpu = info->cpu | data[2]; + if ((cpu_enabled & cpu) != cpu) + continue; + + /* Match # of operands */ + if (num_operands != info->num_operands) + continue; + + if (!operands) { + found = 1; /* no operands -> must have a match here. */ + break; + } + + /* Match each operand type and size */ + for(i = 0, op = ops_first(operands); op && i<info->num_operands && + !mismatch; op = ops_next(op), i++) { + /* Check operand type */ + switch (info->operands[i] & OPT_MASK) { + case OPT_Imm: + if (op->type != INSN_OPERAND_IMM) + mismatch = 1; + break; + case OPT_Reg: + if (op->type != INSN_OPERAND_REG) + mismatch = 1; + else { + size = op->data.reg & ~7; + if (size == X86_CRREG || size == X86_DRREG || + size == X86_TRREG) + mismatch = 1; + } + break; + case OPT_Mem: + if (op->type != INSN_OPERAND_MEMORY) + mismatch = 1; + break; + case OPT_RM: + if (op->type != INSN_OPERAND_REG && + op->type != INSN_OPERAND_MEMORY) + mismatch = 1; + break; + case OPT_SegReg: + if (op->type != INSN_OPERAND_SEGREG) + mismatch = 1; + break; + case OPT_CRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_CRREG) + mismatch = 1; + break; + case OPT_DRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_DRREG) + mismatch = 1; + break; + case OPT_TRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_TRREG) + mismatch = 1; + break; + case OPT_ST0: + if (op->type != INSN_OPERAND_REG || + op->data.reg != X86_FPUREG) + mismatch = 1; + break; + case OPT_Areg: + if (op->type != INSN_OPERAND_REG || + ((info->operands[i] & OPS_MASK) == OPS_8 && + op->data.reg != (X86_REG8 | 0)) || + 
((info->operands[i] & OPS_MASK) == OPS_16 && + op->data.reg != (X86_REG16 | 0)) || + ((info->operands[i] & OPS_MASK) == OPS_32 && + op->data.reg != (X86_REG32 | 0))) + mismatch = 1; + break; + case OPT_Creg: + if (op->type != INSN_OPERAND_REG || + ((info->operands[i] & OPS_MASK) == OPS_8 && + op->data.reg != (X86_REG8 | 1)) || + ((info->operands[i] & OPS_MASK) == OPS_16 && + op->data.reg != (X86_REG16 | 1)) || + ((info->operands[i] & OPS_MASK) == OPS_32 && + op->data.reg != (X86_REG32 | 1))) + mismatch = 1; + break; + case OPT_CR4: + if (op->type != INSN_OPERAND_REG || + op->data.reg != (X86_CRREG | 4)) + mismatch = 1; + break; + case OPT_MemOffs: + if (op->type != INSN_OPERAND_MEMORY || + expr_contains(ea_get_disp(op->data.ea), EXPR_REG)) + mismatch = 1; + break; + default: + InternalError(_("invalid operand type")); + } + + if (mismatch) + break; + + /* Check operand size */ + size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT]; + if (op->type == INSN_OPERAND_REG && op->size == 0) { + /* Register size must exactly match */ + if (x86_get_reg_size(op->data.reg) != size) + mismatch = 1; + } else { + if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) { + /* Relaxed checking */ + if (size != 0 && op->size != size && op->size != 0) + mismatch = 1; + } else { + /* Strict checking */ + if (op->size != size) + mismatch = 1; + } + } + } + + if (!mismatch) { + found = 1; + break; + } + } + + if (!found) { + /* Didn't find a matching one */ + /* FIXME: This needs to be more descriptive of certain reasons for a + * mismatch. E.g.: + * "mismatch in operand sizes" + * "operand size not specified" + * etc. This will probably require adding dummy error catchers in the + * insn list which are only looked at if we get here. 
+ */ + Error(_("invalid combination of opcode and operands")); + return NULL; + } + + /* Copy what we can from info */ + d.ea = NULL; + d.imm = NULL; + d.opersize = info->opersize; + d.op_len = info->opcode_len; + d.op[0] = info->opcode[0]; + d.op[1] = info->opcode[1]; + d.op[2] = info->opcode[2]; + d.spare = info->spare; + d.im_len = 0; + d.im_sign = 0; + + /* Apply modifiers */ + if (info->modifiers & MOD_Op2Add) { + d.op[2] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Gap0) + mod_data >>= 8; + if (info->modifiers & MOD_Op1Add) { + d.op[1] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Gap1) + mod_data >>= 8; + if (info->modifiers & MOD_Op0Add) { + d.op[0] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_SpAdd) { + d.spare += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_OpSizeR) { + d.opersize = (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Imm8) { + d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF))); + d.im_len = 1; + /*mod_data >>= 8;*/ + } + + /* Go through operands and assign */ + if (operands) { + for(i = 0, op = ops_first(operands); op && inum_operands; + op = ops_next(op), i++) { + switch (info->operands[i] & OPA_MASK) { + case OPA_None: + /* Throw away the operand contents */ + switch (op->type) { + case INSN_OPERAND_REG: + case INSN_OPERAND_SEGREG: + break; + case INSN_OPERAND_MEMORY: + ea_delete(op->data.ea); + break; + case INSN_OPERAND_IMM: + expr_delete(op->data.val); + break; + } + break; + case OPA_EA: + switch (op->type) { + case INSN_OPERAND_REG: + d.ea = x86_ea_new_reg((unsigned char)op->data.reg); + break; + case INSN_OPERAND_SEGREG: + InternalError(_("invalid operand conversion")); + case INSN_OPERAND_MEMORY: + d.ea = op->data.ea; + if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) { + /* Special-case for MOV MemOffs instruction */ + 
x86_effaddr_data *ead = ea_get_data(d.ea); + ead->valid_modrm = 0; + ead->need_modrm = 0; + ead->valid_sib = 0; + ead->need_sib = 0; + } + break; + case INSN_OPERAND_IMM: + d.ea = x86_ea_new_imm(op->data.val, + size_lookup[(info->operands[i] & + OPS_MASK)>>OPS_SHIFT]); + break; + } + break; + case OPA_Imm: + if (op->type == INSN_OPERAND_IMM) { + d.imm = op->data.val; + d.im_len = size_lookup[(info->operands[i] & + OPS_MASK)>>OPS_SHIFT]; + } else + InternalError(_("invalid operand conversion")); + break; + case OPA_Spare: + if (op->type == INSN_OPERAND_REG || + op->type == INSN_OPERAND_SEGREG) + d.spare = (unsigned char)(op->data.reg&7); + else + InternalError(_("invalid operand conversion")); + break; + case OPA_Op0Add: + if (op->type == INSN_OPERAND_REG) + d.op[0] += (unsigned char)(op->data.reg&7); + else + InternalError(_("invalid operand conversion")); + break; + default: + InternalError(_("unknown operand action")); + } + } + } + + /* Create the bytecode and return it */ + return x86_bc_new_insn(&d); +} + + +#define YYCTYPE char +#define YYCURSOR id +#define YYLIMIT id +#define YYMARKER marker +#define YYFILL(n) + +/*!re2c + any = [\000-\377]; + A = [aA]; + B = [bB]; + C = [cC]; + D = [dD]; + E = [eE]; + F = [fF]; + G = [gG]; + H = [hH]; + I = [iI]; + J = [jJ]; + K = [kK]; + L = [lL]; + M = [mM]; + N = [nN]; + O = [oO]; + P = [pP]; + Q = [qQ]; + R = [rR]; + S = [sS]; + T = [tT]; + U = [uU]; + V = [vV]; + W = [wW]; + X = [xX]; + Y = [yY]; + Z = [zZ]; +*/ +/* Apply a CPU keyword: a CPU name replaces cpu_enabled, a feature name (or its "no" form) sets/clears one flag, anything else only warns. */ +void +x86_switch_cpu(const char *id) +{ + const char *oid = id, *marker; /* the scanner advances id (YYCURSOR); oid keeps the start of the identifier for messages */ + + /*!re2c + /* The standard CPU names /set/ cpu_enabled. */ + "8086" { + cpu_enabled = CPU_Priv; + return; + } + ("80" | I)? "186" { + cpu_enabled = CPU_186|CPU_Priv; + return; + } + ("80" | I)? "286" { + cpu_enabled = CPU_186|CPU_286|CPU_Priv; + return; + } + ("80" | I)? "386" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + ("80" | I)?
"486" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM| + CPU_Prot|CPU_Priv; + return; + } + (I? "586") | (P E N T I U M) | (P "5") { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (P "2") | (P E N T I U M "-"? ("2" | (I I))) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot| + CPU_Priv; + return; + } + (P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (I A "-"? "64") | (I T A N I U M) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE| + CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + K "6" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot| + CPU_Priv; + return; + } + A T H L O N { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (S L E D G E)? (H A M M E R) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE| + CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + + /* Features have "no" versions to disable them, and only set/reset the + * specific feature being changed. All other bits are left alone.
+ */ + F P U { cpu_enabled |= CPU_FPU; return; } + N O F P U { cpu_enabled &= ~CPU_FPU; return; } + M M X { cpu_enabled |= CPU_MMX; return; } + N O M M X { cpu_enabled &= ~CPU_MMX; return; } + S S E { cpu_enabled |= CPU_SSE; return; } + N O S S E { cpu_enabled &= ~CPU_SSE; return; } + S S E "2" { cpu_enabled |= CPU_SSE2; return; } + N O S S E "2" { cpu_enabled &= ~CPU_SSE2; return; } + "3" D N O W { cpu_enabled |= CPU_3DNow; return; } + N O "3" D N O W { cpu_enabled &= ~CPU_3DNow; return; } + C Y R I X { cpu_enabled |= CPU_Cyrix; return; } + N O C Y R I X { cpu_enabled &= ~CPU_Cyrix; return; } + A M D { cpu_enabled |= CPU_AMD; return; } + N O A M D { cpu_enabled &= ~CPU_AMD; return; } + S M M { cpu_enabled |= CPU_SMM; return; } + N O S M M { cpu_enabled &= ~CPU_SMM; return; } + P R O T { cpu_enabled |= CPU_Prot; return; } + N O P R O T { cpu_enabled &= ~CPU_Prot; return; } + U N D O C { cpu_enabled |= CPU_Undoc; return; } + N O U N D O C { cpu_enabled &= ~CPU_Undoc; return; } + O B S { cpu_enabled |= CPU_Obs; return; } + N O O B S { cpu_enabled &= ~CPU_Obs; return; } + P R I V { cpu_enabled |= CPU_Priv; return; } + N O P R I V { cpu_enabled &= ~CPU_Priv; return; } + + /* catchalls: report the identifier from its saved start (oid), not the advanced cursor */ + [A-Za-z0-9]+ { + Warning(_("unrecognized CPU identifier `%s'"), oid); + return; + } + any { + Warning(_("unrecognized CPU identifier `%s'"), oid); + return; + } + */ +} + +arch_check_id_retval +x86_check_identifier(unsigned long data[4], const char *id) +{ + const char *oid = id; + const char *marker; + /*!re2c + /* target modifiers */ + N E A R { + data[0] = X86_NEAR; + return ARCH_CHECK_ID_TARGETMOD; + } + S H O R T { + data[0] = X86_SHORT; + return ARCH_CHECK_ID_TARGETMOD; + } + F A R { + data[0] = X86_FAR; + return ARCH_CHECK_ID_TARGETMOD; + } + + /* operand size overrides */ + O "16" { + data[0] = X86_OPERSIZE; + data[1] = 16; + return ARCH_CHECK_ID_PREFIX; + } + O "32" { + data[0] = X86_OPERSIZE; + data[1] = 32; + return ARCH_CHECK_ID_PREFIX; + } + /* address size overrides */ +
A "16" { + data[0] = X86_ADDRSIZE; + data[1] = 16; + return ARCH_CHECK_ID_PREFIX; + } + A "32" { + data[0] = X86_ADDRSIZE; + data[1] = 32; + return ARCH_CHECK_ID_PREFIX; + } + + /* instruction prefixes */ + L O C K { + data[0] = X86_LOCKREP; + data[1] = 0xF0; + return ARCH_CHECK_ID_PREFIX; + } + R E P N E { + data[0] = X86_LOCKREP; + data[1] = 0xF2; + return ARCH_CHECK_ID_PREFIX; + } + R E P N Z { + data[0] = X86_LOCKREP; + data[1] = 0xF2; + return ARCH_CHECK_ID_PREFIX; + } + R E P { + data[0] = X86_LOCKREP; + data[1] = 0xF3; + return ARCH_CHECK_ID_PREFIX; + } + R E P E { + data[0] = X86_LOCKREP; + data[1] = 0xF3; /* REPE shares the F3 prefix byte with REP (F4 is the HLT opcode) */ + return ARCH_CHECK_ID_PREFIX; + } + R E P Z { + data[0] = X86_LOCKREP; + data[1] = 0xF3; /* REPZ is a synonym of REPE: prefix byte F3 */ + return ARCH_CHECK_ID_PREFIX; + } + + /* control, debug, and test registers */ + C R [02-4] { + data[0] = X86_CRREG | (oid[2]-'0'); + return ARCH_CHECK_ID_REG; + } + D R [0-7] { + data[0] = X86_DRREG | (oid[2]-'0'); + return ARCH_CHECK_ID_REG; + } + T R [0-7] { + data[0] = X86_TRREG | (oid[2]-'0'); + return ARCH_CHECK_ID_REG; + } + + /* floating point, MMX, and SSE/SSE2 registers */ + S T [0-7] { + data[0] = X86_FPUREG | (oid[2]-'0'); + return ARCH_CHECK_ID_REG; + } + M M [0-7] { + data[0] = X86_MMXREG | (oid[2]-'0'); + return ARCH_CHECK_ID_REG; + } + X M M [0-7] { + data[0] = X86_XMMREG | (oid[3]-'0'); /* three-letter name: the register digit is the 4th character */ + return ARCH_CHECK_ID_REG; + } + + /* integer registers */ + E A X { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; } + E C X { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; } + E D X { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; } + E B X { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; } + E S P { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; } + E B P { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; } + E S I { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; } + E D I { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; } + + A X { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; } + C X { data[0] = X86_REG16 | 1; return
ARCH_CHECK_ID_REG; } + D X { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; } + B X { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; } + S P { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; } + B P { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; } + S I { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; } + D I { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; } + + A L { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; } + C L { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; } + D L { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; } + B L { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; } + A H { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; } + C H { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; } + D H { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; } + B H { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; } + + /* segment registers */ + E S { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; } + C S { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; } + S S { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; } + D S { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; } + F S { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; } + G S { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; } + + /* instructions */ + + /* Move */ + M O V { RET_INSN(mov, 0, CPU_Any); } + /* Move with sign/zero extend */ + M O V S X { RET_INSN(movszx, 0xBE, CPU_386); } + M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); } + /* Push instructions */ + /* P U S H */ + P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); } + P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); } + P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); } + /* Pop instructions */ + /* P O P */ + P O P A { RET_INSN(onebyte, 0x0061, CPU_186); } + P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); } + P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); } + /* Exchange */ + /* X C H G */ + /* In/out from ports */ + /* I N */ + /* O U T */ + /* Load effective address */ + /* L E A */ + /* Load segment 
registers from memory */ + /* L D S */ + /* L E S */ + /* L F S */ + /* L G S */ + /* L S S */ + /* Flags register instructions */ + C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); } + C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); } + C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); } + C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); } + C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); } + L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); } + S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); } + P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); } + P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); } + P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); } + P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); } + P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); } + P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); } + S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); } + S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); } + S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); } + /* Arithmetic */ + /* A D D */ + /* I N C */ + /* S U B */ + /* D E C */ + /* S B B */ + /* C M P */ + /* T E S T */ + /* A N D */ + /* O R */ + /* X O R */ + /* A D C */ + /* N E G */ + /* N O T */ + A A A { RET_INSN(onebyte, 0x0037, CPU_Any); } + A A S { RET_INSN(onebyte, 0x003F, CPU_Any); } + D A A { RET_INSN(onebyte, 0x0027, CPU_Any); } + D A S { RET_INSN(onebyte, 0x002F, CPU_Any); } + /* A A D */ + /* A A M */ + /* Conversion instructions */ + C B W { RET_INSN(onebyte, 0x1098, CPU_Any); } + C W D E { RET_INSN(onebyte, 0x2098, CPU_386); } + C W D { RET_INSN(onebyte, 0x1099, CPU_Any); } + C D Q { RET_INSN(onebyte, 0x2099, CPU_386); } + /* Multiplication and division */ + /* M U L */ + /* I M U L */ + /* D I V */ + /* I D I V */ + /* Shifts */ + /* R O L */ + /* R O R */ + /* R C L */ + /* R C R */ + /* S A L */ + /* S H L */ + /* S H R */ + /* S A R */ + /* S H L D */ + /* S H R D */ + /* Control transfer instructions (unconditional) */ + /* C A L L */ + /* J M P */ + R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); } + /* R E T N */ + /* R E 
T F */ + /* E N T E R */ + L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); } + /* Conditional jumps */ + /* J O */ + /* J N O */ + /* J B */ + /* JC */ + /* J N A E */ + /* J N B */ + /* J N C */ + /* J A E */ + /* J E */ + /* J Z */ + /* J N E */ + /* J N Z */ + /* J B E */ + /* J N A */ + /* J N B E */ + /* J A */ + /* J S */ + /* J N S */ + /* J P */ + /* J P E */ + /* J N P */ + /* J P O */ + /* J L */ + /* J N G E */ + /* J N L */ + /* J G E */ + /* J L E */ + /* J N G */ + /* J N L E */ + /* J G */ + /* J C X Z */ + /* J E C X Z */ + /* Loop instructions */ + /* L O O P */ + /* L O O P Z */ + /* L O O P E */ + /* L O O P N Z */ + /* L O O P N E */ + /* Set byte on flag instructions */ + /* S E T O */ + /* S E T N O */ + /* S E T B */ + /* S E T C */ + /* S E T N A E */ + /* S E T N B */ + /* S E T N C */ + /* S E T A E */ + /* S E T E */ + /* S E T Z */ + /* S E T N E */ + /* S E T N Z */ + /* S E T B E */ + /* S E T N A */ + /* S E T N B E */ + /* S E T A */ + /* S E T S */ + /* S E T N S */ + /* S E T P */ + /* S E T P E */ + /* S E T N P */ + /* S E T P O */ + /* S E T L */ + /* S E T N G E */ + /* S E T N L */ + /* S E T G E */ + /* S E T L E */ + /* S E T N G */ + /* S E T N L E */ + /* S E T G */ + /* String instructions. 
*/ + C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); } + C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); } + /* C M P S D */ + I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); } + I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); } + I N S D { RET_INSN(onebyte, 0x206D, CPU_386); } + O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); } + O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); } + O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); } + L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); } + L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); } + L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); } + M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); } + M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); } + /* M O V S D */ + S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); } + S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); } + S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); } + S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); } + S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); } + S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); } + X L A T B? 
{ RET_INSN(onebyte, 0x00D7, CPU_Any); } + /* Bit manipulation */ + /* B S F */ + /* B S R */ + /* B T */ + /* B T C */ + /* B T R */ + /* B T S */ + /* Interrupts and operating system instructions */ + /* I N T */ + I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); } + I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); } + I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); } + I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); } + I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); } + I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); } + R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); } + /* B O U N D */ + H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); } + N O P { RET_INSN(onebyte, 0x0090, CPU_Any); } + /* Protection control */ + /* A R P L */ + /* L A R */ + L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); } + L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); } + /* L L D T */ + /* L M S W */ + /* L S L */ + /* L T R */ + S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); } + S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); } + /* S L D T */ + /* S M S W */ + /* S T R */ + /* V E R R */ + /* V E R W */ + /* Floating point instructions */ + /* F L D */ + /* F I L D */ + /* F B L D */ + /* F S T */ + /* F I S T */ + /* F S T P */ + /* F I S T P */ + /* F B S T P */ + /* F X C H */ + /* F C O M */ + /* F I C O M */ + /* F C O M P */ + /* F I C O M P */ + F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); } + /* F U C O M */ + /* F U C O M P */ + F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); } + F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); } + F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); } + F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); } + F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); } + F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); } + F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); } + F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); } + F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); } + F L D Z { 
RET_INSN(twobyte, 0xD9EE, CPU_FPU); } + /* F A D D */ + /* F A D D P */ + /* F I A D D */ + /* F S U B */ + /* F I S U B */ + /* F S U B P */ + /* F S U B R */ + /* F I S U B R */ + /* F S U B R P */ + /* F M U L */ + /* F I M U L */ + /* F M U L P */ + /* F D I V */ + /* F I D I V */ + /* F D I V P */ + /* F D I V R */ + /* F I D I V R */ + /* F D I V R P */ + F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); } + F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); } + F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); } + F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); } + F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); } + F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); } + F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); } + F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); } + F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); } + F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); } + F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); } + F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); } + F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); } + F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); } + F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); } + F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); } + F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); } + F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); } + F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); } + F I N I T { RET_INSN(threebyte, 0x98DBE3, CPU_FPU); } + /* F L D C W */ + /* F N S T C W */ + /* F S T C W */ + /* F N S T S W */ + /* F S T S W */ + F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); } + F C L E X { RET_INSN(threebyte, 0x98DBE2, CPU_FPU); } + F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); } + F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); } + F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); } + F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); } + F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); } + F R S T O R { 
RET_INSN(onebytemem, 0x04DD, CPU_FPU); } + /* F F R E E */ + /* F F R E E P */ + F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); } + F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); } + /* Prefixes (should the others be here too? should wait be a prefix? */ + W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); } + /* 486 extensions */ + /* B S W A P */ + /* X A D D */ + /* C M P X C H G */ + /* C M P X C H G 4 8 6 */ + I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); } + W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); } + I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); } + /* 586+ and late 486 extensions */ + C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); } + /* Pentium extensions */ + W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); } + R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); } + R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); } + /* C M P X C H G 8 B */ + /* Pentium II/Pentium Pro extensions */ + S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); } + S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); } + F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); } + F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); } + R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); } + U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); } + U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); } + /* C M O V */ + /* F C M O V */ + /* F C O M I */ + /* F U C O M I */ + /* F C O M I P */ + /* F U C O M I P */ + /* Pentium4 extensions */ + /* M O V N T I */ + /* C L F L U S H */ + L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); } + M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); } + P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); } + /* MMX/SSE2 instructions */ + E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); } + /* PIII (Katmai) new instructions / SIMD instructions */ + /* ... 
*/ + P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); } + P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); } + P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); } + P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); } + /* ... */ + S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); } + /* ... */ + /* SSE2 instructions */ + /* AMD 3DNow! instructions */ + P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); } + P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); } + F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); } + /* ... */ + /* AMD extensions */ + S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); } + S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); } + /* Cyrix MMX instructions */ + /* Cyrix extensions */ + R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); } + /* R S D C */ + /* R S L D T */ + /* R S T S */ + /* S V D C */ + /* S V L D T */ + /* S V T S */ + S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); } + S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); } + W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); } + /* Obsolete/undocumented instructions */ + F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); } + /* I B T S */ + L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); } + L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); } + S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); } + S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); } + /* U M O V */ + /* X B T S */ + + + /* catchalls */ + [A-Za-z0-9]+ { + return ARCH_CHECK_ID_NONE; + } + any { + return ARCH_CHECK_ID_NONE; + } + */ +} diff --git a/modules/parsers/nasm/Makefile.inc b/modules/parsers/nasm/Makefile.inc index dbe7010d..b2a24282 100644 --- a/modules/parsers/nasm/Makefile.inc +++ b/modules/parsers/nasm/Makefile.inc @@ -3,57 +3,22 @@ 
YASMPARSERFILES += \ src/parsers/nasm/nasm-parser.c \ src/parsers/nasm/nasm-defs.h \ - nasm-bison.y \ + src/parsers/nasm/nasm-bison.y \ nasm-bison.h \ - nasm-token.l - -if DEV - -nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl - $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y - -else - -nasm-token.l: $(srcdir)/nasm-token.l - @echo Warning: Not generating nasm-token.l from nasm-token.l.in. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-token.l . -nasm-token.c: $(srcdir)/nasm-token.c - @echo Warning: Not generating nasm-token.c from nasm-token.l. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-token.c . -nasm-bison.y: $(srcdir)/nasm-bison.y - @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.y . -nasm-bison.c: $(srcdir)/nasm-bison.c - @echo Warning: Not generating nasm-bison.c from nasm-bison.y. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.c . -nasm-bison.h: $(srcdir)/nasm-bison.h - @echo Warning: Not generating nasm-bison.h from nasm-bison.y. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.h . 
- -endif + nasm-token.c -noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl +nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl + re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@ BUILT_SOURCES += \ - nasm-bison.y \ nasm-bison.c \ nasm-bison.h \ - nasm-token.l \ nasm-token.c CLEANFILES += \ - nasm-bison.y \ nasm-bison.c \ nasm-bison.h \ - nasm-token.l \ nasm-token.c EXTRA_DIST += \ - src/parsers/nasm/token.l.in \ - src/parsers/nasm/bison.y.in \ - src/parsers/nasm/gen_instr.pl + src/parsers/nasm/nasm-token.re diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in deleted file mode 100644 index b3f0533a..00000000 --- a/modules/parsers/nasm/bison.y.in +++ /dev/null @@ -1,791 +0,0 @@ -/* - * NASM-compatible bison parser - * - * Copyright (C) 2001 Peter Johnson, Michael Urman - * - * This file is part of YASM. - * - * YASM is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * YASM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -%{ -#include "util.h" -RCSID("$IdPath$"); - -#ifdef STDC_HEADERS -# include -#endif - -#include "bitvect.h" - -#include "globals.h" -#include "errwarn.h" -#include "intnum.h" -#include "floatnum.h" -#include "expr.h" -#include "symrec.h" - -#include "bytecode.h" -#include "section.h" -#include "objfmt.h" - -#include "arch.h" - -#include "src/parsers/nasm/nasm-defs.h" - -void init_table(void); -extern int nasm_parser_lex(void); -void nasm_parser_error(const char *); -static void nasm_parser_directive(const char *name, - valparamhead *valparams, - /*@null@*/ valparamhead *objext_valparams); - -extern objfmt *nasm_parser_objfmt; -extern sectionhead nasm_parser_sections; -extern section *nasm_parser_cur_section; -extern char *nasm_parser_locallabel_base; - -static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL; -static bytecode *nasm_parser_temp_bc; - -/* additional data declarations (dynamically generated) */ -/* @DATADECLS@ */ - -/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/ -%} - -%union { - unsigned int int_info; - char *str_val; - intnum *intn; - floatnum *flt; - symrec *sym; - unsigned char groupdata[5]; - effaddr *ea; - expr *exp; - immval *im_val; - x86_targetval tgt_val; - datavalhead datahead; - dataval *data; - bytecode *bc; - valparamhead dir_valparams; - valparam *dir_valparam; -} - -%token INTNUM -%token FLTNUM -%token DIRECTIVE_NAME STRING FILENAME -%token BYTE WORD DWORD QWORD TWORD DQWORD -%token DECLARE_DATA -%token RESERVE_SPACE -%token INCBIN EQU TIMES -%token SEG WRT NEAR SHORT FAR NOSPLIT ORG -%token TO -%token LOCK REPNZ REP REPZ -%token OPERSIZE ADDRSIZE -%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG -%token REG_EAX REG_ECX REG_EDX REG_EBX -%token REG_ESP REG_EBP REG_ESI 
REG_EDI -%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI -%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH -%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS -%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID -%token ID LOCAL_ID SPECIAL_ID -%token LINE - -/* instruction tokens (dynamically generated) */ -/* @TOKENS@ */ - -/* @TYPES@ */ - -%type line lineexp exp instr instrbase - -%type reg_eax reg_ecx -%type reg_ax reg_cx reg_dx -%type reg_al reg_cl -%type reg_es reg_cs reg_ss reg_ds reg_fs reg_gs -%type fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg -%type mem memaddr memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 -%type imm imm8x imm16x imm32x imm8 imm16 imm32 -%type expr expr_no_string memexpr direxpr -%type explabel -%type label_id -%type target -%type dataval -%type datavals -%type directive_valparams -%type directive_valparam - -%left '|' -%left '^' -%left '&' -%left LEFT_OP RIGHT_OP -%left '-' '+' -%left '*' '/' SIGNDIV '%' SIGNMOD -%nonassoc UNARYOP - -%% -input: /* empty */ - | input line { - nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), - $2); - if (nasm_parser_temp_bc) - nasm_parser_prev_bc = nasm_parser_temp_bc; - line_index++; - } -; - -line: '\n' { $$ = (bytecode *)NULL; } - | lineexp '\n' - | LINE INTNUM '+' INTNUM FILENAME '\n' { - /* %line indicates the line number of the *next* line, so subtract out - * the increment when setting the line number. 
- */ - line_set($5, intnum_get_uint($2)-intnum_get_uint($4), - intnum_get_uint($4)); - intnum_delete($2); - intnum_delete($4); - xfree($5); - $$ = (bytecode *)NULL; - } - | directive '\n' { $$ = (bytecode *)NULL; } - | error '\n' { - Error(_("label or instruction expected at start of line")); - $$ = (bytecode *)NULL; - yyerrok; - } -; - -lineexp: exp - | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } - | label { $$ = (bytecode *)NULL; } - | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } - | label_id EQU expr { - symrec_define_equ($1, $3); - xfree($1); - $$ = (bytecode *)NULL; - } -; - -exp: instr - | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); } - | INCBIN STRING { $$ = bc_new_incbin($2, NULL, NULL); } - | INCBIN STRING ',' expr { $$ = bc_new_incbin($2, $4, NULL); } - | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); } -; - -datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } - | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } -; - -dataval: expr_no_string { $$ = dv_new_expr($1); } - | STRING { $$ = dv_new_string($1); } - | error { - Error(_("expression syntax error")); - $$ = (dataval *)NULL; - } -; - -label: label_id { - symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc, - 1); - xfree($1); - } - | label_id ':' { - symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc, - 1); - xfree($1); - } -; - -label_id: ID { - $$ = $1; - if (nasm_parser_locallabel_base) - xfree(nasm_parser_locallabel_base); - nasm_parser_locallabel_base = xstrdup($1); - } - | SPECIAL_ID - | LOCAL_ID -; - -/* directives */ -directive: '[' DIRECTIVE_NAME directive_val ']' { - xfree($2); - } - | '[' DIRECTIVE_NAME error ']' { - Error(_("invalid arguments to [%s]"), $2); - xfree($2); - } -; - - /* $0 is the DIRECTIVE_NAME */ - /* After : is (optional) object-format specific extension */ -directive_val: directive_valparams 
{ - nasm_parser_directive($0, &$1, NULL); - } - | directive_valparams ':' directive_valparams { - nasm_parser_directive($0, &$1, &$3); - } -; - -directive_valparams: directive_valparam { - vps_initialize(&$$); - vps_append(&$$, $1); - } - | directive_valparams directive_valparam { - vps_append(&$1, $2); - $$ = $1; - } -; - -directive_valparam: direxpr { - /* If direxpr is just an ID, put it in val and delete the expr */ - const /*@null@*/ symrec *vp_symrec; - if ((vp_symrec = expr_get_symrec(&$1, 0))) { - vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL); - expr_delete($1); - } else - vp_new($$, NULL, $1); - } - | ID '=' direxpr { vp_new($$, $1, $3); } -; - -/* register groupings */ -fpureg: ST0 - | FPUREG_NOTST0 -; - -reg_eax: REG_EAX - | DWORD reg_eax { $$ = $2; } -; - -reg_ecx: REG_ECX - | DWORD reg_ecx { $$ = $2; } -; - -rawreg32: REG_EAX - | REG_ECX - | REG_EDX - | REG_EBX - | REG_ESP - | REG_EBP - | REG_ESI - | REG_EDI -; - -reg32: rawreg32 - | DWORD reg32 { $$ = $2; } -; - -reg_ax: REG_AX - | WORD reg_ax { $$ = $2; } -; - -reg_cx: REG_CX - | WORD reg_cx { $$ = $2; } -; - -reg_dx: REG_DX - | WORD reg_dx { $$ = $2; } -; - -rawreg16: REG_AX - | REG_CX - | REG_DX - | REG_BX - | REG_SP - | REG_BP - | REG_SI - | REG_DI -; - -reg16: rawreg16 - | WORD reg16 { $$ = $2; } -; - -reg_al: REG_AL - | BYTE reg_al { $$ = $2; } -; - -reg_cl: REG_CL - | BYTE reg_cl { $$ = $2; } -; - -reg8: REG_AL - | REG_CL - | REG_DL - | REG_BL - | REG_AH - | REG_CH - | REG_DH - | REG_BH - | BYTE reg8 { $$ = $2; } -; - -reg_es: REG_ES - | WORD reg_es { $$ = $2; } -; - -reg_ss: REG_SS - | WORD reg_ss { $$ = $2; } -; - -reg_ds: REG_DS - | WORD reg_ds { $$ = $2; } -; - -reg_fs: REG_FS - | WORD reg_fs { $$ = $2; } -; - -reg_gs: REG_GS - | WORD reg_gs { $$ = $2; } -; - -reg_cs: REG_CS - | WORD reg_cs { $$ = $2; } -; - -segreg: REG_ES - | REG_SS - | REG_DS - | REG_FS - | REG_GS - | REG_CS - | WORD segreg { $$ = $2; } -; - -/* memory addresses */ -/* FIXME: Is there any way this redundancy can 
be eliminated? This is almost - * identical to expr: the only difference is that FLTNUM is replaced by - * rawreg16 and rawreg32. - * - * Note that the two can't be just combined because of conflicts caused by imm - * vs. reg. I don't see a simple solution right now to this. - * - * We don't attempt to check memory expressions for validity here. - */ -memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); } - | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' memexpr %prec UNARYOP { $$ = $2; } - | '-' 
memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' memexpr ')' { $$ = $2; } - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } - | error { Error(_("invalid effective address")); } -; - -memaddr: memexpr { - $$ = x86_ea_new_expr($1); - x86_ea_set_segment($$, 0); - } - | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } - | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } - | WORD memaddr { $$ = $2; ea_set_len($$, 2); } - | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } - | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } -; - -mem: '[' memaddr ']' { $$ = $2; } -; - -/* explicit memory */ -mem8x: BYTE mem { $$ = $2; } - | BYTE mem8x { $$ = $2; } -; -mem16x: WORD mem { $$ = $2; } - | WORD mem16x { $$ = $2; } -; -mem32x: DWORD mem { $$ = $2; } - | DWORD mem32x { $$ = $2; } -; -mem64x: QWORD mem { $$ = $2; } - | QWORD mem64x { $$ = $2; } -; -mem80x: TWORD mem { $$ = $2; } - | TWORD mem80x { $$ = $2; } -; -mem128x: DQWORD mem { $$ = $2; } - | DQWORD mem128x { $$ = $2; } -; - -/* FAR memory, for jmp and call */ -memfar: FAR mem { $$ = $2; } - | FAR memfar { $$ = $2; } -; - -/* implicit memory */ -mem8: mem - | mem8x -; -mem16: mem - | mem16x -; -mem32: mem - | mem32x -; -mem64: mem - | mem64x -; -mem80: mem - | mem80x -; -mem128: mem - | mem128x -; - -/* both 16 and 32 bit memory */ -mem1632: mem - | mem16x - | mem32x -; - -/* explicit register or memory */ -rm8x: reg8 { $$ = x86_ea_new_reg($1); } - | mem8x -; -rm16x: reg16 { $$ = x86_ea_new_reg($1); 
} - | mem16x -; -rm32x: reg32 { $$ = x86_ea_new_reg($1); } - | mem32x -; -/* not needed: -rm64x: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64x -; -rm128x: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128x -; -*/ - -/* implicit register or memory */ -rm8: reg8 { $$ = x86_ea_new_reg($1); } - | mem8 -; -rm16: reg16 { $$ = x86_ea_new_reg($1); } - | mem16 -; -rm32: reg32 { $$ = x86_ea_new_reg($1); } - | mem32 -; -rm64: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64 -; -rm128: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128 -; - -/* immediate values */ -imm: expr { $$ = imm_new_expr($1); } -; - -/* explicit immediates */ -imm8x: BYTE imm { $$ = $2; } -; -imm16x: WORD imm { $$ = $2; } -; -imm32x: DWORD imm { $$ = $2; } -; - -/* implicit immediates */ -imm8: imm - | imm8x -; -imm16: imm - | imm16x -; -imm32: imm - | imm32x -; - -/* jump targets */ -target: expr { - $$.val = $1; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); - } - | SHORT target { - $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); - } - | NEAR target { - $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); - } -; - -/* expression trees */ - -/* expr w/o FLTNUM and unary + and -, for use in directives */ -direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | ID { - $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0))); - xfree($1); - } - | direxpr '|' direxpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | direxpr '^' direxpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - | direxpr '&' direxpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - | direxpr LEFT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | direxpr '+' direxpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | direxpr '-' direxpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | direxpr '*' direxpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | direxpr '/' direxpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | direxpr SIGNDIV direxpr { $$ 
= expr_new_tree($1, EXPR_SIGNDIV, $3); } - | direxpr '%' direxpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | direxpr SIGNMOD direxpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' direxpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' direxpr ')' { $$ = $2; } -; - -expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' expr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | expr '&' expr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| expr '==' expr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| expr '>' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| expr '<' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| expr '>=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| expr '<=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| expr '!=' expr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | expr LEFT_OP expr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | expr RIGHT_OP expr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | expr '+' expr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | expr '-' expr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | expr '*' expr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | expr '/' expr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | expr SIGNDIV expr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | expr '%' expr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | expr SIGNMOD expr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' expr %prec UNARYOP { $$ = $2; } - | '-' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' 
expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' expr ')' { $$ = $2; } -; - -expr: expr_no_string - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } -; - -explabel: ID { - $$ = symrec_use($1); - xfree($1); - } - | SPECIAL_ID { - $$ = symrec_use($1); - xfree($1); - } - | LOCAL_ID { - $$ = symrec_use($1); - xfree($1); - } - | '$' { - $$ = symrec_define_label("$", nasm_parser_cur_section, - nasm_parser_prev_bc, 0); - } - | START_SECTION_ID { - if (section_is_absolute(nasm_parser_cur_section)) { - Error(_("`$$' is not valid within an ABSOLUTE section")); - YYERROR; - } else { - const char *ss_name = section_get_name(nasm_parser_cur_section); - assert(ss_name != NULL); - $$ = symrec_use(ss_name); - } - } -; - -instr: /* empty */ { - idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL; - $$ = x86_bc_new_insn(&idata); - } - | instrbase - | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } - | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } - | REG_CS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); - } - | REG_SS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); - } - | REG_DS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); - } - | REG_ES instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); - } - | REG_FS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); - } - | REG_GS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); - } - | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } - | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } - | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } - | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } -; - -/* instruction grammars (dynamically generated) */ -/* @INSTRUCTIONS@ */ - -%% 
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/ - -static void -nasm_parser_directive(const char *name, valparamhead *valparams, - valparamhead *objext_valparams) -{ - valparam *vp, *vp2; - const intnum *intn; - long lval; - - assert(cur_objfmt != NULL); - - /* Handle (mostly) output-format independent directives here */ - if (strcasecmp(name, "extern") == 0) { - vp = vps_first(valparams); - if (vp->val) - symrec_declare(vp->val, SYM_EXTERN, - cur_objfmt->extern_data_new(vp->val, - objext_valparams)); - else - Error(_("invalid argument to [%s]"), "EXTERN"); - } else if (strcasecmp(name, "global") == 0) { - vp = vps_first(valparams); - if (vp->val) - symrec_declare(vp->val, SYM_GLOBAL, - cur_objfmt->global_data_new(vp->val, - objext_valparams)); - else - Error(_("invalid argument to [%s]"), "GLOBAL"); - } else if (strcasecmp(name, "common") == 0) { - vp = vps_first(valparams); - if (vp->val) { - vp2 = vps_next(vp); - if (!vp2 || (!vp2->val && !vp2->param)) - Error(_("no size specified in %s declaration"), "COMMON"); - else { - if (vp2->val) - symrec_declare(vp->val, SYM_COMMON, - cur_objfmt->common_data_new(vp->val, - expr_new_ident(ExprSym(symrec_use(vp2->val))), - objext_valparams)); - else if (vp2->param) { - symrec_declare(vp->val, SYM_COMMON, - cur_objfmt->common_data_new(vp->val, vp2->param, - objext_valparams)); - vp2->param = NULL; - } - } - } else - Error(_("invalid argument to [%s]"), "COMMON"); - } else if (strcasecmp(name, "section") == 0 || - strcasecmp(name, "segment") == 0) { - section *new_section = - cur_objfmt->sections_switch(&nasm_parser_sections, valparams, - objext_valparams); - if (new_section) { - nasm_parser_cur_section = new_section; - nasm_parser_prev_bc = (bytecode *)NULL; - } else - Error(_("invalid argument to [%s]"), "SECTION"); - } else if (strcasecmp(name, "absolute") == 0) { - /* it can be just an ID or a complete expression, so handle both. 
*/ - vp = vps_first(valparams); - if (vp->val) - nasm_parser_cur_section = - sections_switch_absolute(&nasm_parser_sections, - expr_new_ident(ExprSym(symrec_use(vp->val)))); - else if (vp->param) { - nasm_parser_cur_section = - sections_switch_absolute(&nasm_parser_sections, vp->param); - vp->param = NULL; - } - nasm_parser_prev_bc = (bytecode *)NULL; - } else if (strcasecmp(name, "bits") == 0) { - if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && - (intn = expr_get_intnum(&vp->param)) != NULL && - (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) - x86_mode_bits = (unsigned char)lval; - else - Error(_("invalid argument to [%s]"), "BITS"); - } else if (cur_objfmt->directive(name, valparams, objext_valparams, - &nasm_parser_sections)) { - Error(_("unrecognized directive [%s]"), name); - } - - vps_delete(valparams); - if (objext_valparams) - vps_delete(objext_valparams); -} - -void -nasm_parser_error(const char *s) -{ - ParserError(s); -} - diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl deleted file mode 100755 index b0599d93..00000000 --- a/modules/parsers/nasm/gen_instr.pl +++ /dev/null @@ -1,889 +0,0 @@ -#!/usr/bin/perl -w -# $IdPath$ -# Generates NASM-compatible bison.y and token.l from instrs.dat. -# -# Copyright (C) 2001 Michael Urman -# -# This file is part of YASM. -# -# YASM is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# YASM is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -use strict; -use Getopt::Long; -my $VERSION = "0.0.1"; - -# useful constants for instruction arrays -# common -use constant INST => 0; -use constant OPERANDS => 1; -# general format -use constant OPSIZE => 2; -use constant OPCODE => 3; -use constant EFFADDR => 4; -use constant IMM => 5; -use constant CPU => 6; -# relative target format -use constant ADSIZE => 2; -use constant SHORTOPCODE => 3; -use constant NEAROPCODE => 4; -use constant SHORTCPU => 5; -use constant NEARCPU => 6; - -use constant TOO_MANY_ERRORS => 20; - -# default options -my $instrfile = 'instrs.dat'; -my $tokenfile = 'token.l'; -my $tokensource; -my $grammarfile = 'bison.y'; -my $grammarsource; -my $showversion; -my $showusage; -my $dry_run; - -# allow overrides -my $gotopts = GetOptions ( 'input=s' => \$instrfile, - 'token=s' => \$tokenfile, - 'sourcetoken=s' => \$tokensource, - 'grammar=s' => \$grammarfile, - 'sourcegrammar=s' => \$grammarsource, - 'version' => \$showversion, - 'n|dry-run' => \$dry_run, - 'help|usage' => \$showusage, - ); - -&showusage and exit 1 unless $gotopts; -&showversion if $showversion; -&showusage if $showusage; -exit 0 if $showversion or $showusage; - -# valid values for instrs.dat fields -my $valid_regs = join '|', qw( - reg_al reg_ah reg_ax reg_eax - reg_bl reg_bh reg_bx reg_ebx - reg_cl reg_ch reg_cx reg_ecx - reg_dl reg_dh reg_dx reg_edx - reg_si reg_esi reg_di reg_edi - reg_bp reg_ebp - reg_cs reg_ds reg_es reg_fs reg_gs reg_ss - ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG - fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm - imm8 imm16 imm32 imm64 imm80 imm128 - imm8x imm16x imm32x imm64x imm80x imm128x - rm8 rm16 rm32 rm1632 rm64 rm80 rm128 - rm8x rm16x rm32x rm1632x rm64x rm80x rm128x - reg8 reg16 reg32 reg1632 reg64 reg80 reg128 - 
reg8x reg16x reg32x reg1632x reg64x reg80x reg128x - mem8 mem16 mem32 mem1632 mem64 mem80 mem128 - mem8x mem16x mem32x mem1632x mem64x mem80x mem128x - target memfar -); -my $valid_opcodes = join '|', qw( - [0-9A-F]{2} - \\$0\\.\\d -); -my $valid_cpus = join '|', qw( - 8086 186 286 386 486 P4 P5 P6 - FPU MMX KATMAI SSE SSE2 - AMD ATHLON 3DNOW - SMM - CYRIX - UNDOC OBS PRIV PROT - @0 @1 -); - -# track errors and warnings rather than die'ing on the first. -my (@messages, $errcount, $warncount); -sub die_with_errors (@) -{ - foreach (@_) { print; }; - if ($errcount) - { - print "Dying with errors\n"; - exit -1; - } -} - -my ($groups) = &read_instructions ($instrfile); - -die_with_errors @messages; - -exit 0 if $dry_run; # done with simple verification, so exit - -unless ($dry_run) -{ - &output_lex ($tokenfile, $tokensource, $groups); - &output_yacc ($grammarfile, $grammarsource, $groups); -} - -# print version for --version, etc. -sub showversion -{ - print "YASM gen_instr.pl $VERSION\n"; -} - -# print usage information for --help, etc. -sub showusage -{ - print <<"EOF"; -Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile] - -i, --input instructions file (default: $instrfile) - -t, --token token output file (default: $tokenfile) - -st, --sourcetoken token input file (default: $tokenfile.in) - -g, --grammar grammar output file (default: $grammarfile) - -sg, --sourcegrammar grammar input file (default: $grammarfile.in) - -v, --version show version and exit - -h, --help, --usage show this message and exit - -n, --dry-run verify input file without writing output files -EOF -} - -# read in instructions, and verify they're valid (well, mostly) -sub read_instructions ($) -{ - my $instrfile = shift || die; - open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; - my %instr; - my %groups; - - sub add_group_rule ($$$$) - { - my ($inst, $args, $groups, $instrfile) = splice @_; - - # slide $0.\d down by one. 
- # i still say changing instrs.dat would be better ;) - $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg; - - # detect relative target format by looking for "target" in args - if($args =~ m/target/oi) - { - my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) = - split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; - die "Invalid Address Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Short Opcode\n" - if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; - die "Invalid Near Opcode\n" - if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; - die "Invalid Short CPU\n" - if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - die "Invalid Near CPU\n" - if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! 
off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu]; - } else { - my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; - die "Invalid Operation Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; - die "Invalid Effective Address\n" - if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; - die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! 
off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; - } - } - - sub add_group_member ($$$$$) - { - my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_; - - my ($inst, $group) = split /!/, $handle; - my ($args, $cpu) = split /\t+/, $fullargs; - eval { - die "Invalid instruction name\n" - if $inst !~ m/^\w+$/o; - die "Invalid group name\n" - if $group !~ m/^\w+$/o; - die "Invalid CPU\n" - if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n" - unless exists $groups->{$group}; - $warncount++; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - # only allow multiple instances of instructions that aren't of a group - push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++ - if exists $instr->{$inst} and not exists $groups->{$inst}; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu]; - $instr->{$inst} = 1; - } - - while () - { - chomp; - next if /^\s*(?:;.*)$/; - - my ($handle, $args) = split /\t+/, $_, 2; - - # pseudo hack to handle original style instructions (no group) - if ($handle =~ m/^\w+$/) - { - # TODO: this has some long ranging effects, as the eventual - # bison rules get tagged when they don't need - # to, etc. Fix this sometime. - add_group_rule ("!$handle", $args, \%groups, $instrfile); - add_group_member ("$handle!$handle", "", \%groups, \%instr, - $instrfile); - } - elsif ($handle =~ m/^!\w+$/) - { - add_group_rule ($handle, $args, \%groups, $instrfile); - } - elsif ($handle =~ m/^\w+!\w+$/) - { - add_group_member ($handle, $args, \%groups, \%instr, - $instrfile); - } - # TODO: consider if this is necessary: Pete? 
- # (add_group_member_synonym is -not- implemented) - #elsif ($handle =~ m/^:\w+$/) - #{ - # add_group_member_synonym ($handle, $args); - #} - } - close INPUT; - return (\%groups); -} - -sub output_lex ($@) -{ - my $tokenfile = shift or die; - my $tokensource = shift; - $tokensource ||= "$tokenfile.in"; - my $groups = shift or die; - - open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; - open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; - while () - { - # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content - if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) - { - foreach my $grp (sort keys %$groups) - { - my %printed; - my $group = $grp; $group =~ s/^!//; - - foreach my $grp (@{$groups->{$grp}{members}}) - { - unless (exists $printed{$grp->[0]}) - { - $printed{$grp->[0]} = 1; - my @groupdata; - if ($grp->[2]) - { - @groupdata = split ",", $grp->[2]; - for (my $i=0; $i < @groupdata; ++$i) - { - $groupdata[$i] =~ s/nil/0/; - $groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];"; - } - $groupdata[-1] .= "\n\t "; - } - printf TOKEN "%-12s{%s return %-20s }\n", - $grp->[0], - (join "\n\t ", @groupdata), - "\Ugrp_$group;\E"; - # TODO: change appropriate GRP_FOO back to - # INS_FOO's. not functionally important; - # just pedantically so. - } - } - } - } - else - { - print TOKEN $_; - } - } - close IN; - close TOKEN; -} - -# helper functions for yacc output -sub rule_header ($ $ $) -{ - my ($rule, $tokens, $count) = splice (@_); - $count ? " | $tokens {\n" : "$rule: $tokens {\n"; -} -sub rule_footer () -{ - return " }\n"; -} - -sub cond_action_if ( $ $ $ $ $ $ $ ) -{ - my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_); - return rule_header ($rule, $tokens, $count) . 
<<"EOF"; - if (\$$regarg == $val) { - @$a_eax - \$\$ = $func; - } -EOF -} -sub cond_action_elsif ( $ $ $ $ ) -{ - my ($regarg, $val, $func, $a_eax) = splice (@_); - return <<"EOF"; - else if (\$$regarg == $val) { - @$a_eax - \$\$ = $func; - } -EOF -} -sub cond_action_else ( $ $ ) -{ - my ($func, $a_args) = splice (@_); - return <<"EOF" . rule_footer; - else { - @$a_args - \$\$ = $func; - } -EOF -} -sub cond_action ( $ $ $ $ $ $ $ $ ) -{ - my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args) - = splice (@_); - return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func, - $a_eax) . cond_action_else ($func, $a_args); -} - -#sub action ( $ $ $ $ $ ) -sub action ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " @$a_args\n" - . " \$\$ = $func;\n" - . rule_footer; -} - -sub action_setshiftflag ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " @$a_args\n" - . " \$\$ = $func;\n" - . " x86_bc_insn_set_shift_flag(\$\$);\n" - . rule_footer; -} - -sub action_setjrshort ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " if (\$2.op_sel == JR_NONE)\n" - . " \$2.op_sel = JR_SHORT;\n" - . " @$a_args\n" - . " \$\$ = $func;\n" - . 
rule_footer; -} - -sub get_token_number ( $ $ ) -{ - my ($tokens, $str) = splice @_; - $tokens =~ s/$str.*/x/; # hold its place - my @f = split /\s+/, $tokens; - return scalar @f; -} - -sub output_yacc ($@) -{ - my $grammarfile = shift or die; - my $grammarsource = shift; - $grammarsource ||= "$grammarfile.in"; - my $groups = shift or die; - - open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; - open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; - - while () - { - if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/}) - { - print GRAMMAR "static x86_new_insn_data idata;\n"; - print GRAMMAR "static x86_new_jmprel_data jrdata;\n"; - } - elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) - { - my $len = length("%token "); - print GRAMMAR "%token "; - foreach my $group (sort keys %$groups) - { - if ($len + length("GRP_$group") < 76) - { - print GRAMMAR " GRP_\U$group\E"; - $len += length(" GRP_$group"); - } - else - { - print GRAMMAR "\n%token GRP_\U$group\E"; - $len = length("%token GRP_$group"); - } - } - print GRAMMAR "\n"; - } - elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/}) - { - my $len = length("%type "); - print GRAMMAR "%type "; - foreach my $group (sort keys %$groups) - { - if ($len + length($group) < 76) - { - print GRAMMAR " $group"; - $len += length(" $group"); - } - else - { - print GRAMMAR "\n%type $group"; - $len = length("%type $group"); - } - } - print GRAMMAR "\n"; - } - elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) - { - # list every kind of instruction that instrbase can be - print GRAMMAR "instrbase: ", - join( "\n | ", sort keys %$groups), "\n;\n"; - - my ($ONE, $AL, $AX, $EAX); # need the outer scope - my (@XCHG_AX, @XCHG_EAX); - - # list the arguments and actions (buildbc) - #foreach my $instrname (sort keys %$instrlist) - foreach my $group (sort keys %$groups) - { - # I'm still convinced this is a hack. 
The idea is if - # within an instruction we see certain versions of the - # opcodes with ONE, or reg_e?a[lx],imm(8|16|32). If we - # do, defer generation of the action, as we may need to - # fold it into another version with a conditional to - # generate the more efficient variant of the opcode - # BUT, if we don't fold it in, we have to generate the - # original version we would have otherwise. - ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0); - # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax). - (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0)); - my $count = 0; - foreach my $inst (@{$groups->{$group}{rules}}) { - if($inst->[OPERANDS] =~ m/target/oi) - { - # relative target format - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" - if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - $tokens =~ s/:/ ':' /g; - my $datastruct = "x86_new_jmprel_data"; - my $datastructname = "jrdata"; - my $func = "x86_bc_new_jmprel(&$datastructname)"; - - # Create the argument list for bytecode_new - my @args; - - # Target argument: HACK: Always assumed to be arg 1. 
- push @args, 'target=&$2;'; - - # test for short opcode "nil" - if($inst->[SHORTOPCODE] =~ m/nil/) - { - push @args, 'short_op_len=0;'; - } - else - { - my @opcodes; - # Check for possible length parameter - if($inst->[SHORTOPCODE] =~ m/\?/) - { - my @pieces = split /\?/, $inst->[SHORTOPCODE]; - push @args, "short_op_len=".$pieces[0].";"; - # opcode piece 1 (and 2 and 3 if attached) - @opcodes = split ",", $pieces[1]; - } - else - { - # opcode piece 1 (and 2 and 3 if attached) - @opcodes = split ",", $inst->[SHORTOPCODE]; - # number of bytes of short opcode - push @args, "short_op_len=".@opcodes.";"; - } - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; - push @args, "short_op[$i]=$opcodes[$i];"; - } - } - - # test for near opcode "nil" - if($inst->[NEAROPCODE] =~ m/nil/) - { - push @args, 'near_op_len=0;'; - } - else - { - # opcode piece 1 (and 2 and 3 if attached) - my @opcodes = split ",", $inst->[NEAROPCODE]; - # number of bytes of near opcode - push @args, "near_op_len=".@opcodes.";"; - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; - push @args, "near_op[$i]=$opcodes[$i];"; - } - } - - # address size - push @args, "addrsize=$inst->[ADSIZE];"; - $args[-1] =~ s/nil/0/; - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # and add the data structure reference - s/^/$datastructname./g foreach (@args); - - if ($args[0] =~ m/\&\$/) - { - $args[0] = '/*@-immediatetrans@*/' . $args[0] . - '/*@=immediatetrans@*/'; - } - - # generate the grammar - # Specialcase jcc to set op_sel=JR_SHORT. 
- if ($rule =~ m/jcc/) - { - print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++); - } - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - } - } - else - { - # general instruction format - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" - if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - $tokens =~ s/:/ ':' /g; - # offset args - my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0; - my $datastruct = "x86_new_insn_data"; - my $datastructname = "idata"; - my $func = "x86_bc_new_insn(&$datastructname)"; - - # Create the argument list for bytecode_new - my @args; - - # operand size - push @args, "opersize=$inst->[OPSIZE];"; - $args[-1] =~ s/nil/0/; - - - # opcode piece 1 (and 2 and 3 if attached) - my @opcodes = split ",", $inst->[OPCODE]; - # number of bytes of opcodes - push @args, "op_len=".@opcodes.";"; - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg; - push @args, "op[$i]=$opcodes[$i];"; - } - - # effective addresses - my $effaddr = $inst->[EFFADDR]; - $effaddr =~ s/^nil/NULL,0/; - $effaddr =~ s/nil/0/; - # don't let a $0.\d match slip into the following rules. - $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! 
- $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/; - $effaddr =~ s[(\$\d+)i,\s*(\d+)] - ["x86_ea_new_imm($1^ ".($2/8)."),0"]e; - - die $effaddr if $effaddr =~ m/\d+[ri]/; - - my @effaddr_split = split ',', $effaddr; - $effaddr_split[0] =~ s/\^/,/; - push @args, "ea=$effaddr_split[0];"; - if ($effaddr_split[0] !~ m/NULL/) - { - push @args, "spare=$effaddr_split[1];"; - } - - # immediate sources - my $imm = $inst->[IMM]; - $imm =~ s/nil/NULL,0/; - # don't match $0.\d in the following rules. - $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $imm =~ s[^([0-9A-Fa-f]+),] - [imm_new_int(0x$1),]; - $imm =~ s[^\$0.(\d+),] - [imm_new_int((unsigned long)\$1\[$1\]),]; - - # divide the second, and only the second, by 8 bits/byte - $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $imm .= ($3||'') eq 's' ? ',1' : ',0'; - - die $imm if $imm =~ m/\d+s/; - - my @imm_split = split ",", $imm; - push @args, "imm=$imm_split[0];"; - if ($imm_split[0] !~ m/NULL/) - { - push @args, "im_len=$imm_split[1];"; - push @args, "im_sign=$imm_split[2];"; - } - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # and add the data structure reference - s/^/$datastructname./g foreach (@args); - - # see if we match one of the cases to defer - if (($inst->[OPERANDS]||"") =~ m/,ONE/) - { - $ONE = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/) - { - $AL = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/) - { - $AX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/) - { - $EAX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/) - { - $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/) - { - $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/) - { - $XCHG_EAX[0] = [ $rule, $tokens, $func, 
\@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/) - { - $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; - } - - # or if we've deferred and we match the folding version - elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) - { - $ONE->[4] = 1; - # Output a normal version except imm8 -> imm8x - # (BYTE override always makes longer version, and - # we don't want to conflict with the imm version - # we output right after this one. - $tokens =~ s/imm8/imm8x/; - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - - # Now output imm version, with second opcode byte - # set to ,1 opcode. Also call SetInsnShiftFlag(). - $tokens =~ s/imm8x/imm/; - my $oneval = $ONE->[3]->[2]; - $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg; - push @args, $oneval; - print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); - } - elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) - { - $AL->[4] = 1; - my $regarg = get_token_number ($tokens, "reg8"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); - } - elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/) - { - $AX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg16"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); - } - elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/) - { - $EAX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg32"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); - } - elsif (($XCHG_AX[0] or $XCHG_AX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_AX; ++$i) - { - if($XCHG_AX[$i]) - { - $XCHG_AX[$i]->[4] = 1; - # This is definitely a hack. The "right" - # way to do this would be to enhance - # get_token_number to get the nth reg16 - # instead of always getting the first. 
- my $regarg = - get_token_number ($tokens, "reg16") - + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); - } - } - } - print GRAMMAR cond_action_else ($func, \@args); - } - elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_EAX; ++$i) - { - if($XCHG_EAX[$i]) - { - $XCHG_EAX[$i]->[4] = 1; - # This is definitely a hack. The "right" - # way to do this would be to enhance - # get_token_number to get the nth reg32 - # instead of always getting the first. - my $regarg = - get_token_number ($tokens, "reg32") - + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); - } - } - } - print GRAMMAR cond_action_else ($func, \@args); - } - - # otherwise, generate the normal version - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - } - } - } - - # catch deferreds that haven't been folded in. 
- if ($ONE and not $ONE->[4]) - { - print GRAMMAR action (@$ONE, $count++); - } - if ($AL and not $AL->[4]) - { - print GRAMMAR action (@$AL, $count++); - } - if ($AX and not $AL->[4]) - { - print GRAMMAR action (@$AX, $count++); - } - if ($EAX and not $AL->[4]) - { - print GRAMMAR action (@$EAX, $count++); - } - - # print error action - # ASSUMES: at least one previous action exists - print GRAMMAR " | \Ugrp_$group\E error {\n"; - print GRAMMAR " Error (_(\"expression syntax error\"));\n"; - print GRAMMAR " \$\$ = (bytecode *)NULL;\n"; - print GRAMMAR " }\n"; - - # terminate the rule - print GRAMMAR ";\n"; - } - } - else - { - print GRAMMAR $_; - } - } - close IN; - close GRAMMAR; -} diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y index b3f0533a..248820db 100644 --- a/modules/parsers/nasm/nasm-bison.y +++ b/modules/parsers/nasm/nasm-bison.y @@ -44,8 +44,10 @@ RCSID("$IdPath$"); #include "src/parsers/nasm/nasm-defs.h" + void init_table(void); extern int nasm_parser_lex(void); +extern void nasm_parser_set_directive_state(void); void nasm_parser_error(const char *); static void nasm_parser_directive(const char *name, valparamhead *valparams, @@ -55,6 +57,7 @@ extern objfmt *nasm_parser_objfmt; extern sectionhead nasm_parser_sections; extern section *nasm_parser_cur_section; extern char *nasm_parser_locallabel_base; +extern size_t nasm_parser_locallabel_base_len; static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; @@ -71,16 +74,19 @@ static bytecode *nasm_parser_temp_bc; intnum *intn; floatnum *flt; symrec *sym; - unsigned char groupdata[5]; + unsigned long arch_data[4]; effaddr *ea; expr *exp; - immval *im_val; - x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; valparamhead dir_valparams; valparam *dir_valparam; + struct { + insn_operandhead operands; + int num_operands; + } insn_operands; + insn_operand *insn_operand; } %token INTNUM @@ -90,46 +96,25 @@ 
static bytecode *nasm_parser_temp_bc; %token DECLARE_DATA %token RESERVE_SPACE %token INCBIN EQU TIMES -%token SEG WRT NEAR SHORT FAR NOSPLIT ORG +%token SEG WRT NOSPLIT %token TO -%token LOCK REPNZ REP REPZ -%token OPERSIZE ADDRSIZE -%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG -%token REG_EAX REG_ECX REG_EDX REG_EBX -%token REG_ESP REG_EBP REG_ESI REG_EDI -%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI -%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH -%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS +%token INSN PREFIX REG SEGREG TARGETMOD %token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID %token ID LOCAL_ID SPECIAL_ID %token LINE -/* instruction tokens (dynamically generated) */ -/* @TOKENS@ */ - -/* @TYPES@ */ - -%type line lineexp exp instr instrbase - -%type reg_eax reg_ecx -%type reg_ax reg_cx reg_dx -%type reg_al reg_cl -%type reg_es reg_cs reg_ss reg_ds reg_fs reg_gs -%type fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg -%type mem memaddr memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 -%type imm imm8x imm16x imm32x imm8 imm16 imm32 -%type expr expr_no_string memexpr direxpr +%type line lineexp exp instr + +%type memaddr +%type dvexpr expr direxpr %type explabel %type label_id -%type target %type dataval %type datavals %type directive_valparams %type directive_valparam +%type operands +%type operand %left '|' %left '^' @@ -163,7 +148,9 @@ line: '\n' { $$ = (bytecode *)NULL; } xfree($5); $$ = (bytecode *)NULL; } - | directive '\n' { $$ = (bytecode *)NULL; } + | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' { + $$ = (bytecode *)NULL; + } | error '\n' { Error(_("label or instruction expected at start of line")); $$ = (bytecode *)NULL; @@ -191,11 +178,32 @@ exp: instr | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); } ; +instr: INSN { 
+ $$ = cur_arch->parse.new_insn($1, 0, NULL); + } + | INSN operands { + $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands); + ops_delete(&$2.operands, 0); + } + | INSN error { + Error(_("expression syntax error")); + $$ = NULL; + } + | PREFIX instr { + $$ = $2; + cur_arch->parse.handle_prefix($$, $1); + } + | SEGREG instr { + $$ = $2; + cur_arch->parse.handle_seg_prefix($$, $1[0]); + } +; + datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dv_new_expr($1); } +dataval: dvexpr { $$ = dv_new_expr($1); } | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); @@ -219,19 +227,22 @@ label_id: ID { $$ = $1; if (nasm_parser_locallabel_base) xfree(nasm_parser_locallabel_base); - nasm_parser_locallabel_base = xstrdup($1); + nasm_parser_locallabel_base_len = strlen($1); + nasm_parser_locallabel_base = + xmalloc(nasm_parser_locallabel_base_len+1); + strcpy(nasm_parser_locallabel_base, $1); } | SPECIAL_ID | LOCAL_ID ; /* directives */ -directive: '[' DIRECTIVE_NAME directive_val ']' { - xfree($2); +directive: DIRECTIVE_NAME directive_val { + xfree($1); } - | '[' DIRECTIVE_NAME error ']' { - Error(_("invalid arguments to [%s]"), $2); - xfree($2); + | DIRECTIVE_NAME error { + Error(_("invalid arguments to [%s]"), $1); + xfree($1); } ; @@ -267,299 +278,85 @@ directive_valparam: direxpr { | ID '=' direxpr { vp_new($$, $1, $3); } ; -/* register groupings */ -fpureg: ST0 - | FPUREG_NOTST0 -; - -reg_eax: REG_EAX - | DWORD reg_eax { $$ = $2; } -; - -reg_ecx: REG_ECX - | DWORD reg_ecx { $$ = $2; } -; - -rawreg32: REG_EAX - | REG_ECX - | REG_EDX - | REG_EBX - | REG_ESP - | REG_EBP - | REG_ESI - | REG_EDI -; - -reg32: rawreg32 - | DWORD reg32 { $$ = $2; } -; - -reg_ax: REG_AX - | WORD reg_ax { $$ = $2; } -; - -reg_cx: REG_CX - | WORD reg_cx { $$ = $2; } -; - -reg_dx: REG_DX - | WORD reg_dx { $$ = $2; } -; - -rawreg16: REG_AX - | REG_CX - | 
REG_DX - | REG_BX - | REG_SP - | REG_BP - | REG_SI - | REG_DI -; - -reg16: rawreg16 - | WORD reg16 { $$ = $2; } -; - -reg_al: REG_AL - | BYTE reg_al { $$ = $2; } -; - -reg_cl: REG_CL - | BYTE reg_cl { $$ = $2; } -; - -reg8: REG_AL - | REG_CL - | REG_DL - | REG_BL - | REG_AH - | REG_CH - | REG_DH - | REG_BH - | BYTE reg8 { $$ = $2; } -; - -reg_es: REG_ES - | WORD reg_es { $$ = $2; } -; - -reg_ss: REG_SS - | WORD reg_ss { $$ = $2; } -; - -reg_ds: REG_DS - | WORD reg_ds { $$ = $2; } -; - -reg_fs: REG_FS - | WORD reg_fs { $$ = $2; } -; - -reg_gs: REG_GS - | WORD reg_gs { $$ = $2; } -; - -reg_cs: REG_CS - | WORD reg_cs { $$ = $2; } -; - -segreg: REG_ES - | REG_SS - | REG_DS - | REG_FS - | REG_GS - | REG_CS - | WORD segreg { $$ = $2; } -; - /* memory addresses */ -/* FIXME: Is there any way this redundancy can be eliminated? This is almost - * identical to expr: the only difference is that FLTNUM is replaced by - * rawreg16 and rawreg32. - * - * Note that the two can't be just combined because of conflicts caused by imm - * vs. reg. I don't see a simple solution right now to this. - * - * We don't attempt to check memory expressions for validity here. 
- */ -memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); } - | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' memexpr %prec UNARYOP { $$ = $2; } - | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' 
memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' memexpr ')' { $$ = $2; } - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); +memaddr: expr { + $$ = cur_arch->parse.ea_new_expr($1); } - | error { Error(_("invalid effective address")); } -; - -memaddr: memexpr { - $$ = x86_ea_new_expr($1); - x86_ea_set_segment($$, 0); + | SEGREG ':' memaddr { + $$ = $3; + cur_arch->parse.handle_seg_override($$, $1[0]); } - | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } | WORD memaddr { $$ = $2; ea_set_len($$, 2); } | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } ; -mem: '[' memaddr ']' { $$ = $2; } -; - -/* explicit memory */ -mem8x: BYTE mem { $$ = $2; } - | BYTE mem8x { $$ = $2; } -; -mem16x: WORD mem { $$ = $2; } - | WORD mem16x { $$ = $2; } -; -mem32x: DWORD mem { $$ = $2; } - | DWORD mem32x { $$ = $2; } -; -mem64x: QWORD mem { $$ = $2; } - | QWORD mem64x { $$ = $2; } -; -mem80x: TWORD mem { $$ = $2; } - | TWORD mem80x { $$ = $2; } -; -mem128x: DQWORD mem { $$ = $2; } - | DQWORD mem128x { $$ = $2; } -; - -/* FAR memory, for jmp and call */ -memfar: FAR mem { $$ = $2; } - | FAR memfar { $$ = $2; } -; - -/* implicit memory */ -mem8: mem - | mem8x -; -mem16: mem - | mem16x -; -mem32: mem - | mem32x -; -mem64: mem - | mem64x -; -mem80: mem - | mem80x -; -mem128: mem - | mem128x -; - -/* both 16 and 32 bit memory */ -mem1632: mem - | mem16x - | mem32x -; - -/* explicit register or memory */ -rm8x: reg8 { $$ = x86_ea_new_reg($1); 
} - | mem8x -; -rm16x: reg16 { $$ = x86_ea_new_reg($1); } - | mem16x -; -rm32x: reg32 { $$ = x86_ea_new_reg($1); } - | mem32x -; -/* not needed: -rm64x: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64x -; -rm128x: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128x -; -*/ - -/* implicit register or memory */ -rm8: reg8 { $$ = x86_ea_new_reg($1); } - | mem8 -; -rm16: reg16 { $$ = x86_ea_new_reg($1); } - | mem16 -; -rm32: reg32 { $$ = x86_ea_new_reg($1); } - | mem32 -; -rm64: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64 -; -rm128: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128 -; - -/* immediate values */ -imm: expr { $$ = imm_new_expr($1); } -; - -/* explicit immediates */ -imm8x: BYTE imm { $$ = $2; } -; -imm16x: WORD imm { $$ = $2; } -; -imm32x: DWORD imm { $$ = $2; } -; - -/* implicit immediates */ -imm8: imm - | imm8x -; -imm16: imm - | imm16x -; -imm32: imm - | imm32x +/* instruction operands */ +operands: operand { + ops_initialize(&$$.operands); + ops_append(&$$.operands, $1); + $$.num_operands = 1; + } + | operands ',' operand { + ops_append(&$1.operands, $3); + $$.operands = $1.operands; + $$.num_operands = $1.num_operands+1; + } ; -/* jump targets */ -target: expr { - $$.val = $1; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); +operand: '[' memaddr ']' { $$ = operand_new_mem($2); } + | expr { $$ = operand_new_imm($1); } + | SEGREG { $$ = operand_new_segreg($1[0]); } + | BYTE operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 1) + Error(_("cannot override register size")); + else + $$->size = 1; } - | SHORT target { + | WORD operand { $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 2) + Error(_("cannot override register size")); + else + $$->size = 2; } - | NEAR target { + | DWORD operand { $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + if ($$->type == INSN_OPERAND_REG && + 
cur_arch->get_reg_size($$->data.reg) != 4) + Error(_("cannot override register size")); + else + $$->size = 4; } + | QWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 8) + Error(_("cannot override register size")); + else + $$->size = 8; + } + | TWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 10) + Error(_("cannot override register size")); + else + $$->size = 10; + } + | DQWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 16) + Error(_("cannot override register size")); + else + $$->size = 16; + } + | TARGETMOD operand { $$ = $2; $$->targetmod = $1[0]; } ; /* expression trees */ @@ -587,9 +384,48 @@ direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | '(' direxpr ')' { $$ = $2; } ; -expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } +dvexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } | explabel { $$ = expr_new_ident(ExprSym($1)); } + /*| dvexpr '||' dvexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ + | dvexpr '|' dvexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } + | dvexpr '^' dvexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } + /*| dvexpr '&&' dvexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ + | dvexpr '&' dvexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } + /*| dvexpr '==' dvexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ + /*| dvexpr '>' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ + /*| dvexpr '<' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ + /*| dvexpr '>=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ + /*| dvexpr '<=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ + /*| dvexpr '!=' dvexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ + | dvexpr LEFT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } + | dvexpr RIGHT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } + | dvexpr '+' dvexpr { $$ = 
expr_new_tree($1, EXPR_ADD, $3); } + | dvexpr '-' dvexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } + | dvexpr '*' dvexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } + | dvexpr '/' dvexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } + | dvexpr SIGNDIV dvexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } + | dvexpr '%' dvexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } + | dvexpr SIGNMOD dvexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } + | '+' dvexpr %prec UNARYOP { $$ = $2; } + | '-' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } + /*| '!' dvexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ + | '~' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } + | '(' dvexpr ')' { $$ = $2; } +; + +/* Expressions for operands and memory expressions. + * We don't attempt to check memory expressions for validity here. + * Essentially the same as dvexpr above but adds REG and STRING. + */ +expr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } + | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } + | REG { $$ = expr_new_ident(ExprReg($1[0])); } + | STRING { + $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); + xfree($1); + } + | explabel { $$ = expr_new_ident(ExprSym($1)); } /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); } | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); } @@ -617,13 +453,6 @@ expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | '(' expr ')' { $$ = $2; } ; -expr: expr_no_string - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } -; - explabel: ID { $$ = symrec_use($1); xfree($1); @@ -652,46 +481,6 @@ explabel: ID { } ; -instr: /* empty */ { - idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL; - $$ = x86_bc_new_insn(&idata); - } - | instrbase - | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } - | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } - | REG_CS 
instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); - } - | REG_SS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); - } - | REG_DS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); - } - | REG_ES instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); - } - | REG_FS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); - } - | REG_GS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); - } - | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } - | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } - | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } - | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } -; - -/* instruction grammars (dynamically generated) */ -/* @INSTRUCTIONS@ */ - %% /*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/ @@ -700,8 +489,6 @@ nasm_parser_directive(const char *name, valparamhead *valparams, valparamhead *objext_valparams) { valparam *vp, *vp2; - const intnum *intn; - long lval; assert(cur_objfmt != NULL); @@ -766,13 +553,25 @@ nasm_parser_directive(const char *name, valparamhead *valparams, vp->param = NULL; } nasm_parser_prev_bc = (bytecode *)NULL; - } else if (strcasecmp(name, "bits") == 0) { - if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && - (intn = expr_get_intnum(&vp->param)) != NULL && - (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) - x86_mode_bits = (unsigned char)lval; - else - Error(_("invalid argument to [%s]"), "BITS"); + } else if (strcasecmp(name, "cpu") == 0) { + vps_foreach(vp, valparams) { + if (vp->val) + cur_arch->parse.switch_cpu(vp->val); + else if (vp->param) { + const intnum *intcpu; + intcpu = expr_get_intnum(&vp->param); + if (!intcpu) + Error(_("invalid argument to [%s]"), "CPU"); + else { + char strcpu[16]; + sprintf(strcpu, "%lu", intnum_get_uint(intcpu)); + 
cur_arch->parse.switch_cpu(strcpu); + } + } + } + } else if (!cur_arch->parse.directive(name, valparams, objext_valparams, + &nasm_parser_sections)) { + ; } else if (cur_objfmt->directive(name, valparams, objext_valparams, &nasm_parser_sections)) { Error(_("unrecognized directive [%s]"), name); diff --git a/modules/parsers/nasm/nasm-parser.c b/modules/parsers/nasm/nasm-parser.c index b7d66972..e21e9554 100644 --- a/modules/parsers/nasm/nasm-parser.c +++ b/modules/parsers/nasm/nasm-parser.c @@ -34,6 +34,7 @@ extern FILE *nasm_parser_in; extern int nasm_parser_debug; extern int nasm_parser_parse(void); +extern void nasm_parser_cleanup(void); size_t (*nasm_parser_input) (char *buf, size_t max_size); @@ -58,6 +59,8 @@ nasm_parser_do_parse(parser *p, FILE *f, const char *in_filename) nasm_parser_parse(); + nasm_parser_cleanup(); + /* Free locallabel base if necessary */ if (nasm_parser_locallabel_base) xfree(nasm_parser_locallabel_base); diff --git a/modules/parsers/nasm/nasm-token.re b/modules/parsers/nasm/nasm-token.re new file mode 100644 index 00000000..ab0aa111 --- /dev/null +++ b/modules/parsers/nasm/nasm-token.re @@ -0,0 +1,516 @@ +/* + * NASM-compatible lex lexer + * + * Copyright (C) 2001 Peter Johnson + * + * Portions based on re2c's example code. + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "util.h" +RCSID("$IdPath$"); + +#include "bitvect.h" + +#include "errwarn.h" +#include "intnum.h" +#include "floatnum.h" +#include "expr.h" +#include "symrec.h" + +#include "bytecode.h" + +#include "arch.h" + +#include "src/parsers/nasm/nasm-defs.h" +#include "nasm-bison.h" + + +#define BSIZE 8192 + +#define YYCTYPE char +#define YYCURSOR cursor +#define YYLIMIT s.lim +#define YYMARKER s.ptr +#define YYFILL(n) {cursor = fill(cursor);} + +#define RETURN(i) {s.cur = cursor; return i;} + +#define SCANINIT() { \ + s.tchar = cursor - s.pos; \ + s.tline = s.cline; \ + s.tok = cursor; \ + } + +#define TOKLEN (cursor-s.tok) + +void nasm_parser_cleanup(void); +void nasm_parser_set_directive_state(void); +int nasm_parser_lex(void); + +extern size_t (*nasm_parser_input) (char *buf, size_t max_size); + + +typedef struct Scanner { + YYCTYPE *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + unsigned int tchar, tline, cline; +} Scanner; + +static Scanner s = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 1 }; + +FILE *nasm_parser_in = NULL; +/* Refill the scanner buffer (invoked through YYFILL) and return the cursor, rebased if the data moved. Unless EOF was already recorded: slide the unconsumed bytes [s.tok, s.lim) down to s.bot, switch to a larger buffer when fewer than BSIZE bytes are free past s.lim, then read up to BSIZE bytes through nasm_parser_input(); a short read records s.eof and appends a '\n'. */ +static YYCTYPE * +fill(YYCTYPE *cursor) +{ + if(!s.eof){ + size_t cnt = s.tok - s.bot; /* bytes already consumed ahead of the current token */ + if(cnt){ + memmove(s.bot, s.tok, s.lim - s.tok); /* in-place compaction: source and destination can overlap, so memcpy would be undefined */ + s.tok = s.bot; + s.ptr -= cnt; + cursor -= cnt; + s.pos -= cnt; + s.lim -= cnt; + } + if((s.top - s.lim) < BSIZE){ + char *buf = xmalloc((s.lim - s.bot) + BSIZE); + memcpy(buf, s.tok, s.lim - s.tok); /* distinct buffers here, memcpy is fine */ + s.tok = buf; + s.ptr = &buf[s.ptr - s.bot]; + cursor = &buf[cursor - s.bot]; + s.pos = &buf[s.pos - s.bot]; + s.lim = &buf[s.lim - s.bot]; + s.top = &s.lim[BSIZE]; + if (s.bot) + xfree(s.bot); + s.bot = buf; + } + if((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE){ + s.eof = &s.lim[cnt]; *s.eof++ = '\n'; + } + s.lim += cnt; + } + return cursor; +} + +void 
+nasm_parser_cleanup(void) +{ /* Release the scanner's input buffer (s.bot) if one was allocated. NOTE(review): the remaining Scanner pointers are not reset here -- confirm the lexer is never re-entered after cleanup. */ + if (s.bot) + xfree(s.bot); +} + +/* starting size of string buffer */ +#define STRBUF_ALLOC_SIZE 128 + +/* string buffer used when parsing strings/character constants */ +static char *strbuf = (char *)NULL; + +/* length of strbuf (including terminating NUL character) */ +static size_t strbuf_size = 0; + +/* last "base" label for local (.) labels */ +char *nasm_parser_locallabel_base = (char *)NULL; +size_t nasm_parser_locallabel_base_len = 0; + +static int linechg_numcount; + +/*!re2c + any = [\000-\377]; + digit = [0-9]; + iletter = [a-zA-Z]; + bindigit = [01]; + octdigit = [0-7]; + hexdigit = [0-9a-fA-F]; + ws = [ \t\r]; + quot = ["']; + A = [aA]; + B = [bB]; + C = [cC]; + D = [dD]; + E = [eE]; + F = [fF]; + G = [gG]; + H = [hH]; + I = [iI]; + J = [jJ]; + K = [kK]; + L = [lL]; + M = [mM]; + N = [nN]; + O = [oO]; + P = [pP]; + Q = [qQ]; + R = [rR]; + S = [sS]; + T = [tT]; + U = [uU]; + V = [vV]; + W = [wW]; + X = [xX]; + Y = [yY]; + Z = [zZ]; +*/ +/* Lexer state; nasm_parser_lex() switches on it at entry to resume in the matching scanner section. */ +static enum { + INITIAL, + DIRECTIVE, + DIRECTIVE2, + LINECHG, + LINECHG2 +} state = INITIAL; +/* Put the lexer into DIRECTIVE state so the next nasm_parser_lex() call jumps to the directive scanner. */ +void +nasm_parser_set_directive_state(void) +{ + state = DIRECTIVE; +} + +int +nasm_parser_lex(void) +{ + YYCTYPE *cursor = s.cur; + YYCTYPE endch; + size_t count, len; + YYCTYPE savech; + arch_check_id_retval check_id_ret; + + /* Catch EOF */ + if (s.eof && cursor == s.eof) + return 0; + + /* Jump to proper "exclusive" states */ + switch (state) { + case DIRECTIVE: + goto directive; + case LINECHG: + goto linechg; + case LINECHG2: + goto linechg2; + default: + break; + } + +scan: + SCANINIT(); + + /*!re2c + /* standard decimal integer */ + digit+ { + savech = s.tok[TOKLEN]; + s.tok[TOKLEN] = '\0'; + yylval.intn = intnum_new_dec(s.tok); + s.tok[TOKLEN] = savech; + RETURN(INTNUM); + } + /* 10010011b - binary number */ + 
+ bindigit+ "b" { + s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */ + yylval.intn = intnum_new_bin(s.tok); + RETURN(INTNUM); + } + + /* 777q - octal number */ + octdigit+ "q" { + 
s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */ + yylval.intn = intnum_new_oct(s.tok); + RETURN(INTNUM); + } + + /* 0AAh form of hexadecimal number */ + digit hexdigit+ "h" { + s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */ + yylval.intn = intnum_new_hex(s.tok); + RETURN(INTNUM); + } + + /* $0AA and 0xAA forms of hexadecimal number */ + (("$" digit) | "0x") hexdigit+ { + savech = s.tok[TOKLEN]; + s.tok[TOKLEN] = '\0'; + if (s.tok[1] == 'x') + yylval.intn = intnum_new_hex(s.tok+2); /* skip 0 and x */ + else + yylval.intn = intnum_new_hex(s.tok+1); /* don't skip 0 */ + s.tok[TOKLEN] = savech; + RETURN(INTNUM); + } + + /* floating point value */ + digit+ "." digit* ("e" [-+]? digit+)? { + savech = s.tok[TOKLEN]; + s.tok[TOKLEN] = '\0'; + yylval.flt = floatnum_new(s.tok); + s.tok[TOKLEN] = savech; + RETURN(FLTNUM); + } + + /* string/character constant values */ + quot { + endch = s.tok[0]; + goto stringconst; + } + + /* %line linenum+lineinc filename */ + "%line" { + state = LINECHG; + linechg_numcount = 0; + RETURN(LINE); + } + + /* size specifiers */ + B Y T E { yylval.int_info = 1; RETURN(BYTE); } + W O R D { yylval.int_info = 2; RETURN(WORD); } + D W O R D { yylval.int_info = 4; RETURN(DWORD); } + Q W O R D { yylval.int_info = 8; RETURN(QWORD); } + T W O R D { yylval.int_info = 10; RETURN(TWORD); } + D Q W O R D { yylval.int_info = 16; RETURN(DQWORD); } + + /* pseudo-instructions */ + D B { yylval.int_info = 1; RETURN(DECLARE_DATA); } + D W { yylval.int_info = 2; RETURN(DECLARE_DATA); } + D D { yylval.int_info = 4; RETURN(DECLARE_DATA); } + D Q { yylval.int_info = 8; RETURN(DECLARE_DATA); } + D T { yylval.int_info = 10; RETURN(DECLARE_DATA); } + + R E S B { yylval.int_info = 1; RETURN(RESERVE_SPACE); } + R E S W { yylval.int_info = 2; RETURN(RESERVE_SPACE); } + R E S D { yylval.int_info = 4; RETURN(RESERVE_SPACE); } + R E S Q { yylval.int_info = 8; RETURN(RESERVE_SPACE); } + R E S T { yylval.int_info = 10; RETURN(RESERVE_SPACE); } + + I N C B I N { RETURN(INCBIN); } + + E 
Q U           { RETURN(EQU); }
+
+    T I M E S       { RETURN(TIMES); }
+
+    S E G           { RETURN(SEG); }
+    W R T           { RETURN(WRT); }
+
+    N O S P L I T   { RETURN(NOSPLIT); }
+
+    T O             { RETURN(TO); }
+
+    /* operators */
+    "<<"                    { RETURN(LEFT_OP); }
+    ">>"                    { RETURN(RIGHT_OP); }
+    "//"                    { RETURN(SIGNDIV); }
+    "%%"                    { RETURN(SIGNMOD); }
+    "$$"                    { RETURN(START_SECTION_ID); }
+    [-+|^*&/%~$():=,\[]     { RETURN(s.tok[0]); }
+
+    /* handle ] separately for directives */
+    "]" {
+        if (state == DIRECTIVE2)
+            state = INITIAL;
+        RETURN(s.tok[0]);
+    }
+
+    /* special non-local ..@label and labels like ..start */
+    ".." [a-zA-Z0-9_$#@~.?]+ {
+        yylval.str_val = xstrndup(s.tok, TOKLEN);
+        RETURN(SPECIAL_ID);
+    }
+
+    /* local label (.label) */
+    "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+        /* override local labels in directive state */
+        if (state == DIRECTIVE2) {
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(ID);
+        } else if (!nasm_parser_locallabel_base) {
+            /* `%s' needs a NUL-terminated string, not the single char
+             * s.tok[0]; terminate the token in place for the duration of
+             * the warning, as the identifier rule below does.
+             */
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            Warning(_("no non-local label before `%s'"), s.tok);
+            s.tok[TOKLEN] = savech;
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+        } else {
+            /* result is the current base label followed by the .label text */
+            len = TOKLEN + nasm_parser_locallabel_base_len;
+            yylval.str_val = xmalloc(len + 1);
+            strcpy(yylval.str_val, nasm_parser_locallabel_base);
+            strncat(yylval.str_val, s.tok, TOKLEN);
+            yylval.str_val[len] = '\0';
+        }
+
+        RETURN(LOCAL_ID);
+    }
+
+    /* forced identifier */
+    "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+        yylval.str_val = xstrndup(s.tok, TOKLEN);
+        RETURN(ID);
+    }
+
+    /* identifier that may be a register, instruction, etc. */
+    [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+        /* the arch hook takes a NUL-terminated name; restore the byte after */
+        savech = s.tok[TOKLEN];
+        s.tok[TOKLEN] = '\0';
+        check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+                                                        s.tok);
+        s.tok[TOKLEN] = savech;
+        switch (check_id_ret) {
+            case ARCH_CHECK_ID_NONE:
+                /* Just an identifier, return as such.
*/
+                yylval.str_val = xstrndup(s.tok, TOKLEN);
+                RETURN(ID);
+            case ARCH_CHECK_ID_INSN:
+                RETURN(INSN);
+            case ARCH_CHECK_ID_PREFIX:
+                RETURN(PREFIX);
+            case ARCH_CHECK_ID_REG:
+                RETURN(REG);
+            case ARCH_CHECK_ID_SEGREG:
+                RETURN(SEGREG);
+            case ARCH_CHECK_ID_TARGETMOD:
+                RETURN(TARGETMOD);
+            default:
+                Warning(_("Arch feature not supported, treating as identifier"));
+                yylval.str_val = xstrndup(s.tok, TOKLEN);
+                RETURN(ID);
+        }
+    }
+
+    ";" (any \ [\n])*   { goto scan; }
+
+    ws+                 { goto scan; }
+
+    "\n"                { state = INITIAL; RETURN(s.tok[0]); }
+
+    any {
+        if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+            Warning(_("ignoring unrecognized character `%s'"),
+                    conv_unprint(s.tok[0]));
+        goto scan;
+    }
+    */
+
+    /* %line linenum+lineinc filename
+     * Scans the numeric part of a %line directive; linechg_numcount counts
+     * the INTNUMs returned so far on this line.
+     */
+linechg:
+    SCANINIT();
+
+    /*!re2c
+    digit+ {
+        linechg_numcount++;
+        savech = s.tok[TOKLEN];
+        s.tok[TOKLEN] = '\0';
+        yylval.intn = intnum_new_dec(s.tok);
+        s.tok[TOKLEN] = savech;
+        RETURN(INTNUM);
+    }
+
+    "\n" {
+        state = INITIAL;
+        RETURN(s.tok[0]);
+    }
+
+    "+" {
+        RETURN(s.tok[0]);
+    }
+
+    ws+ {
+        /* Only whitespace that follows the second number introduces the
+         * filename; any earlier whitespace is skipped and number scanning
+         * continues in this section.
+         */
+        if (linechg_numcount == 2) {
+            state = LINECHG2;
+            goto linechg2;
+        }
+        goto linechg;
+    }
+
+    any {
+        if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+            Warning(_("ignoring unrecognized character `%s'"),
+                    conv_unprint(s.tok[0]));
+        goto linechg;
+    }
+    */
+
+    /* filename part of a %line directive */
+linechg2:
+    SCANINIT();
+
+    /*!re2c
+    "\n" {
+        state = INITIAL;
+        RETURN(s.tok[0]);
+    }
+
+    "\r" {
+        /* skip the CR and keep scanning for the filename; an action must
+         * not run off its end
+         */
+        goto linechg2;
+    }
+
+    (any \ [\r\n])+ {
+        state = LINECHG;
+        yylval.str_val = xstrndup(s.tok, TOKLEN);
+        RETURN(FILENAME);
+    }
+    */
+
+    /* directive: [name value] */
+directive:
+    SCANINIT();
+
+    /*!re2c
+    [\]\n] {
+        state = INITIAL;
+        RETURN(s.tok[0]);
+    }
+
+    iletter+ {
+        state = DIRECTIVE2;
+        yylval.str_val = xstrndup(s.tok, TOKLEN);
+        RETURN(DIRECTIVE_NAME);
+    }
+
+    any {
+        if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+            Warning(_("ignoring unrecognized character `%s'"),
+                    conv_unprint(s.tok[0]));
+        goto directive;
+    }
+    */
+
+    /* string/character constant values */
+stringconst:
+    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+    
strbuf_size = STRBUF_ALLOC_SIZE; + count = 0; + +stringconst_scan: + SCANINIT(); + + /*!re2c + "\n" { + if (cursor == s.eof) + Error(_("unexpected end of file in string")); + else + Error(_("unterminated string")); + strbuf[count] = '\0'; + yylval.str_val = strbuf; + RETURN(STRING); + } + + any { + if (s.tok[0] == endch) { + strbuf[count] = '\0'; + yylval.str_val = strbuf; + RETURN(STRING); + } + + strbuf[count++] = s.tok[0]; + if (count >= strbuf_size) { + strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE); + strbuf_size += STRBUF_ALLOC_SIZE; + } + + goto stringconst_scan; + } + */ +} diff --git a/modules/parsers/nasm/token.l.in b/modules/parsers/nasm/token.l.in deleted file mode 100644 index 7f458557..00000000 --- a/modules/parsers/nasm/token.l.in +++ /dev/null @@ -1,353 +0,0 @@ -/* - * NASM-compatible lex lexer - * - * Copyright (C) 2001 Peter Johnson - * - * This file is part of YASM. - * - * YASM is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * YASM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -%{ -#include "util.h" -RCSID("$IdPath$"); - -#include "bitvect.h" - -#include "errwarn.h" -#include "intnum.h" -#include "floatnum.h" -#include "expr.h" -#include "symrec.h" - -#include "bytecode.h" - -#include "arch.h" - -#include "src/parsers/nasm/nasm-defs.h" -#include "nasm-bison.h" - - -#define YY_NEVER_INTERACTIVE 1 - -int nasm_parser_lex(void); - -extern size_t (*nasm_parser_input) (char *buf, size_t max_size); -#undef YY_INPUT -#define YY_INPUT(b, r, ms) (r = nasm_parser_input(b, ms)) - -/* starting size of string buffer */ -#define STRBUF_ALLOC_SIZE 128 - -/* string buffer used when parsing strings/character constants */ -static char *strbuf = (char *)NULL; - -/* length of strbuf (including terminating NULL character) */ -static size_t strbuf_size = 0; - -/* last "base" label for local (.) 
labels */ -char *nasm_parser_locallabel_base = (char *)NULL; - -static int linechg_numcount; - -%} -%option noyywrap -%option nounput -%option case-insensitive -%option never-interactive -%option prefix="nasm_parser_" -%option outfile="lex.yy.c" - -%x DIRECTIVE LINECHG LINECHG2 -%s DIRECTIVE2 - -DIGIT [0-9] -BINDIGIT [01] -OCTDIGIT [0-7] -HEXDIGIT [0-9a-f] -WS [ \t\r] - -%% - - /* standard decimal integer */ -{DIGIT}+ { - yylval.intn = intnum_new_dec(yytext); - return INTNUM; -} - - /* 10010011b - binary number */ -{BINDIGIT}+b { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'b' */ - yylval.intn = intnum_new_bin(yytext); - return INTNUM; -} - - /* 777q - octal number */ -{OCTDIGIT}+q { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'q' */ - yylval.intn = intnum_new_oct(yytext); - return INTNUM; -} - - /* 0AAh form of hexidecimal number */ -{DIGIT}{HEXDIGIT}*h { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'h' */ - yylval.intn = intnum_new_hex(yytext); - return INTNUM; -} - - /* $0AA and 0xAA forms of hexidecimal number */ -(\${DIGIT}|0x){HEXDIGIT}+ { - if (yytext[1] == 'x') - yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */ - else - yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */ - return INTNUM; -} - - /* floating point value */ -{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? 
{ - yylval.flt = floatnum_new(yytext); - return FLTNUM; -} - - /* string/character constant values */ -["'] { - int inch, count; - char endch = yytext[0]; - - strbuf = xmalloc(STRBUF_ALLOC_SIZE); - - strbuf_size = STRBUF_ALLOC_SIZE; - inch = input(); - count = 0; - while (inch != EOF && inch != endch && inch != '\n') { - strbuf[count++] = inch; - if (count >= strbuf_size) { - strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE); - if (!strbuf) - Fatal(FATAL_NOMEM); - strbuf_size += STRBUF_ALLOC_SIZE; - } - inch = input(); - } - - if (inch == '\n') - Error(_("unterminated string")); - else if (inch == EOF) - Error(_("unexpected end of file in string")); - - strbuf[count] = '\0'; - - yylval.str_val = strbuf; - return STRING; -} - - /* %line linenum+lineinc filename */ -^%line { BEGIN LINECHG; linechg_numcount = 0; return LINE; } -{DIGIT}+ { - linechg_numcount++; - yylval.intn = intnum_new_dec(yytext); - return INTNUM; -} -\n { BEGIN INITIAL; return '\n'; } -[+] { return yytext[0]; } -{WS}+ { - if (linechg_numcount == 2) - BEGIN LINECHG2; -} -\n { BEGIN INITIAL; return '\n'; } -\r ; -[^\r\n]+ { - BEGIN LINECHG; - yylval.str_val = xstrdup(yytext); - return FILENAME; -} - - /* directive: [name value] */ -^{WS}*"[" { BEGIN DIRECTIVE; return '['; } -"]" { BEGIN INITIAL; return ']'; } -"]" { BEGIN INITIAL; return ']'; } -\n { BEGIN INITIAL; return '\n'; } -\n { BEGIN INITIAL; return '\n'; } - -[a-z]+ { - BEGIN DIRECTIVE2; - yylval.str_val = xstrdup(yytext); - return DIRECTIVE_NAME; -} -. 
{ - if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR)) - Warning(_("ignoring unrecognized character `%s'"), - conv_unprint(yytext[0])); -} - - /* override local labels in directive state */ -\.[a-z0-9_$#@~.?]* { - yylval.str_val = xstrdup(yytext); - return ID; -} - - /* size specifiers */ -byte { yylval.int_info = 1; return BYTE; } -word { yylval.int_info = 2; return WORD; } -dword { yylval.int_info = 4; return DWORD; } -qword { yylval.int_info = 8; return QWORD; } -tword { yylval.int_info = 10; return TWORD; } -dqword { yylval.int_info = 16; return DQWORD; } - - /* pseudo-instructions */ -db { yylval.int_info = 1; return DECLARE_DATA; } -dw { yylval.int_info = 2; return DECLARE_DATA; } -dd { yylval.int_info = 4; return DECLARE_DATA; } -dq { yylval.int_info = 8; return DECLARE_DATA; } -dt { yylval.int_info = 10; return DECLARE_DATA; } - -resb { yylval.int_info = 1; return RESERVE_SPACE; } -resw { yylval.int_info = 2; return RESERVE_SPACE; } -resd { yylval.int_info = 4; return RESERVE_SPACE; } -resq { yylval.int_info = 8; return RESERVE_SPACE; } -rest { yylval.int_info = 10; return RESERVE_SPACE; } - -incbin { return INCBIN; } - -equ { return EQU; } - -times { return TIMES; } - -seg { return SEG; } -wrt { return WRT; } -near { return NEAR; } -short { return SHORT; } -far { return FAR; } - -nosplit { return NOSPLIT; } - -org { return ORG; } - -to { return TO; } - - /* operand size overrides */ -o16 { yylval.int_info = 16; return OPERSIZE; } -o32 { yylval.int_info = 32; return OPERSIZE; } - /* address size overrides */ -a16 { yylval.int_info = 16; return ADDRSIZE; } -a32 { yylval.int_info = 32; return ADDRSIZE; } - - /* instruction prefixes */ -lock { return LOCK; } -repne { return REPNZ; } -repnz { return REPNZ; } -rep { return REP; } -repe { return REPZ; } -repz { return REPZ; } - - /* control, debug, and test registers */ -cr4 { yylval.int_info = 4; return CR4; } -cr[023] { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; } -dr[0-367] { yylval.int_info = yytext[2]-'0'; 
return DRREG; } -tr[3-7] { yylval.int_info = yytext[2]-'0'; return TRREG; } - - /* floating point, MMX, and SSE registers */ -st0 { yylval.int_info = 0; return ST0; } -st[1-7] { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; } -mm[0-7] { yylval.int_info = yytext[2]-'0'; return MMXREG; } -xmm[0-7] { yylval.int_info = yytext[3]-'0'; return XMMREG; } - - /* integer registers */ -eax { yylval.int_info = 0; return REG_EAX; } -ecx { yylval.int_info = 1; return REG_ECX; } -edx { yylval.int_info = 2; return REG_EDX; } -ebx { yylval.int_info = 3; return REG_EBX; } -esp { yylval.int_info = 4; return REG_ESP; } -ebp { yylval.int_info = 5; return REG_EBP; } -esi { yylval.int_info = 6; return REG_ESI; } -edi { yylval.int_info = 7; return REG_EDI; } - -ax { yylval.int_info = 0; return REG_AX; } -cx { yylval.int_info = 1; return REG_CX; } -dx { yylval.int_info = 2; return REG_DX; } -bx { yylval.int_info = 3; return REG_BX; } -sp { yylval.int_info = 4; return REG_SP; } -bp { yylval.int_info = 5; return REG_BP; } -si { yylval.int_info = 6; return REG_SI; } -di { yylval.int_info = 7; return REG_DI; } - -al { yylval.int_info = 0; return REG_AL; } -cl { yylval.int_info = 1; return REG_CL; } -dl { yylval.int_info = 2; return REG_DL; } -bl { yylval.int_info = 3; return REG_BL; } -ah { yylval.int_info = 4; return REG_AH; } -ch { yylval.int_info = 5; return REG_CH; } -dh { yylval.int_info = 6; return REG_DH; } -bh { yylval.int_info = 7; return REG_BH; } - - /* segment registers */ -es { yylval.int_info = 0; return REG_ES; } -cs { yylval.int_info = 1; return REG_CS; } -ss { yylval.int_info = 2; return REG_SS; } -ds { yylval.int_info = 3; return REG_DS; } -fs { yylval.int_info = 4; return REG_FS; } -gs { yylval.int_info = 5; return REG_GS; } - - /* operators */ -"<<" { return LEFT_OP; } -">>" { return RIGHT_OP; } -"//" { return SIGNDIV; } -"%%" { return SIGNMOD; } -"$$" { return START_SECTION_ID; } -[-+|^&*/%~$():[\]=,] { return yytext[0]; } - - /* special non-local ..@label and 
labels like ..start */ -\.\.[a-z0-9_$#@~.?]+ { - yylval.str_val = xstrdup(yytext); - return SPECIAL_ID; -} - - /* local label (.label) */ -\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* { - if (!nasm_parser_locallabel_base) { - Warning(_("no non-local label before `%s'"), yytext); - yylval.str_val = xstrdup(yytext); - } else { - yylval.str_val = xmalloc(strlen(yytext) + - strlen(nasm_parser_locallabel_base) + 1); - strcpy(yylval.str_val, nasm_parser_locallabel_base); - strcat(yylval.str_val, yytext); - } - - return LOCAL_ID; -} - - /* instructions */ - /* @INSTRUCTIONS@ */ - - /* label */ -[a-z_?][a-z0-9_$#@~.?]* { - yylval.str_val = xstrdup(yytext); - return ID; -} - -;.* ; - -{WS}+ ; - -\n return '\n'; - -. { - if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR)) - Warning(_("ignoring unrecognized character `%s'"), - conv_unprint(yytext[0])); -} - diff --git a/src/arch.c b/src/arch.c index 5a997421..780fb26e 100644 --- a/src/arch.c +++ b/src/arch.c @@ -22,9 +22,146 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "globals.h" +#include "expr.h" + #include "bytecode.h" #include "arch.h" + arch *cur_arch;
+/* Allocates a register operand holding reg, with no target modifier and no
+ * user-specified size.
+ */
+insn_operand *
+operand_new_reg(unsigned long reg)
+{
+    insn_operand *op = xmalloc(sizeof(*op));
+
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.reg = reg;
+    op->type = INSN_OPERAND_REG;
+
+    return op;
+}
+
+/* Allocates a segment register operand holding segreg, with no target
+ * modifier and no user-specified size.
+ */
+insn_operand *
+operand_new_segreg(unsigned long segreg)
+{
+    insn_operand *op = xmalloc(sizeof(*op));
+
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.reg = segreg;
+    op->type = INSN_OPERAND_SEGREG;
+
+    return op;
+}
+
+/* Allocates a memory operand that stores the ea pointer itself (no copy is
+ * made), with no target modifier and no user-specified size.
+ */
+insn_operand *
+operand_new_mem(/*@only@*/ effaddr *ea)
+{
+    insn_operand *op = xmalloc(sizeof(*op));
+
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.ea = ea;
+    op->type = INSN_OPERAND_MEMORY;
+
+    return op;
+}
+
+/* Allocates an operand from the expression val.  When expr_get_reg() finds
+ * a register in val, a register operand is built from it and val is
+ * deleted; otherwise an immediate operand that stores val is returned.
+ */
+insn_operand *
+operand_new_imm(/*@only@*/ expr *val)
+{
+    const unsigned long *regp = expr_get_reg(&val, 0);
+    insn_operand *op;
+
+    if (regp) {
+	/* the register value is copied into the operand before val goes */
+	op = operand_new_reg(*regp);
+	expr_delete(val);
+	return op;
+    }
+
+    op = xmalloc(sizeof(*op));
+    op->size = 0;
+    op->targetmod = 0;
+    op->data.val = val;
+    op->type = INSN_OPERAND_IMM;
+
+    return op;
+}
+
+/* Prints op to f at the current indent_level: first a line (or nested
+ * block) for the operand payload, then its TargetMod and Size one level
+ * deeper.
+ */
+void
+operand_print(FILE *f, const insn_operand *op)
+{
+    if (op->type == INSN_OPERAND_REG) {
+	fprintf(f, "%*sReg=", indent_level, "");
+	cur_arch->reg_print(f, op->data.reg);
+	fprintf(f, "\n");
+    } else if (op->type == INSN_OPERAND_SEGREG) {
+	fprintf(f, "%*sSegReg=", indent_level, "");
+	cur_arch->segreg_print(f, op->data.reg);
+	fprintf(f, "\n");
+    } else if (op->type == INSN_OPERAND_MEMORY) {
+	fprintf(f, "%*sMemory=\n", indent_level, "");
+	indent_level++;
+	ea_print(f, op->data.ea);
+	indent_level--;
+    } else if (op->type == INSN_OPERAND_IMM) {
+	fprintf(f, "%*sImm=", indent_level, "");
+	expr_print(f, op->data.val);
+	fprintf(f, "\n");
+    }
+
+    fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", op->targetmod);
+    fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size);
+}
+
+/* Frees every operand on the list headp and leaves the list empty.  When
+ * content is nonzero the effective address / expression owned by memory and
+ * immediate operands is deleted as well.
+ */
+void
+ops_delete(insn_operandhead *headp, int content)
+{
+    insn_operand *op, *following;
+
+    for (op = STAILQ_FIRST(headp); op; op = following) {
+	/* fetch the successor before op is freed */
+	following = STAILQ_NEXT(op, link);
+	if (content) {
+	    if (op->type == INSN_OPERAND_MEMORY)
+		ea_delete(op->data.ea);
+	    else if (op->type == INSN_OPERAND_IMM)
+		expr_delete(op->data.val);
+	}
+	xfree(op);
+    }
+    STAILQ_INIT(headp);
+}
+
+/* Appends op to the tail of headp and returns it; a NULL op is not
+ * appended and NULL is returned.
+ */
+/*@null@*/ insn_operand *
+ops_append(insn_operandhead *headp, /*@returned@*/ /*@null@*/ insn_operand *op)
+{
+    if (!op)
+	return (insn_operand *)NULL;
+
+    STAILQ_INSERT_TAIL(headp, op, link);
+    return op;
+}
+
+/* Prints every operand on the list headp, in list order, via
+ * operand_print().
+ */
+void
+ops_print(FILE *f, const insn_operandhead *headp)
+{
+    insn_operand *op;
+
+    for (op = STAILQ_FIRST(headp); op; op = STAILQ_NEXT(op, link))
+	operand_print(f, op);
+}
diff --git a/src/arch.h b/src/arch.h index 2e53ae30..18e3faf6 100644 --- a/src/arch.h +++ b/src/arch.h @@ -1,7 +1,7 @@ /* $IdPath$ * Architecture header file * - * Copyright (C) 2001 
Peter Johnson + * Copyright (C) 2002 Peter Johnson * * This file is part of YASM. * @@ -22,6 +22,35 @@ #ifndef YASM_ARCH_H #define YASM_ARCH_H +typedef enum arch_check_id_retval { + ARCH_CHECK_ID_NONE = 0, /* just a normal identifier */ + ARCH_CHECK_ID_INSN, /* an instruction */ + ARCH_CHECK_ID_PREFIX, /* an instruction prefix */ + ARCH_CHECK_ID_REG, /* a register */ + ARCH_CHECK_ID_SEGREG, /* a segment register (for memory overrides) */ + ARCH_CHECK_ID_TARGETMOD /* a target modifier (for jumps) */ +} arch_check_id_retval; + +typedef /*@reldef@*/ STAILQ_HEAD(insn_operandhead, insn_operand) + insn_operandhead; + +typedef struct insn_operand insn_operand; + +/* Different assemblers order instruction operands differently. Also, some + * differ on how exactly various registers are specified. There's no great + * solution to this, as the parsers aren't supposed to have knowledge of the + * architectural internals, and the architecture is supposed to be parser- + * independent. To make things work, as a rather hackish solution, we give the + * architecture a little knowledge about the general "flavor" of the parser, + * and let the architecture decide what to do with it. Most architectures will + * probably not even use this, but it's required for some (x86 in particular) + * for correct behavior on all parsers. + */ +typedef enum arch_syntax_flavor { + ARCH_SYNTAX_FLAVOR_NASM = 1, /* like NASM */ + ARCH_SYNTAX_FLAVOR_GAS /* like GAS */ +} arch_syntax_flavor; + struct arch { /* one-line description of the architecture */ const char *name; @@ -29,6 +58,67 @@ struct arch { /* keyword used to select architecture */ const char *keyword; + struct { + /* All "data" below starts the parse initialized to 0. Thus, it is + * okay for a function to use/check previously stored data to see if + * it's been called before on the same piece of data. + */ + + /* Switches available instructions/registers/etc. based on a + * user-specified CPU identifier. 
Should modify behavior ONLY of + * parse functions! The bytecode and output functions should be able + * to handle any CPU. + */ + void (*switch_cpu) (const char *cpuid); + + /* Checks a generic identifier to see if it matches architecture + * specific names for instructions, registers, etc (see the + * arch_check_id_retval enum above for the various types this function + * can detect & return). Unrecognized identifiers should be returned + * as NONE so they can be treated as normal symbols. Any additional + * data beyond just the type (almost always necessary) should be + * returned into the space provided by the data parameter. + * Note: even though this is passed a data[4], only data[0] should be + * used for TARGETMOD, REG, and SEGREG return values. + */ + arch_check_id_retval (*check_identifier) (unsigned long data[4], + const char *id); + + /* Architecture-specific directive support. Returns 1 if directive was + * not recognized. Returns 0 if directive was recognized, even if it + * wasn't valid. Should modify behavior ONLY of parse functions, much + * like switch_cpu() above. + */ + int (*directive) (const char *name, valparamhead *valparams, + /*@null@*/ valparamhead *objext_valparams, + sectionhead *headp); + + /* Creates an instruction. Creates a bytecode by matching the + * instruction data and the parameters given with a valid instruction. + * If no match is found (the instruction is invalid), returns NULL. + * All zero data indicates an empty instruction should be created. + */ + /*@null@*/ bytecode * (*new_insn) (const unsigned long data[4], + int num_operands, /*@null@*/ + insn_operandhead *operands); + + /* Handle an instruction prefix by modifying bc as necessary. */ + void (*handle_prefix) (bytecode *bc, const unsigned long data[4]); + + /* Handle a segment register instruction prefix by modifying bc as + * necessary. 
+ */ + void (*handle_seg_prefix) (bytecode *bc, unsigned long segreg); + + /* Handle memory expression segment overrides by modifying ea as + * necessary. + */ + void (*handle_seg_override) (effaddr *ea, unsigned long segreg); + + /* Convert an expression into an effective address. */ + effaddr * (*ea_new_expr) (/*@keep@*/ expr *e); + } parse; + struct { /* Maximum used bytecode type value+1. Should be set to * BYTECODE_TYPE_BASE if no additional bytecode types are defined by @@ -48,10 +138,77 @@ struct arch { const section *sect, void *d, output_expr_func output_expr); } bc; + + /* Gets the equivalent register size in bytes. Returns 0 if there is no + * suitable equivalent size. + */ + unsigned int (*get_reg_size) (unsigned long reg); + + void (*reg_print) (FILE *f, unsigned long reg); + void (*segreg_print) (FILE *f, unsigned long segreg); + + /* Deletes the arch-specific data in ea. May be NULL if no special + * deletion is required (e.g. there are no dynamically allocated pointers + * in the ea data). + */ + void (*ea_data_delete) (effaddr *ea); + + void (*ea_data_print) (FILE *f, const effaddr *ea); +}; + +struct insn_operand { + /*@reldef@*/ STAILQ_ENTRY(insn_operand) link; + + enum { + INSN_OPERAND_REG = 1, /* a register */ + INSN_OPERAND_SEGREG, /* a segment register */ + INSN_OPERAND_MEMORY, /* an effective address (memory reference) */ + INSN_OPERAND_IMM /* an immediate or jump target */ + } type; + + union { + unsigned long reg; /* arch data for reg/segreg */ + effaddr *ea; /* effective address for memory references */ + expr *val; /* value of immediate or jump target */ + } data; + + unsigned long targetmod; /* arch target modifier, 0 if none */ + + /* Specified size of the operand, in bytes. 0 if not user-specified. */ + unsigned int size; }; +/* insn_operand constructors. operand_new_imm() will look for cases of a + * single register and create an INSN_OPERAND_REG variant of insn_operand. 
+ */ +insn_operand *operand_new_reg(unsigned long reg); +insn_operand *operand_new_segreg(unsigned long segreg); +insn_operand *operand_new_mem(/*@only@*/ effaddr *ea); +insn_operand *operand_new_imm(/*@only@*/ expr *val); + +void operand_print(FILE *f, const insn_operand *op); + +#define ops_initialize(headp) STAILQ_INIT(headp) +#define ops_first(headp) STAILQ_FIRST(headp) +#define ops_next(cur) STAILQ_NEXT(cur, link) + +/* Deletes operands linked list. Deletes content of each operand if content is + * nonzero. + */ +void ops_delete(insn_operandhead *headp, int content); + +/* Adds op to the list of operands headp. + * NOTE: Does not make a copy of op; so don't pass this function + * static or local variables, and discard the op pointer after calling + * this function. If op was actually appended (it wasn't NULL), then + * returns op, otherwise returns NULL. + */ +/*@null@*/ insn_operand *ops_append(insn_operandhead *headp, + /*@returned@*/ /*@null@*/ insn_operand *op); + +void ops_print(FILE *f, const insn_operandhead *headp); + /* Available architectures */ -#include "arch/x86/x86arch.h" extern arch x86_arch; extern arch *cur_arch; diff --git a/src/arch/x86/Makefile.inc b/src/arch/x86/Makefile.inc index 3d16e3d3..7bfef4d0 100644 --- a/src/arch/x86/Makefile.inc +++ b/src/arch/x86/Makefile.inc @@ -3,10 +3,19 @@ YASMARCHFILES += \ src/arch/x86/x86arch.c \ src/arch/x86/x86arch.h \ - src/arch/x86/x86-int.h \ src/arch/x86/x86bc.c \ - src/arch/x86/x86expr.c + src/arch/x86/x86expr.c \ + x86id.c + +x86id.c: $(srcdir)/src/arch/x86/x86id.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl + re2c$(EXEEXT) -b $(srcdir)/src/arch/x86/x86id.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@ + +BUILT_SOURCES += \ + x86id.c + +CLEANFILES += \ + x86id.c EXTRA_DIST += \ src/arch/x86/README \ - src/arch/x86/instrs.dat + src/arch/x86/x86id.re diff --git a/src/arch/x86/instrs.dat b/src/arch/x86/instrs.dat deleted file mode 100644 index 02e5ad9a..00000000 --- a/src/arch/x86/instrs.dat +++ 
/dev/null @@ -1,1208 +0,0 @@ -; $IdPath$ -; List of valid instruction/operand combinations -; -; Copyright (C) 2001 Peter Johnson -; -; This file is part of YASM. -; -; YASM is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. -; -; YASM is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -; -; Meanings of codes: -; $x refers to operand x -; "nil" in a field indicates the lack of that field in the instruction -; (there MUST be some text in every field in this document) -; Sizes are in bits (8,16,32 are the only valid quantities) -; -; Column definitions: -; Inst - Instruction, should be lowercase -; Operands - Single combination of valid operands -; "TO" is not counted in the operand count. -; OpSize - Fixed operand size. Can generate prefix byte. -; Opcode - One or two bytes of opcode. -; EffAddr - Effective Address (ModRM/SIB/Off). First value is the memory -; operand, second specifies what value goes into the reg/spare -; bits in the ModRM byte. -; $xr indicates operand is register, not ModRM (needs convert to RM) -; $xi indicates operand is immediate (2nd parm is size in bits) -; Imm - Immediate source operand and forced size (in bits). -; "s" after size indicates signed number -; A number instead of a $x is a hex constant value. -; -; A ':' at the beginning of the line means that the instruction following the -; ':' is a synonym for the instruction in the 2nd column. 
-; -; See the parser file for a list of possible operand values and their meanings. -; gen_instr.pl translates this list into lexer and parser code. -; -; Instructions are listed in the same order as that in GNU binutils -; /include/opcode/i386.h, used for the GAS assembler. See -; . -; -; TODO: -; Finish instructions (may require changing parser code). -; Doublecheck instruction encodings, allowable operands. -; Doublecheck CPU flags (especially on MMX/SSE/SSE2 opcodes). -; Doublecheck AMD and Cyrix instructions. -; Doublecheck the segreg mov instructions. -; -; Instruction Groupings (to shorten parser code). -; The $0.1, $0.2, and $0.3 will get replaced with the parameters given for -; the instruction using the group during lexing & parsing. These parameters -; may be in the opcode, opsize, effaddr, or immediate. -; When opsize is a parameter, its usage in instructions that use the group -; looks slightly different than normal, because the parameters are -; specified in hexidecimal while the normal opsize usage is in decimal. -; Thus 10 and 20 are used instead of 16 and 32 respectively. -; The first CPU grouping for the instruction is OR'ed with the CPU value in -; the group CPU fields with @0 in their list. This allows one grouping to -; be used for instructions with different CPU values. -; Restrictions on groupings: -; - $0.? may not appear in the operand, the first part of the effaddr, the -; second part of the imm, or the CPU fields. -; - @0, @1 may only appear in the CPU field. -; Restrictions on instructions based on groupings: -; - no other operand combinations are allowed (eg, if an instruction uses a -; group, that must be the ONLY line for the instruction) -; -; Notes on code generation: -; Each group generates a lex token of the group name (sans !). Bison rules -; are generated for each of the operand combinations for the group just as -; with a regular instruction, except for the addition of the $0.? fields. -; Each $0.? field is replaced by $1.d? 
in the generated code (eg, -; $0.1->$1.d1, etc). -; When an instruction that uses a group is encountered, eg: -; inst!grpname parm1[,parm2[,parm3]] -; The following lex code is generated: -; inst { yylval.groupdata[0]=0xparm1; return GRPNAME; } -; (and additional yylval.groupdata[#-1]=0xparm#; if needed) -; -; KEY -; -; !Grp Operands OpSize Opcode EffAddr Imm CPU -; Inst Operands OpSize Opcode EffAddr Imm CPU -; Inst!Grp Parameters CPU @0 CPU @1 -; -; Groupings used throughout -; -; One byte opcode instructions with no operands: -!onebyte nil $0.1 $0.2 nil nil @0 -; Two byte opcode instructions with no operands: -!twobyte nil nil $0.1,$0.2 nil nil @0 -; Three byte opcode instructions with no operands: -!threebyte nil nil $0.1,$0.2,$0.3 nil nil @0 -; One byte opcode instructions with general memory operand: -!onebytemem mem nil $0.1 $1,$0.2 nil @0 -; Two byte opcode instructions with general memory operand: -!twobytemem mem nil $0.1,$0.2 $1,$0.3 nil @0 -; -; Move instructions -; -; opcode arbitrarily picked for next 3 (could be 8A/8B instead of 88/89). 
-mov reg8,reg8 nil 88 $1r,$2 nil 8086 -mov reg16,reg16 16 89 $1r,$2 nil 8086 -mov reg32,reg32 32 89 $1r,$2 nil 386 -mov mem,reg8 nil 88 $1,$2 nil 8086 -mov mem8x,reg8 nil 88 $1,$2 nil 8086 -mov mem,reg16 16 89 $1,$2 nil 8086 -mov mem16x,reg16 16 89 $1,$2 nil 8086 -mov mem,reg32 32 89 $1,$2 nil 386 -mov mem32x,reg32 32 89 $1,$2 nil 386 -mov reg8,mem8 nil 8A $2,$1 nil 8086 -mov reg16,mem16 16 8B $2,$1 nil 8086 -mov reg32,mem32 32 8B $2,$1 nil 386 -mov mem,segreg nil 8C $1,$2 nil 8086 -mov reg16,segreg 16 8C $1r,$2 nil 8086 -mov mem16x,segreg 16 8C $1,$2 nil 8086 -mov reg32,segreg 32 8C $1r,$2 nil 386 -mov mem32x,segreg 32 8C $1,$2 nil 386 -mov segreg,mem nil 8E $2,$1 nil 8086 -mov segreg,rm16x nil 8E $2,$1 nil 8086 -mov segreg,rm32x nil 8E $2,$1 nil 386 -;mov reg_al,memoff8 -;mov reg_ax,memoff16 -;mov reg_eax,memoff32 -;mov memoff8,reg_al -;mov memoff16,reg_ax -;mov memoff32,reg_eax -mov reg8,imm8 nil B0+$1 nil $2,8 8086 -mov reg16,imm16 16 B8+$1 nil $2,16 8086 -mov reg32,imm32 32 B8+$1 nil $2,32 386 -mov mem8x,imm8 nil C6 $1,0 $2,8 8086 -mov mem,imm8x nil C6 $1,0 $2,8 8086 -mov mem16x,imm16 16 C7 $1,0 $2,16 8086 -mov mem,imm16x 16 C7 $1,0 $2,16 8086 -mov mem32x,imm32 32 C7 $1,0 $2,32 8086 -mov mem,imm32x 32 C7 $1,0 $2,32 8086 -mov CRREG_NOTCR4,reg32 nil 0F,22 $2r,$1 nil 386,PRIV -mov CR4,reg32 nil 0F,22 $2r,$1 nil P5,PRIV -mov reg32,CRREG_NOTCR4 nil 0F,20 $1r,$2 nil 386,PRIV -mov reg32,CR4 nil 0F,20 $1r,$2 nil P5,PRIV -mov reg32,DRREG nil 0F,21 $1r,$2 nil 386,PRIV -mov DRREG,reg32 nil 0F,23 $2r,$1 nil 386,PRIV -; -; Move with sign/zero extend -; -!movszx reg16,rm8 16 0F,$0.1 $2,$1 nil 386 -!movszx reg32,rm8x 32 0F,$0.1 $2,$1 nil 386 -!movszx reg32,rm16x nil 0F,$0.1+1 $2,$1 nil 386 -movsx!movszx BE -movzx!movszx B6 -; -; Push instructions -; -push mem16x 16 FF $1,6 nil 8086 -push mem32x 32 FF $1,6 nil 386 -push reg16 16 50+$1 nil nil 8086 -push reg32 32 50+$1 nil nil 386 -push imm8x nil 6A nil $1,8 8086 -push imm16x 16 68 nil $1,16 8086 -push imm32x 32 68 nil $1,32 
386 -push reg_cs nil 0E nil nil 8086 -push reg_ss nil 16 nil nil 8086 -push reg_ds nil 1E nil nil 8086 -push reg_es nil 06 nil nil 8086 -push reg_fs nil 0F,A0 nil nil 386 -push reg_gs nil 0F,A8 nil nil 386 -pusha!onebyte nil,60 186 -pushad!onebyte 20,60 386 -pushaw!onebyte 10,60 186 -; -; Pop instructions -; -pop mem16x 16 8F $1,0 nil 8086 -pop mem32x 32 8F $1,0 nil 386 -pop reg16 16 58+$1 nil nil 8086 -pop reg32 32 58+$1 nil nil 386 -pop reg_ds nil 1F nil nil 8086 -pop reg_es nil 07 nil nil 8086 -pop reg_ss nil 17 nil nil 8086 -pop reg_fs nil 0F,A1 nil nil 386 -pop reg_gs nil 0F,A9 nil nil 386 -popa!onebyte nil,61 186 -popad!onebyte 20,61 386 -popaw!onebyte 10,61 186 -; -; Exchange instructions -; -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg8,reg8 nil 86 $1r,$2 nil 8086 -xchg mem,reg8 nil 86 $1,$2 nil 8086 -xchg mem8x,reg8 nil 86 $1,$2 nil 8086 -xchg reg8,mem8 nil 86 $2,$1 nil 8086 -xchg reg_ax,reg16 16 90+$2 nil nil 8086 -xchg reg16,reg_ax 16 90+$1 nil nil 8086 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg16,reg16 16 87 $1r,$2 nil 8086 -xchg mem,reg16 16 87 $1,$2 nil 8086 -xchg mem16x,reg16 16 87 $1,$2 nil 8086 -xchg reg16,mem16 16 87 $2,$1 nil 8086 -xchg reg_eax,reg32 32 90+$2 nil nil 386 -xchg reg32,reg_eax 32 90+$1 nil nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -xchg reg32,reg32 32 87 $1r,$2 nil 386 -xchg mem,reg32 32 87 $1,$2 nil 386 -xchg mem32x,reg32 32 87 $1,$2 nil 386 -xchg reg32,mem32 32 87 $2,$1 nil 386 -; -; In/out from ports -; -in reg_al,imm8 nil E4 nil $2,8 8086 -in reg_ax,imm8 16 E5 nil $2,8 8086 -in reg_eax,imm8 32 E5 nil $2,8 386 -in reg_al,reg_dx nil EC nil nil 8086 -in reg_ax,reg_dx 16 ED nil nil 8086 -in reg_eax,reg_dx 32 ED nil nil 386 -out imm8,reg_al nil E6 nil $1,8 8086 -out imm8,reg_ax 16 E7 nil $1,8 8086 -out imm8,reg_eax 32 E7 nil $1,8 386 -out reg_dx,reg_al nil EE nil nil 8086 -out reg_dx,reg_ax 16 EF nil nil 8086 -out reg_dx,reg_eax 32 EF nil nil 386 -; -; Load effective 
address -; -lea reg16,mem16 16 8D $2,$1 nil 8086 -lea reg32,mem32 32 8D $2,$1 nil 386 -; -; Load segment registers from memory -; -lds reg16,mem 16 C5 $2,$1 nil 8086 -lds reg32,mem 32 C5 $2,$1 nil 386 -les reg16,mem 16 C4 $2,$1 nil 8086 -les reg32,mem 32 C4 $2,$1 nil 386 -lfs reg16,mem 16 0F,B4 $2,$1 nil 386 -lfs reg32,mem 32 0F,B4 $2,$1 nil 386 -lgs reg16,mem 16 0F,B5 $2,$1 nil 386 -lgs reg32,mem 32 0F,B5 $2,$1 nil 386 -lss reg16,mem 16 0F,B2 $2,$1 nil 386 -lss reg32,mem 32 0F,B2 $2,$1 nil 386 -; -; Flags register instructions -; -clc!onebyte nil,F8 8086 -cld!onebyte nil,FC 8086 -cli!onebyte nil,FA 8086 -clts!twobyte 0F,06 286,PRIV -cmc!onebyte nil,F5 8086 -lahf!onebyte nil,9F 8086 -sahf!onebyte nil,9E 8086 -pushf!onebyte nil,9C 8086 -pushfd!onebyte 20,9C 386 -pushfw!onebyte 10,9C 8086 -popf!onebyte nil,9D 8086 -popfd!onebyte 20,9D 386 -popfw!onebyte 10,9D 8086 -stc!onebyte nil,F9 8086 -std!onebyte nil,FD 8086 -sti!onebyte nil,FB 8086 -; -; Arithmetic -; -; General arithmetic -!arith reg_al,imm8 nil $0.1+4 nil $2,8 8086 -!arith reg_ax,imm16 16 $0.1+5 nil $2,16 8086 -!arith reg_eax,imm32 32 $0.1+5 nil $2,32 386 -!arith reg8,imm8 nil 80 $1r,$0.2 $2,8 8086 -!arith mem8x,imm nil 80 $1,$0.2 $2,8 8086 -!arith mem,imm8x nil 80 $1,$0.2 $2,8 8086 -!arith reg16,imm 16 81 $1r,$0.2 $2,16 8086 -!arith mem16x,imm 16 81 $1,$0.2 $2,16 8086 -!arith reg16,imm16x 16 81 $1r,$0.2 $2,16 8086 -!arith mem,imm16x 16 81 $1,$0.2 $2,16 8086 -!arith reg32,imm 32 81 $1r,$0.2 $2,32 386 -!arith mem32x,imm 32 81 $1,$0.2 $2,32 386 -!arith reg32,imm32x 32 81 $1r,$0.2 $2,32 386 -!arith mem,imm32x 32 81 $1,$0.2 $2,32 386 -!arith reg16,imm8x 16 83 $1r,$0.2 $2,8s 8086 -!arith mem16x,imm8x 16 83 $1,$0.2 $2,8s 8086 -!arith reg32,imm8x 32 83 $1r,$0.2 $2,8s 386 -!arith mem32x,imm8x 32 83 $1,$0.2 $2,8s 386 -; opcode arbitrarily picked for next 3 (could be $0.1+2/3 instead of $0.1+0/1). 
-!arith reg8,reg8 nil $0.1 $1r,$2 nil 8086 -!arith reg16,reg16 16 $0.1+1 $1r,$2 nil 8086 -!arith reg32,reg32 32 $0.1+1 $1r,$2 nil 386 -!arith mem,reg8 nil $0.1 $1,$2 nil 8086 -!arith mem8x,reg8 nil $0.1 $1,$2 nil 8086 -!arith mem,reg16 16 $0.1+1 $1,$2 nil 8086 -!arith mem16x,reg16 16 $0.1+1 $1,$2 nil 8086 -!arith mem,reg32 32 $0.1+1 $1,$2 nil 386 -!arith mem32x,reg32 32 $0.1+1 $1,$2 nil 386 -!arith reg8,mem8 nil $0.1+2 $2,$1 nil 8086 -!arith reg16,mem16 16 $0.1+3 $2,$1 nil 8086 -!arith reg32,mem32 32 $0.1+3 $2,$1 nil 386 -; INC/DEC -!incdec rm8x nil FE $1,$0.1 nil 8086 -!incdec mem16x 16 FF $1,$0.1 nil 8086 -!incdec mem32x 32 FF $1,$0.1 nil 386 -!incdec reg16 16 $0.2+$1 nil nil 8086 -!incdec reg32 32 $0.2+$1 nil nil 386 -; "F6" opcodes (DIV/IDIV/MUL/NEG/NOT): -!groupf6 rm8x nil F6 $1,$0.1 nil 8086 -!groupf6 rm16x 16 F7 $1,$0.1 nil 8086 -!groupf6 rm32x 32 F7 $1,$0.1 nil 386 -add!arith 00,0 -inc!incdec 0,40 -sub!arith 28,5 -dec!incdec 1,48 -sbb!arith 18,3 -cmp!arith 38,7 -test reg_al,imm8 nil A8 nil $2,8 8086 -test reg_ax,imm16 16 A9 nil $2,16 8086 -test reg_eax,imm32 32 A9 nil $2,32 386 -test reg8,imm8 nil F6 $1r,0 $2,8 8086 -test mem8x,imm nil F6 $1,0 $2,8 8086 -test mem,imm8x nil F6 $1,0 $2,8 8086 -test reg16,imm16 16 F7 $1r,0 $2,16 8086 -test mem16x,imm 16 F7 $1,0 $2,16 8086 -test mem,imm16x 16 F7 $1,0 $2,16 8086 -test reg32,imm32 32 F7 $1r,0 $2,32 386 -test mem32x,imm 32 F7 $1,0 $2,32 386 -test mem,imm32x 32 F7 $1,0 $2,32 386 -; arbitrary encoding for next 3, picked $1r,$2 instead of $2r,$1 -test reg8,reg8 nil 84 $1r,$2 nil 8086 -test reg16,reg16 16 85 $1r,$2 nil 8086 -test reg32,reg32 32 85 $1r,$2 nil 386 -test mem,reg8 nil 84 $1,$2 nil 8086 -test mem8x,reg8 nil 84 $1,$2 nil 8086 -test mem,reg16 16 85 $1,$2 nil 8086 -test mem16x,reg16 16 85 $1,$2 nil 8086 -test mem,reg32 32 85 $1,$2 nil 386 -test mem32x,reg32 32 85 $1,$2 nil 386 -test reg8,mem8 nil 84 $2,$1 nil 8086 -test reg16,mem16 16 85 $2,$1 nil 8086 -test reg32,mem32 32 85 $2,$1 nil 386 -and!arith 20,4 
-or!arith 08,1 -xor!arith 30,6 -adc!arith 10,2 -neg!groupf6 3 -not!groupf6 2 -aaa!onebyte nil,37 8086 -aas!onebyte nil,3F 8086 -daa!onebyte nil,27 8086 -das!onebyte nil,2F 8086 -aad nil nil D5,0A nil nil 8086 -aad imm8 nil D5 nil $1,8 8086 -aam nil nil D4,0A nil nil 8086 -aam imm8 nil D4 nil $1,8 8086 -; -; Conversion instructions -; -cbw!onebyte 10,98 8086 -cwde!onebyte 20,98 386 -cwd!onebyte 10,99 8086 -cdq!onebyte 20,99 386 -; -; Multiplication and division -; -mul!groupf6 4 -imul rm8x nil F6 $1,5 nil 8086 -imul rm16x 16 F7 $1,5 nil 8086 -imul rm32x 32 F7 $1,5 nil 386 -imul reg16,rm16 16 0F,AF $2,$1 nil 386 -imul reg32,rm32 32 0F,AF $2,$1 nil 386 -imul reg16,rm16,imm8x 16 6B $2,$1 $3,8s 186 -imul reg32,rm32,imm8x 32 6B $2,$1 $3,8s 386 -imul reg16,imm8x 16 6B $1r,$1 $2,8s 186 -imul reg32,imm8x 32 6B $1r,$1 $2,8s 386 -imul reg16,rm16,imm16 16 69 $2,$1 $3,16s 186 -imul reg32,rm32,imm32 32 69 $2,$1 $3,32s 386 -imul reg16,imm16 16 69 $1r,$1 $2,16s 186 -imul reg32,imm32 32 69 $1r,$1 $2,32s 386 -div!groupf6 6 -idiv!groupf6 7 -; -; Shifts -; -; Standard -!shift rm8x,ONE nil D0 $1,$0.1 nil 8086 -!shift rm8x,reg_cl nil D2 $1,$0.1 nil 8086 -!shift rm8x,imm8 nil C0 $1,$0.1 $2,8 186 -!shift rm16x,ONE 16 D1 $1,$0.1 nil 8086 -!shift rm16x,reg_cl 16 D3 $1,$0.1 nil 8086 -!shift rm16x,imm8 16 C1 $1,$0.1 $2,8 186 -!shift rm32x,ONE 32 D1 $1,$0.1 nil 386 -!shift rm32x,reg_cl 32 D3 $1,$0.1 nil 386 -!shift rm32x,imm8 32 C1 $1,$0.1 $2,8 386 -; Doubleword -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg16,reg16,imm8 16 0F,$0.1 $1r,$2 $3,8 386 -!shlrd mem,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386 -!shlrd mem16x,reg16,imm8 16 0F,$0.1 $1,$2 $3,8 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg16,reg16,reg_cl 16 0F,$0.1+1 $1r,$2 nil 386 -!shlrd mem,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386 -!shlrd mem16x,reg16,reg_cl 16 0F,$0.1+1 $1,$2 nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg32,reg32,imm8 32 0F,$0.1 $1r,$2 $3,8 386 -!shlrd 
mem,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386 -!shlrd mem32x,reg32,imm8 32 0F,$0.1 $1,$2 $3,8 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!shlrd reg32,reg32,reg_cl 32 0F,$0.1+1 $1r,$2 nil 386 -!shlrd mem,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386 -!shlrd mem32x,reg32,reg_cl 32 0F,$0.1+1 $1,$2 nil 386 -rol!shift 0 -ror!shift 1 -rcl!shift 2 -rcr!shift 3 -sal!shift 4 -shl!shift 4 -shr!shift 5 -sar!shift 7 -shld!shlrd A4 -shrd!shlrd AC -; -; Control transfer instructions (unconditional) -; -; Special format for relative targets: -; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU -; -!jmpcall target nil $0.1?$0.2 $0.3 8086 8086 -!jmpcall imm:imm nil $0.4 $2i,nil $1,16 8086 -!jmpcall WORD imm:imm 16 $0.4 $2i,16 $1,16 8086 -!jmpcall DWORD imm:imm 32 $0.4 $2i,32 $1,16 386 -!jmpcall memfar nil FF $1,$0.4+1 nil 8086 -!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086 -!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386 -!jmpcall mem nil FF $1,$0.4 nil 8086 -!jmpcall rm16x 16 FF $1,$0.4 nil 8086 -!jmpcall rm32x 32 FF $1,$0.4 nil 386 -call!jmpcall nil,0,E8,9A,2 -jmp!jmpcall 1,EB,E9,EA,4 -ret!onebyte nil,C3 8086 -retn nil nil C3 nil nil 8086 -retf nil nil CB nil nil 8086 -retn imm16 nil C2 nil $1,16 8086 -retf imm16 nil CA nil $1,16 8086 -enter imm16,imm8 nil C8 $1i,16 $2,8 186 -leave!onebyte nil,C9 186 -; -; Conditional jumps -; -!jcc target nil 70+$0.1 0F,80+$0.1 8086 386 -jo!jcc 0 -jno!jcc 1 -jb!jcc 2 -jc!jcc 2 -jnae!jcc 2 -jnb!jcc 3 -jnc!jcc 3 -jae!jcc 3 -je!jcc 4 -jz!jcc 4 -jne!jcc 5 -jnz!jcc 5 -jbe!jcc 6 -jna!jcc 6 -jnbe!jcc 7 -ja!jcc 7 -js!jcc 8 -jns!jcc 9 -jp!jcc A -jpe!jcc A -jnp!jcc B -jpo!jcc B -jl!jcc C -jnge!jcc C -jnl!jcc D -jge!jcc D -jle!jcc E -jng!jcc E -jnle!jcc F -jg!jcc F -jcxz target 16 E3 nil 8086 8086 -jecxz target 32 E3 nil 386 386 -; -; Loop instructions -; -!loopg target nil E0+$0.1 nil 8086 8086 -!loopg target,reg_cx 16 E0+$0.1 nil 8086 8086 -!loopg target,reg_ecx 32 E0+$0.1 nil 386 386 -loop!loopg 2 -loopz!loopg 1 -loope!loopg 1 -loopnz!loopg 0 
-loopne!loopg 0 -; -; Set byte on flag instructions -; -!setcc rm8 nil 0F,90+$0.1 $1,2 nil 386 -seto!setcc 0 -setno!setcc 1 -setb!setcc 2 -setc!setcc 2 -setnae!setcc 2 -setnb!setcc 3 -setnc!setcc 3 -setae!setcc 3 -sete!setcc 4 -setz!setcc 4 -setne!setcc 5 -setnz!setcc 5 -setbe!setcc 6 -setna!setcc 6 -setnbe!setcc 7 -seta!setcc 7 -sets!setcc 8 -setns!setcc 9 -setp!setcc A -setpe!setcc A -setnp!setcc B -setpo!setcc B -setl!setcc C -setnge!setcc C -setnl!setcc D -setge!setcc D -setle!setcc E -setng!setcc E -setnle!setcc F -setg!setcc F -; -; String instructions -; -; NOTE: cmpsd,movsd can't go to !onebyte group because of other variations -cmpsb!onebyte nil,A6 8086 -cmpsw!onebyte 10,A7 8086 -cmpsd nil 32 A7 nil nil 386 -insb!onebyte nil,6C 8086 -insw!onebyte 10,6D 8086 -insd!onebyte 20,6D 386 -outsb!onebyte nil,6E 8086 -outsw!onebyte 10,6F 8086 -outsd!onebyte 20,6F 386 -lodsb!onebyte nil,AC 8086 -lodsw!onebyte 10,AD 8086 -lodsd!onebyte 20,AD 386 -movsb!onebyte nil,A4 8086 -movsw!onebyte 10,A5 8086 -movsd nil 32 A5 nil nil 386 -scasb!onebyte nil,AE 8086 -scasw!onebyte 10,AF 8086 -scasd!onebyte 20,AF 386 -stosb!onebyte nil,AA 8086 -stosw!onebyte 10,AB 8086 -stosd!onebyte 20,AB 386 -xlat!onebyte nil,D7 8086 -xlatb!onebyte nil,D7 8086 -; -; Bit manipulation -; -; Bit tests -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!bittest reg16,reg16 16 0F,$0.1 $1r,$2 nil 386 -!bittest mem,reg16 16 0F,$0.1 $1,$2 nil 386 -!bittest mem16x,reg16 16 0F,$0.1 $1,$2 nil 386 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!bittest reg32,reg32 32 0F,$0.1 $1r,$2 nil 386 -!bittest mem,reg32 32 0F,$0.1 $1,$2 nil 386 -!bittest mem32x,reg32 32 0F,$0.1 $1,$2 nil 386 -!bittest reg16,imm8 16 0F,BA $1r,$0.2 $2,8 386 -!bittest mem16x,imm8 16 0F,BA $1,$0.2 $2,8 386 -!bittest reg32,imm8 32 0F,BA $1r,$0.2 $2,8 386 -!bittest mem32x,imm8 32 0F,BA $1,$0.2 $2,8 386 -; Bit scans -!bsfr reg16,rm16 16 0F,BC+$0.1 $2,$1 nil 386 -!bsfr reg32,rm32 32 0F,BC+$0.1 $2,$1 nil 386 -bsf!bsfr 0 -bsr!bsfr 1 
-bt!bittest A3,4 -btc!bittest BB,7 -btr!bittest B3,6 -bts!bittest AB,5 -; -; Interrupts and operating system instructions -; -int imm8 nil CD nil $1,8 8086 -int3!onebyte nil,CC 8086 -int03!onebyte nil,CC 8086 -into!onebyte nil,CE 8086 -iret!onebyte nil,CF 8086 -iretw!onebyte 10,CF 8086 -iretd!onebyte 20,CF 386 -rsm!twobyte 0F,AA P5,SMM -bound reg16,mem16 16 62 $2,$1 nil 186 -bound reg32,mem32 32 62 $2,$1 nil 386 -hlt!onebyte nil,F4 8086,PRIV -nop!onebyte nil,90 8086 -; -; Protection control -; -; 286 rm16 protected mode group (LLDT/LTR/STR/VERR/VERW): -!prot286 rm16 nil 0F,00 $1,$0.1 nil 286,PROT,@0 -arpl rm16,reg16 nil 63 $1,$2 nil 286,PROT -lar reg16,rm16 16 0F,02 $2,$1 nil 286,PROT -lar reg32,rm32 32 0F,02 $2,$1 nil 386,PROT -lgdt!twobytemem 0F,01,2 286,PRIV -lidt!twobytemem 0F,01,3 286,PRIV -lldt!prot286 2 PRIV -lmsw rm16 nil 0F,01 $1,6 nil 286,PRIV -lsl reg16,rm16 16 0F,03 $2,$1 nil 286,PROT -lsl reg32,rm32 32 0F,03 $2,$1 nil 286,PROT -ltr!prot286 3 PRIV -sgdt!twobytemem 0F,01,0 286 -sidt!twobytemem 0F,01,1 286 -sldt mem1632 nil 0F,00 $1,0 nil 286 -sldt reg16 16 0F,00 $1r,0 nil 286 -sldt reg32 32 0F,00 $1r,0 nil 386 -smsw mem1632 nil 0F,01 $1,4 nil 286 -smsw reg16 16 0F,01 $1r,4 nil 286 -smsw reg32 32 0F,01 $1r,4 nil 386 -str!prot286 1 -verr!prot286 4 -verw!prot286 5 -; -; Floating point instructions -; -; Load -fld mem32x nil D9 $1,0 nil 8086,FPU -fld mem64x nil DD $1,0 nil 8086,FPU -fld mem80x nil DB $1,5 nil 8086,FPU -fld fpureg nil D9,C0+$1 nil nil 8086,FPU -fild mem16x nil DF $1,0 nil 8086,FPU -fild mem32x nil DB $1,0 nil 8086,FPU -fild mem64x nil DF $1,5 nil 8086,FPU -fbld mem80 nil DF $1,4 nil 8086,FPU -; Store -fst mem32x nil D9 $1,2 nil 8086,FPU -fst mem64x nil DD $1,2 nil 8086,FPU -fst fpureg nil DD,D0+$1 nil nil 8086,FPU -fist mem16x nil DF $1,2 nil 8086,FPU -fist mem32x nil DB $1,2 nil 8086,FPU -; Store (with pop) -fstp mem32x nil D9 $1,3 nil 8086,FPU -fstp mem64x nil DD $1,3 nil 8086,FPU -fstp mem80x nil DB $1,7 nil 8086,FPU -fstp fpureg nil 
DD,D8+$1 nil nil 8086,FPU -fistp mem16x nil DF $1,3 nil 8086,FPU -fistp mem32x nil DB $1,3 nil 8086,FPU -fistp mem64x nil DF $1,7 nil 8086,FPU -fbstp mem80 nil DF $1,6 nil 8086,FPU -; Exchange (with ST0) -fxch fpureg nil D9,C8+$1 nil nil 8086,FPU -fxch ST0,ST0 nil D9,C8 nil nil 8086,FPU -fxch ST0,FPUREG_NOTST0 nil D9,C8+$2 nil nil 8086,FPU -fxch FPUREG_NOTST0,ST0 nil D9,C8+$1 nil nil 8086,FPU -fxch nil nil D9,C9 nil nil 8086,FPU -; Comparisons -!fcomg mem32x nil D8 $1,$0.1 nil 8086,FPU -!fcomg mem64x nil DC $1,$0.1 nil 8086,FPU -!fcomg fpureg nil D8,$0.2+$1 nil nil 8086,FPU -!fcomg ST0,fpureg nil D8,$0.2+$2 nil nil 8086,FPU -; Extended comparisons -!fcomg2 fpureg nil $0.1,$0.2+$1 nil nil @0,FPU -!fcomg2 ST0,fpureg nil $0.1,$0.2+$2 nil nil @0,FPU -; Comparison (without pop) -fcom!fcomg 2,D0 -ficom mem16x nil DE $1,2 nil 8086,FPU -ficom mem32x nil DA $1,2 nil 8086,FPU -; Comparison (with pop) -fcomp!fcomg 3,D8 -ficomp mem16x nil DE $1,3 nil 8086,FPU -ficomp mem32x nil DA $1,3 nil 8086,FPU -fcompp!twobyte DE,D9 8086,FPU -; Unordered comparison (with pop) -fucom!fcomg2 DD,E0 286,FPU -fucomp!fcomg2 DD,E8 286,FPU -fucompp!twobyte DA,E9 286,FPU -ftst!twobyte D9,E4 8086,FPU -fxam!twobyte D9,E5 8086,FPU -; Load constants into ST0 -fld1!twobyte D9,E8 8086,FPU -fldl2t!twobyte D9,E9 8086,FPU -fldl2e!twobyte D9,EA 8086,FPU -fldpi!twobyte D9,EB 8086,FPU -fldlg2!twobyte D9,EC 8086,FPU -fldln2!twobyte D9,ED 8086,FPU -fldz!twobyte D9,EE 8086,FPU -; Arithmetic -!farith mem32x nil D8 $1,$0.1 nil 8086,FPU -!farith mem64x nil DC $1,$0.1 nil 8086,FPU -!farith fpureg nil D8,$0.2+$1 nil nil 8086,FPU -!farith ST0,ST0 nil D8,$0.2 nil nil 8086,FPU -!farith ST0,FPUREG_NOTST0 nil D8,$0.2+$2 nil nil 8086,FPU -!farith TO fpureg nil DC,$0.3+$1 nil nil 8086,FPU -!farith FPUREG_NOTST0,ST0 nil DC,$0.3+$1 nil nil 8086,FPU -!farithp fpureg nil DE,$0.1+$1 nil nil 8086,FPU -!farithp fpureg,ST0 nil DE,$0.1+$1 nil nil 8086,FPU -!fiarith mem32x nil DA $1,$0.1 nil 8086,FPU -!fiarith mem16x nil DE $1,$0.1 
nil 8086,FPU -fadd!farith 0,C0,C0 -faddp!farithp C0 -fiadd!fiarith 0 -fsub!farith 4,E0,E8 -fisub!fiarith 4 -fsubp!farithp E8 -fsubr!farith 5,E8,E0 -fisubr!fiarith 5 -fsubrp!farithp E0 -; Multiply -fmul!farith 1,C8,C8 -fimul!fiarith 1 -fmulp!farithp C8 -; Divide -fdiv!farith 6,F0,F8 -fidiv!fiarith 6 -fdivp!farithp F8 -fdivr!farith 7,F8,F0 -fidivr!fiarith 7 -fdivrp!farithp F0 -; Other arithmetic -f2xm1!twobyte D9,F0 8086,FPU -fyl2x!twobyte D9,F1 8086,FPU -fptan!twobyte D9,F2 8086,FPU -fpatan!twobyte D9,F3 8086,FPU -fxtract!twobyte D9,F4 8086,FPU -fprem1!twobyte D9,F5 286,FPU -fdecstp!twobyte D9,F6 8086,FPU -fincstp!twobyte D9,F7 8086,FPU -fprem!twobyte D9,F8 8086,FPU -fyl2xp1!twobyte D9,F9 8086,FPU -fsqrt!twobyte D9,FA 8086,FPU -fsincos!twobyte D9,FB 286,FPU -frndint!twobyte D9,FC 8086,FPU -fscale!twobyte D9,FD 8086,FPU -fsin!twobyte D9,FE 286,FPU -fcos!twobyte D9,FF 286,FPU -fchs!twobyte D9,E0 8086,FPU -fabs!twobyte D9,E1 8086,FPU -; Processor control -fninit!twobyte DB,E3 8086,FPU -finit!threebyte 9B,DB,E3 8086,FPU -fldcw mem16 nil D9 $1,5 nil 8086,FPU -fnstcw mem16 nil D9 $1,7 nil 8086,FPU -fstcw mem16 nil 9B,D9 $1,7 nil 8086,FPU -fnstsw mem16 nil DD $1,7 nil 8086,FPU -fnstsw reg_ax nil DF,E0 nil nil 8086,FPU -fstsw mem16 nil 9B,DD $1,7 nil 8086,FPU -fstsw reg_ax nil 9B,DF,E0 nil nil 8086,FPU -fnclex!twobyte DB,E2 8086,FPU -fclex!threebyte 9B,DB,E2 8086,FPU -fnstenv!onebytemem D9,6 8086,FPU -fstenv!twobytemem 9B,D9,6 8086,FPU -fldenv!onebytemem D9,4 8086,FPU -fnsave!onebytemem DD,6 8086,FPU -fsave!twobytemem 9B,DD,6 8086,FPU -frstor!onebytemem DD,4 8086,FPU -ffree fpureg nil DD,C0+$1 nil nil 8086,FPU -ffreep fpureg nil DF,C0+$1 nil nil P6,FPU,UNDOC -fnop!twobyte D9,D0 8086,FPU -fwait!onebyte nil,9B 8086,FPU -; -; Prefixes (should the others be here too? should wait be a prefix?) 
-; -wait!onebyte nil,9B 8086 -; -; 486 extensions -; -; Compare & exchange, exchange & add -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg8,reg8 nil 0F,$0.1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg8 nil 0F,$0.1 $1,$2 nil @0 -!cmpxchgxadd mem8x,reg8 nil 0F,$0.1 $1,$2 nil @0 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg16,reg16 16 0F,$0.1+1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg16 16 0F,$0.1+1 $1,$2 nil @0 -!cmpxchgxadd mem16x,reg16 16 0F,$0.1+1 $1,$2 nil @0 -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -!cmpxchgxadd reg32,reg32 32 0F,$0.1+1 $1r,$2 nil @0 -!cmpxchgxadd mem,reg32 32 0F,$0.1+1 $1,$2 nil @0 -!cmpxchgxadd mem32x,reg32 32 0F,$0.1+1 $1,$2 nil @0 -bswap reg32 32 0F,C8+$1 nil nil 486 -xadd!cmpxchgxadd C0 486 -cmpxchg!cmpxchgxadd B0 486 -cmpxchg486!cmpxchgxadd A6 486,UNDOC -invd!twobyte 0F,08 486,PRIV -wbinvd!twobyte 0F,09 486,PRIV -invlpg!twobytemem 0F,01,7 486,PRIV -; -; 586 and late 486 extensions -; -cpuid!twobyte 0F,A2 486 -; -; Pentium extensions -; -wrmsr!twobyte 0F,30 P5,PRIV -rdtsc!twobyte 0F,31 P5 -rdmsr!twobyte 0F,32 P5,PRIV -cmpxchg8b mem64 nil 0F,C7 $1,1 nil P5 -; -; Pentium II/Pentium Pro extensions -; -sysenter!twobyte 0F,34 P6 -sysexit!twobyte 0F,35 P6,PRIV -fxsave!twobytemem 0F,AE,0 P6,FPU -fxrstor!twobytemem 0F,AE,1 P6,FPU -rdpmc!twobyte 0F,33 P6 -ud2!twobyte 0F,0B 286 -ud1!twobyte 0F,B9 286,UNDOC -; cmov -; fcmov -fcomi!fcomg2 DB,F0 P6 -fucomi!fcomg2 DB,E8 P6 -fcomip!fcomg2 DF,F0 P6 -fucomip!fcomg2 DF,E8 P6 -; -; Pentium4 extensions -; -movnti mem32,reg32 nil 0F,C3 $1,$2 nil P4 -clflush mem8 nil 0F,AE $1,7 nil KATMAI -lfence!threebyte 0F,AE,E8 KATMAI -mfence!threebyte 0F,AE,F0 KATMAI -pause!twobyte F3,90 P4 -; -; MMX/SSE2 instructions -; -; General -!mmxsse MMXREG,rm64 nil 0F,$0.1 $2,$1 nil @0,MMX -!mmxsse XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil @1 -; Shifts -!pshift MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX -!pshift XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2 -!pshift MMXREG,imm8 nil 
0F,$0.2 $1r,$0.3 $2,8 P5,MMX -!pshift XMMREG,imm8 nil 66,0F,$0.2 $1r,$0.3 $2,8 P4,SSE2 -emms!twobyte 0F,77 P5,MMX -movd MMXREG,rm32 nil 0F,6E $2,$1 nil P5,MMX -movd rm32,MMXREG nil 0F,7E $1,$2 nil P5,MMX -movd XMMREG,rm32 nil 66,0F,6E $2,$1 nil P4,SSE2 -movd rm32,XMMREG nil 66,0F,7E $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movq MMXREG,MMXREG nil 0F,6F $2r,$1 nil P5,MMX -movq MMXREG,mem64 nil 0F,6F $2,$1 nil P5,MMX -movq mem64,MMXREG nil 0F,7F $1,$2 nil P5,MMX -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movq XMMREG,XMMREG nil F3,0F,7E $2r,$1 nil P4,SSE2 -movq XMMREG,mem64 nil F3,0F,7E $2,$1 nil P4,SSE2 -movq mem64,XMMREG nil 66,0F,D6 $1,$2 nil P4,SSE2 -packssdw!mmxsse 6B P5 P4,SSE2 -packsswb!mmxsse 63 P5 P4,SSE2 -packuswb!mmxsse 67 P5 P4,SSE2 -paddb!mmxsse FC P5 P4,SSE2 -paddw!mmxsse FD P5 P4,SSE2 -paddd!mmxsse FE P5 P4,SSE2 -paddq!mmxsse D4 P5 P4,SSE2 -paddsb!mmxsse EC P5 P4,SSE2 -paddsw!mmxsse ED P5 P4,SSE2 -paddusb!mmxsse DC P5 P4,SSE2 -paddusw!mmxsse DD P5 P4,SSE2 -pand!mmxsse DB P5 P4,SSE2 -pandn!mmxsse DF P5 P4,SSE2 -pcmpeqb!mmxsse 74 P5 P4,SSE2 -pcmpeqw!mmxsse 75 P5 P4,SSE2 -pcmpeqd!mmxsse 76 P5 P4,SSE2 -pcmpgtb!mmxsse 64 P5 P4,SSE2 -pcmpgtw!mmxsse 65 P5 P4,SSE2 -pcmpgtd!mmxsse 66 P5 P4,SSE2 -pmaddwd!mmxsse F5 P5 P4,SSE2 -pmulhw!mmxsse E5 P5 P4,SSE2 -pmullw!mmxsse D5 P5 P4,SSE2 -por!mmxsse EB P5 P4,SSE2 -psllw!pshift F1,71,6 -pslld!pshift F2,72,6 -psllq!pshift F3,73,6 -psraw!pshift E1,71,4 -psrad!pshift E2,72,4 -psrlw!pshift D1,71,2 -psrld!pshift D2,72,2 -psrlq!pshift D3,73,2 -psubb MMXREG,imm8 nil 0F,F8 $1r,2 $2,8 P5,MMX -psubb XMMREG,imm8 nil 66,0F,F8 $1r,2 $2,8 P4,SSE2 -psubw MMXREG,imm8 nil 0F,F9 $1r,2 $2,8 P5,MMX -psubw XMMREG,imm8 nil 66,0F,F9 $1r,2 $2,8 P4,SSE2 -psubd!mmxsse FA P5 P4,SSE2 -psubq!mmxsse FB P5 P4,SSE2 -psubsb!mmxsse E8 P5 P4,SSE2 -psubsw!mmxsse E9 P5 P4,SSE2 -psubusb!mmxsse D8 P5 P4,SSE2 -psubusw!mmxsse D9 P5 P4,SSE2 -punpckhbw!mmxsse 68 P5 P4,SSE2 -punpckhwd!mmxsse 69 P5 P4,SSE2 
-punpckhdq!mmxsse 6A P5 P4,SSE2 -punpcklbw!mmxsse 60 P5 P4,SSE2 -punpcklwd!mmxsse 61 P5 P4,SSE2 -punpckldq!mmxsse 62 P5 P4,SSE2 -pxor!mmxsse EF P5 P4,SSE2 -; -; PIII (Katmai) new instructions / SIMD instructions -; -; Standard -!sseps XMMREG,rm128 nil 0F,$0.1 $2,$1 nil @0 -!ssess XMMREG,rm128 nil F3,0F,$0.1 $2,$1 nil @0 -; With immediate -!ssepsimm XMMREG,rm128,imm8 nil 0F,$0.1 $2,$1 $3,8 KATMAI,SSE -; Comparisons -!ssecmpps XMMREG,rm128 nil 0F,C2 $2,$1 $0.1,8 KATMAI,SSE -!ssecmpss XMMREG,rm128 nil F3,0F,C2 $2,$1 $0.1,8 KATMAI,SSE -addps!sseps 58 KATMAI,SSE -addss!ssess 58 KATMAI,SSE -andnps!sseps 55 KATMAI,SSE -andps!sseps 54 KATMAI,SSE -cmpeqps!ssecmpps 0 -cmpeqss!ssecmpss 0 -cmpleps!ssecmpps 2 -cmpless!ssecmpss 2 -cmpltps!ssecmpps 1 -cmpltss!ssecmpss 1 -cmpneqps!ssecmpps 4 -cmpneqss!ssecmpss 4 -cmpnleps!ssecmpps 6 -cmpnless!ssecmpss 6 -cmpnltps!ssecmpps 5 -cmpnltss!ssecmpss 5 -cmpordps!ssecmpps 7 -cmpordss!ssecmpss 7 -cmpunordps!ssecmpps 3 -cmpunordss!ssecmpss 3 -cmpps!ssepsimm C2 -cmpss XMMREG,rm128,imm8 nil F3,0F,C2 $2,$1 $3,8 KATMAI,SSE -comiss!sseps 2F KATMAI,SSE -cvtpi2ps!sseps 2A KATMAI,SSE -cvtps2pi!sseps 2D KATMAI,SSE -cvtsi2ss!ssess 2A KATMAI,SSE -cvtss2si!ssess 2D KATMAI,SSE -cvttps2pi!sseps 2C KATMAI,SSE -cvttss2si!ssess 2C KATMAI,SSE -divps!sseps 5E KATMAI,SSE -divss!ssess 5E KATMAI,SSE -ldmxcsr mem32 nil 0F,AE $1,2 nil KATMAI,SSE -maskmovq MMXREG,MMXREG nil 0F,F7 $2r,$1 nil KATMAI,MMX -maxps!sseps 5F KATMAI,SSE -maxss!ssess 5F KATMAI,SSE -minps!sseps 5D KATMAI,SSE -minss!ssess 5D KATMAI,SSE -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movaps XMMREG,XMMREG nil 0F,28 $2r,$1 nil KATMAI,SSE -movaps XMMREG,mem128 nil 0F,28 $2,$1 nil KATMAI,SSE -movaps mem128,XMMREG nil 0F,29 $1,$2 nil KATMAI,SSE -movhlps XMMREG,XMMREG nil 0F,12 $2r,$1 nil KATMAI,SSE -movhps XMMREG,mem64 nil 0F,16 $2,$1 nil KATMAI,SSE -movhps mem64,XMMREG nil 0F,17 $1,$2 nil KATMAI,SSE -movlhps XMMREG,XMMREG nil 0F,16 $2r,$1 nil KATMAI,SSE -movlps XMMREG,mem64 nil 0F,12 $2,$1 
nil KATMAI,SSE -movlps mem64,XMMREG nil 0F,13 $1,$2 nil KATMAI,SSE -movmskps reg32,XMMREG nil 0F,50 $1r,$2 nil KATMAI,SSE -movntps mem128,XMMREG nil 0F,2B $1,$2 nil KATMAI,SSE -movntq mem64,MMXREG nil 0F,E7 $1,$2 nil KATMAI,MMX -movntdq mem128,XMMREG nil 66,0F,E7 $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movss XMMREG,XMMREG nil F3,0F,10 $2r,$1 nil KATMAI,SSE -movss XMMREG,mem64 nil F3,0F,10 $2,$1 nil KATMAI,SSE -movss mem64,XMMREG nil F3,0F,11 $1,$2 nil KATMAI,SSE -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movups XMMREG,XMMREG nil 0F,10 $2r,$1 nil KATMAI,SSE -movups XMMREG,mem64 nil 0F,10 $2,$1 nil KATMAI,SSE -movups mem64,XMMREG nil 0F,11 $1,$2 nil KATMAI,SSE -mulps!sseps 59 KATMAI,SSE -mulss!ssess 59 KATMAI,SSE -orps!sseps 56 KATMAI,SSE -pavgb!mmxsse E0 KATMAI P4,SSE2 -pavgw!mmxsse E3 KATMAI P4,SSE2 -pextrw reg32,MMXREG,imm8 nil 0F,C5 $1r,$2 $3,8 KATMAI,MMX -pextrw reg32,XMMREG,imm8 nil 66,0F,C5 $1r,$2 $3,8 P4,SSE2 -pinsrw MMXREG,reg32,imm8 nil 0F,C4 $2r,$1 $3,8 KATMAI,MMX -pinsrw MMXREG,rm16,imm8 nil 0F,C4 $2,$1 $3,8 KATMAI,MMX -pinsrw XMMREG,reg32,imm8 nil 66,0F,C4 $2r,$1 $3,8 P4,SSE2 -pinsrw XMMREG,rm16,imm8 nil 66,0F,C4 $2,$1 $3,8 P4,SSE2 -pmaxsw!mmxsse EE KATMAI P4,SSE2 -pmaxub!mmxsse DE KATMAI P4,SSE2 -pminsw!mmxsse EA KATMAI P4,SSE2 -pminub!mmxsse DA KATMAI P4,SSE2 -pmovmskb reg32,MMXREG nil 0F,D7 $1r,$2 nil KATMAI,SSE -pmovmskb reg32,XMMREG nil 66,0F,D7 $1r,$2 nil P4,SSE2 -pmulhuw!mmxsse E4 KATMAI P4,SSE2 -prefetchnta!twobytemem 0F,18,0 KATMAI -prefetcht0!twobytemem 0F,18,1 KATMAI -prefetcht1!twobytemem 0F,18,2 KATMAI -prefetcht2!twobytemem 0F,18,3 KATMAI -psadbw!mmxsse F6 KATMAI KATMAI,SSE -pshufw MMXREG,rm64,imm8 nil 0F,70 $2,$1 $3,8 KATMAI,MMX -rcpps!sseps 53 KATMAI,SSE -rcpss!ssess 53 KATMAI,SSE -rsqrtps!sseps 52 KATMAI,SSE -rsqrtss!ssess 52 KATMAI,SSE -sfence!threebyte 0F,AE,F8 KATMAI -shufps!ssepsimm C6 -sqrtps!sseps 51 KATMAI,SSE -sqrtss!ssess 51 KATMAI,SSE -stmxcsr mem32 nil 0F,AE $1,3 nil KATMAI,SSE 
-subps!sseps 5C KATMAI,SSE -subss!ssess 5C KATMAI,SSE -ucomiss!ssess 2E KATMAI,SSE -unpckhps!sseps 15 KATMAI,SSE -unpcklps!sseps 14 KATMAI,SSE -xorps!sseps 57 KATMAI,SSE -; -; SSE2 instructions -; -; Standard -!sse2pd XMMREG,rm128 nil 66,0F,$0.1 $2,$1 nil P4,SSE2 -!sse2sd XMMREG,rm128 nil F2,0F,$0.1 $2,$1 nil P4,SSE2 -; With immediate -!sse2pdimm XMMREG,rm128,imm8 nil 66,0F,$0.1 $2,$1 $3,8 P4,SSE2 -; Comparisons -!sse2cmppd XMMREG,rm128 nil 66,0F,C2 $2,$1 $0.1,8 P4,SSE2 -!sse2cmpsd XMMREG,rm128 nil F2,0F,C2 $2,$1 $0.1,8 P4,SSE2 -addpd!sse2pd 58 -addsd!sse2sd 58 -andnpd!sse2pd 55 -andpd!sse2pd 54 -cmpeqpd!sse2cmppd 0 -cmpeqsd!sse2cmpsd 0 -cmplepd!sse2cmppd 2 -cmplesd!sse2cmpsd 2 -cmpltpd!sse2cmppd 1 -cmpltsd!sse2cmpsd 1 -cmpneqpd!sse2cmppd 4 -cmpneqsd!sse2cmpsd 4 -cmpnlepd!sse2cmppd 6 -cmpnlesd!sse2cmpsd 6 -cmpnltpd!sse2cmppd 5 -cmpnltsd!sse2cmpsd 5 -cmpordpd!sse2cmppd 7 -cmpordsd!sse2cmpsd 7 -cmpunordpd!sse2cmppd 3 -cmpunordsd!sse2cmpsd 3 -cmppd!sse2pdimm C2 -cmpsd XMMREG,rm128,imm8 nil F2,0F,C2 $2,$1 $3,8 P4,SSE2 -comisd!sse2pd 2F -cvtpi2pd!sse2pd 2A -cvtsi2sd!sse2sd 2A -divpd!sse2pd 5E -divsd!sse2sd 5E -maxpd!sse2pd 5F -maxsd!sse2sd 5F -minpd!sse2pd 5D -minsd!sse2sd 5D -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movapd XMMREG,XMMREG nil 66,0F,28 $2r,$1 nil P4,SSE2 -movapd XMMREG,mem128 nil 66,0F,28 $2,$1 nil P4,SSE2 -movapd mem128,XMMREG nil 66,0F,29 $1,$2 nil P4,SSE2 -movhpd XMMREG,mem64 nil 66,0F,16 $2,$1 nil P4,SSE2 -movhpd mem64,XMMREG nil 66,0F,17 $1,$2 nil P4,SSE2 -movlpd XMMREG,mem64 nil 66,0F,12 $2,$1 nil P4,SSE2 -movlpd mem64,XMMREG nil 66,0F,13 $1,$2 nil P4,SSE2 -movmskpd reg32,XMMREG nil 66,0F,50 $1r,$2 nil P4,SSE2 -movntpd mem128,XMMREG nil 66,0F,2B $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movsd XMMREG,XMMREG nil F2,0F,10 $2r,$1 nil P4,SSE2 -movsd XMMREG,mem64 nil F2,0F,10 $2,$1 nil P4,SSE2 -movsd mem64,XMMREG nil F2,0F,11 $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movupd 
XMMREG,XMMREG nil 66,0F,10 $2r,$1 nil P4,SSE2 -movupd XMMREG,mem64 nil 66,0F,10 $2,$1 nil P4,SSE2 -movupd mem64,XMMREG nil 66,0F,11 $1,$2 nil P4,SSE2 -mulpd!sse2pd 59 -mulsd!sse2sd 59 -orpd!sse2pd 56 -shufpd!sse2pdimm C6 -sqrtpd!sse2pd 51 -sqrtsd!sse2sd 51 -subpd!sse2pd 5C -subsd!sse2sd 5C -ucomisd!sse2sd 2E -unpckhpd!sse2pd 15 -unpcklpd!sse2pd 14 -xorpd!sse2pd 57 -cvtdq2pd!ssess E6 P4,SSE2 -cvtpd2dq!sse2sd E6 -cvtdq2ps!sseps 5B P4,SSE2 -cvtpd2pi!sse2pd 2D -cvtpd2ps!sse2pd 5A -cvtps2pd!sseps 5A P4,SSE2 -cvtps2dq!sse2pd 5B -cvtsd2si!sse2sd 2D -cvtsd2ss!sse2sd 5A -cvtss2sd!ssess 5A P4,SSE2 -cvttpd2pi!sse2pd 2C -cvttsd2si!sse2sd 2C -cvttpd2dq!sse2pd E6 -cvttps2dq!ssess 5B P4,SSE2 -maskmovdqu XMMREG,XMMREG nil 66,0F,F7 $2r,$1 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movdqa XMMREG,XMMREG nil 66,0F,6F $2r,$1 nil P4,SSE2 -movdqa XMMREG,mem128 nil 66,0F,6F $2,$1 nil P4,SSE2 -movdqa mem128,XMMREG nil 66,0F,7F $1,$2 nil P4,SSE2 -; arbitrary encoding, picked $2r,$1 instead of $1r,$2 -movdqu XMMREG,XMMREG nil F3,0F,6F $2r,$1 nil P4,SSE2 -movdqu XMMREG,mem128 nil F3,0F,6F $2,$1 nil P4,SSE2 -movdqu mem128,XMMREG nil F3,0F,7F $1,$2 nil P4,SSE2 -movdq2q MMXREG,XMMREG nil F2,0F,D6 $2r,$1 nil P4,SSE2 -movq2dq XMMREG,MMXREG nil F3,0F,D6 $2r,$1 nil P4,SSE2 -pmuludq!mmxsse F4 P4 P4,SSE2 -pshufd!sse2pdimm 70 -pshufhw XMMREG,rm128,imm8 nil F3,0F,70 $2,$1 $3,8 P4,SSE2 -pshuflw XMMREG,rm128,imm8 nil F2,0F,70 $2,$1 $3,8 P4,SSE2 -pslldq XMMREG,imm8 nil 66,0F,73 $1r,7 $2,8 P4,SSE2 -psrldq XMMREG,imm8 nil 66,0F,73 $1r,3 $2,8 P4,SSE2 -punpckhqdq!sse2pd 6D -punpcklqdq!sse2pd 6C -; -; AMD 3DNow! 
instructions -; -!now3d MMXREG,rm64 nil 0F,0F $2,$1 $0.1,8 @0,3DNOW,AMD -prefetch!twobytemem 0F,0D,0 P5,3DNOW,AMD -prefetchw!twobytemem 0F,0D,1 P5,3DNOW,AMD -femms!twobyte 0F,0E P5,3DNOW,AMD -pavgusb!now3d BF P5 -pf2id!now3d 1D P5 -pf2iw!now3d 1C ATHLON -pfacc!now3d AE P5 -pfadd!now3d 9E P5 -pfcmpeq!now3d B0 P5 -pfcmpge!now3d 90 P5 -pfcmpgt!now3d A0 P5 -pfmax!now3d A4 P5 -pfmin!now3d 94 P5 -pfmul!now3d B4 P5 -pfnacc!now3d 8A ATHLON -pfpnacc!now3d 8E ATHLON -pfrcp!now3d 96 P5 -pfrcpit1!now3d A6 P5 -pfrcpit2!now3d B6 P5 -pfrsqit1!now3d A7 P5 -pfrsqrt!now3d 97 P5 -pfsub!now3d 9A P5 -pfsubr!now3d AA P5 -pi2fd!now3d 0D P5 -pi2fw!now3d 0C ATHLON -pmulhrwa!now3d B7 P5 -pswapd!now3d BB ATHLON -; -; AMD extensions -; -syscall!twobyte 0F,05 P6,AMD -sysret!twobyte 0F,07 P6,PRIV,AMD -; swapgs -; -; Cyrix MMX instructions -; -!cyrixmmx MMXREG,rm64 nil 0F,$0.1 $2,$1 nil P5,MMX,CYRIX -paddsiw!cyrixmmx 51 -paveb!cyrixmmx 50 -pdistib!cyrixmmx 54 -pmachriw MMXREG,mem64 nil 0F,5E $2,$1 nil P5,MMX,CYRIX -pmagw!cyrixmmx 52 -pmulhriw!cyrixmmx 5D -pmulhrwc!cyrixmmx 59 -pmvgezb!cyrixmmx 5C -pmvlzb!cyrixmmx 5B -pmvnzb!cyrixmmx 5A -pmvzb!cyrixmmx 58 -psubsiw!cyrixmmx 55 -; -; Cyrix extensions -; -!cyrixsmm mem80 nil 0F,$0.1 $1,0 nil 486,CYRIX,SMM -rdshr!twobyte 0F,36 P6,CYRIX,SMM -rsdc segreg,mem80 nil 0F,79 $2,$1 nil 486,CYRIX,SMM -rsldt!cyrixsmm 7B -rsts!cyrixsmm 7D -svdc mem80,segreg nil 0F,78 $1,$2 nil 486,CYRIX,SMM -svldt!cyrixsmm 7A -svts!cyrixsmm 7C -smint!twobyte 0F,38 P6,CYRIX -smintold!twobyte 0F,7E 486,CYRIX,OBS -wrshr!twobyte 0F,37 P6,CYRIX,SMM -; -; Obsolete/Undocumented Instructions -; -fsetpm!twobyte DB,E4 286,FPU,OBS -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -ibts reg16,reg16 16 0F,A7 $1r,$2 nil 386,UNDOC,OBS -ibts mem,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS -ibts mem16x,reg16 16 0F,A7 $1,$2 nil 386,UNDOC,OBS -; arbitrary encoding, picked $1r,$2 instead of $2r,$1 -ibts reg32,reg32 32 0F,A7 $1r,$2 nil 386,UNDOC,OBS -ibts mem,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS 
-ibts mem32x,reg32 32 0F,A7 $1,$2 nil 386,UNDOC,OBS -loadall!twobyte 0F,07 386,UNDOC -loadall286!twobyte 0F,05 286,UNDOC -;pop reg_cs nil 0F nil nil 8086,UNDOC,OBS -salc!onebyte nil,D6 8086,UNDOC -smi!onebyte nil,F1 386,UNDOC -; opcode arbitrarily picked for next 3 (could be 12/13 instead of 10/11). -umov reg8,reg8 nil 0F,10 $1r,$2 nil 386,UNDOC -umov reg16,reg16 16 0F,11 $1r,$2 nil 386,UNDOC -umov reg32,reg32 32 0F,11 $1r,$2 nil 386,UNDOC -umov mem,reg8 nil 0F,10 $1,$2 nil 386,UNDOC -umov mem8x,reg8 nil 0F,10 $1,$2 nil 386,UNDOC -umov mem,reg16 16 0F,11 $1,$2 nil 386,UNDOC -umov mem16x,reg16 16 0F,11 $1,$2 nil 386,UNDOC -umov mem,reg32 32 0F,11 $1,$2 nil 386,UNDOC -umov mem32x,reg32 32 0F,11 $1,$2 nil 386,UNDOC -umov reg8,mem8 nil 0F,12 $2,$1 nil 386,UNDOC -umov reg16,mem16 16 0F,13 $2,$1 nil 386,UNDOC -umov reg32,mem32 32 0F,13 $2,$1 nil 386,UNDOC -xbts reg16,mem16 16 0F,A6 $2,$1 nil 386,UNDOC,OBS -xbts reg32,mem32 32 0F,A6 $2,$1 nil 386,UNDOC,OBS diff --git a/src/arch/x86/x86-int.h b/src/arch/x86/x86-int.h deleted file mode 100644 index 86811b62..00000000 --- a/src/arch/x86/x86-int.h +++ /dev/null @@ -1,110 +0,0 @@ -/* $IdPath$ - * x86 internals header file - * - * Copyright (C) 2001 Peter Johnson - * - * This file is part of YASM. - * - * YASM is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * YASM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef YASM_X86_INT_H -#define YASM_X86_INT_H - -typedef struct x86_effaddr_data { - unsigned char segment; /* segment override, 0 if none */ - - /* How the spare (register) bits in Mod/RM are handled: - * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) - * They're set in bytecode_new_insn(). - */ - unsigned char modrm; - unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ - unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ - - unsigned char sib; - unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ - unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, - 0xff if unknown */ -} x86_effaddr_data; - -typedef struct x86_insn { - /*@null@*/ effaddr *ea; /* effective address */ - - /*@null@*/ immval *imm; /* immediate or relative value */ - - unsigned char opcode[3]; /* opcode */ - unsigned char opcode_len; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - /* HACK, but a space-saving one: shift opcodes have an immediate - * form and a ,1 form (with no immediate). In the parser, we - * set this and opcode_len=1, but store the ,1 version in the - * second byte of the opcode array. We then choose between the - * two versions once we know the actual value of imm (because we - * don't know it in the parser module). - * - * A override to force the imm version should just leave this at - * 0. Then later code won't know the ,1 version even exists. - * TODO: Figure out how this affects CPU flags processing. - * - * Call x86_SetInsnShiftFlag() to set this flag to 1. 
- */ - unsigned char shift_op; - - /* HACK, similar to that for shift_op above, for optimizing instructions - * that take a sign-extended imm8 as well as imm values (eg, the arith - * instructions and a subset of the imul instructions). - */ - unsigned char signext_imm8_op; - - unsigned char mode_bits; -} x86_insn; - -typedef struct x86_jmprel { - expr *target; /* target location */ - - struct { - unsigned char opcode[3]; - unsigned char opcode_len; /* 0 = no opc for this version */ - } shortop, nearop; - - /* which opcode are we using? */ - /* The *FORCED forms are specified in the source as such */ - x86_jmprel_opcode_sel op_sel; - - unsigned char addrsize; /* 0 or =mode_bits => no override */ - unsigned char opersize; /* 0 indicates no override */ - unsigned char lockrep_pre; /* 0 indicates no prefix */ - - unsigned char mode_bits; -} x86_jmprel; - -void x86_bc_delete(bytecode *bc); -void x86_bc_print(FILE *f, const bytecode *bc); -bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect, - resolve_label_func resolve_label); -int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, - void *d, output_expr_func output_expr); - -int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, - unsigned char nosplit, unsigned char *displen, - unsigned char *modrm, unsigned char *v_modrm, - unsigned char *n_modrm, unsigned char *sib, - unsigned char *v_sib, unsigned char *n_sib); - -#endif diff --git a/src/arch/x86/x86arch.c b/src/arch/x86/x86arch.c index 755e8bed..c43feb11 100644 --- a/src/arch/x86/x86arch.c +++ b/src/arch/x86/x86arch.c @@ -1,7 +1,7 @@ /* * x86 architecture description * - * Copyright (C) 2001 Peter Johnson + * Copyright (C) 2002 Peter Johnson * * This file is part of YASM. 
* @@ -22,23 +22,164 @@ #include "util.h" /*@unused@*/ RCSID("$IdPath$"); +#include "globals.h" +#include "errwarn.h" +#include "intnum.h" +#include "expr.h" + #include "bytecode.h" + #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" unsigned char x86_mode_bits = 0; +int +x86_directive(const char *name, valparamhead *valparams, + /*@unused@*/ /*@null@*/ valparamhead *objext_valparams, + /*@unused@*/ sectionhead *headp) +{ + valparam *vp; + const intnum *intn; + long lval; + + if (strcasecmp(name, "bits") == 0) { + if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && + (intn = expr_get_intnum(&vp->param)) != NULL && + (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) + x86_mode_bits = (unsigned char)lval; + else + Error(_("invalid argument to [%s]"), "BITS"); + return 0; + } else + return 1; +} + +unsigned int +x86_get_reg_size(unsigned long reg) +{ + switch ((x86_expritem_reg_size)(reg & ~7)) { + case X86_REG8: + return 1; + case X86_REG16: + return 2; + case X86_REG32: + case X86_CRREG: + case X86_DRREG: + case X86_TRREG: + return 4; + case X86_MMXREG: + return 8; + case X86_XMMREG: + return 16; + case X86_FPUREG: + return 10; + default: + InternalError(_("unknown register size")); + } + return 0; +} + +void +x86_reg_print(FILE *f, unsigned long reg) +{ + static const char *name8[] = {"al","cl","dl","bl","ah","ch","dh","bh"}; + static const char *name1632[] = {"ax","cx","dx","bx","sp","bp","si","di"}; + + switch ((x86_expritem_reg_size)(reg&~7)) { + case X86_REG8: + fprintf(f, "%s", name8[reg&7]); + break; + case X86_REG16: + fprintf(f, "%s", name1632[reg&7]); + break; + case X86_REG32: + fprintf(f, "e%s", name1632[reg&7]); + break; + case X86_MMXREG: + fprintf(f, "mm%d", (int)(reg&7)); + break; + case X86_XMMREG: + fprintf(f, "xmm%d", (int)(reg&7)); + break; + case X86_CRREG: + fprintf(f, "cr%d", (int)(reg&7)); + break; + case X86_DRREG: + fprintf(f, "dr%d", (int)(reg&7)); + break; + case X86_TRREG: + fprintf(f, "tr%d", 
(int)(reg&7)); + break; + case X86_FPUREG: + fprintf(f, "st%d", (int)(reg&7)); + break; + default: + InternalError(_("unknown register size")); + } +} + +void +x86_segreg_print(FILE *f, unsigned long segreg) +{ + static const char *name[] = {"es","cs","ss","ds","fs","gs"}; + fprintf(f, "%s", name[segreg&7]); +} + +void +x86_handle_prefix(bytecode *bc, const unsigned long data[4]) +{ + switch((x86_parse_insn_prefix)data[0]) { + case X86_LOCKREP: + x86_bc_insn_set_lockrep_prefix(bc, (unsigned char)data[1]); + break; + case X86_ADDRSIZE: + x86_bc_insn_addrsize_override(bc, (unsigned char)data[1]); + break; + case X86_OPERSIZE: + x86_bc_insn_opersize_override(bc, (unsigned char)data[1]); + break; + } +} + +void +x86_handle_seg_prefix(bytecode *bc, unsigned long segreg) +{ + x86_ea_set_segment(x86_bc_insn_get_ea(bc), (unsigned char)(segreg>>8)); +} + +void +x86_handle_seg_override(effaddr *ea, unsigned long segreg) +{ + x86_ea_set_segment(ea, (unsigned char)(segreg>>8)); +} + /* Define arch structure -- see arch.h for details */ arch x86_arch = { "x86 (IA-32, x86-64)", "x86", + { + x86_switch_cpu, + x86_check_identifier, + x86_directive, + x86_new_insn, + x86_handle_prefix, + x86_handle_seg_prefix, + x86_handle_seg_override, + x86_ea_new_expr + }, { X86_BYTECODE_TYPE_MAX, x86_bc_delete, x86_bc_print, x86_bc_resolve, x86_bc_tobytes - } + }, + x86_get_reg_size, + x86_reg_print, + x86_segreg_print, + NULL, /* x86_ea_data_delete */ + x86_ea_data_print }; diff --git a/src/arch/x86/x86arch.h b/src/arch/x86/x86arch.h index 336201b8..c44c0ddc 100644 --- a/src/arch/x86/x86arch.h +++ b/src/arch/x86/x86arch.h @@ -28,6 +28,31 @@ typedef enum { } x86_bytecode_type; #define X86_BYTECODE_TYPE_MAX X86_BC_JMPREL+1 +/* 0-7 (low 3 bits) used for register number, stored in same data area */ +typedef enum { + X86_REG8 = 0x8, + X86_REG16 = 0x10, + X86_REG32 = 0x20, + X86_MMXREG = 0x40, + X86_XMMREG = 0x80, + X86_CRREG = 0xC0, + X86_DRREG = 0xC8, + X86_TRREG = 0xF0, + X86_FPUREG = 0xF8 +} 
x86_expritem_reg_size; + +typedef enum { + X86_LOCKREP = 1, + X86_ADDRSIZE, + X86_OPERSIZE +} x86_parse_insn_prefix; + +typedef enum { + X86_NEAR, + X86_SHORT, + X86_FAR +} x86_parse_targetmod; + typedef enum { JR_NONE, JR_SHORT, @@ -44,7 +69,7 @@ typedef struct x86_targetval { void x86_ea_set_segment(/*@null@*/ effaddr *ea, unsigned char segment); effaddr *x86_ea_new_reg(unsigned char reg); -effaddr *x86_ea_new_imm(immval *imm, unsigned char im_len); +effaddr *x86_ea_new_imm(/*@keep@*/expr *imm, unsigned char im_len); effaddr *x86_ea_new_expr(/*@keep@*/ expr *e); /*@observer@*/ /*@null@*/ effaddr *x86_bc_insn_get_ea(/*@null@*/ bytecode *bc); @@ -63,7 +88,7 @@ void x86_set_jmprel_opcode_sel(x86_jmprel_opcode_sel *old_sel, */ typedef struct x86_new_insn_data { /*@keep@*/ /*@null@*/ effaddr *ea; - /*@keep@*/ /*@null@*/ immval *imm; + /*@keep@*/ /*@null@*/ expr *imm; unsigned char opersize; unsigned char op_len; unsigned char op[3]; @@ -90,4 +115,116 @@ bytecode *x86_bc_new_jmprel(x86_new_jmprel_data *d); extern unsigned char x86_mode_bits; +typedef struct x86_effaddr_data { + unsigned char segment; /* segment override, 0 if none */ + + /* How the spare (register) bits in Mod/RM are handled: + * Even if valid_modrm=0, the spare bits are still valid (don't overwrite!) + * They're set in bytecode_new_insn(). 
+ */ + unsigned char modrm; + unsigned char valid_modrm; /* 1 if Mod/RM byte currently valid, 0 if not */ + unsigned char need_modrm; /* 1 if Mod/RM byte needed, 0 if not */ + + unsigned char sib; + unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ + unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, + 0xff if unknown */ +} x86_effaddr_data; + +typedef struct x86_insn { + /*@null@*/ effaddr *ea; /* effective address */ + + /*@null@*/ immval *imm; /* immediate or relative value */ + + unsigned char opcode[3]; /* opcode */ + unsigned char opcode_len; + + unsigned char addrsize; /* 0 or =mode_bits => no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + + /* HACK, but a space-saving one: shift opcodes have an immediate + * form and a ,1 form (with no immediate). In the parser, we + * set this and opcode_len=1, but store the ,1 version in the + * second byte of the opcode array. We then choose between the + * two versions once we know the actual value of imm (because we + * don't know it in the parser module). + * + * A override to force the imm version should just leave this at + * 0. Then later code won't know the ,1 version even exists. + * TODO: Figure out how this affects CPU flags processing. + * + * Call x86_SetInsnShiftFlag() to set this flag to 1. + */ + unsigned char shift_op; + + /* HACK, similar to that for shift_op above, for optimizing instructions + * that take a sign-extended imm8 as well as imm values (eg, the arith + * instructions and a subset of the imul instructions). + */ + unsigned char signext_imm8_op; + + unsigned char mode_bits; +} x86_insn; + +typedef struct x86_jmprel { + expr *target; /* target location */ + + struct { + unsigned char opcode[3]; + unsigned char opcode_len; /* 0 = no opc for this version */ + } shortop, nearop; + + /* which opcode are we using? 
*/ + /* The *FORCED forms are specified in the source as such */ + x86_jmprel_opcode_sel op_sel; + + unsigned char addrsize; /* 0 or =mode_bits => no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + + unsigned char mode_bits; +} x86_jmprel; + +void x86_bc_delete(bytecode *bc); +void x86_bc_print(FILE *f, const bytecode *bc); +bc_resolve_flags x86_bc_resolve(bytecode *bc, int save, const section *sect, + resolve_label_func resolve_label); +int x86_bc_tobytes(bytecode *bc, unsigned char **bufp, const section *sect, + void *d, output_expr_func output_expr); + +int x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, + unsigned char nosplit, unsigned char *displen, + unsigned char *modrm, unsigned char *v_modrm, + unsigned char *n_modrm, unsigned char *sib, + unsigned char *v_sib, unsigned char *n_sib); + +void x86_switch_cpu(const char *cpuid); + +arch_check_id_retval x86_check_identifier(unsigned long data[2], + const char *id); + +int x86_directive(const char *name, valparamhead *valparams, + /*@null@*/ valparamhead *objext_valparams, + sectionhead *headp); + +/*@null@*/ bytecode *x86_new_insn(const unsigned long data[2], + int num_operands, + /*@null@*/ insn_operandhead *operands); + +void x86_handle_prefix(bytecode *bc, const unsigned long data[4]); + +void x86_handle_seg_prefix(bytecode *bc, unsigned long segreg); + +void x86_handle_seg_override(effaddr *ea, unsigned long segreg); + +unsigned int x86_get_reg_size(unsigned long reg); + +void x86_reg_print(FILE *f, unsigned long reg); + +void x86_segreg_print(FILE *f, unsigned long segreg); + +void x86_ea_data_print(FILE *f, const effaddr *ea); + #endif diff --git a/src/arch/x86/x86bc.c b/src/arch/x86/x86bc.c index 8cc4d4b4..4393a0c8 100644 --- a/src/arch/x86/x86bc.c +++ b/src/arch/x86/x86bc.c @@ -32,7 +32,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" #include "bc-int.h" @@ 
-54,11 +54,12 @@ x86_bc_new_insn(x86_new_insn_data *d) ead->modrm |= (d->spare << 3) & 0x38; /* plug in provided bits */ } - insn->imm = d->imm; if (d->imm) { + insn->imm = imm_new_expr(d->imm); insn->imm->len = d->im_len; insn->imm->sign = d->im_sign; - } + } else + insn->imm = NULL; insn->opcode[0] = d->op[0]; insn->opcode[1] = d->op[1]; @@ -173,12 +174,12 @@ x86_ea_new_expr(expr *e) /*@-compmempass@*/ effaddr * -x86_ea_new_imm(immval *imm, unsigned char im_len) +x86_ea_new_imm(expr *imm, unsigned char im_len) { effaddr *ea = xmalloc(sizeof(effaddr)+sizeof(x86_effaddr_data)); x86_effaddr_data *ead = ea_get_data(ea); - ea->disp = imm->val; + ea->disp = imm; ea->len = im_len; ea->nosplit = 0; ead->segment = 0; @@ -320,10 +321,8 @@ x86_bc_delete(bytecode *bc) switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_data(bc); - if (insn->ea) { - expr_delete(insn->ea->disp); - xfree(insn->ea); - } + if (insn->ea) + ea_delete(insn->ea); if (insn->imm) { expr_delete(insn->imm->val); xfree(insn->imm); @@ -336,40 +335,38 @@ x86_bc_delete(bytecode *bc) } } +void +x86_ea_data_print(FILE *f, const effaddr *ea) +{ + const x86_effaddr_data *ead = ea_get_const_data(ea); + fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", + (unsigned int)ead->segment); + fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "", + (unsigned int)ead->modrm, (unsigned int)ead->valid_modrm, + (unsigned int)ead->need_modrm); + fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", indent_level, "", + (unsigned int)ead->sib, (unsigned int)ead->valid_sib, + (unsigned int)ead->need_sib); +} + void x86_bc_print(FILE *f, const bytecode *bc) { const x86_insn *insn; const x86_jmprel *jmprel; - x86_effaddr_data *ead; switch ((x86_bytecode_type)bc->type) { case X86_BC_INSN: insn = bc_get_const_data(bc); fprintf(f, "%*s_Instruction_\n", indent_level, ""); fprintf(f, "%*sEffective Address:", indent_level, ""); - if (!insn->ea) - fprintf(f, " (nil)\n"); - else { - indent_level++; - 
fprintf(f, "\n%*sDisp=", indent_level, ""); - expr_print(f, insn->ea->disp); + if (insn->ea) { fprintf(f, "\n"); - ead = ea_get_data(insn->ea); - fprintf(f, "%*sLen=%u SegmentOv=%02x NoSplit=%u\n", - indent_level, "", (unsigned int)insn->ea->len, - (unsigned int)ead->segment, - (unsigned int)insn->ea->nosplit); - fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", - indent_level, "", (unsigned int)ead->modrm, - (unsigned int)ead->valid_modrm, - (unsigned int)ead->need_modrm); - fprintf(f, "%*sSIB=%03o ValidSIB=%u NeedSIB=%u\n", - indent_level, "", (unsigned int)ead->sib, - (unsigned int)ead->valid_sib, - (unsigned int)ead->need_sib); + indent_level++; + ea_print(f, insn->ea); indent_level--; - } + } else + fprintf(f, " (nil)\n"); fprintf(f, "%*sImmediate Value:", indent_level, ""); if (!insn->imm) fprintf(f, " (nil)\n"); @@ -477,8 +474,7 @@ x86_bc_resolve_insn(x86_insn *insn, unsigned long *len, int save, x86_effaddr_data ead_t = *ead; /* structure copy */ unsigned char displen = ea->len; - if ((ea->disp) && ((!ead->valid_sib && ead->need_sib) || - (!ead->valid_modrm && ead->need_modrm))) { + if (ea->disp) { temp = expr_copy(ea->disp); assert(temp != NULL); diff --git a/src/arch/x86/x86expr.c b/src/arch/x86/x86expr.c index d041cc42..a30f14d3 100644 --- a/src/arch/x86/x86expr.c +++ b/src/arch/x86/x86expr.c @@ -33,7 +33,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" #include "expr-int.h" @@ -48,10 +48,10 @@ x86_expr_checkea_get_reg32(ExprItem *ei, /*returned*/ void *d) int *ret; /* don't allow 16-bit registers */ - if (ei->data.reg.size != 32) + if ((ei->data.reg & ~7) != X86_REG32) return 0; - ret = &data[ei->data.reg.num & 7]; /* & 7 for sanity check */ + ret = &data[ei->data.reg & 7]; /* overwrite with 0 to eliminate register from displacement expr */ ei->type = EXPR_INT; @@ -84,10 +84,11 @@ x86_expr_checkea_get_reg16(ExprItem *ei, void *d) reg16[7] = &data->di; /* don't allow 32-bit registers */ - if (ei->data.reg.size != 
16) + if ((ei->data.reg & ~7) != X86_REG16) return 0; - ret = reg16[ei->data.reg.num & 7]; /* & 7 for sanity check */ + /* & 7 for sanity check */ + ret = reg16[ei->data.reg & 7]; /* only allow BX, SI, DI, BP */ if (!ret) @@ -469,7 +470,7 @@ x86_expr_checkea_getregsize_callback(ExprItem *ei, void *d) unsigned char *addrsize = (unsigned char *)d; if (ei->type == EXPR_REG) { - *addrsize = ei->data.reg.size; + *addrsize = (unsigned char)ei->data.reg & ~7; return 1; } else return 0; @@ -757,6 +758,12 @@ x86_expr_checkea(expr **ep, unsigned char *addrsize, unsigned char bits, return x86_checkea_calc_displen(ep, 2, havereg == HAVE_NONE, havereg == HAVE_BP, displen, modrm, v_modrm); + } else if (!*n_modrm && !*n_sib) { + /* Special case for MOV MemOffs opcode: displacement but no modrm. */ + if (*addrsize == 32) + *displen = 4; + else if (*addrsize == 16) + *displen = 2; } return 1; } diff --git a/src/arch/x86/x86id.re b/src/arch/x86/x86id.re new file mode 100644 index 00000000..46ab2dbb --- /dev/null +++ b/src/arch/x86/x86id.re @@ -0,0 +1,1282 @@ +/* + * x86 identifier recognition and instruction handling + * + * Copyright (C) 2002 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "util.h" +RCSID("$IdPath$"); + +#include "bitvect.h" + +#include "globals.h" +#include "errwarn.h" +#include "intnum.h" +#include "floatnum.h" +#include "expr.h" +#include "symrec.h" + +#include "bytecode.h" + +#include "arch.h" +#include "src/arch/x86/x86arch.h" + +#include "expr-int.h" +#include "bc-int.h" + + +/* Available CPU feature flags */ +#define CPU_Any (0) /* Any old cpu will do */ +#define CPU_086 CPU_Any +#define CPU_186 (1<<0) /* i186 or better required */ +#define CPU_286 (1<<1) /* i286 or better required */ +#define CPU_386 (1<<2) /* i386 or better required */ +#define CPU_486 (1<<3) /* i486 or better required */ +#define CPU_586 (1<<4) /* i586 or better required */ +#define CPU_686 (1<<5) /* i686 or better required */ +#define CPU_P3 (1<<6) /* Pentium3 or better required */ +#define CPU_P4 (1<<7) /* Pentium4 or better required */ +#define CPU_IA64 (1<<8) /* IA-64 or better required */ +#define CPU_K6 (1<<9) /* AMD K6 or better required */ +#define CPU_Athlon (1<<10) /* AMD Athlon or better required */ +#define CPU_Hammer (1<<11) /* AMD Sledgehammer or better required */ +#define CPU_FPU (1<<12) /* FPU support required */ +#define CPU_MMX (1<<13) /* MMX support required */ +#define CPU_SSE (1<<14) /* Streaming SIMD extensions required */ +#define CPU_SSE2 (1<<15) /* Streaming SIMD extensions 2 required */ +#define CPU_3DNow (1<<16) /* 3DNow! support required */ +#define CPU_Cyrix (1<<17) /* Cyrix-specific instruction */ +#define CPU_AMD (1<<18) /* AMD-specific inst.
(older than K6) */ +#define CPU_SMM (1<<19) /* System Management Mode instruction */ +#define CPU_Prot (1<<20) /* Protected mode only instruction */ +#define CPU_Undoc (1<<21) /* Undocumented instruction */ +#define CPU_Obs (1<<22) /* Obsolete instruction */ +#define CPU_Priv (1<<23) /* Privileged instruction */ + +/* What instructions/features are enabled? Defaults to all. */ +static unsigned long cpu_enabled = ~CPU_Any; + +/* Opcode modifiers. The opcode bytes are in "reverse" order because the + * parameters are read from the arch-specific data in LSB->MSB order. + * (only for aesthetic reasons in the lexer code below, no practical reason). + */ +#define MOD_Op2Add (1<<0) /* Parameter adds to opcode byte 2 */ +#define MOD_Gap0 (1<<1) /* Eats a parameter */ +#define MOD_Op1Add (1<<2) /* Parameter adds to opcode byte 1 */ +#define MOD_Gap1 (1<<3) /* Eats a parameter */ +#define MOD_Op0Add (1<<4) /* Parameter adds to opcode byte 0 */ +#define MOD_SpAdd (1<<5) /* Parameter adds to "spare" value */ +#define MOD_OpSizeR (1<<6) /* Parameter replaces opersize */ +#define MOD_Imm8 (1<<7) /* Parameter is included as immediate byte */ + +/* Operand types. These are more detailed than the "general" types for all + * architectures, as they include the size, for instance.
+ * Bit Breakdown (from LSB to MSB): + * - 4 bits = general type (must be exact match, except for =3): + * 0 = immediate + * 1 = any general purpose, MMX, XMM, or FPU register + * 2 = memory + * 3 = any general purpose, MMX, XMM, or FPU register OR memory + * 4 = segreg + * 5 = any CR register + * 6 = any DR register + * 7 = any TR register + * 8 = ST0 + * 9 = AL/AX/EAX (depending on size) + * A = CL/CX/ECX (depending on size) + * B = CR4 + * C = memory offset (an EA, but with no registers allowed) + * [special case for MOV opcode] + * - 3 bits = size (user-specified, or from register size): + * 0 = any size acceptable + * 1/2/3/4 = 8/16/32/64 bits (from user or reg size) + * 5/6 = 80/128 bits (from user) + * - 1 bit = size implicit or explicit ("strictness" of size matching on + * non-registers -- registers are always strictly matched): + * 0 = user size must exactly match size above. + * 1 = user size either unspecified or exactly match size above. + * + * MSBs than the above are actions: what to do with the operand if the + * instruction matches. Essentially describes what part of the output bytecode + * gets the operand. This may require conversion (e.g. a register going into + * an ea field). Naturally, only one of each of these may be contained in the + * operands of a single insn_info structure. 
+ * - 3 bits = action: + * 0 = does nothing (operand data is discarded) + * 1 = operand data goes into ea field + * 2 = operand data goes into imm field + * 3 = operand data goes into "spare" field + * 4 = operand data is added to opcode byte 0 + */ +#define OPT_Imm 0x0 +#define OPT_Reg 0x1 +#define OPT_Mem 0x2 +#define OPT_RM 0x3 +#define OPT_SegReg 0x4 +#define OPT_CRReg 0x5 +#define OPT_DRReg 0x6 +#define OPT_TRReg 0x7 +#define OPT_ST0 0x8 +#define OPT_Areg 0x9 +#define OPT_Creg 0xA +#define OPT_CR4 0xB +#define OPT_MemOffs 0xC +#define OPT_MASK 0x000F + +#define OPS_Any (0<<4) +#define OPS_8 (1<<4) +#define OPS_16 (2<<4) +#define OPS_32 (3<<4) +#define OPS_64 (4<<4) +#define OPS_80 (5<<4) +#define OPS_128 (6<<4) +#define OPS_MASK 0x0070 +#define OPS_SHIFT 4 + +#define OPS_Relaxed (1<<7) +#define OPS_RMASK 0x0080 + +#define OPA_None (0<<8) +#define OPA_EA (1<<8) +#define OPA_Imm (2<<8) +#define OPA_Spare (3<<8) +#define OPA_Op0Add (4<<8) +#define OPA_MASK 0x0700 + +typedef struct x86_insn_info { + /* The CPU feature flags needed to execute this instruction. This is OR'ed + * with arch-specific data[2]. This combined value is compared with + * cpu_enabled to see if all bits set here are set in cpu_enabled--if so, + * the instruction is available on this CPU. + */ + unsigned long cpu; + + /* Opcode modifiers for variations of instruction. As each modifier reads + * its parameter in LSB->MSB order from the arch-specific data[1] from the + * lexer data, and the LSB of the arch-specific data[1] is reserved for the + * count of insn_info structures in the instruction grouping, there can + * only be a maximum of 3 modifiers. 
+ */ + unsigned long modifiers; + + /* Operand Size */ + unsigned char opersize; + + /* The length of the basic opcode */ + unsigned char opcode_len; + + /* The basic 1-3 byte opcode */ + unsigned char opcode[3]; + + /* The 3-bit "spare" value (extended opcode) for the R/M byte field */ + unsigned char spare; + + /* The number of operands this form of the instruction takes */ + unsigned char num_operands; + + /* The types of each operand, see above */ + unsigned int operands[3]; +} x86_insn_info; + +/* Define lexer arch-specific data with 0-3 modifiers. */ +#define DEF_INSN_DATA(group, mod, cpu) do { \ + data[0] = (unsigned long)group##_insn; \ + data[1] = ((mod)<<8) | \ + ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \ + data[2] = cpu; \ + } while (0) + +#define RET_INSN(group, mod, cpu) do { \ + DEF_INSN_DATA(group, mod, cpu); \ + return ARCH_CHECK_ID_INSN; \ + } while (0) + +/* + * General instruction groupings + */ + +/* One byte opcode instructions with no operands */ +static const x86_insn_info onebyte_insn[] = { + { CPU_Any, MOD_Op0Add|MOD_OpSizeR, 0, 1, {0, 0, 0}, 0, 0, {0, 0, 0} } +}; + +/* Two byte opcode instructions with no operands */ +static const x86_insn_info twobyte_insn[] = { + { CPU_Any, MOD_Op1Add|MOD_Op0Add, 0, 2, {0, 0, 0}, 0, 0, {0, 0, 0} } +}; + +/* Three byte opcode instructions with no operands */ +static const x86_insn_info threebyte_insn[] = { + { CPU_Any, MOD_Op2Add|MOD_Op1Add|MOD_Op0Add, 0, 3, {0, 0, 0}, 0, 0, + {0, 0, 0} } +}; + +/* One byte opcode instructions with general memory operand */ +static const x86_insn_info onebytemem_insn[] = { + { CPU_Any, MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1, + {OPT_Mem|OPS_Any|OPA_EA, 0, 0} } +}; + +/* Two byte opcode instructions with general memory operand */ +static const x86_insn_info twobytemem_insn[] = { + { CPU_Any, MOD_Op1Add|MOD_Op0Add|MOD_SpAdd, 0, 1, {0, 0, 0}, 0, 1, + {OPT_Mem|OPS_Any|OPA_EA, 0, 0} } +}; + +/* Move instructions */ +static const x86_insn_info 
mov_insn[] = { + { CPU_Any, 0, 0, 1, {0xA0, 0, 0}, 0, 2, + {OPT_Areg|OPS_8|OPA_None, OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 16, 1, {0xA1, 0, 0}, 0, 2, + {OPT_Areg|OPS_16|OPA_None, OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 32, 1, {0xA1, 0, 0}, 0, 2, + {OPT_Areg|OPS_32|OPA_None, OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 0, 1, {0xA2, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_8|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_8|OPA_None, 0} }, + { CPU_Any, 0, 16, 1, {0xA3, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_16|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_16|OPA_None, 0} }, + { CPU_Any, 0, 32, 1, {0xA3, 0, 0}, 0, 2, + {OPT_MemOffs|OPS_32|OPS_Relaxed|OPA_EA, OPT_Areg|OPS_32|OPA_None, 0} }, + { CPU_Any, 0, 0, 1, {0x88, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_8|OPA_Spare, 0} }, + { CPU_Any, 0, 16, 1, {0x89, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_16|OPA_Spare, 0} }, + { CPU_386, 0, 32, 1, {0x89, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Reg|OPS_32|OPA_Spare, 0} }, + { CPU_Any, 0, 0, 1, {0x8A, 0, 0}, 0, 2, + {OPT_Reg|OPS_8|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_Any, 0, 16, 1, {0x8B, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, 0} }, + { CPU_386, 0, 32, 1, {0x8B, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, 0} }, + /* TODO: segreg here */ + { CPU_Any, 0, 0, 1, {0xB0, 0, 0}, 0, 2, + {OPT_Reg|OPS_8|OPA_Op0Add, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xB8, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Op0Add, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xB8, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Op0Add, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} }, + /* Need two sets here, one for strictness on left side, one for right. 
*/ + { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_8|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_16|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPS_Relaxed|OPA_EA, OPT_Imm|OPS_32|OPA_Imm, 0} }, + { CPU_Any, 0, 0, 1, {0xC6, 0, 0}, 0, 2, + {OPT_RM|OPS_8|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_Any, 0, 16, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_16|OPA_EA, OPT_Imm|OPS_16|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_386, 0, 32, 1, {0xC7, 0, 0}, 0, 2, + {OPT_RM|OPS_32|OPA_EA, OPT_Imm|OPS_32|OPS_Relaxed|OPA_Imm, 0} }, + { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2, + {OPT_CR4|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x22, 0}, 0, 2, + {OPT_CRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_586|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_CR4|OPS_32|OPA_Spare, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x20, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_CRReg|OPS_32|OPA_Spare, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x23, 0}, 0, 2, + {OPT_DRReg|OPS_32|OPA_Spare, OPT_Reg|OPS_32|OPA_EA, 0} }, + { CPU_386|CPU_Priv, 0, 0, 2, {0x0F, 0x21, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_EA, OPT_DRReg|OPS_32|OPA_Spare, 0} } +}; + +/* Move with sign/zero extend */ +static const x86_insn_info movszx_insn[] = { + { CPU_386, MOD_Op1Add, 16, 2, {0x0F, 0, 0}, 0, 2, + {OPT_Reg|OPS_16|OPA_Spare, OPT_RM|OPS_8|OPS_Relaxed|OPA_EA, 0} }, + { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 0, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_8|OPA_EA, 0} }, + { CPU_386, MOD_Op1Add, 32, 2, {0x0F, 1, 0}, 0, 2, + {OPT_Reg|OPS_32|OPA_Spare, OPT_RM|OPS_16|OPA_EA, 0} } +}; + + +bytecode * +x86_new_insn(const unsigned long data[4], int num_operands, + insn_operandhead *operands) +{ + x86_new_insn_data d; + int num_info = (int)(data[1]&0xFF); + x86_insn_info *info = (x86_insn_info *)data[0]; + unsigned long 
mod_data = data[1] >> 8; + int found = 0; + insn_operand *op; + int i; + static const unsigned int size_lookup[] = {0, 1, 2, 4, 8, 10, 16, 0}; + + /* Just do a simple linear search through the info array for a match. + * First match wins. + */ + for (; num_info>0 && !found; num_info--, info++) { + unsigned long cpu; + unsigned int size; + int mismatch = 0; + + /* Match CPU */ + cpu = info->cpu | data[2]; + if ((cpu_enabled & cpu) != cpu) + continue; + + /* Match # of operands */ + if (num_operands != info->num_operands) + continue; + + if (!operands) { + found = 1; /* no operands -> must have a match here. */ + break; + } + + /* Match each operand type and size */ + for(i = 0, op = ops_first(operands); op && i<info->num_operands && + !mismatch; op = ops_next(op), i++) { + /* Check operand type */ + switch (info->operands[i] & OPT_MASK) { + case OPT_Imm: + if (op->type != INSN_OPERAND_IMM) + mismatch = 1; + break; + case OPT_Reg: + if (op->type != INSN_OPERAND_REG) + mismatch = 1; + else { + size = op->data.reg & ~7; + if (size == X86_CRREG || size == X86_DRREG || + size == X86_TRREG) + mismatch = 1; + } + break; + case OPT_Mem: + if (op->type != INSN_OPERAND_MEMORY) + mismatch = 1; + break; + case OPT_RM: + if (op->type != INSN_OPERAND_REG && + op->type != INSN_OPERAND_MEMORY) + mismatch = 1; + break; + case OPT_SegReg: + if (op->type != INSN_OPERAND_SEGREG) + mismatch = 1; + break; + case OPT_CRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_CRREG) + mismatch = 1; + break; + case OPT_DRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_DRREG) + mismatch = 1; + break; + case OPT_TRReg: + if (op->type != INSN_OPERAND_REG || + (op->data.reg & ~7) != X86_TRREG) + mismatch = 1; + break; + case OPT_ST0: + if (op->type != INSN_OPERAND_REG || + op->data.reg != X86_FPUREG) + mismatch = 1; + break; + case OPT_Areg: + if (op->type != INSN_OPERAND_REG || + ((info->operands[i] & OPS_MASK) == OPS_8 && + op->data.reg != (X86_REG8 | 0)) || +
((info->operands[i] & OPS_MASK) == OPS_16 && + op->data.reg != (X86_REG16 | 0)) || + ((info->operands[i] & OPS_MASK) == OPS_32 && + op->data.reg != (X86_REG32 | 0))) + mismatch = 1; + break; + case OPT_Creg: + if (op->type != INSN_OPERAND_REG || + ((info->operands[i] & OPS_MASK) == OPS_8 && + op->data.reg != (X86_REG8 | 1)) || + ((info->operands[i] & OPS_MASK) == OPS_16 && + op->data.reg != (X86_REG16 | 1)) || + ((info->operands[i] & OPS_MASK) == OPS_32 && + op->data.reg != (X86_REG32 | 1))) + mismatch = 1; + break; + case OPT_CR4: + if (op->type != INSN_OPERAND_REG || + op->data.reg != (X86_CRREG | 4)) + mismatch = 1; + break; + case OPT_MemOffs: + if (op->type != INSN_OPERAND_MEMORY || + expr_contains(ea_get_disp(op->data.ea), EXPR_REG)) + mismatch = 1; + break; + default: + InternalError(_("invalid operand type")); + } + + if (mismatch) + break; + + /* Check operand size */ + size = size_lookup[(info->operands[i] & OPS_MASK)>>OPS_SHIFT]; + if (op->type == INSN_OPERAND_REG && op->size == 0) { + /* Register size must exactly match */ + if (x86_get_reg_size(op->data.reg) != size) + mismatch = 1; + } else { + if ((info->operands[i] & OPS_RMASK) == OPS_Relaxed) { + /* Relaxed checking */ + if (size != 0 && op->size != size && op->size != 0) + mismatch = 1; + } else { + /* Strict checking */ + if (op->size != size) + mismatch = 1; + } + } + } + + if (!mismatch) { + found = 1; + break; + } + } + + if (!found) { + /* Didn't find a matching one */ + /* FIXME: This needs to be more descriptive of certain reasons for a + * mismatch. E.g.: + * "mismatch in operand sizes" + * "operand size not specified" + * etc. This will probably require adding dummy error catchers in the + * insn list which are only looked at if we get here. 
+ */ + Error(_("invalid combination of opcode and operands")); + return NULL; + } + + /* Copy what we can from info */ + d.ea = NULL; + d.imm = NULL; + d.opersize = info->opersize; + d.op_len = info->opcode_len; + d.op[0] = info->opcode[0]; + d.op[1] = info->opcode[1]; + d.op[2] = info->opcode[2]; + d.spare = info->spare; + d.im_len = 0; + d.im_sign = 0; + + /* Apply modifiers */ + if (info->modifiers & MOD_Op2Add) { + d.op[2] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Gap0) + mod_data >>= 8; + if (info->modifiers & MOD_Op1Add) { + d.op[1] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Gap1) + mod_data >>= 8; + if (info->modifiers & MOD_Op0Add) { + d.op[0] += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_SpAdd) { + d.spare += (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_OpSizeR) { + d.opersize = (unsigned char)(mod_data & 0xFF); + mod_data >>= 8; + } + if (info->modifiers & MOD_Imm8) { + d.imm = expr_new_ident(ExprInt(intnum_new_int(mod_data & 0xFF))); + d.im_len = 1; + /*mod_data >>= 8;*/ + } + + /* Go through operands and assign */ + if (operands) { + for(i = 0, op = ops_first(operands); op && inum_operands; + op = ops_next(op), i++) { + switch (info->operands[i] & OPA_MASK) { + case OPA_None: + /* Throw away the operand contents */ + switch (op->type) { + case INSN_OPERAND_REG: + case INSN_OPERAND_SEGREG: + break; + case INSN_OPERAND_MEMORY: + ea_delete(op->data.ea); + break; + case INSN_OPERAND_IMM: + expr_delete(op->data.val); + break; + } + break; + case OPA_EA: + switch (op->type) { + case INSN_OPERAND_REG: + d.ea = x86_ea_new_reg((unsigned char)op->data.reg); + break; + case INSN_OPERAND_SEGREG: + InternalError(_("invalid operand conversion")); + case INSN_OPERAND_MEMORY: + d.ea = op->data.ea; + if ((info->operands[i] & OPT_MASK) == OPT_MemOffs) { + /* Special-case for MOV MemOffs instruction */ + 
x86_effaddr_data *ead = ea_get_data(d.ea); + ead->valid_modrm = 0; + ead->need_modrm = 0; + ead->valid_sib = 0; + ead->need_sib = 0; + } + break; + case INSN_OPERAND_IMM: + d.ea = x86_ea_new_imm(op->data.val, + size_lookup[(info->operands[i] & + OPS_MASK)>>OPS_SHIFT]); + break; + } + break; + case OPA_Imm: + if (op->type == INSN_OPERAND_IMM) { + d.imm = op->data.val; + d.im_len = size_lookup[(info->operands[i] & + OPS_MASK)>>OPS_SHIFT]; + } else + InternalError(_("invalid operand conversion")); + break; + case OPA_Spare: + if (op->type == INSN_OPERAND_REG || + op->type == INSN_OPERAND_SEGREG) + d.spare = (unsigned char)(op->data.reg&7); + else + InternalError(_("invalid operand conversion")); + break; + case OPA_Op0Add: + if (op->type == INSN_OPERAND_REG) + d.op[0] += (unsigned char)(op->data.reg&7); + else + InternalError(_("invalid operand conversion")); + break; + default: + InternalError(_("unknown operand action")); + } + } + } + + /* Create the bytecode and return it */ + return x86_bc_new_insn(&d); +} + + +#define YYCTYPE char +#define YYCURSOR id +#define YYLIMIT id +#define YYMARKER marker +#define YYFILL(n) + +/*!re2c + any = [\000-\377]; + A = [aA]; + B = [bB]; + C = [cC]; + D = [dD]; + E = [eE]; + F = [fF]; + G = [gG]; + H = [hH]; + I = [iI]; + J = [jJ]; + K = [kK]; + L = [lL]; + M = [mM]; + N = [nN]; + O = [oO]; + P = [pP]; + Q = [qQ]; + R = [rR]; + S = [sS]; + T = [tT]; + U = [uU]; + V = [vV]; + W = [wW]; + X = [xX]; + Y = [yY]; + Z = [zZ]; +*/ + +void +x86_switch_cpu(const char *id) +{ + const char *marker; + + /*!re2c + /* The standard CPU names /set/ cpu_enabled. */ + "8086" { + cpu_enabled = CPU_Priv; + return; + } + ("80" | I)? "186" { + cpu_enabled = CPU_186|CPU_Priv; + return; + } + ("80" | I)? "286" { + cpu_enabled = CPU_186|CPU_286|CPU_Priv; + return; + } + ("80" | I)? "386" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + ("80" | I)? 
"486" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_FPU|CPU_SMM| + CPU_Prot|CPU_Priv; + return; + } + (I? "586") | (P E N T I U M) | (P "5") { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_FPU| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (I? "686") | (P "6") | (P P R O) | (P E N T I U M P R O) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_FPU|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (P "2") | (P E N T I U M "-"? ("2" | (I I))) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_FPU|CPU_MMX|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (P "3") | (P E N T I U M "-"? ("3" | (I I I))) | (K A T M A I) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SMM|CPU_Prot| + CPU_Priv; + return; + } + (P "4") | (P E N T I U M "-"? ("4" | (I V))) | (W I L L I A M E T T E) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_P4|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (I A "-"? "64") | (I T A N I U M) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_P3|CPU_P4|CPU_IA64|CPU_FPU|CPU_MMX|CPU_SSE| + CPU_SSE2|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + K "6" { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_FPU|CPU_MMX|CPU_3DNow|CPU_SMM|CPU_Prot| + CPU_Priv; + return; + } + A T H L O N { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_Athlon|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow| + CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + (S L E D G E)? (H A M M E R) { + cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686| + CPU_K6|CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE| + CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + + /* Features have "no" versions to disable them, and only set/reset the + * specific feature being changed. All other bits are left alone. 
+ */ + F P U { cpu_enabled |= CPU_FPU; return; } + N O F P U { cpu_enabled &= ~CPU_FPU; return; } + M M X { cpu_enabled |= CPU_MMX; return; } + N O M M X { cpu_enabled &= ~CPU_MMX; return; } + S S E { cpu_enabled |= CPU_SSE; return; } + N O S S E { cpu_enabled &= ~CPU_SSE; return; } + S S E "2" { cpu_enabled |= CPU_SSE2; return; } + N O S S E "2" { cpu_enabled &= ~CPU_SSE2; return; } + "3" D N O W { cpu_enabled |= CPU_3DNow; return; } + N O "3" D N O W { cpu_enabled &= ~CPU_3DNow; return; } + C Y R I X { cpu_enabled |= CPU_Cyrix; return; } + N O C Y R I X { cpu_enabled &= ~CPU_Cyrix; return; } + A M D { cpu_enabled |= CPU_AMD; return; } + N O A M D { cpu_enabled &= ~CPU_AMD; return; } + S M M { cpu_enabled |= CPU_SMM; return; } + N O S M M { cpu_enabled &= ~CPU_SMM; return; } + P R O T { cpu_enabled |= CPU_Prot; return; } + N O P R O T { cpu_enabled &= ~CPU_Prot; return; } + U N D O C { cpu_enabled |= CPU_Undoc; return; } + N O U N D O C { cpu_enabled &= ~CPU_Undoc; return; } + O B S { cpu_enabled |= CPU_Obs; return; } + N O O B S { cpu_enabled &= ~CPU_Obs; return; } + P R I V { cpu_enabled |= CPU_Priv; return; } + N O P R I V { cpu_enabled &= ~CPU_Priv; return; } + + /* catchalls */ + [A-Za-z0-9]+ { + Warning(_("unrecognized CPU identifier `%s'"), id); + return; + } + any { + Warning(_("unrecognized CPU identifier `%s'"), id); + return; + } + */ +} + +arch_check_id_retval +x86_check_identifier(unsigned long data[4], const char *id) +{ + const char *oid = id; + const char *marker; + /*!re2c + /* target modifiers */ + N E A R { + data[0] = X86_NEAR; + return ARCH_CHECK_ID_TARGETMOD; + } + S H O R T { + data[0] = X86_SHORT; + return ARCH_CHECK_ID_TARGETMOD; + } + F A R { + data[0] = X86_FAR; + return ARCH_CHECK_ID_TARGETMOD; + } + + /* operand size overrides */ + O "16" { + data[0] = X86_OPERSIZE; + data[1] = 16; + return ARCH_CHECK_ID_PREFIX; + } + O "32" { + data[0] = X86_OPERSIZE; + data[1] = 32; + return ARCH_CHECK_ID_PREFIX; + } + /* address size overrides */ + 
A "16" {
+        data[0] = X86_ADDRSIZE;
+        data[1] = 16;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    A "32" {
+        data[0] = X86_ADDRSIZE;
+        data[1] = 32;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+
+    /* instruction prefixes: data[1] is set to the prefix's opcode byte */
+    L O C K {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF0;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    R E P N E {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF2;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    R E P N Z {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF2;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    R E P {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF3;
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    R E P E {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF3;  /* REPE is the same prefix byte as REP (0xF4 is HLT) */
+        return ARCH_CHECK_ID_PREFIX;
+    }
+    R E P Z {
+        data[0] = X86_LOCKREP;
+        data[1] = 0xF3;  /* REPZ is the same prefix byte as REP (0xF4 is HLT) */
+        return ARCH_CHECK_ID_PREFIX;
+    }
+
+    /* control, debug, and test registers */
+    C R [02-4] {
+        data[0] = X86_CRREG | (oid[2]-'0');  /* register number is the 3rd char */
+        return ARCH_CHECK_ID_REG;
+    }
+    D R [0-7] {
+        data[0] = X86_DRREG | (oid[2]-'0');
+        return ARCH_CHECK_ID_REG;
+    }
+    T R [0-7] {
+        data[0] = X86_TRREG | (oid[2]-'0');
+        return ARCH_CHECK_ID_REG;
+    }
+
+    /* floating point, MMX, and SSE/SSE2 registers */
+    S T [0-7] {
+        data[0] = X86_FPUREG | (oid[2]-'0');
+        return ARCH_CHECK_ID_REG;
+    }
+    M M [0-7] {
+        data[0] = X86_MMXREG | (oid[2]-'0');
+        return ARCH_CHECK_ID_REG;
+    }
+    X M M [0-7] {
+        data[0] = X86_XMMREG | (oid[3]-'0');  /* "xmmN": the digit is the 4th char */
+        return ARCH_CHECK_ID_REG;
+    }
+
+    /* integer registers */
+    E A X { data[0] = X86_REG32 | 0; return ARCH_CHECK_ID_REG; }
+    E C X { data[0] = X86_REG32 | 1; return ARCH_CHECK_ID_REG; }
+    E D X { data[0] = X86_REG32 | 2; return ARCH_CHECK_ID_REG; }
+    E B X { data[0] = X86_REG32 | 3; return ARCH_CHECK_ID_REG; }
+    E S P { data[0] = X86_REG32 | 4; return ARCH_CHECK_ID_REG; }
+    E B P { data[0] = X86_REG32 | 5; return ARCH_CHECK_ID_REG; }
+    E S I { data[0] = X86_REG32 | 6; return ARCH_CHECK_ID_REG; }
+    E D I { data[0] = X86_REG32 | 7; return ARCH_CHECK_ID_REG; }
+
+    A X { data[0] = X86_REG16 | 0; return ARCH_CHECK_ID_REG; }
+    C X { data[0] = X86_REG16 | 1; return
ARCH_CHECK_ID_REG; } + D X { data[0] = X86_REG16 | 2; return ARCH_CHECK_ID_REG; } + B X { data[0] = X86_REG16 | 3; return ARCH_CHECK_ID_REG; } + S P { data[0] = X86_REG16 | 4; return ARCH_CHECK_ID_REG; } + B P { data[0] = X86_REG16 | 5; return ARCH_CHECK_ID_REG; } + S I { data[0] = X86_REG16 | 6; return ARCH_CHECK_ID_REG; } + D I { data[0] = X86_REG16 | 7; return ARCH_CHECK_ID_REG; } + + A L { data[0] = X86_REG8 | 0; return ARCH_CHECK_ID_REG; } + C L { data[0] = X86_REG8 | 1; return ARCH_CHECK_ID_REG; } + D L { data[0] = X86_REG8 | 2; return ARCH_CHECK_ID_REG; } + B L { data[0] = X86_REG8 | 3; return ARCH_CHECK_ID_REG; } + A H { data[0] = X86_REG8 | 4; return ARCH_CHECK_ID_REG; } + C H { data[0] = X86_REG8 | 5; return ARCH_CHECK_ID_REG; } + D H { data[0] = X86_REG8 | 6; return ARCH_CHECK_ID_REG; } + B H { data[0] = X86_REG8 | 7; return ARCH_CHECK_ID_REG; } + + /* segment registers */ + E S { data[0] = 0x2600; return ARCH_CHECK_ID_SEGREG; } + C S { data[0] = 0x2e01; return ARCH_CHECK_ID_SEGREG; } + S S { data[0] = 0x3602; return ARCH_CHECK_ID_SEGREG; } + D S { data[0] = 0x3e03; return ARCH_CHECK_ID_SEGREG; } + F S { data[0] = 0x6404; return ARCH_CHECK_ID_SEGREG; } + G S { data[0] = 0x6505; return ARCH_CHECK_ID_SEGREG; } + + /* instructions */ + + /* Move */ + M O V { RET_INSN(mov, 0, CPU_Any); } + /* Move with sign/zero extend */ + M O V S X { RET_INSN(movszx, 0xBE, CPU_386); } + M O V Z X { RET_INSN(movszx, 0xB6, CPU_386); } + /* Push instructions */ + /* P U S H */ + P U S H A { RET_INSN(onebyte, 0x0060, CPU_186); } + P U S H A D { RET_INSN(onebyte, 0x2060, CPU_386); } + P U S H A W { RET_INSN(onebyte, 0x1060, CPU_186); } + /* Pop instructions */ + /* P O P */ + P O P A { RET_INSN(onebyte, 0x0061, CPU_186); } + P O P A D { RET_INSN(onebyte, 0x2061, CPU_386); } + P O P A W { RET_INSN(onebyte, 0x1061, CPU_186); } + /* Exchange */ + /* X C H G */ + /* In/out from ports */ + /* I N */ + /* O U T */ + /* Load effective address */ + /* L E A */ + /* Load segment 
registers from memory */ + /* L D S */ + /* L E S */ + /* L F S */ + /* L G S */ + /* L S S */ + /* Flags register instructions */ + C L C { RET_INSN(onebyte, 0x00F8, CPU_Any); } + C L D { RET_INSN(onebyte, 0x00FC, CPU_Any); } + C L I { RET_INSN(onebyte, 0x00FA, CPU_Any); } + C L T S { RET_INSN(twobyte, 0x0F06, CPU_286|CPU_Priv); } + C M C { RET_INSN(onebyte, 0x00F5, CPU_Any); } + L A H F { RET_INSN(onebyte, 0x009F, CPU_Any); } + S A H F { RET_INSN(onebyte, 0x009E, CPU_Any); } + P U S H F { RET_INSN(onebyte, 0x009C, CPU_Any); } + P U S H F D { RET_INSN(onebyte, 0x209C, CPU_386); } + P U S H F W { RET_INSN(onebyte, 0x109C, CPU_Any); } + P O P F { RET_INSN(onebyte, 0x009D, CPU_Any); } + P O P F D { RET_INSN(onebyte, 0x209D, CPU_386); } + P O P F W { RET_INSN(onebyte, 0x109D, CPU_Any); } + S T C { RET_INSN(onebyte, 0x00F9, CPU_Any); } + S T D { RET_INSN(onebyte, 0x00FD, CPU_Any); } + S T I { RET_INSN(onebyte, 0x00FB, CPU_Any); } + /* Arithmetic */ + /* A D D */ + /* I N C */ + /* S U B */ + /* D E C */ + /* S B B */ + /* C M P */ + /* T E S T */ + /* A N D */ + /* O R */ + /* X O R */ + /* A D C */ + /* N E G */ + /* N O T */ + A A A { RET_INSN(onebyte, 0x0037, CPU_Any); } + A A S { RET_INSN(onebyte, 0x003F, CPU_Any); } + D A A { RET_INSN(onebyte, 0x0027, CPU_Any); } + D A S { RET_INSN(onebyte, 0x002F, CPU_Any); } + /* A A D */ + /* A A M */ + /* Conversion instructions */ + C B W { RET_INSN(onebyte, 0x1098, CPU_Any); } + C W D E { RET_INSN(onebyte, 0x2098, CPU_386); } + C W D { RET_INSN(onebyte, 0x1099, CPU_Any); } + C D Q { RET_INSN(onebyte, 0x2099, CPU_386); } + /* Multiplication and division */ + /* M U L */ + /* I M U L */ + /* D I V */ + /* I D I V */ + /* Shifts */ + /* R O L */ + /* R O R */ + /* R C L */ + /* R C R */ + /* S A L */ + /* S H L */ + /* S H R */ + /* S A R */ + /* S H L D */ + /* S H R D */ + /* Control transfer instructions (unconditional) */ + /* C A L L */ + /* J M P */ + R E T { RET_INSN(onebyte, 0x00C3, CPU_Any); } + /* R E T N */ + /* R E 
T F */ + /* E N T E R */ + L E A V E { RET_INSN(onebyte, 0x00C9, CPU_186); } + /* Conditional jumps */ + /* J O */ + /* J N O */ + /* J B */ + /* JC */ + /* J N A E */ + /* J N B */ + /* J N C */ + /* J A E */ + /* J E */ + /* J Z */ + /* J N E */ + /* J N Z */ + /* J B E */ + /* J N A */ + /* J N B E */ + /* J A */ + /* J S */ + /* J N S */ + /* J P */ + /* J P E */ + /* J N P */ + /* J P O */ + /* J L */ + /* J N G E */ + /* J N L */ + /* J G E */ + /* J L E */ + /* J N G */ + /* J N L E */ + /* J G */ + /* J C X Z */ + /* J E C X Z */ + /* Loop instructions */ + /* L O O P */ + /* L O O P Z */ + /* L O O P E */ + /* L O O P N Z */ + /* L O O P N E */ + /* Set byte on flag instructions */ + /* S E T O */ + /* S E T N O */ + /* S E T B */ + /* S E T C */ + /* S E T N A E */ + /* S E T N B */ + /* S E T N C */ + /* S E T A E */ + /* S E T E */ + /* S E T Z */ + /* S E T N E */ + /* S E T N Z */ + /* S E T B E */ + /* S E T N A */ + /* S E T N B E */ + /* S E T A */ + /* S E T S */ + /* S E T N S */ + /* S E T P */ + /* S E T P E */ + /* S E T N P */ + /* S E T P O */ + /* S E T L */ + /* S E T N G E */ + /* S E T N L */ + /* S E T G E */ + /* S E T L E */ + /* S E T N G */ + /* S E T N L E */ + /* S E T G */ + /* String instructions. 
*/ + C M P S B { RET_INSN(onebyte, 0x00A6, CPU_Any); } + C M P S W { RET_INSN(onebyte, 0x10A7, CPU_Any); } + /* C M P S D */ + I N S B { RET_INSN(onebyte, 0x006C, CPU_Any); } + I N S W { RET_INSN(onebyte, 0x106D, CPU_Any); } + I N S D { RET_INSN(onebyte, 0x206D, CPU_386); } + O U T S B { RET_INSN(onebyte, 0x006E, CPU_Any); } + O U T S W { RET_INSN(onebyte, 0x106F, CPU_Any); } + O U T S D { RET_INSN(onebyte, 0x206F, CPU_386); } + L O D S B { RET_INSN(onebyte, 0x00AC, CPU_Any); } + L O D S W { RET_INSN(onebyte, 0x10AD, CPU_Any); } + L O D S D { RET_INSN(onebyte, 0x20AD, CPU_386); } + M O V S B { RET_INSN(onebyte, 0x00A4, CPU_Any); } + M O V S W { RET_INSN(onebyte, 0x10A5, CPU_Any); } + /* M O V S D */ + S C A S B { RET_INSN(onebyte, 0x00AE, CPU_Any); } + S C A S W { RET_INSN(onebyte, 0x10AF, CPU_Any); } + S C A S D { RET_INSN(onebyte, 0x20AF, CPU_386); } + S T O S B { RET_INSN(onebyte, 0x00AA, CPU_Any); } + S T O S W { RET_INSN(onebyte, 0x10AB, CPU_Any); } + S T O S D { RET_INSN(onebyte, 0x20AB, CPU_386); } + X L A T B? 
{ RET_INSN(onebyte, 0x00D7, CPU_Any); } + /* Bit manipulation */ + /* B S F */ + /* B S R */ + /* B T */ + /* B T C */ + /* B T R */ + /* B T S */ + /* Interrupts and operating system instructions */ + /* I N T */ + I N T "3" { RET_INSN(onebyte, 0x00CC, CPU_Any); } + I N T "03" { RET_INSN(onebyte, 0x00CC, CPU_Any); } + I N T O { RET_INSN(onebyte, 0x00CE, CPU_Any); } + I R E T { RET_INSN(onebyte, 0x00CF, CPU_Any); } + I R E T W { RET_INSN(onebyte, 0x10CF, CPU_Any); } + I R E T D { RET_INSN(onebyte, 0x20CF, CPU_386); } + R S M { RET_INSN(twobyte, 0x0FAA, CPU_586|CPU_SMM); } + /* B O U N D */ + H L T { RET_INSN(onebyte, 0x00F4, CPU_Priv); } + N O P { RET_INSN(onebyte, 0x0090, CPU_Any); } + /* Protection control */ + /* A R P L */ + /* L A R */ + L G D T { RET_INSN(twobytemem, 0x020F01, CPU_286|CPU_Priv); } + L I D T { RET_INSN(twobytemem, 0x030F01, CPU_286|CPU_Priv); } + /* L L D T */ + /* L M S W */ + /* L S L */ + /* L T R */ + S G D T { RET_INSN(twobytemem, 0x000F01, CPU_286|CPU_Priv); } + S I D T { RET_INSN(twobytemem, 0x010F01, CPU_286|CPU_Priv); } + /* S L D T */ + /* S M S W */ + /* S T R */ + /* V E R R */ + /* V E R W */ + /* Floating point instructions */ + /* F L D */ + /* F I L D */ + /* F B L D */ + /* F S T */ + /* F I S T */ + /* F S T P */ + /* F I S T P */ + /* F B S T P */ + /* F X C H */ + /* F C O M */ + /* F I C O M */ + /* F C O M P */ + /* F I C O M P */ + F C O M P P { RET_INSN(twobyte, 0xDED9, CPU_FPU); } + /* F U C O M */ + /* F U C O M P */ + F U C O M P P { RET_INSN(twobyte, 0xDAE9, CPU_286|CPU_FPU); } + F T S T { RET_INSN(twobyte, 0xD9E4, CPU_FPU); } + F X A M { RET_INSN(twobyte, 0xD9E5, CPU_FPU); } + F L D "1" { RET_INSN(twobyte, 0xD9E8, CPU_FPU); } + F L D L "2" T { RET_INSN(twobyte, 0xD9E9, CPU_FPU); } + F L D L "2" E { RET_INSN(twobyte, 0xD9EA, CPU_FPU); } + F L D P I { RET_INSN(twobyte, 0xD9EB, CPU_FPU); } + F L D L G "2" { RET_INSN(twobyte, 0xD9EC, CPU_FPU); } + F L D L N "2" { RET_INSN(twobyte, 0xD9ED, CPU_FPU); } + F L D Z { 
RET_INSN(twobyte, 0xD9EE, CPU_FPU); } + /* F A D D */ + /* F A D D P */ + /* F I A D D */ + /* F S U B */ + /* F I S U B */ + /* F S U B P */ + /* F S U B R */ + /* F I S U B R */ + /* F S U B R P */ + /* F M U L */ + /* F I M U L */ + /* F M U L P */ + /* F D I V */ + /* F I D I V */ + /* F D I V P */ + /* F D I V R */ + /* F I D I V R */ + /* F D I V R P */ + F "2" X M "1" { RET_INSN(twobyte, 0xD9F0, CPU_FPU); } + F Y L "2" X { RET_INSN(twobyte, 0xD9F1, CPU_FPU); } + F P T A N { RET_INSN(twobyte, 0xD9F2, CPU_FPU); } + F P A T A N { RET_INSN(twobyte, 0xD9F3, CPU_FPU); } + F X T R A C T { RET_INSN(twobyte, 0xD9F4, CPU_FPU); } + F P R E M "1" { RET_INSN(twobyte, 0xD9F5, CPU_286|CPU_FPU); } + F D E C S T P { RET_INSN(twobyte, 0xD9F6, CPU_FPU); } + F I N C S T P { RET_INSN(twobyte, 0xD9F7, CPU_FPU); } + F P R E M { RET_INSN(twobyte, 0xD9F8, CPU_FPU); } + F Y L "2" X P "1" { RET_INSN(twobyte, 0xD9F9, CPU_FPU); } + F S Q R T { RET_INSN(twobyte, 0xD9FA, CPU_FPU); } + F S I N C O S { RET_INSN(twobyte, 0xD9FB, CPU_286|CPU_FPU); } + F R N D I N T { RET_INSN(twobyte, 0xD9FC, CPU_FPU); } + F S C A L E { RET_INSN(twobyte, 0xD9FD, CPU_FPU); } + F S I N { RET_INSN(twobyte, 0xD9FE, CPU_286|CPU_FPU); } + F C O S { RET_INSN(twobyte, 0xD9FF, CPU_286|CPU_FPU); } + F C H S { RET_INSN(twobyte, 0xD9E0, CPU_FPU); } + F A B S { RET_INSN(twobyte, 0xD9E1, CPU_FPU); } + F N I N I T { RET_INSN(twobyte, 0xDBE3, CPU_FPU); } + F I N I T { RET_INSN(threebyte, 0x98DBE3, CPU_FPU); } + /* F L D C W */ + /* F N S T C W */ + /* F S T C W */ + /* F N S T S W */ + /* F S T S W */ + F N C L E X { RET_INSN(twobyte, 0xDBE2, CPU_FPU); } + F C L E X { RET_INSN(threebyte, 0x98DBE2, CPU_FPU); } + F N S T E N V { RET_INSN(onebytemem, 0x06D9, CPU_FPU); } + F S T E N V { RET_INSN(twobytemem, 0x069BD9, CPU_FPU); } + F L D E N V { RET_INSN(onebytemem, 0x04D9, CPU_FPU); } + F N S A V E { RET_INSN(onebytemem, 0x06DD, CPU_FPU); } + F S A V E { RET_INSN(twobytemem, 0x069BDD, CPU_FPU); } + F R S T O R { 
RET_INSN(onebytemem, 0x04DD, CPU_FPU); } + /* F F R E E */ + /* F F R E E P */ + F N O P { RET_INSN(twobyte, 0xD9D0, CPU_FPU); } + F W A I T { RET_INSN(onebyte, 0x009B, CPU_FPU); } + /* Prefixes (should the others be here too? should wait be a prefix? */ + W A I T { RET_INSN(onebyte, 0x009B, CPU_Any); } + /* 486 extensions */ + /* B S W A P */ + /* X A D D */ + /* C M P X C H G */ + /* C M P X C H G 4 8 6 */ + I N V D { RET_INSN(twobyte, 0x0F08, CPU_486|CPU_Priv); } + W B I N V D { RET_INSN(twobyte, 0x0F09, CPU_486|CPU_Priv); } + I N V L P G { RET_INSN(twobytemem, 0x070F01, CPU_486|CPU_Priv); } + /* 586+ and late 486 extensions */ + C P U I D { RET_INSN(twobyte, 0x0FA2, CPU_486); } + /* Pentium extensions */ + W R M S R { RET_INSN(twobyte, 0x0F30, CPU_586|CPU_Priv); } + R D T S C { RET_INSN(twobyte, 0x0F31, CPU_586); } + R D M S R { RET_INSN(twobyte, 0x0F32, CPU_586|CPU_Priv); } + /* C M P X C H G 8 B */ + /* Pentium II/Pentium Pro extensions */ + S Y S E N T E R { RET_INSN(twobyte, 0x0F34, CPU_686); } + S Y S E X I T { RET_INSN(twobyte, 0x0F35, CPU_686|CPU_Priv); } + F X S A V E { RET_INSN(twobytemem, 0x000FAE, CPU_686|CPU_FPU); } + F X R S T O R { RET_INSN(twobytemem, 0x010FAE, CPU_686|CPU_FPU); } + R D P M C { RET_INSN(twobyte, 0x0F33, CPU_686); } + U D "2" { RET_INSN(twobyte, 0x0F0B, CPU_286); } + U D "1" { RET_INSN(twobyte, 0x0FB9, CPU_286|CPU_Undoc); } + /* C M O V */ + /* F C M O V */ + /* F C O M I */ + /* F U C O M I */ + /* F C O M I P */ + /* F U C O M I P */ + /* Pentium4 extensions */ + /* M O V N T I */ + /* C L F L U S H */ + L F E N C E { RET_INSN(threebyte, 0x0FAEE8, CPU_P3); } + M F E N C E { RET_INSN(threebyte, 0x0FAEF0, CPU_P3); } + P A U S E { RET_INSN(twobyte, 0xF390, CPU_P4); } + /* MMX/SSE2 instructions */ + E M M S { RET_INSN(twobyte, 0x0F77, CPU_586|CPU_MMX); } + /* PIII (Katmai) new instructions / SIMD instructions */ + /* ... 
*/ + P R E F E T C H N T A { RET_INSN(twobytemem, 0x000F18, CPU_P3); } + P R E F E T C H T "0" { RET_INSN(twobytemem, 0x010F18, CPU_P3); } + P R E F E T C H T "1" { RET_INSN(twobytemem, 0x020F18, CPU_P3); } + P R E F E T C H T "2" { RET_INSN(twobytemem, 0x030F18, CPU_P3); } + /* ... */ + S F E N C E { RET_INSN(threebyte, 0x0FAEF8, CPU_P3); } + /* ... */ + /* SSE2 instructions */ + /* AMD 3DNow! instructions */ + P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_586|CPU_AMD|CPU_3DNow); } + P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_586|CPU_AMD|CPU_3DNow); } + F E M M S { RET_INSN(twobyte, 0x0F0E, CPU_586|CPU_AMD|CPU_3DNow); } + /* ... */ + /* AMD extensions */ + S Y S C A L L { RET_INSN(twobyte, 0x0F05, CPU_686|CPU_AMD); } + S Y S R E T { RET_INSN(twobyte, 0x0F07, CPU_686|CPU_AMD|CPU_Priv); } + /* Cyrix MMX instructions */ + /* Cyrix extensions */ + R D S H R { RET_INSN(twobyte, 0x0F36, CPU_686|CPU_Cyrix|CPU_SMM); } + /* R S D C */ + /* R S L D T */ + /* R S T S */ + /* S V D C */ + /* S V L D T */ + /* S V T S */ + S M I N T { RET_INSN(twobyte, 0x0F38, CPU_686|CPU_Cyrix); } + S M I N T O L D { RET_INSN(twobyte, 0x0F7E, CPU_486|CPU_Cyrix|CPU_Obs); } + W R S H R { RET_INSN(twobyte, 0x0F37, CPU_686|CPU_Cyrix|CPU_SMM); } + /* Obsolete/undocumented instructions */ + F S E T P M { RET_INSN(twobyte, 0xDBE4, CPU_286|CPU_FPU|CPU_Obs); } + /* I B T S */ + L O A D A L L { RET_INSN(twobyte, 0x0F07, CPU_386|CPU_Undoc); } + L O A D A L L "286" { RET_INSN(twobyte, 0x0F05, CPU_286|CPU_Undoc); } + S A L C { RET_INSN(onebyte, 0x00D6, CPU_Undoc); } + S M I { RET_INSN(onebyte, 0x00F1, CPU_386|CPU_Undoc); } + /* U M O V */ + /* X B T S */ + + + /* catchalls */ + [A-Za-z0-9]+ { + return ARCH_CHECK_ID_NONE; + } + any { + return ARCH_CHECK_ID_NONE; + } + */ +} diff --git a/src/bytecode.c b/src/bytecode.c index 15aa5b39..6a46d8c3 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -103,6 +103,12 @@ imm_new_expr(expr *expr_ptr) return im; } +const expr * +ea_get_disp(const 
effaddr *ptr) +{ + return ptr->disp; +} + void ea_set_len(effaddr *ptr, unsigned char len) { @@ -125,6 +131,30 @@ ea_set_nosplit(effaddr *ptr, unsigned char nosplit) ptr->nosplit = nosplit; } +/*@-nullstate@*/ +void +ea_delete(effaddr *ea) +{ + if (cur_arch->ea_data_delete) + cur_arch->ea_data_delete(ea); + expr_delete(ea->disp); + xfree(ea); +} +/*@=nullstate@*/ + +/*@-nullstate@*/ +void +ea_print(FILE *f, const effaddr *ea) +{ + fprintf(f, "%*sDisp=", indent_level, ""); + expr_print(f, ea->disp); + fprintf(f, "\n%*sLen=%u\n", indent_level, "", (unsigned int)ea->len); + fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit); + if (cur_arch->ea_data_print) + cur_arch->ea_data_print(f, ea); +} +/*@=nullstate@*/ + void bc_set_multiple(bytecode *bc, expr *e) { @@ -258,6 +288,7 @@ bc_delete(bytecode *bc) break; case BC_OBJFMT_DATA: objfmt_data = bc_get_data(bc); + assert(cur_objfmt != NULL); if (cur_objfmt->bc_objfmt_data_delete) cur_objfmt->bc_objfmt_data_delete(objfmt_data->type, objfmt_data->data); @@ -336,6 +367,7 @@ bc_print(FILE *f, const bytecode *bc) case BC_OBJFMT_DATA: objfmt_data = bc_get_const_data(bc); fprintf(f, "%*s_ObjFmt_Data_\n", indent_level, ""); + assert(cur_objfmt != NULL); if (cur_objfmt->bc_objfmt_data_print) cur_objfmt->bc_objfmt_data_print(f, objfmt_data->type, objfmt_data->data); @@ -408,7 +440,7 @@ bc_resolve_reserve(bytecode_reserve *reserve, unsigned long *len, int save, expr_expand_labelequ(*tempp, sect, 1, resolve_label); num = expr_get_intnum(tempp); if (!num) { - if (expr_contains(temp, EXPR_FLOAT)) + if (temp && expr_contains(temp, EXPR_FLOAT)) ErrorAt(line, _("expression must not contain floating point value")); retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN; @@ -534,10 +566,10 @@ bc_resolve(bytecode *bc, int save, const section *sect, case BC_ALIGN: /* TODO */ InternalError(_("TODO: align bytecode not implemented!")); - break; + /*break;*/ case BC_OBJFMT_DATA: InternalError(_("resolving objfmt data 
bytecode?")); - break; + /*break;*/ default: if (bc->type < cur_arch->bc.type_max) retval = cur_arch->bc.bc_resolve(bc, save, sect, @@ -559,7 +591,7 @@ bc_resolve(bytecode *bc, int save, const section *sect, expr_expand_labelequ(*tempp, sect, 1, resolve_label); num = expr_get_intnum(tempp); if (!num) { - if (expr_contains(temp, EXPR_FLOAT)) + if (temp && expr_contains(temp, EXPR_FLOAT)) ErrorAt(bc->line, _("expression must not contain floating point value")); retval = BC_RESOLVE_ERROR | BC_RESOLVE_UNKNOWN_LEN; @@ -716,7 +748,7 @@ bc_tobytes(bytecode *bc, unsigned char *buf, unsigned long *bufsize, case BC_ALIGN: /* TODO */ InternalError(_("TODO: align bytecode not implemented!")); - break; + /*break;*/ case BC_OBJFMT_DATA: objfmt_data = bc_get_data(bc); if (output_bc_objfmt_data) diff --git a/src/bytecode.h b/src/bytecode.h index d61c6bcd..e68a7ac2 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -43,8 +43,11 @@ typedef enum { /*@only@*/ immval *imm_new_int(unsigned long int_val); /*@only@*/ immval *imm_new_expr(/*@keep@*/ expr *e); +/*@observer@*/ const expr *ea_get_disp(const effaddr *ea); void ea_set_len(effaddr *ea, unsigned char len); void ea_set_nosplit(effaddr *ea, unsigned char nosplit); +void ea_delete(/*@only@*/ effaddr *ea); +void ea_print(FILE *f, const effaddr *ea); void bc_set_multiple(bytecode *bc, /*@keep@*/ expr *e); diff --git a/src/expr-int.h b/src/expr-int.h index 1f0ed2df..a562c2ba 100644 --- a/src/expr-int.h +++ b/src/expr-int.h @@ -39,11 +39,7 @@ struct ExprItem { expr *expn; intnum *intn; floatnum *flt; - /* FIXME: reg structure is moderately x86-specific (namely size) */ - struct reg { - unsigned char num; - unsigned char size; /* in bits, eg AX=16, EAX=32 */ - } reg; + unsigned long reg; } data; }; @@ -62,6 +58,9 @@ struct expr { * * Stops early (and returns 1) if func returns 1. Otherwise returns 0. 
*/ +int expr_traverse_leaves_in_const(const expr *e, /*@null@*/ void *d, + int (*func) (/*@null@*/ const ExprItem *ei, + /*@null@*/ void *d)); int expr_traverse_leaves_in(expr *e, /*@null@*/ void *d, int (*func) (/*@null@*/ ExprItem *ei, /*@null@*/ void *d)); @@ -88,6 +87,6 @@ void expr_order_terms(expr *e); /* Copy entire expression EXCEPT for index "except" at *top level only*. */ expr *expr_copy_except(const expr *e, int except); -int expr_contains(expr *e, ExprType t); +int expr_contains(const expr *e, ExprType t); #endif diff --git a/src/expr.c b/src/expr.c index ade3789a..aa277528 100644 --- a/src/expr.c +++ b/src/expr.c @@ -31,8 +31,11 @@ #include "expr.h" #include "symrec.h" +#include "bytecode.h" #include "section.h" +#include "arch.h" + #include "expr-int.h" @@ -135,12 +138,11 @@ ExprFloat(floatnum *f) } ExprItem * -ExprReg(unsigned char reg, unsigned char size) +ExprReg(unsigned long reg) { ExprItem *e = xmalloc(sizeof(ExprItem)); e->type = EXPR_REG; - e->data.reg.num = reg; - e->data.reg.size = size; + e->data.reg = reg; return e; } @@ -662,8 +664,7 @@ expr_copy_except(const expr *e, int except) dest->data.flt = floatnum_copy(src->data.flt); break; case EXPR_REG: - dest->data.reg.num = src->data.reg.num; - dest->data.reg.size = src->data.reg.size; + dest->data.reg = src->data.reg; break; default: break; @@ -709,16 +710,16 @@ expr_delete(expr *e) /*@=mustfree@*/ static int -expr_contains_callback(ExprItem *ei, void *d) +expr_contains_callback(const ExprItem *ei, void *d) { ExprType *t = d; return (ei->type & *t); } int -expr_contains(expr *e, ExprType t) +expr_contains(const expr *e, ExprType t) { - return expr_traverse_leaves_in(e, &t, expr_contains_callback); + return expr_traverse_leaves_in_const(e, &t, expr_contains_callback); } /* FIXME: expand_labelequ needs to allow resolves of the symbols in exprs like @@ -801,6 +802,33 @@ expr_traverse_nodes_post(expr *e, void *d, return func(e, d); } +/* Traverse over expression tree in order, calling func for 
each leaf + * (non-operation). The data pointer d is passed to each func call. + * + * Stops early (and returns 1) if func returns 1. Otherwise returns 0. + */ +int +expr_traverse_leaves_in_const(const expr *e, void *d, + int (*func) (/*@null@*/ const ExprItem *ei, + /*@null@*/ void *d)) +{ + int i; + + if (!e) + return 0; + + for (i=0; inumterms; i++) { + if (e->terms[i].type == EXPR_EXPR) { + if (expr_traverse_leaves_in_const(e->terms[i].data.expn, d, func)) + return 1; + } else { + if (func(&e->terms[i], d)) + return 1; + } + } + return 0; +} + /* Traverse over expression tree in order, calling func for each leaf * (non-operation). The data pointer d is passed to each func call. * @@ -877,10 +905,23 @@ expr_get_symrec(expr **ep, int simplify) } /*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/ +/*@-unqualifiedtrans -nullderef -nullstate -onlytrans@*/ +const unsigned long * +expr_get_reg(expr **ep, int simplify) +{ + if (simplify) + *ep = expr_simplify(*ep); + + if ((*ep)->op == EXPR_IDENT && (*ep)->terms[0].type == EXPR_REG) + return &((*ep)->terms[0].data.reg); + else + return NULL; +} +/*@=unqualifiedtrans =nullderef -nullstate -onlytrans@*/ + void expr_print(FILE *f, const expr *e) { - static const char *regs[] = {"ax","cx","dx","bx","sp","bp","si","di"}; char opstr[3]; int i; @@ -982,9 +1023,7 @@ expr_print(FILE *f, const expr *e) floatnum_print(f, e->terms[i].data.flt); break; case EXPR_REG: - if (e->terms[i].data.reg.size == 32) - fprintf(f, "e"); - fprintf(f, "%s", regs[e->terms[i].data.reg.num&7]); + cur_arch->reg_print(f, e->terms[i].data.reg); break; case EXPR_NONE: break; diff --git a/src/expr.h b/src/expr.h index fb97248b..4521286c 100644 --- a/src/expr.h +++ b/src/expr.h @@ -31,7 +31,7 @@ typedef struct ExprItem ExprItem; /*@only@*/ ExprItem *ExprExpr(/*@keep@*/ expr *); /*@only@*/ ExprItem *ExprInt(/*@keep@*/ intnum *); /*@only@*/ ExprItem *ExprFloat(/*@keep@*/ floatnum *); -/*@only@*/ ExprItem *ExprReg(unsigned char reg, unsigned char 
size); +/*@only@*/ ExprItem *ExprReg(unsigned long reg); #define expr_new_tree(l,o,r) \ expr_new ((o), ExprExpr(l), ExprExpr(r)) @@ -79,6 +79,13 @@ void expr_expand_labelequ(expr *e, const section *srcsect, int withstart, /*@dependent@*/ /*@null@*/ const symrec *expr_get_symrec(expr **ep, int simplify); +/* Gets the register value of e if the expression is just a register. If the + * expression is more complex, returns NULL. Simplifies the expr first if + * simplify is nonzero. + */ +/*@dependent@*/ /*@null@*/ const unsigned long *expr_get_reg(expr **ep, + int simplify); + void expr_print(FILE *f, /*@null@*/ const expr *); #endif diff --git a/src/main.c b/src/main.c index 07c4a536..a8c7c33d 100644 --- a/src/main.c +++ b/src/main.c @@ -289,7 +289,7 @@ main(int argc, char *argv[]) } /* Get initial BITS setting from object format */ - x86_mode_bits = cur_objfmt->default_mode_bits; + /*x86_mode_bits = cur_objfmt->default_mode_bits;*/ /* Parse! */ sections = cur_parser->do_parse(cur_parser, in, in_filename); diff --git a/src/parsers/nasm/Makefile.inc b/src/parsers/nasm/Makefile.inc index dbe7010d..b2a24282 100644 --- a/src/parsers/nasm/Makefile.inc +++ b/src/parsers/nasm/Makefile.inc @@ -3,57 +3,22 @@ YASMPARSERFILES += \ src/parsers/nasm/nasm-parser.c \ src/parsers/nasm/nasm-defs.h \ - nasm-bison.y \ + src/parsers/nasm/nasm-bison.y \ nasm-bison.h \ - nasm-token.l - -if DEV - -nasm-token.l nasm-bison.y: $(srcdir)/src/arch/x86/instrs.dat $(srcdir)/src/parsers/nasm/token.l.in $(srcdir)/src/parsers/nasm/bison.y.in $(srcdir)/src/parsers/nasm/gen_instr.pl - $(PERL) $(srcdir)/src/parsers/nasm/gen_instr.pl -i $(srcdir)/src/arch/x86/instrs.dat --sourcetoken $(srcdir)/src/parsers/nasm/token.l.in -t nasm-token.l --sourcegrammar $(srcdir)/src/parsers/nasm/bison.y.in -g nasm-bison.y - -else - -nasm-token.l: $(srcdir)/nasm-token.l - @echo Warning: Not generating nasm-token.l from nasm-token.l.in. - @echo Run configure with --enable-dev to enable generation. 
- cp $(srcdir)/nasm-token.l . -nasm-token.c: $(srcdir)/nasm-token.c - @echo Warning: Not generating nasm-token.c from nasm-token.l. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-token.c . -nasm-bison.y: $(srcdir)/nasm-bison.y - @echo Warning: Not generating nasm-bison.y from nasm-bison.y.in. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.y . -nasm-bison.c: $(srcdir)/nasm-bison.c - @echo Warning: Not generating nasm-bison.c from nasm-bison.y. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.c . -nasm-bison.h: $(srcdir)/nasm-bison.h - @echo Warning: Not generating nasm-bison.h from nasm-bison.y. - @echo Run configure with --enable-dev to enable generation. - cp $(srcdir)/nasm-bison.h . - -endif + nasm-token.c -noinst_SCRIPTS = src/parsers/nasm/gen_instr.pl +nasm-token.c: $(srcdir)/src/parsers/nasm/nasm-token.re re2c$(EXEEXT) $(srcdir)/tools/re2c/cleanup.pl + re2c$(EXEEXT) -b $(srcdir)/src/parsers/nasm/nasm-token.re | $(PERL) $(srcdir)/tools/re2c/cleanup.pl > $@ BUILT_SOURCES += \ - nasm-bison.y \ nasm-bison.c \ nasm-bison.h \ - nasm-token.l \ nasm-token.c CLEANFILES += \ - nasm-bison.y \ nasm-bison.c \ nasm-bison.h \ - nasm-token.l \ nasm-token.c EXTRA_DIST += \ - src/parsers/nasm/token.l.in \ - src/parsers/nasm/bison.y.in \ - src/parsers/nasm/gen_instr.pl + src/parsers/nasm/nasm-token.re diff --git a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in deleted file mode 100644 index b3f0533a..00000000 --- a/src/parsers/nasm/bison.y.in +++ /dev/null @@ -1,791 +0,0 @@ -/* - * NASM-compatible bison parser - * - * Copyright (C) 2001 Peter Johnson, Michael Urman - * - * This file is part of YASM. - * - * YASM is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * YASM is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -%{ -#include "util.h" -RCSID("$IdPath$"); - -#ifdef STDC_HEADERS -# include -#endif - -#include "bitvect.h" - -#include "globals.h" -#include "errwarn.h" -#include "intnum.h" -#include "floatnum.h" -#include "expr.h" -#include "symrec.h" - -#include "bytecode.h" -#include "section.h" -#include "objfmt.h" - -#include "arch.h" - -#include "src/parsers/nasm/nasm-defs.h" - -void init_table(void); -extern int nasm_parser_lex(void); -void nasm_parser_error(const char *); -static void nasm_parser_directive(const char *name, - valparamhead *valparams, - /*@null@*/ valparamhead *objext_valparams); - -extern objfmt *nasm_parser_objfmt; -extern sectionhead nasm_parser_sections; -extern section *nasm_parser_cur_section; -extern char *nasm_parser_locallabel_base; - -static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL; -static bytecode *nasm_parser_temp_bc; - -/* additional data declarations (dynamically generated) */ -/* @DATADECLS@ */ - -/*@-usedef -nullassign -memtrans -usereleased -compdef -mustfree@*/ -%} - -%union { - unsigned int int_info; - char *str_val; - intnum *intn; - floatnum *flt; - symrec *sym; - unsigned char groupdata[5]; - effaddr *ea; - expr *exp; - immval *im_val; - x86_targetval tgt_val; - datavalhead datahead; - dataval *data; - bytecode *bc; - valparamhead dir_valparams; - valparam *dir_valparam; -} - -%token INTNUM -%token FLTNUM -%token DIRECTIVE_NAME STRING FILENAME -%token BYTE WORD DWORD QWORD TWORD DQWORD -%token DECLARE_DATA -%token RESERVE_SPACE -%token INCBIN EQU TIMES 
-%token SEG WRT NEAR SHORT FAR NOSPLIT ORG -%token TO -%token LOCK REPNZ REP REPZ -%token OPERSIZE ADDRSIZE -%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG -%token REG_EAX REG_ECX REG_EDX REG_EBX -%token REG_ESP REG_EBP REG_ESI REG_EDI -%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI -%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH -%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS -%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID -%token ID LOCAL_ID SPECIAL_ID -%token LINE - -/* instruction tokens (dynamically generated) */ -/* @TOKENS@ */ - -/* @TYPES@ */ - -%type line lineexp exp instr instrbase - -%type reg_eax reg_ecx -%type reg_ax reg_cx reg_dx -%type reg_al reg_cl -%type reg_es reg_cs reg_ss reg_ds reg_fs reg_gs -%type fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg -%type mem memaddr memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 -%type imm imm8x imm16x imm32x imm8 imm16 imm32 -%type expr expr_no_string memexpr direxpr -%type explabel -%type label_id -%type target -%type dataval -%type datavals -%type directive_valparams -%type directive_valparam - -%left '|' -%left '^' -%left '&' -%left LEFT_OP RIGHT_OP -%left '-' '+' -%left '*' '/' SIGNDIV '%' SIGNMOD -%nonassoc UNARYOP - -%% -input: /* empty */ - | input line { - nasm_parser_temp_bc = bcs_append(section_get_bytecodes(nasm_parser_cur_section), - $2); - if (nasm_parser_temp_bc) - nasm_parser_prev_bc = nasm_parser_temp_bc; - line_index++; - } -; - -line: '\n' { $$ = (bytecode *)NULL; } - | lineexp '\n' - | LINE INTNUM '+' INTNUM FILENAME '\n' { - /* %line indicates the line number of the *next* line, so subtract out - * the increment when setting the line number. 
- */ - line_set($5, intnum_get_uint($2)-intnum_get_uint($4), - intnum_get_uint($4)); - intnum_delete($2); - intnum_delete($4); - xfree($5); - $$ = (bytecode *)NULL; - } - | directive '\n' { $$ = (bytecode *)NULL; } - | error '\n' { - Error(_("label or instruction expected at start of line")); - $$ = (bytecode *)NULL; - yyerrok; - } -; - -lineexp: exp - | TIMES expr exp { $$ = $3; bc_set_multiple($$, $2); } - | label { $$ = (bytecode *)NULL; } - | label exp { $$ = $2; } - | label TIMES expr exp { $$ = $4; bc_set_multiple($$, $3); } - | label_id EQU expr { - symrec_define_equ($1, $3); - xfree($1); - $$ = (bytecode *)NULL; - } -; - -exp: instr - | DECLARE_DATA datavals { $$ = bc_new_data(&$2, $1); } - | RESERVE_SPACE expr { $$ = bc_new_reserve($2, $1); } - | INCBIN STRING { $$ = bc_new_incbin($2, NULL, NULL); } - | INCBIN STRING ',' expr { $$ = bc_new_incbin($2, $4, NULL); } - | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); } -; - -datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } - | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } -; - -dataval: expr_no_string { $$ = dv_new_expr($1); } - | STRING { $$ = dv_new_string($1); } - | error { - Error(_("expression syntax error")); - $$ = (dataval *)NULL; - } -; - -label: label_id { - symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc, - 1); - xfree($1); - } - | label_id ':' { - symrec_define_label($1, nasm_parser_cur_section, nasm_parser_prev_bc, - 1); - xfree($1); - } -; - -label_id: ID { - $$ = $1; - if (nasm_parser_locallabel_base) - xfree(nasm_parser_locallabel_base); - nasm_parser_locallabel_base = xstrdup($1); - } - | SPECIAL_ID - | LOCAL_ID -; - -/* directives */ -directive: '[' DIRECTIVE_NAME directive_val ']' { - xfree($2); - } - | '[' DIRECTIVE_NAME error ']' { - Error(_("invalid arguments to [%s]"), $2); - xfree($2); - } -; - - /* $0 is the DIRECTIVE_NAME */ - /* After : is (optional) object-format specific extension */ -directive_val: directive_valparams 
{ - nasm_parser_directive($0, &$1, NULL); - } - | directive_valparams ':' directive_valparams { - nasm_parser_directive($0, &$1, &$3); - } -; - -directive_valparams: directive_valparam { - vps_initialize(&$$); - vps_append(&$$, $1); - } - | directive_valparams directive_valparam { - vps_append(&$1, $2); - $$ = $1; - } -; - -directive_valparam: direxpr { - /* If direxpr is just an ID, put it in val and delete the expr */ - const /*@null@*/ symrec *vp_symrec; - if ((vp_symrec = expr_get_symrec(&$1, 0))) { - vp_new($$, xstrdup(symrec_get_name(vp_symrec)), NULL); - expr_delete($1); - } else - vp_new($$, NULL, $1); - } - | ID '=' direxpr { vp_new($$, $1, $3); } -; - -/* register groupings */ -fpureg: ST0 - | FPUREG_NOTST0 -; - -reg_eax: REG_EAX - | DWORD reg_eax { $$ = $2; } -; - -reg_ecx: REG_ECX - | DWORD reg_ecx { $$ = $2; } -; - -rawreg32: REG_EAX - | REG_ECX - | REG_EDX - | REG_EBX - | REG_ESP - | REG_EBP - | REG_ESI - | REG_EDI -; - -reg32: rawreg32 - | DWORD reg32 { $$ = $2; } -; - -reg_ax: REG_AX - | WORD reg_ax { $$ = $2; } -; - -reg_cx: REG_CX - | WORD reg_cx { $$ = $2; } -; - -reg_dx: REG_DX - | WORD reg_dx { $$ = $2; } -; - -rawreg16: REG_AX - | REG_CX - | REG_DX - | REG_BX - | REG_SP - | REG_BP - | REG_SI - | REG_DI -; - -reg16: rawreg16 - | WORD reg16 { $$ = $2; } -; - -reg_al: REG_AL - | BYTE reg_al { $$ = $2; } -; - -reg_cl: REG_CL - | BYTE reg_cl { $$ = $2; } -; - -reg8: REG_AL - | REG_CL - | REG_DL - | REG_BL - | REG_AH - | REG_CH - | REG_DH - | REG_BH - | BYTE reg8 { $$ = $2; } -; - -reg_es: REG_ES - | WORD reg_es { $$ = $2; } -; - -reg_ss: REG_SS - | WORD reg_ss { $$ = $2; } -; - -reg_ds: REG_DS - | WORD reg_ds { $$ = $2; } -; - -reg_fs: REG_FS - | WORD reg_fs { $$ = $2; } -; - -reg_gs: REG_GS - | WORD reg_gs { $$ = $2; } -; - -reg_cs: REG_CS - | WORD reg_cs { $$ = $2; } -; - -segreg: REG_ES - | REG_SS - | REG_DS - | REG_FS - | REG_GS - | REG_CS - | WORD segreg { $$ = $2; } -; - -/* memory addresses */ -/* FIXME: Is there any way this redundancy can 
be eliminated? This is almost - * identical to expr: the only difference is that FLTNUM is replaced by - * rawreg16 and rawreg32. - * - * Note that the two can't be just combined because of conflicts caused by imm - * vs. reg. I don't see a simple solution right now to this. - * - * We don't attempt to check memory expressions for validity here. - */ -memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); } - | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' memexpr %prec UNARYOP { $$ = $2; } - | '-' 
memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' memexpr ')' { $$ = $2; } - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } - | error { Error(_("invalid effective address")); } -; - -memaddr: memexpr { - $$ = x86_ea_new_expr($1); - x86_ea_set_segment($$, 0); - } - | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } - | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } - | WORD memaddr { $$ = $2; ea_set_len($$, 2); } - | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } - | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } -; - -mem: '[' memaddr ']' { $$ = $2; } -; - -/* explicit memory */ -mem8x: BYTE mem { $$ = $2; } - | BYTE mem8x { $$ = $2; } -; -mem16x: WORD mem { $$ = $2; } - | WORD mem16x { $$ = $2; } -; -mem32x: DWORD mem { $$ = $2; } - | DWORD mem32x { $$ = $2; } -; -mem64x: QWORD mem { $$ = $2; } - | QWORD mem64x { $$ = $2; } -; -mem80x: TWORD mem { $$ = $2; } - | TWORD mem80x { $$ = $2; } -; -mem128x: DQWORD mem { $$ = $2; } - | DQWORD mem128x { $$ = $2; } -; - -/* FAR memory, for jmp and call */ -memfar: FAR mem { $$ = $2; } - | FAR memfar { $$ = $2; } -; - -/* implicit memory */ -mem8: mem - | mem8x -; -mem16: mem - | mem16x -; -mem32: mem - | mem32x -; -mem64: mem - | mem64x -; -mem80: mem - | mem80x -; -mem128: mem - | mem128x -; - -/* both 16 and 32 bit memory */ -mem1632: mem - | mem16x - | mem32x -; - -/* explicit register or memory */ -rm8x: reg8 { $$ = x86_ea_new_reg($1); } - | mem8x -; -rm16x: reg16 { $$ = x86_ea_new_reg($1); 
} - | mem16x -; -rm32x: reg32 { $$ = x86_ea_new_reg($1); } - | mem32x -; -/* not needed: -rm64x: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64x -; -rm128x: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128x -; -*/ - -/* implicit register or memory */ -rm8: reg8 { $$ = x86_ea_new_reg($1); } - | mem8 -; -rm16: reg16 { $$ = x86_ea_new_reg($1); } - | mem16 -; -rm32: reg32 { $$ = x86_ea_new_reg($1); } - | mem32 -; -rm64: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64 -; -rm128: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128 -; - -/* immediate values */ -imm: expr { $$ = imm_new_expr($1); } -; - -/* explicit immediates */ -imm8x: BYTE imm { $$ = $2; } -; -imm16x: WORD imm { $$ = $2; } -; -imm32x: DWORD imm { $$ = $2; } -; - -/* implicit immediates */ -imm8: imm - | imm8x -; -imm16: imm - | imm16x -; -imm32: imm - | imm32x -; - -/* jump targets */ -target: expr { - $$.val = $1; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); - } - | SHORT target { - $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); - } - | NEAR target { - $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); - } -; - -/* expression trees */ - -/* expr w/o FLTNUM and unary + and -, for use in directives */ -direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | ID { - $$ = expr_new_ident(ExprSym(symrec_define_label($1, NULL, NULL, 0))); - xfree($1); - } - | direxpr '|' direxpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | direxpr '^' direxpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - | direxpr '&' direxpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - | direxpr LEFT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | direxpr RIGHT_OP direxpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | direxpr '+' direxpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | direxpr '-' direxpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | direxpr '*' direxpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | direxpr '/' direxpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | direxpr SIGNDIV direxpr { $$ 
= expr_new_tree($1, EXPR_SIGNDIV, $3); } - | direxpr '%' direxpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | direxpr SIGNMOD direxpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - /*| '!' expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' direxpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' direxpr ')' { $$ = $2; } -; - -expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' expr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | expr '&' expr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| expr '==' expr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| expr '>' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| expr '<' expr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| expr '>=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| expr '<=' expr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| expr '!=' expr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | expr LEFT_OP expr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | expr RIGHT_OP expr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | expr '+' expr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | expr '-' expr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | expr '*' expr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | expr '/' expr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | expr SIGNDIV expr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | expr '%' expr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | expr SIGNMOD expr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' expr %prec UNARYOP { $$ = $2; } - | '-' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' 
expr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' expr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' expr ')' { $$ = $2; } -; - -expr: expr_no_string - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } -; - -explabel: ID { - $$ = symrec_use($1); - xfree($1); - } - | SPECIAL_ID { - $$ = symrec_use($1); - xfree($1); - } - | LOCAL_ID { - $$ = symrec_use($1); - xfree($1); - } - | '$' { - $$ = symrec_define_label("$", nasm_parser_cur_section, - nasm_parser_prev_bc, 0); - } - | START_SECTION_ID { - if (section_is_absolute(nasm_parser_cur_section)) { - Error(_("`$$' is not valid within an ABSOLUTE section")); - YYERROR; - } else { - const char *ss_name = section_get_name(nasm_parser_cur_section); - assert(ss_name != NULL); - $$ = symrec_use(ss_name); - } - } -; - -instr: /* empty */ { - idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL; - $$ = x86_bc_new_insn(&idata); - } - | instrbase - | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } - | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } - | REG_CS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); - } - | REG_SS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); - } - | REG_DS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); - } - | REG_ES instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); - } - | REG_FS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); - } - | REG_GS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); - } - | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } - | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } - | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } - | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } -; - -/* instruction grammars (dynamically generated) */ -/* @INSTRUCTIONS@ */ - -%% 
-/*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/ - -static void -nasm_parser_directive(const char *name, valparamhead *valparams, - valparamhead *objext_valparams) -{ - valparam *vp, *vp2; - const intnum *intn; - long lval; - - assert(cur_objfmt != NULL); - - /* Handle (mostly) output-format independent directives here */ - if (strcasecmp(name, "extern") == 0) { - vp = vps_first(valparams); - if (vp->val) - symrec_declare(vp->val, SYM_EXTERN, - cur_objfmt->extern_data_new(vp->val, - objext_valparams)); - else - Error(_("invalid argument to [%s]"), "EXTERN"); - } else if (strcasecmp(name, "global") == 0) { - vp = vps_first(valparams); - if (vp->val) - symrec_declare(vp->val, SYM_GLOBAL, - cur_objfmt->global_data_new(vp->val, - objext_valparams)); - else - Error(_("invalid argument to [%s]"), "GLOBAL"); - } else if (strcasecmp(name, "common") == 0) { - vp = vps_first(valparams); - if (vp->val) { - vp2 = vps_next(vp); - if (!vp2 || (!vp2->val && !vp2->param)) - Error(_("no size specified in %s declaration"), "COMMON"); - else { - if (vp2->val) - symrec_declare(vp->val, SYM_COMMON, - cur_objfmt->common_data_new(vp->val, - expr_new_ident(ExprSym(symrec_use(vp2->val))), - objext_valparams)); - else if (vp2->param) { - symrec_declare(vp->val, SYM_COMMON, - cur_objfmt->common_data_new(vp->val, vp2->param, - objext_valparams)); - vp2->param = NULL; - } - } - } else - Error(_("invalid argument to [%s]"), "COMMON"); - } else if (strcasecmp(name, "section") == 0 || - strcasecmp(name, "segment") == 0) { - section *new_section = - cur_objfmt->sections_switch(&nasm_parser_sections, valparams, - objext_valparams); - if (new_section) { - nasm_parser_cur_section = new_section; - nasm_parser_prev_bc = (bytecode *)NULL; - } else - Error(_("invalid argument to [%s]"), "SECTION"); - } else if (strcasecmp(name, "absolute") == 0) { - /* it can be just an ID or a complete expression, so handle both. 
*/ - vp = vps_first(valparams); - if (vp->val) - nasm_parser_cur_section = - sections_switch_absolute(&nasm_parser_sections, - expr_new_ident(ExprSym(symrec_use(vp->val)))); - else if (vp->param) { - nasm_parser_cur_section = - sections_switch_absolute(&nasm_parser_sections, vp->param); - vp->param = NULL; - } - nasm_parser_prev_bc = (bytecode *)NULL; - } else if (strcasecmp(name, "bits") == 0) { - if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && - (intn = expr_get_intnum(&vp->param)) != NULL && - (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) - x86_mode_bits = (unsigned char)lval; - else - Error(_("invalid argument to [%s]"), "BITS"); - } else if (cur_objfmt->directive(name, valparams, objext_valparams, - &nasm_parser_sections)) { - Error(_("unrecognized directive [%s]"), name); - } - - vps_delete(valparams); - if (objext_valparams) - vps_delete(objext_valparams); -} - -void -nasm_parser_error(const char *s) -{ - ParserError(s); -} - diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl deleted file mode 100755 index b0599d93..00000000 --- a/src/parsers/nasm/gen_instr.pl +++ /dev/null @@ -1,889 +0,0 @@ -#!/usr/bin/perl -w -# $IdPath$ -# Generates NASM-compatible bison.y and token.l from instrs.dat. -# -# Copyright (C) 2001 Michael Urman -# -# This file is part of YASM. -# -# YASM is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# YASM is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -use strict; -use Getopt::Long; -my $VERSION = "0.0.1"; - -# useful constants for instruction arrays -# common -use constant INST => 0; -use constant OPERANDS => 1; -# general format -use constant OPSIZE => 2; -use constant OPCODE => 3; -use constant EFFADDR => 4; -use constant IMM => 5; -use constant CPU => 6; -# relative target format -use constant ADSIZE => 2; -use constant SHORTOPCODE => 3; -use constant NEAROPCODE => 4; -use constant SHORTCPU => 5; -use constant NEARCPU => 6; - -use constant TOO_MANY_ERRORS => 20; - -# default options -my $instrfile = 'instrs.dat'; -my $tokenfile = 'token.l'; -my $tokensource; -my $grammarfile = 'bison.y'; -my $grammarsource; -my $showversion; -my $showusage; -my $dry_run; - -# allow overrides -my $gotopts = GetOptions ( 'input=s' => \$instrfile, - 'token=s' => \$tokenfile, - 'sourcetoken=s' => \$tokensource, - 'grammar=s' => \$grammarfile, - 'sourcegrammar=s' => \$grammarsource, - 'version' => \$showversion, - 'n|dry-run' => \$dry_run, - 'help|usage' => \$showusage, - ); - -&showusage and exit 1 unless $gotopts; -&showversion if $showversion; -&showusage if $showusage; -exit 0 if $showversion or $showusage; - -# valid values for instrs.dat fields -my $valid_regs = join '|', qw( - reg_al reg_ah reg_ax reg_eax - reg_bl reg_bh reg_bx reg_ebx - reg_cl reg_ch reg_cx reg_ecx - reg_dl reg_dh reg_dx reg_edx - reg_si reg_esi reg_di reg_edi - reg_bp reg_ebp - reg_cs reg_ds reg_es reg_fs reg_gs reg_ss - ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG - fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm - imm8 imm16 imm32 imm64 imm80 imm128 - imm8x imm16x imm32x imm64x imm80x imm128x - rm8 rm16 rm32 rm1632 rm64 rm80 rm128 - rm8x rm16x rm32x rm1632x rm64x rm80x rm128x - reg8 reg16 reg32 reg1632 reg64 reg80 reg128 - 
reg8x reg16x reg32x reg1632x reg64x reg80x reg128x - mem8 mem16 mem32 mem1632 mem64 mem80 mem128 - mem8x mem16x mem32x mem1632x mem64x mem80x mem128x - target memfar -); -my $valid_opcodes = join '|', qw( - [0-9A-F]{2} - \\$0\\.\\d -); -my $valid_cpus = join '|', qw( - 8086 186 286 386 486 P4 P5 P6 - FPU MMX KATMAI SSE SSE2 - AMD ATHLON 3DNOW - SMM - CYRIX - UNDOC OBS PRIV PROT - @0 @1 -); - -# track errors and warnings rather than die'ing on the first. -my (@messages, $errcount, $warncount); -sub die_with_errors (@) -{ - foreach (@_) { print; }; - if ($errcount) - { - print "Dying with errors\n"; - exit -1; - } -} - -my ($groups) = &read_instructions ($instrfile); - -die_with_errors @messages; - -exit 0 if $dry_run; # done with simple verification, so exit - -unless ($dry_run) -{ - &output_lex ($tokenfile, $tokensource, $groups); - &output_yacc ($grammarfile, $grammarsource, $groups); -} - -# print version for --version, etc. -sub showversion -{ - print "YASM gen_instr.pl $VERSION\n"; -} - -# print usage information for --help, etc. -sub showusage -{ - print <<"EOF"; -Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile] - -i, --input instructions file (default: $instrfile) - -t, --token token output file (default: $tokenfile) - -st, --sourcetoken token input file (default: $tokenfile.in) - -g, --grammar grammar output file (default: $grammarfile) - -sg, --sourcegrammar grammar input file (default: $grammarfile.in) - -v, --version show version and exit - -h, --help, --usage show this message and exit - -n, --dry-run verify input file without writing output files -EOF -} - -# read in instructions, and verify they're valid (well, mostly) -sub read_instructions ($) -{ - my $instrfile = shift || die; - open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; - my %instr; - my %groups; - - sub add_group_rule ($$$$) - { - my ($inst, $args, $groups, $instrfile) = splice @_; - - # slide $0.\d down by one. 
- # i still say changing instrs.dat would be better ;) - $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg; - - # detect relative target format by looking for "target" in args - if($args =~ m/target/oi) - { - my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) = - split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; - die "Invalid Address Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Short Opcode\n" - if $shortopcode !~ m/^(\$0\.\d\?)?(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; - die "Invalid Near Opcode\n" - if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; - die "Invalid Short CPU\n" - if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - die "Invalid Near CPU\n" - if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! 
off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu]; - } else { - my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; - die "Invalid Operation Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; - die "Invalid Effective Address\n" - if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; - die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! 
off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; - } - } - - sub add_group_member ($$$$$) - { - my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_; - - my ($inst, $group) = split /!/, $handle; - my ($args, $cpu) = split /\t+/, $fullargs; - eval { - die "Invalid instruction name\n" - if $inst !~ m/^\w+$/o; - die "Invalid group name\n" - if $group !~ m/^\w+$/o; - die "Invalid CPU\n" - if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n" - unless exists $groups->{$group}; - $warncount++; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - # only allow multiple instances of instructions that aren't of a group - push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++ - if exists $instr->{$inst} and not exists $groups->{$inst}; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu]; - $instr->{$inst} = 1; - } - - while (<INPUT>) - { - chomp; - next if /^\s*(?:;.*)$/; - - my ($handle, $args) = split /\t+/, $_, 2; - - # pseudo hack to handle original style instructions (no group) - if ($handle =~ m/^\w+$/) - { - # TODO: this has some long ranging effects, as the eventual - # bison rules get tagged when they don't need - # to, etc. Fix this sometime. - add_group_rule ("!$handle", $args, \%groups, $instrfile); - add_group_member ("$handle!$handle", "", \%groups, \%instr, - $instrfile); - } - elsif ($handle =~ m/^!\w+$/) - { - add_group_rule ($handle, $args, \%groups, $instrfile); - } - elsif ($handle =~ m/^\w+!\w+$/) - { - add_group_member ($handle, $args, \%groups, \%instr, - $instrfile); - } - # TODO: consider if this is necessary: Pete?
- # (add_group_member_synonym is -not- implemented) - #elsif ($handle =~ m/^:\w+$/) - #{ - # add_group_member_synonym ($handle, $args); - #} - } - close INPUT; - return (\%groups); -} - -sub output_lex ($@) -{ - my $tokenfile = shift or die; - my $tokensource = shift; - $tokensource ||= "$tokenfile.in"; - my $groups = shift or die; - - open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; - open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; - while (<IN>) - { - # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content - if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) - { - foreach my $grp (sort keys %$groups) - { - my %printed; - my $group = $grp; $group =~ s/^!//; - - foreach my $grp (@{$groups->{$grp}{members}}) - { - unless (exists $printed{$grp->[0]}) - { - $printed{$grp->[0]} = 1; - my @groupdata; - if ($grp->[2]) - { - @groupdata = split ",", $grp->[2]; - for (my $i=0; $i < @groupdata; ++$i) - { - $groupdata[$i] =~ s/nil/0/; - $groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];"; - } - $groupdata[-1] .= "\n\t "; - } - printf TOKEN "%-12s{%s return %-20s }\n", - $grp->[0], - (join "\n\t ", @groupdata), - "\Ugrp_$group;\E"; - # TODO: change appropriate GRP_FOO back to - # INS_FOO's. not functionally important; - # just pedantically so. - } - } - } - } - else - { - print TOKEN $_; - } - } - close IN; - close TOKEN; -} - -# helper functions for yacc output -sub rule_header ($ $ $) -{ - my ($rule, $tokens, $count) = splice (@_); - $count ? " | $tokens {\n" : "$rule: $tokens {\n"; -} -sub rule_footer () -{ - return " }\n"; -} - -sub cond_action_if ( $ $ $ $ $ $ $ ) -{ - my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_); - return rule_header ($rule, $tokens, $count) .
<<"EOF"; - if (\$$regarg == $val) { - @$a_eax - \$\$ = $func; - } -EOF -} -sub cond_action_elsif ( $ $ $ $ ) -{ - my ($regarg, $val, $func, $a_eax) = splice (@_); - return <<"EOF"; - else if (\$$regarg == $val) { - @$a_eax - \$\$ = $func; - } -EOF -} -sub cond_action_else ( $ $ ) -{ - my ($func, $a_args) = splice (@_); - return <<"EOF" . rule_footer; - else { - @$a_args - \$\$ = $func; - } -EOF -} -sub cond_action ( $ $ $ $ $ $ $ $ ) -{ - my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args) - = splice (@_); - return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func, - $a_eax) . cond_action_else ($func, $a_args); -} - -#sub action ( $ $ $ $ $ ) -sub action ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " @$a_args\n" - . " \$\$ = $func;\n" - . rule_footer; -} - -sub action_setshiftflag ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " @$a_args\n" - . " \$\$ = $func;\n" - . " x86_bc_insn_set_shift_flag(\$\$);\n" - . rule_footer; -} - -sub action_setjrshort ( @ $ ) -{ - my ($rule, $tokens, $func, $a_args, $count) = splice @_; - return rule_header ($rule, $tokens, $count) - . " if (\$2.op_sel == JR_NONE)\n" - . " \$2.op_sel = JR_SHORT;\n" - . " @$a_args\n" - . " \$\$ = $func;\n" - . 
rule_footer; -} - -sub get_token_number ( $ $ ) -{ - my ($tokens, $str) = splice @_; - $tokens =~ s/$str.*/x/; # hold its place - my @f = split /\s+/, $tokens; - return scalar @f; -} - -sub output_yacc ($@) -{ - my $grammarfile = shift or die; - my $grammarsource = shift; - $grammarsource ||= "$grammarfile.in"; - my $groups = shift or die; - - open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; - open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; - - while () - { - if (m{/[*]\s*[@]DATADECLS[@]\s*[*]/}) - { - print GRAMMAR "static x86_new_insn_data idata;\n"; - print GRAMMAR "static x86_new_jmprel_data jrdata;\n"; - } - elsif (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) - { - my $len = length("%token "); - print GRAMMAR "%token "; - foreach my $group (sort keys %$groups) - { - if ($len + length("GRP_$group") < 76) - { - print GRAMMAR " GRP_\U$group\E"; - $len += length(" GRP_$group"); - } - else - { - print GRAMMAR "\n%token GRP_\U$group\E"; - $len = length("%token GRP_$group"); - } - } - print GRAMMAR "\n"; - } - elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/}) - { - my $len = length("%type "); - print GRAMMAR "%type "; - foreach my $group (sort keys %$groups) - { - if ($len + length($group) < 76) - { - print GRAMMAR " $group"; - $len += length(" $group"); - } - else - { - print GRAMMAR "\n%type $group"; - $len = length("%type $group"); - } - } - print GRAMMAR "\n"; - } - elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) - { - # list every kind of instruction that instrbase can be - print GRAMMAR "instrbase: ", - join( "\n | ", sort keys %$groups), "\n;\n"; - - my ($ONE, $AL, $AX, $EAX); # need the outer scope - my (@XCHG_AX, @XCHG_EAX); - - # list the arguments and actions (buildbc) - #foreach my $instrname (sort keys %$instrlist) - foreach my $group (sort keys %$groups) - { - # I'm still convinced this is a hack. 
The idea is if - # within an instruction we see certain versions of the - # opcodes with ONE, or reg_e?a[lx],imm(8|16|32). If we - # do, defer generation of the action, as we may need to - # fold it into another version with a conditional to - # generate the more efficient variant of the opcode - # BUT, if we don't fold it in, we have to generate the - # original version we would have otherwise. - ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0); - # Folding for xchg (reg_e?ax,reg16 and reg16,reg_e?ax). - (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0)); - my $count = 0; - foreach my $inst (@{$groups->{$group}{rules}}) { - if($inst->[OPERANDS] =~ m/target/oi) - { - # relative target format - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" - if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - $tokens =~ s/:/ ':' /g; - my $datastruct = "x86_new_jmprel_data"; - my $datastructname = "jrdata"; - my $func = "x86_bc_new_jmprel(&$datastructname)"; - - # Create the argument list for bytecode_new - my @args; - - # Target argument: HACK: Always assumed to be arg 1. 
- push @args, 'target=&$2;'; - - # test for short opcode "nil" - if($inst->[SHORTOPCODE] =~ m/nil/) - { - push @args, 'short_op_len=0;'; - } - else - { - my @opcodes; - # Check for possible length parameter - if($inst->[SHORTOPCODE] =~ m/\?/) - { - my @pieces = split /\?/, $inst->[SHORTOPCODE]; - push @args, "short_op_len=".$pieces[0].";"; - # opcode piece 1 (and 2 and 3 if attached) - @opcodes = split ",", $pieces[1]; - } - else - { - # opcode piece 1 (and 2 and 3 if attached) - @opcodes = split ",", $inst->[SHORTOPCODE]; - # number of bytes of short opcode - push @args, "short_op_len=".@opcodes.";"; - } - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; - push @args, "short_op[$i]=$opcodes[$i];"; - } - } - - # test for near opcode "nil" - if($inst->[NEAROPCODE] =~ m/nil/) - { - push @args, 'near_op_len=0;'; - } - else - { - # opcode piece 1 (and 2 and 3 if attached) - my @opcodes = split ",", $inst->[NEAROPCODE]; - # number of bytes of near opcode - push @args, "near_op_len=".@opcodes.";"; - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2)/eg; - push @args, "near_op[$i]=$opcodes[$i];"; - } - } - - # address size - push @args, "addrsize=$inst->[ADSIZE];"; - $args[-1] =~ s/nil/0/; - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # and add the data structure reference - s/^/$datastructname./g foreach (@args); - - if ($args[0] =~ m/\&\$/) - { - $args[0] = '/*@-immediatetrans@*/' . $args[0] . - '/*@=immediatetrans@*/'; - } - - # generate the grammar - # Specialcase jcc to set op_sel=JR_SHORT. 
- if ($rule =~ m/jcc/) - { - print GRAMMAR action_setjrshort ($rule, $tokens, $func, \@args, $count++); - } - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - } - } - else - { - # general instruction format - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" - if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - $tokens =~ s/:/ ':' /g; - # offset args - my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0; - my $datastruct = "x86_new_insn_data"; - my $datastructname = "idata"; - my $func = "x86_bc_new_insn(&$datastructname)"; - - # Create the argument list for bytecode_new - my @args; - - # operand size - push @args, "opersize=$inst->[OPSIZE];"; - $args[-1] =~ s/nil/0/; - - - # opcode piece 1 (and 2 and 3 if attached) - my @opcodes = split ",", $inst->[OPCODE]; - # number of bytes of opcodes - push @args, "op_len=".@opcodes.";"; - for (my $i=0; $i < @opcodes; ++$i) - { - $opcodes[$i] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $opcodes[$i] =~ s/(0x[0-9A-Fa-f]{2}.*\+)/(unsigned char)$1/g; - # don't match $0.\d in the following rule. - $opcodes[$i] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to)/eg; - push @args, "op[$i]=$opcodes[$i];"; - } - - # effective addresses - my $effaddr = $inst->[EFFADDR]; - $effaddr =~ s/^nil/NULL,0/; - $effaddr =~ s/nil/0/; - # don't let a $0.\d match slip into the following rules. - $effaddr =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! 
- $effaddr =~ s/(\$\d+)r/x86_ea_new_reg($1)/; - $effaddr =~ s[(\$\d+)i,\s*(\d+)] - ["x86_ea_new_imm($1^ ".($2/8)."),0"]e; - - die $effaddr if $effaddr =~ m/\d+[ri]/; - - my @effaddr_split = split ',', $effaddr; - $effaddr_split[0] =~ s/\^/,/; - push @args, "ea=$effaddr_split[0];"; - if ($effaddr_split[0] !~ m/NULL/) - { - push @args, "spare=$effaddr_split[1];"; - } - - # immediate sources - my $imm = $inst->[IMM]; - $imm =~ s/nil/NULL,0/; - # don't match $0.\d in the following rules. - $imm =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $imm =~ s[^([0-9A-Fa-f]+),] - [imm_new_int(0x$1),]; - $imm =~ s[^\$0.(\d+),] - [imm_new_int((unsigned long)\$1\[$1\]),]; - - # divide the second, and only the second, by 8 bits/byte - $imm =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $imm .= ($3||'') eq 's' ? ',1' : ',0'; - - die $imm if $imm =~ m/\d+s/; - - my @imm_split = split ",", $imm; - push @args, "imm=$imm_split[0];"; - if ($imm_split[0] !~ m/NULL/) - { - push @args, "im_len=$imm_split[1];"; - push @args, "im_sign=$imm_split[2];"; - } - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # and add the data structure reference - s/^/$datastructname./g foreach (@args); - - # see if we match one of the cases to defer - if (($inst->[OPERANDS]||"") =~ m/,ONE/) - { - $ONE = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_al,imm8/) - { - $AL = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,imm16/) - { - $AX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,imm32/) - { - $EAX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_ax,reg16/) - { - $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg16,reg_ax/) - { - $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg_eax,reg32/) - { - $XCHG_EAX[0] = [ $rule, $tokens, $func, 
\@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg32,reg_eax/) - { - $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; - } - - # or if we've deferred and we match the folding version - elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) - { - $ONE->[4] = 1; - # Output a normal version except imm8 -> imm8x - # (BYTE override always makes longer version, and - # we don't want to conflict with the imm version - # we output right after this one. - $tokens =~ s/imm8/imm8x/; - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - - # Now output imm version, with second opcode byte - # set to ,1 opcode. Also call SetInsnShiftFlag(). - $tokens =~ s/imm8x/imm/; - my $oneval = $ONE->[3]->[2]; - $oneval =~ s/op\[(\d)\]=/"op[".($1+1)."]="/eg; - push @args, $oneval; - print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); - } - elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) - { - $AL->[4] = 1; - my $regarg = get_token_number ($tokens, "reg8"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); - } - elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm(16|16x)?$/) - { - $AX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg16"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); - } - elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm(32|32x)?$/) - { - $EAX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg32"); - - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); - } - elsif (($XCHG_AX[0] or $XCHG_AX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_AX; ++$i) - { - if($XCHG_AX[$i]) - { - $XCHG_AX[$i]->[4] = 1; - # This is definitely a hack. The "right" - # way to do this would be to enhance - # get_token_number to get the nth reg16 - # instead of always getting the first. 
- my $regarg = - get_token_number ($tokens, "reg16") - + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); - } - } - } - print GRAMMAR cond_action_else ($func, \@args); - } - elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_EAX; ++$i) - { - if($XCHG_EAX[$i]) - { - $XCHG_EAX[$i]->[4] = 1; - # This is definitely a hack. The "right" - # way to do this would be to enhance - # get_token_number to get the nth reg32 - # instead of always getting the first. - my $regarg = - get_token_number ($tokens, "reg32") - + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); - } - } - } - print GRAMMAR cond_action_else ($func, \@args); - } - - # otherwise, generate the normal version - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); - } - } - } - - # catch deferreds that haven't been folded in. 
- if ($ONE and not $ONE->[4]) - { - print GRAMMAR action (@$ONE, $count++); - } - if ($AL and not $AL->[4]) - { - print GRAMMAR action (@$AL, $count++); - } - if ($AX and not $AL->[4]) - { - print GRAMMAR action (@$AX, $count++); - } - if ($EAX and not $AL->[4]) - { - print GRAMMAR action (@$EAX, $count++); - } - - # print error action - # ASSUMES: at least one previous action exists - print GRAMMAR " | \Ugrp_$group\E error {\n"; - print GRAMMAR " Error (_(\"expression syntax error\"));\n"; - print GRAMMAR " \$\$ = (bytecode *)NULL;\n"; - print GRAMMAR " }\n"; - - # terminate the rule - print GRAMMAR ";\n"; - } - } - else - { - print GRAMMAR $_; - } - } - close IN; - close GRAMMAR; -} diff --git a/src/parsers/nasm/nasm-bison.y b/src/parsers/nasm/nasm-bison.y index b3f0533a..248820db 100644 --- a/src/parsers/nasm/nasm-bison.y +++ b/src/parsers/nasm/nasm-bison.y @@ -44,8 +44,10 @@ RCSID("$IdPath$"); #include "src/parsers/nasm/nasm-defs.h" + void init_table(void); extern int nasm_parser_lex(void); +extern void nasm_parser_set_directive_state(void); void nasm_parser_error(const char *); static void nasm_parser_directive(const char *name, valparamhead *valparams, @@ -55,6 +57,7 @@ extern objfmt *nasm_parser_objfmt; extern sectionhead nasm_parser_sections; extern section *nasm_parser_cur_section; extern char *nasm_parser_locallabel_base; +extern size_t nasm_parser_locallabel_base_len; static /*@null@*/ bytecode *nasm_parser_prev_bc = (bytecode *)NULL; static bytecode *nasm_parser_temp_bc; @@ -71,16 +74,19 @@ static bytecode *nasm_parser_temp_bc; intnum *intn; floatnum *flt; symrec *sym; - unsigned char groupdata[5]; + unsigned long arch_data[4]; effaddr *ea; expr *exp; - immval *im_val; - x86_targetval tgt_val; datavalhead datahead; dataval *data; bytecode *bc; valparamhead dir_valparams; valparam *dir_valparam; + struct { + insn_operandhead operands; + int num_operands; + } insn_operands; + insn_operand *insn_operand; } %token INTNUM @@ -90,46 +96,25 @@ static bytecode 
*nasm_parser_temp_bc; %token DECLARE_DATA %token RESERVE_SPACE %token INCBIN EQU TIMES -%token SEG WRT NEAR SHORT FAR NOSPLIT ORG +%token SEG WRT NOSPLIT %token TO -%token LOCK REPNZ REP REPZ -%token OPERSIZE ADDRSIZE -%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG -%token REG_EAX REG_ECX REG_EDX REG_EBX -%token REG_ESP REG_EBP REG_ESI REG_EDI -%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI -%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH -%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS +%token INSN PREFIX REG SEGREG TARGETMOD %token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD START_SECTION_ID %token ID LOCAL_ID SPECIAL_ID %token LINE -/* instruction tokens (dynamically generated) */ -/* @TOKENS@ */ - -/* @TYPES@ */ - -%type line lineexp exp instr instrbase - -%type reg_eax reg_ecx -%type reg_ax reg_cx reg_dx -%type reg_al reg_cl -%type reg_es reg_cs reg_ss reg_ds reg_fs reg_gs -%type fpureg rawreg32 reg32 rawreg16 reg16 reg8 segreg -%type mem memaddr memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 -%type imm imm8x imm16x imm32x imm8 imm16 imm32 -%type expr expr_no_string memexpr direxpr +%type line lineexp exp instr + +%type memaddr +%type dvexpr expr direxpr %type explabel %type label_id -%type target %type dataval %type datavals %type directive_valparams %type directive_valparam +%type operands +%type operand %left '|' %left '^' @@ -163,7 +148,9 @@ line: '\n' { $$ = (bytecode *)NULL; } xfree($5); $$ = (bytecode *)NULL; } - | directive '\n' { $$ = (bytecode *)NULL; } + | '[' { nasm_parser_set_directive_state(); } directive ']' '\n' { + $$ = (bytecode *)NULL; + } | error '\n' { Error(_("label or instruction expected at start of line")); $$ = (bytecode *)NULL; @@ -191,11 +178,32 @@ exp: instr | INCBIN STRING ',' expr ',' expr { $$ = bc_new_incbin($2, $4, $6); } ; +instr: INSN { + $$ = 
cur_arch->parse.new_insn($1, 0, NULL); + } + | INSN operands { + $$ = cur_arch->parse.new_insn($1, $2.num_operands, &$2.operands); + ops_delete(&$2.operands, 0); + } + | INSN error { + Error(_("expression syntax error")); + $$ = NULL; + } + | PREFIX instr { + $$ = $2; + cur_arch->parse.handle_prefix($$, $1); + } + | SEGREG instr { + $$ = $2; + cur_arch->parse.handle_seg_prefix($$, $1[0]); + } +; + datavals: dataval { dvs_initialize(&$$); dvs_append(&$$, $1); } | datavals ',' dataval { dvs_append(&$1, $3); $$ = $1; } ; -dataval: expr_no_string { $$ = dv_new_expr($1); } +dataval: dvexpr { $$ = dv_new_expr($1); } | STRING { $$ = dv_new_string($1); } | error { Error(_("expression syntax error")); @@ -219,19 +227,22 @@ label_id: ID { $$ = $1; if (nasm_parser_locallabel_base) xfree(nasm_parser_locallabel_base); - nasm_parser_locallabel_base = xstrdup($1); + nasm_parser_locallabel_base_len = strlen($1); + nasm_parser_locallabel_base = + xmalloc(nasm_parser_locallabel_base_len+1); + strcpy(nasm_parser_locallabel_base, $1); } | SPECIAL_ID | LOCAL_ID ; /* directives */ -directive: '[' DIRECTIVE_NAME directive_val ']' { - xfree($2); +directive: DIRECTIVE_NAME directive_val { + xfree($1); } - | '[' DIRECTIVE_NAME error ']' { - Error(_("invalid arguments to [%s]"), $2); - xfree($2); + | DIRECTIVE_NAME error { + Error(_("invalid arguments to [%s]"), $1); + xfree($1); } ; @@ -267,299 +278,85 @@ directive_valparam: direxpr { | ID '=' direxpr { vp_new($$, $1, $3); } ; -/* register groupings */ -fpureg: ST0 - | FPUREG_NOTST0 -; - -reg_eax: REG_EAX - | DWORD reg_eax { $$ = $2; } -; - -reg_ecx: REG_ECX - | DWORD reg_ecx { $$ = $2; } -; - -rawreg32: REG_EAX - | REG_ECX - | REG_EDX - | REG_EBX - | REG_ESP - | REG_EBP - | REG_ESI - | REG_EDI -; - -reg32: rawreg32 - | DWORD reg32 { $$ = $2; } -; - -reg_ax: REG_AX - | WORD reg_ax { $$ = $2; } -; - -reg_cx: REG_CX - | WORD reg_cx { $$ = $2; } -; - -reg_dx: REG_DX - | WORD reg_dx { $$ = $2; } -; - -rawreg16: REG_AX - | REG_CX - | REG_DX - | 
REG_BX - | REG_SP - | REG_BP - | REG_SI - | REG_DI -; - -reg16: rawreg16 - | WORD reg16 { $$ = $2; } -; - -reg_al: REG_AL - | BYTE reg_al { $$ = $2; } -; - -reg_cl: REG_CL - | BYTE reg_cl { $$ = $2; } -; - -reg8: REG_AL - | REG_CL - | REG_DL - | REG_BL - | REG_AH - | REG_CH - | REG_DH - | REG_BH - | BYTE reg8 { $$ = $2; } -; - -reg_es: REG_ES - | WORD reg_es { $$ = $2; } -; - -reg_ss: REG_SS - | WORD reg_ss { $$ = $2; } -; - -reg_ds: REG_DS - | WORD reg_ds { $$ = $2; } -; - -reg_fs: REG_FS - | WORD reg_fs { $$ = $2; } -; - -reg_gs: REG_GS - | WORD reg_gs { $$ = $2; } -; - -reg_cs: REG_CS - | WORD reg_cs { $$ = $2; } -; - -segreg: REG_ES - | REG_SS - | REG_DS - | REG_FS - | REG_GS - | REG_CS - | WORD segreg { $$ = $2; } -; - /* memory addresses */ -/* FIXME: Is there any way this redundancy can be eliminated? This is almost - * identical to expr: the only difference is that FLTNUM is replaced by - * rawreg16 and rawreg32. - * - * Note that the two can't be just combined because of conflicts caused by imm - * vs. reg. I don't see a simple solution right now to this. - * - * We don't attempt to check memory expressions for validity here. 
- */ -memexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } - | rawreg16 { $$ = expr_new_ident(ExprReg($1, 16)); } - | rawreg32 { $$ = expr_new_ident(ExprReg($1, 32)); } - | explabel { $$ = expr_new_ident(ExprSym($1)); } - /*| memexpr '||' memexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ - | memexpr '|' memexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } - | memexpr '^' memexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } - /*| expr '&&' memexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ - | memexpr '&' memexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } - /*| memexpr '==' memexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ - /*| memexpr '>' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '<' memexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ - /*| memexpr '>=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '<=' memexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ - /*| memexpr '!=' memexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ - | memexpr LEFT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } - | memexpr RIGHT_OP memexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } - | memexpr '+' memexpr { $$ = expr_new_tree($1, EXPR_ADD, $3); } - | memexpr '-' memexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } - | memexpr '*' memexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } - | memexpr '/' memexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } - | memexpr SIGNDIV memexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } - | memexpr '%' memexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } - | memexpr SIGNMOD memexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } - | '+' memexpr %prec UNARYOP { $$ = $2; } - | '-' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } - /*| '!' 
memexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ - | '~' memexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } - | '(' memexpr ')' { $$ = $2; } - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); +memaddr: expr { + $$ = cur_arch->parse.ea_new_expr($1); } - | error { Error(_("invalid effective address")); } -; - -memaddr: memexpr { - $$ = x86_ea_new_expr($1); - x86_ea_set_segment($$, 0); + | SEGREG ':' memaddr { + $$ = $3; + cur_arch->parse.handle_seg_override($$, $1[0]); } - | REG_CS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; x86_ea_set_segment($$, 0x65); } | BYTE memaddr { $$ = $2; ea_set_len($$, 1); } | WORD memaddr { $$ = $2; ea_set_len($$, 2); } | DWORD memaddr { $$ = $2; ea_set_len($$, 4); } | NOSPLIT memaddr { $$ = $2; ea_set_nosplit($$, 1); } ; -mem: '[' memaddr ']' { $$ = $2; } -; - -/* explicit memory */ -mem8x: BYTE mem { $$ = $2; } - | BYTE mem8x { $$ = $2; } -; -mem16x: WORD mem { $$ = $2; } - | WORD mem16x { $$ = $2; } -; -mem32x: DWORD mem { $$ = $2; } - | DWORD mem32x { $$ = $2; } -; -mem64x: QWORD mem { $$ = $2; } - | QWORD mem64x { $$ = $2; } -; -mem80x: TWORD mem { $$ = $2; } - | TWORD mem80x { $$ = $2; } -; -mem128x: DQWORD mem { $$ = $2; } - | DQWORD mem128x { $$ = $2; } -; - -/* FAR memory, for jmp and call */ -memfar: FAR mem { $$ = $2; } - | FAR memfar { $$ = $2; } -; - -/* implicit memory */ -mem8: mem - | mem8x -; -mem16: mem - | mem16x -; -mem32: mem - | mem32x -; -mem64: mem - | mem64x -; -mem80: mem - | mem80x -; -mem128: mem - | mem128x -; - -/* both 16 and 32 bit memory */ -mem1632: mem - | mem16x - | mem32x -; - -/* explicit register or memory */ -rm8x: reg8 { $$ = x86_ea_new_reg($1); 
} - | mem8x -; -rm16x: reg16 { $$ = x86_ea_new_reg($1); } - | mem16x -; -rm32x: reg32 { $$ = x86_ea_new_reg($1); } - | mem32x -; -/* not needed: -rm64x: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64x -; -rm128x: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128x -; -*/ - -/* implicit register or memory */ -rm8: reg8 { $$ = x86_ea_new_reg($1); } - | mem8 -; -rm16: reg16 { $$ = x86_ea_new_reg($1); } - | mem16 -; -rm32: reg32 { $$ = x86_ea_new_reg($1); } - | mem32 -; -rm64: MMXREG { $$ = x86_ea_new_reg($1); } - | mem64 -; -rm128: XMMREG { $$ = x86_ea_new_reg($1); } - | mem128 -; - -/* immediate values */ -imm: expr { $$ = imm_new_expr($1); } -; - -/* explicit immediates */ -imm8x: BYTE imm { $$ = $2; } -; -imm16x: WORD imm { $$ = $2; } -; -imm32x: DWORD imm { $$ = $2; } -; - -/* implicit immediates */ -imm8: imm - | imm8x -; -imm16: imm - | imm16x -; -imm32: imm - | imm32x +/* instruction operands */ +operands: operand { + ops_initialize(&$$.operands); + ops_append(&$$.operands, $1); + $$.num_operands = 1; + } + | operands ',' operand { + ops_append(&$1.operands, $3); + $$.operands = $1.operands; + $$.num_operands = $1.num_operands+1; + } ; -/* jump targets */ -target: expr { - $$.val = $1; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NONE); +operand: '[' memaddr ']' { $$ = operand_new_mem($2); } + | expr { $$ = operand_new_imm($1); } + | SEGREG { $$ = operand_new_segreg($1[0]); } + | BYTE operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 1) + Error(_("cannot override register size")); + else + $$->size = 1; } - | SHORT target { + | WORD operand { $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_SHORT_FORCED); + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 2) + Error(_("cannot override register size")); + else + $$->size = 2; } - | NEAR target { + | DWORD operand { $$ = $2; - x86_set_jmprel_opcode_sel(&$$.op_sel, JR_NEAR_FORCED); + if ($$->type == INSN_OPERAND_REG && + 
cur_arch->get_reg_size($$->data.reg) != 4) + Error(_("cannot override register size")); + else + $$->size = 4; } + | QWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 8) + Error(_("cannot override register size")); + else + $$->size = 8; + } + | TWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 10) + Error(_("cannot override register size")); + else + $$->size = 10; + } + | DQWORD operand { + $$ = $2; + if ($$->type == INSN_OPERAND_REG && + cur_arch->get_reg_size($$->data.reg) != 16) + Error(_("cannot override register size")); + else + $$->size = 16; + } + | TARGETMOD operand { $$ = $2; $$->targetmod = $1[0]; } ; /* expression trees */ @@ -587,9 +384,48 @@ direxpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | '(' direxpr ')' { $$ = $2; } ; -expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } +dvexpr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } | explabel { $$ = expr_new_ident(ExprSym($1)); } + /*| dvexpr '||' dvexpr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ + | dvexpr '|' dvexpr { $$ = expr_new_tree($1, EXPR_OR, $3); } + | dvexpr '^' dvexpr { $$ = expr_new_tree($1, EXPR_XOR, $3); } + /*| dvexpr '&&' dvexpr { $$ = expr_new_tree($1, EXPR_LAND, $3); }*/ + | dvexpr '&' dvexpr { $$ = expr_new_tree($1, EXPR_AND, $3); } + /*| dvexpr '==' dvexpr { $$ = expr_new_tree($1, EXPR_EQUALS, $3); }*/ + /*| dvexpr '>' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ + /*| dvexpr '<' dvexpr { $$ = expr_new_tree($1, EXPR_GT, $3); }*/ + /*| dvexpr '>=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ + /*| dvexpr '<=' dvexpr { $$ = expr_new_tree($1, EXPR_GE, $3); }*/ + /*| dvexpr '!=' dvexpr { $$ = expr_new_tree($1, EXPR_NE, $3); }*/ + | dvexpr LEFT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHL, $3); } + | dvexpr RIGHT_OP dvexpr { $$ = expr_new_tree($1, EXPR_SHR, $3); } + | dvexpr '+' dvexpr { $$ = 
expr_new_tree($1, EXPR_ADD, $3); } + | dvexpr '-' dvexpr { $$ = expr_new_tree($1, EXPR_SUB, $3); } + | dvexpr '*' dvexpr { $$ = expr_new_tree($1, EXPR_MUL, $3); } + | dvexpr '/' dvexpr { $$ = expr_new_tree($1, EXPR_DIV, $3); } + | dvexpr SIGNDIV dvexpr { $$ = expr_new_tree($1, EXPR_SIGNDIV, $3); } + | dvexpr '%' dvexpr { $$ = expr_new_tree($1, EXPR_MOD, $3); } + | dvexpr SIGNMOD dvexpr { $$ = expr_new_tree($1, EXPR_SIGNMOD, $3); } + | '+' dvexpr %prec UNARYOP { $$ = $2; } + | '-' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NEG, $2); } + /*| '!' dvexpr { $$ = expr_new_branch(EXPR_LNOT, $2); }*/ + | '~' dvexpr %prec UNARYOP { $$ = expr_new_branch(EXPR_NOT, $2); } + | '(' dvexpr ')' { $$ = $2; } +; + +/* Expressions for operands and memory expressions. + * We don't attempt to check memory expressions for validity here. + * Essentially the same as dvexpr above, but additionally accepts REG and + * STRING (a STRING is converted to an integer character constant). + */ +expr: INTNUM { $$ = expr_new_ident(ExprInt($1)); } + | FLTNUM { $$ = expr_new_ident(ExprFloat($1)); } + | REG { $$ = expr_new_ident(ExprReg($1[0])); } + | STRING { + $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); + xfree($1); + } + | explabel { $$ = expr_new_ident(ExprSym($1)); } /*| expr '||' expr { $$ = expr_new_tree($1, EXPR_LOR, $3); }*/ | expr '|' expr { $$ = expr_new_tree($1, EXPR_OR, $3); } | expr '^' expr { $$ = expr_new_tree($1, EXPR_XOR, $3); } @@ -617,13 +453,6 @@ expr_no_string: INTNUM { $$ = expr_new_ident(ExprInt($1)); } | '(' expr ')' { $$ = $2; } ; -expr: expr_no_string - | STRING { - $$ = expr_new_ident(ExprInt(intnum_new_charconst_nasm($1))); - xfree($1); - } -; - explabel: ID { $$ = symrec_use($1); xfree($1); @@ -652,46 +481,6 @@ explabel: ID { } ; -instr: /* empty */ { - idata.opersize=0; idata.op_len=0; idata.ea=NULL; idata.imm=NULL; - $$ = x86_bc_new_insn(&idata); - } - | instrbase - | OPERSIZE instr { $$ = $2; x86_bc_insn_opersize_override($$, $1); } - | ADDRSIZE instr { $$ = $2; x86_bc_insn_addrsize_override($$, $1); } - | REG_CS
instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x2E); - } - | REG_SS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x36); - } - | REG_DS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x3E); - } - | REG_ES instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x26); - } - | REG_FS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x64); - } - | REG_GS instr { - $$ = $2; - x86_ea_set_segment(x86_bc_insn_get_ea($$), 0x65); - } - | LOCK instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF0); } - | REPNZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF2); } - | REP instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF3); } - | REPZ instr { $$ = $2; x86_bc_insn_set_lockrep_prefix($$, 0xF4); } -; - -/* instruction grammars (dynamically generated) */ -/* @INSTRUCTIONS@ */ - %% /*@=usedef =nullassign =memtrans =usereleased =compdef =mustfree@*/ @@ -700,8 +489,6 @@ nasm_parser_directive(const char *name, valparamhead *valparams, valparamhead *objext_valparams) { valparam *vp, *vp2; - const intnum *intn; - long lval; assert(cur_objfmt != NULL); @@ -766,13 +553,25 @@ nasm_parser_directive(const char *name, valparamhead *valparams, vp->param = NULL; } nasm_parser_prev_bc = (bytecode *)NULL; - } else if (strcasecmp(name, "bits") == 0) { - if ((vp = vps_first(valparams)) && !vp->val && vp->param != NULL && - (intn = expr_get_intnum(&vp->param)) != NULL && - (lval = intnum_get_int(intn)) && (lval == 16 || lval == 32)) - x86_mode_bits = (unsigned char)lval; - else - Error(_("invalid argument to [%s]"), "BITS"); + } else if (strcasecmp(name, "cpu") == 0) { + /* [cpu ...]: hand every argument to the architecture's CPU switch. + * String arguments are passed through as-is; numeric arguments are + * first rendered as a decimal string. + */ + vps_foreach(vp, valparams) { + if (vp->val) + cur_arch->parse.switch_cpu(vp->val); + else if (vp->param) { + const intnum *intcpu; + intcpu = expr_get_intnum(&vp->param); + if (!intcpu) + Error(_("invalid argument to [%s]"), "CPU"); + else { + /* 32 bytes holds any 64-bit unsigned long in decimal + * (20 digits) plus the terminating NUL; 16 was too small. + */ + char strcpu[32]; + sprintf(strcpu, "%lu", intnum_get_uint(intcpu)); +
cur_arch->parse.switch_cpu(strcpu); + } + } + } + } else if (!cur_arch->parse.directive(name, valparams, objext_valparams, + &nasm_parser_sections)) { + ; } else if (cur_objfmt->directive(name, valparams, objext_valparams, &nasm_parser_sections)) { Error(_("unrecognized directive [%s]"), name); diff --git a/src/parsers/nasm/nasm-parser.c b/src/parsers/nasm/nasm-parser.c index b7d66972..e21e9554 100644 --- a/src/parsers/nasm/nasm-parser.c +++ b/src/parsers/nasm/nasm-parser.c @@ -34,6 +34,7 @@ extern FILE *nasm_parser_in; extern int nasm_parser_debug; extern int nasm_parser_parse(void); +extern void nasm_parser_cleanup(void); size_t (*nasm_parser_input) (char *buf, size_t max_size); @@ -58,6 +59,8 @@ nasm_parser_do_parse(parser *p, FILE *f, const char *in_filename) nasm_parser_parse(); + nasm_parser_cleanup(); + /* Free locallabel base if necessary */ if (nasm_parser_locallabel_base) xfree(nasm_parser_locallabel_base); diff --git a/src/parsers/nasm/nasm-token.re b/src/parsers/nasm/nasm-token.re new file mode 100644 index 00000000..ab0aa111 --- /dev/null +++ b/src/parsers/nasm/nasm-token.re @@ -0,0 +1,516 @@ +/* + * NASM-compatible lex lexer + * + * Copyright (C) 2001 Peter Johnson + * + * Portions based on re2c's example code. + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "util.h"
+RCSID("$IdPath$");
+
+#include "bitvect.h"
+
+#include "errwarn.h"
+#include "intnum.h"
+#include "floatnum.h"
+#include "expr.h"
+#include "symrec.h"
+
+#include "bytecode.h"
+
+#include "arch.h"
+
+#include "src/parsers/nasm/nasm-defs.h"
+#include "nasm-bison.h"
+
+
+#define BSIZE   8192 /* bytes requested from nasm_parser_input() per refill */
+
+#define YYCTYPE         char
+#define YYCURSOR        cursor
+#define YYLIMIT         s.lim
+#define YYMARKER        s.ptr
+#define YYFILL(n)       {cursor = fill(cursor);}
+
+#define RETURN(i)       {s.cur = cursor; return i;} /* remember the scan position for the next call */
+
+#define SCANINIT()      { \
+        s.tchar = cursor - s.pos; \
+        s.tline = s.cline; \
+        s.tok = cursor; \
+    }
+
+#define TOKLEN          (cursor-s.tok) /* length of the token matched so far */
+
+void nasm_parser_cleanup(void);
+void nasm_parser_set_directive_state(void);
+int nasm_parser_lex(void);
+
+extern size_t (*nasm_parser_input) (char *buf, size_t max_size);
+
+
+typedef struct Scanner {
+    YYCTYPE             *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+    unsigned int        tchar, tline, cline;
+} Scanner;
+
+static Scanner s = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 1 };
+
+FILE *nasm_parser_in = NULL;
+/* Refill the scan buffer: drop consumed bytes, grow if needed, read up to BSIZE more; returns the relocated cursor. */
+static YYCTYPE *
+fill(YYCTYPE *cursor)
+{
+    if(!s.eof){ /* once EOF has been seen there is nothing more to read */
+        size_t cnt = s.tok - s.bot; /* bytes in front of the current token: already consumed */
+        if(cnt){
+            memmove(s.bot, s.tok, s.lim - s.tok); /* same buffer, ranges may overlap: memcpy here was undefined */
+            s.tok = s.bot;
+            s.ptr -= cnt;
+            cursor -= cnt;
+            s.pos -= cnt;
+            s.lim -= cnt;
+        }
+        if((s.top - s.lim) < BSIZE){ /* not enough free room for a full read: move to a larger buffer */
+            char *buf = xmalloc((s.lim - s.bot) + BSIZE);
+            memcpy(buf, s.tok, s.lim - s.tok);
+            s.tok = buf;
+            s.ptr = &buf[s.ptr - s.bot]; /* rebase every pointer onto the new buffer */
+            cursor = &buf[cursor - s.bot];
+            s.pos = &buf[s.pos - s.bot];
+            s.lim = &buf[s.lim - s.bot];
+            s.top = &s.lim[BSIZE];
+            if (s.bot)
+                xfree(s.bot);
+            s.bot = buf;
+        }
+        if((cnt = nasm_parser_input(s.lim, BSIZE)) != BSIZE){ /* short read is treated as end of input */
+            s.eof = &s.lim[cnt]; *s.eof++ = '\n'; /* append a newline; s.eof ends up one past it */
+        }
+        s.lim += cnt;
+    }
+    return cursor;
+}
+
+void
+nasm_parser_cleanup(void) /* release the scanner's input buffer, if one was ever allocated */
+{
+    if (s.bot)
+        xfree(s.bot);
+}
+
+/* starting size of string buffer */
+#define STRBUF_ALLOC_SIZE       128
+
+/* string buffer used when parsing strings/character constants */
+static char *strbuf = (char *)NULL;
+
+/* length of strbuf (including terminating NULL character) */
+static size_t strbuf_size = 0;
+
+/* last "base" label for local (.) labels */
+char *nasm_parser_locallabel_base = (char *)NULL;
+size_t nasm_parser_locallabel_base_len = 0;
+
+static int linechg_numcount; /* numbers seen so far on the current %line directive */
+
+/*!re2c
+  any = [\000-\377];
+  digit = [0-9];
+  iletter = [a-zA-Z];
+  bindigit = [01];
+  octdigit = [0-7];
+  hexdigit = [0-9a-fA-F];
+  ws = [ \t\r];
+  quot = ["'];
+  A = [aA];
+  B = [bB];
+  C = [cC];
+  D = [dD];
+  E = [eE];
+  F = [fF];
+  G = [gG];
+  H = [hH];
+  I = [iI];
+  J = [jJ];
+  K = [kK];
+  L = [lL];
+  M = [mM];
+  N = [nN];
+  O = [oO];
+  P = [pP];
+  Q = [qQ];
+  R = [rR];
+  S = [sS];
+  T = [tT];
+  U = [uU];
+  V = [vV];
+  W = [wW];
+  X = [xX];
+  Y = [yY];
+  Z = [zZ];
+*/
+
+static enum {
+    INITIAL,
+    DIRECTIVE,
+    DIRECTIVE2,
+    LINECHG,
+    LINECHG2
+} state = INITIAL;
+/* Make the next nasm_parser_lex() call start in the directive-name rules. */
+void
+nasm_parser_set_directive_state(void)
+{
+    state = DIRECTIVE;
+}
+/* Return the next token (0 at end of input); token values are stored in yylval. */
+int
+nasm_parser_lex(void)
+{
+    YYCTYPE *cursor = s.cur; /* resume where the previous call stopped */
+    YYCTYPE endch; /* quote character that opened the current string constant */
+    size_t count, len;
+    YYCTYPE savech; /* byte temporarily overwritten with NUL while a token is converted */
+    arch_check_id_retval check_id_ret;
+
+    /* Catch EOF */
+    if (s.eof && cursor == s.eof)
+        return 0;
+
+    /* Jump to proper "exclusive" states */
+    switch (state) {
+        case DIRECTIVE:
+            goto directive;
+        case LINECHG:
+            goto linechg;
+        case LINECHG2:
+            goto linechg2;
+        default: /* INITIAL and DIRECTIVE2 share the main rule set below */
+            break;
+    }
+
+scan:
+    SCANINIT();
+
+    /*!re2c
+        /* standard decimal integer */
+        digit+ {
+            savech = s.tok[TOKLEN]; /* NUL-terminate the token in place, restore afterwards */
+            s.tok[TOKLEN] = '\0';
+            yylval.intn = intnum_new_dec(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+        /* 10010011b - binary number */
+
+        bindigit+ "b" {
+            s.tok[TOKLEN-1] = '\0'; /* strip off 'b' */
+            yylval.intn = intnum_new_bin(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* 777q - octal number */
+        octdigit+ "q" {
+
s.tok[TOKLEN-1] = '\0'; /* strip off 'q' */
+            yylval.intn = intnum_new_oct(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* 0AAh form of hexadecimal number (a single digit such as 5h is valid too) */
+        digit hexdigit* "h" {
+            s.tok[TOKLEN-1] = '\0'; /* strip off 'h' */
+            yylval.intn = intnum_new_hex(s.tok);
+            RETURN(INTNUM);
+        }
+
+        /* $0AA and 0xAA forms of hexadecimal number */
+        (("$" digit) | "0x") hexdigit+ {
+            savech = s.tok[TOKLEN]; /* NUL-terminate the token in place, restore afterwards */
+            s.tok[TOKLEN] = '\0';
+            if (s.tok[1] == 'x')
+                yylval.intn = intnum_new_hex(s.tok+2); /* skip 0 and x */
+            else
+                yylval.intn = intnum_new_hex(s.tok+1); /* don't skip 0 */
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+
+        /* floating point value */
+        digit+ "." digit* ("e" [-+]? digit+)? {
+            savech = s.tok[TOKLEN];
+            s.tok[TOKLEN] = '\0';
+            yylval.flt = floatnum_new(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(FLTNUM);
+        }
+
+        /* string/character constant values */
+        quot {
+            endch = s.tok[0]; /* the string ends at the same quote character that opened it */
+            goto stringconst;
+        }
+
+        /* %line linenum+lineinc filename */
+        "%line" {
+            state = LINECHG;
+            linechg_numcount = 0;
+            RETURN(LINE);
+        }
+
+        /* size specifiers */
+        B Y T E         { yylval.int_info = 1; RETURN(BYTE); }
+        W O R D         { yylval.int_info = 2; RETURN(WORD); }
+        D W O R D       { yylval.int_info = 4; RETURN(DWORD); }
+        Q W O R D       { yylval.int_info = 8; RETURN(QWORD); }
+        T W O R D       { yylval.int_info = 10; RETURN(TWORD); }
+        D Q W O R D     { yylval.int_info = 16; RETURN(DQWORD); }
+
+        /* pseudo-instructions */
+        D B             { yylval.int_info = 1; RETURN(DECLARE_DATA); }
+        D W             { yylval.int_info = 2; RETURN(DECLARE_DATA); }
+        D D             { yylval.int_info = 4; RETURN(DECLARE_DATA); }
+        D Q             { yylval.int_info = 8; RETURN(DECLARE_DATA); }
+        D T             { yylval.int_info = 10; RETURN(DECLARE_DATA); }
+
+        R E S B         { yylval.int_info = 1; RETURN(RESERVE_SPACE); }
+        R E S W         { yylval.int_info = 2; RETURN(RESERVE_SPACE); }
+        R E S D         { yylval.int_info = 4; RETURN(RESERVE_SPACE); }
+        R E S Q         { yylval.int_info = 8; RETURN(RESERVE_SPACE); }
+        R E S T         { yylval.int_info = 10; RETURN(RESERVE_SPACE); }
+
+        I N C B I N     { RETURN(INCBIN); }
+
+        E
Q U             { RETURN(EQU); }
+
+        T I M E S       { RETURN(TIMES); }
+
+        S E G           { RETURN(SEG); }
+        W R T           { RETURN(WRT); }
+
+        N O S P L I T   { RETURN(NOSPLIT); }
+
+        T O             { RETURN(TO); }
+
+        /* operators */
+        "<<"                    { RETURN(LEFT_OP); }
+        ">>"                    { RETURN(RIGHT_OP); }
+        "//"                    { RETURN(SIGNDIV); }
+        "%%"                    { RETURN(SIGNMOD); }
+        "$$"                    { RETURN(START_SECTION_ID); }
+        [-+|^*&/%~$():=,\[]     { RETURN(s.tok[0]); }
+
+        /* handle ] separately for directives */
+        "]" {
+            if (state == DIRECTIVE2)
+                state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        /* special non-local ..@label and labels like ..start */
+        ".." [a-zA-Z0-9_$#@~.?]+ {
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(SPECIAL_ID);
+        }
+
+        /* local label (.label) */
+        "." [a-zA-Z0-9_$#@~?][a-zA-Z0-9_$#@~.?]* {
+            /* override local labels in directive state */
+            if (state == DIRECTIVE2) {
+                yylval.str_val = xstrndup(s.tok, TOKLEN);
+                RETURN(ID);
+            } else if (!nasm_parser_locallabel_base) {
+                yylval.str_val = xstrndup(s.tok, TOKLEN); /* copy first: %s needs a NUL-terminated string, s.tok[0] is a single char */
+                Warning(_("no non-local label before `%s'"), yylval.str_val);
+            } else {
+                len = TOKLEN + nasm_parser_locallabel_base_len; /* result is base label immediately followed by .label */
+                yylval.str_val = xmalloc(len + 1);
+                strcpy(yylval.str_val, nasm_parser_locallabel_base);
+                strncat(yylval.str_val, s.tok, TOKLEN);
+                yylval.str_val[len] = '\0';
+            }
+
+            RETURN(LOCAL_ID);
+        }
+
+        /* forced identifier */
+        "$" [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(ID);
+        }
+
+        /* identifier that may be a register, instruction, etc. */
+        [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* {
+            savech = s.tok[TOKLEN]; /* NUL-terminate in place for the arch lookup, restore afterwards */
+            s.tok[TOKLEN] = '\0';
+            check_id_ret = cur_arch->parse.check_identifier(yylval.arch_data,
+                                                            s.tok);
+            s.tok[TOKLEN] = savech;
+            switch (check_id_ret) {
+                case ARCH_CHECK_ID_NONE:
+                    /* Just an identifier, return as such.
*/
+                    yylval.str_val = xstrndup(s.tok, TOKLEN);
+                    RETURN(ID);
+                case ARCH_CHECK_ID_INSN:
+                    RETURN(INSN);
+                case ARCH_CHECK_ID_PREFIX:
+                    RETURN(PREFIX);
+                case ARCH_CHECK_ID_REG:
+                    RETURN(REG);
+                case ARCH_CHECK_ID_SEGREG:
+                    RETURN(SEGREG);
+                case ARCH_CHECK_ID_TARGETMOD:
+                    RETURN(TARGETMOD);
+                default:
+                    Warning(_("Arch feature not supported, treating as identifier"));
+                    yylval.str_val = xstrndup(s.tok, TOKLEN);
+                    RETURN(ID);
+            }
+        }
+
+        ";" (any \ [\n])*       { goto scan; }
+
+        ws+                     { goto scan; }
+
+        "\n"                    { state = INITIAL; RETURN(s.tok[0]); }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto scan;
+        }
+    */
+
+    /* %line linenum+lineinc filename */
+linechg:
+    SCANINIT();
+
+    /*!re2c
+        digit+ {
+            linechg_numcount++;
+            savech = s.tok[TOKLEN]; /* NUL-terminate the token in place, restore afterwards */
+            s.tok[TOKLEN] = '\0';
+            yylval.intn = intnum_new_dec(s.tok);
+            s.tok[TOKLEN] = savech;
+            RETURN(INTNUM);
+        }
+
+        "\n" {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        "+" {
+            RETURN(s.tok[0]);
+        }
+
+        ws+ {
+            /* the filename only follows both numbers; until then keep scanning numbers here */
+            if (linechg_numcount == 2) { state = LINECHG2; goto linechg2; }
+            goto linechg;
+        }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto linechg;
+        }
+    */
+
+linechg2:
+    SCANINIT();
+
+    /*!re2c
+        "\n" {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        "\r" { }
+
+        (any \ [\r\n])+ {
+            state = LINECHG; /* back to the number rules so the closing newline is handled there */
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(FILENAME);
+        }
+    */
+
+    /* directive: [name value] */
+directive:
+    SCANINIT();
+
+    /*!re2c
+        [\]\n] {
+            state = INITIAL;
+            RETURN(s.tok[0]);
+        }
+
+        iletter+ {
+            state = DIRECTIVE2; /* name seen: the arguments are lexed by the main rules */
+            yylval.str_val = xstrndup(s.tok, TOKLEN);
+            RETURN(DIRECTIVE_NAME);
+        }
+
+        any {
+            if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR))
+                Warning(_("ignoring unrecognized character `%s'"),
+                        conv_unprint(s.tok[0]));
+            goto directive;
+        }
+    */
+
+    /* string/character constant values */
+stringconst:
+    strbuf = xmalloc(STRBUF_ALLOC_SIZE);
+
strbuf_size = STRBUF_ALLOC_SIZE;
+    count = 0; /* characters stored in strbuf so far */
+
+stringconst_scan:
+    SCANINIT();
+
+    /*!re2c
+        "\n" {
+            if (cursor == s.eof) /* this newline is the last byte before the recorded end of input */
+                Error(_("unexpected end of file in string"));
+            else
+                Error(_("unterminated string"));
+            strbuf[count] = '\0'; /* still hand back what was collected */
+            yylval.str_val = strbuf;
+            RETURN(STRING);
+        }
+
+        any {
+            if (s.tok[0] == endch) { /* matching closing quote: the string is complete */
+                strbuf[count] = '\0';
+                yylval.str_val = strbuf;
+                RETURN(STRING);
+            }
+
+            strbuf[count++] = s.tok[0];
+            if (count >= strbuf_size) { /* grow before the buffer is full so the terminating NUL always fits */
+                strbuf = xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
+                strbuf_size += STRBUF_ALLOC_SIZE;
+            }
+
+            goto stringconst_scan;
+        }
+    */
+}
diff --git a/src/parsers/nasm/token.l.in b/src/parsers/nasm/token.l.in
deleted file mode 100644
index 7f458557..00000000
--- a/src/parsers/nasm/token.l.in
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * NASM-compatible lex lexer
- *
- * Copyright (C) 2001 Peter Johnson
- *
- * This file is part of YASM.
- *
- * YASM is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * YASM is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -%{ -#include "util.h" -RCSID("$IdPath$"); - -#include "bitvect.h" - -#include "errwarn.h" -#include "intnum.h" -#include "floatnum.h" -#include "expr.h" -#include "symrec.h" - -#include "bytecode.h" - -#include "arch.h" - -#include "src/parsers/nasm/nasm-defs.h" -#include "nasm-bison.h" - - -#define YY_NEVER_INTERACTIVE 1 - -int nasm_parser_lex(void); - -extern size_t (*nasm_parser_input) (char *buf, size_t max_size); -#undef YY_INPUT -#define YY_INPUT(b, r, ms) (r = nasm_parser_input(b, ms)) - -/* starting size of string buffer */ -#define STRBUF_ALLOC_SIZE 128 - -/* string buffer used when parsing strings/character constants */ -static char *strbuf = (char *)NULL; - -/* length of strbuf (including terminating NULL character) */ -static size_t strbuf_size = 0; - -/* last "base" label for local (.) 
labels */ -char *nasm_parser_locallabel_base = (char *)NULL; - -static int linechg_numcount; - -%} -%option noyywrap -%option nounput -%option case-insensitive -%option never-interactive -%option prefix="nasm_parser_" -%option outfile="lex.yy.c" - -%x DIRECTIVE LINECHG LINECHG2 -%s DIRECTIVE2 - -DIGIT [0-9] -BINDIGIT [01] -OCTDIGIT [0-7] -HEXDIGIT [0-9a-f] -WS [ \t\r] - -%% - - /* standard decimal integer */ -{DIGIT}+ { - yylval.intn = intnum_new_dec(yytext); - return INTNUM; -} - - /* 10010011b - binary number */ -{BINDIGIT}+b { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'b' */ - yylval.intn = intnum_new_bin(yytext); - return INTNUM; -} - - /* 777q - octal number */ -{OCTDIGIT}+q { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'q' */ - yylval.intn = intnum_new_oct(yytext); - return INTNUM; -} - - /* 0AAh form of hexidecimal number */ -{DIGIT}{HEXDIGIT}*h { - yytext[strlen(yytext)-1] = '\0'; /* strip off 'h' */ - yylval.intn = intnum_new_hex(yytext); - return INTNUM; -} - - /* $0AA and 0xAA forms of hexidecimal number */ -(\${DIGIT}|0x){HEXDIGIT}+ { - if (yytext[1] == 'x') - yylval.intn = intnum_new_hex(yytext+2); /* 0x format, skip 0 and x */ - else - yylval.intn = intnum_new_hex(yytext+1); /* $0 format, don't skip 0 */ - return INTNUM; -} - - /* floating point value */ -{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? 
{ - yylval.flt = floatnum_new(yytext); - return FLTNUM; -} - - /* string/character constant values */ -["'] { - int inch, count; - char endch = yytext[0]; - - strbuf = xmalloc(STRBUF_ALLOC_SIZE); - - strbuf_size = STRBUF_ALLOC_SIZE; - inch = input(); - count = 0; - while (inch != EOF && inch != endch && inch != '\n') { - strbuf[count++] = inch; - if (count >= strbuf_size) { - strbuf = realloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE); - if (!strbuf) - Fatal(FATAL_NOMEM); - strbuf_size += STRBUF_ALLOC_SIZE; - } - inch = input(); - } - - if (inch == '\n') - Error(_("unterminated string")); - else if (inch == EOF) - Error(_("unexpected end of file in string")); - - strbuf[count] = '\0'; - - yylval.str_val = strbuf; - return STRING; -} - - /* %line linenum+lineinc filename */ -^%line { BEGIN LINECHG; linechg_numcount = 0; return LINE; } -{DIGIT}+ { - linechg_numcount++; - yylval.intn = intnum_new_dec(yytext); - return INTNUM; -} -\n { BEGIN INITIAL; return '\n'; } -[+] { return yytext[0]; } -{WS}+ { - if (linechg_numcount == 2) - BEGIN LINECHG2; -} -\n { BEGIN INITIAL; return '\n'; } -\r ; -[^\r\n]+ { - BEGIN LINECHG; - yylval.str_val = xstrdup(yytext); - return FILENAME; -} - - /* directive: [name value] */ -^{WS}*"[" { BEGIN DIRECTIVE; return '['; } -"]" { BEGIN INITIAL; return ']'; } -"]" { BEGIN INITIAL; return ']'; } -\n { BEGIN INITIAL; return '\n'; } -\n { BEGIN INITIAL; return '\n'; } - -[a-z]+ { - BEGIN DIRECTIVE2; - yylval.str_val = xstrdup(yytext); - return DIRECTIVE_NAME; -} -. 
{ - if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR)) - Warning(_("ignoring unrecognized character `%s'"), - conv_unprint(yytext[0])); -} - - /* override local labels in directive state */ -\.[a-z0-9_$#@~.?]* { - yylval.str_val = xstrdup(yytext); - return ID; -} - - /* size specifiers */ -byte { yylval.int_info = 1; return BYTE; } -word { yylval.int_info = 2; return WORD; } -dword { yylval.int_info = 4; return DWORD; } -qword { yylval.int_info = 8; return QWORD; } -tword { yylval.int_info = 10; return TWORD; } -dqword { yylval.int_info = 16; return DQWORD; } - - /* pseudo-instructions */ -db { yylval.int_info = 1; return DECLARE_DATA; } -dw { yylval.int_info = 2; return DECLARE_DATA; } -dd { yylval.int_info = 4; return DECLARE_DATA; } -dq { yylval.int_info = 8; return DECLARE_DATA; } -dt { yylval.int_info = 10; return DECLARE_DATA; } - -resb { yylval.int_info = 1; return RESERVE_SPACE; } -resw { yylval.int_info = 2; return RESERVE_SPACE; } -resd { yylval.int_info = 4; return RESERVE_SPACE; } -resq { yylval.int_info = 8; return RESERVE_SPACE; } -rest { yylval.int_info = 10; return RESERVE_SPACE; } - -incbin { return INCBIN; } - -equ { return EQU; } - -times { return TIMES; } - -seg { return SEG; } -wrt { return WRT; } -near { return NEAR; } -short { return SHORT; } -far { return FAR; } - -nosplit { return NOSPLIT; } - -org { return ORG; } - -to { return TO; } - - /* operand size overrides */ -o16 { yylval.int_info = 16; return OPERSIZE; } -o32 { yylval.int_info = 32; return OPERSIZE; } - /* address size overrides */ -a16 { yylval.int_info = 16; return ADDRSIZE; } -a32 { yylval.int_info = 32; return ADDRSIZE; } - - /* instruction prefixes */ -lock { return LOCK; } -repne { return REPNZ; } -repnz { return REPNZ; } -rep { return REP; } -repe { return REPZ; } -repz { return REPZ; } - - /* control, debug, and test registers */ -cr4 { yylval.int_info = 4; return CR4; } -cr[023] { yylval.int_info = yytext[2]-'0'; return CRREG_NOTCR4; } -dr[0-367] { yylval.int_info = yytext[2]-'0'; 
return DRREG; } -tr[3-7] { yylval.int_info = yytext[2]-'0'; return TRREG; } - - /* floating point, MMX, and SSE registers */ -st0 { yylval.int_info = 0; return ST0; } -st[1-7] { yylval.int_info = yytext[2]-'0'; return FPUREG_NOTST0; } -mm[0-7] { yylval.int_info = yytext[2]-'0'; return MMXREG; } -xmm[0-7] { yylval.int_info = yytext[3]-'0'; return XMMREG; } - - /* integer registers */ -eax { yylval.int_info = 0; return REG_EAX; } -ecx { yylval.int_info = 1; return REG_ECX; } -edx { yylval.int_info = 2; return REG_EDX; } -ebx { yylval.int_info = 3; return REG_EBX; } -esp { yylval.int_info = 4; return REG_ESP; } -ebp { yylval.int_info = 5; return REG_EBP; } -esi { yylval.int_info = 6; return REG_ESI; } -edi { yylval.int_info = 7; return REG_EDI; } - -ax { yylval.int_info = 0; return REG_AX; } -cx { yylval.int_info = 1; return REG_CX; } -dx { yylval.int_info = 2; return REG_DX; } -bx { yylval.int_info = 3; return REG_BX; } -sp { yylval.int_info = 4; return REG_SP; } -bp { yylval.int_info = 5; return REG_BP; } -si { yylval.int_info = 6; return REG_SI; } -di { yylval.int_info = 7; return REG_DI; } - -al { yylval.int_info = 0; return REG_AL; } -cl { yylval.int_info = 1; return REG_CL; } -dl { yylval.int_info = 2; return REG_DL; } -bl { yylval.int_info = 3; return REG_BL; } -ah { yylval.int_info = 4; return REG_AH; } -ch { yylval.int_info = 5; return REG_CH; } -dh { yylval.int_info = 6; return REG_DH; } -bh { yylval.int_info = 7; return REG_BH; } - - /* segment registers */ -es { yylval.int_info = 0; return REG_ES; } -cs { yylval.int_info = 1; return REG_CS; } -ss { yylval.int_info = 2; return REG_SS; } -ds { yylval.int_info = 3; return REG_DS; } -fs { yylval.int_info = 4; return REG_FS; } -gs { yylval.int_info = 5; return REG_GS; } - - /* operators */ -"<<" { return LEFT_OP; } -">>" { return RIGHT_OP; } -"//" { return SIGNDIV; } -"%%" { return SIGNMOD; } -"$$" { return START_SECTION_ID; } -[-+|^&*/%~$():[\]=,] { return yytext[0]; } - - /* special non-local ..@label and 
labels like ..start */ -\.\.[a-z0-9_$#@~.?]+ { - yylval.str_val = xstrdup(yytext); - return SPECIAL_ID; -} - - /* local label (.label) */ -\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* { - if (!nasm_parser_locallabel_base) { - Warning(_("no non-local label before `%s'"), yytext); - yylval.str_val = xstrdup(yytext); - } else { - yylval.str_val = xmalloc(strlen(yytext) + - strlen(nasm_parser_locallabel_base) + 1); - strcpy(yylval.str_val, nasm_parser_locallabel_base); - strcat(yylval.str_val, yytext); - } - - return LOCAL_ID; -} - - /* instructions */ - /* @INSTRUCTIONS@ */ - - /* label */ -[a-z_?][a-z0-9_$#@~.?]* { - yylval.str_val = xstrdup(yytext); - return ID; -} - -;.* ; - -{WS}+ ; - -\n return '\n'; - -. { - if (WARN_ENABLED(WARN_UNRECOGNIZED_CHAR)) - Warning(_("ignoring unrecognized character `%s'"), - conv_unprint(yytext[0])); -} - diff --git a/src/tests/bytecode_test.c b/src/tests/bytecode_test.c index f3411512..f702596c 100644 --- a/src/tests/bytecode_test.c +++ b/src/tests/bytecode_test.c @@ -25,7 +25,7 @@ #include "bytecode.h" #include "bc-int.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" START_TEST(test_x86_ea_new_reg) { diff --git a/src/tests/memexpr_test.c b/src/tests/memexpr_test.c index ec9c001d..86f7f320 100644 --- a/src/tests/memexpr_test.c +++ b/src/tests/memexpr_test.c @@ -32,7 +32,7 @@ #include "bytecode.h" #include "arch.h" -#include "x86-int.h" +#include "x86arch.h" typedef enum { REG_AX = 0, -- 2.40.0