From: Peter Johnson Date: Sun, 8 Jul 2007 05:31:59 +0000 (-0000) Subject: Change handling of frontend (mnemonic) instructions to make it easier to X-Git-Tag: v0.6.2~8^2~23 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fb8b8b893614301b3a81cc02fc112213228833a1;p=yasm Change handling of frontend (mnemonic) instructions to make it easier to customize on the arch side of things. Instead of passing around an arch_data[4] for instructions, now the arch can extend the structure itself to add additional information in any format it likes. svn path=/trunk/yasm/; revision=1889 --- diff --git a/libyasm.h b/libyasm.h index ec92aa53..8fb7aca7 100644 --- a/libyasm.h +++ b/libyasm.h @@ -73,6 +73,7 @@ typedef unsigned long uintptr_t; #include #include +#include #include #include diff --git a/libyasm/Makefile.inc b/libyasm/Makefile.inc index 2fc87633..98dbfa68 100644 --- a/libyasm/Makefile.inc +++ b/libyasm/Makefile.inc @@ -1,12 +1,10 @@ # $Id$ -libyasm_a_SOURCES += libyasm/arch.c libyasm_a_SOURCES += libyasm/assocdat.c libyasm_a_SOURCES += libyasm/bitvect.c libyasm_a_SOURCES += libyasm/bc-align.c libyasm_a_SOURCES += libyasm/bc-data.c libyasm_a_SOURCES += libyasm/bc-incbin.c -libyasm_a_SOURCES += libyasm/bc-insn.c libyasm_a_SOURCES += libyasm/bc-org.c libyasm_a_SOURCES += libyasm/bc-reserve.c libyasm_a_SOURCES += libyasm/bytecode.c @@ -15,6 +13,7 @@ libyasm_a_SOURCES += libyasm/expr.c libyasm_a_SOURCES += libyasm/file.c libyasm_a_SOURCES += libyasm/floatnum.c libyasm_a_SOURCES += libyasm/hamt.c +libyasm_a_SOURCES += libyasm/insn.c libyasm_a_SOURCES += libyasm/intnum.c libyasm_a_SOURCES += libyasm/inttree.c libyasm_a_SOURCES += libyasm/linemap.c @@ -65,6 +64,7 @@ modinclude_HEADERS += libyasm/expr-int.h modinclude_HEADERS += libyasm/file.h modinclude_HEADERS += libyasm/floatnum.h modinclude_HEADERS += libyasm/hamt.h +modinclude_HEADERS += libyasm/insn.h modinclude_HEADERS += libyasm/intnum.h modinclude_HEADERS += libyasm/inttree.h modinclude_HEADERS += 
libyasm/linemap.h diff --git a/libyasm/arch.c b/libyasm/arch.c deleted file mode 100644 index e16f6ca7..00000000 --- a/libyasm/arch.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Architecture interface - * - * Copyright (C) 2001-2007 Peter Johnson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#define YASM_LIB_INTERNAL -#define YASM_ARCH_INTERNAL -#include "util.h" -/*@unused@*/ RCSID("$Id$"); - -#include "libyasm-stdint.h" -#include "coretype.h" - -#include "expr.h" - -#include "bytecode.h" - -#include "arch.h" - - -yasm_insn_operand * -yasm_operand_create_reg(uintptr_t reg) -{ - yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); - - retval->type = YASM_INSN__OPERAND_REG; - retval->data.reg = reg; - retval->targetmod = 0; - retval->size = 0; - retval->deref = 0; - retval->strict = 0; - - return retval; -} - -yasm_insn_operand * -yasm_operand_create_segreg(uintptr_t segreg) -{ - yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); - - retval->type = YASM_INSN__OPERAND_SEGREG; - retval->data.reg = segreg; - retval->targetmod = 0; - retval->size = 0; - retval->deref = 0; - retval->strict = 0; - - return retval; -} - -yasm_insn_operand * -yasm_operand_create_mem(/*@only@*/ yasm_effaddr *ea) -{ - yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); - - retval->type = YASM_INSN__OPERAND_MEMORY; - retval->data.ea = ea; - retval->targetmod = 0; - retval->size = 0; - retval->deref = 0; - retval->strict = 0; - - return retval; -} - -yasm_insn_operand * -yasm_operand_create_imm(/*@only@*/ yasm_expr *val) -{ - yasm_insn_operand *retval; - const uintptr_t *reg; - - reg = yasm_expr_get_reg(&val, 0); - if (reg) { - retval = yasm_operand_create_reg(*reg); - yasm_expr_destroy(val); - } else { - retval = yasm_xmalloc(sizeof(yasm_insn_operand)); - retval->type = YASM_INSN__OPERAND_IMM; - retval->data.val = val; - retval->targetmod = 0; - retval->size = 0; - retval->deref = 0; - retval->strict = 0; - } - - return retval; -} - -void -yasm_operand_print(const yasm_insn_operand *op, FILE *f, int indent_level, - yasm_arch *arch) -{ - switch (op->type) { - case YASM_INSN__OPERAND_REG: - fprintf(f, "%*sReg=", indent_level, ""); - yasm_arch_reg_print(arch, op->data.reg, f); - fprintf(f, "\n"); - break; - case 
YASM_INSN__OPERAND_SEGREG: - fprintf(f, "%*sSegReg=", indent_level, ""); - yasm_arch_segreg_print(arch, op->data.reg, f); - fprintf(f, "\n"); - break; - case YASM_INSN__OPERAND_MEMORY: - fprintf(f, "%*sMemory=\n", indent_level, ""); - yasm_ea_print(op->data.ea, f, indent_level); - break; - case YASM_INSN__OPERAND_IMM: - fprintf(f, "%*sImm=", indent_level, ""); - yasm_expr_print(op->data.val, f); - fprintf(f, "\n"); - break; - } - fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", - (unsigned long)op->targetmod); - fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size); - fprintf(f, "%*sDeref=%d, Strict=%d\n", indent_level+1, "", (int)op->deref, - (int)op->strict); -} - -void -yasm_ops_delete(yasm_insn_operands *headp, int content) -{ - yasm_insn_operand *cur, *next; - - cur = STAILQ_FIRST(headp); - while (cur) { - next = STAILQ_NEXT(cur, link); - if (content) - switch (cur->type) { - case YASM_INSN__OPERAND_MEMORY: - yasm_ea_destroy(cur->data.ea); - break; - case YASM_INSN__OPERAND_IMM: - yasm_expr_destroy(cur->data.val); - break; - default: - break; - } - yasm_xfree(cur); - cur = next; - } - STAILQ_INIT(headp); -} - -/*@null@*/ yasm_insn_operand * -yasm_ops_append(yasm_insn_operands *headp, - /*@returned@*/ /*@null@*/ yasm_insn_operand *op) -{ - if (op) { - STAILQ_INSERT_TAIL(headp, op, link); - return op; - } - return (yasm_insn_operand *)NULL; -} - -void -yasm_ops_print(const yasm_insn_operands *headp, FILE *f, int indent_level, - yasm_arch *arch) -{ - yasm_insn_operand *cur; - - STAILQ_FOREACH (cur, headp, link) - yasm_operand_print(cur, f, indent_level, arch); -} - -yasm_insn_operands * -yasm_ops_create(void) -{ - yasm_insn_operands *headp = yasm_xmalloc(sizeof(yasm_insn_operands)); - yasm_ops_initialize(headp); - return headp; -} - -void -yasm_ops_destroy(yasm_insn_operands *headp, int content) -{ - yasm_ops_delete(headp, content); - yasm_xfree(headp); -} - -/* Non-macro yasm_ops_first() for non-YASM_LIB_INTERNAL users. 
*/ -#undef yasm_ops_first -yasm_insn_operand * -yasm_ops_first(yasm_insn_operands *headp) -{ - return STAILQ_FIRST(headp); -} - -/* Non-macro yasm_operand_next() for non-YASM_LIB_INTERNAL users. */ -#undef yasm_operand_next -yasm_insn_operand * -yasm_operand_next(yasm_insn_operand *cur) -{ - return STAILQ_NEXT(cur, link); -} diff --git a/libyasm/arch.h b/libyasm/arch.h index 1daa4f37..eed31765 100644 --- a/libyasm/arch.h +++ b/libyasm/arch.h @@ -59,12 +59,6 @@ typedef enum yasm_arch_regtmod { YASM_ARCH_TARGETMOD /**< A target modifier (for jumps) */ } yasm_arch_regtmod; -/** An instruction operand (opaque type). */ -typedef struct yasm_insn_operand yasm_insn_operand; -#ifdef YASM_LIB_INTERNAL -/*@reldef@*/ STAILQ_HEAD(yasm_insn_operands, yasm_insn_operand); -#endif - #ifndef YASM_DOXYGEN /** Base #yasm_arch structure. Must be present as the first element in any * #yasm_arch implementation. @@ -140,30 +134,21 @@ typedef struct yasm_arch_module { * Call yasm_arch_parse_check_insnprefix() instead of calling this function. */ yasm_arch_insnprefix (*parse_check_insnprefix) - (yasm_arch *arch, /*@out@*/ uintptr_t data[4], const char *id, - size_t id_len); + (yasm_arch *arch, const char *id, size_t id_len, unsigned long line, + /*@out@*/ /*@only@*/ yasm_bytecode **bc, /*@out@*/ uintptr_t *prefix); /** Module-level implementation of yasm_arch_parse_check_regtmod(). * Call yasm_arch_parse_check_regtmod() instead of calling this function. */ yasm_arch_regtmod (*parse_check_regtmod) - (yasm_arch *arch, /*@out@*/ uintptr_t *data, const char *id, - size_t id_len); + (yasm_arch *arch, const char *id, size_t id_len, + /*@out@*/ uintptr_t *data); /** Module-level implementation of yasm_arch_get_fill(). * Call yasm_arch_get_fill() instead of calling this function. */ const unsigned char ** (*get_fill) (const yasm_arch *arch); - /** Module-level implementation of yasm_arch_finalize_insn(). - * Call yasm_arch_finalize_insn() instead of calling this function. 
- */ - void (*finalize_insn) - (yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, - const uintptr_t data[4], int num_operands, - /*@null@*/ yasm_insn_operands *operands, int num_prefixes, - uintptr_t **prefixes, int num_segregs, const uintptr_t *segregs); - /** Module-level implementation of yasm_arch_floatnum_tobytes(). * Call yasm_arch_floatnum_tobytes() instead of calling this function. */ @@ -205,6 +190,22 @@ typedef struct yasm_arch_module { */ yasm_effaddr * (*ea_create) (yasm_arch *arch, /*@keep@*/ yasm_expr *e); + /** Module-level implementation of yasm_arch_ea_destroy(). + * Call yasm_arch_ea_destroy() instead of calling this function. + */ + void (*ea_destroy) (/*@only@*/ yasm_effaddr *ea); + + /** Module-level implementation of yasm_arch_ea_print(). + * Call yasm_arch_ea_print() instead of calling this function. + */ + void (*ea_print) (const yasm_effaddr *ea, FILE *f, int indent_level); + + /** Module-level implementation of yasm_arch_create_empty_insn(). + * Call yasm_arch_create_empty_insn() instead of calling this function. + */ + /*@only@*/ yasm_bytecode * (*create_empty_insn) (yasm_arch *arch, + unsigned long line); + /** NULL-terminated list of machines for this architecture. * Call yasm_arch_get_machine() to get the active machine of a particular * #yasm_arch. @@ -230,56 +231,6 @@ typedef struct yasm_arch_module { unsigned int min_insn_len; } yasm_arch_module; -#ifdef YASM_LIB_INTERNAL -/** An instruction operand. \internal */ -struct yasm_insn_operand { - /** Link for building linked list of operands. \internal */ - /*@reldef@*/ STAILQ_ENTRY(yasm_insn_operand) link; - - /** Operand type. */ - enum yasm_insn_operand_type { - YASM_INSN__OPERAND_REG = 1, /**< A register. */ - YASM_INSN__OPERAND_SEGREG, /**< A segment register. */ - YASM_INSN__OPERAND_MEMORY, /**< An effective address - * (memory reference). */ - YASM_INSN__OPERAND_IMM /**< An immediate or jump target. */ - } type; - - /** Operand data. 
*/ - union { - uintptr_t reg; /**< Arch data for reg/segreg. */ - yasm_effaddr *ea; /**< Effective address for memory references. */ - yasm_expr *val; /**< Value of immediate or jump target. */ - } data; - - uintptr_t targetmod; /**< Arch target modifier, 0 if none. */ - - /** Specified size of the operand, in bits. 0 if not user-specified. */ - unsigned int size:8; - - /** Nonzero if dereference. Used for "*foo" in GAS. - * The reason for this is that by default in GAS, an unprefixed value - * is a memory address, except for jumps/calls, in which case it needs a - * "*" prefix to become a memory address (otherwise it's an immediate). - * This isn't knowable in the parser stage, so the parser sets this flag - * to indicate the "*" prefix has been used, and the arch needs to adjust - * the operand type appropriately depending on the instruction type. - */ - unsigned int deref:1; - - /** Nonzero if strict. Used for "strict foo" in NASM. - * This is used to inhibit optimization on otherwise "sized" values. - * For example, the user may just want to be explicit with the size on - * "push dword 4", but not actually want to force the immediate size to - * 4 bytes (rather wanting the optimizer to optimize it down to 1 byte as - * though "dword" was not specified). To indicate the immediate should - * actually be forced to 4 bytes, the user needs to write - * "push strict dword 4", which sets this flag. - */ - unsigned int strict:1; -}; -#endif - /** Get the one-line description of an architecture. * \param arch architecture * \return One-line description of architecture. @@ -348,15 +299,18 @@ int yasm_arch_set_var(yasm_arch *arch, const char *var, unsigned long val); * symbols. Any additional data beyond just the type (almost always necessary) * should be returned into the space provided by the data parameter. 
* \param arch architecture - * \param data extra identification information (yasm_arch-specific) - * [output] * \param id identifier as in the input file * \param id_len length of id string + * \param line virtual line + * \param bc for instructions, yasm_insn-based bytecode is returned + * (and NULL otherwise) + * \param prefix for prefixes, yasm_arch-specific value is returned + * (and 0 otherwise) * \return Identifier type (#YASM_ARCH_NOTINSNPREFIX if unrecognized) */ yasm_arch_insnprefix yasm_arch_parse_check_insnprefix - (yasm_arch *arch, /*@out@*/ uintptr_t data[4], const char *id, - size_t id_len); + (yasm_arch *arch, const char *id, size_t id_len, unsigned long line, + /*@out@*/ /*@only@*/ yasm_bytecode **bc, /*@out@*/ uintptr_t *prefix); /** Check an generic identifier to see if it matches architecture specific * names for registers or target modifiers. Unrecognized identifiers should @@ -364,15 +318,15 @@ yasm_arch_insnprefix yasm_arch_parse_check_insnprefix * (almost always necessary) should be returned into the space provided by the * data parameter. * \param arch architecture - * \param data extra identification information (yasm_arch-specific) - * [output] * \param id identifier as in the input file * \param id_len length of id string + * \param data extra identification information (yasm_arch-specific) + * [output] * \return Identifier type (#YASM_ARCH_NOTREGTMOD if unrecognized) */ yasm_arch_regtmod yasm_arch_parse_check_regtmod - (yasm_arch *arch, /*@out@*/ uintptr_t *data, const char *id, - size_t id_len); + (yasm_arch *arch, const char *id, size_t id_len, + /*@out@*/ uintptr_t *data); /** Get NOP fill patterns for 1-15 bytes of fill. * \param arch architecture @@ -381,28 +335,6 @@ yasm_arch_regtmod yasm_arch_parse_check_regtmod */ const unsigned char **yasm_arch_get_fill(const yasm_arch *arch); -/** Finalize an instruction from a semi-generic insn description. Note an - * existing bytecode is required. 
- * \param arch architecture - * \param bc bytecode to finalize - * \param prev_bc previous bytecode in section - * \param data instruction data (from parse_check_id()); all - * zero indicates an empty instruction - * \param num_operands number of operands - * \param operands list of operands (in parse order) - * \param num_prefixes number of prefixes - * \param prefixes array of 4-element prefix data - * \param num_segregs number of segment register prefixes - * \param segregs array of segment register data - * \return If no match is found (the instruction is invalid), no action is - * performed and an error is recorded. - */ -void yasm_arch_finalize_insn - (yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, - const uintptr_t data[4], int num_operands, - /*@null@*/ yasm_insn_operands *operands, int num_prefixes, - const uintptr_t **prefixes, int num_segregs, const uintptr_t *segregs); - /** Output #yasm_floatnum to buffer. Puts the value into the least * significant bits of the destination, or may be shifted into more * significant bits by the shift parameter. The destination bits are @@ -481,6 +413,29 @@ void yasm_arch_segreg_print(yasm_arch *arch, uintptr_t segreg, FILE *f); */ yasm_effaddr *yasm_arch_ea_create(yasm_arch *arch, /*@keep@*/ yasm_expr *e); +/** Delete (free allocated memory for) an effective address. + * \param arch architecture + * \param ea effective address (only pointer to it). + */ +void yasm_arch_ea_destroy(yasm_arch *arch, /*@only@*/ yasm_effaddr *ea); + +/** Print an effective address. For debugging purposes. + * \param arch architecture + * \param ea effective address + * \param f file + * \param indent_level indentation level + */ +void yasm_arch_ea_print(const yasm_arch *arch, const yasm_effaddr *ea, + FILE *f, int indent_level); + +/** Create a bytecode that represents a single empty (0 length) instruction. + * This is used for handling solitary prefixes. 
+ * \param arch architecture + * \param line virtual line (from yasm_linemap) + * \return Newly allocated bytecode. + */ +/*@only@*/ yasm_bytecode *yasm_arch_create_empty_insn(yasm_arch *arch, + unsigned long line); #ifndef YASM_DOXYGEN @@ -506,20 +461,14 @@ yasm_effaddr *yasm_arch_ea_create(yasm_arch *arch, /*@keep@*/ yasm_expr *e); ((yasm_arch_base *)arch)->module->get_address_size(arch) #define yasm_arch_set_var(arch, var, val) \ ((yasm_arch_base *)arch)->module->set_var(arch, var, val) -#define yasm_arch_parse_check_insnprefix(arch, data, id, id_len) \ - ((yasm_arch_base *)arch)->module->parse_check_insnprefix(arch, data, id, \ - id_len) -#define yasm_arch_parse_check_regtmod(arch, data, id, id_len) \ - ((yasm_arch_base *)arch)->module->parse_check_regtmod(arch, data, id, \ - id_len) +#define yasm_arch_parse_check_insnprefix(arch, id, id_len, line, bc, prefix) \ + ((yasm_arch_base *)arch)->module->parse_check_insnprefix \ + (arch, id, id_len, line, bc, prefix) +#define yasm_arch_parse_check_regtmod(arch, id, id_len, data) \ + ((yasm_arch_base *)arch)->module->parse_check_regtmod \ + (arch, id, id_len, data) #define yasm_arch_get_fill(arch) \ ((yasm_arch_base *)arch)->module->get_fill(arch) -#define yasm_arch_finalize_insn(arch, bc, prev_bc, data, num_operands, \ - operands, num_prefixes, prefixes, \ - num_segregs, segregs) \ - ((yasm_arch_base *)arch)->module->finalize_insn \ - (arch, bc, prev_bc, data, num_operands, operands, num_prefixes, \ - prefixes, num_segregs, segregs) #define yasm_arch_floatnum_tobytes(arch, flt, buf, destsize, valsize, shift, \ warn) \ ((yasm_arch_base *)arch)->module->floatnum_tobytes \ @@ -538,100 +487,13 @@ yasm_effaddr *yasm_arch_ea_create(yasm_arch *arch, /*@keep@*/ yasm_expr *e); ((yasm_arch_base *)arch)->module->segreg_print(arch, segreg, f) #define yasm_arch_ea_create(arch, e) \ ((yasm_arch_base *)arch)->module->ea_create(arch, e) +#define yasm_arch_ea_destroy(arch, ea) \ + ((yasm_arch_base *)arch)->module->ea_destroy(ea) 
+#define yasm_arch_ea_print(arch, ea, f, i) \ + ((yasm_arch_base *)arch)->module->ea_print(ea, f, i) +#define yasm_arch_create_empty_insn(arch, line) \ + ((yasm_arch_base *)arch)->module->create_empty_insn(arch, line) #endif -/** Create an instruction operand from a register. - * \param reg register - * \return Newly allocated operand. - */ -yasm_insn_operand *yasm_operand_create_reg(uintptr_t reg); - -/** Create an instruction operand from a segment register. - * \param segreg segment register - * \return Newly allocated operand. - */ -yasm_insn_operand *yasm_operand_create_segreg(uintptr_t segreg); - -/** Create an instruction operand from an effective address. - * \param ea effective address - * \return Newly allocated operand. - */ -yasm_insn_operand *yasm_operand_create_mem(/*@only@*/ yasm_effaddr *ea); - -/** Create an instruction operand from an immediate expression. - * Looks for cases of a single register and creates a register variant of - * #yasm_insn_operand. - * \param val immediate expression - * \return Newly allocated operand. - */ -yasm_insn_operand *yasm_operand_create_imm(/*@only@*/ yasm_expr *val); - -/** Print an instruction operand. For debugging purposes. - * \param arch architecture - * \param f file - * \param indent_level indentation level - * \param op instruction operand - */ -void yasm_operand_print(const yasm_insn_operand *op, FILE *f, int indent_level, - yasm_arch *arch); - -/** Create a new list of instruction operands. - * \return Newly allocated list. - */ -yasm_insn_operands *yasm_ops_create(void); - -/** Destroy a list of instruction operands (created with yasm_ops_create()). - * \param headp list of instruction operands - * \param content if nonzero, deletes content of each operand - */ -void yasm_ops_destroy(yasm_insn_operands *headp, int content); - -/** Get the first operand in a list of instruction operands. - * \param headp list of instruction operands - * \return First operand in list (NULL if list is empty). 
- */ -yasm_insn_operand *yasm_ops_first(yasm_insn_operands *headp); - -/** Get the next operand in a list of instruction operands. - * \param cur previous operand - * \return Next operand in list (NULL if cur was the last operand). - */ -yasm_insn_operand *yasm_operand_next(yasm_insn_operand *cur); - -#ifdef YASM_LIB_INTERNAL -#define yasm_ops_initialize(headp) STAILQ_INIT(headp) -#define yasm_ops_first(headp) STAILQ_FIRST(headp) -#define yasm_operand_next(cur) STAILQ_NEXT(cur, link) - -/** Delete (free allocated memory for) a list of instruction operands (created - * with yasm_ops_initialize()). - * \param headp list of instruction operands - * \param content if nonzero, deletes content of each operand - */ -void yasm_ops_delete(yasm_insn_operands *headp, int content); -#endif - -/** Add data value to the end of a list of instruction operands. - * \note Does not make a copy of the operand; so don't pass this function - * static or local variables, and discard the op pointer after calling - * this function. - * \param headp list of instruction operands - * \param op operand (may be NULL) - * \return If operand was actually appended (it wasn't NULL), the operand; - * otherwise NULL. - */ -/*@null@*/ yasm_insn_operand *yasm_ops_append - (yasm_insn_operands *headp, - /*@returned@*/ /*@null@*/ yasm_insn_operand *op); - -/** Print a list of instruction operands. For debugging purposes. 
- * \param arch architecture - * \param f file - * \param indent_level indentation level - * \param headp list of instruction operands - */ -void yasm_ops_print(const yasm_insn_operands *headp, FILE *f, int indent_level, - yasm_arch *arch); - #endif diff --git a/libyasm/bc-insn.c b/libyasm/bc-insn.c deleted file mode 100644 index de8f5143..00000000 --- a/libyasm/bc-insn.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Insn bytecode - * - * Copyright (C) 2005-2007 Peter Johnson - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#define YASM_LIB_INTERNAL -#include "util.h" -/*@unused@*/ RCSID("$Id$"); - -#include "libyasm-stdint.h" -#include "coretype.h" - -#include "errwarn.h" -#include "expr.h" -#include "value.h" - -#include "bytecode.h" -#include "arch.h" - -#include "bc-int.h" - - -typedef struct bytecode_insn { - /*@dependent@*/ yasm_arch *arch; - uintptr_t insn_data[4]; - - int num_operands; - /*@null@*/ yasm_insn_operands operands; - - /* array of 4-element prefix_data arrays */ - int num_prefixes; - /*@null@*/ uintptr_t **prefixes; - - /* array of segment prefixes */ - int num_segregs; - /*@null@*/ uintptr_t *segregs; -} bytecode_insn; - -static void bc_insn_destroy(void *contents); -static void bc_insn_print(const void *contents, FILE *f, int indent_level); -static void bc_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc); -static int bc_insn_calc_len(yasm_bytecode *bc, yasm_bc_add_span_func add_span, - void *add_span_data); -static int bc_insn_tobytes(yasm_bytecode *bc, unsigned char **bufp, void *d, - yasm_output_value_func output_value, - /*@null@*/ yasm_output_reloc_func output_reloc); - -static const yasm_bytecode_callback bc_insn_callback = { - bc_insn_destroy, - bc_insn_print, - bc_insn_finalize, - bc_insn_calc_len, - yasm_bc_expand_common, - bc_insn_tobytes, - 0 -}; - - -const yasm_expr * -yasm_ea_get_disp(const yasm_effaddr *ea) -{ - return ea->disp.abs; -} - -void -yasm_ea_set_len(yasm_effaddr *ptr, unsigned int len) -{ - if (!ptr) - return; - - /* Currently don't warn if length truncated, as this is called only from - * an explicit override, where we expect the user knows what they're doing. 
- */ - - ptr->disp.size = (unsigned char)len; -} - -void -yasm_ea_set_nosplit(yasm_effaddr *ptr, unsigned int nosplit) -{ - if (!ptr) - return; - - ptr->nosplit = (unsigned char)nosplit; -} - -void -yasm_ea_set_strong(yasm_effaddr *ptr, unsigned int strong) -{ - if (!ptr) - return; - - ptr->strong = (unsigned char)strong; -} - -void -yasm_ea_set_segreg(yasm_effaddr *ea, uintptr_t segreg) -{ - if (!ea) - return; - - if (segreg != 0 && ea->segreg != 0) - yasm_warn_set(YASM_WARN_GENERAL, - N_("multiple segment overrides, using leftmost")); - - ea->segreg = segreg; -} - -/*@-nullstate@*/ -void -yasm_ea_destroy(yasm_effaddr *ea) -{ - ea->callback->destroy(ea); - yasm_value_delete(&ea->disp); - yasm_xfree(ea); -} -/*@=nullstate@*/ - -/*@-nullstate@*/ -void -yasm_ea_print(const yasm_effaddr *ea, FILE *f, int indent_level) -{ - fprintf(f, "%*sDisp:\n", indent_level, ""); - yasm_value_print(&ea->disp, f, indent_level+1); - fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit); - ea->callback->print(ea, f, indent_level); -} -/*@=nullstate@*/ - -static void -bc_insn_destroy(void *contents) -{ - bytecode_insn *insn = (bytecode_insn *)contents; - if (insn->num_operands > 0) - yasm_ops_delete(&insn->operands, 0); - if (insn->num_prefixes > 0) { - int i; - for (i=0; i<insn->num_prefixes; i++) - yasm_xfree(insn->prefixes[i]); - yasm_xfree(insn->prefixes); - } - if (insn->num_segregs > 0) - yasm_xfree(insn->segregs); - yasm_xfree(contents); -} - -static void -bc_insn_print(const void *contents, FILE *f, int indent_level) -{ -} - -static void -bc_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) -{ - bytecode_insn *insn = (bytecode_insn *)bc->contents; - int i; - yasm_insn_operand *op; - yasm_error_class eclass; - char *str, *xrefstr; - unsigned long xrefline; - - /* Simplify the operands' expressions first. 
*/ - for (i = 0, op = yasm_ops_first(&insn->operands); - op && i<insn->num_operands; op = yasm_operand_next(op), i++) { - /* Check operand type */ - switch (op->type) { - case YASM_INSN__OPERAND_MEMORY: - /* Don't get over-ambitious here; some archs' memory expr - * parser are sensitive to the presence of *1, etc, so don't - * simplify reg*1 identities. - */ - if (op->data.ea) - op->data.ea->disp.abs = - yasm_expr__level_tree(op->data.ea->disp.abs, 1, 1, 0, - 0, NULL, NULL); - if (yasm_error_occurred()) { - /* Add a pointer to where it was used to the error */ - yasm_error_fetch(&eclass, &str, &xrefline, &xrefstr); - if (xrefstr) { - yasm_error_set_xref(xrefline, "%s", xrefstr); - yasm_xfree(xrefstr); - } - if (str) { - yasm_error_set(eclass, "%s in memory expression", str); - yasm_xfree(str); - } - return; - } - break; - case YASM_INSN__OPERAND_IMM: - op->data.val = - yasm_expr__level_tree(op->data.val, 1, 1, 1, 0, NULL, - NULL); - if (yasm_error_occurred()) { - /* Add a pointer to where it was used to the error */ - yasm_error_fetch(&eclass, &str, &xrefline, &xrefstr); - if (xrefstr) { - yasm_error_set_xref(xrefline, "%s", xrefstr); - yasm_xfree(xrefstr); - } - if (str) { - yasm_error_set(eclass, "%s in immediate expression", - str); - yasm_xfree(str); - } - return; - } - break; - default: - break; - } - } - - yasm_arch_finalize_insn(insn->arch, bc, prev_bc, insn->insn_data, - insn->num_operands, &insn->operands, - insn->num_prefixes, insn->prefixes, - insn->num_segregs, insn->segregs); -} - -static int -bc_insn_calc_len(yasm_bytecode *bc, yasm_bc_add_span_func add_span, - void *add_span_data) -{ - yasm_internal_error(N_("bc_insn_calc_len() is not implemented")); - /*@notreached@*/ - return 0; -} - -static int -bc_insn_tobytes(yasm_bytecode *bc, unsigned char **bufp, void *d, - yasm_output_value_func output_value, - /*@unused@*/ yasm_output_reloc_func output_reloc) -{ - yasm_internal_error(N_("bc_insn_tobytes() is not implemented")); - /*@notreached@*/ - return 1; -} - 
-yasm_bytecode * -yasm_bc_create_insn(yasm_arch *arch, const uintptr_t insn_data[4], - int num_operands, /*@null@*/ yasm_insn_operands *operands, - unsigned long line) -{ - bytecode_insn *insn = yasm_xmalloc(sizeof(bytecode_insn)); - - insn->arch = arch; - insn->insn_data[0] = insn_data[0]; - insn->insn_data[1] = insn_data[1]; - insn->insn_data[2] = insn_data[2]; - insn->insn_data[3] = insn_data[3]; - insn->num_operands = num_operands; - if (operands) - insn->operands = *operands; /* structure copy */ - else - yasm_ops_initialize(&insn->operands); - insn->num_prefixes = 0; - insn->prefixes = NULL; - insn->num_segregs = 0; - insn->segregs = NULL; - - return yasm_bc_create_common(&bc_insn_callback, insn, line); -} - -yasm_bytecode * -yasm_bc_create_empty_insn(yasm_arch *arch, unsigned long line) -{ - bytecode_insn *insn = yasm_xmalloc(sizeof(bytecode_insn)); - - insn->arch = arch; - insn->insn_data[0] = 0; - insn->insn_data[1] = 0; - insn->insn_data[2] = 0; - insn->insn_data[3] = 0; - insn->num_operands = 0; - yasm_ops_initialize(&insn->operands); - insn->num_prefixes = 0; - insn->prefixes = NULL; - insn->num_segregs = 0; - insn->segregs = NULL; - - return yasm_bc_create_common(&bc_insn_callback, insn, line); -} - -void -yasm_bc_insn_add_prefix(yasm_bytecode *bc, const uintptr_t prefix_data[4]) -{ - bytecode_insn *insn = (bytecode_insn *)bc->contents; - - assert(bc->callback == bc_insn_callback); - - insn->prefixes = - yasm_xrealloc(insn->prefixes, - (insn->num_prefixes+1)*sizeof(uintptr_t *)); - insn->prefixes[insn->num_prefixes] = yasm_xmalloc(4*sizeof(uintptr_t)); - insn->prefixes[insn->num_prefixes][0] = prefix_data[0]; - insn->prefixes[insn->num_prefixes][1] = prefix_data[1]; - insn->prefixes[insn->num_prefixes][2] = prefix_data[2]; - insn->prefixes[insn->num_prefixes][3] = prefix_data[3]; - insn->num_prefixes++; -} - -void -yasm_bc_insn_add_seg_prefix(yasm_bytecode *bc, uintptr_t segreg) -{ - bytecode_insn *insn = (bytecode_insn *)bc->contents; - - 
assert(bc->callback == bc_insn_callback); - - insn->segregs = - yasm_xrealloc(insn->segregs, (insn->num_segregs+1)*sizeof(uintptr_t)); - insn->segregs[insn->num_segregs] = segreg; - insn->num_segregs++; -} diff --git a/libyasm/bc-int.h b/libyasm/bc-int.h index 76338509..d941ad6a 100644 --- a/libyasm/bc-int.h +++ b/libyasm/bc-int.h @@ -41,7 +41,8 @@ typedef struct yasm_bytecode_callback { enum yasm_bytecode_special_type { YASM_BC_SPECIAL_NONE = 0, YASM_BC_SPECIAL_RESERVE,/* Reserves space instead of outputting data */ - YASM_BC_SPECIAL_OFFSET /* Adjusts offset instead of calculating len */ + YASM_BC_SPECIAL_OFFSET, /* Adjusts offset instead of calculating len */ + YASM_BC_SPECIAL_INSN /* Instruction bytecode */ } special; } yasm_bytecode_callback; @@ -100,6 +101,12 @@ void yasm_bc_transform(yasm_bytecode *bc, */ void yasm_bc_finalize_common(yasm_bytecode *bc, yasm_bytecode *prev_bc); +/** Common bytecode callback calc_len function, for where the bytecode has + * no calculatable length. Causes an internal error if called. + */ +int yasm_bc_calc_len_common(yasm_bytecode *bc, yasm_bc_add_span_func add_span, + void *add_span_data); + /** Common bytecode callback expand function, for where the bytecode is * always short (calc_len never calls add_span). Causes an internal * error if called. @@ -108,6 +115,14 @@ int yasm_bc_expand_common (yasm_bytecode *bc, int span, long old_val, long new_val, /*@out@*/ long *neg_thres, /*@out@*/ long *pos_thres); +/** Common bytecode callback tobytes function, for where the bytecode + * cannot be converted to bytes. Causes an internal error if called. 
+ */ +int yasm_bc_tobytes_common + (yasm_bytecode *bc, unsigned char **bufp, void *d, + yasm_output_value_func output_value, + /*@null@*/ yasm_output_reloc_func output_reloc); + #define yasm_bc__next(x) STAILQ_NEXT(x, link) #endif diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index 41fe3ca2..d9c89282 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -58,6 +58,15 @@ yasm_bc_finalize_common(yasm_bytecode *bc, yasm_bytecode *prev_bc) { } +int +yasm_bc_calc_len_common(yasm_bytecode *bc, yasm_bc_add_span_func add_span, + void *add_span_data) +{ + yasm_internal_error(N_("bytecode length cannot be calculated")); + /*@unreached@*/ + return 0; +} + int yasm_bc_expand_common(yasm_bytecode *bc, int span, long old_val, long new_val, /*@out@*/ long *neg_thres, /*@out@*/ long *pos_thres) @@ -67,6 +76,16 @@ yasm_bc_expand_common(yasm_bytecode *bc, int span, long old_val, long new_val, return 0; } +int +yasm_bc_tobytes_common(yasm_bytecode *bc, unsigned char **bufp, void *d, + yasm_output_value_func output_value, + /*@null@*/ yasm_output_reloc_func output_reloc) +{ + yasm_internal_error(N_("bytecode cannot be converted to bytes")); + /*@unreached@*/ + return 0; +} + void yasm_bc_transform(yasm_bytecode *bc, const yasm_bytecode_callback *callback, void *contents) @@ -345,3 +364,11 @@ yasm_bc_get_multiple_expr(const yasm_bytecode *bc) { return bc->multiple; } + +yasm_insn * +yasm_bc_get_insn(yasm_bytecode *bc) +{ + if (bc->callback->special != YASM_BC_SPECIAL_INSN) + return NULL; + return (yasm_insn *)bc->contents; +} diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index 86c3c7e5..ef693ed3 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -34,45 +34,6 @@ #ifndef YASM_BYTECODE_H #define YASM_BYTECODE_H -/** An effective address. */ -typedef struct yasm_effaddr yasm_effaddr; - -/** Callbacks for effective address implementations. */ -typedef struct yasm_effaddr_callback { - /** Destroy the effective address (freeing it). 
- * \param ea effective address - */ - void (*destroy) (/*@only@*/ yasm_effaddr *ea); - - /** Print the effective address. - * \param ea effective address - * \param f file to output to - * \param indent_level indentation level - */ - void (*print) (const yasm_effaddr *ea, FILE *f, int indent_level); -} yasm_effaddr_callback; - -/** An effective address. */ -struct yasm_effaddr { - const yasm_effaddr_callback *callback; /**< callback functions */ - - yasm_value disp; /**< address displacement */ - - uintptr_t segreg; /**< segment register override (0 if none) */ - - unsigned char need_nonzero_len; /**< 1 if length of disp must be >0. */ - unsigned char need_disp; /**< 1 if a displacement should be present - * in the output. - */ - unsigned char nosplit; /**< 1 if reg*2 should not be split into - * reg+reg. (0 if not) - */ - unsigned char strong; /**< 1 if effective address is *definitely* - * an effective address, e.g. in GAS if - * expr(,1) form is used vs. just expr. - */ -}; - /** A data value (opaque type). */ typedef struct yasm_dataval yasm_dataval; /** A list of data values (opaque type). */ @@ -82,59 +43,6 @@ typedef struct yasm_datavalhead yasm_datavalhead; /*@reldef@*/ STAILQ_HEAD(yasm_datavalhead, yasm_dataval); #endif -/** Get the displacement portion of an effective address. - * \param ea effective address - * \return Expression representing the displacement (read-only). - */ -/*@observer@*/ const yasm_expr *yasm_ea_get_disp(const yasm_effaddr *ea); - -/** Set the length of the displacement portion of an effective address. - * The length is specified in bits. - * \param ea effective address - * \param len length in bits - */ -void yasm_ea_set_len(yasm_effaddr *ea, unsigned int len); - -/** Set/clear nosplit flag of an effective address. 
- * The nosplit flag indicates (for architectures that support complex effective - * addresses such as x86) if various types of complex effective addresses can - * be split into different forms in order to minimize instruction length. - * \param ea effective address - * \param nosplit nosplit flag setting (0=splits allowed, nonzero=splits - * not allowed) - */ -void yasm_ea_set_nosplit(yasm_effaddr *ea, unsigned int nosplit); - -/** Set/clear strong flag of an effective address. - * The strong flag indicates if an effective address is *definitely* an - * effective address. This is used in e.g. the GAS parser to differentiate - * between "expr" (which might or might not be an effective address) and - * "expr(,1)" (which is definitely an effective address). - * \param ea effective address - * \param strong strong flag setting (0=not strong, nonzero=strong) - */ -void yasm_ea_set_strong(yasm_effaddr *ea, unsigned int strong); - -/** Set segment override for an effective address. - * Some architectures (such as x86) support segment overrides on effective - * addresses. A override of an override will result in a warning. - * \param ea effective address - * \param segreg segment register (0 if none) - */ -void yasm_ea_set_segreg(yasm_effaddr *ea, uintptr_t segreg); - -/** Delete (free allocated memory for) an effective address. - * \param ea effective address (only pointer to it). - */ -void yasm_ea_destroy(/*@only@*/ yasm_effaddr *ea); - -/** Print an effective address. For debugging purposes. - * \param f file - * \param indent_level indentation level - * \param ea effective address - */ -void yasm_ea_print(const yasm_effaddr *ea, FILE *f, int indent_level); - /** Set multiple field of a bytecode. * A bytecode can be repeated a number of times when output. This function * sets that multiple. 
@@ -227,42 +135,6 @@ void yasm_bc_set_multiple(yasm_bytecode *bc, /*@keep@*/ yasm_expr *e); /*@only@*/ yasm_bytecode *yasm_bc_create_org (unsigned long start, unsigned long line); -/** Create a bytecode that represents a single instruction. - * \param arch instruction's architecture - * \param insn_data data that identifies the type of instruction - * \param num_operands number of operands - * \param operands instruction operands (may be NULL if no operands) - * \param line virtual line (from yasm_linemap) - * \return Newly allocated bytecode. - * \note Keeps the list of operands; do not call yasm_ops_delete() after - * giving operands to this function. - */ -/*@only@*/ yasm_bytecode *yasm_bc_create_insn - (yasm_arch *arch, const uintptr_t insn_data[4], int num_operands, - /*@null@*/ yasm_insn_operands *operands, unsigned long line); - -/** Create a bytecode that represents a single empty (0 length) instruction. - * This is used for handling solitary prefixes. - * \param arch instruction's architecture - * \param line virtual line (from yasm_linemap) - * \return Newly allocated bytecode. - */ -/*@only@*/ yasm_bytecode *yasm_bc_create_empty_insn(yasm_arch *arch, - unsigned long line); - -/** Associate a prefix with an instruction bytecode. - * \param bc instruction bytecode - * \param prefix_data data the identifies the prefix - */ -void yasm_bc_insn_add_prefix(yasm_bytecode *bc, - const uintptr_t prefix_data[4]); - -/** Associate a segment prefix with an instruction bytecode. - * \param bc instruction bytecode - * \param segreg data the identifies the segment register - */ -void yasm_bc_insn_add_seg_prefix(yasm_bytecode *bc, uintptr_t segreg); - /** Get the section that contains a particular bytecode. 
* \param bc bytecode * \return Section containing bc (can be NULL if bytecode is not part of a @@ -404,6 +276,13 @@ int yasm_bc_get_multiple(yasm_bytecode *bc, /*@out@*/ long *multiple, */ const yasm_expr *yasm_bc_get_multiple_expr(const yasm_bytecode *bc); +/** Get a #yasm_insn structure from an instruction bytecode (if possible). + * \param bc bytecode + * \return Instruction details if bytecode is an instruction bytecode, + * otherwise NULL. + */ +/*@dependent@*/ /*@null@*/ yasm_insn *yasm_bc_get_insn(yasm_bytecode *bc); + /** Create a new data value from an expression. * \param expn expression * \return Newly allocated data value. diff --git a/libyasm/coretype.h b/libyasm/coretype.h index 3fb4ec1a..dd4eeec4 100644 --- a/libyasm/coretype.h +++ b/libyasm/coretype.h @@ -192,11 +192,15 @@ typedef struct yasm_valparamhead yasm_valparamhead; */ typedef struct yasm_directive yasm_directive; -/** A list of instruction operands (opaque type). - * The list goes from left-to-right as parsed. - * \see arch.h for related functions. +/** An effective address. + * \see insn.h for related functions. */ -typedef struct yasm_insn_operands yasm_insn_operands; +typedef struct yasm_effaddr yasm_effaddr; + +/** An instruction. + * \see insn.h for related functions. + */ +typedef struct yasm_insn yasm_insn; /** Expression operators usable in #yasm_expr expressions. */ typedef enum yasm_expr_op { diff --git a/libyasm/insn.c b/libyasm/insn.c new file mode 100644 index 00000000..7fc21c38 --- /dev/null +++ b/libyasm/insn.c @@ -0,0 +1,310 @@ +/* + * Mnemonic instruction bytecode + * + * Copyright (C) 2005-2007 Peter Johnson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#define YASM_LIB_INTERNAL +#include "util.h" +/*@unused@*/ RCSID("$Id$"); + +#include "libyasm-stdint.h" +#include "coretype.h" + +#include "errwarn.h" +#include "expr.h" +#include "value.h" + +#include "bytecode.h" +#include "insn.h" +#include "arch.h" + +#include "bc-int.h" + + +void +yasm_ea_set_segreg(yasm_effaddr *ea, uintptr_t segreg) +{ + if (!ea) + return; + + if (segreg != 0 && ea->segreg != 0) + yasm_warn_set(YASM_WARN_GENERAL, + N_("multiple segment overrides, using leftmost")); + + ea->segreg = segreg; +} + +yasm_insn_operand * +yasm_operand_create_reg(uintptr_t reg) +{ + yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); + + retval->type = YASM_INSN__OPERAND_REG; + retval->data.reg = reg; + retval->targetmod = 0; + retval->size = 0; + retval->deref = 0; + retval->strict = 0; + + return retval; +} + +yasm_insn_operand * +yasm_operand_create_segreg(uintptr_t segreg) +{ + yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); + + retval->type = YASM_INSN__OPERAND_SEGREG; + retval->data.reg = segreg; + retval->targetmod = 0; + retval->size = 0; + retval->deref = 0; + retval->strict = 0; + + return retval; +} + +yasm_insn_operand * +yasm_operand_create_mem(/*@only@*/ yasm_effaddr *ea) +{ + yasm_insn_operand *retval = yasm_xmalloc(sizeof(yasm_insn_operand)); + + retval->type = YASM_INSN__OPERAND_MEMORY; + retval->data.ea = ea; + retval->targetmod = 0; + retval->size = 0; + retval->deref = 0; + retval->strict = 0; + + return retval; +} + +yasm_insn_operand * +yasm_operand_create_imm(/*@only@*/ yasm_expr *val) +{ + yasm_insn_operand *retval; + const uintptr_t *reg; + + reg = yasm_expr_get_reg(&val, 0); + if (reg) { + retval = yasm_operand_create_reg(*reg); + yasm_expr_destroy(val); + } else { + retval = yasm_xmalloc(sizeof(yasm_insn_operand)); + retval->type = YASM_INSN__OPERAND_IMM; + retval->data.val = val; + retval->targetmod = 0; + retval->size = 0; + retval->deref = 0; + retval->strict = 0; + } + + return retval; +} + 
+yasm_insn_operand * +yasm_insn_ops_append(yasm_insn *insn, yasm_insn_operand *op) +{ + if (op) { + insn->num_operands++; + STAILQ_INSERT_TAIL(&insn->operands, op, link); + return op; + } + return (yasm_insn_operand *)NULL; +} + +void +yasm_insn_add_prefix(yasm_insn *insn, uintptr_t prefix) +{ + insn->prefixes = + yasm_xrealloc(insn->prefixes, + (insn->num_prefixes+1)*sizeof(uintptr_t)); + insn->prefixes[insn->num_prefixes] = prefix; + insn->num_prefixes++; +} + +void +yasm_insn_add_seg_prefix(yasm_insn *insn, uintptr_t segreg) +{ + insn->segregs = + yasm_xrealloc(insn->segregs, (insn->num_segregs+1)*sizeof(uintptr_t)); + insn->segregs[insn->num_segregs] = segreg; + insn->num_segregs++; +} + +void +yasm_insn_initialize(yasm_insn *insn) +{ + STAILQ_INIT(&insn->operands); + + insn->prefixes = NULL; + insn->segregs = NULL; + + insn->num_operands = 0; + insn->num_prefixes = 0; + insn->num_segregs = 0; +} + +void +yasm_insn_delete(yasm_insn *insn, + void (*ea_destroy) (/*@only@*/ yasm_effaddr *)) +{ + if (insn->num_operands > 0) { + yasm_insn_operand *cur, *next; + + cur = STAILQ_FIRST(&insn->operands); + while (cur) { + next = STAILQ_NEXT(cur, link); + switch (cur->type) { + case YASM_INSN__OPERAND_MEMORY: + ea_destroy(cur->data.ea); + break; + case YASM_INSN__OPERAND_IMM: + yasm_expr_destroy(cur->data.val); + break; + default: + break; + } + yasm_xfree(cur); + cur = next; + } + } + if (insn->num_prefixes > 0) + yasm_xfree(insn->prefixes); + if (insn->num_segregs > 0) + yasm_xfree(insn->segregs); +} + +void +yasm_insn_print(const yasm_insn *insn, FILE *f, int indent_level) +{ + const yasm_insn_operand *op; + + STAILQ_FOREACH (op, &insn->operands, link) { + switch (op->type) { + case YASM_INSN__OPERAND_REG: + fprintf(f, "%*sReg=", indent_level, ""); + /*yasm_arch_reg_print(arch, op->data.reg, f);*/ + fprintf(f, "\n"); + break; + case YASM_INSN__OPERAND_SEGREG: + fprintf(f, "%*sSegReg=", indent_level, ""); + /*yasm_arch_segreg_print(arch, op->data.reg, f);*/ + fprintf(f, 
"\n"); + break; + case YASM_INSN__OPERAND_MEMORY: + fprintf(f, "%*sMemory=\n", indent_level, ""); + /*yasm_arch_ea_print(arch, op->data.ea, f, indent_level);*/ + break; + case YASM_INSN__OPERAND_IMM: + fprintf(f, "%*sImm=", indent_level, ""); + yasm_expr_print(op->data.val, f); + fprintf(f, "\n"); + break; + } + fprintf(f, "%*sTargetMod=%lx\n", indent_level+1, "", + (unsigned long)op->targetmod); + fprintf(f, "%*sSize=%u\n", indent_level+1, "", op->size); + fprintf(f, "%*sDeref=%d, Strict=%d\n", indent_level+1, "", + (int)op->deref, (int)op->strict); + } +} + +void +yasm_insn_finalize(yasm_insn *insn) +{ + unsigned int i; + yasm_insn_operand *op; + yasm_error_class eclass; + char *str, *xrefstr; + unsigned long xrefline; + + /* Simplify the operands' expressions first. */ + for (i = 0, op = yasm_insn_ops_first(insn); + op && inum_operands; op = yasm_insn_op_next(op), i++) { + /* Check operand type */ + switch (op->type) { + case YASM_INSN__OPERAND_MEMORY: + /* Don't get over-ambitious here; some archs' memory expr + * parser are sensitive to the presence of *1, etc, so don't + * simplify reg*1 identities. 
+ */ + if (op->data.ea) + op->data.ea->disp.abs = + yasm_expr__level_tree(op->data.ea->disp.abs, 1, 1, 0, + 0, NULL, NULL); + if (yasm_error_occurred()) { + /* Add a pointer to where it was used to the error */ + yasm_error_fetch(&eclass, &str, &xrefline, &xrefstr); + if (xrefstr) { + yasm_error_set_xref(xrefline, "%s", xrefstr); + yasm_xfree(xrefstr); + } + if (str) { + yasm_error_set(eclass, "%s in memory expression", str); + yasm_xfree(str); + } + return; + } + break; + case YASM_INSN__OPERAND_IMM: + op->data.val = + yasm_expr__level_tree(op->data.val, 1, 1, 1, 0, NULL, + NULL); + if (yasm_error_occurred()) { + /* Add a pointer to where it was used to the error */ + yasm_error_fetch(&eclass, &str, &xrefline, &xrefstr); + if (xrefstr) { + yasm_error_set_xref(xrefline, "%s", xrefstr); + yasm_xfree(xrefstr); + } + if (str) { + yasm_error_set(eclass, "%s in immediate expression", + str); + yasm_xfree(str); + } + return; + } + break; + default: + break; + } + } +} + +/* Non-macro yasm_insn_ops_first() for non-YASM_LIB_INTERNAL users. */ +#undef yasm_insn_ops_first +yasm_insn_operand * +yasm_insn_ops_first(yasm_insn *insn) +{ + return STAILQ_FIRST(&insn->operands); +} + +/* Non-macro yasm_insn_op_next() for non-YASM_LIB_INTERNAL users. */ +#undef yasm_insn_op_next +yasm_insn_operand * +yasm_insn_op_next(yasm_insn_operand *cur) +{ + return STAILQ_NEXT(cur, link); +} diff --git a/libyasm/insn.h b/libyasm/insn.h new file mode 100644 index 00000000..a12e1eff --- /dev/null +++ b/libyasm/insn.h @@ -0,0 +1,248 @@ +/** + * \file libyasm/insn.h + * \brief YASM mnemonic instruction. + * + * \rcs + * $Id$ + * \endrcs + * + * \license + * Copyright (C) 2002-2007 Peter Johnson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * \endlicense + */ +#ifndef YASM_INSN_H +#define YASM_INSN_H + +/** Base structure for an effective address. As with all base + * structures, must be present as the first element in any + * #yasm_arch implementation of an effective address. + */ +struct yasm_effaddr { + yasm_value disp; /**< address displacement */ + + /** Segment register override (0 if none). */ + uintptr_t segreg; + + /** 1 if length of disp must be >0. */ + unsigned int need_nonzero_len:1; + + /** 1 if a displacement should be present in the output. */ + unsigned int need_disp:1; + + /** 1 if reg*2 should not be split into reg+reg. (0 if not). + * This flag indicates (for architectures that support complex effective + * addresses such as x86) if various types of complex effective addresses + * can be split into different forms in order to minimize instruction + * length. + */ + unsigned int nosplit:1; + + /** 1 if effective address is /definitely/ an effective address. + * This is used in e.g. 
the GAS parser to differentiate + * between "expr" (which might or might not be an effective address) and + * "expr(,1)" (which is definitely an effective address). + */ + unsigned int strong:1; +}; + +/** An instruction operand (opaque type). */ +typedef struct yasm_insn_operand yasm_insn_operand; + +#ifdef YASM_LIB_INTERNAL +/** An instruction operand. */ +struct yasm_insn_operand { + /** Link for building linked list of operands. \internal */ + /*@reldef@*/ STAILQ_ENTRY(yasm_insn_operand) link; + + /** Operand type. */ + enum yasm_insn_operand_type { + YASM_INSN__OPERAND_REG = 1, /**< A register. */ + YASM_INSN__OPERAND_SEGREG, /**< A segment register. */ + YASM_INSN__OPERAND_MEMORY, /**< An effective address + * (memory reference). */ + YASM_INSN__OPERAND_IMM /**< An immediate or jump target. */ + } type; + + /** Operand data. */ + union { + uintptr_t reg; /**< Arch data for reg/segreg. */ + yasm_effaddr *ea; /**< Effective address for memory references. */ + yasm_expr *val; /**< Value of immediate or jump target. */ + } data; + + uintptr_t targetmod; /**< Arch target modifier, 0 if none. */ + + /** Specified size of the operand, in bits. 0 if not user-specified. */ + unsigned int size:16; + + /** Nonzero if dereference. Used for "*foo" in GAS. + * The reason for this is that by default in GAS, an unprefixed value + * is a memory address, except for jumps/calls, in which case it needs a + * "*" prefix to become a memory address (otherwise it's an immediate). + * This isn't knowable in the parser stage, so the parser sets this flag + * to indicate the "*" prefix has been used, and the arch needs to adjust + * the operand type appropriately depending on the instruction type. + */ + unsigned int deref:1; + + /** Nonzero if strict. Used for "strict foo" in NASM. + * This is used to inhibit optimization on otherwise "sized" values. 
+ * For example, the user may just want to be explicit with the size on + * "push dword 4", but not actually want to force the immediate size to + * 4 bytes (rather wanting the optimizer to optimize it down to 1 byte as + * though "dword" was not specified). To indicate the immediate should + * actually be forced to 4 bytes, the user needs to write + * "push strict dword 4", which sets this flag. + */ + unsigned int strict:1; +}; + +/** Base structure for "instruction" bytecodes. These are the mnemonic + * (rather than raw) representation of instructions. As with all base + * structures, must be present as the first element in any + * #yasm_arch implementation of mnemonic instruction bytecodes. + */ +struct yasm_insn { + /** Linked list of operands. */ + /*@reldef@*/ STAILQ_HEAD(yasm_insn_operands, yasm_insn_operand) operands; + + /** Array of prefixes. */ + /*@null@*/ uintptr_t *prefixes; + + /** Array of segment prefixes. */ + /*@null@*/ uintptr_t *segregs; + + unsigned int num_operands; /**< Number of operands. */ + unsigned int num_prefixes; /**< Number of prefixes. */ + unsigned int num_segregs; /**< Number of segment prefixes. */ +}; +#endif + +/** Set segment override for an effective address. + * Some architectures (such as x86) support segment overrides on effective + * addresses. An override of an override will result in a warning. + * \param ea effective address + * \param segreg segment register (0 if none) + */ +void yasm_ea_set_segreg(yasm_effaddr *ea, uintptr_t segreg); + +/** Create an instruction operand from a register. + * \param reg register + * \return Newly allocated operand. + */ +yasm_insn_operand *yasm_operand_create_reg(uintptr_t reg); + +/** Create an instruction operand from a segment register. + * \param segreg segment register + * \return Newly allocated operand. + */ +yasm_insn_operand *yasm_operand_create_segreg(uintptr_t segreg); + +/** Create an instruction operand from an effective address. 
+ * \param ea effective address + * \return Newly allocated operand. + */ +yasm_insn_operand *yasm_operand_create_mem(/*@only@*/ yasm_effaddr *ea); + +/** Create an instruction operand from an immediate expression. + * Looks for cases of a single register and creates a register variant of + * #yasm_insn_operand. + * \param val immediate expression + * \return Newly allocated operand. + */ +yasm_insn_operand *yasm_operand_create_imm(/*@only@*/ yasm_expr *val); + +/** Get the first operand in an instruction. + * \param insn instruction + * \return First operand (NULL if no operands). + */ +yasm_insn_operand *yasm_insn_ops_first(yasm_insn *insn); + +/** Get the next operand in an instruction. + * \param op previous operand + * \return Next operand (NULL if op was the last operand). + */ +yasm_insn_operand *yasm_insn_op_next(yasm_insn_operand *op); + +#if defined(YASM_LIB_INTERNAL) && !defined(YASM_DOXYGEN) +#define yasm_insn_ops_first(insn) STAILQ_FIRST(&(insn)->operands) +#define yasm_insn_op_next(cur) STAILQ_NEXT(cur, link) +#endif + +/** Add operand to the end of an instruction. + * \note Does not make a copy of the operand; so don't pass this function + * static or local variables, and discard the op pointer after calling + * this function. + * \param insn instruction + * \param op operand (may be NULL) + * \return If operand was actually appended (it wasn't NULL), the operand; + * otherwise NULL. + */ +/*@null@*/ yasm_insn_operand *yasm_insn_ops_append + (yasm_insn *insn, + /*@returned@*/ /*@null@*/ yasm_insn_operand *op); + +/** Associate a prefix with an instruction. + * \param insn instruction + * \param prefix data that identifies the prefix + */ +void yasm_insn_add_prefix(yasm_insn *insn, uintptr_t prefix); + +/** Associate a segment prefix with an instruction. 
+ * \param insn instruction + * \param segreg data that identifies the segment register + */ +void yasm_insn_add_seg_prefix(yasm_insn *insn, uintptr_t segreg); + +/** Initialize the common parts of an instruction. + * \internal For use by yasm_arch implementations only. + * \param insn instruction + */ +void yasm_insn_initialize(/*@out@*/ yasm_insn *insn); + +/** Delete the common parts of an instruction. + * \internal For use by yasm_arch implementations only. + * \param insn instruction + * \param ea_destroy function called to destroy the effective address + * of each memory operand + */ +void yasm_insn_delete(yasm_insn *insn, + void (*ea_destroy) (/*@only@*/ yasm_effaddr *)); + +/** Print a list of instruction operands. For debugging purposes. + * \internal For use by yasm_arch implementations only. + * \param insn instruction + * \param f file to output to + * \param indent_level indentation level; operand details are printed + * one level deeper + */ +void yasm_insn_print(const yasm_insn *insn, FILE *f, int indent_level); + +/** Finalize the common parts of an instruction. + * \internal For use by yasm_arch implementations only. 
+ * \param insn instruction + */ +void yasm_insn_finalize(yasm_insn *insn); + +#endif diff --git a/modules/arch/x86/x86arch.c b/modules/arch/x86/x86arch.c index 42a95389..ba323185 100644 --- a/modules/arch/x86/x86arch.c +++ b/modules/arch/x86/x86arch.c @@ -110,9 +110,9 @@ x86_set_var(yasm_arch *arch, const char *var, unsigned long val) { yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; if (yasm__strcasecmp(var, "mode_bits") == 0) - arch_x86->mode_bits = (unsigned char)val; + arch_x86->mode_bits = (unsigned int)val; else if (yasm__strcasecmp(var, "force_strict") == 0) - arch_x86->force_strict = (unsigned char)val; + arch_x86->force_strict = (unsigned int)val; else return 1; return 0; @@ -364,7 +364,7 @@ x86_get_fill(const yasm_arch *arch) } unsigned int -yasm_x86__get_reg_size(yasm_arch *arch, uintptr_t reg) +yasm_x86__get_reg_size(uintptr_t reg) { switch ((x86_expritem_reg_size)(reg & ~0xFUL)) { case X86_REG8: @@ -390,6 +390,12 @@ yasm_x86__get_reg_size(yasm_arch *arch, uintptr_t reg) return 0; } +static unsigned int +x86_get_reg_size(yasm_arch *arch, uintptr_t reg) +{ + return yasm_x86__get_reg_size(reg); +} + static uintptr_t x86_reggroup_get_reg(yasm_arch *arch, uintptr_t reggroup, unsigned long regindex) @@ -510,14 +516,16 @@ yasm_arch_module yasm_x86_LTX_arch = { yasm_x86__parse_check_insnprefix, yasm_x86__parse_check_regtmod, x86_get_fill, - yasm_x86__finalize_insn, yasm_x86__floatnum_tobytes, yasm_x86__intnum_tobytes, - yasm_x86__get_reg_size, + x86_get_reg_size, x86_reggroup_get_reg, x86_reg_print, x86_segreg_print, yasm_x86__ea_create_expr, + yasm_x86__ea_destroy, + yasm_x86__ea_print, + yasm_x86__create_empty_insn, x86_machines, "x86", 16, diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index 5659ba5f..6d2fa050 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -76,11 +76,11 @@ typedef struct yasm_arch_x86 { unsigned long cpu_enabled; unsigned int amd64_machine; enum { - X86_PARSER_NASM, - X86_PARSER_GAS + 
X86_PARSER_NASM = 0, + X86_PARSER_GAS = 1 } parser; - unsigned char mode_bits; - unsigned char force_strict; + unsigned int mode_bits; + unsigned int force_strict; } yasm_arch_x86; /* 0-15 (low 4 bits) used for register number, stored in same data area. @@ -101,12 +101,13 @@ typedef enum { X86_RIP = 0xC<<4 /* 64-bit mode only, always RIP (regnum ignored) */ } x86_expritem_reg_size; +/* Low 8 bits are used for the prefix value, stored in same data area. */ typedef enum { - X86_LOCKREP = 1, - X86_ADDRSIZE, - X86_OPERSIZE, - X86_SEGREG, - X86_REX + X86_LOCKREP = 1<<8, + X86_ADDRSIZE = 2<<8, + X86_OPERSIZE = 3<<8, + X86_SEGREG = 4<<8, + X86_REX = 5<<8 } x86_parse_insn_prefix; typedef enum { @@ -168,6 +169,8 @@ x86_effaddr *yasm_x86__ea_create_imm (/*@keep@*/ yasm_expr *imm, unsigned int im_len); yasm_effaddr *yasm_x86__ea_create_expr(yasm_arch *arch, /*@keep@*/ yasm_expr *e); +void yasm_x86__ea_destroy(yasm_effaddr *ea); +void yasm_x86__ea_print(const yasm_effaddr *ea, FILE *f, int indent_level); void yasm_x86__bc_insn_opersize_override(yasm_bytecode *bc, unsigned int opersize); @@ -253,7 +256,7 @@ void yasm_x86__bc_transform_jmpfar(yasm_bytecode *bc, x86_jmpfar *jmpfar); void yasm_x86__bc_apply_prefixes (x86_common *common, unsigned char *rex, unsigned int def_opersize_64, - int num_prefixes, uintptr_t **prefixes); + unsigned int num_prefixes, uintptr_t *prefixes); /* Check an effective address. Returns 0 if EA was successfully determined, * 1 if invalid EA, or 2 if indeterminate EA. 
@@ -266,17 +269,11 @@ void yasm_x86__parse_cpu(yasm_arch_x86 *arch_x86, const char *cpuid, size_t cpuid_len); yasm_arch_insnprefix yasm_x86__parse_check_insnprefix - (yasm_arch *arch, /*@out@*/ uintptr_t data[4], const char *id, - size_t id_len); + (yasm_arch *arch, const char *id, size_t id_len, unsigned long line, + /*@out@*/ yasm_bytecode **bc, /*@out@*/ uintptr_t *prefix); yasm_arch_regtmod yasm_x86__parse_check_regtmod - (yasm_arch *arch, /*@out@*/ uintptr_t *data, const char *id, - size_t id_len); - -void yasm_x86__finalize_insn - (yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, - const uintptr_t data[4], int num_operands, - /*@null@*/ yasm_insn_operands *operands, int num_prefixes, - uintptr_t **prefixes, int num_segregs, const uintptr_t *segregs); + (yasm_arch *arch, const char *id, size_t id_len, + /*@out@*/ uintptr_t *data); int yasm_x86__floatnum_tobytes (yasm_arch *arch, const yasm_floatnum *flt, unsigned char *buf, @@ -286,5 +283,8 @@ int yasm_x86__intnum_tobytes size_t destsize, size_t valsize, int shift, const yasm_bytecode *bc, int warn); -unsigned int yasm_x86__get_reg_size(yasm_arch *arch, uintptr_t reg); +unsigned int yasm_x86__get_reg_size(uintptr_t reg); + +/*@only@*/ yasm_bytecode *yasm_x86__create_empty_insn(yasm_arch *arch, + unsigned long line); #endif diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index b1fa1a28..ade330e2 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -35,11 +35,6 @@ #include "x86arch.h" -/* Effective address callback function prototypes */ - -static void x86_ea_destroy(yasm_effaddr *ea); -static void x86_ea_print(const yasm_effaddr *ea, FILE *f, int indent_level); - /* Bytecode callback function prototypes */ static void x86_bc_insn_destroy(void *contents); @@ -78,13 +73,6 @@ static int x86_bc_jmpfar_tobytes yasm_output_value_func output_value, /*@null@*/ yasm_output_reloc_func output_reloc); -/* Effective address callback structures */ - -static const 
yasm_effaddr_callback x86_ea_callback = { - x86_ea_destroy, - x86_ea_print -}; - /* Bytecode callback structures */ static const yasm_bytecode_callback x86_bc_callback_insn = { @@ -199,7 +187,6 @@ yasm_x86__ea_create_reg(unsigned long reg, unsigned char *rex, x86_ea = yasm_xmalloc(sizeof(x86_effaddr)); - x86_ea->ea.callback = &x86_ea_callback; yasm_value_initialize(&x86_ea->ea.disp, NULL, 0); x86_ea->ea.need_nonzero_len = 0; x86_ea->ea.need_disp = 0; @@ -224,7 +211,6 @@ yasm_x86__ea_create_expr(yasm_arch *arch, yasm_expr *e) x86_ea = yasm_xmalloc(sizeof(x86_effaddr)); - x86_ea->ea.callback = &x86_ea_callback; if (arch_x86->parser == X86_PARSER_GAS) { /* Need to change foo+rip into foo wrt rip. * Note this assumes a particular ordering coming from the parser @@ -267,7 +253,6 @@ yasm_x86__ea_create_imm(yasm_expr *imm, unsigned int im_len) x86_ea = yasm_xmalloc(sizeof(x86_effaddr)); - x86_ea->ea.callback = &x86_ea_callback; yasm_value_initialize(&x86_ea->ea.disp, imm, im_len); x86_ea->ea.need_disp = 1; x86_ea->ea.nosplit = 0; @@ -286,25 +271,25 @@ yasm_x86__ea_create_imm(yasm_expr *imm, unsigned int im_len) void yasm_x86__bc_apply_prefixes(x86_common *common, unsigned char *rex, - unsigned int def_opersize_64, int num_prefixes, - uintptr_t **prefixes) + unsigned int def_opersize_64, + unsigned int num_prefixes, uintptr_t *prefixes) { - int i; + unsigned int i; int first = 1; for (i=0; ilockrep_pre != 0) yasm_warn_set(YASM_WARN_GENERAL, N_("multiple LOCK or REP prefixes, using leftmost")); - common->lockrep_pre = (unsigned char)prefixes[i][1]; + common->lockrep_pre = (unsigned char)prefixes[i] & 0xff; break; case X86_ADDRSIZE: - common->addrsize = (unsigned char)prefixes[i][1]; + common->addrsize = (unsigned char)prefixes[i] & 0xff; break; case X86_OPERSIZE: - common->opersize = (unsigned char)prefixes[i][1]; + common->opersize = (unsigned char)prefixes[i] & 0xff; if (common->mode_bits == 64 && common->opersize == 64 && def_opersize_64 != 64) { if (*rex == 0xff) @@ 
-318,7 +303,7 @@ yasm_x86__bc_apply_prefixes(x86_common *common, unsigned char *rex, /* This is a hack.. we should really be putting this in the * the effective address! */ - common->lockrep_pre = (unsigned char)prefixes[i][1]; + common->lockrep_pre = (unsigned char)prefixes[i] & 0xff; break; case X86_REX: if (!rex) @@ -340,7 +325,7 @@ yasm_x86__bc_apply_prefixes(x86_common *common, unsigned char *rex, * 64 bit mode due to checks in parse_check_prefix(). */ common->mode_bits = 64; - *rex = (unsigned char)prefixes[i][1]; + *rex = (unsigned char)prefixes[i] & 0xff; } first = 0; break; @@ -353,7 +338,7 @@ x86_bc_insn_destroy(void *contents) { x86_insn *insn = (x86_insn *)contents; if (insn->x86_ea) - yasm_ea_destroy((yasm_effaddr *)insn->x86_ea); + yasm_x86__ea_destroy((yasm_effaddr *)insn->x86_ea); if (insn->imm) { yasm_value_delete(insn->imm); yasm_xfree(insn->imm); @@ -378,15 +363,20 @@ x86_bc_jmpfar_destroy(void *contents) yasm_xfree(contents); } -static void -x86_ea_destroy(yasm_effaddr *ea) +void +yasm_x86__ea_destroy(yasm_effaddr *ea) { + yasm_value_delete(&ea->disp); + yasm_xfree(ea); } -static void -x86_ea_print(const yasm_effaddr *ea, FILE *f, int indent_level) +void +yasm_x86__ea_print(const yasm_effaddr *ea, FILE *f, int indent_level) { const x86_effaddr *x86_ea = (const x86_effaddr *)ea; + fprintf(f, "%*sDisp:\n", indent_level, ""); + yasm_value_print(&ea->disp, f, indent_level+1); + fprintf(f, "%*sNoSplit=%u\n", indent_level, "", (unsigned int)ea->nosplit); fprintf(f, "%*sSegmentOv=%02x\n", indent_level, "", (unsigned int)x86_ea->ea.segreg); fprintf(f, "%*sModRM=%03o ValidRM=%u NeedRM=%u\n", indent_level, "", @@ -427,7 +417,7 @@ x86_bc_insn_print(const void *contents, FILE *f, int indent_level) fprintf(f, "%*sEffective Address:", indent_level, ""); if (insn->x86_ea) { fprintf(f, "\n"); - yasm_ea_print((yasm_effaddr *)insn->x86_ea, f, indent_level+1); + yasm_x86__ea_print((yasm_effaddr *)insn->x86_ea, f, indent_level+1); } else fprintf(f, " (nil)\n"); 
fprintf(f, "%*sImmediate Value:", indent_level, ""); diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c index 7e27273d..76b621be 100644 --- a/modules/arch/x86/x86id.c +++ b/modules/arch/x86/x86id.c @@ -279,6 +279,48 @@ typedef struct x86_insn_info { unsigned long operands[3]; } x86_insn_info; +typedef struct x86_id_insn { + yasm_insn insn; /* base structure */ + + /* instruction parse group - NULL if empty instruction (just prefixes) */ + /*@null@*/ const x86_insn_info *group; + + /* CPU feature flags enabled at the time of parsing the instruction */ + unsigned long cpu_enabled; + + /* Modifier data */ + unsigned long mod_data; + + /* Number of elements in the instruction parse group */ + unsigned int num_info:8; + + /* BITS setting active at the time of parsing the instruction */ + unsigned int mode_bits:8; + + /* Suffix flags */ + unsigned int suffix:8; + + /* Parser enabled at the time of parsing the instruction */ + unsigned int parser:2; + + /* Strict forced setting at the time of parsing the instruction */ + unsigned int force_strict:1; +} x86_id_insn; + +static void x86_id_insn_destroy(void *contents); +static void x86_id_insn_print(const void *contents, FILE *f, int indent_level); +static void x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc); + +static const yasm_bytecode_callback x86_id_insn_callback = { + x86_id_insn_destroy, + x86_id_insn_print, + x86_id_insn_finalize, + yasm_bc_calc_len_common, + yasm_bc_expand_common, + yasm_bc_tobytes_common, + YASM_BC_SPECIAL_INSN +}; + /* * General instruction groupings */ @@ -2275,7 +2317,7 @@ static const x86_insn_info xbts_insn[] = { static void x86_finalize_common(x86_common *common, const x86_insn_info *info, - uintptr_t mode_bits) + unsigned int mode_bits) { common->addrsize = 0; common->opersize = info->opersize; @@ -2292,21 +2334,32 @@ x86_finalize_opcode(x86_opcode *opcode, const x86_insn_info *info) opcode->opcode[2] = info->opcode[2]; } +/* Clear operands so they don't get 
destroyed after we've copied references. */ +static void +x86_id_insn_clear_operands(x86_id_insn *id_insn) +{ + yasm_insn_operand *op = yasm_insn_ops_first(&id_insn->insn); + while (op) { + op->type = YASM_INSN__OPERAND_REG; + op = yasm_insn_op_next(op); + } +} + static void -x86_finalize_jmpfar(yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, - const uintptr_t data[4], int num_operands, - yasm_insn_operands *operands, int num_prefixes, - uintptr_t **prefixes, const x86_insn_info *info) +x86_finalize_jmpfar(yasm_bytecode *bc, yasm_bytecode *prev_bc, + const x86_insn_info *info) { + x86_id_insn *id_insn = (x86_id_insn *)bc->contents; + unsigned int mode_bits = id_insn->mode_bits; x86_jmpfar *jmpfar; yasm_insn_operand *op; /*@only@*/ yasm_expr *segment; jmpfar = yasm_xmalloc(sizeof(x86_jmpfar)); - x86_finalize_common(&jmpfar->common, info, data[3]); + x86_finalize_common(&jmpfar->common, info, mode_bits); x86_finalize_opcode(&jmpfar->opcode, info); - op = yasm_ops_first(operands); + op = yasm_insn_ops_first(&id_insn->insn); if (op->type == YASM_INSN__OPERAND_IMM && yasm_expr_is_op(op->data.val, YASM_EXPR_SEGOFF)) { @@ -2334,30 +2387,32 @@ x86_finalize_jmpfar(yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, yasm_internal_error(N_("didn't get FAR expression in jmpfar")); yasm_x86__bc_apply_prefixes((x86_common *)jmpfar, NULL, - info->def_opersize_64, num_prefixes, - prefixes); + info->def_opersize_64, + id_insn->insn.num_prefixes, + id_insn->insn.prefixes); + + x86_id_insn_clear_operands(id_insn); /* Transform the bytecode */ yasm_x86__bc_transform_jmpfar(bc, jmpfar); } static void -x86_finalize_jmp(yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, - const uintptr_t data[4], int num_operands, - yasm_insn_operands *operands, int num_prefixes, - uintptr_t **prefixes, const x86_insn_info *jinfo) +x86_finalize_jmp(yasm_bytecode *bc, yasm_bytecode *prev_bc, + const x86_insn_info *jinfo) { + x86_id_insn *id_insn = (x86_id_insn *)bc->contents; 
x86_jmp *jmp; - int num_info = (int)(data[1]&0xFF); - x86_insn_info *info = (x86_insn_info *)data[0]; - unsigned long mod_data = (unsigned long)(data[1] >> 8); - unsigned int mode_bits = (unsigned int)(data[3] & 0xFF); - /*unsigned char suffix = (unsigned char)((data[3]>>8) & 0xFF);*/ + int num_info = id_insn->num_info; + const x86_insn_info *info = id_insn->group; + unsigned long mod_data = id_insn->mod_data; + unsigned int mode_bits = id_insn->mode_bits; + /*unsigned char suffix = id_insn->suffix;*/ yasm_insn_operand *op; static const unsigned char size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0}; /* We know the target is in operand 0, but sanity check for Imm. */ - op = yasm_ops_first(operands); + op = yasm_insn_ops_first(&id_insn->insn); if (op->type != YASM_INSN__OPERAND_IMM) yasm_internal_error(N_("invalid operand conversion")); @@ -2408,7 +2463,7 @@ x86_finalize_jmp(yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, continue; cpu &= ~(CPU_64 | CPU_Not64); - if ((data[2] & cpu) != cpu) + if ((id_insn->cpu_enabled & cpu) != cpu) continue; if (info->num_operands == 0) @@ -2449,20 +2504,23 @@ x86_finalize_jmp(yasm_arch *arch, yasm_bytecode *bc, yasm_bytecode *prev_bc, } yasm_x86__bc_apply_prefixes((x86_common *)jmp, NULL, info->def_opersize_64, - num_prefixes, prefixes); + id_insn->insn.num_prefixes, + id_insn->insn.prefixes); + + x86_id_insn_clear_operands(id_insn); /* Transform the bytecode */ yasm_x86__bc_transform_jmp(bc, jmp); } static const x86_insn_info * -x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, - uintptr_t cpu, unsigned int mode_bits, unsigned int suffix, - int num_operands, yasm_insn_operand **ops, +x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops, yasm_insn_operand **rev_ops, const unsigned int *size_lookup, int bypass) { - yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; + const x86_insn_info *info = id_insn->group; + unsigned int num_info = id_insn->num_info; + unsigned int suffix = id_insn->suffix; 
int found = 0; /* Just do a simple linear search through the info array for a match. @@ -2478,25 +2536,25 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, /* Match CPU */ icpu = info->cpu; - if ((icpu & CPU_64) && mode_bits != 64) + if ((icpu & CPU_64) && id_insn->mode_bits != 64) continue; - if ((icpu & CPU_Not64) && mode_bits == 64) + if ((icpu & CPU_Not64) && id_insn->mode_bits == 64) continue; icpu &= ~(CPU_64 | CPU_Not64); - if (bypass != 7 && (cpu & icpu) != icpu) + if (bypass != 7 && (id_insn->cpu_enabled & icpu) != icpu) continue; /* Match # of operands */ - if (num_operands != info->num_operands) + if (id_insn->insn.num_operands != info->num_operands) continue; /* Match parser mode */ if ((info->modifiers & MOD_GasOnly) - && arch_x86->parser != X86_PARSER_GAS) + && id_insn->parser != X86_PARSER_GAS) continue; if ((info->modifiers & MOD_GasIllegal) - && arch_x86->parser == X86_PARSER_GAS) + && id_insn->parser == X86_PARSER_GAS) continue; /* Match suffix (if required) */ @@ -2506,11 +2564,11 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, /* Use reversed operands in GAS mode if not otherwise specified */ use_ops = ops; - if (arch_x86->parser == X86_PARSER_GAS + if (id_insn->parser == X86_PARSER_GAS && !(info->modifiers & MOD_GasNoRev)) use_ops = rev_ops; - if (num_operands == 0) { + if (id_insn->insn.num_operands == 0) { found = 1; /* no operands -> must have a match here. 
*/ break; } @@ -2668,7 +2726,7 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, break; case OPT_MemOffs: if (op->type != YASM_INSN__OPERAND_MEMORY || - yasm_expr__contains(yasm_ea_get_disp(op->data.ea), + yasm_expr__contains(op->data.ea->disp.abs, YASM_EXPR_REG)) mismatch = 1; break; @@ -2725,7 +2783,7 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, */ if (op->type == YASM_INSN__OPERAND_REG && op->size == 0) { /* Register size must exactly match */ - if (yasm_x86__get_reg_size(arch, op->data.reg) != size) + if (yasm_x86__get_reg_size(op->data.reg) != size) mismatch = 1; } else if (((info->operands[i] & OPT_MASK) == OPT_Imm || (info->operands[i] & OPT_MASK) == OPT_ImmNotSegOff @@ -2739,8 +2797,7 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, if ((bypass == 4 && i == 0) || (bypass == 5 && i == 1) || (bypass == 6 && i == 3)) ; - else if (yasm_x86__get_reg_size(arch, - op->data.reg) != size) + else if (yasm_x86__get_reg_size(op->data.reg) != size) mismatch = 1; } else { if ((bypass == 1 && i == 0) || (bypass == 2 && i == 1) @@ -2812,9 +2869,7 @@ x86_find_match(yasm_arch *arch, int num_info, const x86_insn_info *info, } static void -x86_match_error(yasm_arch *arch, int num_info, const x86_insn_info *info, - uintptr_t cpu, unsigned int mode_bits, unsigned int suffix, - int num_operands, yasm_insn_operand **ops, +x86_match_error(x86_id_insn *id_insn, yasm_insn_operand **ops, yasm_insn_operand **rev_ops, const unsigned int *size_lookup) { const x86_insn_info *i; @@ -2824,8 +2879,8 @@ x86_match_error(yasm_arch *arch, int num_info, const x86_insn_info *info, /* Check for matching # of operands */ found = 0; - for (ni=num_info, i=info; ni>0; ni--, i++) { - if (num_operands == i->num_operands) { + for (ni=id_insn->num_info, i=id_insn->group; ni>0; ni--, i++) { + if (id_insn->insn.num_operands == i->num_operands) { found = 1; break; } @@ -2836,8 +2891,7 @@ x86_match_error(yasm_arch *arch, int 
num_info, const x86_insn_info *info, } for (bypass=1; bypass<8; bypass++) { - i = x86_find_match(arch, num_info, info, cpu, mode_bits, suffix, - num_operands, ops, rev_ops, size_lookup, bypass); + i = x86_find_match(id_insn, ops, rev_ops, size_lookup, bypass); if (i) break; } @@ -2869,56 +2923,48 @@ x86_match_error(yasm_arch *arch, int num_info, const x86_insn_info *info, } } -void -yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, - yasm_bytecode *prev_bc, const uintptr_t data[4], - int num_operands, - /*@null@*/ yasm_insn_operands *operands, - int num_prefixes, uintptr_t **prefixes, - int num_segregs, const uintptr_t *segregs) +static void +x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) { - yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; + x86_id_insn *id_insn = (x86_id_insn *)bc->contents; x86_insn *insn; - int num_info = (int)(data[1]&0xFF); - const x86_insn_info *info = (const x86_insn_info *)data[0]; - unsigned long mod_data = (unsigned long)(data[1] >> 8); - unsigned int mode_bits = (unsigned int)(data[3] & 0xFF); - unsigned int suffix = (unsigned int)((data[3]>>8) & 0xFF); + const x86_insn_info *info = id_insn->group; + unsigned long mod_data = id_insn->mod_data; + unsigned int mode_bits = id_insn->mode_bits; yasm_insn_operand *op, *ops[4], *rev_ops[4]; /*@null@*/ yasm_expr *imm; unsigned char im_len; unsigned char im_sign; unsigned char spare; - int i; + unsigned int i; unsigned int size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0}; unsigned long do_postop = 0; size_lookup[7] = mode_bits; - if (!info) { - num_info = 1; - info = empty_insn; - } + yasm_insn_finalize(&id_insn->insn); /* Build local array of operands from list, since we know we have a max * of 3 operands. 
*/ - if (num_operands > 3) { + if (id_insn->insn.num_operands > 3) { yasm_error_set(YASM_ERROR_TYPE, N_("too many operands")); return; } ops[0] = ops[1] = ops[2] = ops[3] = NULL; - for (i = 0, op = yasm_ops_first(operands); op && i < num_operands; - op = yasm_operand_next(op), i++) + for (i = 0, op = yasm_insn_ops_first(&id_insn->insn); + op && i < id_insn->insn.num_operands; + op = yasm_insn_op_next(op), i++) ops[i] = op; /* If we're running in GAS mode, build a reverse array of the operands * as most GAS instructions have reversed operands from Intel style. */ - if (arch_x86->parser == X86_PARSER_GAS) { + if (id_insn->parser == X86_PARSER_GAS) { rev_ops[0] = rev_ops[1] = rev_ops[2] = rev_ops[3] = NULL; - for (i = num_operands-1, op = yasm_ops_first(operands); op && i >= 0; - op = yasm_operand_next(op), i--) + for (i = id_insn->insn.num_operands-1, + op = yasm_insn_ops_first(&id_insn->insn); + op; op = yasm_insn_op_next(op), i--) rev_ops[i] = op; } @@ -2926,7 +2972,7 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, * if this is a relative jump (OPA_JmpRel). If so, run through the * operands and adjust for dereferences / lack thereof. 
*/ - if (arch_x86->parser == X86_PARSER_GAS + if (id_insn->parser == X86_PARSER_GAS && (info->operands[0] & OPA_MASK) == OPA_JmpRel) { for (i = 0, op = ops[0]; op; op = ops[++i]) { if (!op->deref && (op->type == YASM_INSN__OPERAND_REG @@ -2944,37 +2990,30 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, N_("skipping prefixes on this instruction")); imm = op->data.ea->disp.abs; op->data.ea->disp.abs = NULL; - yasm_ea_destroy(op->data.ea); + yasm_x86__ea_destroy(op->data.ea); op->type = YASM_INSN__OPERAND_IMM; op->data.val = imm; } } } - info = x86_find_match(arch, num_info, info, data[2], mode_bits, suffix, - num_operands, ops, rev_ops, size_lookup, 0); + info = x86_find_match(id_insn, ops, rev_ops, size_lookup, 0); if (!info) { /* Didn't find a match */ - info = (const x86_insn_info *)data[0]; - if (!info) - info = empty_insn; - x86_match_error(arch, num_info, info, data[2], mode_bits, suffix, - num_operands, ops, rev_ops, size_lookup); + x86_match_error(id_insn, ops, rev_ops, size_lookup); return; } - if (operands) { + if (id_insn->insn.num_operands > 0) { switch (info->operands[0] & OPA_MASK) { case OPA_JmpRel: /* Shortcut to JmpRel */ - x86_finalize_jmp(arch, bc, prev_bc, data, num_operands, - operands, num_prefixes, prefixes, info); + x86_finalize_jmp(bc, prev_bc, info); return; case OPA_JmpFar: /* Shortcut to JmpFar */ - x86_finalize_jmpfar(arch, bc, prev_bc, data, num_operands, - operands, num_prefixes, prefixes, info); + x86_finalize_jmpfar(bc, prev_bc, info); return; } } @@ -3039,17 +3078,19 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, /*mod_data >>= 8;*/ } - /* In 64-bit mode, if opersize is 64 and default is not 64, force REX byte */ + /* In 64-bit mode, if opersize is 64 and default is not 64, + * force REX byte. 
+ */ if (mode_bits == 64 && insn->common.opersize == 64 && insn->def_opersize_64 != 64) insn->rex = 0x48; /* Go through operands and assign */ - if (operands) { + if (id_insn->insn.num_operands > 0) { yasm_insn_operand **use_ops = ops; /* Use reversed operands in GAS mode if not otherwise specified */ - if (arch_x86->parser == X86_PARSER_GAS + if (id_insn->parser == X86_PARSER_GAS && !(info->modifiers & MOD_GasNoRev)) use_ops = rev_ops; @@ -3063,7 +3104,7 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, case YASM_INSN__OPERAND_SEGREG: break; case YASM_INSN__OPERAND_MEMORY: - yasm_ea_destroy(op->data.ea); + yasm_x86__ea_destroy(op->data.ea); break; case YASM_INSN__OPERAND_IMM: yasm_expr_destroy(op->data.val); @@ -3178,7 +3219,7 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, else yasm_error_set(YASM_ERROR_TYPE, N_("unsupported address size")); - yasm_ea_destroy(op->data.ea); + yasm_x86__ea_destroy(op->data.ea); break; } default: @@ -3196,7 +3237,7 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, * pre-emptively expand to full size. * For unspecified size case, still optimize. 
*/ - if (!(arch_x86->force_strict || op->strict) + if (!(id_insn->force_strict || op->strict) || op->size == 0) insn->postop = X86_POSTOP_SIGNEXT_IMM8; else if (op->size != 8) { @@ -3223,14 +3264,15 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, if (insn->x86_ea) { yasm_x86__ea_init(insn->x86_ea, spare, prev_bc); - for (i=0; ix86_ea->ea, segregs[i]); - } else if (num_segregs > 0 && insn->special_prefix == 0) { - if (num_segregs > 1) + for (i=0; iinsn.num_segregs; i++) + yasm_ea_set_segreg(&insn->x86_ea->ea, id_insn->insn.segregs[i]); + } else if (id_insn->insn.num_segregs > 0 && insn->special_prefix == 0) { + if (id_insn->insn.num_segregs > 1) yasm_warn_set(YASM_WARN_GENERAL, N_("multiple segment overrides, using leftmost")); - insn->special_prefix = (unsigned char)(segregs[num_segregs-1]>>8); - } else if (num_segregs > 0) + insn->special_prefix = (unsigned char) + (id_insn->insn.segregs[id_insn->insn.num_segregs-1]>>8); + } else if (id_insn->insn.num_segregs > 0) yasm_internal_error(N_("unhandled segment prefix")); if (imm) { @@ -3244,7 +3286,9 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, insn->imm = NULL; yasm_x86__bc_apply_prefixes((x86_common *)insn, &insn->rex, - insn->def_opersize_64, num_prefixes, prefixes); + insn->def_opersize_64, + id_insn->insn.num_prefixes, + id_insn->insn.prefixes); if (insn->postop == X86_POSTOP_ADDRESS16 && insn->common.addrsize) { yasm_warn_set(YASM_WARN_GENERAL, N_("address size override ignored")); @@ -3298,6 +3342,8 @@ yasm_x86__finalize_insn(yasm_arch *arch, yasm_bytecode *bc, break; } + x86_id_insn_clear_operands(id_insn); + /* Transform the bytecode */ yasm_x86__bc_transform_insn(bc, insn); } @@ -3442,14 +3488,18 @@ cpu_find_reverse(unsigned long cpu) } yasm_arch_insnprefix -yasm_x86__parse_check_insnprefix(yasm_arch *arch, uintptr_t data[4], - const char *id, size_t id_len) +yasm_x86__parse_check_insnprefix(yasm_arch *arch, const char *id, + size_t id_len, unsigned long line, + yasm_bytecode 
**bc, uintptr_t *prefix) { yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; /*@null@*/ const insnprefix_parse_data *pdata; size_t i; static char lcaseid[16]; + *bc = (yasm_bytecode *)NULL; + *prefix = 0; + if (id_len > 15) return YASM_ARCH_NOTINSNPREFIX; for (i=0; igroup) { unsigned long cpu = pdata->data2; + x86_id_insn *id_insn; if ((cpu & CPU_64) && arch_x86->mode_bits != 64) { yasm_warn_set(YASM_WARN_GENERAL, @@ -3480,10 +3531,17 @@ yasm_x86__parse_check_insnprefix(yasm_arch *arch, uintptr_t data[4], if ((cpu & CPU_Not64) && arch_x86->mode_bits == 64) { yasm_error_set(YASM_ERROR_GENERAL, N_("`%s' invalid in 64-bit mode"), id); - data[0] = (uintptr_t)not64_insn; - data[1] = NELEMS(not64_insn); - data[2] = CPU_Not64; - data[3] = arch_x86->mode_bits; + id_insn = yasm_xmalloc(sizeof(x86_id_insn)); + yasm_insn_initialize(&id_insn->insn); + id_insn->group = not64_insn; + id_insn->cpu_enabled = CPU_Not64; + id_insn->mod_data = 0; + id_insn->num_info = NELEMS(not64_insn); + id_insn->mode_bits = arch_x86->mode_bits; + id_insn->suffix = 0; + id_insn->parser = arch_x86->parser; + id_insn->force_strict = arch_x86->force_strict != 0; + *bc = yasm_bc_create_common(&x86_id_insn_callback, id_insn, line); return YASM_ARCH_INSN; } @@ -3495,10 +3553,17 @@ yasm_x86__parse_check_insnprefix(yasm_arch *arch, uintptr_t data[4], return YASM_ARCH_NOTINSNPREFIX; } - data[0] = (uintptr_t)pdata->group; - data[1] = pdata->data1; - data[2] = arch_x86->cpu_enabled; - data[3] = (((unsigned long)pdata->flags)<<8) | arch_x86->mode_bits; + id_insn = yasm_xmalloc(sizeof(x86_id_insn)); + yasm_insn_initialize(&id_insn->insn); + id_insn->group = pdata->group; + id_insn->cpu_enabled = arch_x86->cpu_enabled; + id_insn->mod_data = pdata->data1 >> 8; + id_insn->num_info = pdata->data1 & 0xff; + id_insn->mode_bits = arch_x86->mode_bits; + id_insn->suffix = pdata->flags; + id_insn->parser = arch_x86->parser; + id_insn->force_strict = arch_x86->force_strict != 0; + *bc = 
yasm_bc_create_common(&x86_id_insn_callback, id_insn, line); return YASM_ARCH_INSN; } else { unsigned long type = pdata->data1; @@ -3523,8 +3588,7 @@ yasm_x86__parse_check_insnprefix(yasm_arch *arch, uintptr_t data[4], N_("`%s' is a prefix in 64-bit mode"), id); return YASM_ARCH_NOTINSNPREFIX; } - data[0] = type; - data[1] = value; + *prefix = type|value; return YASM_ARCH_PREFIX; } } @@ -3564,8 +3628,8 @@ yasm_x86__parse_cpu(yasm_arch_x86 *arch_x86, const char *cpuid, } yasm_arch_regtmod -yasm_x86__parse_check_regtmod(yasm_arch *arch, uintptr_t *data, - const char *id, size_t id_len) +yasm_x86__parse_check_regtmod(yasm_arch *arch, const char *id, size_t id_len, + uintptr_t *data) { yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; /*@null@*/ const regtmod_parse_data *pdata; @@ -3603,3 +3667,38 @@ yasm_x86__parse_check_regtmod(yasm_arch *arch, uintptr_t *data, return type; } +static void +x86_id_insn_destroy(void *contents) +{ + x86_id_insn *id_insn = (x86_id_insn *)contents; + yasm_insn_delete(&id_insn->insn, yasm_x86__ea_destroy); + yasm_xfree(contents); +} + +static void +x86_id_insn_print(const void *contents, FILE *f, int indent_level) +{ + const x86_id_insn *id_insn = (const x86_id_insn *)contents; + yasm_insn_print(&id_insn->insn, f, indent_level); + /*TODO*/ +} + +/*@only@*/ yasm_bytecode * +yasm_x86__create_empty_insn(yasm_arch *arch, unsigned long line) +{ + yasm_arch_x86 *arch_x86 = (yasm_arch_x86 *)arch; + x86_id_insn *id_insn = yasm_xmalloc(sizeof(x86_id_insn)); + + yasm_insn_initialize(&id_insn->insn); + id_insn->group = empty_insn; + id_insn->cpu_enabled = arch_x86->cpu_enabled; + id_insn->mod_data = 0; + id_insn->num_info = NELEMS(empty_insn); + id_insn->mode_bits = arch_x86->mode_bits; + id_insn->suffix = 0; + id_insn->parser = arch_x86->parser; + id_insn->force_strict = arch_x86->force_strict != 0; + + return yasm_bc_create_common(&x86_id_insn_callback, id_insn, line); +} + diff --git a/modules/parsers/gas/gas-parse.c 
b/modules/parsers/gas/gas-parse.c index 702b47dd..8cbe4350 100644 --- a/modules/parsers/gas/gas-parse.c +++ b/modules/parsers/gas/gas-parse.c @@ -113,6 +113,9 @@ destroy_curtok_(yasm_parser_gas *parser_gas) case STRING: yasm_xfree(curval.str.contents); break; + case INSN: + yasm_bc_destroy(curval.bc); + break; default: break; } @@ -699,59 +702,53 @@ parse_instr(yasm_parser_gas *parser_gas) switch (curtok) { case INSN: { - yystype insn = curval; /* structure copy */ - yasm_insn_operands operands; - int num_operands = 0; + yasm_insn *insn; + bc = INSN_val; + insn = yasm_bc_get_insn(bc); get_next_token(); - if (is_eol()) { - /* no operands */ - return yasm_bc_create_insn(p_object->arch, insn.arch_data, - 0, NULL, cur_line); - } + if (is_eol()) + return bc; /* no operands */ /* parse operands */ - yasm_ops_initialize(&operands); for (;;) { yasm_insn_operand *op = parse_operand(parser_gas); if (!op) { yasm_error_set(YASM_ERROR_SYNTAX, N_("expression syntax error")); - yasm_ops_delete(&operands, 1); + yasm_bc_destroy(bc); return NULL; } - yasm_ops_append(&operands, op); - num_operands++; + yasm_insn_ops_append(insn, op); if (is_eol()) break; if (!expect(',')) { - yasm_ops_delete(&operands, 1); + yasm_bc_destroy(bc); return NULL; } get_next_token(); } - return yasm_bc_create_insn(p_object->arch, insn.arch_data, - num_operands, &operands, cur_line); + return bc; } case PREFIX: { - yystype prefix = curval; /* structure copy */ + uintptr_t prefix = PREFIX_val; get_next_token(); /* PREFIX */ bc = parse_instr(parser_gas); if (!bc) - bc = yasm_bc_create_empty_insn(p_object->arch, cur_line); - yasm_bc_insn_add_prefix(bc, prefix.arch_data); + bc = yasm_arch_create_empty_insn(p_object->arch, cur_line); + yasm_insn_add_prefix(yasm_bc_get_insn(bc), prefix); return bc; } case SEGREG: { - uintptr_t segreg = SEGREG_val[0]; + uintptr_t segreg = SEGREG_val; get_next_token(); /* SEGREG */ bc = parse_instr(parser_gas); if (!bc) - bc = yasm_bc_create_empty_insn(p_object->arch, cur_line); - 
yasm_bc_insn_add_seg_prefix(bc, segreg); + bc = yasm_arch_create_empty_insn(p_object->arch, cur_line); + yasm_insn_add_seg_prefix(yasm_bc_get_insn(bc), segreg); } default: return NULL; @@ -869,7 +866,7 @@ parse_memaddr(yasm_parser_gas *parser_gas) int strong = 0; if (curtok == SEGREG) { - uintptr_t segreg = SEGREG_val[0]; + uintptr_t segreg = SEGREG_val; get_next_token(); /* SEGREG */ if (!expect(':')) return NULL; get_next_token(); /* ':' */ @@ -900,7 +897,7 @@ parse_memaddr(yasm_parser_gas *parser_gas) /* base register */ if (curtok == REG) { - e2 = p_expr_new_ident(yasm_expr_reg(REG_val[0])); + e2 = p_expr_new_ident(yasm_expr_reg(REG_val)); get_next_token(); /* REG */ } else e2 = p_expr_new_ident(yasm_expr_int(yasm_intnum_create_uint(0))); @@ -921,7 +918,7 @@ parse_memaddr(yasm_parser_gas *parser_gas) /* index register */ if (curtok == REG) { - reg = REG_val[0]; + reg = REG_val; havereg = 1; get_next_token(); /* REG */ if (curtok != ',') { @@ -978,7 +975,7 @@ done: return NULL; ea = yasm_arch_ea_create(p_object->arch, e1); if (strong) - yasm_ea_set_strong(ea, 1); + ea->strong = 1; return ea; } @@ -991,7 +988,7 @@ parse_operand(yasm_parser_gas *parser_gas) switch (curtok) { case REG: - reg = REG_val[0]; + reg = REG_val; get_next_token(); /* REG */ return yasm_operand_create_reg(reg); case SEGREG: @@ -1003,13 +1000,13 @@ parse_operand(yasm_parser_gas *parser_gas) return NULL; return yasm_operand_create_mem(ea); } - reg = SEGREG_val[0]; + reg = SEGREG_val; get_next_token(); /* SEGREG */ return yasm_operand_create_segreg(reg); case REGGROUP: { unsigned long regindex; - reg = REGGROUP_val[0]; + reg = REGGROUP_val; get_next_token(); /* REGGROUP */ if (curtok != '(') return yasm_operand_create_reg(reg); @@ -1050,7 +1047,7 @@ parse_operand(yasm_parser_gas *parser_gas) case '*': get_next_token(); /* '*' */ if (curtok == REG) { - op = yasm_operand_create_reg(REG_val[0]); + op = yasm_operand_create_reg(REG_val); get_next_token(); /* REG */ } else { ea = 
parse_memaddr(parser_gas); diff --git a/modules/parsers/gas/gas-parser.h b/modules/parsers/gas/gas-parser.h index 214048e0..94ceff19 100644 --- a/modules/parsers/gas/gas-parser.h +++ b/modules/parsers/gas/gas-parser.h @@ -75,7 +75,8 @@ typedef union { char *str_val; yasm_intnum *intn; yasm_floatnum *flt; - uintptr_t arch_data[4]; + yasm_bytecode *bc; + uintptr_t arch_data; struct { char *contents; size_t len; @@ -160,7 +161,7 @@ typedef struct yasm_parser_gas { #define INTNUM_val (curval.intn) #define FLTNUM_val (curval.flt) #define STRING_val (curval.str) -#define INSN_val (curval.arch_data) +#define INSN_val (curval.bc) #define PREFIX_val (curval.arch_data) #define REG_val (curval.arch_data) #define REGGROUP_val (curval.arch_data) diff --git a/modules/parsers/gas/gas-token.re b/modules/parsers/gas/gas-token.re index 69d816b1..6cd6cd02 100644 --- a/modules/parsers/gas/gas-token.re +++ b/modules/parsers/gas/gas-token.re @@ -498,7 +498,7 @@ scan: savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_regtmod - (p_object->arch, lvalp->arch_data, TOK+1, TOKLEN-1)) { + (p_object->arch, TOK+1, TOKLEN-1, &lvalp->arch_data)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; RETURN(REG); @@ -514,10 +514,7 @@ scan: yasm_error_set(YASM_ERROR_GENERAL, N_("Unrecognized register name `%s'"), s->tok); s->tok[TOKLEN] = savech; - lvalp->arch_data[0] = 0; - lvalp->arch_data[1] = 0; - lvalp->arch_data[2] = 0; - lvalp->arch_data[3] = 0; + lvalp->arch_data = 0; RETURN(REG); } @@ -539,15 +536,18 @@ scan: * instruction or directive. 
*/ if (parser_gas->state != INSTDIR) { + uintptr_t prefix; savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_insnprefix - (p_object->arch, lvalp->arch_data, TOK, TOKLEN)) { + (p_object->arch, TOK, TOKLEN, cur_line, &lvalp->bc, + &prefix)) { case YASM_ARCH_INSN: s->tok[TOKLEN] = savech; parser_gas->state = INSTDIR; RETURN(INSN); case YASM_ARCH_PREFIX: + lvalp->arch_data = prefix; s->tok[TOKLEN] = savech; RETURN(PREFIX); default: diff --git a/modules/parsers/nasm/nasm-parse.c b/modules/parsers/nasm/nasm-parse.c index 7866cffe..d710a61a 100644 --- a/modules/parsers/nasm/nasm-parse.c +++ b/modules/parsers/nasm/nasm-parse.c @@ -107,6 +107,9 @@ destroy_curtok_(yasm_parser_nasm *parser_nasm) case STRING: yasm_xfree(curval.str.contents); break; + case INSN: + yasm_bc_destroy(curval.bc); + break; default: break; } @@ -596,60 +599,56 @@ incbin_done: static yasm_bytecode * parse_instr(yasm_parser_nasm *parser_nasm) { + yasm_bytecode *bc; + switch (curtok) { case INSN: { - yystype insn = curval; /* structure copy */ - yasm_insn_operands operands; - int num_operands = 0; + yasm_insn *insn; + bc = INSN_val; + insn = yasm_bc_get_insn(bc); get_next_token(); - if (is_eol()) { - /* no operands */ - return yasm_bc_create_insn(p_object->arch, insn.arch_data, 0, - NULL, cur_line); - } + if (is_eol()) + return bc; /* no operands */ /* parse operands */ - yasm_ops_initialize(&operands); for (;;) { yasm_insn_operand *op = parse_operand(parser_nasm); if (!op) { yasm_error_set(YASM_ERROR_SYNTAX, N_("expression syntax error")); - yasm_ops_delete(&operands, 1); + yasm_bc_destroy(bc); return NULL; } - yasm_ops_append(&operands, op); - num_operands++; + yasm_insn_ops_append(insn, op); if (is_eol()) break; if (!expect(',')) { - yasm_ops_delete(&operands, 1); + yasm_bc_destroy(bc); return NULL; } get_next_token(); } - return yasm_bc_create_insn(p_object->arch, insn.arch_data, - num_operands, &operands, cur_line); + return bc; } - case PREFIX: { - yystype prefix = 
curval; /* structure copy */ - yasm_bytecode *bc; + case PREFIX: + { + uintptr_t prefix = PREFIX_val; get_next_token(); bc = parse_instr(parser_nasm); if (bc) - yasm_bc_insn_add_prefix(bc, prefix.arch_data); + yasm_insn_add_prefix(yasm_bc_get_insn(bc), prefix); return bc; } - case SEGREG: { - uintptr_t segreg = SEGREG_val[0]; - yasm_bytecode *bc; + case SEGREG: + { + uintptr_t segreg = SEGREG_val; get_next_token(); bc = parse_instr(parser_nasm); if (bc) - yasm_bc_insn_add_seg_prefix(bc, segreg); + yasm_insn_add_seg_prefix(yasm_bc_get_insn(bc), segreg); return bc; } default: @@ -679,11 +678,11 @@ parse_operand(yasm_parser_nasm *parser_nasm) return yasm_operand_create_mem(ea); } case SEGREG: - op = yasm_operand_create_segreg(SEGREG_val[0]); + op = yasm_operand_create_segreg(SEGREG_val); get_next_token(); return op; case REG: - op = yasm_operand_create_reg(REG_val[0]); + op = yasm_operand_create_reg(REG_val); get_next_token(); return op; case STRICT: @@ -726,7 +725,7 @@ parse_operand(yasm_parser_nasm *parser_nasm) } case TARGETMOD: { - uintptr_t tmod = TARGETMOD_val[0]; + uintptr_t tmod = TARGETMOD_val; get_next_token(); op = parse_operand(parser_nasm); if (op) @@ -751,7 +750,7 @@ parse_memaddr(yasm_parser_nasm *parser_nasm) switch (curtok) { case SEGREG: { - uintptr_t segreg = SEGREG_val[0]; + uintptr_t segreg = SEGREG_val; get_next_token(); if (!expect(':')) { yasm_error_set(YASM_ERROR_SYNTAX, @@ -770,14 +769,14 @@ parse_memaddr(yasm_parser_nasm *parser_nasm) get_next_token(); ea = parse_memaddr(parser_nasm); if (ea) - yasm_ea_set_len(ea, size); + ea->disp.size = size; return ea; } case NOSPLIT: get_next_token(); ea = parse_memaddr(parser_nasm); if (ea) - yasm_ea_set_nosplit(ea, 1); + ea->nosplit = 1; return ea; default: { @@ -975,7 +974,7 @@ parse_expr6(yasm_parser_nasm *parser_nasm, expr_type type) e = p_expr_new_ident(yasm_expr_int(INTNUM_val)); break; case REG: - e = p_expr_new_ident(yasm_expr_reg(REG_val[0])); + e = p_expr_new_ident(yasm_expr_reg(REG_val)); 
break; case ID: sym = yasm_symtab_use(p_symtab, ID_val, cur_line); @@ -1030,7 +1029,7 @@ parse_expr6(yasm_parser_nasm *parser_nasm, expr_type type) N_("data values can't have registers")); return NULL; } - e = p_expr_new_ident(yasm_expr_reg(REG_val[0])); + e = p_expr_new_ident(yasm_expr_reg(REG_val)); break; case STRING: e = p_expr_new_ident(yasm_expr_int( diff --git a/modules/parsers/nasm/nasm-parser.h b/modules/parsers/nasm/nasm-parser.h index de0824b5..a2671559 100644 --- a/modules/parsers/nasm/nasm-parser.h +++ b/modules/parsers/nasm/nasm-parser.h @@ -69,7 +69,8 @@ typedef union { char *str_val; yasm_intnum *intn; yasm_floatnum *flt; - uintptr_t arch_data[4]; + yasm_bytecode *bc; + uintptr_t arch_data; struct { char *contents; size_t len; @@ -144,7 +145,7 @@ typedef struct yasm_parser_nasm { #define SIZE_OVERRIDE_val (curval.int_info) #define DECLARE_DATA_val (curval.int_info) #define RESERVE_SPACE_val (curval.int_info) -#define INSN_val (curval.arch_data) +#define INSN_val (curval.bc) #define PREFIX_val (curval.arch_data) #define REG_val (curval.arch_data) #define SEGREG_val (curval.arch_data) diff --git a/modules/parsers/nasm/nasm-token.re b/modules/parsers/nasm/nasm-token.re index 56bd8dfc..0a56073a 100644 --- a/modules/parsers/nasm/nasm-token.re +++ b/modules/parsers/nasm/nasm-token.re @@ -350,21 +350,25 @@ scan: [a-zA-Z_?][a-zA-Z0-9_$#@~.?]* { savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; - if (parser_nasm->state != INSTRUCTION) + if (parser_nasm->state != INSTRUCTION) { + uintptr_t prefix; switch (yasm_arch_parse_check_insnprefix - (p_object->arch, lvalp->arch_data, TOK, TOKLEN)) { + (p_object->arch, TOK, TOKLEN, cur_line, &lvalp->bc, + &prefix)) { case YASM_ARCH_INSN: parser_nasm->state = INSTRUCTION; s->tok[TOKLEN] = savech; RETURN(INSN); case YASM_ARCH_PREFIX: + lvalp->arch_data = prefix; s->tok[TOKLEN] = savech; RETURN(PREFIX); default: break; } + } switch (yasm_arch_parse_check_regtmod - (p_object->arch, lvalp->arch_data, TOK, TOKLEN)) { + 
(p_object->arch, TOK, TOKLEN, &lvalp->arch_data)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; RETURN(REG); @@ -618,7 +622,7 @@ directive2: savech = s->tok[TOKLEN]; s->tok[TOKLEN] = '\0'; switch (yasm_arch_parse_check_regtmod - (p_object->arch, lvalp->arch_data, TOK, TOKLEN)) { + (p_object->arch, TOK, TOKLEN, &lvalp->arch_data)) { case YASM_ARCH_REG: s->tok[TOKLEN] = savech; RETURN(REG);