From: Peter Johnson Date: Thu, 20 Sep 2001 03:21:26 +0000 (-0000) Subject: Cleaned up bytecode data structure, some effaddr functions. Made ea an X-Git-Tag: v0.1.0~315 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=60fd596a66075a949611fbffcdc950de2ec4dc24;p=yasm Cleaned up bytecode data structure, some effaddr functions. Made ea an effaddr * instead of an effaddr to decrease size of bytecode. Changed parser to use effaddr * as well. Finally fixed issue with ONE by adding flag to bytecode.data.insn and utilizing spare bytes of the opcode. Hackish, but works. svn path=/trunk/yasm/; revision=203 --- diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index a9854b87..a8b17ed2 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -44,9 +44,6 @@ RCSID("$IdPath$"); /* Static structures for when NULL is passed to conversion functions. */ -/* for Convert*ToEA() */ -static effaddr eff_static; - /* for Convert*ToImm() */ static immval im_static; @@ -56,57 +53,63 @@ unsigned char bytes_static[16]; static bytecode *bytecode_new_common(void); effaddr * -ConvertRegToEA(effaddr *ptr, unsigned long reg) +effaddr_new_reg(unsigned long reg) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); - ptr->len = 0; - ptr->segment = 0; - ptr->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ - ptr->valid_modrm = 1; - ptr->need_modrm = 1; - ptr->valid_sib = 0; - ptr->need_sib = 0; + if (!ea) + Fatal(FATAL_NOMEM); - return ptr; + ea->len = 0; + ea->segment = 0; + ea->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ + ea->valid_modrm = 1; + ea->need_modrm = 1; + ea->valid_sib = 0; + ea->need_sib = 0; + + return ea; } effaddr * -ConvertExprToEA(effaddr *ptr, expr *expr_ptr) +effaddr_new_expr(expr *expr_ptr) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); - ptr->segment = 0; + if (!ea) + Fatal(FATAL_NOMEM); - ptr->valid_modrm = 0; - ptr->need_modrm = 1; - ptr->valid_sib = 0; - ptr->need_sib = 0; + 
ea->segment = 0; - ptr->disp = expr_ptr; + ea->valid_modrm = 0; + ea->need_modrm = 1; + ea->valid_sib = 0; + ea->need_sib = 0; - return ptr; + ea->disp = expr_ptr; + + return ea; } effaddr * -ConvertImmToEA(effaddr *ptr, immval *im_ptr, unsigned char im_len) +effaddr_new_imm(immval *im_ptr, unsigned char im_len) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); + + if (!ea) + Fatal(FATAL_NOMEM); - ptr->disp = im_ptr->val; + ea->disp = im_ptr->val; if (im_ptr->len > im_len) Warning(_("%s value exceeds bounds"), "word"); - ptr->len = im_len; - ptr->segment = 0; - ptr->valid_modrm = 0; - ptr->need_modrm = 0; - ptr->valid_sib = 0; - ptr->need_sib = 0; - - return ptr; + ea->len = im_len; + ea->segment = 0; + ea->valid_modrm = 0; + ea->need_modrm = 0; + ea->valid_sib = 0; + ea->need_sib = 0; + + return ea; } immval * @@ -235,6 +238,19 @@ SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) *lockrep_pre = prefix; } +void +SetInsnShiftFlag(bytecode *bc) +{ + if (!bc) + return; + + if (bc->type != BC_INSN) + InternalError(__LINE__, __FILE__, + _("Attempted to set shift flag on non-instruction")); + + bc->data.insn.shift_op = 1; +} + void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) { @@ -281,15 +297,10 @@ bytecode_new_insn(unsigned char opersize, bc->type = BC_INSN; + bc->data.insn.ea = ea_ptr; if (ea_ptr) { - bc->data.insn.ea = *ea_ptr; - bc->data.insn.ea.modrm &= 0xC7; /* zero spare/reg bits */ - bc->data.insn.ea.modrm |= (spare << 3) & 0x38; /* plug in provided bits */ - } else { - bc->data.insn.ea.len = 0; - bc->data.insn.ea.segment = 0; - bc->data.insn.ea.need_modrm = 0; - bc->data.insn.ea.need_sib = 0; + bc->data.insn.ea->modrm &= 0xC7; /* zero spare/reg bits */ + bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */ } if (im_ptr) { @@ -310,18 +321,17 @@ bytecode_new_insn(unsigned char opersize, bc->data.insn.addrsize = 0; bc->data.insn.opersize = opersize; bc->data.insn.lockrep_pre = 0; + 
bc->data.insn.shift_op = 0; return bc; } bytecode * bytecode_new_jmprel(targetval *target, - unsigned char short_valid, unsigned char short_opcode_len, unsigned char short_op0, unsigned char short_op1, unsigned char short_op2, - unsigned char near_valid, unsigned char near_opcode_len, unsigned char near_op0, unsigned char near_op1, @@ -335,26 +345,20 @@ bytecode_new_jmprel(targetval *target, bc->data.jmprel.target = target->val; bc->data.jmprel.op_sel = target->op_sel; - if ((target->op_sel == JR_SHORT_FORCED) && (!short_valid)) + if ((target->op_sel == JR_SHORT_FORCED) && (short_opcode_len == 0)) Error(_("no SHORT form of that jump instruction exists")); - if ((target->op_sel == JR_NEAR_FORCED) && (!near_valid)) + if ((target->op_sel == JR_NEAR_FORCED) && (near_opcode_len == 0)) Error(_("no NEAR form of that jump instruction exists")); - bc->data.jmprel.shortop.valid = short_valid; - if (short_valid) { - bc->data.jmprel.shortop.opcode[0] = short_op0; - bc->data.jmprel.shortop.opcode[1] = short_op1; - bc->data.jmprel.shortop.opcode[2] = short_op2; - bc->data.jmprel.shortop.opcode_len = short_opcode_len; - } + bc->data.jmprel.shortop.opcode[0] = short_op0; + bc->data.jmprel.shortop.opcode[1] = short_op1; + bc->data.jmprel.shortop.opcode[2] = short_op2; + bc->data.jmprel.shortop.opcode_len = short_opcode_len; - bc->data.jmprel.nearop.valid = near_valid; - if (near_valid) { - bc->data.jmprel.nearop.opcode[0] = near_op0; - bc->data.jmprel.nearop.opcode[1] = near_op1; - bc->data.jmprel.nearop.opcode[2] = near_op2; - bc->data.jmprel.nearop.opcode_len = near_opcode_len; - } + bc->data.jmprel.nearop.opcode[0] = near_op0; + bc->data.jmprel.nearop.opcode[1] = near_op1; + bc->data.jmprel.nearop.opcode[2] = near_op2; + bc->data.jmprel.nearop.opcode_len = near_opcode_len; bc->data.jmprel.addrsize = addrsize; bc->data.jmprel.opersize = 0; @@ -437,24 +441,25 @@ bytecode_print(bytecode *bc) break; case BC_INSN: printf("_Instruction_\n"); - printf("Effective Address:\n"); - 
printf(" Disp="); - if (!bc->data.insn.ea.disp) - printf("(nil)"); - else - expr_print(bc->data.insn.ea.disp); - printf("\n"); - printf(" Len=%u SegmentOv=%2x\n", - (unsigned int)bc->data.insn.ea.len, - (unsigned int)bc->data.insn.ea.segment); - printf(" ModRM=%2x ValidRM=%u NeedRM=%u\n", - (unsigned int)bc->data.insn.ea.modrm, - (unsigned int)bc->data.insn.ea.valid_modrm, - (unsigned int)bc->data.insn.ea.need_modrm); - printf(" SIB=%2x ValidSIB=%u NeedSIB=%u\n", - (unsigned int)bc->data.insn.ea.sib, - (unsigned int)bc->data.insn.ea.valid_sib, - (unsigned int)bc->data.insn.ea.need_sib); + printf("Effective Address:"); + if (!bc->data.insn.ea) + printf(" (nil)\n"); + else { + printf("\n Disp="); + expr_print(bc->data.insn.ea->disp); + printf("\n"); + printf(" Len=%u SegmentOv=%2x\n", + (unsigned int)bc->data.insn.ea->len, + (unsigned int)bc->data.insn.ea->segment); + printf(" ModRM=%2x ValidRM=%u NeedRM=%u\n", + (unsigned int)bc->data.insn.ea->modrm, + (unsigned int)bc->data.insn.ea->valid_modrm, + (unsigned int)bc->data.insn.ea->need_modrm); + printf(" SIB=%2x ValidSIB=%u NeedSIB=%u\n", + (unsigned int)bc->data.insn.ea->sib, + (unsigned int)bc->data.insn.ea->valid_sib, + (unsigned int)bc->data.insn.ea->need_sib); + } printf("Immediate Value:\n"); printf(" Val="); if (!bc->data.insn.imm.val) @@ -483,7 +488,7 @@ bytecode_print(bytecode *bc) printf("Target="); expr_print(bc->data.jmprel.target); printf("\nShort Form:\n"); - if (!bc->data.jmprel.shortop.valid) + if (bc->data.jmprel.shortop.opcode_len == 0) printf(" None\n"); else printf(" Opcode: %2x %2x %2x OpLen=%u\n", @@ -491,7 +496,7 @@ bytecode_print(bytecode *bc) (unsigned int)bc->data.jmprel.shortop.opcode[1], (unsigned int)bc->data.jmprel.shortop.opcode[2], (unsigned int)bc->data.jmprel.shortop.opcode_len); - if (!bc->data.jmprel.nearop.valid) + if (bc->data.jmprel.nearop.opcode_len == 0) printf(" None\n"); else printf(" Opcode: %2x %2x %2x OpLen=%u\n", diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h 
index 9b45b03d..5ff60a8c 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -82,9 +82,21 @@ typedef struct bytecode_s { enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; + /* This union has been somewhat tweaked to get it as small as possible + * on the 4-byte-aligned x86 architecture (without resorting to + * bitfields). In particular, insn and jmprel are the largest structures + * in the union, and are also the same size (after padding). jmprel + * can have another unsigned char added to the end without affecting + * its size. + * + * Don't worry about this too much, but keep it in mind when changing + * this structure. We care about the size of bytecode in particular + * because it accounts for the majority of the memory usage in the + * assembler when assembling a large file. + */ union { struct { - effaddr ea; /* effective address */ + effaddr *ea; /* effective address */ immval imm; /* immediate or relative value */ @@ -94,14 +106,28 @@ typedef struct bytecode_s { unsigned char addrsize; /* 0 indicates no override */ unsigned char opersize; /* 0 indicates no override */ unsigned char lockrep_pre; /* 0 indicates no prefix */ + + /* HACK, but a space-saving one: shift opcodes have an immediate + * form and a ,1 form (with no immediate). In the parser, we + * set this and opcode_len=1, but store the ,1 version in the + * second byte of the opcode array. We then choose between the + * two versions once we know the actual value of imm (because we + * don't know it in the parser module). + * + * A override to force the imm version should just leave this at + * 0. Then later code won't know the ,1 version even exists. + * TODO: Figure out how this affects CPU flags processing. + * + * Call SetInsnShiftFlag() to set this flag to 1. + */ + unsigned char shift_op; } insn; struct { struct expr_s *target; /* target location */ struct { unsigned char opcode[3]; - unsigned char opcode_len; - unsigned char valid; /* does the opcode exist? 
*/ + unsigned char opcode_len; /* 0 = no opc for this version */ } shortop, nearop; /* which opcode are we using? */ @@ -133,12 +159,12 @@ typedef struct bytecode_s { /* other assembler state info */ unsigned long offset; - unsigned int mode_bits; + unsigned char mode_bits; } bytecode; -effaddr *ConvertRegToEA(effaddr *ptr, unsigned long reg); -effaddr *ConvertImmToEA(effaddr *ptr, immval *im_ptr, unsigned char im_len); -effaddr *ConvertExprToEA(effaddr *ptr, struct expr_s *expr_ptr); +effaddr *effaddr_new_reg(unsigned long reg); +effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len); +effaddr *effaddr_new_expr(struct expr_s *expr_ptr); immval *ConvertIntToImm(immval *ptr, unsigned long int_val); immval *ConvertExprToImm(immval *ptr, struct expr_s *expr_ptr); @@ -149,9 +175,13 @@ void SetEALen(effaddr *ptr, unsigned char len); void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); +void SetInsnShiftFlag(bytecode *bc); void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); +/* IMPORTANT: ea_ptr cannot be reused or freed after calling this function + * (it doesn't make a copy). im_ptr, on the other hand, can be. + */ bytecode *bytecode_new_insn(unsigned char opersize, unsigned char opcode_len, unsigned char op0, @@ -163,13 +193,12 @@ bytecode *bytecode_new_insn(unsigned char opersize, unsigned char im_len, unsigned char im_sign); +/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. 
*/ bytecode *bytecode_new_jmprel(targetval *target, - unsigned char short_valid, unsigned char short_opcode_len, unsigned char short_op0, unsigned char short_op1, unsigned char short_op2, - unsigned char near_valid, unsigned char near_opcode_len, unsigned char near_op0, unsigned char near_op1, diff --git a/libyasm/tests/bytecode_test.c b/libyasm/tests/bytecode_test.c index d9a8077b..c60b788d 100644 --- a/libyasm/tests/bytecode_test.c +++ b/libyasm/tests/bytecode_test.c @@ -8,33 +8,31 @@ #include "bytecode.h" -START_TEST(test_ConvertRegToEA) +START_TEST(test_effaddr_new_reg) { - effaddr static_val, *retp; + effaddr *ea; int i; - /* Test with non-NULL */ - fail_unless(ConvertRegToEA(&static_val, 1) == &static_val, - "Should return ptr if non-NULL passed in ptr"); - /* Test with NULL */ - retp = ConvertRegToEA(NULL, 1); - fail_unless(retp != NULL, - "Should return static structure if NULL passed in ptr"); + ea = effaddr_new_reg(1); + fail_unless(ea != NULL, "Should die if out of memory (not return NULL)"); /* Test structure values function should set */ - fail_unless(retp->len == 0, "len should be 0"); - fail_unless(retp->segment == 0, "Should be no segment override"); - fail_unless(retp->valid_modrm == 1, "Mod/RM should be valid"); - fail_unless(retp->need_modrm == 1, "Mod/RM should be needed"); - fail_unless(retp->valid_sib == 0, "SIB should be invalid"); - fail_unless(retp->need_sib == 0, "SIB should not be needed"); + fail_unless(ea->len == 0, "len should be 0"); + fail_unless(ea->segment == 0, "Should be no segment override"); + fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid"); + fail_unless(ea->need_modrm == 1, "Mod/RM should be needed"); + fail_unless(ea->valid_sib == 0, "SIB should be invalid"); + fail_unless(ea->need_sib == 0, "SIB should not be needed"); + + free(ea); /* Exhaustively test generated Mod/RM byte with register values */ for(i=0; i<8; i++) { - ConvertRegToEA(&static_val, i); - fail_unless(static_val.modrm == 0xC0 | (i & 0x07), + ea = 
effaddr_new_reg(i); + fail_unless(ea->modrm == (0xC0 | (i & 0x07)), "Invalid Mod/RM byte generated"); + free(ea); } } END_TEST @@ -45,7 +43,7 @@ Suite *bytecode_suite(void) TCase *tc_conversion = tcase_create("Conversion"); suite_add_tcase(s, tc_conversion); - tcase_add_test(tc_conversion, test_ConvertRegToEA); + tcase_add_test(tc_conversion, test_effaddr_new_reg); return s; } diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in index 156b56bd..d3eee5ac 100644 --- a/modules/parsers/nasm/bison.y.in +++ b/modules/parsers/nasm/bison.y.in @@ -69,7 +69,7 @@ extern section *nasm_parser_cur_section; int line; } syminfo; unsigned char groupdata[4]; - effaddr ea_val; + effaddr *ea; expr *exp; immval im_val; targetval tgt_val; @@ -105,11 +105,11 @@ extern section *nasm_parser_cur_section; %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 +%type mem memaddr memexp memfar +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 %type imm imm8x imm16x imm32x imm8 imm16 imm32 %type expr expr_no_string %type explabel @@ -245,19 +245,19 @@ segreg: REG_ES ; /* memory addresses */ -memexp: expr { expr_simplify ($1); ConvertExprToEA (&$$, $1); } +memexp: expr { expr_simplify ($1); $$ = effaddr_new_expr($1); } ; -memaddr: memexp { $$ = $1; $$.segment = 0; } - | REG_CS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment(&$$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x65); } - | 
BYTE memaddr { $$ = $2; SetEALen(&$$, 1); } - | WORD memaddr { $$ = $2; SetEALen(&$$, 2); } - | DWORD memaddr { $$ = $2; SetEALen(&$$, 4); } +memaddr: memexp { $$ = $1; $$->segment = 0; } + | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } + | BYTE memaddr { $$ = $2; SetEALen($$, 1); } + | WORD memaddr { $$ = $2; SetEALen($$, 2); } + | DWORD memaddr { $$ = $2; SetEALen($$, 4); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -308,38 +308,38 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8x: reg8 { $$ = effaddr_new_reg($1); } | mem8x ; -rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16x: reg16 { $$ = effaddr_new_reg($1); } | mem16x ; -rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32x: reg32 { $$ = effaddr_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64x: MMXREG { $$ = effaddr_new_reg($1); } | mem64x ; -rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128x: XMMREG { $$ = effaddr_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8: reg8 { $$ = effaddr_new_reg($1); } | mem8 ; -rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16: reg16 { $$ = effaddr_new_reg($1); } | mem16 ; -rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32: reg32 { $$ = effaddr_new_reg($1); } | mem32 ; -rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64: MMXREG { $$ = effaddr_new_reg($1); } | mem64 ; -rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128: XMMREG { $$ = effaddr_new_reg($1); } | mem128 ; @@ -417,12 +417,12 @@ explabel: ID instr: instrbase | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } | 
ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x2E); } - | REG_SS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x65); } + | REG_CS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x2E); } + | REG_SS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x36); } + | REG_DS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x3E); } + | REG_ES instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x26); } + | REG_FS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x64); } + | REG_GS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x65); } | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl index 28714042..3561a3e8 100755 --- a/modules/parsers/nasm/gen_instr.pl +++ b/modules/parsers/nasm/gen_instr.pl @@ -392,6 +392,15 @@ sub action ( @ $ ) . rule_footer; } +sub action_setshiftflag ( @ $ ) +{ + my ($rule, $tokens, $func, $a_args, $count) = splice @_; + return rule_header ($rule, $tokens, $count) + . " \$\$ = $func (@$a_args);\n" + . " SetInsnShiftFlag(\$\$);\n" + . rule_footer; +} + sub get_token_number ( $ $ ) { my ($tokens, $str) = splice @_; @@ -502,13 +511,10 @@ sub output_yacc ($@) # test for short opcode "nil" if($inst->[SHORTOPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0, 0,'; + push @args, '0, 0, 0, 0,'; } else { - # opcode is valid - push @args, '1,'; - # number of bytes of short opcode push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . 
","; @@ -529,13 +535,10 @@ sub output_yacc ($@) # test for near opcode "nil" if($inst->[NEAROPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0, 0,'; + push @args, '0, 0, 0, 0,'; } else { - # opcode is valid - push @args, '1,'; - # number of bytes of near opcode push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; @@ -612,10 +615,10 @@ sub output_yacc ($@) $args[-1] =~ s/nil/0/; # don't let a $0.\d match slip into the following rules. $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; - $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] - ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/; + $args[-1] =~ s[(\$\d+)i,\s*(\d+)] + ["effaddr_new_imm(\&$1, ".($2/8)."), 0"]e; $args[-1] .= ','; die $args[-1] if $args[-1] =~ m/\d+[ri]/; @@ -678,10 +681,20 @@ sub output_yacc ($@) # or if we've deferred and we match the folding version elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) { - my $immarg = get_token_number ($tokens, "imm8"); - $ONE->[4] = 1; - print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); + # Output a normal version except imm8 -> imm8x + # (BYTE override always makes longer version, and + # we don't want to conflict with the imm version + # we output right after this one. + $tokens =~ s/imm8/imm8x/; + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + + # Now output imm version, with second opcode byte + # set to ,1 opcode. Also call SetInsnShiftFlag(). + $tokens =~ s/imm8x/imm/; + die "no space for ONE?" 
if $args[3] !~ m/0,/; + $args[3] = $ONE->[3]->[2]; + print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); } elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) { diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y index 156b56bd..d3eee5ac 100644 --- a/modules/parsers/nasm/nasm-bison.y +++ b/modules/parsers/nasm/nasm-bison.y @@ -69,7 +69,7 @@ extern section *nasm_parser_cur_section; int line; } syminfo; unsigned char groupdata[4]; - effaddr ea_val; + effaddr *ea; expr *exp; immval im_val; targetval tgt_val; @@ -105,11 +105,11 @@ extern section *nasm_parser_cur_section; %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 +%type mem memaddr memexp memfar +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 %type imm imm8x imm16x imm32x imm8 imm16 imm32 %type expr expr_no_string %type explabel @@ -245,19 +245,19 @@ segreg: REG_ES ; /* memory addresses */ -memexp: expr { expr_simplify ($1); ConvertExprToEA (&$$, $1); } +memexp: expr { expr_simplify ($1); $$ = effaddr_new_expr($1); } ; -memaddr: memexp { $$ = $1; $$.segment = 0; } - | REG_CS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment(&$$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen(&$$, 1); } - | WORD memaddr { $$ = $2; SetEALen(&$$, 2); } - | DWORD memaddr { $$ = $2; SetEALen(&$$, 4); } +memaddr: memexp { $$ = $1; $$->segment = 
0; } + | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } + | BYTE memaddr { $$ = $2; SetEALen($$, 1); } + | WORD memaddr { $$ = $2; SetEALen($$, 2); } + | DWORD memaddr { $$ = $2; SetEALen($$, 4); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -308,38 +308,38 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8x: reg8 { $$ = effaddr_new_reg($1); } | mem8x ; -rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16x: reg16 { $$ = effaddr_new_reg($1); } | mem16x ; -rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32x: reg32 { $$ = effaddr_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64x: MMXREG { $$ = effaddr_new_reg($1); } | mem64x ; -rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128x: XMMREG { $$ = effaddr_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8: reg8 { $$ = effaddr_new_reg($1); } | mem8 ; -rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16: reg16 { $$ = effaddr_new_reg($1); } | mem16 ; -rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32: reg32 { $$ = effaddr_new_reg($1); } | mem32 ; -rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64: MMXREG { $$ = effaddr_new_reg($1); } | mem64 ; -rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128: XMMREG { $$ = effaddr_new_reg($1); } | mem128 ; @@ -417,12 +417,12 @@ explabel: ID instr: instrbase | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } | ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x2E); } - | REG_SS instr { $$ = $2; 
SetEASegment(&$$->data.insn.ea, 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x65); } + | REG_CS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x2E); } + | REG_SS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x36); } + | REG_DS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x3E); } + | REG_ES instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x26); } + | REG_FS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x64); } + | REG_GS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x65); } | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } diff --git a/src/bytecode.c b/src/bytecode.c index a9854b87..a8b17ed2 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -44,9 +44,6 @@ RCSID("$IdPath$"); /* Static structures for when NULL is passed to conversion functions. 
*/ -/* for Convert*ToEA() */ -static effaddr eff_static; - /* for Convert*ToImm() */ static immval im_static; @@ -56,57 +53,63 @@ unsigned char bytes_static[16]; static bytecode *bytecode_new_common(void); effaddr * -ConvertRegToEA(effaddr *ptr, unsigned long reg) +effaddr_new_reg(unsigned long reg) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); - ptr->len = 0; - ptr->segment = 0; - ptr->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ - ptr->valid_modrm = 1; - ptr->need_modrm = 1; - ptr->valid_sib = 0; - ptr->need_sib = 0; + if (!ea) + Fatal(FATAL_NOMEM); - return ptr; + ea->len = 0; + ea->segment = 0; + ea->modrm = 0xC0 | (reg & 0x07); /* Mod=11, R/M=Reg, Reg=0 */ + ea->valid_modrm = 1; + ea->need_modrm = 1; + ea->valid_sib = 0; + ea->need_sib = 0; + + return ea; } effaddr * -ConvertExprToEA(effaddr *ptr, expr *expr_ptr) +effaddr_new_expr(expr *expr_ptr) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); - ptr->segment = 0; + if (!ea) + Fatal(FATAL_NOMEM); - ptr->valid_modrm = 0; - ptr->need_modrm = 1; - ptr->valid_sib = 0; - ptr->need_sib = 0; + ea->segment = 0; - ptr->disp = expr_ptr; + ea->valid_modrm = 0; + ea->need_modrm = 1; + ea->valid_sib = 0; + ea->need_sib = 0; - return ptr; + ea->disp = expr_ptr; + + return ea; } effaddr * -ConvertImmToEA(effaddr *ptr, immval *im_ptr, unsigned char im_len) +effaddr_new_imm(immval *im_ptr, unsigned char im_len) { - if (!ptr) - ptr = &eff_static; + effaddr *ea = malloc(sizeof(effaddr)); + + if (!ea) + Fatal(FATAL_NOMEM); - ptr->disp = im_ptr->val; + ea->disp = im_ptr->val; if (im_ptr->len > im_len) Warning(_("%s value exceeds bounds"), "word"); - ptr->len = im_len; - ptr->segment = 0; - ptr->valid_modrm = 0; - ptr->need_modrm = 0; - ptr->valid_sib = 0; - ptr->need_sib = 0; - - return ptr; + ea->len = im_len; + ea->segment = 0; + ea->valid_modrm = 0; + ea->need_modrm = 0; + ea->valid_sib = 0; + ea->need_sib = 0; + + return ea; } immval * @@ -235,6 +238,19 @@ 
SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) *lockrep_pre = prefix; } +void +SetInsnShiftFlag(bytecode *bc) +{ + if (!bc) + return; + + if (bc->type != BC_INSN) + InternalError(__LINE__, __FILE__, + _("Attempted to set shift flag on non-instruction")); + + bc->data.insn.shift_op = 1; +} + void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) { @@ -281,15 +297,10 @@ bytecode_new_insn(unsigned char opersize, bc->type = BC_INSN; + bc->data.insn.ea = ea_ptr; if (ea_ptr) { - bc->data.insn.ea = *ea_ptr; - bc->data.insn.ea.modrm &= 0xC7; /* zero spare/reg bits */ - bc->data.insn.ea.modrm |= (spare << 3) & 0x38; /* plug in provided bits */ - } else { - bc->data.insn.ea.len = 0; - bc->data.insn.ea.segment = 0; - bc->data.insn.ea.need_modrm = 0; - bc->data.insn.ea.need_sib = 0; + bc->data.insn.ea->modrm &= 0xC7; /* zero spare/reg bits */ + bc->data.insn.ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */ } if (im_ptr) { @@ -310,18 +321,17 @@ bytecode_new_insn(unsigned char opersize, bc->data.insn.addrsize = 0; bc->data.insn.opersize = opersize; bc->data.insn.lockrep_pre = 0; + bc->data.insn.shift_op = 0; return bc; } bytecode * bytecode_new_jmprel(targetval *target, - unsigned char short_valid, unsigned char short_opcode_len, unsigned char short_op0, unsigned char short_op1, unsigned char short_op2, - unsigned char near_valid, unsigned char near_opcode_len, unsigned char near_op0, unsigned char near_op1, @@ -335,26 +345,20 @@ bytecode_new_jmprel(targetval *target, bc->data.jmprel.target = target->val; bc->data.jmprel.op_sel = target->op_sel; - if ((target->op_sel == JR_SHORT_FORCED) && (!short_valid)) + if ((target->op_sel == JR_SHORT_FORCED) && (short_opcode_len == 0)) Error(_("no SHORT form of that jump instruction exists")); - if ((target->op_sel == JR_NEAR_FORCED) && (!near_valid)) + if ((target->op_sel == JR_NEAR_FORCED) && (near_opcode_len == 0)) Error(_("no NEAR form of that jump instruction exists")); - 
bc->data.jmprel.shortop.valid = short_valid; - if (short_valid) { - bc->data.jmprel.shortop.opcode[0] = short_op0; - bc->data.jmprel.shortop.opcode[1] = short_op1; - bc->data.jmprel.shortop.opcode[2] = short_op2; - bc->data.jmprel.shortop.opcode_len = short_opcode_len; - } + bc->data.jmprel.shortop.opcode[0] = short_op0; + bc->data.jmprel.shortop.opcode[1] = short_op1; + bc->data.jmprel.shortop.opcode[2] = short_op2; + bc->data.jmprel.shortop.opcode_len = short_opcode_len; - bc->data.jmprel.nearop.valid = near_valid; - if (near_valid) { - bc->data.jmprel.nearop.opcode[0] = near_op0; - bc->data.jmprel.nearop.opcode[1] = near_op1; - bc->data.jmprel.nearop.opcode[2] = near_op2; - bc->data.jmprel.nearop.opcode_len = near_opcode_len; - } + bc->data.jmprel.nearop.opcode[0] = near_op0; + bc->data.jmprel.nearop.opcode[1] = near_op1; + bc->data.jmprel.nearop.opcode[2] = near_op2; + bc->data.jmprel.nearop.opcode_len = near_opcode_len; bc->data.jmprel.addrsize = addrsize; bc->data.jmprel.opersize = 0; @@ -437,24 +441,25 @@ bytecode_print(bytecode *bc) break; case BC_INSN: printf("_Instruction_\n"); - printf("Effective Address:\n"); - printf(" Disp="); - if (!bc->data.insn.ea.disp) - printf("(nil)"); - else - expr_print(bc->data.insn.ea.disp); - printf("\n"); - printf(" Len=%u SegmentOv=%2x\n", - (unsigned int)bc->data.insn.ea.len, - (unsigned int)bc->data.insn.ea.segment); - printf(" ModRM=%2x ValidRM=%u NeedRM=%u\n", - (unsigned int)bc->data.insn.ea.modrm, - (unsigned int)bc->data.insn.ea.valid_modrm, - (unsigned int)bc->data.insn.ea.need_modrm); - printf(" SIB=%2x ValidSIB=%u NeedSIB=%u\n", - (unsigned int)bc->data.insn.ea.sib, - (unsigned int)bc->data.insn.ea.valid_sib, - (unsigned int)bc->data.insn.ea.need_sib); + printf("Effective Address:"); + if (!bc->data.insn.ea) + printf(" (nil)\n"); + else { + printf("\n Disp="); + expr_print(bc->data.insn.ea->disp); + printf("\n"); + printf(" Len=%u SegmentOv=%2x\n", + (unsigned int)bc->data.insn.ea->len, + (unsigned 
int)bc->data.insn.ea->segment); + printf(" ModRM=%2x ValidRM=%u NeedRM=%u\n", + (unsigned int)bc->data.insn.ea->modrm, + (unsigned int)bc->data.insn.ea->valid_modrm, + (unsigned int)bc->data.insn.ea->need_modrm); + printf(" SIB=%2x ValidSIB=%u NeedSIB=%u\n", + (unsigned int)bc->data.insn.ea->sib, + (unsigned int)bc->data.insn.ea->valid_sib, + (unsigned int)bc->data.insn.ea->need_sib); + } printf("Immediate Value:\n"); printf(" Val="); if (!bc->data.insn.imm.val) @@ -483,7 +488,7 @@ bytecode_print(bytecode *bc) printf("Target="); expr_print(bc->data.jmprel.target); printf("\nShort Form:\n"); - if (!bc->data.jmprel.shortop.valid) + if (!bc->data.jmprel.shortop.opcode_len == 0) printf(" None\n"); else printf(" Opcode: %2x %2x %2x OpLen=%u\n", @@ -491,7 +496,7 @@ bytecode_print(bytecode *bc) (unsigned int)bc->data.jmprel.shortop.opcode[1], (unsigned int)bc->data.jmprel.shortop.opcode[2], (unsigned int)bc->data.jmprel.shortop.opcode_len); - if (!bc->data.jmprel.nearop.valid) + if (!bc->data.jmprel.nearop.opcode_len == 0) printf(" None\n"); else printf(" Opcode: %2x %2x %2x OpLen=%u\n", diff --git a/src/bytecode.h b/src/bytecode.h index 9b45b03d..5ff60a8c 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -82,9 +82,21 @@ typedef struct bytecode_s { enum { BC_EMPTY, BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; + /* This union has been somewhat tweaked to get it as small as possible + * on the 4-byte-aligned x86 architecture (without resorting to + * bitfields). In particular, insn and jmprel are the largest structures + * in the union, and are also the same size (after padding). jmprel + * can have another unsigned char added to the end without affecting + * its size. + * + * Don't worry about this too much, but keep it in mind when changing + * this structure. We care about the size of bytecode in particular + * because it accounts for the majority of the memory usage in the + * assembler when assembling a large file. 
+ */ union { struct { - effaddr ea; /* effective address */ + effaddr *ea; /* effective address */ immval imm; /* immediate or relative value */ @@ -94,14 +106,28 @@ typedef struct bytecode_s { unsigned char addrsize; /* 0 indicates no override */ unsigned char opersize; /* 0 indicates no override */ unsigned char lockrep_pre; /* 0 indicates no prefix */ + + /* HACK, but a space-saving one: shift opcodes have an immediate + * form and a ,1 form (with no immediate). In the parser, we + * set this and opcode_len=1, but store the ,1 version in the + * second byte of the opcode array. We then choose between the + * two versions once we know the actual value of imm (because we + * don't know it in the parser module). + * + * A override to force the imm version should just leave this at + * 0. Then later code won't know the ,1 version even exists. + * TODO: Figure out how this affects CPU flags processing. + * + * Call SetInsnShiftFlag() to set this flag to 1. + */ + unsigned char shift_op; } insn; struct { struct expr_s *target; /* target location */ struct { unsigned char opcode[3]; - unsigned char opcode_len; - unsigned char valid; /* does the opcode exist? */ + unsigned char opcode_len; /* 0 = no opc for this version */ } shortop, nearop; /* which opcode are we using? 
*/ @@ -133,12 +159,12 @@ typedef struct bytecode_s { /* other assembler state info */ unsigned long offset; - unsigned int mode_bits; + unsigned char mode_bits; } bytecode; -effaddr *ConvertRegToEA(effaddr *ptr, unsigned long reg); -effaddr *ConvertImmToEA(effaddr *ptr, immval *im_ptr, unsigned char im_len); -effaddr *ConvertExprToEA(effaddr *ptr, struct expr_s *expr_ptr); +effaddr *effaddr_new_reg(unsigned long reg); +effaddr *effaddr_new_imm(immval *im_ptr, unsigned char im_len); +effaddr *effaddr_new_expr(struct expr_s *expr_ptr); immval *ConvertIntToImm(immval *ptr, unsigned long int_val); immval *ConvertExprToImm(immval *ptr, struct expr_s *expr_ptr); @@ -149,9 +175,13 @@ void SetEALen(effaddr *ptr, unsigned char len); void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); +void SetInsnShiftFlag(bytecode *bc); void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); +/* IMPORTANT: ea_ptr cannot be reused or freed after calling this function + * (it doesn't make a copy). im_ptr, on the other hand, can be. + */ bytecode *bytecode_new_insn(unsigned char opersize, unsigned char opcode_len, unsigned char op0, @@ -163,13 +193,12 @@ bytecode *bytecode_new_insn(unsigned char opersize, unsigned char im_len, unsigned char im_sign); +/* Pass 0 for the opcode_len if that version of the opcode doesn't exist. 
*/ bytecode *bytecode_new_jmprel(targetval *target, - unsigned char short_valid, unsigned char short_opcode_len, unsigned char short_op0, unsigned char short_op1, unsigned char short_op2, - unsigned char near_valid, unsigned char near_opcode_len, unsigned char near_op0, unsigned char near_op1, diff --git a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in index 156b56bd..d3eee5ac 100644 --- a/src/parsers/nasm/bison.y.in +++ b/src/parsers/nasm/bison.y.in @@ -69,7 +69,7 @@ extern section *nasm_parser_cur_section; int line; } syminfo; unsigned char groupdata[4]; - effaddr ea_val; + effaddr *ea; expr *exp; immval im_val; targetval tgt_val; @@ -105,11 +105,11 @@ extern section *nasm_parser_cur_section; %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 +%type mem memaddr memexp memfar +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 %type imm imm8x imm16x imm32x imm8 imm16 imm32 %type expr expr_no_string %type explabel @@ -245,19 +245,19 @@ segreg: REG_ES ; /* memory addresses */ -memexp: expr { expr_simplify ($1); ConvertExprToEA (&$$, $1); } +memexp: expr { expr_simplify ($1); $$ = effaddr_new_expr($1); } ; -memaddr: memexp { $$ = $1; $$.segment = 0; } - | REG_CS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment(&$$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen(&$$, 1); } - | WORD memaddr { $$ = $2; SetEALen(&$$, 2); } - | 
DWORD memaddr { $$ = $2; SetEALen(&$$, 4); } +memaddr: memexp { $$ = $1; $$->segment = 0; } + | REG_CS ':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } + | BYTE memaddr { $$ = $2; SetEALen($$, 1); } + | WORD memaddr { $$ = $2; SetEALen($$, 2); } + | DWORD memaddr { $$ = $2; SetEALen($$, 4); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -308,38 +308,38 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8x: reg8 { $$ = effaddr_new_reg($1); } | mem8x ; -rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16x: reg16 { $$ = effaddr_new_reg($1); } | mem16x ; -rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32x: reg32 { $$ = effaddr_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64x: MMXREG { $$ = effaddr_new_reg($1); } | mem64x ; -rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128x: XMMREG { $$ = effaddr_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8: reg8 { $$ = effaddr_new_reg($1); } | mem8 ; -rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16: reg16 { $$ = effaddr_new_reg($1); } | mem16 ; -rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32: reg32 { $$ = effaddr_new_reg($1); } | mem32 ; -rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64: MMXREG { $$ = effaddr_new_reg($1); } | mem64 ; -rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128: XMMREG { $$ = effaddr_new_reg($1); } | mem128 ; @@ -417,12 +417,12 @@ explabel: ID instr: instrbase | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } | ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; 
SetEASegment(&$$->data.insn.ea, 0x2E); } - | REG_SS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x36); } - | REG_DS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x65); } + | REG_CS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x2E); } + | REG_SS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x36); } + | REG_DS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x3E); } + | REG_ES instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x26); } + | REG_FS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x64); } + | REG_GS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x65); } | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl index 28714042..3561a3e8 100755 --- a/src/parsers/nasm/gen_instr.pl +++ b/src/parsers/nasm/gen_instr.pl @@ -392,6 +392,15 @@ sub action ( @ $ ) . rule_footer; } +sub action_setshiftflag ( @ $ ) +{ + my ($rule, $tokens, $func, $a_args, $count) = splice @_; + return rule_header ($rule, $tokens, $count) + . " \$\$ = $func (@$a_args);\n" + . " SetInsnShiftFlag(\$\$);\n" + . rule_footer; +} + sub get_token_number ( $ $ ) { my ($tokens, $str) = splice @_; @@ -502,13 +511,10 @@ sub output_yacc ($@) # test for short opcode "nil" if($inst->[SHORTOPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0, 0,'; + push @args, '0, 0, 0, 0,'; } else { - # opcode is valid - push @args, '1,'; - # number of bytes of short opcode push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . 
","; @@ -529,13 +535,10 @@ sub output_yacc ($@) # test for near opcode "nil" if($inst->[NEAROPCODE] =~ m/nil/) { - push @args, '0, 0, 0, 0, 0,'; + push @args, '0, 0, 0, 0,'; } else { - # opcode is valid - push @args, '1,'; - # number of bytes of near opcode push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; @@ -612,10 +615,10 @@ sub output_yacc ($@) $args[-1] =~ s/nil/0/; # don't let a $0.\d match slip into the following rules. $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; - $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] - ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + #$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/; + $args[-1] =~ s[(\$\d+)i,\s*(\d+)] + ["effaddr_new_imm(\&$1, ".($2/8)."), 0"]e; $args[-1] .= ','; die $args[-1] if $args[-1] =~ m/\d+[ri]/; @@ -678,10 +681,20 @@ sub output_yacc ($@) # or if we've deferred and we match the folding version elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) { - my $immarg = get_token_number ($tokens, "imm8"); - $ONE->[4] = 1; - print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); + # Output a normal version except imm8 -> imm8x + # (BYTE override always makes longer version, and + # we don't want to conflict with the imm version + # we output right after this one. + $tokens =~ s/imm8/imm8x/; + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + + # Now output imm version, with second opcode byte + # set to ,1 opcode. Also call SetInsnShiftFlag(). + $tokens =~ s/imm8x/imm/; + die "no space for ONE?" 
if $args[3] !~ m/0,/; + $args[3] = $ONE->[3]->[2]; + print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++); } elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) { diff --git a/src/parsers/nasm/nasm-bison.y b/src/parsers/nasm/nasm-bison.y index 156b56bd..d3eee5ac 100644 --- a/src/parsers/nasm/nasm-bison.y +++ b/src/parsers/nasm/nasm-bison.y @@ -69,7 +69,7 @@ extern section *nasm_parser_cur_section; int line; } syminfo; unsigned char groupdata[4]; - effaddr ea_val; + effaddr *ea; expr *exp; immval im_val; targetval tgt_val; @@ -105,11 +105,11 @@ extern section *nasm_parser_cur_section; %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp memfar -%type mem8x mem16x mem32x mem64x mem80x mem128x -%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 -%type rm8x rm16x rm32x /*rm64x rm128x*/ -%type rm8 rm16 rm32 rm64 rm128 +%type mem memaddr memexp memfar +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 %type imm imm8x imm16x imm32x imm8 imm16 imm32 %type expr expr_no_string %type explabel @@ -245,19 +245,19 @@ segreg: REG_ES ; /* memory addresses */ -memexp: expr { expr_simplify ($1); ConvertExprToEA (&$$, $1); } +memexp: expr { expr_simplify ($1); $$ = effaddr_new_expr($1); } ; -memaddr: memexp { $$ = $1; $$.segment = 0; } - | REG_CS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x2E); } - | REG_SS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x36); } - | REG_DS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x3E); } - | REG_ES ':' memaddr { $$ = $3; SetEASegment(&$$, 0x26); } - | REG_FS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x64); } - | REG_GS ':' memaddr { $$ = $3; SetEASegment(&$$, 0x65); } - | BYTE memaddr { $$ = $2; SetEALen(&$$, 1); } - | WORD memaddr { $$ = $2; SetEALen(&$$, 2); } - | DWORD memaddr { $$ = $2; SetEALen(&$$, 4); } +memaddr: memexp { $$ = $1; $$->segment = 0; } + | REG_CS 
':' memaddr { $$ = $3; SetEASegment($$, 0x2E); } + | REG_SS ':' memaddr { $$ = $3; SetEASegment($$, 0x36); } + | REG_DS ':' memaddr { $$ = $3; SetEASegment($$, 0x3E); } + | REG_ES ':' memaddr { $$ = $3; SetEASegment($$, 0x26); } + | REG_FS ':' memaddr { $$ = $3; SetEASegment($$, 0x64); } + | REG_GS ':' memaddr { $$ = $3; SetEASegment($$, 0x65); } + | BYTE memaddr { $$ = $2; SetEALen($$, 1); } + | WORD memaddr { $$ = $2; SetEALen($$, 2); } + | DWORD memaddr { $$ = $2; SetEALen($$, 4); } ; mem: '[' memaddr ']' { $$ = $2; } @@ -308,38 +308,38 @@ mem1632: mem ; /* explicit register or memory */ -rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8x: reg8 { $$ = effaddr_new_reg($1); } | mem8x ; -rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16x: reg16 { $$ = effaddr_new_reg($1); } | mem16x ; -rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32x: reg32 { $$ = effaddr_new_reg($1); } | mem32x ; /* not needed: -rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64x: MMXREG { $$ = effaddr_new_reg($1); } | mem64x ; -rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128x: XMMREG { $$ = effaddr_new_reg($1); } | mem128x ; */ /* implicit register or memory */ -rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } +rm8: reg8 { $$ = effaddr_new_reg($1); } | mem8 ; -rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } +rm16: reg16 { $$ = effaddr_new_reg($1); } | mem16 ; -rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } +rm32: reg32 { $$ = effaddr_new_reg($1); } | mem32 ; -rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } +rm64: MMXREG { $$ = effaddr_new_reg($1); } | mem64 ; -rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } +rm128: XMMREG { $$ = effaddr_new_reg($1); } | mem128 ; @@ -417,12 +417,12 @@ explabel: ID instr: instrbase | OPERSIZE instr { $$ = $2; SetInsnOperSizeOverride($$, $1); } | ADDRSIZE instr { $$ = $2; SetInsnAddrSizeOverride($$, $1); } - | REG_CS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x2E); } - | REG_SS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x36); } - | 
REG_DS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x3E); } - | REG_ES instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x26); } - | REG_FS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x64); } - | REG_GS instr { $$ = $2; SetEASegment(&$$->data.insn.ea, 0x65); } + | REG_CS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x2E); } + | REG_SS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x36); } + | REG_DS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x3E); } + | REG_ES instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x26); } + | REG_FS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x64); } + | REG_GS instr { $$ = $2; SetEASegment($$->data.insn.ea, 0x65); } | LOCK instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF0); } | REPNZ instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF2); } | REP instr { $$ = $2; SetInsnLockRepPrefix($$, 0xF3); } diff --git a/src/tests/bytecode_test.c b/src/tests/bytecode_test.c index d9a8077b..c60b788d 100644 --- a/src/tests/bytecode_test.c +++ b/src/tests/bytecode_test.c @@ -8,33 +8,31 @@ #include "bytecode.h" -START_TEST(test_ConvertRegToEA) +START_TEST(test_effaddr_new_reg) { - effaddr static_val, *retp; + effaddr *ea; int i; - /* Test with non-NULL */ - fail_unless(ConvertRegToEA(&static_val, 1) == &static_val, - "Should return ptr if non-NULL passed in ptr"); - /* Test with NULL */ - retp = ConvertRegToEA(NULL, 1); - fail_unless(retp != NULL, - "Should return static structure if NULL passed in ptr"); + ea = effaddr_new_reg(1); + fail_unless(ea != NULL, "Should die if out of memory (not return NULL)"); /* Test structure values function should set */ - fail_unless(retp->len == 0, "len should be 0"); - fail_unless(retp->segment == 0, "Should be no segment override"); - fail_unless(retp->valid_modrm == 1, "Mod/RM should be valid"); - fail_unless(retp->need_modrm == 1, "Mod/RM should be needed"); - fail_unless(retp->valid_sib == 0, "SIB should be invalid"); - fail_unless(retp->need_sib == 0, "SIB should not be needed"); + 
fail_unless(ea->len == 0, "len should be 0"); + fail_unless(ea->segment == 0, "Should be no segment override"); + fail_unless(ea->valid_modrm == 1, "Mod/RM should be valid"); + fail_unless(ea->need_modrm == 1, "Mod/RM should be needed"); + fail_unless(ea->valid_sib == 0, "SIB should be invalid"); + fail_unless(ea->need_sib == 0, "SIB should not be needed"); + + free(ea); /* Exhaustively test generated Mod/RM byte with register values */ for(i=0; i<8; i++) { - ConvertRegToEA(&static_val, i); - fail_unless(static_val.modrm == 0xC0 | (i & 0x07), + ea = effaddr_new_reg(i); + fail_unless(ea->modrm == 0xC0 | (i & 0x07), "Invalid Mod/RM byte generated"); + free(ea); } } END_TEST @@ -45,7 +43,7 @@ Suite *bytecode_suite(void) TCase *tc_conversion = tcase_create("Conversion"); suite_add_tcase(s, tc_conversion); - tcase_add_test(tc_conversion, test_ConvertRegToEA); + tcase_add_test(tc_conversion, test_effaddr_new_reg); return s; }