From: Peter Johnson Date: Wed, 11 Jul 2001 04:07:11 +0000 (-0000) Subject: First attempt at supporting relative jump instructions. A few hacks, but X-Git-Tag: v0.1.0~392 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fa5c9d5ea9eea333c97e9dabe43d9188bc45be11;p=yasm First attempt at supporting relative jump instructions. A few hacks, but seems to work well. A few other minor fixes/changes as well (noticed when coding). svn path=/trunk/yasm/; revision=119 --- diff --git a/include/bytecode.h b/include/bytecode.h index d7c89f59..21592850 100644 --- a/include/bytecode.h +++ b/include/bytecode.h @@ -1,4 +1,4 @@ -/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $ +/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $ * Bytecode utility functions header file * * Copyright (C) 2001 Peter Johnson @@ -41,18 +41,30 @@ typedef struct immval_s { struct expr_s *val; unsigned char len; /* length of val (in bytes), 0 if none */ - unsigned char isrel; unsigned char isneg; /* the value has been explicitly negated */ unsigned char f_len; /* final imm length */ - unsigned char f_rel; /* 1 if final imm should be rel */ unsigned char f_sign; /* 1 if final imm should be signed */ } immval; +typedef enum jmprel_opcode_sel_e { + JR_NONE, + JR_SHORT, + JR_NEAR, + JR_SHORT_FORCED, + JR_NEAR_FORCED +} jmprel_opcode_sel; + +typedef struct targetval_s { + struct expr_s *val; + + jmprel_opcode_sel op_sel; +} targetval; + typedef struct bytecode_s { struct bytecode_s *next; - enum { BC_INSN, BC_DATA, BC_RESERVE } type; + enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; union { struct { @@ -67,6 +79,23 @@ typedef struct bytecode_s { unsigned char opersize; /* 0 indicates no override */ unsigned char lockrep_pre; /* 0 indicates no prefix */ } insn; + struct { + struct expr_s *target; /* target location */ + + struct { + unsigned char opcode[3]; + unsigned char opcode_len; + unsigned char valid; /* does the opcode exist? 
*/ + } shortop, nearop; + + /* which opcode are we using? */ + /* The *FORCED forms are specified in the source as such */ + jmprel_opcode_sel op_sel; + + unsigned char addrsize; /* 0 indicates no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + } jmprel; struct { unsigned char *data; } data; @@ -98,6 +127,8 @@ void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); +void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); + void BuildBC_Insn(bytecode *bc, unsigned char opersize, unsigned char opcode_len, @@ -108,8 +139,21 @@ void BuildBC_Insn(bytecode *bc, unsigned char spare, immval *im_ptr, unsigned char im_len, - unsigned char im_sign, - unsigned char im_rel); + unsigned char im_sign); + +void BuildBC_JmpRel(bytecode *bc, + targetval *target, + unsigned char short_valid, + unsigned char short_opcode_len, + unsigned char short_op0, + unsigned char short_op1, + unsigned char short_op2, + unsigned char near_valid, + unsigned char near_opcode_len, + unsigned char near_op0, + unsigned char near_op1, + unsigned char near_op2, + unsigned char addrsize); unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len); diff --git a/include/errwarn.h b/include/errwarn.h index 6ef8e013..66098a9d 100644 --- a/include/errwarn.h +++ b/include/errwarn.h @@ -1,4 +1,4 @@ -/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $ +/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $ * Error and warning reporting and related functions header file. 
* * Copyright (C) 2001 Peter Johnson @@ -29,6 +29,7 @@ typedef enum { FATAL_NOMEM } fatal_num; +void InternalError(unsigned int line, char *file, char *message); void Fatal(fatal_num); typedef enum { @@ -41,7 +42,8 @@ typedef enum { ERR_INVALID_LINE, ERR_EXP_SYNTAX, ERR_DUPLICATE_DEF, - ERR_OP_SIZE_MISMATCH + ERR_OP_SIZE_MISMATCH, + ERR_NO_JMPREL_FORM } err_num; void Error(err_num, char *, ...); @@ -52,7 +54,8 @@ typedef enum { WARN_VALUE_EXCEEDS_BOUNDS, WARN_MULT_SEG_OVERRIDE, WARN_MULT_LOCKREP_PREFIX, - WARN_NO_BASE_LABEL + WARN_NO_BASE_LABEL, + WARN_MULT_SHORTNEAR } warn_num; void Warning(warn_num, char *, ...); diff --git a/libyasm/bytecode.c b/libyasm/bytecode.c index a180aed1..a0c15a08 100644 --- a/libyasm/bytecode.c +++ b/libyasm/bytecode.c @@ -1,4 +1,4 @@ -/* $Id: bytecode.c,v 1.11 2001/07/06 06:25:53 mu Exp $ +/* $Id: bytecode.c,v 1.12 2001/07/11 04:07:10 peter Exp $ * Bytecode utility functions * * Copyright (C) 2001 Peter Johnson @@ -120,7 +120,6 @@ immval *ConvertIntToImm(immval *ptr, unsigned long int_val) else ptr->len = 4; - ptr->isrel = 0; ptr->isneg = 0; return ptr; @@ -133,7 +132,6 @@ immval *ConvertExprToImm(immval *ptr, expr *expr_ptr) ptr->val = expr_ptr; - ptr->isrel = 0; ptr->isneg = 0; return ptr; @@ -167,7 +165,18 @@ void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize) if(!bc) return; - bc->data.insn.opersize = opersize; + switch(bc->type) { + case BC_INSN: + bc->data.insn.opersize = opersize; + break; + case BC_JMPREL: + bc->data.jmprel.opersize = opersize; + break; + default: + InternalError(__LINE__, __FILE__, + "OperSize override applied to non-instruction"); + return; + } } void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) @@ -175,18 +184,65 @@ void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) if(!bc) return; - bc->data.insn.addrsize = addrsize; + switch(bc->type) { + case BC_INSN: + bc->data.insn.addrsize = addrsize; + break; + case BC_JMPREL: + bc->data.jmprel.addrsize = addrsize; + 
break; + default: + InternalError(__LINE__, __FILE__, + "AddrSize override applied to non-instruction"); + return; + } } void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) { + unsigned char *lockrep_pre = (unsigned char *)NULL; + if(!bc) return; - if(bc->data.insn.lockrep_pre != 0) + switch(bc->type) { + case BC_INSN: + lockrep_pre = &bc->data.insn.lockrep_pre; + break; + case BC_JMPREL: + lockrep_pre = &bc->data.jmprel.lockrep_pre; + break; + default: + InternalError(__LINE__, __FILE__, + "LockRep prefix applied to non-instruction"); + return; + } + + if(*lockrep_pre != 0) Warning(WARN_MULT_LOCKREP_PREFIX, (char *)NULL); - bc->data.insn.lockrep_pre = prefix; + *lockrep_pre = prefix; +} + +void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) +{ + if(!old_sel) + return; + + if((*old_sel == JR_SHORT_FORCED) || (*old_sel == JR_NEAR_FORCED)) + Warning(WARN_MULT_SHORTNEAR, (char *)NULL); + *old_sel = new_sel; +} + +static void BuildBC_Common(bytecode *bc) +{ + bc->len = 0; + + bc->filename = (char *)NULL; + bc->lineno = line_number; + + bc->offset = 0; + bc->mode_bits = mode_bits; } void BuildBC_Insn(bytecode *bc, @@ -199,8 +255,7 @@ void BuildBC_Insn(bytecode *bc, unsigned char spare, immval *im_ptr, unsigned char im_len, - unsigned char im_sign, - unsigned char im_rel) + unsigned char im_sign) { bc->next = (bytecode *)NULL; bc->type = BC_INSN; @@ -218,12 +273,10 @@ void BuildBC_Insn(bytecode *bc, if(im_ptr) { bc->data.insn.imm = *im_ptr; - bc->data.insn.imm.f_rel = im_rel; bc->data.insn.imm.f_sign = im_sign; bc->data.insn.imm.f_len = im_len; } else { bc->data.insn.imm.len = 0; - bc->data.insn.imm.f_rel = 0; bc->data.insn.imm.f_sign = 0; bc->data.insn.imm.f_len = 0; } @@ -235,16 +288,59 @@ void BuildBC_Insn(bytecode *bc, bc->data.insn.addrsize = 0; bc->data.insn.opersize = opersize; + bc->data.insn.lockrep_pre = 0; - bc->len = 0; + BuildBC_Common(bc); +} - bc->filename = (char *)NULL; - bc->lineno = line_number; +void 
BuildBC_JmpRel(bytecode *bc, + targetval *target, + unsigned char short_valid, + unsigned char short_opcode_len, + unsigned char short_op0, + unsigned char short_op1, + unsigned char short_op2, + unsigned char near_valid, + unsigned char near_opcode_len, + unsigned char near_op0, + unsigned char near_op1, + unsigned char near_op2, + unsigned char addrsize) +{ + bc->next = (bytecode *)NULL; + bc->type = BC_JMPREL; + + bc->data.jmprel.target = target->val; + bc->data.jmprel.op_sel = target->op_sel; + + if((target->op_sel == JR_SHORT_FORCED) && (!short_valid)) + Error(ERR_NO_JMPREL_FORM, (char *)NULL, "SHORT"); + if((target->op_sel == JR_NEAR_FORCED) && (!near_valid)) + Error(ERR_NO_JMPREL_FORM, (char *)NULL, "NEAR"); + + bc->data.jmprel.shortop.valid = short_valid; + if(short_valid) { + bc->data.jmprel.shortop.opcode[0] = short_op0; + bc->data.jmprel.shortop.opcode[1] = short_op1; + bc->data.jmprel.shortop.opcode[2] = short_op2; + bc->data.jmprel.shortop.opcode_len = short_opcode_len; + } - bc->offset = 0; - bc->mode_bits = mode_bits; + bc->data.jmprel.nearop.valid = near_valid; + if(near_valid) { + bc->data.jmprel.nearop.opcode[0] = near_op0; + bc->data.jmprel.nearop.opcode[1] = near_op1; + bc->data.jmprel.nearop.opcode[2] = near_op2; + bc->data.jmprel.nearop.opcode_len = near_opcode_len; + } + + bc->data.jmprel.addrsize = addrsize; + bc->data.jmprel.opersize = 0; + bc->data.jmprel.lockrep_pre = 0; + + BuildBC_Common(bc); } - + /* TODO: implement. Shouldn't be difficult. 
*/ unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len) { @@ -278,29 +374,64 @@ void DebugPrintBC(bytecode *bc) (unsigned int)bc->data.insn.ea.sib, (unsigned int)bc->data.insn.ea.valid_sib, (unsigned int)bc->data.insn.ea.need_sib); - printf("Immediate/Relative Value:\n"); + printf("Immediate Value:\n"); printf(" Val="); if (!bc->data.insn.imm.val) printf("(nil)"); else expr_print(bc->data.insn.imm.val); printf("\n"); - printf(" Len=%u, IsRel=%u, IsNeg=%u\n", + printf(" Len=%u, IsNeg=%u\n", (unsigned int)bc->data.insn.imm.len, - (unsigned int)bc->data.insn.imm.isrel, (unsigned int)bc->data.insn.imm.isneg); - printf(" FLen=%u, FRel=%u, FSign=%u\n", + printf(" FLen=%u, FSign=%u\n", (unsigned int)bc->data.insn.imm.f_len, - (unsigned int)bc->data.insn.imm.f_rel, (unsigned int)bc->data.insn.imm.f_sign); - printf("Opcode: %2x %2x OpLen=%u\n", + printf("Opcode: %2x %2x %2x OpLen=%u\n", (unsigned int)bc->data.insn.opcode[0], (unsigned int)bc->data.insn.opcode[1], + (unsigned int)bc->data.insn.opcode[2], (unsigned int)bc->data.insn.opcode_len); - printf("OperSize=%u LockRepPre=%2x\n", + printf("AddrSize=%u OperSize=%u LockRepPre=%2x\n", + (unsigned int)bc->data.insn.addrsize, (unsigned int)bc->data.insn.opersize, (unsigned int)bc->data.insn.lockrep_pre); break; + case BC_JMPREL: + printf("_Relative Jump_\n"); + printf("Target="); + expr_print(bc->data.jmprel.target); + printf("\nShort Form:\n"); + if(!bc->data.jmprel.shortop.valid) + printf(" None\n"); + else + printf(" Opcode: %2x %2x %2x OpLen=%u\n", + (unsigned int)bc->data.jmprel.shortop.opcode[0], + (unsigned int)bc->data.jmprel.shortop.opcode[1], + (unsigned int)bc->data.jmprel.shortop.opcode[2], + (unsigned int)bc->data.jmprel.shortop.opcode_len); + if(!bc->data.jmprel.nearop.valid) + printf(" None\n"); + else + printf(" Opcode: %2x %2x %2x OpLen=%u\n", + (unsigned int)bc->data.jmprel.nearop.opcode[0], + (unsigned int)bc->data.jmprel.nearop.opcode[1], + (unsigned 
int)bc->data.jmprel.nearop.opcode[2], + (unsigned int)bc->data.jmprel.nearop.opcode_len); + printf("OpSel="); + switch(bc->data.jmprel.op_sel) { + case JR_NONE: printf("None"); break; + case JR_SHORT: printf("Short"); break; + case JR_NEAR: printf("Near"); break; + case JR_SHORT_FORCED: printf("Forced Short"); break; + case JR_NEAR_FORCED: printf("Forced Near"); break; + default: printf("UNKNOWN!!"); break; + } + printf("\nAddrSize=%u OperSize=%u LockRepPre=%2x\n", + (unsigned int)bc->data.jmprel.addrsize, + (unsigned int)bc->data.jmprel.opersize, + (unsigned int)bc->data.jmprel.lockrep_pre); + break; case BC_DATA: printf("_Data_\n"); for(i=0; i<bc->len; i++) { diff --git a/libyasm/bytecode.h b/libyasm/bytecode.h index d7c89f59..21592850 100644 --- a/libyasm/bytecode.h +++ b/libyasm/bytecode.h @@ -1,4 +1,4 @@ -/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $ +/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $ * Bytecode utility functions header file * * Copyright (C) 2001 Peter Johnson @@ -41,18 +41,30 @@ typedef struct immval_s { struct expr_s *val; unsigned char len; /* length of val (in bytes), 0 if none */ - unsigned char isrel; unsigned char isneg; /* the value has been explicitly negated */ unsigned char f_len; /* final imm length */ - unsigned char f_rel; /* 1 if final imm should be rel */ unsigned char f_sign; /* 1 if final imm should be signed */ } immval; +typedef enum jmprel_opcode_sel_e { + JR_NONE, + JR_SHORT, + JR_NEAR, + JR_SHORT_FORCED, + JR_NEAR_FORCED +} jmprel_opcode_sel; + +typedef struct targetval_s { + struct expr_s *val; + + jmprel_opcode_sel op_sel; +} targetval; + typedef struct bytecode_s { struct bytecode_s *next; - enum { BC_INSN, BC_DATA, BC_RESERVE } type; + enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; union { struct { @@ -67,6 +79,23 @@ typedef struct bytecode_s { unsigned char opersize; /* 0 indicates no override */ unsigned char lockrep_pre; /* 0 indicates no prefix */ } insn; + struct { + struct expr_s *target; /* 
target location */ + + struct { + unsigned char opcode[3]; + unsigned char opcode_len; + unsigned char valid; /* does the opcode exist? */ + } shortop, nearop; + + /* which opcode are we using? */ + /* The *FORCED forms are specified in the source as such */ + jmprel_opcode_sel op_sel; + + unsigned char addrsize; /* 0 indicates no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + } jmprel; struct { unsigned char *data; } data; @@ -98,6 +127,8 @@ void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); +void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); + void BuildBC_Insn(bytecode *bc, unsigned char opersize, unsigned char opcode_len, @@ -108,8 +139,21 @@ void BuildBC_Insn(bytecode *bc, unsigned char spare, immval *im_ptr, unsigned char im_len, - unsigned char im_sign, - unsigned char im_rel); + unsigned char im_sign); + +void BuildBC_JmpRel(bytecode *bc, + targetval *target, + unsigned char short_valid, + unsigned char short_opcode_len, + unsigned char short_op0, + unsigned char short_op1, + unsigned char short_op2, + unsigned char near_valid, + unsigned char near_opcode_len, + unsigned char near_op0, + unsigned char near_op1, + unsigned char near_op2, + unsigned char addrsize); unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len); diff --git a/libyasm/errwarn.c b/libyasm/errwarn.c index 807b90e1..c0f3c058 100644 --- a/libyasm/errwarn.c +++ b/libyasm/errwarn.c @@ -1,4 +1,4 @@ -/* $Id: errwarn.c,v 1.14 2001/07/04 20:57:53 peter Exp $ +/* $Id: errwarn.c,v 1.15 2001/07/11 04:07:10 peter Exp $ * Error and warning reporting and related functions. 
* * Copyright (C) 2001 Peter Johnson @@ -56,7 +56,8 @@ static char *err_msgs[] = { "label or instruction expected at start of line", "expression syntax error", "duplicate definition of `%1'; previously defined line %2", - "mismatch in operand sizes" + "mismatch in operand sizes", + "no %s form of that jump instruction exists" }; static char *warn_msgs[] = { @@ -65,7 +66,8 @@ static char *warn_msgs[] = { "%s value exceeds bounds", "multiple segment overrides, using leftmost", "multiple LOCK or REP prefixes, using leftmost", - "no non-local label before '%s'" + "no non-local label before '%s'", + "multiple SHORT or NEAR specifiers, using leftmost" }; /* hate to define these as static buffers; better solution would be to use @@ -103,6 +105,13 @@ void yyerror(char *s) Error(ERR_PARSER, (char *)NULL, s); } +void InternalError(unsigned int line, char *file, char *message) +{ + fprintf(stderr, "INTERNAL ERROR at %s, line %d: %s\n", file, line, + message); + exit(EXIT_FAILURE); +} + void Fatal(fatal_num num) { fprintf(stderr, "FATAL: %s\n", fatal_msgs[num]); diff --git a/libyasm/errwarn.h b/libyasm/errwarn.h index 6ef8e013..66098a9d 100644 --- a/libyasm/errwarn.h +++ b/libyasm/errwarn.h @@ -1,4 +1,4 @@ -/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $ +/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $ * Error and warning reporting and related functions header file. 
* * Copyright (C) 2001 Peter Johnson @@ -29,6 +29,7 @@ typedef enum { FATAL_NOMEM } fatal_num; +void InternalError(unsigned int line, char *file, char *message); void Fatal(fatal_num); typedef enum { @@ -41,7 +42,8 @@ typedef enum { ERR_INVALID_LINE, ERR_EXP_SYNTAX, ERR_DUPLICATE_DEF, - ERR_OP_SIZE_MISMATCH + ERR_OP_SIZE_MISMATCH, + ERR_NO_JMPREL_FORM } err_num; void Error(err_num, char *, ...); @@ -52,7 +54,8 @@ typedef enum { WARN_VALUE_EXCEEDS_BOUNDS, WARN_MULT_SEG_OVERRIDE, WARN_MULT_LOCKREP_PREFIX, - WARN_NO_BASE_LABEL + WARN_NO_BASE_LABEL, + WARN_MULT_SHORTNEAR } warn_num; void Warning(warn_num, char *, ...); diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in index 911a6c41..410862b6 100644 --- a/modules/parsers/nasm/bison.y.in +++ b/modules/parsers/nasm/bison.y.in @@ -1,4 +1,4 @@ -/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $ +/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $ * Main bison parser * * Copyright (C) 2001 Peter Johnson, Michael Urman @@ -45,10 +45,11 @@ extern void yyerror(char *); char *name; int line; } syminfo; - unsigned char groupdata[3]; + unsigned char groupdata[4]; effaddr ea_val; expr *exp; immval im_val; + targetval tgt_val; bytecode bc; } @@ -80,7 +81,7 @@ extern void yyerror(char *); %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp +%type mem memaddr memexp memfar %type mem8x mem16x mem32x mem64x mem80x mem128x %type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 %type rm8x rm16x rm32x /*rm64x rm128x*/ @@ -89,6 +90,7 @@ extern void yyerror(char *); %type expr %type explabel %type label_id +%type target %left '|' %left '^' @@ -215,6 +217,10 @@ mem80x: TWORD mem { $$ = $2; } mem128x: DQWORD mem { $$ = $2; } ; +/* FAR memory, for jmp and call */ +memfar: FAR mem { $$ = $2; } +; + /* implicit memory */ mem8: mem | mem8x @@ -300,6 +306,15 @@ imm32: imm | imm32x ; +/* jump targets */ +target: explabel { + $$.val = expr_new_ident(EXPR_SYM, 
ExprSym(sym_use_get($1.name, SYM_LABEL))); + $$.op_sel = JR_NONE; + } + | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } + | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +; + /* expression trees */ expr: INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); } diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl index 25c8220a..a122e084 100755 --- a/modules/parsers/nasm/gen_instr.pl +++ b/modules/parsers/nasm/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $ +# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -26,13 +26,21 @@ use Getopt::Long; my $VERSION = "0.0.1"; # useful constants for instruction arrays -use constant INST => 0; -use constant OPERANDS => 1; -use constant OPSIZE => 2; -use constant OPCODE => 3; -use constant EFFADDR => 4; -use constant IMM => 5; -use constant CPU => 6; +# common +use constant INST => 0; +use constant OPERANDS => 1; +# general format +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; +# relative target format +use constant ADSIZE => 2; +use constant SHORTOPCODE => 3; +use constant NEAROPCODE => 4; +use constant SHORTCPU => 5; +use constant NEARCPU => 6; use constant TOO_MANY_ERRORS => 20; @@ -81,6 +89,7 @@ my $valid_regs = join '|', qw( reg8x reg16x reg32x reg1632x reg64x reg80x reg128x mem8 mem16 mem32 mem1632 mem64 mem80 mem128 mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + target memfar ); my $valid_opcodes = join '|', qw( [0-9A-F]{2} @@ -158,28 +167,56 @@ sub read_instructions ($) # i still say changing instrs.dat would be better ;) $args =~ s/\$0\.([1-4])/ '$0.' . 
($1-1) /eg; - my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi; - die "Invalid Operation Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; - die "Invalid Effective Address\n" - if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; - die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # detect relative target format by looking for "target" in args + if($args =~ m/target/oi) + { + my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) = + split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Address Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Short Opcode\n" + if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Near Opcode\n" + if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Short CPU\n" + if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + die "Invalid Near CPU\n" + if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile 
line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu]; + } else { + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; + die "Invalid Effective Address\n" + if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } } sub add_group_member ($$$$$) @@ -439,212 +476,308 @@ sub output_yacc ($@) (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0)); my $count = 0; foreach my $inst (@{$groups->{$group}{rules}}) { - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - my $to = $tokens =~ m/\bTO\b/ ? 
1 : 0; # offset args - - my $func = "BuildBC_Insn"; - - # Create the argument list for BuildBC - my @args; - - # First argument is always &$$ - push @args, '&$$,'; - - # opcode size - push @args, "$inst->[OPSIZE],"; - $args[-1] =~ s/nil/0/; - - # number of bytes of opcodes - push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; - - # opcode piece 1 (and 2 if attached) - push @args, $inst->[OPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; - $args[-1] .= ','; - - # opcode piece 2 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,/o; - # opcode piece 3 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; - - # effective addresses - push @args, $inst->[EFFADDR]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - # don't let a $0.\d match slip into the following rules. - $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; - $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] - ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; - $args[-1] .= ','; - - die $args[-1] if $args[-1] =~ m/\d+[ri]/; - - # immediate sources - push @args, $inst->[IMM]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(immval *)NULL, 0/; - # don't match $0.\d in the following rules. - $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s[^([0-9A-Fa-f]+),] - [ConvertIntToImm((immval *)NULL, 0x$1),]; - $args[-1] =~ s[^\$0.(\d+),] - [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; - - # divide the second, and only the second, by 8 bits/byte - $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; - - $args[-1] =~ s/(\&\$\d+)(r)?/$1/; - $args[-1] .= ($2||'') eq 'r' ? 
', 1' : ', 0'; - - die $args[-1] if $args[-1] =~ m/\d+[ris]/; - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # see if we match one of the cases to defer - if (($inst->[OPERANDS]||"") =~ m/,ONE/) - { - $ONE = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) - { - $AL = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) - { - $AX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) - { - $EAX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) - { - $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) - { - $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) - { - $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + if($inst->[OPERANDS] =~ m/target/oi) { - $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; - } + # relative target format + # build the instruction in pieces. 
- # or if we've deferred and we match the folding version - elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) - { - my $immarg = get_token_number ($tokens, "imm8"); + # rulename = instruction + my $rule = "$inst->[INST]"; - $ONE->[4] = 1; - print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); - } - elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) - { - $AL->[4] = 1; - my $regarg = get_token_number ($tokens, "reg8"); + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + my $func = "BuildBC_JmpRel"; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); - } - elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) - { - $AX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg16"); + # Create the argument list for BuildBC + my @args; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); - } - elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) - { - $EAX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg32"); + # First argument is always &$$ + push @args, '&$$,'; + + # Target argument: HACK: Always assumed to be arg 1. + push @args, '&$2,'; + + # test for short opcode "nil" + if($inst->[SHORTOPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of short opcode + push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[SHORTOPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . 
($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o; + } + + # test for near opcode "nil" + if($inst->[NEAROPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of near opcode + push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[NEAROPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o; + } + + # address size + push @args, "$inst->[ADSIZE]"; + $args[-1] =~ s/nil/0/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + # generate the grammar + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); } - elsif (($XCHG_AX[0] or $XCHG_AX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + else { - my $first = 1; - for (my $i=0; $i < @XCHG_AX; ++$i) + # general instruction format + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + # offset args + my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 
1 : 0; + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # operand size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/; + $args[-1] =~ s/nil/0/; + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? 
', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+s/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + + # see if we match one of the cases to defer + if (($inst->[OPERANDS]||"") =~ m/,ONE/) + { + $ONE = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) + { + $AL = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) + { + $AX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) + { + $EAX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) + { + $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) + { + $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) + { + $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + { + $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; + } + + # or if we've deferred and we match the folding version + elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) + { + my $immarg = get_token_number ($tokens, "imm8"); + + $ONE->[4] = 1; + print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); + } + elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) + { + $AL->[4] = 1; + my $regarg = get_token_number ($tokens, "reg8"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); + } + elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) { - if($XCHG_AX[$i]) + $AX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg16"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); + } + elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) + { + $EAX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg32"); + + print GRAMMAR cond_action ($rule, 
$tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + } + elsif (($XCHG_AX[0] or $XCHG_AX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + { + my $first = 1; + for (my $i=0; $i < @XCHG_AX; ++$i) { - $XCHG_AX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg16 - # instead of always getting the first. - my $regarg = - get_token_number ($tokens, "reg16") + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); - $first = 0; - } - else + if($XCHG_AX[$i]) { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + $XCHG_AX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg16 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg16") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_EAX; ++$i) + elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) { - if($XCHG_EAX[$i]) + my $first = 1; + for (my $i=0; $i < @XCHG_EAX; ++$i) { - $XCHG_EAX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg32 - # instead of always getting the first. 
- my $regarg = - get_token_number ($tokens, "reg32") + $i*2; - - if ($first) + if($XCHG_EAX[$i]) { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $XCHG_EAX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg32 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg32") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - # otherwise, generate the normal version - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + # otherwise, generate the normal version + else + { + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + } } } diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y index 97609a3a..ccd537a3 100644 --- a/modules/parsers/nasm/nasm-bison.y +++ b/modules/parsers/nasm/nasm-bison.y @@ -1,4 +1,4 @@ -/* $Id: nasm-bison.y,v 1.19 2001/07/06 06:25:53 mu Exp $ +/* $Id: nasm-bison.y,v 1.20 2001/07/11 04:07:10 peter Exp $ * Main bison parser * * Copyright (C) 2001 Peter Johnson, Michael Urman @@ -45,10 +45,11 @@ extern void yyerror(char *); char *name; int line; } syminfo; - unsigned char groupdata[3]; + unsigned char groupdata[4]; effaddr ea_val; expr *exp; immval im_val; + targetval tgt_val; bytecode bc; } @@ -80,7 +81,7 @@ extern void yyerror(char *); %type <bc> line exp instr instrbase label %type <int_val> fpureg reg32 reg16 reg8 segreg -%type <ea_val> mem memaddr memexp +%type <ea_val> mem memaddr memexp memfar %type <ea_val> mem8x mem16x mem32x mem64x
mem80x mem128x %type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632 %type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/ @@ -89,6 +90,7 @@ extern void yyerror(char *); %type <exp> expr %type <syminfo> explabel %type <str_val> label_id +%type <tgt_val> target %left '|' %left '^' @@ -215,6 +217,10 @@ mem80x: TWORD mem { $$ = $2; } mem128x: DQWORD mem { $$ = $2; } ; +/* FAR memory, for jmp and call */ +memfar: FAR mem { $$ = $2; } +; + /* implicit memory */ mem8: mem | mem8x @@ -300,6 +306,15 @@ imm32: imm | imm32x ; +/* jump targets */ +target: explabel { + $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL))); + $$.op_sel = JR_NONE; + } + | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } + | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +; + /* expression trees */ expr: INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); } diff --git a/src/bison.y.in b/src/bison.y.in index 911a6c41..410862b6 100644 --- a/src/bison.y.in +++ b/src/bison.y.in @@ -1,4 +1,4 @@ -/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $ +/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $ * Main bison parser * * Copyright (C) 2001 Peter Johnson, Michael Urman @@ -45,10 +45,11 @@ extern void yyerror(char *); char *name; int line; } syminfo; - unsigned char groupdata[3]; + unsigned char groupdata[4]; effaddr ea_val; expr *exp; immval im_val; + targetval tgt_val; bytecode bc; } @@ -80,7 +81,7 @@ extern void yyerror(char *); %type <bc> line exp instr instrbase label %type <int_val> fpureg reg32 reg16 reg8 segreg -%type <ea_val> mem memaddr memexp +%type <ea_val> mem memaddr memexp memfar %type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x %type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632 %type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/ @@ -89,6 +90,7 @@ extern void yyerror(char *); %type <exp> expr %type <syminfo> explabel %type <str_val> label_id +%type <tgt_val> target %left '|' %left '^' @@ -215,6 +217,10 @@ mem80x: TWORD mem { $$ = $2; } mem128x: DQWORD mem { $$ = $2; } ; +/* FAR memory, for jmp and call */ +memfar: FAR mem { $$ = $2; } +; + /* implicit
memory */ mem8: mem | mem8x @@ -300,6 +306,15 @@ imm32: imm | imm32x ; +/* jump targets */ +target: explabel { + $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL))); + $$.op_sel = JR_NONE; + } + | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } + | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +; + /* expression trees */ expr: INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); } diff --git a/src/bytecode.c b/src/bytecode.c index a180aed1..a0c15a08 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1,4 +1,4 @@ -/* $Id: bytecode.c,v 1.11 2001/07/06 06:25:53 mu Exp $ +/* $Id: bytecode.c,v 1.12 2001/07/11 04:07:10 peter Exp $ * Bytecode utility functions * * Copyright (C) 2001 Peter Johnson @@ -120,7 +120,6 @@ immval *ConvertIntToImm(immval *ptr, unsigned long int_val) else ptr->len = 4; - ptr->isrel = 0; ptr->isneg = 0; return ptr; @@ -133,7 +132,6 @@ immval *ConvertExprToImm(immval *ptr, expr *expr_ptr) ptr->val = expr_ptr; - ptr->isrel = 0; ptr->isneg = 0; return ptr; @@ -167,7 +165,18 @@ void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize) if(!bc) return; - bc->data.insn.opersize = opersize; + switch(bc->type) { + case BC_INSN: + bc->data.insn.opersize = opersize; + break; + case BC_JMPREL: + bc->data.jmprel.opersize = opersize; + break; + default: + InternalError(__LINE__, __FILE__, + "OperSize override applied to non-instruction"); + return; + } } void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) @@ -175,18 +184,65 @@ void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize) if(!bc) return; - bc->data.insn.addrsize = addrsize; + switch(bc->type) { + case BC_INSN: + bc->data.insn.addrsize = addrsize; + break; + case BC_JMPREL: + bc->data.jmprel.addrsize = addrsize; + break; + default: + InternalError(__LINE__, __FILE__, + "AddrSize override applied to non-instruction"); + return; + } } void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix) { + 
unsigned char *lockrep_pre = (unsigned char *)NULL; + if(!bc) return; - if(bc->data.insn.lockrep_pre != 0) + switch(bc->type) { + case BC_INSN: + lockrep_pre = &bc->data.insn.lockrep_pre; + break; + case BC_JMPREL: + lockrep_pre = &bc->data.jmprel.lockrep_pre; + break; + default: + InternalError(__LINE__, __FILE__, + "LockRep prefix applied to non-instruction"); + return; + } + + if(*lockrep_pre != 0) Warning(WARN_MULT_LOCKREP_PREFIX, (char *)NULL); - bc->data.insn.lockrep_pre = prefix; + *lockrep_pre = prefix; +} + +void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel) +{ + if(!old_sel) + return; + + if((*old_sel == JR_SHORT_FORCED) || (*old_sel == JR_NEAR_FORCED)) + Warning(WARN_MULT_SHORTNEAR, (char *)NULL); + *old_sel = new_sel; +} + +static void BuildBC_Common(bytecode *bc) +{ + bc->len = 0; + + bc->filename = (char *)NULL; + bc->lineno = line_number; + + bc->offset = 0; + bc->mode_bits = mode_bits; } void BuildBC_Insn(bytecode *bc, @@ -199,8 +255,7 @@ void BuildBC_Insn(bytecode *bc, unsigned char spare, immval *im_ptr, unsigned char im_len, - unsigned char im_sign, - unsigned char im_rel) + unsigned char im_sign) { bc->next = (bytecode *)NULL; bc->type = BC_INSN; @@ -218,12 +273,10 @@ void BuildBC_Insn(bytecode *bc, if(im_ptr) { bc->data.insn.imm = *im_ptr; - bc->data.insn.imm.f_rel = im_rel; bc->data.insn.imm.f_sign = im_sign; bc->data.insn.imm.f_len = im_len; } else { bc->data.insn.imm.len = 0; - bc->data.insn.imm.f_rel = 0; bc->data.insn.imm.f_sign = 0; bc->data.insn.imm.f_len = 0; } @@ -235,16 +288,59 @@ void BuildBC_Insn(bytecode *bc, bc->data.insn.addrsize = 0; bc->data.insn.opersize = opersize; + bc->data.insn.lockrep_pre = 0; - bc->len = 0; + BuildBC_Common(bc); +} - bc->filename = (char *)NULL; - bc->lineno = line_number; +void BuildBC_JmpRel(bytecode *bc, + targetval *target, + unsigned char short_valid, + unsigned char short_opcode_len, + unsigned char short_op0, + unsigned char short_op1, + unsigned char short_op2, + unsigned 
char near_valid, + unsigned char near_opcode_len, + unsigned char near_op0, + unsigned char near_op1, + unsigned char near_op2, + unsigned char addrsize) +{ + bc->next = (bytecode *)NULL; + bc->type = BC_JMPREL; + + bc->data.jmprel.target = target->val; + bc->data.jmprel.op_sel = target->op_sel; + + if((target->op_sel == JR_SHORT_FORCED) && (!short_valid)) + Error(ERR_NO_JMPREL_FORM, (char *)NULL, "SHORT"); + if((target->op_sel == JR_NEAR_FORCED) && (!near_valid)) + Error(ERR_NO_JMPREL_FORM, (char *)NULL, "NEAR"); + + bc->data.jmprel.shortop.valid = short_valid; + if(short_valid) { + bc->data.jmprel.shortop.opcode[0] = short_op0; + bc->data.jmprel.shortop.opcode[1] = short_op1; + bc->data.jmprel.shortop.opcode[2] = short_op2; + bc->data.jmprel.shortop.opcode_len = short_opcode_len; + } - bc->offset = 0; - bc->mode_bits = mode_bits; + bc->data.jmprel.nearop.valid = near_valid; + if(near_valid) { + bc->data.jmprel.nearop.opcode[0] = near_op0; + bc->data.jmprel.nearop.opcode[1] = near_op1; + bc->data.jmprel.nearop.opcode[2] = near_op2; + bc->data.jmprel.nearop.opcode_len = near_opcode_len; + } + + bc->data.jmprel.addrsize = addrsize; + bc->data.jmprel.opersize = 0; + bc->data.jmprel.lockrep_pre = 0; + + BuildBC_Common(bc); } - + /* TODO: implement. Shouldn't be difficult. 
*/ unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len) { @@ -278,29 +374,64 @@ void DebugPrintBC(bytecode *bc) (unsigned int)bc->data.insn.ea.sib, (unsigned int)bc->data.insn.ea.valid_sib, (unsigned int)bc->data.insn.ea.need_sib); - printf("Immediate/Relative Value:\n"); + printf("Immediate Value:\n"); printf(" Val="); if (!bc->data.insn.imm.val) printf("(nil)"); else expr_print(bc->data.insn.imm.val); printf("\n"); - printf(" Len=%u, IsRel=%u, IsNeg=%u\n", + printf(" Len=%u, IsNeg=%u\n", (unsigned int)bc->data.insn.imm.len, - (unsigned int)bc->data.insn.imm.isrel, (unsigned int)bc->data.insn.imm.isneg); - printf(" FLen=%u, FRel=%u, FSign=%u\n", + printf(" FLen=%u, FSign=%u\n", (unsigned int)bc->data.insn.imm.f_len, - (unsigned int)bc->data.insn.imm.f_rel, (unsigned int)bc->data.insn.imm.f_sign); - printf("Opcode: %2x %2x OpLen=%u\n", + printf("Opcode: %2x %2x %2x OpLen=%u\n", (unsigned int)bc->data.insn.opcode[0], (unsigned int)bc->data.insn.opcode[1], + (unsigned int)bc->data.insn.opcode[2], (unsigned int)bc->data.insn.opcode_len); - printf("OperSize=%u LockRepPre=%2x\n", + printf("AddrSize=%u OperSize=%u LockRepPre=%2x\n", + (unsigned int)bc->data.insn.addrsize, (unsigned int)bc->data.insn.opersize, (unsigned int)bc->data.insn.lockrep_pre); break; + case BC_JMPREL: + printf("_Relative Jump_\n"); + printf("Target="); + expr_print(bc->data.jmprel.target); + printf("\nShort Form:\n"); + if(!bc->data.jmprel.shortop.valid) + printf(" None\n"); + else + printf(" Opcode: %2x %2x %2x OpLen=%u\n", + (unsigned int)bc->data.jmprel.shortop.opcode[0], + (unsigned int)bc->data.jmprel.shortop.opcode[1], + (unsigned int)bc->data.jmprel.shortop.opcode[2], + (unsigned int)bc->data.jmprel.shortop.opcode_len); + if(!bc->data.jmprel.nearop.valid) + printf(" None\n"); + else + printf(" Opcode: %2x %2x %2x OpLen=%u\n", + (unsigned int)bc->data.jmprel.nearop.opcode[0], + (unsigned int)bc->data.jmprel.nearop.opcode[1], + (unsigned 
int)bc->data.jmprel.nearop.opcode[2], + (unsigned int)bc->data.jmprel.nearop.opcode_len); + printf("OpSel="); + switch(bc->data.jmprel.op_sel) { + case JR_NONE: printf("None"); break; + case JR_SHORT: printf("Short"); break; + case JR_NEAR: printf("Near"); break; + case JR_SHORT_FORCED: printf("Forced Short"); break; + case JR_NEAR_FORCED: printf("Forced Near"); break; + default: printf("UNKNOWN!!"); break; + } + printf("\nAddrSize=%u OperSize=%u LockRepPre=%2x\n", + (unsigned int)bc->data.jmprel.addrsize, + (unsigned int)bc->data.jmprel.opersize, + (unsigned int)bc->data.jmprel.lockrep_pre); + break; case BC_DATA: printf("_Data_\n"); for(i=0; i<bc->len; i++) { diff --git a/src/bytecode.h b/src/bytecode.h index d7c89f59..21592850 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -1,4 +1,4 @@ -/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $ +/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $ * Bytecode utility functions header file * * Copyright (C) 2001 Peter Johnson @@ -41,18 +41,30 @@ typedef struct immval_s { struct expr_s *val; unsigned char len; /* length of val (in bytes), 0 if none */ - unsigned char isrel; unsigned char isneg; /* the value has been explicitly negated */ unsigned char f_len; /* final imm length */ - unsigned char f_rel; /* 1 if final imm should be rel */ unsigned char f_sign; /* 1 if final imm should be signed */ } immval; +typedef enum jmprel_opcode_sel_e { + JR_NONE, + JR_SHORT, + JR_NEAR, + JR_SHORT_FORCED, + JR_NEAR_FORCED +} jmprel_opcode_sel; + +typedef struct targetval_s { + struct expr_s *val; + + jmprel_opcode_sel op_sel; +} targetval; + typedef struct bytecode_s { struct bytecode_s *next; - enum { BC_INSN, BC_DATA, BC_RESERVE } type; + enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type; union { struct { @@ -67,6 +79,23 @@ typedef struct bytecode_s { unsigned char opersize; /* 0 indicates no override */ unsigned char lockrep_pre; /* 0 indicates no prefix */ } insn; + struct { + struct expr_s *target; /* target location
*/ + + struct { + unsigned char opcode[3]; + unsigned char opcode_len; + unsigned char valid; /* does the opcode exist? */ + } shortop, nearop; + + /* which opcode are we using? */ + /* The *FORCED forms are specified in the source as such */ + jmprel_opcode_sel op_sel; + + unsigned char addrsize; /* 0 indicates no override */ + unsigned char opersize; /* 0 indicates no override */ + unsigned char lockrep_pre; /* 0 indicates no prefix */ + } jmprel; struct { unsigned char *data; } data; @@ -98,6 +127,8 @@ void SetInsnOperSizeOverride(bytecode *bc, unsigned char opersize); void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize); void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix); +void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel); + void BuildBC_Insn(bytecode *bc, unsigned char opersize, unsigned char opcode_len, @@ -108,8 +139,21 @@ void BuildBC_Insn(bytecode *bc, unsigned char spare, immval *im_ptr, unsigned char im_len, - unsigned char im_sign, - unsigned char im_rel); + unsigned char im_sign); + +void BuildBC_JmpRel(bytecode *bc, + targetval *target, + unsigned char short_valid, + unsigned char short_opcode_len, + unsigned char short_op0, + unsigned char short_op1, + unsigned char short_op2, + unsigned char near_valid, + unsigned char near_opcode_len, + unsigned char near_op0, + unsigned char near_op1, + unsigned char near_op2, + unsigned char addrsize); unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len); diff --git a/src/errwarn.c b/src/errwarn.c index 807b90e1..c0f3c058 100644 --- a/src/errwarn.c +++ b/src/errwarn.c @@ -1,4 +1,4 @@ -/* $Id: errwarn.c,v 1.14 2001/07/04 20:57:53 peter Exp $ +/* $Id: errwarn.c,v 1.15 2001/07/11 04:07:10 peter Exp $ * Error and warning reporting and related functions. 
* * Copyright (C) 2001 Peter Johnson @@ -56,7 +56,8 @@ static char *err_msgs[] = { "label or instruction expected at start of line", "expression syntax error", "duplicate definition of `%1'; previously defined line %2", - "mismatch in operand sizes" + "mismatch in operand sizes", + "no %s form of that jump instruction exists" }; static char *warn_msgs[] = { @@ -65,7 +66,8 @@ static char *warn_msgs[] = { "%s value exceeds bounds", "multiple segment overrides, using leftmost", "multiple LOCK or REP prefixes, using leftmost", - "no non-local label before '%s'" + "no non-local label before '%s'", + "multiple SHORT or NEAR specifiers, using leftmost" }; /* hate to define these as static buffers; better solution would be to use @@ -103,6 +105,13 @@ void yyerror(char *s) Error(ERR_PARSER, (char *)NULL, s); } +void InternalError(unsigned int line, char *file, char *message) +{ + fprintf(stderr, "INTERNAL ERROR at %s, line %d: %s\n", file, line, + message); + exit(EXIT_FAILURE); +} + void Fatal(fatal_num num) { fprintf(stderr, "FATAL: %s\n", fatal_msgs[num]); diff --git a/src/errwarn.h b/src/errwarn.h index 6ef8e013..66098a9d 100644 --- a/src/errwarn.h +++ b/src/errwarn.h @@ -1,4 +1,4 @@ -/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $ +/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $ * Error and warning reporting and related functions header file. 
* * Copyright (C) 2001 Peter Johnson @@ -29,6 +29,7 @@ typedef enum { FATAL_NOMEM } fatal_num; +void InternalError(unsigned int line, char *file, char *message); void Fatal(fatal_num); typedef enum { @@ -41,7 +42,8 @@ typedef enum { ERR_INVALID_LINE, ERR_EXP_SYNTAX, ERR_DUPLICATE_DEF, - ERR_OP_SIZE_MISMATCH + ERR_OP_SIZE_MISMATCH, + ERR_NO_JMPREL_FORM } err_num; void Error(err_num, char *, ...); @@ -52,7 +54,8 @@ typedef enum { WARN_VALUE_EXCEEDS_BOUNDS, WARN_MULT_SEG_OVERRIDE, WARN_MULT_LOCKREP_PREFIX, - WARN_NO_BASE_LABEL + WARN_NO_BASE_LABEL, + WARN_MULT_SHORTNEAR } warn_num; void Warning(warn_num, char *, ...); diff --git a/src/gen_instr.pl b/src/gen_instr.pl index 25c8220a..a122e084 100755 --- a/src/gen_instr.pl +++ b/src/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $ +# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -26,13 +26,21 @@ use Getopt::Long; my $VERSION = "0.0.1"; # useful constants for instruction arrays -use constant INST => 0; -use constant OPERANDS => 1; -use constant OPSIZE => 2; -use constant OPCODE => 3; -use constant EFFADDR => 4; -use constant IMM => 5; -use constant CPU => 6; +# common +use constant INST => 0; +use constant OPERANDS => 1; +# general format +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; +# relative target format +use constant ADSIZE => 2; +use constant SHORTOPCODE => 3; +use constant NEAROPCODE => 4; +use constant SHORTCPU => 5; +use constant NEARCPU => 6; use constant TOO_MANY_ERRORS => 20; @@ -81,6 +89,7 @@ my $valid_regs = join '|', qw( reg8x reg16x reg32x reg1632x reg64x reg80x reg128x mem8 mem16 mem32 mem1632 mem64 mem80 mem128 mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + target memfar ); my $valid_opcodes = join '|', qw( [0-9A-F]{2} @@ -158,28 +167,56 @@ sub 
read_instructions ($) # i still say changing instrs.dat would be better ;) $args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg; - my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi; - die "Invalid Operation Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; - die "Invalid Effective Address\n" - if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; - die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! 
off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # detect relative target format by looking for "target" in args + if($args =~ m/target/oi) + { + my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) = + split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Address Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Short Opcode\n" + if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Near Opcode\n" + if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Short CPU\n" + if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + die "Invalid Near CPU\n" + if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! 
off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu]; + } else { + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; + die "Invalid Effective Address\n" + if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } } sub add_group_member ($$$$$) @@ -439,212 +476,308 @@ sub output_yacc ($@) (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0)); my $count = 0; foreach my $inst (@{$groups->{$group}{rules}}) { - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - my $to = $tokens =~ m/\bTO\b/ ? 
1 : 0; # offset args - - my $func = "BuildBC_Insn"; - - # Create the argument list for BuildBC - my @args; - - # First argument is always &$$ - push @args, '&$$,'; - - # opcode size - push @args, "$inst->[OPSIZE],"; - $args[-1] =~ s/nil/0/; - - # number of bytes of opcodes - push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; - - # opcode piece 1 (and 2 if attached) - push @args, $inst->[OPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; - $args[-1] .= ','; - - # opcode piece 2 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,/o; - # opcode piece 3 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; - - # effective addresses - push @args, $inst->[EFFADDR]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - # don't let a $0.\d match slip into the following rules. - $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; - $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] - ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; - $args[-1] .= ','; - - die $args[-1] if $args[-1] =~ m/\d+[ri]/; - - # immediate sources - push @args, $inst->[IMM]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(immval *)NULL, 0/; - # don't match $0.\d in the following rules. - $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s[^([0-9A-Fa-f]+),] - [ConvertIntToImm((immval *)NULL, 0x$1),]; - $args[-1] =~ s[^\$0.(\d+),] - [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; - - # divide the second, and only the second, by 8 bits/byte - $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; - - $args[-1] =~ s/(\&\$\d+)(r)?/$1/; - $args[-1] .= ($2||'') eq 'r' ? 
', 1' : ', 0'; - - die $args[-1] if $args[-1] =~ m/\d+[ris]/; - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # see if we match one of the cases to defer - if (($inst->[OPERANDS]||"") =~ m/,ONE/) - { - $ONE = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) - { - $AL = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) - { - $AX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) - { - $EAX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) - { - $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) - { - $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) - { - $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + if($inst->[OPERANDS] =~ m/target/oi) { - $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; - } + # relative target format + # build the instruction in pieces. 
- # or if we've deferred and we match the folding version - elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) - { - my $immarg = get_token_number ($tokens, "imm8"); + # rulename = instruction + my $rule = "$inst->[INST]"; - $ONE->[4] = 1; - print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); - } - elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) - { - $AL->[4] = 1; - my $regarg = get_token_number ($tokens, "reg8"); + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + my $func = "BuildBC_JmpRel"; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); - } - elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) - { - $AX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg16"); + # Create the argument list for BuildBC + my @args; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); - } - elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) - { - $EAX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg32"); + # First argument is always &$$ + push @args, '&$$,'; + + # Target argument: HACK: Always assumed to be arg 1. + push @args, '&$2,'; + + # test for short opcode "nil" + if($inst->[SHORTOPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of short opcode + push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[SHORTOPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . 
($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o; + } + + # test for near opcode "nil" + if($inst->[NEAROPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of near opcode + push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[NEAROPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o; + } + + # address size + push @args, "$inst->[ADSIZE]"; + $args[-1] =~ s/nil/0/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + # generate the grammar + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); } - elsif (($XCHG_AX[0] or $XCHG_AX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + else { - my $first = 1; - for (my $i=0; $i < @XCHG_AX; ++$i) + # general instruction format + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + # offset args + my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 
1 : 0; + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # operand size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/; + $args[-1] =~ s/nil/0/; + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? 
', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+s/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + + # see if we match one of the cases to defer + if (($inst->[OPERANDS]||"") =~ m/,ONE/) + { + $ONE = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) + { + $AL = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) + { + $AX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) + { + $EAX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) + { + $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) + { + $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) + { + $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + { + $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; + } + + # or if we've deferred and we match the folding version + elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) + { + my $immarg = get_token_number ($tokens, "imm8"); + + $ONE->[4] = 1; + print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); + } + elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) + { + $AL->[4] = 1; + my $regarg = get_token_number ($tokens, "reg8"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); + } + elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) { - if($XCHG_AX[$i]) + $AX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg16"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); + } + elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) + { + $EAX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg32"); + + print GRAMMAR cond_action ($rule, 
$tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + } + elsif (($XCHG_AX[0] or $XCHG_AX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + { + my $first = 1; + for (my $i=0; $i < @XCHG_AX; ++$i) { - $XCHG_AX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg16 - # instead of always getting the first. - my $regarg = - get_token_number ($tokens, "reg16") + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); - $first = 0; - } - else + if($XCHG_AX[$i]) { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + $XCHG_AX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg16 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg16") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_EAX; ++$i) + elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) { - if($XCHG_EAX[$i]) + my $first = 1; + for (my $i=0; $i < @XCHG_EAX; ++$i) { - $XCHG_EAX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg32 - # instead of always getting the first. 
- my $regarg = - get_token_number ($tokens, "reg32") + $i*2; - - if ($first) + if($XCHG_EAX[$i]) { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $XCHG_EAX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg32 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg32") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - # otherwise, generate the normal version - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + # otherwise, generate the normal version + else + { + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + } } } diff --git a/src/instrs.dat b/src/instrs.dat index 95b7bd35..f5b7b05f 100644 --- a/src/instrs.dat +++ b/src/instrs.dat @@ -1,4 +1,4 @@ -; $Id: instrs.dat,v 1.29 2001/07/05 09:39:30 peter Exp $ +; $Id: instrs.dat,v 1.30 2001/07/11 04:07:11 peter Exp $ ; List of valid instruction/operand combinations ; ; Copyright (C) 2001 Peter Johnson @@ -37,7 +37,6 @@ ; $xr indicates operand is register, not ModRM (needs convert to RM) ; $xi indicates operand is immediate (2nd parm is size in bits) ; Imm - Immediate source operand and forced size (in bits). -; $xr means relative displacement needed ; "s" after size indicates signed number ; A number instead of a $x is a hex constant value. ; @@ -49,7 +48,7 @@ ; ; Instructions are listed in the same order as that in GNU binutils ; /include/opcode/i386.h, used for the GAS assembler. See -; . 
+; . ; ; TODO: ; Finish instructions (may require changing parser code). @@ -427,8 +426,21 @@ shrd!shlrd AC ; ; Control transfer instructions (unconditional) ; -; call -; jmp +; Special format for relative targets: +; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU +; +!jmpcall target nil $0.1 $0.2 8086 8086 +!jmpcall imm:imm nil $0.3 $2i,nil $1,16 8086 +!jmpcall WORD imm:imm 16 $0.3 $2i,16 $1,16 8086 +!jmpcall DWORD imm:imm 32 $0.3 $2i,32 $1,16 386 +!jmpcall memfar nil FF $1,$0.4+1 nil 8086 +!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086 +!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386 +!jmpcall mem nil FF $1,$0.4 nil 8086 +!jmpcall rm16x 16 FF $1,$0.4 nil 8086 +!jmpcall rm32x 32 FF $1,$0.4 nil 386 +call!jmpcall nil,E8,9A,2 +jmp!jmpcall EB,E9,EA,4 ret!onebyte nil,C3 8086 retn nil nil C3 nil nil 8086 retf nil nil CB nil nil 8086 @@ -439,26 +451,84 @@ leave!onebyte nil,C9 186 ; ; Conditional jumps ; -; jcc -; jcxz -; jecxz +!jcc target nil 70+$0.1 0F,80+$0.1 8086 386 +jo!jcc 0 +jno!jcc 1 +jb!jcc 2 +jc!jcc 2 +jnae!jcc 2 +jnb!jcc 3 +jnc!jcc 3 +jae!jcc 3 +je!jcc 4 +jz!jcc 4 +jne!jcc 5 +jnz!jcc 5 +jbe!jcc 6 +jna!jcc 6 +jnbe!jcc 7 +ja!jcc 7 +js!jcc 8 +jns!jcc 9 +jp!jcc A +jpe!jcc A +jnp!jcc B +jpo!jcc B +jl!jcc C +jnge!jcc C +jnl!jcc D +jge!jcc D +jle!jcc E +jng!jcc E +jnle!jcc F +jg!jcc F +jcxz target 16 E3 nil 8086 8086 +jecxz target 32 E3 nil 386 386 ; ; Loop instructions ; -; loop -; loopcc: -;:loope loopz -;loopz imm1632 nil E1 nil $1r,8s 8086 -;loopz imm1632,REG_CX 16 E1 nil $1r,8s 8086 -;loopz imm1632,REG_ECX 32 E1 nil $1r,8s 386 -;:loopne loopnz -;loopnz imm1632 nil E1 nil $1r,8s 8086 -;loopnz imm1632,REG_CX 16 E1 nil $1r,8s 8086 -;loopnz imm1632,REG_ECX 32 E1 nil $1r,8s 386 +!loopg target nil E0+$0.1 nil 8086 8086 +!loopg target,REG_CX 16 E0+$0.1 nil 8086 8086 +!loopg target,REG_ECX 32 E0+$0.1 nil 386 386 +loop!loopg 2 +loopz!loopg 1 +loope!loopg 1 +loopnz!loopg 0 +loopne!loopg 0 ; ; Set byte on flag instructions ; -; setcc +!setcc rm8 nil 0F,90+$0.1 $1,2 nil 
386 +seto!setcc 0 +setno!setcc 1 +setb!setcc 2 +setc!setcc 2 +setnae!setcc 2 +setnb!setcc 3 +setnc!setcc 3 +setae!setcc 3 +sete!setcc 4 +setz!setcc 4 +setne!setcc 5 +setnz!setcc 5 +setbe!setcc 6 +setna!setcc 6 +setnbe!setcc 7 +seta!setcc 7 +sets!setcc 8 +setns!setcc 9 +setp!setcc A +setpe!setcc A +setnp!setcc B +setpo!setcc B +setl!setcc C +setnge!setcc C +setnl!setcc D +setge!setcc D +setle!setcc E +setng!setcc E +setnle!setcc F +setg!setcc F ; ; String instructions ; diff --git a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in index 911a6c41..410862b6 100644 --- a/src/parsers/nasm/bison.y.in +++ b/src/parsers/nasm/bison.y.in @@ -1,4 +1,4 @@ -/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $ +/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $ * Main bison parser * * Copyright (C) 2001 Peter Johnson, Michael Urman @@ -45,10 +45,11 @@ extern void yyerror(char *); char *name; int line; } syminfo; - unsigned char groupdata[3]; + unsigned char groupdata[4]; effaddr ea_val; expr *exp; immval im_val; + targetval tgt_val; bytecode bc; } @@ -80,7 +81,7 @@ extern void yyerror(char *); %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp +%type mem memaddr memexp memfar %type mem8x mem16x mem32x mem64x mem80x mem128x %type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 %type rm8x rm16x rm32x /*rm64x rm128x*/ @@ -89,6 +90,7 @@ extern void yyerror(char *); %type expr %type explabel %type label_id +%type target %left '|' %left '^' @@ -215,6 +217,10 @@ mem80x: TWORD mem { $$ = $2; } mem128x: DQWORD mem { $$ = $2; } ; +/* FAR memory, for jmp and call */ +memfar: FAR mem { $$ = $2; } +; + /* implicit memory */ mem8: mem | mem8x @@ -300,6 +306,15 @@ imm32: imm | imm32x ; +/* jump targets */ +target: explabel { + $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL))); + $$.op_sel = JR_NONE; + } + | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } + | NEAR target { $$ = $2; 
SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +; + /* expression trees */ expr: INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); } diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl index 25c8220a..a122e084 100755 --- a/src/parsers/nasm/gen_instr.pl +++ b/src/parsers/nasm/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $ +# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -26,13 +26,21 @@ use Getopt::Long; my $VERSION = "0.0.1"; # useful constants for instruction arrays -use constant INST => 0; -use constant OPERANDS => 1; -use constant OPSIZE => 2; -use constant OPCODE => 3; -use constant EFFADDR => 4; -use constant IMM => 5; -use constant CPU => 6; +# common +use constant INST => 0; +use constant OPERANDS => 1; +# general format +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; +# relative target format +use constant ADSIZE => 2; +use constant SHORTOPCODE => 3; +use constant NEAROPCODE => 4; +use constant SHORTCPU => 5; +use constant NEARCPU => 6; use constant TOO_MANY_ERRORS => 20; @@ -81,6 +89,7 @@ my $valid_regs = join '|', qw( reg8x reg16x reg32x reg1632x reg64x reg80x reg128x mem8 mem16 mem32 mem1632 mem64 mem80 mem128 mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + target memfar ); my $valid_opcodes = join '|', qw( [0-9A-F]{2} @@ -158,28 +167,56 @@ sub read_instructions ($) # i still say changing instrs.dat would be better ;) $args =~ s/\$0\.([1-4])/ '$0.' . 
($1-1) /eg; - my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; - eval { - die "Invalid group name\n" - if $inst !~ m/^!\w+$/o; - die "Invalid Operands\n" - if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi; - die "Invalid Operation Size\n" - if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; - die "Invalid Effective Address\n" - if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; - die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; - }; - push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; - die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; - # knock the ! off of $inst for the groupname - $inst = substr $inst, 1; - push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # detect relative target format by looking for "target" in args + if($args =~ m/target/oi) + { + my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) = + split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Address Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Short Opcode\n" + if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Near Opcode\n" + if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi; + die "Invalid Short CPU\n" + if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + die "Invalid Near CPU\n" + if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile 
line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu]; + } else { + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; + eval { + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^(nil|16|32|\$0\.\d)$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi; + die "Invalid Effective Address\n" + if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@; + die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } } sub add_group_member ($$$$$) @@ -439,212 +476,308 @@ sub output_yacc ($@) (@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0)); my $count = 0; foreach my $inst (@{$groups->{$group}{rules}}) { - # build the instruction in pieces. - - # rulename = instruction - my $rule = "$inst->[INST]"; - - # tokens it eats: instruction and arguments - # nil => no arguments - my $tokens = "\Ugrp_$rule\E"; - $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; - $tokens =~ s/,/ ',' /g; - my $to = $tokens =~ m/\bTO\b/ ? 
1 : 0; # offset args - - my $func = "BuildBC_Insn"; - - # Create the argument list for BuildBC - my @args; - - # First argument is always &$$ - push @args, '&$$,'; - - # opcode size - push @args, "$inst->[OPSIZE],"; - $args[-1] =~ s/nil/0/; - - # number of bytes of opcodes - push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; - - # opcode piece 1 (and 2 if attached) - push @args, $inst->[OPCODE]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - # don't match $0.\d in the following rule. - $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; - $args[-1] .= ','; - - # opcode piece 2 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,/o; - # opcode piece 3 (if not attached) - push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; - - # effective addresses - push @args, $inst->[EFFADDR]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - # don't let a $0.\d match slip into the following rules. - $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; - $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] - ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; - $args[-1] .= ','; - - die $args[-1] if $args[-1] =~ m/\d+[ri]/; - - # immediate sources - push @args, $inst->[IMM]; - $args[-1] =~ s/,/, /; - $args[-1] =~ s/nil/(immval *)NULL, 0/; - # don't match $0.\d in the following rules. - $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! - $args[-1] =~ s[^([0-9A-Fa-f]+),] - [ConvertIntToImm((immval *)NULL, 0x$1),]; - $args[-1] =~ s[^\$0.(\d+),] - [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; - - # divide the second, and only the second, by 8 bits/byte - $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; - $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; - - $args[-1] =~ s/(\&\$\d+)(r)?/$1/; - $args[-1] .= ($2||'') eq 'r' ? 
', 1' : ', 0'; - - die $args[-1] if $args[-1] =~ m/\d+[ris]/; - - # now that we've constructed the arglist, subst $0.\d - s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - - # see if we match one of the cases to defer - if (($inst->[OPERANDS]||"") =~ m/,ONE/) - { - $ONE = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) - { - $AL = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) - { - $AX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) - { - $EAX = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) - { - $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) - { - $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) - { - $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; - } - elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + if($inst->[OPERANDS] =~ m/target/oi) { - $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; - } + # relative target format + # build the instruction in pieces. 
- # or if we've deferred and we match the folding version - elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) - { - my $immarg = get_token_number ($tokens, "imm8"); + # rulename = instruction + my $rule = "$inst->[INST]"; - $ONE->[4] = 1; - print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); - } - elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) - { - $AL->[4] = 1; - my $regarg = get_token_number ($tokens, "reg8"); + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + my $func = "BuildBC_JmpRel"; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); - } - elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) - { - $AX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg16"); + # Create the argument list for BuildBC + my @args; - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); - } - elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) - { - $EAX->[4] = 1; - my $regarg = get_token_number ($tokens, "reg32"); + # First argument is always &$$ + push @args, '&$$,'; + + # Target argument: HACK: Always assumed to be arg 1. + push @args, '&$2,'; + + # test for short opcode "nil" + if($inst->[SHORTOPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of short opcode + push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[SHORTOPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . 
($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o; + } + + # test for near opcode "nil" + if($inst->[NEAROPCODE] =~ m/nil/) + { + push @args, '0, 0, 0, 0, 0,'; + } + else + { + # opcode is valid + push @args, '1,'; + + # number of bytes of near opcode + push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[NEAROPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o; + } + + # address size + push @args, "$inst->[ADSIZE]"; + $args[-1] =~ s/nil/0/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); - print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + # generate the grammar + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); } - elsif (($XCHG_AX[0] or $XCHG_AX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + else { - my $first = 1; - for (my $i=0; $i < @XCHG_AX; ++$i) + # general instruction format + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Ugrp_$rule\E"; + $tokens .= " $inst->[OPERANDS]" + if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + $tokens =~ s/:/ ':' /g; + # offset args + my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 
1 : 0; + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # operand size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 and 3 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/o; + # opcode piece 3 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/; + $args[-1] =~ s/nil/0/; + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1\[$1\]),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? 
', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+s/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args); + + # see if we match one of the cases to defer + if (($inst->[OPERANDS]||"") =~ m/,ONE/) + { + $ONE = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/) + { + $AL = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/) + { + $AX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/) + { + $EAX = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/) + { + $XCHG_AX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/) + { + $XCHG_AX[1] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/) + { + $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args]; + } + elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/) + { + $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args]; + } + + # or if we've deferred and we match the folding version + elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/) + { + my $immarg = get_token_number ($tokens, "imm8"); + + $ONE->[4] = 1; + print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args); + } + elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/) + { + $AL->[4] = 1; + my $regarg = get_token_number ($tokens, "reg8"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args); + } + elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/) { - if($XCHG_AX[$i]) + $AX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg16"); + + print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args); + } + elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/) + { + $EAX->[4] = 1; + my $regarg = get_token_number ($tokens, "reg32"); + + print GRAMMAR cond_action ($rule, 
$tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args); + } + elsif (($XCHG_AX[0] or $XCHG_AX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg16,reg16/) + { + my $first = 1; + for (my $i=0; $i < @XCHG_AX; ++$i) { - $XCHG_AX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg16 - # instead of always getting the first. - my $regarg = - get_token_number ($tokens, "reg16") + $i*2; - - if ($first) - { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); - $first = 0; - } - else + if($XCHG_AX[$i]) { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + $XCHG_AX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg16 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg16") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and - ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) - { - my $first = 1; - for (my $i=0; $i < @XCHG_EAX; ++$i) + elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and + ($inst->[OPERANDS]||"") =~ m/reg32,reg32/) { - if($XCHG_EAX[$i]) + my $first = 1; + for (my $i=0; $i < @XCHG_EAX; ++$i) { - $XCHG_EAX[$i]->[4] = 1; - # This is definitely a hack. The "right" way - # to do this would be to enhance - # get_token_number to get the nth reg32 - # instead of always getting the first. 
- my $regarg = - get_token_number ($tokens, "reg32") + $i*2; - - if ($first) + if($XCHG_EAX[$i]) { - print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); - $first = 0; - } - else - { - $count++; - print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $XCHG_EAX[$i]->[4] = 1; + # This is definitely a hack. The "right" + # way to do this would be to enhance + # get_token_number to get the nth reg32 + # instead of always getting the first. + my $regarg = + get_token_number ($tokens, "reg32") + + $i*2; + + if ($first) + { + print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]); + $first = 0; + } + else + { + $count++; + print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]); + } } } + print GRAMMAR cond_action_else ($func, \@args); } - print GRAMMAR cond_action_else ($func, \@args); - } - # otherwise, generate the normal version - else - { - print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + # otherwise, generate the normal version + else + { + print GRAMMAR action ($rule, $tokens, $func, \@args, $count++); + } } } diff --git a/src/parsers/nasm/nasm-bison.y b/src/parsers/nasm/nasm-bison.y index 97609a3a..ccd537a3 100644 --- a/src/parsers/nasm/nasm-bison.y +++ b/src/parsers/nasm/nasm-bison.y @@ -1,4 +1,4 @@ -/* $Id: nasm-bison.y,v 1.19 2001/07/06 06:25:53 mu Exp $ +/* $Id: nasm-bison.y,v 1.20 2001/07/11 04:07:10 peter Exp $ * Main bison parser * * Copyright (C) 2001 Peter Johnson, Michael Urman @@ -45,10 +45,11 @@ extern void yyerror(char *); char *name; int line; } syminfo; - unsigned char groupdata[3]; + unsigned char groupdata[4]; effaddr ea_val; expr *exp; immval im_val; + targetval tgt_val; bytecode bc; } @@ -80,7 +81,7 @@ extern void yyerror(char *); %type line exp instr instrbase label %type fpureg reg32 reg16 reg8 segreg -%type mem memaddr memexp +%type mem memaddr memexp memfar %type mem8x mem16x mem32x mem64x mem80x mem128x 
%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 %type rm8x rm16x rm32x /*rm64x rm128x*/ @@ -89,6 +90,7 @@ extern void yyerror(char *); %type expr %type explabel %type label_id +%type target %left '|' %left '^' @@ -215,6 +217,10 @@ mem80x: TWORD mem { $$ = $2; } mem128x: DQWORD mem { $$ = $2; } ; +/* FAR memory, for jmp and call */ +memfar: FAR mem { $$ = $2; } +; + /* implicit memory */ mem8: mem | mem8x @@ -300,6 +306,15 @@ imm32: imm | imm32x ; +/* jump targets */ +target: explabel { + $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL))); + $$.op_sel = JR_NONE; + } + | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } + | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } +; + /* expression trees */ expr: INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }