seems to work well.
A few other minor fixes/changes as well (noticed when coding).
svn path=/trunk/yasm/; revision=119
-/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $
+/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $
* Bytecode utility functions header file
*
* Copyright (C) 2001 Peter Johnson
struct expr_s *val;
unsigned char len; /* length of val (in bytes), 0 if none */
- unsigned char isrel;
unsigned char isneg; /* the value has been explicitly negated */
unsigned char f_len; /* final imm length */
- unsigned char f_rel; /* 1 if final imm should be rel */
unsigned char f_sign; /* 1 if final imm should be signed */
} immval;
+/* Which opcode form a relative jump uses. The *_FORCED values record a
+ * SHORT or NEAR keyword that was written explicitly in the source.
+ */
+typedef enum jmprel_opcode_sel_e {
+ JR_NONE, /* no form selected; a bare label target starts out like this */
+ JR_SHORT, /* NOTE(review): presumably short form chosen by the assembler -- confirm */
+ JR_NEAR, /* NOTE(review): presumably near form chosen by the assembler -- confirm */
+ JR_SHORT_FORCED, /* SHORT given explicitly in the source */
+ JR_NEAR_FORCED /* NEAR given explicitly in the source */
+} jmprel_opcode_sel;
+
+/* Jump target value passed from the parser to BuildBC_JmpRel(): the target
+ * expression together with the opcode form selection attached to it.
+ */
+typedef struct targetval_s {
+ struct expr_s *val; /* target expression */
+
+ jmprel_opcode_sel op_sel; /* opcode form selection for this target */
+} targetval;
+
typedef struct bytecode_s {
struct bytecode_s *next;
- enum { BC_INSN, BC_DATA, BC_RESERVE } type;
+ enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
union {
struct {
unsigned char opersize; /* 0 indicates no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
} insn;
+ struct {
+ struct expr_s *target; /* target location */
+
+ struct {
+ unsigned char opcode[3];
+ unsigned char opcode_len;
+ unsigned char valid; /* does the opcode exist? */
+ } shortop, nearop;
+
+ /* which opcode are we using? */
+ /* The *FORCED forms are specified in the source as such */
+ jmprel_opcode_sel op_sel;
+
+ unsigned char addrsize; /* 0 indicates no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+ } jmprel;
struct {
unsigned char *data;
} data;
void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize);
void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix);
+void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel);
+
void BuildBC_Insn(bytecode *bc,
unsigned char opersize,
unsigned char opcode_len,
unsigned char spare,
immval *im_ptr,
unsigned char im_len,
- unsigned char im_sign,
- unsigned char im_rel);
+ unsigned char im_sign);
+
+void BuildBC_JmpRel(bytecode *bc,
+ targetval *target,
+ unsigned char short_valid,
+ unsigned char short_opcode_len,
+ unsigned char short_op0,
+ unsigned char short_op1,
+ unsigned char short_op2,
+ unsigned char near_valid,
+ unsigned char near_opcode_len,
+ unsigned char near_op0,
+ unsigned char near_op1,
+ unsigned char near_op2,
+ unsigned char addrsize);
unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len);
-/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $
+/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $
* Error and warning reporting and related functions header file.
*
* Copyright (C) 2001 Peter Johnson
FATAL_NOMEM
} fatal_num;
+void InternalError(unsigned int line, char *file, char *message);
void Fatal(fatal_num);
typedef enum {
ERR_INVALID_LINE,
ERR_EXP_SYNTAX,
ERR_DUPLICATE_DEF,
- ERR_OP_SIZE_MISMATCH
+ ERR_OP_SIZE_MISMATCH,
+ ERR_NO_JMPREL_FORM
} err_num;
void Error(err_num, char *, ...);
WARN_VALUE_EXCEEDS_BOUNDS,
WARN_MULT_SEG_OVERRIDE,
WARN_MULT_LOCKREP_PREFIX,
- WARN_NO_BASE_LABEL
+ WARN_NO_BASE_LABEL,
+ WARN_MULT_SHORTNEAR
} warn_num;
void Warning(warn_num, char *, ...);
-/* $Id: bytecode.c,v 1.11 2001/07/06 06:25:53 mu Exp $
+/* $Id: bytecode.c,v 1.12 2001/07/11 04:07:10 peter Exp $
* Bytecode utility functions
*
* Copyright (C) 2001 Peter Johnson
else
ptr->len = 4;
- ptr->isrel = 0;
ptr->isneg = 0;
return ptr;
ptr->val = expr_ptr;
- ptr->isrel = 0;
ptr->isneg = 0;
return ptr;
if(!bc)
return;
- bc->data.insn.opersize = opersize;
+ switch(bc->type) {
+ case BC_INSN:
+ bc->data.insn.opersize = opersize;
+ break;
+ case BC_JMPREL:
+ bc->data.jmprel.opersize = opersize;
+ break;
+ default:
+ InternalError(__LINE__, __FILE__,
+ "OperSize override applied to non-instruction");
+ return;
+ }
}
void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize)
if(!bc)
return;
- bc->data.insn.addrsize = addrsize;
+ switch(bc->type) {
+ case BC_INSN:
+ bc->data.insn.addrsize = addrsize;
+ break;
+ case BC_JMPREL:
+ bc->data.jmprel.addrsize = addrsize;
+ break;
+ default:
+ InternalError(__LINE__, __FILE__,
+ "AddrSize override applied to non-instruction");
+ return;
+ }
}
+/* Set the LOCK/REP prefix byte of an instruction or relative jump bytecode.
+ * A NULL bc is ignored. Any other bytecode type is reported through
+ * InternalError(). If a prefix was already set (nonzero),
+ * WARN_MULT_LOCKREP_PREFIX is issued before the stored value is replaced.
+ */
void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix)
{
+ unsigned char *lockrep_pre = (unsigned char *)NULL;
+
if(!bc)
return;
- if(bc->data.insn.lockrep_pre != 0)
+ /* point lockrep_pre at the prefix field that matches the bytecode type */
+ switch(bc->type) {
+ case BC_INSN:
+ lockrep_pre = &bc->data.insn.lockrep_pre;
+ break;
+ case BC_JMPREL:
+ lockrep_pre = &bc->data.jmprel.lockrep_pre;
+ break;
+ default:
+ InternalError(__LINE__, __FILE__,
+ "LockRep prefix applied to non-instruction");
+ return;
+ }
+
+ /* a nonzero value means a prefix has already been recorded */
+ if(*lockrep_pre != 0)
Warning(WARN_MULT_LOCKREP_PREFIX, (char *)NULL);
- bc->data.insn.lockrep_pre = prefix;
+ *lockrep_pre = prefix;
+}
+
+/* Replace the opcode form selection stored in *old_sel with new_sel.
+ * Does nothing when old_sel is NULL. If the stored selection is already one
+ * of the forced forms, WARN_MULT_SHORTNEAR is issued before it is replaced.
+ */
+void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel)
+{
+    if(old_sel) {
+        switch(*old_sel) {
+            case JR_SHORT_FORCED:
+            case JR_NEAR_FORCED:
+                /* a SHORT/NEAR specifier was already applied to this target */
+                Warning(WARN_MULT_SHORTNEAR, (char *)NULL);
+                break;
+            default:
+                break;
+        }
+        *old_sel = new_sel;
+    }
+}
+
+/* Initialize the bytecode fields that are set the same way for every
+ * bytecode type; called at the end of the BuildBC_* functions.
+ */
+static void BuildBC_Common(bytecode *bc)
+{
+    /* source position: no filename, current value of line_number */
+    bc->lineno = line_number;
+    bc->filename = (char *)NULL;
+
+    /* current value of mode_bits */
+    bc->mode_bits = mode_bits;
+
+    /* offset and length both start at zero */
+    bc->offset = 0;
+    bc->len = 0;
}
void BuildBC_Insn(bytecode *bc,
unsigned char spare,
immval *im_ptr,
unsigned char im_len,
- unsigned char im_sign,
- unsigned char im_rel)
+ unsigned char im_sign)
{
bc->next = (bytecode *)NULL;
bc->type = BC_INSN;
if(im_ptr) {
bc->data.insn.imm = *im_ptr;
- bc->data.insn.imm.f_rel = im_rel;
bc->data.insn.imm.f_sign = im_sign;
bc->data.insn.imm.f_len = im_len;
} else {
bc->data.insn.imm.len = 0;
- bc->data.insn.imm.f_rel = 0;
bc->data.insn.imm.f_sign = 0;
bc->data.insn.imm.f_len = 0;
}
bc->data.insn.addrsize = 0;
bc->data.insn.opersize = opersize;
+ bc->data.insn.lockrep_pre = 0;
- bc->len = 0;
+ BuildBC_Common(bc);
+}
- bc->filename = (char *)NULL;
- bc->lineno = line_number;
+/* Fill in bc as a relative jump (BC_JMPREL) bytecode.
+ *
+ * target supplies the target expression and the opcode form selection.
+ * short_* and near_* describe the short and near opcode forms: the valid
+ * flag says whether that form exists, and the opcode bytes/length are only
+ * stored when it does. addrsize is stored as the address size override;
+ * opersize and lockrep_pre start at 0.
+ *
+ * ERR_NO_JMPREL_FORM is reported when the target forces a form (SHORT or
+ * NEAR) whose valid flag is 0.
+ */
+void BuildBC_JmpRel(bytecode *bc,
+ targetval *target,
+ unsigned char short_valid,
+ unsigned char short_opcode_len,
+ unsigned char short_op0,
+ unsigned char short_op1,
+ unsigned char short_op2,
+ unsigned char near_valid,
+ unsigned char near_opcode_len,
+ unsigned char near_op0,
+ unsigned char near_op1,
+ unsigned char near_op2,
+ unsigned char addrsize)
+{
+ bc->next = (bytecode *)NULL;
+ bc->type = BC_JMPREL;
+
+ bc->data.jmprel.target = target->val;
+ bc->data.jmprel.op_sel = target->op_sel;
+
+ /* a form forced in the source must actually exist for this instruction */
+ if((target->op_sel == JR_SHORT_FORCED) && (!short_valid))
+ Error(ERR_NO_JMPREL_FORM, (char *)NULL, "SHORT");
+ if((target->op_sel == JR_NEAR_FORCED) && (!near_valid))
+ Error(ERR_NO_JMPREL_FORM, (char *)NULL, "NEAR");
+
+ /* short form: opcode bytes are left untouched when the form is not valid */
+ bc->data.jmprel.shortop.valid = short_valid;
+ if(short_valid) {
+ bc->data.jmprel.shortop.opcode[0] = short_op0;
+ bc->data.jmprel.shortop.opcode[1] = short_op1;
+ bc->data.jmprel.shortop.opcode[2] = short_op2;
+ bc->data.jmprel.shortop.opcode_len = short_opcode_len;
+ }
- bc->offset = 0;
- bc->mode_bits = mode_bits;
+ /* near form: same handling as the short form */
+ bc->data.jmprel.nearop.valid = near_valid;
+ if(near_valid) {
+ bc->data.jmprel.nearop.opcode[0] = near_op0;
+ bc->data.jmprel.nearop.opcode[1] = near_op1;
+ bc->data.jmprel.nearop.opcode[2] = near_op2;
+ bc->data.jmprel.nearop.opcode_len = near_opcode_len;
+ }
+
+ bc->data.jmprel.addrsize = addrsize;
+ bc->data.jmprel.opersize = 0;
+ bc->data.jmprel.lockrep_pre = 0;
+
+ /* fields shared by all bytecode types */
+ BuildBC_Common(bc);
}
-
+
/* TODO: implement. Shouldn't be difficult. */
unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len)
{
(unsigned int)bc->data.insn.ea.sib,
(unsigned int)bc->data.insn.ea.valid_sib,
(unsigned int)bc->data.insn.ea.need_sib);
- printf("Immediate/Relative Value:\n");
+ printf("Immediate Value:\n");
printf(" Val=");
if (!bc->data.insn.imm.val)
printf("(nil)");
else
expr_print(bc->data.insn.imm.val);
printf("\n");
- printf(" Len=%u, IsRel=%u, IsNeg=%u\n",
+ printf(" Len=%u, IsNeg=%u\n",
(unsigned int)bc->data.insn.imm.len,
- (unsigned int)bc->data.insn.imm.isrel,
(unsigned int)bc->data.insn.imm.isneg);
- printf(" FLen=%u, FRel=%u, FSign=%u\n",
+ printf(" FLen=%u, FSign=%u\n",
(unsigned int)bc->data.insn.imm.f_len,
- (unsigned int)bc->data.insn.imm.f_rel,
(unsigned int)bc->data.insn.imm.f_sign);
- printf("Opcode: %2x %2x OpLen=%u\n",
+ printf("Opcode: %2x %2x %2x OpLen=%u\n",
(unsigned int)bc->data.insn.opcode[0],
(unsigned int)bc->data.insn.opcode[1],
+ (unsigned int)bc->data.insn.opcode[2],
(unsigned int)bc->data.insn.opcode_len);
- printf("OperSize=%u LockRepPre=%2x\n",
+ printf("AddrSize=%u OperSize=%u LockRepPre=%2x\n",
+ (unsigned int)bc->data.insn.addrsize,
(unsigned int)bc->data.insn.opersize,
(unsigned int)bc->data.insn.lockrep_pre);
break;
+ case BC_JMPREL:
+ /* Debug dump of a relative jump: target expression, the short and
+ * near opcode forms (or "None" when a form does not exist), the
+ * selected form, and the size overrides / prefix byte.
+ */
+ printf("_Relative Jump_\n");
+ printf("Target=");
+ expr_print(bc->data.jmprel.target);
+ printf("\nShort Form:\n");
+ if(!bc->data.jmprel.shortop.valid)
+ printf(" None\n");
+ else
+ printf(" Opcode: %2x %2x %2x OpLen=%u\n",
+ (unsigned int)bc->data.jmprel.shortop.opcode[0],
+ (unsigned int)bc->data.jmprel.shortop.opcode[1],
+ (unsigned int)bc->data.jmprel.shortop.opcode[2],
+ (unsigned int)bc->data.jmprel.shortop.opcode_len);
+ /* give the near form its own heading; without it the two forms
+ * cannot be told apart in the output
+ */
+ printf("Near Form:\n");
+ if(!bc->data.jmprel.nearop.valid)
+ printf(" None\n");
+ else
+ printf(" Opcode: %2x %2x %2x OpLen=%u\n",
+ (unsigned int)bc->data.jmprel.nearop.opcode[0],
+ (unsigned int)bc->data.jmprel.nearop.opcode[1],
+ (unsigned int)bc->data.jmprel.nearop.opcode[2],
+ (unsigned int)bc->data.jmprel.nearop.opcode_len);
+ printf("OpSel=");
+ switch(bc->data.jmprel.op_sel) {
+ case JR_NONE: printf("None"); break;
+ case JR_SHORT: printf("Short"); break;
+ case JR_NEAR: printf("Near"); break;
+ case JR_SHORT_FORCED: printf("Forced Short"); break;
+ case JR_NEAR_FORCED: printf("Forced Near"); break;
+ default: printf("UNKNOWN!!"); break;
+ }
+ printf("\nAddrSize=%u OperSize=%u LockRepPre=%2x\n",
+ (unsigned int)bc->data.jmprel.addrsize,
+ (unsigned int)bc->data.jmprel.opersize,
+ (unsigned int)bc->data.jmprel.lockrep_pre);
+ break;
case BC_DATA:
printf("_Data_\n");
for(i=0; i<bc->len; i++) {
-/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $
+/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $
* Bytecode utility functions header file
*
* Copyright (C) 2001 Peter Johnson
struct expr_s *val;
unsigned char len; /* length of val (in bytes), 0 if none */
- unsigned char isrel;
unsigned char isneg; /* the value has been explicitly negated */
unsigned char f_len; /* final imm length */
- unsigned char f_rel; /* 1 if final imm should be rel */
unsigned char f_sign; /* 1 if final imm should be signed */
} immval;
+/* Which opcode form a relative jump uses. The *_FORCED values record a
+ * SHORT or NEAR keyword that was written explicitly in the source.
+ */
+typedef enum jmprel_opcode_sel_e {
+ JR_NONE, /* no form selected; a bare label target starts out like this */
+ JR_SHORT, /* NOTE(review): presumably short form chosen by the assembler -- confirm */
+ JR_NEAR, /* NOTE(review): presumably near form chosen by the assembler -- confirm */
+ JR_SHORT_FORCED, /* SHORT given explicitly in the source */
+ JR_NEAR_FORCED /* NEAR given explicitly in the source */
+} jmprel_opcode_sel;
+
+/* Jump target value passed from the parser to BuildBC_JmpRel(): the target
+ * expression together with the opcode form selection attached to it.
+ */
+typedef struct targetval_s {
+ struct expr_s *val; /* target expression */
+
+ jmprel_opcode_sel op_sel; /* opcode form selection for this target */
+} targetval;
+
typedef struct bytecode_s {
struct bytecode_s *next;
- enum { BC_INSN, BC_DATA, BC_RESERVE } type;
+ enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
union {
struct {
unsigned char opersize; /* 0 indicates no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
} insn;
+ struct {
+ struct expr_s *target; /* target location */
+
+ struct {
+ unsigned char opcode[3];
+ unsigned char opcode_len;
+ unsigned char valid; /* does the opcode exist? */
+ } shortop, nearop;
+
+ /* which opcode are we using? */
+ /* The *FORCED forms are specified in the source as such */
+ jmprel_opcode_sel op_sel;
+
+ unsigned char addrsize; /* 0 indicates no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+ } jmprel;
struct {
unsigned char *data;
} data;
void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize);
void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix);
+void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel);
+
void BuildBC_Insn(bytecode *bc,
unsigned char opersize,
unsigned char opcode_len,
unsigned char spare,
immval *im_ptr,
unsigned char im_len,
- unsigned char im_sign,
- unsigned char im_rel);
+ unsigned char im_sign);
+
+void BuildBC_JmpRel(bytecode *bc,
+ targetval *target,
+ unsigned char short_valid,
+ unsigned char short_opcode_len,
+ unsigned char short_op0,
+ unsigned char short_op1,
+ unsigned char short_op2,
+ unsigned char near_valid,
+ unsigned char near_opcode_len,
+ unsigned char near_op0,
+ unsigned char near_op1,
+ unsigned char near_op2,
+ unsigned char addrsize);
unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len);
-/* $Id: errwarn.c,v 1.14 2001/07/04 20:57:53 peter Exp $
+/* $Id: errwarn.c,v 1.15 2001/07/11 04:07:10 peter Exp $
* Error and warning reporting and related functions.
*
* Copyright (C) 2001 Peter Johnson
"label or instruction expected at start of line",
"expression syntax error",
"duplicate definition of `%1'; previously defined line %2",
- "mismatch in operand sizes"
+ "mismatch in operand sizes",
+ "no %s form of that jump instruction exists"
};
static char *warn_msgs[] = {
"%s value exceeds bounds",
"multiple segment overrides, using leftmost",
"multiple LOCK or REP prefixes, using leftmost",
- "no non-local label before '%s'"
+ "no non-local label before '%s'",
+ "multiple SHORT or NEAR specifiers, using leftmost"
};
/* hate to define these as static buffers; better solution would be to use
Error(ERR_PARSER, (char *)NULL, s);
}
+/* Report an internal error on stderr, naming the source file and line that
+ * detected it (callers pass __LINE__ and __FILE__), then terminate the
+ * program with EXIT_FAILURE. Does not return.
+ */
+void InternalError(unsigned int line, char *file, char *message)
+{
+    /* line is unsigned int, so it is printed with %u rather than %d */
+    fprintf(stderr, "INTERNAL ERROR at %s, line %u: %s\n", file, line,
+            message);
+    exit(EXIT_FAILURE);
+}
+
void Fatal(fatal_num num)
{
fprintf(stderr, "FATAL: %s\n", fatal_msgs[num]);
-/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $
+/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $
* Error and warning reporting and related functions header file.
*
* Copyright (C) 2001 Peter Johnson
FATAL_NOMEM
} fatal_num;
+void InternalError(unsigned int line, char *file, char *message);
void Fatal(fatal_num);
typedef enum {
ERR_INVALID_LINE,
ERR_EXP_SYNTAX,
ERR_DUPLICATE_DEF,
- ERR_OP_SIZE_MISMATCH
+ ERR_OP_SIZE_MISMATCH,
+ ERR_NO_JMPREL_FORM
} err_num;
void Error(err_num, char *, ...);
WARN_VALUE_EXCEEDS_BOUNDS,
WARN_MULT_SEG_OVERRIDE,
WARN_MULT_LOCKREP_PREFIX,
- WARN_NO_BASE_LABEL
+ WARN_NO_BASE_LABEL,
+ WARN_MULT_SHORTNEAR
} warn_num;
void Warning(warn_num, char *, ...);
-/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $
+/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $
* Main bison parser
*
* Copyright (C) 2001 Peter Johnson, Michael Urman
char *name;
int line;
} syminfo;
- unsigned char groupdata[3];
+ unsigned char groupdata[4];
effaddr ea_val;
expr *exp;
immval im_val;
+ targetval tgt_val;
bytecode bc;
}
%type <bc> line exp instr instrbase label
%type <int_val> fpureg reg32 reg16 reg8 segreg
-%type <ea_val> mem memaddr memexp
+%type <ea_val> mem memaddr memexp memfar
%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
%type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/
%type <exp> expr
%type <syminfo> explabel
%type <sym> label_id
+%type <tgt_val> target
%left '|'
%left '^'
mem128x: DQWORD mem { $$ = $2; }
;
+/* FAR memory, for jmp and call */
+memfar: FAR mem { $$ = $2; }
+;
+
/* implicit memory */
mem8: mem
| mem8x
| imm32x
;
+/* jump targets: a label, optionally preceded by SHORT and/or NEAR keywords.
+ * A bare label yields a symbol expression with no opcode form selected
+ * (JR_NONE); each SHORT or NEAR keyword copies the inner target and forces
+ * the matching form through SetOpcodeSel().
+ */
+target: explabel {
+ $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL)));
+ $$.op_sel = JR_NONE;
+ }
+ | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
+ | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+;
+
/* expression trees */
expr:
INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }
#!/usr/bin/perl -w
-# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $
+# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $
# Generates bison.y and token.l from instrs.dat for YASM
#
# Copyright (C) 2001 Michael Urman
my $VERSION = "0.0.1";
# useful constants for instruction arrays
-use constant INST => 0;
-use constant OPERANDS => 1;
-use constant OPSIZE => 2;
-use constant OPCODE => 3;
-use constant EFFADDR => 4;
-use constant IMM => 5;
-use constant CPU => 6;
+# common
+use constant INST => 0;
+use constant OPERANDS => 1;
+# general format
+use constant OPSIZE => 2;
+use constant OPCODE => 3;
+use constant EFFADDR => 4;
+use constant IMM => 5;
+use constant CPU => 6;
+# relative target format
+use constant ADSIZE => 2;
+use constant SHORTOPCODE => 3;
+use constant NEAROPCODE => 4;
+use constant SHORTCPU => 5;
+use constant NEARCPU => 6;
use constant TOO_MANY_ERRORS => 20;
reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
mem8 mem16 mem32 mem1632 mem64 mem80 mem128
mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
+ target memfar
);
my $valid_opcodes = join '|', qw(
[0-9A-F]{2}
# i still say changing instrs.dat would be better ;)
$args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
- my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi;
- die "Invalid Operation Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Opcode\n"
- if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
- die "Invalid Effective Address\n"
- if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
- die "Invalid Immediate Operand\n"
- if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
- die "Invalid CPU\n"
- if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ # detect relative target format by looking for "target" in args
+ if($args =~ m/target/oi)
+ {
+ my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
+ split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Address Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Short Opcode\n"
+ if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Near Opcode\n"
+ if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Short CPU\n"
+ if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ die "Invalid Near CPU\n"
+ if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
+ } else {
+ my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Operation Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Opcode\n"
+ if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
+ die "Invalid Effective Address\n"
+ if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
+ die "Invalid Immediate Operand\n"
+ if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
+ die "Invalid CPU\n"
+ if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ }
}
sub add_group_member ($$$$$)
(@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
my $count = 0;
foreach my $inst (@{$groups->{$group}{rules}}) {
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args
-
- my $func = "BuildBC_Insn";
-
- # Create the argument list for BuildBC
- my @args;
-
- # First argument is always &$$
- push @args, '&$$,';
-
- # opcode size
- push @args, "$inst->[OPSIZE],";
- $args[-1] =~ s/nil/0/;
-
- # number of bytes of opcodes
- push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
-
- # opcode piece 1 (and 2 if attached)
- push @args, $inst->[OPCODE];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- # don't match $0.\d in the following rule.
- $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
- $args[-1] .= ',';
-
- # opcode piece 2 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,/o;
- # opcode piece 3 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
-
- # effective addresses
- push @args, $inst->[EFFADDR];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(effaddr *)NULL, 0/;
- # don't let a $0.\d match slip into the following rules.
- $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
- $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
- ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
- $args[-1] .= ',';
-
- die $args[-1] if $args[-1] =~ m/\d+[ri]/;
-
- # immediate sources
- push @args, $inst->[IMM];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(immval *)NULL, 0/;
- # don't match $0.\d in the following rules.
- $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s[^([0-9A-Fa-f]+),]
- [ConvertIntToImm((immval *)NULL, 0x$1),];
- $args[-1] =~ s[^\$0.(\d+),]
- [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
-
- # divide the second, and only the second, by 8 bits/byte
- $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
- $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
-
- $args[-1] =~ s/(\&\$\d+)(r)?/$1/;
- $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0';
-
- die $args[-1] if $args[-1] =~ m/\d+[ris]/;
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # see if we match one of the cases to defer
- if (($inst->[OPERANDS]||"") =~ m/,ONE/)
- {
- $ONE = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
- {
- $AL = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
- {
- $AX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
- {
- $EAX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
- {
- $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
- {
- $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
- {
- $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ if($inst->[OPERANDS] =~ m/target/oi)
{
- $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
- }
+ # relative target format
+ # build the instruction in pieces.
- # or if we've deferred and we match the folding version
- elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
- {
- my $immarg = get_token_number ($tokens, "imm8");
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
- $ONE->[4] = 1;
- print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
- }
- elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
- {
- $AL->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg8");
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ my $func = "BuildBC_JmpRel";
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
- }
- elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
- {
- $AX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg16");
+ # Create the argument list for BuildBC
+ my @args;
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
- }
- elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
- {
- $EAX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg32");
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # Target argument: HACK: Always assumed to be arg 1.
+ push @args, '&$2,';
+
+ # test for short opcode "nil"
+ if($inst->[SHORTOPCODE] =~ m/nil/)
+ {
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of short opcode
+ push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[SHORTOPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
+ }
+
+ # test for near opcode "nil"
+ if($inst->[NEAROPCODE] =~ m/nil/)
+ {
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of near opcode
+ push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[NEAROPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
+ }
+
+ # address size
+ push @args, "$inst->[ADSIZE]";
+ $args[-1] =~ s/nil/0/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ # generate the grammar
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
}
- elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ else
{
- my $first = 1;
- for (my $i=0; $i < @XCHG_AX; ++$i)
+ # general instruction format
+ # build the instruction in pieces.
+
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
+
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ # offset args
+ my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
+ my $func = "BuildBC_Insn";
+
+ # Create the argument list for BuildBC
+ my @args;
+
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # operand size
+ push @args, "$inst->[OPSIZE],";
+ $args[-1] =~ s/nil/0/;
+
+ # number of bytes of opcodes
+ push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[OPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
+
+ # effective addresses
+ push @args, $inst->[EFFADDR];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
+ $args[-1] =~ s/nil/0/;
+ # don't let a $0.\d match slip into the following rules.
+ $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
+ $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
+ ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
+ $args[-1] .= ',';
+
+ die $args[-1] if $args[-1] =~ m/\d+[ri]/;
+
+ # immediate sources
+ push @args, $inst->[IMM];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/nil/(immval *)NULL, 0/;
+ # don't match $0.\d in the following rules.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s[^([0-9A-Fa-f]+),]
+ [ConvertIntToImm((immval *)NULL, 0x$1),];
+ $args[-1] =~ s[^\$0.(\d+),]
+ [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
+
+ # divide the second, and only the second, by 8 bits/byte
+ $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
+ $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
+
+ die $args[-1] if $args[-1] =~ m/\d+s/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
+
+ # see if we match one of the cases to defer
+ if (($inst->[OPERANDS]||"") =~ m/,ONE/)
+ {
+ $ONE = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
+ {
+ $AL = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
+ {
+ $AX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
+ {
+ $EAX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
+ {
+ $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
+ {
+ $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
+ {
+ $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ {
+ $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
+ }
+
+ # or if we've deferred and we match the folding version
+ elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
+ {
+ my $immarg = get_token_number ($tokens, "imm8");
+
+ $ONE->[4] = 1;
+ print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
+ }
+ elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
+ {
+ $AL->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg8");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
+ }
+ elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
{
- if($XCHG_AX[$i])
+ $AX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg16");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
+ }
+ elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
+ {
+ $EAX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg32");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ }
+ elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ {
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_AX; ++$i)
{
- $XCHG_AX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg16
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg16") + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
- $first = 0;
- }
- else
+ if($XCHG_AX[$i])
{
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $XCHG_AX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg16
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg16")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_EAX; ++$i)
+ elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
{
- if($XCHG_EAX[$i])
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_EAX; ++$i)
{
- $XCHG_EAX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg32
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg32") + $i*2;
-
- if ($first)
+ if($XCHG_EAX[$i])
{
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $XCHG_EAX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg32
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg32")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- # otherwise, generate the normal version
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ # otherwise, generate the normal version
+ else
+ {
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ }
}
}
-/* $Id: nasm-bison.y,v 1.19 2001/07/06 06:25:53 mu Exp $
+/* $Id: nasm-bison.y,v 1.20 2001/07/11 04:07:10 peter Exp $
* Main bison parser
*
* Copyright (C) 2001 Peter Johnson, Michael Urman
char *name;
int line;
} syminfo;
- unsigned char groupdata[3];
+ unsigned char groupdata[4];
effaddr ea_val;
expr *exp;
immval im_val;
+ targetval tgt_val;
bytecode bc;
}
%type <bc> line exp instr instrbase label
%type <int_val> fpureg reg32 reg16 reg8 segreg
-%type <ea_val> mem memaddr memexp
+%type <ea_val> mem memaddr memexp memfar
%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
%type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/
%type <exp> expr
%type <syminfo> explabel
%type <sym> label_id
+%type <tgt_val> target
%left '|'
%left '^'
mem128x: DQWORD mem { $$ = $2; }
;
+/* FAR memory, for jmp and call */
+memfar: FAR mem { $$ = $2; }
+;
+
/* implicit memory */
mem8: mem
| mem8x
| imm32x
;
+/* jump targets */
+target: explabel { /* bare label operand */
+ $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL)));
+ $$.op_sel = JR_NONE; /* no SHORT/NEAR specifier was written */
+ }
+ | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } /* recursive, so specifiers may stack; SetOpcodeSel warns when one was already forced */
+ | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } /* recursive, so specifiers may stack; SetOpcodeSel warns when one was already forced */
+;
+
/* expression trees */
expr:
INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }
-/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $
+/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $
* Main bison parser
*
* Copyright (C) 2001 Peter Johnson, Michael Urman
char *name;
int line;
} syminfo;
- unsigned char groupdata[3];
+ unsigned char groupdata[4];
effaddr ea_val;
expr *exp;
immval im_val;
+ targetval tgt_val;
bytecode bc;
}
%type <bc> line exp instr instrbase label
%type <int_val> fpureg reg32 reg16 reg8 segreg
-%type <ea_val> mem memaddr memexp
+%type <ea_val> mem memaddr memexp memfar
%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
%type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/
%type <exp> expr
%type <syminfo> explabel
%type <sym> label_id
+%type <tgt_val> target
%left '|'
%left '^'
mem128x: DQWORD mem { $$ = $2; }
;
+/* FAR memory, for jmp and call */
+memfar: FAR mem { $$ = $2; }
+;
+
/* implicit memory */
mem8: mem
| mem8x
| imm32x
;
+/* jump targets */
+target: explabel { /* bare label operand */
+ $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL)));
+ $$.op_sel = JR_NONE; /* no SHORT/NEAR specifier was written */
+ }
+ | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); } /* recursive, so specifiers may stack; SetOpcodeSel warns when one was already forced */
+ | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); } /* recursive, so specifiers may stack; SetOpcodeSel warns when one was already forced */
+;
+
/* expression trees */
expr:
INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }
-/* $Id: bytecode.c,v 1.11 2001/07/06 06:25:53 mu Exp $
+/* $Id: bytecode.c,v 1.12 2001/07/11 04:07:10 peter Exp $
* Bytecode utility functions
*
* Copyright (C) 2001 Peter Johnson
else
ptr->len = 4;
- ptr->isrel = 0;
ptr->isneg = 0;
return ptr;
ptr->val = expr_ptr;
- ptr->isrel = 0;
ptr->isneg = 0;
return ptr;
if(!bc)
return;
- bc->data.insn.opersize = opersize;
+ switch(bc->type) {
+ case BC_INSN:
+ bc->data.insn.opersize = opersize;
+ break;
+ case BC_JMPREL:
+ bc->data.jmprel.opersize = opersize;
+ break;
+ default:
+ InternalError(__LINE__, __FILE__,
+ "OperSize override applied to non-instruction");
+ return;
+ }
}
void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize)
if(!bc)
return;
- bc->data.insn.addrsize = addrsize;
+ switch(bc->type) { /* addrsize lives in a different union member per bytecode type */
+ case BC_INSN:
+ bc->data.insn.addrsize = addrsize;
+ break;
+ case BC_JMPREL:
+ bc->data.jmprel.addrsize = addrsize;
+ break;
+ default: /* any other bytecode type is reported as an internal error */
+ InternalError(__LINE__, __FILE__,
+ "AddrSize override applied to non-instruction");
+ return; /* InternalError calls exit(), so this is only a safeguard */
+ }
}
void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix)
{
+ unsigned char *lockrep_pre = (unsigned char *)NULL; /* will point at the prefix field of the union member selected by bc->type */
+
if(!bc)
return;
- if(bc->data.insn.lockrep_pre != 0)
+ switch(bc->type) { /* the prefix byte lives in a different union member per bytecode type */
+ case BC_INSN:
+ lockrep_pre = &bc->data.insn.lockrep_pre;
+ break;
+ case BC_JMPREL:
+ lockrep_pre = &bc->data.jmprel.lockrep_pre;
+ break;
+ default: /* any other bytecode type is reported as an internal error */
+ InternalError(__LINE__, __FILE__,
+ "LockRep prefix applied to non-instruction");
+ return; /* InternalError calls exit(), so this is only a safeguard */
+ }
+
+ if(*lockrep_pre != 0) /* nonzero means a prefix was already recorded (0 indicates no prefix) */
Warning(WARN_MULT_LOCKREP_PREFIX, (char *)NULL);
- bc->data.insn.lockrep_pre = prefix;
+ *lockrep_pre = prefix; /* stored unconditionally, even when the warning fired */
+}
+
+/* Record a SHORT/NEAR selection for a jump target.
+ *  old_sel - selection to update; a NULL pointer makes this a no-op
+ *  new_sel - value stored into *old_sel
+ * If *old_sel already holds one of the *_FORCED values, a
+ * WARN_MULT_SHORTNEAR warning is issued before the value is replaced.
+ */
+void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel)
+{
+    if(old_sel) {
+	switch(*old_sel) {
+	    case JR_SHORT_FORCED:
+	    case JR_NEAR_FORCED:
+		/* a forced form was already recorded for this target */
+		Warning(WARN_MULT_SHORTNEAR, (char *)NULL);
+		break;
+	    default:
+		break;
+	}
+	*old_sel = new_sel;
+    }
+}
+
+/* Initialize the bookkeeping fields that every bytecode type shares.
+ * Called last by the type-specific BuildBC_* constructors.
+ */
+static void BuildBC_Common(bytecode *bc)
+{
+    /* values taken from outside this function (line_number, mode_bits) */
+    bc->lineno = line_number;
+    bc->mode_bits = mode_bits;
+
+    /* everything else starts out empty */
+    bc->filename = (char *)NULL;
+    bc->offset = 0;
+    bc->len = 0;
}
void BuildBC_Insn(bytecode *bc,
unsigned char spare,
immval *im_ptr,
unsigned char im_len,
- unsigned char im_sign,
- unsigned char im_rel)
+ unsigned char im_sign)
{
bc->next = (bytecode *)NULL;
bc->type = BC_INSN;
if(im_ptr) {
bc->data.insn.imm = *im_ptr;
- bc->data.insn.imm.f_rel = im_rel;
bc->data.insn.imm.f_sign = im_sign;
bc->data.insn.imm.f_len = im_len;
} else {
bc->data.insn.imm.len = 0;
- bc->data.insn.imm.f_rel = 0;
bc->data.insn.imm.f_sign = 0;
bc->data.insn.imm.f_len = 0;
}
bc->data.insn.addrsize = 0;
bc->data.insn.opersize = opersize;
+ bc->data.insn.lockrep_pre = 0;
- bc->len = 0;
+ BuildBC_Common(bc);
+}
- bc->filename = (char *)NULL;
- bc->lineno = line_number;
+void BuildBC_JmpRel(bytecode *bc, /* bytecode to initialize as a BC_JMPREL */
+ targetval *target, /* target expression plus any SHORT/NEAR selection from the parser */
+ unsigned char short_valid, /* nonzero if a short-form opcode exists */
+ unsigned char short_opcode_len, /* number of bytes used in the short opcode */
+ unsigned char short_op0,
+ unsigned char short_op1,
+ unsigned char short_op2,
+ unsigned char near_valid, /* nonzero if a near-form opcode exists */
+ unsigned char near_opcode_len, /* number of bytes used in the near opcode */
+ unsigned char near_op0,
+ unsigned char near_op1,
+ unsigned char near_op2,
+ unsigned char addrsize) /* address size override; 0 indicates no override */
+{
+ bc->next = (bytecode *)NULL;
+ bc->type = BC_JMPREL;
+
+ bc->data.jmprel.target = target->val;
+ bc->data.jmprel.op_sel = target->op_sel;
+
+ if((target->op_sel == JR_SHORT_FORCED) && (!short_valid)) /* source forced SHORT but no short opcode exists */
+ Error(ERR_NO_JMPREL_FORM, (char *)NULL, "SHORT");
+ if((target->op_sel == JR_NEAR_FORCED) && (!near_valid)) /* source forced NEAR but no near opcode exists */
+ Error(ERR_NO_JMPREL_FORM, (char *)NULL, "NEAR");
+
+ bc->data.jmprel.shortop.valid = short_valid;
+ if(short_valid) { /* opcode bytes and length are left unset when the form does not exist */
+ bc->data.jmprel.shortop.opcode[0] = short_op0;
+ bc->data.jmprel.shortop.opcode[1] = short_op1;
+ bc->data.jmprel.shortop.opcode[2] = short_op2;
+ bc->data.jmprel.shortop.opcode_len = short_opcode_len;
+ }
- bc->offset = 0;
- bc->mode_bits = mode_bits;
+ bc->data.jmprel.nearop.valid = near_valid;
+ if(near_valid) { /* opcode bytes and length are left unset when the form does not exist */
+ bc->data.jmprel.nearop.opcode[0] = near_op0;
+ bc->data.jmprel.nearop.opcode[1] = near_op1;
+ bc->data.jmprel.nearop.opcode[2] = near_op2;
+ bc->data.jmprel.nearop.opcode_len = near_opcode_len;
+ }
+
+ bc->data.jmprel.addrsize = addrsize;
+ bc->data.jmprel.opersize = 0; /* 0 indicates no override */
+ bc->data.jmprel.lockrep_pre = 0; /* 0 indicates no prefix */
+
+ BuildBC_Common(bc); /* fills in len, filename, lineno, offset and mode_bits */
}
-
+
/* TODO: implement. Shouldn't be difficult. */
unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len)
{
(unsigned int)bc->data.insn.ea.sib,
(unsigned int)bc->data.insn.ea.valid_sib,
(unsigned int)bc->data.insn.ea.need_sib);
- printf("Immediate/Relative Value:\n");
+ printf("Immediate Value:\n");
printf(" Val=");
if (!bc->data.insn.imm.val)
printf("(nil)");
else
expr_print(bc->data.insn.imm.val);
printf("\n");
- printf(" Len=%u, IsRel=%u, IsNeg=%u\n",
+ printf(" Len=%u, IsNeg=%u\n",
(unsigned int)bc->data.insn.imm.len,
- (unsigned int)bc->data.insn.imm.isrel,
(unsigned int)bc->data.insn.imm.isneg);
- printf(" FLen=%u, FRel=%u, FSign=%u\n",
+ printf(" FLen=%u, FSign=%u\n",
(unsigned int)bc->data.insn.imm.f_len,
- (unsigned int)bc->data.insn.imm.f_rel,
(unsigned int)bc->data.insn.imm.f_sign);
- printf("Opcode: %2x %2x OpLen=%u\n",
+ printf("Opcode: %2x %2x %2x OpLen=%u\n",
(unsigned int)bc->data.insn.opcode[0],
(unsigned int)bc->data.insn.opcode[1],
+ (unsigned int)bc->data.insn.opcode[2],
(unsigned int)bc->data.insn.opcode_len);
- printf("OperSize=%u LockRepPre=%2x\n",
+ printf("AddrSize=%u OperSize=%u LockRepPre=%2x\n",
+ (unsigned int)bc->data.insn.addrsize,
(unsigned int)bc->data.insn.opersize,
(unsigned int)bc->data.insn.lockrep_pre);
break;
+ case BC_JMPREL:
+	    /* Debug dump of a relative-jump bytecode: target expression,
+	     * the short and near opcode forms (or "None" when a form does
+	     * not exist), the selected form, and the size/prefix overrides.
+	     */
+	    printf("_Relative Jump_\n");
+	    printf("Target=");
+	    expr_print(bc->data.jmprel.target);
+	    printf("\nShort Form:\n");
+	    if(!bc->data.jmprel.shortop.valid)
+		printf(" None\n");
+	    else
+		printf(" Opcode: %2x %2x %2x OpLen=%u\n",
+		       (unsigned int)bc->data.jmprel.shortop.opcode[0],
+		       (unsigned int)bc->data.jmprel.shortop.opcode[1],
+		       (unsigned int)bc->data.jmprel.shortop.opcode[2],
+		       (unsigned int)bc->data.jmprel.shortop.opcode_len);
+	    /* label the near form too; without this header its opcode line
+	     * was printed directly under "Short Form:" */
+	    printf("Near Form:\n");
+	    if(!bc->data.jmprel.nearop.valid)
+		printf(" None\n");
+	    else
+		printf(" Opcode: %2x %2x %2x OpLen=%u\n",
+		       (unsigned int)bc->data.jmprel.nearop.opcode[0],
+		       (unsigned int)bc->data.jmprel.nearop.opcode[1],
+		       (unsigned int)bc->data.jmprel.nearop.opcode[2],
+		       (unsigned int)bc->data.jmprel.nearop.opcode_len);
+	    printf("OpSel=");
+	    switch(bc->data.jmprel.op_sel) {
+		case JR_NONE: printf("None"); break;
+		case JR_SHORT: printf("Short"); break;
+		case JR_NEAR: printf("Near"); break;
+		case JR_SHORT_FORCED: printf("Forced Short"); break;
+		case JR_NEAR_FORCED: printf("Forced Near"); break;
+		default: printf("UNKNOWN!!"); break;
+	    }
+	    printf("\nAddrSize=%u OperSize=%u LockRepPre=%2x\n",
+		   (unsigned int)bc->data.jmprel.addrsize,
+		   (unsigned int)bc->data.jmprel.opersize,
+		   (unsigned int)bc->data.jmprel.lockrep_pre);
+	    break;
case BC_DATA:
printf("_Data_\n");
for(i=0; i<bc->len; i++) {
-/* $Id: bytecode.h,v 1.12 2001/07/06 06:25:53 mu Exp $
+/* $Id: bytecode.h,v 1.13 2001/07/11 04:07:10 peter Exp $
* Bytecode utility functions header file
*
* Copyright (C) 2001 Peter Johnson
struct expr_s *val;
unsigned char len; /* length of val (in bytes), 0 if none */
- unsigned char isrel;
unsigned char isneg; /* the value has been explicitly negated */
unsigned char f_len; /* final imm length */
- unsigned char f_rel; /* 1 if final imm should be rel */
unsigned char f_sign; /* 1 if final imm should be signed */
} immval;
+typedef enum jmprel_opcode_sel_e { /* which opcode form a relative jump uses */
+ JR_NONE, /* no form selected; the parser assigns this to a bare label target */
+ JR_SHORT, /* short form; presumably chosen by the assembler, not the source -- TODO confirm */
+ JR_NEAR, /* near form; presumably chosen by the assembler, not the source -- TODO confirm */
+ JR_SHORT_FORCED, /* short form, specified in the source as such (SHORT) */
+ JR_NEAR_FORCED /* near form, specified in the source as such (NEAR) */
+} jmprel_opcode_sel;
+
+typedef struct targetval_s { /* parser value for a jump target operand */
+ struct expr_s *val; /* expression naming the target */
+
+ jmprel_opcode_sel op_sel; /* JR_NONE for a bare label, or a *_FORCED value when SHORT/NEAR was written */
+} targetval;
+
typedef struct bytecode_s {
struct bytecode_s *next;
- enum { BC_INSN, BC_DATA, BC_RESERVE } type;
+ enum { BC_INSN, BC_JMPREL, BC_DATA, BC_RESERVE } type;
union {
struct {
unsigned char opersize; /* 0 indicates no override */
unsigned char lockrep_pre; /* 0 indicates no prefix */
} insn;
+ struct {
+ struct expr_s *target; /* target location */
+
+ struct {
+ unsigned char opcode[3];
+ unsigned char opcode_len;
+ unsigned char valid; /* does the opcode exist? */
+ } shortop, nearop;
+
+ /* which opcode are we using? */
+ /* The *FORCED forms are specified in the source as such */
+ jmprel_opcode_sel op_sel;
+
+ unsigned char addrsize; /* 0 indicates no override */
+ unsigned char opersize; /* 0 indicates no override */
+ unsigned char lockrep_pre; /* 0 indicates no prefix */
+ } jmprel;
struct {
unsigned char *data;
} data;
void SetInsnAddrSizeOverride(bytecode *bc, unsigned char addrsize);
void SetInsnLockRepPrefix(bytecode *bc, unsigned char prefix);
+void SetOpcodeSel(jmprel_opcode_sel *old_sel, jmprel_opcode_sel new_sel);
+
void BuildBC_Insn(bytecode *bc,
unsigned char opersize,
unsigned char opcode_len,
unsigned char spare,
immval *im_ptr,
unsigned char im_len,
- unsigned char im_sign,
- unsigned char im_rel);
+ unsigned char im_sign);
+
+void BuildBC_JmpRel(bytecode *bc, /* bytecode to initialize as a BC_JMPREL */
+ targetval *target, /* target expression plus any SHORT/NEAR selection */
+ unsigned char short_valid, /* nonzero if a short-form opcode exists */
+ unsigned char short_opcode_len, /* number of bytes used in the short opcode */
+ unsigned char short_op0,
+ unsigned char short_op1,
+ unsigned char short_op2,
+ unsigned char near_valid, /* nonzero if a near-form opcode exists */
+ unsigned char near_opcode_len, /* number of bytes used in the near opcode */
+ unsigned char near_op0,
+ unsigned char near_op1,
+ unsigned char near_op2,
+ unsigned char addrsize); /* address size override; 0 indicates no override */
unsigned char *ConvertBCInsnToBytes(unsigned char *ptr, bytecode *bc, int *len);
-/* $Id: errwarn.c,v 1.14 2001/07/04 20:57:53 peter Exp $
+/* $Id: errwarn.c,v 1.15 2001/07/11 04:07:10 peter Exp $
* Error and warning reporting and related functions.
*
* Copyright (C) 2001 Peter Johnson
"label or instruction expected at start of line",
"expression syntax error",
"duplicate definition of `%1'; previously defined line %2",
- "mismatch in operand sizes"
+ "mismatch in operand sizes",
+ "no %s form of that jump instruction exists"
};
static char *warn_msgs[] = {
"%s value exceeds bounds",
"multiple segment overrides, using leftmost",
"multiple LOCK or REP prefixes, using leftmost",
- "no non-local label before '%s'"
+ "no non-local label before '%s'",
+ "multiple SHORT or NEAR specifiers, using leftmost"
};
/* hate to define these as static buffers; better solution would be to use
Error(ERR_PARSER, (char *)NULL, s);
}
+/* Report an internal consistency failure and terminate the program.
+ *  line    - source line of the failed check (callers pass __LINE__)
+ *  file    - source file of the failed check (callers pass __FILE__)
+ *  message - description of what went wrong
+ * Writes the report to stderr and exits with EXIT_FAILURE; never returns.
+ */
+void InternalError(unsigned int line, char *file, char *message)
+{
+    /* line is unsigned int, so it needs %u; %d mismatched the argument type */
+    fprintf(stderr, "INTERNAL ERROR at %s, line %u: %s\n", file, line,
+	    message);
+    exit(EXIT_FAILURE);
+}
+
void Fatal(fatal_num num)
{
fprintf(stderr, "FATAL: %s\n", fatal_msgs[num]);
-/* $Id: errwarn.h,v 1.8 2001/07/04 20:53:21 peter Exp $
+/* $Id: errwarn.h,v 1.9 2001/07/11 04:07:10 peter Exp $
* Error and warning reporting and related functions header file.
*
* Copyright (C) 2001 Peter Johnson
FATAL_NOMEM
} fatal_num;
+void InternalError(unsigned int line, char *file, char *message);
void Fatal(fatal_num);
typedef enum {
ERR_INVALID_LINE,
ERR_EXP_SYNTAX,
ERR_DUPLICATE_DEF,
- ERR_OP_SIZE_MISMATCH
+ ERR_OP_SIZE_MISMATCH,
+ ERR_NO_JMPREL_FORM
} err_num;
void Error(err_num, char *, ...);
WARN_VALUE_EXCEEDS_BOUNDS,
WARN_MULT_SEG_OVERRIDE,
WARN_MULT_LOCKREP_PREFIX,
- WARN_NO_BASE_LABEL
+ WARN_NO_BASE_LABEL,
+ WARN_MULT_SHORTNEAR
} warn_num;
void Warning(warn_num, char *, ...);
#!/usr/bin/perl -w
-# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $
+# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $
# Generates bison.y and token.l from instrs.dat for YASM
#
# Copyright (C) 2001 Michael Urman
my $VERSION = "0.0.1";
# useful constants for instruction arrays
-use constant INST => 0;
-use constant OPERANDS => 1;
-use constant OPSIZE => 2;
-use constant OPCODE => 3;
-use constant EFFADDR => 4;
-use constant IMM => 5;
-use constant CPU => 6;
+# common to both rule formats
+use constant INST => 0;
+use constant OPERANDS => 1;
+# general format: columns of an ordinary instruction rule
+use constant OPSIZE => 2;
+use constant OPCODE => 3;
+use constant EFFADDR => 4;
+use constant IMM => 5;
+use constant CPU => 6;
+# relative target format: columns of a rule whose operands contain "target"
+use constant ADSIZE => 2; # indices 2-6 deliberately overlap the general format; a rule array holds one layout or the other
+use constant SHORTOPCODE => 3; # "nil" when the instruction has no short form
+use constant NEAROPCODE => 4; # "nil" when the instruction has no near form
+use constant SHORTCPU => 5;
+use constant NEARCPU => 6;
use constant TOO_MANY_ERRORS => 20;
reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
mem8 mem16 mem32 mem1632 mem64 mem80 mem128
mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
+ target memfar
);
my $valid_opcodes = join '|', qw(
[0-9A-F]{2}
# i still say changing instrs.dat would be better ;)
$args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
- my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi;
- die "Invalid Operation Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Opcode\n"
- if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
- die "Invalid Effective Address\n"
- if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
- die "Invalid Immediate Operand\n"
- if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
- die "Invalid CPU\n"
- if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ # detect relative target format by looking for "target" in args
+ if($args =~ m/target/oi)
+ {
+ my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
+ split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Address Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Short Opcode\n"
+ if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Near Opcode\n"
+ if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Short CPU\n"
+ if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ die "Invalid Near CPU\n"
+ if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
+ } else {
+ my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Operation Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Opcode\n"
+ if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
+ die "Invalid Effective Address\n"
+ if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
+ die "Invalid Immediate Operand\n"
+ if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
+ die "Invalid CPU\n"
+ if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ }
}
sub add_group_member ($$$$$)
(@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
my $count = 0;
foreach my $inst (@{$groups->{$group}{rules}}) {
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args
-
- my $func = "BuildBC_Insn";
-
- # Create the argument list for BuildBC
- my @args;
-
- # First argument is always &$$
- push @args, '&$$,';
-
- # opcode size
- push @args, "$inst->[OPSIZE],";
- $args[-1] =~ s/nil/0/;
-
- # number of bytes of opcodes
- push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
-
- # opcode piece 1 (and 2 if attached)
- push @args, $inst->[OPCODE];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- # don't match $0.\d in the following rule.
- $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
- $args[-1] .= ',';
-
- # opcode piece 2 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,/o;
- # opcode piece 3 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
-
- # effective addresses
- push @args, $inst->[EFFADDR];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(effaddr *)NULL, 0/;
- # don't let a $0.\d match slip into the following rules.
- $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
- $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
- ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
- $args[-1] .= ',';
-
- die $args[-1] if $args[-1] =~ m/\d+[ri]/;
-
- # immediate sources
- push @args, $inst->[IMM];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(immval *)NULL, 0/;
- # don't match $0.\d in the following rules.
- $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s[^([0-9A-Fa-f]+),]
- [ConvertIntToImm((immval *)NULL, 0x$1),];
- $args[-1] =~ s[^\$0.(\d+),]
- [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
-
- # divide the second, and only the second, by 8 bits/byte
- $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
- $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
-
- $args[-1] =~ s/(\&\$\d+)(r)?/$1/;
- $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0';
-
- die $args[-1] if $args[-1] =~ m/\d+[ris]/;
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # see if we match one of the cases to defer
- if (($inst->[OPERANDS]||"") =~ m/,ONE/)
- {
- $ONE = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
- {
- $AL = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
- {
- $AX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
- {
- $EAX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
- {
- $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
- {
- $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
- {
- $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ if($inst->[OPERANDS] =~ m/target/oi)
{
- $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
- }
+ # relative target format
+ # build the instruction in pieces.
- # or if we've deferred and we match the folding version
- elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
- {
- my $immarg = get_token_number ($tokens, "imm8");
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
- $ONE->[4] = 1;
- print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
- }
- elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
- {
- $AL->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg8");
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ my $func = "BuildBC_JmpRel";
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
- }
- elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
- {
- $AX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg16");
+ # Create the argument list for BuildBC
+ my @args;
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
- }
- elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
- {
- $EAX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg32");
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # Target argument: HACK: Always assumed to be arg 1.
+ push @args, '&$2,';
+
+ # test for short opcode "nil"
+ if($inst->[SHORTOPCODE] =~ m/nil/)
+ {
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of short opcode
+ push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[SHORTOPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
+ }
+
+ # test for near opcode "nil"
+ if($inst->[NEAROPCODE] =~ m/nil/)
+ {
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of near opcode
+ push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[NEAROPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
+ }
+
+ # address size
+ push @args, "$inst->[ADSIZE]";
+ $args[-1] =~ s/nil/0/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ # generate the grammar
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
}
- elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ else
{
- my $first = 1;
- for (my $i=0; $i < @XCHG_AX; ++$i)
+ # general instruction format
+ # build the instruction in pieces.
+
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
+
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ # offset args
+ my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
+ my $func = "BuildBC_Insn";
+
+ # Create the argument list for BuildBC
+ my @args;
+
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # operand size
+ push @args, "$inst->[OPSIZE],";
+ $args[-1] =~ s/nil/0/;
+
+ # number of bytes of opcodes
+ push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[OPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
+
+ # effective addresses
+ push @args, $inst->[EFFADDR];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
+ $args[-1] =~ s/nil/0/;
+ # don't let a $0.\d match slip into the following rules.
+ $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
+ $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
+ ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
+ $args[-1] .= ',';
+
+ die $args[-1] if $args[-1] =~ m/\d+[ri]/;
+
+ # immediate sources
+ push @args, $inst->[IMM];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/nil/(immval *)NULL, 0/;
+ # don't match $0.\d in the following rules.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s[^([0-9A-Fa-f]+),]
+ [ConvertIntToImm((immval *)NULL, 0x$1),];
+ $args[-1] =~ s[^\$0.(\d+),]
+ [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
+
+ # divide the second, and only the second, by 8 bits/byte
+ $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
+ $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
+
+ die $args[-1] if $args[-1] =~ m/\d+s/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
+
+ # see if we match one of the cases to defer
+ if (($inst->[OPERANDS]||"") =~ m/,ONE/)
+ {
+ $ONE = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
+ {
+ $AL = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
+ {
+ $AX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
+ {
+ $EAX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
+ {
+ $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
+ {
+ $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
+ {
+ $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ {
+ $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
+ }
+
+ # or if we've deferred and we match the folding version
+ elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
+ {
+ my $immarg = get_token_number ($tokens, "imm8");
+
+ $ONE->[4] = 1;
+ print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
+ }
+ elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
+ {
+ $AL->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg8");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
+ }
+ elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
{
- if($XCHG_AX[$i])
+ $AX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg16");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
+ }
+ elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
+ {
+ $EAX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg32");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ }
+ elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ {
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_AX; ++$i)
{
- $XCHG_AX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg16
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg16") + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
- $first = 0;
- }
- else
+ if($XCHG_AX[$i])
{
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $XCHG_AX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg16
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg16")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_EAX; ++$i)
+ elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
{
- if($XCHG_EAX[$i])
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_EAX; ++$i)
{
- $XCHG_EAX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg32
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg32") + $i*2;
-
- if ($first)
+ if($XCHG_EAX[$i])
{
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $XCHG_EAX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg32
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg32")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- # otherwise, generate the normal version
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ # otherwise, generate the normal version
+ else
+ {
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ }
}
}
-; $Id: instrs.dat,v 1.29 2001/07/05 09:39:30 peter Exp $
+; $Id: instrs.dat,v 1.30 2001/07/11 04:07:11 peter Exp $
; List of valid instruction/operand combinations
;
; Copyright (C) 2001 Peter Johnson
; $xr indicates operand is register, not ModRM (needs convert to RM)
; $xi indicates operand is immediate (2nd parm is size in bits)
; Imm - Immediate source operand and forced size (in bits).
-; $xr means relative displacement needed
; "s" after size indicates signed number
; A number instead of a $x is a hex constant value.
;
;
; Instructions are listed in the same order as that in GNU binutils
; /include/opcode/i386.h, used for the GAS assembler. See
-; <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h>.
+; <http://sources.redhat.com/cgi-bin/cvsweb.cgi/src/include/opcode/i386.h?cvsroot=src>.
;
; TODO:
; Finish instructions (may require changing parser code).
;
; Control transfer instructions (unconditional)
;
-; call
-; jmp
+; Special format for relative targets:
+; !Grp/Inst target AdSize ShrtOp NearOp ShrtCPU NearCPU
+;
+!jmpcall target nil $0.1 $0.2 8086 8086
+!jmpcall imm:imm nil $0.3 $2i,nil $1,16 8086
+!jmpcall WORD imm:imm 16 $0.3 $2i,16 $1,16 8086
+!jmpcall DWORD imm:imm 32 $0.3 $2i,32 $1,16 386
+!jmpcall memfar nil FF $1,$0.4+1 nil 8086
+!jmpcall WORD memfar 16 FF $1,$0.4+1 nil 8086
+!jmpcall DWORD memfar 32 FF $1,$0.4+1 nil 386
+!jmpcall mem nil FF $1,$0.4 nil 8086
+!jmpcall rm16x 16 FF $1,$0.4 nil 8086
+!jmpcall rm32x 32 FF $1,$0.4 nil 386
+call!jmpcall nil,E8,9A,2
+jmp!jmpcall EB,E9,EA,4
ret!onebyte nil,C3 8086
retn nil nil C3 nil nil 8086
retf nil nil CB nil nil 8086
;
; Conditional jumps
;
-; jcc
-; jcxz
-; jecxz
+!jcc target nil 70+$0.1 0F,80+$0.1 8086 386
+jo!jcc 0
+jno!jcc 1
+jb!jcc 2
+jc!jcc 2
+jnae!jcc 2
+jnb!jcc 3
+jnc!jcc 3
+jae!jcc 3
+je!jcc 4
+jz!jcc 4
+jne!jcc 5
+jnz!jcc 5
+jbe!jcc 6
+jna!jcc 6
+jnbe!jcc 7
+ja!jcc 7
+js!jcc 8
+jns!jcc 9
+jp!jcc A
+jpe!jcc A
+jnp!jcc B
+jpo!jcc B
+jl!jcc C
+jnge!jcc C
+jnl!jcc D
+jge!jcc D
+jle!jcc E
+jng!jcc E
+jnle!jcc F
+jg!jcc F
+jcxz target 16 E3 nil 8086 8086
+jecxz target 32 E3 nil 386 386
;
; Loop instructions
;
-; loop
-; loopcc:
-;:loope loopz
-;loopz imm1632 nil E1 nil $1r,8s 8086
-;loopz imm1632,REG_CX 16 E1 nil $1r,8s 8086
-;loopz imm1632,REG_ECX 32 E1 nil $1r,8s 386
-;:loopne loopnz
-;loopnz imm1632 nil E1 nil $1r,8s 8086
-;loopnz imm1632,REG_CX 16 E1 nil $1r,8s 8086
-;loopnz imm1632,REG_ECX 32 E1 nil $1r,8s 386
+!loopg target nil E0+$0.1 nil 8086 8086
+!loopg target,REG_CX 16 E0+$0.1 nil 8086 8086
+!loopg target,REG_ECX 32 E0+$0.1 nil 386 386
+loop!loopg 2
+loopz!loopg 1
+loope!loopg 1
+loopnz!loopg 0
+loopne!loopg 0
;
; Set byte on flag instructions
;
-; setcc
+!setcc rm8 nil 0F,90+$0.1 $1,2 nil 386
+seto!setcc 0
+setno!setcc 1
+setb!setcc 2
+setc!setcc 2
+setnae!setcc 2
+setnb!setcc 3
+setnc!setcc 3
+setae!setcc 3
+sete!setcc 4
+setz!setcc 4
+setne!setcc 5
+setnz!setcc 5
+setbe!setcc 6
+setna!setcc 6
+setnbe!setcc 7
+seta!setcc 7
+sets!setcc 8
+setns!setcc 9
+setp!setcc A
+setpe!setcc A
+setnp!setcc B
+setpo!setcc B
+setl!setcc C
+setnge!setcc C
+setnl!setcc D
+setge!setcc D
+setle!setcc E
+setng!setcc E
+setnle!setcc F
+setg!setcc F
;
; String instructions
;
-/* $Id: bison.y.in,v 1.19 2001/07/06 06:25:53 mu Exp $
+/* $Id: bison.y.in,v 1.20 2001/07/11 04:07:10 peter Exp $
* Main bison parser
*
* Copyright (C) 2001 Peter Johnson, Michael Urman
char *name;
int line;
} syminfo;
- unsigned char groupdata[3];
+ unsigned char groupdata[4];
effaddr ea_val;
expr *exp;
immval im_val;
+ targetval tgt_val;
bytecode bc;
}
%type <bc> line exp instr instrbase label
%type <int_val> fpureg reg32 reg16 reg8 segreg
-%type <ea_val> mem memaddr memexp
+%type <ea_val> mem memaddr memexp memfar
%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
%type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/
%type <exp> expr
%type <syminfo> explabel
%type <sym> label_id
+%type <tgt_val> target
%left '|'
%left '^'
mem128x: DQWORD mem { $$ = $2; }
;
+/* FAR memory, for jmp and call: a mem operand preceded by the FAR token; the value of the inner mem is passed through unchanged */
+memfar: FAR mem { $$ = $2; }
+;
+
/* implicit memory */
mem8: mem
| mem8x
| imm32x
;
+/* jump targets: a label reference (op_sel starts as JR_NONE), optionally preceded by SHORT or NEAR, each of which passes the matching *_FORCED selection to SetOpcodeSel(). The rule is right-recursive, so qualifiers can stack; how stacked or conflicting qualifiers are handled is up to SetOpcodeSel() (not visible here). */
+target: explabel {
+ $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL)));
+ $$.op_sel = JR_NONE;
+ }
+ | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
+ | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+;
+
/* expression trees */
expr:
INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }
#!/usr/bin/perl -w
-# $Id: gen_instr.pl,v 1.17 2001/07/09 05:30:55 mu Exp $
+# $Id: gen_instr.pl,v 1.18 2001/07/11 04:07:11 peter Exp $
# Generates bison.y and token.l from instrs.dat for YASM
#
# Copyright (C) 2001 Michael Urman
my $VERSION = "0.0.1";
# useful constants for instruction arrays
-use constant INST => 0;
-use constant OPERANDS => 1;
-use constant OPSIZE => 2;
-use constant OPCODE => 3;
-use constant EFFADDR => 4;
-use constant IMM => 5;
-use constant CPU => 6;
+# array indices common to both rule formats
+use constant INST => 0;
+use constant OPERANDS => 1;
+# general instruction format (indices 2-6)
+use constant OPSIZE => 2;
+use constant OPCODE => 3;
+use constant EFFADDR => 4;
+use constant IMM => 5;
+use constant CPU => 6;
+# relative target format: deliberately reuses indices 2-6 with different meanings, so the format of a rule must be known before these are used
+use constant ADSIZE => 2;
+use constant SHORTOPCODE => 3;
+use constant NEAROPCODE => 4;
+use constant SHORTCPU => 5;
+use constant NEARCPU => 6;
use constant TOO_MANY_ERRORS => 20;
reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
mem8 mem16 mem32 mem1632 mem64 mem80 mem128
mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
+ target memfar
);
my $valid_opcodes = join '|', qw(
[0-9A-F]{2}
# i still say changing instrs.dat would be better ;)
$args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;
- my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
- eval {
- die "Invalid group name\n"
- if $inst !~ m/^!\w+$/o;
- die "Invalid Operands\n"
- if $op !~ m/^(nil|(TO\s)?(?:$valid_regs)(,(?:$valid_regs)){0,2})$/oi;
- die "Invalid Operation Size\n"
- if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
- die "Invalid Opcode\n"
- if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
- die "Invalid Effective Address\n"
- if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
- die "Invalid Immediate Operand\n"
- if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
- die "Invalid CPU\n"
- if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
- };
- push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
- die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
- # knock the ! off of $inst for the groupname
- $inst = substr $inst, 1;
- push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ # detect relative target format by looking for "target" (case-insensitive substring) anywhere in args; such rules carry short/near opcodes and short/near CPUs where the general format carries opcode, effective address, immediate and CPU
+ if($args =~ m/target/oi)
+ {
+ my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
+ split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Address Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Short Opcode\n"
+ if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Near Opcode\n"
+ if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
+ die "Invalid Short CPU\n"
+ if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ die "Invalid Near CPU\n"
+ if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname; the rule is stored in relative target field order (even if validation above recorded an error)
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
+ } else {
+ my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
+ eval {
+ die "Invalid group name\n"
+ if $inst !~ m/^!\w+$/o;
+ die "Invalid Operands\n"
+ if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
+ die "Invalid Operation Size\n"
+ if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
+ die "Invalid Opcode\n"
+ if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
+ die "Invalid Effective Address\n"
+ if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
+ die "Invalid Immediate Operand\n"
+ if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
+ die "Invalid CPU\n"
+ if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
+ };
+ push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
+ die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
+ # knock the ! off of $inst for the groupname; the rule is stored in general format field order (even if validation above recorded an error)
+ $inst = substr $inst, 1;
+ push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
+ }
}
sub add_group_member ($$$$$)
(@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
my $count = 0;
foreach my $inst (@{$groups->{$group}{rules}}) {
- # build the instruction in pieces.
-
- # rulename = instruction
- my $rule = "$inst->[INST]";
-
- # tokens it eats: instruction and arguments
- # nil => no arguments
- my $tokens = "\Ugrp_$rule\E";
- $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil';
- $tokens =~ s/,/ ',' /g;
- my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args
-
- my $func = "BuildBC_Insn";
-
- # Create the argument list for BuildBC
- my @args;
-
- # First argument is always &$$
- push @args, '&$$,';
-
- # opcode size
- push @args, "$inst->[OPSIZE],";
- $args[-1] =~ s/nil/0/;
-
- # number of bytes of opcodes
- push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";
-
- # opcode piece 1 (and 2 if attached)
- push @args, $inst->[OPCODE];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
- # don't match $0.\d in the following rule.
- $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
- $args[-1] .= ',';
-
- # opcode piece 2 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,/o;
- # opcode piece 3 (if not attached)
- push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
-
- # effective addresses
- push @args, $inst->[EFFADDR];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(effaddr *)NULL, 0/;
- # don't let a $0.\d match slip into the following rules.
- $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
- $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
- ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
- $args[-1] .= ',';
-
- die $args[-1] if $args[-1] =~ m/\d+[ri]/;
-
- # immediate sources
- push @args, $inst->[IMM];
- $args[-1] =~ s/,/, /;
- $args[-1] =~ s/nil/(immval *)NULL, 0/;
- # don't match $0.\d in the following rules.
- $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
- $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first!
- $args[-1] =~ s[^([0-9A-Fa-f]+),]
- [ConvertIntToImm((immval *)NULL, 0x$1),];
- $args[-1] =~ s[^\$0.(\d+),]
- [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
-
- # divide the second, and only the second, by 8 bits/byte
- $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
- $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
-
- $args[-1] =~ s/(\&\$\d+)(r)?/$1/;
- $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0';
-
- die $args[-1] if $args[-1] =~ m/\d+[ris]/;
-
- # now that we've constructed the arglist, subst $0.\d
- s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
-
- # see if we match one of the cases to defer
- if (($inst->[OPERANDS]||"") =~ m/,ONE/)
- {
- $ONE = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
- {
- $AL = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
- {
- $AX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
- {
- $EAX = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
- {
- $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
- {
- $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
- {
- $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
- }
- elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ if($inst->[OPERANDS] =~ m/target/oi)
{
- $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
- }
+ # relative target format
+ # build the instruction in pieces.
- # or if we've deferred and we match the folding version
- elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
- {
- my $immarg = get_token_number ($tokens, "imm8");
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
- $ONE->[4] = 1;
- print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
- }
- elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
- {
- $AL->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg8");
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ my $func = "BuildBC_JmpRel";
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
- }
- elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
- {
- $AX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg16");
+ # Create the argument list for BuildBC
+ my @args;
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
- }
- elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
- {
- $EAX->[4] = 1;
- my $regarg = get_token_number ($tokens, "reg32");
+ # Build the argument list for the generated call, in this order:
+ # &$$, &target, five short-opcode args, five near-opcode args,
+ # address size. Each opcode group is: valid flag, byte count,
+ # and three opcode bytes (unused trailing bytes are 0).
+
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # Target argument: HACK: the target is always assumed to be the
+ # first operand, i.e. bison's $2 ($1 is the group token).
+ push @args, '&$2,';
+
+ # test for short opcode "nil"
+ if($inst->[SHORTOPCODE] =~ m/nil/)
+ {
+ # no short form: valid=0, length=0, three zero opcode bytes
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of short opcode = number of commas + 1.
+ # /g is required: without it the list-context match returns
+ # only the first capture, so the count could never exceed 2.
+ push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/g)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[SHORTOPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
+ }
+
+ # test for near opcode "nil"
+ if($inst->[NEAROPCODE] =~ m/nil/)
+ {
+ # no near form: valid=0, length=0, three zero opcode bytes
+ push @args, '0, 0, 0, 0, 0,';
+ }
+ else
+ {
+ # opcode is valid
+ push @args, '1,';
+
+ # number of bytes of near opcode = number of commas + 1
+ # (/g so that every comma is counted, see short opcode above)
+ push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/g)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[NEAROPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
+ }
+
+ # address size (last argument, so no trailing comma); nil => 0
+ push @args, "$inst->[ADSIZE]";
+ $args[-1] =~ s/nil/0/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
- print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ # generate the grammar
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
}
- elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ else
{
- my $first = 1;
- for (my $i=0; $i < @XCHG_AX; ++$i)
+ # general instruction format
+ # build the instruction in pieces.
+
+ # rulename = instruction
+ my $rule = "$inst->[INST]";
+
+ # tokens it eats: instruction and arguments
+ # nil => no arguments
+ my $tokens = "\Ugrp_$rule\E";
+ $tokens .= " $inst->[OPERANDS]"
+ if $inst->[OPERANDS] ne 'nil';
+ $tokens =~ s/,/ ',' /g;
+ $tokens =~ s/:/ ':' /g;
+ # offset args
+ my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
+ my $func = "BuildBC_Insn";
+
+ # Create the argument list for BuildBC
+ my @args;
+
+ # First argument is always &$$
+ push @args, '&$$,';
+
+ # operand size
+ push @args, "$inst->[OPSIZE],";
+ $args[-1] =~ s/nil/0/;
+
+ # number of bytes of opcodes = number of commas + 1.
+ # /g is required: without it the list-context match returns only
+ # the first capture, so a three-byte opcode was counted as two.
+ push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/g)+1) . ",";
+
+ # opcode piece 1 (and 2 and 3 if attached)
+ push @args, $inst->[OPCODE];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
+ # don't match $0.\d in the following rule.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
+ $args[-1] .= ',';
+
+ # opcode piece 2 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,/o;
+ # opcode piece 3 (if not attached)
+ push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;
+
+ # effective addresses: a field that is exactly nil becomes a NULL effaddr plus 0; any nil embedded in a longer field becomes 0
+ push @args, $inst->[EFFADDR];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
+ $args[-1] =~ s/nil/0/;
+ # don't let a $0.\d match slip into the following rules. Operand $N is renumbered to bison position N*2+$to (operands are separated by ',' tokens; $to is 1 when a TO/WORD/DWORD keyword precedes them).
+ $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/;
+ $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)]
+ ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e;
+ $args[-1] .= ',';
+
+ die $args[-1] if $args[-1] =~ m/\d+[ri]/;
+
+ # immediate sources: nil becomes a NULL immval plus 0; this is the last argument, so no trailing comma is appended
+ push @args, $inst->[IMM];
+ $args[-1] =~ s/,/, /;
+ $args[-1] =~ s/nil/(immval *)NULL, 0/;
+ # don't match $0.\d in the following rules. NOTE(review): the first pattern below has a single capture group, so its ($2||'') always contributes the empty string.
+ $args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
+ $args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first!
+ $args[-1] =~ s[^([0-9A-Fa-f]+),]
+ [ConvertIntToImm((immval *)NULL, 0x$1),];
+ $args[-1] =~ s[^\$0.(\d+),]
+ [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];
+
+ # divide the second, and only the second, by 8 bits/byte; the optional s suffix is dropped from the number and turned into a trailing 1/0 flag by the next statement
+ $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
+ $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';
+
+ die $args[-1] if $args[-1] =~ m/\d+s/;
+
+ # now that we've constructed the arglist, subst $0.\d
+ s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);
+
+ # see if we match one of the cases to defer
+ if (($inst->[OPERANDS]||"") =~ m/,ONE/)
+ {
+ $ONE = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
+ {
+ $AL = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
+ {
+ $AX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
+ {
+ $EAX = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
+ {
+ $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
+ {
+ $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
+ {
+ $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
+ }
+ elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
+ {
+ $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
+ }
+
+ # or if we've deferred and we match the folding version
+ elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
+ {
+ my $immarg = get_token_number ($tokens, "imm8");
+
+ $ONE->[4] = 1;
+ print GRAMMAR cond_action ($rule, $tokens, $count++, "$immarg.val", 1, $func, $ONE->[3], \@args);
+ }
+ elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
+ {
+ $AL->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg8");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
+ }
+ elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
{
- if($XCHG_AX[$i])
+ $AX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg16");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
+ }
+ elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
+ {
+ $EAX->[4] = 1;
+ my $regarg = get_token_number ($tokens, "reg32");
+
+ print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
+ }
+ elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
+ {
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_AX; ++$i)
{
- $XCHG_AX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg16
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg16") + $i*2;
-
- if ($first)
- {
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
- $first = 0;
- }
- else
+ if($XCHG_AX[$i])
{
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $XCHG_AX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg16
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg16")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
- ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
- {
- my $first = 1;
- for (my $i=0; $i < @XCHG_EAX; ++$i)
+ elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
+ ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
{
- if($XCHG_EAX[$i])
+ my $first = 1;
+ for (my $i=0; $i < @XCHG_EAX; ++$i)
{
- $XCHG_EAX[$i]->[4] = 1;
- # This is definitely a hack. The "right" way
- # to do this would be to enhance
- # get_token_number to get the nth reg32
- # instead of always getting the first.
- my $regarg =
- get_token_number ($tokens, "reg32") + $i*2;
-
- if ($first)
+ if($XCHG_EAX[$i])
{
- print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
- $first = 0;
- }
- else
- {
- $count++;
- print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $XCHG_EAX[$i]->[4] = 1;
+ # This is definitely a hack. The "right"
+ # way to do this would be to enhance
+ # get_token_number to get the nth reg32
+ # instead of always getting the first.
+ my $regarg =
+ get_token_number ($tokens, "reg32")
+ + $i*2;
+
+ if ($first)
+ {
+ print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ $first = 0;
+ }
+ else
+ {
+ $count++;
+ print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
+ }
}
}
+ print GRAMMAR cond_action_else ($func, \@args);
}
- print GRAMMAR cond_action_else ($func, \@args);
- }
- # otherwise, generate the normal version
- else
- {
- print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ # otherwise, generate the normal version
+ else
+ {
+ print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
+ }
}
}
-/* $Id: nasm-bison.y,v 1.19 2001/07/06 06:25:53 mu Exp $
+/* $Id: nasm-bison.y,v 1.20 2001/07/11 04:07:10 peter Exp $
* Main bison parser
*
* Copyright (C) 2001 Peter Johnson, Michael Urman
char *name;
int line;
} syminfo;
- unsigned char groupdata[3];
+ unsigned char groupdata[4];
effaddr ea_val;
expr *exp;
immval im_val;
+ targetval tgt_val;
bytecode bc;
}
%type <bc> line exp instr instrbase label
%type <int_val> fpureg reg32 reg16 reg8 segreg
-%type <ea_val> mem memaddr memexp
+%type <ea_val> mem memaddr memexp memfar
%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
%type <ea_val> rm8x rm16x rm32x /*rm64x rm128x*/
%type <exp> expr
%type <syminfo> explabel
%type <sym> label_id
+%type <tgt_val> target
%left '|'
%left '^'
mem128x: DQWORD mem { $$ = $2; }
;
+/* FAR memory, for jmp and call: a mem operand preceded by the FAR token; the value of the inner mem is passed through unchanged */
+memfar: FAR mem { $$ = $2; }
+;
+
/* implicit memory */
mem8: mem
| mem8x
| imm32x
;
+/* jump targets: a label reference (op_sel starts as JR_NONE), optionally preceded by SHORT or NEAR, each of which passes the matching *_FORCED selection to SetOpcodeSel(). The rule is right-recursive, so qualifiers can stack; how stacked or conflicting qualifiers are handled is up to SetOpcodeSel() (not visible here). */
+target: explabel {
+ $$.val = expr_new_ident(EXPR_SYM, ExprSym(sym_use_get($1.name, SYM_LABEL)));
+ $$.op_sel = JR_NONE;
+ }
+ | SHORT target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_SHORT_FORCED); }
+ | NEAR target { $$ = $2; SetOpcodeSel(&$$.op_sel, JR_NEAR_FORCED); }
+;
+
/* expression trees */
expr:
INTNUM { $$ = expr_new_ident (EXPR_NUM, ExprNum($1)); }