From: Peter Johnson Date: Tue, 15 May 2001 05:20:39 +0000 (-0000) Subject: Initial check-in. X-Git-Tag: v0.1.0~506 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2fdefb7a66b1818a5ab3fcb9ac63c24636d9572c;p=yasm Initial check-in. Contains hardcoded instructions that should be dynamically generated. svn path=/trunk/yasm/; revision=5 --- diff --git a/modules/parsers/nasm/bison.y.in b/modules/parsers/nasm/bison.y.in new file mode 100644 index 00000000..036f5b76 --- /dev/null +++ b/modules/parsers/nasm/bison.y.in @@ -0,0 +1,432 @@ +/* $Id: bison.y.in,v 1.1 2001/05/15 05:20:39 peter Exp $ + * Main bison parser + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +%{ +#include +#include +#include "symrec.h" +#include "globals.h" +#include "bytecode.h" + +#define YYDEBUG 1 + +void init_table(void); +extern int yylex(void); +extern void yyerror(char *); + +%} + +%union { + unsigned long int_val; + double double_val; + symrec *sym; + effaddr ea_val; + immval im_val; + bytecode bc; +} + +%token INTNUM +%token FLTNUM +%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON +%token BYTE WORD DWORD QWORD TWORD DQWORD +%token DECLARE_DATA +%token RESERVE_SPACE +%token INCBIN EQU TIMES +%token SEG WRT NEAR SHORT FAR NOSPLIT ORG +%token O16 O32 A16 A32 LOCK REPNZ REP REPZ +%token OPERSIZE ADDRSIZE +%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG +%token REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI +%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI +%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH +%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS +%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD +%token START_SECTION_OFFSET ENTRY_POINT +%token ID + +/* TODO: dynamically generate instruction tokens: */ +%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL + +%type aaa aad idiv imul in loopz lsl + +%type line exp instr instrbase +%type fpureg reg32 reg16 reg8 reg_dess reg_fsgs reg_notcs +%type mem memaddr memexp +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x xrm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 xrm64 +%type immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632 + +%left '-' '+' +%left '*' '/' + +%% +input: /* empty */ + | input line +; + +line: '\n' { $$.len = 0; line_number++; } + | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; } + | error '\n' { yyerrok; 
line_number++; } +; + +exp: instr +; + +/* directives */ +directive: bits + | section + | absolute + | extern + | global + | common +; + +bits: '[' BITS INTNUM ']' { } +; +section: '[' SECTION ']' { } +; +absolute: '[' ABSOLUTE INTNUM ']' { } +; +extern: '[' EXTERN ']' { } +; +global: '[' GLOBAL ']' { } +; +common: '[' COMMON ']' { } +; + +/* register groupings */ +fpureg: ST0 + | FPUREG_NOTST0 +; + +reg32: REG_EAX + | REG_ECX + | REG_EDX + | REG_EBX + | REG_ESP + | REG_EBP + | REG_ESI + | REG_EDI + | DWORD reg32 +; + +reg16: REG_AX + | REG_CX + | REG_DX + | REG_BX + | REG_SP + | REG_BP + | REG_SI + | REG_DI + | WORD reg16 +; + +reg8: REG_AL + | REG_CL + | REG_DL + | REG_BL + | REG_AH + | REG_CH + | REG_DH + | REG_BH + | BYTE reg8 +; + +reg_dess: REG_ES + | REG_SS + | REG_DS + | WORD reg_dess +; + +reg_fsgs: REG_FS + | REG_GS + | WORD reg_fsgs +; + +reg_notcs: reg_dess + | reg_fsgs + | WORD reg_notcs +; + +/* memory addresses */ +/* TODO: formula expansion */ +memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); } +; + +memaddr: memexp { $$ = $1; $$.segment = 0; } + | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; } + | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; } + | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; } + | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; } + | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; } + | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; } + | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; } + | WORD memaddr { $$ = $2; $$.addrsize = 16; $$.len = 3; } + | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; } +; + +mem: '[' memaddr ']' { $$ = $2; } +; + +/* explicit memory */ +mem8x: BYTE mem { $$ = $2; } +; +mem16x: WORD mem { $$ = $2; } +; +mem32x: DWORD mem { $$ = $2; } +; +mem64x: QWORD mem { $$ = $2; } +; +mem80x: TWORD mem { $$ = $2; } +; +mem128x: DQWORD mem { $$ = $2; } +; + +/* implicit memory */ +mem8: mem + | mem8x +; +mem16: mem + | mem16x +; +mem32: mem + | mem32x +; +mem64: mem + | mem64x +; +mem80: 
mem + | mem80x +; +mem128: mem + | mem128x +; + +/* both 16 and 32 bit memory */ +mem1632: mem + | mem16x + | mem32x +; + +/* explicit register or memory */ +rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8x +; +rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16x +; +rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32x +; +/* not needed: +rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128x +; +*/ + +/* implicit register or memory */ +rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8 +; +rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16 +; +rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32 +; +rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128 +; + +/* immediate values */ +/* TODO: formula expansion */ +immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); } +; + +/* explicit immediates */ +imm8x: BYTE immexp { $$ = $2; } +; +imm16x: WORD immexp { $$ = $2; } +; +imm32x: DWORD immexp { $$ = $2; } +; + +/* implicit immediates */ +imm8: immexp + | imm8x +; +imm16: immexp + | imm16x +; +imm32: immexp + | imm32x +; + +/* both 16 and 32 bit immediates */ +imm1632: immexp + | imm16x + | imm32x +; + +instr: instrbase + | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; } + | ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; } + | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; } + | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; } + | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; } + | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; } + | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; } + | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; } + | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; } + | REPNZ 
instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; } + | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; } + | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; /* REPE/REPZ share the 0xF3 prefix byte with REP; 0xF4 is the HLT opcode, not a prefix */ } +; + +/* instructions */ +/* TODO: dynamically generate */ +instrbase: aaa + | aad + | idiv + | imul + | in + | loopz + | lsl +; + +aaa: INS_AAA { + BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } +; + +aad: INS_AAD { + BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_AAD imm8 { + BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1); + } +; + +idiv: INS_IDIV rm8x { + BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0); + } + | INS_IDIV rm16x { + BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0); + } + | INS_IDIV rm32x { + BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0); + } +; + +imul: INS_IMUL rm8x { + BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL rm16x { + BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL rm32x { + BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL reg16 ',' rm16 { + BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0); + } + | INS_IMUL reg32 ',' rm32 { + BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0); + } + | INS_IMUL reg16 ',' rm16 ',' imm8x { + BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1); + } + | INS_IMUL reg32 ',' rm32 ',' imm8x { + BuildBC_Insn(&$$, 32, 1, 0x6B, 0, &$4, $2, &$6, 1); + } + | INS_IMUL reg16 ',' rm16 ',' imm16 { + BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2); + } + | INS_IMUL reg32 ',' rm32 ',' imm32 { + BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4); + } + | INS_IMUL reg16 ',' imm8x { + BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1); + } + | INS_IMUL reg32 ',' imm8x { + BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1); + } + | INS_IMUL reg16 ',' 
imm16 { + BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2); + } + | INS_IMUL reg32 ',' imm32 { + BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4); + } +; + +in: INS_IN REG_AL ',' imm8 { + BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_AX ',' imm8 { + BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_EAX ',' imm8 { + BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_AL ',' REG_DX { + BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_IN REG_AX ',' REG_DX { + BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_IN REG_EAX ',' REG_DX { + BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } +; + +loopz: INS_LOOPZ imm1632 { + BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } + | INS_LOOPZ imm1632 ',' REG_CX { + BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } + | INS_LOOPZ imm1632 ',' REG_ECX { + BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } +; + +lsl: INS_LSL reg16 ',' rm16 { + BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0); + } + | INS_LSL reg32 ',' rm32 { + BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0); + } +; + diff --git a/modules/parsers/nasm/nasm-bison.y b/modules/parsers/nasm/nasm-bison.y new file mode 100644 index 00000000..77cc3be2 --- /dev/null +++ b/modules/parsers/nasm/nasm-bison.y @@ -0,0 +1,432 @@ +/* $Id: nasm-bison.y,v 1.1 2001/05/15 05:20:39 peter Exp $ + * Main bison parser + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. 
+ * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +%{ +#include +#include +#include "symrec.h" +#include "globals.h" +#include "bytecode.h" + +#define YYDEBUG 1 + +void init_table(void); +extern int yylex(void); +extern void yyerror(char *); + +%} + +%union { + unsigned long int_val; + double double_val; + symrec *sym; + effaddr ea_val; + immval im_val; + bytecode bc; +} + +%token INTNUM +%token FLTNUM +%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON +%token BYTE WORD DWORD QWORD TWORD DQWORD +%token DECLARE_DATA +%token RESERVE_SPACE +%token INCBIN EQU TIMES +%token SEG WRT NEAR SHORT FAR NOSPLIT ORG +%token O16 O32 A16 A32 LOCK REPNZ REP REPZ +%token OPERSIZE ADDRSIZE +%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG +%token REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI +%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI +%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH +%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS +%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD +%token START_SECTION_OFFSET ENTRY_POINT +%token ID + +/* TODO: dynamically generate instruction tokens: */ +%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL + +%type aaa aad idiv imul in loopz lsl + +%type line exp instr instrbase +%type fpureg reg32 reg16 reg8 reg_dess 
reg_fsgs reg_notcs +%type mem memaddr memexp +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x xrm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 xrm64 +%type immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632 + +%left '-' '+' +%left '*' '/' + +%% +input: /* empty */ + | input line +; + +line: '\n' { $$.len = 0; line_number++; } + | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; } + | error '\n' { yyerrok; line_number++; } +; + +exp: instr +; + +/* directives */ +directive: bits + | section + | absolute + | extern + | global + | common +; + +bits: '[' BITS INTNUM ']' { } +; +section: '[' SECTION ']' { } +; +absolute: '[' ABSOLUTE INTNUM ']' { } +; +extern: '[' EXTERN ']' { } +; +global: '[' GLOBAL ']' { } +; +common: '[' COMMON ']' { } +; + +/* register groupings */ +fpureg: ST0 + | FPUREG_NOTST0 +; + +reg32: REG_EAX + | REG_ECX + | REG_EDX + | REG_EBX + | REG_ESP + | REG_EBP + | REG_ESI + | REG_EDI + | DWORD reg32 +; + +reg16: REG_AX + | REG_CX + | REG_DX + | REG_BX + | REG_SP + | REG_BP + | REG_SI + | REG_DI + | WORD reg16 +; + +reg8: REG_AL + | REG_CL + | REG_DL + | REG_BL + | REG_AH + | REG_CH + | REG_DH + | REG_BH + | BYTE reg8 +; + +reg_dess: REG_ES + | REG_SS + | REG_DS + | WORD reg_dess +; + +reg_fsgs: REG_FS + | REG_GS + | WORD reg_fsgs +; + +reg_notcs: reg_dess + | reg_fsgs + | WORD reg_notcs +; + +/* memory addresses */ +/* TODO: formula expansion */ +memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); } +; + +memaddr: memexp { $$ = $1; $$.segment = 0; } + | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; } + | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; } + | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; } + | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; } + | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; } + | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; } + | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; } + | WORD memaddr { $$ = $2; 
$$.addrsize = 16; $$.len = 3; } + | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; } +; + +mem: '[' memaddr ']' { $$ = $2; } +; + +/* explicit memory */ +mem8x: BYTE mem { $$ = $2; } +; +mem16x: WORD mem { $$ = $2; } +; +mem32x: DWORD mem { $$ = $2; } +; +mem64x: QWORD mem { $$ = $2; } +; +mem80x: TWORD mem { $$ = $2; } +; +mem128x: DQWORD mem { $$ = $2; } +; + +/* implicit memory */ +mem8: mem + | mem8x +; +mem16: mem + | mem16x +; +mem32: mem + | mem32x +; +mem64: mem + | mem64x +; +mem80: mem + | mem80x +; +mem128: mem + | mem128x +; + +/* both 16 and 32 bit memory */ +mem1632: mem + | mem16x + | mem32x +; + +/* explicit register or memory */ +rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8x +; +rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16x +; +rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32x +; +/* not needed: +rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128x +; +*/ + +/* implicit register or memory */ +rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8 +; +rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16 +; +rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32 +; +rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128 +; + +/* immediate values */ +/* TODO: formula expansion */ +immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); } +; + +/* explicit immediates */ +imm8x: BYTE immexp { $$ = $2; } +; +imm16x: WORD immexp { $$ = $2; } +; +imm32x: DWORD immexp { $$ = $2; } +; + +/* implicit immediates */ +imm8: immexp + | imm8x +; +imm16: immexp + | imm16x +; +imm32: immexp + | imm32x +; + +/* both 16 and 32 bit immediates */ +imm1632: immexp + | imm16x + | imm32x +; + +instr: instrbase + | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; } + | 
ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; } + | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; } + | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; } + | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; } + | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; } + | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; } + | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; } + | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; } + | REPNZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; } + | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; } + | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; /* REPE/REPZ share the 0xF3 prefix byte with REP; 0xF4 is the HLT opcode, not a prefix */ } +; + +/* instructions */ +/* TODO: dynamically generate */ +instrbase: aaa + | aad + | idiv + | imul + | in + | loopz + | lsl +; + +aaa: INS_AAA { + BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } +; + +aad: INS_AAD { + BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_AAD imm8 { + BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1); + } +; + +idiv: INS_IDIV rm8x { + BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0); + } + | INS_IDIV rm16x { + BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0); + } + | INS_IDIV rm32x { + BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0); + } +; + +imul: INS_IMUL rm8x { + BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL rm16x { + BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL rm32x { + BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0); + } + | INS_IMUL reg16 ',' rm16 { + BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0); + } + | INS_IMUL reg32 ',' rm32 { + BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0); + } + | INS_IMUL reg16 ',' rm16 ',' imm8x { + BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1); + } + | INS_IMUL reg32 ',' rm32 ',' imm8x { + BuildBC_Insn(&$$, 
32, 1, 0x6B, 0, &$4, $2, &$6, 1); + } + | INS_IMUL reg16 ',' rm16 ',' imm16 { + BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2); + } + | INS_IMUL reg32 ',' rm32 ',' imm32 { + BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4); + } + | INS_IMUL reg16 ',' imm8x { + BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1); + } + | INS_IMUL reg32 ',' imm8x { + BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1); + } + | INS_IMUL reg16 ',' imm16 { + BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2); + } + | INS_IMUL reg32 ',' imm32 { + BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4); + } +; + +in: INS_IN REG_AL ',' imm8 { + BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_AX ',' imm8 { + BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_EAX ',' imm8 { + BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1); + } + | INS_IN REG_AL ',' REG_DX { + BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_IN REG_AX ',' REG_DX { + BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } + | INS_IN REG_EAX ',' REG_DX { + BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0); + } +; + +loopz: INS_LOOPZ imm1632 { + BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } + | INS_LOOPZ imm1632 ',' REG_CX { + BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } + | INS_LOOPZ imm1632 ',' REG_ECX { + BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1)); + } +; + +lsl: INS_LSL reg16 ',' rm16 { + BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0); + } + | INS_LSL reg32 ',' rm32 { + BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0); + } +; + diff --git 
a/modules/parsers/nasm/token.l.in b/modules/parsers/nasm/token.l.in new file mode 100644 index 00000000..b108c50f --- /dev/null +++ b/modules/parsers/nasm/token.l.in @@ -0,0 +1,245 @@ +/* $Id: token.l.in,v 1.1 2001/05/15 05:20:39 peter Exp $ + * Main lexer + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +%{ +#include <stdlib.h> /* strtoul, strtod, free */ +#include <string.h> /* strdup */ +#include "symrec.h" +#include "bytecode.h" +#include "bison.tab.h" +%} +%option noyywrap +%option nounput +%option case-insensitive + static char *locallabel_base = (char *)NULL; + +DIGIT [0-9] +BINDIGIT [01] +OCTDIGIT [0-7] +HEXDIGIT [0-9a-f] +WS [ \t\r] + +%% + + /* standard decimal integer */ +{DIGIT}+ { + yylval.int_val = strtoul(yytext, (char **)NULL, 10); + return INTNUM; +} + + /* 10010011b - binary number */ +{BINDIGIT}+b { + yylval.int_val = strtoul(yytext, (char **)NULL, 2); + return INTNUM; +} + + /* 777q - octal number */ +{OCTDIGIT}+q { + yylval.int_val = strtoul(yytext, (char **)NULL, 8); + return INTNUM; +} + + /* 0AAh form of hexadecimal number */ +0{HEXDIGIT}+h { + yylval.int_val = strtoul(yytext+1, (char **)NULL, 16); + return INTNUM; +} + + /* $0AA and 0xAA forms of hexadecimal number */ +(\$0|0x){HEXDIGIT}+ { + yylval.int_val = strtoul(yytext+2, (char **)NULL, 16); + return INTNUM; +} + + /* 
floating point value */ +{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? { + yylval.double_val = strtod(yytext, (char **)NULL); + return FLTNUM; +} + /* directives */ +bits { return BITS; } +section { return SECTION; } +segment { return SECTION; } +absolute { return ABSOLUTE; } +extern { return EXTERN; } +global { return GLOBAL; } +common { return COMMON; } + + /* size specifiers */ +byte { yylval.int_val = 1; return BYTE; } +word { yylval.int_val = 2; return WORD; } +dword { yylval.int_val = 4; return DWORD; } +qword { yylval.int_val = 8; return QWORD; } +tword { yylval.int_val = 10; return TWORD; } +dqword { yylval.int_val = 16; return DQWORD; } + + /* pseudo-instructions */ +db { yylval.int_val = 1; return DECLARE_DATA; } +dw { yylval.int_val = 2; return DECLARE_DATA; } +dd { yylval.int_val = 4; return DECLARE_DATA; } +dq { yylval.int_val = 8; return DECLARE_DATA; } +dt { yylval.int_val = 10; return DECLARE_DATA; } + +resb { yylval.int_val = 1; return RESERVE_SPACE; } +resw { yylval.int_val = 2; return RESERVE_SPACE; } +resd { yylval.int_val = 4; return RESERVE_SPACE; } +resq { yylval.int_val = 8; return RESERVE_SPACE; } +rest { yylval.int_val = 10; return RESERVE_SPACE; } + +incbin { return INCBIN; } + +equ { return EQU; } + +times { return TIMES; } + +seg { return SEG; } +wrt { return WRT; } +near { return NEAR; } +short { return SHORT; } +far { return FAR; } + +nosplit { return NOSPLIT; } + +org { return ORG; } + + /* operand size overrides */ +o16 { yylval.int_val = 16; return OPERSIZE; } +o32 { yylval.int_val = 32; return OPERSIZE; } + /* address size overrides */ +a16 { yylval.int_val = 16; return ADDRSIZE; } +a32 { yylval.int_val = 32; return ADDRSIZE; } + + /* instruction prefixes */ +lock { return LOCK; } +repne { return REPNZ; } +repnz { return REPNZ; } +rep { return REP; } +repe { return REPZ; } +repz { return REPZ; } + + /* control, debug, and test registers */ +cr4 { yylval.int_val = 4; return CR4; } +cr[023] { yylval.int_val = yytext[2]-'0'; return 
CRREG_NOTCR4; } +dr[0-367] { yylval.int_val = yytext[2]-'0'; return DRREG; } +tr[3-7] { yylval.int_val = yytext[2]-'0'; return TRREG; } + + /* floating point, MMX, and SSE registers */ +st0 { yylval.int_val = 0; return ST0; } +st[1-7] { yylval.int_val = yytext[2]-'0'; return FPUREG_NOTST0; } +mm[0-7] { yylval.int_val = yytext[2]-'0'; return MMXREG; } +xmm[0-7] { yylval.int_val = yytext[3]-'0'; return XMMREG; } + + /* integer registers */ +eax { yylval.int_val = 0; return REG_EAX; } +ecx { yylval.int_val = 1; return REG_ECX; } +edx { yylval.int_val = 2; return REG_EDX; } +ebx { yylval.int_val = 3; return REG_EBX; } +esp { yylval.int_val = 4; return REG_ESP; } +ebp { yylval.int_val = 5; return REG_EBP; } +esi { yylval.int_val = 6; return REG_ESI; } +edi { yylval.int_val = 7; return REG_EDI; } + +ax { yylval.int_val = 0; return REG_AX; } +cx { yylval.int_val = 1; return REG_CX; } +dx { yylval.int_val = 2; return REG_DX; } +bx { yylval.int_val = 3; return REG_BX; } +sp { yylval.int_val = 4; return REG_SP; } +bp { yylval.int_val = 5; return REG_BP; } +si { yylval.int_val = 6; return REG_SI; } +di { yylval.int_val = 7; return REG_DI; } + +al { yylval.int_val = 0; return REG_AL; } +cl { yylval.int_val = 1; return REG_CL; } +dl { yylval.int_val = 2; return REG_DL; } +bl { yylval.int_val = 3; return REG_BL; } +ah { yylval.int_val = 4; return REG_AH; } +ch { yylval.int_val = 5; return REG_CH; } +dh { yylval.int_val = 6; return REG_DH; } +bh { yylval.int_val = 7; return REG_BH; } + + /* segment registers */ +es { yylval.int_val = 0; return REG_ES; } +cs { yylval.int_val = 1; return REG_CS; } +ss { yylval.int_val = 2; return REG_SS; } +ds { yylval.int_val = 3; return REG_DS; } +fs { yylval.int_val = 4; return REG_FS; } +gs { yylval.int_val = 5; return REG_GS; } + +"<<" { return LEFT_OP; } +">>" { return RIGHT_OP; } +"//" { return SIGNDIV; } +"%%" { return SIGNMOD; } +"$$" { return START_SECTION_OFFSET; } +[-+|^&*/%~$():[\],] { return yytext[0]; } + +\.\.start:? 
{ return ENTRY_POINT; } + + /* special non-local ..@label */ +\.\.@[a-z0-9_$#@~.?]+ { + symrec *s = getsym(yytext); + if(!s) + s = putsym(yytext, ID); + yylval.sym = s; + return s->type; +} + + /* local label (.label) */ +\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* { + /* TODO: append yytext to locallabel_base before testing */ + symrec *s = getsym(yytext); + if(!s) + s = putsym(yytext, ID); + yylval.sym = s; + return s->type; +} + + /* instructions */ + /* TODO: dynamically generate */ +aaa { return INS_AAA; } +aad { return INS_AAD; } +idiv { return INS_IDIV; } +imul { return INS_IMUL; } +in { return INS_IN; } +loope { return INS_LOOPZ; } +loopz { return INS_LOOPZ; } +lsl { return INS_LSL; } + + /* label */ +[a-z_?][a-z0-9_$#@~.?]* { + symrec *s = getsym(yytext); + if(!s) + s = putsym(yytext, ID); + yylval.sym = s; + + /* TODO: optimize to decrease number of allocations */ + if(locallabel_base) + free(locallabel_base); + locallabel_base = strdup(yytext); + + return s->type; +} + +;.* ; + +{WS}+ ; + +\n return '\n'; + diff --git a/src/bison.y b/src/bison.y new file mode 100644 index 00000000..cc04b9b5 --- /dev/null +++ b/src/bison.y @@ -0,0 +1,432 @@ +/* $Id: bison.y,v 1.1 2001/05/15 05:20:39 peter Exp $ + * Main bison parser + * + * Copyright (C) 2001 Peter Johnson + * + * This file is part of YASM. + * + * YASM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * YASM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include <math.h>   /* header name lost in extraction -- TODO confirm */
+#include <stdlib.h> /* header name lost in extraction -- TODO confirm; NULL is used below */
+#include "symrec.h"
+#include "globals.h"
+#include "bytecode.h"
+
+#define YYDEBUG 1
+
+void init_table(void);
+extern int yylex(void);
+extern void yyerror(char *);
+
+%}
+
+%union {
+    unsigned long int_val;
+    double double_val;
+    symrec *sym;
+    effaddr ea_val;     /* filled by ConvertIntToEA / ConvertRegToEA */
+    immval im_val;      /* filled by ConvertIntToImm */
+    bytecode bc;        /* filled by BuildBC_Insn / BuildBC_Insn_Rel */
+}
+
+%token <int_val> INTNUM /* <tag>s restored only where an action uses the value */
+%token FLTNUM
+%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON
+%token BYTE WORD DWORD QWORD TWORD DQWORD
+%token DECLARE_DATA
+%token RESERVE_SPACE
+%token INCBIN EQU TIMES
+%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token O16 O32 A16 A32 LOCK REPNZ REP REPZ
+%token <int_val> OPERSIZE ADDRSIZE
+%token <int_val> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
+%token <int_val> REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI
+%token <int_val> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
+%token <int_val> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
+%token <int_val> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD
+%token START_SECTION_OFFSET ENTRY_POINT
+%token ID
+
+/* TODO: dynamically generate instruction tokens: */
+%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL
+
+%type <bc> aaa aad idiv imul in loopz lsl
+
+%type <bc> line exp instr instrbase
+%type <int_val> fpureg reg32 reg16 reg8 reg_dess reg_fsgs reg_notcs
+%type <ea_val> mem memaddr memexp
+%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
+%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
+%type <ea_val> rm8x rm16x rm32x /*rm64x xrm64x rm128x*/
+%type <ea_val> rm8 rm16 rm32 rm64 rm128 xrm64
+%type <im_val> immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632
+
+%left '-' '+'
+%left '*' '/'
+
+%%
+input: /* empty */
+    | input line
+;
+
+line: '\n' { $$.len = 0; line_number++; }
+    | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; }
+    | error '\n' { yyerrok; line_number++; }
+;
+
+exp: instr
+;
+
+/* directives */
+directive: bits
+    | section
+    | absolute
+    | extern
+    | global
+    | common
+;
+
+bits: '[' BITS INTNUM ']' { }
+;
+section: '[' SECTION ']' { }
+;
+absolute: '[' ABSOLUTE INTNUM ']' { }
+;
+extern: '[' EXTERN ']' { }
+;
+global: '[' GLOBAL ']' { }
+;
+common: '[' COMMON ']' { }
+;
+
+/* register groupings */
+fpureg: ST0
+    | FPUREG_NOTST0
+;
+
+reg32: REG_EAX
+    | REG_ECX
+    | REG_EDX
+    | REG_EBX
+    | REG_ESP
+    | REG_EBP
+    | REG_ESI
+    | REG_EDI
+    | DWORD reg32 { $$ = $2; } /* yield the register, not the size keyword ($1) */
+;
+
+reg16: REG_AX
+    | REG_CX
+    | REG_DX
+    | REG_BX
+    | REG_SP
+    | REG_BP
+    | REG_SI
+    | REG_DI
+    | WORD reg16 { $$ = $2; }
+;
+
+reg8: REG_AL
+    | REG_CL
+    | REG_DL
+    | REG_BL
+    | REG_AH
+    | REG_CH
+    | REG_DH
+    | REG_BH
+    | BYTE reg8 { $$ = $2; }
+;
+
+reg_dess: REG_ES
+    | REG_SS
+    | REG_DS
+    | WORD reg_dess { $$ = $2; }
+;
+
+reg_fsgs: REG_FS
+    | REG_GS
+    | WORD reg_fsgs { $$ = $2; }
+;
+
+reg_notcs: reg_dess
+    | reg_fsgs
+    | WORD reg_notcs { $$ = $2; }
+;
+
+/* memory addresses */
+/* TODO: formula expansion */
+memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); }
+;
+
+memaddr: memexp { $$ = $1; $$.segment = 0; }
+    | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; }
+    | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; }
+    | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; }
+    | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; }
+    | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; }
+    | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; }
+    | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; }
+    | WORD memaddr { $$ = $2; $$.addrsize = 16; $$.len = 3; }
+    | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; }
+;
+
+mem: '[' memaddr ']' { $$ = $2; }
+;
+
+/* explicit memory */
+mem8x: BYTE mem { $$ = $2; }
+;
+mem16x: WORD mem { $$ = $2; }
+;
+mem32x: DWORD mem { $$ = $2; }
+;
+mem64x: QWORD mem { $$ = $2; }
+;
+mem80x: TWORD mem { $$ = $2; }
+;
+mem128x: DQWORD mem { $$ = $2; }
+;
+
+/* implicit memory */
+mem8: mem
+    | mem8x
+;
+mem16: mem
+    | mem16x
+;
+mem32: mem
+    | mem32x
+;
+mem64: mem
+    | mem64x
+;
+mem80: mem
+    | mem80x
+;
+mem128: mem
+    | mem128x
+;
+
+/* both 16 and 32 bit memory */
+mem1632: mem
+    | mem16x
+    | mem32x
+;
+
+/* explicit register or memory */
+rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8x
+;
+rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16x
+;
+rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32x
+;
+/* not needed:
+rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128x
+;
+*/
+
+/* implicit register or memory */
+rm8: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8
+;
+rm16: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16
+;
+rm32: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32
+;
+rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128
+;
+
+/* immediate values */
+/* TODO: formula expansion */
+immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); }
+;
+
+/* explicit immediates */
+imm8x: BYTE immexp { $$ = $2; }
+;
+imm16x: WORD immexp { $$ = $2; }
+;
+imm32x: DWORD immexp { $$ = $2; }
+;
+
+/* implicit immediates */
+imm8: immexp
+    | imm8x
+;
+imm16: immexp
+    | imm16x
+;
+imm32: immexp
+    | imm32x
+;
+
+/* both 16 and 32 bit immediates */
+imm1632: immexp
+    | imm16x
+    | imm32x
+;
+
+instr: instrbase
+    | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; }
+    | ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; }
+    | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; }
+    | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; }
+    | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; }
+    | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; }
+    | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; }
+    | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; }
+    | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; }
+    | REPNZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; }
+    | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; }
+    | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; } /* REPZ/REPE share REP's 0xF3; 0xF4 is HLT */
+;
+
+/* instructions */
+/* TODO: dynamically generate */
+instrbase: aaa
+    | aad
+    | idiv
+    | imul
+    | in
+    | loopz
+    | lsl
+;
+
+aaa: INS_AAA {
+        BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+aad: INS_AAD {
+        BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_AAD imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1);
+    }
+;
+
+idiv: INS_IDIV rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+;
+
+imul: INS_IMUL rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4);
+    }
+    | INS_IMUL reg16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2);
+    }
+    | INS_IMUL reg32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4);
+    }
+;
+
+in: INS_IN REG_AL ',' imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AX ',' imm8 {
+        BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_EAX ',' imm8 {
+        BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AL ',' REG_DX {
+        BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_AX ',' REG_DX {
+        BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_EAX ',' REG_DX {
+        BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+loopz: INS_LOOPZ imm1632 {
+        BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_CX {
+        BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_ECX {
+        BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+;
+
+lsl: INS_LSL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_LSL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+;
+
diff --git a/src/bison.y.in b/src/bison.y.in
new file mode 100644
index 00000000..036f5b76
--- /dev/null
+++ b/src/bison.y.in
@@ -0,0 +1,432 @@
+/* $Id: bison.y.in,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main bison parser
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include <math.h>   /* header name lost in extraction -- TODO confirm */
+#include <stdlib.h> /* header name lost in extraction -- TODO confirm; NULL is used below */
+#include "symrec.h"
+#include "globals.h"
+#include "bytecode.h"
+
+#define YYDEBUG 1
+
+void init_table(void);
+extern int yylex(void);
+extern void yyerror(char *);
+
+%}
+
+%union {
+    unsigned long int_val;
+    double double_val;
+    symrec *sym;
+    effaddr ea_val;     /* filled by ConvertIntToEA / ConvertRegToEA */
+    immval im_val;      /* filled by ConvertIntToImm */
+    bytecode bc;        /* filled by BuildBC_Insn / BuildBC_Insn_Rel */
+}
+
+%token <int_val> INTNUM /* <tag>s restored only where an action uses the value */
+%token FLTNUM
+%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON
+%token BYTE WORD DWORD QWORD TWORD DQWORD
+%token DECLARE_DATA
+%token RESERVE_SPACE
+%token INCBIN EQU TIMES
+%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token O16 O32 A16 A32 LOCK REPNZ REP REPZ
+%token <int_val> OPERSIZE ADDRSIZE
+%token <int_val> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
+%token <int_val> REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI
+%token <int_val> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
+%token <int_val> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
+%token <int_val> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD
+%token START_SECTION_OFFSET ENTRY_POINT
+%token ID
+
+/* TODO: dynamically generate instruction tokens: */
+%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL
+
+%type <bc> aaa aad idiv imul in loopz lsl
+
+%type <bc> line exp instr instrbase
+%type <int_val> fpureg reg32 reg16 reg8 reg_dess reg_fsgs reg_notcs
+%type <ea_val> mem memaddr memexp
+%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
+%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
+%type <ea_val> rm8x rm16x rm32x /*rm64x xrm64x rm128x*/
+%type <ea_val> rm8 rm16 rm32 rm64 rm128 xrm64
+%type <im_val> immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632
+
+%left '-' '+'
+%left '*' '/'
+
+%%
+input: /* empty */
+    | input line
+;
+
+line: '\n' { $$.len = 0; line_number++; }
+    | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; }
+    | error '\n' { yyerrok; line_number++; }
+;
+
+exp: instr
+;
+
+/* directives */
+directive: bits
+    | section
+    | absolute
+    | extern
+    | global
+    | common
+;
+
+bits: '[' BITS INTNUM ']' { }
+;
+section: '[' SECTION ']' { }
+;
+absolute: '[' ABSOLUTE INTNUM ']' { }
+;
+extern: '[' EXTERN ']' { }
+;
+global: '[' GLOBAL ']' { }
+;
+common: '[' COMMON ']' { }
+;
+
+/* register groupings */
+fpureg: ST0
+    | FPUREG_NOTST0
+;
+
+reg32: REG_EAX
+    | REG_ECX
+    | REG_EDX
+    | REG_EBX
+    | REG_ESP
+    | REG_EBP
+    | REG_ESI
+    | REG_EDI
+    | DWORD reg32 { $$ = $2; } /* yield the register, not the size keyword ($1) */
+;
+
+reg16: REG_AX
+    | REG_CX
+    | REG_DX
+    | REG_BX
+    | REG_SP
+    | REG_BP
+    | REG_SI
+    | REG_DI
+    | WORD reg16 { $$ = $2; }
+;
+
+reg8: REG_AL
+    | REG_CL
+    | REG_DL
+    | REG_BL
+    | REG_AH
+    | REG_CH
+    | REG_DH
+    | REG_BH
+    | BYTE reg8 { $$ = $2; }
+;
+
+reg_dess: REG_ES
+    | REG_SS
+    | REG_DS
+    | WORD reg_dess { $$ = $2; }
+;
+
+reg_fsgs: REG_FS
+    | REG_GS
+    | WORD reg_fsgs { $$ = $2; }
+;
+
+reg_notcs: reg_dess
+    | reg_fsgs
+    | WORD reg_notcs { $$ = $2; }
+;
+
+/* memory addresses */
+/* TODO: formula expansion */
+memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); }
+;
+
+memaddr: memexp { $$ = $1; $$.segment = 0; }
+    | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; }
+    | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; }
+    | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; }
+    | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; }
+    | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; }
+    | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; }
+    | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; }
+    | WORD memaddr { $$ = $2; $$.addrsize = 16; $$.len = 3; }
+    | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; }
+;
+
+mem: '[' memaddr ']' { $$ = $2; }
+;
+
+/* explicit memory */
+mem8x: BYTE mem { $$ = $2; }
+;
+mem16x: WORD mem { $$ = $2; }
+;
+mem32x: DWORD mem { $$ = $2; }
+;
+mem64x: QWORD mem { $$ = $2; }
+;
+mem80x: TWORD mem { $$ = $2; }
+;
+mem128x: DQWORD mem { $$ = $2; }
+;
+
+/* implicit memory */
+mem8: mem
+    | mem8x
+;
+mem16: mem
+    | mem16x
+;
+mem32: mem
+    | mem32x
+;
+mem64: mem
+    | mem64x
+;
+mem80: mem
+    | mem80x
+;
+mem128: mem
+    | mem128x
+;
+
+/* both 16 and 32 bit memory */
+mem1632: mem
+    | mem16x
+    | mem32x
+;
+
+/* explicit register or memory */
+rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8x
+;
+rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16x
+;
+rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32x
+;
+/* not needed:
+rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128x
+;
+*/
+
+/* implicit register or memory */
+rm8: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8
+;
+rm16: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16
+;
+rm32: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32
+;
+rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128
+;
+
+/* immediate values */
+/* TODO: formula expansion */
+immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); }
+;
+
+/* explicit immediates */
+imm8x: BYTE immexp { $$ = $2; }
+;
+imm16x: WORD immexp { $$ = $2; }
+;
+imm32x: DWORD immexp { $$ = $2; }
+;
+
+/* implicit immediates */
+imm8: immexp
+    | imm8x
+;
+imm16: immexp
+    | imm16x
+;
+imm32: immexp
+    | imm32x
+;
+
+/* both 16 and 32 bit immediates */
+imm1632: immexp
+    | imm16x
+    | imm32x
+;
+
+instr: instrbase
+    | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; }
+    | ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; }
+    | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; }
+    | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; }
+    | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; }
+    | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; }
+    | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; }
+    | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; }
+    | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; }
+    | REPNZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; }
+    | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; }
+    | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; } /* REPZ/REPE share REP's 0xF3; 0xF4 is HLT */
+;
+
+/* instructions */
+/* TODO: dynamically generate */
+instrbase: aaa
+    | aad
+    | idiv
+    | imul
+    | in
+    | loopz
+    | lsl
+;
+
+aaa: INS_AAA {
+        BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+aad: INS_AAD {
+        BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_AAD imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1);
+    }
+;
+
+idiv: INS_IDIV rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+;
+
+imul: INS_IMUL rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4);
+    }
+    | INS_IMUL reg16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2);
+    }
+    | INS_IMUL reg32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4);
+    }
+;
+
+in: INS_IN REG_AL ',' imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AX ',' imm8 {
+        BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_EAX ',' imm8 {
+        BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AL ',' REG_DX {
+        BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_AX ',' REG_DX {
+        BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_EAX ',' REG_DX {
+        BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+loopz: INS_LOOPZ imm1632 {
+        BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_CX {
+        BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_ECX {
+        BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+;
+
+lsl: INS_LSL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_LSL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+;
+
diff --git
a/src/parsers/nasm/bison.y.in b/src/parsers/nasm/bison.y.in
new file mode 100644
index 00000000..036f5b76
--- /dev/null
+++ b/src/parsers/nasm/bison.y.in
@@ -0,0 +1,432 @@
+/* $Id: bison.y.in,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main bison parser
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include <math.h>   /* header name lost in extraction -- TODO confirm */
+#include <stdlib.h> /* header name lost in extraction -- TODO confirm; NULL is used below */
+#include "symrec.h"
+#include "globals.h"
+#include "bytecode.h"
+
+#define YYDEBUG 1
+
+void init_table(void);
+extern int yylex(void);
+extern void yyerror(char *);
+
+%}
+
+%union {
+    unsigned long int_val;
+    double double_val;
+    symrec *sym;
+    effaddr ea_val;     /* filled by ConvertIntToEA / ConvertRegToEA */
+    immval im_val;      /* filled by ConvertIntToImm */
+    bytecode bc;        /* filled by BuildBC_Insn / BuildBC_Insn_Rel */
+}
+
+%token <int_val> INTNUM /* <tag>s restored only where an action uses the value */
+%token FLTNUM
+%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON
+%token BYTE WORD DWORD QWORD TWORD DQWORD
+%token DECLARE_DATA
+%token RESERVE_SPACE
+%token INCBIN EQU TIMES
+%token SEG WRT NEAR SHORT FAR NOSPLIT ORG
+%token O16 O32 A16 A32 LOCK REPNZ REP REPZ
+%token <int_val> OPERSIZE ADDRSIZE
+%token <int_val> CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG
+%token <int_val> REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI
+%token <int_val> REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI
+%token <int_val> REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH
+%token <int_val> REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS
+%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD
+%token START_SECTION_OFFSET ENTRY_POINT
+%token ID
+
+/* TODO: dynamically generate instruction tokens: */
+%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL
+
+%type <bc> aaa aad idiv imul in loopz lsl
+
+%type <bc> line exp instr instrbase
+%type <int_val> fpureg reg32 reg16 reg8 reg_dess reg_fsgs reg_notcs
+%type <ea_val> mem memaddr memexp
+%type <ea_val> mem8x mem16x mem32x mem64x mem80x mem128x
+%type <ea_val> mem8 mem16 mem32 mem64 mem80 mem128 mem1632
+%type <ea_val> rm8x rm16x rm32x /*rm64x xrm64x rm128x*/
+%type <ea_val> rm8 rm16 rm32 rm64 rm128 xrm64
+%type <im_val> immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632
+
+%left '-' '+'
+%left '*' '/'
+
+%%
+input: /* empty */
+    | input line
+;
+
+line: '\n' { $$.len = 0; line_number++; }
+    | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; }
+    | error '\n' { yyerrok; line_number++; }
+;
+
+exp: instr
+;
+
+/* directives */
+directive: bits
+    | section
+    | absolute
+    | extern
+    | global
+    | common
+;
+
+bits: '[' BITS INTNUM ']' { }
+;
+section: '[' SECTION ']' { }
+;
+absolute: '[' ABSOLUTE INTNUM ']' { }
+;
+extern: '[' EXTERN ']' { }
+;
+global: '[' GLOBAL ']' { }
+;
+common: '[' COMMON ']' { }
+;
+
+/* register groupings */
+fpureg: ST0
+    | FPUREG_NOTST0
+;
+
+reg32: REG_EAX
+    | REG_ECX
+    | REG_EDX
+    | REG_EBX
+    | REG_ESP
+    | REG_EBP
+    | REG_ESI
+    | REG_EDI
+    | DWORD reg32 { $$ = $2; } /* yield the register, not the size keyword ($1) */
+;
+
+reg16: REG_AX
+    | REG_CX
+    | REG_DX
+    | REG_BX
+    | REG_SP
+    | REG_BP
+    | REG_SI
+    | REG_DI
+    | WORD reg16 { $$ = $2; }
+;
+
+reg8: REG_AL
+    | REG_CL
+    | REG_DL
+    | REG_BL
+    | REG_AH
+    | REG_CH
+    | REG_DH
+    | REG_BH
+    | BYTE reg8 { $$ = $2; }
+;
+
+reg_dess: REG_ES
+    | REG_SS
+    | REG_DS
+    | WORD reg_dess { $$ = $2; }
+;
+
+reg_fsgs: REG_FS
+    | REG_GS
+    | WORD reg_fsgs { $$ = $2; }
+;
+
+reg_notcs: reg_dess
+    | reg_fsgs
+    | WORD reg_notcs { $$ = $2; }
+;
+
+/* memory addresses */
+/* TODO: formula expansion */
+memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); }
+;
+
+memaddr: memexp { $$ = $1; $$.segment = 0; }
+    | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; }
+    | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; }
+    | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; }
+    | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; }
+    | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; }
+    | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; }
+    | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; }
+    | WORD memaddr { $$ = $2; $$.addrsize = 16; $$.len = 3; }
+    | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; }
+;
+
+mem: '[' memaddr ']' { $$ = $2; }
+;
+
+/* explicit memory */
+mem8x: BYTE mem { $$ = $2; }
+;
+mem16x: WORD mem { $$ = $2; }
+;
+mem32x: DWORD mem { $$ = $2; }
+;
+mem64x: QWORD mem { $$ = $2; }
+;
+mem80x: TWORD mem { $$ = $2; }
+;
+mem128x: DQWORD mem { $$ = $2; }
+;
+
+/* implicit memory */
+mem8: mem
+    | mem8x
+;
+mem16: mem
+    | mem16x
+;
+mem32: mem
+    | mem32x
+;
+mem64: mem
+    | mem64x
+;
+mem80: mem
+    | mem80x
+;
+mem128: mem
+    | mem128x
+;
+
+/* both 16 and 32 bit memory */
+mem1632: mem
+    | mem16x
+    | mem32x
+;
+
+/* explicit register or memory */
+rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8x
+;
+rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16x
+;
+rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32x
+;
+/* not needed:
+rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64x
+;
+rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128x
+;
+*/
+
+/* implicit register or memory */
+rm8: reg8 { (void)ConvertRegToEA(&$$, $1); }
+    | mem8
+;
+rm16: reg16 { (void)ConvertRegToEA(&$$, $1); }
+    | mem16
+;
+rm32: reg32 { (void)ConvertRegToEA(&$$, $1); }
+    | mem32
+;
+rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem64
+;
+rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); }
+    | mem128
+;
+
+/* immediate values */
+/* TODO: formula expansion */
+immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); }
+;
+
+/* explicit immediates */
+imm8x: BYTE immexp { $$ = $2; }
+;
+imm16x: WORD immexp { $$ = $2; }
+;
+imm32x: DWORD immexp { $$ = $2; }
+;
+
+/* implicit immediates */
+imm8: immexp
+    | imm8x
+;
+imm16: immexp
+    | imm16x
+;
+imm32: immexp
+    | imm32x
+;
+
+/* both 16 and 32 bit immediates */
+imm1632: immexp
+    | imm16x
+    | imm32x
+;
+
+instr: instrbase
+    | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; }
+    | ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; }
+    | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; }
+    | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; }
+    | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; }
+    | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; }
+    | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; }
+    | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; }
+    | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; }
+    | REPNZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; }
+    | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; }
+    | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; } /* REPZ/REPE share REP's 0xF3; 0xF4 is HLT */
+;
+
+/* instructions */
+/* TODO: dynamically generate */
+instrbase: aaa
+    | aad
+    | idiv
+    | imul
+    | in
+    | loopz
+    | lsl
+;
+
+aaa: INS_AAA {
+        BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+aad: INS_AAD {
+        BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_AAD imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1);
+    }
+;
+
+idiv: INS_IDIV rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+;
+
+imul: INS_IMUL rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4);
+    }
+    | INS_IMUL reg16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2);
+    }
+    | INS_IMUL reg32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4);
+    }
+;
+
+in: INS_IN REG_AL ',' imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AX ',' imm8 {
+        BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_EAX ',' imm8 {
+        BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AL ',' REG_DX {
+        BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_AX ',' REG_DX {
+        BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_EAX ',' REG_DX {
+        BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+loopz: INS_LOOPZ imm1632 {
+        BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_CX {
+        BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_ECX {
+        BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+;
+
+lsl: INS_LSL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_LSL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+;
+
diff --git a/src/parsers/nasm/nasm-bison.y b/src/parsers/nasm/nasm-bison.y
new file mode 100644
index 00000000..77cc3be2
--- /dev/null
+++ b/src/parsers/nasm/nasm-bison.y
@@ -0,0 +1,432 @@
+/* $Id: nasm-bison.y,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main bison parser
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +%{ +#include +#include +#include "symrec.h" +#include "globals.h" +#include "bytecode.h" + +#define YYDEBUG 1 + +void init_table(void); +extern int yylex(void); +extern void yyerror(char *); + +%} + +%union { + unsigned long int_val; + double double_val; + symrec *sym; + effaddr ea_val; + immval im_val; + bytecode bc; +} + +%token INTNUM +%token FLTNUM +%token BITS SECTION ABSOLUTE EXTERN GLOBAL COMMON +%token BYTE WORD DWORD QWORD TWORD DQWORD +%token DECLARE_DATA +%token RESERVE_SPACE +%token INCBIN EQU TIMES +%token SEG WRT NEAR SHORT FAR NOSPLIT ORG +%token O16 O32 A16 A32 LOCK REPNZ REP REPZ +%token OPERSIZE ADDRSIZE +%token CR4 CRREG_NOTCR4 DRREG TRREG ST0 FPUREG_NOTST0 MMXREG XMMREG +%token REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI +%token REG_AX REG_CX REG_DX REG_BX REG_SP REG_BP REG_SI REG_DI +%token REG_AL REG_CL REG_DL REG_BL REG_AH REG_CH REG_DH REG_BH +%token REG_ES REG_CS REG_SS REG_DS REG_FS REG_GS +%token LEFT_OP RIGHT_OP SIGNDIV SIGNMOD +%token START_SECTION_OFFSET ENTRY_POINT +%token ID + +/* TODO: dynamically generate instruction tokens: */ +%token INS_AAA INS_AAD INS_IDIV INS_IMUL INS_IN INS_LOOPZ INS_LSL + +%type aaa aad idiv imul in loopz lsl + +%type line exp instr instrbase +%type fpureg reg32 reg16 reg8 reg_dess reg_fsgs reg_notcs +%type mem memaddr memexp +%type mem8x mem16x mem32x mem64x mem80x mem128x +%type mem8 mem16 mem32 mem64 mem80 mem128 mem1632 +%type rm8x rm16x rm32x /*rm64x xrm64x rm128x*/ +%type rm8 rm16 rm32 rm64 rm128 xrm64 +%type immexp imm8x imm16x imm32x imm8 imm16 imm32 imm1632 + +%left '-' '+' +%left '*' '/' + +%% +input: /* empty */ + | input line +; + +line: '\n' { $$.len = 0; line_number++; } + | exp '\n' { DebugPrintBC(&$1); $$ = $1; line_number++; } + | error '\n' { yyerrok; 
line_number++; } +; + +exp: instr +; + +/* directives */ +directive: bits + | section + | absolute + | extern + | global + | common +; + +bits: '[' BITS INTNUM ']' { } +; +section: '[' SECTION ']' { } +; +absolute: '[' ABSOLUTE INTNUM ']' { } +; +extern: '[' EXTERN ']' { } +; +global: '[' GLOBAL ']' { } +; +common: '[' COMMON ']' { } +; + +/* register groupings */ +fpureg: ST0 + | FPUREG_NOTST0 +; + +reg32: REG_EAX + | REG_ECX + | REG_EDX + | REG_EBX + | REG_ESP + | REG_EBP + | REG_ESI + | REG_EDI + | DWORD reg32 +; + +reg16: REG_AX + | REG_CX + | REG_DX + | REG_BX + | REG_SP + | REG_BP + | REG_SI + | REG_DI + | WORD reg16 +; + +reg8: REG_AL + | REG_CL + | REG_DL + | REG_BL + | REG_AH + | REG_CH + | REG_DH + | REG_BH + | BYTE reg8 +; + +reg_dess: REG_ES + | REG_SS + | REG_DS + | WORD reg_dess +; + +reg_fsgs: REG_FS + | REG_GS + | WORD reg_fsgs +; + +reg_notcs: reg_dess + | reg_fsgs + | WORD reg_notcs +; + +/* memory addresses */ +/* TODO: formula expansion */ +memexp: INTNUM { (void)ConvertIntToEA(&$$, $1); } +; + +memaddr: memexp { $$ = $1; $$.segment = 0; } + | REG_CS ':' memaddr { $$ = $3; $$.segment = 0x2E; } + | REG_SS ':' memaddr { $$ = $3; $$.segment = 0x36; } + | REG_DS ':' memaddr { $$ = $3; $$.segment = 0x3E; } + | REG_ES ':' memaddr { $$ = $3; $$.segment = 0x26; } + | REG_FS ':' memaddr { $$ = $3; $$.segment = 0x64; } + | REG_GS ':' memaddr { $$ = $3; $$.segment = 0x65; } + | BYTE memaddr { $$ = $2; $$.addrsize = 8; $$.len = 2; } + | WORD memaddr { $$ = $2; $$.addrsize = 16; $$.len = 3; } + | DWORD memaddr { $$ = $2; $$.addrsize = 32; $$.len = 5; } +; + +mem: '[' memaddr ']' { $$ = $2; } +; + +/* explicit memory */ +mem8x: BYTE mem { $$ = $2; } +; +mem16x: WORD mem { $$ = $2; } +; +mem32x: DWORD mem { $$ = $2; } +; +mem64x: QWORD mem { $$ = $2; } +; +mem80x: TWORD mem { $$ = $2; } +; +mem128x: DQWORD mem { $$ = $2; } +; + +/* implicit memory */ +mem8: mem + | mem8x +; +mem16: mem + | mem16x +; +mem32: mem + | mem32x +; +mem64: mem + | mem64x +; +mem80: 
mem + | mem80x +; +mem128: mem + | mem128x +; + +/* both 16 and 32 bit memory */ +mem1632: mem + | mem16x + | mem32x +; + +/* explicit register or memory */ +rm8x: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8x +; +rm16x: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16x +; +rm32x: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32x +; +/* not needed: +rm64x: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +xrm64x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64x +; +rm128x: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128x +; +*/ + +/* implicit register or memory */ +rm8: reg8 { (void)ConvertRegToEA(&$$, $1); } + | mem8 +; +rm16: reg16 { (void)ConvertRegToEA(&$$, $1); } + | mem16 +; +rm32: reg32 { (void)ConvertRegToEA(&$$, $1); } + | mem32 +; +rm64: MMXREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +xrm64: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem64 +; +rm128: XMMREG { (void)ConvertRegToEA(&$$, $1); } + | mem128 +; + +/* immediate values */ +/* TODO: formula expansion */ +immexp: INTNUM { (void)ConvertIntToImm(&$$, $1); } +; + +/* explicit immediates */ +imm8x: BYTE immexp { $$ = $2; } +; +imm16x: WORD immexp { $$ = $2; } +; +imm32x: DWORD immexp { $$ = $2; } +; + +/* implicit immediates */ +imm8: immexp + | imm8x +; +imm16: immexp + | imm16x +; +imm32: immexp + | imm32x +; + +/* both 16 and 32 bit immediates */ +imm1632: immexp + | imm16x + | imm32x +; + +instr: instrbase + | OPERSIZE instr { $$ = $2; $$.data.insn.opersize = $1; } + | ADDRSIZE instr { $$ = $2; $$.data.insn.ea.addrsize = $1; } + | REG_CS instr { $$ = $2; $$.data.insn.ea.segment = 0x2E; } + | REG_SS instr { $$ = $2; $$.data.insn.ea.segment = 0x36; } + | REG_DS instr { $$ = $2; $$.data.insn.ea.segment = 0x3E; } + | REG_ES instr { $$ = $2; $$.data.insn.ea.segment = 0x26; } + | REG_FS instr { $$ = $2; $$.data.insn.ea.segment = 0x64; } + | REG_GS instr { $$ = $2; $$.data.insn.ea.segment = 0x65; } + | LOCK instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF0; } + | REPNZ 
instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF2; }
+    | REP instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; }
+    /* REPE/REPZ is encoded with the same prefix byte as REP (0xF3);
+     * 0xF4 is the HLT opcode, not a prefix. */
+    | REPZ instr { $$ = $2; $$.data.insn.lockrep_pre = 0xF3; }
+;
+
+/* instructions */
+/* TODO: dynamically generate */
+/* NOTE(review): the meaning of the BuildBC_Insn() arguments is inferred from
+ * the call sites below - confirm against bytecode.h: (result bytecode,
+ * operand size in bits or 0, opcode length, opcode byte 0, opcode byte 1,
+ * effective address or NULL, ModRM reg/opcode-extension field, immediate or
+ * NULL, immediate length in bytes). */
+instrbase: aaa
+    | aad
+    | idiv
+    | imul
+    | in
+    | loopz
+    | lsl
+;
+
+/* AAA: single opcode byte 0x37 */
+aaa: INS_AAA {
+        BuildBC_Insn(&$$, 0, 1, 0x37, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+/* AAD: 0xD5 followed by the base; the base defaults to 0x0A when omitted */
+aad: INS_AAD {
+        BuildBC_Insn(&$$, 0, 2, 0xD5, 0x0A, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_AAD imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xD5, 0, (effaddr *)NULL, 0, &$2, 1);
+    }
+;
+
+/* IDIV r/m: 0xF6 (8-bit) or 0xF7 (16/32-bit) with opcode extension 7 */
+idiv: INS_IDIV rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+    | INS_IDIV rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 7, (immval *)NULL, 0);
+    }
+;
+
+/* IMUL: one-operand form (0xF6/0xF7 with opcode extension 5), two-operand
+ * form (0x0F 0xAF), and the immediate forms (0x6B with an 8-bit immediate,
+ * 0x69 with a 16/32-bit immediate).  In the "reg, imm" forms the
+ * destination register is also used as the r/m operand. */
+imul: INS_IMUL rm8x {
+        BuildBC_Insn(&$$, 0, 1, 0xF6, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm16x {
+        BuildBC_Insn(&$$, 16, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL rm32x {
+        BuildBC_Insn(&$$, 32, 1, 0xF7, 0, &$2, 5, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0xAF, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, &$4, $2, &$6, 1);
+    }
+    | INS_IMUL reg16 ',' rm16 ',' imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, &$4, $2, &$6, 2);
+    }
+    | INS_IMUL reg32 ',' rm32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, &$4, $2, &$6, 4);
+    }
+    | INS_IMUL reg16 ',' imm8x {
+        BuildBC_Insn(&$$, 16, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg32 ',' imm8x {
+        BuildBC_Insn(&$$, 32, 1, 0x6B, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 1);
+    }
+    | INS_IMUL reg16 ','
imm16 {
+        BuildBC_Insn(&$$, 16, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 2);
+    }
+    | INS_IMUL reg32 ',' imm32 {
+        BuildBC_Insn(&$$, 32, 1, 0x69, 0, ConvertRegToEA((effaddr *)NULL, $2), $2, &$4, 4);
+    }
+;
+
+/* IN: 0xE4/0xE5 read from an 8-bit immediate port number, 0xEC/0xED read
+ * from the port held in DX.  The even opcode is the AL form; the odd opcode
+ * serves AX and EAX, distinguished by the operand size argument. */
+in: INS_IN REG_AL ',' imm8 {
+        BuildBC_Insn(&$$, 0, 1, 0xE4, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AX ',' imm8 {
+        BuildBC_Insn(&$$, 16, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_EAX ',' imm8 {
+        BuildBC_Insn(&$$, 32, 1, 0xE5, 0, (effaddr *)NULL, 0, &$4, 1);
+    }
+    | INS_IN REG_AL ',' REG_DX {
+        BuildBC_Insn(&$$, 0, 1, 0xEC, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_AX ',' REG_DX {
+        BuildBC_Insn(&$$, 16, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+    | INS_IN REG_EAX ',' REG_DX {
+        BuildBC_Insn(&$$, 32, 1, 0xED, 0, (effaddr *)NULL, 0, (immval *)NULL, 0);
+    }
+;
+
+/* LOOPZ/LOOPE: opcode 0xE1 with a 1-byte relative target; an optional
+ * ", CX" or ", ECX" operand names the count register.
+ * NOTE(review): on x86 the count register of LOOPcc is selected by the
+ * address-size attribute, not the operand size - confirm how
+ * BuildBC_Insn_Rel() interprets its second argument (16/32 here). */
+loopz: INS_LOOPZ imm1632 {
+        BuildBC_Insn_Rel(&$$, 0, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_CX {
+        BuildBC_Insn_Rel(&$$, 16, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+    | INS_LOOPZ imm1632 ',' REG_ECX {
+        BuildBC_Insn_Rel(&$$, 32, 1, 0xE1, 0, (effaddr *)NULL, 0, ConvertImmToRel((relval *)NULL, &$2, 1));
+    }
+;
+
+/* LSL reg, r/m: two-byte opcode 0x0F 0x03; the destination register goes
+ * into the ModRM reg field. */
+lsl: INS_LSL reg16 ',' rm16 {
+        BuildBC_Insn(&$$, 16, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+    | INS_LSL reg32 ',' rm32 {
+        BuildBC_Insn(&$$, 32, 2, 0x0F, 0x03, &$4, $2, (immval *)NULL, 0);
+    }
+;
+
diff --git a/src/parsers/nasm/token.l.in b/src/parsers/nasm/token.l.in
new file mode 100644
index 00000000..b108c50f
--- /dev/null
+++ b/src/parsers/nasm/token.l.in
@@ -0,0 +1,245 @@
+/* $Id: token.l.in,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main lexer
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include
+#include
+#include "symrec.h"
+#include "bytecode.h"
+#include "bison.tab.h"
+%}
+%option noyywrap
+%option nounput
+%option case-insensitive
+    /* heap copy of the most recently scanned non-local label; replaced by
+     * the generic label rule at the end of the rules section */
+    static char *locallabel_base = (char *)NULL;
+
+DIGIT [0-9]
+BINDIGIT [01]
+OCTDIGIT [0-7]
+HEXDIGIT [0-9a-f]
+WS [ \t\r]
+
+%%
+
+    /* Numeric literals.  strtoul() stops converting at the first character
+     * that is not a digit of the requested base, so the trailing b/q/h
+     * suffix needs no stripping. */
+
+    /* standard decimal integer */
+{DIGIT}+ {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 10);
+    return INTNUM;
+}
+
+    /* 10010011b - binary number */
+{BINDIGIT}+b {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 2);
+    return INTNUM;
+}
+
+    /* 777q - octal number */
+{OCTDIGIT}+q {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 8);
+    return INTNUM;
+}
+
+    /* 0AAh form of hexadecimal number (yytext+1 skips the leading 0) */
+0{HEXDIGIT}+h {
+    yylval.int_val = strtoul(yytext+1, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* $0AA and 0xAA forms of hexadecimal number (yytext+2 skips the
+     * two-character "$0" or "0x" prefix) */
+(\$0|0x){HEXDIGIT}+ {
+    yylval.int_val = strtoul(yytext+2, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* floating point value */
+{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
+    yylval.double_val = strtod(yytext, (char **)NULL);
+    return FLTNUM;
+}
+    /* Keyword rules.  They are listed ahead of the generic label rule so
+     * that flex, which prefers the earliest rule among equally long
+     * matches, returns the keyword token rather than a symbol. */
+    /* directives */
+bits { return BITS; }
+section { return SECTION; }
+segment { return SECTION; }
+absolute { return ABSOLUTE; }
+extern { return EXTERN; }
+global { return GLOBAL; }
+common { return COMMON; }
+
+    /* size specifiers (int_val = size in bytes) */
+byte { yylval.int_val = 1; return BYTE; }
+word { yylval.int_val = 2; return WORD; }
+dword { yylval.int_val = 4; return DWORD; }
+qword { yylval.int_val = 8; return QWORD; }
+tword { yylval.int_val = 10; return TWORD; }
+dqword { yylval.int_val = 16; return DQWORD; }
+
+    /* pseudo-instructions (int_val = element size in bytes) */
+db { yylval.int_val = 1; return DECLARE_DATA; }
+dw { yylval.int_val = 2; return DECLARE_DATA; }
+dd { yylval.int_val = 4; return DECLARE_DATA; }
+dq { yylval.int_val = 8; return DECLARE_DATA; }
+dt { yylval.int_val = 10; return DECLARE_DATA; }
+
+resb { yylval.int_val = 1; return RESERVE_SPACE; }
+resw { yylval.int_val = 2; return RESERVE_SPACE; }
+resd { yylval.int_val = 4; return RESERVE_SPACE; }
+resq { yylval.int_val = 8; return RESERVE_SPACE; }
+rest { yylval.int_val = 10; return RESERVE_SPACE; }
+
+incbin { return INCBIN; }
+
+equ { return EQU; }
+
+times { return TIMES; }
+
+seg { return SEG; }
+wrt { return WRT; }
+near { return NEAR; }
+short { return SHORT; }
+far { return FAR; }
+
+nosplit { return NOSPLIT; }
+
+org { return ORG; }
+
+    /* operand size overrides (int_val = size in bits) */
+o16 { yylval.int_val = 16; return OPERSIZE; }
+o32 { yylval.int_val = 32; return OPERSIZE; }
+    /* address size overrides (int_val = size in bits) */
+a16 { yylval.int_val = 16; return ADDRSIZE; }
+a32 { yylval.int_val = 32; return ADDRSIZE; }
+
+    /* instruction prefixes (repe and repz are synonyms, as are repne and
+     * repnz) */
+lock { return LOCK; }
+repne { return REPNZ; }
+repnz { return REPNZ; }
+rep { return REP; }
+repe { return REPZ; }
+repz { return REPZ; }
+
+    /* control, debug, and test registers (int_val = register number taken
+     * from the trailing digit) */
+cr4 { yylval.int_val = 4; return CR4; }
+cr[023] { yylval.int_val = yytext[2]-'0'; return CRREG_NOTCR4; }
+dr[0-367] { yylval.int_val = yytext[2]-'0'; return DRREG; }
+tr[3-7] { yylval.int_val = yytext[2]-'0'; return TRREG; }
+
+    /* floating point, MMX, and SSE registers (int_val = register number) */
+st0 { yylval.int_val = 0; return ST0; }
+st[1-7] { yylval.int_val = yytext[2]-'0'; return FPUREG_NOTST0; }
+mm[0-7] { yylval.int_val = yytext[2]-'0'; return MMXREG; }
+xmm[0-7] { yylval.int_val = yytext[3]-'0'; return XMMREG; }
+
+    /* integer registers (int_val = register number, 0-7) */
+eax { yylval.int_val = 0; return REG_EAX; }
+ecx { yylval.int_val = 1; return REG_ECX; }
+edx { yylval.int_val = 2; return REG_EDX; }
+ebx { yylval.int_val = 3; return REG_EBX; }
+esp { yylval.int_val = 4; return REG_ESP; }
+ebp { yylval.int_val = 5; return REG_EBP; }
+esi { yylval.int_val = 6; return REG_ESI; }
+edi { yylval.int_val = 7; return REG_EDI; }
+
+ax { yylval.int_val = 0; return REG_AX; }
+cx { yylval.int_val = 1; return REG_CX; }
+dx { yylval.int_val = 2; return REG_DX; }
+bx { yylval.int_val = 3; return REG_BX; }
+sp { yylval.int_val = 4; return REG_SP; }
+bp { yylval.int_val = 5; return REG_BP; }
+si { yylval.int_val = 6; return REG_SI; }
+di { yylval.int_val = 7; return REG_DI; }
+
+al { yylval.int_val = 0; return REG_AL; }
+cl { yylval.int_val = 1; return REG_CL; }
+dl { yylval.int_val = 2; return REG_DL; }
+bl { yylval.int_val = 3; return REG_BL; }
+ah { yylval.int_val = 4; return REG_AH; }
+ch { yylval.int_val = 5; return REG_CH; }
+dh { yylval.int_val = 6; return REG_DH; }
+bh { yylval.int_val = 7; return REG_BH; }
+
+    /* segment registers (int_val = register number, 0-5) */
+es { yylval.int_val = 0; return REG_ES; }
+cs { yylval.int_val = 1; return REG_CS; }
+ss { yylval.int_val = 2; return REG_SS; }
+ds { yylval.int_val = 3; return REG_DS; }
+fs { yylval.int_val = 4; return REG_FS; }
+gs { yylval.int_val = 5; return REG_GS; }
+
+    /* multi-character operators, then single-character tokens, which are
+     * returned as their own character code */
+"<<" { return LEFT_OP; }
+">>" { return RIGHT_OP; }
+"//" { return SIGNDIV; }
+"%%" { return SIGNMOD; }
+"$$" { return START_SECTION_OFFSET; }
+[-+|^&*/%~$():[\],] { return yytext[0]; }
+
+\.\.start:? { return ENTRY_POINT; }
+
+    /* special non-local ..@label: looked up in the symbol table and
+     * entered as an ID on first use */
+\.\.@[a-z0-9_$#@~.?]+ {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* local label (.label) */
+\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
+    /* TODO: append yytext to locallabel_base before testing */
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* instructions */
+    /* TODO: dynamically generate */
+aaa { return INS_AAA; }
+aad { return INS_AAD; }
+idiv { return INS_IDIV; }
+imul { return INS_IMUL; }
+in { return INS_IN; }
+loope { return INS_LOOPZ; }
+loopz { return INS_LOOPZ; }
+lsl { return INS_LSL; }
+
+    /* label: any other identifier.  Besides the symbol-table lookup, the
+     * text is remembered in locallabel_base (the previous copy is freed
+     * first). */
+[a-z_?][a-z0-9_$#@~.?]* {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+
+    /* TODO: optimize to decrease number of allocations */
+    if(locallabel_base)
+        free(locallabel_base);
+    locallabel_base = strdup(yytext);
+
+    return s->type;
+}
+
+    /* comments (';' to end of line) and whitespace are discarded; a
+     * newline is passed to the parser as a token */
+;.* ;
+
+{WS}+ ;
+
+\n return '\n';
+
diff --git a/src/token.l b/src/token.l
new file mode 100644
index 00000000..327c85dc
--- /dev/null
+++ b/src/token.l
@@ -0,0 +1,245 @@
+/* $Id: token.l,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main lexer
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include
+#include
+#include "symrec.h"
+#include "bytecode.h"
+#include "bison.tab.h"
+%}
+%option noyywrap
+%option nounput
+%option case-insensitive
+    /* heap copy of the most recently scanned non-local label; replaced by
+     * the generic label rule at the end of the rules section */
+    static char *locallabel_base = (char *)NULL;
+
+DIGIT [0-9]
+BINDIGIT [01]
+OCTDIGIT [0-7]
+HEXDIGIT [0-9a-f]
+WS [ \t\r]
+
+%%
+
+    /* Numeric literals.  strtoul() stops converting at the first character
+     * that is not a digit of the requested base, so the trailing b/q/h
+     * suffix needs no stripping. */
+
+    /* standard decimal integer */
+{DIGIT}+ {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 10);
+    return INTNUM;
+}
+
+    /* 10010011b - binary number */
+{BINDIGIT}+b {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 2);
+    return INTNUM;
+}
+
+    /* 777q - octal number */
+{OCTDIGIT}+q {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 8);
+    return INTNUM;
+}
+
+    /* 0AAh form of hexadecimal number (yytext+1 skips the leading 0) */
+0{HEXDIGIT}+h {
+    yylval.int_val = strtoul(yytext+1, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* $0AA and 0xAA forms of hexadecimal number (yytext+2 skips the
+     * two-character "$0" or "0x" prefix) */
+(\$0|0x){HEXDIGIT}+ {
+    yylval.int_val = strtoul(yytext+2, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* floating point value */
+{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
+    yylval.double_val = strtod(yytext, (char **)NULL);
+    return FLTNUM;
+}
+    /* Keyword rules.  They are listed ahead of the generic label rule so
+     * that flex, which prefers the earliest rule among equally long
+     * matches, returns the keyword token rather than a symbol. */
+    /* directives */
+bits { return BITS; }
+section { return SECTION; }
+segment { return SECTION; }
+absolute { return ABSOLUTE; }
+extern { return EXTERN; }
+global { return GLOBAL; }
+common { return COMMON; }
+
+    /* size specifiers (int_val = size in bytes) */
+byte { yylval.int_val = 1; return BYTE; }
+word { yylval.int_val = 2; return WORD; }
+dword { yylval.int_val = 4; return DWORD; }
+qword { yylval.int_val = 8; return QWORD; }
+tword { yylval.int_val = 10; return TWORD; }
+dqword { yylval.int_val = 16; return DQWORD; }
+
+    /* pseudo-instructions (int_val = element size in bytes) */
+db { yylval.int_val = 1; return DECLARE_DATA; }
+dw { yylval.int_val = 2; return DECLARE_DATA; }
+dd { yylval.int_val = 4; return DECLARE_DATA; }
+dq { yylval.int_val = 8; return DECLARE_DATA; }
+dt { yylval.int_val = 10; return DECLARE_DATA; }
+
+resb { yylval.int_val = 1; return RESERVE_SPACE; }
+resw { yylval.int_val = 2; return RESERVE_SPACE; }
+resd { yylval.int_val = 4; return RESERVE_SPACE; }
+resq { yylval.int_val = 8; return RESERVE_SPACE; }
+rest { yylval.int_val = 10; return RESERVE_SPACE; }
+
+incbin { return INCBIN; }
+
+equ { return EQU; }
+
+times { return TIMES; }
+
+seg { return SEG; }
+wrt { return WRT; }
+near { return NEAR; }
+short { return SHORT; }
+far { return FAR; }
+
+nosplit { return NOSPLIT; }
+
+org { return ORG; }
+
+    /* operand size overrides (int_val = size in bits) */
+o16 { yylval.int_val = 16; return OPERSIZE; }
+o32 { yylval.int_val = 32; return OPERSIZE; }
+    /* address size overrides (int_val = size in bits) */
+a16 { yylval.int_val = 16; return ADDRSIZE; }
+a32 { yylval.int_val = 32; return ADDRSIZE; }
+
+    /* instruction prefixes (repe and repz are synonyms, as are repne and
+     * repnz) */
+lock { return LOCK; }
+repne { return REPNZ; }
+repnz { return REPNZ; }
+rep { return REP; }
+repe { return REPZ; }
+repz { return REPZ; }
+
+    /* control, debug, and test registers (int_val = register number taken
+     * from the trailing digit) */
+cr4 { yylval.int_val = 4; return CR4; }
+cr[023] { yylval.int_val = yytext[2]-'0'; return CRREG_NOTCR4; }
+dr[0-367] { yylval.int_val = yytext[2]-'0'; return DRREG; }
+tr[3-7] { yylval.int_val = yytext[2]-'0'; return TRREG; }
+
+    /* floating point, MMX, and SSE registers (int_val = register number) */
+st0 { yylval.int_val = 0; return ST0; }
+st[1-7] { yylval.int_val = yytext[2]-'0'; return FPUREG_NOTST0; }
+mm[0-7] { yylval.int_val = yytext[2]-'0'; return MMXREG; }
+xmm[0-7] { yylval.int_val = yytext[3]-'0'; return XMMREG; }
+
+    /* integer registers (int_val = register number, 0-7) */
+eax { yylval.int_val = 0; return REG_EAX; }
+ecx { yylval.int_val = 1; return REG_ECX; }
+edx { yylval.int_val = 2; return REG_EDX; }
+ebx { yylval.int_val = 3; return REG_EBX; }
+esp { yylval.int_val = 4; return REG_ESP; }
+ebp { yylval.int_val = 5; return REG_EBP; }
+esi { yylval.int_val = 6; return REG_ESI; }
+edi { yylval.int_val = 7; return REG_EDI; }
+
+ax { yylval.int_val = 0; return REG_AX; }
+cx { yylval.int_val = 1; return REG_CX; }
+dx { yylval.int_val = 2; return REG_DX; }
+bx { yylval.int_val = 3; return REG_BX; }
+sp { yylval.int_val = 4; return REG_SP; }
+bp { yylval.int_val = 5; return REG_BP; }
+si { yylval.int_val = 6; return REG_SI; }
+di { yylval.int_val = 7; return REG_DI; }
+
+al { yylval.int_val = 0; return REG_AL; }
+cl { yylval.int_val = 1; return REG_CL; }
+dl { yylval.int_val = 2; return REG_DL; }
+bl { yylval.int_val = 3; return REG_BL; }
+ah { yylval.int_val = 4; return REG_AH; }
+ch { yylval.int_val = 5; return REG_CH; }
+dh { yylval.int_val = 6; return REG_DH; }
+bh { yylval.int_val = 7; return REG_BH; }
+
+    /* segment registers (int_val = register number, 0-5) */
+es { yylval.int_val = 0; return REG_ES; }
+cs { yylval.int_val = 1; return REG_CS; }
+ss { yylval.int_val = 2; return REG_SS; }
+ds { yylval.int_val = 3; return REG_DS; }
+fs { yylval.int_val = 4; return REG_FS; }
+gs { yylval.int_val = 5; return REG_GS; }
+
+    /* multi-character operators, then single-character tokens, which are
+     * returned as their own character code */
+"<<" { return LEFT_OP; }
+">>" { return RIGHT_OP; }
+"//" { return SIGNDIV; }
+"%%" { return SIGNMOD; }
+"$$" { return START_SECTION_OFFSET; }
+[-+|^&*/%~$():[\],] { return yytext[0]; }
+
+\.\.start:? { return ENTRY_POINT; }
+
+    /* special non-local ..@label: looked up in the symbol table and
+     * entered as an ID on first use */
+\.\.@[a-z0-9_$#@~.?]+ {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* local label (.label) */
+\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
+    /* TODO: append yytext to locallabel_base before testing */
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* instructions */
+    /* TODO: dynamically generate */
+aaa { return INS_AAA; }
+aad { return INS_AAD; }
+idiv { return INS_IDIV; }
+imul { return INS_IMUL; }
+in { return INS_IN; }
+loope { return INS_LOOPZ; }
+loopz { return INS_LOOPZ; }
+lsl { return INS_LSL; }
+
+    /* label: any other identifier.  Besides the symbol-table lookup, the
+     * text is remembered in locallabel_base (the previous copy is freed
+     * first). */
+[a-z_?][a-z0-9_$#@~.?]* {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+
+    /* TODO: optimize to decrease number of allocations */
+    if(locallabel_base)
+        free(locallabel_base);
+    locallabel_base = strdup(yytext);
+
+    return s->type;
+}
+
+    /* comments (';' to end of line) and whitespace are discarded; a
+     * newline is passed to the parser as a token */
+;.* ;
+
+{WS}+ ;
+
+\n return '\n';
+
diff --git a/src/token.l.in b/src/token.l.in
new file mode 100644
index 00000000..b108c50f
--- /dev/null
+++ b/src/token.l.in
@@ -0,0 +1,245 @@
+/* $Id: token.l.in,v 1.1 2001/05/15 05:20:39 peter Exp $
+ * Main lexer
+ *
+ * Copyright (C) 2001 Peter Johnson
+ *
+ * This file is part of YASM.
+ *
+ * YASM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * YASM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+%{
+#include
+#include
+#include "symrec.h"
+#include "bytecode.h"
+#include "bison.tab.h"
+%}
+%option noyywrap
+%option nounput
+%option case-insensitive
+    /* heap copy of the most recently scanned non-local label; replaced by
+     * the generic label rule at the end of the rules section */
+    static char *locallabel_base = (char *)NULL;
+
+DIGIT [0-9]
+BINDIGIT [01]
+OCTDIGIT [0-7]
+HEXDIGIT [0-9a-f]
+WS [ \t\r]
+
+%%
+
+    /* Numeric literals.  strtoul() stops converting at the first character
+     * that is not a digit of the requested base, so the trailing b/q/h
+     * suffix needs no stripping. */
+
+    /* standard decimal integer */
+{DIGIT}+ {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 10);
+    return INTNUM;
+}
+
+    /* 10010011b - binary number */
+{BINDIGIT}+b {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 2);
+    return INTNUM;
+}
+
+    /* 777q - octal number */
+{OCTDIGIT}+q {
+    yylval.int_val = strtoul(yytext, (char **)NULL, 8);
+    return INTNUM;
+}
+
+    /* 0AAh form of hexadecimal number (yytext+1 skips the leading 0) */
+0{HEXDIGIT}+h {
+    yylval.int_val = strtoul(yytext+1, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* $0AA and 0xAA forms of hexadecimal number (yytext+2 skips the
+     * two-character "$0" or "0x" prefix) */
+(\$0|0x){HEXDIGIT}+ {
+    yylval.int_val = strtoul(yytext+2, (char **)NULL, 16);
+    return INTNUM;
+}
+
+    /* floating point value */
+{DIGIT}+\.{DIGIT}*(e[-+]?{DIGIT}+)? {
+    yylval.double_val = strtod(yytext, (char **)NULL);
+    return FLTNUM;
+}
+    /* Keyword rules.  They are listed ahead of the generic label rule so
+     * that flex, which prefers the earliest rule among equally long
+     * matches, returns the keyword token rather than a symbol. */
+    /* directives */
+bits { return BITS; }
+section { return SECTION; }
+segment { return SECTION; }
+absolute { return ABSOLUTE; }
+extern { return EXTERN; }
+global { return GLOBAL; }
+common { return COMMON; }
+
+    /* size specifiers (int_val = size in bytes) */
+byte { yylval.int_val = 1; return BYTE; }
+word { yylval.int_val = 2; return WORD; }
+dword { yylval.int_val = 4; return DWORD; }
+qword { yylval.int_val = 8; return QWORD; }
+tword { yylval.int_val = 10; return TWORD; }
+dqword { yylval.int_val = 16; return DQWORD; }
+
+    /* pseudo-instructions (int_val = element size in bytes) */
+db { yylval.int_val = 1; return DECLARE_DATA; }
+dw { yylval.int_val = 2; return DECLARE_DATA; }
+dd { yylval.int_val = 4; return DECLARE_DATA; }
+dq { yylval.int_val = 8; return DECLARE_DATA; }
+dt { yylval.int_val = 10; return DECLARE_DATA; }
+
+resb { yylval.int_val = 1; return RESERVE_SPACE; }
+resw { yylval.int_val = 2; return RESERVE_SPACE; }
+resd { yylval.int_val = 4; return RESERVE_SPACE; }
+resq { yylval.int_val = 8; return RESERVE_SPACE; }
+rest { yylval.int_val = 10; return RESERVE_SPACE; }
+
+incbin { return INCBIN; }
+
+equ { return EQU; }
+
+times { return TIMES; }
+
+seg { return SEG; }
+wrt { return WRT; }
+near { return NEAR; }
+short { return SHORT; }
+far { return FAR; }
+
+nosplit { return NOSPLIT; }
+
+org { return ORG; }
+
+    /* operand size overrides (int_val = size in bits) */
+o16 { yylval.int_val = 16; return OPERSIZE; }
+o32 { yylval.int_val = 32; return OPERSIZE; }
+    /* address size overrides (int_val = size in bits) */
+a16 { yylval.int_val = 16; return ADDRSIZE; }
+a32 { yylval.int_val = 32; return ADDRSIZE; }
+
+    /* instruction prefixes (repe and repz are synonyms, as are repne and
+     * repnz) */
+lock { return LOCK; }
+repne { return REPNZ; }
+repnz { return REPNZ; }
+rep { return REP; }
+repe { return REPZ; }
+repz { return REPZ; }
+
+    /* control, debug, and test registers (int_val = register number taken
+     * from the trailing digit) */
+cr4 { yylval.int_val = 4; return CR4; }
+cr[023] { yylval.int_val = yytext[2]-'0'; return CRREG_NOTCR4; }
+dr[0-367] { yylval.int_val = yytext[2]-'0'; return DRREG; }
+tr[3-7] { yylval.int_val = yytext[2]-'0'; return TRREG; }
+
+    /* floating point, MMX, and SSE registers (int_val = register number) */
+st0 { yylval.int_val = 0; return ST0; }
+st[1-7] { yylval.int_val = yytext[2]-'0'; return FPUREG_NOTST0; }
+mm[0-7] { yylval.int_val = yytext[2]-'0'; return MMXREG; }
+xmm[0-7] { yylval.int_val = yytext[3]-'0'; return XMMREG; }
+
+    /* integer registers (int_val = register number, 0-7) */
+eax { yylval.int_val = 0; return REG_EAX; }
+ecx { yylval.int_val = 1; return REG_ECX; }
+edx { yylval.int_val = 2; return REG_EDX; }
+ebx { yylval.int_val = 3; return REG_EBX; }
+esp { yylval.int_val = 4; return REG_ESP; }
+ebp { yylval.int_val = 5; return REG_EBP; }
+esi { yylval.int_val = 6; return REG_ESI; }
+edi { yylval.int_val = 7; return REG_EDI; }
+
+ax { yylval.int_val = 0; return REG_AX; }
+cx { yylval.int_val = 1; return REG_CX; }
+dx { yylval.int_val = 2; return REG_DX; }
+bx { yylval.int_val = 3; return REG_BX; }
+sp { yylval.int_val = 4; return REG_SP; }
+bp { yylval.int_val = 5; return REG_BP; }
+si { yylval.int_val = 6; return REG_SI; }
+di { yylval.int_val = 7; return REG_DI; }
+
+al { yylval.int_val = 0; return REG_AL; }
+cl { yylval.int_val = 1; return REG_CL; }
+dl { yylval.int_val = 2; return REG_DL; }
+bl { yylval.int_val = 3; return REG_BL; }
+ah { yylval.int_val = 4; return REG_AH; }
+ch { yylval.int_val = 5; return REG_CH; }
+dh { yylval.int_val = 6; return REG_DH; }
+bh { yylval.int_val = 7; return REG_BH; }
+
+    /* segment registers (int_val = register number, 0-5) */
+es { yylval.int_val = 0; return REG_ES; }
+cs { yylval.int_val = 1; return REG_CS; }
+ss { yylval.int_val = 2; return REG_SS; }
+ds { yylval.int_val = 3; return REG_DS; }
+fs { yylval.int_val = 4; return REG_FS; }
+gs { yylval.int_val = 5; return REG_GS; }
+
+    /* multi-character operators, then single-character tokens, which are
+     * returned as their own character code */
+"<<" { return LEFT_OP; }
+">>" { return RIGHT_OP; }
+"//" { return SIGNDIV; }
+"%%" { return SIGNMOD; }
+"$$" { return START_SECTION_OFFSET; }
+[-+|^&*/%~$():[\],] { return yytext[0]; }
+
+\.\.start:? { return ENTRY_POINT; }
+
+    /* special non-local ..@label: looked up in the symbol table and
+     * entered as an ID on first use */
+\.\.@[a-z0-9_$#@~.?]+ {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* local label (.label) */
+\.[a-z0-9_$#@~?][a-z0-9_$#@~.?]* {
+    /* TODO: append yytext to locallabel_base before testing */
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+    return s->type;
+}
+
+    /* instructions */
+    /* TODO: dynamically generate */
+aaa { return INS_AAA; }
+aad { return INS_AAD; }
+idiv { return INS_IDIV; }
+imul { return INS_IMUL; }
+in { return INS_IN; }
+loope { return INS_LOOPZ; }
+loopz { return INS_LOOPZ; }
+lsl { return INS_LSL; }
+
+    /* label: any other identifier.  Besides the symbol-table lookup, the
+     * text is remembered in locallabel_base (the previous copy is freed
+     * first). */
+[a-z_?][a-z0-9_$#@~.?]* {
+    symrec *s = getsym(yytext);
+    if(!s)
+        s = putsym(yytext, ID);
+    yylval.sym = s;
+
+    /* TODO: optimize to decrease number of allocations */
+    if(locallabel_base)
+        free(locallabel_base);
+    locallabel_base = strdup(yytext);
+
+    return s->type;
+}
+
+    /* comments (';' to end of line) and whitespace are discarded; a
+     * newline is passed to the parser as a token */
+;.* ;
+
+{WS}+ ;
+
+\n return '\n';
+