From: Michael Urman Date: Mon, 21 May 2001 21:46:33 +0000 (-0000) Subject: First commit. Generates all needed sections of token.l, bison.y from X-Git-Tag: v0.1.0~483 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c6274f11707c36b909ac224f63f864eae8f9fed8;p=yasm First commit. Generates all needed sections of token.l, bison.y from instrs.dat, token.l.in, bison.y.in. Optionally can use other files, or just verify input is valid. svn path=/trunk/yasm/; revision=28 --- diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl new file mode 100755 index 00000000..0d2cf4a5 --- /dev/null +++ b/modules/parsers/nasm/gen_instr.pl @@ -0,0 +1,336 @@ +#!/usr/bin/perl -w +# $Id: gen_instr.pl,v 1.1 2001/05/21 21:46:33 mu Exp $ +# Generates bison.y and token.l from instrs.dat for YASM +# +# Copyright (C) 2001 Michael Urman +# +# This file is part of YASM. +# +# YASM is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# YASM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +use strict; +use Getopt::Long; +my $VERSION = "0.0.1"; + +# useful constants for instruction arrays +use constant INST => 0; +use constant OPERANDS => 1; +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; + +# default options +my $instrfile = 'instrs.dat'; +my $tokenfile = 'token.l'; +my $tokensource; +my $grammarfile = 'bison.y'; +my $grammarsource; +my $showversion; +my $showusage; +my $dry_run; + +# allow overrides +my $gotopts = GetOptions ( 'input=s' => \$instrfile, + 'token=s' => \$tokenfile, + 'sourcetoken=s' => \$tokensource, + 'grammar=s' => \$grammarfile, + 'sourcegrammar=s' => \$grammarsource, + 'version' => \$showversion, + 'n|dry-run' => \$dry_run, + 'help|usage' => \$showusage, + ); + +&showusage and exit 1 unless $gotopts; +&showversion if $showversion; +&showusage if $showusage; +exit 0 if $showversion or $showusage; + + +my ($instructions, $instrlist) = &read_instructions ($instrfile); + +exit 0 if $dry_run; # done with simple verification, so exit + +unless ($dry_run) +{ + &output_lex ($tokenfile, $tokensource, $instructions, $instrlist); + &output_yacc ($grammarfile, $grammarsource, $instructions, $instrlist); +} + +# print version for --version, etc. +sub showversion +{ + print "YASM gen_instr.pl $VERSION\n"; +} + +# print usage information for --help, etc. +sub showusage +{ + print <<"EOF"; +Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile] + -i, --input instructions file (default: $instrfile) + -t, --token token output file (default: $tokenfile) + -st, --sourcetoken token input file (default: $tokenfile.in) + -g, --grammar grammar output file (default: $grammarfile) + -sg, --sourcegrammar grammar input file (default: $grammarfile.in) + -v, --version show version and exit + -h, --help, --usage show this message and exit + -n, --dry-run verify input file without writing output files +EOF +} + +# read in instructions, and verify they're valid (well, mostly) +sub read_instructions ($) +{ + my $instrfile = shift || die; + open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; + my @instr; + my %instr; + my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG xrm64 + fpureg ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem + imm8 imm16 imm32 imm1632 imm64 imm80 imm128 + imm8x imm16x imm32x imm1632x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + ); + my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + ); + while () + { + next if /^\s*(?:;.*)$/; + if (m/^:(\w+)\t+(\w+)$/) + { + # TODO: actually handle this. Probably in lex. + # alias $1 -> $2 + next; + } + my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^nil|16|32|128$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + die "Invalid Effective Address\n" + if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; + push @instr, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + $instr{$inst} = 1; + } + close INPUT; + return (\@instr, \%instr); +} + +sub output_lex ($@) +{ + my $tokenfile = shift or die; + my $tokensource = shift; + $tokensource ||= "$tokenfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; + open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; + while () + { + # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content + if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + foreach my $inst (sort keys %$instrlist) + { + printf TOKEN "%-12s{ return %-16s }\n", $inst, "\Uins_$inst;\E"; + } + } + else + { + print TOKEN $_; + } + } + close IN; + close TOKEN; +} + +sub output_yacc ($@) +{ + my $grammarfile = shift or die; + my $grammarsource = shift; + $grammarsource ||= "$grammarfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; + open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; + + while () + { + if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) + { + my $len = length("%token"); + print GRAMMAR "%token"; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length("INS_$inst") < 76) + { + print GRAMMAR " INS_\U$inst\E"; + $len += length(" INS_$inst"); + } + else + { + print GRAMMAR "\n%token INS_\U$inst\E"; + $len = length("%token INS_$inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/}) + { + my $len = length("%type "); + print GRAMMAR "%type "; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length($inst) < 76) + { + print GRAMMAR " $inst"; + $len += length(" $inst"); + } + else + { + print GRAMMAR "\n%type $inst"; + $len = length("%type $inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + # list every kind of instruction that instrbase can be + print GRAMMAR "instrbase: ", + join( "\n | ", sort keys %$instrlist), "\n;\n"; + + # list the arguments and actions (buildbc) + foreach my $inst (@$instructions) + { + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Uins_$inst->[INST]\E"; + $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # opcode size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + $args[-1] =~ s/\$(\d+)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(effaddr *)NULL, 0/; + $args[-1] =~ s/\$(\d+)([ri])?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + $args[-1] =~ s/\$(\d+)(r)?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; + + $args[-1] =~ s/(\&\$\d+)(r)?/$1/; + $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # lame way first---rule: args { action }; for every version + # do the compression to |'s later + print GRAMMAR <<"EOF"; +$rule: $tokens { + $func (@args); + }; +EOF + } + } + else + { + print GRAMMAR $_; + } + } + close IN; + close GRAMMAR; +} diff --git a/src/gen_instr.pl b/src/gen_instr.pl new file mode 100755 index 00000000..0d2cf4a5 --- /dev/null +++ b/src/gen_instr.pl @@ -0,0 +1,336 @@ +#!/usr/bin/perl -w +# $Id: gen_instr.pl,v 1.1 2001/05/21 21:46:33 mu Exp $ +# Generates bison.y and token.l from instrs.dat for YASM +# +# Copyright (C) 2001 Michael Urman +# +# This file is part of YASM. +# +# YASM is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# YASM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +use strict; +use Getopt::Long; +my $VERSION = "0.0.1"; + +# useful constants for instruction arrays +use constant INST => 0; +use constant OPERANDS => 1; +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; + +# default options +my $instrfile = 'instrs.dat'; +my $tokenfile = 'token.l'; +my $tokensource; +my $grammarfile = 'bison.y'; +my $grammarsource; +my $showversion; +my $showusage; +my $dry_run; + +# allow overrides +my $gotopts = GetOptions ( 'input=s' => \$instrfile, + 'token=s' => \$tokenfile, + 'sourcetoken=s' => \$tokensource, + 'grammar=s' => \$grammarfile, + 'sourcegrammar=s' => \$grammarsource, + 'version' => \$showversion, + 'n|dry-run' => \$dry_run, + 'help|usage' => \$showusage, + ); + +&showusage and exit 1 unless $gotopts; +&showversion if $showversion; +&showusage if $showusage; +exit 0 if $showversion or $showusage; + + +my ($instructions, $instrlist) = &read_instructions ($instrfile); + +exit 0 if $dry_run; # done with simple verification, so exit + +unless ($dry_run) +{ + &output_lex ($tokenfile, $tokensource, $instructions, $instrlist); + &output_yacc ($grammarfile, $grammarsource, $instructions, $instrlist); +} + +# print version for --version, etc. +sub showversion +{ + print "YASM gen_instr.pl $VERSION\n"; +} + +# print usage information for --help, etc. +sub showusage +{ + print <<"EOF"; +Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile] + -i, --input instructions file (default: $instrfile) + -t, --token token output file (default: $tokenfile) + -st, --sourcetoken token input file (default: $tokenfile.in) + -g, --grammar grammar output file (default: $grammarfile) + -sg, --sourcegrammar grammar input file (default: $grammarfile.in) + -v, --version show version and exit + -h, --help, --usage show this message and exit + -n, --dry-run verify input file without writing output files +EOF +} + +# read in instructions, and verify they're valid (well, mostly) +sub read_instructions ($) +{ + my $instrfile = shift || die; + open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; + my @instr; + my %instr; + my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG xrm64 + fpureg ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem + imm8 imm16 imm32 imm1632 imm64 imm80 imm128 + imm8x imm16x imm32x imm1632x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + ); + my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + ); + while () + { + next if /^\s*(?:;.*)$/; + if (m/^:(\w+)\t+(\w+)$/) + { + # TODO: actually handle this. Probably in lex. + # alias $1 -> $2 + next; + } + my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^nil|16|32|128$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + die "Invalid Effective Address\n" + if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; + push @instr, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + $instr{$inst} = 1; + } + close INPUT; + return (\@instr, \%instr); +} + +sub output_lex ($@) +{ + my $tokenfile = shift or die; + my $tokensource = shift; + $tokensource ||= "$tokenfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; + open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; + while () + { + # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content + if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + foreach my $inst (sort keys %$instrlist) + { + printf TOKEN "%-12s{ return %-16s }\n", $inst, "\Uins_$inst;\E"; + } + } + else + { + print TOKEN $_; + } + } + close IN; + close TOKEN; +} + +sub output_yacc ($@) +{ + my $grammarfile = shift or die; + my $grammarsource = shift; + $grammarsource ||= "$grammarfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; + open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; + + while () + { + if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) + { + my $len = length("%token"); + print GRAMMAR "%token"; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length("INS_$inst") < 76) + { + print GRAMMAR " INS_\U$inst\E"; + $len += length(" INS_$inst"); + } + else + { + print GRAMMAR "\n%token INS_\U$inst\E"; + $len = length("%token INS_$inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/}) + { + my $len = length("%type "); + print GRAMMAR "%type "; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length($inst) < 76) + { + print GRAMMAR " $inst"; + $len += length(" $inst"); + } + else + { + print GRAMMAR "\n%type $inst"; + $len = length("%type $inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + # list every kind of instruction that instrbase can be + print GRAMMAR "instrbase: ", + join( "\n | ", sort keys %$instrlist), "\n;\n"; + + # list the arguments and actions (buildbc) + foreach my $inst (@$instructions) + { + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Uins_$inst->[INST]\E"; + $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # opcode size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + $args[-1] =~ s/\$(\d+)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(effaddr *)NULL, 0/; + $args[-1] =~ s/\$(\d+)([ri])?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + $args[-1] =~ s/\$(\d+)(r)?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; + + $args[-1] =~ s/(\&\$\d+)(r)?/$1/; + $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # lame way first---rule: args { action }; for every version + # do the compression to |'s later + print GRAMMAR <<"EOF"; +$rule: $tokens { + $func (@args); + }; +EOF + } + } + else + { + print GRAMMAR $_; + } + } + close IN; + close GRAMMAR; +} diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl new file mode 100755 index 00000000..0d2cf4a5 --- /dev/null +++ b/src/parsers/nasm/gen_instr.pl @@ -0,0 +1,336 @@ +#!/usr/bin/perl -w +# $Id: gen_instr.pl,v 1.1 2001/05/21 21:46:33 mu Exp $ +# Generates bison.y and token.l from instrs.dat for YASM +# +# Copyright (C) 2001 Michael Urman +# +# This file is part of YASM. +# +# YASM is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# YASM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +use strict; +use Getopt::Long; +my $VERSION = "0.0.1"; + +# useful constants for instruction arrays +use constant INST => 0; +use constant OPERANDS => 1; +use constant OPSIZE => 2; +use constant OPCODE => 3; +use constant EFFADDR => 4; +use constant IMM => 5; +use constant CPU => 6; + +# default options +my $instrfile = 'instrs.dat'; +my $tokenfile = 'token.l'; +my $tokensource; +my $grammarfile = 'bison.y'; +my $grammarsource; +my $showversion; +my $showusage; +my $dry_run; + +# allow overrides +my $gotopts = GetOptions ( 'input=s' => \$instrfile, + 'token=s' => \$tokenfile, + 'sourcetoken=s' => \$tokensource, + 'grammar=s' => \$grammarfile, + 'sourcegrammar=s' => \$grammarsource, + 'version' => \$showversion, + 'n|dry-run' => \$dry_run, + 'help|usage' => \$showusage, + ); + +&showusage and exit 1 unless $gotopts; +&showversion if $showversion; +&showusage if $showusage; +exit 0 if $showversion or $showusage; + + +my ($instructions, $instrlist) = &read_instructions ($instrfile); + +exit 0 if $dry_run; # done with simple verification, so exit + +unless ($dry_run) +{ + &output_lex ($tokenfile, $tokensource, $instructions, $instrlist); + &output_yacc ($grammarfile, $grammarsource, $instructions, $instrlist); +} + +# print version for --version, etc. +sub showversion +{ + print "YASM gen_instr.pl $VERSION\n"; +} + +# print usage information for --help, etc. +sub showusage +{ + print <<"EOF"; +Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile] + -i, --input instructions file (default: $instrfile) + -t, --token token output file (default: $tokenfile) + -st, --sourcetoken token input file (default: $tokenfile.in) + -g, --grammar grammar output file (default: $grammarfile) + -sg, --sourcegrammar grammar input file (default: $grammarfile.in) + -v, --version show version and exit + -h, --help, --usage show this message and exit + -n, --dry-run verify input file without writing output files +EOF +} + +# read in instructions, and verify they're valid (well, mostly) +sub read_instructions ($) +{ + my $instrfile = shift || die; + open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; + my @instr; + my %instr; + my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG xrm64 + fpureg ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem + imm8 imm16 imm32 imm1632 imm64 imm80 imm128 + imm8x imm16x imm32x imm1632x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x + ); + my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + ); + while () + { + next if /^\s*(?:;.*)$/; + if (m/^:(\w+)\t+(\w+)$/) + { + # TODO: actually handle this. Probably in lex. + # alias $1 -> $2 + next; + } + my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid Operands\n" + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + die "Invalid Operation Size\n" + if $size !~ m/^nil|16|32|128$/oi; + die "Invalid Opcode\n" + if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + die "Invalid Effective Address\n" + if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + die "Invalid Immediate Operand\n" + if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; + push @instr, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + $instr{$inst} = 1; + } + close INPUT; + return (\@instr, \%instr); +} + +sub output_lex ($@) +{ + my $tokenfile = shift or die; + my $tokensource = shift; + $tokensource ||= "$tokenfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; + open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; + while () + { + # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content + if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + foreach my $inst (sort keys %$instrlist) + { + printf TOKEN "%-12s{ return %-16s }\n", $inst, "\Uins_$inst;\E"; + } + } + else + { + print TOKEN $_; + } + } + close IN; + close TOKEN; +} + +sub output_yacc ($@) +{ + my $grammarfile = shift or die; + my $grammarsource = shift; + $grammarsource ||= "$grammarfile.in"; + my $instructions = shift or die; + my $instrlist = shift or die; + + open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; + open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; + + while () + { + if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) + { + my $len = length("%token"); + print GRAMMAR "%token"; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length("INS_$inst") < 76) + { + print GRAMMAR " INS_\U$inst\E"; + $len += length(" INS_$inst"); + } + else + { + print GRAMMAR "\n%token INS_\U$inst\E"; + $len = length("%token INS_$inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/}) + { + my $len = length("%type "); + print GRAMMAR "%type "; + foreach my $inst (sort keys %$instrlist) + { + if ($len + length($inst) < 76) + { + print GRAMMAR " $inst"; + $len += length(" $inst"); + } + else + { + print GRAMMAR "\n%type $inst"; + $len = length("%type $inst"); + } + } + print GRAMMAR "\n"; + } + elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) + { + # list every kind of instruction that instrbase can be + print GRAMMAR "instrbase: ", + join( "\n | ", sort keys %$instrlist), "\n;\n"; + + # list the arguments and actions (buildbc) + foreach my $inst (@$instructions) + { + # build the instruction in pieces. + + # rulename = instruction + my $rule = "$inst->[INST]"; + + # tokens it eats: instruction and arguments + # nil => no arguments + my $tokens = "\Uins_$inst->[INST]\E"; + $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; + $tokens =~ s/,/ ',' /g; + + my $func = "BuildBC_Insn"; + + # Create the argument list for BuildBC + my @args; + + # First argument is always &$$ + push @args, '&$$,'; + + # opcode size + push @args, "$inst->[OPSIZE],"; + $args[-1] =~ s/nil/0/; + + # number of bytes of opcodes + push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ","; + + # opcode piece 1 (and 2 if attached) + push @args, $inst->[OPCODE]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; + $args[-1] =~ s/\$(\d+)/"\$" . ($1*2)/eg; + $args[-1] .= ','; + + # opcode piece 2 (if not attached) + push @args, "0," if $inst->[OPCODE] !~ m/,/; + + # effective addresses + push @args, $inst->[EFFADDR]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(effaddr *)NULL, 0/; + $args[-1] =~ s/\$(\d+)([ri])?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; + $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] + ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; + $args[-1] .= ','; + + die $args[-1] if $args[-1] =~ m/\d+[ri]/; + + # immediate sources + push @args, $inst->[IMM]; + $args[-1] =~ s/,/, /; + $args[-1] =~ s/nil/(immval *)NULL, 0/; + $args[-1] =~ s/\$(\d+)(r)?/"\$" . ($1*2) . ($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + $args[-1] =~ s[^([0-9A-Fa-f]+),] + [ConvertIntToImm((immval *)NULL, 0x$1),]; + + # divide the second, and only the second, by 8 bits/byte + $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; + $args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0'; + + $args[-1] =~ s/(\&\$\d+)(r)?/$1/; + $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; + + die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # lame way first---rule: args { action }; for every version + # do the compression to |'s later + print GRAMMAR <<"EOF"; +$rule: $tokens { + $func (@args); + }; +EOF + } + } + else + { + print GRAMMAR $_; + } + } + close IN; + close GRAMMAR; +}