From: Michael Urman Date: Thu, 5 Jul 2001 03:56:14 +0000 (-0000) Subject: Handle grouped instructions format. X-Git-Tag: v0.1.0~415 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2630eece5f9c50423ee585ab8ba65824cb61646c;p=yasm Handle grouped instructions format. svn path=/trunk/yasm/; revision=96 --- diff --git a/modules/parsers/nasm/gen_instr.pl b/modules/parsers/nasm/gen_instr.pl index 9f9af1c8..5ae6f558 100755 --- a/modules/parsers/nasm/gen_instr.pl +++ b/modules/parsers/nasm/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.7 2001/05/30 21:39:53 mu Exp $ +# $Id: gen_instr.pl,v 1.8 2001/07/05 03:56:14 mu Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -60,15 +60,44 @@ my $gotopts = GetOptions ( 'input=s' => \$instrfile, &showusage if $showusage; exit 0 if $showversion or $showusage; - -my $instrlist = &read_instructions ($instrfile); +# valid values for instrs.dat fields +my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG + fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm + imm8 imm16 imm32 imm64 imm80 imm128 + imm8x imm16x imm32x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x +); +my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + #0 #1 +); + +my ($groups) = &read_instructions ($instrfile); exit 0 if $dry_run; # done with simple verification, so exit unless ($dry_run) { - &output_lex ($tokenfile, $tokensource, $instrlist); - &output_yacc ($grammarfile, $grammarsource, $instrlist); + &output_lex ($tokenfile, $tokensource, $groups); + &output_yacc ($grammarfile, $grammarsource, $groups); } # print version for --version, etc. @@ -96,69 +125,98 @@ EOF # read in instructions, and verify they're valid (well, mostly) sub read_instructions ($) { - my $instrfile = shift || die; + our $instrfile = shift || die; open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; - my @instr; - my %instr; - my $valid_regs = join '|', qw( - REG_AL REG_AH REG_AX REG_EAX - REG_BL REG_BH REG_BX REG_EBX - REG_CL REG_CH REG_CX REG_ECX - REG_DL REG_DH REG_DX REG_EDX - REG_SI REG_ESI REG_DI REG_EDI - REG_BP REG_EBP - REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS - ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG - fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm - imm8 imm16 imm32 imm64 imm80 imm128 - imm8x imm16x imm32x imm64x imm80x imm128x - rm8 rm16 rm32 rm1632 rm64 rm80 rm128 - rm8x rm16x rm32x rm1632x rm64x rm80x rm128x - reg8 reg16 reg32 reg1632 reg64 reg80 reg128 - reg8x reg16x reg32x reg1632x reg64x reg80x reg128x - mem8 mem16 mem32 mem1632 mem64 mem80 mem128 - mem8x mem16x mem32x mem1632x mem64x mem80x mem128x - ); - my $valid_cpus = join '|', qw( - 8086 186 286 386 486 P4 P5 P6 - FPU MMX KATMAI SSE SSE2 - AMD ATHLON 3DNOW - SMM - CYRIX - UNDOC OBS PRIV PROT - ); - while () + our %instr; + our %groups; + + sub add_group_rule ($$) { - next if /^\s*(?:;.*)$/; - if (m/^:(\w+)\t+(\w+)$/) - { - $instr{$1} = $2; # simple scalar => alias - next; - } - my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + my ($inst, $args) = splice @_; + + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; eval { - die "Invalid instruction name\n" - if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; die "Invalid Operands\n" - if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; die "Invalid Operation Size\n" - if $size !~ m/^nil|16|32|128$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; - die "Invalid Effective Address\n" - if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + if $size !~ m/^nil|16|32|128$/oi; + # TODO: update these for $0.\d inclusion + #die "Invalid Opcode\n" + # if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + #die "Invalid Effective Address\n" + # if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + #die "Invalid Immediate Operand\n" + # if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; +# die "Multiple Definiton for alias $inst at $instrfile line $.\n" +# if exists $instr{$inst} and not ref $instr{$inst}; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } + + sub add_group_member ($$) + { + my ($handle, $fullargs) = splice @_; + + my ($inst, $group) = split /!/, $handle; + my ($args, $cpu) = split /\t+/, $fullargs; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $group !~ m/^\w+$/o; die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + warn "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n" + unless exists $groups{$group}; }; die "Malformed Instruction at $instrfile line $.: $@" if $@; - die "Multiple Definiton for alias $inst at $instrfile line $.\n" - if exists $instr{$inst} and not ref $instr{$inst}; - push @{$instr{$inst}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # only allow multiple instances of instructions that aren't of a group + die "Multiple Definiton for instruction $inst at $instrfile line $.\n" + if exists $instr{$inst} and not exists $groups{$inst}; + push @{$groups{$group}{members}}, [$inst, $group, $args, $cpu]; + $instr{$inst} = 1; + } + + while () + { + chomp; + next if /^\s*(?:;.*)$/; + + my ($handle, $args) = split /\t+/, $_, 2; + + # pseudo hack to handle original style instructions (no group) + if ($handle =~ m/^\w+$/) + { + # TODO: this has some long ranging effects, as the eventual + # bison rules get tagged when they don't need + # to, etc. Fix this sometime. + add_group_rule ("!$handle", $args); + add_group_member ("$handle!$handle", ""); + } + elsif ($handle =~ m/^!\w+$/) + { + add_group_rule ($handle, $args); + } + elsif ($handle =~ m/^\w+!\w+$/) + { + add_group_member ($handle, $args); + } + # TODO: consider if this is necessary: Pete? + # (add_group_member_synonym is -not- implemented) + #elsif ($handle =~ m/^:\w+$/) + #{ + # add_group_member_synonym ($handle, $args); + #} } close INPUT; - return (\%instr); + return (\%groups); } sub output_lex ($@) @@ -166,7 +224,7 @@ sub output_lex ($@) my $tokenfile = shift or die; my $tokensource = shift; $tokensource ||= "$tokenfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; @@ -175,11 +233,37 @@ sub output_lex ($@) # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) { - foreach my $inst (sort keys %$instrlist) + foreach my $grp (sort keys %$groups) { - printf TOKEN "%-12s{ return %-16s }\n", $inst, - (ref $instrlist->{$inst}) ? - "\Uins_$inst;\E" : "\Uins_$instrlist->{$inst};\E"; + my %printed; + my $group = $grp; $group =~ s/^!//; + + foreach my $grp (@{$groups->{$grp}{members}}) + { + unless (exists $printed{$grp->[0]}) + { + $printed{$grp->[0]} = 1; + my @groupdata; + if ($grp->[2]) + { + @groupdata = split ",", $grp->[2]; + # choke. gasp. yes, the .d\d array starts + # at 1 not 0. *glares* + for (my $i=1; $i <= @groupdata; ++$i) + { + $groupdata[$i-1] = " yylval.groupdata.d$i = 0x$groupdata[$i-1];"; + } + $groupdata[-1] .= "\n\t "; + } + printf TOKEN "%-12s{%s return %-20s }\n", + $grp->[0], + (join "\n\t ", @groupdata), + "\Ugrp_$group;\E"; + # TODO: change appropriate GRP_FOO back to + # INS_FOO's. not functionally important; + # just pedantically so. + } + } } } else @@ -237,7 +321,7 @@ sub output_yacc ($@) my $grammarfile = shift or die; my $grammarsource = shift; $grammarsource ||= "$grammarfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; @@ -246,19 +330,19 @@ sub output_yacc ($@) { if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) { - my $len = length("%token"); - print GRAMMAR "%token"; - foreach my $inst (sort keys %$instrlist) + my $len = length("%token "); + print GRAMMAR "%token "; + foreach my $group (sort keys %$groups) { - if ($len + length("INS_$inst") < 76) + if ($len + length("GRP_$group") < 76) { - print GRAMMAR " INS_\U$inst\E"; - $len += length(" INS_$inst"); + print GRAMMAR " GRP_\U$group\E"; + $len += length(" GRP_$group"); } else { - print GRAMMAR "\n%token INS_\U$inst\E"; - $len = length("%token INS_$inst"); + print GRAMMAR "\n%token GRP_\U$group\E"; + $len = length("%token GRP_$group"); } } print GRAMMAR "\n"; @@ -267,18 +351,18 @@ sub output_yacc ($@) { my $len = length("%type "); print GRAMMAR "%type "; - foreach my $inst (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { - next unless ref $instrlist->{$inst}; - if ($len + length($inst) < 76) + #next unless ref $instrlist->{$inst}; + if ($len + length($group) < 76) { - print GRAMMAR " $inst"; - $len += length(" $inst"); + print GRAMMAR " $group"; + $len += length(" $group"); } else { - print GRAMMAR "\n%type $inst"; - $len = length("%type $inst"); + print GRAMMAR "\n%type $group"; + $len = length("%type $group"); } } print GRAMMAR "\n"; @@ -287,12 +371,13 @@ sub output_yacc ($@) { # list every kind of instruction that instrbase can be print GRAMMAR "instrbase: ", - join( "\n | ", sort grep {ref $instrlist->{$_}} keys %$instrlist), "\n;\n"; + join( "\n | ", sort keys %$groups), "\n;\n"; my ($ONE, $AL, $AX, $EAX); # need the outer scope # list the arguments and actions (buildbc) - foreach my $instrname (sort keys %$instrlist) + #foreach my $instrname (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { # I'm still convinced this is a hack. The idea is if # within an instruction we see certain versions of the @@ -304,8 +389,8 @@ sub output_yacc ($@) # original version we would have otherwise. ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0); my $count = 0; - next unless ref $instrlist->{$instrname}; - foreach my $inst (@{$instrlist->{$instrname}}) { + #next unless ref $instrlist->{$instrname}; + foreach my $inst (@{$groups->{$group}{rules}}) { # build the instruction in pieces. # rulename = instruction @@ -313,7 +398,7 @@ sub output_yacc ($@) # tokens it eats: instruction and arguments # nil => no arguments - my $tokens = "\Uins_$inst->[INST]\E"; + my $tokens = "\Ugrp_$rule\E"; $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; $tokens =~ s/,/ ',' /g; my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args @@ -337,7 +422,8 @@ sub output_yacc ($@) push @args, $inst->[OPCODE]; $args[-1] =~ s/,/, /; $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $args[-1] =~ s/\$(\d+)/"\$" . ($1*2+$to)/eg; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; $args[-1] .= ','; # opcode piece 2 (if not attached) @@ -349,8 +435,9 @@ sub output_yacc ($@) push @args, $inst->[EFFADDR]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - $args[-1] =~ s/\$(\d+)([ri])?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; @@ -362,10 +449,13 @@ sub output_yacc ($@) push @args, $inst->[IMM]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(immval *)NULL, 0/; - $args[-1] =~ s/\$(\d+)(r)?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s[^([0-9A-Fa-f]+),] [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1.d$1),]; # divide the second, and only the second, by 8 bits/byte $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; @@ -375,6 +465,9 @@ sub output_yacc ($@) $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1.d$1/g foreach (@args); # see if we match one of the cases to defer if (($inst->[OPERANDS]||"") =~ m/,ONE/) @@ -451,7 +544,7 @@ sub output_yacc ($@) # print error action # ASSUMES: at least one previous action exists - print GRAMMAR " | \Uins_$instrname\E error {\n"; + print GRAMMAR " | \Ugrp_$group\E error {\n"; print GRAMMAR " Error (ERR_EXP_SYNTAX, (char *)NULL);\n"; print GRAMMAR " }\n"; diff --git a/src/gen_instr.pl b/src/gen_instr.pl index 9f9af1c8..5ae6f558 100755 --- a/src/gen_instr.pl +++ b/src/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.7 2001/05/30 21:39:53 mu Exp $ +# $Id: gen_instr.pl,v 1.8 2001/07/05 03:56:14 mu Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -60,15 +60,44 @@ my $gotopts = GetOptions ( 'input=s' => \$instrfile, &showusage if $showusage; exit 0 if $showversion or $showusage; - -my $instrlist = &read_instructions ($instrfile); +# valid values for instrs.dat fields +my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG + fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm + imm8 imm16 imm32 imm64 imm80 imm128 + imm8x imm16x imm32x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x +); +my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + #0 #1 +); + +my ($groups) = &read_instructions ($instrfile); exit 0 if $dry_run; # done with simple verification, so exit unless ($dry_run) { - &output_lex ($tokenfile, $tokensource, $instrlist); - &output_yacc ($grammarfile, $grammarsource, $instrlist); + &output_lex ($tokenfile, $tokensource, $groups); + &output_yacc ($grammarfile, $grammarsource, $groups); } # print version for --version, etc. @@ -96,69 +125,98 @@ EOF # read in instructions, and verify they're valid (well, mostly) sub read_instructions ($) { - my $instrfile = shift || die; + our $instrfile = shift || die; open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; - my @instr; - my %instr; - my $valid_regs = join '|', qw( - REG_AL REG_AH REG_AX REG_EAX - REG_BL REG_BH REG_BX REG_EBX - REG_CL REG_CH REG_CX REG_ECX - REG_DL REG_DH REG_DX REG_EDX - REG_SI REG_ESI REG_DI REG_EDI - REG_BP REG_EBP - REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS - ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG - fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm - imm8 imm16 imm32 imm64 imm80 imm128 - imm8x imm16x imm32x imm64x imm80x imm128x - rm8 rm16 rm32 rm1632 rm64 rm80 rm128 - rm8x rm16x rm32x rm1632x rm64x rm80x rm128x - reg8 reg16 reg32 reg1632 reg64 reg80 reg128 - reg8x reg16x reg32x reg1632x reg64x reg80x reg128x - mem8 mem16 mem32 mem1632 mem64 mem80 mem128 - mem8x mem16x mem32x mem1632x mem64x mem80x mem128x - ); - my $valid_cpus = join '|', qw( - 8086 186 286 386 486 P4 P5 P6 - FPU MMX KATMAI SSE SSE2 - AMD ATHLON 3DNOW - SMM - CYRIX - UNDOC OBS PRIV PROT - ); - while () + our %instr; + our %groups; + + sub add_group_rule ($$) { - next if /^\s*(?:;.*)$/; - if (m/^:(\w+)\t+(\w+)$/) - { - $instr{$1} = $2; # simple scalar => alias - next; - } - my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + my ($inst, $args) = splice @_; + + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; eval { - die "Invalid instruction name\n" - if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; die "Invalid Operands\n" - if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; die "Invalid Operation Size\n" - if $size !~ m/^nil|16|32|128$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; - die "Invalid Effective Address\n" - if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + if $size !~ m/^nil|16|32|128$/oi; + # TODO: update these for $0.\d inclusion + #die "Invalid Opcode\n" + # if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + #die "Invalid Effective Address\n" + # if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + #die "Invalid Immediate Operand\n" + # if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; +# die "Multiple Definiton for alias $inst at $instrfile line $.\n" +# if exists $instr{$inst} and not ref $instr{$inst}; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } + + sub add_group_member ($$) + { + my ($handle, $fullargs) = splice @_; + + my ($inst, $group) = split /!/, $handle; + my ($args, $cpu) = split /\t+/, $fullargs; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $group !~ m/^\w+$/o; die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + warn "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n" + unless exists $groups{$group}; }; die "Malformed Instruction at $instrfile line $.: $@" if $@; - die "Multiple Definiton for alias $inst at $instrfile line $.\n" - if exists $instr{$inst} and not ref $instr{$inst}; - push @{$instr{$inst}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # only allow multiple instances of instructions that aren't of a group + die "Multiple Definiton for instruction $inst at $instrfile line $.\n" + if exists $instr{$inst} and not exists $groups{$inst}; + push @{$groups{$group}{members}}, [$inst, $group, $args, $cpu]; + $instr{$inst} = 1; + } + + while () + { + chomp; + next if /^\s*(?:;.*)$/; + + my ($handle, $args) = split /\t+/, $_, 2; + + # pseudo hack to handle original style instructions (no group) + if ($handle =~ m/^\w+$/) + { + # TODO: this has some long ranging effects, as the eventual + # bison rules get tagged when they don't need + # to, etc. Fix this sometime. + add_group_rule ("!$handle", $args); + add_group_member ("$handle!$handle", ""); + } + elsif ($handle =~ m/^!\w+$/) + { + add_group_rule ($handle, $args); + } + elsif ($handle =~ m/^\w+!\w+$/) + { + add_group_member ($handle, $args); + } + # TODO: consider if this is necessary: Pete? + # (add_group_member_synonym is -not- implemented) + #elsif ($handle =~ m/^:\w+$/) + #{ + # add_group_member_synonym ($handle, $args); + #} } close INPUT; - return (\%instr); + return (\%groups); } sub output_lex ($@) @@ -166,7 +224,7 @@ sub output_lex ($@) my $tokenfile = shift or die; my $tokensource = shift; $tokensource ||= "$tokenfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; @@ -175,11 +233,37 @@ sub output_lex ($@) # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) { - foreach my $inst (sort keys %$instrlist) + foreach my $grp (sort keys %$groups) { - printf TOKEN "%-12s{ return %-16s }\n", $inst, - (ref $instrlist->{$inst}) ? - "\Uins_$inst;\E" : "\Uins_$instrlist->{$inst};\E"; + my %printed; + my $group = $grp; $group =~ s/^!//; + + foreach my $grp (@{$groups->{$grp}{members}}) + { + unless (exists $printed{$grp->[0]}) + { + $printed{$grp->[0]} = 1; + my @groupdata; + if ($grp->[2]) + { + @groupdata = split ",", $grp->[2]; + # choke. gasp. yes, the .d\d array starts + # at 1 not 0. *glares* + for (my $i=1; $i <= @groupdata; ++$i) + { + $groupdata[$i-1] = " yylval.groupdata.d$i = 0x$groupdata[$i-1];"; + } + $groupdata[-1] .= "\n\t "; + } + printf TOKEN "%-12s{%s return %-20s }\n", + $grp->[0], + (join "\n\t ", @groupdata), + "\Ugrp_$group;\E"; + # TODO: change appropriate GRP_FOO back to + # INS_FOO's. not functionally important; + # just pedantically so. + } + } } } else @@ -237,7 +321,7 @@ sub output_yacc ($@) my $grammarfile = shift or die; my $grammarsource = shift; $grammarsource ||= "$grammarfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; @@ -246,19 +330,19 @@ sub output_yacc ($@) { if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) { - my $len = length("%token"); - print GRAMMAR "%token"; - foreach my $inst (sort keys %$instrlist) + my $len = length("%token "); + print GRAMMAR "%token "; + foreach my $group (sort keys %$groups) { - if ($len + length("INS_$inst") < 76) + if ($len + length("GRP_$group") < 76) { - print GRAMMAR " INS_\U$inst\E"; - $len += length(" INS_$inst"); + print GRAMMAR " GRP_\U$group\E"; + $len += length(" GRP_$group"); } else { - print GRAMMAR "\n%token INS_\U$inst\E"; - $len = length("%token INS_$inst"); + print GRAMMAR "\n%token GRP_\U$group\E"; + $len = length("%token GRP_$group"); } } print GRAMMAR "\n"; @@ -267,18 +351,18 @@ sub output_yacc ($@) { my $len = length("%type "); print GRAMMAR "%type "; - foreach my $inst (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { - next unless ref $instrlist->{$inst}; - if ($len + length($inst) < 76) + #next unless ref $instrlist->{$inst}; + if ($len + length($group) < 76) { - print GRAMMAR " $inst"; - $len += length(" $inst"); + print GRAMMAR " $group"; + $len += length(" $group"); } else { - print GRAMMAR "\n%type $inst"; - $len = length("%type $inst"); + print GRAMMAR "\n%type $group"; + $len = length("%type $group"); } } print GRAMMAR "\n"; @@ -287,12 +371,13 @@ sub output_yacc ($@) { # list every kind of instruction that instrbase can be print GRAMMAR "instrbase: ", - join( "\n | ", sort grep {ref $instrlist->{$_}} keys %$instrlist), "\n;\n"; + join( "\n | ", sort keys %$groups), "\n;\n"; my ($ONE, $AL, $AX, $EAX); # need the outer scope # list the arguments and actions (buildbc) - foreach my $instrname (sort keys %$instrlist) + #foreach my $instrname (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { # I'm still convinced this is a hack. The idea is if # within an instruction we see certain versions of the @@ -304,8 +389,8 @@ sub output_yacc ($@) # original version we would have otherwise. ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0); my $count = 0; - next unless ref $instrlist->{$instrname}; - foreach my $inst (@{$instrlist->{$instrname}}) { + #next unless ref $instrlist->{$instrname}; + foreach my $inst (@{$groups->{$group}{rules}}) { # build the instruction in pieces. # rulename = instruction @@ -313,7 +398,7 @@ sub output_yacc ($@) # tokens it eats: instruction and arguments # nil => no arguments - my $tokens = "\Uins_$inst->[INST]\E"; + my $tokens = "\Ugrp_$rule\E"; $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; $tokens =~ s/,/ ',' /g; my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args @@ -337,7 +422,8 @@ sub output_yacc ($@) push @args, $inst->[OPCODE]; $args[-1] =~ s/,/, /; $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $args[-1] =~ s/\$(\d+)/"\$" . ($1*2+$to)/eg; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; $args[-1] .= ','; # opcode piece 2 (if not attached) @@ -349,8 +435,9 @@ sub output_yacc ($@) push @args, $inst->[EFFADDR]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - $args[-1] =~ s/\$(\d+)([ri])?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; @@ -362,10 +449,13 @@ sub output_yacc ($@) push @args, $inst->[IMM]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(immval *)NULL, 0/; - $args[-1] =~ s/\$(\d+)(r)?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s[^([0-9A-Fa-f]+),] [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1.d$1),]; # divide the second, and only the second, by 8 bits/byte $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; @@ -375,6 +465,9 @@ sub output_yacc ($@) $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1.d$1/g foreach (@args); # see if we match one of the cases to defer if (($inst->[OPERANDS]||"") =~ m/,ONE/) @@ -451,7 +544,7 @@ sub output_yacc ($@) # print error action # ASSUMES: at least one previous action exists - print GRAMMAR " | \Uins_$instrname\E error {\n"; + print GRAMMAR " | \Ugrp_$group\E error {\n"; print GRAMMAR " Error (ERR_EXP_SYNTAX, (char *)NULL);\n"; print GRAMMAR " }\n"; diff --git a/src/parsers/nasm/gen_instr.pl b/src/parsers/nasm/gen_instr.pl index 9f9af1c8..5ae6f558 100755 --- a/src/parsers/nasm/gen_instr.pl +++ b/src/parsers/nasm/gen_instr.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# $Id: gen_instr.pl,v 1.7 2001/05/30 21:39:53 mu Exp $ +# $Id: gen_instr.pl,v 1.8 2001/07/05 03:56:14 mu Exp $ # Generates bison.y and token.l from instrs.dat for YASM # # Copyright (C) 2001 Michael Urman @@ -60,15 +60,44 @@ my $gotopts = GetOptions ( 'input=s' => \$instrfile, &showusage if $showusage; exit 0 if $showversion or $showusage; - -my $instrlist = &read_instructions ($instrfile); +# valid values for instrs.dat fields +my $valid_regs = join '|', qw( + REG_AL REG_AH REG_AX REG_EAX + REG_BL REG_BH REG_BX REG_EBX + REG_CL REG_CH REG_CX REG_ECX + REG_DL REG_DH REG_DX REG_EDX + REG_SI REG_ESI REG_DI REG_EDI + REG_BP REG_EBP + REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS + ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG + fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm + imm8 imm16 imm32 imm64 imm80 imm128 + imm8x imm16x imm32x imm64x imm80x imm128x + rm8 rm16 rm32 rm1632 rm64 rm80 rm128 + rm8x rm16x rm32x rm1632x rm64x rm80x rm128x + reg8 reg16 reg32 reg1632 reg64 reg80 reg128 + reg8x reg16x reg32x reg1632x reg64x reg80x reg128x + mem8 mem16 mem32 mem1632 mem64 mem80 mem128 + mem8x mem16x mem32x mem1632x mem64x mem80x mem128x +); +my $valid_cpus = join '|', qw( + 8086 186 286 386 486 P4 P5 P6 + FPU MMX KATMAI SSE SSE2 + AMD ATHLON 3DNOW + SMM + CYRIX + UNDOC OBS PRIV PROT + #0 #1 +); + +my ($groups) = &read_instructions ($instrfile); exit 0 if $dry_run; # done with simple verification, so exit unless ($dry_run) { - &output_lex ($tokenfile, $tokensource, $instrlist); - &output_yacc ($grammarfile, $grammarsource, $instrlist); + &output_lex ($tokenfile, $tokensource, $groups); + &output_yacc ($grammarfile, $grammarsource, $groups); } # print version for --version, etc. @@ -96,69 +125,98 @@ EOF # read in instructions, and verify they're valid (well, mostly) sub read_instructions ($) { - my $instrfile = shift || die; + our $instrfile = shift || die; open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n"; - my @instr; - my %instr; - my $valid_regs = join '|', qw( - REG_AL REG_AH REG_AX REG_EAX - REG_BL REG_BH REG_BX REG_EBX - REG_CL REG_CH REG_CX REG_ECX - REG_DL REG_DH REG_DX REG_EDX - REG_SI REG_ESI REG_DI REG_EDI - REG_BP REG_EBP - REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS - ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG - fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm - imm8 imm16 imm32 imm64 imm80 imm128 - imm8x imm16x imm32x imm64x imm80x imm128x - rm8 rm16 rm32 rm1632 rm64 rm80 rm128 - rm8x rm16x rm32x rm1632x rm64x rm80x rm128x - reg8 reg16 reg32 reg1632 reg64 reg80 reg128 - reg8x reg16x reg32x reg1632x reg64x reg80x reg128x - mem8 mem16 mem32 mem1632 mem64 mem80 mem128 - mem8x mem16x mem32x mem1632x mem64x mem80x mem128x - ); - my $valid_cpus = join '|', qw( - 8086 186 286 386 486 P4 P5 P6 - FPU MMX KATMAI SSE SSE2 - AMD ATHLON 3DNOW - SMM - CYRIX - UNDOC OBS PRIV PROT - ); - while () + our %instr; + our %groups; + + sub add_group_rule ($$) { - next if /^\s*(?:;.*)$/; - if (m/^:(\w+)\t+(\w+)$/) - { - $instr{$1} = $2; # simple scalar => alias - next; - } - my ($inst, $op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/; + my ($inst, $args) = splice @_; + + my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args; eval { - die "Invalid instruction name\n" - if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $inst !~ m/^!\w+$/o; die "Invalid Operands\n" - if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; + if $op !~ m/^(?:TO\s)?nil|(?:$valid_regs)(?:,(?:$valid_regs)){0,2}$/oi; die "Invalid Operation Size\n" - if $size !~ m/^nil|16|32|128$/oi; - die "Invalid Opcode\n" - if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; - die "Invalid Effective Address\n" - if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; - die "Invalid Immediate Operand\n" - if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + if $size !~ m/^nil|16|32|128$/oi; + # TODO: update these for $0.\d inclusion + #die "Invalid Opcode\n" + # if $opcode !~ m/[0-9A-F]{2}(,[0-9A-F]{2})?(\+\$\d)?/oi; + #die "Invalid Effective Address\n" + # if $eff !~ m/nil|(\$?\d[ir]?(,\$?\d))/oi; + #die "Invalid Immediate Operand\n" + # if $imm !~ m/nil|((\$\d[r]?|8|16|32|[0-9A-F]{2})(,\$\d|(8|16|32[s]?))?)/oi; + die "Invalid CPU\n" + if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + }; + die "Malformed Instruction at $instrfile line $.: $@" if $@; +# die "Multiple Definiton for alias $inst at $instrfile line $.\n" +# if exists $instr{$inst} and not ref $instr{$inst}; + # knock the ! off of $inst for the groupname + $inst = substr $inst, 1; + push @{$groups{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + } + + sub add_group_member ($$) + { + my ($handle, $fullargs) = splice @_; + + my ($inst, $group) = split /!/, $handle; + my ($args, $cpu) = split /\t+/, $fullargs; + eval { + die "Invalid instruction name\n" + if $inst !~ m/^\w+$/o; + die "Invalid group name\n" + if $group !~ m/^\w+$/o; die "Invalid CPU\n" - if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o; + warn "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n" + unless exists $groups{$group}; }; die "Malformed Instruction at $instrfile line $.: $@" if $@; - die "Multiple Definiton for alias $inst at $instrfile line $.\n" - if exists $instr{$inst} and not ref $instr{$inst}; - push @{$instr{$inst}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu]; + # only allow multiple instances of instructions that aren't of a group + die "Multiple Definiton for instruction $inst at $instrfile line $.\n" + if exists $instr{$inst} and not exists $groups{$inst}; + push @{$groups{$group}{members}}, [$inst, $group, $args, $cpu]; + $instr{$inst} = 1; + } + + while () + { + chomp; + next if /^\s*(?:;.*)$/; + + my ($handle, $args) = split /\t+/, $_, 2; + + # pseudo hack to handle original style instructions (no group) + if ($handle =~ m/^\w+$/) + { + # TODO: this has some long ranging effects, as the eventual + # bison rules get tagged when they don't need + # to, etc. Fix this sometime. + add_group_rule ("!$handle", $args); + add_group_member ("$handle!$handle", ""); + } + elsif ($handle =~ m/^!\w+$/) + { + add_group_rule ($handle, $args); + } + elsif ($handle =~ m/^\w+!\w+$/) + { + add_group_member ($handle, $args); + } + # TODO: consider if this is necessary: Pete? + # (add_group_member_synonym is -not- implemented) + #elsif ($handle =~ m/^:\w+$/) + #{ + # add_group_member_synonym ($handle, $args); + #} } close INPUT; - return (\%instr); + return (\%groups); } sub output_lex ($@) @@ -166,7 +224,7 @@ sub output_lex ($@) my $tokenfile = shift or die; my $tokensource = shift; $tokensource ||= "$tokenfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n"; open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n"; @@ -175,11 +233,37 @@ sub output_lex ($@) # Replace token.l.in /* @INSTRUCTIONS@ */ with generated content if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/}) { - foreach my $inst (sort keys %$instrlist) + foreach my $grp (sort keys %$groups) { - printf TOKEN "%-12s{ return %-16s }\n", $inst, - (ref $instrlist->{$inst}) ? - "\Uins_$inst;\E" : "\Uins_$instrlist->{$inst};\E"; + my %printed; + my $group = $grp; $group =~ s/^!//; + + foreach my $grp (@{$groups->{$grp}{members}}) + { + unless (exists $printed{$grp->[0]}) + { + $printed{$grp->[0]} = 1; + my @groupdata; + if ($grp->[2]) + { + @groupdata = split ",", $grp->[2]; + # choke. gasp. yes, the .d\d array starts + # at 1 not 0. *glares* + for (my $i=1; $i <= @groupdata; ++$i) + { + $groupdata[$i-1] = " yylval.groupdata.d$i = 0x$groupdata[$i-1];"; + } + $groupdata[-1] .= "\n\t "; + } + printf TOKEN "%-12s{%s return %-20s }\n", + $grp->[0], + (join "\n\t ", @groupdata), + "\Ugrp_$group;\E"; + # TODO: change appropriate GRP_FOO back to + # INS_FOO's. not functionally important; + # just pedantically so. + } + } } } else @@ -237,7 +321,7 @@ sub output_yacc ($@) my $grammarfile = shift or die; my $grammarsource = shift; $grammarsource ||= "$grammarfile.in"; - my $instrlist = shift or die; + my $groups = shift or die; open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n"; open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n"; @@ -246,19 +330,19 @@ sub output_yacc ($@) { if (m{/[*]\s*[@]TOKENS[@]\s*[*]/}) { - my $len = length("%token"); - print GRAMMAR "%token"; - foreach my $inst (sort keys %$instrlist) + my $len = length("%token "); + print GRAMMAR "%token "; + foreach my $group (sort keys %$groups) { - if ($len + length("INS_$inst") < 76) + if ($len + length("GRP_$group") < 76) { - print GRAMMAR " INS_\U$inst\E"; - $len += length(" INS_$inst"); + print GRAMMAR " GRP_\U$group\E"; + $len += length(" GRP_$group"); } else { - print GRAMMAR "\n%token INS_\U$inst\E"; - $len = length("%token INS_$inst"); + print GRAMMAR "\n%token GRP_\U$group\E"; + $len = length("%token GRP_$group"); } } print GRAMMAR "\n"; @@ -267,18 +351,18 @@ sub output_yacc ($@) { my $len = length("%type "); print GRAMMAR "%type "; - foreach my $inst (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { - next unless ref $instrlist->{$inst}; - if ($len + length($inst) < 76) + #next unless ref $instrlist->{$inst}; + if ($len + length($group) < 76) { - print GRAMMAR " $inst"; - $len += length(" $inst"); + print GRAMMAR " $group"; + $len += length(" $group"); } else { - print GRAMMAR "\n%type $inst"; - $len = length("%type $inst"); + print GRAMMAR "\n%type $group"; + $len = length("%type $group"); } } print GRAMMAR "\n"; @@ -287,12 +371,13 @@ sub output_yacc ($@) { # list every kind of instruction that instrbase can be print GRAMMAR "instrbase: ", - join( "\n | ", sort grep {ref $instrlist->{$_}} keys %$instrlist), "\n;\n"; + join( "\n | ", sort keys %$groups), "\n;\n"; my ($ONE, $AL, $AX, $EAX); # need the outer scope # list the arguments and actions (buildbc) - foreach my $instrname (sort keys %$instrlist) + #foreach my $instrname (sort keys %$instrlist) + foreach my $group (sort keys %$groups) { # I'm still convinced this is a hack. The idea is if # within an instruction we see certain versions of the @@ -304,8 +389,8 @@ sub output_yacc ($@) # original version we would have otherwise. ($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0); my $count = 0; - next unless ref $instrlist->{$instrname}; - foreach my $inst (@{$instrlist->{$instrname}}) { + #next unless ref $instrlist->{$instrname}; + foreach my $inst (@{$groups->{$group}{rules}}) { # build the instruction in pieces. # rulename = instruction @@ -313,7 +398,7 @@ sub output_yacc ($@) # tokens it eats: instruction and arguments # nil => no arguments - my $tokens = "\Uins_$inst->[INST]\E"; + my $tokens = "\Ugrp_$rule\E"; $tokens .= " $inst->[OPERANDS]" if $inst->[OPERANDS] ne 'nil'; $tokens =~ s/,/ ',' /g; my $to = $tokens =~ m/\bTO\b/ ? 1 : 0; # offset args @@ -337,7 +422,8 @@ sub output_yacc ($@) push @args, $inst->[OPCODE]; $args[-1] =~ s/,/, /; $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g; - $args[-1] =~ s/\$(\d+)/"\$" . ($1*2+$to)/eg; + # don't match $0.\d in the following rule. + $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg; $args[-1] .= ','; # opcode piece 2 (if not attached) @@ -349,8 +435,9 @@ sub output_yacc ($@) push @args, $inst->[EFFADDR]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(effaddr *)NULL, 0/; - $args[-1] =~ s/\$(\d+)([ri])?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+[ri]?)/\&$1/; # Just the first! + # don't let a $0.\d match slip into the following rules. + $args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s/\&(\$\d+)r/ConvertRegToEA((effaddr *)NULL, $1)/; $args[-1] =~ s[\&(\$\d+)i,\s*(\d+)] ["ConvertImmToEA((effaddr *)NULL, \&$1, ".($2/8)."), 0"]e; @@ -362,10 +449,13 @@ sub output_yacc ($@) push @args, $inst->[IMM]; $args[-1] =~ s/,/, /; $args[-1] =~ s/nil/(immval *)NULL, 0/; - $args[-1] =~ s/\$(\d+)(r)?/"\$".($1*2+$to).($2||'')/eg; - $args[-1] =~ s/(\$\d+r?)/\&$1/; # Just the first! + # don't match $0.\d in the following rules. + $args[-1] =~ s/\$(\d+)(r)?(?!\.)/"\$".($1*2+$to).($2||'')/eg; + $args[-1] =~ s/(\$\d+r?)(?!\.)/\&$1/; # Just the first! $args[-1] =~ s[^([0-9A-Fa-f]+),] [ConvertIntToImm((immval *)NULL, 0x$1),]; + $args[-1] =~ s[^\$0.(\d+),] + [ConvertIntToImm((immval *)NULL, \$1.d$1),]; # divide the second, and only the second, by 8 bits/byte $args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg; @@ -375,6 +465,9 @@ sub output_yacc ($@) $args[-1] .= ($2||'') eq 'r' ? ', 1' : ', 0'; die $args[-1] if $args[-1] =~ m/\d+[ris]/; + + # now that we've constructed the arglist, subst $0.\d + s/\$0\.(\d+)/\$1.d$1/g foreach (@args); # see if we match one of the cases to defer if (($inst->[OPERANDS]||"") =~ m/,ONE/) @@ -451,7 +544,7 @@ sub output_yacc ($@) # print error action # ASSUMES: at least one previous action exists - print GRAMMAR " | \Uins_$instrname\E error {\n"; + print GRAMMAR " | \Ugrp_$group\E error {\n"; print GRAMMAR " Error (ERR_EXP_SYNTAX, (char *)NULL);\n"; print GRAMMAR " }\n";