From 623647c9607afee8a3010494ccc52cb3c1d2c09b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Oct 2017 00:06:41 +0000 Subject: [PATCH] AMDGPU: Remove global isGCN predicates These are problematic because they apply to everything, and can easily clobber whatever more specific predicate you are trying to add to a function. Currently instructions use SubtargetPredicate/PredicateControl to apply this to patterns applied to an instruction definition, but not to free standing Pats. Add a wrapper around Pat so the special PredicateControls requirements can be appended to the final predicate list like how Mips does it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314742 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.td | 14 +- lib/Target/AMDGPU/AMDGPUInstructions.td | 50 ++-- lib/Target/AMDGPU/BUFInstructions.td | 112 ++++---- lib/Target/AMDGPU/CaymanInstructions.td | 42 +-- lib/Target/AMDGPU/DSInstructions.td | 26 +- lib/Target/AMDGPU/EvergreenInstructions.td | 71 ++--- lib/Target/AMDGPU/FLATInstructions.td | 38 +-- lib/Target/AMDGPU/MIMGInstructions.td | 26 +- lib/Target/AMDGPU/R600InstrFormats.td | 14 +- lib/Target/AMDGPU/R600Instructions.td | 75 ++--- lib/Target/AMDGPU/SIInstrFormats.td | 9 + lib/Target/AMDGPU/SIInstructions.td | 301 +++++++++++---------- lib/Target/AMDGPU/SMInstructions.td | 30 +- lib/Target/AMDGPU/SOPInstructions.td | 24 +- lib/Target/AMDGPU/VOP1Instructions.td | 22 +- lib/Target/AMDGPU/VOP2Instructions.td | 28 +- lib/Target/AMDGPU/VOP3Instructions.td | 8 +- lib/Target/AMDGPU/VOP3PInstructions.td | 10 +- lib/Target/AMDGPU/VOPCInstructions.td | 8 +- 19 files changed, 466 insertions(+), 442 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 7ec786fe71f..1b17208200c 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -724,18 +724,28 @@ def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, def HasMadMix : Predicate<"Subtarget->hasMadMixInsts()">, 
AssemblerPredicate<"FeatureGFX9Insts">; + +// Exists to help track down where SubtargetPredicate isn't set rather +// than letting tablegen crash with an unhelpful error. +def InvalidPred : Predicate<"predicate not set on instruction or pattern">; + class PredicateControl { - Predicate SubtargetPredicate; + Predicate SubtargetPredicate = InvalidPred; Predicate SIAssemblerPredicate = isSICI; Predicate VIAssemblerPredicate = isVI; list AssemblerPredicates = []; Predicate AssemblerPredicate = TruePredicate; list OtherPredicates = []; - list Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate], + list Predicates = !listconcat([SubtargetPredicate, + AssemblerPredicate], AssemblerPredicates, OtherPredicates); } +class AMDGPUPat : Pat, + PredicateControl; + + // Include AMDGPU TD files include "R600Schedule.td" include "SISchedule.td" diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 6d388e48b76..636dc7364b3 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -505,7 +505,7 @@ def FP_HALF : PatLeaf < /* -------------------------------------- */ class POW_Common - : Pat < + : AMDGPUPat < (fpow f32:$src0, f32:$src1), (exp_ieee (mul f32:$src1, (log_ieee f32:$src0))) >; @@ -516,30 +516,34 @@ class POW_Common /* Extract element pattern */ class Extract_Element - : Pat< + : AMDGPUPat< (sub_type (extractelt vec_type:$src, sub_idx)), (EXTRACT_SUBREG $src, sub_reg) ->; +> { + let SubtargetPredicate = TruePredicate; +} /* Insert element pattern */ class Insert_Element - : Pat < + : AMDGPUPat < (insertelt vec_type:$vec, elem_type:$elem, sub_idx), (INSERT_SUBREG $vec, $elem, sub_reg) ->; +> { + let SubtargetPredicate = TruePredicate; +} // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer // can handle COPY instructions. 
// bitconvert pattern -class BitConvert : Pat < +class BitConvert : AMDGPUPat < (dt (bitconvert (st rc:$src0))), (dt rc:$src0) >; // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer // can handle COPY instructions. -class DwordAddrPat : Pat < +class DwordAddrPat : AMDGPUPat < (vt (AMDGPUdwordaddr (vt rc:$addr))), (vt rc:$addr) >; @@ -551,30 +555,30 @@ multiclass BFIPatterns { // Definition from ISA doc: // (y & x) | (z & ~x) - def : Pat < + def : AMDGPUPat < (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))), (BFI_INT $x, $y, $z) >; // SHA-256 Ch function // z ^ (x & (y ^ z)) - def : Pat < + def : AMDGPUPat < (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))), (BFI_INT $x, $y, $z) >; - def : Pat < + def : AMDGPUPat < (fcopysign f32:$src0, f32:$src1), (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1) >; - def : Pat < + def : AMDGPUPat < (f32 (fcopysign f32:$src0, f64:$src1)), (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, (i32 (EXTRACT_SUBREG $src1, sub1))) >; - def : Pat < + def : AMDGPUPat < (f64 (fcopysign f64:$src0, f64:$src1)), (REG_SEQUENCE RC64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, @@ -583,7 +587,7 @@ multiclass BFIPatterns ; - def : Pat < + def : AMDGPUPat < (f64 (fcopysign f64:$src0, f32:$src1)), (REG_SEQUENCE RC64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, @@ -596,7 +600,7 @@ multiclass BFIPatterns BFI_INT (XOR x, y), z, y -class SHA256MaPattern : Pat < +class SHA256MaPattern : AMDGPUPat < (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))), (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y) >; @@ -613,24 +617,24 @@ def IMMPopCount : SDNodeXForm; multiclass BFEPattern { - def : Pat < + def : AMDGPUPat < (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)), (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask)))) >; - def : Pat < + def : AMDGPUPat < (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)), (UBFE $src, (i32 0), $width) >; - def : Pat < + def : AMDGPUPat < (sra 
(shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)), (SBFE $src, (i32 0), $width) >; } // rotr pattern -class ROTRPattern : Pat < +class ROTRPattern : AMDGPUPat < (rotr i32:$src0, i32:$src1), (BIT_ALIGN $src0, $src0, $src1) >; @@ -641,7 +645,7 @@ class IntMed3Pat : Pat< + ValueType vt = i32> : AMDGPUPat< (max (min_oneuse vt:$src0, vt:$src1), (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), (med3Inst $src0, $src1, $src2) @@ -661,24 +665,24 @@ def cvt_flr_i32_f32 : PatFrag < [{ (void)N; return TM.Options.NoNaNsFPMath; }] >; -class IMad24Pat : Pat < +class IMad24Pat : AMDGPUPat < (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)), (Inst $src0, $src1, $src2)) >; -class UMad24Pat : Pat < +class UMad24Pat : AMDGPUPat < (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)), (Inst $src0, $src1, $src2)) >; -class RcpPat : Pat < +class RcpPat : AMDGPUPat < (fdiv FP_ONE, vt:$src), (RcpInst $src) >; -class RsqPat : Pat < +class RsqPat : AMDGPUPat < (AMDGPUrcp (fsqrt vt:$src)), (RsqInst $src) >; diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 076ce0f0cc4..6eb39aee893 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -647,8 +647,6 @@ multiclass MUBUF_Pseudo_Atomics ; @@ -862,8 +860,6 @@ defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>; defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; -} // End let SubtargetPredicate = isGCN - let SubtargetPredicate = isCIVI in { //===----------------------------------------------------------------------===// @@ -882,10 +878,8 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", // MUBUF Patterns 
//===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - // Offset in an 32-bit VGPR -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0) >; @@ -897,7 +891,7 @@ def : Pat < multiclass MUBUF_LoadIntrinsicPat { - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc)), @@ -905,7 +899,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc)), @@ -913,7 +907,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc)), @@ -921,7 +915,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc)), @@ -941,7 +935,7 @@ defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), @@ -949,7 +943,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), @@ -958,7 +952,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), @@ -967,7 +961,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), @@ -991,7 +985,7 @@ defm : MUBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name 
i32:$vdata_in, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -999,7 +993,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -1007,7 +1001,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), @@ -1015,7 +1009,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), @@ -1037,7 +1031,7 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), @@ -1049,7 +1043,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), @@ -1061,7 +1055,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), @@ -1073,7 +1067,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), @@ -1088,7 +1082,7 @@ def : Pat< class MUBUFLoad_PatternADDR64 : Pat < + PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe) @@ -1096,19 +1090,19 @@ 
class MUBUFLoad_PatternADDR64 { - def : Pat < + def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc))), (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) >; - def : Pat < + def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } -let Predicates = [isSICI] in { +let SubtargetPredicate = isSICI in { def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; @@ -1116,19 +1110,19 @@ def : MUBUFLoad_PatternADDR64 ; defm : MUBUFLoad_Atomic_Pattern ; -} // End Predicates = [isSICI] +} // End SubtargetPredicate = isSICI multiclass MUBUFLoad_Pattern { - def : Pat < + def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe) >; } -let Predicates = [Has16BitInsts] in { +let OtherPredicates = [Has16BitInsts] in { defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; @@ -1137,18 +1131,18 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; -} // End Predicates = [Has16BitInsts] +} // End OtherPredicates = [Has16BitInsts] multiclass MUBUFScratchLoadPat { - def : Pat < + def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; - def : Pat < + def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) >; @@ -1158,25 +1152,25 @@ multiclass MUBUFScratchLoadPat { - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)))), (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, 
i32:$soffset, u16imm:$offset)))))), (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))), (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))))), (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; @@ -1193,7 +1187,7 @@ defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { defm : MUBUFScratchLoadPat_Hi16; defm : MUBUFScratchLoadPat_Hi16; defm : MUBUFScratchLoadPat_Hi16; @@ -1206,7 +1200,7 @@ multiclass MUBUF_Load_Dword { - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), @@ -1214,7 +1208,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), @@ -1222,7 +1216,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), @@ -1230,7 +1224,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm:$offset, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), @@ -1249,27 +1243,27 @@ defm : MUBUF_Load_Dword { // Store follows atomic op convention so address is forst - def : Pat < + def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) >; - def : Pat < + def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, 
i16:$offset), vt:$val), (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } -let Predicates = [isSICI] in { +let SubtargetPredicate = isSICI in { defm : MUBUFStore_Atomic_Pattern ; defm : MUBUFStore_Atomic_Pattern ; -} // End Predicates = [isSICI] +} // End SubtargetPredicate = isSICI multiclass MUBUFStore_Pattern { - def : Pat < + def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)), (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe) @@ -1282,13 +1276,13 @@ defm : MUBUFStore_Pattern ; multiclass MUBUFScratchStorePat { - def : Pat < + def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; - def : Pat < + def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) @@ -1304,7 +1298,7 @@ defm : MUBUFScratchStorePat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity. 
let AddedComplexity = 1 in { @@ -1323,28 +1317,28 @@ defm : MUBUFScratchStorePat { - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _BOTHEN) @@ -1363,7 +1357,7 @@ defm : MTBUF_LoadIntrinsicPat multiclass MTBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, @@ -1372,7 +1366,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, @@ -1381,7 +1375,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, @@ -1390,7 +1384,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : 
GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _BOTHEN_exact) @@ -1410,8 +1404,6 @@ defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; -} // End let Predicates = [isGCN] - //===----------------------------------------------------------------------===// // Target instructions, move to the appropriate target TD file //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td index dd21946c7c3..0ba5acad680 100644 --- a/lib/Target/AMDGPU/CaymanInstructions.td +++ b/lib/Target/AMDGPU/CaymanInstructions.td @@ -18,7 +18,7 @@ def isCayman : Predicate<"Subtarget->hasCaymanISA()">; // Cayman Instructions //===----------------------------------------------------------------------===// -let Predicates = [isCayman] in { +let SubtargetPredicate = isCayman in { def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU @@ -57,20 +57,21 @@ defm DIV_cm : DIV_Common; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range -def : Pat < +def : R600Pat < (AMDGPUurecip i32:$src0), (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { +def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { let ADDR = 0; let POP_COUNT = 0; let COUNT = 0; } -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + +def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; class RAT_STORE_DWORD mask> : CF_MEM_RAT_CACHELESS <0x14, 0, mask, @@ -179,44 +180,43 @@ def VTX_READ_128_cm //===----------------------------------------------------------------------===// // VTX Read from parameter memory space 
//===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 3)>; -def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 3)>; -def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 3)>; //===----------------------------------------------------------------------===// // VTX Read from constant memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 2)>; -def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 2)>; -def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id2_load 
ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 2)>; //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 1)>; -def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 1)>; -def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 1)>; -} // End isCayman - +} // End let SubtargetPredicate = isCayman diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index e66bf402178..4fbef5ca82d 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -537,25 +537,23 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", // DS Patterns //===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - -def : Pat < +def : GCNPat < (int_amdgcn_ds_swizzle i32:$src, imm:$offset16), (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) >; -class DSReadPat : Pat < +class DSReadPat : GCNPat < (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), (inst $ptr, (as_i16imm $offset), (i1 0)) >; multiclass DSReadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt 
(frag (DS1Addr1Offset i32:$ptr, i32:$offset)))), (v2i16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))))), (v2f16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo)) >; @@ -577,14 +575,14 @@ def : DSReadPat ; } // End AddedComplexity = 100 -def : Pat < +def : GCNPat < (v2i32 (load_local_m0 (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0)) >; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { let AddedComplexity = 100 in { defm : DSReadPat_Hi16; defm : DSReadPat_Hi16; @@ -592,7 +590,7 @@ defm : DSReadPat_Hi16; } } -class DSWritePat : Pat < +class DSWritePat : GCNPat < (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; @@ -603,7 +601,7 @@ def : DSWritePat ; def : DSWritePat ; def : DSWritePat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : DSWritePat ; def : DSWritePat ; } @@ -613,7 +611,7 @@ let AddedComplexity = 100 in { def : DSWritePat ; } // End AddedComplexity = 100 -def : Pat < +def : GCNPat < (store_local_m0 v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)), @@ -621,12 +619,12 @@ def : Pat < (i1 0)) >; -class DSAtomicRetPat : Pat < +class DSAtomicRetPat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; -class DSAtomicCmpXChg : Pat < +class DSAtomicCmpXChg : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0)) >; @@ -663,8 +661,6 @@ def : DSAtomicRetPat; def : DSAtomicCmpXChg; -} // let Predicates = [isGCN] - //===----------------------------------------------------------------------===// // Real instructions 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 52038db7150..ab980f64ba8 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -15,20 +15,28 @@ def isEG : Predicate< "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && " - "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && " + "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && " "!Subtarget->hasCaymanISA()" >; def isEGorCayman : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||" - "Subtarget->getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS" + "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS" >; +class EGPat : AMDGPUPat { + let SubtargetPredicate = isEG; +} + +class EGOrCaymanPat : AMDGPUPat { + let SubtargetPredicate = isEGorCayman; +} + //===----------------------------------------------------------------------===// // Evergreen / Cayman store instructions //===----------------------------------------------------------------------===// -let Predicates = [isEGorCayman] in { +let SubtargetPredicate = isEGorCayman in { class CF_MEM_RAT_CACHELESS rat_inst, bits<4> rat_id, bits<4> mask, dag ins, string name, list pattern> @@ -88,13 +96,13 @@ defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">; defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">; defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">; -} // End let Predicates = [isEGorCayman] +} // End SubtargetPredicate = isEGorCayman //===----------------------------------------------------------------------===// // Evergreen Only instructions //===----------------------------------------------------------------------===// -let Predicates = [isEG] in { +let SubtargetPredicate = isEG in { def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; defm DIV_eg : DIV_Common; @@ -116,7 
+124,8 @@ def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common ; -def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +} // End SubtargetPredicate = isEG //===----------------------------------------------------------------------===// // Memory read/write instructions @@ -241,58 +250,56 @@ def VTX_READ_128_eg //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 3)>; -def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 3)>; -def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 3)>; //===----------------------------------------------------------------------===// // VTX Read from constant memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr 
(vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 2)>; -def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 2)>; -def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 2)>; //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 1)>; -def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 1)>; -def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 1)>; -} // End Predicates = [isEG] - //===----------------------------------------------------------------------===// // Evergreen / Cayman Instructions //===----------------------------------------------------------------------===// -let Predicates = [isEGorCayman] in { +let SubtargetPredicate = isEGorCayman in { multiclass 
AtomicPat { // FIXME: Add _RTN version. We need per WI scratch location to store the old value // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : Pat<(i32 (node_noret i32:$ptr, i32:$data)), + def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, i32:$data)), (EXTRACT_SUBREG (inst_noret (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>; } @@ -300,7 +307,7 @@ multiclass AtomicIncDecPat { // FIXME: Add _RTN version. We need per WI scratch location to store the old value // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : Pat<(i32 (node_noret i32:$ptr, C)), + def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)), (EXTRACT_SUBREG (inst_noret (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>; } @@ -308,7 +315,7 @@ multiclass AtomicIncDecPat; -def : Pat<(i32 (sext_inreg i32:$src, i1)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i1)), (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>; -def : Pat<(i32 (sext_inreg i32:$src, i8)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i8)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>; -def : Pat<(i32 (sext_inreg i32:$src, i16)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i16)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; defm : BFIPatterns ; @@ -681,9 +688,9 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, // which do not need to be truncated since the fp values are 0.0f or 1.0f. // We should look into handling these cases separately. 
-def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; +def : EGOrCaymanPat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; -def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; +def : EGOrCaymanPat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; // SHA-256 Patterns def : SHA256MaPattern ; diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 8c32ce232dc..af0147f69ef 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -625,63 +625,63 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor //===----------------------------------------------------------------------===// // Patterns for global loads with no offset. -class FlatLoadPat : Pat < +class FlatLoadPat : GCNPat < (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; multiclass FlatLoadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$elt0, (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))), (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; - def : Pat < + def : GCNPat < (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))))), (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; } multiclass FlatSignedLoadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$elt0, (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))), (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; - def : Pat < + def : GCNPat < (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))))), (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; } -class FlatLoadAtomicPat : Pat < +class FlatLoadAtomicPat : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; -class FlatLoadSignedPat : Pat < +class FlatLoadSignedPat : GCNPat < (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, 
i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; -class FlatStorePat : Pat < +class FlatStorePat : GCNPat < (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreSignedPat : Pat < +class FlatStoreSignedPat : GCNPat < (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreAtomicPat : Pat < +class FlatStoreAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreSignedAtomicPat : Pat < +class FlatStoreSignedAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), @@ -689,18 +689,18 @@ class FlatStoreSignedAtomicPat ; class FlatAtomicPat : Pat < + ValueType data_vt = vt> : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), (inst $vaddr, $data, $offset, $slc) >; class FlatSignedAtomicPat : Pat < + ValueType data_vt = vt> : GCNPat < (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), (inst $vaddr, $data, $offset, $slc) >; -let Predicates = [HasFlatAddressSpace] in { +let OtherPredicates = [HasFlatAddressSpace] in { def : FlatLoadPat ; def : FlatLoadPat ; @@ -756,7 +756,7 @@ def : FlatAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; - let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : FlatStorePat ; def : FlatStorePat ; @@ -767,9 +767,9 @@ defm : FlatLoadPat_Hi16 ; } } -} // End Predicates = [HasFlatAddressSpace] +} // End OtherPredicates = [HasFlatAddressSpace] -let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { +let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; @@ -794,7 
+794,7 @@ def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; - let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; @@ -834,7 +834,7 @@ def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -} // End Predicates = [HasFlatGlobalInsts] +} // End OtherPredicates = [HasFlatGlobalInsts] //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 06e2c11b019..99a018d2e24 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -349,7 +349,7 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" /********** ======================= **********/ // Image + sampler -class SampleRawPattern : Pat < +class SampleRawPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), (opcode $addr, $rsrc, $sampler, @@ -371,7 +371,7 @@ multiclass SampleRawPatterns { // 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). // 3. Add A16 support when we pass address of half type. 
multiclass AMDGCNSamplePattern { - def : Pat< + def : GCNPat< (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), (opcode $addr, $rsrc, $sampler, @@ -396,7 +396,7 @@ multiclass AMDGCNSamplePatterns { } // Image only -class ImagePattern : Pat < +class ImagePattern : GCNPat < (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm, imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe), (opcode $addr, $rsrc, @@ -411,7 +411,7 @@ multiclass ImagePatterns { } multiclass ImageLoadPattern { - def : Pat < + def : GCNPat < (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), (opcode $addr, $rsrc, @@ -434,7 +434,7 @@ multiclass ImageLoadPatterns { } multiclass ImageStorePattern { - def : Pat < + def : GCNPat < (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, i1:$da), (opcode $data, $addr, $rsrc, @@ -456,7 +456,7 @@ multiclass ImageStorePatterns { defm : ImageStoreDataPatterns(opcode # _V4), v4f32>; } -class ImageAtomicPattern : Pat < +class ImageAtomicPattern : GCNPat < (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)) >; @@ -467,7 +467,7 @@ multiclass ImageAtomicPatterns { def : ImageAtomicPattern(opcode # _V4), v4i32>; } -class ImageAtomicCmpSwapPattern : Pat < +class ImageAtomicCmpSwapPattern : GCNPat < (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (EXTRACT_SUBREG @@ -584,34 +584,34 @@ defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; /* SIsample for simple 1D texture lookup */ -def : Pat < +def : GCNPat < (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0) >; -class SamplePattern : Pat < +class SamplePattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (opcode $addr, $rsrc, $sampler, 
0xf, 0, 0, 0, 0, 0, 0, 0) >; -class SampleRectPattern : Pat < +class SampleRectPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT), (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0) >; -class SampleArrayPattern : Pat < +class SampleArrayPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1) >; class SampleShadowPattern : Pat < + ValueType vt> : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0) >; class SampleShadowArrayPattern : Pat < + ValueType vt> : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1) >; diff --git a/lib/Target/AMDGPU/R600InstrFormats.td b/lib/Target/AMDGPU/R600InstrFormats.td index 68fcc545916..61106ed42e6 100644 --- a/lib/Target/AMDGPU/R600InstrFormats.td +++ b/lib/Target/AMDGPU/R600InstrFormats.td @@ -11,9 +11,18 @@ // //===----------------------------------------------------------------------===// +def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; + +def isR600toCayman : Predicate< + "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; + +class R600Pat : AMDGPUPat { + let SubtargetPredicate = isR600toCayman; +} + class InstR600 pattern, - InstrItinClass itin> - : AMDGPUInst { + InstrItinClass itin = NoItinerary> + : AMDGPUInst , PredicateControl { field bits<64> Inst; bit Trig = 0; @@ -31,6 +40,7 @@ class InstR600 pattern, bit IsExport = 0; bit LDS_1A2D = 0; + let SubtargetPredicate = isR600toCayman; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 63a35b6dc59..f422f441af4 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -15,6 +15,13 @@ include "R600Intrinsics.td" include 
"R600InstrFormats.td" +// FIXME: Should not be arbitrarily split from other R600 inst classes. +class R600WrapperInst pattern = []> : + AMDGPUInst, PredicateControl { + let SubtargetPredicate = isR600toCayman; +} + + class InstR600ISA pattern = []> : InstR600 { @@ -346,12 +353,6 @@ def vtx_id2_az_extloadi8 : LoadVtxId2 ; def vtx_id2_az_extloadi16 : LoadVtxId2 ; def vtx_id2_load : LoadVtxId2 ; -def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; - -def isR600toCayman - : Predicate< - "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; - //===----------------------------------------------------------------------===// // R600 SDNodes //===----------------------------------------------------------------------===// @@ -393,7 +394,7 @@ def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; multiclass TexPattern TextureOp, Instruction inst, ValueType vt = v4f32> { -def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, +def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), @@ -479,7 +480,7 @@ class ExportBufWord1 { } multiclass ExportPattern cf_inst> { - def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : R600Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) @@ -490,22 +491,22 @@ multiclass ExportPattern cf_inst> { multiclass SteamOutputExportPattern buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : 
R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; @@ -549,7 +550,7 @@ class ExportBufInst : InstR600ISA<( def KCACHE : InstFlag<"printKCache">; -class ALU_CLAUSE inst, string OpName> : AMDGPUInst <(outs), +class ALU_CLAUSE inst, string OpName> : R600WrapperInst <(outs), (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, @@ -578,7 +579,7 @@ class CF_WORD0_R600 { let Word0 = ADDR; } -class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : R600WrapperInst <(outs), ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; bits<4> CNT; @@ -598,7 +599,7 @@ ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { let Inst{63-32} = Word1; } -class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : R600WrapperInst <(outs), ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { field bits<64> Inst; 
@@ -621,7 +622,7 @@ def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">; def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">; def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">; -def FETCH_CLAUSE : AMDGPUInst <(outs), +def FETCH_CLAUSE : R600WrapperInst <(outs), (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { field bits<8> Inst; bits<8> num; @@ -629,7 +630,7 @@ def FETCH_CLAUSE : AMDGPUInst <(outs), let isCodeGenOnly = 1; } -def ALU_CLAUSE : AMDGPUInst <(outs), +def ALU_CLAUSE : R600WrapperInst <(outs), (ins i32imm:$addr), "ALU clause starting at $addr:", [] > { field bits<8> Inst; bits<8> num; @@ -637,7 +638,7 @@ def ALU_CLAUSE : AMDGPUInst <(outs), let isCodeGenOnly = 1; } -def LITERALS : AMDGPUInst <(outs), +def LITERALS : R600WrapperInst <(outs), (ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { let isCodeGenOnly = 1; @@ -649,12 +650,10 @@ def LITERALS : AMDGPUInst <(outs), let Inst{63-32} = literal2; } -def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { +def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > { field bits<64> Inst; } -let Predicates = [isR600toCayman] in { - //===----------------------------------------------------------------------===// // Common Instructions R600, R700, Evergreen, Cayman //===----------------------------------------------------------------------===// @@ -784,7 +783,7 @@ def MOV : R600_1OP <0x19, "MOV", []>; // Most DUMMY_CHAINs should be eliminated during legalization, but undef // values can sneak in some to selection. 
let isPseudo = 1, isCodeGenOnly = 1 in { -def DUMMY_CHAIN : AMDGPUInst < +def DUMMY_CHAIN : R600WrapperInst < (outs), (ins), "DUMMY_CHAIN", @@ -795,7 +794,7 @@ def DUMMY_CHAIN : AMDGPUInst < let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { -class MOV_IMM : AMDGPUInst < +class MOV_IMM : R600WrapperInst < (outs R600_Reg32:$dst), (ins immType:$imm), "", @@ -805,20 +804,20 @@ class MOV_IMM : AMDGPUInst < } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 def MOV_IMM_I32 : MOV_IMM; -def : Pat < +def : R600Pat < (imm:$val), (MOV_IMM_I32 imm:$val) >; def MOV_IMM_GLOBAL_ADDR : MOV_IMM; -def : Pat < +def : R600Pat < (AMDGPUconstdata_ptr tglobaladdr:$addr), (MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr) >; def MOV_IMM_F32 : MOV_IMM; -def : Pat < +def : R600Pat < (fpimm:$val), (MOV_IMM_F32 fpimm:$val) >; @@ -1201,7 +1200,7 @@ def FNEG_R600 : FNEG; // FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common { -def : Pat< +def : R600Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; @@ -1248,7 +1247,7 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common; def : POW_Common ; - def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat; def R600_ExportSwz : ExportSwzInst { @@ -1336,11 +1335,11 @@ defm R600_ : RegisterLoadStore ; // Hardcode channel to 0 // NOTE: LSHR is not available here. 
LSHR is per family instruction -def : Pat < +def : R600Pat < (i32 (load_private ADDRIndirect:$addr) ), (R600_RegisterLoad FRAMEri:$addr, (i32 0)) >; -def : Pat < +def : R600Pat < (store_private i32:$val, ADDRIndirect:$addr), (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0)) >; @@ -1691,7 +1690,7 @@ def R600_INSERT_ELT_V2 : InsertVertical ; def R600_INSERT_ELT_V4 : InsertVertical ; class ExtractVerticalPat : Pat < + ValueType scalar_ty> : R600Pat < (scalar_ty (extractelt vec_ty:$vec, i32:$index)), (inst $vec, $index) >; @@ -1702,7 +1701,7 @@ def : ExtractVerticalPat ; def : ExtractVerticalPat ; class InsertVerticalPat : Pat < + ValueType scalar_ty> : R600Pat < (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), (inst $vec, $value, $index) >; @@ -1716,9 +1715,11 @@ def : InsertVerticalPat ; // ISel Patterns //===----------------------------------------------------------------------===// +let SubtargetPredicate = isR600toCayman in { + // CND*_INT Patterns for f32 True / False values -class CND_INT_f32 : Pat < +class CND_INT_f32 : R600Pat < (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), (cnd $src0, $src1, $src2) >; @@ -1728,18 +1729,18 @@ def : CND_INT_f32 ; def : CND_INT_f32 ; //CNDGE_INT extra pattern -def : Pat < +def : R600Pat < (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), (CNDGE_INT $src0, $src1, $src2) >; // KIL Patterns -def KILP : Pat < +def KILP : R600Pat < (int_AMDGPU_kilp), (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) >; -def KIL : Pat < +def KIL : R600Pat < (int_AMDGPU_kill f32:$src0), (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; @@ -1788,7 +1789,7 @@ def : BitConvert ; // DWORDADDR pattern def : DwordAddrPat ; -} // End isR600toCayman Predicate +} // End SubtargetPredicate = isR600toCayman def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index faf14fff5b2..250fb9eda2a 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ 
b/lib/Target/AMDGPU/SIInstrFormats.td @@ -11,9 +11,18 @@ // //===----------------------------------------------------------------------===// +def isGCN : Predicate<"Subtarget->getGeneration() " + ">= SISubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureGCN">; +def isSI : Predicate<"Subtarget->getGeneration() " + "== SISubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + + class InstSI pattern = []> : AMDGPUInst, PredicateControl { + let SubtargetPredicate = isGCN; // Low bits - basic encoding information. field bit SALU = 0; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 1ed5e8e0937..54e68483e87 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -11,13 +11,6 @@ // that are not yet supported remain commented out. //===----------------------------------------------------------------------===// -def isGCN : Predicate<"Subtarget->getGeneration() " - ">= SISubtarget::SOUTHERN_ISLANDS">, - AssemblerPredicate<"FeatureGCN">; -def isSI : Predicate<"Subtarget->getGeneration() " - "== SISubtarget::SOUTHERN_ISLANDS">, - AssemblerPredicate<"FeatureSouthernIslands">; - def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, @@ -25,14 +18,17 @@ def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, def HasMovrel : Predicate<"Subtarget->hasMovrel()">, AssemblerPredicate<"FeatureMovrel">; +class GCNPat : AMDGPUPat { + let SubtargetPredicate = isGCN; +} + + include "VOPInstructions.td" include "SOPInstructions.td" include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" -let SubtargetPredicate = isGCN in { - //===----------------------------------------------------------------------===// // EXP Instructions 
//===----------------------------------------------------------------------===// @@ -526,30 +522,27 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < let Defs = [SCC]; } -} // End SubtargetPredicate = isGCN - -let Predicates = [isGCN] in { -def : Pat < +def : GCNPat < (AMDGPUinit_exec i64:$src), (SI_INIT_EXEC (as_i64imm $src)) >; -def : Pat < +def : GCNPat < (AMDGPUinit_exec_from_input i32:$input, i32:$shift), (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) >; -def : Pat< +def : GCNPat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) >; -def : Pat< +def : GCNPat< (AMDGPUelse i64:$src, bb:$target), (SI_ELSE $src, $target, 0) >; -def : Pat < +def : GCNPat < (int_AMDGPU_kilp), (SI_KILL (i32 0xbf800000)) >; @@ -558,7 +551,7 @@ def : Pat < // VOP1 Patterns //===----------------------------------------------------------------------===// -let Predicates = [UnsafeFPMath] in { +let SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] in { //def : RcpPat; //defm : RsqPat; @@ -568,70 +561,70 @@ def : RsqPat; def : RsqPat; // Convert (x - floor(x)) to fract(x) -def : Pat < +def : GCNPat < (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) >; // Convert (x + (-floor(x))) to fract(x) -def : Pat < +def : GCNPat < (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // End Predicates = [UnsafeFPMath] +} // End SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] // f16_to_fp patterns -def : Pat < +def : GCNPat < (f32 (f16_to_fp i32:$src0)), (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), 
(V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f64 (fpextend f16:$src)), (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src)) >; // fp_to_fp16 patterns -def : Pat < +def : GCNPat < (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (i32 (fp_to_sint f16:$src)), (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src)) >; -def : Pat < +def : GCNPat < (i32 (fp_to_uint f16:$src)), (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src)) >; -def : Pat < +def : GCNPat < (f16 (sint_to_fp i32:$src)), (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src)) >; -def : Pat < +def : GCNPat < (f16 (uint_to_fp i32:$src)), (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src)) >; @@ -641,7 +634,7 @@ def : Pat < //===----------------------------------------------------------------------===// multiclass FMADPat { - def : Pat < + def : GCNPat < (vt (fmad (VOP3NoMods vt:$src0), (VOP3NoMods vt:$src1), (VOP3NoMods vt:$src2))), @@ -653,7 +646,7 @@ multiclass FMADPat { defm : FMADPat ; defm : FMADPat ; -class FMADModsPat : Pat< +class FMADModsPat : GCNPat< (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod), (VOP3Mods f32:$src1, i32:$src1_mod), (VOP3Mods f32:$src2, i32:$src2_mod))), @@ -664,7 +657,7 @@ class FMADModsPat : Pat< def : FMADModsPat; multiclass SelectPat { - def : Pat < + def : GCNPat < (vt (select i1:$src0, vt:$src1, vt:$src2)), (inst $src2, $src1, $src0) >; @@ -675,7 +668,7 @@ defm : SelectPat ; defm : SelectPat ; defm : SelectPat ; -def : Pat < +def : GCNPat < (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; @@ -748,6 +741,8 @@ foreach Index = 0-15 in { >; } +let SubtargetPredicate = isGCN in { + // FIXME: Why do only some of these type combinations for SReg 
and // VReg? // 16-bit bitcast @@ -808,6 +803,8 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +} // End SubtargetPredicate = isGCN + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ @@ -815,7 +812,7 @@ def : BitConvert ; // If denormals are not enabled, it only impacts the compare of the // inputs. The output result is not flushed. -class ClampPat : Pat < +class ClampPat : GCNPat < (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))), (inst i32:$src0_modifiers, vt:$src0, i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE) @@ -825,7 +822,7 @@ def : ClampPat; def : ClampPat; def : ClampPat; -def : Pat < +def : GCNPat < (v2f16 (AMDGPUclamp (VOP3PMods v2f16:$src0, i32:$src0_modifiers))), (V_PK_MAX_F16 $src0_modifiers, $src0, $src0_modifiers, $src0, DSTCLAMP.ENABLE) @@ -837,13 +834,13 @@ def : Pat < // Prevent expanding both fneg and fabs. -def : Pat < +def : GCNPat < (fneg (fabs f32:$src)), (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit >; // FIXME: Should use S_OR_B32 -def : Pat < +def : GCNPat < (fneg (fabs f64:$src)), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -853,17 +850,17 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fabs f32:$src), (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff))) >; -def : Pat < +def : GCNPat < (fneg f32:$src), (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000))) >; -def : Pat < +def : GCNPat < (fabs f64:$src), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -873,7 +870,7 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fneg f64:$src), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -883,18 +880,18 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) >; -def : Pat < +def : GCNPat < (fcopysign f32:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), 
$src0, (V_LSHLREV_B32_e64 (i32 16), $src1)) >; -def : Pat < +def : GCNPat < (fcopysign f64:$src0, f16:$src1), (REG_SEQUENCE SReg_64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, @@ -902,39 +899,39 @@ def : Pat < (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f32:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), $src1)) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f64:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) >; -def : Pat < +def : GCNPat < (fneg f16:$src), (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000))) >; -def : Pat < +def : GCNPat < (fabs f16:$src), (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff))) >; -def : Pat < +def : GCNPat < (fneg (fabs f16:$src)), (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit >; -def : Pat < +def : GCNPat < (fneg v2f16:$src), (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src) >; -def : Pat < +def : GCNPat < (fabs v2f16:$src), (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src) >; @@ -943,7 +940,7 @@ def : Pat < // // fabs is not reported as free because there is modifier for it in // VOP3P instructions, so it is turned into the bit op. -def : Pat < +def : GCNPat < (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))), (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit >; @@ -952,17 +949,17 @@ def : Pat < /********** Immediate Patterns **********/ /********** ================== **********/ -def : Pat < +def : GCNPat < (VGPRImm<(i32 imm)>:$imm), (V_MOV_B32_e32 imm:$imm) >; -def : Pat < +def : GCNPat < (VGPRImm<(f32 fpimm)>:$imm), (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (i32 imm:$imm), (S_MOV_B32 imm:$imm) >; @@ -970,27 +967,27 @@ def : Pat < // FIXME: Workaround for ordering issue with peephole optimizer where // a register class copy interferes with immediate folding. 
Should // use s_mov_b32, which can be shrunk to s_movk_i32 -def : Pat < +def : GCNPat < (VGPRImm<(f16 fpimm)>:$imm), (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (f16 fpimm:$imm), (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (i32 frameindex:$fi), (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi))) >; -def : Pat < +def : GCNPat < (i64 InlineImm:$imm), (S_MOV_B64 InlineImm:$imm) >; @@ -998,12 +995,12 @@ def : Pat < // XXX - Should this use a s_cmp to set SCC? // Set to sign-extended 64-bit value (true = -1, false = 0) -def : Pat < +def : GCNPat < (i1 imm:$imm), (S_MOV_B64 (i64 (as_i64imm $imm))) >; -def : Pat < +def : GCNPat < (f64 InlineFPImm:$imm), (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm:$imm))) >; @@ -1012,14 +1009,16 @@ def : Pat < /********** Intrinsic Patterns **********/ /********** ================== **********/ +let SubtargetPredicate = isGCN in { def : POW_Common ; +} -def : Pat < +def : GCNPat < (i32 (sext i1:$src0)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; -class Ext32Pat : Pat < +class Ext32Pat : GCNPat < (i32 (ext i1:$src0)), (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0) >; @@ -1028,7 +1027,7 @@ def : Ext32Pat ; def : Ext32Pat ; // The multiplication scales from [0,1] to the unsigned integer range -def : Pat < +def : GCNPat < (AMDGPUurecip i32:$src0), (V_CVT_U32_F32_e32 (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1), @@ -1039,17 +1038,21 @@ def : Pat < // VOP3 Patterns //===----------------------------------------------------------------------===// +let SubtargetPredicate = isGCN in { + def : IMad24Pat; def : UMad24Pat; defm : BFIPatterns ; def : ROTRPattern ; -def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), +} + +def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), (i32 
(EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; -def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), +def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; @@ -1059,13 +1062,13 @@ def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), multiclass SI_INDIRECT_Pattern { // Extract with offset - def : Pat< + def : GCNPat< (eltvt (extractelt vt:$src, (MOVRELOffset i32:$idx, (i32 imm:$offset)))), (!cast("SI_INDIRECT_SRC_"#VecSize) $src, $idx, imm:$offset) >; // Insert with offset - def : Pat< + def : GCNPat< (insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))), (!cast("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val) >; @@ -1085,14 +1088,14 @@ defm : SI_INDIRECT_Pattern ; // SAD Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (add (sub_oneuse (umax i32:$src0, i32:$src1), (umin i32:$src0, i32:$src1)), i32:$src2), (V_SAD_U32 $src0, $src1, $src2, (i1 0)) >; -def : Pat < +def : GCNPat < (add (select_oneuse (i1 (setugt i32:$src0, i32:$src1)), (sub i32:$src0, i32:$src1), (sub i32:$src1, i32:$src0)), @@ -1104,51 +1107,51 @@ def : Pat < // Conversion Patterns //===----------------------------------------------------------------------===// -def : Pat<(i32 (sext_inreg i32:$src, i1)), +def : GCNPat<(i32 (sext_inreg i32:$src, i1)), (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16 // Handle sext_inreg in i64 -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i1)), (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16 >; -def : Pat < +def : GCNPat < (i16 (sext_inreg i16:$src, i1)), (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16 >; -def : Pat < +def : GCNPat < (i16 (sext_inreg i16:$src, i8)), (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i8)), (S_BFE_I64 i64:$src, (i32 
0x80000)) // 0 | 8 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i16)), (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i32)), (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16 >; -def : Pat < +def : GCNPat < (i64 (zext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (anyext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1) >; -class ZExt_i64_i1_Pat : Pat < +class ZExt_i64_i1_Pat : GCNPat < (i64 (ext i1:$src)), (REG_SEQUENCE VReg_64, (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0, @@ -1161,20 +1164,20 @@ def : ZExt_i64_i1_Pat; // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple outputs. -def : Pat < +def : GCNPat < (i64 (sext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (sext i1:$src)), (REG_SEQUENCE VReg_64, (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0, (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1) >; -class FPToI1Pat : Pat < +class FPToI1Pat : GCNPat < (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE)) >; @@ -1190,37 +1193,37 @@ def : FPToI1Pat; // 64-bit comparisons. When legalizing SGPR copies, instructions // resulting in the copies from SCC to these instructions will be // moved to the VALU. 
-def : Pat < +def : GCNPat < (i1 (and i1:$src0, i1:$src1)), (S_AND_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (i1 (or i1:$src0, i1:$src1)), (S_OR_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (i1 (xor i1:$src0, i1:$src1)), (S_XOR_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (f32 (sint_to_fp i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src) >; -def : Pat < +def : GCNPat < (f32 (uint_to_fp i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src) >; -def : Pat < +def : GCNPat < (f64 (sint_to_fp i1:$src)), (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) >; -def : Pat < +def : GCNPat < (f64 (uint_to_fp i1:$src)), (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)) >; @@ -1228,103 +1231,87 @@ def : Pat < //===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (i32 (AMDGPUfp16_zext f16:$src)), (COPY $src) >; -def : Pat < +def : GCNPat < (i32 (trunc i64:$a)), (EXTRACT_SUBREG $a, sub0) >; -def : Pat < +def : GCNPat < (i1 (trunc i32:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) >; -def : Pat < +def : GCNPat < (i1 (trunc i16:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) >; -def : Pat < +def : GCNPat < (i1 (trunc i64:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) >; -def : Pat < +def : GCNPat < (i32 (bswap i32:$a)), (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)), (V_ALIGNBIT_B32 $a, $a, (i32 24)), (V_ALIGNBIT_B32 $a, $a, (i32 8))) >; -multiclass BFMPatterns { - def : Pat < - (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), - (BFM $a, $b) - >; - - def : Pat < - (vt (add (vt (shl 1, vt:$a)), -1)), - (BFM $a, (MOV (i32 0))) - >; -} - -defm : BFMPatterns ; -// FIXME: defm : BFMPatterns ; -defm : BFEPattern ; - -let Predicates = [NoFP16Denormals] in { -def : Pat< +let OtherPredicates = 
[NoFP16Denormals] in { +def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0) >; -def : Pat< +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) >; } -let Predicates = [FP16Denormals] in { -def : Pat< +let OtherPredicates = [FP16Denormals] in { +def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0) >; -def : Pat< +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE) >; } -let Predicates = [NoFP32Denormals] in { -def : Pat< +let OtherPredicates = [NoFP32Denormals] in { +def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0) >; } -let Predicates = [FP32Denormals] in { -def : Pat< +let OtherPredicates = [FP32Denormals] in { +def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MAX_F32_e64 $src_mods, $src, $src_mods, $src, 0, 0) >; } -let Predicates = [NoFP64Denormals] in { -def : Pat< +let OtherPredicates = [NoFP64Denormals] in { +def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0) >; } -let Predicates = [FP64Denormals] in { -def : Pat< +let OtherPredicates = [FP64Denormals] in { +def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (V_MAX_F64 $src_mods, $src, $src_mods, $src, 0, 0) >; @@ -1332,7 +1319,7 @@ def : Pat< // Allow integer inputs -class ExpPattern : Pat< +class ExpPattern : GCNPat< (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)), (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en) >; @@ -1340,43 +1327,43 @@ class ExpPattern : Pat< def : 
ExpPattern; def : ExpPattern; -def : Pat < +def : GCNPat < (v2i16 (build_vector i16:$src0, i16:$src1)), (v2i16 (S_PACK_LL_B32_B16 $src0, $src1)) >; // COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. -def : Pat < +def : GCNPat < (v2i16 (build_vector (i16 0), i16:$src1)), (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0)) >; // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. -def : Pat < +def : GCNPat < (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1)) >; -def : Pat < +def : GCNPat < (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))), (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), (v2i16 (S_PACK_HH_B32_B16 $src0, $src1)) >; // TODO: Should source modifiers be matched to v_pack_b32_f16? -def : Pat < +def : GCNPat < (v2f16 (build_vector f16:$src0, f16:$src1)), (v2f16 (S_PACK_LL_B32_B16 $src0, $src1)) >; -// def : Pat < +// def : GCNPat < // (v2f16 (scalar_to_vector f16:$src0)), // (COPY $src0) // >; -// def : Pat < +// def : GCNPat < // (v2i16 (scalar_to_vector i16:$src0)), // (COPY $src0) // >; @@ -1385,7 +1372,7 @@ def : Pat < // Fract Patterns //===----------------------------------------------------------------------===// -let Predicates = [isSI] in { +let SubtargetPredicate = isSI in { // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient @@ -1394,7 +1381,7 @@ let Predicates = [isSI] in { // fract(x) = isnan(x) ? 
x : min(V_FRACT(x), 0.99999999999999999) // Convert floor(x) to (x - fract(x)) -def : Pat < +def : GCNPat < (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))), (V_ADD_F64 $mods, @@ -1412,7 +1399,7 @@ def : Pat < DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // End Predicates = [isSI] +} // End SubtargetPredicates = isSI //============================================================================// // Miscellaneous Optimization Patterns @@ -1421,20 +1408,41 @@ def : Pat < // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. // TODO: Also do for 64-bit. -def : Pat< +def : GCNPat< (add i32:$src0, (i32 NegSubInlineConst32:$src1)), (S_SUB_I32 $src0, NegSubInlineConst32:$src1) >; + +multiclass BFMPatterns { + def : GCNPat < + (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), + (BFM $a, $b) + >; + + def : GCNPat < + (vt (add (vt (shl 1, vt:$a)), -1)), + (BFM $a, (MOV (i32 0))) + >; +} + +let SubtargetPredicate = isGCN in { + +defm : BFMPatterns ; +// FIXME: defm : BFMPatterns ; + +defm : BFEPattern ; def : SHA256MaPattern ; def : IntMed3Pat; def : IntMed3Pat; +} + // This matches 16 permutations of // max(min(x, y), min(max(x, y), z)) class FPMed3Pat : Pat< + Instruction med3Inst> : GCNPat< (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), @@ -1444,7 +1452,7 @@ class FPMed3Pat; class FP16Med3Pat : Pat< + Instruction med3Inst> : GCNPat< (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), @@ -1457,7 +1465,7 @@ class Int16Med3Pat : Pat< + ValueType vt = i32> : GCNPat< (max (min_oneuse vt:$src0, vt:$src1), (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) @@ -1465,7 +1473,7 @@ 
class Int16Med3Pat; -let Predicates = [isGFX9] in { +let OtherPredicates = [isGFX9] in { def : FP16Med3Pat; def : Int16Med3Pat; def : Int16Med3Pat; @@ -1498,6 +1506,7 @@ multiclass NoCarryAlias; @@ -1513,5 +1522,3 @@ def : MnemonicAlias<"v_add_u32", "v_add_i32">; def : MnemonicAlias<"v_sub_u32", "v_sub_i32">; def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">; } - -} // End isGCN predicate diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 73dd8b7daa4..131cd2f990f 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -241,25 +241,23 @@ def SMRDBufferImm : ComplexPattern; def SMRDBufferImm32 : ComplexPattern; def SMRDBufferSgpr : ComplexPattern; -let Predicates = [isGCN] in { - multiclass SMRD_Pattern { // 1. IMM offset - def : Pat < + def : GCNPat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_IMM") $sbase, $offset, 0)) >; // 2. SGPR offset - def : Pat < + def : GCNPat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0)) >; } -let Predicates = [isSICI] in { -def : Pat < +let OtherPredicates = [isSICI] in { +def : GCNPat < (i64 (readcyclecounter)), (S_MEMTIME) >; @@ -277,29 +275,27 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; // 1. Offset as an immediate -def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI +def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on CI (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; // 2. 
Offset loaded in an 32bit SGPR -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0) >; } // End let AddedComplexity = 100 -} // let Predicates = [isGCN] - -let Predicates = [isVI] in { +let OtherPredicates = [isVI] in { -def : Pat < +def : GCNPat < (i64 (readcyclecounter)), (S_MEMREALTIME) >; -} // let Predicates = [isVI] +} // let OtherPredicates = [isVI] //===----------------------------------------------------------------------===// @@ -508,10 +504,10 @@ def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>; let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in { -class SMRD_Pattern_ci : Pat < +class SMRD_Pattern_ci : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let Predicates = [isCIOnly]; + let OtherPredicates = [isCIOnly]; } def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>; @@ -520,10 +516,10 @@ def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>; def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>; def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { - let Predicates = [isCI]; // should this be isCIOnly? + let OtherPredicates = [isCI]; // should this be isCIOnly? } } // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 041fec52efe..43c54875944 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -948,12 +948,10 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16), } } -let Predicates = [isGCN] in { - //===----------------------------------------------------------------------===// // S_GETREG_B32 Intrinsic Pattern. 
//===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (int_amdgcn_s_getreg imm:$simm16), (S_GETREG_B32 (as_i16imm $simm16)) >; @@ -962,25 +960,25 @@ def : Pat < // SOP1 Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (i64 (ctpop i64:$src)), (i64 (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0, (S_MOV_B32 (i32 0)), sub1)) >; -def : Pat < +def : GCNPat < (i32 (smax i32:$x, (i32 (ineg i32:$x)))), (S_ABS_I32 $x) >; -def : Pat < +def : GCNPat < (i16 imm:$imm), (S_MOV_B32 imm:$imm) >; // Same as a 32-bit inreg -def : Pat< +def : GCNPat< (i32 (sext i16:$src)), (S_SEXT_I32_I16 $src) >; @@ -992,7 +990,7 @@ def : Pat< // V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector // case, the sgpr-copies pass will fix this to use the vector version. -def : Pat < +def : GCNPat < (i32 (addc i32:$src0, i32:$src1)), (S_ADD_U32 $src0, $src1) >; @@ -1000,20 +998,20 @@ def : Pat < // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple // outputs. 
-def : Pat< +def : GCNPat< (i64 (zext i16:$src)), (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (S_MOV_B32 (i32 0xffff))), SGPR_32)), sub0, (S_MOV_B32 (i32 0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (sext i16:$src)), (REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1) >; -def : Pat< +def : GCNPat< (i32 (zext i16:$src)), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src) >; @@ -1024,13 +1022,11 @@ def : Pat< // SOPP Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (int_amdgcn_s_waitcnt i32:$simm16), (S_WAITCNT (as_i16imm $simm16)) >; -} // End isGCN predicate - //===----------------------------------------------------------------------===// // Real target instructions, move this to the appropriate subtarget TD file diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 4520f474d66..ff2bd245440 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -361,14 +361,14 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [Has16BitInsts] in { +let OtherPredicates = [Has16BitInsts] in { -def : Pat< +def : GCNPat< (f32 (f16_to_fp i16:$src)), (V_CVT_F32_F16_e32 $src) >; -def : Pat< +def : GCNPat< (i16 (AMDGPUfp_to_f16 f32:$src)), (V_CVT_F16_F32_e32 $src) >; @@ -653,9 +653,9 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo; -let Predicates = [isVI] in { +let OtherPredicates = [isVI] in { -def : Pat < +def : GCNPat < (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, imm:$bound_ctrl)), (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), @@ -663,7 +663,7 @@ def : Pat < (as_i1imm $bound_ctrl)) >; -def : Pat < +def : GCNPat < (i32 (int_amdgcn_update_dpp 
i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, imm:$bound_ctrl)), (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), @@ -671,26 +671,26 @@ def : Pat < (as_i1imm $bound_ctrl)) >; -def : Pat< +def : GCNPat< (i32 (anyext i16:$src)), (COPY $src) >; -def : Pat< +def : GCNPat< (i64 (anyext i16:$src)), (REG_SEQUENCE VReg_64, (i32 (COPY $src)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; -def : Pat< +def : GCNPat< (i16 (trunc i32:$src)), (COPY $src) >; -def : Pat < +def : GCNPat < (i16 (trunc i64:$src)), (EXTRACT_SUBREG $src, sub0) >; -} // End Predicates = [isVI] +} // End OtherPredicates = [isVI] diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 2db0669310c..e0ef8ce3c77 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -408,12 +408,12 @@ defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT; -def : Pat< +def : GCNPat< (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), (V_SUBB_U32_e64 $src0, $src1, $src2) >; @@ -469,17 +469,17 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; // Note: 16-bit instructions produce a 0 result in the high 16-bits. 
multiclass Arithmetic_i16_Pats { -def : Pat< +def : GCNPat< (op i16:$src0, i16:$src1), (inst $src0, $src1) >; -def : Pat< +def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src0, $src1) >; -def : Pat< +def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src0, $src1), sub0, @@ -490,18 +490,18 @@ def : Pat< multiclass Bits_OpsRev_i16_Pats { -def : Pat< +def : GCNPat< (op i16:$src0, i16:$src1), (inst $src1, $src0) >; -def : Pat< +def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src1, $src0) >; -def : Pat< +def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src1, $src0), sub0, @@ -509,7 +509,7 @@ def : Pat< >; } -class ZExt_i16_i1_Pat : Pat < +class ZExt_i16_i1_Pat : GCNPat < (i16 (ext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; @@ -524,17 +524,17 @@ defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; -def : Pat < +def : GCNPat < (and i16:$src0, i16:$src1), (V_AND_B32_e64 $src0, $src1) >; -def : Pat < +def : GCNPat < (or i16:$src0, i16:$src1), (V_OR_B32_e64 $src0, $src1) >; -def : Pat < +def : GCNPat < (xor i16:$src0, i16:$src1), (V_XOR_B32_e64 $src0, $src1) >; @@ -546,7 +546,7 @@ defm : Bits_OpsRev_i16_Pats; def : ZExt_i16_i1_Pat; def : ZExt_i16_i1_Pat; -def : Pat < +def : GCNPat < (i16 (sext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src) >; @@ -554,7 +554,7 @@ def : Pat < // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. // TODO: Also do for 64-bit. 
-def : Pat< +def : GCNPat< (add i16:$src0, (i16 NegSubInlineConst16:$src1)), (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 736c6a5c449..aa041aab51c 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -450,17 +450,17 @@ let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { -def : Pat< +def : GCNPat < (op2 (op1 i16:$src0, i16:$src1), i16:$src2), (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; -def : Pat< +def : GCNPat< (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; -def : Pat< +def : GCNPat< (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (REG_SEQUENCE VReg_64, (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0, @@ -528,7 +528,7 @@ class getClampRes { ret1)); } -class IntClampPat : Pat< +class IntClampPat : GCNPat< getClampPat.ret, getClampRes.ret >; diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 313792f3704..85a56db3070 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -82,9 +82,9 @@ def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile; -def : Pat < +def : GCNPat < (build_vector f16:$elt0, (AMDGPUclamp (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), @@ -122,7 +122,7 @@ def : Pat < $elt0)) >; -def : Pat < +def : GCNPat < (AMDGPUclamp (build_vector (fpround (fmad (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)), (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)), diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index b636fc9be43..146870e2153 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -607,9 +607,7 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; // V_ICMPIntrinsic 
Pattern. //===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - -class ICMP_Pattern : Pat < +class ICMP_Pattern : GCNPat < (AMDGPUsetcc vt:$src0, vt:$src1, cond), (inst $src0, $src1) >; @@ -636,7 +634,7 @@ def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; -class FCMP_Pattern : Pat < +class FCMP_Pattern : GCNPat < (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), (inst $src0_modifiers, $src0, $src1_modifiers, $src1, @@ -671,8 +669,6 @@ def : FCMP_Pattern ; def : FCMP_Pattern ; def : FCMP_Pattern ; -} // End Predicates = [isGCN] - //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// -- 2.40.0