From: Dmitry Preobrazhensky Date: Wed, 16 Aug 2017 13:51:56 +0000 (+0000) Subject: [AMDGPU][MC][GFX9] Added integer clamping support for VOP3 opcodes X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=600899c871151f7fe8e2cb09b9a35f458f57f902;p=llvm [AMDGPU][MC][GFX9] Added integer clamping support for VOP3 opcodes See Bug 34152: https://bugs.llvm.org//show_bug.cgi?id=34152 Reviewers: SamWot, artem.tamazov, arsenm Differential Revision: https://reviews.llvm.org/D36674 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311006 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 959a43bfa51..e417318f834 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -274,6 +274,12 @@ def FeatureDPP : SubtargetFeature<"dpp", "Support DPP (Data Parallel Primitives) extension" >; +def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", + "HasIntClamp", + "true", + "Support clamp for integer destination" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -452,7 +458,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP + FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, + FeatureIntClamp ] >; @@ -462,7 +469,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, - FeatureFastFMAF32, FeatureDPP, + FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, FeatureAddNoCarryInsts @@ -704,6 +711,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; +def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, + AssemblerPredicate<"FeatureIntClamp">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index b9b1773f027..eb335b41e29 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -702,14 +702,16 @@ def cvt_flr_i32_f32 : PatFrag < [{ (void)N; return TM.Options.NoNaNsFPMath; }] >; -class IMad24Pat : Pat < +class IMad24Pat : Pat < (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), - (Inst $src0, $src1, $src2) + !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)), + (Inst $src0, $src1, $src2)) >; -class UMad24Pat : Pat < +class UMad24Pat : Pat < (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), - (Inst $src0, $src1, $src2) + !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)), + (Inst $src0, $src1, $src2)) >; class RcpPat : Pat < diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index deba76a207c..18fdaf441e0 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -135,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, SGPRInitBug(false), HasSMemRealTime(false), Has16BitInsts(false), + HasIntClamp(false), HasVOP3PInsts(false), HasMovrel(false), HasVGPRIndexMode(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 8aad97caa97..3ac6fe283c4 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -145,6 +145,7 @@ protected: bool SGPRInitBug; bool HasSMemRealTime; bool Has16BitInsts; + bool HasIntClamp; bool HasVOP3PInsts; bool HasMovrel; bool HasVGPRIndexMode; @@ -241,6 +242,10 @@ public: return Has16BitInsts; } + bool hasIntClamp() const { + return HasIntClamp; + } + bool hasVOP3PInsts() const { return HasVOP3PInsts; } diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 9a0e4a39d31..e1de580437e 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -911,6 +911,10 @@ public: return !isVI(); } + bool hasIntClamp() const { + return getFeatureBits()[AMDGPU::FeatureIntClamp]; + } + AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast(TS); @@ -1011,6 +1015,7 @@ private: bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); bool validateConstantBusLimitations(const MCInst &Inst); bool validateEarlyClobberLimitations(const MCInst &Inst); + bool validateIntClampSupported(const MCInst &Inst); bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -2199,6 +2204,20 @@ bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { return true; } +bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { + int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); + assert(ClampIdx != -1); + return Inst.getOperand(ClampIdx).getImm() == 0; + } + + return true; +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc) { if (!validateConstantBusLimitations(Inst)) { @@ -2211,6 +2230,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "destination must be different than all sources"); return false; } + if (!validateIntClampSupported(Inst)) { + Error(IDLoc, + "integer clamping is not supported on this GPU"); + return false; + } return true; } diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index f5f6e804b05..500b25bbb04 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -70,7 +70,8 @@ enum : uint64_t { HasFPClamp = UINT64_C(1) << 42, VOP3_OPSEL = UINT64_C(1) << 43, maybeAtomic = UINT64_C(1) << 44, - F16_ZFILL = UINT64_C(1) << 45 + F16_ZFILL = UINT64_C(1) << 45, + IntClamp = UINT64_C(1) << 46 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index b5da21a6577..b4b4eed9b43 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -94,6 +94,10 @@ class InstSI { // Returns the input arguments for VOP3 instructions for the given SrcVT. class getIns64 { dag ret = @@ -1087,7 +1087,9 @@ class getIns64 { // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. -class getAsm64 { string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); @@ -1313,9 +1320,10 @@ class getAsm64 .ret, + getAsm32.ret # iclamp, dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", "")); } @@ -1520,6 +1528,7 @@ class VOPProfile _ArgVT> { field bit HasClamp = HasModifiers; field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd.ret, HasClamp>.ret; + field bit HasIntClamp = !if(isFloatType.ret, 0, HasClamp); field bit HasHigh = 0; field bit IsPacked = isPackedType.ret; @@ -1545,7 +1554,7 @@ class VOPProfile _ArgVT> { field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64.ret; field dag InsVOP3P = getInsVOP3P _ArgVT> { field string Asm32 = getAsm32.ret; - field string Asm64 = getAsm64.ret; + field string Asm64 = getAsm64.ret; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel; -def : UMad24Pat; +def : IMad24Pat; +def : UMad24Pat; defm : BFIPatterns ; def : ROTRPattern ; @@ -1083,7 +1083,7 @@ def : Pat < (add (sub_oneuse (umax i32:$src0, i32:$src1), (umin i32:$src0, i32:$src1)), i32:$src2), - (V_SAD_U32 $src0, $src1, $src2) + (V_SAD_U32 $src0, $src1, $src2, (i1 0)) >; def : Pat < @@ -1091,7 +1091,7 @@ def : Pat < (sub i32:$src0, i32:$src1), (sub i32:$src1, i32:$src0)), i32:$src2), - (V_SAD_U32 $src0, $src1, $src2) + (V_SAD_U32 $src0, $src1, $src2, (i1 0)) >; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index b2b9af3dca9..4520f474d66 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -275,7 +275,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; - let Asm64 = getAsm64<1, 1, 0, 1>.ret; + let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; let AsmSDWA = getAsmSDWA<1, 1>.ret; let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 29266fbac92..2db0669310c 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -208,7 +208,7 @@ def VOP_MADMK_F32 : VOP_MADMK ; class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, - HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; + 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; let InsDPP = (ins DstRCDPP:$old, Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, @@ -222,7 +222,7 @@ class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; - let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; + let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; @@ -235,13 +235,13 @@ class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { def VOP_MAC_F16 : VOP_MAC { // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives // 'not a string initializer' error. - let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret; + let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f16>.ret; } def VOP_MAC_F32 : VOP_MAC { // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives // 'not a string initializer' error. - let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret; + let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f32>.ret; } // Write out to vcc or arbitrary SGPR. diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index e81e656da6c..cd516b771f8 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -102,10 +102,25 @@ class getVOP3Pat { ret1)); } +class getVOP3ClampPat { + list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; + list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; + list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + class VOP3Inst : VOP3_Pseudo.ret, getVOP3Pat.ret), - VOP3Only>; + !if(P.HasModifiers, + getVOP3ModPat.ret, + !if(P.HasIntClamp, + getVOP3ClampPat.ret, + getVOP3Pat.ret)), + VOP3Only> { + let IntClamp = P.HasIntClamp; +} class VOP3OpSelInst : VOP3_Pseudo : VOPProfile { let Asm64 = " " # P.Asm64; } +class VOP3Clamp_Profile : VOPProfile { + let HasClamp = 1; + + // FIXME: Hack to stop printing _e64 + let Outs64 = (outs DstRC.RegClass:$vdst); + let Asm64 = " " # getAsm64.ret; +} + class VOP3OpSel_Profile : VOP3_Profile

{ let HasClamp = 1; let HasOpSel = 1; @@ -167,11 +190,13 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { } def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { + let HasClamp = 1; + // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp"; } //===----------------------------------------------------------------------===// @@ -244,8 +269,8 @@ let isCommutable = 1 in { def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile>; def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile, fmad>; -def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile, AMDGPUmad_i24>; -def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile, AMDGPUmad_u24>; +def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3Clamp_Profile>; +def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3Clamp_Profile>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; @@ -305,10 +330,10 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile, AMDG def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile, AMDGPUumed3>; -def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile, int_amdgcn_sad_u8>; -def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile, int_amdgcn_sad_hi_u8>; -def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile, int_amdgcn_sad_u16>; -def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; +def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3Clamp_Profile>; +def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3Clamp_Profile>; +def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3Clamp_Profile>; +def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3Clamp_Profile>; def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile, AMDGPUdiv_fixup>; @@ -330,10 +355,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, let AsmMatchConverter = ""; } -def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; +def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3Clamp_Profile>; let Constraints = "@earlyclobber $vdst" in { -def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; +def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3Clamp_Profile>; } // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { @@ -358,8 +383,8 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; let SubtargetPredicate = isCIVI in { let Constraints = "@earlyclobber $vdst" in { -def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; -def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; +def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3Clamp_Profile>; +def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3Clamp_Profile>; } // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { @@ -383,15 +408,15 @@ let isCommutable = 1 in { let F16_ZFILL = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; -def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; -def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; +def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3Clamp_Profile>; +def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3Clamp_Profile>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; } let SubtargetPredicate = isGFX9 in { def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; -def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; -def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; +def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3Clamp_Profile>; +def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3Clamp_Profile>; def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; } // End SubtargetPredicate = isGFX9 @@ -416,18 +441,18 @@ multiclass Ternary_i16_Pats { def : Pat< (op2 (op1 i16:$src0, i16:$src1), i16:$src2), - (inst i16:$src0, i16:$src1, i16:$src2) + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; def : Pat< (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), - (inst i16:$src0, i16:$src1, i16:$src2) + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; def : Pat< (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (REG_SEQUENCE VReg_64, - (inst i16:$src0, i16:$src1, i16:$src2), sub0, + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } @@ -470,6 +495,45 @@ def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Prof def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile>; } // End SubtargetPredicate = isGFX9 +//===----------------------------------------------------------------------===// +// Integer Clamp Patterns +//===----------------------------------------------------------------------===// + +class getClampPat { + dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2)); + dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1)); + dag ret1 = (P.DstVT (node P.Src0VT:$src0)); + dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getClampRes { + dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0)); + dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0)); + dag ret1 = (inst P.Src0VT:$src0, (i1 0)); + dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class IntClampPat : Pat< + getClampPat.ret, + getClampRes.ret +>; + +def : IntClampPat; +def : IntClampPat; + +def : IntClampPat; +def : IntClampPat; +def : IntClampPat; + +def : IntClampPat; +def : IntClampPat; + +def : IntClampPat; +def : IntClampPat; //===----------------------------------------------------------------------===// // Target diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index cb754ef4fa5..9f2800cae99 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -112,7 +112,7 @@ class VOP3_Pseudo pattern = [], let AsmMatchConverter = !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", - !if(!or(P.HasModifiers, P.HasOMod), + !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)), "cvtVOP3", "")); diff --git a/test/MC/AMDGPU/vop3-gfx9.s b/test/MC/AMDGPU/vop3-gfx9.s index 6e696ebf52c..b43fe348d06 100644 --- a/test/MC/AMDGPU/vop3-gfx9.s +++ b/test/MC/AMDGPU/vop3-gfx9.s @@ -278,6 +278,9 @@ v_mad_i16 v5, v1, -1, v3 v_mad_i16 v5, v1, v2, -4.0 // GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +v_mad_i16 v5, v1, v2, v3 clamp +// GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] + v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] @@ -305,6 +308,9 @@ v_mad_legacy_i16 v5, v1, -1, v3 v_mad_legacy_i16 v5, v1, v2, -4.0 // GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] +v_mad_legacy_i16 v5, v1, v2, -4.0 clamp +// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03] + v_mad_legacy_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] @@ -314,6 +320,9 @@ v_mad_legacy_u16 v5, v1, -1, v3 v_mad_legacy_u16 v5, v1, v2, -4.0 // GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +v_mad_legacy_u16 v5, v1, v2, -4.0 clamp +// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03] + v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] @@ -322,3 +331,6 @@ v_mad_u16 v5, v1, -1, v3 v_mad_u16 v5, v1, v2, -4.0 // GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] + +v_mad_u16 v5, v1, v2, v3 clamp +// GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s index 5eeb0d31f9d..b77c2f4ec68 100644 --- a/test/MC/AMDGPU/vop3.s +++ b/test/MC/AMDGPU/vop3.s @@ -518,6 +518,58 @@ v_mad_u16 v5, v1, 0, v3 v_mad_u16 v5, v1, v2, -4.0 // VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +///===---------------------------------------------------------------------===// +// VOP3 with Integer Clamp +///===---------------------------------------------------------------------===// + +v_mad_i32_i24 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_mad_i32_i24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_u32_u24 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_mad_u32_u24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04] + +v_sad_u8 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_sad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04] + +v_sad_hi_u8 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_sad_hi_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04] + +v_sad_u16 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_sad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04] + +v_sad_u32 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_sad_u32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04] + +v_msad_u8 v5, v1, v2, v3 clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_msad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp +// NOSICI: error: integer clamping is not supported on this GPU +// VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp +// NOSICI: error: +// VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04] + +v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp +// NOSICI: error: +// VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 clamp +// NOSICI: error: +// VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_i16 v5, v1, v2, v3 clamp +// NOSICI: error: +// VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] + // // v_interp* // diff --git a/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt b/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt index 898eddcbb05..bada0c194af 100644 --- a/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ b/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -12,122 +12,134 @@ # GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04 -# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84] +# GFX9: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84] 0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84 -# CHECK: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] +# GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] 0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04 -# CHECK: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] +# GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] 0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04 -# CHECK: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] +# GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] 0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03 -# CHECK: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4] +# GFX9: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4] 0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4 -# CHECK: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04] +# GFX9: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04 -# CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] +# GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04 -# CHECK: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +# GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] 0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04 -# CHECK: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +# GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] 0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04 -# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] 0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03 -# CHECK: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4] +# GFX9: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4] 0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4 -# CHECK: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] +# GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] 0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04 -# CHECK: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] +# GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] 0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04 -# CHECK: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] +# GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] 0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03 -# CHECK: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4] +# GFX9: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4] 0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4 -# CHECK: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04] +# GFX9: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04 -# CHECK: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] +# GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04 -# CHECK: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] +# GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] 0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04 -# CHECK: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] +# GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] 0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04 -# CHECK: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +# GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] 0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03 -# CHECK: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +# GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] 0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04 -# CHECK: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +# GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] 0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04 -# CHECK: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +# GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] 0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03 -# CHECK: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4] +# GFX9: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4] 0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4 -# CHECK: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +# GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] 0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04 -# CHECK: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] +# GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] 0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04 -# CHECK: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] +# GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] 0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04 -# CHECK: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] 0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03 -# CHECK: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] +# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03] +0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] 0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04 -# CHECK: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] +# GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] 0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04 -# CHECK: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] 0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03 -# CHECK: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] +# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03] +0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] 0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04 -# CHECK: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] +# GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] 0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04 -# CHECK: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] +# GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] 0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04 diff --git a/test/MC/Disassembler/AMDGPU/vop3_vi.txt b/test/MC/Disassembler/AMDGPU/vop3_vi.txt index 6dc94a80847..abc24a127ba 100644 --- a/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ b/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -446,3 +446,39 @@ # VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] 0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04 + +# VI: v_mad_i32_i24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_u32_u24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_sad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_sad_hi_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_sad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_sad_u32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_msad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04] +0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04