From d56619e6f9c73399c919bc69900b67632607a53f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 20 Jul 2017 17:42:47 +0000 Subject: [PATCH] AMDGPU: Add encoding for carryless add/sub instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308639 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.td | 15 +++- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 + lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 ++ lib/Target/AMDGPU/SIInstructions.td | 35 +++++++++ lib/Target/AMDGPU/VOP2Instructions.td | 14 ++++ test/MC/AMDGPU/add-sub-no-carry.s | 104 ++++++++++++++++++++++++++ 6 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 test/MC/AMDGPU/add-sub-no-carry.s diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index f1d899c4d00..79d47f59551 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -79,6 +79,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts", "Have scratch_* flat memory instructions" >; +def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", + "AddNoCarryInsts", + "true", + "Have VALU add/sub instructions without carry out" +>; + def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true", @@ -464,7 +470,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, - FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts + FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, + FeatureAddNoCarryInsts ] >; @@ -681,6 +688,12 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, AssemblerPredicate<"FeatureFlatGlobalInsts">; +def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, +
AssemblerPredicate<"FeatureAddNoCarryInsts">; + +def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">, + AssemblerPredicate<"!FeatureAddNoCarryInsts">; + def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<"Feature16BitInsts">; def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 77961762901..f4484b9c653 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -167,6 +167,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FlatInstOffsets(false), FlatGlobalInsts(false), FlatScratchInsts(false), + AddNoCarryInsts(false), R600ALUInst(false), CaymanISA(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index d4b6a5fe802..389fdc9d636 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -159,6 +159,7 @@ protected: bool FlatInstOffsets; bool FlatGlobalInsts; bool FlatScratchInsts; + bool AddNoCarryInsts; bool R600ALUInst; bool CaymanISA; bool CFALUBug; @@ -419,6 +420,10 @@ public: return FlatScratchInsts; } + bool hasAddNoCarry() const { + return AddNoCarryInsts; + } + bool isMesaKernel(const MachineFunction &MF) const { return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index ba69e42d912..0c34b8d2260 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1300,8 +1300,43 @@ def : IntMed3Pat; // Assembler aliases //============================================================================// +multiclass NoCarryAlias { + def : InstAlias, + Requires<[HasAddNoCarryInsts]>; + + def : InstAlias, + Requires<[HasAddNoCarryInsts]>; + + def : InstAlias, + Requires<[HasAddNoCarryInsts]>; + + def : InstAlias, +
Requires<[HasAddNoCarryInsts]>; +} + +// gfx9 made a mess of add instruction names. The existing add +// instructions had _co added to the names, and their old names were +// repurposed to a version without carry out. +let Predicates = [HasAddNoCarryInsts] in { +defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi, + V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>; +defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi, + V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>; +defm : NoCarryAlias<"v_subrev_u32", + V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi, + V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>; +} + +let Predicates = [NotHasAddNoCarryInsts] in { def : MnemonicAlias<"v_add_u32", "v_add_i32">; def : MnemonicAlias<"v_sub_u32", "v_sub_i32">; def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">; +} } // End isGCN predicate diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index d5acb49b4f3..9f3df2b1d43 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -375,6 +375,14 @@ defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>; defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>; defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; + + +let SubtargetPredicate = HasAddNoCarryInsts in { +defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32>; +defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32>; +defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32">; +} + } // End isCommutable = 1 // These are special and do not read the exec mask.
@@ -833,3 +841,9 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; } // End SubtargetPredicate = isVI + +let SubtargetPredicate = HasAddNoCarryInsts in { +defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>; +defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>; +defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>; +} diff --git a/test/MC/AMDGPU/add-sub-no-carry.s b/test/MC/AMDGPU/add-sub-no-carry.s new file mode 100644 index 00000000000..22eb43e5354 --- /dev/null +++ b/test/MC/AMDGPU/add-sub-no-carry.s @@ -0,0 +1,104 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefixes=GCN,VI %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s +// FIXME: pre-gfx9 errors should be more useful + + +// FIXME: These should parse to VOP2 encoding +v_add_u32 v1, v2, v3 +// GFX9: v_add_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x07,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_add_u32 v1, v2, s1 +// GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_add_u32 v1, s1, v2 +// GFX9: v_add_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x34,0xd1,0x01,0x04,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_add_u32 v1, 4.0, v2 +// GFX9: v_add_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x34,0xd1,0xf6,0x04,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_add_u32 v1, v2, 4.0 +// GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_add_u32_e32 v1, v2, v3 +// 
GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68] +// ERR-SICIVI: :19: error: invalid operand for instruction + +v_add_u32_e32 v1, s1, v3 +// GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68] +// ERR-SICIVI: :19: error: invalid operand for instruction + + + +v_sub_u32 v1, v2, v3 +// GFX9: v_sub_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x07,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_sub_u32 v1, v2, s1 +// GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_sub_u32 v1, s1, v2 +// GFX9: v_sub_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x35,0xd1,0x01,0x04,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_sub_u32 v1, 4.0, v2 +// GFX9: v_sub_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x35,0xd1,0xf6,0x04,0x02,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_sub_u32 v1, v2, 4.0 +// GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00] +// ERR-SICIVI: :15: error: invalid operand for instruction + +v_sub_u32_e32 v1, v2, v3 +// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a] +// ERR-SICIVI: :19: error: invalid operand for instruction + +v_sub_u32_e32 v1, s1, v3 +// GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a] +// ERR-SICIVI: :19: error: invalid operand for instruction + + + +v_subrev_u32 v1, v2, v3 +// GFX9: v_subrev_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x07,0x02,0x00] +// ERR-SICIVI: :18: error: invalid operand for instruction + +v_subrev_u32 v1, v2, s1 +// GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00] +// ERR-SICIVI: :18: error: invalid operand for instruction + +v_subrev_u32 v1, s1, v2 +// GFX9: v_subrev_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x36,0xd1,0x01,0x04,0x02,0x00] +// ERR-SICIVI: :18: error: invalid operand for 
instruction + +v_subrev_u32 v1, 4.0, v2 +// GFX9: v_subrev_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x36,0xd1,0xf6,0x04,0x02,0x00] +// ERR-SICIVI: :18: error: invalid operand for instruction + +v_subrev_u32 v1, v2, 4.0 +// GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00] +// ERR-SICIVI: :18: error: invalid operand for instruction + +v_subrev_u32_e32 v1, v2, v3 +// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c] +// ERR-SICIVI: :22: error: invalid operand for instruction + +v_subrev_u32_e32 v1, s1, v3 +// GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c] +// ERR-SICIVI: :22: error: invalid operand for instruction + + + +v_add_u32 v1, vcc, v2, v3 +// GCN: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] + +v_add_u32 v1, s[0:1], v2, v3 +// GCN: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00] -- 2.50.1