From: Stanislav Mekhanoshin Date: Tue, 9 Jul 2019 22:42:24 +0000 (+0000) Subject: [AMDGPU] gfx908 v_pk_fmac_f16 support X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8c11403746957331a636920ca026cbb9d8492115;p=llvm [AMDGPU] gfx908 v_pk_fmac_f16 support Differential Revision: https://reviews.llvm.org/D64433 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365573 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 47e64218ac6..baeba534012 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -666,8 +666,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts, FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, - FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard, - FeatureDoesNotSupportSRAMECC + FeatureVOP3Literal, FeatureDPP8, + FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC ] >; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 1465f6ba209..e4f38229a3b 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -694,10 +694,12 @@ let Constraints = "$vdst = $src2", defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; } -defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; - } // End SubtargetPredicate = isGFX10Plus +let SubtargetPredicate = HasPkFmacF16Inst in { +defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; +} // End SubtargetPredicate = HasPkFmacF16Inst + // Note: 16-bit instructions produce a 0 result in the high 16-bits // on GFX8 and GFX9 and preserve high 16 bits on GFX10+ def ClearHI16 : OutPatFrag<(ops node:$op), @@ -1548,3 +1550,7 @@ let SubtargetPredicate = HasDot5Insts in { let SubtargetPredicate = HasDot6Insts in { defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; } + +let SubtargetPredicate = HasPkFmacF16Inst in { +defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; +} // End SubtargetPredicate = HasPkFmacF16Inst diff --git a/test/MC/AMDGPU/xdl-insts-gfx908.s b/test/MC/AMDGPU/xdl-insts-gfx908.s new file mode 100644 index 00000000000..81a788258ef --- /dev/null +++ b/test/MC/AMDGPU/xdl-insts-gfx908.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s + +// CHECK: encoding: [0x01,0x05,0x0a,0x78] +v_pk_fmac_f16 v5, v1, v2 + +// CHECK: encoding: [0x01,0x05,0xfe,0x79] +v_pk_fmac_f16 v255, v1, v2 + +// CHECK: encoding: [0xff,0x05,0x0a,0x78] +v_pk_fmac_f16 v5, v255, v2 + +// CHECK: encoding: [0x01,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, s1, v2 + +// CHECK: encoding: [0x6a,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, vcc_lo, v2 + +// CHECK: encoding: [0x6b,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, vcc_hi, v2 + +// CHECK: encoding: [0x77,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, ttmp11, v2 + +// CHECK: encoding: [0x7c,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, m0, v2 + +// CHECK: encoding: [0x7e,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, exec_lo, v2 + +// CHECK: encoding: [0x7f,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, exec_hi, v2 + +// CHECK: encoding: [0x80,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, 0, v2 + +// CHECK: encoding: [0xc1,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, -1, v2 + +// CHECK: encoding: [0xf0,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, 0.5, v2 + +// CHECK: encoding: [0xf7,0x04,0x0a,0x78] +v_pk_fmac_f16 v5, -4.0, v2 + +// CHECK: encoding: [0x01,0xff,0x0b,0x78] +v_pk_fmac_f16 v5, v1, v255 diff --git a/test/MC/Disassembler/AMDGPU/xdl-insts-gfx908.txt b/test/MC/Disassembler/AMDGPU/xdl-insts-gfx908.txt new file mode 100644 index 00000000000..893fc3d012a --- /dev/null +++ b/test/MC/Disassembler/AMDGPU/xdl-insts-gfx908.txt @@ -0,0 +1,46 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck %s + +# CHECK: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] +0x01,0x05,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] +0x01,0x05,0xfe,0x79 + +# CHECK: v_pk_fmac_f16_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] +0xff,0x05,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] +0x01,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] +0x6a,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] +0x6b,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78] +0x77,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78] +0x7c,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] +0x7e,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] +0x7f,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] +0x80,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] +0xc1,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] +0xf0,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] +0xf7,0x04,0x0a,0x78 + +# CHECK: v_pk_fmac_f16_e32 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] +0x01,0xff,0x0b,0x78