From 77d3eb784a6de6077bc6e5d59866aa9dc028ce03 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 22 Apr 2019 22:05:49 +0000 Subject: [PATCH] [AMDGPU] Fix an issue in `op_sel_hi` skipping. Summary: - Only apply packed literal `op_sel_hi` skipping on operands requiring packed literals. Even an instruction is `packed`, it may have operand requiring non-packed literal, such as `v_dot2_f32_f16`. Reviewers: rampitec, arsenm, kzhuravl Subscribers: jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60978 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358922 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIFoldOperands.cpp | 23 ++++++++++++++++------- test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 7 +++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 2ebcbae90bb..f5e60819f30 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -189,15 +189,24 @@ static bool updateOperand(FoldCandidate &Fold, unsigned Val = Mod.getImm(); if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1)) return false; - // If upper part is all zero we do not need op_sel_hi. - if (!isUInt<16>(Fold.ImmToFold)) { - if (!(Fold.ImmToFold & 0xffff)) { - Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + // Only apply the following transformation if that operand requries + // a packed immediate. + switch (TII.get(Opcode).OpInfo[OpNo].OperandType) { + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + // If upper part is all zero we do not need op_sel_hi. + if (!isUInt<16>(Fold.ImmToFold)) { + if (!(Fold.ImmToFold & 0xffff)) { + Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); + return true; + } Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); - return true; } - Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + break; + default: + break; } } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll index 7efb1850a27..e16450bfd3c 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll @@ -33,3 +33,10 @@ entry: store float %r.val, float addrspace(1)* %r ret void } + +; GFX906-LABEL: {{^}}fdot2_inline_literal +; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 +define float @fdot2_inline_literal(<2 x half> %a, <2 x half> %b) { + %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false) + ret float %ret +} -- 2.40.0