From f04e8d9380cd7499ac90b3d5ca6bf056e1a1ddbb Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Sat, 5 Jan 2019 19:20:00 +0000 Subject: [PATCH] Added single use check to ShrinkDemandedConstant Fixes cvt_f32_ubyte combine. performCvtF32UByteNCombine() could shrink source node to demanded bits only even if there are other uses. Differential Revision: https://reviews.llvm.org/D56289 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 +++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c006bd9c5e5..2e21f57c560 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -350,6 +350,9 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + if (!Op.hasOneUse()) + return false; + // Do target-specific constant optimization. if (targetShrinkDemandedConstant(Op, Demanded, TLO)) return TLO.New.getNode(); diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index d8126fa635a..20d2e9edd82 100644 --- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -281,3 +281,23 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out store float %cvt, float addrspace(1)* %out ret void } + +; GCN-LABEL: {{^}}cvt_ubyte0_or_multiuse: +; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]], +; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x80000001, [[LOADREG]] +; GCN-DAG: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[OR]] +; GCN: v_add_f32_e32 [[RES:v[0-9]+]], [[OR]], [[CONV]] +; GCN: buffer_store_dword [[RES]], +define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %lid + %load = load i32, i32 addrspace(1)* %gep + %or = or i32 %load, -2147483647 + %and = and i32 %or, 255 + %uitofp = uitofp i32 %and to float + %cast = bitcast i32 %or to float + %add = fadd float %cast, %uitofp + store float %add, float addrspace(1)* %out + ret void +} -- 2.50.1