From 0597e5e3b04c9977f7479d877e54a96068afbb98 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 21 Mar 2017 16:20:16 +0000 Subject: [PATCH] DAG: Fold bitcast/extract_vector_elt of undef to undef Fixes not eliminating store when intrinsic is lowered to undef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298385 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll | 24 +++++++++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll | 15 +++++++----- test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll | 3 +-- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 40ce46a0d79..bc051f07bb2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8320,6 +8320,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (N0.isUndef()) + return DAG.getUNDEF(VT); + // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending // on the bitconvert. @@ -13189,6 +13192,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); EVT NVT = N->getValueType(0); + if (InVec.isUndef()) + return DAG.getUNDEF(NVT); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the diff --git a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll index 3a55870c288..04384bf451e 100644 --- a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -67,3 +67,27 @@ define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %o store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out ret void } + +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source: +; GCN-NOT: store_dword +define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 + %bc = bitcast i64 %undef to <2 x i32> + store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt: +; GCN-NOT: store_dword +define void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 + %bc = bitcast i64 %undef to <2 x i32> + %elt1 = extractelement <2 x i32> %bc, i32 1 + store volatile i32 %elt1, i32 addrspace(1)* %out + ret void +} + +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone convergent } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll index beada53a220..24a897521f7 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}} ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}} ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]] -; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]] +; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]] define void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -23,9 +23,12 @@ define void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float ret void } +; FIXME: Folds to 0 on gfx9 ; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef: ; GCN-NEXT: ; BB#0 -; GCN-NEXT: s_endpgm +; SI-NEXT: s_endpgm +; VI-NEXT: s_endpgm +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} define void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -36,7 +39,7 @@ define void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 { ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] -; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]] +; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]] define void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -67,7 +70,7 @@ define void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] -; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]] +; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]] define void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll index a75267b8d69..53cb607d54f 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll @@ -22,8 +22,7 @@ define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 { ; GCN-LABEL: {{^}}v_fract_undef_f32: ; GCN-NOT: v_fract_f32 -; GCN-NOT: v0 -; GCN: buffer_store_dword v0 +; GCN-NOT: store_dword define void @v_fract_undef_f32(float addrspace(1)* %out) #1 { %fract = call float @llvm.amdgcn.fract.f32(float undef) store float %fract, float addrspace(1)* %out -- 2.40.0