DAG: Fold bitcast/extract_vector_elt of undef to undef

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 40ce46a0d7983167c833c26bd9fee0a95927d8cf..bc051f07bb221a153f9b31ef9d7de38464421811 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8320,6 +8320,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
    // If the input is a BUILD_VECTOR with all constant elements, fold this now.
    // Only do this before legalize, since afterward the target may be depending
    // on the bitconvert.
@@ -13189,6 +13192,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    EVT VT = InVec.getValueType();
    EVT NVT = N->getValueType(0);
  
+  if (InVec.isUndef())
+    return DAG.getUNDEF(NVT);
+
    if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      // Check if the result type doesn't match the inserted element type. A
      // SCALAR_TO_VECTOR may truncate the inserted element and the
diff --git a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll

index 3a55870c2882313c4a15f74b244bb295c264d342..04384bf451e54a1591029af3d764e6de5d76c487 100644 (file)
--- a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -67,3 +67,27 @@ define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %o
    store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
    ret void
  }
+
+; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
+; GCN-NOT: store_dword
+define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+  %bc = bitcast i64 %undef to <2 x i32>
+  store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
+; GCN-NOT: store_dword
+define void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
+  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
+  %bc = bitcast i64 %undef to <2 x i32>
+  %elt1 = extractelement <2 x i32> %bc, i32 1
+  store volatile i32 %elt1, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone convergent }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll

index beada53a220bcabcb91bd369ef99fc89ffac16b8..24a897521f7d3f78e8d82e425842b19bdce397a7 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
@@ -1,13 +1,13 @@
  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
  
  ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
  ; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
  ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
  ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
  ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
-; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
+; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
  define void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
    %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
    store <2 x half> %result, <2 x half> addrspace(1)* %out
@@ -23,9 +23,12 @@ define void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float
    ret void
  }
  
+; FIXME: Folds to 0 on gfx9
  ; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef:
  ; GCN-NEXT: ; BB#0
-; GCN-NEXT: s_endpgm
+; SI-NEXT: s_endpgm
+; VI-NEXT: s_endpgm
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
  define void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
    %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
    store <2 x half> %result, <2 x half> addrspace(1)* %out
@@ -36,7 +39,7 @@ define void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
  ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
  ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
-; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]]
+; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]]
  define void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %tid.ext = sext i32 %tid to i64
@@ -67,7 +70,7 @@ define void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float
  ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
  ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
  ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
-; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]]
+; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]]
  define void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %tid.ext = sext i32 %tid to i64
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll

index a75267b8d6933a1787b201c504df25d1e5fb177f..53cb607d54fd0ad041fd6ec1ddd93f9daefc4e67 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
@@ -22,8 +22,7 @@ define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 {
  
  ; GCN-LABEL: {{^}}v_fract_undef_f32:
  ; GCN-NOT: v_fract_f32
-; GCN-NOT: v0
-; GCN: buffer_store_dword v0
+; GCN-NOT: store_dword
  define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
    %fract = call float @llvm.amdgcn.fract.f32(float undef)
    store float %fract, float addrspace(1)* %out
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 21 Mar 2017 16:20:16 +0000 (16:20 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/AMDGPU/bitcast-vector-extract.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll		patch | blob | history