AMDGPU: Remove llvm.AMDGPU.cube intrinsic

author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 16 Feb 2017 19:09:04 +0000 (19:09 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 16 Feb 2017 19:09:04 +0000 (19:09 +0000)
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td

index ecce7a882f5b407435dfd536168970e92d9ac3c1..c154ba60a035f8a81470213fb86095fd0a818e87 100644 (file)
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -64,6 +64,10 @@ def int_r600_recipsqrt_clamped : Intrinsic<
    [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
  >;
  
+def int_r600_cube : Intrinsic<
+  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
+>;
+
  } // End TargetPrefix = "r600"
  
  let TargetPrefix = "amdgcn" in {
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td

index e3a21c35a58f62d848921622cedb7917a3416f91..d4263e2d878c69010b89f0981c17dac864dde76d 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -20,9 +20,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    // Deprecated in favor of llvm.amdgcn.sffbh
    def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
  
-  // Deprecated in favor of separate int_amdgcn_cube* intrinsics.
-  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
    // Deprecated in favor of expanded bit operations
    def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td

index 9210e66b0fe76e6ad54d75a353c76e0fca94ccf2..a5d1a0afb9fcce629e27021f375b5734173afc5f 100644 (file)
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1013,7 +1013,7 @@ multiclass CUBE_Common <bits<11> inst> {
      (outs R600_Reg128:$dst),
      (ins R600_Reg128:$src0),
      "CUBE $dst $src0",
-    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
+    [(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
      VecALU
    > {
      let isPseudo = 1;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td

index be82c9f03e7cfd00b10f0b452954ae7c09f65995..1d09880147d8a20c585727d27a911b1890b21812 100644 (file)
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -817,27 +817,6 @@ def : Pat <
  
  def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
  
-def : Pat <
-  (int_AMDGPU_cube v4f32:$src),
-  (REG_SEQUENCE VReg_128,
-    (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub0,
-    (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub1,
-    (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub2,
-    (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
-                  0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
-                  0 /* clamp */, 0 /* omod */), sub3)
->;
-
  def : Pat <
    (i32 (sext i1:$src0)),
    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
diff --git a/test/CodeGen/AMDGPU/cube.ll b/test/CodeGen/AMDGPU/cube.ll

index 9b512c439b0e5cc52dcb2adebe0d4108c67514c1..3c126a42fdd7936dddd1943f4319915b933d463b 100644 (file)
--- a/test/CodeGen/AMDGPU/cube.ll
+++ b/test/CodeGen/AMDGPU/cube.ll
@@ -6,9 +6,6 @@ declare float @llvm.amdgcn.cubesc(float, float, float) #0
  declare float @llvm.amdgcn.cubetc(float, float, float) #0
  declare float @llvm.amdgcn.cubema(float, float, float) #0
  
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
-
-
  ; GCN-LABEL: {{^}}cube:
  ; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
  ; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -29,18 +26,5 @@ define void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c)
    ret void
  }
  
-; GCN-LABEL: {{^}}legacy_cube:
-; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-; GCN: _store_dwordx4
-define void @legacy_cube(<4 x float> addrspace(1)* %out, <4 x float> %abcx) #1 {
-  %cube = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %abcx)
-  store <4 x float> %cube, <4 x float> addrspace(1)* %out
-  ret void
-}
-
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
-
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll b/test/CodeGen/AMDGPU/llvm.r600.cube.ll

similarity index 95%

rename from test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll

rename to test/CodeGen/AMDGPU/llvm.r600.cube.ll

index 78b88122229bb96d88d4d96f65df22636f15bafe..b5a0de95acf50daacbb516372c7280256c32965b 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll
+++ b/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -22,7 +22,7 @@ main_body:
    %tmp12 = insertelement <4 x float> %tmp11, float %tmp7, i32 1
    %tmp13 = insertelement <4 x float> %tmp12, float %tmp10, i32 2
    %tmp14 = insertelement <4 x float> %tmp13, float 1.000000e+00, i32 3
-  %tmp15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp14)
+  %tmp15 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp14)
    %tmp16 = extractelement <4 x float> %tmp15, i32 0
    %tmp17 = extractelement <4 x float> %tmp15, i32 1
    %tmp18 = extractelement <4 x float> %tmp15, i32 2
@@ -44,7 +44,7 @@ main_body:
  }
  
  ; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) #0
  
  ; Function Attrs: nounwind readnone
  declare float @llvm.fabs.f32(float) #0
diff --git a/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll b/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll

index 461caf5b5d204573b05ec8c9c741f9696e271b0a..e2143ff85b72e70c81458f4d71c926fa17599e04 100644 (file)
--- a/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
+++ b/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
@@ -10,7 +10,7 @@ main_body:
    %tmp6 = insertelement <4 x float> %tmp5, float %tmp2, i32 1
    %tmp7 = insertelement <4 x float> %tmp6, float %tmp3, i32 2
    %tmp8 = insertelement <4 x float> %tmp7, float %tmp4, i32 3
-  %tmp9 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp8)
+  %tmp9 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp8)
    %tmp10 = extractelement <4 x float> %tmp9, i32 0
    %tmp11 = extractelement <4 x float> %tmp9, i32 1
    %tmp12 = extractelement <4 x float> %tmp9, i32 2
@@ -45,7 +45,7 @@ main_body:
  }
  
  ; Function Attrs: readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) #0
  
  ; Function Attrs: readnone
  declare float @fabs(float) #0
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll

index 974823e8b8c76b5eba3be99699e1b6e3afb2e632..d71f872637b8c42d677bf7ad9fc3f30a6144d3f2 100644 (file)
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -586,7 +586,19 @@ IF67:                                             ; preds = %LOOP65
    %tmp449 = insertelement <4 x float> %tmp448, float %tmp445, i32 1
    %tmp450 = insertelement <4 x float> %tmp449, float %tmp447, i32 2
    %tmp451 = insertelement <4 x float> %tmp450, float %tmp194, i32 3
-  %tmp452 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp451)
+
+  %tmp451.x = extractelement <4 x float> %tmp451, i32 0
+  %tmp451.y = extractelement <4 x float> %tmp451, i32 1
+  %tmp451.z = extractelement <4 x float> %tmp451, i32 2
+  %cubetc = call float @llvm.amdgcn.cubetc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubesc = call float @llvm.amdgcn.cubesc(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubema = call float @llvm.amdgcn.cubema(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %cubeid = call float @llvm.amdgcn.cubeid(float %tmp451.x, float %tmp451.y, float %tmp451.z)
+  %tmp452.0 = insertelement <4 x float> undef, float %cubetc, i32 0
+  %tmp452.1 = insertelement <4 x float> %tmp452.0, float %cubesc, i32 1
+  %tmp452.2 = insertelement <4 x float> %tmp452.1, float %cubema, i32 2
+  %tmp452 = insertelement <4 x float> %tmp452.2, float %cubeid, i32 3
+
    %tmp453 = extractelement <4 x float> %tmp452, i32 0
    %tmp454 = extractelement <4 x float> %tmp452, i32 1
    %tmp455 = extractelement <4 x float> %tmp452, i32 2
@@ -1840,9 +1852,6 @@ declare float @llvm.amdgcn.rsq.f32(float) #0
  ; Function Attrs: nounwind readnone
  declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
  
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
-
  ; Function Attrs: nounwind readnone
  declare float @llvm.fabs.f32(float) #0
  
@@ -1863,6 +1872,11 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
  ; Function Attrs: nounwind readnone
  declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
  
+declare float @llvm.amdgcn.cubeid(float, float, float) #0
+declare float @llvm.amdgcn.cubesc(float, float, float) #0
+declare float @llvm.amdgcn.cubetc(float, float, float) #0
+declare float @llvm.amdgcn.cubema(float, float, float) #0
+
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 16 Feb 2017 19:09:04 +0000 (19:09 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 16 Feb 2017 19:09:04 +0000 (19:09 +0000)
include/llvm/IR/IntrinsicsAMDGPU.td		patch | blob | history
lib/Target/AMDGPU/AMDGPUIntrinsics.td		patch | blob | history
lib/Target/AMDGPU/R600Instructions.td		patch | blob | history
lib/Target/AMDGPU/SIInstructions.td		patch | blob | history
test/CodeGen/AMDGPU/cube.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.r600.cube.ll	[moved from test/CodeGen/AMDGPU/llvm.AMDGPU.cube.ll with 95% similarity]	patch | blob | history
test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll		patch | blob | history
test/CodeGen/AMDGPU/si-sgpr-spill.ll		patch | blob | history