AMDGPU: Fold fneg into round instructions

author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 26 Jan 2017 01:25:36 +0000 (01:25 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 26 Jan 2017 01:25:36 +0000 (01:25 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index d92178328ff0baafe140a0d1195ffcf07bb29f90..c9c44bf9d8092ae9f9624b842db976931dcc3b59 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -492,6 +492,9 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
    case ISD::FMA:
    case ISD::FMAD:
    case ISD::FSIN:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
    case AMDGPUISD::RCP:
    case AMDGPUISD::RCP_LEGACY:
    case AMDGPUISD::SIN_HW:
@@ -2924,9 +2927,12 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
      return Res;
    }
    case ISD::FP_EXTEND:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT: // XXX - Should fround be handled?
+  case ISD::FSIN:
    case AMDGPUISD::RCP:
    case AMDGPUISD::RCP_LEGACY:
-  case ISD::FSIN:
    case AMDGPUISD::SIN_HW: {
      SDValue CvtSrc = N0.getOperand(0);
      if (CvtSrc.getOpcode() == ISD::FNEG) {
diff --git a/test/CodeGen/AMDGPU/fneg-combines.ll b/test/CodeGen/AMDGPU/fneg-combines.ll

index 040f1eceed5876d47fa345d917e224634d6994a3..37115e795e8d3d3bd8f955de088da190c12100e8 100644 (file)
--- a/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1327,7 +1327,91 @@ define void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)*
    %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
    %a = load volatile float, float addrspace(1)* %a.gep
    %sin = call float @llvm.amdgcn.sin.f32(float %a)
-  %fneg = fsub float -0.000000e+00, %sin
+  %fneg = fsub float -0.0, %sin
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; --------------------------------------------------------------------------------
+; ftrunc tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_trunc_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %trunc = call float @llvm.trunc.f32(float %a)
+  %fneg = fsub float -0.0, %trunc
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; --------------------------------------------------------------------------------
+; fround tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_round_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_trunc_f32_e32
+; GCN: v_subrev_f32_e32
+; GCN: v_cndmask_b32
+; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, v{{[0-9]+}}
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %round = call float @llvm.round.f32(float %a)
+  %fneg = fsub float -0.0, %round
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; --------------------------------------------------------------------------------
+; rint tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_rint_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %rint = call float @llvm.rint.f32(float %a)
+  %fneg = fsub float -0.0, %rint
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; --------------------------------------------------------------------------------
+; nearbyint tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %nearbyint = call float @llvm.nearbyint.f32(float %a)
+  %fneg = fsub float -0.0, %nearbyint
    store float %fneg, float addrspace(1)* %out.gep
    ret void
  }
@@ -1336,6 +1420,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
  declare float @llvm.fma.f32(float, float, float) #1
  declare float @llvm.fmuladd.f32(float, float, float) #1
  declare float @llvm.sin.f32(float) #1
+declare float @llvm.trunc.f32(float) #1
+declare float @llvm.round.f32(float) #1
+declare float @llvm.rint.f32(float) #1
+declare float @llvm.nearbyint.f32(float) #1
  
  declare float @llvm.amdgcn.sin.f32(float) #1
  declare float @llvm.amdgcn.rcp.f32(float) #1
diff --git a/test/CodeGen/AMDGPU/frem.ll b/test/CodeGen/AMDGPU/frem.ll

index 039623c02194f9f0747f5ee89d0e3324a585a937..97533c418c94f862494564a977c39b29191bf13d 100644 (file)
--- a/test/CodeGen/AMDGPU/frem.ll
+++ b/test/CodeGen/AMDGPU/frem.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
  
  ; FUNC-LABEL: {{^}}frem_f32:
  ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
@@ -12,8 +12,8 @@
  ; GCN: v_mul_f32_e32
  ; GCN: v_div_fmas_f32
  ; GCN: v_div_fixup_f32
-; GCN: v_trunc_f32_e32
-; GCN: v_mad_f32
+; GCN: v_trunc_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}
+; GCN: v_mac_f32_e32
  ; GCN: s_endpgm
  define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                        float addrspace(1)* %in2) #0 {
@@ -28,11 +28,11 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
  ; FUNC-LABEL: {{^}}unsafe_frem_f32:
  ; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
  ; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
-; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; GCN: v_rcp_f32_e64 [[INVY:v[0-9]+]], -[[Y]]
  ; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
  ; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
-; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: v_mac_f32_e32 [[X]], [[Y]], [[TRUNC]]
+; GCN: buffer_store_dword [[X]]
  ; GCN: s_endpgm
  define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                               float addrspace(1)* %in2) #1 {
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll

index 011a0fdbd219b2cb71d5d9fa48dce7c6760da0d7..3798c46677f06a8ec0eec0fbc3820132b4cbb428 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
@@ -46,8 +46,8 @@ entry:
  ; GCN-LABEL: {{^}}class_f16_fneg
  ; GCN: s_load_dword s[[SA_F16:[0-9]+]]
  ; GCN: s_load_dword s[[SB_I32:[0-9]+]]
-; VI:  v_trunc_f16_e32 v[[VA_F16:[0-9]+]], s[[SA_F16]]
-; VI:  v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -v[[VA_F16]], s[[SB_I32]]
+; VI:  v_trunc_f16_e64 v[[VA_F16:[0-9]+]], -s[[SA_F16]]
+; VI:  v_cmp_class_f16_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v[[VA_F16]], s[[SB_I32]]
  ; VI:  v_cndmask_b32_e64 v[[VR_I32:[0-9]+]], 0, -1, [[CMP]]
  ; GCN: buffer_store_dword v[[VR_I32]]
  ; GCN: s_endpgm
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 26 Jan 2017 01:25:36 +0000 (01:25 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 26 Jan 2017 01:25:36 +0000 (01:25 +0000)
lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch | blob | history
test/CodeGen/AMDGPU/fneg-combines.ll		patch | blob | history
test/CodeGen/AMDGPU/frem.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll		patch | blob | history