// Get a scalar native builtin single argument FP function
Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
+  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
+    return nullptr;
  FuncInfo nf = FInfo;
  nf.setPrefix(AMDGPULibFunc::NATIVE);
  return getFunction(M, nf);
}
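
// For example: with the NATIVE prefix set, a lookup for sqrt(float)
// (mangled _Z4sqrtf, as in the tests below) resolves to native_sqrt(float)
// instead; under the same Itanium scheme that would be _Z11native_sqrtf.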
// fold sqrt -> native_sqrt(x)
bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
-  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
-       getArgType(FInfo) == AMDGPULibFunc::F64) &&
-      (getVecSize(FInfo) == 1) &&
+  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
    if (Constant *FPExpr = getNativeFunction(
        CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
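      // A minimal sketch of the rest of the fold, assuming this pass's
      // replaceCall() helper; the statement names here are illustrative,
      // not verbatim from the patch:
      Value *NativeCall = B.CreateCall(FPExpr, CI->getArgOperand(0));
      replaceCall(NativeCall); // rewrite uses of the sqrt call, then erase it
      return true;
    }
  }
  return false;
}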

  ret void
}
+; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
+; GCN: tail call fast double @_Z4sqrtd(double %tmp)
+define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
+entry:
+ %tmp = load double, double addrspace(1)* %a, align 8
+ %call = tail call fast double @_Z4sqrtd(double %tmp)
+ store double %call, double addrspace(1)* %a, align 8
+ ret void
+}
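; For contrast, a minimal sketch of the f32 case that still folds to the
; native variant (illustrative, not part of this diff; the test name and the
; _Z11native_sqrtf mangling are assumptions):
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt_fast_f32
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt_fast_f32(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}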
+
declare float @_Z4sqrtf(float)
+declare double @_Z4sqrtd(double)
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
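; A sketch of the matching test body (assuming rsqrt(float) mangles to
; _Z5rsqrtf; illustrative, not verbatim from the test file):
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)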