if (getUserCost(I, Operands) == TTI::TCC_Free)
return 0;
- if (isa<CallInst>(I))
- return 40;
-
if (isa<LoadInst>(I))
return 4;
- Type *dstTy = I->getType();
- if (VectorType *VectorTy = dyn_cast<VectorType>(dstTy))
- dstTy = VectorTy->getElementType();
- if (dstTy->isFloatingPointTy())
+ Type *DstTy = I->getType();
+
+ // Usually an intrinsic is a simple instruction.
+ // A real function call is much slower.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ const Function *F = CI->getCalledFunction();
+ if (!F || static_cast<T *>(this)->isLoweredToCall(F))
+ return 40;
+ // Some intrinsics return a value and a flag; we use the value type
+ // to decide its latency.
+ if (StructType *StructTy = dyn_cast<StructType>(DstTy))
+ DstTy = StructTy->getElementType(0);
+ // Fall through to simple instructions.
+ }
+
+ if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
+ DstTy = VectorTy->getElementType();
+ if (DstTy->isFloatingPointTy())
return 3;
return 1;
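
For context (not part of this patch), a minimal sketch of how a client reaches this latency model. The helper name queryLatency is hypothetical; it assumes the TargetTransformInfo::getInstructionCost entry point with the TCK_Latency cost kind, which dispatches to the getInstructionLatency implementation patched above.

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instruction.h"

  using namespace llvm;

  // Hypothetical helper: with this patch, an intrinsic that is not lowered to
  // a real call (e.g. llvm.uadd.with.overflow.i32) is costed like a simple
  // integer operation (1) rather than as a call (40).
  static int queryLatency(const TargetTransformInfo &TTI, const Instruction *I) {
    return TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  }
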
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
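+; The intrinsic returns { i32, i1 }; the latency model costs it by the first
+; element's type (i32), so it is treated as a simple integer operation.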
+
define i64 @foo(i64 %arg) {
; LATENCY: cost of 0 {{.*}} trunc
; CODESIZE: cost of 0 {{.*}} trunc
%TC = trunc i64 undef to i32
+ ; LATENCY: cost of 1 {{.*}} call
+ ; CODESIZE: cost of 1 {{.*}} call
+ %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+
; LATENCY: cost of 1 {{.*}} ret
; CODESIZE: cost of 1 {{.*}} ret
ret i64 undef