From: Jun Bum Lim Date: Wed, 4 Oct 2017 18:33:52 +0000 (+0000) Subject: Recommit : Use the basic cost if a GEP is not used as addressing mode X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e3f6227d5695b42022c6187c255229c24dceb4e0;p=llvm Recommit : Use the basic cost if a GEP is not used as addressing mode Recommitting r314517 with the fix for handling ConstantExpr. Original commit message: Currently, getGEPCost() returns TCC_FREE whenever a GEP is a legal addressing mode in the target. However, since it doesn't check its actual users, it will return FREE even in cases where the GEP cannot be folded away as a part of actual addressing mode. For example, if a user of the GEP is a call instruction taking the GEP as a parameter, then the GEP may not be folded in isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314923 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index afc16e89da6..10ecf64a39c 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -193,6 +193,13 @@ public: int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) const; + /// \brief Estimate the cost of a GEP operation when lowered. + /// + /// This user-based overload adds the ability to check if the GEP can be + /// folded into its users. + int getGEPCost(const GEPOperator *GEP, + ArrayRef<const Value *> Operands) const; + /// \brief Estimate the cost of a EXT operation when lowered. /// /// The contract for this function is the same as \c getOperationCost except @@ -251,9 +258,9 @@ public: /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when - /// lowered. 
It has two primary advantages over the \c getOperationCost and - /// \c getGEPCost above, and one significant disadvantage: it can only be - /// used when the IR construct has already been formed. + /// lowered. It has two primary advantages over the \c getOperationCost above, + /// and one significant disadvantage: it can only be used when the IR + /// construct has already been formed. /// /// The advantages are that it can inspect the SSA use graph to reason more /// accurately about the cost. For example, all-constant-GEPs can often be @@ -932,6 +939,8 @@ public: virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) = 0; + virtual int getGEPCost(const GEPOperator *GEP, + ArrayRef<const Value *> Operands) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; virtual int getCallCost(const Function *F, int NumArgs) = 0; @@ -1113,6 +1122,10 @@ public: ArrayRef<const Value *> Operands) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } + int getGEPCost(const GEPOperator *GEP, + ArrayRef<const Value *> Operands) override { + return Impl.getGEPCost(GEP, Operands); + } int getExtCost(const Instruction *I, const Value *Src) override { return Impl.getExtCost(I, Src); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index b3b3e07b4dc..5819b2eb9f4 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -726,6 +726,38 @@ public: return TTI::TCC_Basic; } + int getGEPCost(const GEPOperator *GEP, ArrayRef<const Value *> Operands) { + if (!isa<Instruction>(GEP)) + return TTI::TCC_Basic; + + Type *PointeeType = GEP->getSourceElementType(); + const Value *Ptr = GEP->getPointerOperand(); + + if (getGEPCost(PointeeType, Ptr, Operands) == TTI::TCC_Free) { + // Should check if the GEP is actually used in load / store instructions. 
+ // For simplicity, we check only direct users of the GEP. + // + // FIXME: GEPs could also be folded away as a part of addressing mode in + // load/store instructions together with other instructions (e.g., other + // GEPs). Handling all such cases must be expensive to be performed + // in this function, so we stay conservative for now. + for (const User *U : GEP->users()) { + const Operator *UOP = cast<Operator>(U); + const Value *PointerOperand = nullptr; + if (auto *LI = dyn_cast<LoadInst>(UOP)) + PointerOperand = LI->getPointerOperand(); + else if (auto *SI = dyn_cast<StoreInst>(UOP)) + PointerOperand = SI->getPointerOperand(); + + if ((!PointerOperand || PointerOperand != GEP) && + !GEP->hasAllZeroIndices()) + return TTI::TCC_Basic; + } + return TTI::TCC_Free; + } + return TTI::TCC_Basic; + } + using BaseT::getIntrinsicCost; unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -749,11 +781,9 @@ public: if (A->isStaticAlloca()) return TTI::TCC_Free; - if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { - return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), - GEP->getPointerOperand(), + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) + return static_cast<T *>(this)->getGEPCost(GEP, Operands.drop_front()); - } if (auto CS = ImmutableCallSite(U)) { const Function *F = CS.getCalledFunction(); diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 0334ed9eacb..72f45ecd3dc 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -189,6 +189,11 @@ public: return BaseT::getGEPCost(PointeeType, Ptr, Operands); } + int getGEPCost(const GEPOperator *GEP, + ArrayRef<const Value *> Operands) { + return BaseT::getGEPCost(GEP, Operands); + } + int getExtCost(const Instruction *I, const Value *Src) { if (getTLI()->isExtFree(I)) return TargetTransformInfo::TCC_Free; diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 54e1165a111..84ab4f36444 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -456,6 
+456,8 @@ public: if (ConstantInt *C = dyn_cast<ConstantInt>(I)) if (C->isZero()) continue; + if (isa<ConstantAggregateZero>(I)) + continue; return false; } return true; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index fad918dabb5..154021a51b9 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -88,6 +88,11 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); } +int TargetTransformInfo::getGEPCost(const GEPOperator *GEP, + ArrayRef<const Value *> Operands) const { + return TTIImpl->getGEPCost(GEP, Operands); +} + int TargetTransformInfo::getExtCost(const Instruction *I, const Value *Src) const { return TTIImpl->getExtCost(I, Src); diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index d0bfe360389..3d5a513e3d3 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -264,7 +264,7 @@ static bool isGEPFoldable(GetElementPtrInst *GEP, SmallVector<const Value*, 4> Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast<GEPOperator>(GEP), Indices) == TargetTransformInfo::TCC_Free; } diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 8b8d6590aa6..3f93eaecc5d 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -239,7 +239,7 @@ static bool isGEPFoldable(GetElementPtrInst *GEP, SmallVector<const Value*, 4> Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast<GEPOperator>(GEP), Indices) == TargetTransformInfo::TCC_Free; } diff --git a/test/Analysis/CostModel/AArch64/gep.ll 
b/test/Analysis/CostModel/AArch64/gep.ll index 08bfc3d2123..594be51f91c 100644 --- a/test/Analysis/CostModel/AArch64/gep.ll +++ b/test/Analysis/CostModel/AArch64/gep.ll @@ -290,3 +290,49 @@ define i64 @test36(i64* %p) { %v = load i64, i64* %a ret i64 %v } + +; CHECK-LABEL: test37 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test37(i64 %j, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 %j + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL: test38 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test38(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test39 +; CHECK: cost of 0 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test39(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 0 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test40 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8** @test40(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + ret i8** %arrayidx0 +} + +; CHECK-LABEL:test41 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8, i8* +define i8 @test41(i8* %V, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8, i8* %V, i64 10 + store i8* %arrayidx0, i8** %P + ret i8 0 +} + +declare i8* @func(i8**) diff --git a/test/Analysis/CostModel/X86/vector_gep.ll b/test/Analysis/CostModel/X86/vector_gep.ll index 17f70dfc7a7..5c00512ba34 100644 --- a/test/Analysis/CostModel/X86/vector_gep.ll +++ b/test/Analysis/CostModel/X86/vector_gep.ll @@ -10,7 +10,7 @@ define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer ;CHECK: cost of 0 for 
instruction: {{.*}} getelementptr inbounds %struct.S %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds [1000 x i32] %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) ret <4 x i32> %res diff --git a/test/Transforms/SimplifyCFG/SpeculativeExecGepCE.ll b/test/Transforms/SimplifyCFG/SpeculativeExecGepCE.ll new file mode 100644 index 00000000000..46b91fa4ccb --- /dev/null +++ b/test/Transforms/SimplifyCFG/SpeculativeExecGepCE.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -simplifycfg -phi-node-folding-threshold=0 -S | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +@d_buf = internal constant [8 x i8] [i8 126, i8 127, i8 128, i8 129, i8 130, i8 131, i8 132, i8 133], align 8 +@a = internal constant { i8*, i64} {i8* getelementptr inbounds ([8 x i8], [8 x i8]* @d_buf, i64 0, i64 0), i64 0} + +; CHECK-LABEL: @test +; CHECK-LABEL: end: +; CHECK: %x1 = phi i8* +define i8* @test(i1* %dummy, i8* %a, i8* %b, i8 %v) { + +entry: + %cond1 = load volatile i1, i1* %dummy + br i1 %cond1, label %if, label %end + +if: + %cond2 = load volatile i1, i1* %dummy + br i1 %cond2, label %then, label %end + +then: + br label %end + +end: + %x1 = phi i8* [ %a, %entry ], [ %b, %if ], [getelementptr inbounds ([8 x i8], [8 x i8]* @d_buf, i64 0, i64 0) , %then ] + + ret i8* %x1 +}