NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {}
};
+ /// Cache storing single nonlocal def for the instruction.
+ /// It is set when nonlocal def would be found in function returning only
+ /// local dependencies.
+ DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache;
/// This map stores the cached results of doing a pointer lookup at the
/// bottom of a block.
///
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
/// find anything, and Def if it can be assumed that 2 instructions load or
- /// store the same value.
- /// FIXME: This analysis works only on single block because of restrictions
- /// at the call site.
+ /// store the same value and NonLocal which indicate that non-local Def was
+ /// found, which can be retrieved by calling getNonLocalPointerDependency
+ /// with the same queried instruction.
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB);
/// Looks at a memory location for a load (specified by MemLocBase, Offs, and
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
- MemDepResult InvariantGroupDependency =
- getInvariantGroupPointerDependency(LI, BB);
+ InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
if (InvariantGroupDependency.isDef())
return InvariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
- Limit);
+ MemDepResult SimpleDep = getSimplePointerDependencyFrom(
+ MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+ if (SimpleDep.isDef())
+ return SimpleDep;
+ // Non-local invariant group dependency indicates there is non local Def
+ // (it only returns nonLocal if it finds nonLocal def), which is better than
+ // local clobber and everything else.
+ if (InvariantGroupDependency.isNonLocal())
+ return InvariantGroupDependency;
+
+ assert(InvariantGroupDependency.isUnknown() &&
+ "InvariantGroupDependency should be only unknown at this point");
+ return SimpleDep;
}
MemDepResult
// Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue;
LoadOperandsQueue.push_back(LoadOperand);
+
+ Instruction *ClosestDependency = nullptr;
+ // Order of instructions in uses list is unpredictible. In order to always
+ // get the same result, we will look for the closest dominance.
+ auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
+ assert(Other && "Must call it with not null instruction");
+ if (Best == nullptr || DT.dominates(Best, Other))
+ return Other;
+ return Best;
+ };
+
+
+ // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
+ // we will see all the instructions. This should be fixed in MSSA.
while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr && !isa<GlobalValue>(Ptr) &&
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
- if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB &&
+ if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
- return MemDepResult::getDef(U);
+ ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
- return MemDepResult::getUnknown();
+
+ if (!ClosestDependency)
+ return MemDepResult::getUnknown();
+ if (ClosestDependency->getParent() == BB)
+ return MemDepResult::getDef(ClosestDependency);
+ // Def(U) can't be returned here because it is non-local. If local
+ // dependency won't be found then return nonLocal counting that the
+ // user will call getNonLocalPointerDependency, which will return cached
+ // result.
+ NonLocalDefsCache.try_emplace(
+ LI, NonLocalDepResult(ClosestDependency->getParent(),
+ MemDepResult::getDef(ClosestDependency), nullptr));
+ return MemDepResult::getNonLocal();
}
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+ {
+ // Check if there is cached Def with invariant.group. FIXME: cache might be
+ // invalid if cached instruction would be removed between call to
+ // getPointerDependencyFrom and this function.
+ auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
+ if (NonLocalDefIt != NonLocalDefsCache.end()) {
+ Result.push_back(std::move(NonLocalDefIt->second));
+ NonLocalDefsCache.erase(NonLocalDefIt);
+ return;
+ }
+ }
// This routine does not expect to deal with volatile instructions.
// Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
-; FIXME: those loads could be also direct, but right now the invariant.group
-; analysis works only on single block
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end
ret void
}
+; CHECK-LABEL: define void @handling_loops()
+define void @handling_loops() {
+ %a = alloca %struct.A, align 8
+ %1 = bitcast %struct.A* %a to i8*
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
+ %3 = load i8, i8* @unknownPtr, align 4
+ %4 = icmp sgt i8 %3, 0
+ br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
+
+.lr.ph.i: ; preds = %0
+ %5 = bitcast %struct.A* %a to void (%struct.A*)***
+ %6 = load i8, i8* @unknownPtr, align 4
+ %7 = icmp sgt i8 %6, 1
+ br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
+
+._crit_edge.preheader: ; preds = %.lr.ph.i
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
+ %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
+ %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
+ %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
+ ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+ call void %9(%struct.A* nonnull %a) #3
+ ; CHECK-NOT: call void %
+ %10 = add nuw nsw i8 %8, 1
+ %11 = load i8, i8* @unknownPtr, align 4
+ %12 = icmp slt i8 %10, %11
+ br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
+
+_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
+ br label %_Z2g2R1A.exit
+
+_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
+ ret void
+}
+
declare void @foo(i8*)
declare void @foo2(i8*, i8)
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
-; FIXME: those loads could be also direct, but right now the invariant.group
-; analysis works only on single block
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
-; CHECK-NOT: call i32 @_ZN1A3fooEv(
+; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end
ret void
}
+; CHECK-LABEL: define void @handling_loops()
+define void @handling_loops() {
+ %a = alloca %struct.A, align 8
+ %1 = bitcast %struct.A* %a to i8*
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
+ %3 = load i8, i8* @unknownPtr, align 4
+ %4 = icmp sgt i8 %3, 0
+ br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
+
+.lr.ph.i: ; preds = %0
+ %5 = bitcast %struct.A* %a to void (%struct.A*)***
+ %6 = load i8, i8* @unknownPtr, align 4
+ %7 = icmp sgt i8 %6, 1
+ br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
+
+._crit_edge.preheader: ; preds = %.lr.ph.i
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
+ %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
+ %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
+ %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
+; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+ call void %9(%struct.A* nonnull %a) #3
+
+; CHECK-NOT: call void %
+ %10 = add nuw nsw i8 %8, 1
+ %11 = load i8, i8* @unknownPtr, align 4
+ %12 = icmp slt i8 %10, %11
+ br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
+
+_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
+ br label %_Z2g2R1A.exit
+
+_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
+ ret void
+}
+
declare void @foo(i8*)
declare void @foo2(i8*, i8)