public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI,
- bool SpeculateSymbolicStrides);
+ DominatorTree *DT, LoopInfo *LI);
/// Return true if we can analyze the memory accesses in the loop and there
/// are no memory dependence cycles.
/// \brief Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
- /// \brief Used to ensure that if the analysis was run with speculating the
- /// value of symbolic strides, the client queries it with the same assumption.
- /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
- bool SpeculateSymbolicStrides;
-
/// \brief Checks for the existence of a store to an invariant address inside
/// the loop. Returns true if the loop contains a store to an invariant
/// address, and false otherwise.
/// \brief Query the result of the loop access information for the loop \p L.
///
- /// \p SpeculateSymbolicStrides enables symbolic value speculation. The
- /// corresponding run-time checks are collected in LAI::PSE.
- ///
/// If there is no cached result available, run the analysis.
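+///
+/// A minimal usage sketch (the enclosing loop pass \c MyLoopPass is
+/// hypothetical; only the LoopAccessAnalysis/LoopAccessInfo calls below are
+/// part of this interface):
+/// \code
+///   bool MyLoopPass::runOnLoop(Loop *L, LPPassManager &) {
+///     auto &LAA = getAnalysis<LoopAccessAnalysis>();
+///     const LoopAccessInfo &LAI = LAA.getInfo(L);
+///     if (!LAI.canVectorizeMemory())
+///       return false; // unanalyzable accesses or unsafe dependences
+///     // Run-time checks, if any, are available from
+///     // LAI.getRuntimePointerChecking().
+///     return true;
+///   }
+/// \endcode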
- const LoopAccessInfo &getInfo(Loop *L, bool SpeculateSymbolicStrides = false);
+ const LoopAccessInfo &getInfo(Loop *L);
void releaseMemory() override {
// Invalidate the cache when the pass is freed.
"loop-access analysis (default = 100)"),
cl::init(100));
+/// This enables versioning on the strides of symbolically striding memory
+/// accesses in code like the following.
+///   for (i = 0; i < N; ++i)
+///     A[i * Stride1] += B[i * Stride2] ...
+///
+/// Will be roughly translated to
+///   if (Stride1 == 1 && Stride2 == 1) {
+///     for (i = 0; i < N; i+=4)
+///       A[i:i+3] += ...
+///   } else
+///     ...
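+///
+/// The run-time check for each speculated stride compares it against 1; at
+/// the IR level this looks roughly like (cf. the lit tests in this patch):
+///   %ident.check = icmp ne i64 %stride, 1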
+static cl::opt<bool> EnableMemAccessVersioning(
+ "enable-mem-access-versioning", cl::init(true), cl::Hidden,
+ cl::desc("Enable symbolic stride memory access versioning"));
+
/// \brief Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
NumLoads++;
Loads.push_back(Ld);
DepChecker.addAccess(Ld);
- if (SpeculateSymbolicStrides)
+ if (EnableMemAccessVersioning)
collectStridedAccess(Ld);
continue;
}
NumStores++;
Stores.push_back(St);
DepChecker.addAccess(St);
- if (SpeculateSymbolicStrides)
+ if (EnableMemAccessVersioning)
collectStridedAccess(St);
}
} // Next instr.
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT, LoopInfo *LI,
- bool SpeculateSymbolicStrides)
- : SpeculateSymbolicStrides(SpeculateSymbolicStrides), PSE(*SE, *L),
- PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI),
- AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ DominatorTree *DT, LoopInfo *LI)
+ : PSE(*SE, *L), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
PSE.print(OS, Depth);
}
-const LoopAccessInfo &
-LoopAccessAnalysis::getInfo(Loop *L, bool SpeculateSymbolicStrides) {
+const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
-#ifndef NDEBUG
- assert((!LAI || LAI->SpeculateSymbolicStrides == SpeculateSymbolicStrides) &&
- "Symbolic strides changed for loop");
-#endif
-
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
- SpeculateSymbolicStrides);
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI);
}
return *LAI.get();
}
return false;
}
// Get the LoopAccessInfo for the current loop.
- LAI = &LAA->getInfo(CurLoop, true);
+ LAI = &LAA->getInfo(CurLoop);
// Check the LoopAccessInfo to see whether a runtime check is needed.
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
cl::desc("Maximize bandwidth when selecting vectorization factor which "
"will be determined by the smallest type in loop."));
-/// This enables versioning on the strides of symbolically striding memory
-/// accesses in code like the following.
-/// for (i = 0; i < N; ++i)
-/// A[i * Stride1] += B[i * Stride2] ...
-///
-/// Will be roughly translated to
-/// if (Stride1 == 1 && Stride2 == 1) {
-/// for (i = 0; i < N; i+=4)
-/// A[i:i+3] += ...
-/// } else
-/// ...
-static cl::opt<bool> EnableMemAccessVersioning(
- "enable-mem-access-versioning", cl::init(true), cl::Hidden,
- cl::desc("Enable symbolic stride memory access versioning"));
-
static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
}
bool LoopVectorizationLegality::canVectorizeMemory() {
- LAI = &LAA->getInfo(TheLoop, EnableMemAccessVersioning);
+ LAI = &LAA->getInfo(TheLoop);
auto &OptionalReport = LAI->getReport();
if (OptionalReport)
emitAnalysis(VectorizationReport(*OptionalReport));
--- /dev/null
+; RUN: opt -basicaa -loop-distribute -S < %s | \
+; RUN: FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC
+
+; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \
+; RUN: FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC
+
+; If we don't speculate the stride to be 1, we can't distribute across the
+; line below because we could have a backward dependence:
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * B[i];
+; =======================
+; C[i] = D[i] * A[stride * i];
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; ALL-LABEL: @f(
+define void @f(i32* noalias %a,
+ i32* noalias %b,
+ i32* noalias %c,
+ i32* noalias %d,
+ i64 %stride) {
+entry:
+ br label %for.body
+
+; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
+; STRIDE_SPEC: for.body.ldist1:
+; NO_STRIDE_SPEC-NOT: for.body.ldist1:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %mul = mul i64 %ind, %stride
+ %arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul
+ %loadStridedA = load i32, i32* %arrayidxStridedA, align 4
+
+ %mulC = mul i32 %loadD, %loadStridedA
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
-; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -loop-load-elim -S < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN: -check-prefix=TWO_STRIDE_SPEC
-; Forwarding in the presence of symbolic strides is currently not supported:
+; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \
+; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
+
+; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
+
+; Forwarding in the presence of symbolic strides:
;
; for (unsigned i = 0; i < 100; i++)
; A[i + 1] = A[Stride * i] + B[i];
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-; CHECK-LABEL: @f(
+; ALL-LABEL: @f(
define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
i64 %stride) {
+
+; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
entry:
-; CHECK-NOT: %load_initial = load i32, i32* %A
+; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A
br label %for.body
for.body: ; preds = %for.body, %entry
-; CHECK-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%mul = mul i64 %indvars.iv, %stride
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
%load = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%load_1 = load i32, i32* %arrayidx2, align 4
-; CHECK-NOT: %add = add i32 %load_1, %store_forwarded
+; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
%add = add i32 %load_1, %load
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
for.end: ; preds = %for.body
ret void
}
+
+; With two symbolic strides:
+;
+; for (unsigned i = 0; i < 100; i++)
+; A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i];
+
+; ALL-LABEL: @two_strides(
+define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
+ i64 %stride.1, i64 %stride.2) {
+
+; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1
+; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1
+; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1
+
+entry:
+; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %mul = mul i64 %indvars.iv, %stride.1
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %mul.2 = mul i64 %indvars.iv.next, %stride.2
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}