assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
+ assert(LI->isUnordered() && "rules below are incorrect for ordered access");
const DataLayout &DL = LI->getModule()->getDataLayout();
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (Address) {
+ if (Address && !LI->isAtomic()) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
if (Offset != -1) {
LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (S->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::get(S->getValueOperand());
return true;
}
!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LD->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::getLoad(LD);
return true;
}
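
The added guards above all encode the same one-directional rule: a value may only be forwarded into a load if the defining access is at least as atomic as the load, and memory intrinsics (never atomic) may only feed non-atomic loads. A minimal sketch of that rule, with a hypothetical helper name that is not part of the patch:

    // Illustrative only: mirrors the "LI->isAtomic() <= Dep->isAtomic()"
    // comparisons above. Forwarding is rejected exactly when the load is
    // atomic but its dependency is not.
    static bool atomicityAllowsForwarding(bool DepIsAtomic, bool LoadIsAtomic) {
      return !LoadIsAtomic || DepIsAtomic;
    }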
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
return false;
+ // This code hasn't been audited for atomic, ordered, or volatile memory
+ // access.
+ if (!LI->isSimple())
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
if (!MD)
return false;
- if (!L->isSimple())
+ // This code hasn't been audited for ordered or volatile memory access
+ if (!L->isUnordered())
return false;
if (L->use_empty()) {
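
Note that the two bail-outs above are intentionally different: the non-local path (the "Step 1" hunk) still insists on a fully simple load, while the local path is only relaxed from isSimple() to isUnordered(). A rough sketch of what those LoadInst predicates mean, assuming the usual LLVM semantics (illustrative, not the in-tree implementation):

    #include "llvm/IR/Instructions.h"

    // Sketch: "simple" excludes every atomic or volatile access, while
    // "unordered" additionally admits atomic loads whose ordering is
    // 'unordered' (still no volatile, and nothing acquire or stronger).
    static bool isSimpleSketch(const llvm::LoadInst *L) {
      return !L->isAtomic() && !L->isVolatile();
    }
    static bool isUnorderedSketch(const llvm::LoadInst *L) {
      return !L->isVolatile() &&
             (!L->isAtomic() ||
              L->getOrdering() == llvm::AtomicOrdering::Unordered);
    }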
; CHECK: load i32, i32* @y, align 4
ret i32 %load
}
+
+; CHECK-LABEL: @test12(
+; Can't remove a load across an ordering barrier
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test13b(
+define i32 @test13b(i32* %P1) {
+ store atomic i32 0, i32* %P1 unordered, align 4
+ %b = load i32, i32* %P1
+ ret i32 %b
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to anything stronger
+; than unordered
+define i32 @test15(i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16b(
+define i32 @test16b(i32* %P1) {
+ store i32 0, i32* %P1
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a seq_cst store (it orders like a full fence)
+define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full singlethread fence
+define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_st(
+; CHECK: store
+; CHECK: fence singlethread seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence singlethread seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across inline asm with side effects
+define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across inline asm that clobbers memory
+define void @fence_asm_memory(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load(
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store(
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can forward a value from a volatile load to a non-volatile one
+define i32 @test20(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; We're currently conservative about widening
+define i64 @widen1(i32* %P1) {
+ ; CHECK-LABEL: @widen1(
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i64, i64* %p2
+ %p2 = bitcast i32* %P1 to i64*
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %b = load atomic i64, i64* %p2 unordered, align 4
+ %a64 = sext i32 %a to i64
+ %res = sub i64 %a64, %b
+ ret i64 %res
+}
+
+; narrowing does work
+define i64 @narrow(i32* %P1) {
+ ; CHECK-LABEL: @narrow(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK-NOT: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 unordered, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
+; Missed optimization: we don't yet optimize ordered loads
+define i64 @narrow2(i32* %P1) {
+ ; CHECK-LABEL: @narrow2(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 acquire, align 4
+ %b = load atomic i32, i32* %P1 acquire, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+