assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
+ assert(LI->isUnordered() && "rules below are incorrect for ordered access");
const DataLayout &DL = LI->getModule()->getDataLayout();
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
int Offset =
AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (Address) {
+ if (Address && !LI->isAtomic()) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
if (Offset != -1) {
LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (S->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::get(S->getValueOperand());
return true;
}
!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LD->isAtomic() < LI->isAtomic())
+ return false;
+
Res = AvailableValue::getLoad(LD);
return true;
}
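
The added guards above all encode the same one-directional rule: a value may only be forwarded into a load if the defining access is at least as atomic as the load, and memory intrinsics (never atomic) may only feed non-atomic loads. A minimal sketch of that rule, with a hypothetical helper name that is not part of the patch:

    // Illustrative only: mirrors the "LI->isAtomic() <= Dep->isAtomic()"
    // comparisons above. Forwarding is rejected exactly when the load is
    // atomic but its dependency is not.
    static bool atomicityAllowsForwarding(bool DepIsAtomic, bool LoadIsAtomic) {
      return !LoadIsAtomic || DepIsAtomic;
    }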
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
return false;
+ // This code hasn't been audited for atomic, ordered, or volatile memory
+ // access.
+ if (!LI->isSimple())
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
if (!MD)
return false;
- if (!L->isSimple())
+ // This code hasn't been audited for ordered or volatile memory access
+ if (!L->isUnordered())
return false;
if (L->use_empty()) {
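
Note that the two bail-outs above are intentionally different: the non-local path (the "Step 1" hunk) still insists on a fully simple load, while the local path is only relaxed from isSimple() to isUnordered(). A rough sketch of what those LoadInst predicates mean, assuming the usual LLVM semantics (illustrative, not the in-tree implementation):

    #include "llvm/IR/Instructions.h"

    // Sketch: "simple" excludes every atomic or volatile access, while
    // "unordered" additionally admits atomic loads whose ordering is
    // 'unordered' (still no volatile, and nothing acquire or stronger).
    static bool isSimpleSketch(const llvm::LoadInst *L) {
      return !L->isAtomic() && !L->isVolatile();
    }
    static bool isUnorderedSketch(const llvm::LoadInst *L) {
      return !L->isVolatile() &&
             (!L->isAtomic() ||
              L->getOrdering() == llvm::AtomicOrdering::Unordered);
    }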
; CHECK: load i32, i32* @y, align 4
ret i32 %load
}
+
+; CHECK-LABEL: @test12(
+; Can't remove a load across an ordering barrier
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test13b(
+define i32 @test13b(i32* %P1) {
+ store atomic i32 0, i32* %P1 unordered, align 4
+ %b = load i32, i32* %P1
+ ret i32 %b
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to anything stronger
+; than unordered
+define i32 @test15(i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16b(
+define i32 @test16b(i32* %P1) {
+ store i32 0, i32* %P1
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a seq_cst store (it orders like a full fence)
+define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full singlethread fence
+define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_st(
+; CHECK: store
+; CHECK: fence singlethread seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence singlethread seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across inline asm with side effects
+define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across inline asm that clobbers memory
+define void @fence_asm_memory(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load(
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store(
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can forward a value from a volatile load to a non-volatile one
+define i32 @test20(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; We're currently conservative about widening
+define i64 @widen1(i32* %P1) {
+ ; CHECK-LABEL: @widen1(
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i64, i64* %p2
+ %p2 = bitcast i32* %P1 to i64*
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %b = load atomic i64, i64* %p2 unordered, align 4
+ %a64 = sext i32 %a to i64
+ %res = sub i64 %a64, %b
+ ret i64 %res
+}
+
+; narrowing does work
+define i64 @narrow(i32* %P1) {
+ ; CHECK-LABEL: @narrow(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK-NOT: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 unordered, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
+; Missed optimization: we don't yet optimize ordered loads
+define i64 @narrow2(i32* %P1) {
+ ; CHECK-LABEL: @narrow2(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 acquire, align 4
+ %b = load atomic i32, i32* %P1 acquire, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+