[LICM] Make store promotion work in the face of unordered atomics

author Philip Reames <listmail@philipreames.com>

Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)

committer Philip Reames <listmail@philipreames.com>

Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)
author Philip Reames <listmail@philipreames.com>
Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)
committer Philip Reames <listmail@philipreames.com>
Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp

index f3a27ceacf0c812a3e3cabf831aa50d26d0d70aa..2035da02db0bd4d16c1d84581f896d2537d86683 100644 (file)
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -917,6 +917,7 @@ class LoopPromoter : public LoadAndStorePromoter {
    LoopInfo &LI;
    DebugLoc DL;
    int Alignment;
+  bool UnorderedAtomic;
    AAMDNodes AATags;
  
    Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -940,10 +941,11 @@ public:
                 SmallVectorImpl<BasicBlock *> &LEB,
                 SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
                 AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
-               const AAMDNodes &AATags)
+               bool UnorderedAtomic, const AAMDNodes &AATags)
        : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
          LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
-        LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+        LI(li), DL(std::move(dl)), Alignment(alignment),
+        UnorderedAtomic(UnorderedAtomic),AATags(AATags) {}
  
    bool isInstInList(Instruction *I,
                      const SmallVectorImpl<Instruction *> &) const override {
@@ -967,6 +969,8 @@ public:
        Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
        Instruction *InsertPos = LoopInsertPts[i];
        StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+      if (UnorderedAtomic)
+        NewSI->setOrdering(AtomicOrdering::Unordered);
        NewSI->setAlignment(Alignment);
        NewSI->setDebugLoc(DL);
        if (AATags)
@@ -1057,6 +1061,9 @@ bool llvm::promoteLoopAccessesToScalars(
    // We start with an alignment of one and try to find instructions that allow
    // us to prove better alignment.
    unsigned Alignment = 1;
+  // Keep track of which types of access we see
+  bool SawUnorderedAtomic = false; 
+  bool SawNotAtomic = false;
    AAMDNodes AATags;
  
    const DataLayout &MDL = Preheader->getModule()->getDataLayout();
@@ -1114,8 +1121,11 @@ bool llvm::promoteLoopAccessesToScalars(
        // it.
        if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
          assert(!Load->isVolatile() && "AST broken");
-        if (!Load->isSimple())
+        if (!Load->isUnordered())
            return false;
+        
+        SawUnorderedAtomic |= Load->isAtomic();
+        SawNotAtomic |= !Load->isAtomic();
  
          if (!DereferenceableInPH)
            DereferenceableInPH = isSafeToExecuteUnconditionally(
@@ -1126,9 +1136,12 @@ bool llvm::promoteLoopAccessesToScalars(
          if (UI->getOperand(1) != ASIV)
            continue;
          assert(!Store->isVolatile() && "AST broken");
-        if (!Store->isSimple())
+        if (!Store->isUnordered())
            return false;
  
+        SawUnorderedAtomic |= Store->isAtomic();
+        SawNotAtomic |= !Store->isAtomic();
+
          // If the store is guaranteed to execute, both properties are satisfied.
          // We may want to check if a store is guaranteed to execute even if we
          // already know that promotion is safe, since it may have higher
@@ -1181,6 +1194,12 @@ bool llvm::promoteLoopAccessesToScalars(
      }
    }
  
+  // If we found both an unordered atomic instruction and a non-atomic memory
+  // access, bail.  We can't blindly promote non-atomic to atomic since we
+  // might not be able to lower the result.  We can't downgrade since that
+  // would violate memory model.  Also, align 0 is an error for atomics.
+  if (SawUnorderedAtomic && SawNotAtomic)
+    return false;
  
    // If we couldn't prove we can hoist the load, bail.
    if (!DereferenceableInPH)
@@ -1224,12 +1243,15 @@ bool llvm::promoteLoopAccessesToScalars(
    SmallVector<PHINode *, 16> NewPHIs;
    SSAUpdater SSA(&NewPHIs);
    LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+                        InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+                        SawUnorderedAtomic, AATags);
  
    // Set up the preheader to have a definition of the value.  It is the live-out
    // value from the preheader that uses in the loop will use.
    LoadInst *PreheaderLoad = new LoadInst(
        SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+  if (SawUnorderedAtomic)
+    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
    PreheaderLoad->setAlignment(Alignment);
    PreheaderLoad->setDebugLoc(DL);
    if (AATags)
diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll

index 919d1bdd114c5b3f22dde2dd745f9c4be2f11c48..15c461aeca27509f151e138042e2c91600f31dab 100644 (file)
--- a/test/Transforms/LICM/atomics.ll
+++ b/test/Transforms/LICM/atomics.ll
@@ -60,8 +60,7 @@ end:
  ; CHECK-NEXT: br label %loop
  }
  
-; Don't try to "sink" unordered stores yet; it is legal, but the machinery
-; isn't there.
+; We can sink an unordered store
  define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
  entry:
    br label %loop
@@ -75,6 +74,149 @@ loop:
  end:
    ret i32 %vala
  ; CHECK-LABEL: define i32 @test4(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NOT: store
+; CHECK-LABEL: end:
+; CHECK-NEXT:   %[[LCSSAPHI:.*]] = phi i32 [ %vala
+; CHECK:   store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
+}
+
+; We currently don't handle ordered atomics.
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x release, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test5(
  ; CHECK: load atomic i32, i32* %y monotonic
  ; CHECK-NEXT: store atomic
  }
+
+; We currently don't touch volatiles
+define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store volatile i32 %vala, i32* %x, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store volatile
+}
+
+; We currently don't touch volatiles
+define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic volatile i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6b(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic volatile
+}
+
+; Mixing unorder atomics and normal loads/stores is
+; current unimplemented
+define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7(
+; CHECK: store i32 5, i32* %x
+; CHECK-NEXT: load atomic i32, i32* %y
+; CHECK-NEXT: store atomic i32
+}
+
+; Three provably noalias locations - we can sink normal and unordered, but
+;  not monotonic
+define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %z unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7b(
+; CHECK: load atomic i32, i32* %y monotonic
+
+; CHECK-LABEL: end:
+; CHECK: store i32 5, i32* %x
+; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
+}
+
+
+define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  fence release
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test8(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: fence
+}
+
+; Exact semantics of monotonic accesses are a bit vague in the C++ spec,
+; for the moment, be conservative and don't touch them.
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x monotonic, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test9(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT:   store atomic i32 %vala, i32* %x monotonic, align 4
+}
diff --git a/test/Transforms/LICM/scalar-promote.ll b/test/Transforms/LICM/scalar-promote.ll

index c88701154b8f1cbca551729bf0b986d319c48468..89888546494fb8c179a96239d46a0647d248c706 100644 (file)
--- a/test/Transforms/LICM/scalar-promote.ll
+++ b/test/Transforms/LICM/scalar-promote.ll
@@ -378,6 +378,33 @@ exit:
    ret i32 %ret
  }
  
+define void @test10(i32 %i) {
+Entry:
+  br label %Loop
+; CHECK-LABEL: @test10(
+; CHECK: Entry:
+; CHECK-NEXT:   load atomic i32, i32* @X unordered, align 4
+; CHECK-NEXT:   br label %Loop
+
+
+Loop:   ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
+  %x = load atomic i32, i32* @X unordered, align 4
+  %x2 = add i32 %x, 1
+  store atomic i32 %x2, i32* @X unordered, align 4
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+; CHECK: Out:
+; CHECK-NEXT:   %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT:   store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
+; CHECK-NEXT:   ret void
+
+}
+
  !0 = !{!4, !4, i64 0}
  !1 = !{!"omnipotent char", !2}
  !2 = !{!"Simple C/C++ TBAA"}
author	Philip Reames <listmail@philipreames.com>
	Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Tue, 14 Feb 2017 01:38:31 +0000 (01:38 +0000)
lib/Transforms/Scalar/LICM.cpp		patch \| blob \| history
test/Transforms/LICM/atomics.ll		patch \| blob \| history
test/Transforms/LICM/scalar-promote.ll		patch \| blob \| history