From c833b3773e01f91340e85f76dc52c3bcb4a6530d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sat, 5 Oct 2019 00:32:10 +0000 Subject: [PATCH] Fix a *nasty* miscompile in experimental unordered atomic lowering This is an omission in rL371441. Loads which happened to be unordered weren't being added to the PendingLoad set, and thus weren't be ordered w/respect to side effects which followed before the end of the block. Included test case is how I spotted this. We had an atomic load being folded into a using instruction after a fence that load was supposed to be ordered with. I'm sure it showed up a bunch of other ways as well. Spotted via manual inspecting of assembly differences in a corpus w/and w/o the new experimental mode. Finding this with testing would have been "unpleasant". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373814 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 7 +- test/CodeGen/X86/atomic-unordered.ll | 78 ++++++++++++++----- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c6587188bc0..31cecc01d9d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4672,10 +4672,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { L = DAG.getPtrExtOrTrunc(L, dl, VT); setValue(&I, L); - if (!I.isUnordered()) { - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) DAG.setRoot(OutChain); - } + else + PendingLoads.push_back(OutChain); return; } diff --git a/test/CodeGen/X86/atomic-unordered.ll b/test/CodeGen/X86/atomic-unordered.ll index 4d17c0584b9..35055a5adca 100644 --- a/test/CodeGen/X86/atomic-unordered.ll +++ b/test/CodeGen/X86/atomic-unordered.ll @@ -2315,22 +2315,11 @@ define i64 @constant_folding(i64* %p) { ; Legal to forward and fold (TODO) define i64 @load_forwarding(i64* %p) { -; CHECK-O0-LABEL: load_forwarding: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: orq (%rdi), %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-CUR-LABEL: load_forwarding: -; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: orq (%rdi), %rax -; CHECK-O3-CUR-NEXT: retq -; -; CHECK-O3-EX-LABEL: load_forwarding: -; CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movq (%rdi), %rax -; CHECK-O3-EX-NEXT: retq +; CHECK-LABEL: load_forwarding: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: orq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %v2 = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 @@ -2459,8 +2448,8 @@ define i64 @fold_constant_clobber(i64* %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_constant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p @@ -2486,8 +2475,8 @@ define i64 @fold_constant_fence(i64 %arg) { ; CHECK-O3-EX-LABEL: fold_constant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rdi, %rax -; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst @@ -2513,8 +2502,8 @@ define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_invariant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: addq (%rdi), %rax +; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p @@ -2541,8 +2530,8 @@ define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_invariant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: addq (%rdi), %rax +; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst @@ -2713,3 +2702,52 @@ define i16 @load_combine(i8* %p) { %res = or i16 %v1.ext, %v2.sht ret i16 %res } + +define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) { +; CHECK-O0-LABEL: fold_cmp_over_fence: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl (%rdi), %eax +; CHECK-O0-NEXT: mfence +; CHECK-O0-NEXT: cmpl %eax, %esi +; CHECK-O0-NEXT: jne .LBB116_2 +; CHECK-O0-NEXT: # %bb.1: # %taken +; CHECK-O0-NEXT: movb $1, %al +; CHECK-O0-NEXT: retq +; CHECK-O0-NEXT: .LBB116_2: # %untaken +; CHECK-O0-NEXT: xorl %eax, %eax +; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-CUR-LABEL: fold_cmp_over_fence: +; CHECK-O3-CUR: # %bb.0: +; CHECK-O3-CUR-NEXT: movl (%rdi), %eax +; CHECK-O3-CUR-NEXT: mfence +; CHECK-O3-CUR-NEXT: cmpl %eax, %esi +; CHECK-O3-CUR-NEXT: jne .LBB116_2 +; CHECK-O3-CUR-NEXT: # %bb.1: # %taken +; CHECK-O3-CUR-NEXT: movb $1, %al +; CHECK-O3-CUR-NEXT: retq +; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken +; CHECK-O3-CUR-NEXT: xorl %eax, %eax +; CHECK-O3-CUR-NEXT: retq +; +; CHECK-O3-EX-LABEL: fold_cmp_over_fence: +; CHECK-O3-EX: # %bb.0: +; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi +; CHECK-O3-EX-NEXT: mfence +; CHECK-O3-EX-NEXT: jne .LBB116_2 +; CHECK-O3-EX-NEXT: # %bb.1: # %taken +; CHECK-O3-EX-NEXT: movb $1, %al +; CHECK-O3-EX-NEXT: retq +; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken +; CHECK-O3-EX-NEXT: xorl %eax, %eax +; CHECK-O3-EX-NEXT: retq + %v2 = load atomic i32, i32* %p unordered, align 4 + fence seq_cst + %cmp = icmp eq i32 %v1, %v2 + br i1 %cmp, label %taken, label %untaken +taken: + ret i1 true +untaken: + ret i1 false +} -- 2.40.0