From: Tim Northover
Date: Thu, 13 Mar 2014 19:25:48 +0000 (+0000)
Subject: CodeGen: make use of weaker failure orders on cmpxchg.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c72aad39783bfe88bb6fdd4e8ea638e8061124f4;p=clang

CodeGen: make use of weaker failure orders on cmpxchg.

This makes Clang take advantage of the recent IR addition of a "failure"
memory ordering requirement. As with the "success" ordering, we try to emit
just a single version if the expression is constant, but fall back to runtime
detection (to allow optimisation across function-call boundaries).

rdar://problem/15996804

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@203837 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index dcb3ff1033..68437866e4 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -174,10 +174,134 @@ bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
   return true;
 }
 
-static void
-EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
-             llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
-             uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
+static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E,
+                              llvm::Value *Dest, llvm::Value *Ptr,
+                              llvm::Value *Val1, llvm::Value *Val2,
+                              uint64_t Size, unsigned Align,
+                              llvm::AtomicOrdering SuccessOrder,
+                              llvm::AtomicOrdering FailureOrder) {
+  // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
+  llvm::LoadInst *Expected = CGF.Builder.CreateLoad(Val1);
+  Expected->setAlignment(Align);
+  llvm::LoadInst *Desired = CGF.Builder.CreateLoad(Val2);
+  Desired->setAlignment(Align);
+
+  llvm::AtomicCmpXchgInst *Old = CGF.Builder.CreateAtomicCmpXchg(
+      Ptr, Expected, Desired, SuccessOrder, FailureOrder);
+  Old->setVolatile(E->isVolatile());
+
+  // Cmp holds the result of the compare-exchange operation: true on success,
+  // false on failure.
+  llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(Old, Expected);
+
+  // This basic block is used to hold the store instruction if the operation
+  // failed.
+  llvm::BasicBlock *StoreExpectedBB =
+      CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
+
+  // This basic block is the exit point of the operation, we should end up
+  // here regardless of whether or not the operation succeeded.
+  llvm::BasicBlock *ContinueBB =
+      CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
+
+  // Update Expected if Expected isn't equal to Old, otherwise branch to the
+  // exit point.
+  CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
+
+  CGF.Builder.SetInsertPoint(StoreExpectedBB);
+  // Update the memory at Expected with Old's value.
+  llvm::StoreInst *StoreExpected = CGF.Builder.CreateStore(Old, Val1);
+  StoreExpected->setAlignment(Align);
+  // Finally, branch to the exit point.
+  CGF.Builder.CreateBr(ContinueBB);
+
+  CGF.Builder.SetInsertPoint(ContinueBB);
+  // Update the memory at Dest with Cmp's value.
+  CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+  return;
+}
+
+/// Given an ordering required on success, emit all possible cmpxchg
+/// instructions to cope with the provided (but possibly only dynamically known)
+/// FailureOrder.
+static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
+                                        llvm::Value *Dest, llvm::Value *Ptr,
+                                        llvm::Value *Val1, llvm::Value *Val2,
+                                        llvm::Value *FailureOrderVal,
+                                        uint64_t Size, unsigned Align,
+                                        llvm::AtomicOrdering SuccessOrder) {
+  llvm::AtomicOrdering FailureOrder;
+  if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
+    switch (FO->getSExtValue()) {
+    default:
+      FailureOrder = llvm::Monotonic;
+      break;
+    case AtomicExpr::AO_ABI_memory_order_consume:
+    case AtomicExpr::AO_ABI_memory_order_acquire:
+      FailureOrder = llvm::Acquire;
+      break;
+    case AtomicExpr::AO_ABI_memory_order_seq_cst:
+      FailureOrder = llvm::SequentiallyConsistent;
+      break;
+    }
+    if (FailureOrder >= SuccessOrder) {
+      // Don't assert on undefined behaviour.
+      FailureOrder =
+          llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
+    }
+    emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2, Size, Align, SuccessOrder,
+                      FailureOrder);
+    return;
+  }
+
+  // Create all the relevant BBs
+  llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *SeqCstBB = 0;
+  MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
+  if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release)
+    AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
+  if (SuccessOrder == llvm::SequentiallyConsistent)
+    SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
+
+  llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
+
+  llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
+
+  // Emit all the different atomics
+
+  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
+  // doesn't matter unless someone is crazy enough to use something that
+  // doesn't fold to a constant for the ordering.
+  CGF.Builder.SetInsertPoint(MonotonicBB);
+  emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+                    Size, Align, SuccessOrder, llvm::Monotonic);
+  CGF.Builder.CreateBr(ContBB);
+
+  if (AcquireBB) {
+    CGF.Builder.SetInsertPoint(AcquireBB);
+    emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+                      Size, Align, SuccessOrder, llvm::Acquire);
+    CGF.Builder.CreateBr(ContBB);
+    SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume),
+                AcquireBB);
+    SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire),
+                AcquireBB);
+  }
+  if (SeqCstBB) {
+    CGF.Builder.SetInsertPoint(SeqCstBB);
+    emitAtomicCmpXchg(CGF, E, Dest, Ptr, Val1, Val2,
+                      Size, Align, SuccessOrder, llvm::SequentiallyConsistent);
+    CGF.Builder.CreateBr(ContBB);
+    SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst),
+                SeqCstBB);
+  }
+
+  CGF.Builder.SetInsertPoint(ContBB);
+}
+
+static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
+                         llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
+                         llvm::Value *FailureOrder, uint64_t Size,
+                         unsigned Align, llvm::AtomicOrdering Order) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
 
@@ -188,50 +312,10 @@ EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
   case AtomicExpr::AO__atomic_compare_exchange:
-  case AtomicExpr::AO__atomic_compare_exchange_n: {
-    // Note that cmpxchg only supports specifying one ordering and
-    // doesn't support weak cmpxchg, at least at the moment.
-    llvm::LoadInst *Expected = CGF.Builder.CreateLoad(Val1);
-    Expected->setAlignment(Align);
-    llvm::LoadInst *Desired = CGF.Builder.CreateLoad(Val2);
-    Desired->setAlignment(Align);
-    llvm::AtomicOrdering FailureOrder =
-        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Order);
-    llvm::AtomicCmpXchgInst *Old = CGF.Builder.CreateAtomicCmpXchg(
-        Ptr, Expected, Desired, Order, FailureOrder);
-    Old->setVolatile(E->isVolatile());
-
-    // Cmp holds the result of the compare-exchange operation: true on success,
-    // false on failure.
-    llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(Old, Expected);
-
-    // This basic block is used to hold the store instruction if the operation
-    // failed.
-    llvm::BasicBlock *StoreExpectedBB =
-        CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
-
-    // This basic block is the exit point of the operation, we should end up
-    // here regardless of whether or not the operation succeeded.
-    llvm::BasicBlock *ContinueBB =
-        CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
-
-    // Update Expected if Expected isn't equal to Old, otherwise branch to the
-    // exit point.
-    CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
-
-    CGF.Builder.SetInsertPoint(StoreExpectedBB);
-    // Update the memory at Expected with Old's value.
-    llvm::StoreInst *StoreExpected = CGF.Builder.CreateStore(Old, Val1);
-    StoreExpected->setAlignment(Align);
-    // Finally, branch to the exit point.
-    CGF.Builder.CreateBr(ContinueBB);
-
-    CGF.Builder.SetInsertPoint(ContinueBB);
-    // Update the memory at Dest with Cmp's value.
-    CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+  case AtomicExpr::AO__atomic_compare_exchange_n:
+    emitAtomicCmpXchgFailureSet(CGF, E, Dest, Ptr, Val1, Val2, FailureOrder,
+                                Size, Align, Order);
     return;
-  }
-
   case AtomicExpr::AO__c11_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load: {
@@ -633,31 +717,31 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     switch (ord) {
     case AtomicExpr::AO_ABI_memory_order_relaxed:
-      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                   llvm::Monotonic);
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                   Size, Align, llvm::Monotonic);
       break;
     case AtomicExpr::AO_ABI_memory_order_consume:
    case AtomicExpr::AO_ABI_memory_order_acquire:
       if (IsStore)
         break; // Avoid crashing on code with undefined behavior
-      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                   llvm::Acquire);
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                   Size, Align, llvm::Acquire);
       break;
     case AtomicExpr::AO_ABI_memory_order_release:
       if (IsLoad)
         break; // Avoid crashing on code with undefined behavior
-      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                   llvm::Release);
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                   Size, Align, llvm::Release);
       break;
     case AtomicExpr::AO_ABI_memory_order_acq_rel:
       if (IsLoad || IsStore)
         break; // Avoid crashing on code with undefined behavior
-      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                   llvm::AcquireRelease);
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                   Size, Align, llvm::AcquireRelease);
       break;
     case AtomicExpr::AO_ABI_memory_order_seq_cst:
-      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                   llvm::SequentiallyConsistent);
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                   Size, Align, llvm::SequentiallyConsistent);
       break;
     default: // invalid order
       // We should not ever get here normally, but it's hard to
@@ -693,34 +777,34 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-               llvm::Monotonic);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+               Size, Align, llvm::Monotonic);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                 llvm::Acquire);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                 Size, Align, llvm::Acquire);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(1), AcquireBB);
     SI->addCase(Builder.getInt32(2), AcquireBB);
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                 llvm::Release);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                 Size, Align, llvm::Release);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(3), ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-                 llvm::AcquireRelease);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+                 Size, Align, llvm::AcquireRelease);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(4), AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
-               llvm::SequentiallyConsistent);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OrderFail,
+               Size, Align, llvm::SequentiallyConsistent);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32(5), SeqCstBB);

diff --git a/test/CodeGen/atomic-ops.c b/test/CodeGen/atomic-ops.c
index 1b236bba7a..2517b67bbc 100644
--- a/test/CodeGen/atomic-ops.c
+++ b/test/CodeGen/atomic-ops.c
@@ -315,4 +315,92 @@ void atomic_init_foo()
 // CHECK: }
 }
 
+// CHECK-LABEL: @failureOrder
+void failureOrder(_Atomic(int) *ptr, int *ptr2) {
+  __c11_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed);
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acquire monotonic
+
+  __c11_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire);
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst acquire
+
+  // Unknown ordering: conservatively pick strongest valid option (for now!).
+  __atomic_compare_exchange(ptr2, ptr2, ptr2, 0, memory_order_acq_rel, *ptr2);
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acq_rel acquire
+
+  // Undefined behaviour: don't really care what that last ordering is so leave
+  // it out:
+  __atomic_compare_exchange_n(ptr2, ptr2, 43, 1, memory_order_seq_cst, 42);
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(_Atomic(int) *ptr, int *ptr2, int success, int fail) {
+  __c11_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail);
+  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+  // CHECK: [[MONOTONIC]]
+  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQUIRE]]
+  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[RELEASE]]
+  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQREL]]
+  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[SEQCST]]
+  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[MONOTONIC_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: br
+
+  // CHECK: [[ACQREL_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQREL_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: br
+
+  // CHECK: [[SEQCST_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_SEQCST]]
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: br
+}
+
 #endif
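--

A minimal standalone sketch of the effect of this change (not part of the patch;
the function name `try_update` and the inline memory_order enum are illustrative,
mirroring the test file's conventions). Compiling it with `clang -S -emit-llvm`
should now show "cmpxchg ... seq_cst monotonic"; before this commit the failure
ordering was widened to match the success ordering, giving "seq_cst seq_cst":

typedef enum memory_order {
  memory_order_relaxed, memory_order_consume, memory_order_acquire,
  memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;

/* Hypothetical example: seq_cst on success, relaxed on failure. With this
 * patch the relaxed failure ordering reaches the emitted cmpxchg instead of
 * being promoted to seq_cst. */
int try_update(_Atomic(int) *p, int *expected, int desired) {
  return __c11_atomic_compare_exchange_strong(p, expected, desired,
                                              memory_order_seq_cst,
                                              memory_order_relaxed);
}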