From f100795281f48b6cbf9e00438b10da5ebae546a0 Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Mon, 25 Mar 2019 20:54:00 +0000 Subject: [PATCH] AMDGPU: Add support for cross address space synchronization scopes (clang) Differential Revision: https://reviews.llvm.org/D59494 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@356947 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGAtomic.cpp | 7 +++++-- lib/CodeGen/TargetInfo.cpp | 33 +++++++++++++++++++++++--------- lib/CodeGen/TargetInfo.h | 6 ++++-- test/CodeGenOpenCL/atomic-ops.cl | 20 +++++++++---------- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 1d18e57b0e..11618e18d3 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -679,7 +679,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // Handle constant scope. if (auto SC = dyn_cast(Scope)) { auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( - ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()), + Order, CGF.CGM.getLLVMContext()); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SCID); return; @@ -708,7 +709,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, Builder.SetInsertPoint(B); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, - CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(), + ScopeModel->map(S), + Order, CGF.getLLVMContext())); Builder.CreateBr(ContBB); } diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index d9bbf594ed..5c2b3ff353 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -462,8 +462,11 @@ TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, } llvm::SyncScope::ID -TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { - return C.getOrInsertSyncScopeID(""); /* default sync scope */ +TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const { + return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */ } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); @@ -7824,8 +7827,10 @@ public: } LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; - llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; llvm::Function * createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *BlockInvokeFunc, @@ -7971,10 +7976,12 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, } llvm::SyncScope::ID -AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const { - StringRef Name; - switch (S) { +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const { + std::string Name; + switch (Scope) { case SyncScope::OpenCLWorkGroup: Name = "workgroup"; break; @@ -7987,7 +7994,15 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, case SyncScope::OpenCLSubGroup: Name = "wavefront"; } - return C.getOrInsertSyncScopeID(Name); + + if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { + if (!Name.empty()) + Name = Twine(Twine(Name) + Twine("-")).str(); + + Name = Twine(Twine(Name) + Twine("one-as")).str(); + } + + return Ctx.getOrInsertSyncScopeID(Name); } bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index 8a4154030c..d7e9eee9c5 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -268,8 +268,10 @@ public: llvm::Type *DestTy) const; /// Get the syncscope used in LLVM IR. - virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, - llvm::LLVMContext &C) const; + virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const; /// Interface class for filling custom fields of a block literal for OpenCL. class TargetOpenCLBlockHelper { diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl index 4899b734e9..88f2e0d0ea 100644 --- a/test/CodeGenOpenCL/atomic-ops.cl +++ b/test/CodeGenOpenCL/atomic-ops.cl @@ -83,7 +83,7 @@ void fi3(atomic_int *i, atomic_uint *ui) { bool fi4(atomic_int *i) { // CHECK-LABEL: @fi4( - // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] @@ -141,21 +141,21 @@ void fi6(atomic_int *i, int order, int scope) { // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] // CHECK-NEXT: ] // CHECK: [[MON_WG]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic // CHECK: [[MON_DEV]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic // CHECK: [[MON_ALL]]: // CHECK: load atomic i32, i32* %{{.*}} monotonic // CHECK: [[MON_SUB]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic // CHECK: [[ACQ_WG]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire // CHECK: [[ACQ_DEV]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire // CHECK: [[ACQ_ALL]]: // CHECK: load atomic i32, i32* %{{.*}} acquire // CHECK: [[ACQ_SUB]]: - // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire // CHECK: [[SEQ_WG]]: // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst // CHECK: [[SEQ_DEV]]: @@ -169,13 +169,13 @@ void fi6(atomic_int *i, int order, int scope) { float ff1(global atomic_float *d) { // CHECK-LABEL: @ff1 - // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); } void ff2(atomic_float *d) { // CHECK-LABEL: @ff2 - // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release + // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); } @@ -198,7 +198,7 @@ void atomic_init_foo() // CHECK-LABEL: @failureOrder void failureOrder(atomic_int *ptr, int *ptr2) { - // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic + // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire -- 2.40.0