SharedMemorySize = 128,
};
-enum NamedBarrier : unsigned {
- /// Synchronize on this barrier #ID using a named barrier primitive.
- /// Only the subset of active threads in a parallel region arrive at the
- /// barrier.
- NB_Parallel = 1,
-};
-
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
RefExpr = RefExpr->IgnoreParens();
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
CGF.EmitRuntimeCall(F);
}
-/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
-/// a CTA.
-static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
- llvm::Value *NumThreads) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
- llvm::Function *F = llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier);
- F->addFnAttr(llvm::Attribute::Convergent);
- CGF.EmitRuntimeCall(F, Args);
-}
-
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-/// Synchronize worker threads in a parallel region.
-static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
- return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
-}
-
/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
CGF.EmitBlock(MergeBB);
- Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
- llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
- AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
-
- llvm::Value *NumActiveThreads = Bld.CreateNSWMul(
- NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
- // named_barrier_sync(ParallelBarrierID, num_active_threads)
- syncParallelThreads(CGF, NumActiveThreads);
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
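+ // With EmitChecks=false and ForceSimpleCall=true this emits a plain
+ // __kmpc_barrier() call (no cancellation checks), letting the OpenMP device
+ // runtime synchronize the whole team instead of emitting a raw named-barrier
+ // intrinsic over the NumWarps * WarpSize active threads.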
//
// Warp 0 copies reduce element from transfer medium.
llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
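+ // NumWarpsArg is the number of warps that copied a reduce element into the
+ // transfer medium; the first NumWarps lanes of warp 0 each read one element
+ // back.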
+ Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+ llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+ AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
+
// Up to 32 threads in warp 0 are active.
llvm::Value *IsActiveThread =
Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
// While warp 0 copies values from transfer medium, all other warps must
// wait.
- syncParallelThreads(CGF, NumActiveThreads);
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
if (NumIters > 1) {
Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: [[NEXT:%.+]] = add nsw i32 [[CNT]], 1
// CHECK: store i32 [[NEXT]], i32* [[CNT_ADDR]],
// CHECK: br label
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
//
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: ret
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
//
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: ret
#endif
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0
// CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]]
//
//
// Barrier after copy to shared memory storage medium.
// CHECK: [[COPY_CONT]]
- // CHECK: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[ACTIVE_THREADS:%.+]] = mul nsw i32 [[ACTIVE_WARPS:%.+]], [[WS]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
+ // CHECK: [[ACTIVE_WARPS:%.+]] = load i32, i32*
//
// Read into warp 0.
// CHECK: [[IS_W0_ACTIVE_THREAD:%.+]] = icmp ult i32 [[TID:%.+]], [[ACTIVE_WARPS]]
// CHECK: br label {{%?}}[[READ_CONT]]
//
// CHECK: [[READ_CONT]]
- // CHECK: call void @llvm.nvvm.barrier(i32 1, i32 [[ACTIVE_THREADS]])
+ // CHECK: call void @__kmpc_barrier(%struct.ident_t* @
// CHECK: ret
#endif