From b7d1036581060e3b39ba26a6e380e5dd13993ba6 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 6 Apr 2018 16:03:36 +0000 Subject: [PATCH] [OPENMP, NVPTX] Fix codegen for the teams reduction. Added NUW flags for all the add|mul|sub operations + replaced sdiv by udiv as we operate on unsigned values only (addresses, converted to integers) git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@329411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 44 ++++------ test/OpenMP/nvptx_parallel_codegen.cpp | 8 +- test/OpenMP/nvptx_target_codegen.cpp | 24 ++--- test/OpenMP/nvptx_target_printf_codegen.c | 6 +- test/OpenMP/nvptx_target_teams_codegen.cpp | 8 +- test/OpenMP/nvptx_teams_reduction_codegen.cpp | 88 +++++++++---------- 6 files changed, 86 insertions(+), 92 deletions(-) diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 75d3ba75b3..0d7386490d 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -529,8 +529,8 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; return IsInSpmdExecutionMode ? getNVPTXNumThreads(CGF) - : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), - "thread_limit"); + : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), + "thread_limit"); } /// Get the thread id of the OMP master thread. @@ -545,9 +545,9 @@ static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { llvm::Value *NumThreads = getNVPTXNumThreads(CGF); // We assume that the warp size is a power of 2. - llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); + llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); - return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), + return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)), Bld.CreateNot(Mask), "master_tid"); } @@ -1714,13 +1714,11 @@ static void emitReductionListCopy( // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); auto *CurrentOffset = - Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), - ScratchpadIndex); + Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); auto *ScratchPadElemAbsolutePtrVal = - Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); + Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, @@ -1731,13 +1729,11 @@ static void emitReductionListCopy( case ScratchpadToThread: { // Step 1.1: Get the address for the src element in the scratchpad. // address = base + index * ElementSizeInChars. - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); auto *CurrentOffset = - Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars), - ScratchpadIndex); + Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); auto *ScratchPadElemAbsolutePtrVal = - Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset); + Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, @@ -1796,22 +1792,20 @@ static void emitReductionListCopy( if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { llvm::Value *ScratchpadBasePtr = IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer(); - unsigned ElementSizeInChars = - C.getTypeSizeInChars(Private->getType()).getQuantity(); - ScratchpadBasePtr = Bld.CreateAdd( + llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); + ScratchpadBasePtr = Bld.CreateNUWAdd( ScratchpadBasePtr, - Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get( - CGM.SizeTy, ElementSizeInChars))); + Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars)); // Take care of global memory alignment for performance - ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateSDiv( + ScratchpadBasePtr = Bld.CreateNUWSub( + ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateUDiv( ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); - ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr, - llvm::ConstantInt::get(CGM.SizeTy, 1)); - ScratchpadBasePtr = Bld.CreateMul( + ScratchpadBasePtr = Bld.CreateNUWAdd( + ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); + ScratchpadBasePtr = Bld.CreateNUWMul( ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); diff --git a/test/OpenMP/nvptx_parallel_codegen.cpp b/test/OpenMP/nvptx_parallel_codegen.cpp index 5aa9a21e92..932454a6f8 100644 --- a/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/test/OpenMP/nvptx_parallel_codegen.cpp @@ -127,7 +127,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] +// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -145,7 +145,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] +// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*), // CHECK: call void @llvm.nvvm.barrier0() @@ -246,7 +246,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] +// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -264,7 +264,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] +// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK-64: [[N:%.+]] = load i32, i32* [[REF_N]], // CHECK-32: [[N:%.+]] = load i32, i32* [[LOCAL_N]], diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp index 23b40e10c4..70f3973acd 100644 --- a/test/OpenMP/nvptx_target_codegen.cpp +++ b/test/OpenMP/nvptx_target_codegen.cpp @@ -68,7 +68,7 @@ int foo(int n) { // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -86,7 +86,7 @@ int foo(int n) { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK: br label {{%?}}[[TERMINATE:.+]] // @@ -144,7 +144,7 @@ int foo(int n) { // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -162,7 +162,7 @@ int foo(int n) { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK: load i16, i16* [[AA_CADDR]], // CHECK: br label {{%?}}[[TERMINATE:.+]] @@ -245,7 +245,7 @@ int foo(int n) { // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -263,7 +263,7 @@ int foo(int n) { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // Use captures. @@ -414,7 +414,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -432,7 +432,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // CHECK-64-DAG: load i32, i32* [[REF_A]] // CHECK-32-DAG: load i32, i32* [[LOCAL_A]] @@ -503,7 +503,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -521,7 +521,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // Use captures. // CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0 @@ -587,7 +587,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -605,7 +605,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // CHECK-64-DAG: load i32, i32* [[REF_A]] diff --git a/test/OpenMP/nvptx_target_printf_codegen.c b/test/OpenMP/nvptx_target_printf_codegen.c index 9f57d9ee30..e7bfb874f4 100644 --- a/test/OpenMP/nvptx_target_printf_codegen.c +++ b/test/OpenMP/nvptx_target_printf_codegen.c @@ -29,7 +29,7 @@ int CheckSimple() { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // printf in master-only basic block. @@ -69,7 +69,7 @@ void CheckNoArgs() { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // printf in master-only basic block. @@ -102,7 +102,7 @@ void CheckAllocaIsInEntryBlock() { // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] if (foo) { diff --git a/test/OpenMP/nvptx_target_teams_codegen.cpp b/test/OpenMP/nvptx_target_teams_codegen.cpp index d547c16714..eec9507c25 100644 --- a/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -93,7 +93,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -111,7 +111,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // CHECK-NOT: kmpc_fork_teams @@ -179,7 +179,7 @@ int bar(int n){ // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]] + // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]] // CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]] // CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]] // @@ -197,7 +197,7 @@ int bar(int n){ // CHECK: [[MASTER]] // CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() - // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] + // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] // // CHECK-NOT: kmpc_fork_teams diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp index 696940bcf1..d268ab5c5e 100644 --- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -246,8 +246,8 @@ int bar(int n){ // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align @@ -272,8 +272,8 @@ int bar(int n){ // CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align // CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]] // - // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 @@ -588,24 +588,24 @@ int bar(int n){ // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align // CHECK: store i8 [[ELT_VAL]], i8* [[SCRATCHPAD_ELT_PTR]], align // - // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1 - // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]] - // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256 - // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256 + // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1 + // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] + // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align @@ -631,8 +631,8 @@ int bar(int n){ // CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align // CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]] // - // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 @@ -640,15 +640,15 @@ int bar(int n){ // CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[REMOTE_ELT1]], align // CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align // - // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1 - // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]] - // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256 - // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256 + // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1 + // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] + // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 // - // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 @@ -1013,26 +1013,26 @@ int bar(int n){ // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32* // CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align // - // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4 - // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]] - // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256 - // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256 + // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4 + // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] + // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 // // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align @@ -1058,8 +1058,8 @@ int bar(int n){ // CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align // CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]] // - // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 @@ -1069,15 +1069,15 @@ int bar(int n){ // CHECK: [[REMOTE_ELT1_PTR:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8* // CHECK: store i8* [[REMOTE_ELT1_PTR]], i8** [[REMOTE_ELT_REF]], align // - // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4 - // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]] - // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1 - // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256 - // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1 - // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256 + // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4 + // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]] + // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1 + // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256 + // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1 + // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256 // - // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]] - // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] + // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]] + // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 -- 2.40.0