From: Alexey Bataev Date: Thu, 4 Jan 2018 20:18:55 +0000 (+0000) Subject: [OPENMP] Fix casting in NVPTX support library. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=27fbdd87381487e957b2756be18cbb3f32a6be3c;p=clang [OPENMP] Fix casting in NVPTX support library. If the reduction required shuffle in the NVPTX codegen, we may need to cast the reduced value to the integer type. This casting was implemented incorrectly and may cause compiler crash. Patch fixes this problem. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@321818 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index eb4c120258..ac82737e77 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1059,19 +1059,41 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } +/// Cast value to the specified type. +static llvm::Value * +castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy, + llvm::Optional IsSigned = llvm::None) { + if (Val->getType() == CastTy) + return Val; + if (Val->getType()->getPrimitiveSizeInBits() > 0 && + CastTy->getPrimitiveSizeInBits() > 0 && + Val->getType()->getPrimitiveSizeInBits() == + CastTy->getPrimitiveSizeInBits()) + return CGF.Builder.CreateBitCast(Val, CastTy); + if (IsSigned.hasValue() && CastTy->isIntegerTy() && + Val->getType()->isIntegerTy()) + return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned); + Address CastItem = CGF.CreateTempAlloca( + CastTy, + CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType()))); + Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace())); + CGF.Builder.CreateStore(Val, ValCastItem); + return CGF.Builder.CreateLoad(CastItem); +} + /// This function creates calls to one of two shuffle functions to copy /// variables between lanes in a warp. static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, - QualType ElemTy, llvm::Value *Elem, llvm::Value *Offset) { auto &CGM = CGF.CGM; - auto &C = CGM.getContext(); auto &Bld = CGF.Builder; CGOpenMPRuntimeNVPTX &RT = *(static_cast(&CGM.getOpenMPRuntime())); - unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity(); + unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType()); assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction."); OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4 @@ -1079,17 +1101,16 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, : OMPRTL_NVPTX__kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. - auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; - auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy); - auto *WarpSize = CGF.EmitScalarConversion( - getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true), - C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation()); + llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; + llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true); + auto *WarpSize = + Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); auto *ShuffledVal = CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); - return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy)); + return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true); } namespace { @@ -1151,10 +1172,9 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. @@ -1170,32 +1190,26 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); - llvm::Value *DestElementPtr = - CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()); - Address DestElemAddr = - Address(DestElementPtr, C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - DestElemAddr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = CGF.EmitLoadOfPointer( + DestElementPtrAddr, + C.getPointerType(Private->getType())->castAs()); break; } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs()); // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. @@ -1208,11 +1222,8 @@ static void emitReductionListCopy( Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); - Address ScratchpadPtr = - Address(ScratchPadElemAbsolutePtrVal, - C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); IncrScratchpadDest = true; break; } @@ -1253,10 +1264,11 @@ static void emitReductionListCopy( // Now that all active lanes have read the element in the // Reduce list, shuffle over the value from the remote lane. - if (ShuffleInElement) { - Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem, - RemoteLaneOffset); - } + if (ShuffleInElement) + Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset); + + DestElementAddr = Bld.CreateElementBitCast(DestElementAddr, + SrcElementAddr.getElementType()); // Store the source element value to the dest element address. CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, diff --git a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index c4c3e977b0..d636240f44 100644 --- a/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -168,9 +168,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -405,9 +405,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -714,18 +714,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp index d77231807f..696940bcf1 100644 --- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -168,9 +168,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -249,9 +249,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double* // CHECK: store double [[ELT_VAL]], double* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -298,25 +298,15 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] // // CHECK: [[REDUCE_CONT]] // CHECK: ret - - - - - - - - - - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}_worker() // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l33]]( @@ -480,9 +470,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -617,9 +607,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float* // CHECK: store float [[ELT_VAL]], float* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -690,24 +680,15 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] // // CHECK: [[REDUCE_CONT]] // CHECK: ret - - - - - - - - - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l40}}_worker() // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+template.+l40]]( @@ -903,18 +884,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -1035,9 +1016,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32* // CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4 @@ -1053,9 +1034,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16* // CHECK: store i16 [[ELT_VAL]], i16* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -1121,18 +1102,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] //