From: Yaxun Liu Date: Thu, 29 Jun 2017 18:47:45 +0000 (+0000) Subject: CodeGen: Fix invalid bitcast for coerced function argument X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6703d83ed551908566145c3c607e8d70da86d710;p=clang CodeGen: Fix invalid bitcast for coerced function argument Clang assumes coerced function argument is in address space 0, which is not always true and results in invalid bitcasts. This patch fixes failure in OpenCL conformance test api/get_kernel_arg_info with amdgcn---amdgizcl triple, where non-zero alloca address space is used. Differential Revision: https://reviews.llvm.org/D34777 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@306721 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 8795e4a899..13a156c7bb 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1297,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src, // If store is legal, just bitcast the src pointer. if (SrcSize <= DstSize) { - Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy)); + Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); BuildAggStore(CGF, Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. This is stupid, but @@ -2412,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, Address AddrToStoreInto = Address::invalid(); if (SrcSize <= DstSize) { - AddrToStoreInto = - Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy)); + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); } else { AddrToStoreInto = CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl index d711f78d4e..6ea0aff0a0 100644 --- a/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s typedef struct { int cells[9]; @@ -8,16 +9,57 @@ typedef struct { int cells[16]; } Mat4X4; +struct StructOneMember { + int2 x; +}; + +struct StructTwoMember { + int2 x; + int2 y; +}; + +// COM-LABEL: define void @foo Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { Mat4X4 out; return out; } +// COM-LABEL: define {{.*}} void @ker +// Expect two mem copies: one for the argument "in", and one for +// the return value. +// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8* +// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* +// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* +// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8* -// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* +// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %u) +void FuncOneMember(struct StructOneMember u) { + u.x = (int2)(0, 0); +} + +// AMD-LABEL: define amdgpu_kernel void @KernelOneMember +// AMD-SAME: (<2 x i32> %[[u_coerce:.*]]) +// AMD: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5) +// AMD: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0 +// AMD: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]] +// AMD: call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %[[u]]) +kernel void KernelOneMember(struct StructOneMember u) { + FuncOneMember(u); +} + +// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %u) +void FuncTwoMember(struct StructTwoMember u) { + u.x = (int2)(0, 0); +} + +// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember +// AMD-SAME: (%struct.StructTwoMember %[[u_coerce:.*]]) +// AMD: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5) +// AMD: store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember addrspace(5)* %[[u]] +// AMD: call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %[[u]]) +kernel void KernelTwoMember(struct StructTwoMember u) { + FuncTwoMember(u); +}