CodeGen: Fix invalid bitcasts for memcpy

author Yaxun Liu <Yaxun.Liu@amd.com>

Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)

committer Yaxun Liu <Yaxun.Liu@amd.com>

Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)
author Yaxun Liu <Yaxun.Liu@amd.com>
Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index 25297eb2e208cf45452b5ac10a00236f05597e7d..15c8553249b26b0c23de08155471eb055dfaf329 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1234,8 +1234,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
  
    // Otherwise do coercion through memory. This is stupid, but simple.
    Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
-  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy);
-  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.Int8PtrTy);
+  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
+  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
    CGF.Builder.CreateMemCpy(Casted, SrcCasted,
        llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
        false);
@@ -1316,8 +1316,8 @@ static void CreateCoercedStore(llvm::Value *Src,
      // to that information.
      Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
      CGF.Builder.CreateStore(Src, Tmp);
-    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy);
-    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.Int8PtrTy);
+    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
+    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
      CGF.Builder.CreateMemCpy(DstCasted, Casted,
          llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
          false);
diff --git a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl

index a9d74ab6deac477f56b9c86b829829fb7706183d..21bdb15b094de691bf918231d0bc8e266e24fc93 100644 (file)
--- a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,6 +1,6 @@
  // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-amdgiz -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,R600 %s
  
  typedef __attribute__(( ext_vector_type(2) )) char char2;
  typedef __attribute__(( ext_vector_type(3) )) char char3;
@@ -309,7 +309,8 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
  // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
  void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
  
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg)
+// AMDGCN: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// R600: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg)
  void func_flexible_array_arg(flexible_array arg) { }
  
  // CHECK: define float @func_f32_ret()
@@ -404,14 +405,16 @@ struct_arr16 func_ret_struct_arr16()
    return s;
  }
  
-// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result)
+// AMDGCN: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
+// R600: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result)
  struct_arr32 func_ret_struct_arr32()
  {
    struct_arr32 s = { 0 };
    return s;
  }
  
-// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result)
+// AMDGCN: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
+// R600: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result)
  struct_arr33 func_ret_struct_arr33()
  {
    struct_arr33 s = { 0 };
@@ -440,7 +443,8 @@ different_size_type_pair func_different_size_type_pair_ret()
    return s;
  }
  
-// CHECK: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result)
+// AMDGCN: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
+// R600: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result)
  flexible_array func_flexible_array_ret()
  {
    flexible_array s = { 0 };
@@ -450,11 +454,13 @@ flexible_array func_flexible_array_ret()
  // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
  void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
  
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s)
+// AMDGCN: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// R600: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s)
  void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
  
  // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4)
+// AMDGCN: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// R600: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4)
  void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
  
  // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +475,8 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
  // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
  void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
  
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding* byval nocapture readonly align 8 %arg)
+// AMDGCN: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// R600: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding* byval nocapture readonly align 8 %arg)
  void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
    *out = arg;
  }
@@ -479,7 +486,8 @@ void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t ar
  
  // Function signature from blender, nothing should be passed byval. The v3i32
  // should not count as 4 passed registers.
-// CHECK: define void @v3i32_pair_reg_count(%struct.int3_pair* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
+// AMDGCN: define void @v3i32_pair_reg_count(%struct.int3_pair addrspace(5)* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
+// R600: define void @v3i32_pair_reg_count(%struct.int3_pair* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
  void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
  
  // Each short4 should fit pack into 2 registers.
@@ -487,7 +495,8 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
  void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                       short4 arg4, short4 arg5, struct_4regs arg6) { }
  
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// AMDGCN: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// R600: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
  void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                 short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
  
@@ -495,7 +504,8 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
  void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                       short3 arg4, short3 arg5, struct_4regs arg6) { }
  
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// AMDGCN: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// R600: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
  void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                            short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
  
@@ -505,7 +515,8 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                       short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                       struct_4regs arg13) { }
  
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs* byval nocapture align 4 %arg13)
+// AMDGCN: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// R600: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs* byval nocapture align 4 %arg13)
  void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                            short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                            short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +526,8 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
  void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                      char2 arg4, char2 arg5, struct_4regs arg6) { }
  
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// AMDGCN: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// R600: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
  void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                           char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
author	Yaxun Liu <Yaxun.Liu@amd.com>
	Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)
committer	Yaxun Liu <Yaxun.Liu@amd.com>
	Thu, 7 Dec 2017 01:39:52 +0000 (01:39 +0000)
lib/CodeGen/CGCall.cpp		patch \| blob \| history
test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl		patch \| blob \| history