From 5cb9343f53d83c9a8a33aac5b2ce01672ff02cf3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Aug 2016 19:25:59 +0000 Subject: [PATCH] AMDGPU: Handle structs directly in AMDGPUABIInfo MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Structs are currently handled as pointer + byval, which makes AMDGPU LLVM backend generate incorrect code when structs are used. This patch changes struct argument to be handled directly and without flattening, which Clover (Mesa 3D Gallium OpenCL state tracker) will be able to handle. Flattening would expand the struct to individual elements and pass each as a separate argument, which Clover can not handle. Furthermore, such expansion does not fit the OpenCL programming model which requires to explicitely specify each argument index, size and memory location. Patch by Vedran Miletić git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@279463 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetInfo.cpp | 42 +++++++++++- .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 66 +++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index bdf3e4b1ea..759e3d6f40 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -6876,10 +6876,50 @@ public: namespace { +class AMDGPUABIInfo final : public DefaultABIInfo { +public: + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override; +}; + +void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + unsigned CC = FI.getCallingConvention(); + for (auto &Arg : FI.arguments()) + if (CC == llvm::CallingConv::AMDGPU_KERNEL) + Arg.info = classifyArgumentType(Arg.type); + else + Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); +} + +/// \brief Classify argument of given type \p Ty. +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { + llvm::StructType *StrTy = dyn_cast(CGT.ConvertType(Ty)); + if (!StrTy) { + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Coerce single element structs to its element. + if (StrTy->getNumElements() == 1) { + return ABIArgInfo::getDirect(); + } + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. + return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; diff --git a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl new file mode 100644 index 0000000000..f37fe66239 --- /dev/null +++ b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -0,0 +1,66 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK-NOT: %struct.single_element_struct_arg = type { i32 } +typedef struct single_element_struct_arg +{ + int i; +} single_element_struct_arg_t; + +// CHECK: %struct.struct_arg = type { i32, float, i32 } +typedef struct struct_arg +{ + int i1; + float f; + int i2; +} struct_arg_t; + +// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 } +typedef struct struct_of_arrays_arg +{ + int i1[2]; + float f1; + int i2[4]; + float f2[3]; + int i3; +} struct_of_arrays_arg_t; + +// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 } +typedef struct struct_of_structs_arg +{ + int i1; + float f1; + struct_arg_t s1; + int i2; +} struct_of_structs_arg_t; + +// CHECK-LABEL: @test_single_element_struct_arg +// CHECK: i32 %arg1.coerce +__kernel void test_single_element_struct_arg(single_element_struct_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_arg +// CHECK: %struct.struct_arg %arg1.coerce +__kernel void test_struct_arg(struct_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_of_arrays_arg +// CHECK: %struct.struct_of_arrays_arg %arg1.coerce +__kernel void test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_of_structs_arg +// CHECK: %struct.struct_of_structs_arg %arg1.coerce +__kernel void test_struct_of_structs_arg(struct_of_structs_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_non_kernel_struct_arg +// CHECK-NOT: %struct.struct_arg %arg1.coerce +// CHECK: %struct.struct_arg* byval +void test_non_kernel_struct_arg(struct_arg_t arg1) +{ +} -- 2.40.0