From 1b12bf4092573db0add5649e9fbe9925a141c328 Mon Sep 17 00:00:00 2001 From: Yaxun Liu Date: Mon, 19 Jun 2017 17:03:41 +0000 Subject: [PATCH] CodeGen: Cast temporary variable to proper address space In C++ all variables are in default address space. Previously change has been made to cast automatic variables to default address space. However that is not sufficient since all temporary variables need to be casted to default address space. This patch casts all temporary variables to default address space except those for passing indirect arguments since they are only used for load/store. This patch only affects target having non-zero alloca address space. Differential Revision: https://reviews.llvm.org/D33706 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@305711 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGCall.cpp | 6 ++- lib/CodeGen/CGDecl.cpp | 24 ++-------- lib/CodeGen/CGExpr.cpp | 44 ++++++++++++++----- lib/CodeGen/CodeGenFunction.h | 42 ++++++++++++++---- test/CodeGen/address-space.c | 4 +- test/CodeGen/default-address-space.c | 19 ++++---- test/CodeGen/x86_64-arguments.c | 4 +- test/CodeGenCXX/amdgcn-automatic-variable.cpp | 16 +++++-- 8 files changed, 102 insertions(+), 57 deletions(-) diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index c65dc18be3..0e5ccc0d9f 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -3813,7 +3813,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. - Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", false); IRCallArgs[FirstIRArg] = Addr.getPointer(); LValue argLV = MakeAddrLValue(Addr, I->Ty); @@ -3842,7 +3843,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", false); IRCallArgs[FirstIRArg] = AI.getPointer(); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 87bfa507a8..ccd3b8d513 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -954,6 +954,7 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); + assert(Ty.getAddressSpace() == LangAS::Default); AutoVarEmission emission(D); @@ -1046,8 +1047,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment); - address.getPointer()->setName(D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1107,27 +1107,9 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { llvm::Type *llvmTy = ConvertTypeForMem(elementType); // Allocate memory for the array. - llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); - vla->setAlignment(alignment.getQuantity()); - - address = Address(vla, alignment); + address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); } - // Alloca always returns a pointer in alloca address space, which may - // be different from the type defined by the language. For example, - // in C++ the auto variables are in the default address space. Therefore - // cast alloca to the expected address space when necessary. - auto T = D.getType(); - assert(T.getAddressSpace() == LangAS::Default); - if (getASTAllocaAddressSpace() != LangAS::Default) { - auto *Addr = getTargetHooks().performAddrSpaceCast( - *this, address.getPointer(), getASTAllocaAddressSpace(), - T.getAddressSpace(), - address.getElementType()->getPointerTo( - getContext().getTargetAddressSpace(T.getAddressSpace())), - /*non-null*/ true); - address = Address(Addr, address.getAlignment()); - } setAddrOfLocalVar(&D, address); emission.Addr = address; diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 7359006677..2ee1c96a66 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -61,18 +61,36 @@ llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + const Twine &Name, + llvm::Value *ArraySize, + bool CastToDefaultAddrSpace) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); + llvm::Value *V = Alloca; + // Alloca always returns a pointer in alloca address space, which may + // be different from the type defined by the language. For example, + // in C++ the auto variables are in the default address space. Therefore + // cast alloca to the default address space when necessary. + if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + V = getTargetHooks().performAddrSpaceCast( + *this, V, getASTAllocaAddressSpace(), LangAS::Default, + Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + } + + return Address(V, Align); } -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. +/// CreateTempAlloca - This creates an alloca and inserts it into the entry +/// block if \p ArraySize is nullptr, otherwise inserts it at the current +/// insertion point of the builder. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { + const Twine &Name, + llvm::Value *ArraySize) { + if (ArraySize) + return Builder.CreateAlloca(Ty, ArraySize, Name); return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), - nullptr, Name, AllocaInsertPt); + ArraySize, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -99,14 +117,18 @@ Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) { return CreateTempAlloca(ConvertType(Ty), Align, Name); } -Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) { +Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, + bool CastToDefaultAddrSpace) { // FIXME: Should we prefer the preferred type alignment here? - return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, + CastToDefaultAddrSpace); } Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, - const Twine &Name) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name); + const Twine &Name, + bool CastToDefaultAddrSpace) { + return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, + CastToDefaultAddrSpace); } /// EvaluateExprAsBool - Perform the usual unary conversions on the specified diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 831eedf9e4..11dce073c9 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1916,13 +1916,36 @@ public: LValueBaseInfo *BaseInfo = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); - /// CreateTempAlloca - This creates a alloca and inserts it into the entry - /// block. The caller is responsible for setting an appropriate alignment on + /// CreateTempAlloca - This creates an alloca and inserts it into the entry + /// block if \p ArraySize is nullptr, otherwise inserts it at the current + /// insertion point of the builder. The caller is responsible for setting an + /// appropriate alignment on /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// + /// \p ArraySize is the number of array elements to be allocated if it + /// is not nullptr. + /// + /// LangAS::Default is the address space of pointers to local variables and + /// temporaries, as exposed in the source language. In certain + /// configurations, this is not the same as the alloca address space, and a + /// cast is needed to lift the pointer from the alloca AS into + /// LangAS::Default. This can happen when the target uses a restricted + /// address space for the stack but the source language requires + /// LangAS::Default to be a generic address space. The latter condition is + /// common for most programming languages; OpenCL is an exception in that + /// LangAS::Default is the private address space, which naturally maps + /// to the stack. + /// + /// Because the address of a temporary is often exposed to the program in + /// various ways, this function will perform the cast by default. The cast + /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is + /// more efficient if the caller knows that the address will not be exposed. + llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, - const Twine &Name = "tmp"); + const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr, + bool CastToDefaultAddrSpace = true); /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. @@ -1957,9 +1980,12 @@ public: Address CreateIRTemp(QualType T, const Twine &Name = "tmp"); /// CreateMemTemp - Create a temporary memory object of the given type, with - /// appropriate alignment. - Address CreateMemTemp(QualType T, const Twine &Name = "tmp"); - Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp"); + /// appropriate alignment. Cast it to the default address space if + /// \p CastToDefaultAddrSpace is true. + Address CreateMemTemp(QualType T, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); + Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); /// CreateAggTemp - Create a temporary memory object for the given /// aggregate type. diff --git a/test/CodeGen/address-space.c b/test/CodeGen/address-space.c index 35e3dbdcfa..54e0593857 100644 --- a/test/CodeGen/address-space.c +++ b/test/CodeGen/address-space.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s -// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s +// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s // CHECK: @foo = common addrspace(1) global int foo __attribute__((address_space(1))); diff --git a/test/CodeGen/default-address-space.c b/test/CodeGen/default-address-space.c index 07ddf48fac..fc5f55ffd6 100644 --- a/test/CodeGen/default-address-space.c +++ b/test/CodeGen/default-address-space.c @@ -22,9 +22,10 @@ int *B; int test1() { return foo; } // COM-LABEL: define i32 @test2(i32 %i) -// PIZ: load i32, i32 addrspace(4)* +// COM: %[[addr:.*]] = getelementptr +// PIZ: load i32, i32 addrspace(4)* %[[addr]] // PIZ-NEXT: ret i32 -// CHECK: load i32, i32* +// CHECK: load i32, i32* %[[addr]] // CHECK-NEXT: ret i32 int test2(int i) { return ban[i]; } @@ -42,15 +43,17 @@ void test3() { } // PIZ-LABEL: define void @test4(i32 addrspace(4)* %a) -// PIZ: %[[a_addr:.*]] = alloca i32 addrspace(4)* -// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)** %[[a_addr]] -// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[a_addr]] +// PIZ: %[[alloca:.*]] = alloca i32 addrspace(4)* +// PIZ: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32 addrspace(4)* addrspace(4)* +// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]] +// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]] // PIZ: %[[arrayidx:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[r0]] // PIZ: store i32 0, i32 addrspace(4)* %[[arrayidx]] // CHECK-LABEL: define void @test4(i32* %a) -// CHECK: %[[a_addr:.*]] = alloca i32*, align 4, addrspace(5) -// CHECK: store i32* %a, i32* addrspace(5)* %[[a_addr]] -// CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[a_addr]] +// CHECK: %[[alloca:.*]] = alloca i32*, align 4, addrspace(5) +// CHECK: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32** +// CHECK: store i32* %a, i32** %[[a_addr]] +// CHECK: %[[r0:.*]] = load i32*, i32** %[[a_addr]] // CHECK: %[[arrayidx:.*]] = getelementptr inbounds i32, i32* %[[r0]] // CHECK: store i32 0, i32* %[[arrayidx]] void test4(int *a) { diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c index 9f375d780c..d24ea4dbab 100644 --- a/test/CodeGen/x86_64-arguments.c +++ b/test/CodeGen/x86_64-arguments.c @@ -460,7 +460,7 @@ void test54() { test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); } // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) -// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) +// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) typedef float __m512 __attribute__ ((__vector_size__ (64))); typedef struct { @@ -529,7 +529,7 @@ void f63(__m512 *m, __builtin_va_list argList) { } // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) -// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) +// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) void f64_helper(__m512, ...); __m512 x64; void f64() { diff --git a/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/test/CodeGenCXX/amdgcn-automatic-variable.cpp index aab720770d..7df27c28e6 100644 --- a/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ b/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -3,9 +3,10 @@ // CHECK-LABEL: define void @_Z5func1Pi(i32* %x) void func1(int *x) { // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5) - // CHECK: store i32* %x, i32* addrspace(5)* %[[x_addr]] - // CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]] - // CHECK: store i32 1, i32* %[[r0]] + // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32** + // CHECK: store i32* %x, i32** %[[r0]] + // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]] + // CHECK: store i32 1, i32* %[[r1]] *x = 1; } @@ -70,3 +71,12 @@ void func3() { // CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]]) A a; } + +// CHECK-LABEL: define void @_Z5func4i +void func4(int x) { + // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5) + // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32* + // CHECK: store i32 %x, i32* %[[r0]], align 4 + // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) + func1(&x); +} -- 2.40.0