From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Mon, 30 Oct 2017 14:38:30 +0000 (+0000)
Subject: CodeGen: Fix insertion position of addrspace cast for alloca
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5aa4dd9f9d3038556d897395f07c1ea40ab7a28b;p=clang

CodeGen: Fix insertion position of addrspace cast for alloca

For non-zero alloca addr space, alloca is usually cast to default addr
space immediately.

For non-vla, alloca is inserted at AllocaInsertPt, therefore the addr
space cast should also be inserted at AllocaInsertPt. However,
for vla, alloca is inserted at the current insertion point of IRBuilder,
therefore the addr space cast should also be inserted at the current
insertion point of IRBuilder.

Currently clang always inserts the addr space cast at AllocaInsertPt,
which causes invalid IR for vla.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D39374


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@316909 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 431ffa55d1..d3c2f47093 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -75,7 +75,11 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
   if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) {
     auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
     llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
-    Builder.SetInsertPoint(AllocaInsertPt);
+    // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
+    // otherwise alloca is inserted at the current insertion point of the
+    // builder.
+    if (!ArraySize)
+      Builder.SetInsertPoint(AllocaInsertPt);
     V = getTargetHooks().performAddrSpaceCast(
         *this, V, getASTAllocaAddressSpace(), LangAS::Default,
         Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
diff --git a/test/CodeGenCXX/vla.cpp b/test/CodeGenCXX/vla.cpp
index 957a9f9568..b8652f8329 100644
--- a/test/CodeGenCXX/vla.cpp
+++ b/test/CodeGenCXX/vla.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-apple-darwin %s -emit-llvm -o - | FileCheck -check-prefixes=X64,CHECK %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn---amdgiz %s -emit-llvm -o - | FileCheck -check-prefixes=AMD,CHECK %s
 
 template<typename T>
 struct S {
@@ -9,7 +10,7 @@ template<typename T> int S<T>::n = 5;
 int f() {
   // Make sure that the reference here is enough to trigger the instantiation of
   // the static data member.
-  // CHECK: @_ZN1SIiE1nE = linkonce_odr global i32 5
+  // CHECK: @_ZN1SIiE1nE = linkonce_odr{{.*}} global i32 5
   int a[S<int>::n];
   return sizeof a;
 }
@@ -17,10 +18,18 @@ int f() {
 // rdar://problem/9506377
 void test0(void *array, int n) {
   // CHECK-LABEL: define void @_Z5test0Pvi(
-  // CHECK:      [[ARRAY:%.*]] = alloca i8*, align 8
-  // CHECK-NEXT: [[N:%.*]] = alloca i32, align 4
-  // CHECK-NEXT: [[REF:%.*]] = alloca i16*, align 8
-  // CHECK-NEXT: [[S:%.*]] = alloca i16, align 2
+  // X64:        [[ARRAY:%.*]] = alloca i8*, align 8
+  // AMD:        [[ARRAY0:%.*]] = alloca i8*, align 8, addrspace(5)
+  // AMD-NEXT:   [[ARRAY:%.*]] = addrspacecast i8* addrspace(5)* [[ARRAY0]] to i8**
+  // X64-NEXT:   [[N:%.*]] = alloca i32, align 4
+  // AMD:        [[N0:%.*]] = alloca i32, align 4, addrspace(5)
+  // AMD-NEXT:   [[N:%.*]] = addrspacecast i32 addrspace(5)* [[N0]] to i32*
+  // X64-NEXT:   [[REF:%.*]] = alloca i16*, align 8
+  // AMD:        [[REF0:%.*]] = alloca i16*, align 8, addrspace(5)
+  // AMD-NEXT:   [[REF:%.*]] = addrspacecast i16* addrspace(5)* [[REF0]] to i16**
+  // X64-NEXT:   [[S:%.*]] = alloca i16, align 2
+  // AMD:        [[S0:%.*]] = alloca i16, align 2, addrspace(5)
+  // AMD-NEXT:   [[S:%.*]] = addrspacecast i16 addrspace(5)* [[S0]] to i16*
   // CHECK-NEXT: store i8* 
   // CHECK-NEXT: store i32
 
@@ -59,6 +68,8 @@ void test0(void *array, int n) {
 void test2(int b) {
   // CHECK-LABEL: define void {{.*}}test2{{.*}}(i32 %b)
   int varr[b];
+  // AMD: %__end = alloca i32*, align 8, addrspace(5)
+  // AMD: [[END:%.*]] = addrspacecast i32* addrspace(5)* %__end to i32**
   // get the address of %b by checking the first store that stores it 
   //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
 
@@ -75,13 +86,16 @@ void test2(int b) {
   //CHECK: [[VLA_SIZEOF:%.*]] = mul nuw i64 4, [[VLA_NUM_ELEMENTS_PRE]]
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS_POST:%.*]] = udiv i64 [[VLA_SIZEOF]], 4
   //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_NUM_ELEMENTS_POST]]
-  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //X64-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //AMD-NEXT: store i32* [[VLA_END_PTR]], i32** [[END]]
   for (int d : varr) 0;
 }
 
 void test3(int b, int c) {
   // CHECK-LABEL: define void {{.*}}test3{{.*}}(i32 %b, i32 %c)
   int varr[b][c];
+  // AMD: %__end = alloca i32*, align 8, addrspace(5)
+  // AMD: [[END:%.*]] = addrspacecast i32* addrspace(5)* %__end to i32**
   // get the address of %b by checking the first store that stores it 
   //CHECK: store i32 %b, i32* [[PTR_B:%.*]]
   //CHECK-NEXT: store i32 %c, i32* [[PTR_C:%.*]]
@@ -105,7 +119,8 @@ void test3(int b, int c) {
   //CHECK-NEXT: [[VLA_NUM_ELEMENTS:%.*]] = udiv i64 [[VLA_SIZEOF]], [[VLA_SIZEOF_DIM2]]
   //CHECK-NEXT: [[VLA_END_INDEX:%.*]] = mul nsw i64 [[VLA_NUM_ELEMENTS]], [[VLA_DIM2_PRE]]
   //CHECK-NEXT: [[VLA_END_PTR:%.*]] = getelementptr inbounds i32, i32* {{%.*}}, i64 [[VLA_END_INDEX]]
-  //CHECK-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //X64-NEXT: store i32* [[VLA_END_PTR]], i32** %__end
+  //AMD-NEXT: store i32* [[VLA_END_PTR]], i32** [[END]]
  
   for (auto &d : varr) 0;
 }