From: Yaxun Liu Date: Tue, 17 Oct 2017 14:19:29 +0000 (+0000) Subject: CodeGen: Fix invalid bitcasts for atomic builtins X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2965315f6c905268a70d66170f12a59d507263df;p=clang CodeGen: Fix invalid bitcasts for atomic builtins Currently clang assumes the temporary variables emitted during codegen of atomic builtins have address space 0, which is not true for target triple amdgcn---amdgiz and causes invalid bitcasts. This patch fixes that. Differential Revision: https://reviews.llvm.org/D38966 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@316000 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index fb79369d34..d90c3a53a6 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1226,7 +1226,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { return RValue::get(nullptr); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1298,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits()); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl index 27189f1b4c..160f7fbd52 100644 --- a/test/CodeGenOpenCL/atomic-ops.cl +++ b/test/CodeGenOpenCL/atomic-ops.cl @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s // Also test serialization of atomic operations here, to avoid duplicating the test. -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl -// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl +// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s #ifndef ALREADY_INCLUDED #define ALREADY_INCLUDED @@ -32,22 +32,22 @@ atomic_int j; void fi1(atomic_int *i) { // CHECK-LABEL: @fi1 - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } void fi2(atomic_int *i) { // CHECK-LABEL: @fi2 - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); } @@ -56,7 +56,7 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int * // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst @@ -65,25 +65,25 @@ void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int * void fi3(atomic_int *i, atomic_uint *ui) { // CHECK-LABEL: @fi3 - // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); } bool fi4(atomic_int *i) { // CHECK-LABEL: @fi4( - // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] @@ -100,16 +100,16 @@ void fi5(atomic_int *i, int scope) { // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] // CHECK-NEXT: ] // CHECK: [[opencl_workgroup]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst // CHECK: br label %[[continue:.*]] // CHECK: [[opencl_device]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst // CHECK: br label %[[continue]] // CHECK: [[opencl_allsvmdevices]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst + // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: br label %[[continue]] // CHECK: [[opencl_subgroup]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst // CHECK: br label %[[continue]] // CHECK: [[continue]]: int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); @@ -141,29 +141,29 @@ void fi6(atomic_int *i, int order, int scope) { // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] // CHECK-NEXT: ] // CHECK: [[MON_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic // CHECK: [[MON_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic // CHECK: [[MON_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic + // CHECK: load atomic i32, i32* %{{.*}} monotonic // CHECK: [[MON_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic // CHECK: [[ACQ_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire // CHECK: [[ACQ_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire // CHECK: [[ACQ_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire + // CHECK: load atomic i32, i32* %{{.*}} acquire // CHECK: [[ACQ_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire // CHECK: [[SEQ_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst // CHECK: [[SEQ_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst // CHECK: [[SEQ_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst + // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: [[SEQ_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst int x = __opencl_atomic_load(i, order, scope); } @@ -181,7 +181,7 @@ void ff2(atomic_float *d) { float ff3(atomic_float *d) { // CHECK-LABEL: @ff3 - // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); } @@ -198,10 +198,10 @@ void atomic_init_foo() // CHECK-LABEL: @failureOrder void failureOrder(atomic_int *ptr, int *ptr2) { - // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic + // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); - // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire + // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); } @@ -279,11 +279,11 @@ int test_volatile(volatile atomic_int *i) { // CHECK-LABEL: @test_volatile // CHECK: %[[i_addr:.*]] = alloca i32 // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 - // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]] - // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]] - // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst - // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]] - // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]] + // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst + // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]] + // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]] // CHECK-NEXT: ret i32 %[[retval]] return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); }