#include "CGCall.h"
#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
return DeclPtr;
}
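+// Add the value operand of an atomic libcall to the argument list. The
+// optimized (size-suffixed) library functions take the value directly, so
+// load it from memory; the generic functions take its address as a void*.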
+static void
+AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
+ bool UseOptimizedLibcall, llvm::Value *Val,
+ QualType ValTy) {
+ if (UseOptimizedLibcall) {
+ // Load value and pass it to the function directly.
+ unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
+ Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy);
+ Args.add(RValue::get(Val), ValTy);
+ } else {
+ // Non-optimized functions always take a reference.
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)),
+ CGF.getContext().VoidPtrTy);
+ }
+}
+
RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
QualType MemTy = AtomicTy;
// Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
if (UseLibcall) {
+ bool UseOptimizedLibcall = false;
+ switch (E->getOp()) {
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ // For these, only the size-suffixed library calls exist, so the
+ // optimized form must always be used.
+ UseOptimizedLibcall = true;
+ break;
+ default:
+ // Only use optimized library calls for sizes for which they exist.
+ if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
+ UseOptimizedLibcall = true;
+ break;
+ }
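+ // For example, a 4-byte __c11_atomic_fetch_add lowers to
+ // __atomic_fetch_add_4, while a load or store of an unsupported size
+ // (e.g. a 100-byte type) falls back to the generic, size-prefixed calls.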
- SmallVector<QualType, 5> Params;
CallArgList Args;
- // Size is always the first parameter
- Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
- getContext().getSizeType());
- // Atomic address is always the second parameter
+ if (!UseOptimizedLibcall) {
+ // For non-optimized library calls, the size is the first parameter
+ Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
+ getContext().getSizeType());
+ }
+ // Atomic address is the first or second parameter
Args.add(RValue::get(EmitCastToVoidPtr(Ptr)),
getContext().VoidPtrTy);
- const char* LibCallName;
- QualType RetTy = getContext().VoidTy;
+ std::string LibCallName;
+ QualType RetTy;
+ bool HaveRetTy = false;
switch (E->getOp()) {
// There is only one libcall for compare and exchange, because there is no
// optimisation benefit possible from a libcall version of a weak compare
// and exchange.
- // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+ // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
// void *desired, int success, int failure)
+ // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
+ // int success, int failure)
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
LibCallName = "__atomic_compare_exchange";
RetTy = getContext().BoolTy;
+ HaveRetTy = true;
Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(Val2)),
- getContext().VoidPtrTy);
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy);
Args.add(RValue::get(Order),
getContext().IntTy);
Order = OrderFail;
break;
// void __atomic_exchange(size_t size, void *mem, void *val, void *return,
// int order)
+ // T __atomic_exchange_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
LibCallName = "__atomic_exchange";
- Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
- getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
- getContext().VoidPtrTy);
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
// void __atomic_store(size_t size, void *mem, void *val, int order)
+ // void __atomic_store_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
LibCallName = "__atomic_store";
- Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
- getContext().VoidPtrTy);
+ RetTy = getContext().VoidTy;
+ HaveRetTy = true;
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
// void __atomic_load(size_t size, void *mem, void *return, int order)
+ // T __atomic_load_N(T *mem, int order)
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
LibCallName = "__atomic_load";
- Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
- getContext().VoidPtrTy);
+ break;
+ // T __atomic_fetch_add_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_add:
+ LibCallName = "__atomic_fetch_add";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_and_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_and:
+ LibCallName = "__atomic_fetch_and";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_or_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_or:
+ LibCallName = "__atomic_fetch_or";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_sub_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ LibCallName = "__atomic_fetch_sub";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_xor_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ LibCallName = "__atomic_fetch_xor";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
default: return EmitUnsupportedRValue(E, "atomic library call");
}
+
+ // Optimized functions have the size in their name.
+ if (UseOptimizedLibcall)
+ LibCallName += "_" + llvm::utostr(Size);
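+ // e.g. "__atomic_fetch_add" becomes "__atomic_fetch_add_4" for a
+ // four-byte operand.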
+ // By default, assume we return a value of the atomic type.
+ if (!HaveRetTy) {
+ if (UseOptimizedLibcall) {
+ // Value is returned directly.
+ RetTy = MemTy;
+ } else {
+ // Value is returned through a parameter that precedes the order.
+ RetTy = getContext().VoidTy;
+ Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
+ getContext().VoidPtrTy);
+ }
+ }
// order is always the last parameter
Args.add(RValue::get(Order),
getContext().IntTy);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
- if (E->isCmpXChg())
+ if (!RetTy->isVoidType())
return Res;
if (E->getType()->isVoidType())
return RValue::get(0);
+// RUN: %clang_cc1 -triple arm-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=ARM
// RUN: %clang_cc1 -triple powerpc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC32
// RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC64
// RUN: %clang_cc1 -triple mipsel-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS32
unsigned short s1, s2;
unsigned int i1, i2;
unsigned long long ll1, ll2;
+unsigned char a1[100], a2[100];
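+// No size-suffixed libcall exists for the 100-byte arrays a1/a2, so loads
+// and stores of them exercise the generic __atomic_load/__atomic_store.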
enum memory_order {
memory_order_relaxed,
void test1(void) {
(void)__atomic_load(&c1, &c2, memory_order_seq_cst);
+ (void)__atomic_store(&c1, &c2, memory_order_seq_cst);
(void)__atomic_load(&s1, &s2, memory_order_seq_cst);
+ (void)__atomic_store(&s1, &s2, memory_order_seq_cst);
(void)__atomic_load(&i1, &i2, memory_order_seq_cst);
+ (void)__atomic_store(&i1, &i2, memory_order_seq_cst);
(void)__atomic_load(&ll1, &ll2, memory_order_seq_cst);
+ (void)__atomic_store(&ll1, &ll2, memory_order_seq_cst);
+ (void)__atomic_load(&a1, &a2, memory_order_seq_cst);
+ (void)__atomic_store(&a1, &a2, memory_order_seq_cst);
+
+// ARM: define arm_aapcscc void @test1
+// ARM: = call arm_aapcscc zeroext i8 @__atomic_load_1(i8* @c1
+// ARM: call arm_aapcscc void @__atomic_store_1(i8* @c1, i8 zeroext
+// ARM: = call arm_aapcscc zeroext i16 @__atomic_load_2(i8* bitcast (i16* @s1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_2(i8* bitcast (i16* @s1 to i8*), i16 zeroext
+// ARM: = call arm_aapcscc i32 @__atomic_load_4(i8* bitcast (i32* @i1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_4(i8* bitcast (i32* @i1 to i8*), i32
+// ARM: = call arm_aapcscc i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// ARM: call arm_aapcscc void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// ARM: call arm_aapcscc void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// PPC32: define void @test1
// PPC32: = load atomic i8* @c1 seq_cst
+// PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst
// PPC32: = load atomic i16* @s1 seq_cst
+// PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst
// PPC32: = load atomic i32* @i1 seq_cst
-// PPC32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*)
+// PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// PPC32: = call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// PPC32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// PPC64: define void @test1
// PPC64: = load atomic i8* @c1 seq_cst
+// PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst
// PPC64: = load atomic i16* @s1 seq_cst
+// PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst
// PPC64: = load atomic i32* @i1 seq_cst
+// PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst
// PPC64: = load atomic i64* @ll1 seq_cst
+// PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// MIPS32: define void @test1
// MIPS32: = load atomic i8* @c1 seq_cst
+// MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst
// MIPS32: = load atomic i16* @s1 seq_cst
+// MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst
// MIPS32: = load atomic i32* @i1 seq_cst
-// MIPS32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*)
+// MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// MIPS32: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// MIPS32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// MIPS32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// MIPS32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// MIPS64: define void @test1
// MIPS64: = load atomic i8* @c1 seq_cst
+// MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst
// MIPS64: = load atomic i16* @s1 seq_cst
+// MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst
// MIPS64: = load atomic i32* @i1 seq_cst
+// MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst
// MIPS64: = load atomic i64* @ll1 seq_cst
+// MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// MIPS64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0)
+// MIPS64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
}