#include "CGCall.h"
#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
return DeclPtr;
}
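+// Add the value operand of an atomic libcall to the argument list. The
+// optimized (size-suffixed) library functions take the value directly, so
+// load it from memory; the generic functions take its address as a void*.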
+static void
+AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
+ bool UseOptimizedLibcall, llvm::Value *Val,
+ QualType ValTy) {
+ if (UseOptimizedLibcall) {
+ // Load value and pass it to the function directly.
+ unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
+ Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy);
+ Args.add(RValue::get(Val), ValTy);
+ } else {
+ // Non-optimized functions always take a reference.
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)),
+ CGF.getContext().VoidPtrTy);
+ }
+}
+
RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
QualType MemTy = AtomicTy;
// Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
if (UseLibcall) {
+ bool UseOptimizedLibcall = false;
+ switch (E->getOp()) {
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ // For these, only the size-suffixed library calls exist, so the
+ // optimized form must always be used.
+ UseOptimizedLibcall = true;
+ break;
+ default:
+ // Only use optimized library calls for sizes for which they exist.
+ if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
+ UseOptimizedLibcall = true;
+ break;
+ }
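+ // For example, a 4-byte __c11_atomic_fetch_add lowers to
+ // __atomic_fetch_add_4, while a load or store of an unsupported size
+ // (e.g. a 100-byte type) falls back to the generic, size-prefixed calls.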
- SmallVector<QualType, 5> Params;
CallArgList Args;
- // Size is always the first parameter
- Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
- getContext().getSizeType());
- // Atomic address is always the second parameter
+ if (!UseOptimizedLibcall) {
+ // For non-optimized library calls, the size is the first parameter
+ Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
+ getContext().getSizeType());
+ }
+ // Atomic address is the first or second parameter
Args.add(RValue::get(EmitCastToVoidPtr(Ptr)),
getContext().VoidPtrTy);
- const char* LibCallName;
- QualType RetTy = getContext().VoidTy;
+ std::string LibCallName;
+ QualType RetTy;
+ bool HaveRetTy = false;
switch (E->getOp()) {
// There is only one libcall for compare and exchange, because there is no
// optimisation benefit possible from a libcall version of a weak compare
// and exchange.
- // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+ // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
// void *desired, int success, int failure)
+ // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
+ // int success, int failure)
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
LibCallName = "__atomic_compare_exchange";
RetTy = getContext().BoolTy;
+ HaveRetTy = true;
Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(Val2)),
- getContext().VoidPtrTy);
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy);
Args.add(RValue::get(Order),
getContext().IntTy);
Order = OrderFail;
break;
// void __atomic_exchange(size_t size, void *mem, void *val, void *return,
// int order)
+ // T __atomic_exchange_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
LibCallName = "__atomic_exchange";
- Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
- getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
- getContext().VoidPtrTy);
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
// void __atomic_store(size_t size, void *mem, void *val, int order)
+ // void __atomic_store_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
LibCallName = "__atomic_store";
- Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
- getContext().VoidPtrTy);
+ RetTy = getContext().VoidTy;
+ HaveRetTy = true;
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
// void __atomic_load(size_t size, void *mem, void *return, int order)
+ // T __atomic_load_N(T *mem, int order)
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
LibCallName = "__atomic_load";
- Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
- getContext().VoidPtrTy);
+ break;
+ // T __atomic_fetch_add_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__atomic_fetch_add:
+ LibCallName = "__atomic_fetch_add";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_and_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_and:
+ LibCallName = "__atomic_fetch_and";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_or_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_or:
+ LibCallName = "__atomic_fetch_or";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_sub_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_sub:
+ LibCallName = "__atomic_fetch_sub";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
+ break;
+ // T __atomic_fetch_xor_N(T *mem, T val, int order)
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ LibCallName = "__atomic_fetch_xor";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
break;
default: return EmitUnsupportedRValue(E, "atomic library call");
}
+
+ // Optimized functions have the size in their name.
+ if (UseOptimizedLibcall)
+ LibCallName += "_" + llvm::utostr(Size);
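+ // e.g. "__atomic_fetch_add" becomes "__atomic_fetch_add_4" for a
+ // four-byte operand.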
+ // By default, assume we return a value of the atomic type.
+ if (!HaveRetTy) {
+ if (UseOptimizedLibcall) {
+ // Value is returned directly.
+ RetTy = MemTy;
+ } else {
+ // Value is returned through a parameter that precedes the order.
+ RetTy = getContext().VoidTy;
+ Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
+ getContext().VoidPtrTy);
+ }
+ }
// order is always the last parameter
Args.add(RValue::get(Order),
getContext().IntTy);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
- if (E->isCmpXChg())
+ if (!RetTy->isVoidType())
return Res;
if (E->getType()->isVoidType())
return RValue::get(0);
+// RUN: %clang_cc1 -triple arm-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=ARM
// RUN: %clang_cc1 -triple powerpc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC32
// RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=PPC64
// RUN: %clang_cc1 -triple mipsel-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS32
unsigned short s1, s2;
unsigned int i1, i2;
unsigned long long ll1, ll2;
+unsigned char a1[100], a2[100];
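+// No size-suffixed libcall exists for the 100-byte arrays a1/a2, so loads
+// and stores of them exercise the generic __atomic_load/__atomic_store.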
enum memory_order {
memory_order_relaxed,
void test1(void) {
(void)__atomic_load(&c1, &c2, memory_order_seq_cst);
+ (void)__atomic_store(&c1, &c2, memory_order_seq_cst);
(void)__atomic_load(&s1, &s2, memory_order_seq_cst);
+ (void)__atomic_store(&s1, &s2, memory_order_seq_cst);
(void)__atomic_load(&i1, &i2, memory_order_seq_cst);
+ (void)__atomic_store(&i1, &i2, memory_order_seq_cst);
(void)__atomic_load(&ll1, &ll2, memory_order_seq_cst);
+ (void)__atomic_store(&ll1, &ll2, memory_order_seq_cst);
+ (void)__atomic_load(&a1, &a2, memory_order_seq_cst);
+ (void)__atomic_store(&a1, &a2, memory_order_seq_cst);
+
+// ARM: define arm_aapcscc void @test1
+// ARM: = call arm_aapcscc zeroext i8 @__atomic_load_1(i8* @c1
+// ARM: call arm_aapcscc void @__atomic_store_1(i8* @c1, i8 zeroext
+// ARM: = call arm_aapcscc zeroext i16 @__atomic_load_2(i8* bitcast (i16* @s1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_2(i8* bitcast (i16* @s1 to i8*), i16 zeroext
+// ARM: = call arm_aapcscc i32 @__atomic_load_4(i8* bitcast (i32* @i1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_4(i8* bitcast (i32* @i1 to i8*), i32
+// ARM: = call arm_aapcscc i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// ARM: call arm_aapcscc void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// ARM: call arm_aapcscc void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// ARM: call arm_aapcscc void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// PPC32: define void @test1
// PPC32: = load atomic i8* @c1 seq_cst
+// PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst
// PPC32: = load atomic i16* @s1 seq_cst
+// PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst
// PPC32: = load atomic i32* @i1 seq_cst
-// PPC32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*)
+// PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// PPC32: = call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// PPC32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// PPC64: define void @test1
// PPC64: = load atomic i8* @c1 seq_cst
+// PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst
// PPC64: = load atomic i16* @s1 seq_cst
+// PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst
// PPC64: = load atomic i32* @i1 seq_cst
+// PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst
// PPC64: = load atomic i64* @ll1 seq_cst
+// PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// MIPS32: define void @test1
// MIPS32: = load atomic i8* @c1 seq_cst
+// MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst
// MIPS32: = load atomic i16* @s1 seq_cst
+// MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst
// MIPS32: = load atomic i32* @i1 seq_cst
-// MIPS32: call void @__atomic_load(i32 8, i8* bitcast (i64* @ll1 to i8*)
+// MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// MIPS32: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
+// MIPS32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
+// MIPS32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// MIPS32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
// MIPS64: define void @test1
// MIPS64: = load atomic i8* @c1 seq_cst
+// MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst
// MIPS64: = load atomic i16* @s1 seq_cst
+// MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst
// MIPS64: = load atomic i32* @i1 seq_cst
+// MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst
// MIPS64: = load atomic i64* @ll1 seq_cst
+// MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// MIPS64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0)
+// MIPS64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
}