Add OpenCL 2.0 atomic builtin functions as Clang builtin
author    Yaxun Liu <Yaxun.Liu@amd.com>
          Fri, 4 Aug 2017 18:16:31 +0000 (18:16 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
          Fri, 4 Aug 2017 18:16:31 +0000 (18:16 +0000)
OpenCL 2.0 atomic builtin functions have a scope argument, which is best
represented as the synchronization scope argument of LLVM atomic instructions.

Clang supports translating its atomic builtin functions to LLVM atomic
instructions. However, it currently does not support the synchronization
scope of LLVM atomic instructions, so users have to fall back to LLVM
assembly to implement the OpenCL atomic builtin functions.

This patch adds the OpenCL 2.0 atomic builtin functions as Clang builtin
functions, which support generating LLVM atomic instructions with a
synchronization scope operand.

Currently, only constant memory scope arguments are supported; support for
non-constant memory scope arguments will be added later.
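
As a usage sketch (mirroring the tests added in
test/CodeGenOpenCL/atomic-ops.cl), the new builtins take the memory scope as
an extra trailing argument relative to the __c11 forms:

  void f(atomic_int *i) {
    int x;
    // seq_cst load, synchronized across the work-group only.
    x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
    // seq_cst fetch-add visible device-wide.
    x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_device);
  }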

Differential Revision: https://reviews.llvm.org/D28691

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@310082 91177308-0d34-0410-b5e6-96231b3b80d8

21 files changed:
docs/LanguageExtensions.rst
include/clang/AST/Expr.h
include/clang/Basic/Builtins.def
include/clang/Basic/DiagnosticSemaKinds.td
include/clang/Basic/SyncScope.h [new file with mode: 0644]
lib/AST/ASTContext.cpp
lib/AST/Expr.cpp
lib/AST/StmtPrinter.cpp
lib/Basic/Targets/AMDGPU.cpp
lib/CodeGen/CGAtomic.cpp
lib/CodeGen/CGExpr.cpp
lib/CodeGen/TargetInfo.cpp
lib/CodeGen/TargetInfo.h
lib/Frontend/InitPreprocessor.cpp
lib/Headers/opencl-c.h
lib/Sema/SemaChecking.cpp
test/CodeGenOpenCL/atomic-ops-libcall.cl [new file with mode: 0644]
test/CodeGenOpenCL/atomic-ops.cl [new file with mode: 0644]
test/Preprocessor/init.c
test/Preprocessor/predefined-macros.c
test/SemaOpenCL/atomic-ops.cl [new file with mode: 0644]

index 78f987c4a8e8b25bde1ad42983bdc4fe60560e86..652145f66fc99f9c30ec7c1db8db4ae361fbd3ca 100644 (file)
@@ -1929,7 +1929,13 @@ provided, with values corresponding to the enumerators of C11's
 ``memory_order`` enumeration.
 
 (Note that Clang additionally provides GCC-compatible ``__atomic_*``
-builtins)
+builtins and OpenCL 2.0 ``__opencl_atomic_*`` builtins. The OpenCL 2.0
+atomic builtins are an explicit form of the corresponding OpenCL 2.0
+builtin function, and are named with a ``__opencl_`` prefix. The macros
+``__OPENCL_MEMORY_SCOPE_WORK_ITEM``, ``__OPENCL_MEMORY_SCOPE_WORK_GROUP``,
+``__OPENCL_MEMORY_SCOPE_DEVICE``, ``__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES``,
+and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
+corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
 
 Low-level ARM exclusive memory builtins
 ---------------------------------------
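
Regarding the memory-scope macros documented above: a minimal sketch
(assuming an OpenCL 2.0 compilation and a placeholder atomic_int *p) in
which the two calls are equivalent, since this patch defines the
memory_scope enumerators in terms of the macros:

  // memory_scope_device is defined as __OPENCL_MEMORY_SCOPE_DEVICE in opencl-c.h.
  __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_device);
  __opencl_atomic_store(p, 1, memory_order_seq_cst, __OPENCL_MEMORY_SCOPE_DEVICE);
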
index 0cdbd2a97ee4f5ca9e68349c80fac27e82dab688..a2da38d6cc8ffe0e484b16ccebf928ae8f71e6a7 100644 (file)
@@ -5064,9 +5064,11 @@ public:
 
 /// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
 /// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
-/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
-/// All of these instructions take one primary pointer and at least one memory
-/// order.
+/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>,
+/// and corresponding __opencl_atomic_* for OpenCL 2.0.
+/// All of these instructions take one primary pointer, at least one memory
+/// order, and one synchronization scope. The C++11 and __c11 atomic AtomicExpr
+/// always take the default synchronization scope.
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
@@ -5078,7 +5080,7 @@ public:
   };
 
 private:
-  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+  enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
   Stmt* SubExprs[END_EXPR];
   unsigned NumSubExprs;
   SourceLocation BuiltinLoc, RParenLoc;
@@ -5103,8 +5105,11 @@ public:
   Expr *getOrder() const {
     return cast<Expr>(SubExprs[ORDER]);
   }
+  Expr *getScope() const {
+    return cast<Expr>(SubExprs[SCOPE]);
+  }
   Expr *getVal1() const {
-    if (Op == AO__c11_atomic_init)
+    if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
       return cast<Expr>(SubExprs[ORDER]);
     assert(NumSubExprs > VAL1);
     return cast<Expr>(SubExprs[VAL1]);
@@ -5123,6 +5128,7 @@ public:
     assert(NumSubExprs > WEAK);
     return cast<Expr>(SubExprs[WEAK]);
   }
+  QualType getValueType() const;
 
   AtomicOp getOp() const { return Op; }
   unsigned getNumSubExprs() const { return NumSubExprs; }
@@ -5139,10 +5145,17 @@ public:
   bool isCmpXChg() const {
     return getOp() == AO__c11_atomic_compare_exchange_strong ||
            getOp() == AO__c11_atomic_compare_exchange_weak ||
+           getOp() == AO__opencl_atomic_compare_exchange_strong ||
+           getOp() == AO__opencl_atomic_compare_exchange_weak ||
            getOp() == AO__atomic_compare_exchange ||
            getOp() == AO__atomic_compare_exchange_n;
   }
 
+  bool isOpenCL() const {
+    return getOp() >= AO__opencl_atomic_init &&
+           getOp() <= AO__opencl_atomic_fetch_max;
+  }
+
   SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
   SourceLocation getRParenLoc() const { return RParenLoc; }
 
index 82242d9571600480ae3d01a313b883d562f702a1..724a8bf6b228b3196b5f953d7f19d53213d74197 100644 (file)
@@ -700,6 +700,21 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
 BUILTIN(__atomic_always_lock_free, "izvCD*", "n")
 BUILTIN(__atomic_is_lock_free, "izvCD*", "n")
 
+// OpenCL 2.0 atomic builtins.
+ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_strong, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_weak, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
+
 #undef ATOMIC_BUILTIN
 
 // Non-overloaded atomic builtins.
index e685311914752a581b964b90a271137dbb01bdc9..a07be3f82cd1f5b988da21ad2c09604fc400fd6b 100644 (file)
@@ -6995,8 +6995,8 @@ def err_atomic_op_needs_atomic : Error<
   "address argument to atomic operation must be a pointer to _Atomic "
   "type (%0 invalid)">;
 def err_atomic_op_needs_non_const_atomic : Error<
-  "address argument to atomic operation must be a pointer to non-const _Atomic "
-  "type (%0 invalid)">;
+  "address argument to atomic operation must be a pointer to non-%select{const|constant}0 _Atomic "
+  "type (%1 invalid)">;
 def err_atomic_op_needs_non_const_pointer : Error<
   "address argument to atomic operation must be a pointer to non-const "
   "type (%0 invalid)">;
@@ -7012,6 +7012,10 @@ def err_atomic_op_bitwise_needs_atomic_int : Error<
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "memory order argument to atomic operation is invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
+def err_atomic_op_has_invalid_synch_scope : Error<
+  "synchronization scope argument to atomic operation is invalid">;
+def err_atomic_op_has_non_constant_synch_scope : Error<
+  "non-constant synchronization scope argument to atomic operation is not supported">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to overflow builtin must be an integer (%0 invalid)">;
diff --git a/include/clang/Basic/SyncScope.h b/include/clang/Basic/SyncScope.h
new file mode 100644 (file)
index 0000000..28d6fa6
--- /dev/null
@@ -0,0 +1,39 @@
+//===--- SyncScope.h - Atomic synchronization scopes ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for the atomic synchronization scopes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
+#define LLVM_CLANG_BASIC_SYNCSCOPE_H
+
+namespace clang {
+
+/// \brief Defines the synch scope values used by the atomic builtins and
+/// expressions.
+///
+/// The enum values should match the pre-defined macros
+/// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_*
+/// enums in opencl-c.h.
+enum class SyncScope {
+  OpenCLWorkGroup = 1,
+  OpenCLDevice = 2,
+  OpenCLAllSVMDevices = 3,
+  OpenCLSubGroup = 4,
+};
+
+inline bool isValidSyncScopeValue(unsigned Scope) {
+  return Scope >= static_cast<unsigned>(SyncScope::OpenCLWorkGroup) &&
+         Scope <= static_cast<unsigned>(SyncScope::OpenCLSubGroup);
+}
+}
+
+#endif
index c60373c5a90a4f54de70561a818f0b87996bc0bd..34a78c6a0c62feb28643394965d2b5795a5b9b02 100644 (file)
@@ -1182,7 +1182,14 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
   ObjCSuperType = QualType();
 
   // void * type
-  VoidPtrTy = getPointerType(VoidTy);
+  if (LangOpts.OpenCLVersion >= 200) {
+    auto Q = VoidTy.getQualifiers();
+    Q.setAddressSpace(LangAS::opencl_generic);
+    VoidPtrTy = getPointerType(getCanonicalType(
+        getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
+  } else {
+    VoidPtrTy = getPointerType(VoidTy);
+  }
 
   // nullptr type (C++0x 2.14.7)
   InitBuiltinType(NullPtrTy,           BuiltinType::NullPtr);
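
The effect of this VoidPtrTy change shows up in the libcall signatures:
with -cl-std=CL2.0 the pointer parameter of the OpenCL atomic libcalls is a
generic pointer (i8 addrspace(4)* on spir64), while a target like ARM,
whose generic address space lowers to LLVM address space 0, gets plain
i8*. The lines below are taken from the new
test/CodeGenOpenCL/atomic-ops-libcall.cl:

  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
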
index afc7fa8ea0949c0b192d7fc19d56145cb4a0976a..8cb9f76d9654bce232d8aa6a0ab701601a5dd043 100644 (file)
@@ -3938,12 +3938,17 @@ AtomicExpr::AtomicExpr(SourceLocation BLoc, ArrayRef<Expr*> args,
 unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
   case AO__c11_atomic_init:
+  case AO__opencl_atomic_init:
+    return 2;
   case AO__c11_atomic_load:
+  case AO__opencl_atomic_load:
   case AO__atomic_load_n:
-    return 2;
+    return 3;
 
   case AO__c11_atomic_store:
   case AO__c11_atomic_exchange:
+  case AO__opencl_atomic_store:
+  case AO__opencl_atomic_exchange:
   case AO__atomic_load:
   case AO__atomic_store:
   case AO__atomic_store_n:
@@ -3953,6 +3958,13 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__c11_atomic_fetch_and:
   case AO__c11_atomic_fetch_or:
   case AO__c11_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_add:
+  case AO__opencl_atomic_fetch_sub:
+  case AO__opencl_atomic_fetch_and:
+  case AO__opencl_atomic_fetch_or:
+  case AO__opencl_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_min:
+  case AO__opencl_atomic_fetch_max:
   case AO__atomic_fetch_add:
   case AO__atomic_fetch_sub:
   case AO__atomic_fetch_and:
@@ -3965,22 +3977,31 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
-    return 3;
+    return 4;
 
   case AO__atomic_exchange:
-    return 4;
+    return 5;
 
   case AO__c11_atomic_compare_exchange_strong:
   case AO__c11_atomic_compare_exchange_weak:
-    return 5;
+  case AO__opencl_atomic_compare_exchange_strong:
+  case AO__opencl_atomic_compare_exchange_weak:
+    return 6;
 
   case AO__atomic_compare_exchange:
   case AO__atomic_compare_exchange_n:
-    return 6;
+    return 7;
   }
   llvm_unreachable("unknown atomic op");
 }
 
+QualType AtomicExpr::getValueType() const {
+  auto T = getPtr()->getType()->castAs<PointerType>()->getPointeeType();
+  if (auto AT = T->getAs<AtomicType>())
+    return AT->getValueType();
+  return T;
+}
+
 QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) {
   unsigned ArraySectionCount = 0;
   while (auto *OASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParens())) {
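
The new getValueType() helper strips the _Atomic wrapper (and returns the
pointee type directly for non-atomic pointees); the CGAtomic changes below
rely on it to pick signed versus unsigned RMW operations for the new
fetch_min/fetch_max builtins. A sketch, where s and u are placeholder
atomic_int* and atomic_uint* pointers:

  // atomic_int  pointee -> atomicrmw min  (signed)
  // atomic_uint pointee -> atomicrmw umin (unsigned)
  __opencl_atomic_fetch_min(s, x, memory_order_seq_cst, memory_scope_work_group);
  __opencl_atomic_fetch_min(u, x, memory_order_seq_cst, memory_scope_work_group);
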
index f4418c92406351ff48c05f6293a9db82ff4fc616..e3b96284bd1ce700b69333eca751739d973e4288 100644 (file)
@@ -1891,7 +1891,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
   // AtomicExpr stores its subexpressions in a permuted order.
   PrintExpr(Node->getPtr());
   if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
-      Node->getOp() != AtomicExpr::AO__atomic_load_n) {
+      Node->getOp() != AtomicExpr::AO__atomic_load_n &&
+      Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
     OS << ", ";
     PrintExpr(Node->getVal1());
   }
@@ -1905,7 +1906,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
     OS << ", ";
     PrintExpr(Node->getWeak());
   }
-  if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) {
+  if (Node->getOp() != AtomicExpr::AO__c11_atomic_init &&
+      Node->getOp() != AtomicExpr::AO__opencl_atomic_init) {
     OS << ", ";
     PrintExpr(Node->getOrder());
   }
index 8c1b863229c4a5b5ce975fd48ea2261a96a669b8..395dbf822f8463160da224577093e5b7574e1111 100644 (file)
@@ -328,6 +328,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
     PtrDiffType = SignedLong;
     IntPtrType = SignedLong;
   }
+
+  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
 }
 
 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
index a6e6fec206d57eecfe10a761f87f903f638044d1..5e6633fff163597a5abac795c85dbdc74560d518 100644 (file)
@@ -15,6 +15,7 @@
 #include "CGRecordLayout.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -359,13 +360,15 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
                               Address Val1, Address Val2,
                               uint64_t Size,
                               llvm::AtomicOrdering SuccessOrder,
-                              llvm::AtomicOrdering FailureOrder) {
+                              llvm::AtomicOrdering FailureOrder,
+                              llvm::SyncScope::ID Scope) {
   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
   llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
 
   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder);
+      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
+      Scope);
   Pair->setVolatile(E->isVolatile());
   Pair->setWeak(IsWeak);
 
@@ -407,7 +410,8 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
                                         Address Val1, Address Val2,
                                         llvm::Value *FailureOrderVal,
                                         uint64_t Size,
-                                        llvm::AtomicOrdering SuccessOrder) {
+                                        llvm::AtomicOrdering SuccessOrder,
+                                        llvm::SyncScope::ID Scope) {
   llvm::AtomicOrdering FailureOrder;
   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
     auto FOS = FO->getSExtValue();
@@ -435,7 +439,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
           llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
     }
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      FailureOrder);
+                      FailureOrder, Scope);
     return;
   }
 
@@ -460,13 +464,13 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
   // doesn't fold to a constant for the ordering.
   CGF.Builder.SetInsertPoint(MonotonicBB);
   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
+                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   if (AcquireBB) {
     CGF.Builder.SetInsertPoint(AcquireBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
+                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -476,7 +480,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
   if (SeqCstBB) {
     CGF.Builder.SetInsertPoint(SeqCstBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      llvm::AtomicOrdering::SequentiallyConsistent);
+                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
                 SeqCstBB);
@@ -488,27 +492,31 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order) {
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::SyncScope::ID Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled!");
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__atomic_compare_exchange:
   case AtomicExpr::AO__atomic_compare_exchange_n: {
     if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
       emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
-                                  Val1, Val2, FailureOrder, Size, Order);
+                                  Val1, Val2, FailureOrder, Size, Order, Scope);
     } else {
       // Create all the relevant BB's
       llvm::BasicBlock *StrongBB =
@@ -522,12 +530,12 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
       CGF.Builder.SetInsertPoint(StrongBB);
       emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(WeakBB);
       emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(ContBB);
@@ -535,26 +543,29 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     return;
   }
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load: {
     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
-    Load->setAtomic(Order);
+    Load->setAtomic(Order, Scope);
     Load->setVolatile(E->isVolatile());
     CGF.Builder.CreateStore(Load, Dest);
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
-    Store->setAtomic(Order);
+    Store->setAtomic(Order, Scope);
     Store->setVolatile(E->isVolatile());
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__atomic_exchange:
     Op = llvm::AtomicRMWInst::Xchg;
@@ -564,6 +575,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Add;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_add:
     Op = llvm::AtomicRMWInst::Add;
     break;
@@ -572,14 +584,26 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Sub;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
     Op = llvm::AtomicRMWInst::Sub;
     break;
 
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
+                                                  : llvm::AtomicRMWInst::UMin;
+    break;
+
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
+    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
+                                                  : llvm::AtomicRMWInst::UMax;
+    break;
+
   case AtomicExpr::AO__atomic_and_fetch:
     PostOp = llvm::Instruction::And;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
     Op = llvm::AtomicRMWInst::And;
     break;
@@ -588,6 +612,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Or;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
     Op = llvm::AtomicRMWInst::Or;
     break;
@@ -596,6 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Xor;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_xor:
     Op = llvm::AtomicRMWInst::Xor;
     break;
@@ -610,7 +636,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order);
+      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -677,19 +703,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   Address Dest = Address::invalid();
   Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
 
-  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
+  if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+      E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
   }
 
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
+  llvm::Value *Scope = EmitScalarExpr(E->getScope());
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled above with EmitAtomicInit!");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     break;
 
@@ -708,6 +738,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
   case AtomicExpr::AO__atomic_compare_exchange_n:
   case AtomicExpr::AO__atomic_compare_exchange:
     Val1 = EmitPointerWithAlignment(E->getVal1());
@@ -716,12 +748,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     else
       Val2 = EmitValToTemp(*this, E->getVal2());
     OrderFail = EmitScalarExpr(E->getOrderFail());
-    if (E->getNumSubExprs() == 6)
+    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
+        E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
       IsWeak = EmitScalarExpr(E->getWeak());
     break;
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
     if (MemTy->isPointerType()) {
       // For pointer arithmetic, we're required to do a bit of math:
       // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
@@ -744,11 +779,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   case AtomicExpr::AO__atomic_sub_fetch:
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_store_n:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -784,18 +826,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     bool UseOptimizedLibcall = false;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
+    case AtomicExpr::AO__opencl_atomic_init:
       llvm_unreachable("Already handled above with EmitAtomicInit!");
 
     case AtomicExpr::AO__c11_atomic_fetch_add:
+    case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
     case AtomicExpr::AO__c11_atomic_fetch_and:
+    case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
     case AtomicExpr::AO__c11_atomic_fetch_or:
+    case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_nand:
     case AtomicExpr::AO__c11_atomic_fetch_sub:
+    case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
     case AtomicExpr::AO__c11_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_min:
+    case AtomicExpr::AO__opencl_atomic_fetch_max:
     case AtomicExpr::AO__atomic_fetch_xor:
     case AtomicExpr::AO__atomic_add_fetch:
     case AtomicExpr::AO__atomic_and_fetch:
@@ -812,6 +862,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__c11_atomic_exchange:
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__opencl_atomic_load:
+    case AtomicExpr::AO__opencl_atomic_store:
+    case AtomicExpr::AO__opencl_atomic_exchange:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     case AtomicExpr::AO__atomic_load_n:
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_store_n:
@@ -833,7 +888,24 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
                getContext().getSizeType());
     }
     // Atomic address is the first or second parameter
-    Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())),
+    // The OpenCL atomic library functions only accept pointer arguments to
+    // generic address space.
+    auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
+      if (!E->isOpenCL())
+        return V;
+      auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace();
+      if (AS == LangAS::opencl_generic)
+        return V;
+      auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+      auto T = V->getType();
+      auto *DestType = T->getPointerElementType()->getPointerTo(DestAS);
+
+      return getTargetHooks().performAddrSpaceCast(
+          *this, V, AS, LangAS::opencl_generic, DestType, false);
+    };
+
+    Args.add(RValue::get(CastToGenericAddrSpace(
+                 EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())),
              getContext().VoidPtrTy);
 
     std::string LibCallName;
@@ -844,6 +916,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
+    case AtomicExpr::AO__opencl_atomic_init:
       llvm_unreachable("Already handled!");
 
     // There is only one libcall for compare an exchange, because there is no
@@ -855,13 +928,17 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     //                                  int success, int failure)
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     case AtomicExpr::AO__atomic_compare_exchange:
     case AtomicExpr::AO__atomic_compare_exchange_n:
       LibCallName = "__atomic_compare_exchange";
       RetTy = getContext().BoolTy;
       HaveRetTy = true;
-      Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())),
-               getContext().VoidPtrTy);
+      Args.add(
+          RValue::get(CastToGenericAddrSpace(
+              EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())),
+          getContext().VoidPtrTy);
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       Args.add(RValue::get(Order), getContext().IntTy);
@@ -871,6 +948,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     //                        int order)
     // T __atomic_exchange_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_exchange:
+    case AtomicExpr::AO__opencl_atomic_exchange:
     case AtomicExpr::AO__atomic_exchange_n:
     case AtomicExpr::AO__atomic_exchange:
       LibCallName = "__atomic_exchange";
@@ -880,6 +958,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // void __atomic_store(size_t size, void *mem, void *val, int order)
     // void __atomic_store_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_store:
+    case AtomicExpr::AO__opencl_atomic_store:
     case AtomicExpr::AO__atomic_store:
     case AtomicExpr::AO__atomic_store_n:
       LibCallName = "__atomic_store";
@@ -891,6 +970,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // void __atomic_load(size_t size, void *mem, void *return, int order)
     // T __atomic_load_N(T *mem, int order)
     case AtomicExpr::AO__c11_atomic_load:
+    case AtomicExpr::AO__opencl_atomic_load:
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_load_n:
       LibCallName = "__atomic_load";
@@ -901,6 +981,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Add;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_add:
+    case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
       LibCallName = "__atomic_fetch_add";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -912,6 +993,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::And;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_and:
+    case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
       LibCallName = "__atomic_fetch_and";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -923,6 +1005,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Or;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_or:
+    case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
       LibCallName = "__atomic_fetch_or";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -934,6 +1017,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Sub;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_sub:
+    case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
       LibCallName = "__atomic_fetch_sub";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -945,11 +1029,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Xor;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__atomic_fetch_xor:
       LibCallName = "__atomic_fetch_xor";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__opencl_atomic_fetch_min:
+      LibCallName = E->getValueType()->isSignedIntegerType()
+                        ? "__atomic_fetch_min"
+                        : "__atomic_fetch_umin";
+      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+                        LoweredMemTy, E->getExprLoc(), sizeChars);
+      break;
+    case AtomicExpr::AO__opencl_atomic_fetch_max:
+      LibCallName = E->getValueType()->isSignedIntegerType()
+                        ? "__atomic_fetch_max"
+                        : "__atomic_fetch_umax";
+      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+                        LoweredMemTy, E->getExprLoc(), sizeChars);
+      break;
     // T __atomic_nand_fetch_N(T *mem, T val, int order)
     // T __atomic_fetch_nand_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_nand_fetch:
@@ -962,6 +1061,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       break;
     }
 
+    if (E->isOpenCL()) {
+      LibCallName = std::string("__opencl") +
+          StringRef(LibCallName).drop_front(1).str();
+
+    }
     // Optimized functions have the size in their name.
     if (UseOptimizedLibcall)
       LibCallName += "_" + llvm::utostr(Size);
@@ -982,6 +1086,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // order is always the last parameter
     Args.add(RValue::get(Order),
              getContext().IntTy);
+    if (E->isOpenCL())
+      Args.add(RValue::get(Scope), getContext().IntTy);
 
     // PostOp is only needed for the atomic_*_fetch operations, and
     // thus is only needed for and implemented in the
@@ -1018,12 +1124,20 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+                 E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store_n;
   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+                E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
+  assert(isa<llvm::ConstantInt>(Scope) &&
+      "Non-constant synchronization scope not supported");
+  auto SCID = getTargetHooks().getLLVMSyncScopeID(
+      static_cast<SyncScope>(cast<llvm::ConstantInt>(Scope)->getZExtValue()),
+      getLLVMContext());
+
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
@@ -1032,30 +1146,30 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       switch ((llvm::AtomicOrderingCABI)ord) {
       case llvm::AtomicOrderingCABI::relaxed:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Monotonic);
+                     llvm::AtomicOrdering::Monotonic, SCID);
         break;
       case llvm::AtomicOrderingCABI::consume:
       case llvm::AtomicOrderingCABI::acquire:
         if (IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Acquire);
+                     llvm::AtomicOrdering::Acquire, SCID);
         break;
       case llvm::AtomicOrderingCABI::release:
         if (IsLoad)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Release);
+                     llvm::AtomicOrdering::Release, SCID);
         break;
       case llvm::AtomicOrderingCABI::acq_rel:
         if (IsLoad || IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::AcquireRelease);
+                     llvm::AtomicOrdering::AcquireRelease, SCID);
         break;
       case llvm::AtomicOrderingCABI::seq_cst:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::SequentiallyConsistent);
+                     llvm::AtomicOrdering::SequentiallyConsistent, SCID);
         break;
       }
     if (RValTy->isVoidType())
@@ -1091,13 +1205,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::Monotonic);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::Monotonic, SCID);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Acquire);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Acquire, SCID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1106,23 +1220,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Release);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Release, SCID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::AcquireRelease);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::AcquireRelease, SCID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::SequentiallyConsistent);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::SequentiallyConsistent, SCID);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
index ec48b2cc86e0d78745f659da35980fbed1c12d70..cd7911a6046bca353cd57ca833e1b7c86e733114 100644 (file)
@@ -48,7 +48,7 @@ using namespace CodeGen;
 
 llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
   unsigned addressSpace =
-    cast<llvm::PointerType>(value->getType())->getAddressSpace();
+      cast<llvm::PointerType>(value->getType())->getAddressSpace();
 
   llvm::PointerType *destType = Int8PtrTy;
   if (addressSpace)
index 12341431ac0c21abe8eb0eb4f01c0ebe3d561151..d2fcbeadbc1a371ec1cf49e56dfaa92fae36aa90 100644 (file)
@@ -444,6 +444,11 @@ TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
   return llvm::ConstantExpr::getPointerCast(Src, DestTy);
 }
 
+llvm::SyncScope::ID
+TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
+  return C.getOrInsertSyncScopeID(""); /* default sync scope */
+}
+
 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
 
 /// isEmptyField - Return true iff a the field is "empty", that is it
@@ -7430,6 +7435,8 @@ public:
   }
   unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
                                     const VarDecl *D) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+                                         llvm::LLVMContext &C) const override;
 };
 }
 
@@ -7539,6 +7546,26 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
   return DefaultGlobalAS;
 }
 
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
+                                            llvm::LLVMContext &C) const {
+  StringRef Name;
+  switch (S) {
+  case SyncScope::OpenCLWorkGroup:
+    Name = "workgroup";
+    break;
+  case SyncScope::OpenCLDevice:
+    Name = "agent";
+    break;
+  case SyncScope::OpenCLAllSVMDevices:
+    Name = "";
+    break;
+  case SyncScope::OpenCLSubGroup:
+    Name = "subgroup";
+  }
+  return C.getOrInsertSyncScopeID(Name);
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v8 ABI Implementation.
 // Based on the SPARC Compliance Definition version 2.4.1.
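
For AMDGPU, the mapping above translates the OpenCL scopes into LLVM IR
syncscope strings roughly as follows (an illustrative sketch; the
all-SVM-devices case maps to the default scope, which is printed without a
syncscope clause; the committed FileCheck patterns are in
test/CodeGenOpenCL/atomic-ops.cl):

  // memory_scope_work_group      -> atomicrmw ... syncscope("workgroup") seq_cst
  // memory_scope_device          -> atomicrmw ... syncscope("agent") seq_cst
  // memory_scope_all_svm_devices -> atomicrmw ... seq_cst
  // memory_scope_sub_group       -> atomicrmw ... syncscope("subgroup") seq_cst
  __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_device);
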
index 7149dec1dada9a52772a51a21b1fb32352a303e7..c26d7c710c1a10c18a2043aa74cae1f5183e3122 100644 (file)
@@ -19,6 +19,7 @@
 #include "CGValue.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/SyncScope.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -260,6 +261,10 @@ public:
   virtual llvm::Constant *
   performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
                        unsigned DestAddr, llvm::Type *DestTy) const;
+
+  /// Get the syncscope used in LLVM IR.
+  virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+                                                 llvm::LLVMContext &C) const;
 };
 
 } // namespace CodeGen
index 64128dfdf534f6d2e2064495f81645ee75a8a641..493144fb953a55c0782ee5702f504ca2ed5e3a8a 100644 (file)
@@ -14,6 +14,7 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Version.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -575,6 +576,18 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__ATOMIC_ACQ_REL", "4");
   Builder.defineMacro("__ATOMIC_SEQ_CST", "5");
 
+  // Define macros for the OpenCL memory scope.
+  // The values should match clang SyncScope enum.
+  assert(static_cast<unsigned>(SyncScope::OpenCLWorkGroup) == 1 &&
+         static_cast<unsigned>(SyncScope::OpenCLDevice) == 2 &&
+         static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices) == 3 &&
+         static_cast<unsigned>(SyncScope::OpenCLSubGroup) == 4);
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_ITEM", "0");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_GROUP", "1");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES", "3");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_SUB_GROUP", "4");
+
   // Support for #pragma redefine_extname (Sun compatibility)
   Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1");
 
index 58c8daf3a5369d943027e5bd6a6a512f6a228fd0..9657e45a2f4ab9dd6141518b985d3a2476c9e360 100644 (file)
@@ -13141,13 +13141,14 @@ void __ovld __conv barrier(cl_mem_fence_flags flags);
 
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
-typedef enum memory_scope
-{
-  memory_scope_work_item,
-  memory_scope_work_group,
-  memory_scope_device,
-  memory_scope_all_svm_devices,
-  memory_scope_sub_group
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
 } memory_scope;
 
 void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
@@ -13952,11 +13953,11 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
 // enum values aligned with what clang uses in EmitAtomicExpr()
 typedef enum memory_order
 {
-  memory_order_relaxed,
-  memory_order_acquire,
-  memory_order_release,
-  memory_order_acq_rel,
-  memory_order_seq_cst
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
 } memory_order;
 
 // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
index 81dd36cf67163bbe2530981449ffd632c6a39381..578dbf0aadcde1bca5bb5ed391e4a8c08a3e31bb 100644 (file)
@@ -25,6 +25,7 @@
 #include "clang/AST/StmtObjC.h"
 #include "clang/Analysis/Analyses/FormatString.h"
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
@@ -2787,15 +2788,18 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
   auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("There is no ordering argument for an init");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load:
     return OrderingCABI != llvm::AtomicOrderingCABI::release &&
            OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
@@ -2812,7 +2816,9 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
   CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 
-  // All these operations take one of the following forms:
+  // All the non-OpenCL operations take one of the following forms.
+  // The OpenCL operations take the __c11 forms with one extra argument for
+  // synchronization scope.
   enum {
     // C    __c11_atomic_init(A *, C)
     Init,
@@ -2833,6 +2839,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
     GNUCmpXchg
   } Form = Init;
+  const unsigned NumForm = GNUCmpXchg + 1;
   const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
   const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
   // where:
@@ -2842,12 +2849,18 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
   //   M is C if C is an integer, and ptrdiff_t if C is a pointer, and
   //   the int parameters are for orderings.
 
+  static_assert(sizeof(NumArgs)/sizeof(NumArgs[0]) == NumForm
+      && sizeof(NumVals)/sizeof(NumVals[0]) == NumForm,
+      "need to update code for modified forms");
   static_assert(AtomicExpr::AO__c11_atomic_init == 0 &&
                     AtomicExpr::AO__c11_atomic_fetch_xor + 1 ==
                         AtomicExpr::AO__atomic_load,
                 "need to update code for modified C11 atomics");
-  bool IsC11 = Op >= AtomicExpr::AO__c11_atomic_init &&
-               Op <= AtomicExpr::AO__c11_atomic_fetch_xor;
+  bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
+                  Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
+  bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
+               Op <= AtomicExpr::AO__c11_atomic_fetch_xor) ||
+               IsOpenCL;
   bool IsN = Op == AtomicExpr::AO__atomic_load_n ||
              Op == AtomicExpr::AO__atomic_store_n ||
              Op == AtomicExpr::AO__atomic_exchange_n ||
@@ -2856,10 +2869,12 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
 
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     Form = Init;
     break;
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     Form = Load;
     break;
@@ -2869,6 +2884,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     break;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     Form = Copy;
@@ -2876,6 +2892,10 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
@@ -2885,6 +2905,9 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -2897,6 +2920,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     break;
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
     Form = Xchg;
     break;
@@ -2907,6 +2931,8 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     Form = C11CmpXchg;
     break;
 
@@ -2916,16 +2942,19 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     break;
   }
 
+  unsigned AdjustedNumArgs = NumArgs[Form];
+  if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
+    ++AdjustedNumArgs;
   // Check we have the right number of arguments.
-  if (TheCall->getNumArgs() < NumArgs[Form]) {
+  if (TheCall->getNumArgs() < AdjustedNumArgs) {
     Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << AdjustedNumArgs << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
-  } else if (TheCall->getNumArgs() > NumArgs[Form]) {
-    Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
+  } else if (TheCall->getNumArgs() > AdjustedNumArgs) {
+    Diag(TheCall->getArg(AdjustedNumArgs)->getLocStart(),
          diag::err_typecheck_call_too_many_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << AdjustedNumArgs << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
   }
@@ -2953,9 +2982,11 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
         << Ptr->getType() << Ptr->getSourceRange();
       return ExprError();
     }
-    if (AtomTy.isConstQualified()) {
+    if (AtomTy.isConstQualified() ||
+        AtomTy.getAddressSpace() == LangAS::opencl_constant) {
       Diag(DRE->getLocStart(), diag::err_atomic_op_needs_non_const_atomic)
-        << Ptr->getType() << Ptr->getSourceRange();
+          << (AtomTy.isConstQualified() ? 0 : 1) << Ptr->getType()
+          << Ptr->getSourceRange();
       return ExprError();
     }
     ValType = AtomTy->getAs<AtomicType>()->getValueType();
@@ -3024,7 +3055,8 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
   ValType.removeLocalVolatile();
   ValType.removeLocalConst();
   QualType ResultType = ValType;
-  if (Form == Copy || Form == LoadCopy || Form == GNUXchg || Form == Init)
+  if (Form == Copy || Form == LoadCopy || Form == GNUXchg ||
+      Form == Init)
     ResultType = Context.VoidTy;
   else if (Form == C11CmpXchg || Form == GNUCmpXchg)
     ResultType = Context.BoolTy;
@@ -3038,7 +3070,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
   // The first argument --- the pointer --- has a fixed type; we
   // deduce the types of the rest of the arguments accordingly.  Walk
   // the remaining arguments, converting them to the deduced value type.
-  for (unsigned i = 1; i != NumArgs[Form]; ++i) {
+  for (unsigned i = 1; i != TheCall->getNumArgs(); ++i) {
     QualType Ty;
     if (i < NumVals[Form] + 1) {
       switch (i) {
@@ -3080,7 +3112,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
         break;
       }
     } else {
-      // The order(s) are always converted to int.
+      // The order(s) and scope are always converted to int.
       Ty = Context.IntTy;
     }
 
@@ -3093,6 +3125,27 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     TheCall->setArg(i, Arg.get());
   }
 
+  Expr *Scope;
+  if (Form != Init) {
+    if (IsOpenCL) {
+      Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+      llvm::APSInt Result(32);
+      if (!Scope->isIntegerConstantExpr(Result, Context))
+        Diag(Scope->getLocStart(),
+             diag::err_atomic_op_has_non_constant_synch_scope)
+            << Scope->getSourceRange();
+      else if (!isValidSyncScopeValue(Result.getZExtValue()))
+        Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
+            << Scope->getSourceRange();
+    } else {
+      Scope = IntegerLiteral::Create(
+          Context,
+          llvm::APInt(Context.getTypeSize(Context.IntTy),
+                      static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices)),
+          Context.IntTy, SourceLocation());
+    }
+  }
+
   // Permute the arguments into a 'consistent' order.
   SmallVector<Expr*, 5> SubExprs;
   SubExprs.push_back(Ptr);
@@ -3103,28 +3156,33 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     break;
   case Load:
     SubExprs.push_back(TheCall->getArg(1)); // Order
+    SubExprs.push_back(Scope);              // Scope
     break;
   case LoadCopy:
   case Copy:
   case Arithmetic:
   case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     break;
   case GNUXchg:
     // Note, AtomicExpr::getVal2() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case GNUCmpXchg:
     SubExprs.push_back(TheCall->getArg(4)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(5)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
@@ -3146,10 +3204,14 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
                                             TheCall->getRParenLoc());
   
   if ((Op == AtomicExpr::AO__c11_atomic_load ||
-       (Op == AtomicExpr::AO__c11_atomic_store)) &&
+       Op == AtomicExpr::AO__c11_atomic_store ||
+       Op == AtomicExpr::AO__opencl_atomic_load ||
+       Op == AtomicExpr::AO__opencl_atomic_store) &&
       Context.AtomicUsesUnsupportedLibcall(AE))
-    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib) <<
-    ((Op == AtomicExpr::AO__c11_atomic_load) ? 0 : 1);
+    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib)
+        << ((Op == AtomicExpr::AO__c11_atomic_load ||
+             Op == AtomicExpr::AO__opencl_atomic_load)
+                ? 0 : 1);
 
   return AE;
 }
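
In practice, the new checks accept a synchronization scope only when it is an
integer constant expression carrying a valid memory_scope value. A minimal
sketch of the user-visible behaviour (the kernel and parameter names below are
illustrative, not part of this patch; the quoted diagnostics are the ones
exercised by the Sema test further down):

  void scope_demo(atomic_int *a, int runtime_scope) {
    // OK: the scope is constant and names a valid memory_scope value.
    __opencl_atomic_store(a, 1, memory_order_relaxed, memory_scope_device);
    // error: non-constant synchronization scope argument to atomic
    // operation is not supported
    __opencl_atomic_store(a, 1, memory_order_relaxed, runtime_scope);
    // error: synchronization scope argument to atomic operation is invalid
    __opencl_atomic_store(a, 1, memory_order_relaxed, 42);
  }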
diff --git a/test/CodeGenOpenCL/atomic-ops-libcall.cl b/test/CodeGenOpenCL/atomic-ops-libcall.cl
new file mode 100644 (file)
index 0000000..1230852
--- /dev/null
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
+
+void f(atomic_int *i, atomic_uint *ui, int cmp) {
+  int x;
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);
+#ifdef cl_khr_subgroups
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
+#endif
+}
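
For reference, the integer immediates in the CHECK lines above decode as
follows (the order value follows C11's memory_order numbering, which the
generalFailureOrder switch in the next test also relies on; the scope values
match the __OPENCL_MEMORY_SCOPE_* macros tested below):

  memory_order_seq_cst         -> 5
  memory_scope_work_group      -> 1
  memory_scope_device          -> 2
  memory_scope_all_svm_devices -> 3
  memory_scope_sub_group       -> 4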
diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl
new file mode 100644 (file)
index 0000000..9558dcd
--- /dev/null
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -finclude-default-header -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+atomic_int j;
+
+void fi1(atomic_int *i) {
+  // CHECK-LABEL: @fi1
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
+}
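+
+// Note how the scopes above map to target syncscopes on amdgcn: work_group,
+// device and sub_group become syncscope("workgroup"), syncscope("agent") and
+// syncscope("subgroup"), while all_svm_devices is the default (system) scope
+// and prints no syncscope at all.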
+
+void fi2(atomic_int *i) {
+  // CHECK-LABEL: @fi2
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+void fi3(atomic_int *i, atomic_uint *ui) {
+  // CHECK-LABEL: @fi3
+  // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+bool fi4(atomic_int *i) {
+  // CHECK-LABEL: @fi4(
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+  // CHECK: store i32 [[OLD]]
+  int cmp = 0;
+  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
+}
+
+float ff1(global atomic_float *d) {
+  // CHECK-LABEL: @ff1
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
+}
+
+void ff2(atomic_float *d) {
+  // CHECK-LABEL: @ff2
+  // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
+}
+
+float ff3(atomic_float *d) {
+  // CHECK-LABEL: @ff3
+  // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
+  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @atomic_init_foo
+void atomic_init_foo()
+{
+  // CHECK-NOT: atomic
+  // CHECK: store
+  __opencl_atomic_init(&j, 42);
+
+  // CHECK-NOT: atomic
+  // CHECK: }
+}
+
+// CHECK-LABEL: @failureOrder
+void failureOrder(atomic_int *ptr, int *ptr2) {
+  // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+
+  // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
+  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
+  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+  // CHECK: [[MONOTONIC]]
+  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQUIRE]]
+  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[RELEASE]]
+  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQREL]]
+  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[SEQCST]]
+  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[MONOTONIC_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: br
+
+  // CHECK: [[ACQREL_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQREL_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: br
+
+  // CHECK: [[SEQCST_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_SEQCST]]
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: br
+}
+
+int test_volatile(volatile atomic_int *i) {
+  // CHECK-LABEL: @test_volatile
+  // CHECK:      %[[i_addr:.*]] = alloca i32
+  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+  // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
+  // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
+  // CHECK-NEXT: ret i32 %[[retval]]
+  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+}
+
+#endif
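
Because the scope argument must currently be an integer constant expression
while the order may be a runtime value (as generalFailureOrder above shows),
a library wrapper can forward the order but has to pin the scope. A sketch
under those constraints (the wrapper name is illustrative, not the actual
opencl-c.h text):

  int atomic_load_explicit_device(volatile atomic_int *object,
                                  memory_order order) {
    // The scope must be a constant expression for now, so it is pinned
    // here rather than taken as a parameter.
    return __opencl_atomic_load(object, order, memory_scope_device);
  }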
index dd213308d6130fd8d7b95748def752144bb8b11c..87969ca6cee6b0348040ad0e1f8843ca528a103c 100644 (file)
 // WEBASSEMBLY32-NOT:#define __LP64__
 // WEBASSEMBLY32-NEXT:#define __NO_INLINE__ 1
 // WEBASSEMBLY32-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
 // WEBASSEMBLY32-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
 // WEBASSEMBLY32-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
 // WEBASSEMBLY32-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
 // WEBASSEMBLY64-NEXT:#define __LP64__ 1
 // WEBASSEMBLY64-NEXT:#define __NO_INLINE__ 1
 // WEBASSEMBLY64-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
 // WEBASSEMBLY64-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
 // WEBASSEMBLY64-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
 // WEBASSEMBLY64-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
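
The predefined __OPENCL_MEMORY_SCOPE_* macros checked above give headers a
stable encoding for the scope values. A plausible way for a header to consume
them, guarding the sub-group member the same way the sub-group codegen test
does (a sketch, not necessarily the exact opencl-c.h definition):

  typedef enum memory_scope {
    memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
    memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
    memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
    memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
  #if defined(cl_khr_subgroups)
    memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
  #endif
  } memory_scope;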
index 662a712d64772aa1730cab7c58412e9d5e98b18b..d46be60acdd9c807100d9a8a896e7e4014ed8dfd 100644 (file)
 // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1
 // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1
 
+// RUN: %clang_cc1 %s -E -dM -o - -x cl \
+// RUN:   | FileCheck %s --check-prefix=MSCOPE
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
+
 // RUN: %clang_cc1 -triple aarch64-windows %s -E -dM -o - -x cl \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM64-WIN
 
diff --git a/test/SemaOpenCL/atomic-ops.cl b/test/SemaOpenCL/atomic-ops.cl
new file mode 100644 (file)
index 0000000..5c89c60
--- /dev/null
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=spir64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=amdgcn-amdhsa-amd-opencl
+
+// Basic parsing/Sema tests for __opencl_atomic_*
+
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+
+struct S { char c[3]; };
+
+char i8;
+short i16;
+int i32;
+long i64;
+
+atomic_int gn;
+
+void f(atomic_int *i, const atomic_int *ci,
+       atomic_intptr_t *p, atomic_float *d,
+       int *I, const int *CI,
+       intptr_t *P, float *D, struct S *s1, struct S *s2,
+       global atomic_int *i_g, local atomic_int *i_l, private atomic_int *i_p,
+       constant atomic_int *i_c) {
+  __opencl_atomic_init(I, 5); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+  __opencl_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}}
+  __opencl_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
+  __opencl_atomic_store(0,0,0,0); // expected-error {{address argument to atomic builtin must be a pointer}}
+  __opencl_atomic_store((int *)0, 0, 0, 0); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+  __opencl_atomic_store(i, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(ci, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_store(i_g, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_l, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_p, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_c, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-constant _Atomic type ('__constant atomic_int *' (aka '__constant _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(p, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(d, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(ci, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  (int)__opencl_atomic_store(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{operand of type 'void' where arithmetic or pointer type is required}}
+
+  int exchange_1 = __opencl_atomic_exchange(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  int exchange_2 = __opencl_atomic_exchange(I, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to _Atomic}}
+
+  __opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+  __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to bitwise atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
+  __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_min(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+  __opencl_atomic_fetch_max(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
+  bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexch_3 = __opencl_atomic_compare_exchange_strong(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+  (void)__opencl_atomic_compare_exchange_strong(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  bool cmpexchw_1 = __opencl_atomic_compare_exchange_weak(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexchw_2 = __opencl_atomic_compare_exchange_weak(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexchw_3 = __opencl_atomic_compare_exchange_weak(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+  (void)__opencl_atomic_compare_exchange_weak(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  // Pointers to different address spaces are allowed.
+  bool cmpexch_10 = __opencl_atomic_compare_exchange_strong((global atomic_int *)0x308, (constant int *)0x309, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+
+  __opencl_atomic_init(ci, 0); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_store(ci, 0, memory_order_release, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_load(ci, memory_order_acquire, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_init(&gn, 456);
+  __opencl_atomic_init(&gn, (void*)0); // expected-warning{{incompatible pointer to integer conversion passing '__generic void *' to parameter of type 'int'}}
+}
+
+void memory_checks(atomic_int *Ap, int *p, int val) {
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_group); // expected-error {{use of undeclared identifier 'memory_order_consume'}}
+  (void)__opencl_atomic_load(Ap, memory_order_release, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_store(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_init(Ap, val);
+
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+}
+
+void synchscope_checks(atomic_int *Ap, int scope) {
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_item); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+}
+
+void nullPointerWarning(atomic_int *Ap, int *p, int val) {
+  // The 'expected' pointer shouldn't be NULL.
+  (void)__opencl_atomic_compare_exchange_strong(Ap, NULL, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}