[OpenCL] Support variable memory scope in atomic builtins
author    Yaxun Liu <Yaxun.Liu@amd.com>
          Tue, 15 Aug 2017 16:02:49 +0000 (16:02 +0000)
committer Yaxun Liu <Yaxun.Liu@amd.com>
          Tue, 15 Aug 2017 16:02:49 +0000 (16:02 +0000)
Differential Revision: https://reviews.llvm.org/D36580

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@310924 91177308-0d34-0410-b5e6-96231b3b80d8
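
For a sense of what the change enables, here is a minimal OpenCL C sketch (function and parameter names are illustrative, not from the patch) of an atomic builtin called with a run-time memory scope; it mirrors the new CodeGen and Sema tests below.

    // Sketch only: assumes -cl-std=CL2.0 with the default OpenCL header.
    void load_with_runtime_scope(atomic_int *p, int order, int scope) {
      // 'scope' no longer has to be a compile-time constant such as
      // memory_scope_work_group; CodeGen switches over the possible
      // memory_scope_* values and falls back to memory_scope_all_svm_devices
      // for values it does not recognize.
      int x = __opencl_atomic_load(p, order, scope);
    }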

include/clang/AST/Expr.h
include/clang/Basic/DiagnosticSemaKinds.td
include/clang/Basic/SyncScope.h
lib/AST/Expr.cpp
lib/CodeGen/CGAtomic.cpp
lib/Frontend/InitPreprocessor.cpp
lib/Sema/SemaChecking.cpp
test/CodeGenOpenCL/atomic-ops-libcall.cl
test/CodeGenOpenCL/atomic-ops.cl
test/SemaOpenCL/atomic-ops.cl

diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index a2da38d6cc8ffe0e484b16ccebf928ae8f71e6a7..7e5a6b3cc861b1db1f33804e55df0c7881f99257 100644
@@ -24,6 +24,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TypeTraits.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
@@ -5067,8 +5068,8 @@ public:
 /// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>,
 /// and corresponding __opencl_atomic_* for OpenCL 2.0.
 /// All of these instructions take one primary pointer, at least one memory
-/// order, and one synchronization scope. The C++11 and __c11 atomic AtomicExpr
-/// always take the default synchronization scope.
+/// order. The instructions for which getScopeModel() returns a non-null value
+/// also take one synchronization scope operand.
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
@@ -5080,14 +5081,16 @@ public:
   };
 
 private:
-  enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
-  Stmt* SubExprs[END_EXPR];
+  /// \brief Locations of the sub-expressions.
+  /// The Scope sub-expression, when present, is stored at index
+  /// NumSubExprs - 1; since that index is not fixed, it has no enumerator.
+  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+  Stmt *SubExprs[END_EXPR + 1];
   unsigned NumSubExprs;
   SourceLocation BuiltinLoc, RParenLoc;
   AtomicOp Op;
 
   friend class ASTStmtReader;
-
 public:
   AtomicExpr(SourceLocation BLoc, ArrayRef<Expr*> args, QualType t,
              AtomicOp op, SourceLocation RP);
@@ -5106,7 +5109,8 @@ public:
     return cast<Expr>(SubExprs[ORDER]);
   }
   Expr *getScope() const {
-    return cast<Expr>(SubExprs[SCOPE]);
+    assert(getScopeModel() && "No scope");
+    return cast<Expr>(SubExprs[NumSubExprs - 1]);
   }
   Expr *getVal1() const {
     if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
@@ -5173,6 +5177,24 @@ public:
   const_child_range children() const {
     return const_child_range(SubExprs, SubExprs + NumSubExprs);
   }
+
+  /// \brief Get the atomic scope model for the given atomic op code.
+  /// \return an empty atomic scope model if the atomic op code does not have
+  ///   a scope operand.
+  static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
+    auto Kind =
+        (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
+            ? AtomicScopeModelKind::OpenCL
+            : AtomicScopeModelKind::None;
+    return AtomicScopeModel::create(Kind);
+  }
+
+  /// \brief Get the atomic scope model for this atomic expression.
+  /// \return an empty atomic scope model if this atomic expression does not
+  ///   have a scope operand.
+  std::unique_ptr<AtomicScopeModel> getScopeModel() const {
+    return getScopeModel(getOp());
+  }
 };
 
 /// TypoExpr - Internal placeholder for expressions where typo correction
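
Since the scope operand is now optional, callers are expected to consult getScopeModel() before touching it. A short C++ sketch of that pattern (the function name is made up; CGAtomic.cpp below does essentially this):

    #include "clang/AST/Expr.h"

    // Sketch: the Scope sub-expression exists only when a scope model does.
    void visitAtomic(clang::AtomicExpr *AE) {
      if (auto ScopeModel = AE->getScopeModel()) {
        clang::Expr *ScopeArg = AE->getScope(); // stored last, at NumSubExprs - 1
        (void)ScopeArg;
      } else {
        // C11/GNU atomics carry no scope operand; use the default synch scope.
      }
    }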
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 27ffef02fed43b08ce4d0473cf4fed0c52f5379c..856bd6a337b2bbaed3a96e98fc82bb7eed243158 100644
@@ -7012,8 +7012,6 @@ def warn_atomic_op_has_invalid_memory_order : Warning<
   InGroup<DiagGroup<"atomic-memory-ordering">>;
 def err_atomic_op_has_invalid_synch_scope : Error<
   "synchronization scope argument to atomic operation is invalid">;
-def err_atomic_op_has_non_constant_synch_scope : Error<
-  "non-constant synchronization scope argument to atomic operation is not supported">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to overflow builtin must be an integer (%0 invalid)">;
diff --git a/include/clang/Basic/SyncScope.h b/include/clang/Basic/SyncScope.h
index 28d6fa61ec8a21eeb596654e68e8c30e6794f374..09ac0052185af90d6156968c4b64cc9e95199151 100644
 #ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
 #define LLVM_CLANG_BASIC_SYNCSCOPE_H
 
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+
 namespace clang {
 
-/// \brief Defines the synch scope values used by the atomic builtins and
-/// expressions.
+/// \brief Defines synch scope values used internally by clang.
+///
+/// The enum values start from 0 and are contiguous. They are mainly used for
+/// enumerating all supported synch scope values and mapping them to LLVM
+/// synch scopes. Their numerical values may be different from the corresponding
+/// synch scope enums used in source languages.
+///
+/// Atomic builtins and expressions use language-specific synch scope enums.
+/// Currently only the OpenCL memory scope enums are supported and are assumed
+/// to be used by all languages; in the future, other languages may define
+/// their own sets of synch scope enums. The language-specific synch scope
+/// values are represented by class AtomicScopeModel and its derived classes.
+///
+/// To add a new enum value:
+///   Add the enum value to enum class SyncScope.
+///   Update enum value Last if necessary.
+///   Update getAsString.
 ///
-/// The enum values should match the pre-defined macros
-/// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_*
-/// enums in opencl-c.h.
 enum class SyncScope {
-  OpenCLWorkGroup = 1,
-  OpenCLDevice = 2,
-  OpenCLAllSVMDevices = 3,
-  OpenCLSubGroup = 4,
+  OpenCLWorkGroup,
+  OpenCLDevice,
+  OpenCLAllSVMDevices,
+  OpenCLSubGroup,
+  Last = OpenCLSubGroup
+};
+
+inline llvm::StringRef getAsString(SyncScope S) {
+  switch (S) {
+  case SyncScope::OpenCLWorkGroup:
+    return "opencl_workgroup";
+  case SyncScope::OpenCLDevice:
+    return "opencl_device";
+  case SyncScope::OpenCLAllSVMDevices:
+    return "opencl_allsvmdevices";
+  case SyncScope::OpenCLSubGroup:
+    return "opencl_subgroup";
+  }
+  llvm_unreachable("Invalid synch scope");
+}
+
+/// \brief Defines the kind of atomic scope models.
+enum class AtomicScopeModelKind { None, OpenCL };
+
+/// \brief Defines the interface for synch scope models.
+class AtomicScopeModel {
+public:
+  virtual ~AtomicScopeModel() {}
+  /// \brief Maps a language-specific synch scope value to the internal
+  /// SyncScope enum.
+  virtual SyncScope map(unsigned S) const = 0;
+
+  /// \brief Check if the compile-time constant synch scope value
+  /// is valid.
+  virtual bool isValid(unsigned S) const = 0;
+
+  /// \brief Get all possible synch scope values that might be
+  /// encountered at runtime for the current language.
+  virtual ArrayRef<unsigned> getRuntimeValues() const = 0;
+
+  /// \brief If an atomic builtin function is called with an invalid synch
+  /// scope value at run time, it falls back to the valid synch scope value
+  /// returned by this function.
+  virtual unsigned getFallBackValue() const = 0;
+
+  /// \brief Create an atomic scope model by AtomicScopeModelKind.
+  /// \return an empty std::unique_ptr for AtomicScopeModelKind::None.
+  static std::unique_ptr<AtomicScopeModel> create(AtomicScopeModelKind K);
+};
+
+/// \brief Defines the synch scope model for OpenCL.
+class AtomicScopeOpenCLModel : public AtomicScopeModel {
+public:
+  /// The enum values match the pre-defined macros
+  /// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_*
+  /// enums in opencl-c.h.
+  enum ID {
+    WorkGroup = 1,
+    Device = 2,
+    AllSVMDevices = 3,
+    SubGroup = 4,
+    Last = SubGroup
+  };
+
+  AtomicScopeOpenCLModel() {}
+
+  SyncScope map(unsigned S) const override {
+    switch (static_cast<ID>(S)) {
+    case WorkGroup:
+      return SyncScope::OpenCLWorkGroup;
+    case Device:
+      return SyncScope::OpenCLDevice;
+    case AllSVMDevices:
+      return SyncScope::OpenCLAllSVMDevices;
+    case SubGroup:
+      return SyncScope::OpenCLSubGroup;
+    }
+    llvm_unreachable("Invalid language synch scope value");
+  }
+
+  bool isValid(unsigned S) const override {
+    return S >= static_cast<unsigned>(WorkGroup) &&
+           S <= static_cast<unsigned>(Last);
+  }
+
+  ArrayRef<unsigned> getRuntimeValues() const override {
+    static_assert(Last == SubGroup, "Does not include all synch scopes");
+    static const unsigned Scopes[] = {
+        static_cast<unsigned>(WorkGroup), static_cast<unsigned>(Device),
+        static_cast<unsigned>(AllSVMDevices), static_cast<unsigned>(SubGroup)};
+    return llvm::makeArrayRef(Scopes);
+  }
+
+  unsigned getFallBackValue() const override {
+    return static_cast<unsigned>(AllSVMDevices);
+  }
 };
 
-inline bool isValidSyncScopeValue(unsigned Scope) {
-  return Scope >= static_cast<unsigned>(SyncScope::OpenCLWorkGroup) &&
-         Scope <= static_cast<unsigned>(SyncScope::OpenCLSubGroup);
+inline std::unique_ptr<AtomicScopeModel>
+AtomicScopeModel::create(AtomicScopeModelKind K) {
+  switch (K) {
+  case AtomicScopeModelKind::None:
+    return std::unique_ptr<AtomicScopeModel>{};
+  case AtomicScopeModelKind::OpenCL:
+    return llvm::make_unique<AtomicScopeOpenCLModel>();
+  }
+  llvm_unreachable("Invalid atomic scope model kind");
 }
 }
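
A rough C++ usage sketch of the new model interface (the helper is hypothetical, not part of the patch): map a language-level scope value to the internal SyncScope, falling back when the value is out of range, as CodeGen does for non-constant scopes.

    #include "clang/Basic/SyncScope.h"

    // Sketch: map an OpenCL memory_scope_* value to clang's internal SyncScope.
    static clang::SyncScope mapOpenCLScope(unsigned LangScope) {
      auto Model = clang::AtomicScopeModel::create(
          clang::AtomicScopeModelKind::OpenCL);
      if (!Model->isValid(LangScope))
        LangScope = Model->getFallBackValue(); // AllSVMDevices
      return Model->map(LangScope);
    }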
 
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 8cb9f76d9654bce232d8aa6a0ab701601a5dd043..7dc141835135b19a4392a97f050121b1b1e6278f 100644
@@ -3939,16 +3939,13 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
   case AO__c11_atomic_init:
   case AO__opencl_atomic_init:
-    return 2;
   case AO__c11_atomic_load:
-  case AO__opencl_atomic_load:
   case AO__atomic_load_n:
-    return 3;
+    return 2;
 
+  case AO__opencl_atomic_load:
   case AO__c11_atomic_store:
   case AO__c11_atomic_exchange:
-  case AO__opencl_atomic_store:
-  case AO__opencl_atomic_exchange:
   case AO__atomic_load:
   case AO__atomic_store:
   case AO__atomic_store_n:
@@ -3958,13 +3955,6 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__c11_atomic_fetch_and:
   case AO__c11_atomic_fetch_or:
   case AO__c11_atomic_fetch_xor:
-  case AO__opencl_atomic_fetch_add:
-  case AO__opencl_atomic_fetch_sub:
-  case AO__opencl_atomic_fetch_and:
-  case AO__opencl_atomic_fetch_or:
-  case AO__opencl_atomic_fetch_xor:
-  case AO__opencl_atomic_fetch_min:
-  case AO__opencl_atomic_fetch_max:
   case AO__atomic_fetch_add:
   case AO__atomic_fetch_sub:
   case AO__atomic_fetch_and:
@@ -3977,20 +3967,29 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
-    return 4;
+    return 3;
 
+  case AO__opencl_atomic_store:
+  case AO__opencl_atomic_exchange:
+  case AO__opencl_atomic_fetch_add:
+  case AO__opencl_atomic_fetch_sub:
+  case AO__opencl_atomic_fetch_and:
+  case AO__opencl_atomic_fetch_or:
+  case AO__opencl_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_min:
+  case AO__opencl_atomic_fetch_max:
   case AO__atomic_exchange:
-    return 5;
+    return 4;
 
   case AO__c11_atomic_compare_exchange_strong:
   case AO__c11_atomic_compare_exchange_weak:
+    return 5;
+
   case AO__opencl_atomic_compare_exchange_strong:
   case AO__opencl_atomic_compare_exchange_weak:
-    return 6;
-
   case AO__atomic_compare_exchange:
   case AO__atomic_compare_exchange_n:
-    return 7;
+    return 6;
   }
   llvm_unreachable("unknown atomic op");
 }
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 5e6633fff163597a5abac795c85dbdc74560d518..d42740c06629459c5e9bc61cb4ffdf1caea40d06 100644
@@ -18,6 +18,7 @@
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
@@ -659,6 +660,61 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
   return DeclPtr;
 }
 
+static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
+                         Address Ptr, Address Val1, Address Val2,
+                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::Value *Scope) {
+  auto ScopeModel = Expr->getScopeModel();
+
+  // LLVM atomic instructions always have a synch scope. If the clang atomic
+  // expression has no scope operand, use the default LLVM synch scope.
+  if (!ScopeModel) {
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
+    return;
+  }
+
+  // Handle constant scope.
+  if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
+    auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
+        ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext());
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order, SCID);
+    return;
+  }
+
+  // Handle non-constant scope.
+  auto &Builder = CGF.Builder;
+  auto Scopes = ScopeModel->getRuntimeValues();
+  llvm::DenseMap<unsigned, llvm::BasicBlock *> BB;
+  for (auto S : Scopes)
+    BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn);
+
+  llvm::BasicBlock *ContBB =
+      CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn);
+
+  auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+  // If an unsupported synch scope value is encountered at run time, assume
+  // the fallback synch scope value.
+  auto FallBack = ScopeModel->getFallBackValue();
+  llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]);
+  for (auto S : Scopes) {
+    auto *B = BB[S];
+    if (S != FallBack)
+      SI->addCase(Builder.getInt32(S), B);
+
+    Builder.SetInsertPoint(B);
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order,
+                 CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S),
+                                                         CGF.getLLVMContext()));
+    Builder.CreateBr(ContBB);
+  }
+
+  Builder.SetInsertPoint(ContBB);
+}
+
 static void
 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
@@ -711,7 +767,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
 
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
-  llvm::Value *Scope = EmitScalarExpr(E->getScope());
+  llvm::Value *Scope =
+      E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -1132,12 +1189,6 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  assert(isa<llvm::ConstantInt>(Scope) &&
-      "Non-constant synchronization scope not supported");
-  auto SCID = getTargetHooks().getLLVMSyncScopeID(
-      static_cast<SyncScope>(cast<llvm::ConstantInt>(Scope)->getZExtValue()),
-      getLLVMContext());
-
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
@@ -1146,30 +1197,30 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       switch ((llvm::AtomicOrderingCABI)ord) {
       case llvm::AtomicOrderingCABI::relaxed:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Monotonic, SCID);
+                     llvm::AtomicOrdering::Monotonic, Scope);
         break;
       case llvm::AtomicOrderingCABI::consume:
       case llvm::AtomicOrderingCABI::acquire:
         if (IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Acquire, SCID);
+                     llvm::AtomicOrdering::Acquire, Scope);
         break;
       case llvm::AtomicOrderingCABI::release:
         if (IsLoad)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::Release, SCID);
+                     llvm::AtomicOrdering::Release, Scope);
         break;
       case llvm::AtomicOrderingCABI::acq_rel:
         if (IsLoad || IsStore)
           break; // Avoid crashing on code with undefined behavior
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::AcquireRelease, SCID);
+                     llvm::AtomicOrdering::AcquireRelease, Scope);
         break;
       case llvm::AtomicOrderingCABI::seq_cst:
         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                     llvm::AtomicOrdering::SequentiallyConsistent, SCID);
+                     llvm::AtomicOrdering::SequentiallyConsistent, Scope);
         break;
       }
     if (RValTy->isVoidType())
@@ -1206,12 +1257,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::Monotonic, SCID);
+               llvm::AtomicOrdering::Monotonic, Scope);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Acquire, SCID);
+                 llvm::AtomicOrdering::Acquire, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1221,7 +1272,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Release, SCID);
+                 llvm::AtomicOrdering::Release, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
@@ -1229,14 +1280,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::AcquireRelease, SCID);
+                 llvm::AtomicOrdering::AcquireRelease, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::SequentiallyConsistent, SCID);
+               llvm::AtomicOrdering::SequentiallyConsistent, Scope);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index 286566673991247b704fd4cb62a5f79e3f1a7b8d..020cf85c01c4bba4fd1c2ccadd76071a568352f5 100644
@@ -579,10 +579,12 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
 
   // Define macros for the OpenCL memory scope.
   // The values should match clang SyncScope enum.
-  assert(static_cast<unsigned>(SyncScope::OpenCLWorkGroup) == 1 &&
-         static_cast<unsigned>(SyncScope::OpenCLDevice) == 2 &&
-         static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices) == 3 &&
-         static_cast<unsigned>(SyncScope::OpenCLSubGroup) == 4);
+  static_assert(
+      static_cast<unsigned>(AtomicScopeOpenCLModel::WorkGroup) == 1 &&
+          static_cast<unsigned>(AtomicScopeOpenCLModel::Device) == 2 &&
+          static_cast<unsigned>(AtomicScopeOpenCLModel::AllSVMDevices) == 3 &&
+          static_cast<unsigned>(AtomicScopeOpenCLModel::SubGroup) == 4,
+      "Invalid OpenCL memory scope enum definition");
   Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_ITEM", "0");
   Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_GROUP", "1");
   Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2");
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index e75c15a37cd7805da76f1e1aae1183a618b23d2b..31700e78939d032f84bbb0ff6158f4d01329c8a4 100644
@@ -3145,27 +3145,6 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     TheCall->setArg(i, Arg.get());
   }
 
-  Expr *Scope;
-  if (Form != Init) {
-    if (IsOpenCL) {
-      Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
-      llvm::APSInt Result(32);
-      if (!Scope->isIntegerConstantExpr(Result, Context))
-        Diag(Scope->getLocStart(),
-             diag::err_atomic_op_has_non_constant_synch_scope)
-            << Scope->getSourceRange();
-      else if (!isValidSyncScopeValue(Result.getZExtValue()))
-        Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
-            << Scope->getSourceRange();
-    } else {
-      Scope = IntegerLiteral::Create(
-          Context,
-          llvm::APInt(Context.getTypeSize(Context.IntTy),
-                      static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices)),
-          Context.IntTy, SourceLocation());
-    }
-  }
-
   // Permute the arguments into a 'consistent' order.
   SmallVector<Expr*, 5> SubExprs;
   SubExprs.push_back(Ptr);
@@ -3176,33 +3155,28 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
     break;
   case Load:
     SubExprs.push_back(TheCall->getArg(1)); // Order
-    SubExprs.push_back(Scope);              // Scope
     break;
   case LoadCopy:
   case Copy:
   case Arithmetic:
   case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
-    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     break;
   case GNUXchg:
     // Note, AtomicExpr::getVal2() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(3)); // Order
-    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
-    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case GNUCmpXchg:
     SubExprs.push_back(TheCall->getArg(4)); // Order
-    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(5)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
@@ -3219,6 +3193,17 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
           << SubExprs[1]->getSourceRange();
   }
 
+  if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) {
+    auto *Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+    llvm::APSInt Result(32);
+    if (Scope->isIntegerConstantExpr(Result, Context) &&
+        !ScopeModel->isValid(Result.getZExtValue())) {
+      Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
+          << Scope->getSourceRange();
+    }
+    SubExprs.push_back(Scope);
+  }
+
   AtomicExpr *AE = new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),
                                             SubExprs, ResultType, Op,
                                             TheCall->getRParenLoc());
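
The net effect on semantic checking, sketched in OpenCL C (illustrative code, mirroring test/SemaOpenCL/atomic-ops.cl below): constant scope arguments are still validated, while non-constant ones are now accepted and resolved at run time by CodeGen.

    void scope_checks(atomic_int *p, int runtime_scope) {
      (void)__opencl_atomic_load(p, memory_order_relaxed, memory_scope_device); // OK
      (void)__opencl_atomic_load(p, memory_order_relaxed, runtime_scope);       // OK: no longer an error
      (void)__opencl_atomic_load(p, memory_order_relaxed, 10);                  // error: synchronization scope argument is invalid
    }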
diff --git a/test/CodeGenOpenCL/atomic-ops-libcall.cl b/test/CodeGenOpenCL/atomic-ops-libcall.cl
index 123085282201aad2fec695a6bd7064e66a411180..d96c90c0f1e86fde8ff7f02ee7c3a8d74c1d87d1 100644
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
 // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
 
-void f(atomic_int *i, atomic_uint *ui, int cmp) {
+void f(atomic_int *i, atomic_uint *ui, int cmp, int order, int scope) {
   int x;
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
@@ -34,4 +34,7 @@ void f(atomic_int *i, atomic_uint *ui, int cmp) {
   // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
 #endif
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope);
 }
diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl
index 9558dcde517c33ed581c5c2152b5e910c2825802..da00eab49820663cd86ef96fc34559149133915e 100644
@@ -52,6 +52,81 @@ bool fi4(atomic_int *i) {
   return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
 }
 
+void fi5(atomic_int *i, int scope) {
+  // CHECK-LABEL: @fi5
+  // CHECK: switch i32 %{{.*}}, label %opencl_allsvmdevices [
+  // CHECK-NEXT: i32 1, label %opencl_workgroup
+  // CHECK-NEXT: i32 2, label %opencl_device
+  // CHECK-NEXT: i32 4, label %opencl_subgroup
+  // CHECK-NEXT: ]
+  // CHECK: opencl_workgroup:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_device:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_allsvmdevices:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst, align 4
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_subgroup:
+  // CHECK: %5 = load atomic i32, i32 addrspace(4)* %0 syncscope("subgroup") seq_cst, align 4
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: atomic.scope.continue:
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
+}
+
+void fi6(atomic_int *i, int order, int scope) {
+  // CHECK-LABEL: @fi6
+  // CHECK: switch i32 %{{.*}}, label %monotonic [
+  // CHECK-NEXT: i32 1, label %acquire
+  // CHECK-NEXT: i32 2, label %acquire
+  // CHECK-NEXT: i32 5, label %seqcst
+  // CHECK-NEXT: ]
+  // CHECK: monotonic:
+  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: acquire:
+  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: seqcst:
+  // CHECK: switch i32 %2, label %[[SEQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[MON_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic
+  // CHECK: [[MON_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic
+  // CHECK: [[MON_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic
+  // CHECK: [[MON_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic
+  // CHECK: [[ACQ_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire
+  // CHECK: [[ACQ_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire
+  // CHECK: [[ACQ_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire
+  // CHECK: [[ACQ_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire
+  // CHECK: [[SEQ_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: [[SEQ_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: [[SEQ_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
+  // CHECK: [[SEQ_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
+  int x = __opencl_atomic_load(i, order, scope);
+}
+
 float ff1(global atomic_float *d) {
   // CHECK-LABEL: @ff1
   // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
diff --git a/test/SemaOpenCL/atomic-ops.cl b/test/SemaOpenCL/atomic-ops.cl
index 5c89c60b2057cc62785e55cd081c9cd2b2846f32..4b7daa363ff78644cbf8a07bbbfcd6a44ac1b8c8 100644
@@ -14,7 +14,6 @@ int i32;
 int8 i64;
 
 atomic_int gn;
-
 void f(atomic_int *i, const atomic_int *ci,
        atomic_intptr_t *p, atomic_float *d,
        int *I, const int *CI,
@@ -81,6 +80,13 @@ void f(atomic_int *i, const atomic_int *ci,
 }
 
 void memory_checks(atomic_int *Ap, int *p, int val) {
+  // A non-integer memory order argument is implicitly converted to an integer type.
+  (void)__opencl_atomic_load(Ap, 1.0f, memory_scope_work_group);
+  float forder;
+  (void)__opencl_atomic_load(Ap, forder, memory_scope_work_group);
+  struct S s;
+  (void)__opencl_atomic_load(Ap, s, memory_scope_work_group); // expected-error {{passing 'struct S' to parameter of incompatible type 'int'}}
+
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
   (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_group);
   (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_group); // expected-error {{use of undeclared identifier 'memory_order_consume'}}
@@ -151,8 +157,15 @@ void synchscope_checks(atomic_int *Ap, int scope) {
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group);
-  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10);    //expected-error{{synchronization scope argument to atomic operation is invalid}}
+
+  // A non-integer memory scope argument is implicitly converted to an integer type.
+  float fscope;
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, 1.0f);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, fscope);
+  struct S s;
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, s); //expected-error{{passing 'struct S' to parameter of incompatible type 'int'}}
 }
 
 void nullPointerWarning(atomic_int *Ap, int *p, int val) {