``memory_order`` enumeration.
(Note that Clang additionally provides GCC-compatible ``__atomic_*``
-builtins)
+builtins and OpenCL 2.0 ``__opencl_atomic_*`` builtins. Each OpenCL 2.0
+atomic builtin is an explicit form of the corresponding OpenCL 2.0 atomic
+function, named with a ``__opencl_`` prefix. The macros
+``__OPENCL_MEMORY_SCOPE_WORK_ITEM``, ``__OPENCL_MEMORY_SCOPE_WORK_GROUP``,
+``__OPENCL_MEMORY_SCOPE_DEVICE``, ``__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES``,
+and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
+corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
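+
+For example, an explicit, sequentially consistent atomic load at
+work-group scope can be written as follows (assuming ``p`` has type
+``atomic_int *``):
+
+.. code-block:: c
+
+  int x = __opencl_atomic_load(p, memory_order_seq_cst,
+                               memory_scope_work_group);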
Low-level ARM exclusive memory builtins
---------------------------------------
/// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
/// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
-/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
-/// All of these instructions take one primary pointer and at least one memory
-/// order.
+/// similarly-named C++11 instructions, __c11 variants for <stdatomic.h>,
+/// and corresponding __opencl_atomic_* builtins for OpenCL 2.0.
+/// All of these instructions take one primary pointer, at least one memory
+/// order, and one synchronization scope. The C++11 and __c11 forms always
+/// take the default synchronization scope.
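+/// For example, __opencl_atomic_compare_exchange_strong stores its
+/// subexpressions in the order PTR, ORDER, SCOPE, VAL1 (expected),
+/// ORDER_FAIL, VAL2 (desired); see the SubExprs permutation in Sema.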
class AtomicExpr : public Expr {
public:
enum AtomicOp {
};
private:
- enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+ enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
Stmt* SubExprs[END_EXPR];
unsigned NumSubExprs;
SourceLocation BuiltinLoc, RParenLoc;
Expr *getOrder() const {
return cast<Expr>(SubExprs[ORDER]);
}
+ Expr *getScope() const {
+ return cast<Expr>(SubExprs[SCOPE]);
+ }
Expr *getVal1() const {
- if (Op == AO__c11_atomic_init)
+ if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
return cast<Expr>(SubExprs[ORDER]);
assert(NumSubExprs > VAL1);
return cast<Expr>(SubExprs[VAL1]);
assert(NumSubExprs > WEAK);
return cast<Expr>(SubExprs[WEAK]);
}
+ QualType getValueType() const;
AtomicOp getOp() const { return Op; }
unsigned getNumSubExprs() const { return NumSubExprs; }
bool isCmpXChg() const {
return getOp() == AO__c11_atomic_compare_exchange_strong ||
getOp() == AO__c11_atomic_compare_exchange_weak ||
+ getOp() == AO__opencl_atomic_compare_exchange_strong ||
+ getOp() == AO__opencl_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
getOp() == AO__atomic_compare_exchange_n;
}
+ bool isOpenCL() const {
+ return getOp() >= AO__opencl_atomic_init &&
+ getOp() <= AO__opencl_atomic_fetch_max;
+ }
+
SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
SourceLocation getRParenLoc() const { return RParenLoc; }
BUILTIN(__atomic_always_lock_free, "izvCD*", "n")
BUILTIN(__atomic_is_lock_free, "izvCD*", "n")
+// OpenCL 2.0 atomic builtins.
+ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_strong, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_weak, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
+
#undef ATOMIC_BUILTIN
// Non-overloaded atomic builtins.
"address argument to atomic operation must be a pointer to _Atomic "
"type (%0 invalid)">;
def err_atomic_op_needs_non_const_atomic : Error<
- "address argument to atomic operation must be a pointer to non-const _Atomic "
- "type (%0 invalid)">;
+ "address argument to atomic operation must be a pointer to non-%select{const|constant}0 _Atomic "
+ "type (%1 invalid)">;
def err_atomic_op_needs_non_const_pointer : Error<
"address argument to atomic operation must be a pointer to non-const "
"type (%0 invalid)">;
def warn_atomic_op_has_invalid_memory_order : Warning<
"memory order argument to atomic operation is invalid">,
InGroup<DiagGroup<"atomic-memory-ordering">>;
+def err_atomic_op_has_invalid_synch_scope : Error<
+ "synchronization scope argument to atomic operation is invalid">;
+def err_atomic_op_has_non_constant_synch_scope : Error<
+ "non-constant synchronization scope argument to atomic operation is not supported">;
def err_overflow_builtin_must_be_int : Error<
"operand argument to overflow builtin must be an integer (%0 invalid)">;
--- /dev/null
+//===--- SyncScope.h - Atomic synchronization scopes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for the atomic synchronization scopes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
+#define LLVM_CLANG_BASIC_SYNCSCOPE_H
+
+namespace clang {
+
+/// \brief Defines the synch scope values used by the atomic builtins and
+/// expressions.
+///
+/// The enum values should match the pre-defined macros
+/// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_*
+/// enums in opencl-c.h.
+enum class SyncScope {
+ OpenCLWorkGroup = 1,
+ OpenCLDevice = 2,
+ OpenCLAllSVMDevices = 3,
+ OpenCLSubGroup = 4,
+};
+
+inline bool isValidSyncScopeValue(unsigned Scope) {
+ return Scope >= static_cast<unsigned>(SyncScope::OpenCLWorkGroup) &&
+ Scope <= static_cast<unsigned>(SyncScope::OpenCLSubGroup);
+}
+}
+
+#endif
ObjCSuperType = QualType();
// void * type
- VoidPtrTy = getPointerType(VoidTy);
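+  // In OpenCL 2.0 an unqualified pointer points to the generic address
+  // space, so build void* as a pointer to generic void; builtins declared
+  // in terms of VoidPtrTy then accept pointers to any address space.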
+ if (LangOpts.OpenCLVersion >= 200) {
+ auto Q = VoidTy.getQualifiers();
+ Q.setAddressSpace(LangAS::opencl_generic);
+ VoidPtrTy = getPointerType(getCanonicalType(
+ getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
+ } else {
+ VoidPtrTy = getPointerType(VoidTy);
+ }
// nullptr type (C++0x 2.14.7)
InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
switch (Op) {
case AO__c11_atomic_init:
+ case AO__opencl_atomic_init:
+ return 2;
case AO__c11_atomic_load:
+ case AO__opencl_atomic_load:
case AO__atomic_load_n:
- return 2;
+ return 3;
case AO__c11_atomic_store:
case AO__c11_atomic_exchange:
+ case AO__opencl_atomic_store:
+ case AO__opencl_atomic_exchange:
case AO__atomic_load:
case AO__atomic_store:
case AO__atomic_store_n:
case AO__c11_atomic_fetch_and:
case AO__c11_atomic_fetch_or:
case AO__c11_atomic_fetch_xor:
+ case AO__opencl_atomic_fetch_add:
+ case AO__opencl_atomic_fetch_sub:
+ case AO__opencl_atomic_fetch_and:
+ case AO__opencl_atomic_fetch_or:
+ case AO__opencl_atomic_fetch_xor:
+ case AO__opencl_atomic_fetch_min:
+ case AO__opencl_atomic_fetch_max:
case AO__atomic_fetch_add:
case AO__atomic_fetch_sub:
case AO__atomic_fetch_and:
case AO__atomic_or_fetch:
case AO__atomic_xor_fetch:
case AO__atomic_nand_fetch:
- return 3;
+ return 4;
case AO__atomic_exchange:
- return 4;
+ return 5;
case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
- return 5;
+ case AO__opencl_atomic_compare_exchange_strong:
+ case AO__opencl_atomic_compare_exchange_weak:
+ return 6;
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
- return 6;
+ return 7;
}
llvm_unreachable("unknown atomic op");
}
+QualType AtomicExpr::getValueType() const {
+ auto T = getPtr()->getType()->castAs<PointerType>()->getPointeeType();
+ if (auto AT = T->getAs<AtomicType>())
+ return AT->getValueType();
+ return T;
+}
+
QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) {
unsigned ArraySectionCount = 0;
while (auto *OASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParens())) {
// AtomicExpr stores its subexpressions in a permuted order.
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
- Node->getOp() != AtomicExpr::AO__atomic_load_n) {
+ Node->getOp() != AtomicExpr::AO__atomic_load_n &&
+ Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
OS << ", ";
PrintExpr(Node->getVal1());
}
OS << ", ";
PrintExpr(Node->getWeak());
}
- if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) {
+ if (Node->getOp() != AtomicExpr::AO__c11_atomic_init &&
+ Node->getOp() != AtomicExpr::AO__opencl_atomic_init) {
OS << ", ";
PrintExpr(Node->getOrder());
}
PtrDiffType = SignedLong;
IntPtrType = SignedLong;
}
+
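+  // Atomic operations up to 64 bits wide are lowered inline rather than
+  // through library calls.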
+ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/DataLayout.h"
Address Val1, Address Val2,
uint64_t Size,
llvm::AtomicOrdering SuccessOrder,
- llvm::AtomicOrdering FailureOrder) {
+ llvm::AtomicOrdering FailureOrder,
+ llvm::SyncScope::ID Scope) {
// Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder);
+ Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
+ Scope);
Pair->setVolatile(E->isVolatile());
Pair->setWeak(IsWeak);
Address Val1, Address Val2,
llvm::Value *FailureOrderVal,
uint64_t Size,
- llvm::AtomicOrdering SuccessOrder) {
+ llvm::AtomicOrdering SuccessOrder,
+ llvm::SyncScope::ID Scope) {
llvm::AtomicOrdering FailureOrder;
if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
auto FOS = FO->getSExtValue();
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
}
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- FailureOrder);
+ FailureOrder, Scope);
return;
}
// doesn't fold to a constant for the ordering.
CGF.Builder.SetInsertPoint(MonotonicBB);
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
+ Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
CGF.Builder.CreateBr(ContBB);
if (AcquireBB) {
CGF.Builder.SetInsertPoint(AcquireBB);
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
+ Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
CGF.Builder.CreateBr(ContBB);
SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
AcquireBB);
if (SeqCstBB) {
CGF.Builder.SetInsertPoint(SeqCstBB);
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
- llvm::AtomicOrdering::SequentiallyConsistent);
+ llvm::AtomicOrdering::SequentiallyConsistent, Scope);
CGF.Builder.CreateBr(ContBB);
SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
SeqCstBB);
static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
Address Ptr, Address Val1, Address Val2,
llvm::Value *IsWeak, llvm::Value *FailureOrder,
- uint64_t Size, llvm::AtomicOrdering Order) {
+ uint64_t Size, llvm::AtomicOrdering Order,
+ llvm::SyncScope::ID Scope) {
llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled!");
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order);
+ FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order);
+ FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
- Val1, Val2, FailureOrder, Size, Order);
+ Val1, Val2, FailureOrder, Size, Order, Scope);
} else {
// Create all the relevant BB's
llvm::BasicBlock *StrongBB =
CGF.Builder.SetInsertPoint(StrongBB);
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order);
+ FailureOrder, Size, Order, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(WeakBB);
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order);
+ FailureOrder, Size, Order, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(ContBB);
return;
}
case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
- Load->setAtomic(Order);
+ Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
CGF.Builder.CreateStore(Load, Dest);
return;
}
case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
- Store->setAtomic(Order);
+ Store->setAtomic(Order, Scope);
Store->setVolatile(E->isVolatile());
return;
}
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
Op = llvm::AtomicRMWInst::Xchg;
PostOp = llvm::Instruction::Add;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
Op = llvm::AtomicRMWInst::Add;
break;
PostOp = llvm::Instruction::Sub;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
Op = llvm::AtomicRMWInst::Sub;
break;
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
+ : llvm::AtomicRMWInst::UMin;
+ break;
+
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
+ Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
+ : llvm::AtomicRMWInst::UMax;
+ break;
+
case AtomicExpr::AO__atomic_and_fetch:
PostOp = llvm::Instruction::And;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
Op = llvm::AtomicRMWInst::And;
break;
PostOp = llvm::Instruction::Or;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
Op = llvm::AtomicRMWInst::Or;
break;
PostOp = llvm::Instruction::Xor;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
Op = llvm::AtomicRMWInst::Xor;
break;
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
- CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order);
+ CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());
// For __atomic_*_fetch operations, perform the operation again to
Address Dest = Address::invalid();
Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
- if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
+ if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+ E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
EmitAtomicInit(E->getVal1(), lvalue);
return RValue::get(nullptr);
}
llvm::Value *Order = EmitScalarExpr(E->getOrder());
+ llvm::Value *Scope = EmitScalarExpr(E->getScope());
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled above with EmitAtomicInit!");
case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__atomic_load_n:
break;
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__atomic_compare_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
else
Val2 = EmitValToTemp(*this, E->getVal2());
OrderFail = EmitScalarExpr(E->getOrderFail());
- if (E->getNumSubExprs() == 6)
+ if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
+ E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
IsWeak = EmitScalarExpr(E->getWeak());
break;
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
if (MemTy->isPointerType()) {
// For pointer arithmetic, we're required to do a bit of math:
// adding 1 to an int* is not the same as adding 1 to a uintptr_t.
case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_store_n:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_xor:
bool UseOptimizedLibcall = false;
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled above with EmitAtomicInit!");
case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_nand:
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_xor:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__atomic_and_fetch:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_store_n:
getContext().getSizeType());
}
// Atomic address is the first or second parameter
- Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())),
+ // The OpenCL atomic library functions only accept pointer arguments to
+ // generic address space.
+ auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
+ if (!E->isOpenCL())
+ return V;
+ auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace();
+ if (AS == LangAS::opencl_generic)
+ return V;
+ auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ auto T = V->getType();
+ auto *DestType = T->getPointerElementType()->getPointerTo(DestAS);
+
+ return getTargetHooks().performAddrSpaceCast(
+ *this, V, AS, LangAS::opencl_generic, DestType, false);
+ };
+
+ Args.add(RValue::get(CastToGenericAddrSpace(
+ EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())),
getContext().VoidPtrTy);
std::string LibCallName;
llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
switch (E->getOp()) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled!");
// There is only one libcall for compare an exchange, because there is no
// int success, int failure)
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
LibCallName = "__atomic_compare_exchange";
RetTy = getContext().BoolTy;
HaveRetTy = true;
- Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())),
- getContext().VoidPtrTy);
+ Args.add(
+ RValue::get(CastToGenericAddrSpace(
+ EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())),
+ getContext().VoidPtrTy);
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
MemTy, E->getExprLoc(), sizeChars);
Args.add(RValue::get(Order), getContext().IntTy);
// int order)
// T __atomic_exchange_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
LibCallName = "__atomic_exchange";
// void __atomic_store(size_t size, void *mem, void *val, int order)
// void __atomic_store_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
LibCallName = "__atomic_store";
// void __atomic_load(size_t size, void *mem, void *return, int order)
// T __atomic_load_N(T *mem, int order)
case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
LibCallName = "__atomic_load";
PostOp = llvm::Instruction::Add;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
LibCallName = "__atomic_fetch_add";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
PostOp = llvm::Instruction::And;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
LibCallName = "__atomic_fetch_and";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
PostOp = llvm::Instruction::Or;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
LibCallName = "__atomic_fetch_or";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
PostOp = llvm::Instruction::Sub;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
LibCallName = "__atomic_fetch_sub";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
PostOp = llvm::Instruction::Xor;
// Fall through.
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
LibCallName = "__atomic_fetch_xor";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), sizeChars);
break;
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ LibCallName = E->getValueType()->isSignedIntegerType()
+ ? "__atomic_fetch_min"
+ : "__atomic_fetch_umin";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+ LoweredMemTy, E->getExprLoc(), sizeChars);
+ break;
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
+ LibCallName = E->getValueType()->isSignedIntegerType()
+ ? "__atomic_fetch_max"
+ : "__atomic_fetch_umax";
+ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+ LoweredMemTy, E->getExprLoc(), sizeChars);
+ break;
// T __atomic_nand_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_nand_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_nand_fetch:
break;
}
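+  // For OpenCL, rename the libcall: e.g. "__atomic_fetch_add" becomes
+  // "__opencl_atomic_fetch_add".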
+  if (E->isOpenCL()) {
+    LibCallName = std::string("__opencl") +
+                  StringRef(LibCallName).drop_front(1).str();
+  }
// Optimized functions have the size in their name.
if (UseOptimizedLibcall)
LibCallName += "_" + llvm::utostr(Size);
// order is always the last parameter
Args.add(RValue::get(Order),
getContext().IntTy);
+ if (E->isOpenCL())
+ Args.add(RValue::get(Scope), getContext().IntTy);
// PostOp is only needed for the atomic_*_fetch operations, and
// thus is only needed for and implemented in the
}
bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+ E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store_n;
bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+ E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load_n;
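+  // Sema guarantees that the synchronization scope is an integer constant,
+  // so it maps directly to a target-specific LLVM syncscope ID here (e.g.
+  // syncscope("workgroup") on amdgcn).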
+ assert(isa<llvm::ConstantInt>(Scope) &&
+ "Non-constant synchronization scope not supported");
+ auto SCID = getTargetHooks().getLLVMSyncScopeID(
+ static_cast<SyncScope>(cast<llvm::ConstantInt>(Scope)->getZExtValue()),
+ getLLVMContext());
+
if (isa<llvm::ConstantInt>(Order)) {
auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
// We should not ever get to a case where the ordering isn't a valid C ABI
switch ((llvm::AtomicOrderingCABI)ord) {
case llvm::AtomicOrderingCABI::relaxed:
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Monotonic);
+ llvm::AtomicOrdering::Monotonic, SCID);
break;
case llvm::AtomicOrderingCABI::consume:
case llvm::AtomicOrderingCABI::acquire:
if (IsStore)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Acquire);
+ llvm::AtomicOrdering::Acquire, SCID);
break;
case llvm::AtomicOrderingCABI::release:
if (IsLoad)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::Release);
+ llvm::AtomicOrdering::Release, SCID);
break;
case llvm::AtomicOrderingCABI::acq_rel:
if (IsLoad || IsStore)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::AcquireRelease);
+ llvm::AtomicOrdering::AcquireRelease, SCID);
break;
case llvm::AtomicOrderingCABI::seq_cst:
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
- llvm::AtomicOrdering::SequentiallyConsistent);
+ llvm::AtomicOrdering::SequentiallyConsistent, SCID);
break;
}
if (RValTy->isVoidType())
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AtomicOrdering::Monotonic);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Monotonic, SCID);
Builder.CreateBr(ContBB);
if (!IsStore) {
Builder.SetInsertPoint(AcquireBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AtomicOrdering::Acquire);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Acquire, SCID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
AcquireBB);
}
if (!IsLoad) {
Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AtomicOrdering::Release);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Release, SCID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
ReleaseBB);
}
if (!IsLoad && !IsStore) {
Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AtomicOrdering::AcquireRelease);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::AcquireRelease, SCID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
AcqRelBB);
}
Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AtomicOrdering::SequentiallyConsistent);
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::SequentiallyConsistent, SCID);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
SeqCstBB);
llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
unsigned addressSpace =
- cast<llvm::PointerType>(value->getType())->getAddressSpace();
+ cast<llvm::PointerType>(value->getType())->getAddressSpace();
llvm::PointerType *destType = Int8PtrTy;
if (addressSpace)
return llvm::ConstantExpr::getPointerCast(Src, DestTy);
}
+llvm::SyncScope::ID
+TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
+ return C.getOrInsertSyncScopeID(""); /* default sync scope */
+}
+
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
/// isEmptyField - Return true iff a the field is "empty", that is it
}
unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
+ llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const override;
};
}
return DefaultGlobalAS;
}
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const {
+ StringRef Name;
+ switch (S) {
+ case SyncScope::OpenCLWorkGroup:
+ Name = "workgroup";
+ break;
+ case SyncScope::OpenCLDevice:
+ Name = "agent";
+ break;
+ case SyncScope::OpenCLAllSVMDevices:
+ Name = "";
+ break;
+  case SyncScope::OpenCLSubGroup:
+    Name = "subgroup";
+    break;
+  }
+ return C.getOrInsertSyncScopeID(Name);
+}
+
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
#include "CGValue.h"
#include "clang/AST/Type.h"
#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SyncScope.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
virtual llvm::Constant *
performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
unsigned DestAddr, llvm::Type *DestTy) const;
+
+ /// Get the syncscope used in LLVM IR.
+ virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const;
};
} // namespace CodeGen
#include "clang/Basic/FileManager.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
#include "clang/Frontend/FrontendDiagnostic.h"
Builder.defineMacro("__ATOMIC_ACQ_REL", "4");
Builder.defineMacro("__ATOMIC_SEQ_CST", "5");
+  // Define macros for the OpenCL memory scopes. The values should match
+  // the clang SyncScope enum.
+ assert(static_cast<unsigned>(SyncScope::OpenCLWorkGroup) == 1 &&
+ static_cast<unsigned>(SyncScope::OpenCLDevice) == 2 &&
+ static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices) == 3 &&
+ static_cast<unsigned>(SyncScope::OpenCLSubGroup) == 4);
+ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_ITEM", "0");
+ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_GROUP", "1");
+ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2");
+ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES", "3");
+ Builder.defineMacro("__OPENCL_MEMORY_SCOPE_SUB_GROUP", "4");
+
// Support for #pragma redefine_extname (Sun compatibility)
Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1");
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
-typedef enum memory_scope
-{
- memory_scope_work_item,
- memory_scope_work_group,
- memory_scope_device,
- memory_scope_all_svm_devices,
- memory_scope_sub_group
+typedef enum memory_scope {
+ memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+ memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+ memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+ memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+ memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
} memory_scope;
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
- memory_order_relaxed,
- memory_order_acquire,
- memory_order_release,
- memory_order_acq_rel,
- memory_order_seq_cst
+ memory_order_relaxed = __ATOMIC_RELAXED,
+ memory_order_acquire = __ATOMIC_ACQUIRE,
+ memory_order_release = __ATOMIC_RELEASE,
+ memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
#include "clang/AST/StmtObjC.h"
#include "clang/Analysis/Analyses/FormatString.h"
#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
switch (Op) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("There is no ordering argument for an init");
case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load:
return OrderingCABI != llvm::AtomicOrderingCABI::release &&
OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
- // All these operations take one of the following forms:
+ // All the non-OpenCL operations take one of the following forms.
+ // The OpenCL operations take the __c11 forms with one extra argument for
+ // synchronization scope.
enum {
// C __c11_atomic_init(A *, C)
Init,
// bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
GNUCmpXchg
} Form = Init;
+ const unsigned NumForm = GNUCmpXchg + 1;
const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
// where:
// M is C if C is an integer, and ptrdiff_t if C is a pointer, and
// the int parameters are for orderings.
+ static_assert(sizeof(NumArgs)/sizeof(NumArgs[0]) == NumForm
+ && sizeof(NumVals)/sizeof(NumVals[0]) == NumForm,
+ "need to update code for modified forms");
static_assert(AtomicExpr::AO__c11_atomic_init == 0 &&
AtomicExpr::AO__c11_atomic_fetch_xor + 1 ==
AtomicExpr::AO__atomic_load,
"need to update code for modified C11 atomics");
- bool IsC11 = Op >= AtomicExpr::AO__c11_atomic_init &&
- Op <= AtomicExpr::AO__c11_atomic_fetch_xor;
+ bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
+ Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
+ bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
+ Op <= AtomicExpr::AO__c11_atomic_fetch_xor) ||
+ IsOpenCL;
bool IsN = Op == AtomicExpr::AO__atomic_load_n ||
Op == AtomicExpr::AO__atomic_store_n ||
Op == AtomicExpr::AO__atomic_exchange_n ||
switch (Op) {
case AtomicExpr::AO__c11_atomic_init:
+ case AtomicExpr::AO__opencl_atomic_init:
Form = Init;
break;
case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__atomic_load_n:
Form = Load;
break;
break;
case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
Form = Copy;
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_xor:
break;
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
Form = Xchg;
break;
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
Form = C11CmpXchg;
break;
break;
}
+ unsigned AdjustedNumArgs = NumArgs[Form];
+ if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
+ ++AdjustedNumArgs;
// Check we have the right number of arguments.
- if (TheCall->getNumArgs() < NumArgs[Form]) {
+ if (TheCall->getNumArgs() < AdjustedNumArgs) {
Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
- << 0 << NumArgs[Form] << TheCall->getNumArgs()
+ << 0 << AdjustedNumArgs << TheCall->getNumArgs()
<< TheCall->getCallee()->getSourceRange();
return ExprError();
- } else if (TheCall->getNumArgs() > NumArgs[Form]) {
- Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
+ } else if (TheCall->getNumArgs() > AdjustedNumArgs) {
+ Diag(TheCall->getArg(AdjustedNumArgs)->getLocStart(),
diag::err_typecheck_call_too_many_args)
- << 0 << NumArgs[Form] << TheCall->getNumArgs()
+ << 0 << AdjustedNumArgs << TheCall->getNumArgs()
<< TheCall->getCallee()->getSourceRange();
return ExprError();
}
<< Ptr->getType() << Ptr->getSourceRange();
return ExprError();
}
- if (AtomTy.isConstQualified()) {
+ if (AtomTy.isConstQualified() ||
+ AtomTy.getAddressSpace() == LangAS::opencl_constant) {
Diag(DRE->getLocStart(), diag::err_atomic_op_needs_non_const_atomic)
- << Ptr->getType() << Ptr->getSourceRange();
+ << (AtomTy.isConstQualified() ? 0 : 1) << Ptr->getType()
+ << Ptr->getSourceRange();
return ExprError();
}
ValType = AtomTy->getAs<AtomicType>()->getValueType();
ValType.removeLocalVolatile();
ValType.removeLocalConst();
QualType ResultType = ValType;
- if (Form == Copy || Form == LoadCopy || Form == GNUXchg || Form == Init)
+ if (Form == Copy || Form == LoadCopy || Form == GNUXchg ||
+ Form == Init)
ResultType = Context.VoidTy;
else if (Form == C11CmpXchg || Form == GNUCmpXchg)
ResultType = Context.BoolTy;
// The first argument --- the pointer --- has a fixed type; we
// deduce the types of the rest of the arguments accordingly. Walk
// the remaining arguments, converting them to the deduced value type.
- for (unsigned i = 1; i != NumArgs[Form]; ++i) {
+ for (unsigned i = 1; i != TheCall->getNumArgs(); ++i) {
QualType Ty;
if (i < NumVals[Form] + 1) {
switch (i) {
break;
}
} else {
- // The order(s) are always converted to int.
+ // The order(s) and scope are always converted to int.
Ty = Context.IntTy;
}
TheCall->setArg(i, Arg.get());
}
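+  // For OpenCL builtins the synchronization scope is the last argument; for
+  // the other forms, synthesize the default all-SVM-devices scope so that
+  // every non-init AtomicExpr carries a scope subexpression.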
+ Expr *Scope;
+ if (Form != Init) {
+ if (IsOpenCL) {
+ Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+ llvm::APSInt Result(32);
+ if (!Scope->isIntegerConstantExpr(Result, Context))
+ Diag(Scope->getLocStart(),
+ diag::err_atomic_op_has_non_constant_synch_scope)
+ << Scope->getSourceRange();
+ else if (!isValidSyncScopeValue(Result.getZExtValue()))
+ Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
+ << Scope->getSourceRange();
+ } else {
+ Scope = IntegerLiteral::Create(
+ Context,
+ llvm::APInt(Context.getTypeSize(Context.IntTy),
+ static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices)),
+ Context.IntTy, SourceLocation());
+ }
+ }
+
// Permute the arguments into a 'consistent' order.
SmallVector<Expr*, 5> SubExprs;
SubExprs.push_back(Ptr);
break;
case Load:
SubExprs.push_back(TheCall->getArg(1)); // Order
+ SubExprs.push_back(Scope); // Scope
break;
case LoadCopy:
case Copy:
case Arithmetic:
case Xchg:
SubExprs.push_back(TheCall->getArg(2)); // Order
+ SubExprs.push_back(Scope); // Scope
SubExprs.push_back(TheCall->getArg(1)); // Val1
break;
case GNUXchg:
// Note, AtomicExpr::getVal2() has a special case for this atomic.
SubExprs.push_back(TheCall->getArg(3)); // Order
+ SubExprs.push_back(Scope); // Scope
SubExprs.push_back(TheCall->getArg(1)); // Val1
SubExprs.push_back(TheCall->getArg(2)); // Val2
break;
case C11CmpXchg:
SubExprs.push_back(TheCall->getArg(3)); // Order
+ SubExprs.push_back(Scope); // Scope
SubExprs.push_back(TheCall->getArg(1)); // Val1
SubExprs.push_back(TheCall->getArg(4)); // OrderFail
SubExprs.push_back(TheCall->getArg(2)); // Val2
break;
case GNUCmpXchg:
SubExprs.push_back(TheCall->getArg(4)); // Order
+ SubExprs.push_back(Scope); // Scope
SubExprs.push_back(TheCall->getArg(1)); // Val1
SubExprs.push_back(TheCall->getArg(5)); // OrderFail
SubExprs.push_back(TheCall->getArg(2)); // Val2
TheCall->getRParenLoc());
if ((Op == AtomicExpr::AO__c11_atomic_load ||
- (Op == AtomicExpr::AO__c11_atomic_store)) &&
+ Op == AtomicExpr::AO__c11_atomic_store ||
+ Op == AtomicExpr::AO__opencl_atomic_load ||
+      Op == AtomicExpr::AO__opencl_atomic_store) &&
Context.AtomicUsesUnsupportedLibcall(AE))
- Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib) <<
- ((Op == AtomicExpr::AO__c11_atomic_load) ? 0 : 1);
+ Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib)
+ << ((Op == AtomicExpr::AO__c11_atomic_load ||
+ Op == AtomicExpr::AO__opencl_atomic_load)
+ ? 0 : 1);
return AE;
}
--- /dev/null
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
+
+void f(atomic_int *i, atomic_uint *ui, int cmp) {
+ int x;
+ // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
+ x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+ x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+ x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+ // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+ x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+ // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+ x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);
+ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+ // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+ x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);
+#ifdef cl_khr_subgroups
+ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
+ x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
+#endif
+}
--- /dev/null
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -finclude-default-header -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+atomic_int j;
+
+void fi1(atomic_int *i) {
+ // CHECK-LABEL: @fi1
+ // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
+ x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
+ // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+ x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
+ // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst
+ x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
+}
+
+void fi2(atomic_int *i) {
+ // CHECK-LABEL: @fi2
+ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+void fi3(atomic_int *i, atomic_uint *ui) {
+ // CHECK-LABEL: @fi3
+ // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+ // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+ x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+bool fi4(atomic_int *i) {
+ // CHECK-LABEL: @fi4(
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+ // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+ // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+ // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+ // CHECK: store i32 [[OLD]]
+ int cmp = 0;
+ return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
+}
+
+float ff1(global atomic_float *d) {
+ // CHECK-LABEL: @ff1
+ // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+ return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
+}
+
+void ff2(atomic_float *d) {
+ // CHECK-LABEL: @ff2
+ // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+ __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
+}
+
+float ff3(atomic_float *d) {
+ // CHECK-LABEL: @ff3
+ // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
+ return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @atomic_init_foo
+void atomic_init_foo()
+{
+ // CHECK-NOT: atomic
+ // CHECK: store
+ __opencl_atomic_init(&j, 42);
+
+ // CHECK-NOT: atomic
+ // CHECK: }
+}
+
+// CHECK-LABEL: @failureOrder
+void failureOrder(atomic_int *ptr, int *ptr2) {
+ // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+ __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+
+ // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
+ __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
+ __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
+ // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+ // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+ // CHECK: [[MONOTONIC]]
+ // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQUIRE]]
+ // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[RELEASE]]
+ // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: ]
+
+ // CHECK: [[ACQREL]]
+ // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[SEQCST]]
+ // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+ // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+ // CHECK-NEXT: ]
+
+ // CHECK: [[MONOTONIC_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} monotonic monotonic
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acquire monotonic
+ // CHECK: br
+
+ // CHECK: [[ACQUIRE_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acquire acquire
+ // CHECK: br
+
+ // CHECK: [[ACQREL_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} acq_rel monotonic
+ // CHECK: br
+
+ // CHECK: [[ACQREL_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} acq_rel acquire
+ // CHECK: br
+
+ // CHECK: [[SEQCST_MONOTONIC]]
+ // CHECK: cmpxchg {{.*}} seq_cst monotonic
+ // CHECK: br
+
+ // CHECK: [[SEQCST_ACQUIRE]]
+ // CHECK: cmpxchg {{.*}} seq_cst acquire
+ // CHECK: br
+
+ // CHECK: [[SEQCST_SEQCST]]
+ // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+ // CHECK: br
+}
+
+int test_volatile(volatile atomic_int *i) {
+ // CHECK-LABEL: @test_volatile
+ // CHECK: %[[i_addr:.*]] = alloca i32
+ // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+ // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
+ // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
+ // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
+ // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
+ // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
+ // CHECK-NEXT: ret i32 %[[retval]]
+ return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+}
+
+#endif
// WEBASSEMBLY32-NOT:#define __LP64__
// WEBASSEMBLY32-NEXT:#define __NO_INLINE__ 1
// WEBASSEMBLY32-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
// WEBASSEMBLY32-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
// WEBASSEMBLY32-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
// WEBASSEMBLY32-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
// WEBASSEMBLY64-NEXT:#define __LP64__ 1
// WEBASSEMBLY64-NEXT:#define __NO_INLINE__ 1
// WEBASSEMBLY64-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
// WEBASSEMBLY64-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
// WEBASSEMBLY64-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
// WEBASSEMBLY64-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
// CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1
// CHECK-FRM: #define __FAST_RELAXED_MATH__ 1
+// RUN: %clang_cc1 %s -E -dM -o - -x cl \
+// RUN: | FileCheck %s --check-prefix=MSCOPE
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
+
// RUN: %clang_cc1 -triple aarch64-windows %s -E -dM -o - -x cl \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM64-WIN
--- /dev/null
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=spir64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=amdgcn-amdhsa-amd-opencl
+
+// Basic parsing/Sema tests for __opencl_atomic_*
+
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
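+// The 64-bit atomic types exercised below (e.g. atomic_intptr_t on spir64)
+// require the cl_khr_int64_* atomic extensions enabled above.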
+
+struct S { char c[3]; };
+
+char i8;
+short i16;
+int i32;
+int8 i64;
+
+atomic_int gn;
+
+void f(atomic_int *i, const atomic_int *ci,
+ atomic_intptr_t *p, atomic_float *d,
+ int *I, const int *CI,
+ intptr_t *P, float *D, struct S *s1, struct S *s2,
+ global atomic_int *i_g, local atomic_int *i_l, private atomic_int *i_p,
+ constant atomic_int *i_c) {
+ __opencl_atomic_init(I, 5); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+ __opencl_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+ __opencl_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}}
+ __opencl_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
+ __opencl_atomic_store(0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer}}
+ __opencl_atomic_store((int *)0, 0, 0, 0); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+ __opencl_atomic_store(i, 0, memory_order_relaxed, memory_scope_work_group);
+ __opencl_atomic_store(ci, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
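+ // Atomic objects may live in the generic, global, local, or private address
+ // space, but not in the constant address space: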
+ __opencl_atomic_store(i_g, 0, memory_order_relaxed, memory_scope_work_group);
+ __opencl_atomic_store(i_l, 0, memory_order_relaxed, memory_scope_work_group);
+ __opencl_atomic_store(i_p, 0, memory_order_relaxed, memory_scope_work_group);
+ __opencl_atomic_store(i_c, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-constant _Atomic type ('__constant atomic_int *' (aka '__constant _Atomic(int) *') invalid)}}
+
+ __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_load(p, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_load(d, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_load(ci, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+ __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_work_group);
+ (int)__opencl_atomic_store(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{operand of type 'void' where arithmetic or pointer type is required}}
+
+ int exchange_1 = __opencl_atomic_exchange(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ int exchange_2 = __opencl_atomic_exchange(I, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to _Atomic}}
+
+ __opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+ __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to bitwise atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
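+ // fetch_min and fetch_max follow the same operand rules as the other integer
+ // fetch operations: the object must be an atomic integer or pointer.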
+ __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+ __opencl_atomic_fetch_min(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+ __opencl_atomic_fetch_max(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
+ bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ bool cmpexch_3 = __opencl_atomic_compare_exchange_strong(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+ (void)__opencl_atomic_compare_exchange_strong(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+ bool cmpexchw_1 = __opencl_atomic_compare_exchange_weak(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ bool cmpexchw_2 = __opencl_atomic_compare_exchange_weak(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+ bool cmpexchw_3 = __opencl_atomic_compare_exchange_weak(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+ (void)__opencl_atomic_compare_exchange_weak(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+ // The object pointer and the 'expected' value pointer are allowed to be in
+ // different address spaces.
+ bool cmpexch_10 = __opencl_atomic_compare_exchange_strong((global atomic_int *)0x308, (constant int *)0x309, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+
+ __opencl_atomic_init(ci, 0); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+ __opencl_atomic_store(ci, 0, memory_order_release, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+ __opencl_atomic_load(ci, memory_order_acquire, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
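+ // __opencl_atomic_init takes just the object and an initial value; it has no
+ // memory order or synchronization scope arguments.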
+ __opencl_atomic_init(&gn, 456);
+ __opencl_atomic_init(&gn, (void*)0); // expected-warning{{incompatible pointer to integer conversion passing '__generic void *' to parameter of type 'int'}}
+}
+
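+// Check which memory_order values each builtin accepts: loads reject release
+// and acq_rel, stores reject acquire and acq_rel, and read-modify-write
+// operations accept all five OpenCL orders.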
+void memory_checks(atomic_int *Ap, int *p, int val) {
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_group); // expected-error {{use of undeclared identifier 'memory_order_consume'}}
+ (void)__opencl_atomic_load(Ap, memory_order_release, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+ (void)__opencl_atomic_load(Ap, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+ (void)__opencl_atomic_load(Ap, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_store(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_store(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+ (void)__opencl_atomic_store(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_store(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+ (void)__opencl_atomic_store(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_init(Ap, val);
+
+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_exchange(Ap, val, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_group);
+ (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_group);
+ (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+ (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+
+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+}
+
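+// The scope argument must be an integral constant expression with a valid
+// memory_scope value; memory_scope_work_item is rejected for atomic operations.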
+void synchscope_checks(atomic_int *Ap, int scope) {
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_item); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices);
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group);
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}}
+ (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+}
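+
+// Illustrative workaround (not exercised by this test): since the scope must
+// be a compile-time constant, a runtime scope value has to be dispatched
+// explicitly, e.g.
+//   switch (scope) {
+//   case memory_scope_device:
+//     __opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
+//     break;
+//   default:
+//     __opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+//     break;
+//   }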
+
+void nullPointerWarning(atomic_int *Ap, int *p, int val) {
+ // The 'expected' pointer shouldn't be NULL.
+ (void)__opencl_atomic_compare_exchange_strong(Ap, NULL, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}