}
case Type::Atomic: {
+ // Start with the base type information.
std::pair<uint64_t, unsigned> Info
= getTypeInfo(cast<AtomicType>(T)->getValueType());
Width = Info.first;
Align = Info.second;
- if (Width != 0 && Width <= Target->getMaxAtomicPromoteWidth() &&
- llvm::isPowerOf2_64(Width)) {
- // We can potentially perform lock-free atomic operations for this
- // type; promote the alignment appropriately.
- // FIXME: We could potentially promote the width here as well...
- // is that worthwhile? (Non-struct atomic types generally have
- // power-of-two size anyway, but structs might not. Requires a bit
- // of implementation work to make sure we zero out the extra bits.)
+
+ // If the size of the type doesn't exceed the platform's max
+ // atomic promotion width, make the size and alignment more
+ // favorable to atomic operations:
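+    // For example, a 48-bit (three-short) struct is widened to 64 bits
+    // with 64-bit alignment.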
+ if (Width != 0 && Width <= Target->getMaxAtomicPromoteWidth()) {
+ // Round the size up to a power of 2.
+ if (!llvm::isPowerOf2_64(Width))
+ Width = llvm::NextPowerOf2(Width);
+
+ // Set the alignment equal to the size.
Align = static_cast<unsigned>(Width);
}
}
#include "clang/AST/ASTContext.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
using namespace clang;
using namespace CodeGen;
+// The ABI values for various atomic memory orderings.
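+// These are expected to match the values of the C11 memory_order
+// enumeration and the 'order' argument taken by the __atomic_* libcalls.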
+enum AtomicOrderingKind {
+ AO_ABI_memory_order_relaxed = 0,
+ AO_ABI_memory_order_consume = 1,
+ AO_ABI_memory_order_acquire = 2,
+ AO_ABI_memory_order_release = 3,
+ AO_ABI_memory_order_acq_rel = 4,
+ AO_ABI_memory_order_seq_cst = 5
+};
+
+namespace {
+ class AtomicInfo {
+ CodeGenFunction &CGF;
+ QualType AtomicTy;
+ QualType ValueTy;
+ uint64_t AtomicSizeInBits;
+ uint64_t ValueSizeInBits;
+ CharUnits AtomicAlign;
+ CharUnits ValueAlign;
+ CharUnits LValueAlign;
+ TypeEvaluationKind EvaluationKind;
+ bool UseLibcall;
+ public:
+ AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
+ assert(lvalue.isSimple());
+
+ AtomicTy = lvalue.getType();
+ ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
+ EvaluationKind = CGF.getEvaluationKind(ValueTy);
+
+ ASTContext &C = CGF.getContext();
+
+ uint64_t valueAlignInBits;
+ llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);
+
+ uint64_t atomicAlignInBits;
+ llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);
+
+ assert(ValueSizeInBits <= AtomicSizeInBits);
+ assert(valueAlignInBits <= atomicAlignInBits);
+
+ AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits);
+ ValueAlign = C.toCharUnitsFromBits(valueAlignInBits);
+ if (lvalue.getAlignment().isZero())
+ lvalue.setAlignment(AtomicAlign);
+
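+      // Fall back to the __atomic_* library calls if the object is
+      // under-aligned for its size or too large for the target's
+      // inline atomics.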
+ UseLibcall =
+ (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
+ AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+ }
+
+ QualType getAtomicType() const { return AtomicTy; }
+ QualType getValueType() const { return ValueTy; }
+ CharUnits getAtomicAlignment() const { return AtomicAlign; }
+ CharUnits getValueAlignment() const { return ValueAlign; }
+ uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
+ TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
+ bool shouldUseLibcall() const { return UseLibcall; }
+
+ /// Is the atomic size larger than the underlying value type?
+ ///
+ /// Note that the absence of padding does not mean that atomic
+ /// objects are completely interchangeable with non-atomic
+ /// objects: we might have promoted the alignment of a type
+ /// without making it bigger.
+ bool hasPadding() const {
+ return (ValueSizeInBits != AtomicSizeInBits);
+ }
+
+ void emitMemSetZeroIfNecessary(LValue dest) const;
+
+ llvm::Value *getAtomicSizeValue() const {
+ CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
+ return CGF.CGM.getSize(size);
+ }
+
+ /// Cast the given pointer to an integer pointer suitable for
+ /// atomic operations.
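+    /// For example, an 8-byte atomic is accessed through an i64* in
+    /// the pointer's original address space.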
+ llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;
+
+ /// Turn an atomic-layout object into an r-value.
+ RValue convertTempToRValue(llvm::Value *addr,
+ AggValueSlot resultSlot) const;
+
+ /// Copy an atomic r-value into atomic-layout memory.
+ void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;
+
+ /// Project an l-value down to the value field.
+ LValue projectValue(LValue lvalue) const {
+ llvm::Value *addr = lvalue.getAddress();
+ if (hasPadding())
+ addr = CGF.Builder.CreateStructGEP(addr, 0);
+
+ return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
+ CGF.getContext(), lvalue.getTBAAInfo());
+ }
+
+ /// Materialize an atomic r-value in atomic-layout memory.
+ llvm::Value *materializeRValue(RValue rvalue) const;
+
+ private:
+ bool requiresMemSetZero(llvm::Type *type) const;
+ };
+}
+
+static RValue emitAtomicLibcall(CodeGenFunction &CGF,
+ StringRef fnName,
+ QualType resultType,
+ CallArgList &args) {
+ const CGFunctionInfo &fnInfo =
+ CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args,
+ FunctionType::ExtInfo(), RequiredArgs::All);
+ llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
+ llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName);
+ return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args);
+}
+
+/// Does a store of the given IR type modify the full expected width?
+static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
+ uint64_t expectedSize) {
+ return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
+}
+
+/// Does the atomic type require memsetting to zero before initialization?
+///
+/// The IR type is provided as a way of making certain queries faster.
+bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
+ // If the atomic type has size padding, we definitely need a memset.
+ if (hasPadding()) return true;
+
+ // Otherwise, do some simple heuristics to try to avoid it:
+ switch (getEvaluationKind()) {
+ // For scalars and complexes, check whether the store size of the
+ // type uses the full size.
+ case TEK_Scalar:
+ return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
+ case TEK_Complex:
+ return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
+ AtomicSizeInBits / 2);
+
+ // Just be pessimistic about aggregates.
+ case TEK_Aggregate:
+ return true;
+ }
+ llvm_unreachable("bad evaluation kind");
+}
+
+void AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
+ llvm::Value *addr = dest.getAddress();
+ if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
+ return;
+
+ CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+ AtomicSizeInBits / 8,
+ dest.getAlignment().getQuantity());
+}
+
static void
EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
assert(!Dest && "Init does not return a value");
- LValue LV = MakeAddrLValue(Ptr, AtomicTy, alignChars);
- switch (getEvaluationKind(E->getVal1()->getType())) {
- case TEK_Scalar:
- EmitScalarInit(EmitScalarExpr(E->getVal1()), LV);
- return RValue::get(0);
- case TEK_Complex:
- EmitComplexExprIntoLValue(E->getVal1(), LV, /*isInit*/ true);
- return RValue::get(0);
- case TEK_Aggregate: {
- AggValueSlot Slot = AggValueSlot::forLValue(LV,
- AggValueSlot::IsNotDestructed,
- AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
- EmitAggExpr(E->getVal1(), Slot);
- return RValue::get(0);
- }
- }
- llvm_unreachable("bad evaluation kind");
+ LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext());
+ EmitAtomicInit(E->getVal1(), lvalue);
+ return RValue::get(0);
}
Order = EmitScalarExpr(E->getOrder());
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
switch (ord) {
- case 0: // memory_order_relaxed
+ case AO_ABI_memory_order_relaxed:
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
llvm::Monotonic);
break;
- case 1: // memory_order_consume
- case 2: // memory_order_acquire
+ case AO_ABI_memory_order_consume:
+ case AO_ABI_memory_order_acquire:
if (IsStore)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
llvm::Acquire);
break;
- case 3: // memory_order_release
+ case AO_ABI_memory_order_release:
if (IsLoad)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
llvm::Release);
break;
- case 4: // memory_order_acq_rel
+ case AO_ABI_memory_order_acq_rel:
if (IsLoad || IsStore)
break; // Avoid crashing on code with undefined behavior
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
llvm::AcquireRelease);
break;
- case 5: // memory_order_seq_cst
+ case AO_ABI_memory_order_seq_cst:
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
llvm::SequentiallyConsistent);
break;
return RValue::get(0);
return convertTempToRValue(OrigDest, E->getType());
}
+
+llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
+ unsigned addrspace =
+ cast<llvm::PointerType>(addr->getType())->getAddressSpace();
+ llvm::IntegerType *ty =
+ llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
+ return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
+}
+
+RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
+ AggValueSlot resultSlot) const {
+ if (EvaluationKind == TEK_Aggregate) {
+ // Nothing to do if the result is ignored.
+ if (resultSlot.isIgnored()) return resultSlot.asRValue();
+
+ assert(resultSlot.getAddr() == addr || hasPadding());
+
+ // In these cases, we should have emitted directly into the result slot.
+ if (!hasPadding() || resultSlot.isValueOfAtomic())
+ return resultSlot.asRValue();
+
+ // Otherwise, fall into the common path.
+ }
+
+ // Drill into the padding structure if we have one.
+ if (hasPadding())
+ addr = CGF.Builder.CreateStructGEP(addr, 0);
+
+ // If we're emitting to an aggregate, copy into the result slot.
+ if (EvaluationKind == TEK_Aggregate) {
+ CGF.EmitAggregateCopy(resultSlot.getAddr(), addr, getValueType(),
+ resultSlot.isVolatile());
+ return resultSlot.asRValue();
+ }
+
+ // Otherwise, just convert the temporary to an r-value using the
+ // normal conversion routine.
+ return CGF.convertTempToRValue(addr, getValueType());
+}
+
+/// Emit a load from an l-value of atomic type. Note that the r-value
+/// we produce is an r-value of the atomic *value* type.
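+///
+/// If the load cannot be performed lock-free, it is lowered to a call
+/// to the __atomic_load library routine.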
+RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) {
+ AtomicInfo atomics(*this, src);
+
+ // Check whether we should use a library call.
+ if (atomics.shouldUseLibcall()) {
+ llvm::Value *tempAddr;
+ if (resultSlot.isValueOfAtomic()) {
+ assert(atomics.getEvaluationKind() == TEK_Aggregate);
+ tempAddr = resultSlot.getPaddedAtomicAddr();
+ } else if (!resultSlot.isIgnored() && !atomics.hasPadding()) {
+ assert(atomics.getEvaluationKind() == TEK_Aggregate);
+ tempAddr = resultSlot.getAddr();
+ } else {
+ tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+ }
+
+    // void __atomic_load(size_t size, void *mem, void *ret, int order);
+ CallArgList args;
+ args.add(RValue::get(atomics.getAtomicSizeValue()),
+ getContext().getSizeType());
+ args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
+ getContext().VoidPtrTy);
+ args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
+ getContext().VoidPtrTy);
+ args.add(RValue::get(llvm::ConstantInt::get(IntTy,
+ AO_ABI_memory_order_seq_cst)),
+ getContext().IntTy);
+ emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
+
+ // Produce the r-value.
+ return atomics.convertTempToRValue(tempAddr, resultSlot);
+ }
+
+ // Okay, we're doing this natively.
+ llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
+ llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
+ load->setAtomic(llvm::SequentiallyConsistent);
+
+ // Other decoration.
+ load->setAlignment(src.getAlignment().getQuantity());
+ if (src.isVolatileQualified())
+ load->setVolatile(true);
+ if (src.getTBAAInfo())
+ CGM.DecorateInstruction(load, src.getTBAAInfo());
+
+ // Okay, turn that back into the original value type.
+ QualType valueType = atomics.getValueType();
+ llvm::Value *result = load;
+
+ // If we're ignoring an aggregate return, don't do anything.
+ if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
+ return RValue::getAggregate(0, false);
+
+  // The easiest way to do this is to go through memory, but we
+ // try not to in some easy cases.
+ if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) {
+ llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
+ if (isa<llvm::IntegerType>(resultTy)) {
+ assert(result->getType() == resultTy);
+ result = EmitFromMemory(result, valueType);
+ } else if (isa<llvm::PointerType>(resultTy)) {
+ result = Builder.CreateIntToPtr(result, resultTy);
+ } else {
+ result = Builder.CreateBitCast(result, resultTy);
+ }
+ return RValue::get(result);
+ }
+
+ // Create a temporary. This needs to be big enough to hold the
+ // atomic integer.
+ llvm::Value *temp;
+ bool tempIsVolatile = false;
+ CharUnits tempAlignment;
+ if (atomics.getEvaluationKind() == TEK_Aggregate &&
+ (!atomics.hasPadding() || resultSlot.isValueOfAtomic())) {
+ assert(!resultSlot.isIgnored());
+ if (resultSlot.isValueOfAtomic()) {
+ temp = resultSlot.getPaddedAtomicAddr();
+ tempAlignment = atomics.getAtomicAlignment();
+ } else {
+ temp = resultSlot.getAddr();
+ tempAlignment = atomics.getValueAlignment();
+ }
+ tempIsVolatile = resultSlot.isVolatile();
+ } else {
+ temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+ tempAlignment = atomics.getAtomicAlignment();
+ }
+
+ // Slam the integer into the temporary.
+ llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp);
+ Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity())
+ ->setVolatile(tempIsVolatile);
+
+ return atomics.convertTempToRValue(temp, resultSlot);
+}
+
+/// Copy an r-value into memory as part of storing to an atomic type.
+/// This needs to create a bit-pattern suitable for atomic operations.
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
+  // If we have an aggregate r-value, it should already be of the
+  // atomic type, which means the caller is responsible for having
+  // zeroed any padding.  Just do an aggregate copy of that type.
+ if (rvalue.isAggregate()) {
+ CGF.EmitAggregateCopy(dest.getAddress(),
+ rvalue.getAggregateAddr(),
+ getAtomicType(),
+ (rvalue.isVolatileQualified()
+ || dest.isVolatileQualified()),
+ dest.getAlignment());
+ return;
+ }
+
+ // Okay, otherwise we're copying stuff.
+
+ // Zero out the buffer if necessary.
+ emitMemSetZeroIfNecessary(dest);
+
+ // Drill past the padding if present.
+ dest = projectValue(dest);
+
+ // Okay, store the rvalue in.
+ if (rvalue.isScalar()) {
+ CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
+ } else {
+ CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
+ }
+}
+
+/// Materialize an r-value into memory for the purposes of storing it
+/// to an atomic type.
+llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {
+ // Aggregate r-values are already in memory, and EmitAtomicStore
+ // requires them to be values of the atomic type.
+ if (rvalue.isAggregate())
+ return rvalue.getAggregateAddr();
+
+ // Otherwise, make a temporary and materialize into it.
+ llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
+  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(),
+                                     getAtomicAlignment());
+ emitCopyIntoMemory(rvalue, tempLV);
+ return temp;
+}
+
+/// Emit a store to an l-value of atomic type.
+///
+/// Note that the r-value is expected to be an r-value *of the atomic
+/// type*; this means that for aggregate r-values, it should include
+/// storage for any padding that was necessary.
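+///
+/// Like EmitAtomicLoad, this falls back to the __atomic_store library
+/// routine when a lock-free store is not possible.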
+void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
+ bool isInit) {
+ // If this is an aggregate r-value, it should agree in type except
+ // maybe for address-space qualification.
+ assert(!rvalue.isAggregate() ||
+ rvalue.getAggregateAddr()->getType()->getPointerElementType()
+ == dest.getAddress()->getType()->getPointerElementType());
+
+ AtomicInfo atomics(*this, dest);
+
+ // If this is an initialization, just put the value there normally.
+ if (isInit) {
+ atomics.emitCopyIntoMemory(rvalue, dest);
+ return;
+ }
+
+ // Check whether we should use a library call.
+ if (atomics.shouldUseLibcall()) {
+ // Produce a source address.
+ llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
+
+ // void __atomic_store(size_t size, void *mem, void *val, int order)
+ CallArgList args;
+ args.add(RValue::get(atomics.getAtomicSizeValue()),
+ getContext().getSizeType());
+ args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
+ getContext().VoidPtrTy);
+ args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
+ getContext().VoidPtrTy);
+ args.add(RValue::get(llvm::ConstantInt::get(IntTy,
+ AO_ABI_memory_order_seq_cst)),
+ getContext().IntTy);
+ emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+ return;
+ }
+
+ // Okay, we're doing this natively.
+ llvm::Value *intValue;
+
+ // If we've got a scalar value of the right size, try to avoid going
+ // through memory.
+ if (rvalue.isScalar() && !atomics.hasPadding()) {
+ llvm::Value *value = rvalue.getScalarVal();
+ if (isa<llvm::IntegerType>(value->getType())) {
+ intValue = value;
+ } else {
+ llvm::IntegerType *inputIntTy =
+ llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
+ if (isa<llvm::PointerType>(value->getType())) {
+ intValue = Builder.CreatePtrToInt(value, inputIntTy);
+ } else {
+ intValue = Builder.CreateBitCast(value, inputIntTy);
+ }
+ }
+
+ // Otherwise, we need to go through memory.
+ } else {
+ // Put the r-value in memory.
+ llvm::Value *addr = atomics.materializeRValue(rvalue);
+
+ // Cast the temporary to the atomic int type and pull a value out.
+ addr = atomics.emitCastToAtomicIntPointer(addr);
+ intValue = Builder.CreateAlignedLoad(addr,
+ atomics.getAtomicAlignment().getQuantity());
+ }
+
+ // Do the atomic store.
+ llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
+ llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+
+ // Initializations don't need to be atomic.
+ if (!isInit) store->setAtomic(llvm::SequentiallyConsistent);
+
+ // Other decoration.
+ store->setAlignment(dest.getAlignment().getQuantity());
+ if (dest.isVolatileQualified())
+ store->setVolatile(true);
+ if (dest.getTBAAInfo())
+ CGM.DecorateInstruction(store, dest.getTBAAInfo());
+}
+
+void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
+ AtomicInfo atomics(*this, dest);
+
+ switch (atomics.getEvaluationKind()) {
+ case TEK_Scalar: {
+ llvm::Value *value = EmitScalarExpr(init);
+ atomics.emitCopyIntoMemory(RValue::get(value), dest);
+ return;
+ }
+
+ case TEK_Complex: {
+ ComplexPairTy value = EmitComplexExpr(init);
+ atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
+ return;
+ }
+
+ case TEK_Aggregate: {
+ // Memset the buffer first if there's any possibility of
+ // uninitialized internal bits.
+ atomics.emitMemSetZeroIfNecessary(dest);
+
+ // HACK: whether the initializer actually has an atomic type
+ // doesn't really seem reliable right now.
+ if (!init->getType()->isAtomicType()) {
+ dest = atomics.projectValue(dest);
+ }
+
+ // Evaluate the expression directly into the destination.
+ AggValueSlot slot = AggValueSlot::forLValue(dest,
+ AggValueSlot::IsNotDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased);
+ EmitAggExpr(init, slot);
+ return;
+ }
+ }
+ llvm_unreachable("bad evaluation kind");
+}
return;
}
case TEK_Aggregate:
- // TODO: how can we delay here if D is captured by its initializer?
- EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
+ if (type->isAtomicType()) {
+ EmitAtomicInit(const_cast<Expr*>(init), lvalue);
+ } else {
+ // TODO: how can we delay here if D is captured by its initializer?
+ EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased));
+ }
MaybeEmitStdInitializerListCleanup(lvalue.getAddress(), init);
return;
}
return EmitFromMemory(V, Ty);
}
}
+
+ // Atomic operations have to be done on integral types.
+ if (Ty->isAtomicType()) {
+ LValue lvalue = LValue::MakeAddr(Addr, Ty,
+ CharUnits::fromQuantity(Alignment),
+ getContext(), TBAAInfo);
+ return EmitAtomicLoad(lvalue).getScalarVal();
+ }
llvm::LoadInst *Load = Builder.CreateLoad(Addr);
if (Volatile)
Load->setAlignment(Alignment);
if (TBAAInfo)
CGM.DecorateInstruction(Load, TBAAInfo);
- // If this is an atomic type, all normal reads must be atomic
- if (Ty->isAtomicType())
- Load->setAtomic(llvm::SequentiallyConsistent);
if ((SanOpts->Bool && hasBooleanRepresentation(Ty)) ||
(SanOpts->Enum && Ty->getAs<EnumType>())) {
Value = EmitToMemory(Value, Ty);
+ if (Ty->isAtomicType()) {
+ EmitAtomicStore(RValue::get(Value),
+ LValue::MakeAddr(Addr, Ty,
+ CharUnits::fromQuantity(Alignment),
+ getContext(), TBAAInfo),
+ isInit);
+ return;
+ }
+
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
if (Alignment)
Store->setAlignment(Alignment);
if (TBAAInfo)
CGM.DecorateInstruction(Store, TBAAInfo);
- if (!isInit && Ty->isAtomicType())
- Store->setAtomic(llvm::SequentiallyConsistent);
}
void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
// Aggregate Expression Emitter
//===----------------------------------------------------------------------===//
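+
+/// Given a value-of-atomic slot, recover the address of the full padded
+/// atomic object: the slot's address is always an all-zero-index
+/// getelementptr into the padding struct.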
+llvm::Value *AggValueSlot::getPaddedAtomicAddr() const {
+ assert(isValueOfAtomic());
+ llvm::GEPOperator *op = cast<llvm::GEPOperator>(getAddr());
+ assert(op->getNumIndices() == 2);
+ assert(op->hasAllZeroIndices());
+ return op->getPointerOperand();
+}
+
namespace {
class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
CodeGenFunction &CGF;
CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr());
}
};
+
+/// A helper class for emitting expressions into the value sub-object
+/// of a padded atomic type.
+class ValueDestForAtomic {
+ AggValueSlot Dest;
+public:
+ ValueDestForAtomic(CodeGenFunction &CGF, AggValueSlot dest, QualType type)
+ : Dest(dest) {
+ assert(!Dest.isValueOfAtomic());
+ if (!Dest.isIgnored() && CGF.CGM.isPaddedAtomicType(type)) {
+ llvm::Value *valueAddr = CGF.Builder.CreateStructGEP(Dest.getAddr(), 0);
+ Dest = AggValueSlot::forAddr(valueAddr,
+ Dest.getAlignment(),
+ Dest.getQualifiers(),
+ Dest.isExternallyDestructed(),
+ Dest.requiresGCollection(),
+ Dest.isPotentiallyAliased(),
+ Dest.isZeroed(),
+ AggValueSlot::IsValueOfAtomic);
+ }
+ }
+
+ const AggValueSlot &getDest() const { return Dest; }
+
+ ~ValueDestForAtomic() {
+ // Kill the GEP if we made one and it didn't end up used.
+ if (Dest.isValueOfAtomic()) {
+ llvm::Instruction *addr = cast<llvm::GetElementPtrInst>(Dest.getAddr());
+ if (addr->use_empty()) addr->eraseFromParent();
+ }
+ }
+};
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
/// then loads the result into DestPtr.
void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
LValue LV = CGF.EmitLValue(E);
+
+ // If the type of the l-value is atomic, then do an atomic load.
+ if (LV.getType()->isAtomicType()) {
+ ValueDestForAtomic valueDest(CGF, Dest, LV.getType());
+ CGF.EmitAtomicLoad(LV, valueDest.getDest());
+ return;
+ }
+
EmitFinalDestCopy(E->getType(), LV);
}
CGF.EmitAggExpr(E->getInitializer(), Slot);
}
+/// Attempt to look through various unimportant expressions to find a
+/// cast of the given kind.
+static Expr *findPeephole(Expr *op, CastKind kind) {
+ while (true) {
+ op = op->IgnoreParens();
+ if (CastExpr *castE = dyn_cast<CastExpr>(op)) {
+ if (castE->getCastKind() == kind)
+ return castE->getSubExpr();
+ if (castE->getCastKind() == CK_NoOp)
+ continue;
+ }
+ return 0;
+ }
+}
void AggExprEmitter::VisitCastExpr(CastExpr *E) {
switch (E->getCastKind()) {
"should have been unpacked before we got here");
}
+ case CK_NonAtomicToAtomic:
+ case CK_AtomicToNonAtomic: {
+ bool isToAtomic = (E->getCastKind() == CK_NonAtomicToAtomic);
+
+ // Determine the atomic and value types.
+ QualType atomicType = E->getSubExpr()->getType();
+ QualType valueType = E->getType();
+ if (isToAtomic) std::swap(atomicType, valueType);
+
+ assert(atomicType->isAtomicType());
+ assert(CGF.getContext().hasSameUnqualifiedType(valueType,
+ atomicType->castAs<AtomicType>()->getValueType()));
+
+ // Just recurse normally if we're ignoring the result or the
+ // atomic type doesn't change representation.
+ if (Dest.isIgnored() || !CGF.CGM.isPaddedAtomicType(atomicType)) {
+ return Visit(E->getSubExpr());
+ }
+
+ CastKind peepholeTarget =
+ (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic);
+
+ // These two cases are reverses of each other; try to peephole them.
+ if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) {
+ assert(CGF.getContext().hasSameUnqualifiedType(op->getType(),
+ E->getType()) &&
+ "peephole significantly changed types?");
+ return Visit(op);
+ }
+
+ // If we're converting an r-value of non-atomic type to an r-value
+ // of atomic type, just make an atomic temporary, emit into that,
+ // and then copy the value out. (FIXME: do we need to
+ // zero-initialize it first?)
+ if (isToAtomic) {
+ ValueDestForAtomic valueDest(CGF, Dest, atomicType);
+ CGF.EmitAggExpr(E->getSubExpr(), valueDest.getDest());
+ return;
+ }
+
+ // Otherwise, we're converting an atomic type to a non-atomic type.
+
+ // If the dest is a value-of-atomic subobject, drill back out.
+ if (Dest.isValueOfAtomic()) {
+ AggValueSlot atomicSlot =
+ AggValueSlot::forAddr(Dest.getPaddedAtomicAddr(),
+ Dest.getAlignment(),
+ Dest.getQualifiers(),
+ Dest.isExternallyDestructed(),
+ Dest.requiresGCollection(),
+ Dest.isPotentiallyAliased(),
+ Dest.isZeroed(),
+ AggValueSlot::IsNotValueOfAtomic);
+ CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
+ return;
+ }
+
+ // Otherwise, make an atomic temporary, emit into that, and then
+ // copy the value out.
+ AggValueSlot atomicSlot =
+ CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp");
+ CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
+
+ llvm::Value *valueAddr =
+ Builder.CreateStructGEP(atomicSlot.getAddr(), 0);
+ RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile());
+ return EmitFinalDestCopy(valueType, rvalue);
+ }
+
case CK_LValueToRValue:
// If we're loading from a volatile type, force the destination
// into existence.
EnsureDest(E->getType());
return Visit(E->getSubExpr());
}
+
// fallthrough
case CK_NoOp:
- case CK_AtomicToNonAtomic:
- case CK_NonAtomicToAtomic:
case CK_UserDefinedConversion:
case CK_ConstructorConversion:
assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(),
// Now emit the LHS and copy into it.
LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
+ // That copy is an atomic copy if the LHS is atomic.
+ if (LHS.getType()->isAtomicType()) {
+ CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
+ return;
+ }
+
EmitCopy(E->getLHS()->getType(),
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
LValue LHS = CGF.EmitLValue(E->getLHS());
+ // If we have an atomic type, evaluate into the destination and then
+ // do an atomic copy.
+ if (LHS.getType()->isAtomicType()) {
+ EnsureDest(E->getRHS()->getType());
+ Visit(E->getRHS());
+ CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
+ return;
+ }
+
// Codegen the RHS so that it stores directly into the LHS.
AggValueSlot LHSSlot =
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
: public StmtVisitor<ComplexExprEmitter, ComplexPairTy> {
CodeGenFunction &CGF;
CGBuilderTy &Builder;
- // True is we should ignore the value of a
bool IgnoreReal;
bool IgnoreImag;
public:
/// load the real and imaginary pieces, returning them as Real/Imag.
ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue) {
assert(lvalue.isSimple() && "non-simple complex l-value?");
+ if (lvalue.getType()->isAtomicType())
+ return CGF.EmitAtomicLoad(lvalue).getComplexVal();
+
llvm::Value *SrcPtr = lvalue.getAddress();
bool isVolatile = lvalue.isVolatileQualified();
void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val,
LValue lvalue,
bool isInit) {
+ if (lvalue.getType()->isAtomicType())
+ return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit);
+
llvm::Value *Ptr = lvalue.getAddress();
llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, "real");
llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, "imag");
/// evaluating an expression which constructs such an object.
bool AliasedFlag : 1;
+ /// ValueOfAtomicFlag - This is set to true if the slot is the value
+ /// subobject of an object the size of an _Atomic(T). The specific
+ /// guarantees this makes are:
+ /// - the address is guaranteed to be a getelementptr into the
+ /// padding struct and
+ /// - it is okay to store something the width of an _Atomic(T)
+ /// into the address.
+ /// Tracking this allows us to avoid some obviously unnecessary
+ /// memcpys.
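+  /// (Slots of this kind are created by the ValueDestForAtomic helper
+  /// in the aggregate expression emitter.)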
+ bool ValueOfAtomicFlag : 1;
+
public:
enum IsAliased_t { IsNotAliased, IsAliased };
enum IsDestructed_t { IsNotDestructed, IsDestructed };
enum IsZeroed_t { IsNotZeroed, IsZeroed };
enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
+ enum IsValueOfAtomic_t { IsNotValueOfAtomic, IsValueOfAtomic };
/// ignored - Returns an aggregate value slot indicating that the
/// aggregate value is being ignored.
IsDestructed_t isDestructed,
NeedsGCBarriers_t needsGC,
IsAliased_t isAliased,
- IsZeroed_t isZeroed = IsNotZeroed) {
+ IsZeroed_t isZeroed = IsNotZeroed,
+ IsValueOfAtomic_t isValueOfAtomic
+ = IsNotValueOfAtomic) {
AggValueSlot AV;
AV.Addr = addr;
AV.Alignment = align.getQuantity();
AV.ObjCGCFlag = needsGC;
AV.ZeroedFlag = isZeroed;
AV.AliasedFlag = isAliased;
+ AV.ValueOfAtomicFlag = isValueOfAtomic;
return AV;
}
IsDestructed_t isDestructed,
NeedsGCBarriers_t needsGC,
IsAliased_t isAliased,
- IsZeroed_t isZeroed = IsNotZeroed) {
+ IsZeroed_t isZeroed = IsNotZeroed,
+ IsValueOfAtomic_t isValueOfAtomic
+ = IsNotValueOfAtomic) {
return forAddr(LV.getAddress(), LV.getAlignment(),
- LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
+ LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed,
+ isValueOfAtomic);
}
IsDestructed_t isExternallyDestructed() const {
return Addr;
}
+ IsValueOfAtomic_t isValueOfAtomic() const {
+ return IsValueOfAtomic_t(ValueOfAtomicFlag);
+ }
+
+ llvm::Value *getPaddedAtomicAddr() const;
+
bool isIgnored() const {
return Addr == 0;
}
RValue convertTempToRValue(llvm::Value *addr, QualType type);
+ void EmitAtomicInit(Expr *E, LValue lvalue);
+
+ RValue EmitAtomicLoad(LValue lvalue,
+ AggValueSlot slot = AggValueSlot::ignored());
+
+ void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit);
+
/// EmitToMemory - Change a scalar value from its value
/// representation to its in-memory representation.
llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty);
namespace clang {
class TargetCodeGenInfo;
class ASTContext;
+ class AtomicType;
class FunctionDecl;
class IdentifierInfo;
class ObjCMethodDecl;
bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor);
+ bool isPaddedAtomicType(QualType type);
+ bool isPaddedAtomicType(const AtomicType *type);
+
static void DecorateInstruction(llvm::Instruction *Inst,
llvm::MDNode *TBAAInfo);
}
case Type::Atomic: {
- ResultType = ConvertType(cast<AtomicType>(Ty)->getValueType());
+ QualType valueType = cast<AtomicType>(Ty)->getValueType();
+ ResultType = ConvertTypeForMem(valueType);
+
+ // Pad out to the inflated size if necessary.
+ uint64_t valueSize = Context.getTypeSize(valueType);
+ uint64_t atomicSize = Context.getTypeSize(Ty);
+ if (valueSize != atomicSize) {
+ assert(valueSize < atomicSize);
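+      // Lay out the padded atomic as { value type, [N x i8] }.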
+ llvm::Type *elts[] = {
+ ResultType,
+ llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8)
+ };
+ ResultType = llvm::StructType::get(getLLVMContext(),
+ llvm::makeArrayRef(elts));
+ }
break;
}
}
return ResultType;
}
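+/// Does the _Atomic form of this type take up more space than the
+/// underlying value type (e.g. a 6-byte struct padded out to 8 bytes)?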
+bool CodeGenModule::isPaddedAtomicType(QualType type) {
+ return isPaddedAtomicType(type->castAs<AtomicType>());
+}
+
+bool CodeGenModule::isPaddedAtomicType(const AtomicType *type) {
+ return Context.getTypeSize(type) != Context.getTypeSize(type->getValueType());
+}
+
/// ConvertRecordDeclType - Lay out a tagged decl type like struct or union.
llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) {
// TagDecl's are not necessarily unique, instead use the (clang)
--- /dev/null
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-apple-ios -std=c11 | FileCheck %s
+
+// There isn't really anything special about iOS; it just happens to
+// only deploy on processors with native atomics support, so it's a good
+// way to test those code-paths.
+
+// This work was done in pursuit of <rdar://13338582>.
+
+// CHECK: define arm_aapcscc void @testFloat(float*
+void testFloat(_Atomic(float) *fp) {
+// CHECK: [[FP:%.*]] = alloca float*
+// CHECK-NEXT: [[X:%.*]] = alloca float
+// CHECK-NEXT: [[F:%.*]] = alloca float
+// CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
+
+// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
+ __c11_atomic_init(fp, 1.0f);
+
+// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
+ _Atomic(float) x = 2.0f;
+
+// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i32*
+// CHECK-NEXT: [[T2:%.*]] = load atomic i32* [[T1]] seq_cst, align 4
+// CHECK-NEXT: [[T3:%.*]] = bitcast i32 [[T2]] to float
+// CHECK-NEXT: store float [[T3]], float* [[F]]
+ float f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
+// CHECK-NEXT: [[T2:%.*]] = bitcast float [[T0]] to i32
+// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[T1]] to i32*
+// CHECK-NEXT: store atomic i32 [[T2]], i32* [[T3]] seq_cst, align 4
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
+void testComplexFloat(_Atomic(_Complex float) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: store [[CF]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
+ __c11_atomic_init(fp, 1.0f);
+
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
+ _Atomic(_Complex float) x = 2.0f;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i64*
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[CF]]* [[TMP0]] to i64*
+// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], float* [[T0]]
+// CHECK-NEXT: store float [[I]], float* [[T1]]
+ _Complex float f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], float* [[T0]]
+// CHECK-NEXT: store float [[I]], float* [[T1]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[TMP1]] to i64*
+// CHECK-NEXT: [[T1:%.*]] = load i64* [[T0]], align 8
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[DEST]] to i64*
+// CHECK-NEXT: store atomic i64 [[T1]], i64* [[T2]] seq_cst, align 8
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+typedef struct { short x, y, z, w; } S;
+// CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
+void testStruct(_Atomic(S) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[S]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: store [[S]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
+ __c11_atomic_init(fp, (S){1,2,3,4});
+
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
+ _Atomic(S) x = (S){1,2,3,4};
+
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i64*
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[F]] to i64*
+// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2
+ S f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[TMP0]] to i64*
+// CHECK-NEXT: [[T4:%.*]] = load i64* [[T3]], align 8
+// CHECK-NEXT: [[T5:%.*]] = bitcast [[S]]* [[T0]] to i64*
+// CHECK-NEXT: store atomic i64 [[T4]], i64* [[T5]] seq_cst, align 8
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+typedef struct { short x, y, z; } PS;
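+// PS is only 6 bytes, so _Atomic(PS) is promoted to an 8-byte type;
+// [[APS]] below is that padded form (PS plus two bytes of padding).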
+// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
+void testPromotedStruct(_Atomic(PS) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: store [[APS]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+ __c11_atomic_init(fp, (PS){1,2,3});
+
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+ _Atomic(PS) x = (PS){1,2,3};
+
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64*
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64*
+// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+ PS f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[TMP1]] to i64*
+// CHECK-NEXT: [[T5:%.*]] = load i64* [[T4]], align 8
+// CHECK-NEXT: [[T6:%.*]] = bitcast [[APS]]* [[T0]] to i64*
+// CHECK-NEXT: store atomic i64 [[T5]], i64* [[T6]] seq_cst, align 8
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+void testPromotedStructOps(_Atomic(PS) *p) {
+ PS a = __c11_atomic_load(p, 5);
+ __c11_atomic_store(p, a, 5);
+ PS b = __c11_atomic_exchange(p, a, 5);
+
+ _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
+ v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+}
s &= 42;
}
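+
+// The loads and stores in the tests below are expected to be lowered to
+// the __atomic_load and __atomic_store library calls rather than native
+// atomic instructions.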
+// CHECK: define arm_aapcscc void @testFloat(float*
+void testFloat(_Atomic(float) *fp) {
+// CHECK: [[FP:%.*]] = alloca float*
+// CHECK-NEXT: [[X:%.*]] = alloca float
+// CHECK-NEXT: [[F:%.*]] = alloca float
+// CHECK-NEXT: [[TMP0:%.*]] = alloca float
+// CHECK-NEXT: [[TMP1:%.*]] = alloca float
+// CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
+
+// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
+ __c11_atomic_init(fp, 1.0f);
+
+// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
+ _Atomic(float) x = 2.0f;
+
+// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
+// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
+// CHECK-NEXT: store float [[T3]], float* [[F]]
+ float f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
+// CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
+// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
+// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
+void testComplexFloat(_Atomic(_Complex float) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
+// CHECK-NEXT: store [[CF]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
+ __c11_atomic_init(fp, 1.0f);
+
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
+// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
+ _Atomic(_Complex float) x = 2.0f;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], float* [[T0]]
+// CHECK-NEXT: store float [[I]], float* [[T1]]
+ _Complex float f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: store float [[R]], float* [[T0]]
+// CHECK-NEXT: store float [[I]], float* [[T1]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+typedef struct { short x, y, z, w; } S;
+// CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
+void testStruct(_Atomic(S) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[S]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
+// CHECK-NEXT: store [[S]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
+ __c11_atomic_init(fp, (S){1,2,3,4});
+
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
+// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
+ _Atomic(S) x = (S){1,2,3,4};
+
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
+ S f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
+// CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+typedef struct { short x, y, z; } PS;
+// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
+void testPromotedStruct(_Atomic(PS) *fp) {
+// CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4
+// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: store [[APS]]*
+
+// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+ __c11_atomic_init(fp, (PS){1,2,3});
+
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
+ _Atomic(PS) x = (PS){1,2,3};
+
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+ PS f = *fp;
+
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
+// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
+// CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
+ *fp = f;
+
+// CHECK-NEXT: ret void
+}
+
+// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
+
+// FIXME: none of these look right, but we can leave the "test" here
+// to make sure they at least don't crash.
+void testPromotedStructOps(_Atomic(PS) *p) {
+ PS a = __c11_atomic_load(p, 5);
+ __c11_atomic_store(p, a, 5);
+ PS b = __c11_atomic_exchange(p, a, 5);
+ _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
+ v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+}