LVal = lvalue;
} else if (lvalue.isBitField()) {
+ ValueTy = lvalue.getType();
+ ValueSizeInBits = C.getTypeSize(ValueTy);
auto &OrigBFI = lvalue.getBitFieldInfo();
auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
AtomicSizeInBits = C.toBits(
BFI.StorageSize = AtomicSizeInBits;
LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
lvalue.getAlignment());
+ LVal.setTBAAInfo(lvalue.getTBAAInfo());
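+ // Prefer an integer type of the rounded-up storage width for the atomic
+ // operation; if no such integer type exists, fall back to an array of chars.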
+ AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
+ if (AtomicTy.isNull()) {
+ llvm::APInt Size(
+ /*numBits=*/32,
+ C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
+ AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ }
+ AtomicAlign = ValueAlign = lvalue.getAlignment();
} else if (lvalue.isVectorElt()) {
- AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+ ValueTy = lvalue.getType()->getAs<VectorType>()->getElementType();
+ ValueSizeInBits = C.getTypeSize(ValueTy);
+ AtomicTy = lvalue.getType();
+ AtomicSizeInBits = C.getTypeSize(AtomicTy);
+ AtomicAlign = ValueAlign = lvalue.getAlignment();
LVal = lvalue;
} else {
assert(lvalue.isExtVectorElt());
- AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+ ValueTy = lvalue.getType();
+ ValueSizeInBits = C.getTypeSize(ValueTy);
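+ // The atomic operation must cover the whole underlying vector, so use the
+ // full vector type for both the value and the atomic type.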
+ AtomicTy = ValueTy = CGF.getContext().getExtVectorType(
+ lvalue.getType(), lvalue.getExtVectorAddr()
+ ->getType()
+ ->getPointerElementType()
+ ->getVectorNumElements());
+ AtomicSizeInBits = C.getTypeSize(AtomicTy);
+ AtomicAlign = ValueAlign = lvalue.getAlignment();
LVal = lvalue;
}
UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
bool shouldUseLibcall() const { return UseLibcall; }
const LValue &getAtomicLValue() const { return LVal; }
+ llvm::Value *getAtomicAddress() const {
+ if (LVal.isSimple())
+ return LVal.getAddress();
+ else if (LVal.isBitField())
+ return LVal.getBitFieldAddr();
+ else if (LVal.isVectorElt())
+ return LVal.getVectorAddr();
+ assert(LVal.isExtVectorElt());
+ return LVal.getExtVectorAddr();
+ }
/// Is the atomic size larger than the underlying value type?
///
llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;
/// Turn an atomic-layout object into an r-value.
- RValue convertTempToRValue(llvm::Value *addr,
- AggValueSlot resultSlot,
- SourceLocation loc) const;
+ RValue convertTempToRValue(llvm::Value *addr, AggValueSlot resultSlot,
+ SourceLocation loc, bool AsValue) const;
/// \brief Converts a rvalue to integer value.
llvm::Value *convertRValueToInt(RValue RVal) const;
- RValue convertIntToValue(llvm::Value *IntVal, AggValueSlot ResultSlot,
- SourceLocation Loc) const;
+ RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
+ AggValueSlot ResultSlot,
+ SourceLocation Loc, bool AsValue) const;
/// Copy an atomic r-value into atomic-layout memory.
void emitCopyIntoMemory(RValue rvalue) const;
/// Project an l-value down to the value field.
LValue projectValue() const {
assert(LVal.isSimple());
- llvm::Value *addr = LVal.getAddress();
+ llvm::Value *addr = getAtomicAddress();
if (hasPadding())
addr = CGF.Builder.CreateStructGEP(addr, 0);
CGF.getContext(), LVal.getTBAAInfo());
}
+ /// \brief Emits an atomic load.
+ /// \returns The loaded value.
+ RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
+ bool AsValue, llvm::AtomicOrdering AO,
+ bool IsVolatile);
+
+ /// \brief Emits an atomic compare-and-exchange sequence.
+ /// \param Expected Expected value.
+ /// \param Desired Desired value.
+ /// \param Success Atomic ordering used on success.
+ /// \param Failure Atomic ordering used on failure.
+ /// \param IsWeak true if the atomic operation is weak, false otherwise.
+ /// \returns A pair of values: the previous value from storage (of the value
+ /// type) and a boolean flag (i1 type) that is true on success and false
+ /// otherwise.
+ std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchange(
+ RValue Expected, RValue Desired,
+ llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+ bool IsWeak = false);
+
/// Materialize an atomic r-value in atomic-layout memory.
llvm::Value *materializeRValue(RValue rvalue) const;
+ /// \brief Translates LLVM atomic ordering to GNU atomic ordering for
+ /// libcalls.
+ static AtomicExpr::AtomicOrderingKind
+ translateAtomicOrdering(const llvm::AtomicOrdering AO);
+
private:
bool requiresMemSetZero(llvm::Type *type) const;
+
+ /// \brief Creates a temp alloca for intermediate operations on the atomic
+ /// value.
+ llvm::Value *CreateTempAlloca() const;
+
+ /// \brief Emits an atomic load as a libcall.
+ void EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
+ llvm::AtomicOrdering AO, bool IsVolatile);
+ /// \brief Emits an atomic load as an LLVM instruction.
+ llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
+ /// \brief Emits an atomic compare-and-exchange op as a libcall.
+ std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+ llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
+ llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
+ /// \brief Emits an atomic compare-and-exchange op as an LLVM instruction.
+ std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
+ llvm::Value *Expected, llvm::Value *Desired,
+ llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+ bool IsWeak = false);
};
}
+AtomicExpr::AtomicOrderingKind
+AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) {
+ switch (AO) {
+ case llvm::Unordered:
+ case llvm::NotAtomic:
+ case llvm::Monotonic:
+ return AtomicExpr::AO_ABI_memory_order_relaxed;
+ case llvm::Acquire:
+ return AtomicExpr::AO_ABI_memory_order_acquire;
+ case llvm::Release:
+ return AtomicExpr::AO_ABI_memory_order_release;
+ case llvm::AcquireRelease:
+ return AtomicExpr::AO_ABI_memory_order_acq_rel;
+ case llvm::SequentiallyConsistent:
+ return AtomicExpr::AO_ABI_memory_order_seq_cst;
+ }
+ llvm_unreachable("Unhandled AtomicOrdering");
+}
+
+llvm::Value *AtomicInfo::CreateTempAlloca() const {
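+ // For bitfields the value type may be wider than the rounded-up atomic
+ // storage (e.g. an int bitfield packed into 3 bytes of storage), so
+ // allocate the wider type in that case.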
+ auto *TempAlloca = CGF.CreateMemTemp(
+ (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
+ : AtomicTy,
+ "atomic-temp");
+ TempAlloca->setAlignment(getAtomicAlignment().getQuantity());
+ // Cast to pointer to value type for bitfields.
+ if (LVal.isBitField())
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TempAlloca, getAtomicAddress()->getType());
+ return TempAlloca;
+}
+
static RValue emitAtomicLibcall(CodeGenFunction &CGF,
StringRef fnName,
QualType resultType,
if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
return false;
- CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
- AtomicSizeInBits / 8,
- LVal.getAlignment().getQuantity());
+ CGF.Builder.CreateMemSet(
+ addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+ CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
+ LVal.getAlignment().getQuantity());
return true;
}
RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
AggValueSlot resultSlot,
- SourceLocation loc) const {
+ SourceLocation loc, bool AsValue) const {
if (LVal.isSimple()) {
if (EvaluationKind == TEK_Aggregate)
return resultSlot.asRValue();
// Otherwise, just convert the temporary to an r-value using the
// normal conversion routine.
return CGF.convertTempToRValue(addr, getValueType(), loc);
- } else if (LVal.isBitField())
+ } else if (!AsValue)
+ // For non-simple lvalues, get the RValue of the whole atomic from temp
+ // memory.
+ return RValue::get(
+ CGF.Builder.CreateAlignedLoad(addr, AtomicAlign.getQuantity()));
+ else if (LVal.isBitField())
return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
else if (LVal.isVectorElt())
addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment()));
}
-RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
- AggValueSlot ResultSlot,
- SourceLocation Loc) const {
- assert(LVal.isSimple());
+RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
+ AggValueSlot ResultSlot,
+ SourceLocation Loc,
+ bool AsValue) const {
// Try to avoid going through memory in some easy cases.
assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
- if (getEvaluationKind() == TEK_Scalar && !hasPadding()) {
- auto *ValTy = CGF.ConvertTypeForMem(ValueTy);
+ if (getEvaluationKind() == TEK_Scalar &&
+ (((!LVal.isBitField() ||
+ LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
+ !hasPadding()) ||
+ !AsValue)) {
+ auto *ValTy = AsValue
+ ? CGF.ConvertTypeForMem(ValueTy)
+ : getAtomicAddress()->getType()->getPointerElementType();
if (ValTy->isIntegerTy()) {
assert(IntVal->getType() == ValTy && "Different integer types.");
return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
llvm::Value *Temp;
bool TempIsVolatile = false;
CharUnits TempAlignment;
- if (getEvaluationKind() == TEK_Aggregate) {
+ if (AsValue && getEvaluationKind() == TEK_Aggregate) {
assert(!ResultSlot.isIgnored());
Temp = ResultSlot.getAddr();
TempAlignment = getValueAlignment();
TempIsVolatile = ResultSlot.isVolatile();
} else {
- Temp = CGF.CreateMemTemp(getAtomicType(), "atomic-temp");
+ Temp = CreateTempAlloca();
TempAlignment = getAtomicAlignment();
}
CGF.Builder.CreateAlignedStore(IntVal, CastTemp, TempAlignment.getQuantity())
->setVolatile(TempIsVolatile);
- return convertTempToRValue(Temp, ResultSlot, Loc);
+ return convertTempToRValue(Temp, ResultSlot, Loc, AsValue);
+}
+
+void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
+ llvm::AtomicOrdering AO,
+ bool /*IsVolatile*/) {
+ // void __atomic_load(size_t size, void *mem, void *return, int order);
+ CallArgList Args;
+ Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
+ CGF.getContext().VoidPtrTy);
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddrForLoaded)),
+ CGF.getContext().VoidPtrTy);
+ Args.add(RValue::get(
+ llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))),
+ CGF.getContext().IntTy);
+ emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args);
+}
+
+llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
+ bool IsVolatile) {
+ // Okay, we're doing this natively.
+ llvm::Value *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
+ llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
+ Load->setAtomic(AO);
+
+ // Other decoration.
+ Load->setAlignment(getAtomicAlignment().getQuantity());
+ if (IsVolatile)
+ Load->setVolatile(true);
+ if (LVal.getTBAAInfo())
+ CGF.CGM.DecorateInstruction(Load, LVal.getTBAAInfo());
+ return Load;
}
/// An LValue is a candidate for having its loads and stores be made atomic if
return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
}
-/// Emit a load from an l-value of atomic type. Note that the r-value
-/// we produce is an r-value of the atomic *value* type.
-RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
- llvm::AtomicOrdering AO, bool IsVolatile,
- AggValueSlot resultSlot) {
- AtomicInfo atomics(*this, src);
- LValue LVal = atomics.getAtomicLValue();
- llvm::Value *SrcAddr = nullptr;
- llvm::AllocaInst *NonSimpleTempAlloca = nullptr;
- if (LVal.isSimple())
- SrcAddr = LVal.getAddress();
- else {
- if (LVal.isBitField())
- SrcAddr = LVal.getBitFieldAddr();
- else if (LVal.isVectorElt())
- SrcAddr = LVal.getVectorAddr();
- else {
- assert(LVal.isExtVectorElt());
- SrcAddr = LVal.getExtVectorAddr();
- }
- NonSimpleTempAlloca = CreateTempAlloca(
- SrcAddr->getType()->getPointerElementType(), "atomic-load-temp");
- NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
- }
-
+RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
+ bool AsValue, llvm::AtomicOrdering AO,
+ bool IsVolatile) {
// Check whether we should use a library call.
- if (atomics.shouldUseLibcall()) {
- llvm::Value *tempAddr;
- if (LVal.isSimple()) {
- if (!resultSlot.isIgnored()) {
- assert(atomics.getEvaluationKind() == TEK_Aggregate);
- tempAddr = resultSlot.getAddr();
- } else
- tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+ if (shouldUseLibcall()) {
+ llvm::Value *TempAddr;
+ if (LVal.isSimple() && !ResultSlot.isIgnored()) {
+ assert(getEvaluationKind() == TEK_Aggregate);
+ TempAddr = ResultSlot.getAddr();
} else
- tempAddr = NonSimpleTempAlloca;
-
- // void __atomic_load(size_t size, void *mem, void *return, int order);
- CallArgList args;
- args.add(RValue::get(atomics.getAtomicSizeValue()),
- getContext().getSizeType());
- args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy);
- args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy);
- args.add(RValue::get(llvm::ConstantInt::get(
- IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
- getContext().IntTy);
- emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
+ TempAddr = CreateTempAlloca();
- // Produce the r-value.
- return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+ EmitAtomicLoadLibcall(TempAddr, AO, IsVolatile);
+
+ // Okay, turn that back into the original value or whole atomic (for
+ // non-simple lvalues) type.
+ return convertTempToRValue(TempAddr, ResultSlot, Loc, AsValue);
}
// Okay, we're doing this natively.
- llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
- llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
- load->setAtomic(AO);
-
- // Other decoration.
- load->setAlignment(src.getAlignment().getQuantity());
- if (IsVolatile)
- load->setVolatile(true);
- if (src.getTBAAInfo())
- CGM.DecorateInstruction(load, src.getTBAAInfo());
+ auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
// If we're ignoring an aggregate return, don't do anything.
- if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
+ if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
return RValue::getAggregate(nullptr, false);
- // Okay, turn that back into the original value type.
- if (src.isSimple())
- return atomics.convertIntToValue(load, resultSlot, loc);
-
- auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
- Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
- return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
+ // Okay, turn that back into the original value or atomic (for non-simple
+ // lvalues) type.
+ return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
}
-
+/// Emit a load from an l-value of atomic type. Note that the r-value
+/// we produce is an r-value of the atomic *value* type.
+RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
+ llvm::AtomicOrdering AO, bool IsVolatile,
+ AggValueSlot resultSlot) {
+ AtomicInfo Atomics(*this, src);
+ return Atomics.EmitAtomicLoad(resultSlot, loc, /*AsValue=*/true, AO,
+ IsVolatile);
+}
/// Copy an r-value into memory as part of storing to an atomic type.
/// This needs to create a bit-pattern suitable for atomic operations.
// which means that the caller is responsible for having zeroed
// any padding. Just do an aggregate copy of that type.
if (rvalue.isAggregate()) {
- CGF.EmitAggregateCopy(LVal.getAddress(),
+ CGF.EmitAggregateCopy(getAtomicAddress(),
rvalue.getAggregateAddr(),
getAtomicType(),
(rvalue.isVolatileQualified()
return rvalue.getAggregateAddr();
// Otherwise, make a temporary and materialize into it.
- llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
- LValue tempLV =
- CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
- AtomicInfo Atomics(CGF, tempLV);
+ LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType(),
+ getAtomicAlignment());
+ AtomicInfo Atomics(CGF, TempLV);
Atomics.emitCopyIntoMemory(rvalue);
- return temp;
+ return TempLV.getAddress();
}
llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
// If we've got a scalar value of the right size, try to avoid going
// through memory.
- if (RVal.isScalar() && !hasPadding()) {
+ if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
llvm::Value *Value = RVal.getScalarVal();
if (isa<llvm::IntegerType>(Value->getType()))
return Value;
else {
- llvm::IntegerType *InputIntTy =
- llvm::IntegerType::get(CGF.getLLVMContext(), getValueSizeInBits());
+ llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
+ CGF.getLLVMContext(),
+ LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
if (isa<llvm::PointerType>(Value->getType()))
return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
getAtomicAlignment().getQuantity());
}
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
+ llvm::Value *Expected, llvm::Value *Desired, llvm::AtomicOrdering Success,
+ llvm::AtomicOrdering Failure, bool IsWeak) {
+ // Do the atomic compare-and-exchange.
+ auto *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
+ auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, Expected, Desired, Success,
+ Failure);
+ // Other decoration.
+ Inst->setVolatile(LVal.isVolatileQualified());
+ Inst->setWeak(IsWeak);
+
+ // Okay, turn that back into the original value type.
+ auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
+ auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
+ return std::make_pair(PreviousVal, SuccessFailureVal);
+}
+
+std::pair<llvm::Value *, llvm::Value *>
+AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
+ llvm::Value *DesiredAddr,
+ llvm::AtomicOrdering Success,
+ llvm::AtomicOrdering Failure) {
+ // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+ // void *desired, int success, int failure);
+ CallArgList Args;
+ Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
+ CGF.getContext().VoidPtrTy);
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(ExpectedAddr)),
+ CGF.getContext().VoidPtrTy);
+ Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)),
+ CGF.getContext().VoidPtrTy);
+ Args.add(RValue::get(llvm::ConstantInt::get(
+ CGF.IntTy, translateAtomicOrdering(Success))),
+ CGF.getContext().IntTy);
+ Args.add(RValue::get(llvm::ConstantInt::get(
+ CGF.IntTy, translateAtomicOrdering(Failure))),
+ CGF.getContext().IntTy);
+ auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
+ CGF.getContext().BoolTy, Args);
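+ // On failure the libcall writes the observed value back into the expected
+ // buffer, so reloading it yields the previous value in either case.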
+ auto *PreviousVal = CGF.Builder.CreateAlignedLoad(
+ ExpectedAddr, getValueAlignment().getQuantity());
+ return std::make_pair(PreviousVal, SuccessFailureRVal.getScalarVal());
+}
+
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
+ RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
+ llvm::AtomicOrdering Failure, bool IsWeak) {
+ if (Failure >= Success)
+ // Don't assert on undefined behavior.
+ Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
+
+ // Check whether we should use a library call.
+ if (shouldUseLibcall()) {
+ // Materialize the expected and desired values as temporary buffers for
+ // the libcall.
+ auto *ExpectedAddr = materializeRValue(Expected);
+ auto *DesiredAddr = materializeRValue(Desired);
+ return EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, Success,
+ Failure);
+ }
+
+ // If we've got a scalar value of the right size, try to avoid going
+ // through memory.
+ auto *ExpectedIntVal = convertRValueToInt(Expected);
+ auto *DesiredIntVal = convertRValueToInt(Desired);
+
+ return EmitAtomicCompareExchangeOp(ExpectedIntVal, DesiredIntVal, Success,
+ Failure, IsWeak);
+}
+
void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
bool isInit) {
bool IsVolatile = lvalue.isVolatileQualified();
== dest.getAddress()->getType()->getPointerElementType());
AtomicInfo atomics(*this, dest);
+ LValue LVal = atomics.getAtomicLValue();
// If this is an initialization, just put the value there normally.
- if (isInit) {
- atomics.emitCopyIntoMemory(rvalue);
- return;
- }
+ if (LVal.isSimple()) {
+ if (isInit) {
+ atomics.emitCopyIntoMemory(rvalue);
+ return;
+ }
- // Check whether we should use a library call.
- if (atomics.shouldUseLibcall()) {
- // Produce a source address.
- llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
+ // Check whether we should use a library call.
+ if (atomics.shouldUseLibcall()) {
+ // Produce a source address.
+ llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
- // void __atomic_store(size_t size, void *mem, void *val, int order)
- CallArgList args;
- args.add(RValue::get(atomics.getAtomicSizeValue()),
- getContext().getSizeType());
- args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
- getContext().VoidPtrTy);
- args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
- getContext().VoidPtrTy);
- args.add(RValue::get(llvm::ConstantInt::get(
- IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
- getContext().IntTy);
- emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+ // void __atomic_store(size_t size, void *mem, void *val, int order)
+ CallArgList args;
+ args.add(RValue::get(atomics.getAtomicSizeValue()),
+ getContext().getSizeType());
+ args.add(RValue::get(EmitCastToVoidPtr(atomics.getAtomicAddress())),
+ getContext().VoidPtrTy);
+ args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), getContext().VoidPtrTy);
+ args.add(RValue::get(llvm::ConstantInt::get(
+ IntTy, AtomicInfo::translateAtomicOrdering(AO))),
+ getContext().IntTy);
+ emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+ return;
+ }
+
+ // Okay, we're doing this natively.
+ llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+
+ // Do the atomic store.
+ llvm::Value *addr =
+ atomics.emitCastToAtomicIntPointer(atomics.getAtomicAddress());
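+ // Cast the value to the exact width of the atomic storage (a no-op when
+ // the widths already match).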
+ intValue = Builder.CreateIntCast(
+ intValue, addr->getType()->getPointerElementType(), /*isSigned=*/false);
+ llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+
+ // Initializations don't need to be atomic.
+ if (!isInit)
+ store->setAtomic(AO);
+
+ // Other decoration.
+ store->setAlignment(dest.getAlignment().getQuantity());
+ if (IsVolatile)
+ store->setVolatile(true);
+ if (dest.getTBAAInfo())
+ CGM.DecorateInstruction(store, dest.getTBAAInfo());
return;
}
- // Okay, we're doing this natively.
- llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
-
- // Do the atomic store.
- llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
- llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
-
- // Initializations don't need to be atomic.
- if (!isInit) store->setAtomic(AO);
-
- // Other decoration.
- store->setAlignment(dest.getAlignment().getQuantity());
- if (IsVolatile)
- store->setVolatile(true);
- if (dest.getTBAAInfo())
- CGM.DecorateInstruction(store, dest.getTBAAInfo());
+ // Atomically load the previous value.
+ RValue OldRVal =
+ atomics.EmitAtomicLoad(AggValueSlot::ignored(), SourceLocation(),
+ /*AsValue=*/false, AO, IsVolatile);
+ // For non-simple lvalues, perform the store via a compare-and-swap loop.
+ auto *ContBB = createBasicBlock("atomic_cont");
+ auto *ExitBB = createBasicBlock("atomic_exit");
+ auto *CurBB = Builder.GetInsertBlock();
+ EmitBlock(ContBB);
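+ // The PHI carries the current value of the atomic across iterations of the
+ // compare-and-swap loop.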
+ llvm::PHINode *PHI = Builder.CreatePHI(OldRVal.getScalarVal()->getType(),
+ /*NumReservedValues=*/2);
+ PHI->addIncoming(OldRVal.getScalarVal(), CurBB);
+ RValue OriginalRValue = RValue::get(PHI);
+ // Materialize the old value in a temporary.
+ auto *Ptr = atomics.materializeRValue(OriginalRValue);
+ // Build a new lvalue for the temp address.
+ LValue UpdateLVal;
+ if (LVal.isBitField())
+ UpdateLVal = LValue::MakeBitfield(Ptr, LVal.getBitFieldInfo(),
+ LVal.getType(), LVal.getAlignment());
+ else if (LVal.isVectorElt())
+ UpdateLVal = LValue::MakeVectorElt(Ptr, LVal.getVectorIdx(), LVal.getType(),
+ LVal.getAlignment());
+ else {
+ assert(LVal.isExtVectorElt());
+ UpdateLVal = LValue::MakeExtVectorElt(Ptr, LVal.getExtVectorElts(),
+ LVal.getType(), LVal.getAlignment());
+ }
+ UpdateLVal.setTBAAInfo(LVal.getTBAAInfo());
+ // Store the new value into the corresponding memory area.
+ EmitStoreThroughLValue(rvalue, UpdateLVal);
+ // Load the new value back.
+ RValue NewRValue = RValue::get(EmitLoadOfScalar(
+ Ptr, LVal.isVolatile(), atomics.getAtomicAlignment().getQuantity(),
+ atomics.getAtomicType(), SourceLocation()));
+ // Try to write the new value using a cmpxchg operation.
+ auto Pair = atomics.EmitAtomicCompareExchange(OriginalRValue, NewRValue, AO);
+ llvm::Value *OldValue = Pair.first;
+ if (!atomics.shouldUseLibcall())
+ // Convert the integer value back to the original atomic type.
+ OldValue = atomics.ConvertIntToValueOrAtomic(
+ OldValue, AggValueSlot::ignored(), SourceLocation(),
+ /*AsValue=*/false).getScalarVal();
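+ // Feed the value observed by a failed cmpxchg back into the next iteration.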
+ PHI->addIncoming(OldValue, ContBB);
+ Builder.CreateCondBr(Pair.second, ContBB, ExitBB);
+ EmitBlock(ExitBB);
}
/// Emit a compare-and-exchange op for atomic type.
Obj.getAddress()->getType()->getPointerElementType());
AtomicInfo Atomics(*this, Obj);
- if (Failure >= Success)
- // Don't assert on undefined behavior.
- Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
-
- auto Alignment = Atomics.getValueAlignment();
- // Check whether we should use a library call.
- if (Atomics.shouldUseLibcall()) {
- auto *ExpectedAddr = Atomics.materializeRValue(Expected);
- // Produce a source address.
- auto *DesiredAddr = Atomics.materializeRValue(Desired);
- // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
- // void *desired, int success, int failure);
- CallArgList Args;
- Args.add(RValue::get(Atomics.getAtomicSizeValue()),
- getContext().getSizeType());
- Args.add(RValue::get(EmitCastToVoidPtr(Obj.getAddress())),
- getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(ExpectedAddr)),
- getContext().VoidPtrTy);
- Args.add(RValue::get(EmitCastToVoidPtr(DesiredAddr)),
- getContext().VoidPtrTy);
- Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)),
- getContext().IntTy);
- Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)),
- getContext().IntTy);
- auto SuccessFailureRVal = emitAtomicLibcall(
- *this, "__atomic_compare_exchange", getContext().BoolTy, Args);
- auto *PreviousVal =
- Builder.CreateAlignedLoad(ExpectedAddr, Alignment.getQuantity());
- return std::make_pair(RValue::get(PreviousVal), SuccessFailureRVal);
- }
-
- // If we've got a scalar value of the right size, try to avoid going
- // through memory.
- auto *ExpectedIntVal = Atomics.convertRValueToInt(Expected);
- auto *DesiredIntVal = Atomics.convertRValueToInt(Desired);
-
- // Do the atomic store.
- auto *Addr = Atomics.emitCastToAtomicIntPointer(Obj.getAddress());
- auto *Inst = Builder.CreateAtomicCmpXchg(Addr, ExpectedIntVal, DesiredIntVal,
- Success, Failure);
- // Other decoration.
- Inst->setVolatile(Obj.isVolatileQualified());
- Inst->setWeak(IsWeak);
-
- // Okay, turn that back into the original value type.
- auto *PreviousVal = Builder.CreateExtractValue(Inst, /*Idxs=*/0);
- auto *SuccessFailureVal = Builder.CreateExtractValue(Inst, /*Idxs=*/1);
- return std::make_pair(Atomics.convertIntToValue(PreviousVal, Slot, Loc),
- RValue::get(SuccessFailureVal));
+ auto Pair = Atomics.EmitAtomicCompareExchange(Expected, Desired, Success,
+ Failure, IsWeak);
+ return std::make_pair(Atomics.shouldUseLibcall()
+ ? RValue::get(Pair.first)
+ : Atomics.ConvertIntToValueOrAtomic(
+ Pair.first, Slot, Loc, /*AsValue=*/true),
+ RValue::get(Pair.second));
}
void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
? CGF.EmitLoadOfLValue(XLValue, Loc)
: CGF.EmitAtomicLoad(XLValue, Loc,
IsSeqCst ? llvm::SequentiallyConsistent
- : llvm::Monotonic);
+ : llvm::Monotonic,
+ XLValue.isVolatile());
// OpenMP, 2.12.6, atomic Construct
// Any atomic construct with a seq_cst clause forces the atomically
// performed operation to include an implicit flush operation without a
}
}
+static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
+ const Expr *X, const Expr *E,
+ SourceLocation Loc) {
+ // x = expr;
+ assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
+ LValue XLValue = CGF.EmitLValue(X);
+ RValue ExprRValue = CGF.EmitAnyExpr(E);
+ if (XLValue.isGlobalReg())
+ CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
+ else
+ CGF.EmitAtomicStore(ExprRValue, XLValue,
+ IsSeqCst ? llvm::SequentiallyConsistent
+ : llvm::Monotonic,
+ XLValue.isVolatile(), /*IsInit=*/false);
+ // OpenMP, 2.12.6, atomic Construct
+ // Any atomic construct with a seq_cst clause forces the atomically
+ // performed operation to include an implicit flush operation without a
+ // list.
+ if (IsSeqCst)
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
bool IsSeqCst, const Expr *X, const Expr *V,
- const Expr *, SourceLocation Loc) {
+ const Expr *E, SourceLocation Loc) {
switch (Kind) {
case OMPC_read:
EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
break;
case OMPC_write:
+ EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+ break;
case OMPC_update:
case OMPC_capture:
llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
auto AtomicBinOp =
dyn_cast<BinaryOperator>(AtomicBody->IgnoreParenImpCasts());
if (AtomicBinOp && AtomicBinOp->getOpcode() == BO_Assign) {
- X = AtomicBinOp->getLHS()->IgnoreParenImpCasts();
- E = AtomicBinOp->getRHS()->IgnoreParenImpCasts();
+ X = AtomicBinOp->getLHS();
+ E = AtomicBinOp->getRHS();
if ((X->isInstantiationDependent() || X->getType()->isScalarType()) &&
(E->isInstantiationDependent() || E->getType()->isScalarType())) {
if (!X->isLValue()) {
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
// CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
// CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
// CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
+ // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
// CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
// CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
// CHECK32: [[ATOMIC_CONT]]
#pragma omp atomic read
ldv = bfx.a;
// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
-// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @bfx_packed to i8*), i64 4), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @bfx_packed to i8*), i64 4), i8* [[LDTEMP_VOID_PTR]], i32 0)
// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
// CHECK: ashr i32 [[SHL]], 1
#pragma omp atomic read
ldv = bfx3.a;
// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i24* [[LDTEMP:%.+]] to i8*
-// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @bfx3_packed to i8*), i64 1), i8* [[LDTEMP_VOID_PTR]], i32 5)
+// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @bfx3_packed to i8*), i64 1), i8* [[LDTEMP_VOID_PTR]], i32 0)
// CHECK: [[LD:%.+]] = load i24* [[LDTEMP]]
// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7
// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10
// CHECK: store x86_fp80
#pragma omp atomic read
ldv = bfx4_packed.b;
-// CHECK: [[LD:%.+]] = load atomic i32* bitcast (<2 x float>* @{{.+}} to i32*) monotonic
-// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i32*
-// CHECK: store i32 [[LD]], i32* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (<2 x float>* @{{.+}} to i64*) monotonic
+// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i64*
+// CHECK: store i64 [[LD]], i64* [[BITCAST]]
// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]]
// CHECK: extractelement <2 x float> [[LD]]
// CHECK: store i64
--- /dev/null
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int int4 __attribute__((__vector_size__(16)));
+int4 int4x;
+
+struct BitFields {
+ int : 32;
+ int a : 31;
+} bfx;
+
+struct BitFields_packed {
+ int : 32;
+ int a : 31;
+} __attribute__ ((__packed__)) bfx_packed;
+
+struct BitFields2 {
+ int : 31;
+ int a : 1;
+} bfx2;
+
+struct BitFields2_packed {
+ int : 31;
+ int a : 1;
+} __attribute__ ((__packed__)) bfx2_packed;
+
+struct BitFields3 {
+ int : 11;
+ int a : 14;
+} bfx3;
+
+struct BitFields3_packed {
+ int : 11;
+ int a : 14;
+} __attribute__ ((__packed__)) bfx3_packed;
+
+struct BitFields4 {
+ short : 16;
+ int a: 1;
+ long b : 7;
+} bfx4;
+
+struct BitFields4_packed {
+ short : 16;
+ int a: 1;
+ long b : 7;
+} __attribute__ ((__packed__)) bfx4_packed;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load i8*
+// CHECK: store atomic i8
+#pragma omp atomic write
+ bx = bv;
+// CHECK: load i8*
+// CHECK: store atomic i8
+#pragma omp atomic write
+ cx = cv;
+// CHECK: load i8*
+// CHECK: store atomic i8
+#pragma omp atomic write
+ ucx = ucv;
+// CHECK: load i16*
+// CHECK: store atomic i16
+#pragma omp atomic write
+ sx = sv;
+// CHECK: load i16*
+// CHECK: store atomic i16
+#pragma omp atomic write
+ usx = usv;
+// CHECK: load i32*
+// CHECK: store atomic i32
+#pragma omp atomic write
+ ix = iv;
+// CHECK: load i32*
+// CHECK: store atomic i32
+#pragma omp atomic write
+ uix = uiv;
+// CHECK: load i64*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ lx = lv;
+// CHECK: load i64*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ ulx = ulv;
+// CHECK: load i64*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ llx = llv;
+// CHECK: load i64*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ ullx = ullv;
+// CHECK: load float*
+// CHECK: bitcast float {{.*}} to i32
+// CHECK: store atomic i32 {{.*}}, i32* bitcast (float*
+#pragma omp atomic write
+ fx = fv;
+// CHECK: load double*
+// CHECK: bitcast double {{.*}} to i64
+// CHECK: store atomic i64 {{.*}}, i64* bitcast (double*
+#pragma omp atomic write
+ dx = dv;
+// CHECK: [[LD:%.+]] = load x86_fp80*
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false)
+// CHECK: store x86_fp80 [[LD]], x86_fp80* [[LDTEMP]]
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
+// CHECK: [[LD:%.+]] = load i128* [[BITCAST]]
+// CHECK: store atomic i128 [[LD]], i128* bitcast (x86_fp80*
+#pragma omp atomic write
+ ldx = ldv;
+// CHECK: [[REAL_VAL:%.+]] = load i32* getelementptr inbounds ({ i32, i32 }* @{{.*}}, i32 0, i32 0)
+// CHECK: [[IMG_VAL:%.+]] = load i32* getelementptr inbounds ({ i32, i32 }* @{{.*}}, i32 0, i32 1)
+// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0
+// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1
+// CHECK: store i32 [[REAL_VAL]], i32* [[TEMP_REAL_REF]]
+// CHECK: store i32 [[IMG_VAL]], i32* [[TEMP_IMG_REF]]
+// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8*
+// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.*}} to i8*), i8* [[BITCAST]], i32 0)
+#pragma omp atomic write
+ cix = civ;
+// CHECK: [[REAL_VAL:%.+]] = load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 0)
+// CHECK: [[IMG_VAL:%.+]] = load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 1)
+// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { float, float }* [[TEMP:%.+]], i32 0, i32 0
+// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { float, float }* [[TEMP]], i32 0, i32 1
+// CHECK: store float [[REAL_VAL]], float* [[TEMP_REAL_REF]]
+// CHECK: store float [[IMG_VAL]], float* [[TEMP_IMG_REF]]
+// CHECK: [[BITCAST:%.+]] = bitcast { float, float }* [[TEMP]] to i8*
+// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ float, float }* @{{.*}} to i8*), i8* [[BITCAST]], i32 0)
+#pragma omp atomic write
+ cfx = cfv;
+// CHECK: [[REAL_VAL:%.+]] = load double* getelementptr inbounds ({ double, double }* @{{.*}}, i32 0, i32 0)
+// CHECK: [[IMG_VAL:%.+]] = load double* getelementptr inbounds ({ double, double }* @{{.*}}, i32 0, i32 1)
+// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { double, double }* [[TEMP:%.+]], i32 0, i32 0
+// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { double, double }* [[TEMP]], i32 0, i32 1
+// CHECK: store double [[REAL_VAL]], double* [[TEMP_REAL_REF]]
+// CHECK: store double [[IMG_VAL]], double* [[TEMP_IMG_REF]]
+// CHECK: [[BITCAST:%.+]] = bitcast { double, double }* [[TEMP]] to i8*
+// CHECK: call void @__atomic_store(i64 16, i8* bitcast ({ double, double }* @{{.*}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: call{{.*}} @__kmpc_flush(
+#pragma omp atomic seq_cst write
+ cdx = cdv;
+// CHECK: load i8*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ ulx = bv;
+// CHECK: load i8*
+// CHECK: store atomic i8
+#pragma omp atomic write
+ bx = cv;
+// CHECK: load i8*
+// CHECK: store atomic i8
+// CHECK: call{{.*}} @__kmpc_flush(
+#pragma omp atomic write, seq_cst
+ cx = ucv;
+// CHECK: load i16*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ ulx = sv;
+// CHECK: load i16*
+// CHECK: store atomic i64
+#pragma omp atomic write
+ lx = usv;
+// CHECK: load i32*
+// CHECK: store atomic i32
+// CHECK: call{{.*}} @__kmpc_flush(
+#pragma omp atomic seq_cst, write
+ uix = iv;
+// CHECK: load i32*
+// CHECK: store atomic i32
+#pragma omp atomic write
+ ix = uiv;
+// CHECK: load i64*
+// CHECK: [[VAL:%.+]] = trunc i64 %{{.*}} to i32
+// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0
+// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1
+// CHECK: store i32 [[VAL]], i32* [[TEMP_REAL_REF]]
+// CHECK: store i32 0, i32* [[TEMP_IMG_REF]]
+// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8*
+// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.+}} to i8*), i8* [[BITCAST]], i32 0)
+#pragma omp atomic write
+ cix = lv;
+// CHECK: load i64*
+// CHECK: store atomic i32 %{{.+}}, i32* bitcast (float*
+#pragma omp atomic write
+ fx = ulv;
+// CHECK: load i64*
+// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double*
+#pragma omp atomic write
+ dx = llv;
+// CHECK: load i64*
+// CHECK: [[VAL:%.+]] = uitofp i64 %{{.+}} to x86_fp80
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP:%.+]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false)
+// CHECK: store x86_fp80 [[VAL]], x86_fp80* [[TEMP]]
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP]] to i128*
+// CHECK: [[VAL:%.+]] = load i128* [[BITCAST]]
+// CHECK: store atomic i128 [[VAL]], i128* bitcast (x86_fp80*
+#pragma omp atomic write
+ ldx = ullv;
+// CHECK: load float*
+// CHECK: [[VAL:%.+]] = fptosi float %{{.*}} to i32
+// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0
+// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1
+// CHECK: store i32 [[VAL]], i32* [[TEMP_REAL_REF]]
+// CHECK: store i32 0, i32* [[TEMP_IMG_REF]]
+// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8*
+// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.+}} to i8*), i8* [[BITCAST]], i32 0)
+#pragma omp atomic write
+ cix = fv;
+// CHECK: load double*
+// CHECK: store atomic i16
+#pragma omp atomic write
+ sx = dv;
+// CHECK: load x86_fp80*
+// CHECK: store atomic i8
+#pragma omp atomic write
+ bx = ldv;
+// CHECK: load i32* getelementptr inbounds ({ i32, i32 }* @{{.+}}, i32 0, i32 0)
+// CHECK: load i32* getelementptr inbounds ({ i32, i32 }* @{{.+}}, i32 0, i32 1)
+// CHECK: icmp ne i32 %{{.+}}, 0
+// CHECK: icmp ne i32 %{{.+}}, 0
+// CHECK: or i1
+// CHECK: store atomic i8
+#pragma omp atomic write
+ bx = civ;
+// CHECK: load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 0)
+// CHECK: store atomic i16
+#pragma omp atomic write
+ usx = cfv;
+// CHECK: load double* getelementptr inbounds ({ double, double }* @{{.+}}, i32 0, i32 0)
+// CHECK: store atomic i64
+#pragma omp atomic write
+ llx = cdv;
+// CHECK: [[IDX:%.+]] = load i16* @{{.+}}
+// CHECK: load i8*
+// CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32
+// CHECK: [[I128VAL:%.+]] = load atomic i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic
+// CHECK: [[LD:%.+]] = bitcast i128 [[I128VAL]] to <4 x i32>
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_VEC_VAL:%.+]] = phi <4 x i32> [ [[LD]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store <4 x i32> [[OLD_VEC_VAL]], <4 x i32>* [[LDTEMP:%.+]],
+// CHECK: [[VEC_VAL:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <4 x i32> [[VEC_VAL]], i32 [[VEC_ITEM_VAL]], i16 [[IDX]]
+// CHECK: store <4 x i32> [[NEW_VEC_VAL]], <4 x i32>* [[LDTEMP]]
+// CHECK: [[NEW_VEC_VAL:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: [[OLD_I128:%.+]] = bitcast <4 x i32> [[OLD_VEC_VAL]] to i128
+// CHECK: [[NEW_I128:%.+]] = bitcast <4 x i32> [[NEW_VEC_VAL]] to i128
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic
+// CHECK: [[FAILED_I128_OLD_VAL:%.+]] = extractvalue { i128, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
+// CHECK: [[FAILED_OLD_VAL]] = bitcast i128 [[FAILED_I128_OLD_VAL]] to <4 x i32>
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ int4x[sv] = bv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* bitcast (i8* getelementptr (i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[BITCAST:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
+// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @{{.+}} to i8*), i64 4), i8* [[BITCAST]], i32 0)
+// CHECK: [[PREV_VALUE:%.+]] = load i32* [[LDTEMP]]
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]]
+// CHECK: store i32 [[OLD_BF_VALUE]], i32* [[TEMP_OLD_BF_ADDR:%.+]],
+// CHECK: store i32 [[NEW_BF_VALUE]], i32* [[TEMP_NEW_BF_ADDR:%.+]],
+// CHECK: [[BITCAST_TEMP_OLD_BF_ADDR:%.+]] = bitcast i32* [[TEMP_OLD_BF_ADDR]] to i8*
+// CHECK: [[BITCAST_TEMP_NEW_BF_ADDR:%.+]] = bitcast i32* [[TEMP_NEW_BF_ADDR]] to i8*
+// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @{{.+}} to i8*), i64 4), i8* [[BITCAST_TEMP_OLD_BF_ADDR]], i8* [[BITCAST_TEMP_NEW_BF_ADDR]], i32 0, i32 0)
+// CHECK: [[FAILED_OLD_VAL]] = load i32* [[TEMP_OLD_BF_ADDR]]
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx_packed.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx2.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8
+// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7
+// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127
+// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx2_packed.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -33552385
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx3.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[LDTEMP:%.+]] = bitcast i32* %{{.+}} to i24*
+// CHECK: [[BITCAST:%.+]] = bitcast i24* %{{.+}} to i8*
+// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @{{.+}} to i8*), i64 1), i8* [[BITCAST]], i32 0)
+// CHECK: [[PREV_VALUE:%.+]] = load i24* [[LDTEMP]]
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i24 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24
+// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383
+// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3
+// CHECK: [[BF_CLEAR:%.+]] = and i24 %{{.+}}, -131065
+// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i24 %{{.+}}, i24* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i24* [[LDTEMP]]
+// CHECK: [[TEMP_OLD_BF_ADDR:%.+]] = bitcast i32* %{{.+}} to i24*
+// CHECK: store i24 [[OLD_BF_VALUE]], i24* [[TEMP_OLD_BF_ADDR]]
+// CHECK: [[TEMP_NEW_BF_ADDR:%.+]] = bitcast i32* %{{.+}} to i24*
+// CHECK: store i24 [[NEW_BF_VALUE]], i24* [[TEMP_NEW_BF_ADDR]]
+// CHECK: [[BITCAST_TEMP_OLD_BF_ADDR:%.+]] = bitcast i24* [[TEMP_OLD_BF_ADDR]] to i8*
+// CHECK: [[BITCAST_TEMP_NEW_BF_ADDR:%.+]] = bitcast i24* [[TEMP_NEW_BF_ADDR]] to i8*
+// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @{{.+}} to i8*), i64 1), i8* [[BITCAST_TEMP_OLD_BF_ADDR]], i8* [[BITCAST_TEMP_NEW_BF_ADDR]], i32 0, i32 0)
+// CHECK: [[FAILED_OLD_VAL]] = load i24* [[TEMP_OLD_BF_ADDR]]
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx3_packed.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64
+// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16
+// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -65537
+// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx4.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8
+// CHECK: [[BF_VALUE:%.+]] = and i8 [[TRUNC]], 1
+// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, -2
+// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx4_packed.a = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_AND:%.+]] = and i64 [[NEW_VAL]], 127
+// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 17
+// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -16646145
+// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i64* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx4.b = ldv;
+// CHECK: load x86_fp80* @{{.+}}
+// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[TRUNC:%.+]] = trunc i64 [[NEW_VAL]] to i8
+// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 127
+// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 1
+// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 1
+// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ bfx4_packed.b = ldv;
+// CHECK: load i64*
+// CHECK: [[VEC_ITEM_VAL:%.+]] = uitofp i64 %{{.+}} to float
+// CHECK: [[I64VAL:%.+]] = load atomic i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) monotonic
+// CHECK: [[LD:%.+]] = bitcast i64 [[I64VAL]] to <2 x float>
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_VEC_VAL:%.+]] = phi <2 x float> [ [[LD]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store <2 x float> [[OLD_VEC_VAL]], <2 x float>* [[LDTEMP:%.+]],
+// CHECK: [[VEC_VAL:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <2 x float> [[VEC_VAL]], float [[VEC_ITEM_VAL]], i64 0
+// CHECK: store <2 x float> [[NEW_VEC_VAL]], <2 x float>* [[LDTEMP]]
+// CHECK: [[NEW_VEC_VAL:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: [[OLD_I64:%.+]] = bitcast <2 x float> [[OLD_VEC_VAL]] to i64
+// CHECK: [[NEW_I64:%.+]] = bitcast <2 x float> [[NEW_VEC_VAL]] to i64
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] monotonic monotonic
+// CHECK: [[FAILED_I64_OLD_VAL:%.+]] = extractvalue { i64, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
+// CHECK: [[FAILED_OLD_VAL]] = bitcast i64 [[FAILED_I64_OLD_VAL]] to <2 x float>
+// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]]
+// CHECK: [[EXIT]]
+#pragma omp atomic write
+ float2x.x = ulv;
+// CHECK: call i32 @llvm.read_register.i32(
+// CHECK: sitofp i32 %{{.+}} to double
+// CHECK: bitcast double %{{.+}} to i64
+// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* @{{.+}} to i64*) seq_cst
+// CHECK: call{{.*}} @__kmpc_flush(
+#pragma omp atomic write seq_cst
+ dv = rix;
+ return 0;
+}
+
+#endif