From 336d9df5e628279425344d754dc68047fa5b00a7 Mon Sep 17 00:00:00 2001
From: Eli Friedman <eli.friedman@gmail.com>
Date: Thu, 11 Jul 2013 01:32:21 +0000
Subject: [PATCH] Simplify atomic load/store IRGen.

Also fixes a couple minor bugs along the way; see testcases.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@186049 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/CGAtomic.cpp      | 64 ++++++++------------------
 lib/CodeGen/CGExpr.cpp        |  5 +-
 lib/CodeGen/CGExprAgg.cpp     | 86 +++++++++--------------------------
 lib/CodeGen/CGValue.h         | 30 ++----------
 test/CodeGen/c11atomics-ios.c | 10 ++--
 test/CodeGen/c11atomics.c     | 25 ++++++++--
 6 files changed, 71 insertions(+), 149 deletions(-)

diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 1fc4f94d44..b5b74d25b3 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -93,7 +93,7 @@ namespace {
       return (ValueSizeInBits != AtomicSizeInBits);
     }
 
-    void emitMemSetZeroIfNecessary(LValue dest) const;
+    bool emitMemSetZeroIfNecessary(LValue dest) const;
 
     llvm::Value *getAtomicSizeValue() const {
       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
@@ -164,21 +164,22 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
     return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
                            AtomicSizeInBits / 2);
 
-  // Just be pessimistic about aggregates.
+  // Padding in structs has an undefined bit pattern.  User beware.
   case TEK_Aggregate:
-    return true;
+    return false;
   }
   llvm_unreachable("bad evaluation kind");
 }
 
-void AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
+bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
   llvm::Value *addr = dest.getAddress();
   if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
-    return;
+    return false;
 
   CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                            AtomicSizeInBits / 8,
                            dest.getAlignment().getQuantity());
+  return true;
 }
 
 static void
@@ -715,30 +716,13 @@ llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
 
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                        AggValueSlot resultSlot) const {
-  if (EvaluationKind == TEK_Aggregate) {
-    // Nothing to do if the result is ignored.
-    if (resultSlot.isIgnored()) return resultSlot.asRValue();
-
-    assert(resultSlot.getAddr() == addr || hasPadding());
-
-    // In these cases, we should have emitted directly into the result slot.
-    if (!hasPadding() || resultSlot.isValueOfAtomic())
-      return resultSlot.asRValue();
-
-    // Otherwise, fall into the common path.
-  }
+  if (EvaluationKind == TEK_Aggregate)
+    return resultSlot.asRValue();
 
   // Drill into the padding structure if we have one.
   if (hasPadding())
     addr = CGF.Builder.CreateStructGEP(addr, 0);
 
-  // If we're emitting to an aggregate, copy into the result slot.
-  if (EvaluationKind == TEK_Aggregate) {
-    CGF.EmitAggregateCopy(resultSlot.getAddr(), addr, getValueType(),
-                          resultSlot.isVolatile());
-    return resultSlot.asRValue();
-  }
-
   // Otherwise, just convert the temporary to an r-value using the
   // normal conversion routine.
   return CGF.convertTempToRValue(addr, getValueType());
@@ -752,10 +736,7 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) {
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
     llvm::Value *tempAddr;
-    if (resultSlot.isValueOfAtomic()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getPaddedAtomicAddr();
-    } else if (!resultSlot.isIgnored() && !atomics.hasPadding()) {
+    if (!resultSlot.isIgnored()) {
       assert(atomics.getEvaluationKind() == TEK_Aggregate);
       tempAddr = resultSlot.getAddr();
     } else {
@@ -819,16 +800,10 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) {
   llvm::Value *temp;
   bool tempIsVolatile = false;
   CharUnits tempAlignment;
-  if (atomics.getEvaluationKind() == TEK_Aggregate &&
-      (!atomics.hasPadding() || resultSlot.isValueOfAtomic())) {
+  if (atomics.getEvaluationKind() == TEK_Aggregate) {
     assert(!resultSlot.isIgnored());
-    if (resultSlot.isValueOfAtomic()) {
-      temp = resultSlot.getPaddedAtomicAddr();
-      tempAlignment = atomics.getAtomicAlignment();
-    } else {
-      temp = resultSlot.getAddr();
-      tempAlignment = atomics.getValueAlignment();
-    }
+    temp = resultSlot.getAddr();
+    tempAlignment = atomics.getValueAlignment();
     tempIsVolatile = resultSlot.isVolatile();
   } else {
     temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
@@ -996,13 +971,11 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
   }
 
   case TEK_Aggregate: {
-    // Memset the buffer first if there's any possibility of
-    // uninitialized internal bits.
-    atomics.emitMemSetZeroIfNecessary(dest);
-
-    // HACK: whether the initializer actually has an atomic type
-    // doesn't really seem reliable right now.
+    // Fix up the destination if the initializer isn't an expression
+    // of atomic type.
+    bool Zeroed = false;
     if (!init->getType()->isAtomicType()) {
+      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
       dest = atomics.projectValue(dest);
     }
 
@@ -1010,7 +983,10 @@ void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
     AggValueSlot slot = AggValueSlot::forLValue(dest,
                                         AggValueSlot::IsNotDestructed,
                                         AggValueSlot::DoesNotNeedGCBarriers,
-                                        AggValueSlot::IsNotAliased);
+                                        AggValueSlot::IsNotAliased,
+                                        Zeroed ? AggValueSlot::IsZeroed :
+                                                 AggValueSlot::IsNotZeroed);
+
     EmitAggExpr(init, slot);
     return;
   }
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 419075fe17..b283174514 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2717,11 +2717,10 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
   case CK_BuiltinFnToFnPtr:
     llvm_unreachable("builtin functions are handled elsewhere");
 
-  // These two casts are currently treated as no-ops, although they could
-  // potentially be real operations depending on the target's ABI.
+  // These are never l-values; just use the aggregate emission code.
   case CK_NonAtomicToAtomic:
   case CK_AtomicToNonAtomic:
-    return EmitLValue(E->getSubExpr());
+    return EmitAggExprToLValue(E);
 
   case CK_Dynamic: {
     LValue LV = EmitLValue(E->getSubExpr());
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 8efe018fca..a67f6593f3 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -29,14 +29,6 @@ using namespace CodeGen;
 //                        Aggregate Expression Emitter
 //===----------------------------------------------------------------------===//
 
-llvm::Value *AggValueSlot::getPaddedAtomicAddr() const {
-  assert(isValueOfAtomic());
-  llvm::GEPOperator *op = cast<llvm::GEPOperator>(getAddr());
-  assert(op->getNumIndices() == 2);
-  assert(op->hasAllZeroIndices());
-  return op->getPointerOperand();
-}
-
 namespace  {
 class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
   CodeGenFunction &CGF;
@@ -202,38 +194,6 @@ public:
     CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr());
   }
 };
-
-/// A helper class for emitting expressions into the value sub-object
-/// of a padded atomic type.
-class ValueDestForAtomic {
-  AggValueSlot Dest;
-public:
-  ValueDestForAtomic(CodeGenFunction &CGF, AggValueSlot dest, QualType type)
-    : Dest(dest) {
-    assert(!Dest.isValueOfAtomic());
-    if (!Dest.isIgnored() && CGF.CGM.isPaddedAtomicType(type)) {
-      llvm::Value *valueAddr = CGF.Builder.CreateStructGEP(Dest.getAddr(), 0);
-      Dest = AggValueSlot::forAddr(valueAddr,
-                                   Dest.getAlignment(),
-                                   Dest.getQualifiers(),
-                                   Dest.isExternallyDestructed(),
-                                   Dest.requiresGCollection(),
-                                   Dest.isPotentiallyAliased(),
-                                   Dest.isZeroed(),
-                                   AggValueSlot::IsValueOfAtomic);
-    }
-  }
-
-  const AggValueSlot &getDest() const { return Dest; }
-
-  ~ValueDestForAtomic() {
-    // Kill the GEP if we made one and it didn't end up used.
-    if (Dest.isValueOfAtomic()) {
-      llvm::Instruction *addr = cast<llvm::GetElementPtrInst>(Dest.getAddr());
-      if (addr->use_empty()) addr->eraseFromParent();
-    }
-  }
-};
 }  // end anonymous namespace.
 
 //===----------------------------------------------------------------------===//
@@ -248,8 +208,7 @@ void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
 
   // If the type of the l-value is atomic, then do an atomic load.
   if (LV.getType()->isAtomicType()) {
-    ValueDestForAtomic valueDest(CGF, Dest, LV.getType());
-    CGF.EmitAtomicLoad(LV, valueDest.getDest());
+    CGF.EmitAtomicLoad(LV, Dest);
     return;
   }
 
@@ -653,34 +612,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
     }
 
     // If we're converting an r-value of non-atomic type to an r-value
-    // of atomic type, just make an atomic temporary, emit into that,
-    // and then copy the value out.  (FIXME: do we need to
-    // zero-initialize it first?)
+    // of atomic type, just emit directly into the relevant sub-object.
     if (isToAtomic) {
-      ValueDestForAtomic valueDest(CGF, Dest, atomicType);
+      AggValueSlot valueDest = Dest;
+      if (!valueDest.isIgnored() && CGF.CGM.isPaddedAtomicType(atomicType)) {
+        // Zero-initialize the whole padded atomic object.  (Strictly speaking,
+        // we only need to initialize the trailing padding; this is simpler.)
+        if (!Dest.isZeroed())
+          CGF.EmitNullInitialization(Dest.getAddr(), atomicType);
+
+        // Build a GEP to refer to the value subobject (struct field 0).
+        llvm::Value *valueAddr =
+            CGF.Builder.CreateStructGEP(valueDest.getAddr(), 0);
+        valueDest = AggValueSlot::forAddr(valueAddr,
+                                          valueDest.getAlignment(),
+                                          valueDest.getQualifiers(),
+                                          valueDest.isExternallyDestructed(),
+                                          valueDest.requiresGCollection(),
+                                          valueDest.isPotentiallyAliased(),
+                                          AggValueSlot::IsZeroed);
+      }
+
-      CGF.EmitAggExpr(E->getSubExpr(), valueDest.getDest());
+      CGF.EmitAggExpr(E->getSubExpr(), valueDest);
       return;
     }
 
     // Otherwise, we're converting an atomic type to a non-atomic type.
-
-    // If the dest is a value-of-atomic subobject, drill back out.
-    if (Dest.isValueOfAtomic()) {
-      AggValueSlot atomicSlot =
-        AggValueSlot::forAddr(Dest.getPaddedAtomicAddr(),
-                              Dest.getAlignment(),
-                              Dest.getQualifiers(),
-                              Dest.isExternallyDestructed(),
-                              Dest.requiresGCollection(),
-                              Dest.isPotentiallyAliased(),
-                              Dest.isZeroed(),
-                              AggValueSlot::IsNotValueOfAtomic);
-      CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
-      return;
-    }
-
-    // Otherwise, make an atomic temporary, emit into that, and then
-    // copy the value out.
+    // Make an atomic temporary, emit into that, and then copy the value out.
     AggValueSlot atomicSlot =
       CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp");
     CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index b625b866c0..da2a03437d 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -381,23 +381,11 @@ class AggValueSlot {
   /// evaluating an expression which constructs such an object.
   bool AliasedFlag : 1;
 
-  /// ValueOfAtomicFlag - This is set to true if the slot is the value
-  /// subobject of an object the size of an _Atomic(T).  The specific
-  /// guarantees this makes are:
-  ///   - the address is guaranteed to be a getelementptr into the
-  ///     padding struct and
-  ///   - it is okay to store something the width of an _Atomic(T)
-  ///     into the address.
-  /// Tracking this allows us to avoid some obviously unnecessary
-  /// memcpys.
-  bool ValueOfAtomicFlag : 1;
-
 public:
   enum IsAliased_t { IsNotAliased, IsAliased };
   enum IsDestructed_t { IsNotDestructed, IsDestructed };
   enum IsZeroed_t { IsNotZeroed, IsZeroed };
   enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
-  enum IsValueOfAtomic_t { IsNotValueOfAtomic, IsValueOfAtomic };
 
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
@@ -421,9 +409,7 @@ public:
                               IsDestructed_t isDestructed,
                               NeedsGCBarriers_t needsGC,
                               IsAliased_t isAliased,
-                              IsZeroed_t isZeroed = IsNotZeroed,
-                              IsValueOfAtomic_t isValueOfAtomic
-                                = IsNotValueOfAtomic) {
+                              IsZeroed_t isZeroed = IsNotZeroed) {
     AggValueSlot AV;
     AV.Addr = addr;
     AV.Alignment = align.getQuantity();
@@ -432,7 +418,6 @@ public:
     AV.ObjCGCFlag = needsGC;
     AV.ZeroedFlag = isZeroed;
     AV.AliasedFlag = isAliased;
-    AV.ValueOfAtomicFlag = isValueOfAtomic;
     return AV;
   }
 
@@ -440,12 +425,9 @@ public:
                                 IsDestructed_t isDestructed,
                                 NeedsGCBarriers_t needsGC,
                                 IsAliased_t isAliased,
-                                IsZeroed_t isZeroed = IsNotZeroed,
-                                IsValueOfAtomic_t isValueOfAtomic
-                                  = IsNotValueOfAtomic) {
+                                IsZeroed_t isZeroed = IsNotZeroed) {
     return forAddr(LV.getAddress(), LV.getAlignment(),
-                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed,
-                   isValueOfAtomic);
+                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
   }
 
   IsDestructed_t isExternallyDestructed() const {
@@ -477,12 +459,6 @@ public:
     return Addr;
   }
 
-  IsValueOfAtomic_t isValueOfAtomic() const {
-    return IsValueOfAtomic_t(ValueOfAtomicFlag);
-  }
-
-  llvm::Value *getPaddedAtomicAddr() const;
-
   bool isIgnored() const {
     return Addr == 0;
   }
diff --git a/test/CodeGen/c11atomics-ios.c b/test/CodeGen/c11atomics-ios.c
index d1c9b14330..7992948bb6 100644
--- a/test/CodeGen/c11atomics-ios.c
+++ b/test/CodeGen/c11atomics-ios.c
@@ -102,8 +102,6 @@ void testStruct(_Atomic(S) *fp) {
 // CHECK-NEXT: store [[S]]*
 
 // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
@@ -114,8 +112,6 @@ void testStruct(_Atomic(S) *fp) {
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
@@ -169,7 +165,7 @@ void testPromotedStruct(_Atomic(PS) *fp) {
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
@@ -183,7 +179,7 @@ void testPromotedStruct(_Atomic(PS) *fp) {
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64*
 // CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64*
-// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8
+// CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
@@ -191,6 +187,8 @@ void testPromotedStruct(_Atomic(PS) *fp) {
   PS f = *fp;
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
index 6c845e9e01..fce5ec48d1 100644
--- a/test/CodeGen/c11atomics.c
+++ b/test/CodeGen/c11atomics.c
@@ -233,8 +233,6 @@ void testStruct(_Atomic(S) *fp) {
 // CHECK-NEXT: store [[S]]*
 
 // CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[P]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
@@ -245,8 +243,6 @@ void testStruct(_Atomic(S) *fp) {
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[S]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
@@ -283,6 +279,9 @@ void testPromotedStruct(_Atomic(PS) *fp) {
 // CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
+// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
+// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
 // CHECK-NEXT: store [[APS]]*
 
 // CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
@@ -298,7 +297,7 @@ void testPromotedStruct(_Atomic(PS) *fp) {
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
@@ -319,6 +318,8 @@ void testPromotedStruct(_Atomic(PS) *fp) {
   PS f = *fp;
 
 // CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
 // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
@@ -328,6 +329,20 @@ void testPromotedStruct(_Atomic(PS) *fp) {
 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
   *fp = f;
 
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** %fp.addr, align 4
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
+// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
+// CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
+// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
+// CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
+  int a = ((PS)*fp).x;
+
 // CHECK-NEXT: ret void
 }
 
-- 
2.40.0