From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Fri, 10 Jul 2015 11:31:43 +0000 (+0000)
Subject: Respect alignment when loading up a coerced function argument
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4c911d0a2dd3fcf0b356b59f3b21257f1ff95e45;p=clang

Respect alignment when loading up a coerced function argument

Code in CGCall.cpp that loads up function arguments that need to be
coerced to a different type may in some cases ignore the fact that
the source of the argument is not naturally aligned. This may cause
incorrect code to be generated. In some places in CreateCoercedLoad,
we already have setAlignment calls to address this, but I ran into one
where it was missing, causing wrong code generation on SystemZ.

However, in that location, we do not actually know what alignment of
the source location we can rely on; the callers do not pass anything
to this routine. This is already an issue in other places in
CreateCoercedLoad; and the same problem exists for CreateCoercedStore.

To avoid pessimising code, and to fix the FIXMEs already in place,
this patch also adds an alignment argument to the CreateCoerced*
routines and uses it instead of forcing an alignment of 1. The
callers are changed to pass in the best information they have.

This actually requires changes in a number of existing test cases
since we now get better alignment in many places.

Differential Revision: http://reviews.llvm.org/D11033


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@241898 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 6192244e42..3e4d7f323d 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -912,20 +912,21 @@ static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val,
 
 
 /// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
-/// a pointer to an object of type \arg Ty.
+/// a pointer to an object of type \arg Ty, known to be aligned to
+/// \arg SrcAlign bytes.
 ///
 /// This safely handles the case when the src type is smaller than the
 /// destination type; in this situation the values of bits which not
 /// present in the src are undefined.
 static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
-                                      llvm::Type *Ty,
+                                      llvm::Type *Ty, CharUnits SrcAlign,
                                       CodeGenFunction &CGF) {
   llvm::Type *SrcTy =
     cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
 
   // If SrcTy and Ty are the same, just do a load.
   if (SrcTy == Ty)
-    return CGF.Builder.CreateLoad(SrcPtr);
+    return CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity());
 
   uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty);
 
@@ -940,7 +941,8 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
   // extension or truncation to the desired type.
   if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) &&
       (isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) {
-    llvm::LoadInst *Load = CGF.Builder.CreateLoad(SrcPtr);
+    llvm::LoadInst *Load =
+      CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity());
     return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF);
   }
 
@@ -954,23 +956,20 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
     // to that information.
     llvm::Value *Casted =
       CGF.Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(Ty));
-    llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted);
-    // FIXME: Use better alignment / avoid requiring aligned load.
-    Load->setAlignment(1);
-    return Load;
+    return CGF.Builder.CreateAlignedLoad(Casted, SrcAlign.getQuantity());
   }
 
   // Otherwise do coercion through memory. This is stupid, but
   // simple.
-  llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
+  llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(Ty);
+  Tmp->setAlignment(SrcAlign.getQuantity());
   llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
   llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
   llvm::Value *SrcCasted = CGF.Builder.CreateBitCast(SrcPtr, I8PtrTy);
-  // FIXME: Use better alignment.
   CGF.Builder.CreateMemCpy(Casted, SrcCasted,
       llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
-      1, false);
-  return CGF.Builder.CreateLoad(Tmp);
+      SrcAlign.getQuantity(), false);
+  return CGF.Builder.CreateAlignedLoad(Tmp, SrcAlign.getQuantity());
 }
 
 // Function to store a first-class aggregate into memory.  We prefer to
@@ -979,39 +978,45 @@ static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
 // FIXME: Do we need to recurse here?
 static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val,
                           llvm::Value *DestPtr, bool DestIsVolatile,
-                          bool LowAlignment) {
+                          CharUnits DestAlign) {
   // Prefer scalar stores to first-class aggregate stores.
   if (llvm::StructType *STy =
         dyn_cast<llvm::StructType>(Val->getType())) {
+    const llvm::StructLayout *Layout =
+      CGF.CGM.getDataLayout().getStructLayout(STy);
+
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
       llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(STy, DestPtr, 0, i);
       llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
-      llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr,
-                                                    DestIsVolatile);
-      if (LowAlignment)
-        SI->setAlignment(1);
+      uint64_t EltOffset = Layout->getElementOffset(i);
+      CharUnits EltAlign =
+        DestAlign.alignmentAtOffset(CharUnits::fromQuantity(EltOffset));
+      CGF.Builder.CreateAlignedStore(Elt, EltPtr, EltAlign.getQuantity(),
+                                     DestIsVolatile);
     }
   } else {
-    llvm::StoreInst *SI = CGF.Builder.CreateStore(Val, DestPtr, DestIsVolatile);
-    if (LowAlignment)
-      SI->setAlignment(1);
+    CGF.Builder.CreateAlignedStore(Val, DestPtr, DestAlign.getQuantity(),
+                                   DestIsVolatile);
   }
 }
 
 /// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types.
+/// where the source and destination may have different types.  The
+/// destination is known to be aligned to \arg DstAlign bytes.
 ///
 /// This safely handles the case when the src type is larger than the
 /// destination type; the upper bits of the src will be lost.
 static void CreateCoercedStore(llvm::Value *Src,
                                llvm::Value *DstPtr,
                                bool DstIsVolatile,
+                               CharUnits DstAlign,
                                CodeGenFunction &CGF) {
   llvm::Type *SrcTy = Src->getType();
   llvm::Type *DstTy =
     cast<llvm::PointerType>(DstPtr->getType())->getElementType();
   if (SrcTy == DstTy) {
-    CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
+    CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(),
+                                   DstIsVolatile);
     return;
   }
 
@@ -1027,7 +1032,8 @@ static void CreateCoercedStore(llvm::Value *Src,
   if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
       (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
     Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
-    CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
+    CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(),
+                                   DstIsVolatile);
     return;
   }
 
@@ -1037,8 +1043,7 @@ static void CreateCoercedStore(llvm::Value *Src,
   if (SrcSize <= DstSize) {
     llvm::Value *Casted =
       CGF.Builder.CreateBitCast(DstPtr, llvm::PointerType::getUnqual(SrcTy));
-    // FIXME: Use better alignment / avoid requiring aligned store.
-    BuildAggStore(CGF, Src, Casted, DstIsVolatile, true);
+    BuildAggStore(CGF, Src, Casted, DstIsVolatile, DstAlign);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
     // simple.
@@ -1049,15 +1054,15 @@ static void CreateCoercedStore(llvm::Value *Src,
     //
     // FIXME: Assert that we aren't truncating non-padding bits when have access
     // to that information.
-    llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy);
-    CGF.Builder.CreateStore(Src, Tmp);
+    llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(SrcTy);
+    Tmp->setAlignment(DstAlign.getQuantity());
+    CGF.Builder.CreateAlignedStore(Src, Tmp, DstAlign.getQuantity());
     llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
     llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
     llvm::Value *DstCasted = CGF.Builder.CreateBitCast(DstPtr, I8PtrTy);
-    // FIXME: Use better alignment.
     CGF.Builder.CreateMemCpy(DstCasted, Casted,
         llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
-        1, false);
+        DstAlign.getQuantity(), false);
   }
 }
 
@@ -1996,6 +2001,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       Alloca->setAlignment(AlignmentToUse);
       llvm::Value *V = Alloca;
       llvm::Value *Ptr = V;    // Pointer to store into.
+      CharUnits PtrAlign = CharUnits::fromQuantity(AlignmentToUse);
 
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = ArgI.getDirectOffset()) {
@@ -2003,6 +2009,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         Ptr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), Ptr, Offs);
         Ptr = Builder.CreateBitCast(Ptr,
                           llvm::PointerType::getUnqual(ArgI.getCoerceToType()));
+        PtrAlign = PtrAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
       }
 
       // Fast-isel and the optimizer generally like scalar values better than
@@ -2047,7 +2054,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         assert(NumIRArgs == 1);
         auto AI = FnArgs[FirstIRArg];
         AI->setName(Arg->getName() + ".coerce");
-        CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this);
+        CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, PtrAlign, *this);
       }
 
 
@@ -2415,15 +2422,17 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
       }
     } else {
       llvm::Value *V = ReturnValue;
+      CharUnits Align = getContext().getTypeAlignInChars(RetTy);
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = RetAI.getDirectOffset()) {
         V = Builder.CreateBitCast(V, Builder.getInt8PtrTy());
         V = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), V, Offs);
         V = Builder.CreateBitCast(V,
                          llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
+        Align = Align.alignmentAtOffset(CharUnits::fromQuantity(Offs));
       }
 
-      RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
+      RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), Align, *this);
     }
 
     // In ARC, end functions that return a retainable type with a call
@@ -3286,12 +3295,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
       // FIXME: Avoid the conversion through memory if possible.
       llvm::Value *SrcPtr;
+      CharUnits SrcAlign;
       if (RV.isScalar() || RV.isComplex()) {
         SrcPtr = CreateMemTemp(I->Ty, "coerce");
+        SrcAlign = TypeAlign;
         LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign);
         EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
-      } else
+      } else {
         SrcPtr = RV.getAggregateAddr();
+        // This alignment is guaranteed by EmitCallArg.
+        SrcAlign = TypeAlign;
+      }
 
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = ArgInfo.getDirectOffset()) {
@@ -3299,7 +3313,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         SrcPtr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), SrcPtr, Offs);
         SrcPtr = Builder.CreateBitCast(SrcPtr,
                        llvm::PointerType::getUnqual(ArgInfo.getCoerceToType()));
-
+        SrcAlign = SrcAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
       }
 
       // Fast-isel and the optimizer generally like scalar values better than
@@ -3338,7 +3352,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         // In the simple case, just pass the coerced loaded value.
         assert(NumIRArgs == 1);
         IRCallArgs[FirstIRArg] =
-            CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
+            CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
+                              SrcAlign, *this);
       }
 
       break;
@@ -3535,12 +3550,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         case TEK_Aggregate: {
           llvm::Value *DestPtr = ReturnValue.getValue();
           bool DestIsVolatile = ReturnValue.isVolatile();
+          CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy);
 
           if (!DestPtr) {
             DestPtr = CreateMemTemp(RetTy, "agg.tmp");
             DestIsVolatile = false;
           }
-          BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false);
+          BuildAggStore(*this, CI, DestPtr, DestIsVolatile, DestAlign);
           return RValue::getAggregate(DestPtr);
         }
         case TEK_Scalar: {
@@ -3557,6 +3573,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
       llvm::Value *DestPtr = ReturnValue.getValue();
       bool DestIsVolatile = ReturnValue.isVolatile();
+      CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy);
 
       if (!DestPtr) {
         DestPtr = CreateMemTemp(RetTy, "coerce");
@@ -3565,14 +3582,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
       // If the value is offset in memory, apply the offset now.
       llvm::Value *StorePtr = DestPtr;
+      CharUnits StoreAlign = DestAlign;
       if (unsigned Offs = RetAI.getDirectOffset()) {
         StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy());
         StorePtr =
             Builder.CreateConstGEP1_32(Builder.getInt8Ty(), StorePtr, Offs);
         StorePtr = Builder.CreateBitCast(StorePtr,
                            llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
+        StoreAlign =
+          StoreAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
       }
-      CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+      CreateCoercedStore(CI, StorePtr, DestIsVolatile, StoreAlign, *this);
 
       return convertTempToRValue(DestPtr, RetTy, SourceLocation());
     }
diff --git a/test/CodeGen/align-systemz.c b/test/CodeGen/align-systemz.c
index 68a21e39ab..eaa1de64d3 100644
--- a/test/CodeGen/align-systemz.c
+++ b/test/CodeGen/align-systemz.c
@@ -25,3 +25,19 @@ void func (void)
   s = es;
 }
 
+
+// Alignment should be respected for coerced argument loads
+
+struct arg { long y __attribute__((packed, aligned(4))); };
+
+extern struct arg x;
+void f(struct arg);
+
+void test (void)
+{
+  f(x);
+}
+
+// CHECK-LABEL: @test
+// CHECK: load i64, i64* getelementptr inbounds (%struct.arg, %struct.arg* @x, i32 0, i32 0), align 4
+
diff --git a/test/CodeGen/arm64-abi-vector.c b/test/CodeGen/arm64-abi-vector.c
index 4566c41790..ebf7f51126 100644
--- a/test/CodeGen/arm64-abi-vector.c
+++ b/test/CodeGen/arm64-abi-vector.c
@@ -309,7 +309,7 @@ __attribute__((noinline)) double args_vec_5c(int fixed, __char5 c5) {
 // CHECK: args_vec_5c
 // CHECK: [[C5:%.*]] = alloca <5 x i8>, align 8
 // CHECK: [[TMP:%.*]] = bitcast <5 x i8>* [[C5]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
   double sum = fixed;
   sum = sum + c5.x + c5.y;
   return sum;
@@ -325,7 +325,7 @@ __attribute__((noinline)) double args_vec_9c(int fixed, __char9 c9) {
 // CHECK: args_vec_9c
 // CHECK: [[C9:%.*]] = alloca <9 x i8>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <9 x i8>* [[C9]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c9.x + c9.y;
   return sum;
@@ -355,7 +355,7 @@ __attribute__((noinline)) double args_vec_3s(int fixed, __short3 c3) {
 // CHECK: args_vec_3s
 // CHECK: [[C3:%.*]] = alloca <3 x i16>, align 8
 // CHECK: [[TMP:%.*]] = bitcast <3 x i16>* [[C3]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
   double sum = fixed;
   sum = sum + c3.x + c3.y;
   return sum;
@@ -371,7 +371,7 @@ __attribute__((noinline)) double args_vec_5s(int fixed, __short5 c5) {
 // CHECK: args_vec_5s
 // CHECK: [[C5:%.*]] = alloca <5 x i16>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <5 x i16>* [[C5]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c5.x + c5.y;
   return sum;
@@ -387,7 +387,7 @@ __attribute__((noinline)) double args_vec_3i(int fixed, __int3 c3) {
 // CHECK: args_vec_3i
 // CHECK: [[C3:%.*]] = alloca <3 x i32>, align 16
 // CHECK: [[TMP:%.*]] = bitcast <3 x i32>* [[C3]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
   double sum = fixed;
   sum = sum + c3.x + c3.y;
   return sum;
diff --git a/test/CodeGen/arm64-arguments.c b/test/CodeGen/arm64-arguments.c
index 4486bb4b18..8b551c4af4 100644
--- a/test/CodeGen/arm64-arguments.c
+++ b/test/CodeGen/arm64-arguments.c
@@ -219,8 +219,8 @@ int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s36, align 16
 // CHECK: %s2 = alloca %struct.s36, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
@@ -275,8 +275,8 @@ int f38(int i, s38_no_align s1, s38_no_align s2) {
 // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s38, align 8
 // CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
@@ -287,8 +287,8 @@ s38_no_align g38;
 s38_no_align g38_2;
 int caller38() {
 // CHECK: define i32 @caller38()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
   return f38(3, g38, g38_2);
 }
@@ -299,8 +299,8 @@ int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s38, align 8
 // CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
@@ -309,8 +309,8 @@ int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 }
 int caller38_stack() {
 // CHECK: define i32 @caller38_stack()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
 }
@@ -328,8 +328,8 @@ int f39(int i, s39_with_align s1, s39_with_align s2) {
 // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -340,8 +340,8 @@ s39_with_align g39;
 s39_with_align g39_2;
 int caller39() {
 // CHECK: define i32 @caller39()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
   return f39(3, g39, g39_2);
 }
@@ -352,8 +352,8 @@ int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s39, align 16
 // CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -362,8 +362,8 @@ int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 }
 int caller39_stack() {
 // CHECK: define i32 @caller39_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
 }
@@ -383,8 +383,8 @@ int f40(int i, s40_no_align s1, s40_no_align s2) {
 // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s40, align 8
 // CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
@@ -395,8 +395,8 @@ s40_no_align g40;
 s40_no_align g40_2;
 int caller40() {
 // CHECK: define i32 @caller40()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40(3, g40, g40_2);
 }
@@ -407,8 +407,8 @@ int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s40, align 8
 // CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
@@ -417,8 +417,8 @@ int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 }
 int caller40_stack() {
 // CHECK: define i32 @caller40_stack()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
 }
@@ -438,8 +438,8 @@ int f41(int i, s41_with_align s1, s41_with_align s2) {
 // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -450,8 +450,8 @@ s41_with_align g41;
 s41_with_align g41_2;
 int caller41() {
 // CHECK: define i32 @caller41()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
   return f41(3, g41, g41_2);
 }
@@ -462,8 +462,8 @@ int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
 // CHECK: %s1 = alloca %struct.s41, align 16
 // CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
 // CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -472,8 +472,8 @@ int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
 }
 int caller41_stack() {
 // CHECK: define i32 @caller41_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
 }
diff --git a/test/CodeGen/arm64-be-bitfield.c b/test/CodeGen/arm64-be-bitfield.c
index b8d497c558..132239ab83 100644
--- a/test/CodeGen/arm64-be-bitfield.c
+++ b/test/CodeGen/arm64-be-bitfield.c
@@ -7,7 +7,7 @@ struct bt3 { signed b2:10; signed b3:10; } b16;
 // Get the high 32-bits and then shift appropriately for big-endian.
 signed callee_b0f(struct bt3 bp11) {
 // IR: callee_b0f(i64 [[ARG:%.*]])
-// IR: store i64 [[ARG]], i64* [[PTR:%.*]]
+// IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8
 // IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8*
 // IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4
 // ARM: asr x0, x0, #54
diff --git a/test/CodeGen/ppc64-struct-onefloat.c b/test/CodeGen/ppc64-struct-onefloat.c
index d0ccfbe34a..534e5116f9 100644
--- a/test/CodeGen/ppc64-struct-onefloat.c
+++ b/test/CodeGen/ppc64-struct-onefloat.c
@@ -14,15 +14,15 @@ void bar(Sf a, Sd b, SSf d, SSd e) {}
 // CHECK:  %d = alloca %struct.s4, align 4
 // CHECK:  %e = alloca %struct.s5, align 8
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %a, i32 0, i32 0
-// CHECK:  store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %b, i32 0, i32 0
-// CHECK:  store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %d, i32 0, i32 0
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %e, i32 0, i32 0
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  ret void
 
 void foo(void) 
@@ -36,14 +36,14 @@ void foo(void)
 
 // CHECK-LABEL: define void @foo
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %p1, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %p2, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %p4, i32 0, i32 0
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %p5, i32 0, i32 0
 // CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
 // CHECK:  call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}})
 // CHECK:  ret void
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
index 76798c1327..3ad4b06c68 100644
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -54,49 +54,49 @@ struct fabc func_fabc(struct fabc x) { return x; }
 struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
 
 // CHECK-LABEL: @call_f1
-// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 4
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
 struct f1 global_f1;
 void call_f1(void) { global_f1 = func_f1(global_f1); }
 
 // CHECK-LABEL: @call_f2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 4
 // CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
 struct f2 global_f2;
 void call_f2(void) { global_f2 = func_f2(global_f2); }
 
 // CHECK-LABEL: @call_f3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 4
 // CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
 struct f3 global_f3;
 void call_f3(void) { global_f3 = func_f3(global_f3); }
 
 // CHECK-LABEL: @call_f4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 4
 // CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
 struct f4 global_f4;
 void call_f4(void) { global_f4 = func_f4(global_f4); }
 
 // CHECK-LABEL: @call_f5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 4
 // CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
 struct f5 global_f5;
 void call_f5(void) { global_f5 = func_f5(global_f5); }
 
 // CHECK-LABEL: @call_f6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 4
 // CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
 struct f6 global_f6;
 void call_f6(void) { global_f6 = func_f6(global_f6); }
 
 // CHECK-LABEL: @call_f7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 4
 // CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
 struct f7 global_f7;
 void call_f7(void) { global_f7 = func_f7(global_f7); }
 
 // CHECK-LABEL: @call_f8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 4
 // CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
 struct f8 global_f8;
 void call_f8(void) { global_f8 = func_f8(global_f8); }
@@ -104,7 +104,7 @@ void call_f8(void) { global_f8 = func_f8(global_f8); }
 // CHECK-LABEL: @call_f9
 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
 // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
diff --git a/test/CodeGenCXX/2012-03-16-StoreAlign.cpp b/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
index 7e82ca544d..5f6189e245 100644
--- a/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
+++ b/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
@@ -28,7 +28,7 @@ struct Foo {
 };
 
 // CHECK: @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth = linkonce_odr global %struct.Length zeroinitializer, align 4
-// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 1
+// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 4
 
 bool bar(Length &b) {
   Foo f;
diff --git a/test/CodeGenCXX/varargs.cpp b/test/CodeGenCXX/varargs.cpp
index 1ea072e2eb..e0165994d0 100644
--- a/test/CodeGenCXX/varargs.cpp
+++ b/test/CodeGenCXX/varargs.cpp
@@ -37,7 +37,7 @@ namespace test1 {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[X]] to i8*
   // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 8, i32 4, i1 false)
   // CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i64*
-  // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 1
+  // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 4
   // CHECK-NEXT: call void (...) @_ZN5test13fooEz(i64 [[T1]])
   // CHECK-NEXT: ret void
 }