}
// CHECK-LABEL: @test_vgetq_lane_f16(
-// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
+// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 8
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
-// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
+// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
-// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
+// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 8
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
}
// CHECK-LABEL: @test_vld2q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
}
// CHECK-LABEL: @test_vld2q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld2q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld2q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
int8x16x2_t test_vld2q_s8(int8_t const * a) {
}
// CHECK-LABEL: @test_vld2q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld2q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld2q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld2q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
}
// CHECK-LABEL: @test_vld2q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
}
// CHECK-LABEL: @test_vld2q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld2q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld2q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld2q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld2q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld2q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld2q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
}
// CHECK-LABEL: @test_vld2q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld3q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
}
// CHECK-LABEL: @test_vld3q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld3q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld3q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
int8x16x3_t test_vld3q_s8(int8_t const * a) {
}
// CHECK-LABEL: @test_vld3q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld3q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld3q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld3q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
}
// CHECK-LABEL: @test_vld3q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
}
// CHECK-LABEL: @test_vld3q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld3q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld3q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld3q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld3q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld3q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld3q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
}
// CHECK-LABEL: @test_vld3q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld4q_u8(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
}
// CHECK-LABEL: @test_vld4q_u16(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld4q_u32(
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld4q_s8(
-// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
int8x16x4_t test_vld4q_s8(int8_t const * a) {
}
// CHECK-LABEL: @test_vld4q_s16(
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld4q_s32(
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
}
// CHECK-LABEL: @test_vld4q_f16(
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld4q_f32(
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
}
// CHECK-LABEL: @test_vld4q_p8(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
}
// CHECK-LABEL: @test_vld4q_p16(
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
}
// CHECK-LABEL: @test_vld4q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld4q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld4q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld4q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
}
// CHECK-LABEL: @test_vld4q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
}
// CHECK-LABEL: @test_vld4q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
}
// CHECK-LABEL: @test_vld4q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8
+// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
-// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
-// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
+// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 8
+// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
-// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
+// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3
-// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
+// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 8
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
return vsetq_lane_f16(*a, b, 3);
}
// CHECK-LABEL: @test_vst2q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
}
// CHECK-LABEL: @test_vst2q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst2q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
}
// CHECK-LABEL: @test_vst2q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst2q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst2q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
}
// CHECK-LABEL: @test_vst2q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst2q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst2q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst2q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst2q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
-// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
+// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
}
// CHECK-LABEL: @test_vst3q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst3q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
}
// CHECK-LABEL: @test_vst3q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst3q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst3q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
}
// CHECK-LABEL: @test_vst3q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst3q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst3q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst3q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst3q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
-// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
+// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_u8(
-// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
}
// CHECK-LABEL: @test_vst4q_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst4q_s8(
-// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
}
// CHECK-LABEL: @test_vst4q_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst4q_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst4q_p8(
-// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
}
// CHECK-LABEL: @test_vst4q_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_lane_u16(
-// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_lane_u32(
-// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst4q_lane_s16(
-// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_lane_s32(
-// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
}
// CHECK-LABEL: @test_vst4q_lane_f16(
-// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vst4q_lane_f32(
-// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
}
// CHECK-LABEL: @test_vst4q_lane_p16(
-// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
-// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
+// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
+// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
}
// CHECK-LABEL: @test_vtrnq_s8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
return vtrnq_s8(a, b);
}
// CHECK-LABEL: @test_vtrnq_s16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
return vtrnq_s16(a, b);
}
// CHECK-LABEL: @test_vtrnq_s32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
return vtrnq_s32(a, b);
}
// CHECK-LABEL: @test_vtrnq_u8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
return vtrnq_u8(a, b);
}
// CHECK-LABEL: @test_vtrnq_u16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
return vtrnq_u16(a, b);
}
// CHECK-LABEL: @test_vtrnq_u32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
return vtrnq_u32(a, b);
}
// CHECK-LABEL: @test_vtrnq_f32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
return vtrnq_f32(a, b);
}
// CHECK-LABEL: @test_vtrnq_p8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
return vtrnq_p8(a, b);
}
// CHECK-LABEL: @test_vtrnq_p16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
return vtrnq_p16(a, b);
}
// CHECK-LABEL: @test_vuzpq_s8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
return vuzpq_s8(a, b);
}
// CHECK-LABEL: @test_vuzpq_s16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
return vuzpq_s16(a, b);
}
// CHECK-LABEL: @test_vuzpq_s32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
return vuzpq_s32(a, b);
}
// CHECK-LABEL: @test_vuzpq_u8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
return vuzpq_u8(a, b);
}
// CHECK-LABEL: @test_vuzpq_u16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
return vuzpq_u16(a, b);
}
// CHECK-LABEL: @test_vuzpq_u32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
return vuzpq_u32(a, b);
}
// CHECK-LABEL: @test_vuzpq_f32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
return vuzpq_f32(a, b);
}
// CHECK-LABEL: @test_vuzpq_p8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
return vuzpq_p8(a, b);
}
// CHECK-LABEL: @test_vuzpq_p16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
return vuzpq_p16(a, b);
}
// CHECK-LABEL: @test_vzipq_s8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
return vzipq_s8(a, b);
}
// CHECK-LABEL: @test_vzipq_s16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
return vzipq_s16(a, b);
}
// CHECK-LABEL: @test_vzipq_s32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
return vzipq_s32(a, b);
}
// CHECK-LABEL: @test_vzipq_u8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
return vzipq_u8(a, b);
}
// CHECK-LABEL: @test_vzipq_u16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
return vzipq_u16(a, b);
}
// CHECK-LABEL: @test_vzipq_u32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
return vzipq_u32(a, b);
}
// CHECK-LABEL: @test_vzipq_f32(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
return vzipq_f32(a, b);
}
// CHECK-LABEL: @test_vzipq_p8(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
return vzipq_p8(a, b);
}
// CHECK-LABEL: @test_vzipq_p16(
-// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
return vzipq_p16(a, b);